ZUtf8_16.h文件:
//--------------------------------------------------------------------------- #ifndef ZUtf8_16H #define ZUtf8_16H //--------------------------------------------------------------------------- /* 支持UNICODE,UNICODE BE ,UTF8,ASCII之间的转换的类。 日期:2007-06-15 版本:1.0 作者:小笨象 网站:http://www.9ele.com 邮箱:zxjrainbow@9ele.com //不要发垃圾邮件给我~~ 说明:你可以随意使用本文件,不过如果你修改了其中的BUG, 或者修改得更好了,请你也通知我一下, 让我也能享受一下开源的好处,谢谢。 */ enum EncodingType { uni8Bit = 0, // 默认ASCII uni16BE = 1, uni16LE = 2, // Windows 默认的编码,也就是UNICODE uniUTF8 = 3, uniUTF8NOBOM = 4 // 没有UTF8标识头的UTF8文件 }; class ZUtf8_16 { private: EncodingType m_unicodeMode; // 编码方式 int isUTF8_16(const char *s, unsigned int len, unsigned *cchUnused); EncodingType __fastcall DetermineEncoding(unsigned char *data, size_t iLen); public: __fastcall ZUtf8_16(); __fastcall ~ZUtf8_16(); EncodingType __fastcall GetEncodingType(void){return m_unicodeMode;}; bool __fastcall LoadFromStream(TMemoryStream *pStream, AnsiString &DestText); bool __fastcall StreamSaveToFile(TMemoryStream *pStream, AnsiString FileNameA, EncodingType unicodeMode); }; #endif
ZUtf8_16.cpp文件:
//--------------------------------------------------------------------------- #include <vcl.h> #pragma hdrstop #include <stdio.h> #include "ZUtf8_16.h" #pragma package(smart_init) __fastcall ZUtf8_16::ZUtf8_16() { m_unicodeMode = uni8Bit; } //--------------------------------------------------------------------------- __fastcall ZUtf8_16::~ZUtf8_16() { } //--------------------------------------------------------------------------- int ZUtf8_16::isUTF8_16(const char *s, unsigned int len, unsigned *cchUnused) { int rv = 1; int ASCII7only = 1; const unsigned char *sx = (unsigned char *)s, *endx = sx+len; while(sx < endx) { if(!*sx) { // For detection, we''ll say that NUL means not UTF8 ASCII7only = 0; rv = 0; break; } else if (*sx < 0x80) { // 0nnnnnnn If the byte''s first hex code begins with 0-7, it is an ASCII character. sx++; } else if (*sx < (0x80 + 0x40)) { // 10nnnnnn 8 through B cannot be first hex codes ASCII7only = 0; rv = 0; break; } else if(*sx < (0x80 + 0x40 + 0x20)) { // 110xxxvv 10nnnnnn If it begins with C or D, it is an 11 bit character ASCII7only = 0; if(sx >= endx-1) break; if(!(*sx & 0x1F) || (sx[1]&(0x80+0x40)) != 0x80) { rv = 0; break; } sx += 2; } else if (*sx < (0x80 + 0x40 + 0x20 + 0x10)) { // 1110qqqq 10xxxxvv 10nnnnnn If it begins with E, it is 16 bit ASCII7only = 0; if(sx >= endx-2) break; if(!(*sx |