/*
 * Character-encoding conversion helpers for Chinese and Japanese text.
 *
 * Original article text (translated):
 *   Recently I have been working on character-encoding tasks that often
 *   involve converting characters between encodings, mainly Chinese and
 *   Japanese ones: Chinese GB2312, Japanese JIS and SHIFT-JIS, and the
 *   conversion between each of them and Unicode.
 *
 * Every function converts ONE character, passed and returned as a 16-bit
 * code: lead byte in the high 8 bits, trail byte in the low 8 bits.
 * A return value of 0 means "could not be converted".
 */

#ifdef _WIN32
#include <windows.h>

/* Windows code-page identifiers used below. */
enum {
    CODEPAGE_SJIS  = 932,    /* Shift-JIS */
    CODEPAGE_GBK   = 936,    /* GBK / GB2312 */
    CODEPAGE_EUCJP = 20932   /* EUC-JP flavoured JIS X 0208 / 0212 */
};

/*
 * Decode the two bytes packed in usCode (lead << 8 | trail) with the given
 * code page and return the resulting UTF-16 code unit.
 *
 * Returns 0 when MultiByteToWideChar does not produce exactly one UTF-16
 * unit (failure, or the two bytes decode to more than one character).
 * The original code returned an uninitialized variable in that case.
 */
static unsigned short MBCS2UNI(UINT uCodePage, unsigned short usCode)
{
    unsigned char szBytes[2];
    WCHAR wcOut = 0;

    szBytes[0] = (unsigned char)(usCode >> 8);
    szBytes[1] = (unsigned char)(usCode & 0xFF);

    if (MultiByteToWideChar(uCodePage, 0, (LPCSTR)szBytes, 2, &wcOut, 1) != 1)
        return 0;

    return (unsigned short)wcOut;
}

/*
 * Encode one UTF-16 code unit with the given code page and return the
 * result packed as (first byte << 8) | second byte.  A single-byte result
 * therefore occupies the high byte with a zero low byte, exactly as in the
 * original implementation.
 *
 * Only the character itself is converted (cchWideChar == 1).  The original
 * passed 2 (character + NUL) with a 2-byte output buffer, so every
 * double-byte result overflowed the buffer and the call failed with
 * ERROR_INSUFFICIENT_BUFFER; it only appeared to work through the
 * partially written buffer.
 *
 * Returns 0 when the conversion fails.
 */
static unsigned short UNI2MBCS(UINT uCodePage, unsigned short usUNI)
{
    unsigned char szBytes[3] = { 0 };
    WCHAR wcIn = (WCHAR)usUNI;
    int cbWritten;

    cbWritten = WideCharToMultiByte(uCodePage, 0, &wcIn, 1,
                                    (LPSTR)szBytes, 2, NULL, NULL);
    if (cbWritten <= 0)
        return 0;

    return (unsigned short)((szBytes[0] << 8) | szBytes[1]);
}

/* ---- 1. GBK <==> Unicode ------------------------------------------- */

/* Convert one GBK character (lead << 8 | trail) to UTF-16; 0 on failure. */
unsigned short GBK2UNI(unsigned short usGBK)
{
    return MBCS2UNI(CODEPAGE_GBK, usGBK);
}

/* Convert one UTF-16 code unit to GBK (lead << 8 | trail); 0 on failure. */
unsigned short UNI2GBK(unsigned short usUNI)
{
    return UNI2MBCS(CODEPAGE_GBK, usUNI);
}

/* ---- 2. SHIFT-JIS <==> Unicode ------------------------------------- */

/* Convert one Shift-JIS character (lead << 8 | trail) to UTF-16; 0 on failure. */
unsigned short SJIS2UNI(unsigned short usSJIS)
{
    return MBCS2UNI(CODEPAGE_SJIS, usSJIS);
}

/* Convert one UTF-16 code unit to Shift-JIS (lead << 8 | trail); 0 on failure. */
unsigned short UNI2SJIS(unsigned short usUNI)
{
    return UNI2MBCS(CODEPAGE_SJIS, usUNI);
}

/* ---- 3. JIS <=> Unicode -------------------------------------------- */

/*
 * Convert one 7-bit JIS code (row << 8 | cell) to UTF-16; 0 on failure.
 * Code page 20932 expects the EUC form, i.e. the JIS code with the high
 * bit of both bytes set, hence the OR with 0x8080.
 */
unsigned short JIS2UNI(unsigned short usJIS)
{
    return MBCS2UNI(CODEPAGE_EUCJP, (unsigned short)(usJIS | 0x8080));
}

/*
 * Convert one UTF-16 code unit to a 7-bit JIS code (row << 8 | cell).
 *
 * Code page 20932 produces the EUC form (both bytes 0xA1..0xFE), so the
 * high bits must be stripped to obtain the JIS code -- the exact inverse
 * of JIS2UNI.  The original returned the raw EUC bytes, which is why it
 * "did not work".  Any result that is not a two-byte code with both bytes
 * in 0xA1..0xFE (ASCII, half-width katakana, other planes, failure) has
 * no 7-bit JIS equivalent here and yields 0.
 */
unsigned short UNI2JIS(unsigned short usUNI)
{
    unsigned short usEUC = UNI2MBCS(CODEPAGE_EUCJP, usUNI);
    unsigned int uLead  = usEUC >> 8;
    unsigned int uTrail = usEUC & 0xFF;

    if (uLead < 0xA1 || uLead > 0xFE || uTrail < 0xA1 || uTrail > 0xFE)
        return 0;

    return (unsigned short)(usEUC & 0x7F7F);
}

#endif /* _WIN32 */

/* ---- 4. JIS <=> SHIFT-JIS ------------------------------------------ */

/*
 * Convert one double-byte Shift-JIS code (lead << 8 | trail) to the
 * corresponding 7-bit JIS code (row << 8 | cell).
 *
 * Accepts lead bytes 0x81..0x9F and 0xE0..0xEF with trail bytes
 * 0x40..0x7E and 0x80..0xFC.  Returns 0 for anything else.
 */
unsigned short SJIS2JIS( unsigned short sjis )
{
    unsigned int lead  = sjis >> 8;
    unsigned int trail = sjis & 0x00FF;
    int inLowBlock  = (sjis >= 0x8140) && (sjis <= 0x9FFC);
    int inHighBlock = (sjis >= 0xE040) && (sjis <= 0xEFFC);

    if (!inLowBlock && !inHighBlock)
        return 0;
    if (trail <= 0x3F || trail == 0x7F || trail >= 0xFD)
        return 0;

    /* Collapse the two lead-byte blocks into one contiguous index; each
     * Shift-JIS lead byte covers two consecutive JIS rows. */
    lead -= (lead >= 0xE0) ? 0xC0 : 0x80;
    lead = (lead << 1) + 0x1F;

    if (trail >= 0x9F) {
        /* Upper half of the trail range: the second (even) row. */
        lead++;
        trail -= 0x7E;
    } else {
        /* Lower half: close the gap left by the unused 0x7F. */
        if (trail >= 0x80)
            trail--;
        trail -= 0x1F;
    }

    return (unsigned short)((lead << 8) + trail);
}

/*
 * Convert one 7-bit JIS code (row << 8 | cell) to the corresponding
 * double-byte Shift-JIS code (lead << 8 | trail).
 *
 * Both bytes must lie in 0x21..0x7E; otherwise 0 is returned.  The
 * original did not bound the cell byte from above, so e.g. 0x217F was
 * mapped onto the code of a different character (0x819F), 0x227F produced
 * the invalid trail byte 0xFD, and 0x21FF overflowed into the lead byte.
 */
unsigned short JIS2SJIS( unsigned short jis )
{
    unsigned int row  = jis >> 8;
    unsigned int cell = jis & 0x00FF;
    unsigned int lead;
    unsigned int trail;

    if (row < 0x21 || row > 0x7E || cell < 0x21 || cell > 0x7E)
        return 0;

    if (row & 0x0001) {
        /* Odd row: trail bytes 0x40..0x7E and 0x80..0x9E (0x7F is skipped). */
        trail = cell + 0x1F;
        if (trail >= 0x7F)
            trail++;
    } else {
        /* Even row: trail bytes 0x9F..0xFC, sharing the lead byte of the
         * preceding odd row. */
        trail = cell + 0x7E;
        row--;
    }

    lead = ((row - 0x1F) >> 1) + 0x80;
    if (lead >= 0xA0)
        lead += 0x40;   /* skip 0xA0..0xDF, which are not Shift-JIS lead bytes */

    return (unsigned short)((lead << 8) + trail);
}

/*
 * Author's closing note (translated):
 *   The UNI2JIS function did not seem to work well; all the others were
 *   tested and had no problems.  I did not know the exact cause yet, so I
 *   converted Unicode to JIS in two steps: first Unicode to SHIFT-JIS,
 *   then SHIFT-JIS to JIS.  If anyone knows the reason, feel free to point
 *   it out on my blog.
 *   That is all; I hope this is of some use to those who need it.
 *
 * Review note: the cause is that code page 20932 emits EUC-JP bytes (high
 * bit set on both bytes) rather than 7-bit JIS.  UNI2JIS above now strips
 * those bits, so the two-step workaround is no longer required.
 */

评论