UTF8编码解码 -- -- 编程爱好者博客

{*******************************************************************************
  UTF-8 encodes UCS code points using 8-bit units. The mapping from UCS-2 to
  UTF-8 is:

    UCS-2 code (hex)    UTF-8 byte stream (binary)
    0000 - 007F         0xxxxxxx
    0080 - 07FF         110xxxxx 10xxxxxx
    0800 - FFFF         1110xxxx 10xxxxxx 10xxxxxx

  Example: the Unicode code of the Chinese character "han" is 6C49. 6C49 lies
  in 0800-FFFF, so the three-byte template 1110xxxx 10xxxxxx 10xxxxxx is used.
  6C49 in binary is 0110 110001 001001; substituting these bits for the x's in
  the template yields 11100110 10110001 10001001, i.e. E6 B1 89.
*******************************************************************************}
unit uUnicodeUtils;

interface

uses
  SysUtils, Windows;

{ Converts a string in the system ANSI code page (CP_ACP, e.g. GBK) to a
  UTF-8 encoded byte string. Returns '' for empty input or when the ANSI to
  UTF-16 conversion fails. }
function Gb2Utf8(const GbStr: String): String;

{ Converts a UTF-8 encoded byte string (1- to 3-byte sequences, i.e. the BMP)
  to a string in the system ANSI code page. Returns '' for empty input. }
function Utf82Gb(const Utf8Str: String): String;

implementation

function Gb2Utf8(const GbStr: String): String;
var
  WideBuf: WideString;
  SourceLength: Integer;
  WideLength: Integer;
  Index: Integer;
  CodeUnit: Integer;
  Byte1, Byte2, Byte3: Integer;
begin
  Result := '';
  SourceLength := Length(GbStr);
  if SourceLength = 0 then
    Exit;

  // Every ANSI byte yields at most one UTF-16 code unit, so a buffer of
  // SourceLength characters is always large enough. The buffer is sized from
  // the input rather than fixed, so long inputs cannot overrun it.
  SetLength(WideBuf, SourceLength);
  WideLength := MultiByteToWideChar(CP_ACP, MB_PRECOMPOSED, PAnsiChar(GbStr),
    SourceLength, PWideChar(WideBuf), SourceLength);
  if WideLength = 0 then
    Exit;

  for Index := 1 to WideLength do
  begin
    CodeUnit := Ord(WideBuf[Index]);
    case CodeUnit of
      $0000..$007F:
        // One byte: 0xxxxxxx
        Result := Result + Chr(CodeUnit);
      $0080..$07FF:
        begin
          // Two bytes: 110xxxxx 10xxxxxx
          Byte1 := ((CodeUnit shr 6) and $001F) + $00C0;
          Byte2 := (CodeUnit and $003F) + $0080;
          Result := Result + Chr(Byte1) + Chr(Byte2);
        end;
      $0800..$FFFF:
        begin
          // Three bytes: 1110xxxx 10xxxxxx 10xxxxxx
          Byte1 := ((CodeUnit shr 12) and $000F) + $00E0;
          Byte2 := ((CodeUnit shr 6) and $003F) + $0080;
          Byte3 := (CodeUnit and $003F) + $0080;
          Result := Result + Chr(Byte1) + Chr(Byte2) + Chr(Byte3);
        end;
    end;
  end;
end;

function Utf82Gb(const Utf8Str: String): String;
var
  SourceLength: Integer;
  Index: Integer;
  LeadByte: Integer;
  CodePoint: Integer;
begin
  Result := '';
  // Only truly empty input short-circuits; whitespace-only input is ordinary
  // ASCII and must be passed through unchanged.
  SourceLength := Length(Utf8Str);
  if SourceLength = 0 then
    Exit;

  Index := 1;
  while Index <= SourceLength do
  begin
    LeadByte := Ord(Utf8Str[Index]);

    if LeadByte < $80 then
    begin
      // One byte: 0xxxxxxx
      CodePoint := LeadByte;
      Inc(Index);
    end
    else if (LeadByte and $E0) = $C0 then
    begin
      // Two bytes: 110xxxxx 10xxxxxx. Stop at a truncated trailing sequence.
      if Index + 1 > SourceLength then
        Break;
      CodePoint := ((LeadByte and $1F) shl 6)
        + (Ord(Utf8Str[Index + 1]) and $3F);
      Inc(Index, 2);
    end
    else if (LeadByte and $F0) = $E0 then
    begin
      // Three bytes: 1110xxxx 10xxxxxx 10xxxxxx. Stop at a truncated
      // trailing sequence.
      if Index + 2 > SourceLength then
        Break;
      CodePoint := ((LeadByte and $0F) shl 12)
        + ((Ord(Utf8Str[Index + 1]) and $3F) shl 6)
        + (Ord(Utf8Str[Index + 2]) and $3F);
      Inc(Index, 3);
    end
    else
    begin
      Inc(Index);
      // A continuation byte (10xxxxxx) with no lead byte carries no
      // character of its own: skip it instead of re-emitting the previous
      // character.
      if (LeadByte and $C0) = $80 then
        Continue;
      // Lead bytes of 4-byte (or longer) sequences encode code points outside
      // the BMP, which a single WideChar cannot hold. Emit a placeholder; the
      // sequence's continuation bytes are skipped by the branch above.
      CodePoint := Ord('?');
    end;

    // Appending a WideChar to the result converts it to the system ANSI
    // code page.
    Result := Result + WideChar(CodePoint);
  end;
end;

end.

博客介绍

正文

UTF8编码解码2006-05-24 19:10:00

评论