正文

Linux下 UTF16转 UTF8 2010-08-17 13:55:00

【评论】 【打印】 【字体: 】 本文链接:http://blog.pfan.cn/miaowei/51666.html

分享到:

/*
 * UTF16ToUTF8 - convert a buffer of little-endian UTF-16 to UTF-8.
 *
 * out     destination buffer for the UTF-8 bytes.
 * outlen  in:  capacity of out in bytes.
 *         out: number of bytes actually written.
 * inb     source buffer holding UTF-16 code units, low byte first.
 *         It is read byte by byte, so it needs no particular alignment.
 * inlenb  in:  length of inb in bytes (an odd trailing byte is ignored).
 *         out: number of input bytes consumed.
 *
 * Returns the number of bytes written, or -1 if the input contains an
 * unpaired surrogate (or if outlen/inlenb is a null pointer). On error the
 * lengths describe the data converted before the offending code unit.
 *
 * Conversion stops early, without error, when the next code point does not
 * fit completely in the remaining output space or when the input ends in the
 * middle of a surrogate pair; a partial UTF-8 sequence is never written and
 * the unconverted input is left for a later call.
 */
int UTF16ToUTF8(unsigned char *out, int *outlen, const unsigned char *inb, int *inlenb)
{
    int outcap;
    int inlen;
    int ipos = 0;    /* input bytes consumed so far */
    int opos = 0;    /* output bytes written so far */
    int status = 0;  /* becomes -1 on malformed input */

    if(!outlen || !inlenb) return -1;

    /* Negative lengths are treated as empty buffers. */
    outcap = (*outlen > 0) ? *outlen : 0;
    inlen  = (*inlenb > 0) ? *inlenb : 0;
    inlen -= inlen % 2;  /* only whole 16-bit units are converted */

    while(inlen - ipos >= 2)
    {
        /* Assemble the code unit from bytes: independent of host byte
           order and of the alignment of inb. */
        unsigned int c = (unsigned int)inb[ipos] | ((unsigned int)inb[ipos + 1] << 8);
        int used = 2;
        int need;
        int shift;

        if((c & 0xF800) == 0xD800)
        {
            unsigned int d;

            /* A low surrogate must not appear without a preceding high one. */
            if((c & 0xFC00) != 0xD800) { status = -1; break; }

            /* High surrogate at the very end: wait for more input. */
            if(inlen - ipos < 4) break;

            d = (unsigned int)inb[ipos + 2] | ((unsigned int)inb[ipos + 3] << 8);
            if((d & 0xFC00) != 0xDC00) { status = -1; break; }

            c = 0x10000 + (((c & 0x03FF) << 10) | (d & 0x03FF));
            used = 4;
        }

        if     (c <    0x80) need = 1;
        else if(c <   0x800) need = 2;
        else if(c < 0x10000) need = 3;
        else                 need = 4;

        /* Emit a code point only if it fits completely. */
        if(outcap - opos < need) break;

        switch(need)
        {
        case 1:  out[opos++] = (unsigned char)c;                  break;
        case 2:  out[opos++] = (unsigned char)(0xC0 | (c >>  6)); break;
        case 3:  out[opos++] = (unsigned char)(0xE0 | (c >> 12)); break;
        default: out[opos++] = (unsigned char)(0xF0 | (c >> 18)); break;
        }

        /* Continuation bytes, 6 payload bits each, most significant first. */
        for(shift = (need - 2) * 6; shift >= 0; shift -= 6)
        {
            out[opos++] = (unsigned char)(0x80 | ((c >> shift) & 0x3F));
        }

        ipos += used;
    }

    *outlen = opos;
    *inlenb = ipos;

    return (status < 0) ? -1 : opos;
}

阅读(4595) | 评论(0)


版权声明:编程爱好者网站为此博客服务提供商,如本文牵涉到版权问题,编程爱好者网站不承担相关责任,如有版权问题请直接与本文作者联系解决。谢谢!

评论

暂无评论
您需要登录后才能评论,请 登录 或者 注册