先將中文轉為 UTF-8,再轉換為 wide character(Unicode 碼位);透過此方式即可將文字轉為數字傳送出去,
接收端再依數字的長度進行解譯,將數字組回文字。
---------------------------------------------------------------------
宣告以下資訊:
---.h
/*
 * Lookup table indexed by the value of a UTF-8 lead byte; each entry is
 * the number of bytes that follow that lead byte in its sequence.
 * Rows are 16 entries wide and labelled with the byte range they cover.
 */
static const char trailingBytesForUTF8[256] = {
    /* 0x00-0x0F */ 0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0,
    /* 0x10-0x1F */ 0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0,
    /* 0x20-0x2F */ 0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0,
    /* 0x30-0x3F */ 0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0,
    /* 0x40-0x4F */ 0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0,
    /* 0x50-0x5F */ 0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0,
    /* 0x60-0x6F */ 0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0,
    /* 0x70-0x7F */ 0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0,
    /* 0x80-0x8F */ 0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0,
    /* 0x90-0x9F */ 0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0,
    /* 0xA0-0xAF */ 0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0,
    /* 0xB0-0xBF */ 0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0,
    /* 0xC0-0xCF */ 1,1,1,1, 1,1,1,1, 1,1,1,1, 1,1,1,1,
    /* 0xD0-0xDF */ 1,1,1,1, 1,1,1,1, 1,1,1,1, 1,1,1,1,
    /* 0xE0-0xEF */ 2,2,2,2, 2,2,2,2, 2,2,2,2, 2,2,2,2,
    /* 0xF0-0xFF */ 3,3,3,3, 3,3,3,3, 4,4,4,4, 5,5,5,5
};
/*
 * Constants indexed by the number of trailing bytes in a sequence.
 * Each value equals the sum of the sequence's marker bits (lead-byte tag
 * plus the 0x80 tag of every continuation byte) after the bytes have been
 * accumulated with 6-bit shifts, so subtracting it leaves the code point.
 */
static const unsigned int offsetsFromUTF8[6] = {
    0x00000000U,  /* 0 trailing bytes: nothing to remove             */
    0x00003080U,  /* 1: (0xC0 << 6) + 0x80                           */
    0x000E2080U,  /* 2: (0xE0 << 12) + (0x80 << 6) + 0x80            */
    0x03C82080U,  /* 3: (0xF0 << 18) + (0x80 << 12) + ...            */
    0xFA082080U,  /* 4: (0xF8 << 24) + (0x80 << 18) + ...            */
    0x82082080U   /* 5: same sum, reduced modulo 2^32                */
};
/* Decode the UTF-8 bytes in src (srcsz bytes, or NUL-terminated when srcsz
   is -1) into code points stored in dest, which holds sz elements including
   the terminating 0. Returns the number of code points stored. */
int u8_toucs(unsigned int* dest, int sz, char* src, int srcsz);
/* Encode the code points in src (srcsz entries, or 0-terminated when srcsz
   is negative) as UTF-8 bytes in dest, which holds sz bytes. Returns the
   number of code points consumed from src. */
int u8_toutf8(char* dest, int sz, unsigned int* src, int srcsz);
/* NOTE(review): the definition is not shown in this file; presumably encodes
   the single code point ch into dest -- confirm against the implementation. */
int u8_wc_toutf8(char* dest, unsigned int ch);
-----.cpp
/*
 * Decode UTF-8 bytes into an array of 32-bit code points.
 *
 *   dest   output array; 0-terminated on return whenever sz > 0
 *   sz     capacity of dest in elements, including the terminating 0
 *   src    UTF-8 input bytes
 *   srcsz  number of input bytes, or negative when src is NUL-terminated
 *
 * Returns the number of code points stored, not counting the terminator.
 * Decoding stops when dest is full, when the input is exhausted, or when
 * the last sequence is truncated. The input is not validated: the length
 * of each sequence is taken from its lead byte alone.
 */
int u8_toucs(unsigned int* dest, int sz, char* src, int srcsz){
    /* Only used in counted mode; avoids forming src + (negative value). */
    char* src_end = (srcsz >= 0) ? src + srcsz : src;
    unsigned int ch;
    int nb;
    int k;
    int i = 0;

    /* No room even for the terminator: leave dest untouched. */
    if (sz <= 0)
        return 0;

    while (i < sz - 1) {
        if (srcsz < 0) {
            if (*src == 0)
                goto done_toucs;
            nb = trailingBytesForUTF8[(unsigned char)*src];
            /* A NUL inside the sequence means it is truncated; stop
               instead of reading past the string terminator. */
            for (k = 1; k <= nb; k++) {
                if (src[k] == 0)
                    goto done_toucs;
            }
        }
        else {
            /* Check the bound before touching the lead byte. */
            if (src >= src_end)
                goto done_toucs;
            nb = trailingBytesForUTF8[(unsigned char)*src];
            /* The sequence needs nb + 1 bytes in total. */
            if (src_end - src <= nb)
                goto done_toucs;
        }
        ch = 0;
        /* Accumulate every byte of the sequence, 6 bits per step. The
           cases fall through deliberately. Lengths 4 and 5 are handled so
           that src always advances by the amount the table announced;
           the unsigned arithmetic wraps, matching offsetsFromUTF8. */
        switch (nb) {
        case 5: ch += (unsigned char)*src++; ch <<= 6; /* fall through */
        case 4: ch += (unsigned char)*src++; ch <<= 6; /* fall through */
        case 3: ch += (unsigned char)*src++; ch <<= 6; /* fall through */
        case 2: ch += (unsigned char)*src++; ch <<= 6; /* fall through */
        case 1: ch += (unsigned char)*src++; ch <<= 6; /* fall through */
        case 0: ch += (unsigned char)*src++;
        }
        /* Strip the marker bits that were summed in with the payload. */
        ch -= offsetsFromUTF8[nb];
        dest[i++] = ch;
    }
done_toucs:
    dest[i] = 0;
    return i;
}
/*
 * Encode an array of code points as UTF-8.
 *
 *   dest   output byte buffer
 *   sz     capacity of dest in bytes
 *   src    input code points
 *   srcsz  number of input code points, or negative when src is terminated
 *          by a 0 entry
 *
 * Returns the number of code points consumed from src. Values of 0x110000
 * and above are consumed without producing output. Encoding stops at the
 * first code point that does not fit completely; the output is
 * NUL-terminated whenever at least one byte of dest remains unused,
 * including when encoding stopped early for lack of space.
 */
int u8_toutf8(char* dest, int sz, unsigned int* src, int srcsz)
{
    unsigned int ch;
    int i = 0;
    int need;
    /* Track remaining capacity as a count so no pointer outside the
       buffer is ever formed. */
    int room = (sz > 0) ? sz : 0;

    while (srcsz < 0 ? src[i] != 0 : i < srcsz) {
        ch = src[i];

        if (ch < 0x80)
            need = 1;
        else if (ch < 0x800)
            need = 2;
        else if (ch < 0x10000)
            need = 3;
        else if (ch < 0x110000)
            need = 4;
        else
            need = 0;   /* out of range: skipped, nothing emitted */

        /* Stop before a partial sequence; fall out to the terminator. */
        if (need > room)
            break;

        switch (need) {
        case 1:
            *dest++ = (char)ch;
            break;
        case 2:
            *dest++ = (char)((ch >> 6) | 0xC0);
            *dest++ = (char)((ch & 0x3F) | 0x80);
            break;
        case 3:
            *dest++ = (char)((ch >> 12) | 0xE0);
            *dest++ = (char)(((ch >> 6) & 0x3F) | 0x80);
            *dest++ = (char)((ch & 0x3F) | 0x80);
            break;
        case 4:
            *dest++ = (char)((ch >> 18) | 0xF0);
            *dest++ = (char)(((ch >> 12) & 0x3F) | 0x80);
            *dest++ = (char)(((ch >> 6) & 0x3F) | 0x80);
            *dest++ = (char)((ch & 0x3F) | 0x80);
            break;
        default:
            break;
        }
        room -= need;
        i++;
    }
    if (room > 0)
        *dest = '\0';
    return i;
}
-----------------------------------------------
Sample: 範例
// Round-trip sample: UTF-8 text -> code points -> UTF-8 text.
char result_str2[100] = {};
uint32_t b_ucs[100] = {}; // plenty of space
int b_chars = 0;
char cbuffdata[100] = u8"哈摟哈!";
uint32_t utf_len2 = strlen(cbuffdata);
// The second argument is the capacity of b_ucs in elements, not a size
// derived from the input length, so the decoder can never overrun it.
b_chars = u8_toucs(b_ucs, (int)(sizeof b_ucs / sizeof b_ucs[0]), cbuffdata, (int)utf_len2);
// The conversion to code points is complete here; b_chars is the count.
// Pass that count (not the UTF-8 byte length) and the real size of the
// output buffer when converting back.
b_chars = u8_toutf8(result_str2, (int)sizeof result_str2, b_ucs, b_chars);
// This step converts the code points back to UTF-8.
----可再搭配其他轉換方式將UTF8轉成ansi顯示
// NOTE(review): CString and ConvertUTF8toANSI are not defined in this file;
// presumably the call fills csbuf with the ANSI form of result_str2 -- confirm.
CString csbuf;
ConvertUTF8toANSI(result_str2, &csbuf);
// Link to this function
-----------------------------------------------------------------------
沒有留言:
張貼留言