#if 0 |
#include <stdio.h> |
#include <string.h> |
#include <assert.h> |
|
/*
 * Encode a single code point as UTF-8 (original 1..6 byte scheme).
 *
 *   U-00000000 - U-0000007F: 0xxxxxxx
 *   U-00000080 - U-000007FF: 110xxxxx 10xxxxxx
 *   U-00000800 - U-0000FFFF: 1110xxxx 10xxxxxx 10xxxxxx
 *   U-00010000 - U-001FFFFF: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
 *   U-00200000 - U-03FFFFFF: 111110xx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx
 *   U-04000000 - U-7FFFFFFF: 1111110x 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx
 *
 * Parameters:
 *   unic     code point to encode
 *   pOutput  destination buffer; no terminator is written
 *   outSize  number of bytes available at pOutput
 *
 * Returns the number of bytes written (1..6), or 0 when the code point is
 * above 0x7FFFFFFF, pOutput is NULL, or the buffer is too small for the
 * encoding.  Nothing is written when 0 is returned.
 */
int enc_unicode_to_utf8_one(unsigned long unic, unsigned char *pOutput,
                            int outSize)
{
    /* Largest code point that fits in an encoding of 1..6 bytes. */
    static const unsigned long limit[6] = {
        0x0000007FUL, 0x000007FFUL, 0x0000FFFFUL,
        0x001FFFFFUL, 0x03FFFFFFUL, 0x7FFFFFFFUL
    };
    /* Marker bits of the first byte for an encoding of 1..6 bytes. */
    static const unsigned char lead[6] = { 0x00, 0xC0, 0xE0, 0xF0, 0xF8, 0xFC };

    int len = 0;
    for (int i = 0; i < 6; i++) {
        if (unic <= limit[i]) {
            len = i + 1;
            break;
        }
    }
    if (len == 0)
        return 0;

    /* Checked at run time (not with assert) so the guard survives NDEBUG,
     * and against the length actually needed rather than a fixed 6. */
    if (pOutput == NULL || outSize < len)
        return 0;

    /* Continuation bytes are filled from the end, six payload bits each. */
    for (int i = len - 1; i > 0; i--) {
        pOutput[i] = (unsigned char)(0x80 | (unic & 0x3F));
        unic >>= 6;
    }
    /* The range check guarantees the remaining bits fit the lead payload. */
    pOutput[0] = (unsigned char)(lead[len - 1] | unic);
    return len;
}
//test |
/*
 * Convert a zero-terminated array of code points to a NUL-terminated UTF-8
 * string (1..6 byte scheme).
 *
 *   up to 0x0000007F : 1 byte     up to 0x001FFFFF : 4 bytes
 *   up to 0x000007FF : 2 bytes    up to 0x03FFFFFF : 5 bytes
 *   up to 0x0000FFFF : 3 bytes    up to 0x7FFFFFFF : 6 bytes
 *
 * Code points above 0x7FFFFFFF produce no output.  The caller must supply a
 * buffer large enough for the whole result; no bound is checked here.
 *
 * Returns a pointer to the terminating NUL that was written to pOutput.
 */
char *unicode_to_utf8(unsigned int *unic, char *pOutput)
{
    for (const unsigned int *src = unic; *src != 0; ++src) {
        const unsigned int cp = *src;
        unsigned int marker;  /* high bits of the first byte */
        int tail;             /* number of continuation bytes */

        if (cp < 0x80u) {
            *pOutput++ = (char)cp;
            continue;
        } else if (cp < 0x800u) {
            marker = 0xC0u;
            tail = 1;
        } else if (cp < 0x10000u) {
            marker = 0xE0u;
            tail = 2;
        } else if (cp < 0x200000u) {
            marker = 0xF0u;
            tail = 3;
        } else if (cp < 0x4000000u) {
            marker = 0xF8u;
            tail = 4;
        } else if (cp <= 0x7FFFFFFFu) {
            marker = 0xFCu;
            tail = 5;
        } else {
            continue;  /* not encodable: skipped without output */
        }

        /* The range tests above ensure the shifted value fits the lead byte. */
        *pOutput++ = (char)(marker | (cp >> (6 * tail)));
        while (tail-- > 0)
            *pOutput++ = (char)(0x80u | ((cp >> (6 * tail)) & 0x3Fu));
    }

    *pOutput = '\0';
    return pOutput;
}
// #c---end |
//2) 将一个字符的UTF8编码转换成Unicode(UCS-2和UCS-4)编码. |
//[cpp] view plaincopy |
//<span xmlns="http://www.w3.org/1999/xhtml" style="">// #c--- |
/***************************************************************************** |
* 将一个字符的UTF8编码转换成Unicode(UCS-2和UCS-4)编码. |
* |
* 参数: |
* pInput 指向输入缓冲区, 以UTF-8编码 |
* Unic 指向输出缓冲区, 其保存的数据即是Unicode编码值, |
* 类型为unsigned long . |
* |
* 返回值: |
* 成功则返回该字符的UTF8编码所占用的字节数; 失败则返回0. |
* |
* 注意: |
* 1. UTF8没有字节序问题, 但是Unicode有字节序要求; |
* 字节序分为大端(Big Endian)和小端(Little Endian)两种; |
* 在Intel处理器中采用小端法表示, 在此采用小端法表示. (低地址存低位) |
****************************************************************************/ |
/*
 * Classify the first byte of a UTF-8 sequence.
 *
 * Parameters:
 *   pInput  points at the byte to classify; only that one byte is read
 *
 * Returns:
 *    0      single-byte character (0xxxxxxx)
 *    2..6   total length announced by a lead byte (110xxxxx .. 1111110x)
 *   -1      the byte cannot start a sequence: a continuation byte
 *           (10xxxxxx) or 0xFE / 0xFF, which never occur in UTF-8
 */
int enc_get_utf8_size(const char *pInput)
{
    const unsigned char c = (unsigned char)*pInput;

    if (c < 0x80) return 0;   /* 0xxxxxxx */
    if (c < 0xC0) return -1;  /* 10xxxxxx */
    if (c < 0xE0) return 2;   /* 110xxxxx */
    if (c < 0xF0) return 3;   /* 1110xxxx */
    if (c < 0xF8) return 4;   /* 11110xxx */
    if (c < 0xFC) return 5;   /* 111110xx */
    if (c < 0xFE) return 6;   /* 1111110x */
    return -1;                /* 0xFE, 0xFF */
}
#if 1 |
/*
 * Decode the UTF-8 sequence at pInput into a single code point.
 *
 * Parameters:
 *   pInput  start of a UTF-8 sequence (1..6 byte scheme)
 *   Unic    receives the decoded code point; set to 0 on failure
 *
 * Returns the number of input bytes consumed (1..6), or 0 when the first
 * byte cannot start a sequence or a continuation byte is not of the form
 * 10xxxxxx.
 *
 * The value is assembled arithmetically, so the result does not depend on
 * the byte order of the host.  Overlong encodings and surrogate code points
 * are not rejected.
 */
int enc_utf8_to_unicode_one(const char *pInput, unsigned long *Unic)
{
    assert(pInput != NULL && Unic != NULL);

    /* Payload bits carried by the lead byte, indexed by sequence length. */
    static const unsigned char lead_mask[7] = {
        0x7F, 0x00, 0x1F, 0x0F, 0x07, 0x03, 0x01
    };
    const unsigned char *src = (const unsigned char *)pInput;

    *Unic = 0;

    int utfbytes = enc_get_utf8_size(pInput);
    if (utfbytes == 0) {
        /* Single-byte character: one input byte is consumed. */
        *Unic = src[0];
        return 1;
    }
    /* 0xFE / 0xFF are refused here too, whatever the classifier reports. */
    if (utfbytes < 2 || utfbytes > 6 || src[0] >= 0xFE)
        return 0;

    unsigned long value = (unsigned long)(src[0] & lead_mask[utfbytes]);
    for (int i = 1; i < utfbytes; i++) {
        /* Each byte is validated before the next one is read, so a string
         * that ends early stops at its NUL instead of being read past it. */
        if ((src[i] & 0xC0) != 0x80)
            return 0;
        value = (value << 6) | (unsigned long)(src[i] & 0x3F);
    }

    *Unic = value;
    return utfbytes;
}
// #c---end |
//</span> |
#endif |
/*
 * Convert a zero-terminated array of code points to a NUL-terminated UTF-8
 * string of one to four bytes per code point.
 *
 * Parameters:
 *   asc  destination buffer; the caller must provide up to four bytes per
 *        code point plus one for the terminator (no bound is checked here)
 *   uni  zero-terminated source array
 *
 * Code points above 0x1FFFFF do not fit a four-byte sequence and are
 * written as U+FFFD (EF BF BD).
 *
 * Returns a pointer to the terminating NUL that was written to asc.
 */
char *Unicode2Ascii(char *asc, unsigned int *uni)
{
    for (; *uni != 0; uni++) {
        unsigned int cp = *uni;

        if (cp > 0x1FFFFF)
            cp = 0xFFFD;

        if (cp < 0x80) {
            /* U+0080 itself needs two bytes, hence "<" rather than "<=". */
            *asc++ = (char)cp;
        } else if (cp < 0x800) {
            /* Two-byte UTF-8 */
            *asc++ = (char)(0xC0 | (cp >> 6));
            *asc++ = (char)(0x80 | (cp & 0x3F));
        } else if (cp < 0x10000) {
            /* Three-byte UTF-8 */
            *asc++ = (char)(0xE0 | (cp >> 12));
            *asc++ = (char)(0x80 | ((cp >> 6) & 0x3F));
            *asc++ = (char)(0x80 | (cp & 0x3F));
        } else {
            /* Four-byte UTF-8 */
            *asc++ = (char)(0xF0 | (cp >> 18));
            *asc++ = (char)(0x80 | ((cp >> 12) & 0x3F));
            *asc++ = (char)(0x80 | ((cp >> 6) & 0x3F));
            *asc++ = (char)(0x80 | (cp & 0x3F));
        }
    }

    *asc = '\0';
    return asc;
}
/*
 * Decode a NUL-terminated UTF-8 string (sequences of one to four bytes)
 * into a zero-terminated array of code points.
 *
 * Parameters:
 *   uni  destination array; at most one element is written per input byte,
 *        plus the terminator (no bound is checked here)
 *   asc  NUL-terminated UTF-8 input
 *
 * A byte that cannot start a sequence, or a sequence cut short by a byte
 * that is not of the form 10xxxxxx, yields U+FFFD; decoding then resumes at
 * the offending byte, so the terminating NUL is never stepped over.
 * Overlong encodings are not rejected.
 *
 * Returns a pointer to the terminating zero that was written to uni.
 */
unsigned int *Ascii2Unicode(unsigned int *uni, char *asc)
{
    const unsigned char *src = (const unsigned char *)asc;

    while (*src != 0) {
        unsigned int cp = *src++;
        int tail = 0;  /* continuation bytes still expected */

        if (cp < 0x80) {
            /* single byte: cp already holds the value */
        } else if ((cp & 0xE0) == 0xC0) {  /* 110xxxxx */
            cp &= 0x1F;
            tail = 1;
        } else if ((cp & 0xF0) == 0xE0) {  /* 1110xxxx */
            cp &= 0x0F;
            tail = 2;
        } else if ((cp & 0xF8) == 0xF0) {  /* 11110xxx */
            cp &= 0x07;
            tail = 3;
        } else {
            cp = 0xFFFD;  /* stray continuation byte or unsupported lead */
        }

        for (; tail > 0; tail--) {
            if ((*src & 0xC0) != 0x80) {
                cp = 0xFFFD;  /* truncated sequence */
                break;
            }
            cp = (cp << 6) | (unsigned int)(*src++ & 0x3F);
        }

        *uni++ = cp;
    }

    *uni = 0;
    return uni;
}
/***********************************************************************************
Function:    wstrlen()
Description: Counts the elements of a zero-terminated unsigned int array,
             not including the terminating zero. A NULL pointer yields 0.
***********************************************************************************/
unsigned int wstrlen(const unsigned int *str)
{
    unsigned int count = 0;

    if (str == NULL)
        return 0;

    while (str[count] != 0)
        count++;

    return count;
}
/*
 * Demo driver: decodes a short UTF-8 literal with Ascii2Unicode() and prints
 * every resulting code point in hexadecimal.
 */
int main(void)
{
    /* Let the compiler size the array.  With a UTF-8 encoded source file the
     * literal needs more than four bytes including its terminator; a fixed
     * size of 4 silently dropped the NUL and the decoder ran off the end. */
    char temp[] = "3hц";

    /* The decoder writes at most one element per input byte; sizeof temp
     * also counts the NUL, which leaves room for the terminating zero. */
    unsigned int temp2[sizeof temp] = {0};

    unsigned int *end = Ascii2Unicode(temp2, temp);

    for (unsigned int *p = temp2; p != end; p++)
        printf("--%s##%d##--temp2= 0x%x\n", __FILE__, __LINE__, *p);

    return 0;
}
#endif |
#include <stdio.h> |
#include <string.h> |
#include <stdlib.h> |
/* Prints the last five characters of a nine-character string. */
int main()
{
    char text[] = "123456789";
    /* Skip the first 9 - 5 characters. */
    const char *tail = &text[9 - 5];

    printf("%s\n", tail);
    return 0;
}
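/*
 * Forum page footer captured together with the listing (not part of the program):
 * by: (anonymous), posted 2017-08-10 09:26:03; up-votes (0) | down-votes (0); reply
 * "??"
 * reply to comment
 */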