gb2312utf8：C语言实现UTF-8与GB2312互换
来源： 发布时间：星期四, 2009年2月12日 浏览：267次 评论：0
如果您对UTF-8、Unicode、GB2312等还是很陌生的话，请查看 http://www.linuxforum.net/books/UTF-8-Unicode.html ，我这里就不浪费口舌了。下面介绍一下WinAPI的两个函数：WideCharToMultiByte、MultiByteToWideChar。

函数原型：

int WideCharToMultiByte(
    UINT CodePage,            // code page
    DWORD dwFlags,            // performance and mapping flags
    LPCWSTR lpWideCharStr,    // wide-character string
    int cchWideChar,          // number of chars in string
    LPSTR lpMultiByteStr,     // buffer for new string
    int cbMultiByte,          // size of buffer
    LPCSTR lpDefaultChar,     // default for unmappable chars
    LPBOOL lpUsedDefaultChar  // set when default char used
); // 将宽字符转换成多个窄字符

int MultiByteToWideChar(
    UINT CodePage,            // code page
    DWORD dwFlags,            // character-type options
    LPCSTR lpMultiByteStr,    // string to map
    int cbMultiByte,          // number of bytes in string
    LPWSTR lpWideCharStr,     // wide-character buffer
    int cchWideChar           // size of buffer
); // 将多个窄字符转换成宽字符

需要用到的一些函数：

CString CXmlProcess::HexToBin(CString string) // 将16进制数转换成2进制
{
    if (string == "0") return "0000";
    if (string == "1") return "0001";
    if (string == "2") return "0010";
    if (string == "3") return "0011";
    if (string == "4") return "0100";
    if (string == "5") return "0101";
    if (string == "6") return "0110";
    if (string == "7") return "0111";
    if (string == "8") return "1000";
    if (string == "9") return "1001";
    if (string == "a") return "1010";
    if (string == "b") return "1011";
    if (string == "c") return "1100";
    if (string == "d") return "1101";
    if (string == "e") return "1110";
    if (string == "f") return "1111";
    return "";
}

CString CXmlProcess::BinToHex(CString BinString) // 将2进制数转换成16进制
{
    if (BinString == "0000") return "0";
    if (BinString == "0001") return "1";
    if (BinString == "0010") return "2";
    if (BinString == "0011") return "3";
    if (BinString == "0100") return "4";
    if (BinString == "0101") return "5";
    if (BinString == "0110") return "6";
    if (BinString == "0111") return "7";
    if (BinString == "1000") return "8";
    if (BinString == "1001") return "9";
    if (BinString == "1010") return "a";
    if (BinString == "1011") return "b";
    if (BinString == "1100") return "c";
    if (BinString == "1101") return "d";
    if (BinString == "1110") return "e";
    if (BinString == "1111") return "f";
    return "";
}

int CXmlProcess::BinToInt(CString string) // 2进制数据转换成10进制整型
{
    int len = 0;
    int tempInt = 0;
    int strInt = 0;
    for (int i = 0; i < string.GetLength(); i++)
    {
        tempInt = 1;
        strInt = (int)string.GetAt(i) - 48;
        for (int k = 0; k < 7 - i; k++)
        {
            tempInt = 2 * tempInt;
        }
        len += tempInt * strInt;
    }
    return len;
}

UTF-8转换成GB2312：先把UTF-8转换成Unicode，然后再把Unicode通过函数WideCharToMultiByte转换成GB2312。

WCHAR* CXmlProcess::UTF_8ToUnicode(char* ustart) // 把UTF-8转换成Unicode
{
    char char_one;
    char char_two;
    char char_three;
    int Hchar;
    int Lchar;
    char uchar[2];
    WCHAR* unicode;
    CString string_one;
    CString string_two;
    CString string_three;
    CString combiString;
    char_one = *ustart;
    char_two = *(ustart + 1);
    char_three = *(ustart + 2);
    string_one.Format("%x", char_one);
    string_two.Format("%x", char_two);
    string_three.Format("%x", char_three);
    string_three = string_three.Right(2);
    string_two = string_two.Right(2);
    string_one = string_one.Right(2);
    string_three = HexToBin(string_three.Left(1)) + HexToBin(string_three.Right(1));
    string_two = HexToBin(string_two.Left(1)) + HexToBin(string_two.Right(1));
    string_one = HexToBin(string_one.Left(1)) + HexToBin(string_one.Right(1));
    combiString = string_one + string_two + string_three;
    combiString = combiString.Right(20);
    combiString.Delete(4, 2);
    combiString.Delete(10, 2);
    Hchar = BinToInt(combiString.Left(8));
    Lchar = BinToInt(combiString.Right(8));
    uchar[1] = (char)Hchar;
    uchar[0] = (char)Lchar;
    unicode = (WCHAR*)uchar;
    return unicode;
}

char* CXmlProcess::UnicodeToGB2312(unsigned short uData) // 把Unicode转换成GB2312
{
    char* buffer;
    buffer = new char[sizeof(WCHAR)];
    WideCharToMultiByte(CP_ACP, NULL, &uData, 1, buffer, sizeof(WCHAR), NULL, NULL);
    return buffer;
}

GB2312转换成UTF-8：先把GB2312通过函数MultiByteToWideChar转换成Unicode，然后再把Unicode通过拆开Unicode后拼装成UTF-8。

WCHAR* CXmlProcess::Gb2312ToUnicode(char* gbBuffer) // GB2312转换成Unicode
{
    WCHAR* uniChar;
    uniChar = new WCHAR[1];
    ::MultiByteToWideChar(CP_ACP, MB_PRECOMPOSED, gbBuffer, 2, uniChar, 1);
    return uniChar;
}

char* CXmlProcess::UnicodeToUTF_8(WCHAR* UniChar) // Unicode转换成UTF-8
{
    char* buffer;
    CString strOne;
    CString strTwo;
    CString strThree;
    CString strFour;
    CString strAnd;
    buffer = new char[3];
    int hInt, lInt;
    hInt = (int)((*UniChar) / 256);
    lInt = (*UniChar) % 256;
    CString string;
    string.Format("%x", hInt);
    strTwo = HexToBin(string.Right(1));
    string = string.Left(string.GetLength() - 1);
    strOne = HexToBin(string.Right(1));
    string.Format("%x", lInt);
    strFour = HexToBin(string.Right(1));
    string = string.Left(string.GetLength() - 1);
    strThree = HexToBin(string.Right(1));
    strAnd = strOne + strTwo + strThree + strFour;
    strAnd.Insert(0, "1110");
    strAnd.Insert(8, "10");
    strAnd.Insert(16, "10");
    strOne = strAnd.Left(8);
    strAnd = strAnd.Right(16);
    strTwo = strAnd.Left(8);
    strThree = strAnd.Right(8);
    *buffer = (char)BinToInt(strOne);
    buffer[1] = (char)BinToInt(strTwo);
    buffer[2] = (char)BinToInt(strThree);
    return buffer;
}

例子：将GB2312转换成UTF-8：

char* CXmlProcess::translateCharToUTF_8(char* xmlStream, int len)
{
    int newCharLen = 0;
    int oldCharLen = 0;
    int revCharLen = len;
    char* newCharBuffer;
    char* finalCharBuffer;
    char* buffer;
    CString string;
    buffer = new char[sizeof(WCHAR)];
    newCharBuffer = new char[int(1.5 * revCharLen)]; // 设置最大的一个缓冲区
    while (oldCharLen < revCharLen)
    {
        if (*(xmlStream + oldCharLen) >= 0)
        {
            *(newCharBuffer + newCharLen) = *(xmlStream + oldCharLen);
            newCharLen++;
            oldCharLen++;
        } // 如果是英文直接复制就可以
        else
        {
            WCHAR* pbuffer = this->Gb2312ToUnicode(xmlStream + oldCharLen);
            buffer = this->UnicodeToUTF_8(pbuffer);
            *(newCharBuffer + newCharLen) = *buffer;
            *(newCharBuffer + newCharLen + 1) = *(buffer + 1);
            *(newCharBuffer + newCharLen + 2) = *(buffer + 2);
            newCharLen += 3;
            oldCharLen += 2;
        }
    }
    newCharBuffer[newCharLen] = '\0';
    CString string1;
    string1.Format("%s", newCharBuffer);
    finalCharBuffer = new char[newCharLen + 1];
    memcpy(finalCharBuffer, newCharBuffer, newCharLen + 1);
    return finalCharBuffer;
}

0
相关文章 | 读者评论 | 发表评论 |