XML4C完美兼容中文的补充
xml4c兼容中文的问题一直是大家比较头疼的问题,网上也有很多关于这方面的讨论,但是一直没有太好的结论。在IBM developerWorks的网站上,找到了邹月明先生的一篇文章《剖析XML4c源码,完美兼容中文XML》,该文章对xml4c的源码进行了剖析和修改,从而达到了兼容中文的目的。我也按照文章中的说法对xml4c的源码进行了修改,这种方法在我的Debug版本中确实完美地解决了xml4c对中文的兼容问题。但是,问题出现了:在我的Release版本中,对中文的解析出现了混乱。
为了能够弄清楚问题的所在,决定对xml4c的Release版本进行跟踪,结果发现问题就是出在邹月明先生的文章中所指出的修改之处。该代码对于Debug和Release返回的是不同的结果(首先需要声明的是,我已经在使用该库之前调用了setlocale ( LC_ALL, "Chinese-simplified" ) ),只要字符串中包含有中文,在Release版本下calcRequiredSize ( const char* const srcText )函数中调用mbstowcs返回的结果就不正确。下面是《剖析XML4C源码》修改的函数calcRequiredSize代码:
// Reports how many wide characters are needed to hold the transcoded form
// of a narrow, null-terminated string. Returns 0 for a null pointer or when
// the C runtime cannot convert the text.
unsigned int Win32LCPTranscoder::calcRequiredSize(const char* const srcText)
{
    /*
     * Earlier mblen-based implementation, kept disabled for reference:
     *
     *   if(!srcText)
     *       return 0;
     *   unsigned charLen = ::mblen(srcText, MB_CUR_MAX);
     *   if(charLen == -1)
     *       return 0;
     *   else if(charLen != 0 )
     *       charLen = strlen(srcText)/charLen;
     *   if(charLen == -1)
     *       return 0;
     *   return charLen;
     */

    // Nothing to measure without a source string.
    if (srcText == 0)
        return 0;

    // With a null destination mbstowcs performs no conversion and only
    // reports the length; the result depends on the current C locale.
    const unsigned int wideCount = ::mbstowcs(0, srcText, 0);

    // mbstowcs signals an unconvertible sequence with (size_t)-1.
    return (wideCount == static_cast<unsigned int>(-1)) ? 0 : wideCount;
}
这段代码在Debug下正确运行没有问题,但是在Release下,计算长度就会出现问题,得到的结果不正确。开始以为是在程序开始时设置的本地环境(locale)被修改了,可是跟踪代码的时候发现并没有被改变,这让人感到很迷惑,不知道是什么原因造成的。当在mbstowcs前调用一次setlocale(LC_ALL, NULL );(以NULL为参数只是查询当前的本地环境,并不会修改它)之后,函数mbstowcs就能够正确地工作了,如果有谁知道原因,可以来信告知(email: [email protected])。
于是,修改之后的代码就如下了:
// Reports how many wide characters are needed to hold the transcoded form
// of a narrow, null-terminated string. Returns 0 for a null pointer or when
// the C runtime cannot convert the text.
unsigned int Win32LCPTranscoder::calcRequiredSize(const char* const srcText)
{
    if (srcText == 0)
        return 0;

#if !defined(_DEBUG)
    // Release-build workaround: make sure the size comes out right in
    // release builds. A NULL locale argument only queries the current
    // locale; why this call makes mbstowcs behave is not understood.
    setlocale(LC_ALL, NULL);
#endif

    // A null destination makes mbstowcs report the length only.
    const unsigned int wideCount = ::mbstowcs(0, srcText, 0);

    // (size_t)-1 is the failure indicator.
    return (wideCount == static_cast<unsigned int>(-1)) ? 0 : wideCount;
}
当然了,不仅仅是这个函数需要添加这行代码,在所有调用mbstowcs和wcstombs的地方之前都要添加:setlocale(LC_ALL, NULL );下面列出的就是需要修改的函数:
// Signatures only (bodies omitted): the Win32LCPTranscoder members that
// need the same change.

// Size query for a narrow (char) source string.
unsigned int Win32LCPTranscoder::calcRequiredSize(const char* const srcText
, MemoryManager* const manager)
// XMLCh -> char, result buffer returned to the caller.
char* Win32LCPTranscoder::transcode(const XMLCh* const toTranscode);
// XMLCh -> char, MemoryManager overload.
char* Win32LCPTranscoder::transcode(const XMLCh* const toTranscode,
MemoryManager* const manager)
// char -> XMLCh, result buffer returned to the caller.
XMLCh* Win32LCPTranscoder::transcode(const char* const toTranscode)
// char -> XMLCh, MemoryManager overload.
XMLCh* Win32LCPTranscoder::transcode(const char* const toTranscode,
MemoryManager* const manager)
// char -> XMLCh into a caller-supplied buffer (toFill).
bool Win32LCPTranscoder::transcode( const char* const toTranscode
, XMLCh* const toFill
, const unsigned int maxChars
, MemoryManager* const manager)
// XMLCh -> char into a caller-supplied buffer (toFill).
bool Win32LCPTranscoder::transcode( const XMLCh* const toTranscode
, char* const toFill
, const unsigned int maxBytes
, MemoryManager* const manager)
使用函数mbstowcs和wcstombs进行转换,只不过是为了移植的方便,而在Win32下已经提供了另外的API来进行转换:WideCharToMultiByte和MultiByteToWideChar。因此另一种做法是:在这些转换函数中,把mbstowcs替换为MultiByteToWideChar,把wcstombs替换为WideCharToMultiByte,这样重新编译后就不会出现这个问题了。
附录:修改之后的Win32TransService.cpp的部分函数内容
// ---------------------------------------------------------------------------
// Win32LCPTranscoder: Implementation of the virtual transcoder interface
// ---------------------------------------------------------------------------
// Reports how many wide characters are needed to transcode a narrow,
// null-terminated string. Returns 0 for a null pointer or when the
// conversion cannot be sized. The manager argument is not used here.
//
// NOTE(review): per the MultiByteToWideChar documentation, a source length
// of -1 makes the returned count include the terminating null, so the Win32
// path appears to report one more than the mbstowcs path. The transcode()
// callers in this file pass the value on as the output capacity, so confirm
// with them before changing it.
unsigned int Win32LCPTranscoder::calcRequiredSize(const char* const srcText
                                                  , MemoryManager* const manager)
{
    if (srcText == 0)
        return 0;

#if defined(_WIN32)
    // Size-only query: a null output buffer with a capacity of zero.
    const unsigned int wideCount =
        ::MultiByteToWideChar(CP_ACP, 0, srcText, -1, NULL, 0);
#else
  #if !defined(_DEBUG)
    setlocale(LC_ALL, "");
  #endif
    // A null destination makes mbstowcs report the length only.
    const unsigned int wideCount = ::mbstowcs(0, srcText, 0);
#endif

    // (size_t)-1 is the mbstowcs failure indicator.
    return (wideCount == static_cast<unsigned int>(-1)) ? 0 : wideCount;
}
// Transcodes a null-terminated XMLCh string to the local (ANSI) code page.
//
// toTranscode: source string; may be null.
// Returns null when toTranscode is null; otherwise a null-terminated buffer
// allocated with new char[] (an empty string when the input is empty).
char* Win32LCPTranscoder::transcode(const XMLCh* const toTranscode)
{
    if (!toTranscode)
        return 0;

    // Empty input: hand back an empty, separately allocated string.
    if (!*toTranscode)
    {
        char* const emptyStr = new char[1];
        emptyStr[0] = 0;
        return emptyStr;
    }

    // Calc the needed size, then allocate that plus one for the null.
    const unsigned int neededLen = calcRequiredSize(toTranscode);
    char* const retVal = new char[neededLen + 1];

    // The conversion result is not checked below; start from an empty
    // string so a conversion that leaves the buffer untouched cannot expose
    // uninitialized memory.
    retVal[0] = 0;

#ifdef _WIN32
    // Pass the real capacity (neededLen + 1): with a source length of -1 the
    // terminator is converted too and must fit. A NULL default character
    // selects the system default; the previous "" substituted NUL for
    // unmappable characters and silently truncated the result.
    ::WideCharToMultiByte(CP_ACP,
                          0,
                          toTranscode,
                          -1,
                          retVal,
                          neededLen + 1,
                          NULL,
                          NULL);
#else
  #ifndef _DEBUG
    setlocale(LC_ALL, "");
  #endif
    ::wcstombs(retVal, toTranscode, neededLen + 1);
#endif

    // And cap it off anyway just to make sure
    retVal[neededLen] = 0;
    return retVal;
}
// Transcodes a null-terminated XMLCh string to the local (ANSI) code page,
// taking the result buffer from the supplied memory manager.
//
// toTranscode: source string; may be null.
// manager:     allocator used for the returned buffer.
// Returns null when toTranscode is null; otherwise a null-terminated buffer
// obtained from manager->allocate (an empty string when the input is empty).
char* Win32LCPTranscoder::transcode(const XMLCh* const toTranscode,
                                    MemoryManager* const manager)
{
    if (!toTranscode)
        return 0;

    // Empty input: hand back an empty, separately allocated string.
    if (!*toTranscode)
    {
        char* const emptyStr = (char*) manager->allocate(sizeof(char));
        emptyStr[0] = 0;
        return emptyStr;
    }

    // Calc the needed size, then allocate that plus one for the null.
    const unsigned int neededLen = calcRequiredSize(toTranscode, manager);
    char* const retVal = (char*) manager->allocate((neededLen + 1) * sizeof(char));

    // The conversion result is not checked below; start from an empty
    // string so a conversion that leaves the buffer untouched cannot expose
    // uninitialized memory.
    retVal[0] = 0;

#ifdef _WIN32
    // Pass the real capacity (neededLen + 1): with a source length of -1 the
    // terminator is converted too and must fit. A NULL default character
    // selects the system default; the previous "" substituted NUL for
    // unmappable characters and silently truncated the result.
    ::WideCharToMultiByte(CP_ACP,
                          0,
                          toTranscode,
                          -1,
                          retVal,
                          neededLen + 1,
                          NULL,
                          NULL);
#else
  #ifndef _DEBUG
    setlocale(LC_ALL, "");
  #endif
    ::wcstombs(retVal, toTranscode, neededLen + 1);
#endif

    // And cap it off anyway just to make sure
    retVal[neededLen] = 0;
    return retVal;
}
// Transcodes a null-terminated local code page (ANSI) string to XMLCh.
//
// toTranscode: source string; may be null.
// Returns null when toTranscode is null; otherwise a null-terminated buffer
// allocated with new XMLCh[]. An empty string is returned when the input is
// empty or when calcRequiredSize reports 0.
XMLCh* Win32LCPTranscoder::transcode(const char* const toTranscode)
{
    if (!toTranscode)
        return 0;

    // Calculate the buffer size required (skipped for empty input).
    const unsigned int neededLen = *toTranscode ? calcRequiredSize(toTranscode) : 0;
    if (neededLen == 0)
    {
        XMLCh* const emptyStr = new XMLCh[1];
        emptyStr[0] = 0;
        return emptyStr;
    }

    // Allocate a buffer of that size plus one for the null and transcode
    XMLCh* const retVal = new XMLCh[neededLen + 1];

    // The conversion result is not checked below; start from an empty
    // string so a conversion that leaves the buffer untouched cannot expose
    // uninitialized memory.
    retVal[0] = 0;

#ifdef _WIN32
    // Pass the real capacity (neededLen + 1), as the mbstowcs branch does.
    // With a source length of -1 the terminator is converted too, so a
    // capacity of neededLen only works if calcRequiredSize over-counts.
    ::MultiByteToWideChar(CP_ACP,
                          0,
                          toTranscode,
                          -1,
                          retVal,
                          neededLen + 1);
#else
  #ifndef _DEBUG
    setlocale(LC_ALL, "");
  #endif
    ::mbstowcs(retVal, toTranscode, neededLen + 1);
#endif

    // Cap it off just to make sure. We are so paranoid!
    retVal[neededLen] = 0;
    return retVal;
}
// Transcodes a null-terminated local code page (ANSI) string to XMLCh,
// taking the result buffer from the supplied memory manager.
//
// toTranscode: source string; may be null.
// manager:     allocator used for the returned buffer.
// Returns null when toTranscode is null; otherwise a null-terminated buffer
// obtained from manager->allocate. An empty string is returned when the
// input is empty or when calcRequiredSize reports 0.
XMLCh* Win32LCPTranscoder::transcode(const char* const toTranscode,
                                     MemoryManager* const manager)
{
    if (!toTranscode)
        return 0;

    // Calculate the buffer size required (skipped for empty input).
    const unsigned int neededLen =
        *toTranscode ? calcRequiredSize(toTranscode, manager) : 0;
    if (neededLen == 0)
    {
        XMLCh* const emptyStr = (XMLCh*) manager->allocate(sizeof(XMLCh));
        emptyStr[0] = 0;
        return emptyStr;
    }

    // Allocate a buffer of that size plus one for the null and transcode
    XMLCh* const retVal =
        (XMLCh*) manager->allocate((neededLen + 1) * sizeof(XMLCh));

    // The conversion result is not checked below; start from an empty
    // string so a conversion that leaves the buffer untouched cannot expose
    // uninitialized memory.
    retVal[0] = 0;

#ifdef _WIN32
    // Pass the real capacity (neededLen + 1), as the mbstowcs branch does.
    // With a source length of -1 the terminator is converted too, so a
    // capacity of neededLen only works if calcRequiredSize over-counts.
    ::MultiByteToWideChar(CP_ACP,
                          0,
                          toTranscode,
                          -1,
                          retVal,
                          neededLen + 1);
#else
  #ifndef _DEBUG
    setlocale(LC_ALL, "");
  #endif
    ::mbstowcs(retVal, toTranscode, neededLen + 1);
#endif

    // Cap it off just to make sure. We are so paranoid!
    retVal[neededLen] = 0;
    return retVal;
}
// Transcodes a local code page (ANSI) string into a caller-supplied XMLCh
// buffer.
//
// toTranscode: source string; may be null.
// toFill:      destination buffer. The mbstowcs branch converts up to
//              maxChars + 1 elements, so the buffer is assumed to hold
//              that many (NOTE(review): confirm against callers).
// maxChars:    maximum number of characters to produce, terminator excluded.
// manager:     not used by this function.
// Returns true on success (a null/empty source or maxChars == 0 yields an
// empty string); false when the conversion fails.
bool Win32LCPTranscoder::transcode( const char* const toTranscode
                                    , XMLCh* const toFill
                                    , const unsigned int maxChars
                                    , MemoryManager* const manager)
{
    // Check for a couple of psycho corner cases: nothing to convert, or no
    // room to convert into. Short-circuiting keeps the null check ahead of
    // the dereference.
    if (!toTranscode || !maxChars || !*toTranscode)
    {
        toFill[0] = 0;
        return true;
    }

    // This one has a fixed size output, so try it and if it fails it fails
#ifdef _WIN32
    // MultiByteToWideChar reports failure by returning 0 (never -1), so the
    // result is tested against 0. The capacity is maxChars + 1 so that the
    // terminator fits, matching the mbstowcs branch.
    const int converted = ::MultiByteToWideChar(CP_ACP,
                                                0,
                                                toTranscode,
                                                -1,
                                                toFill,
                                                maxChars + 1);
    if (converted == 0)
    {
        // Leave a valid empty string rather than unspecified contents.
        toFill[0] = 0;
        return false;
    }
    return true;
#else
  #ifndef _DEBUG
    setlocale(LC_ALL, "");
  #endif
    return ::mbstowcs(toFill, toTranscode, maxChars + 1) != size_t(-1);
#endif
}
// Transcodes an XMLCh string into a caller-supplied local code page (ANSI)
// buffer.
//
// toTranscode: source string; may be null.
// toFill:      destination buffer; toFill[maxBytes] is written on success,
//              so it must hold at least maxBytes + 1 bytes.
// maxBytes:    maximum number of bytes to produce, terminator excluded.
// manager:     not used by this function.
// Returns true on success (a null/empty source or maxBytes == 0 yields an
// empty string); false when the conversion fails.
bool Win32LCPTranscoder::transcode( const XMLCh* const toTranscode
                                    , char* const toFill
                                    , const unsigned int maxBytes
                                    , MemoryManager* const manager)
{
    // Watch for a couple of psycho corner cases: nothing to convert, or no
    // room to convert into. Short-circuiting keeps the null check ahead of
    // the dereference.
    if (!toTranscode || !maxBytes || !*toTranscode)
    {
        toFill[0] = 0;
        return true;
    }

    // This one has a fixed size output, so try it and if it fails it fails
#ifdef _WIN32
    // WideCharToMultiByte reports failure by returning 0 (never -1), so the
    // result is tested against 0. The capacity is maxBytes + 1 so that the
    // terminator fits. A NULL default character selects the system default;
    // the previous "" substituted NUL for unmappable characters and
    // truncated the output.
    const int written = ::WideCharToMultiByte(CP_ACP,
                                              0,
                                              toTranscode,
                                              -1,
                                              toFill,
                                              maxBytes + 1,
                                              NULL,
                                              NULL);
    if (written == 0)
    {
        // Leave a valid empty string rather than unspecified contents.
        toFill[0] = 0;
        return false;
    }
#else
  #ifndef _DEBUG
    setlocale(LC_ALL, "");
  #endif
    if (::wcstombs(toFill, toTranscode, maxBytes + 1) == size_t(-1))
    {
        return false;
    }
#endif

    // Cap it off just in case
    toFill[maxBytes] = 0;
    return true;
}
本文地址:http://com.8s8s.com/it/it773.htm