📄 kwqtextcodec.cpp
字号:
if (iEncoding == ((CCnvCharacterSetConverter::SCharacterSet)(iArrayOfCharacterSetsAvailable->At(index))).Identifier())
return true;
}
return false;
}
/**
 * Converts one chunk of encoded text to Unicode.
 *
 * A leading byte-order mark (UTF-16 little/big endian or UTF-8) overrides
 * iEncoding and is skipped. If the selected charset is not supported
 * (IsConversionSupportedL() returns false) the conversion falls back to
 * ISO-8859-1, which is decoded by convertLatin1(). The windows-125x code
 * pages are decoded by windows125xToUnicode() instead of the Symbian converter.
 *
 * Bytes that could not be converted because the chunk apparently ended in the
 * middle of a character are stashed in iRemainderBuf and prepended to the
 * data of the next call.
 *
 * @param chs   encoded input bytes
 * @param len   number of bytes in chs; a length of 0 is treated as the last chunk
 * @param flush not used by this implementation
 * @return the decoded text
 */
QString KWQTextDecoder::toUnicode(const char *chs, int len, bool flush)
{
    // a zero-length call marks the last chunk: nothing is stashed for later
    const bool lastChunk = (len == 0);
    int bomLength = 0;
    HBufC8 * inHBuf = NULL; // buffer to hold combination of remainder and next chunk
    CCnvCharacterSetConverter::TEndianness endianness = CCnvCharacterSetConverter::ELittleEndian;

    // Byte-order-mark sniffing.
    // NOTE(review): FF/FE/EF/BB/BF are defined outside this view; if they are
    // plain 0xNN constants and char is signed, these comparisons need checking.
    // NOTE(review): "len > 2" / "len > 3" skip a chunk that consists of the BOM
    // only - confirm whether that is intended.
    if (len > 2 && chs[0] == FF && chs[1] == FE) {
        iEncoding = KCharacterSetIdentifierUnicodeLittle;
        bomLength = 2;
    } else if (len > 2 && chs[0] == FE && chs[1] == FF) {
        iEncoding = KCharacterSetIdentifierUnicodeBig;
        endianness = CCnvCharacterSetConverter::EBigEndian;
        bomLength = 2;
    } else if (len > 3 && chs[0] == EF && chs[1] == BB && chs[2] == BF) {
        iEncoding = KCharacterSetIdentifierUtf8;
        bomLength = 3;
    }

    // make sure the converter is available, otherwise fall back to Latin-1
    if (!IsConversionSupportedL())
    {
        iEncoding = KCharacterSetIdentifierIso88591;
    }

    if (iEncoding == KCharacterSetIdentifierIso88591)
    {
        return convertLatin1(reinterpret_cast<const unsigned char *>(chs), len);
    }
    else if(iEncoding == KCharacterSetIdentifierWindow1250 ||
            iEncoding == KCharacterSetIdentifierWindow1251 ||
            iEncoding == KCharacterSetIdentifierWindow1253 ||
            iEncoding == KCharacterSetIdentifierWindow1256 ||
            iEncoding == KCharacterSetIdentifierWindow1257
            ) // work around for Symbian converter hanging when converting windows-125x charset
    {
        return windows125xToUnicode( chs, len );
    }

#if _DEBUG1
    if (len)
        RFileLogger::HexDump( _L("Browser"), _L("text_chunks.txt"), EFileLoggingModeAppend,
                              NULL, NULL, (const TUint8*)chs, len );
#endif // _DEBUG1

    CCnvCharacterSetConverter* charConv = KWQGlobalServices::InstanceL()->CharConv();
    RFs& fileSession = KWQGlobalServices::InstanceL()->FileSession();

    // select the source charset on the shared converter
    charConv->PrepareToConvertToOrFromL( iEncoding, *iArrayOfCharacterSetsAvailable, fileSession );
    charConv->SetDefaultEndiannessOfForeignCharacters( endianness );

    TBuf16<32> tmpBuf; // receives one converted slice per loop iteration
    TBuf16<32> outBuf; // scratch buffer used while stripping 0xFFFD characters

    // skip the byte-order mark (if any)
    len = len - bomLength;
    const TUint8* payload = (const TUint8*)chs + bomLength;
    TPtrC8 inBuf( payload, len );
    TInt state=CCnvCharacterSetConverter::KStateDefault;

    // guess the output size
    QString result;
    // QString enlarges the buffer by multiplying 2
    OOM_PRE_CHECK( len*2 + 1, len*2 + 1, "KWQTextDecoder::toUnicode" )
    // try to reserve twice the len of bytes in memory to avoid frequent reallocation
    // reserve function reserves 3*len of bytes in memory, to achieve 2*len, times factor of 2/3
    result.reserve( len*2/3 + 1 );

    // prepend the bytes left over from the previous chunk
    if (iRemainderBuf && iRemainderBuf->Length())
    {
        // NOTE(review): inHBuf is not on the cleanup stack; a leave from the
        // NewL calls further down would presumably leak it - confirm.
        inHBuf = HBufC8::NewL(iRemainderBuf->Length() + len);
        inHBuf->Des().Append(iRemainderBuf->Des());
        // append the payload *after* the BOM: len no longer counts the BOM bytes
        inHBuf->Des().Append(payload, len);
        delete iRemainderBuf;
        iRemainderBuf = NULL;
        inBuf.Set(inHBuf->Des());
    }

    for(;;)
    {
        TInt rep = 0;   // number of unconvertible characters
        TInt rIndx = 0; // index of first unconvertible character
        TInt ret = charConv->ConvertToUnicode( tmpBuf, inBuf, state, rep, rIndx );
        if (ret==CCnvCharacterSetConverter::EErrorIllFormedInput )
        {
            // in case the error is caused by incomplete character at end of chunk
            if (inBuf.Length() < 3 && !lastChunk)
            {
                // release any stale (e.g. empty) remainder before replacing it
                delete iRemainderBuf;
                iRemainderBuf = NULL;
                iRemainderBuf = HBufC8::NewL(inBuf.Length());
                iRemainderBuf->Des().Append(inBuf);
            }
            break;
        }

        // copy result to out buffer
        // ### FIXME NOKIA: is it correct to remove the replacement 0xFFFD chars here?
        // Maybe we need to replace it with space character
        TPtr outPtr( 0, 0 );

        // there is some illegal/unconvertible chars
        // try to create a buffer with all 0xFFFD chars trimmed
        if (rep != 0)
        {
            // short tail of a non-final chunk: keep it and retry together with the next chunk
            if (inBuf.Length() <= 64 && !lastChunk)
            {
                delete iRemainderBuf;
                iRemainderBuf = NULL;
                iRemainderBuf = HBufC8::NewL(inBuf.Length());
                iRemainderBuf->Des().Append(inBuf);
                break;
            }

            // rIndx is a byte index into the input; halving it only estimates the
            // position in tmpBuf, so clamp it to the valid range. Otherwise
            // tmpBuf.Right() below would be called with a negative length.
            TInt i= (rIndx -1 )/2;
            if (i < 0)
                i = 0;
            if (i > tmpBuf.Length())
                i = tmpBuf.Length();

            // first copy the part of buffer before the first 0xFFFD char
            outBuf.Append(tmpBuf.Left(i));
            // search for more occurrences of 0xFFFD chars
            // and append valid chars before the last 0xFFFD char
            for( ; rep && i<tmpBuf.Length(); i++ )
            {
                if( tmpBuf[i] != 0xFFFD )
                    outBuf.Append( tmpBuf[i] );
                else
                    rep--;
            }
            // append the chars after last 0xFFFD char
            outBuf.Append(tmpBuf.Right(tmpBuf.Length() - i));
            outPtr.Set( (TUint16*)outBuf.Ptr(), outBuf.Length(), outBuf.Length() );
            // only the length is reset; outPtr still refers to the data appended below
            outBuf.Zero();
        }
        else outPtr.Set( (TUint16*)tmpBuf.Ptr(), tmpBuf.Length(), tmpBuf.Length() );

        // append the buffers to final result, will increase the size if needed
        result.append( (const QChar*)(outPtr.Ptr()), outPtr.Length() );

        // ret is the number of input bytes still unconverted; 0 means done
        // NOTE(review): the meaning of -1 here is not visible from this file
        if( ret == 0 || ret == -1 ) break;
        inBuf.Set( inBuf.Right( ret ) );
    }

    // inBuf is not used past this point; release the combined buffer before the
    // OOM post-check so that its early return cannot skip the cleanup
    delete inHBuf;
    inHBuf = NULL;
    OOM_POST_CHECK_FAILED( return result; )
    return result;
}
/**
 * Encodes a Unicode string into the charset currently selected in iEncoding.
 *
 * If the charset is not supported (IsConversionSupportedL() returns false)
 * ISO-8859-1 is used instead. The conversion runs in 32-byte slices which are
 * collected in a heap buffer that grows on demand.
 *
 * @param str the Unicode text to encode
 * @return the encoded bytes; an empty string for empty input or when the
 *         output buffer could not be enlarged
 */
QCString KWQTextDecoder::fromUnicode(const QString & str)
{
    int len = str.length();
    if (len==0)
        return QCString("");

    CCnvCharacterSetConverter* charConv = KWQGlobalServices::InstanceL()->CharConv();
    RFs& fileSession = KWQGlobalServices::InstanceL()->FileSession();

    // make sure the converter is available
    if (!IsConversionSupportedL())
        iEncoding = KCharacterSetIdentifierIso88591;

    QCString outStr(str.length()*3);
    // select the target charset on the shared converter
    charConv->PrepareToConvertToOrFromL( iEncoding, *iArrayOfCharacterSetsAvailable, fileSession );

    TBuf8<32> tmpBuf; // receives one converted slice per loop iteration
    TPtrC16 inBuf( str.Des() );

    // guess the size of out buffer, try to minimize reallocation
    HBufC8* outBuf = HBufC8::NewMaxL( str.length() * 3 + 1 );
    TPtr8 ptr( outBuf->Des() );
    ptr.SetLength( 0 );

    // converting
    for(;;)
    {
        TInt ret = charConv->ConvertFromUnicode( tmpBuf, inBuf );
        // any negative value is an error code (e.g. EErrorIllFormedInput), not a
        // remaining-character count; it must never reach inBuf.Right() below
        if( ret < 0 ) break;

        // copy result to out buffer
        TInt orgLen = ptr.Length();
        // ">=" keeps at least one spare byte at all times: PtrZ() below requires
        // the length to be strictly less than the maximum length
        if( ptr.Length() + tmpBuf.Length() >= ptr.MaxLength() )
        {
            HBufC8* temp = outBuf->ReAlloc( ptr.MaxLength() + tmpBuf.MaxLength() );
            if( !temp )
            {
                // on failure the original buffer is still ours to release
                delete outBuf;
                return QCString("");
            }
            outBuf = temp;
            ptr.Set( outBuf->Des() );
            ptr.SetLength( orgLen );
        }
        ptr.Append( tmpBuf );

        // ret is the number of input characters still unconverted
        if( ret == 0 ) break;
        inBuf.Set( inBuf.Right( ret ) );
    }

    // NOTE(review): appending as a C string stops at the first zero byte
    outStr.append( (const char*)( ptr.PtrZ() ) );
    delete outBuf;
    return outStr;
}
/**
 * Table-driven decoder for the windows-1250/1251/1253/1256/1257 code pages.
 *
 * Bytes below 0x80 are passed through unchanged; every other byte is mapped
 * through the table that belongs to iEncoding, indexed by (byte - 0x80).
 *
 * @param chs encoded input bytes
 * @param len number of bytes in chs
 * @return the decoded text; an empty string if iEncoding is none of the
 *         code pages listed above
 */
QString KWQTextDecoder::windows125xToUnicode( const char *chs, int len )
{
    QString decoded;

    // pick the mapping table for the active code page
    const unsigned short* upperHalf = 0;
    if( iEncoding == KCharacterSetIdentifierWindow1250 )
        upperHalf = win1250toUnicode;
    else if( iEncoding == KCharacterSetIdentifierWindow1251 )
        upperHalf = win1251toUnicode;
    else if( iEncoding == KCharacterSetIdentifierWindow1253 )
        upperHalf = win1253toUnicode;
    else if( iEncoding == KCharacterSetIdentifierWindow1256 )
        upperHalf = win1256toUnicode;
    else if( iEncoding == KCharacterSetIdentifierWindow1257 )
        upperHalf = win1257toUnicode;
    else
        return decoded;

    // NOTE(review): the label below names toUnicode - looks like a copy/paste
    // leftover; kept unchanged because it is a runtime string
    OOM_PRE_CHECK( len*2, len*2, "KWQTextDecoder::toUnicode" );
    decoded.reserve( len * 2/3 );

    for( int idx = 0; idx < len; ++idx ) {
        // read as unsigned so that values >= 0x80 index the table correctly
        const unsigned char octet = chs[idx];
        if( octet < 0x80 )
            decoded.append( QChar(octet) );
        else
            decoded.append( QChar(upperHalf[octet - 0x80]) );
    }

    OOM_POST_CHECK_FAILED(return decoded;)
    return decoded;
}
/**
 * Releases the shared list of available character sets held in
 * KWQTextDecoder::iArrayOfCharacterSetsAvailable, including the name buffer
 * referenced by each entry. Does nothing if the list was never created or
 * has already been released.
 */
void QTextCodec::PrepareForExit()
{
    CArrayFix<CCnvCharacterSetConverter::SCharacterSet>* arr
        = KWQTextDecoder::iArrayOfCharacterSetsAvailable;
    if( !arr )
        return;

    // Clear the shared pointer itself (not just the local copy) so that later
    // users, or a second call to this function, never see a dangling pointer.
    KWQTextDecoder::iArrayOfCharacterSetsAvailable = 0;

    // delete charset names hold by each item, maybe SCharacterSet should
    // provide a destructor to do this?
    for( TInt i=0; i<arr->Count(); ++i )
    {
        // FIXME: this is a hack to get around the access control of SCharacterSet class,
        // anyway, it looks weird of SCharacterSet since it is declared as a struct but it
        // has private members. :))
        // The name buffer pointer is read from the last pointer-sized slot of the entry.
        // NOTE(review): this relies on the private layout of SCharacterSet and on
        // pointers fitting into a TUint32 - confirm for the target platform.
        CCnvCharacterSetConverter::SCharacterSet chset = (CCnvCharacterSetConverter::SCharacterSet)arr->At(i);
        TUint32 addr = (TUint32)( &chset );
        HBufC* buf = *( (HBufC**)( addr + sizeof(CCnvCharacterSetConverter::SCharacterSet) - sizeof(HBufC*) ) );
        delete buf;
    }

    arr->Reset();
    delete arr;
}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -