📄 kcharsetsdata.cpp
字号:
/* This file is part of the KDE libraries
    Copyright (C) 1997 Jacek Konieczny (jajcus@zeus.polsl.gliwice.pl)
    $Id: kcharsetsdata.cpp,v 1.1 2003/09/08 19:42:03 jasonk Exp $

    This library is free software; you can redistribute it and/or
    modify it under the terms of the GNU Library General Public
    License as published by the Free Software Foundation; either
    version 2 of the License, or (at your option) any later version.

    This library is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
    Library General Public License for more details.

    You should have received a copy of the GNU Library General Public License
    along with this library; see the file COPYING.LIB.  If not, write to
    the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
    Boston, MA 02111-1307, USA.
*/

#include "kcharsetsdata.h"
#include <qdir.h>
#include <qfile.h>
#include <stdlib.h>
#include <stdio.h>
#include <stdarg.h>
#include <qfontinfo.h>
#include <qintdict.h>
#include <kapp.h>
#include <ksimpleconfig.h>
#include <qregexp.h>
#include <qstrlist.h>
#include <X11/Xlib.h>

#ifdef KCH_DEBUG
/*
 * printf-style debug trace written to stderr.
 * Only defined here when KCH_DEBUG is set; otherwise kchdebug is expected
 * to come from elsewhere (presumably kcharsetsdata.h -- TODO confirm).
 */
inline void kchdebug(const char *msg,...){
    va_list ap;
    va_start( ap, msg );                // use variable arg list
    vfprintf( stderr, msg, ap );
    va_end( ap );
}
#endif

/*
 * Returns a printable name for a charset entry, or "(null)" for a null
 * entry.  Used only to feed "%s" in debug traces, which previously
 * received the raw KCharsetEntry pointer.
 */
static inline const char *kchEntryName(const KCharsetEntry *entry){
  return entry ? (const char *)entry->name : "(null)";
}

/*
 * Builds a converter from inputCharset to outputCharset.
 *
 * flags is tested against KCharsetConverter::INPUT_AMP_SEQUENCES and
 * KCharsetConverter::OUTPUT_AMP_SEQUENCES to set inAmps / outAmps.
 * Calls fatal() when the global kcharsetsData is null.  The outcome of
 * initialize() is stored in isOK.
 */
KCharsetConverterData::KCharsetConverterData(const KCharsetEntry * inputCharset
                                             ,const KCharsetEntry * outputCharset
                                             ,int flags){

  kchdebug("Creating converter from %s to %s...",kchEntryName(inputCharset)
                                                ,kchEntryName(outputCharset));
  tempResult=new KCharsetConversionResult();
  inAmps=( (flags&KCharsetConverter::INPUT_AMP_SEQUENCES)!=0 );
  outAmps=( (flags&KCharsetConverter::OUTPUT_AMP_SEQUENCES)!=0 );
  if ( kcharsetsData == 0 ) fatal("KCharsets not initialized!");
  isOK=initialize(inputCharset,0 ? 0 : outputCharset);
  kchdebug("done");
}

/*
 * Builds a converter from inputCharset to whatever output charset
 * kcharsetsData->conversionHint() proposes (initialize() is called with a
 * null output charset).  Flags and error handling are the same as in the
 * two-charset constructor.
 */
KCharsetConverterData::KCharsetConverterData(const KCharsetEntry * inputCharset
                                             ,int flags){

  kchdebug("Creating converter from %s...",kchEntryName(inputCharset));
  tempResult=new KCharsetConversionResult();
  inAmps=( (flags&KCharsetConverter::INPUT_AMP_SEQUENCES)!=0 );
  outAmps=( (flags&KCharsetConverter::OUTPUT_AMP_SEQUENCES)!=0 );
  if ( kcharsetsData == 0 ) fatal("KCharsets not initialized!");
  isOK=initialize(inputCharset,0);
  kchdebug("done");
}

/*
 * Releases the scratch result and the from-unicode dictionary built by
 * createFromUnicodeDict().  convTable and convToUniDict are obtained from
 * kcharsetsData and are not deleted here.
 */
KCharsetConverterData::~KCharsetConverterData(){

  delete tempResult;        // deleting a null pointer is a no-op
  delete convFromUniDict;
}

/*
 * Selects the conversion strategy for the given charset pair and fetches
 * the tables it needs.
 *
 * A null outputCharset means "ask kcharsetsData->conversionHint(input)".
 * Returns FALSE when either charset is missing or a required table or
 * dictionary cannot be obtained, TRUE otherwise.
 */
bool KCharsetConverterData::initialize(const KCharsetEntry * inputCharset
                                       ,const KCharsetEntry * outputCharset){

  convTable=0;
  convToUniDict=0;
  convFromUniDict=0;
  // convert() copies `output` into every result, even for a converter that
  // failed to initialize, so it must never be left uninitialized.
  output=0;

  input=inputCharset;
  if (!input) {
    kchdebug("Couldn't set input charset to %s\n",kchEntryName(inputCharset));
    return FALSE;
  }

  if (outputCharset==0) output=kcharsetsData->conversionHint(input);
  else output=outputCharset;
  if (!output) {
    kchdebug("Couldn't set output charset to %s\n",kchEntryName(outputCharset));
    return FALSE;
  }

  setInputSettings();
  setOutputSettings();

  if ( !input || !output || input==output )
    conversionType=NoConversion;
  else if (unicodeIn && unicodeOut)
    conversionType=UnicodeUnicode;
  else if (unicodeIn){
    conversionType=FromUnicode;
    kchdebug("Conversion: from unicode\n");
    if (!createFromUnicodeDict()) return FALSE;
  }
  else if (unicodeOut){
    conversionType=ToUnicode;
    kchdebug("Conversion: to unicode\n");
    if (!getToUnicodeTable()) return FALSE;
  }
  else{
    // Neither side is unicode: go through unicode as an intermediate step.
    conversionType=EightToEight;
    kchdebug("Conversion: no unicode\n");
    if (!getToUnicodeTable()) return FALSE;
    if (!createFromUnicodeDict()) return FALSE;
  }
  return TRUE;
}

/*
 * Fetches the input charset's to-unicode mapping from kcharsetsData:
 * the table if there is one, otherwise the dictionary.
 * Returns FALSE when neither is available.
 */
bool KCharsetConverterData::getToUnicodeTable(){

  convTable=kcharsetsData->getToUnicodeTable(input);
  if (!convTable){
    convToUniDict=kcharsetsData->getToUnicodeDict(input);
    if (!convToUniDict){
      kchdebug("Couldn't get conversion table nor dictionary\n");
      return FALSE;
    }
  }
  return TRUE;
}

/*
 * Derives inputEnc / inBits / unicodeIn from the input charset's name.
 *
 * NOTE(review): this side matches the name "unicode" while
 * setOutputSettings() matches "unicode-1-1" -- confirm which name the
 * charset database actually uses.
 */
void KCharsetConverterData::setInputSettings(){

  const char *name=input->name;

//  if ( ! stricmp(name,"unicode-1-1-utf-7") ){
//    warning("Sorry, UTF-7 encoding is not supported yet\n");
//    inputEnc=UTF7;
//    inBits=0;
//    unicodeIn=TRUE;
//  }
//  else
  if ( ! stricmp(name,"unicode-1-1-utf-8") ){
    inputEnc=UTF8;
    inBits=0;
    unicodeIn=TRUE;
  }
  else if ( ! stricmp(name,"unicode") ){
    inputEnc=none;
    inBits=16;
    unicodeIn=TRUE;
  }
  else if ( ! stricmp(name,"us-ascii") ){
    inputEnc=none;
    inBits=7;
    unicodeIn=FALSE;
  }
  else{
    inputEnc=none;
    inBits=8;
    unicodeIn=FALSE;
  }
}

/*
 * Derives outputEnc / outBits / unicodeOut from the output charset's name.
 * Emits a warning for plain "unicode-1-1" output.
 */
void KCharsetConverterData::setOutputSettings(){

  const char *name=output->name;

//  if ( ! stricmp(name,"unicode-1-1-utf-7") ){
//    warning("Sorry, UTF-7 encoding is not supported yet\n");
//    outputEnc=UTF7;
//    outBits=0;
//    unicodeOut=TRUE;
//  }
//  else
  if ( ! stricmp(name,"unicode-1-1-utf-8") ){
    outputEnc=UTF8;
    outBits=0;
    unicodeOut=TRUE;
  }
  else if ( ! stricmp(name,"unicode-1-1") ){
    warning("Sorry, Unicode probably doesn't work (except UTF-8)\n");
    outputEnc=none;
    outBits=16;
    unicodeOut=TRUE;
  }
  else if ( ! stricmp(name,"us-ascii") ){
    outputEnc=none;
    outBits=7;
    unicodeOut=FALSE;
  }
  else{
    outputEnc=none;
    outBits=8;
    unicodeOut=FALSE;
  }
}

/* UTF-7 decoding is not implemented; always returns FALSE. */
bool KCharsetConverterData::decodeUTF7(const char*,unsigned int &,int &){
  return FALSE;
}

/*
 * Decodes one UTF-8 sequence starting at str.
 *
 * On success returns TRUE, stores the decoded value in code and the number
 * of bytes that follow the lead byte (0..5) in extrachars.
 * On failure (invalid lead byte, or a following byte that is not of the
 * form 10xxxxxx) emits a warning, sets code and extrachars to 0 and
 * returns FALSE.
 */
bool KCharsetConverterData::decodeUTF8(const char*str,unsigned int &code
                                       ,int &extrachars){
  code=0;
  extrachars=0;
  const unsigned char lead=(unsigned char)*str;

//  kchdebug("str: ");
//  for(int i=0;i<6 && str[i];i++)
//    kchdebug("%02x ",(int)(unsigned char)str[i]);
//  kchdebug("\n");

  if ( (lead&0x80)==0 ){
    code=lead&0x7f;
    extrachars=0;
  }
  else if ( (lead&0xe0)==0xc0 ){
    code=lead&0x1f;
    extrachars=1;
  }
  else if ( (lead&0xf0)==0xe0 ){
    code=lead&0x0f;
    extrachars=2;
  }
  else if ( (lead&0xf8)==0xf0 ){
    code=lead&0x07;
    extrachars=3;
  }
  else if ( (lead&0xfc)==0xf8 ){
    code=lead&0x03;
    extrachars=4;
  }
  else if ( (lead&0xfe)==0xfc ){
    // Lead byte 1111110x.  This used to compare against 0xf8, which the
    // previous branch already catches, so this form was never decoded.
    code=lead&0x01;
    extrachars=5;
  }
  else {
    // lead is non-zero here, so str[1] is still inside the string.
    warning("Invalid UTF-8 sequence %2x%2x...!",(int)lead
                                               ,(int)(unsigned char)str[1]);
    return FALSE;
  }

  for(int k=1;k<=extrachars;k++){
    const unsigned char next=(unsigned char)str[k];
    // A NUL terminator fails this test too, so a truncated sequence can
    // neither be read past the end of the string nor make the caller skip
    // over the terminator.
    if ( (next&0xc0)!=0x80 ){
      warning("Invalid UTF-8 sequence %2x%2x...!",(int)lead,(int)next);
      code=0;
      extrachars=0;
      return FALSE;
    }
    code=(code<<6)|(next&0x3f);
  }
  return TRUE;
}

/* UTF-7 encoding is not implemented; always returns FALSE. */
bool KCharsetConverterData::encodeUTF7(unsigned int,QString &){
  return FALSE;
}

/*
 * Appends the UTF-8 encoding of code (1 to 6 octets) to result.
 * Returns FALSE, after a warning and without touching result, when code
 * does not fit in 31 bits; TRUE otherwise.
 */
bool KCharsetConverterData::encodeUTF8(unsigned int code,QString &result){

  if (code<0x80){
    result+=(char)code;
    return TRUE;
  }
  if (code>=0x80000000U){
    warning("Unicode value too big!");
    return FALSE;
  }

  int octets=2;
  unsigned leadMark=0xc0;   // high marker bits of the lead octet (110xxxxx)
  unsigned leadMask=0x1f;   // payload bits available in the lead octet
  unsigned limit=0x800;     // first value that needs more than `octets`

  // Each extra octet adds one marker bit to the lead octet, removes one
  // payload bit from it and adds six payload bits overall (net five).
  // limit tops out at 0x80000000 and code is below that, so this ends.
  while(code>=limit){
    leadMark=(leadMark>>1)|0x80;
    leadMask>>=1;
    limit<<=5;
    octets++;
  }

  int shift=(octets-1)*6;
  result+=(char)(((code>>shift)&leadMask)|leadMark);
  while(shift>0){
    shift-=6;
    result+=(char)(((code>>shift)&0x3f)|0x80);
  }
  return TRUE;
}

/*
 * Converts the whole of str into r; characters that cannot be represented
 * are replaced inline (see the three-argument overload).
 */
void KCharsetConverterData::convert(const char *str
                                    ,KCharsetConversionResult &r){
  convert(str,r,0);
}

/*
 * Converts str into a list of results.  Each run of convertible characters
 * becomes one entry; a character this converter cannot represent is handed
 * to kcharsetsData->convert() and appended as its own entry.
 *
 * The entries are allocated with new; who deletes them depends on how the
 * caller configured rl (not visible here).
 */
void KCharsetConverterData::convert(const char *str
                                    ,QList<KCharsetConversionResult> &rl){
  rl.clear();
  while(str){
    // Start from 0 for every chunk so the test below never reads an
    // indeterminate value.
    unsigned unicode=0;
    KCharsetConversionResult *chunk=new KCharsetConversionResult;
    kchdebug("Created result: %p\n",chunk);
    str=convert(str,*chunk,&unicode);
    rl.append(chunk);
    if (unicode){
      KCharsetConversionResult *single=new KCharsetConversionResult;
      kchdebug("Created result: %p\n",single);
      kcharsetsData->convert(unicode,*single);
      rl.append(single);
    }
  }
}

/*
 * Core conversion routine.
 *
 * Converts str into result.cText and sets result.cCharset to the output
 * charset.  When pUnicode is non-null and a character with a known unicode
 * value has no representation in the output charset, conversion stops
 * there: *pUnicode receives that value and the return value points at the
 * rest of the input (or is 0 if nothing is left).  Otherwise the whole
 * string is converted, *pUnicode (if given) is 0 and 0 is returned.
 *
 * With a null pUnicode, unrepresentable characters are written as the
 * control character itself (values below 0x20), as "&#NNN;" when outAmps
 * is set and the unicode value is known, or as "?".
 *
 * NOTE(review): raw 16-bit unicode input (inBits==16) stores the value in
 * `index` rather than `unicode` and ends at the first zero byte; the file
 * itself warns that this mode probably does not work.  Apart from removing
 * the undefined evaluation order in the byte fetch, that path is left as
 * it was.
 */
const char * KCharsetConverterData::convert(const char * str
                                            ,KCharsetConversionResult &result
                                            ,unsigned *pUnicode) {

  kchdebug("Setteing result charset to %p ",&output);
  kchdebug("(%s)\n",kchEntryName(output));
  result.cCharset=output;
  kchdebug("----- %s ----- => ",str);

  // Make sure callers never see a stale value on the early returns below.
  if (pUnicode) *pUnicode=0;

  if (!isOK) return 0;
  if (conversionType == NoConversion ){
    result.cText=str;
    return 0;
  }
  result.cText="";
  if (!str) return 0;       // nothing to convert

  int i;
  int tmp=0;
  unsigned *ptr=0;
  unsigned index=0;
  unsigned unicode=0;
  unsigned chr=0;

  for(i=0;(inBits<=8)?str[i]:(str[i]&&str[i+1]);){
    chr=0;
    index=0;
    unicode=0;

    if (inAmps && str[i]=='&'){
      kchdebug("Amperstand found\n");
      unicode=kcharsetsData->decodeAmp(str+i,tmp);
      kchdebug("i=%i characters: %i code:%4x\n",i,tmp,unicode);
      if (tmp>0) i+=tmp-1;
    }

    if (unicode==0)
      switch(inputEnc){
        case UTF7:
          if (decodeUTF7(str+i,unicode,tmp)) i+=tmp;
          else unicode=(unsigned char)str[i];
          break;
        case UTF8:
          if (decodeUTF8(str+i,unicode,tmp)) i+=tmp;
          else unicode=(unsigned char)str[i];
          break;
        default:
          if (inBits<=8) index=(unsigned char)str[i];
          else if (inBits==16){
            // High byte first.  Done in separate statements: reading
            // str[i] and modifying i in one expression has no defined order.
            const unsigned hi=(unsigned char)str[i];
            i++;
            const unsigned lo=(unsigned char)str[i];
            index=(hi<<8)+lo;
          }
          break;
      }

    kchdebug("Got index: %x\n",index);

    if (index>0 || unicode>0)
      switch(conversionType){
        case ToUnicode:
          if (unicode>0) chr=unicode;
          else if (convTable) chr=convTable[index];
          else if (convToUniDict) {
            ptr=(*convToUniDict)[index];
            if (ptr) chr=*ptr;
            else chr=0;
          }
          if (chr==0 && index>0 && index<0x20)
            chr=index; // control characters - do not change
          break;
        case FromUnicode:
          ptr=(*convFromUniDict)[unicode];
          if (ptr) chr=*ptr;
          else chr=0;
          // control characters - do not change.  This used to assign
          // `index`, which is 0 whenever `unicode` is set, and so wiped
          // out the lookup result instead of preserving the character.
          if (unicode>0 && unicode<0x20) chr=unicode;
          break;
        case UnicodeUnicode:
          chr=unicode;
          break;
        default:
          // 8-bit to 8-bit: map to unicode first, then back.
          if (unicode==0){
            if (convTable) unicode=convTable[index];
            else{
              ptr=(*convToUniDict)[index];
              if (ptr) unicode=*ptr;
              else unicode=0;
            }
          }
//          kchdebug("Converted to unicode: %4x\n",index);
          if (unicode){
            ptr=(*convFromUniDict)[unicode];
            if (ptr) chr=*ptr;
            else chr=0;
          }
          else chr=0;
          if (chr==0 && index>0 && index<0x20)
            chr=index; // control characters - do not change
          break;
      }

//    kchdebug("Converted to: %x\n",chr);

    if (outputEnc==UTF8) encodeUTF8(chr,result.cText);
    else if (outputEnc==UTF7) encodeUTF7(chr,result.cText);
    else if (chr==0){
      if (unicode && pUnicode){
        // Hand the unrepresentable character back to the caller and stop.
        *pUnicode=unicode;
        i++;
        if (inBits>8 && str[i]) i++;
        result.cCharset=output;
        if (str[i]) return str+i;
        else return 0;
      }
      else if (unicode && unicode<0x20) result.cText+=(char)unicode;
      else if (outAmps){
        if (unicode) result.cText+="&#"+QString().setNum(unicode)+';';
        else result.cText+="?";
      }
      else result.cText+="?";
    }
    else if (outBits==16){
      result.cText+=(char)(chr>>8);
      result.cText+=(char)(chr&255);
    }
    else result.cText+=(char)chr;

    i++;
    if (inBits>8 && str[i]) i++;
  }

  kchdebug("----- %s -----\n",(const char *)result);
  if (pUnicode) *pUnicode=0;
  return 0;
}

/*
 * Converts a single unicode value using this converter's from-unicode
 * dictionary, falling back to kcharsetsData->convert() and finally to
 * "&#NNN;" (when outAmps is set and code is non-zero) or "?".
 *
 * The returned reference is to the converter's internal scratch result,
 * which is overwritten by the next call.
 */
const KCharsetConversionResult & KCharsetConverterData::convert(unsigned code){

  unsigned chr=0;

  kchdebug("KCCS:convert(code) ");
  if (convFromUniDict){
    const unsigned *ptr=(*convFromUniDict)[code];
    if (ptr) chr=*ptr;
  }

  if (chr==0){
    kcharsetsData->convert(code,*tempResult);
    if (!tempResult->cText.isEmpty()) return *tempResult;
    // cText is empty at this point, so appending equals assigning.
    if (outAmps && code) tempResult->cText+="&#"+QString().setNum(code)+';';
    else tempResult->cText+="?";
  }
  else{
    // Append the character explicitly: assigning the integer straight to
    // the string would go through the string's integer (size) constructor
    // instead of storing the character.
    tempResult->cText="";
    tempResult->cText+=(char)chr;
    // The character comes from this converter's own output mapping; do not
    // leave a charset from an earlier fallback conversion in place.
    tempResult->cCharset=output;
  }
  return *tempResult;
}

/*
 * Decodes the sequence at tag with kcharsetsData->decodeAmp() (which also
 * sets l) and converts the resulting value with convert(unsigned).
 */
const KCharsetConversionResult & KCharsetConverterData::convertTag(
                                                  const char *tag,int &l){

  kchdebug("Converting: %s\n",(const char *)tag);
  return convert(kcharsetsData->decodeAmp(tag,l));
}

/*
 * Builds convFromUniDict, the unicode -> output-charset dictionary, by
 * inverting the output charset's to-unicode table (or, when there is no
 * table, its to-unicode dictionary).
 * Returns FALSE when kcharsetsData provides neither.
 */
bool KCharsetConverterData::createFromUnicodeDict(){

  QIntDict<unsigned> * dict=new QIntDict<unsigned>;
  dict->setAutoDelete(TRUE);   // the dictionary owns the values inserted below

  const unsigned *tbl=kcharsetsData->getToUnicodeTable(output);
  if (tbl){
    for(int i=0;i<(1<<outBits);i++)
      dict->insert(tbl[i],new unsigned(i));
  }
  else{
    QIntDict<unsigned> * dict2=kcharsetsData->getToUnicodeDict(output);
    if (!dict2){
      kchdebug("Couldn't get to unicode table for %s\n",output->name);
      delete dict;
      return FALSE;
    }
    QIntDictIterator<unsigned> it(*dict2);
    while( it.current() ){
      dict->insert(*it.current(),new unsigned(it.currentKey()));
      ++it;
    }
  }
  convFromUniDict=dict;
  return TRUE;
}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -