qrtlcodec.cpp

来自「qt-embedded-2.3.8.tar.gz源码」· C++ 代码 · 共 524 行
CPP
524 行
/****************************************************************************** $Id: qt/src/3rdparty/tools/qrtlcodec.cpp   2.3.8   edited 2004-08-05 $**** Implementation of QHebrewCodec and QArabicCodec classes**** Copyright (C)1999-2003 Trolltech AS.  All rights reserved.**** This file is part of the tools module of the Qt GUI Toolkit.**** This file may be distributed under the terms of the Q Public License** as defined by Trolltech AS of Norway and appearing in the file** LICENSE.QPL included in the packaging of this file.**** This file may be distributed and/or modified under the terms of the** GNU General Public License version 2 as published by the Free Software** Foundation and appearing in the file LICENSE.GPL included in the** packaging of this file.**** Licensees holding valid Qt Enterprise Edition or Qt Professional Edition** licenses may use this file in accordance with the Qt Commercial License** Agreement provided with the Software.**** This file is provided AS IS with NO WARRANTY OF ANY KIND, INCLUDING THE** WARRANTY OF DESIGN, MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.**** See http://www.trolltech.com/pricing.html or email sales@trolltech.com for**   information about Qt Commercial License Agreements.** See http://www.trolltech.com/qpl/ for QPL licensing information.** See http://www.trolltech.com/gpl/ for GPL licensing information.**** Contact info@trolltech.com if any conditions of this licensing are** not clear to you.************************************************************************//* * Based on work Copyright (c) 1999 Lars Knoll <knoll@mpi-hd.mpg.de> */#include "qrtlcodec.h"#ifndef QT_NO_RTLCODEC// NOT REVISEDstatic const uchar unkn = '?'; // BLACK SQUARE (94) would be betterstatic const ushort heb_to_unicode[128] = {    0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD,    0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD,    0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD,    0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD,    0x00A0, 0xFFFD, 0x00A2, 0x00A3, 0x00A4, 0x00A5, 0x00A6, 0x00A7,    0x00A8, 0x00A9, 0x00D7, 0x00AB, 0x00AC, 0x00AD, 0x00AE, 0x203E,    0x00B0, 0x00B1, 0x00B2, 0x00B3, 0x00B4, 0x00B5, 0x00B6, 0x00B7,    0x00B8, 0x00B9, 0x00F7, 0x00BB, 0x00BC, 0x00BD, 0x00BE, 0xFFFD,    0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD,    0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD,    0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD,    0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0x2017,    0x05D0, 0x05D1, 0x05D2, 0x05D3, 0x05D4, 0x05D5, 0x05D6, 0x05D7,    0x05D8, 0x05D9, 0x05DA, 0x05DB, 0x05DC, 0x05DD, 0x05DE, 0x05DF,    0x05E0, 0x05E1, 0x05E2, 0x05E3, 0x05E4, 0x05E5, 0x05E6, 0x05E7,    0x05E8, 0x05E9, 0x05EA, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD};static const uchar unicode_to_heb_00[32] = {    0xA0, unkn, 0xA2, 0xA3, 0xA4, 0xA5, 0xA6, 0xA7,    0xA8, 0xA9, 0xD7, 0xAB, 0xAC, 0xAD, 0xAE, unkn,    0xB0, 0xB1, 0xB2, 0xB3, 0xB4, 0xB5, 0xB6, 0xB7,    0xB8, 0xB9, 0xF7, 0xBB, 0xBC, 0xBD, 0xBE, unkn,};static const uchar unicode_to_heb_05[32] = {    0xE0, 0xE1, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, 0xE7,    0xE8, 0xE9, 0xEA, 0xEB, 0xEC, 0xED, 0xEE, 0xEF,    0xF0, 0xF1, 0xF2, 0xF3, 0xF4, 0xF5, 0xF6, 0xF7,    0xF8, 0xF9, 0xFA, unkn, unkn, unkn, unkn, unkn};static const ushort arab_to_unicode[128] = {    0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD,    0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD,    0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD,    0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD,    0x00A0, 0xFFFD, 0xFFFD, 0xFFFD, 0x00A4, 0xFFFD, 0xFFFD, 0xFFFD,    0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0x060C, 0x00AD, 0xFFFD, 0xFFFD,    0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD,    0xFFFD, 0xFFFD, 0xFFFD, 0x061B, 0xFFFD, 0xFFFD, 0xFFFD, 0x061F,    0xFFFD, 0x0621, 0x0622, 0x0623, 0x0624, 0x0625, 0x0626, 0x0627,    0x0628, 0x0629, 0x062A, 0x062B, 0x062C, 0x062D, 0x062E, 0x062F,    0x0630, 0x0631, 0x0632, 0x0633, 0x0634, 0x0635, 0x0636, 0x0637,    0x0638, 0x0639, 0x063A, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD,    0x0640, 0x0641, 0x0642, 0x0643, 0x0644, 0x0645, 0x0646, 0x0647,    0x0648, 0x0649, 0x064A, 0x064B, 0x064C, 0x064D, 0x064E, 0x064F,    0x0650, 0x0651, 0x0652, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD,    0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD};static const uchar unicode_to_arab_06[0x60] = {    unkn, unkn, unkn, unkn, unkn, unkn, unkn, unkn,    unkn, unkn, unkn, unkn, 0xac, unkn, unkn, unkn,    unkn, unkn, unkn, unkn, unkn, unkn, unkn, unkn,    unkn, unkn, unkn, 0xbb, unkn, unkn, unkn, 0xbf,    unkn, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5, 0xc6, 0xc7,    0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf,    0xd0, 0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7,    0xd8, 0xd9, 0xda, unkn, unkn, unkn, unkn, unkn,    0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6, 0xe7,    0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef,    0xf0, 0xf1, 0xf2, unkn, unkn, unkn, unkn, unkn,    unkn, unkn, unkn, unkn, unkn, unkn, unkn, unkn,};/* this function assumes the QString is still visually ordered. * Finding the basic direction of the text is not easy in this case, since * a string like "my friend MOLAHS" could (in logical order) mean aswell * "SHALOM my friend" or "my friend SHALOM", depending on the basic direction * one assumes for the text. * * So this function uses some heuristics to find the right answer... */static QChar::Direction findBasicDirection(QString str){    unsigned int pos;    unsigned int len = str.length();    QChar::Direction dir1 = QChar::DirON;    QChar::Direction dir2 = QChar::DirON;    // If the visual representation of the text starts and ends with the same    // directionality, we know the answer.    pos = 0;    while (pos < len)    {	if (str.at(pos).direction() < 2) // DirR or DirL	    {		dir1 = str.at(pos).direction();		break;	    }	pos++;    }    if( pos == len ) // no directional chars, assume QChar::DirL	return QChar::DirL;    pos = len;    while (pos > 0)    {	if (str.at(pos).direction() < 2) // DirR or DirL	    {		dir2 = str.at(pos).direction();		break;	    }	pos--;    }    // both are the same, so we have the direction!    if ( dir1 == dir2 ) return dir1;    // guess with the help of punktuation marks...    // if the sentence ends with a punktuation, we should have a mark    // at one side of the text...    pos = 0;    while (pos < len-1 && str.at(pos).direction() < 2 )    {	if(str.at(pos).category() == QChar::Punctuation_Other)	{	    if( str.at(pos+1).direction() < 2 ) return QChar::DirR;	    else break; // no letter next to the mark... don't know	}	pos++;    }    pos = len;    while (pos < 1 && str.at(pos).direction() < 2 )    {	if(str.at(pos).category() == QChar::Punctuation_Other)	{	    if( str.at(pos-1).direction() < 2 ) return QChar::DirL;	    else break; // no letter next to the mark... don't know	}	pos--;    }    // don't know try DirR...    return QChar::DirR;}int QHebrewCodec::mibEnum() const{    return 11;}const char* QHebrewCodec::name() const{    return "ISO 8859-8";}QString QHebrewCodec::toUnicode(const char* chars, int len) const{    return toUnicode(chars, len, heb_to_unicode);}static void reverse(QString &input, unsigned int a, unsigned int b){    // evil cast... I know...    QChar *chars = (QChar *)input.unicode();    QChar temp;    while (a < b) {	temp = chars[a];	chars[a] = chars[b];	chars[b] = temp;	a++; b--;    }}static void reverse(QString &str, unsigned int a, unsigned int b,		    QChar::Direction dir){    // first reverse....    if(a != 0 || b != str.length()-1 || dir != QChar::DirL)	reverse(str, a, b);    // now, go through it, to see if there is some substring    // we need to reverse again...    QChar::Direction opposite = (dir==QChar::DirL ? QChar::DirR : QChar::DirL);    if ( b != 0 ) {    while(a <= b)    {	QChar::Direction d = str.at(a).direction();	if( d == opposite )	{	    // found something to reverse...	    uint c = a;	    while( c < b && str.at(c).direction() == opposite )		c++;	    c--;	    if( c > a )	    {		reverse(str, a, c, opposite);		a = c;	    }	} else if ( dir == QChar::DirR &&		    d == QChar::DirEN ||		    d == QChar::DirAN )	{	    uint c = a;	    while( c < b)	    {		d = str.at(c).direction();		if ( d != QChar::DirEN && d != QChar::DirES &&		     d != QChar::DirET && d != QChar::DirCS &&		     d != QChar::DirAN )		{		    c--;		    break;		}		c++;	    }	    if( c > a )	    {		reverse(str, a, c, opposite);		a = c;	    }	}	a++;    }    }}/*!  Since hebrew ans arabic are written from left to right,  but iso8859-6/8 assumes visual ordering (as opposed to the  logical ordering of Unicode), we have to reverse the order of the  input string to get it into logical order.  One problem is, that the basic text direction is unknown. So this  function uses some heuristics to find it, and if it can't guess the  right one, it assumes, the basic text direction is right to left.  This behaviour can be overwritten, by putting a control char  at the beginning of the text telling the function which basic text  direction to use. If the basic text direction is left-to-right, the  control char is (uchar) 0xfe, for right-to-left it is 0xff. Both chars  are undefined in the iso 8859-6/8 charsets.  Example: A visually ordered string "english WERBEH english2" would  be recognizes as having a basic left to right direction. so the logically  ordered QString would be "english HEBREW english2".  By prepending a (char)0xff before the string, QHebrewCodec::toUnicode would  use a basic text direction of left-to-right, and the string would thus  become "english2 HEBREW english".  */QString QHebrewCodec::toUnicode(const char* chars, int len,			     const ushort *table) const{    QString r;    const unsigned char * c = (const unsigned char *)chars;    QChar::Direction basicDir = QChar::DirON; // neutral, we don't know    if( len == 0 ) return QString::null;    // Test, if the user gives us a directionality.    // We use 0xFE and 0xFF in ISO8859-6 and 8859-8 for that.    // These chars are undefined in the charsets, and are mapped to    // RTL overwrite    if( c[0] == 0xfe )    {	basicDir = QChar::DirL;	c++; // skip directionality hint    } else if( c[0] == 0xff )    {	basicDir = QChar::DirR;	c++; // skip directionality hint    }    if ( c[0] == '\0' ) return QChar::null;        for( int i=0; i<len; i++ ) {	if ( c[i] > 127 )	    r[i] = table[c[i]-128];	else	    r[i] = c[i];    }    // do transformation from visual byte ordering to logical byte    // ordering    if( basicDir == QChar::DirON )	basicDir = findBasicDirection(r);    reverse(r, 0, r.length()-1, basicDir);    return r;}QCString QHebrewCodec::fromUnicode(const QString& uc, int& len_in_out) const{    // process only len chars...    int l;    if( len_in_out > 0 )	l = QMIN((int)uc.length(),len_in_out);    else	l = (int)uc.length();    QCString rstr;    if( l == 1 ) {	if( !to8bit( uc[0], &rstr ) )	    rstr += unkn;    } else {	QString tmp = uc;	tmp.truncate(l);	QString vis = tmp.visual();	for (int i=0; i<l; i++) {	    const QChar ch = vis[i];	    if( !to8bit( ch, &rstr ) )		rstr += unkn;	}	// len_in_out = cursor - result;    }    if( l > 0 && !rstr.length() )	rstr += unkn;    return rstr;}bool QHebrewCodec::to8bit(const QChar ch, QCString *rstr) const{    bool converted = FALSE;    if( ch.isMark() ) return TRUE; // ignore marks for conversion    if ( ch.row() ) {	if ( ch.row() == 0x05 ) {	    if ( ch.cell() > 0x91 )		converted = TRUE;	    // 0x0591 - 0x05cf: hebrew punktuation... dropped           if ( ch.cell() >= 0xD0 )		*rstr += unicode_to_heb_05[ch.cell()- 0xD0];	} else if ( ch.row() == 0x20 ) {	    if ( ch.cell() == 0x3E )	    {		*rstr += (char)0xAF;		converted = TRUE;	    }	    else if ( ch.cell() == 0x17 )	    {		*rstr += (char)0xCF;		converted = TRUE;	    }	} else {	    converted = FALSE;	}    } else {	if ( ch.cell() < 0x80 )	{	    *rstr += ch.cell();	    converted = TRUE;	}	else if( ch.cell() < 0xA0 )	{	    *rstr += unicode_to_heb_00[ch.cell() - 0xA0];	    converted = TRUE;	}    }    if(converted) return TRUE;    // couldn't convert the char... lets try its decomposition    QString d = ch.decomposition();    if(d.isNull())	return FALSE;    int l = d.length();    for (int i=0; i<l; i++) {	const QChar ch = d[i];	if(to8bit(ch, rstr))	    converted = TRUE;    }    return converted;}int QHebrewCodec::heuristicContentMatch(const char* chars, int len) const{    const unsigned char * c = (const unsigned char *)chars;    int score = 0;    for (int i=0; i<len; i++) {	if(c[i] > 0x80 && heb_to_unicode[c[i] - 0x80] != 0xFFFD)	    score++;	else	    return -1;    }    return score;}// -------------------------------------------------------------------------int QArabicCodec::mibEnum() const{    return 9;}const char* QArabicCodec::name() const{    return "ISO 8859-6";}/*!  \sa QHebrewCodec::toUnicode()  */QString QArabicCodec::toUnicode(const char* chars, int len) const{    return QHebrewCodec::toUnicode(chars, len, arab_to_unicode);}bool QArabicCodec::to8bit(const QChar ch, QCString *rstr) const{    bool converted = TRUE;    if( ch.isMark() ) return TRUE; // ignore marks for conversion    if ( ch.row() ) {	if ( ch.row() == 0x06 ) {	    if ( ch.cell() > 0x5f )		converted = FALSE;	    else		*rstr += unicode_to_arab_06[ch.cell()];	} else {	    converted = FALSE;	}    } else {	if ( ch.cell() < 0x80 )	    *rstr += ch.cell();	else if( ch.cell() == 0xA0 )	    *rstr += (char)0xA0;	else if( ch.cell() == 0xA4 )	    *rstr += (char)0xA4;	else if( ch.cell() == 0xAd )	    *rstr += (char)0xAd;	else	    converted = FALSE;    }    if(converted) return TRUE;    // couldn't convert the char... lets try its decomposition    QString d = ch.decomposition();    if(d.isNull())	return FALSE;    int l = d.length();    for (int i=0; i<l; i++) {	const QChar ch = d[i];	if(to8bit(ch, rstr))	    converted = TRUE;    }    return converted;}int QArabicCodec::heuristicContentMatch(const char* chars, int len) const{    const unsigned char * c = (const unsigned char *)chars;    int score = 0;    for (int i=0; i<len; i++) {	if(c[i] > 0x80 && arab_to_unicode[c[i] - 0x80] != 0xFFFD)	    score++;	else	    return -1;    }    return score;}#endif
qrtlcodec.cpp - 源码说明

本页面展示了「qt-embedded-2.3.8.tar.gz源码」中的 qrtlcodec.cpp 源码文件，采用 C++ 编程语言编写，共 524 行代码。您可以在线阅读完整代码内容，也可以返回资源详情页下载完整源码包进行本地学习和开发。
虫虫下载站收录了大量与qt-embedded相关的技术资源，包括源代码、技术文档、电路图等，是电子工程师和嵌入式开发者的专业学习平台。
⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?