📄 qgb18030codec.cpp
字号:
/******************************************************************************** Copyright (C) 1992-2007 Trolltech ASA. All rights reserved.**** This file is part of the plugins of the Qt Toolkit.**** This file may be used under the terms of the GNU General Public** License version 2.0 as published by the Free Software Foundation** and appearing in the file LICENSE.GPL included in the packaging of** this file. Please review the following information to ensure GNU** General Public Licensing requirements will be met:** http://trolltech.com/products/qt/licenses/licensing/opensource/**** If you are unsure which license is appropriate for your use, please** review the following information:** http://trolltech.com/products/qt/licenses/licensing/licensingoverview** or contact the sales department at sales@trolltech.com.**** In addition, as a special exception, Trolltech gives you certain** additional rights. These rights are described in the Trolltech GPL** Exception version 1.0, which can be found at** http://www.trolltech.com/products/qt/gplexception/ and in the file** GPL_EXCEPTION.txt in this package.**** In addition, as a special exception, Trolltech, as the sole copyright** holder for Qt Designer, grants users of the Qt/Eclipse Integration** plug-in the right for the Qt/Eclipse Integration to link to** functionality provided by Qt Designer and its related libraries.**** Trolltech reserves all rights not expressly granted herein.**** This file is provided AS IS with NO WARRANTY OF ANY KIND, INCLUDING THE** WARRANTY OF DESIGN, MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.******************************************************************************//*! 
\class QGb18030Codec qgb18030codec.h
    \reentrant
    \internal
*/

#include "qgb18030codec.h"

#ifndef QT_NO_TEXTCODEC

// Inclusive range test. NOTE: function-like macro, `c` is evaluated twice,
// so arguments must be free of side effects.
#define InRange(c, lower, upper) (((c) >= (lower)) && ((c) <= (upper)))
// True for values up to 0x7F.
#define IsLatin(c) ((c) <= 0x7F)
// True for bytes in 0xA1..0xFE.
#define IsByteInGb2312(c) (InRange((c), 0xA1, 0xFE))
// Byte classification by position within a 2-byte or 4-byte sequence:
//   1st byte:                0x81..0xFE
//   2nd byte (2-byte form):  0x40..0xFE, excluding 0x7F
//   2nd byte (4-byte form):  0x30..0x39
//   3rd byte:                0x81..0xFE
//   4th byte:                0x30..0x39
#define Is1stByte(c) (InRange((c), 0x81, 0xFE))
#define Is2ndByteIn2Bytes(c) (InRange((c), 0x40, 0xFE) && (c) != 0x7F)
#define Is2ndByteIn4Bytes(c) (InRange((c), 0x30, 0x39))
#define Is2ndByte(c) (Is2ndByteIn2Bytes(c) || Is2ndByteIn4Bytes(c))
#define Is3rdByte(c) (InRange((c), 0x81, 0xFE))
#define Is4thByte(c) (InRange((c), 0x30, 0x39))
// Wraps a decoded value in a QChar; a value of 0 is mapped to
// QChar::ReplacementCharacter. The value is truncated to ushort.
#define QValidChar(u) ((u) ? QChar((ushort)(u)) : QChar(QChar::ReplacementCharacter))

/* User-defined areas:
     UDA 1: 0xAAA1 - 0xAFFE (564/0)
     UDA 2: 0xF8A1 - 0xFEFE (658/0)
     UDA 3: 0xA140 - 0xA7A0 (672/0)
   The macros below take the first byte as `a` and the second byte as `b`. */
#define IsUDA1(a, b) (InRange((a), 0xAA, 0xAF) && InRange((b), 0xA1, 0xFE))
#define IsUDA2(a, b) (InRange((a), 0xF8, 0xFE) && InRange((b), 0xA1, 0xFE))
#define IsUDA3(a, b) (InRange((a), 0xA1, 0xA7) && InRange((b), 0x40, 0xA0) && ((b) != 0x7F))

// NOTE(review): presumably one entry of a lookup index used by conversion
// tables defined further down in this file; the tables are not visible from
// here, so the exact meaning of each field should be confirmed there.
typedef struct { quint8 tblBegin; quint8 tblEnd; quint16 tblOffset; quint16 algOffset;} indexTbl_t;

// File-local conversion helpers, defined later in this file.
// NOTE(review): convertToUnicode() sets `len` to the number of bytes it has
// buffered before the call and compares it afterwards, so `len` appears to be
// an in/out parameter; confirm against the definition.
static uint qt_Gb18030ToUnicode(const uchar *gbstr, int& len);
// convertFromUnicode() treats a return value >= 2 as the number of bytes
// written to `gbchar`, and anything smaller as "not encodable".
static int qt_UnicodeToGb18030(uint unicode, uchar *gbchar);
// Not static: this declaration has external linkage; its definition is
// outside the visible part of the file.
int qt_UnicodeToGbk(uint unicode, uchar *gbchar);

// The constructor has nothing to initialise.
QGb18030Codec::QGb18030Codec(){}

/*!
\reimp */QByteArray QGb18030Codec::convertFromUnicode(const QChar *uc, int len, ConverterState *state) const{ char replacement = '?'; int high = -1; if (state) { if (state->flags & ConvertInvalidToNull) replacement = 0; if (state->remainingChars) high = state->state_data[0]; } int invalid = 0; int rlen = 4 *len + 1; QByteArray rstr; rstr.resize(rlen); uchar* cursor = (uchar*)rstr.data(); //qDebug("QGb18030Codec::fromUnicode(const QString& uc, int& lenInOut = %d)", lenInOut); for (int i = 0; i < len; i++) { unsigned short ch = uc[i].unicode(); int len; uchar buf[4]; if (high >= 0) { if (ch >= 0xdc00 && ch < 0xe000) { // valid surrogate pair ++i; uint u = (high-0xd800)*0x400+(ch-0xdc00)+0x10000; len = qt_UnicodeToGb18030(u, buf); if (len >= 2) { for (int j=0; j<len; j++) *cursor++ = buf[j]; } else { *cursor++ = replacement; ++invalid; } high = -1; continue; } else { *cursor++ = replacement; ++invalid; high = -1; } } if (ch < 0x80) { // ASCII *cursor++ = ch; } else if ((ch >= 0xd800 && ch < 0xdc00)) { // surrogates area. 
check for correct encoding // we need at least one more character, first the high surrogate, then the low one high = ch; } else if ((len = qt_UnicodeToGb18030(ch, buf)) >= 2) { for (int j=0; j<len; j++) *cursor++ = buf[j]; } else { // Error *cursor++ = replacement; ++invalid; } } rstr.resize(cursor - (uchar*)rstr.constData()); if (state) { state->invalidChars += invalid; state->state_data[0] = high; if (high) state->remainingChars = 1; } return rstr;}QString QGb18030Codec::convertToUnicode(const char* chars, int len, ConverterState *state) const{ uchar buf[4]; int nbuf = 0; QChar replacement = QChar::ReplacementCharacter; if (state) { if (state->flags & ConvertInvalidToNull) replacement = QChar::Null; nbuf = state->remainingChars; buf[0] = (state->state_data[0] >> 24) & 0xff; buf[1] = (state->state_data[0] >> 16) & 0xff; buf[2] = (state->state_data[0] >> 8) & 0xff; buf[3] = (state->state_data[0] >> 0) & 0xff; } int invalid = 0; QString result; //qDebug("QGb18030Decoder::toUnicode(const char* chars, int len = %d)", len); for (int i = 0; i < len; i++) { uchar ch = chars[i]; switch (nbuf) { case 0: if (ch < 0x80) { // ASCII result += QLatin1Char(ch); } else if (Is1stByte(ch)) { // GB18030? buf[0] = ch; nbuf = 1; } else { // Invalid result += replacement; ++invalid; } break; case 1: // GB18030 2 bytes if (Is2ndByteIn2Bytes(ch)) { buf[1] = ch; int clen = 2; uint u = qt_Gb18030ToUnicode(buf, clen); if (clen == 2) { result += QValidChar(u); } else { result += replacement; ++invalid; } nbuf = 0; } else if (Is2ndByteIn4Bytes(ch)) { buf[1] = ch; nbuf = 2; } else { // Error result += replacement; ++invalid; nbuf = 0; } break; case 2: // GB18030 3 bytes if (Is3rdByte(ch)) { buf[2] = ch; nbuf = 3; } else { result += replacement; ++invalid; nbuf = 0; } break; case 3: // GB18030 4 bytes if (Is4thByte(ch)) { buf[3] = ch;
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -