📄 transcoder.cpp
字号:
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include <log4cxx/logstring.h>
#include <log4cxx/helpers/transcoder.h>
#include <log4cxx/helpers/pool.h>
#include <stdlib.h>
#include <log4cxx/helpers/exception.h>
#include <log4cxx/helpers/bytebuffer.h>
#include <log4cxx/helpers/charsetdecoder.h>
#include <log4cxx/helpers/charsetencoder.h>
#include <vector>
#include <apr.h>
#include <apr_strings.h>
#if !defined(LOG4CXX)
#define LOG4CXX 1
#endif
#include <log4cxx/private/log4cxx_private.h>
#if LOG4CXX_LOGCHAR_IS_UNICHAR || LOG4CXX_CFSTRING_API || LOG4CXX_UNICHAR_API
#include <CoreFoundation/CFString.h>
#endif
using namespace log4cxx;
using namespace log4cxx::helpers;
//
// Decodes the UTF-8 bytes in src and appends the result to dst.
//
// Each code point returned by decode() is re-encoded into dst with
// encode(). Whenever decode() reports 0xFFFF a single LOSSCHAR is
// appended instead.
//
// decode() may report 0xFFFF either without consuming anything or after
// having consumed bytes. The iterator is therefore stepped manually only
// when decode() left it where it was; stepping unconditionally could skip
// an unrelated byte or move the iterator past src.end().
//
void Transcoder::decodeUTF8(const std::string& src, LogString& dst) {
    std::string::const_iterator iter = src.begin();
    while (iter != src.end()) {
        const std::string::const_iterator start = iter;
        const unsigned int sv = decode(src, iter);
        if (sv != 0xFFFF) {
            encode(sv, dst);
        } else {
            dst.append(1, LOSSCHAR);
            // Guarantee forward progress without over-advancing.
            if (iter == start) {
                ++iter;
            }
        }
    }
}
void Transcoder::encodeUTF8(const LogString& src, std::string& dst) {
#if LOG4CXX_LOGCHAR_IS_UTF8
dst.append(src);
#else
LogString::const_iterator iter = src.begin();
while(iter != src.end()) {
unsigned int sv = decode(src, iter);
if(sv != 0xFFFF) {
encode(sv, dst);
} else {
dst.append(1, LOSSCHAR);
iter++;
}
}
#endif
}
//
// Returns a copy of src, converted to UTF-8, obtained from p.pstrdup().
//
// When logchar is not UTF-8 the string is first converted into a
// temporary std::string through the (LogString, std::string) overload;
// otherwise src is duplicated directly.
//
char* Transcoder::encodeUTF8(const LogString& src, Pool& p) {
#if !LOG4CXX_LOGCHAR_IS_UTF8
    std::string utf8;
    encodeUTF8(src, utf8);
    return p.pstrdup(utf8);
#else
    return p.pstrdup(src);
#endif
}
//
// Writes the UTF-8 encoding of sv at dst.current() and then moves the
// buffer position forward by the number of bytes that were written.
// The space remaining in the buffer is not checked here.
//
void Transcoder::encodeUTF8(unsigned int sv, ByteBuffer& dst) {
    const size_t written = encodeUTF8(sv, dst.current());
    dst.position(dst.position() + written);
}
//
// Writes the UTF-8 encoding of ch into dst and returns the number of
// bytes written (1 to 4).
//
// Values above 0x10FFFF are written as the three-byte encoding of
// 0xFFFF. dst must have room for at least four bytes; nothing is
// null-terminated.
//
size_t Transcoder::encodeUTF8(unsigned int ch, char* dst) {
    // One byte: 0xxxxxxx
    if (ch < 0x80) {
        dst[0] = (char) ch;
        return 1;
    }
    // Two bytes: 110xxxxx 10xxxxxx
    if (ch < 0x800) {
        dst[0] = (char) (0xC0 | (ch >> 6));
        dst[1] = (char) (0x80 | (ch & 0x3F));
        return 2;
    }
    // Three bytes: 1110xxxx 10xxxxxx 10xxxxxx
    if (ch < 0x10000) {
        dst[0] = (char) (0xE0 | (ch >> 12));
        dst[1] = (char) (0x80 | ((ch >> 6) & 0x3F));
        dst[2] = (char) (0x80 | (ch & 0x3F));
        return 3;
    }
    // Four bytes: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
    if (ch <= 0x10FFFF) {
        dst[0] = (char) (0xF0 | (ch >> 18));
        dst[1] = (char) (0x80 | ((ch >> 12) & 0x3F));
        dst[2] = (char) (0x80 | ((ch >> 6) & 0x3F));
        dst[3] = (char) (0x80 | (ch & 0x3F));
        return 4;
    }
    // Out of range: emit the UTF-8 encoding of 0xFFFF.
    dst[0] = (char) 0xEF;
    dst[1] = (char) 0xBF;
    dst[2] = (char) 0xBF;
    return 3;
}
//
// Writes the big-endian UTF-16 encoding of sv at dst.current() and then
// moves the buffer position forward by the number of bytes written.
// The space remaining in the buffer is not checked here.
//
void Transcoder::encodeUTF16BE(unsigned int sv, ByteBuffer& dst) {
    const size_t written = encodeUTF16BE(sv, dst.current());
    dst.position(dst.position() + written);
}
//
// Writes the big-endian UTF-16 encoding of ch into dst and returns the
// number of bytes written.
//
//   ch <= 0xFFFF     one 16-bit unit, 2 bytes
//   ch <= 0x10FFFF   surrogate pair, 4 bytes
//   otherwise        0xFFFF, 2 bytes
//
// dst must have room for at least four bytes.
//
size_t Transcoder::encodeUTF16BE(unsigned int ch, char* dst) {
    if (ch <= 0xFFFF) {
        dst[0] = (char) (ch >> 8);
        dst[1] = (char) (ch & 0xFF);
        return 2;
    }
    if (ch <= 0x10FFFF) {
        // Surrogate pair: the 20-bit offset from 0x10000 is split into
        // a high half (top 10 bits) and a low half (bottom 10 bits).
        // The low surrogate's upper byte must carry bits 8-9 of the
        // offset; taking them from bits 4-5 corrupts the pair.
        const unsigned int offset = ch - 0x10000;
        const unsigned int high = 0xD800 + (offset >> 10);
        const unsigned int low = 0xDC00 + (offset & 0x3FF);
        dst[0] = (char) (high >> 8);
        dst[1] = (char) (high & 0xFF);
        dst[2] = (char) (low >> 8);
        dst[3] = (char) (low & 0xFF);
        return 4;
    }
    // Out of range: emit 0xFFFF.
    dst[0] = dst[1] = (char) 0xFF;
    return 2;
}
//
// Writes the little-endian UTF-16 encoding of sv at dst.current() and
// then moves the buffer position forward by the number of bytes written.
// The space remaining in the buffer is not checked here.
//
void Transcoder::encodeUTF16LE(unsigned int sv, ByteBuffer& dst) {
    const size_t written = encodeUTF16LE(sv, dst.current());
    dst.position(dst.position() + written);
}
//
// Writes the little-endian UTF-16 encoding of ch into dst and returns
// the number of bytes written.
//
//   ch <= 0xFFFF     one 16-bit unit, 2 bytes
//   ch <= 0x10FFFF   surrogate pair, 4 bytes
//   otherwise        0xFFFF, 2 bytes
//
// dst must have room for at least four bytes.
//
size_t Transcoder::encodeUTF16LE(unsigned int ch, char* dst) {
    if (ch <= 0xFFFF) {
        dst[0] = (char) (ch & 0xFF);
        dst[1] = (char) (ch >> 8);
        return 2;
    }
    if (ch <= 0x10FFFF) {
        // Surrogate pair: the 20-bit offset from 0x10000 is split into
        // a high half (top 10 bits) and a low half (bottom 10 bits).
        // The low surrogate's upper byte must carry bits 8-9 of the
        // offset; taking them from bits 4-5 corrupts the pair.
        const unsigned int offset = ch - 0x10000;
        const unsigned int high = 0xD800 + (offset >> 10);
        const unsigned int low = 0xDC00 + (offset & 0x3FF);
        dst[0] = (char) (high & 0xFF);
        dst[1] = (char) (high >> 8);
        dst[2] = (char) (low & 0xFF);
        dst[3] = (char) (low >> 8);
        return 4;
    }
    // Out of range: emit 0xFFFF.
    dst[0] = dst[1] = (char) 0xFF;
    return 2;
}
//
// Decodes one UTF-8 sequence from src starting at iter.
//
// On success the code point is returned and iter is left just past the
// sequence. On failure 0xFFFF is returned and iter is restored to the
// position it had on entry, so a caller can skip a single byte and try
// again.
//
// A sequence is rejected when
//   - iter is already at src.end(),
//   - the first byte is a continuation byte or is not a valid lead byte
//     for a one- to four-byte sequence (0xF8-0xFF are rejected),
//   - the input ends early or a continuation byte is missing,
//   - the value could have been encoded in fewer bytes (overlong form),
//   - the value is above 0x10FFFF, or
//   - the value is 0xFFFF itself, which cannot be told apart from the
//     failure result.
//
// The smallest value accepted for a three-byte sequence is 0x800
// (bytes E0 A0 80).
//
unsigned int Transcoder::decode(const std::string& src,
                                std::string::const_iterator& iter) {
    const unsigned int INVALID = 0xFFFF;
    const std::string::const_iterator start(iter);
    if (iter == src.end()) {
        return INVALID;
    }

    const unsigned char ch1 = *(iter++);
    if (ch1 <= 0x7F) {
        return ch1;
    }

    // Classify the lead byte: payload bits, number of continuation
    // bytes, and the smallest value that legitimately needs this length.
    unsigned int rv;
    unsigned int continuationCount;
    unsigned int minimum;
    if ((ch1 & 0xE0) == 0xC0) {
        rv = ch1 & 0x1F;
        continuationCount = 1;
        minimum = 0x80;
    } else if ((ch1 & 0xF0) == 0xE0) {
        rv = ch1 & 0x0F;
        continuationCount = 2;
        minimum = 0x800;
    } else if ((ch1 & 0xF8) == 0xF0) {
        rv = ch1 & 0x07;
        continuationCount = 3;
        minimum = 0x10000;
    } else {
        // Stray continuation byte or a lead byte that is never valid.
        iter = start;
        return INVALID;
    }

    for (unsigned int i = 0; i < continuationCount; ++i) {
        if (iter == src.end()) {
            iter = start;
            return INVALID;
        }
        const unsigned char next = *(iter++);
        if ((next & 0xC0) != 0x80) {
            iter = start;
            return INVALID;
        }
        rv = (rv << 6) + (next & 0x3F);
    }

    if (rv < minimum || rv > 0x10FFFF || rv == INVALID) {
        iter = start;
        return INVALID;
    }
    return rv;
}
//
// Appends the UTF-8 encoding of sv to dst.
// The bytes are produced in a small stack buffer by
// encodeUTF8(unsigned int, char*) and then appended in one call.
//
void Transcoder::encode(unsigned int sv, std::string& dst) {
    char utf8[8];
    const size_t length = encodeUTF8(sv, utf8);
    dst.append(utf8, length);
}
void Transcoder::decode(const std::string& src, LogString& dst) {
#if LOG4CXX_CHARSET_UTF8 && LOG4CXX_LOGCHAR_IS_UTF8
dst.append(src);
#else
static CharsetDecoderPtr decoder(CharsetDecoder::getDefaultDecoder());
dst.reserve(dst.size() + src.size());
std::string::const_iterator iter = src.begin();
#if !LOG4CXX_CHARSET_EBCDIC
for(;
iter != src.end() && ((unsigned char) *iter) < 0x80;
iter++) {
dst.append(1, *iter);
}
#endif
if (iter != src.end()) {
size_t offset = iter - src.begin();
ByteBuffer buf(const_cast<char*>(src.data() + offset), src.size() - offset);
while(buf.remaining() > 0) {
log4cxx_status_t stat = decoder->decode(buf, dst);
if(CharsetDecoder::isError(stat)) {
dst.append(1, LOSSCHAR);
buf.position(buf.position() + 1);
}
}
decoder->decode(buf, dst);
}
#endif
}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -