📄 charsetencoder.cpp
字号:
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include <log4cxx/logstring.h>
#include <log4cxx/helpers/charsetencoder.h>
#include <log4cxx/helpers/bytebuffer.h>
#include <log4cxx/helpers/exception.h>
#include <apr_xlate.h>
#include <log4cxx/helpers/stringhelper.h>
#include <log4cxx/helpers/transcoder.h>
#if !defined(LOG4CXX)
#define LOG4CXX 1
#endif
#include <log4cxx/private/log4cxx_private.h>
#include <apr_portable.h>
#include <log4cxx/helpers/mutex.h>
#include <log4cxx/helpers/synchronized.h>
using namespace log4cxx;
using namespace log4cxx::helpers;
IMPLEMENT_LOG4CXX_OBJECT(CharsetEncoder)
namespace log4cxx
{
namespace helpers {
#if APR_HAS_XLATE
/**
* A character encoder implemented using apr_xlate.
*/
class APRCharsetEncoder : public CharsetEncoder
{
public:
APRCharsetEncoder(const LogString& topage) : pool(), mutex(pool) {
#if LOG4CXX_LOGCHAR_IS_WCHAR
const char* frompage = "WCHAR_T";
#endif
#if LOG4CXX_LOGCHAR_IS_UTF8
const char* frompage = "UTF-8";
#endif
#if LOG4CXX_LOGCHAR_IS_UNICHAR
const char* frompage = "UTF-16";
#endif
std::string tpage(Transcoder::encodeCharsetName(topage));
apr_status_t stat = apr_xlate_open(&convset,
tpage.c_str(),
frompage,
pool.getAPRPool());
if (stat != APR_SUCCESS) {
throw IllegalArgumentException(topage);
}
}
virtual ~APRCharsetEncoder() {
}
virtual log4cxx_status_t encode(const LogString& in,
LogString::const_iterator& iter,
ByteBuffer& out) {
apr_status_t stat;
size_t outbytes_left = out.remaining();
size_t initial_outbytes_left = outbytes_left;
size_t position = out.position();
if (iter == in.end()) {
synchronized sync(mutex);
stat = apr_xlate_conv_buffer(convset, NULL, NULL,
out.data() + position, &outbytes_left);
} else {
LogString::size_type inOffset = (iter - in.begin());
apr_size_t inbytes_left =
(in.size() - inOffset) * sizeof(LogString::value_type);
apr_size_t initial_inbytes_left = inbytes_left;
{
synchronized sync(mutex);
stat = apr_xlate_conv_buffer(convset,
(const char*) (in.data() + inOffset),
&inbytes_left,
out.data() + position,
&outbytes_left);
}
iter += ((initial_inbytes_left - inbytes_left) / sizeof(LogString::value_type));
}
out.position(out.position() + (initial_outbytes_left - outbytes_left));
return stat;
}
private:
APRCharsetEncoder(const APRCharsetEncoder&);
APRCharsetEncoder& operator=(const APRCharsetEncoder&);
Pool pool;
Mutex mutex;
apr_xlate_t *convset;
};
#endif
#if LOG4CXX_LOGCHAR_IS_WCHAR && LOG4CXX_HAS_WCSTOMBS
/**
 * A character encoder implemented using wcstombs.
 */
class WcstombsCharsetEncoder : public CharsetEncoder
{
public:
    WcstombsCharsetEncoder() {
    }

    /**
     * Converts a run of wchar_t to the default external multibyte encoding.
     *
     * At most BUFSIZE - 1 characters are converted per call, and fewer
     * when out does not have MB_LEN_MAX bytes of room for each of them.
     * If wcstombs rejects the chunk, the chunk is repeatedly halved until
     * a leading fragment converts.
     *
     * @param in   string being encoded.
     * @param iter current position within in; advanced past the characters
     *             that were converted.
     * @param out  destination buffer; its position is advanced by the
     *             number of bytes written.
     * @return APR_SUCCESS if iter was already at the end of in or a chunk
     *         (or a leading fragment of it) was converted; APR_BADARG if
     *         wcstombs failed and no fragment could be converted, in which
     *         case iter and out are left unchanged and iter refers to the
     *         character that could not be converted.
     */
    log4cxx_status_t encode(const LogString& in,
                            LogString::const_iterator& iter,
                            ByteBuffer& out) {
        log4cxx_status_t stat = APR_SUCCESS;
        if (iter != in.end()) {
            size_t outbytes_left = out.remaining();
            size_t position = out.position();
            std::wstring::size_type inOffset = (iter - in.begin());
            enum { BUFSIZE = 256 };
            wchar_t buf[BUFSIZE];

            // Leave room for the terminating null, then shrink the chunk
            // so that the worst-case multibyte expansion fits in out and
            // the chunk does not run past the end of the input.
            size_t chunkSize = BUFSIZE - 1;
            if (chunkSize * MB_LEN_MAX > outbytes_left) {
                chunkSize = outbytes_left / MB_LEN_MAX;
            }
            if (chunkSize > in.length() - inOffset) {
                chunkSize = in.length() - inOffset;
            }

            // wcstombs needs a null-terminated source, so work on a
            // zero-filled copy of the chunk.
            memset(buf, 0, BUFSIZE * sizeof(wchar_t));
            memcpy(buf,
                   in.data() + inOffset,
                   chunkSize * sizeof(wchar_t));
            size_t converted = wcstombs(out.data() + position, buf, outbytes_left);
            if (converted == (size_t) -1) {
                stat = APR_BADARG;
                //
                //   if an unconvertible character was encountered,
                //       repeatedly halve the source to get a fragment
                //       that can be converted
                for (chunkSize /= 2;
                     chunkSize > 0;
                     chunkSize /= 2) {
                    buf[chunkSize] = 0;
                    converted = wcstombs(out.data() + position, buf, outbytes_left);
                    if (converted != (size_t) -1) {
                        iter += chunkSize;
                        out.position(out.position() + converted);
                        // Progress was made and iter now refers to the
                        // start of the unconverted remainder, not
                        // necessarily to the offending character, so do
                        // not report a failure for this call. The
                        // offending character is reported by a later call
                        // once it is first in the chunk.
                        stat = APR_SUCCESS;
                        break;
                    }
                }
            } else {
                iter += chunkSize;
                out.position(out.position() + converted);
            }
        }
        return stat;
    }

private:
    // Copying is disabled: declared but not defined.
    WcstombsCharsetEncoder(const WcstombsCharsetEncoder&);
    WcstombsCharsetEncoder& operator=(const WcstombsCharsetEncoder&);
};
#endif
/**
 * Encodes a LogString to US-ASCII.
 */
class USASCIICharsetEncoder : public CharsetEncoder
{
public:
    USASCIICharsetEncoder() {
    }

    /**
     * Writes one byte to out for every decoded character of in whose
     * value is at most 0x7F, stopping when in is exhausted, out is full,
     * or a character above 0x7F is met.
     *
     * @param in   string being encoded.
     * @param iter current position within in; advanced past every
     *             character that was written.
     * @param out  destination buffer.
     * @return APR_SUCCESS, or APR_BADARG if a character above 0x7F was
     *         met, in which case iter is left on that character.
     */
    virtual log4cxx_status_t encode(const LogString& in,
                                    LogString::const_iterator& iter,
                                    ByteBuffer& out) {
        if (iter == in.end()) {
            return APR_SUCCESS;
        }
        while (out.remaining() > 0 && iter != in.end()) {
            // Remember where this character starts so the iterator can
            // be rewound if the character turns out not to be ASCII.
            const LogString::const_iterator charStart(iter);
            const unsigned int codePoint = Transcoder::decode(in, iter);
            if (codePoint > 0x7F) {
                iter = charStart;
                return APR_BADARG;
            }
            out.put((char) codePoint);
        }
        return APR_SUCCESS;
    }

private:
    // Copying is disabled: declared but not defined.
    USASCIICharsetEncoder(const USASCIICharsetEncoder&);
    USASCIICharsetEncoder& operator=(const USASCIICharsetEncoder&);
};
/**
 * Converts a LogString to ISO-8859-1.
 */
class ISOLatinCharsetEncoder : public CharsetEncoder
{
public:
    ISOLatinCharsetEncoder() {
    }

    /**
     * Writes one byte to out for every decoded character of in whose
     * value is at most 0xFF, stopping when in is exhausted, out is full,
     * or a character above 0xFF is met.
     *
     * @param in   string being encoded.
     * @param iter current position within in; advanced past every
     *             character that was written.
     * @param out  destination buffer.
     * @return APR_SUCCESS, or APR_BADARG if a character above 0xFF was
     *         met, in which case iter is left on that character.
     */
    virtual log4cxx_status_t encode(const LogString& in,
                                    LogString::const_iterator& iter,
                                    ByteBuffer& out) {
        if (iter == in.end()) {
            return APR_SUCCESS;
        }
        for (;;) {
            if (!(out.remaining() > 0 && iter != in.end())) {
                return APR_SUCCESS;
            }
            // Saved so the iterator can be restored when the character
            // does not fit in a single ISO-8859-1 byte.
            const LogString::const_iterator charStart(iter);
            const unsigned int codePoint = Transcoder::decode(in, iter);
            if (codePoint <= 0xFF) {
                out.put((char) codePoint);
                continue;
            }
            iter = charStart;
            return APR_BADARG;
        }
    }

private:
    // Copying is disabled: declared but not defined.
    ISOLatinCharsetEncoder(const ISOLatinCharsetEncoder&);
    ISOLatinCharsetEncoder& operator=(const ISOLatinCharsetEncoder&);
};
/**
* Encodes a LogString to a byte array when the encodings are identical.
*/
class TrivialCharsetEncoder : public CharsetEncoder
{
public:
TrivialCharsetEncoder() {
}
virtual log4cxx_status_t encode(const LogString& in,
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -