📄 encoding.c
字号:
* -2 if the transcoding fails (for *in is not valid utf8 string or
* the result of transformation can't fit into the encoding we want)
*/
int
xmlCharEncFirstLine(xmlCharEncodingHandler *handler, xmlBufferPtr out,
                    xmlBufferPtr in) {
    /*
     * echo '<?xml version="1.0" encoding="UCS4"?>' | wc -c => 38
     * 45 chars should be sufficient to reach the end of the encoding
     * declaration without going too far inside the document content.
     */
    const int maxout = 45;
    int ret = -2;
    int written;
    int toconv;
    int avail;

    if (handler == NULL) return(-1);
    if (out == NULL) return(-1);
    if (in == NULL) return(-1);

    toconv = in->use;
    avail = out->size - out->use;
    if (toconv * 2 >= avail) {
        xmlBufferGrow(out, toconv);
        avail = out->size - out->use;
    }

    /*
     * The converter is allowed to emit up to maxout bytes and a
     * terminating 0 is stored right behind them, so the output buffer
     * needs maxout + 1 free bytes. Previously the limit was forced to 45
     * regardless of the space really available in @out.
     * NOTE(review): assumes xmlBufferGrow(buf, len) makes room for at
     * least len more bytes; the clamp below covers the case where it
     * could not.
     */
    if (avail < maxout + 1) {
        xmlBufferGrow(out, maxout + 1);
        avail = out->size - out->use;
    }
    if (avail < 1)
        return(-1);
    written = maxout;
    if (written > avail - 1)
        written = avail - 1;

    if (handler->input != NULL) {
        ret = handler->input(&out->content[out->use], &written,
                             in->content, &toconv);
        /* drop the consumed input and account for the produced output */
        xmlBufferShrink(in, toconv);
        out->use += written;
        out->content[out->use] = 0;
    }
#ifdef LIBXML_ICONV_ENABLED
    else if (handler->iconv_in != NULL) {
        ret = xmlIconvWrapper(handler->iconv_in, &out->content[out->use],
                              &written, in->content, &toconv);
        xmlBufferShrink(in, toconv);
        out->use += written;
        out->content[out->use] = 0;
        if (ret == -1) ret = -3;
    }
#endif /* LIBXML_ICONV_ENABLED */
#ifdef DEBUG_ENCODING
    switch (ret) {
        case 0:
            xmlGenericError(xmlGenericErrorContext,
                    "converted %d bytes to %d bytes of input\n",
                    toconv, written);
            break;
        case -1:
            xmlGenericError(xmlGenericErrorContext,"converted %d bytes to %d bytes of input, %d left\n",
                    toconv, written, in->use);
            break;
        case -2:
            xmlGenericError(xmlGenericErrorContext,
                    "input conversion failed due to input error\n");
            break;
        case -3:
            xmlGenericError(xmlGenericErrorContext,"converted %d bytes to %d bytes of input, %d left\n",
                    toconv, written, in->use);
            break;
        default:
            xmlGenericError(xmlGenericErrorContext,"Unknown input conversion failed %d\n", ret);
            break;
    }
#endif /* DEBUG_ENCODING */
    /*
     * Ignore when input buffer is not on a boundary: only the first
     * line is wanted here, so a conversion that stopped early (-1, -3)
     * is reported as success.
     */
    if ((ret == -3) || (ret == -1))
        ret = 0;
    return(ret);
}
/**
 * xmlCharEncInFunc:
 * @handler: char encoding transformation data structure
 * @out: an xmlBuffer for the output.
 * @in: an xmlBuffer for the input
 *
 * Generic front-end for the encoding handler input function.
 * The bytes consumed by the conversion are removed from @in, the
 * converted bytes are appended to @out, which is kept 0 terminated.
 *
 * Returns the number of bytes written to @out if any were produced,
 *     0 if @in is empty or if the conversion stopped early without
 *       producing output (converter status -1 or -3),
 *     -1 general error (NULL argument or no room in @out)
 *     -2 if the transcoding fails (for *in is not valid utf8 string or
 *        the result of transformation can't fit into the encoding we
 *        want) or if @handler has no input conversion function
 */
int
xmlCharEncInFunc(xmlCharEncodingHandler * handler, xmlBufferPtr out,
                 xmlBufferPtr in)
{
    int ret = -2;
    int written;
    int toconv;

    if (handler == NULL)
        return (-1);
    if (out == NULL)
        return (-1);
    if (in == NULL)
        return (-1);

    toconv = in->use;
    if (toconv == 0)
        return (0);

    /*
     * Always keep one byte of @out for the terminating 0 stored after
     * the conversion; without it a converter filling the whole free
     * space made that store land one byte past the buffer.
     */
    written = out->size - out->use - 1;
    if (toconv * 2 >= written) {
        xmlBufferGrow(out, out->size + toconv * 2);
        written = out->size - out->use - 1;
    }
    if (written < 0)
        return (-1);

    if (handler->input != NULL) {
        ret = handler->input(&out->content[out->use], &written,
                             in->content, &toconv);
        xmlBufferShrink(in, toconv);
        out->use += written;
        out->content[out->use] = 0;
    }
#ifdef LIBXML_ICONV_ENABLED
    else if (handler->iconv_in != NULL) {
        ret = xmlIconvWrapper(handler->iconv_in, &out->content[out->use],
                              &written, in->content, &toconv);
        xmlBufferShrink(in, toconv);
        out->use += written;
        out->content[out->use] = 0;
        if (ret == -1)
            ret = -3;
    }
#endif /* LIBXML_ICONV_ENABLED */
    else {
        /*
         * No converter available: nothing was produced, so do not
         * report the free space of @out as converted bytes.
         */
        written = 0;
    }

    switch (ret) {
        case 0:
#ifdef DEBUG_ENCODING
            xmlGenericError(xmlGenericErrorContext,
                            "converted %d bytes to %d bytes of input\n",
                            toconv, written);
#endif
            break;
        case -1:
#ifdef DEBUG_ENCODING
            xmlGenericError(xmlGenericErrorContext,
                         "converted %d bytes to %d bytes of input, %d left\n",
                            toconv, written, in->use);
#endif
            break;
        case -3:
#ifdef DEBUG_ENCODING
            xmlGenericError(xmlGenericErrorContext,
                        "converted %d bytes to %d bytes of input, %d left\n",
                            toconv, written, in->use);
#endif
            break;
        case -2:
            /*
             * NOTE(review): the four bytes dumped below are read
             * without checking how many bytes are left in @in.
             */
            xmlGenericError(xmlGenericErrorContext,
                            "input conversion failed due to input error\n");
            xmlGenericError(xmlGenericErrorContext,
                            "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
                            in->content[0], in->content[1],
                            in->content[2], in->content[3]);
            break;
        default:
            break;
    }
    /*
     * Ignore when input buffer is not on a boundary; a conversion
     * stopped by lack of space (-1) is treated the same way, as in
     * xmlCharEncFirstLine().
     */
    if ((ret == -3) || (ret == -1))
        ret = 0;
    /*
     * Report the produced bytes when there are some, otherwise the
     * status, so that a failed conversion (-2) reaches the caller
     * instead of being silently turned into "0 bytes written".
     */
    return (written ? written : ret);
}
/**
* xmlCharEncOutFunc:
* @handler: char encoding transformation data structure
* @out: an xmlBuffer for the output.
* @in: an xmlBuffer for the input
*
* Generic front-end for the encoding handler output function.
* A first call with @in == NULL has to be made first to initiate the
* output in case of non-stateless encoding needing to initiate their
* state or the output (like the BOM in UTF16).
* In case of UTF8 sequence conversion errors for the given encoder,
* the content will be automatically remapped to a CharRef sequence.
*
* Returns 0 for the initialization call (@in == NULL) and when @in is
* empty, otherwise the status of the last conversion attempt:
* -1 general error (NULL @handler or @out, no output function)
* -2 if the transcoding fails (for *in is not valid utf8 string or
* the result of transformation can't fit into the encoding we want)
* and the offending bytes could not be replaced by a CharRef
* -3 if the iconv conversion stopped without producing any output
* NOTE(review): any other value is whatever the converter returned,
* presumably >= 0 on success - confirm against the handlers.
*/
int
xmlCharEncOutFunc(xmlCharEncodingHandler *handler, xmlBufferPtr out,
xmlBufferPtr in) {
int ret = -2;
int written;
/* writtentot and output are accumulated below but never read back */
int writtentot = 0;
int toconv;
int output = 0;
if (handler == NULL) return(-1);
if (out == NULL) return(-1);
/*
* Re-entry point: the conversion is restarted from here after a
* partial iconv conversion or after an unconvertible character has
* been replaced by a CharRef at the head of @in.
*/
retry:
written = out->size - out->use;
if (written > 0)
written--; /* Gennady: keep one byte for the trailing '\0' */
/*
* First specific handling of in = NULL, i.e. the initialization call
*/
if (in == NULL) {
toconv = 0;
if (handler->output != NULL) {
ret = handler->output(&out->content[out->use], &written,
NULL, &toconv);
if (ret >= 0) { /* Gennady: check return value */
out->use += written;
out->content[out->use] = 0;
}
}
#ifdef LIBXML_ICONV_ENABLED
else if (handler->iconv_out != NULL) {
/*
* NOTE(review): unlike the branch above, ret is not checked
* here before advancing out->use.
*/
ret = xmlIconvWrapper(handler->iconv_out, &out->content[out->use],
&written, NULL, &toconv);
out->use += written;
out->content[out->use] = 0;
}
#endif /* LIBXML_ICONV_ENABLED */
#ifdef DEBUG_ENCODING
xmlGenericError(xmlGenericErrorContext,
"initialized encoder\n");
#endif
/* the status of the initialization call is not propagated */
return(0);
}
/*
* Conversion itself.
*/
toconv = in->use;
if (toconv == 0)
return(0);
/* make room when less than twice the input size is available */
if (toconv * 2 >= written) {
xmlBufferGrow(out, toconv * 2);
written = out->size - out->use - 1;
}
if (handler->output != NULL) {
ret = handler->output(&out->content[out->use], &written,
in->content, &toconv);
/* drop the consumed input and account for the produced output */
xmlBufferShrink(in, toconv);
out->use += written;
writtentot += written;
out->content[out->use] = 0;
}
#ifdef LIBXML_ICONV_ENABLED
else if (handler->iconv_out != NULL) {
ret = xmlIconvWrapper(handler->iconv_out, &out->content[out->use],
&written, in->content, &toconv);
xmlBufferShrink(in, toconv);
out->use += written;
writtentot += written;
out->content[out->use] = 0;
if (ret == -1) {
if (written > 0) {
/*
* Can be a limitation of iconv: some output was produced,
* so try again with the remaining input.
*/
goto retry;
}
/* nothing was produced at all: report it as -3 */
ret = -3;
}
}
#endif /* LIBXML_ICONV_ENABLED */
else {
xmlGenericError(xmlGenericErrorContext,
"xmlCharEncOutFunc: no output function !\n");
return(-1);
}
if (ret >= 0) output += ret;
/*
* Attempt to handle error cases
*/
switch (ret) {
case 0:
#ifdef DEBUG_ENCODING
xmlGenericError(xmlGenericErrorContext,
"converted %d bytes to %d bytes of output\n",
toconv, written);
#endif
break;
case -1:
#ifdef DEBUG_ENCODING
xmlGenericError(xmlGenericErrorContext,
"output conversion failed by lack of space\n");
#endif
break;
case -3:
#ifdef DEBUG_ENCODING
xmlGenericError(xmlGenericErrorContext,"converted %d bytes to %d bytes of output %d left\n",
toconv, written, in->use);
#endif
break;
case -2: {
/*
* Decode the character now at the head of @in: cur receives its
* value and len the number of bytes it occupies.
*/
int len = in->use;
const xmlChar *utf = (const xmlChar *) in->content;
int cur;
cur = xmlGetUTF8Char(utf, &len);
if (cur > 0) {
xmlChar charref[20];
#ifdef DEBUG_ENCODING
xmlGenericError(xmlGenericErrorContext,
"handling output conversion error\n");
xmlGenericError(xmlGenericErrorContext,
"Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
in->content[0], in->content[1],
in->content[2], in->content[3]);
#endif
/*
* Removes the UTF8 sequence, and replace it by a charref
* and continue the transcoding phase, hoping the error
* did not mangle the encoder state.
* NOTE(review): the -1 length passed to xmlBufferAddHead
* presumably means "use the string length" - confirm.
*/
snprintf((char *) charref, sizeof(charref), "&#%d;", cur);
xmlBufferShrink(in, len);
xmlBufferAddHead(in, charref, -1);
goto retry;
} else {
/*
* The head of @in could not be decoded as a character: report
* the failure and overwrite the first input byte with a space.
*/
xmlGenericError(xmlGenericErrorContext,
"output conversion failed due to conv error\n");
xmlGenericError(xmlGenericErrorContext,
"Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
in->content[0], in->content[1],
in->content[2], in->content[3]);
in->content[0] = ' ';
}
break;
}
}
return(ret);
}
/**
 * xmlCharEncCloseFunc:
 * @handler: char encoding transformation data structure
 *
 * Generic front-end for the encoding handler close function.
 * When iconv support is compiled in and @handler carries an iconv
 * descriptor, its name, its descriptors and the handler structure
 * itself are released, so @handler must not be used afterwards.
 * Any other handler is left untouched.
 *
 * Returns 0 if success, or -1 in case of error (NULL @handler, NULL
 * handler name, or failure to close an iconv descriptor)
 */
int
xmlCharEncCloseFunc(xmlCharEncodingHandler *handler) {
    int status = 0;

    if ((handler == NULL) || (handler->name == NULL))
        return(-1);

#ifdef LIBXML_ICONV_ENABLED
    /*
     * Iconv handlers can be used only once, free the whole block
     * and the associated iconv resources.
     */
    if ((handler->iconv_in != NULL) || (handler->iconv_out != NULL)) {
        /* the name is known to be non NULL from the check above */
        xmlFree(handler->name);
        handler->name = NULL;

        /* output descriptor first, then the input one */
        if ((handler->iconv_out != NULL) &&
            (iconv_close(handler->iconv_out) != 0))
            status = -1;
        handler->iconv_out = NULL;

        if ((handler->iconv_in != NULL) &&
            (iconv_close(handler->iconv_in) != 0))
            status = -1;
        handler->iconv_in = NULL;

        xmlFree(handler);
    }
#endif /* LIBXML_ICONV_ENABLED */

#ifdef DEBUG_ENCODING
    if (status != 0) {
        xmlGenericError(xmlGenericErrorContext,
                "failed to close the encoding handler\n");
    } else {
        xmlGenericError(xmlGenericErrorContext,
                "closed the encoding handler\n");
    }
#endif
    return(status);
}
/**
* xmlByteConsumed:
* @ctxt: an XML parser context
*
* This function provides the current index of the parser relative
* to the start of the current entity. The index is expressed in
* bytes, starting at zero at the beginning of the entity and reaching
* the size in bytes of the file once a whole file has been parsed.
* The function is of constant cost if the input is UTF-8 but can be
* costly if run on non-UTF-8 input.
*
* Returns the index in bytes from the beginning of the entity or -1
* in case the index could not be computed.
*/
long
xmlByteConsumed(xmlParserCtxtPtr ctxt) {
xmlParserInputPtr in;
if (ctxt == NULL) return(-1);
in = ctxt->input;
if (in == NULL) return(-1);
if ((in->buf != NULL) && (in->buf->encoder != NULL)) {
unsigned int unused = 0;
xmlCharEncodingHandler * handler = in->buf->encoder;
/*
* Encoding conversion, compute the number of unused original
* bytes from the input not consumed and substract that from
* the raw consumed value, this is not a cheap operation
*/
if (in->end - in->cur > 0) {
unsigned char convbuf[32000];
const unsigned char *cur = (const unsigned char *)in->cur;
int toconv = in->end - in->cur, written = 32000;
int ret;
if (handler->output != NULL) {
do {
toconv = in->end - cur;
written = 32000;
ret = handler->output(&co
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -