📄 encoding.c
字号:
toconv, written); break; case -1: xmlGenericError(xmlGenericErrorContext,"converted %d bytes to %d bytes of input, %d left\n", toconv, written, in->use); break; case -2: xmlGenericError(xmlGenericErrorContext, "input conversion failed due to input error\n"); break; case -3: xmlGenericError(xmlGenericErrorContext,"converted %d bytes to %d bytes of input, %d left\n", toconv, written, in->use); break; default: xmlGenericError(xmlGenericErrorContext,"Unknown input conversion failed %d\n", ret); }#endif /* DEBUG_ENCODING */ /* * Ignore when input buffer is not on a boundary */ if (ret == -3) ret = 0; if (ret == -1) ret = 0; return(ret);}/** * xmlCharEncInFunc: * @handler: char encoding transformation data structure * @out: an xmlBuffer for the output. * @in: an xmlBuffer for the input * * Generic front-end for the encoding handler input function * * Returns the number of byte written if success, or * -1 general error * -2 if the transcoding fails (for *in is not valid utf8 string or * the result of transformation can't fit into the encoding we want), or */intxmlCharEncInFunc(xmlCharEncodingHandler * handler, xmlBufferPtr out, xmlBufferPtr in){ int ret = -2; int written; int toconv; if (handler == NULL) return (-1); if (out == NULL) return (-1); if (in == NULL) return (-1); toconv = in->use; if (toconv == 0) return (0); written = out->size - out->use; if (toconv * 2 >= written) { xmlBufferGrow(out, out->size + toconv * 2); written = out->size - out->use - 1; } if (handler->input != NULL) { ret = handler->input(&out->content[out->use], &written, in->content, &toconv); xmlBufferShrink(in, toconv); out->use += written; out->content[out->use] = 0; }#ifdef LIBXML_ICONV_ENABLED else if (handler->iconv_in != NULL) { ret = xmlIconvWrapper(handler->iconv_in, &out->content[out->use], &written, in->content, &toconv); xmlBufferShrink(in, toconv); out->use += written; out->content[out->use] = 0; if (ret == -1) ret = -3; }#endif /* LIBXML_ICONV_ENABLED */ switch (ret) { case 0:#ifdef DEBUG_ENCODING xmlGenericError(xmlGenericErrorContext, "converted %d bytes to %d bytes of input\n", toconv, written);#endif break; case -1:#ifdef DEBUG_ENCODING xmlGenericError(xmlGenericErrorContext, "converted %d bytes to %d bytes of input, %d left\n", toconv, written, in->use);#endif break; case -3:#ifdef DEBUG_ENCODING xmlGenericError(xmlGenericErrorContext, "converted %d bytes to %d bytes of input, %d left\n", toconv, written, in->use);#endif break; case -2: { char buf[50]; snprintf(&buf[0], 49, "0x%02X 0x%02X 0x%02X 0x%02X", in->content[0], in->content[1], in->content[2], in->content[3]); buf[49] = 0; xmlEncodingErr(XML_I18N_CONV_FAILED, "input conversion failed due to input error, bytes %s\n", buf); } } /* * Ignore when input buffer is not on a boundary */ if (ret == -3) ret = 0; return (written? written : ret);}/** * xmlCharEncOutFunc: * @handler: char enconding transformation data structure * @out: an xmlBuffer for the output. * @in: an xmlBuffer for the input * * Generic front-end for the encoding handler output function * a first call with @in == NULL has to be made firs to initiate the * output in case of non-stateless encoding needing to initiate their * state or the output (like the BOM in UTF16). * In case of UTF8 sequence conversion errors for the given encoder, * the content will be automatically remapped to a CharRef sequence. * * Returns the number of byte written if success, or * -1 general error * -2 if the transcoding fails (for *in is not valid utf8 string or * the result of transformation can't fit into the encoding we want), or */intxmlCharEncOutFunc(xmlCharEncodingHandler *handler, xmlBufferPtr out, xmlBufferPtr in) { int ret = -2; int written; int writtentot = 0; int toconv; int output = 0; if (handler == NULL) return(-1); if (out == NULL) return(-1);retry: written = out->size - out->use; if (written > 0) written--; /* Gennady: count '/0' */ /* * First specific handling of in = NULL, i.e. the initialization call */ if (in == NULL) { toconv = 0; if (handler->output != NULL) { ret = handler->output(&out->content[out->use], &written, NULL, &toconv); if (ret >= 0) { /* Gennady: check return value */ out->use += written; out->content[out->use] = 0; } }#ifdef LIBXML_ICONV_ENABLED else if (handler->iconv_out != NULL) { ret = xmlIconvWrapper(handler->iconv_out, &out->content[out->use], &written, NULL, &toconv); out->use += written; out->content[out->use] = 0; }#endif /* LIBXML_ICONV_ENABLED */#ifdef DEBUG_ENCODING xmlGenericError(xmlGenericErrorContext, "initialized encoder\n");#endif return(0); } /* * Conversion itself. */ toconv = in->use; if (toconv == 0) return(0); if (toconv * 2 >= written) { xmlBufferGrow(out, toconv * 2); written = out->size - out->use - 1; } if (handler->output != NULL) { ret = handler->output(&out->content[out->use], &written, in->content, &toconv); xmlBufferShrink(in, toconv); out->use += written; writtentot += written; out->content[out->use] = 0; }#ifdef LIBXML_ICONV_ENABLED else if (handler->iconv_out != NULL) { ret = xmlIconvWrapper(handler->iconv_out, &out->content[out->use], &written, in->content, &toconv); xmlBufferShrink(in, toconv); out->use += written; writtentot += written; out->content[out->use] = 0; if (ret == -1) { if (written > 0) { /* * Can be a limitation of iconv */ goto retry; } ret = -3; } }#endif /* LIBXML_ICONV_ENABLED */ else { xmlEncodingErr(XML_I18N_NO_OUTPUT, "xmlCharEncOutFunc: no output function !\n", NULL); return(-1); } if (ret >= 0) output += ret; /* * Attempt to handle error cases */ switch (ret) { case 0:#ifdef DEBUG_ENCODING xmlGenericError(xmlGenericErrorContext, "converted %d bytes to %d bytes of output\n", toconv, written);#endif break; case -1:#ifdef DEBUG_ENCODING xmlGenericError(xmlGenericErrorContext, "output conversion failed by lack of space\n");#endif break; case -3:#ifdef DEBUG_ENCODING xmlGenericError(xmlGenericErrorContext,"converted %d bytes to %d bytes of output %d left\n", toconv, written, in->use);#endif break; case -2: { int len = in->use; const xmlChar *utf = (const xmlChar *) in->content; int cur; cur = xmlGetUTF8Char(utf, &len); if (cur > 0) { xmlChar charref[20];#ifdef DEBUG_ENCODING xmlGenericError(xmlGenericErrorContext, "handling output conversion error\n"); xmlGenericError(xmlGenericErrorContext, "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n", in->content[0], in->content[1], in->content[2], in->content[3]);#endif /* * Removes the UTF8 sequence, and replace it by a charref * and continue the transcoding phase, hoping the error * did not mangle the encoder state. */ snprintf((char *) &charref[0], sizeof(charref), "&#%d;", cur); xmlBufferShrink(in, len); xmlBufferAddHead(in, charref, -1); goto retry; } else { char buf[50]; snprintf(&buf[0], 49, "0x%02X 0x%02X 0x%02X 0x%02X", in->content[0], in->content[1], in->content[2], in->content[3]); buf[49] = 0; xmlEncodingErr(XML_I18N_CONV_FAILED, "output conversion failed due to conv error, bytes %s\n", buf); if (in->alloc != XML_BUFFER_ALLOC_IMMUTABLE) in->content[0] = ' '; } break; } } return(ret);}/** * xmlCharEncCloseFunc: * @handler: char enconding transformation data structure * * Generic front-end for encoding handler close function * * Returns 0 if success, or -1 in case of error */intxmlCharEncCloseFunc(xmlCharEncodingHandler *handler) { int ret = 0; if (handler == NULL) return(-1); if (handler->name == NULL) return(-1);#ifdef LIBXML_ICONV_ENABLED /* * Iconv handlers can be used only once, free the whole block. * and the associated icon resources. */ if ((handler->iconv_out != NULL) || (handler->iconv_in != NULL)) { if (handler->name != NULL) xmlFree(handler->name); handler->name = NULL; if (handler->iconv_out != NULL) { if (iconv_close(handler->iconv_out)) ret = -1; handler->iconv_out = NULL; } if (handler->iconv_in != NULL) { if (iconv_close(handler->iconv_in)) ret = -1; handler->iconv_in = NULL; } xmlFree(handler); }#endif /* LIBXML_ICONV_ENABLED */#ifdef DEBUG_ENCODING if (ret) xmlGenericError(xmlGenericErrorContext, "failed to close the encoding handler\n"); else xmlGenericError(xmlGenericErrorContext, "closed the encoding handler\n");#endif return(ret);}/** * xmlByteConsumed: * @ctxt: an XML parser context * * This function provides the current index of the parser relative * to the start of the current entity. This function is computed in * bytes from the beginning starting at zero and finishing at the * size in byte of the file if parsing a file. The function is * of constant cost if the input is UTF-8 but can be costly if run * on non-UTF-8 input. * * Returns the index in bytes from the beginning of the entity or -1 * in case the index could not be computed. */longxmlByteConsumed(xmlParserCtxtPtr ctxt) { xmlParserInputPtr in; if (ctxt == NULL) return(-1); in = ctxt->input; if (in == NULL) return(-1); if ((in->buf != NULL) && (in->buf->encoder != NULL)) { unsigned int unused = 0; xmlCharEncodingHandler * handler = in->buf->encoder; /* * Encoding conversion, compute the number of unused original * bytes from the input not consumed and substract that from * the raw consumed value, this is not a cheap operation */ if (in->end - in->cur > 0) { unsigned char convbuf[32000]; const unsigned char *cur = (const unsigned char *)in->cur; int toconv = in->end - in->cur, written = 32000; int ret; if (handler->output != NULL) { do { toconv = in->end - cur; written = 32000; ret = handler->output(&convbuf[0], &written, cur, &toconv); if (ret == -1) return(-1); unused += written; cur += toconv; } while (ret == -2);#ifdef LIBXML_ICONV_ENABLED } else if (handler->iconv_out != NULL) { do { toconv = in->end - cur; written = 32000; ret = xmlIconvWrapper(handler->iconv_out, &convbuf[0], &written, cur, &toconv); if (ret < 0) { if (written > 0) ret = -2; else return(-1); } unused += written; cur += toconv; } while (ret == -2);#endif } else { /* could not find a converter */ return(-1); } } if (in->buf->rawconsumed < unused) return(-1); return(in->buf->rawconsumed - unused); } return(in->consumed + (in->cur - in->base));}#ifndef LIBXML_ICONV_ENABLED#ifdef LIBXML_ISO8859X_ENABLED/** * UTF8ToISO8859x: * @out: a pointer to an array of bytes to store the result * @outlen: the length of @out * @in: a pointer to an array of UTF-8 chars * @inlen: the length of @in * @xlattable: the 2-level transcoding table * * Take a block of UTF-8 chars in and try to convert it to an ISO 8859-* * block of chars out. * * Returns 0 if success, -2 if the transcoding fails, or -1 otherwise * The value of @inlen after return is the number of octets consumed * as the return value is positive, else unpredictable. * The value of @outlen after return is the number of ocetes consumed. */static intUTF8ToISO8859x(unsigned char* out, int *outlen, const unsigned char* in, int *inlen, unsigned char const *xlattable) { const u
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -