📄 parserinternals.c

📁 libxml,在UNIX/LINUX下非常重要的一个库,为XML相关应用提供方便.目前上载的是最新版本,若要取得最新版本,请参考里面的readme.
💻 C
📖 第 1 页 / 共 5 页
字号:
    {        char buffer[150];	snprintf(buffer, 149, "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",			ctxt->input->cur[0], ctxt->input->cur[1],			ctxt->input->cur[2], ctxt->input->cur[3]);	__xmlErrEncoding(ctxt, XML_ERR_INVALID_CHAR,		     "Input is not proper UTF-8, indicate encoding !\n%s",		     BAD_CAST buffer, NULL);    }    *len = 1;    return ((int) *cur);}/** * xmlCopyCharMultiByte: * @out:  pointer to an array of xmlChar * @val:  the char value * * append the char value in the array  * * Returns the number of xmlChar written */intxmlCopyCharMultiByte(xmlChar *out, int val) {    if (out == NULL) return(0);    /*     * We are supposed to handle UTF8, check it's valid     * From rfc2044: encoding of the Unicode values on UTF-8:     *     * UCS-4 range (hex.)           UTF-8 octet sequence (binary)     * 0000 0000-0000 007F   0xxxxxxx     * 0000 0080-0000 07FF   110xxxxx 10xxxxxx     * 0000 0800-0000 FFFF   1110xxxx 10xxxxxx 10xxxxxx      */    if  (val >= 0x80) {	xmlChar *savedout = out;	int bits;	if (val <   0x800) { *out++= (val >>  6) | 0xC0;  bits=  0; }	else if (val < 0x10000) { *out++= (val >> 12) | 0xE0;  bits=  6;}	else if (val < 0x110000)  { *out++= (val >> 18) | 0xF0;  bits=  12; }	else {	    xmlErrEncodingInt(NULL, XML_ERR_INVALID_CHAR,		    "Internal error, xmlCopyCharMultiByte 0x%X out of bound\n",			      val);	    return(0);	}	for ( ; bits >= 0; bits-= 6)	    *out++= ((val >> bits) & 0x3F) | 0x80 ;	return (out - savedout);    }    *out = (xmlChar) val;    return 1;}/** * xmlCopyChar: * @len:  Ignored, compatibility * @out:  pointer to an array of xmlChar * @val:  the char value * * append the char value in the array  * * Returns the number of xmlChar written */intxmlCopyChar(int len ATTRIBUTE_UNUSED, xmlChar *out, int val) {    if (out == NULL) return(0);    /* the len parameter is ignored */    if  (val >= 0x80) {	return(xmlCopyCharMultiByte (out, val));    }    *out = (xmlChar) val;    return 1;}/************************************************************************ *									* *		Commodity functions to switch encodings			* *									* ************************************************************************//** * xmlSwitchEncoding: * @ctxt:  the parser context * @enc:  the encoding value (number) * * change the input functions when discovering the character encoding * of a given entity. * * Returns 0 in case of success, -1 otherwise */intxmlSwitchEncoding(xmlParserCtxtPtr ctxt, xmlCharEncoding enc){    xmlCharEncodingHandlerPtr handler;    if (ctxt == NULL) return(-1);    switch (enc) {	case XML_CHAR_ENCODING_ERROR:	    __xmlErrEncoding(ctxt, XML_ERR_UNKNOWN_ENCODING,	                   "encoding unknown\n", NULL, NULL);	    break;	case XML_CHAR_ENCODING_NONE:	    /* let's assume it's UTF-8 without the XML decl */	    ctxt->charset = XML_CHAR_ENCODING_UTF8;	    return(0);	case XML_CHAR_ENCODING_UTF8:	    /* default encoding, no conversion should be needed */	    ctxt->charset = XML_CHAR_ENCODING_UTF8;	    /*	     * Errata on XML-1.0 June 20 2001	     * Specific handling of the Byte Order Mark for	     * UTF-8	     */	    if ((ctxt->input != NULL) &&		(ctxt->input->cur[0] == 0xEF) &&		(ctxt->input->cur[1] == 0xBB) &&		(ctxt->input->cur[2] == 0xBF)) {		ctxt->input->cur += 3;	    }	    return(0);    case XML_CHAR_ENCODING_UTF16LE:    case XML_CHAR_ENCODING_UTF16BE:        /*The raw input characters are encoded         *in UTF-16. As we expect this function         *to be called after xmlCharEncInFunc, we expect         *ctxt->input->cur to contain UTF-8 encoded characters.         *So the raw UTF16 Byte Order Mark         *has also been converted into         *an UTF-8 BOM. Let's skip that BOM.         */        if ((ctxt->input != NULL) &&            (ctxt->input->cur[0] == 0xEF) &&            (ctxt->input->cur[1] == 0xBB) &&            (ctxt->input->cur[2] == 0xBF)) {            ctxt->input->cur += 3;        }	break ;	default:	    break;    }    handler = xmlGetCharEncodingHandler(enc);    if (handler == NULL) {	/*	 * Default handlers.	 */	switch (enc) {	    case XML_CHAR_ENCODING_ERROR:		__xmlErrEncoding(ctxt, XML_ERR_UNKNOWN_ENCODING,			       "encoding unknown\n", NULL, NULL);		break;	    case XML_CHAR_ENCODING_NONE:		/* let's assume it's UTF-8 without the XML decl */		ctxt->charset = XML_CHAR_ENCODING_UTF8;		return(0);	    case XML_CHAR_ENCODING_UTF8:	    case XML_CHAR_ENCODING_ASCII:		/* default encoding, no conversion should be needed */		ctxt->charset = XML_CHAR_ENCODING_UTF8;		return(0);	    case XML_CHAR_ENCODING_UTF16LE:		break;	    case XML_CHAR_ENCODING_UTF16BE:		break;	    case XML_CHAR_ENCODING_UCS4LE:		__xmlErrEncoding(ctxt, XML_ERR_UNSUPPORTED_ENCODING,			       "encoding not supported %s\n",			       BAD_CAST "USC4 little endian", NULL);		break;	    case XML_CHAR_ENCODING_UCS4BE:		__xmlErrEncoding(ctxt, XML_ERR_UNSUPPORTED_ENCODING,			       "encoding not supported %s\n",			       BAD_CAST "USC4 big endian", NULL);		break;	    case XML_CHAR_ENCODING_EBCDIC:		__xmlErrEncoding(ctxt, XML_ERR_UNSUPPORTED_ENCODING,			       "encoding not supported %s\n",			       BAD_CAST "EBCDIC", NULL);		break;	    case XML_CHAR_ENCODING_UCS4_2143:		__xmlErrEncoding(ctxt, XML_ERR_UNSUPPORTED_ENCODING,			       "encoding not supported %s\n",			       BAD_CAST "UCS4 2143", NULL);		break;	    case XML_CHAR_ENCODING_UCS4_3412:		__xmlErrEncoding(ctxt, XML_ERR_UNSUPPORTED_ENCODING,			       "encoding not supported %s\n",			       BAD_CAST "UCS4 3412", NULL);		break;	    case XML_CHAR_ENCODING_UCS2:		__xmlErrEncoding(ctxt, XML_ERR_UNSUPPORTED_ENCODING,			       "encoding not supported %s\n",			       BAD_CAST "UCS2", NULL);		break;	    case XML_CHAR_ENCODING_8859_1:	    case XML_CHAR_ENCODING_8859_2:	    case XML_CHAR_ENCODING_8859_3:	    case XML_CHAR_ENCODING_8859_4:	    case XML_CHAR_ENCODING_8859_5:	    case XML_CHAR_ENCODING_8859_6:	    case XML_CHAR_ENCODING_8859_7:	    case XML_CHAR_ENCODING_8859_8:	    case XML_CHAR_ENCODING_8859_9:		/*		 * We used to keep the internal content in the		 * document encoding however this turns being unmaintainable		 * So xmlGetCharEncodingHandler() will return non-null		 * values for this now.		 */		if ((ctxt->inputNr == 1) &&		    (ctxt->encoding == NULL) &&		    (ctxt->input->encoding != NULL)) {		    ctxt->encoding = xmlStrdup(ctxt->input->encoding);		}		ctxt->charset = enc;		return(0);	    case XML_CHAR_ENCODING_2022_JP:		__xmlErrEncoding(ctxt, XML_ERR_UNSUPPORTED_ENCODING,			       "encoding not supported %s\n",			       BAD_CAST "ISO-2022-JP", NULL);		break;	    case XML_CHAR_ENCODING_SHIFT_JIS:		__xmlErrEncoding(ctxt, XML_ERR_UNSUPPORTED_ENCODING,			       "encoding not supported %s\n",			       BAD_CAST "Shift_JIS", NULL);		break;	    case XML_CHAR_ENCODING_EUC_JP:		__xmlErrEncoding(ctxt, XML_ERR_UNSUPPORTED_ENCODING,			       "encoding not supported %s\n",			       BAD_CAST "EUC-JP", NULL);		break;	}    }    if (handler == NULL)	return(-1);    ctxt->charset = XML_CHAR_ENCODING_UTF8;    return(xmlSwitchToEncoding(ctxt, handler));}/** * xmlSwitchInputEncoding: * @ctxt:  the parser context * @input:  the input stream * @handler:  the encoding handler * * change the input functions when discovering the character encoding * of a given entity. * * Returns 0 in case of success, -1 otherwise */intxmlSwitchInputEncoding(xmlParserCtxtPtr ctxt, xmlParserInputPtr input,                       xmlCharEncodingHandlerPtr handler){    int nbchars;    if (handler == NULL)        return (-1);    if (input == NULL)        return (-1);    if (input->buf != NULL) {        if (input->buf->encoder != NULL) {            /*             * Check in case the auto encoding detetection triggered             * in already.             */            if (input->buf->encoder == handler)                return (0);            /*             * "UTF-16" can be used for both LE and BE             if ((!xmlStrncmp(BAD_CAST input->buf->encoder->name,             BAD_CAST "UTF-16", 6)) &&             (!xmlStrncmp(BAD_CAST handler->name,             BAD_CAST "UTF-16", 6))) {             return(0);             }             */            /*             * Note: this is a bit dangerous, but that's what it             * takes to use nearly compatible signature for different             * encodings.             */            xmlCharEncCloseFunc(input->buf->encoder);            input->buf->encoder = handler;            return (0);        }        input->buf->encoder = handler;        /*         * Is there already some content down the pipe to convert ?         */        if ((input->buf->buffer != NULL) && (input->buf->buffer->use > 0)) {            int processed;	    unsigned int use;            /*             * Specific handling of the Byte Order Mark for              * UTF-16             */            if ((handler->name != NULL) &&                (!strcmp(handler->name, "UTF-16LE") ||                 !strcmp(handler->name, "UTF-16")) &&                (input->cur[0] == 0xFF) && (input->cur[1] == 0xFE)) {                input->cur += 2;            }            if ((handler->name != NULL) &&                (!strcmp(handler->name, "UTF-16BE")) &&                (input->cur[0] == 0xFE) && (input->cur[1] == 0xFF)) {                input->cur += 2;            }            /*             * Errata on XML-1.0 June 20 2001             * Specific handling of the Byte Order Mark for             * UTF-8             */            if ((handler->name != NULL) &&                (!strcmp(handler->name, "UTF-8")) &&                (input->cur[0] == 0xEF) &&                (input->cur[1] == 0xBB) && (input->cur[2] == 0xBF)) {                input->cur += 3;            }            /*             * Shrink the current input buffer.             * Move it as the raw buffer and create a new input buffer             */            processed = input->cur - input->base;            xmlBufferShrink(input->buf->buffer, processed);            input->buf->raw = input->buf->buffer;            input->buf->buffer = xmlBufferCreate();	    input->buf->rawconsumed = processed;	    use = input->buf->raw->use;            if (ctxt->html) {                /*                 * convert as much as possible of the buffer                 */                nbchars = xmlCharEncInFunc(input->buf->encoder,                                           input->buf->buffer,                                           input->buf->raw);            } else {                /*                 * convert just enough to get                 * '<?xml version="1.0" encoding="xxx"?>'                 * parsed with the autodetected encoding                 * into the parser reading buffer.                 */                nbchars = xmlCharEncFirstLine(input->buf->encoder,                                              input->buf->buffer,                                              input->buf->raw);            }            if (nbchars < 0) {                xmlErrInternal(ctxt,                               "switching encoding: encoder error\n",                               NULL);                return (-1);            }	    input->buf->rawconsumed += use - input->buf->raw->use;            input->base = input->cur = input->buf->buffer->content;            input->end = &input->base[input->buf->buffer->use];        }        return (0);    } else {        if ((input->length == 0) || (input->buf == NULL)) {            /*             * When parsing a static memory array one must know the             * size to be able to convert the buffer.             */            xmlErrInternal(ctxt, "switching encoding : no input\n", NULL);            return (-1);        } else {            int processed;            /*             * Shrink the current input buffer.             * Move it as the raw buffer and create a new input buffer             */            processed = input->cur - input->base;            input->buf->raw = xmlBufferCreate();            xmlBufferAdd(input->buf->raw, input->cur,                         input->length - processed);            input->buf->buffer = xmlBufferCreate();            /*             * convert as much as possible of the raw input             * to the parser reading buffer.             */            nbchars = xmlCharEncInFunc(input->buf->encoder,                                       input->buf->buffer,                                       input->buf->raw);            if (nbchars < 0) {                xmlErrInternal(ctxt,                               "switching encoding: encoder error\n",                               NULL);                return (-1);            }            /*             * Conversion succeeded, get rid of the old buffer             */            if ((input->free != NULL) && (input->base != NULL))                input->free((xmlChar *) input->base);            input->base = input->cur = input->buf->buffer->content;            input->end = &input->base[input->buf->buffer->use];        }    }    return (0);}/** * xmlSwitchToEncoding: * @ctxt:  the parser context * @handler:  the encoding handler
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -