⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 encoding.c.svn-base

📁 这是一个用于解析xml文件的类库。使用这个类库
💻 SVN-BASE
📖 第 1 页 / 共 5 页
字号:
/** * xmlCleanupCharEncodingHandlers: * * Cleanup the memory allocated for the char encoding support, it * unregisters all the encoding handlers and the aliases. */voidxmlCleanupCharEncodingHandlers(void) {    xmlCleanupEncodingAliases();    if (handlers == NULL) return;    for (;nbCharEncodingHandler > 0;) {        nbCharEncodingHandler--;	if (handlers[nbCharEncodingHandler] != NULL) {	    if (handlers[nbCharEncodingHandler]->name != NULL)		xmlFree(handlers[nbCharEncodingHandler]->name);	    xmlFree(handlers[nbCharEncodingHandler]);	}    }    xmlFree(handlers);    handlers = NULL;    nbCharEncodingHandler = 0;    xmlDefaultCharEncodingHandler = NULL;}/** * xmlRegisterCharEncodingHandler: * @handler:  the xmlCharEncodingHandlerPtr handler block * * Register the char encoding handler, surprising, isn't it ? */voidxmlRegisterCharEncodingHandler(xmlCharEncodingHandlerPtr handler) {    if (handlers == NULL) xmlInitCharEncodingHandlers();    if (handler == NULL) {        xmlGenericError(xmlGenericErrorContext,		"xmlRegisterCharEncodingHandler: NULL handler !\n");	return;    }    if (nbCharEncodingHandler >= MAX_ENCODING_HANDLERS) {        xmlGenericError(xmlGenericErrorContext, 	"xmlRegisterCharEncodingHandler: Too many handler registered\n");        xmlGenericError(xmlGenericErrorContext,		"\tincrease MAX_ENCODING_HANDLERS : %s\n", __FILE__);	return;    }    handlers[nbCharEncodingHandler++] = handler;}/** * xmlGetCharEncodingHandler: * @enc:  an xmlCharEncoding value. * * Search in the registered set the handler able to read/write that encoding. * * Returns the handler or NULL if not found */xmlCharEncodingHandlerPtrxmlGetCharEncodingHandler(xmlCharEncoding enc) {    xmlCharEncodingHandlerPtr handler;    if (handlers == NULL) xmlInitCharEncodingHandlers();    switch (enc) {        case XML_CHAR_ENCODING_ERROR:	    return(NULL);        case XML_CHAR_ENCODING_NONE:	    return(NULL);        case XML_CHAR_ENCODING_UTF8:	    return(NULL);        case XML_CHAR_ENCODING_UTF16LE:	    return(xmlUTF16LEHandler);        case XML_CHAR_ENCODING_UTF16BE:	    return(xmlUTF16BEHandler);        case XML_CHAR_ENCODING_EBCDIC:            handler = xmlFindCharEncodingHandler("EBCDIC");            if (handler != NULL) return(handler);            handler = xmlFindCharEncodingHandler("ebcdic");            if (handler != NULL) return(handler);	    break;        case XML_CHAR_ENCODING_UCS4BE:            handler = xmlFindCharEncodingHandler("ISO-10646-UCS-4");            if (handler != NULL) return(handler);            handler = xmlFindCharEncodingHandler("UCS-4");            if (handler != NULL) return(handler);            handler = xmlFindCharEncodingHandler("UCS4");            if (handler != NULL) return(handler);	    break;        case XML_CHAR_ENCODING_UCS4LE:            handler = xmlFindCharEncodingHandler("ISO-10646-UCS-4");            if (handler != NULL) return(handler);            handler = xmlFindCharEncodingHandler("UCS-4");            if (handler != NULL) return(handler);            handler = xmlFindCharEncodingHandler("UCS4");            if (handler != NULL) return(handler);	    break;        case XML_CHAR_ENCODING_UCS4_2143:	    break;        case XML_CHAR_ENCODING_UCS4_3412:	    break;        case XML_CHAR_ENCODING_UCS2:            handler = xmlFindCharEncodingHandler("ISO-10646-UCS-2");            if (handler != NULL) return(handler);            handler = xmlFindCharEncodingHandler("UCS-2");            if (handler != NULL) return(handler);            handler = xmlFindCharEncodingHandler("UCS2");            if (handler != NULL) return(handler);	    break;	    /*	     * We used to keep ISO Latin encodings native in the	     * generated data. This led to so many problems that	     * this has been removed. One can still change this	     * back by registering no-ops encoders for those	     */        case XML_CHAR_ENCODING_8859_1:	    handler = xmlFindCharEncodingHandler("ISO-8859-1");	    if (handler != NULL) return(handler);	    break;        case XML_CHAR_ENCODING_8859_2:	    handler = xmlFindCharEncodingHandler("ISO-8859-2");	    if (handler != NULL) return(handler);	    break;        case XML_CHAR_ENCODING_8859_3:	    handler = xmlFindCharEncodingHandler("ISO-8859-3");	    if (handler != NULL) return(handler);	    break;        case XML_CHAR_ENCODING_8859_4:	    handler = xmlFindCharEncodingHandler("ISO-8859-4");	    if (handler != NULL) return(handler);	    break;        case XML_CHAR_ENCODING_8859_5:	    handler = xmlFindCharEncodingHandler("ISO-8859-5");	    if (handler != NULL) return(handler);	    break;        case XML_CHAR_ENCODING_8859_6:	    handler = xmlFindCharEncodingHandler("ISO-8859-6");	    if (handler != NULL) return(handler);	    break;        case XML_CHAR_ENCODING_8859_7:	    handler = xmlFindCharEncodingHandler("ISO-8859-7");	    if (handler != NULL) return(handler);	    break;        case XML_CHAR_ENCODING_8859_8:	    handler = xmlFindCharEncodingHandler("ISO-8859-8");	    if (handler != NULL) return(handler);	    break;        case XML_CHAR_ENCODING_8859_9:	    handler = xmlFindCharEncodingHandler("ISO-8859-9");	    if (handler != NULL) return(handler);	    break;        case XML_CHAR_ENCODING_2022_JP:            handler = xmlFindCharEncodingHandler("ISO-2022-JP");            if (handler != NULL) return(handler);	    break;        case XML_CHAR_ENCODING_SHIFT_JIS:            handler = xmlFindCharEncodingHandler("SHIFT-JIS");            if (handler != NULL) return(handler);            handler = xmlFindCharEncodingHandler("SHIFT_JIS");            if (handler != NULL) return(handler);            handler = xmlFindCharEncodingHandler("Shift_JIS");            if (handler != NULL) return(handler);	    break;        case XML_CHAR_ENCODING_EUC_JP:            handler = xmlFindCharEncodingHandler("EUC-JP");            if (handler != NULL) return(handler);	    break;	default: 	    break;    }    #ifdef DEBUG_ENCODING    xmlGenericError(xmlGenericErrorContext,	    "No handler found for encoding %d\n", enc);#endif    return(NULL);}/** * xmlFindCharEncodingHandler: * @name:  a string describing the char encoding. * * Search in the registered set the handler able to read/write that encoding. * * Returns the handler or NULL if not found */xmlCharEncodingHandlerPtrxmlFindCharEncodingHandler(const char *name) {    const char *nalias;    const char *norig;    xmlCharEncoding alias;#ifdef LIBXML_ICONV_ENABLED    xmlCharEncodingHandlerPtr enc;    iconv_t icv_in, icv_out;#endif /* LIBXML_ICONV_ENABLED */    char upper[100];    int i;    if (handlers == NULL) xmlInitCharEncodingHandlers();    if (name == NULL) return(xmlDefaultCharEncodingHandler);    if (name[0] == 0) return(xmlDefaultCharEncodingHandler);    /*     * Do the alias resolution     */    norig = name;    nalias = xmlGetEncodingAlias(name);    if (nalias != NULL)	name = nalias;    /*     * Check first for directly registered encoding names     */    for (i = 0;i < 99;i++) {        upper[i] = toupper(name[i]);	if (upper[i] == 0) break;    }    upper[i] = 0;    for (i = 0;i < nbCharEncodingHandler; i++)        if (!strcmp(upper, handlers[i]->name)) {#ifdef DEBUG_ENCODING            xmlGenericError(xmlGenericErrorContext,		    "Found registered handler for encoding %s\n", name);#endif	    return(handlers[i]);	}#ifdef LIBXML_ICONV_ENABLED    /* check whether iconv can handle this */    icv_in = iconv_open("UTF-8", name);    icv_out = iconv_open(name, "UTF-8");    if ((icv_in != (iconv_t) -1) && (icv_out != (iconv_t) -1)) {	    enc = (xmlCharEncodingHandlerPtr)	          xmlMalloc(sizeof(xmlCharEncodingHandler));	    if (enc == NULL) {	        iconv_close(icv_in);	        iconv_close(icv_out);		return(NULL);	    }	    enc->name = xmlMemStrdup(name);	    enc->input = NULL;	    enc->output = NULL;	    enc->iconv_in = icv_in;	    enc->iconv_out = icv_out;#ifdef DEBUG_ENCODING            xmlGenericError(xmlGenericErrorContext,		    "Found iconv handler for encoding %s\n", name);#endif	    return enc;    } else if ((icv_in != (iconv_t) -1) || icv_out != (iconv_t) -1) {	    xmlGenericError(xmlGenericErrorContext,		    "iconv : problems with filters for '%s'\n", name);    }#endif /* LIBXML_ICONV_ENABLED */#ifdef DEBUG_ENCODING    xmlGenericError(xmlGenericErrorContext,	    "No handler found for encoding %s\n", name);#endif    /*     * Fallback using the canonical names     */    alias = xmlParseCharEncoding(norig);    if (alias != XML_CHAR_ENCODING_ERROR) {        const char* canon;        canon = xmlGetCharEncodingName(alias);        if ((canon != NULL) && (strcmp(name, canon))) {	    return(xmlFindCharEncodingHandler(canon));        }    }    /* If "none of the above", give up */    return(NULL);}/************************************************************************ *									* *		ICONV based generic conversion functions		* *									* ************************************************************************/#ifdef LIBXML_ICONV_ENABLED/** * xmlIconvWrapper: * @cd:		iconv converter data structure * @out:  a pointer to an array of bytes to store the result * @outlen:  the length of @out * @in:  a pointer to an array of ISO Latin 1 chars * @inlen:  the length of @in * * Returns 0 if success, or  *     -1 by lack of space, or *     -2 if the transcoding fails (for *in is not valid utf8 string or *        the result of transformation can't fit into the encoding we want), or *     -3 if there the last byte can't form a single output char. *      * The value of @inlen after return is the number of octets consumed *     as the return value is positive, else unpredictable. * The value of @outlen after return is the number of ocetes consumed. */static intxmlIconvWrapper(iconv_t cd,    unsigned char *out, int *outlen,    const unsigned char *in, int *inlen) {    size_t icv_inlen = *inlen, icv_outlen = *outlen;    const char *icv_in = (const char *) in;    char *icv_out = (char *) out;    int ret;    ret = iconv(cd, (char **) &icv_in, &icv_inlen, &icv_out, &icv_outlen);    if (in != NULL) {        *inlen -= icv_inlen;        *outlen -= icv_outlen;    } else {        *inlen = 0;        *outlen = 0;    }    if ((icv_inlen != 0) || (ret == -1)) {#ifdef EILSEQ        if (errno == EILSEQ) {            return -2;        } else#endif#ifdef E2BIG        if (errno == E2BIG) {            return -1;        } else#endif#ifdef EINVAL        if (errno == EINVAL) {            return -3;        } else#endif        {            return -3;        }    }    return 0;}#endif /* LIBXML_ICONV_ENABLED *//************************************************************************ *									* *		The real API used by libxml for on-the-fly conversion	* *									* ************************************************************************//** * xmlCharEncFirstLine: * @handler:	char enconding transformation data structure * @out:  an xmlBuffer for the output. * @in:  an xmlBuffer for the input *      * Front-end for the encoding handler input function, but handle only * the very first line, i.e. limit itself to 45 chars. *      * Returns the number of byte written if success, or  *     -1 general error *     -2 if the transcoding fails (for *in is not valid utf8 string or *        the result of transformation can't fit into the encoding we want), or */intxmlCharEncFirstLine(xmlCharEncodingHandler *handler, xmlBufferPtr out,                 xmlBufferPtr in) {    int ret = -2;    int written;    int toconv;    if (handler == NULL) return(-1);    if (out == NULL) return(-1);    if (in == NULL) return(-1);    written = out->size - out->use;    toconv = in->use;    if (toconv * 2 >= written) {        xmlBufferGrow(out, toconv);	written = out->size - out->use - 1;    }    /*     * echo '<?xml version="1.0" encoding="UCS4"?>' | wc -c => 38     * 45 chars should be sufficient to reach the end of the encoding     * declaration without going too far inside the document content.     */    written = 45;    if (handler->input != NULL) {	ret = handler->input(&out->content[out->use], &written,	                     in->content, &toconv);	xmlBufferShrink(in, toconv);	out->use += written;	out->content[out->use] = 0;    }#ifdef LIBXML_ICONV_ENABLED    else if (handler->iconv_in != NULL) {	ret = xmlIconvWrapper(handler->iconv_in, &out->content[out->use],	                      &written, in->content, &toconv);	xmlBufferShrink(in, toconv);	out->use += written;	out->content[out->use] = 0;	if (ret == -1) ret = -3;    }#endif /* LIBXML_ICONV_ENABLED */#ifdef DEBUG_ENCODING    switch (ret) {        case 0:	    xmlGenericError(xmlGenericErrorContext,		    "converted %d bytes to %d bytes of input\n",	            toconv, written);	    break;        case -1:	    xmlGenericError(xmlGenericErrorContext,"converted %d bytes to %d bytes of input, %d left\n",	            toconv, written, in->use);	    break;        case -2:	    xmlGenericError(xmlGenericErrorContext,		    "input conversion failed due to input error\n");	    break;        case -3:	    xmlGenericError(xmlGenericErrorContext,"converted %d bytes to %d bytes of input, %d left\n",	            toconv, written, in->use);	    break;	default:	    xmlGenericError(xmlGenericErrorContext,"Unknown input conversion failed %d\n", ret);    }#endif /* DEBUG_ENCODING */    /*     * Ignore when input buffer is not on a boundary     */    if (ret == -3) ret = 0;    if (ret == -1) ret = 0;    return(ret);}/** * xmlCharEncInFunc: * @handler:	char encoding transformation data structure * @out:  an xmlBuffer for the output. * @in:  an xmlBuffer for the input *      * Generic front-end for the encoding handler input function *     

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -