📄 xmlstring.c.svn-base

📁 这是一个用于解析xml文件的类库。使用这个类库
💻 SVN-BASE
📖 第 1 页 / 共 2 页
字号:
上一页 12
    if ((str2 == NULL) || (len == 0))        return(xmlStrdup(str1));    if (str1 == NULL)        return(xmlStrndup(str2, len));    size = xmlStrlen(str1);    ret = (xmlChar *) xmlMalloc((size + len + 1) * sizeof(xmlChar));    if (ret == NULL) {        xmlErrMemory(NULL, NULL);        return(xmlStrndup(str1, size));    }    memcpy(ret, str1, size * sizeof(xmlChar));    memcpy(&ret[size], str2, len * sizeof(xmlChar));    ret[size + len] = 0;    return(ret);}/** * xmlStrcat: * @cur:  the original xmlChar * array * @add:  the xmlChar * array added * * a strcat for array of xmlChar's. Since they are supposed to be * encoded in UTF-8 or an encoding with 8bit based chars, we assume * a termination mark of '0'. * * Returns a new xmlChar * containing the concatenated string. */xmlChar *xmlStrcat(xmlChar *cur, const xmlChar *add) {    const xmlChar *p = add;    if (add == NULL) return(cur);    if (cur == NULL)         return(xmlStrdup(add));    while (*p != 0) p++; /* non input consuming */    return(xmlStrncat(cur, add, p - add));}/** * xmlStrPrintf: * @buf:   the result buffer. * @len:   the result buffer length. * @msg:   the message with printf formatting. * @...:   extra parameters for the message. * * Formats @msg and places result into @buf. * * Returns the number of characters written to @buf or -1 if an error occurs. */int xmlStrPrintf(xmlChar *buf, int len, const xmlChar *msg, ...) {    va_list args;    int ret;        if((buf == NULL) || (msg == NULL)) {        return(-1);    }        va_start(args, msg);    ret = _vsnprintf((char *) buf, len, (const char *) msg, args);    va_end(args);    buf[len - 1] = 0; /* be safe ! */        return(ret);}/** * xmlStrVPrintf: * @buf:   the result buffer. * @len:   the result buffer length. * @msg:   the message with printf formatting. * @ap:    extra parameters for the message. * * Formats @msg and places result into @buf. * * Returns the number of characters written to @buf or -1 if an error occurs. */int xmlStrVPrintf(xmlChar *buf, int len, const xmlChar *msg, va_list ap) {    int ret;        if((buf == NULL) || (msg == NULL)) {        return(-1);    }        ret = _vsnprintf((char *) buf, len, (const char *) msg, ap);    buf[len - 1] = 0; /* be safe ! */        return(ret);}/************************************************************************ *                                                                      * *              Generic UTF8 handling routines                          * *                                                                      * * From rfc2044: encoding of the Unicode values on UTF-8:               * *                                                                      * * UCS-4 range (hex.)           UTF-8 octet sequence (binary)           * * 0000 0000-0000 007F   0xxxxxxx                                       * * 0000 0080-0000 07FF   110xxxxx 10xxxxxx                              * * 0000 0800-0000 FFFF   1110xxxx 10xxxxxx 10xxxxxx                     * *                                                                      * * I hope we won't use values > 0xFFFF anytime soon !                   * *                                                                      * ************************************************************************//** * xmlUTF8Size: * @utf: pointer to the UTF8 character * * calculates the internal size of a UTF8 character * * returns the numbers of bytes in the character, -1 on format error */intxmlUTF8Size(const xmlChar *utf) {    xmlChar mask;    int len;    if (utf == NULL)        return -1;    if (*utf < 0x80)        return 1;    /* check valid UTF8 character */    if (!(*utf & 0x40))        return -1;    /* determine number of bytes in char */    len = 2;    for (mask=0x20; mask != 0; mask>>=1) {        if (!(*utf & mask))            return len;        len++;    }    return -1;}/** * xmlUTF8Charcmp: * @utf1: pointer to first UTF8 char * @utf2: pointer to second UTF8 char * * compares the two UCS4 values * * returns result of the compare as with xmlStrncmp */intxmlUTF8Charcmp(const xmlChar *utf1, const xmlChar *utf2) {    if (utf1 == NULL ) {        if (utf2 == NULL)            return 0;        return -1;    }    return xmlStrncmp(utf1, utf2, xmlUTF8Size(utf1));}/** * xmlUTF8Strlen: * @utf:  a sequence of UTF-8 encoded bytes * * compute the length of an UTF8 string, it doesn't do a full UTF8 * checking of the content of the string. * * Returns the number of characters in the string or -1 in case of error */intxmlUTF8Strlen(const xmlChar *utf) {    int ret = 0;    if (utf == NULL)        return(-1);    while (*utf != 0) {        if (utf[0] & 0x80) {            if ((utf[1] & 0xc0) != 0x80)                return(-1);            if ((utf[0] & 0xe0) == 0xe0) {                if ((utf[2] & 0xc0) != 0x80)                    return(-1);                if ((utf[0] & 0xf0) == 0xf0) {                    if ((utf[0] & 0xf8) != 0xf0 || (utf[3] & 0xc0) != 0x80)                        return(-1);                    utf += 4;                } else {                    utf += 3;                }            } else {                utf += 2;            }        } else {            utf++;        }        ret++;    }    return(ret);}/** * xmlGetUTF8Char: * @utf:  a sequence of UTF-8 encoded bytes * @len:  a pointer to @bytes len * * Read one UTF8 Char from @utf * * Returns the char value or -1 in case of error, and updates *len with the *        number of bytes consumed */intxmlGetUTF8Char(const unsigned char *utf, int *len) {    unsigned int c;    if (utf == NULL)        goto error;    if (len == NULL)        goto error;    if (*len < 1)        goto error;    c = utf[0];    if (c & 0x80) {        if (*len < 2)            goto error;        if ((utf[1] & 0xc0) != 0x80)            goto error;        if ((c & 0xe0) == 0xe0) {            if (*len < 3)                goto error;            if ((utf[2] & 0xc0) != 0x80)                goto error;            if ((c & 0xf0) == 0xf0) {                if (*len < 4)                    goto error;                if ((c & 0xf8) != 0xf0 || (utf[3] & 0xc0) != 0x80)                    goto error;                *len = 4;                /* 4-byte code */                c = (utf[0] & 0x7) << 18;                c |= (utf[1] & 0x3f) << 12;                c |= (utf[2] & 0x3f) << 6;                c |= utf[3] & 0x3f;            } else {              /* 3-byte code */                *len = 3;                c = (utf[0] & 0xf) << 12;                c |= (utf[1] & 0x3f) << 6;                c |= utf[2] & 0x3f;            }        } else {          /* 2-byte code */            *len = 2;            c = (utf[0] & 0x1f) << 6;            c |= utf[1] & 0x3f;        }    } else {        /* 1-byte code */        *len = 1;    }    return(c);error:    *len = 0;    return(-1);}/** * xmlCheckUTF8: * @utf: Pointer to putative UTF-8 encoded string. * * Checks @utf for being valid UTF-8. @utf is assumed to be * null-terminated. This function is not super-strict, as it will * allow longer UTF-8 sequences than necessary. Note that Java is * capable of producing these sequences if provoked. Also note, this * routine checks for the 4-byte maximum size, but does not check for * 0x10ffff maximum value. * * Return value: true if @utf is valid. **/intxmlCheckUTF8(const unsigned char *utf){    int ix;    unsigned char c;    for (ix = 0; (c = utf[ix]);) {        if (c & 0x80) {            if ((utf[ix + 1] & 0xc0) != 0x80)                return(0);            if ((c & 0xe0) == 0xe0) {                if ((utf[ix + 2] & 0xc0) != 0x80)                    return(0);                if ((c & 0xf0) == 0xf0) {                    if ((c & 0xf8) != 0xf0 || (utf[ix + 3] & 0xc0) != 0x80)                        return(0);                    ix += 4;                    /* 4-byte code */                } else                    /* 3-byte code */                    ix += 3;            } else                /* 2-byte code */                ix += 2;        } else            /* 1-byte code */            ix++;      }      return(1);}/** * xmlUTF8Strsize: * @utf:  a sequence of UTF-8 encoded bytes * @len:  the number of characters in the array * * storage size of an UTF8 string * * Returns the storage size of * the first 'len' characters of ARRAY * */intxmlUTF8Strsize(const xmlChar *utf, int len) {    const xmlChar   *ptr=utf;    xmlChar         ch;    if (len <= 0)        return(0);    while ( len-- > 0) {        if ( !*ptr )            break;        if ( (ch = *ptr++) & 0x80)            while ( (ch<<=1) & 0x80 )                ptr++;    }    return (ptr - utf);}/** * xmlUTF8Strndup: * @utf:  the input UTF8 * * @len:  the len of @utf (in chars) * * a strndup for array of UTF8's * * Returns a new UTF8 * or NULL */xmlChar *xmlUTF8Strndup(const xmlChar *utf, int len) {    xmlChar *ret;    int i;        if ((utf == NULL) || (len < 0)) return(NULL);    i = xmlUTF8Strsize(utf, len);    ret = (xmlChar *) xmlMallocAtomic((i + 1) * sizeof(xmlChar));    if (ret == NULL) {        xmlGenericError(xmlGenericErrorContext,                "malloc of %ld byte failed\n",                (len + 1) * (long)sizeof(xmlChar));        return(NULL);    }    memcpy(ret, utf, i * sizeof(xmlChar));    ret[i] = 0;    return(ret);}/** * xmlUTF8Strpos: * @utf:  the input UTF8 * * @pos:  the position of the desired UTF8 char (in chars) * * a function to provide the equivalent of fetching a * character from a string array * * Returns a pointer to the UTF8 character or NULL */xmlChar *xmlUTF8Strpos(const xmlChar *utf, int pos) {    xmlChar ch;    if (utf == NULL) return(NULL);    if ( (pos < 0) || (pos >= xmlUTF8Strlen(utf)) )        return(NULL);    while (pos--) {        if ((ch=*utf++) == 0) return(NULL);        if ( ch & 0x80 ) {            /* if not simple ascii, verify proper format */            if ( (ch & 0xc0) != 0xc0 )                return(NULL);            /* then skip over remaining bytes for this char */            while ( (ch <<= 1) & 0x80 )                if ( (*utf++ & 0xc0) != 0x80 )                    return(NULL);        }    }    return((xmlChar *)utf);}/** * xmlUTF8Strloc: * @utf:  the input UTF8 * * @utfchar:  the UTF8 character to be found * * a function to provide the relative location of a UTF8 char * * Returns the relative character position of the desired char * or -1 if not found */intxmlUTF8Strloc(const xmlChar *utf, const xmlChar *utfchar) {    int i, size;    xmlChar ch;    if (utf==NULL || utfchar==NULL) return -1;    size = xmlUTF8Strsize(utfchar, 1);        for(i=0; (ch=*utf) != 0; i++) {            if (xmlStrncmp(utf, utfchar, size)==0)                return(i);            utf++;            if ( ch & 0x80 ) {                /* if not simple ascii, verify proper format */                if ( (ch & 0xc0) != 0xc0 )                    return(-1);                /* then skip over remaining bytes for this char */                while ( (ch <<= 1) & 0x80 )                    if ( (*utf++ & 0xc0) != 0x80 )                        return(-1);            }        }    return(-1);}/** * xmlUTF8Strsub: * @utf:  a sequence of UTF-8 encoded bytes * @start: relative pos of first char * @len:   total number to copy * * Create a substring from a given UTF-8 string * Note:  positions are given in units of UTF-8 chars * * Returns a pointer to a newly created string * or NULL if any problem */xmlChar *xmlUTF8Strsub(const xmlChar *utf, int start, int len) {    int            i;    xmlChar ch;    if (utf == NULL) return(NULL);    if (start < 0) return(NULL);    if (len < 0) return(NULL);    /*     * Skip over any leading chars     */    for (i = 0;i < start;i++) {        if ((ch=*utf++) == 0) return(NULL);        if ( ch & 0x80 ) {            /* if not simple ascii, verify proper format */            if ( (ch & 0xc0) != 0xc0 )                return(NULL);            /* then skip over remaining bytes for this char */            while ( (ch <<= 1) & 0x80 )                if ( (*utf++ & 0xc0) != 0x80 )                    return(NULL);        }    }    return(xmlUTF8Strndup(utf, len));}
上一页 12
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -