📄 xmlstring.c.svn-base
字号:
if ((str2 == NULL) || (len == 0)) return(xmlStrdup(str1)); if (str1 == NULL) return(xmlStrndup(str2, len)); size = xmlStrlen(str1); ret = (xmlChar *) xmlMalloc((size + len + 1) * sizeof(xmlChar)); if (ret == NULL) { xmlErrMemory(NULL, NULL); return(xmlStrndup(str1, size)); } memcpy(ret, str1, size * sizeof(xmlChar)); memcpy(&ret[size], str2, len * sizeof(xmlChar)); ret[size + len] = 0; return(ret);}/** * xmlStrcat: * @cur: the original xmlChar * array * @add: the xmlChar * array added * * a strcat for array of xmlChar's. Since they are supposed to be * encoded in UTF-8 or an encoding with 8bit based chars, we assume * a termination mark of '0'. * * Returns a new xmlChar * containing the concatenated string. */xmlChar *xmlStrcat(xmlChar *cur, const xmlChar *add) { const xmlChar *p = add; if (add == NULL) return(cur); if (cur == NULL) return(xmlStrdup(add)); while (*p != 0) p++; /* non input consuming */ return(xmlStrncat(cur, add, p - add));}/** * xmlStrPrintf: * @buf: the result buffer. * @len: the result buffer length. * @msg: the message with printf formatting. * @...: extra parameters for the message. * * Formats @msg and places result into @buf. * * Returns the number of characters written to @buf or -1 if an error occurs. */int xmlStrPrintf(xmlChar *buf, int len, const xmlChar *msg, ...) { va_list args; int ret; if((buf == NULL) || (msg == NULL)) { return(-1); } va_start(args, msg); ret = _vsnprintf((char *) buf, len, (const char *) msg, args); va_end(args); buf[len - 1] = 0; /* be safe ! */ return(ret);}/** * xmlStrVPrintf: * @buf: the result buffer. * @len: the result buffer length. * @msg: the message with printf formatting. * @ap: extra parameters for the message. * * Formats @msg and places result into @buf. * * Returns the number of characters written to @buf or -1 if an error occurs. */int xmlStrVPrintf(xmlChar *buf, int len, const xmlChar *msg, va_list ap) { int ret; if((buf == NULL) || (msg == NULL)) { return(-1); } ret = _vsnprintf((char *) buf, len, (const char *) msg, ap); buf[len - 1] = 0; /* be safe ! */ return(ret);}/************************************************************************ * * * Generic UTF8 handling routines * * * * From rfc2044: encoding of the Unicode values on UTF-8: * * * * UCS-4 range (hex.) UTF-8 octet sequence (binary) * * 0000 0000-0000 007F 0xxxxxxx * * 0000 0080-0000 07FF 110xxxxx 10xxxxxx * * 0000 0800-0000 FFFF 1110xxxx 10xxxxxx 10xxxxxx * * * * I hope we won't use values > 0xFFFF anytime soon ! * * * ************************************************************************//** * xmlUTF8Size: * @utf: pointer to the UTF8 character * * calculates the internal size of a UTF8 character * * returns the numbers of bytes in the character, -1 on format error */intxmlUTF8Size(const xmlChar *utf) { xmlChar mask; int len; if (utf == NULL) return -1; if (*utf < 0x80) return 1; /* check valid UTF8 character */ if (!(*utf & 0x40)) return -1; /* determine number of bytes in char */ len = 2; for (mask=0x20; mask != 0; mask>>=1) { if (!(*utf & mask)) return len; len++; } return -1;}/** * xmlUTF8Charcmp: * @utf1: pointer to first UTF8 char * @utf2: pointer to second UTF8 char * * compares the two UCS4 values * * returns result of the compare as with xmlStrncmp */intxmlUTF8Charcmp(const xmlChar *utf1, const xmlChar *utf2) { if (utf1 == NULL ) { if (utf2 == NULL) return 0; return -1; } return xmlStrncmp(utf1, utf2, xmlUTF8Size(utf1));}/** * xmlUTF8Strlen: * @utf: a sequence of UTF-8 encoded bytes * * compute the length of an UTF8 string, it doesn't do a full UTF8 * checking of the content of the string. * * Returns the number of characters in the string or -1 in case of error */intxmlUTF8Strlen(const xmlChar *utf) { int ret = 0; if (utf == NULL) return(-1); while (*utf != 0) { if (utf[0] & 0x80) { if ((utf[1] & 0xc0) != 0x80) return(-1); if ((utf[0] & 0xe0) == 0xe0) { if ((utf[2] & 0xc0) != 0x80) return(-1); if ((utf[0] & 0xf0) == 0xf0) { if ((utf[0] & 0xf8) != 0xf0 || (utf[3] & 0xc0) != 0x80) return(-1); utf += 4; } else { utf += 3; } } else { utf += 2; } } else { utf++; } ret++; } return(ret);}/** * xmlGetUTF8Char: * @utf: a sequence of UTF-8 encoded bytes * @len: a pointer to @bytes len * * Read one UTF8 Char from @utf * * Returns the char value or -1 in case of error, and updates *len with the * number of bytes consumed */intxmlGetUTF8Char(const unsigned char *utf, int *len) { unsigned int c; if (utf == NULL) goto error; if (len == NULL) goto error; if (*len < 1) goto error; c = utf[0]; if (c & 0x80) { if (*len < 2) goto error; if ((utf[1] & 0xc0) != 0x80) goto error; if ((c & 0xe0) == 0xe0) { if (*len < 3) goto error; if ((utf[2] & 0xc0) != 0x80) goto error; if ((c & 0xf0) == 0xf0) { if (*len < 4) goto error; if ((c & 0xf8) != 0xf0 || (utf[3] & 0xc0) != 0x80) goto error; *len = 4; /* 4-byte code */ c = (utf[0] & 0x7) << 18; c |= (utf[1] & 0x3f) << 12; c |= (utf[2] & 0x3f) << 6; c |= utf[3] & 0x3f; } else { /* 3-byte code */ *len = 3; c = (utf[0] & 0xf) << 12; c |= (utf[1] & 0x3f) << 6; c |= utf[2] & 0x3f; } } else { /* 2-byte code */ *len = 2; c = (utf[0] & 0x1f) << 6; c |= utf[1] & 0x3f; } } else { /* 1-byte code */ *len = 1; } return(c);error: *len = 0; return(-1);}/** * xmlCheckUTF8: * @utf: Pointer to putative UTF-8 encoded string. * * Checks @utf for being valid UTF-8. @utf is assumed to be * null-terminated. This function is not super-strict, as it will * allow longer UTF-8 sequences than necessary. Note that Java is * capable of producing these sequences if provoked. Also note, this * routine checks for the 4-byte maximum size, but does not check for * 0x10ffff maximum value. * * Return value: true if @utf is valid. **/intxmlCheckUTF8(const unsigned char *utf){ int ix; unsigned char c; for (ix = 0; (c = utf[ix]);) { if (c & 0x80) { if ((utf[ix + 1] & 0xc0) != 0x80) return(0); if ((c & 0xe0) == 0xe0) { if ((utf[ix + 2] & 0xc0) != 0x80) return(0); if ((c & 0xf0) == 0xf0) { if ((c & 0xf8) != 0xf0 || (utf[ix + 3] & 0xc0) != 0x80) return(0); ix += 4; /* 4-byte code */ } else /* 3-byte code */ ix += 3; } else /* 2-byte code */ ix += 2; } else /* 1-byte code */ ix++; } return(1);}/** * xmlUTF8Strsize: * @utf: a sequence of UTF-8 encoded bytes * @len: the number of characters in the array * * storage size of an UTF8 string * * Returns the storage size of * the first 'len' characters of ARRAY * */intxmlUTF8Strsize(const xmlChar *utf, int len) { const xmlChar *ptr=utf; xmlChar ch; if (len <= 0) return(0); while ( len-- > 0) { if ( !*ptr ) break; if ( (ch = *ptr++) & 0x80) while ( (ch<<=1) & 0x80 ) ptr++; } return (ptr - utf);}/** * xmlUTF8Strndup: * @utf: the input UTF8 * * @len: the len of @utf (in chars) * * a strndup for array of UTF8's * * Returns a new UTF8 * or NULL */xmlChar *xmlUTF8Strndup(const xmlChar *utf, int len) { xmlChar *ret; int i; if ((utf == NULL) || (len < 0)) return(NULL); i = xmlUTF8Strsize(utf, len); ret = (xmlChar *) xmlMallocAtomic((i + 1) * sizeof(xmlChar)); if (ret == NULL) { xmlGenericError(xmlGenericErrorContext, "malloc of %ld byte failed\n", (len + 1) * (long)sizeof(xmlChar)); return(NULL); } memcpy(ret, utf, i * sizeof(xmlChar)); ret[i] = 0; return(ret);}/** * xmlUTF8Strpos: * @utf: the input UTF8 * * @pos: the position of the desired UTF8 char (in chars) * * a function to provide the equivalent of fetching a * character from a string array * * Returns a pointer to the UTF8 character or NULL */xmlChar *xmlUTF8Strpos(const xmlChar *utf, int pos) { xmlChar ch; if (utf == NULL) return(NULL); if ( (pos < 0) || (pos >= xmlUTF8Strlen(utf)) ) return(NULL); while (pos--) { if ((ch=*utf++) == 0) return(NULL); if ( ch & 0x80 ) { /* if not simple ascii, verify proper format */ if ( (ch & 0xc0) != 0xc0 ) return(NULL); /* then skip over remaining bytes for this char */ while ( (ch <<= 1) & 0x80 ) if ( (*utf++ & 0xc0) != 0x80 ) return(NULL); } } return((xmlChar *)utf);}/** * xmlUTF8Strloc: * @utf: the input UTF8 * * @utfchar: the UTF8 character to be found * * a function to provide the relative location of a UTF8 char * * Returns the relative character position of the desired char * or -1 if not found */intxmlUTF8Strloc(const xmlChar *utf, const xmlChar *utfchar) { int i, size; xmlChar ch; if (utf==NULL || utfchar==NULL) return -1; size = xmlUTF8Strsize(utfchar, 1); for(i=0; (ch=*utf) != 0; i++) { if (xmlStrncmp(utf, utfchar, size)==0) return(i); utf++; if ( ch & 0x80 ) { /* if not simple ascii, verify proper format */ if ( (ch & 0xc0) != 0xc0 ) return(-1); /* then skip over remaining bytes for this char */ while ( (ch <<= 1) & 0x80 ) if ( (*utf++ & 0xc0) != 0x80 ) return(-1); } } return(-1);}/** * xmlUTF8Strsub: * @utf: a sequence of UTF-8 encoded bytes * @start: relative pos of first char * @len: total number to copy * * Create a substring from a given UTF-8 string * Note: positions are given in units of UTF-8 chars * * Returns a pointer to a newly created string * or NULL if any problem */xmlChar *xmlUTF8Strsub(const xmlChar *utf, int start, int len) { int i; xmlChar ch; if (utf == NULL) return(NULL); if (start < 0) return(NULL); if (len < 0) return(NULL); /* * Skip over any leading chars */ for (i = 0;i < start;i++) { if ((ch=*utf++) == 0) return(NULL); if ( ch & 0x80 ) { /* if not simple ascii, verify proper format */ if ( (ch & 0xc0) != 0xc0 ) return(NULL); /* then skip over remaining bytes for this char */ while ( (ch <<= 1) & 0x80 ) if ( (*utf++ & 0xc0) != 0x80 ) return(NULL); } } return(xmlUTF8Strndup(utf, len));}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -