📄 xmlstring.c
字号:
if (ret == NULL) {
xmlErrMemory(NULL, NULL);
return(xmlStrndup(str1, size));
}
memcpy(ret, str1, size * sizeof(xmlChar));
memcpy(&ret[size], str2, len * sizeof(xmlChar));
ret[size + len] = 0;
return(ret);
}
/**
* xmlStrcat:
* @cur: the original xmlChar * array
* @add: the xmlChar * array added
*
* a strcat for array of xmlChar's. Since they are supposed to be
* encoded in UTF-8 or an encoding with 8bit based chars, we assume
* a termination mark of '0'.
*
* Returns a new xmlChar * containing the concatenated string.
*/
xmlChar *
xmlStrcat(xmlChar *cur, const xmlChar *add) {
const xmlChar *p = add;
if (add == NULL) return(cur);
if (cur == NULL)
return(xmlStrdup(add));
while (*p != 0) p++; /* non input consuming */
return(xmlStrncat(cur, add, p - add));
}
/**
* xmlStrPrintf:
* @buf: the result buffer.
* @len: the result buffer length.
* @msg: the message with printf formatting.
* @...: extra parameters for the message.
*
* Formats @msg and places result into @buf.
*
* Returns the number of characters written to @buf or -1 if an error occurs.
*/
int
xmlStrPrintf(xmlChar *buf, int len, const xmlChar *msg, ...) {
va_list args;
int ret;
if((buf == NULL) || (msg == NULL)) {
return(-1);
}
va_start(args, msg);
ret = vsnprintf((char *) buf, len, (const char *) msg, args);
va_end(args);
buf[len - 1] = 0; /* be safe ! */
return(ret);
}
/**
* xmlStrVPrintf:
* @buf: the result buffer.
* @len: the result buffer length.
* @msg: the message with printf formatting.
* @ap: extra parameters for the message.
*
* Formats @msg and places result into @buf.
*
* Returns the number of characters written to @buf or -1 if an error occurs.
*/
int
xmlStrVPrintf(xmlChar *buf, int len, const xmlChar *msg, va_list ap) {
int ret;
if((buf == NULL) || (msg == NULL)) {
return(-1);
}
ret = vsnprintf((char *) buf, len, (const char *) msg, ap);
buf[len - 1] = 0; /* be safe ! */
return(ret);
}
/************************************************************************
* *
* Generic UTF8 handling routines *
* *
* From rfc2044: encoding of the Unicode values on UTF-8: *
* *
* UCS-4 range (hex.) UTF-8 octet sequence (binary) *
* 0000 0000-0000 007F 0xxxxxxx *
* 0000 0080-0000 07FF 110xxxxx 10xxxxxx *
* 0000 0800-0000 FFFF 1110xxxx 10xxxxxx 10xxxxxx *
* *
* I hope we won't use values > 0xFFFF anytime soon ! *
* *
************************************************************************/
/**
* xmlUTF8Size:
* @utf: pointer to the UTF8 character
*
* calculates the internal size of a UTF8 character
*
* returns the numbers of bytes in the character, -1 on format error
*/
int
xmlUTF8Size(const xmlChar *utf) {
xmlChar mask;
int len;
if (utf == NULL)
return -1;
if (*utf < 0x80)
return 1;
/* check valid UTF8 character */
if (!(*utf & 0x40))
return -1;
/* determine number of bytes in char */
len = 2;
for (mask=0x20; mask != 0; mask>>=1) {
if (!(*utf & mask))
return len;
len++;
}
return -1;
}
/**
* xmlUTF8Charcmp:
* @utf1: pointer to first UTF8 char
* @utf2: pointer to second UTF8 char
*
* compares the two UCS4 values
*
* returns result of the compare as with xmlStrncmp
*/
int
xmlUTF8Charcmp(const xmlChar *utf1, const xmlChar *utf2) {
if (utf1 == NULL ) {
if (utf2 == NULL)
return 0;
return -1;
}
return xmlStrncmp(utf1, utf2, xmlUTF8Size(utf1));
}
/**
* xmlUTF8Strlen:
* @utf: a sequence of UTF-8 encoded bytes
*
* compute the length of an UTF8 string, it doesn't do a full UTF8
* checking of the content of the string.
*
* Returns the number of characters in the string or -1 in case of error
*/
int
xmlUTF8Strlen(const xmlChar *utf) {
int ret = 0;
if (utf == NULL)
return(-1);
while (*utf != 0) {
if (utf[0] & 0x80) {
if ((utf[1] & 0xc0) != 0x80)
return(-1);
if ((utf[0] & 0xe0) == 0xe0) {
if ((utf[2] & 0xc0) != 0x80)
return(-1);
if ((utf[0] & 0xf0) == 0xf0) {
if ((utf[0] & 0xf8) != 0xf0 || (utf[3] & 0xc0) != 0x80)
return(-1);
utf += 4;
} else {
utf += 3;
}
} else {
utf += 2;
}
} else {
utf++;
}
ret++;
}
return(ret);
}
/**
* xmlGetUTF8Char:
* @utf: a sequence of UTF-8 encoded bytes
* @len: a pointer to the minimum number of bytes present in
* the sequence. This is used to assure the next character
* is completely contained within the sequence.
*
* Read the first UTF8 character from @utf
*
* Returns the char value or -1 in case of error, and sets *len to
* the actual number of bytes consumed (0 in case of error)
*/
int
xmlGetUTF8Char(const unsigned char *utf, int *len) {
unsigned int c;
if (utf == NULL)
goto error;
if (len == NULL)
goto error;
if (*len < 1)
goto error;
c = utf[0];
if (c & 0x80) {
if (*len < 2)
goto error;
if ((utf[1] & 0xc0) != 0x80)
goto error;
if ((c & 0xe0) == 0xe0) {
if (*len < 3)
goto error;
if ((utf[2] & 0xc0) != 0x80)
goto error;
if ((c & 0xf0) == 0xf0) {
if (*len < 4)
goto error;
if ((c & 0xf8) != 0xf0 || (utf[3] & 0xc0) != 0x80)
goto error;
*len = 4;
/* 4-byte code */
c = (utf[0] & 0x7) << 18;
c |= (utf[1] & 0x3f) << 12;
c |= (utf[2] & 0x3f) << 6;
c |= utf[3] & 0x3f;
} else {
/* 3-byte code */
*len = 3;
c = (utf[0] & 0xf) << 12;
c |= (utf[1] & 0x3f) << 6;
c |= utf[2] & 0x3f;
}
} else {
/* 2-byte code */
*len = 2;
c = (utf[0] & 0x1f) << 6;
c |= utf[1] & 0x3f;
}
} else {
/* 1-byte code */
*len = 1;
}
return(c);
error:
if (len != NULL)
*len = 0;
return(-1);
}
/**
* xmlCheckUTF8:
* @utf: Pointer to putative UTF-8 encoded string.
*
* Checks @utf for being valid UTF-8. @utf is assumed to be
* null-terminated. This function is not super-strict, as it will
* allow longer UTF-8 sequences than necessary. Note that Java is
* capable of producing these sequences if provoked. Also note, this
* routine checks for the 4-byte maximum size, but does not check for
* 0x10ffff maximum value.
*
* Return value: true if @utf is valid.
**/
int
xmlCheckUTF8(const unsigned char *utf)
{
int ix;
unsigned char c;
if (utf == NULL)
return(0);
/*
* utf is a string of 1, 2, 3 or 4 bytes. The valid strings
* are as follows (in "bit format"):
* 0xxxxxxx valid 1-byte
* 110xxxxx 10xxxxxx valid 2-byte
* 1110xxxx 10xxxxxx 10xxxxxx valid 3-byte
* 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx valid 4-byte
*/
for (ix = 0; (c = utf[ix]);) { /* string is 0-terminated */
if ((c & 0x80) == 0x00) { /* 1-byte code, starts with 10 */
ix++;
} else if ((c & 0xe0) == 0xc0) {/* 2-byte code, starts with 110 */
if ((utf[ix+1] & 0xc0 ) != 0x80)
return 0;
ix += 2;
} else if ((c & 0xf0) == 0xe0) {/* 3-byte code, starts with 1110 */
if (((utf[ix+1] & 0xc0) != 0x80) ||
((utf[ix+2] & 0xc0) != 0x80))
return 0;
ix += 3;
} else if ((c & 0xf8) == 0xf0) {/* 4-byte code, starts with 11110 */
if (((utf[ix+1] & 0xc0) != 0x80) ||
((utf[ix+2] & 0xc0) != 0x80) ||
((utf[ix+3] & 0xc0) != 0x80))
return 0;
ix += 4;
} else /* unknown encoding */
return 0;
}
return(1);
}
/**
* xmlUTF8Strsize:
* @utf: a sequence of UTF-8 encoded bytes
* @len: the number of characters in the array
*
* storage size of an UTF8 string
* the behaviour is not garanteed if the input string is not UTF-8
*
* Returns the storage size of
* the first 'len' characters of ARRAY
*/
int
xmlUTF8Strsize(const xmlChar *utf, int len) {
const xmlChar *ptr=utf;
xmlChar ch;
if (utf == NULL)
return(0);
if (len <= 0)
return(0);
while ( len-- > 0) {
if ( !*ptr )
break;
if ( (ch = *ptr++) & 0x80)
while ((ch<<=1) & 0x80 ) {
ptr++;
if (*ptr == 0) break;
}
}
return (ptr - utf);
}
/**
* xmlUTF8Strndup:
* @utf: the input UTF8 *
* @len: the len of @utf (in chars)
*
* a strndup for array of UTF8's
*
* Returns a new UTF8 * or NULL
*/
xmlChar *
xmlUTF8Strndup(const xmlChar *utf, int len) {
xmlChar *ret;
int i;
if ((utf == NULL) || (len < 0)) return(NULL);
i = xmlUTF8Strsize(utf, len);
ret = (xmlChar *) xmlMallocAtomic((i + 1) * sizeof(xmlChar));
if (ret == NULL) {
xmlGenericError(xmlGenericErrorContext,
"malloc of %ld byte failed\n",
(len + 1) * (long)sizeof(xmlChar));
return(NULL);
}
memcpy(ret, utf, i * sizeof(xmlChar));
ret[i] = 0;
return(ret);
}
/**
* xmlUTF8Strpos:
* @utf: the input UTF8 *
* @pos: the position of the desired UTF8 char (in chars)
*
* a function to provide the equivalent of fetching a
* character from a string array
*
* Returns a pointer to the UTF8 character or NULL
*/
const xmlChar *
xmlUTF8Strpos(const xmlChar *utf, int pos) {
xmlChar ch;
if (utf == NULL) return(NULL);
if (pos < 0)
return(NULL);
while (pos--) {
if ((ch=*utf++) == 0) return(NULL);
if ( ch & 0x80 ) {
/* if not simple ascii, verify proper format */
if ( (ch & 0xc0) != 0xc0 )
return(NULL);
/* then skip over remaining bytes for this char */
while ( (ch <<= 1) & 0x80 )
if ( (*utf++ & 0xc0) != 0x80 )
return(NULL);
}
}
return((xmlChar *)utf);
}
/**
* xmlUTF8Strloc:
* @utf: the input UTF8 *
* @utfchar: the UTF8 character to be found
*
* a function to provide the relative location of a UTF8 char
*
* Returns the relative character position of the desired char
* or -1 if not found
*/
int
xmlUTF8Strloc(const xmlChar *utf, const xmlChar *utfchar) {
int i, size;
xmlChar ch;
if (utf==NULL || utfchar==NULL) return -1;
size = xmlUTF8Strsize(utfchar, 1);
for(i=0; (ch=*utf) != 0; i++) {
if (xmlStrncmp(utf, utfchar, size)==0)
return(i);
utf++;
if ( ch & 0x80 ) {
/* if not simple ascii, verify proper format */
if ( (ch & 0xc0) != 0xc0 )
return(-1);
/* then skip over remaining bytes for this char */
while ( (ch <<= 1) & 0x80 )
if ( (*utf++ & 0xc0) != 0x80 )
return(-1);
}
}
return(-1);
}
/**
* xmlUTF8Strsub:
* @utf: a sequence of UTF-8 encoded bytes
* @start: relative pos of first char
* @len: total number to copy
*
* Create a substring from a given UTF-8 string
* Note: positions are given in units of UTF-8 chars
*
* Returns a pointer to a newly created string
* or NULL if any problem
*/
xmlChar *
xmlUTF8Strsub(const xmlChar *utf, int start, int len) {
int i;
xmlChar ch;
if (utf == NULL) return(NULL);
if (start < 0) return(NULL);
if (len < 0) return(NULL);
/*
* Skip over any leading chars
*/
for (i = 0;i < start;i++) {
if ((ch=*utf++) == 0) return(NULL);
if ( ch & 0x80 ) {
/* if not simple ascii, verify proper format */
if ( (ch & 0xc0) != 0xc0 )
return(NULL);
/* then skip over remaining bytes for this char */
while ( (ch <<= 1) & 0x80 )
if ( (*utf++ & 0xc0) != 0x80 )
return(NULL);
}
}
return(xmlUTF8Strndup(utf, len));
}
#define bottom_xmlstring
#include "elfgcchack.h"
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -