📄 uri.c
字号:
if (uri->authority != NULL) xmlFree(uri->authority);
uri->authority = NULL;
if (uri->query != NULL) xmlFree(uri->query);
uri->query = NULL;
}
/**
 * xmlFreeURI:
 * @uri:  pointer to an xmlURI
 *
 * Frees the scheme, server, user, path, fragment, opaque, authority and
 * query strings of @uri when they are set, then frees the xmlURI struct
 * itself. Calling this with a NULL @uri does nothing.
 */
void
xmlFreeURI(xmlURIPtr uri) {
    if (uri != NULL) {
        /* Each member is only handed to xmlFree() when it is non-NULL. */
        if (uri->scheme != NULL)
            xmlFree(uri->scheme);
        if (uri->server != NULL)
            xmlFree(uri->server);
        if (uri->user != NULL)
            xmlFree(uri->user);
        if (uri->path != NULL)
            xmlFree(uri->path);
        if (uri->fragment != NULL)
            xmlFree(uri->fragment);
        if (uri->opaque != NULL)
            xmlFree(uri->opaque);
        if (uri->authority != NULL)
            xmlFree(uri->authority);
        if (uri->query != NULL)
            xmlFree(uri->query);

        /* Finally release the container. */
        xmlFree(uri);
    }
}
/************************************************************************
* *
* Helper functions *
* *
************************************************************************/
/**
 * xmlNormalizeURIPath:
 * @path:  pointer to the path string
 *
 * Applies the 5 normalization steps to a path string--that is, RFC 2396
 * Section 5.2, steps 6.c through 6.g. Runs of consecutive "/" between
 * segments are also collapsed to a single "/" during steps (c)/(d).
 *
 * Normalization occurs directly on the string, no new allocation is done.
 * The result is never longer than the input.
 *
 * Returns 0 on success, or -1 if @path is NULL
 */
int
xmlNormalizeURIPath(char *path) {
    char *cur, *out;

    if (path == NULL)
        return(-1);

    /* Skip all initial "/" chars.  We want to get to the beginning of the
     * first non-empty segment.
     */
    cur = path;
    while (cur[0] == '/')
        ++cur;
    if (cur[0] == '\0')
        return(0);

    /* Keep everything we've seen so far.  */
    out = cur;

    /*
     * Analyze each segment in sequence for cases (c) and (d).
     */
    while (cur[0] != '\0') {
        /*
         * c) All occurrences of "./", where "." is a complete path segment,
         *    are removed from the buffer string.
         */
        if ((cur[0] == '.') && (cur[1] == '/')) {
            cur += 2;
            /* '//' normalization should be done at this point too */
            while (cur[0] == '/')
                cur++;
            continue;
        }

        /*
         * d) If the buffer string ends with "." as a complete path segment,
         *    that "." is removed.
         */
        if ((cur[0] == '.') && (cur[1] == '\0'))
            break;

        /* Otherwise keep the segment.  */
        while (cur[0] != '/') {
            if (cur[0] == '\0')
                goto done_cd;
            (out++)[0] = (cur++)[0];
        }
        /* normalize "//": skip all but the last "/" of a run */
        while ((cur[0] == '/') && (cur[1] == '/'))
            cur++;

        /* copy the single remaining "/" separator */
        (out++)[0] = (cur++)[0];
    }
 done_cd:
    out[0] = '\0';

    /* Reset to the beginning of the first segment for the next sequence.  */
    cur = path;
    while (cur[0] == '/')
        ++cur;
    if (cur[0] == '\0')
        return(0);

    /*
     * Analyze each segment in sequence for cases (e) and (f).
     *
     * e) All occurrences of "<segment>/../", where <segment> is a
     *    complete path segment not equal to "..", are removed from the
     *    buffer string.  Removal of these path segments is performed
     *    iteratively, removing the leftmost matching pattern on each
     *    iteration, until no matching pattern remains.
     *
     * f) If the buffer string ends with "<segment>/..", where <segment>
     *    is a complete path segment not equal to "..", that
     *    "<segment>/.." is removed.
     *
     * To satisfy the "iterative" clause in (e), we need to collapse the
     * string every time we find something that needs to be removed.  Thus,
     * we don't need to keep two pointers into the string: we only need a
     * "current position" pointer.
     */
    while (1) {
        char *segp, *tmp;

        /* At the beginning of each iteration of this loop, "cur" points to
         * the first character of the segment we want to examine.
         */

        /* Find the end of the current segment.  */
        segp = cur;
        while ((segp[0] != '/') && (segp[0] != '\0'))
            ++segp;

        /* If this is the last segment, we're done (we need at least two
         * segments to meet the criteria for the (e) and (f) cases).
         */
        if (segp[0] == '\0')
            break;

        /* If the first segment is "..", or if the next segment _isn't_ "..",
         * keep this segment and try the next one.
         */
        ++segp;
        if (((cur[0] == '.') && (cur[1] == '.') && (segp == cur+3))
            || ((segp[0] != '.') || (segp[1] != '.')
                || ((segp[2] != '/') && (segp[2] != '\0')))) {
            cur = segp;
            continue;
        }

        /* If we get here, remove this segment and the next one and back up
         * to the previous segment (if there is one), to implement the
         * "iteratively" clause.  It's pretty much impossible to back up
         * while maintaining two pointers into the buffer, so just compact
         * the whole buffer now.
         */

        /* If this is the end of the buffer, we're done.  */
        if (segp[2] == '\0') {
            cur[0] = '\0';
            break;
        }
        /* Source and destination overlap, so strcpy(cur, segp + 3) would be
         * undefined behaviour; copy forward byte by byte instead.
         */
        tmp = cur;
        segp += 3;
        while ((*tmp++ = *segp++) != 0)
            ;

        /* Walk back over the "/" separators that precede "cur".  Afterwards
         * either nothing precedes "cur" (segp == cur), only "/" characters
         * precede it (segp[0] == '/'), or "segp" rests on the last character
         * of the previous segment.
         *
         * The first two cases mean there is no previous segment, so keep
         * going from here.  Testing "segp == path" instead would wrongly
         * skip the back-up when the previous segment is a single character
         * at the very start of a relative path (e.g. "a/b/../.." would be
         * left as "a/..").
         */
        segp = cur;
        while ((segp > path) && ((--segp)[0] == '/'))
            ;
        if ((segp == cur) || (segp[0] == '/'))
            continue;

        /* "segp" is pointing to the end of a previous segment; find its
         * start.  We need to back up to the previous segment and start
         * over with that to handle things like "foo/bar/../..".  If we
         * don't do this, then on the first pass we'll remove the "bar/..",
         * but be pointing at the second ".." so we won't realize we can also
         * remove the "foo/..".
         */
        cur = segp;
        while ((cur > path) && (cur[-1] != '/'))
            --cur;
    }
    /* "out" still marks the end of the string produced by steps (c)/(d).
     * The string can only have shrunk since then, so this store lands at or
     * after the current terminator and does not change the result.
     */
    out[0] = '\0';

    /*
     * g) If the resulting buffer string still begins with one or more
     *    complete path segments of "..", then the reference is
     *    considered to be in error. Implementations may handle this
     *    error by retaining these components in the resolved path (i.e.,
     *    treating them as part of the final URI), by removing them from
     *    the resolved path (i.e., discarding relative levels above the
     *    root), or by avoiding traversal of the reference.
     *
     * We discard them from the final path (absolute paths only).
     */
    if (path[0] == '/') {
        cur = path;
        while ((cur[0] == '/') && (cur[1] == '.') && (cur[2] == '.')
               && ((cur[3] == '/') || (cur[3] == '\0')))
            cur += 3;

        if (cur != path) {
            /* Shift the remainder down over the discarded "/.." prefixes. */
            out = path;
            while (cur[0] != '\0')
                (out++)[0] = (cur++)[0];
            out[0] = 0;
        }
    }

    return(0);
}
/**
 * is_hex:
 * @c:  the character to classify
 *
 * Returns 1 if @c is a hexadecimal digit (0-9, a-f or A-F), 0 otherwise
 */
static int is_hex(char c) {
    switch (c) {
        case '0': case '1': case '2': case '3': case '4':
        case '5': case '6': case '7': case '8': case '9':
        case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
        case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
            return(1);
        default:
            return(0);
    }
}
/**
 * xmlURIUnescapeString:
 * @str:  the string to unescape
 * @len:  the length in bytes to unescape (or <= 0 to indicate full string)
 * @target:  optional destination buffer
 *
 * Unescaping routine, does not do validity checks !
 * Each "%XX" sequence with two hexadecimal digits is replaced by the single
 * byte it encodes (no character-encoding conversion); every other byte,
 * including a '%' not followed by two hex digits within @len, is copied
 * unchanged. The output is always NUL-terminated.
 *
 * When @target is NULL a new buffer of @len + 1 bytes is allocated with
 * xmlMallocAtomic(); otherwise the result is written to @target, which is
 * assumed to have room for at least @len + 1 bytes (not checked here).
 *
 * Returns the unescaped string (@target or the new buffer), or NULL if
 * @str is NULL or the allocation fails
 */
char *
xmlURIUnescapeString(const char *str, int len, char *target) {
    char *ret, *out;
    const char *in;

    if (str == NULL)
        return(NULL);
    if (len <= 0) len = strlen(str);
    if (len < 0) return(NULL);

    /* Use the caller's buffer when supplied, otherwise allocate one. */
    ret = target;
    if (ret == NULL) {
        ret = (char *) xmlMallocAtomic(len + 1);
        if (ret == NULL) {
            xmlGenericError(xmlGenericErrorContext,
                    "xmlURIUnescapeString: out of memory\n");
            return(NULL);
        }
    }

    out = ret;
    for (in = str; len > 0; ) {
        if ((len > 2) && (in[0] == '%') && (is_hex(in[1])) && (is_hex(in[2]))) {
            int value = 0;
            int i;

            /* Fold the two hex digits following '%' into one byte value. */
            for (i = 1; i <= 2; i++) {
                char digit = in[i];

                value *= 16;
                if ((digit >= '0') && (digit <= '9'))
                    value += (digit - '0');
                else if ((digit >= 'a') && (digit <= 'f'))
                    value += (digit - 'a') + 10;
                else if ((digit >= 'A') && (digit <= 'F'))
                    value += (digit - 'A') + 10;
            }
            *out++ = (char) value;
            in += 3;
            len -= 3;
        } else {
            /* Plain byte: copy through untouched. */
            *out++ = *in++;
            len--;
        }
    }
    *out = 0;
    return(ret);
}
/**
* xmlURIEscapeStr:
* @str: string to escape
* @list: exception list string of chars not to escape
*
* This routine escapes a string to hex, ignoring reserved characters (a-z)
* and the characters in the exception list.
*
* Returns a new escaped string or NULL in case of error.
*/
xmlChar *
xmlURIEscapeStr(const xmlChar *str, const xmlChar *list) {
xmlChar *ret, ch;
const xmlChar *in;
unsigned int len, out;
if (str == NULL)
return(NULL);
if (str[0] == 0)
return(xmlStrdup(str));
len = xmlStrlen(str);
if (!(len > 0)) return(NULL);
len += 20;
ret = (xmlChar *) xmlMallocAtomic(len);
if (ret == NULL) {
xmlGenericError(xmlGenericErrorContext,
"xmlURIEscapeStr: out of memory\n");
return(NULL);
}
in = (const xmlChar *) str;
out = 0;
while(*in != 0) {
if (len - out <= 3) {
len += 20;
ret = (xmlChar *) xmlRealloc(ret, len);
if (ret == NULL) {
xmlGenericError(xmlGenericErrorContext,
"xmlURIEscapeStr: out of memory\n");
return(NULL);
}
}
ch = *in;
if ((ch != '@') && (!IS_UNRESERVED(ch)) && (!xmlStrchr(list, ch))) {
unsigned char val;
ret[out++] = '%';
val = ch >> 4;
if (val <= 9)
ret[out++] = '0' + val;
else
ret[out++] = 'A' + val - 0xA;
val = ch & 0xF;
if (val <= 9)
ret[out++] = '0' + val;
else
ret[out++] = 'A' + val - 0xA;
in++;
} else {
ret[out++] = *in++;
}
}
ret[out] = 0;
return(ret);
}
/**
 * xmlURIEscape:
 * @str:  the string of the URI to escape
 *
 * Escaping routine, does not do validity checks !
 * It will try to escape the chars needing this, but this is heuristic
 * based; it's impossible to be sure.
 *
 * The string is parsed with xmlParseURIReference() (with the "cleanup"
 * flag set on the URI), then each component that is present is escaped
 * with xmlURIEscapeStr() using a per-component exception list, and the
 * pieces are concatenated back together.
 *
 * Returns a copy of the string, but escaped, or NULL if @str is NULL, the
 * URI cannot be created or parsed, or escaping a component fails. The
 * result is built with xmlStrcat() and must be released by the caller.
 *
 * 25 May 2001
 * Uses xmlParseURI and xmlURIEscapeStr to try to escape correctly
 * according to RFC2396.
 *   - Carl Douglas
 */
xmlChar *
xmlURIEscape(const xmlChar * str)
{
    xmlChar *ret = NULL, *segment = NULL;
    xmlURIPtr uri;
    int ret2;

    if (str == NULL)
        return (NULL);

    uri = xmlCreateURI();
    if (uri == NULL)
        return (NULL);

    /*
     * Allow escaping errors in the unescaped form
     */
    uri->cleanup = 1;
    ret2 = xmlParseURIReference(uri, (const char *)str);
    if (ret2) {
        xmlFreeURI(uri);
        return (NULL);
    }

    /*
     * From here on, a failed escape jumps to mem_error so that both the
     * parsed URI and the partially built result are released.
     */
    if (uri->scheme) {
        segment = xmlURIEscapeStr(BAD_CAST uri->scheme, BAD_CAST "+-.");
        if (segment == NULL)
            goto mem_error;
        ret = xmlStrcat(ret, segment);
        ret = xmlStrcat(ret, BAD_CAST ":");
        xmlFree(segment);
    }

    if (uri->authority) {
        segment =
            xmlURIEscapeStr(BAD_CAST uri->authority, BAD_CAST "/?;:@");
        if (segment == NULL)
            goto mem_error;
        ret = xmlStrcat(ret, BAD_CAST "//");
        ret = xmlStrcat(ret, segment);
        xmlFree(segment);
    }

    if (uri->user) {
        segment = xmlURIEscapeStr(BAD_CAST uri->user, BAD_CAST ";:&=+$,");
        if (segment == NULL)
            goto mem_error;
        ret = xmlStrcat(ret,BAD_CAST "//");
        ret = xmlStrcat(ret, segment);
        ret = xmlStrcat(ret, BAD_CAST "@");
        xmlFree(segment);
    }

    if (uri->server) {
        segment = xmlURIEscapeStr(BAD_CAST uri->server, BAD_CAST "/?;:@");
        if (segment == NULL)
            goto mem_error;
        /* The "//" prefix was already emitted together with the user part. */
        if (uri->user == NULL)
            ret = xmlStrcat(ret, BAD_CAST "//");
        ret = xmlStrcat(ret, segment);
        xmlFree(segment);
    }

    if (uri->port) {
        xmlChar port[10];

        snprintf((char *) port, 10, "%d", uri->port);
        ret = xmlStrcat(ret, BAD_CAST ":");
        ret = xmlStrcat(ret, port);
    }

    if (uri->path) {
        segment =
            xmlURIEscapeStr(BAD_CAST uri->path, BAD_CAST ":@&=+$,/?;");
        if (segment == NULL)
            goto mem_error;
        ret = xmlStrcat(ret, segment);
        xmlFree(segment);
    }

    if (uri->query) {
        segment =
            xmlURIEscapeStr(BAD_CAST uri->query, BAD_CAST ";/?:@&=+,$");
        if (segment == NULL)
            goto mem_error;
        ret = xmlStrcat(ret, BAD_CAST "?");
        ret = xmlStrcat(ret, segment);
        xmlFree(segment);
    }

    if (uri->opaque) {
        segment = xmlURIEscapeStr(BAD_CAST uri->opaque, BAD_CAST "");
        if (segment == NULL)
            goto mem_error;
        ret = xmlStrcat(ret, segment);
        xmlFree(segment);
    }

    if (uri->fragment) {
        segment = xmlURIEscapeStr(BAD_CAST uri->fragment, BAD_CAST "#");
        if (segment == NULL)
            goto mem_error;
        ret = xmlStrcat(ret, BAD_CAST "#");
        ret = xmlStrcat(ret, segment);
        xmlFree(segment);
    }

    xmlFreeURI(uri);
    return (ret);

mem_error:
    xmlGenericError(xmlGenericErrorContext,
                    "xmlURIEscape: out of memory\n");
    if (ret != NULL)
        xmlFree(ret);
    xmlFreeURI(uri);
    return (NULL);
}
/************************************************************************
* *
* Escaped URI parsing *
* *
************************************************************************/
/**
 * xmlParseURIFragment:
 * @uri:  pointer to an URI structure
 * @str:  pointer to the string to analyze
 *
 * Parse an URI fragment string and fills in the appropriate fields
 * of the @uri structure. Characters are consumed while they match
 * IS_URIC or IS_UNWISE. If @uri is not NULL, any previous fragment is
 * freed and replaced by the unescaped form of the consumed text.
 * On return *@str points just past the consumed characters.
 *
 * fragment = *uric
 *
 * Returns 0 on success, or -1 if @str is NULL
 */
static int
xmlParseURIFragment(xmlURIPtr uri, const char **str)
{
    const char *cur;

    /* Validate the pointer before reading through it. */
    if (str == NULL)
        return (-1);

    cur = *str;
    while (IS_URIC(cur) || IS_UNWISE(cur))
        NEXT(cur);
    if (uri != NULL) {
        if (uri->fragment != NULL)
            xmlFree(uri->fragment);
        /* NOTE(review): when nothing was consumed the length passed here
         * is 0, which xmlURIUnescapeString() documents as "full string" --
         * confirm that is the intended result for an empty fragment.
         */
        uri->fragment = xmlURIUnescapeString(*str, cur - *str, NULL);
    }
    *str = cur;
    return (0);
}
/**
 * xmlParseURIQuery:
 * @uri:  pointer to an URI structure
 * @str:  pointer to the string to analyze
 *
 * Parse the query part of an URI. Characters are consumed while they
 * match IS_URIC; characters matching IS_UNWISE are accepted as well, but
 * only when @uri is not NULL and its "cleanup" flag is set. If @uri is
 * not NULL, any previous query is freed and replaced by the unescaped form
 * of the consumed text. On return *@str points just past the consumed
 * characters.
 *
 * query = *uric
 *
 * Returns 0 on success, or -1 if @str is NULL
 */
static int
xmlParseURIQuery(xmlURIPtr uri, const char **str)
{
    const char *cur;

    /* Validate the pointer before reading through it. */
    if (str == NULL)
        return (-1);

    cur = *str;
    while (IS_URIC(cur) || ((uri != NULL) && (uri->cleanup) && (IS_UNWISE(cur))))
        NEXT(cur);
    if (uri != NULL) {
        if (uri->query != NULL)
            xmlFree(uri->query);
        /* NOTE(review): when nothing was consumed the length passed here
         * is 0, which xmlURIUnescapeString() documents as "full string" --
         * confirm that is the intended result for an empty query.
         */
        uri->query = xmlURIUnescapeString(*str, cur - *str, NULL);
    }
    *str = cur;
    return (0);
}
/**
* xmlParseURIScheme:
* @uri: pointer to an URI structure
* @str: pointer to the string to analyze
*
* Parse an URI scheme
*
* scheme = alpha *( alpha | digit | "+" | "-" | "." )
*
* Returns 0 or the error code
*/
static int
xmlParseURIScheme(xmlURIPtr uri, const char **str) {
const char *cur;
if (str == NULL)
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -