📄 url.c
字号:
return TRUE;
}
/*
 * Parse an optional fragment at the current position.
 *
 * If the text at *pbs does not begin with '#', the fragment fields of
 * "url" are cleared (NULL / 0), *pbs is left alone and TRUE is returned.
 * Otherwise everything after the '#' up to the terminating NUL is taken
 * as the fragment: its start and length (not counting the leading '#')
 * are stored in "url" and *pbs is moved to the NUL.
 * Returns FALSE, with the fragment fields cleared, if a character is
 * found that is neither accepted nor part of a "%xy" escape.
 *
 * NOTE: our parsing of fragments differs from RFC2396 in that we
 * allow '#' and '"' in fragment parts. This is in response to
 * a request from ECS.
 */
static BOOL
ParseFragment (BYTE **pbs, URL *url)
{
  BYTE *cursor = *pbs;
  BYTE *first;
  BYTE  decoded;   /* scratch output for HexToByte; value is not used */

  url->s[FRAGMENT_PART] = NULL;
  url->len[FRAGMENT_PART] = 0;

  if (*cursor != '#')
    return TRUE;

  first = cursor + 1;
  for (cursor = first; *cursor != '\0'; cursor++) {
    BYTE ch = *cursor;

    if (wae_isuric (ch) || (ch == '#') || (ch == '"'))
      continue;

    /* Anything else is only acceptable as the start of an escape. */
    if ((ch != '%') || !HexToByte (cursor + 1, &decoded))
      return FALSE;
    cursor += 2;   /* skip the two hex digits; the loop skips the '%' */
  }

  url->s[FRAGMENT_PART] = first;
  url->len[FRAGMENT_PART] = (UINT16)(cursor - first);
  *pbs = cursor;

  return TRUE;
}
/*
 * Parse an absolute path, i.e., one that starts with '/'.
 *
 * The path field of "url" is always pointed at *pbs with length 0 first.
 * If the text does not start with '/', that empty path is the result and
 * TRUE is returned without moving *pbs. Otherwise the path extends up to
 * (not including) the first '?' or '#', or to the end of the string;
 * its length is stored and *pbs is advanced past it.
 * Returns FALSE (leaving the path length at 0) on an illegal character.
 */
static BOOL
ParseAbsPath (BYTE **pbs, URL *url)
{
  BYTE *first = *pbs;
  BYTE *cursor = first;
  BYTE  decoded;   /* scratch output for HexToByte; value is not used */

  url->s[PATH_PART] = first;
  url->len[PATH_PART] = 0;

  if (*first == '/') {
    while ((*cursor != '\0') && (*cursor != '?') && (*cursor != '#')) {
      BYTE ch = *cursor;

      if (!wae_ispchar (ch) && (ch != '/') && (ch != ';')) {
        /* Only a "%xy" escape may stand in for other characters. */
        if ((ch != '%') || !HexToByte (cursor + 1, &decoded))
          return FALSE;
        cursor += 2;
      }
      cursor++;
    }

    url->len[PATH_PART] = (UINT16)(cursor - first);
    *pbs = cursor;
  }

  return TRUE;
}
/*
 * Parse a relative path: a first segment that does not start with '/',
 * optionally followed by an absolute path.
 *
 * The first segment ends at '/', '?', '#' or the end of the string and
 * may not contain ':'. If the first segment is empty, the path is left
 * empty and TRUE is returned without moving *pbs. Otherwise the rest is
 * handed to ParseAbsPath and the stored path is widened to include the
 * first segment.
 * Returns FALSE on an illegal character.
 */
static BOOL
ParseRelPath (BYTE **pbs, URL *url)
{
  BYTE  *first = *pbs;
  BYTE  *cursor = first;
  BYTE   decoded;   /* scratch output for HexToByte; value is not used */
  UINT16 seglen;

  url->s[PATH_PART] = first;
  url->len[PATH_PART] = 0;

  for (; *cursor != '\0'; cursor++) {
    BYTE ch = *cursor;

    if ((ch == '/') || (ch == '?') || (ch == '#'))
      break;
    if (ch == ':')
      return FALSE;
    if (wae_ispchar (ch) || (ch == ';'))
      continue;

    /* Anything else is only acceptable as the start of an escape. */
    if ((ch != '%') || !HexToByte (cursor + 1, &decoded))
      return FALSE;
    cursor += 2;
  }

  seglen = (UINT16)(cursor - first);
  if (seglen == 0)
    return TRUE;

  if (!ParseAbsPath (&cursor, url))
    return FALSE;

  /* ParseAbsPath recorded the path as starting after the first segment;
   * move the start back and grow the length to cover that segment too. */
  url->s[PATH_PART] -= seglen;
  url->len[PATH_PART] += seglen;
  *pbs = cursor;

  return TRUE;
}
/*
 * Parse a network path: "//" followed by an authority and then an
 * absolute path.
 *
 * If the text at *pbs does not start with "//", nothing is parsed and
 * TRUE is returned. On success *pbs is advanced past what was parsed.
 * Returns FALSE if either the authority or the path fails to parse.
 */
static BOOL
ParseNetPath (BYTE **pbs, URL *url)
{
  BYTE *cursor = *pbs;

  if ((cursor[0] == '/') && (cursor[1] == '/')) {
    cursor += 2;

    if (!ParseAuthority (&cursor, url))
      return FALSE;
    if (!ParseAbsPath (&cursor, url))
      return FALSE;

    *pbs = cursor;
  }

  return TRUE;
}
/*
 * Parse a relative URI (one without a scheme): a network path, an
 * absolute path or a relative path, followed by a query.
 *
 * On success *pbs is advanced past what was parsed.
 * Returns FALSE if any of the sub-parsers fails.
 */
static BOOL
ParseRelativeURI (BYTE **pbs, URL *url)
{
  BYTE *cursor = *pbs;
  BOOL  ok;

  if (cursor[0] != '/')
    ok = ParseRelPath (&cursor, url);
  else if (cursor[1] == '/')
    ok = ParseNetPath (&cursor, url);
  else
    ok = ParseAbsPath (&cursor, url);

  if (!ok)
    return FALSE;
  if (!ParseQuery (&cursor, url))
    return FALSE;

  *pbs = cursor;
  return TRUE;
}
/*
 * Parse the hierarchical part of an absolute URI: a network path
 * (when the text starts with "//") or else an absolute path, followed
 * by a query.
 *
 * On success *pbs is advanced past what was parsed.
 * Returns FALSE if any of the sub-parsers fails.
 */
static BOOL
ParseHierPart (BYTE **pbs, URL *url)
{
  BYTE *cursor = *pbs;
  BOOL  ok;

  if ((cursor[0] == '/') && (cursor[1] == '/'))
    ok = ParseNetPath (&cursor, url);
  else
    ok = ParseAbsPath (&cursor, url);

  if (!ok)
    return FALSE;
  if (!ParseQuery (&cursor, url))
    return FALSE;

  *pbs = cursor;
  return TRUE;
}
/*
 * Parse the opaque part of a URI and store it as the path.
 *
 * If the text is empty or starts with '/', the path is left empty and
 * TRUE is returned without moving *pbs. Otherwise the opaque part runs
 * up to (not including) the first '#' that is not the very first
 * character, or to the end of the string. A '#' in the first position
 * is not treated as a terminator; it goes through the ordinary
 * character check like any other byte.
 * Returns FALSE (leaving the path length at 0) on an illegal character.
 */
static BOOL
ParseOpaquePart (BYTE **pbs, URL *url)
{
  BYTE *first = *pbs;
  BYTE *cursor = first;
  BYTE  decoded;   /* scratch output for HexToByte; value is not used */

  url->s[PATH_PART] = first;
  url->len[PATH_PART] = 0;

  if ((*cursor == '\0') || (*cursor == '/'))
    return TRUE;

  do {
    BYTE ch = *cursor;

    /* '#' ends the opaque part, except in the very first position. */
    if ((ch == '#') && (cursor != first))
      break;

    if (!wae_isuric (ch)) {
      /* Only a "%xy" escape may stand in for other characters. */
      if ((ch != '%') || !HexToByte (cursor + 1, &decoded))
        return FALSE;
      cursor += 2;
    }
    cursor++;
  } while (*cursor != '\0');

  url->len[PATH_PART] = (UINT16)(cursor - first);
  *pbs = cursor;

  return TRUE;
}
/*
 * Split the string form of a URL into its components and record them
 * in the given URL struct. Components are stored exactly as they appear
 * in the input (i.e., still escaped), as pointers into "bs" plus lengths.
 * Returns TRUE if the whole string is a valid URL, FALSE otherwise
 * (including when either argument is NULL).
 *
 * Unless otherwise noted, we parse according to the definition
 * in RFC2396.
 */
BOOL
URL_Parse (BYTE *bs, URL *url)
{
  BYTE  *cursor;
  UINT16 total;
  BOOL   ok;

  if ((bs == NULL) || (url == NULL))
    return FALSE;

  URL_Clear (url);
  total = B_STRINGLENGTH (bs);

  cursor = bs;
  if (!ParseScheme (&cursor, url))
    return FALSE;

  /* No scheme: relative URI. With a scheme, a leading '/' selects the
   * hierarchical form; anything else is an opaque part. */
  if (url->scheme_type == Scheme_empty)
    ok = ParseRelativeURI (&cursor, url);
  else if (cursor[0] == '/')
    ok = ParseHierPart (&cursor, url);
  else
    ok = ParseOpaquePart (&cursor, url);

  if (!ok)
    return FALSE;
  if (!ParseFragment (&cursor, url))
    return FALSE;

  /* The URL is valid only if the parsers consumed the entire string. */
  return total == (cursor - bs);
}
/*
 * The inverse of the parsing routine: create a URL string
 * from its different parts.
 *
 * Each part whose pointer is non-NULL is copied in order, together with
 * its delimiter: "scheme:", "//" (when an authority is present),
 * "userinfo@", host, ":port", path, "?query" and "#fragment".
 *
 * Returns a newly allocated, NUL-terminated string, or NULL in case of
 * error: "url" is NULL, the assembled string would not fit in a 16-bit
 * length, or memory could not be allocated.
 * NOTE: it is the caller's responsibility to deallocate the returned string.
 */
BYTE *
AssembleURLString (URL *url)
{
  UINT32 total = 0;
  UINT16 len;
  BYTE  *buf, *p;

  if (url == NULL)
    return NULL;

  /* Sum the part lengths in 32 bits: eight 16-bit lengths plus their
   * delimiters can exceed 65535, and a wrapped total would make the
   * buffer too small for the copies below. */
  if (url->s[SCHEME_PART] != NULL)
    total += (UINT32)url->len[SCHEME_PART] + 1;
  if (url->s[AUTHORITY_PART] != NULL)
    total += 2;
  if (url->s[USERINFO_PART] != NULL)
    total += (UINT32)url->len[USERINFO_PART] + 1;
  if (url->s[HOST_PART] != NULL)
    total += url->len[HOST_PART];
  if (url->s[PORT_PART] != NULL)
    total += (UINT32)url->len[PORT_PART] + 1;
  if (url->s[PATH_PART] != NULL)
    total += url->len[PATH_PART];
  if (url->s[QUERY_PART] != NULL)
    total += (UINT32)url->len[QUERY_PART] + 1;
  if (url->s[FRAGMENT_PART] != NULL)
    total += (UINT32)url->len[FRAGMENT_PART] + 1;

  if (total > 0xFFFF)
    return NULL;
  len = (UINT16)total;

  if ((buf = NEWARRAY (BYTE, len + 1)) == NULL) {
    return NULL;
  }
  p = buf;

  if (url->s[SCHEME_PART] != NULL) {
    B_COPYSTRINGN (p, url->s[SCHEME_PART], url->len[SCHEME_PART]);
    p += url->len[SCHEME_PART];
    *p++ = ':';
  }

  if (url->s[AUTHORITY_PART] != NULL) {
    *p++ = '/';
    *p++ = '/';
  }

  if (url->s[USERINFO_PART] != NULL) {
    B_COPYSTRINGN (p, url->s[USERINFO_PART], url->len[USERINFO_PART]);
    p += url->len[USERINFO_PART];
    *p++ = '@';
  }

  if (url->s[HOST_PART] != NULL) {
    B_COPYSTRINGN (p, url->s[HOST_PART], url->len[HOST_PART]);
    p += url->len[HOST_PART];
  }

  if (url->s[PORT_PART] != NULL) {
    *p++ = ':';
    B_COPYSTRINGN (p, url->s[PORT_PART], url->len[PORT_PART]);
    p += url->len[PORT_PART];
  }

  if (url->s[PATH_PART] != NULL) {
    B_COPYSTRINGN (p, url->s[PATH_PART], url->len[PATH_PART]);
    p += url->len[PATH_PART];
  }

  if (url->s[QUERY_PART] != NULL) {
    *p++ = '?';
    B_COPYSTRINGN (p, url->s[QUERY_PART], url->len[QUERY_PART]);
    p += url->len[QUERY_PART];
  }

  if (url->s[FRAGMENT_PART] != NULL) {
    *p++ = '#';
    B_COPYSTRINGN (p, url->s[FRAGMENT_PART], url->len[FRAGMENT_PART]);
    p += url->len[FRAGMENT_PART];
  }

  *p = '\0';

  return buf;
}
/*
 * Copy one part of a URL to the output position *pbs, while replacing
 * escape sequences with the characters they represent, unless they
 * are "reserved".
 *
 * A complete "%xy" escape that decodes to a reserved character or to
 * '#' is kept as an escape, with its three bytes passed through
 * DowncaseByte. Any other complete escape is replaced by the single
 * decoded byte. All other bytes are copied unchanged.
 *
 * The output is never longer than the input part. On return *pbs
 * points just past the last byte written; no NUL terminator is added.
 * If the part is absent or empty, nothing is written and *pbs is
 * unchanged.
 *
 * Indices are unsigned 16-bit, like the stored part length, so that
 * parts longer than 32767 bytes do not overflow a signed index.
 */
static void
FetchPart (BYTE **pbs, URL *url, UINT8 whichPart)
{
  BYTE  *dst = *pbs;
  BYTE  *src = url->s[whichPart];
  UINT16 len = url->len[whichPart];
  UINT16 in, out, k;
  BYTE   decoded;

  if ((src == NULL) || (len == 0))
    return;

  out = 0;
  for (in = 0; in < len; in++) {
    /* "in + 2 < len" makes sure both hex digits lie inside the part. */
    if ((src[in] == '%') && (in + 2 < len) &&
        HexToByte (&src[in + 1], &decoded)) {
      if (wae_isreserved (decoded) || (decoded == '#')) {
        /* Keep the escape, normalising the case of its hex digits. */
        for (k = 0; k < 3; k++)
          dst[out++] = DowncaseByte (src[in + k]);
      }
      else {
        dst[out++] = decoded;
      }
      in += 2;   /* skip the two hex digits; the loop skips the '%' */
    }
    else {
      dst[out++] = src[in];
    }
  }

  *pbs = dst + out;
}
/*
 * Create a "canonical" form of a given URL.
 * The following conversions are applied:
 * 1) The scheme and authority parts are changed into lower case.
 *    A single trailing '.' on the host name is dropped.
 * 2) Default port numbers (80 for http, 443 for https) and empty port
 *    numbers are removed, other port numbers have leading zeros dropped.
 * 3) Escape sequences (%xy) are replaced by the character they
 *    represent, unless it is a reserved character. In the latter
 *    case, the hex digits in the escape sequence are changed to
 *    lower case.
 * For any two URLs X and Y that compare equal (i.e., the routine
 * URL_Equal returns TRUE), this routine will produce identical
 * canonical forms.
 *
 * On success, *can receives a newly allocated, NUL-terminated string
 * and TRUE is returned. The buffer is sized from the length of the
 * input string, since none of the conversions above add bytes.
 * Returns FALSE in case of error (NULL argument, invalid URL, or out
 * of memory); *can is then NULL, provided "can" itself was not NULL.
 *
 * Loop indices are unsigned 16-bit, like the component lengths, so
 * that components longer than 32767 bytes do not overflow a signed
 * index.
 */
BOOL
b_MakeCanonical (const BYTE *bs, BYTE **can)
{
  UINT16 len, port;
  UINT16 i;
  BYTE  *p, *q, *buf;
  URL    url;

  if ((bs == NULL) || (can == NULL))
    return FALSE;
  *can = NULL;

  if (!URL_Parse ((BYTE*)bs, &url))
    return FALSE;

  len = B_STRINGLENGTH (bs);
  if ((buf = NEWARRAY (BYTE, len + 1)) == NULL)
    return FALSE;
  p = buf;

  if (url.s[SCHEME_PART] != NULL) {
    q = url.s[SCHEME_PART];
    len = url.len[SCHEME_PART];
    for (i = 0; i < len; i++)
      p[i] = DowncaseByte (q[i]);
    p += len;
    *p++ = ':';
  }

  if (url.s[AUTHORITY_PART] != NULL) {
    *p++ = '/';
    *p++ = '/';

    if (url.s[USERINFO_PART] != NULL) {
      q = url.s[USERINFO_PART];
      len = url.len[USERINFO_PART];
      for (i = 0; i < len; i++)
        p[i] = DowncaseByte (q[i]);
      p += len;
      *p++ = '@';
    }

    if (url.s[HOST_PART] != NULL) {
      q = url.s[HOST_PART];
      len = url.len[HOST_PART];
      /* Drop one trailing dot from the host name. */
      if ((len > 0) && (q[len - 1] == '.'))
        len--;
      for (i = 0; i < len; i++)
        p[i] = DowncaseByte (q[i]);
      p += len;
    }

    /* Get port number: skip default port numbers and empty port numbers,
     * and drop leading zeros in the port number. */
    q = url.s[PORT_PART];
    len = url.len[PORT_PART];
    if ((q != NULL) && (len > 0)) {
      port = GetNum (q, len);
      if (!(((url.scheme_type == Scheme_http) && (port == 80)) ||
            ((url.scheme_type == Scheme_https) && (port == 443)))) {
        *p++ = ':';
        if (port == 0) {
          *p++ = '0';
        }
        else {
          /* port != 0 here, so the skip below is expected to stop at a
           * non-zero digit inside the port text. */
          while (*q == '0') {
            q++;
            len--;
          }
          for (i = 0; i < len; i++)
            p[i] = q[i];
          p += len;
        }
      }
    }
  }

  FetchPart (&p, &url, PATH_PART);

  if (url.s[QUERY_PART] != NULL) {
    *p++ = '?';
    FetchPart (&p, &url, QUERY_PART);
  }

  if (url.s[FRAGMENT_PART] != NULL) {
    *p++ = '#';
    FetchPart (&p, &url, FRAGMENT_PART);
  }

  *p = '\0';
  *can = buf;

  return TRUE;
}
/*
 * Compute a hash value from a URL and store it in the location pointed
 * to by "hv". All equivalent URLs will hash to the same value,
 * but two non-equal URLs may also have the same hash value. However,
 * the probability of a collision is small.
 *
 * The hash function is simply an XOR of the canonical URL string
 * divided into four-byte blocks. Within a block, successive bytes are
 * shifted left by 0, 8, 16 and 24 bits. When the length is not a
 * multiple of four, the short block comes first and is aligned so that
 * the last byte of the string always lands in the top eight bits.
 *
 * Each byte is widened to UINT32 before shifting, so that shifting a
 * byte >= 0x80 by 24 bits does not overflow a signed int.
 *
 * Returns FALSE on error (NULL "hv", or the URL could not be brought
 * into canonical form), TRUE otherwise.
 */
BOOL
b_HashURL (const BYTE *bs, UINT32 *hv)
{
  BYTE  *cb;
  UINT16 len, i, shift;
  UINT32 n = 0;

  if (hv == NULL)
    return FALSE;

  if (!b_MakeCanonical (bs, &cb))
    return FALSE;

  len = B_STRINGLENGTH (cb);

  /* Shift for the first byte: 0 when len is a multiple of 4, otherwise
   * chosen so that the final byte gets a shift of 24. */
  shift = (UINT16)(((4 - (len & 0x3)) & 0x3) * 8);
  for (i = 0; i < len; i++) {
    n ^= (UINT32)(cb[i] & 0xFF) << shift;
    shift = (UINT16)((shift + 8) & 31);   /* 0 -> 8 -> 16 -> 24 -> 0 */
  }

  DEALLOC (&cb);
  *hv = n;

  return TRUE;
}
/*
 * Convert a wide-character string to a byte string and parse it as a
 * URL, storing the components as fields in the given URL struct.
 * All components are stored in their original (possibly escaped) form.
 *
 * Returns TRUE if the URL was valid. In that case the URL struct refers
 * to the converted byte string, which is therefore NOT deallocated here.
 * NOTE(review): who releases that buffer afterwards is not visible in
 * this function; confirm against the callers.
 *
 * Returns FALSE if "ws" is NULL, if the conversion fails or reports an
 * overflow, or if the string is not a valid URL. In every such case the
 * converted string is deallocated, and after a failed parse the URL
 * struct is cleared so that it holds no pointers into the freed buffer.
 */
BOOL
URL_FromWideString (const WCHAR *ws, URL *url)
{
  BOOL  bOverflowDetected = FALSE;
  BYTE *pbStr;

  if (ws == NULL)
    return FALSE;

  pbStr = wip_wchar2byte (ws, &bOverflowDetected);
  if ((pbStr != NULL) && !bOverflowDetected) {
    if (URL_FromByteString (pbStr, url))
      return TRUE;

    /* The parse failed part-way: drop whatever was recorded, because
     * the buffer those fields may point into is released below. */
    if (url != NULL)
      URL_Clear (url);
  }

  DEALLOC (&pbStr);

  return FALSE;
}
/*
 * Parse a byte string as a URL by passing it to URL_Parse.
 * The const qualifier is cast away only because URL_Parse takes a
 * non-const pointer.
 * Returns whatever URL_Parse returns.
 */
BOOL
URL_FromByteString (const BYTE *bs, URL *url)
{
  BYTE *text = (BYTE*)bs;

  return URL_Parse (text, url);
}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -