📄 url.c
字号:
/*
 * Build the string form of 'url' and hand it back as a wide-character
 * string.
 *
 * The URL is first rendered with URL_ToByteString; that intermediate
 * byte string is converted with wip_byte2wchar and then released.
 *
 * Returns NULL if URL_ToByteString returns NULL; otherwise returns
 * whatever wip_byte2wchar produced.
 * NOTE: It is the caller's responsibility to deallocate the returned string.
 */
WCHAR *
URL_ToWideString (URL *url)
{
  BYTE *bytes = URL_ToByteString (url);
  WCHAR *wide;

  if (bytes != NULL) {
    wide = wip_byte2wchar (bytes);
    /* The byte rendering is only a stepping stone; drop it. */
    DEALLOC (&bytes);
    return wide;
  }

  return NULL;
}
/*
 * Build the byte-string form of 'url' by delegating to AssembleURLString.
 * A NULL 'url' yields NULL without calling AssembleURLString.
 */
BYTE *
URL_ToByteString (URL *url)
{
  return (url != NULL) ? AssembleURLString (url) : NULL;
}
/*
 * Segment-walking helpers for path normalisation.
 *
 * All three macros read two names from the scope they are expanded in:
 *   buf - the character buffer being scanned, and
 *   n   - the number of valid characters in 'buf'.
 * NEXTSEGMENT also relies on buf[n] not being '/', otherwise its
 * slash-skipping loop has nothing to stop it.
 *
 * The argument 'x' is evaluated several times, and NEXTSEGMENT and
 * PREVSEGMENT assign to it, so it must be a plain variable without
 * side effects.
 */

/* True if the three characters starting at index x are "../" and all
 * of them lie before index n. */
#define IS_DOTDOTSEGMENT(x) \
  (((x) + 2 < n) && (buf[(x)] == '.') && \
   (buf[(x) + 1] == '.') && (buf[(x) + 2] == '/'))

/* Advance x to the start of the following segment: if x is on a '/',
 * step past the whole run of '/' characters; otherwise step past the
 * non-'/' characters and, if still before n, past one '/' after them.
 * Wrapped in do/while(0) so that it behaves as a single statement. */
#define NEXTSEGMENT(x) \
  do { \
    if (buf[(x)] == '/') { \
      do (x)++; while (buf[(x)] == '/'); \
    } \
    else { \
      while (((x) < n) && (buf[(x)] != '/')) (x)++; \
      if ((x) < n) (x)++; \
    } \
  } while (0)

/* Move x back one segment: for x > 1, step back two characters, scan
 * backwards until a '/' or index 0 is reached, then step forward one.
 * For x <= 1 the result is 0.
 * Wrapped in do/while(0) so that it behaves as a single statement. */
#define PREVSEGMENT(x) \
  do { \
    if ((x) > 1) { \
      (x) -= 2; \
      while (((x) > 0) && (buf[(x)] != '/')) (x)--; \
      (x)++; \
    } \
    else { \
      (x) = 0; \
    } \
  } while (0)
/*
 * Given two path names, an absolute path (starting with '/') in 'base',
 * and a relative path in 'rel', combine them into a new absolute path
 * removing all unnecessary './' and '../' segments.
 *
 * base, base_len - the base path and its length in bytes; everything
 *                  after its right-most '/' is discarded before 'rel'
 *                  is appended.
 * rel, rel_len   - the relative path and its length in bytes.
 *
 * Returns a newly allocated, NUL-terminated string with the new path,
 * or NULL if the buffer could not be allocated. The caller owns the
 * returned string.
 *
 * Indices are kept in 'long' because base_len and rel_len are both
 * UINT16, so the combined length can exceed what an INT16 can hold.
 */
static BYTE *
RemoveDots (BYTE *base, UINT16 base_len, BYTE *rel, UINT16 rel_len)
{
  UINT16 len;
  BYTE *buf;
  long i, j, k, m, n, t;

  /* Find the right-most '/' character in the base path.
   * i ends at -1 if there is none, which makes 'len' zero below. */
  for (i = (long)base_len - 1; i >= 0; i--) {
    if (base[i] == '/') {
      break;
    }
  }
  len = (UINT16)(i + 1);

  if ((buf = NEWARRAY (BYTE, len + rel_len + 1)) == NULL)
    return NULL;

  /* Append the relative path to the base path. */
  B_COPYSTRINGN (buf, base, len);
  B_COPYSTRINGN (buf + len, rel, rel_len);
  n = (long)len + rel_len;
  buf[n] = '\0';

  /* Remove all occurrences of './' that form a complete segment,
   * compacting in place: i is the write index, j the read index. */
  for (i = j = 0; j < n; ) {
    if (((j == 0) || (buf[j - 1] == '/')) && (buf[j] == '.') &&
        (j < n - 1) && (buf[j + 1] == '/')) {
      j += 2;
      continue;
    }
    else if (i != j) {
      buf[i] = buf[j];
    }
    i++;
    j++;
  }
  n = i;

  /* If the string ends with a '.' segment, remove it. */
  if (((n == 1) || ((n >= 2) && (buf[n - 2] == '/'))) &&
      (buf[n - 1] == '.')) {
    n--;
  }
  buf[n] = '\0';

  /* Remove all occurrences of '<segment>/../'.
   * i marks the segment at the write position, j the segment being
   * read, and m tracks the length of the compacted result. */
  i = 0;
  NEXTSEGMENT (i);
  j = i;
  NEXTSEGMENT (j);
  m = n;
  while (j < n) {
    if ((buf[i] != '/') && !IS_DOTDOTSEGMENT (i) && IS_DOTDOTSEGMENT (j)) {
      /* Drop the segment at i together with the '../' at j. */
      k = i;
      NEXTSEGMENT (k);
      m -= (k - i) + 3;
      PREVSEGMENT (i);
      NEXTSEGMENT (j);
    }
    else {
      k = j;
      NEXTSEGMENT (j);
      NEXTSEGMENT (i);
      /* Copy one segment from k to i. Source and destination can
       * overlap (i < k), so copy byte by byte in ascending order
       * instead of using a block copy. */
      if (i != k) {
        for (t = 0; t < j - k; t++) {
          buf[i + t] = buf[k + t];
        }
      }
    }
  }
  n = m;

  /* If the string ends with '<segment>/..', then remove that part. */
  if ((n > 3) && (buf[n - 3] == '/') && (buf[n - 2] == '.') &&
      (buf[n - 1] == '.')) {
    i = n - 2;
    PREVSEGMENT (i);
    if (!IS_DOTDOTSEGMENT (i)) {
      n = i;
    }
  }
  buf[n] = '\0';

  return buf;
}
/*
 * Given a base URL and a relative URL, resolve the relative reference
 * and store the result, as a newly allocated absolute URL string,
 * in "*abs".
 *
 * base - the absolute URL to resolve against; it must have a scheme.
 *        Side effect: if its path is empty, the path fields of *base
 *        are changed to refer to the constant string "/".
 * rel  - the reference to resolve.
 * abs  - out-parameter receiving the assembled string.
 *
 * Returns TRUE on success, FALSE otherwise. When an argument is NULL,
 * 'base' has no scheme, or the new path cannot be built, "*abs" is left
 * untouched; when assembling the final string fails, "*abs" is set to
 * NULL.
 * NOTE: It is the caller's responsibility to deallocate the returned string.
 */
BOOL
URL_Resolve (URL *base, URL *rel, BYTE **abs)
{
  URL new_url;
  BYTE *result = NULL;

  if ((base == NULL) || (rel == NULL) || (abs == NULL))
    return FALSE;

  if (base->s[SCHEME_PART] == NULL)
    /* Not a correct absolute URL. */
    return FALSE;

  if (base->len[PATH_PART] == 0) {
    base->s[PATH_PART] = (BYTE *)"/";
    base->len[PATH_PART] = 1;
  }

  if (rel->s[SCHEME_PART] != NULL) {
    /* If rel has a scheme, then it is an absolute reference.
     * Just copy it. */
    result = AssembleURLString (rel);
  }
  else if (rel->s[AUTHORITY_PART] != NULL) {
    /* If rel has an authority part, just use the scheme from the
     * base part. */
    new_url = *rel;
    new_url.s[SCHEME_PART] = base->s[SCHEME_PART];
    new_url.len[SCHEME_PART] = base->len[SCHEME_PART];
    new_url.scheme_type = base->scheme_type;
    result = AssembleURLString (&new_url);
  }
  else if ((rel->len[PATH_PART] == 0) && (rel->s[QUERY_PART] == NULL)) {
    /* 'rel' is empty or just a fragment: keep everything from the
     * base and take only the fragment from 'rel'. */
    new_url = *base;
    new_url.s[FRAGMENT_PART] = rel->s[FRAGMENT_PART];
    new_url.len[FRAGMENT_PART] = rel->len[FRAGMENT_PART];
    result = AssembleURLString (&new_url);
  }
  else {
    /* 'rel' does not have a scheme nor an authority part,
     * but is more than just a fragment. */
    BYTE *tmp;

    if (rel->len[PATH_PART] == 0)
      tmp = RemoveDots (base->s[PATH_PART], base->len[PATH_PART],
                        (BYTE *)"", 0);
    else if (*(rel->s[PATH_PART]) != '/')
      tmp = RemoveDots (base->s[PATH_PART], base->len[PATH_PART],
                        rel->s[PATH_PART], rel->len[PATH_PART]);
    else
      /* The reference path is already absolute; use it as it is. */
      tmp = newstring (rel->s[PATH_PART], rel->len[PATH_PART]);

    if (tmp == NULL) /* Memory allocation failed */
      return FALSE;

    new_url = *base;
    new_url.s[PATH_PART] = tmp;
    new_url.len[PATH_PART] = B_STRINGLENGTH (tmp);
    new_url.s[QUERY_PART] = rel->s[QUERY_PART];
    new_url.len[QUERY_PART] = rel->len[QUERY_PART];
    new_url.s[FRAGMENT_PART] = rel->s[FRAGMENT_PART];
    new_url.len[FRAGMENT_PART] = rel->len[FRAGMENT_PART];
    result = AssembleURLString (&new_url);
    DEALLOC (&tmp);
  }

  /* Every branch funnels through here so that a failed assembly is
   * reported as FALSE rather than as success with a NULL string. */
  *abs = result;
  return (result != NULL) ? TRUE : FALSE;
}
/*
 * Wide-character front end for b_Resolve: convert 'base' and 'rel' to
 * byte strings, resolve them with b_Resolve, and convert the result
 * back to a wide string stored in "*abs".
 *
 * Returns TRUE on success. Returns FALSE if an argument is NULL, if
 * either conversion fails or sets its error flag, if b_Resolve fails,
 * or if the result cannot be converted back. "*abs" is written only
 * once the final conversion has been attempted.
 * NOTE: It is the caller's responsibility to deallocate the returned string.
 */
BOOL
w_Resolve (const WCHAR *base, const WCHAR *rel, WCHAR **abs)
{
  BYTE *b_base = NULL;
  BYTE *b_rel = NULL;
  BYTE *b_abs = NULL;
  BOOL err = FALSE;
  BOOL ok = FALSE;

  if ((base == NULL) || (rel == NULL) || (abs == NULL))
    return FALSE;

  /* A conversion may hand back a buffer and still set 'err'
   * (w_IsValid in this file frees the buffer in that situation), so
   * every failure path goes through the common cleanup below rather
   * than returning directly. */
  b_base = wip_wchar2byte (base, &err);
  if ((b_base == NULL) || err)
    goto cleanup;

  b_rel = wip_wchar2byte (rel, &err);
  if ((b_rel == NULL) || err)
    goto cleanup;

  if (!b_Resolve (b_base, b_rel, &b_abs))
    goto cleanup;

  *abs = wip_byte2wchar (b_abs);
  ok = (*abs != NULL) ? TRUE : FALSE;

cleanup:
  if (b_base != NULL)
    DEALLOC (&b_base);
  if (b_rel != NULL)
    DEALLOC (&b_rel);
  if (b_abs != NULL)
    DEALLOC (&b_abs);

  return ok;
}
/*
 * Byte-string front end for URL_Resolve: parse 'base' and 'rel' with
 * URL_FromByteString and resolve the parsed URLs into "*abs".
 *
 * Returns FALSE without touching "*abs" if any argument is NULL.
 * If parsing either string or the resolution itself fails, "*abs" is
 * set to NULL and FALSE is returned. Otherwise returns TRUE.
 */
BOOL
b_Resolve (const BYTE *base, const BYTE *rel, BYTE **abs)
{
  URL parsed_base, parsed_rel;

  if ((base == NULL) || (rel == NULL) || (abs == NULL))
    return FALSE;

  URL_Clear (&parsed_rel);

  /* The three steps short-circuit: a later one runs only if all the
   * earlier ones succeeded. */
  if (URL_FromByteString (base, &parsed_base) &&
      URL_FromByteString (rel, &parsed_rel) &&
      URL_Resolve (&parsed_base, &parsed_rel, abs)) {
    return TRUE;
  }

  *abs = NULL;
  return FALSE;
}
/*
 * Compare two URLs component by component.
 *
 * "whichComponents" is a bitmap (SCHEME_COMP, USERINFO_COMP, HOST_COMP,
 * PORT_COMP, PATH_COMP, QUERY_COMP, FRAG_COMP) selecting the parts that
 * take part in the comparison; parts that are not selected are ignored.
 *
 * - Ports are compared numerically via GetNum. A URL without a port
 *   part is treated as port 80 for Scheme_http and 443 for Scheme_https.
 * - An empty path is treated as "/".
 * - Scheme, userinfo and host are passed to CompareStrings with TRUE as
 *   the last argument; path, query and fragment with FALSE (presumably
 *   a case-insensitivity switch - confirm against CompareStrings).
 *
 * Returns TRUE if all selected components match, FALSE otherwise.
 * Two NULL URLs are equal; a NULL and a non-NULL URL are not.
 */
BOOL
URL_Equal (URL *url1, URL *url2, BYTE whichComponents)
{
  URL *side[2];
  BYTE *port_str;
  BYTE *path[2];
  UINT16 port_len;
  UINT16 path_len[2];
  UINT16 port[2];
  int k;

  if (url1 == NULL)
    return (url2 == NULL);
  if (url2 == NULL)
    return FALSE;

  side[0] = url1;
  side[1] = url2;
  port[0] = 0;
  port[1] = 0;

  /* Work out the effective port number of each URL. */
  if (whichComponents & PORT_COMP) {
    for (k = 0; k < 2; k++) {
      if (side[k]->s[PORT_PART] != NULL) {
        port_str = side[k]->s[PORT_PART];
        port_len = side[k]->len[PORT_PART];
      }
      else if (side[k]->scheme_type == Scheme_http) {
        port_str = (BYTE *)"80";
        port_len = 2;
      }
      else if (side[k]->scheme_type == Scheme_https) {
        port_str = (BYTE *)"443";
        port_len = 3;
      }
      else {
        /* No port and no known default: pass the (NULL) port part on. */
        port_str = side[k]->s[PORT_PART];
        port_len = side[k]->len[PORT_PART];
      }
      port[k] = GetNum (port_str, port_len);
    }
  }

  /* Work out the effective path of each URL. */
  for (k = 0; k < 2; k++) {
    if (side[k]->len[PATH_PART] == 0) {
      path[k] = (BYTE *)"/";
      path_len[k] = 1;
    }
    else {
      path[k] = side[k]->s[PATH_PART];
      path_len[k] = side[k]->len[PATH_PART];
    }
  }

  /* Check the selected components in order, stopping at the first
   * mismatch. */
  if ((whichComponents & SCHEME_COMP) &&
      !CompareStrings (url1->s[SCHEME_PART], url1->len[SCHEME_PART],
                       url2->s[SCHEME_PART], url2->len[SCHEME_PART], TRUE))
    return FALSE;

  if ((whichComponents & USERINFO_COMP) &&
      !CompareStrings (url1->s[USERINFO_PART], url1->len[USERINFO_PART],
                       url2->s[USERINFO_PART], url2->len[USERINFO_PART],
                       TRUE))
    return FALSE;

  if ((whichComponents & HOST_COMP) &&
      !CompareStrings (url1->s[HOST_PART], url1->len[HOST_PART],
                       url2->s[HOST_PART], url2->len[HOST_PART], TRUE))
    return FALSE;

  if ((whichComponents & PORT_COMP) && (port[0] != port[1]))
    return FALSE;

  if ((whichComponents & PATH_COMP) &&
      !CompareStrings (path[0], path_len[0], path[1], path_len[1], FALSE))
    return FALSE;

  if ((whichComponents & QUERY_COMP) &&
      !CompareStrings (url1->s[QUERY_PART], url1->len[QUERY_PART],
                       url2->s[QUERY_PART], url2->len[QUERY_PART], FALSE))
    return FALSE;

  return (!(whichComponents & FRAG_COMP) ||
          CompareStrings (url1->s[FRAGMENT_PART], url1->len[FRAGMENT_PART],
                          url2->s[FRAGMENT_PART], url2->len[FRAGMENT_PART],
                          FALSE));
}
/*
 * Wide-character front end for b_EqualURL: convert both URL strings to
 * byte strings and compare the components selected by
 * "whichComponents".
 *
 * Two NULL strings are equal; a NULL and a non-NULL string are not.
 * Returns FALSE if either conversion fails or sets its error flag;
 * otherwise returns the result of b_EqualURL.
 */
BOOL
w_EqualURL (const WCHAR *url1, const WCHAR *url2, BYTE whichComponents)
{
  BYTE *b_url1 = NULL;
  BYTE *b_url2 = NULL;
  BOOL err = FALSE;
  BOOL res = FALSE;

  if (url1 == NULL)
    return (url2 == NULL);
  if (url2 == NULL)
    return FALSE;

  /* A conversion may hand back a buffer and still set 'err'
   * (w_IsValid in this file frees the buffer in that situation), so
   * release whatever was returned on every path. */
  b_url1 = wip_wchar2byte (url1, &err);
  if ((b_url1 != NULL) && !err) {
    b_url2 = wip_wchar2byte (url2, &err);
    if ((b_url2 != NULL) && !err) {
      res = b_EqualURL (b_url1, b_url2, whichComponents);
    }
  }

  if (b_url1 != NULL)
    DEALLOC (&b_url1);
  if (b_url2 != NULL)
    DEALLOC (&b_url2);

  return res;
}
/*
 * Parse two URL byte strings with URL_FromByteString and compare the
 * components selected by "whichComponents" using URL_Equal.
 *
 * Two NULL strings are equal; a NULL and a non-NULL string are not.
 * Returns FALSE if either string fails to parse.
 */
BOOL
b_EqualURL (const BYTE *bs1, const BYTE *bs2, BYTE whichComponents)
{
  URL parsed1, parsed2;

  if ((bs1 == NULL) || (bs2 == NULL))
    return ((bs1 == NULL) && (bs2 == NULL));

  if (!URL_FromByteString (bs1, &parsed1))
    return FALSE;

  /* The second string is parsed only if the first one was accepted. */
  if (!URL_FromByteString (bs2, &parsed2))
    return FALSE;

  return (URL_Equal (&parsed1, &parsed2, whichComponents) != 0);
}
/*
 * Return TRUE if the given wide-character URL string has a valid
 * format, FALSE otherwise.
 *
 * The string is converted with wip_wchar2byte and checked with
 * b_IsValid. A NULL argument, a failed conversion, or a conversion
 * that sets its overflow flag all give FALSE.
 */
BOOL
w_IsValid (const WCHAR* pchUrl)
{
  BOOL overflow = FALSE;
  BOOL valid = FALSE;
  BYTE *bytes;

  if (pchUrl == NULL)
    return FALSE;

  bytes = wip_wchar2byte (pchUrl, &overflow);
  if ((bytes != NULL) && !overflow)
    valid = b_IsValid (bytes);

  /* Released unconditionally, whatever the conversion returned. */
  DEALLOC (&bytes);

  return valid;
}
/*
 * Return the result of running URL_Parse on the given byte string.
 * The parsed URL itself is discarded; only the verdict is returned.
 */
BOOL
b_IsValid (const BYTE* pbUrl)
{
  URL scratch;
  BOOL parsed_ok;

  /* URL_Parse takes a non-const pointer, hence the cast. */
  parsed_ok = URL_Parse ((BYTE*)pbUrl, &scratch);

  return parsed_ok;
}
/*
* Retrieval of the different parts of a URL.
*/
/*
 * Return a newly allocated copy of the scheme of the URL.
 * Returns NULL if 'url' is NULL, if the URL does not have a scheme
 * part, or if newstring fails.
 * NOTE: it is the responsibility of the caller to deallocate the string.
 */
BYTE*
URL_GetScheme (URL *url)
{
  if (url == NULL)
    return NULL;

  /* Check for a missing scheme explicitly rather than depending on
   * how newstring treats a NULL source pointer. */
  if (url->s[SCHEME_PART] == NULL)
    return NULL;

  return newstring (url->s[SCHEME_PART], url->len[SCHEME_PART]);
}
/*
 * Return the scheme type stored in the URL, or Scheme_empty if 'url'
 * is NULL.
 */
Scheme
URL_GetSchemeType (URL *url)
{
  return (url != NULL) ? url->scheme_type : Scheme_empty;
}
/*
 * Extract one component of a wide-character URL string.
 *
 * pchUrl    - the URL string.
 * whichpart - index of the wanted component in the URL's part arrays
 *             (e.g. SCHEME_PART); it is not range-checked here.
 * part      - out-parameter. Set to NULL on entry, and to a newly
 *             allocated wide string if the URL has that component.
 *
 * Returns TRUE if the URL could be parsed, including when the component
 * is absent (in which case "*part" stays NULL). Returns FALSE if an
 * argument is NULL, if the conversion to a byte string fails or sets
 * its error flag, if URL_Parse rejects the string, or if the result
 * buffer cannot be allocated.
 * NOTE: it is the responsibility of the caller to deallocate "*part".
 */
static BOOL
w_GetPart (const WCHAR* pchUrl, UINT16 whichpart, WCHAR **part)
{
  URL url;
  BYTE *bs = NULL;
  BOOL err = FALSE;
  BOOL ok = FALSE;

  if ((pchUrl == NULL) || (part == NULL))
    return FALSE;
  *part = NULL;

  bs = wip_wchar2byte (pchUrl, &err);
  if (bs == NULL)
    return FALSE;

  /* From here on 'bs' is always released before returning, including
   * when the conversion returned a buffer but set 'err' (w_IsValid in
   * this file frees the buffer in that situation). */
  if (!err && URL_Parse (bs, &url)) {
    ok = TRUE;
    if (url.s[whichpart] != NULL) {
      UINT16 len = url.len[whichpart];
      BYTE *src = url.s[whichpart];
      WCHAR *buf = NEWARRAY (WCHAR, len + 1);
      UINT16 i;  /* unsigned, to match 'len' */

      if (buf == NULL) {
        ok = FALSE;
      }
      else {
        /* Widen each byte of the component to a WCHAR. */
        for (i = 0; i < len; i++) {
          buf[i] = (WCHAR)src[i];
        }
        buf[len] = (WCHAR)0;
        *part = buf;
      }
    }
  }

  DEALLOC (&bs);

  return ok;
}
/*
 * Extract one component of a URL byte string.
 *
 * pbUrl     - the URL string.
 * whichpart - index of the wanted component in the URL's part arrays
 *             (e.g. SCHEME_PART); it is not range-checked here.
 * part      - out-parameter. Set to NULL as soon as the arguments have
 *             been checked, and to a newly allocated copy of the
 *             component if the URL has one.
 *
 * Returns TRUE if the URL could be parsed, including when the component
 * is absent (in which case "*part" stays NULL). Returns FALSE if an
 * argument is NULL, if URL_Parse rejects the string, or if the copy
 * cannot be allocated.
 * NOTE: it is the responsibility of the caller to deallocate "*part".
 */
static BOOL
b_GetPart (const BYTE* pbUrl, UINT16 whichpart, BYTE **part)
{
  URL url;

  if ((pbUrl == NULL) || (part == NULL))
    return FALSE;

  /* Clear the out-parameter before parsing, as w_GetPart does, so a
   * parse failure never leaves the caller with a stale pointer. */
  *part = NULL;

  if (!URL_Parse ((BYTE*)pbUrl, &url)) {
    return FALSE;
  }

  if (url.s[whichpart] != NULL) {
    *part = newstring (url.s[whichpart], url.len[whichpart]);
    if (*part == NULL) {
      return FALSE;
    }
  }

  return TRUE;
}
/*
 * Extract the scheme of a wide-character URL string by asking
 * w_GetPart for SCHEME_PART.
 *
 * Returns FALSE in case of error, including that the URL is not valid.
 * "*scheme" is set to NULL if the URL does not have a scheme component.
 * NOTE: it is the responsibility of the caller to deallocate
 * the returned string.
 */
BOOL
w_GetScheme (const WCHAR* pchUrl, WCHAR **scheme)
{
  BOOL found;

  found = w_GetPart (pchUrl, SCHEME_PART, scheme);

  return found;
}
BOOL
w_GetSchemeType (const WCHAR* pchUrl, Scheme *scheme)
{
URL url;
BYTE *bs = NULL;
BOOL err = FALSE;
BOOL ok = TRUE;
if ((pchUrl == NULL) || (scheme == NULL))
return FALSE;
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -