📄 url.c
字号:
BYTE b = (BYTE)(*p & 0xff);
if (*p > 0xff || wae_isspecial (b) || (b > 0x7f))
r++;
else
l++;
}
if ((s = NEWARRAY (WCHAR, l + 3 * r + 1)) == NULL)
return NULL;
for (p = pchString, q = s; *p; p++) {
BYTE b = (BYTE)(*p & 0xff);
if (*p > 0xff){
*q++ = '%';
*q++ = '2';
*q++ = '0';
}
else if (wae_isspecial (b) || (b > 0x7f)) {
*q++ = '%';
ByteToHex (b, tmp);
*q++ = (WCHAR)tmp[0];
*q++ = (WCHAR)tmp[1];
}
else
*q++ = *p;
}
*q = '\0';
return s;
}
/*
 * Return a newly allocated copy of 'pchString' in which every escape
 * sequence of the form "%xy" that HexToByte accepts has been replaced
 * by the single character it encodes. A '%' that does not start such
 * a sequence is copied unchanged.
 * Returns NULL if 'pchString' is NULL, if the allocation fails, or if
 * the string contains a character above 0x7f (non-ASCII).
 * NOTE: it is the caller's responsibility to deallocate the returned string.
 */
WCHAR *
w_WMLS_UnescapeString (const WCHAR* pchString)
{
  const WCHAR *p;
  WCHAR *q, *s;
  BYTE b;
  BYTE tmp[2];

  if (pchString == NULL)
    return NULL;

  /* Every step of the loop below consumes at least one input character
   * and emits exactly one, so an input-sized buffer is sufficient. */
  if ((s = NEWARRAY (WCHAR, STRINGLENGTH (pchString) + 1)) == NULL)
    return NULL;

  for (p = pchString, q = s; *p != 0;) {
    if (*p > 0x7f) {
      DEALLOC (&s);
      return NULL;
    }
    /* Only inspect the two characters after '%' when both of them lie
     * before the terminator. Without this guard a '%' at the very end
     * of the string caused *(p + 2) to be read past the terminator. */
    if ((*p == '%') && (*(p + 1) != 0) && (*(p + 2) != 0)) {
      if ((*(p + 1) > 0x7f) || (*(p + 2) > 0x7f)) {
        DEALLOC (&s);
        return NULL;
      }
      tmp[0] = (BYTE)*(p + 1);
      tmp[1] = (BYTE)*(p + 2);
      if (HexToByte (tmp, &b)) {
        *q++ = (WCHAR)b;
        p += 3;
        continue;
      }
    }
    /* Ordinary character, or a '%' that is not a valid escape. */
    *q++ = *p++;
  }
  *q = '\0';

  return s;
}
/*
 * Return a newly allocated copy of 'pchString' in which every escape
 * sequence of the form "%xy" that HexToByte accepts has been replaced
 * by the single character it encodes. Everything else, including
 * characters above 0x7f and any '%' that does not start a valid
 * escape, is copied unchanged.
 * Returns NULL if 'pchString' is NULL or if the allocation fails.
 * NOTE: it is the caller's responsibility to deallocate the returned string.
 */
WCHAR*
w_UnescapeString (const WCHAR* pchString)
{
  const WCHAR *p;
  WCHAR *q, *s;
  BYTE b;
  BYTE tmp[2];

  if (pchString == NULL)
    return NULL;

  /* Every step of the loop below consumes at least one input character
   * and emits exactly one, so an input-sized buffer is sufficient. */
  if ((s = NEWARRAY (WCHAR, STRINGLENGTH (pchString) + 1)) == NULL)
    return NULL;

  for (p = pchString, q = s; *p != 0;) {
    /* Only inspect the two characters after '%' when both of them lie
     * before the terminator. Without this guard a '%' at the very end
     * of the string caused *(p + 2) to be read past the terminator. */
    if ((*p == (WCHAR)'%') && (*(p + 1) != 0) && (*(p + 2) != 0)) {
      /* Hex digits are ASCII; wider characters cannot form an escape. */
      if ((*(p + 1) < 0x80) && (*(p + 2) < 0x80)) {
        tmp[0] = (BYTE)*(p + 1);
        tmp[1] = (BYTE)*(p + 2);
        if (HexToByte (tmp, &b)) {
          *q++ = (WCHAR)b;
          p += 3;
          continue;
        }
      }
    }
    /* Ordinary character, or a '%' that is not a valid escape. */
    *q++ = *p++;
  }
  *q = '\0';

  return s;
}
/*
 * Allocate a buffer as large as 'pbString' (terminator included) and
 * fill it, via UnescapeString, with a copy of 'pbString' in which the
 * "%xy" escape sequences have been replaced by the characters they
 * represent.
 * Returns NULL if 'pbString' is NULL or if the allocation fails.
 * NOTE: it is the caller's responsibility to deallocate the returned string.
 */
BYTE*
b_UnescapeString (const BYTE* pbString)
{
  BYTE *copy;

  if (pbString == NULL)
    return NULL;

  copy = NEWARRAY (BYTE, B_STRINGLENGTH (pbString) + 1);
  if (copy == NULL)
    return NULL;

  UnescapeString (copy, pbString);
  return copy;
}
/*
 * Copy the zero-terminated string 'src' to 'dst', replacing every
 * escape sequence "%xy" that HexToByte accepts with the byte it
 * encodes. A '%' that does not start a valid escape is copied as is.
 * Does nothing if either argument is NULL.
 * 'dst' must have room for at least as many bytes as 'src' occupies,
 * terminator included. The function may be called with dst == src:
 * each step reads at least as many bytes as it writes, so the write
 * position never overtakes the read position.
 */
VOID
UnescapeString (BYTE *dst, const BYTE *src)
{
  BYTE b;

  if ((src == NULL) || (dst == NULL))
    return;

  while (*src) {
    /* Only hand the two bytes after '%' to HexToByte when both lie
     * before the terminator, so that neither the helper nor the
     * 'src += 3' below can ever step past the end of the string. */
    if ((*src == '%') && (src[1] != '\0') && (src[2] != '\0') &&
        HexToByte (src + 1, &b)) {
      *dst++ = b;
      src += 3;
    }
    else
      *dst++ = *src++;
  }
  *dst = '\0';
}
/**********************************************************************
* Routines for access control.
**********************************************************************/
/*
 * Check that the string 's1' (of length 'len1') is a componentwise
 * suffix of the string 's2' (of length 'len2'), where components are
 * separated by '.'. For example, "def.com" is a componentwise suffix
 * of "abc.def.com", whereas "c.def.com" is not.
 * An empty 's1' is a suffix of anything; a non-empty 's1' is never
 * a suffix of an empty 's2'.
 * Components are compared with CompareStrings, passing TRUE as its last
 * argument (presumably selecting a case-insensitive comparison --
 * confirm against CompareStrings).
 */
static BOOL
DomainIsSuffix (BYTE *s1, UINT16 len1, BYTE *s2, UINT16 len2)
{
  INT16 end1, end2;   /* One past the last character of the current component */
  INT16 dot1, dot2;   /* Index of the '.' in front of it, or -1 if none */
  UINT16 n1, n2;      /* Lengths of the current components */

  if (len1 == 0)
    return TRUE;
  if (len2 == 0)
    return FALSE;

  end1 = len1;
  end2 = len2;
  while ((end1 > 0) && (end2 > 0)) {
    /* Walk backwards to the '.' that precedes the current component. */
    dot1 = end1 - 1;
    while ((dot1 >= 0) && (s1[dot1] != '.'))
      dot1--;
    dot2 = end2 - 1;
    while ((dot2 >= 0) && (s2[dot2] != '.'))
      dot2--;

    n1 = end1 - dot1 - 1;
    n2 = end2 - dot2 - 1;
    if (!CompareStrings (&s1[dot1 + 1], n1, &s2[dot2 + 1], n2, TRUE))
      return FALSE;

    /* Continue with the component to the left of the '.'. */
    end1 = dot1;
    end2 = dot2;
  }

  /* It is a suffix only if all of 's1' has been consumed. */
  return (end1 <= 0);
}
/*
 * Check that the string 's1' (of length 'len1') is a componentwise
 * prefix of the string 's2' (of length 'len2'), where components are
 * separated by '/'. For example, "/abc/def" is a componentwise prefix
 * of "/abc/def/jkl", whereas "/abc/def/j" is not.
 * An empty 's1' is a prefix of anything; otherwise both strings must
 * be non-empty and start with '/'. An empty component in 's1' is not
 * compared, so it matches whatever 's2' has in that position.
 * Components are compared with CompareStrings, passing FALSE as its last
 * argument (presumably selecting a case-sensitive comparison --
 * confirm against CompareStrings).
 */
static BOOL
PathIsPrefix (BYTE *s1, UINT16 len1, BYTE *s2, UINT16 len2)
{
  INT16 seg1, seg2;   /* Index of the '/' that opens the current component */
  INT16 nxt1, nxt2;   /* Index of the next '/', or the length if none */
  UINT16 n1, n2;      /* Lengths of the current components */

  /* Special cases. */
  if (len1 == 0)
    return TRUE;
  if (len2 == 0)
    return FALSE;
  if (!((s1[0] == '/') && (s2[0] == '/')))
    return FALSE;

  /* General case. */
  seg1 = 0;
  seg2 = 0;
  while ((seg1 < len1) && (seg2 < len2)) {
    /* Walk forward to the '/' that closes the current component. */
    nxt1 = seg1 + 1;
    while ((nxt1 < len1) && (s1[nxt1] != '/'))
      nxt1++;
    nxt2 = seg2 + 1;
    while ((nxt2 < len2) && (s2[nxt2] != '/'))
      nxt2++;

    n1 = nxt1 - seg1 - 1;
    n2 = nxt2 - seg2 - 1;
    if (n1 > 0) {
      if (!CompareStrings (&s1[seg1 + 1], n1, &s2[seg2 + 1], n2, FALSE))
        return FALSE;
    }

    seg1 = nxt1;
    seg2 = nxt2;
  }

  /* It is a prefix only if all of 's1' has been consumed. */
  return (seg1 >= len1);
}
/*
 * Check if the given absolute URL has access rights, given 'domain'
 * and 'path' as access control attributes.
 *
 * All three arguments are expected to be zero-terminated BYTE strings.
 * Returns FALSE if 'abs_url' or 'domain' is NULL, if 'abs_url' cannot
 * be parsed by URL_FromByteString, or if the parsed URL lacks a host
 * or has an empty path. A NULL 'path' is treated as the path "/".
 *
 * Access is granted when 'domain' is a componentwise suffix of the
 * host part of the URL (see DomainIsSuffix) and 'path' is a
 * componentwise prefix of the path part of the URL (see PathIsPrefix).
 * NOTE(review): any handling of escape sequences ("%xy") is left to
 * the comparison helpers and is not visible in this function -- confirm.
 */
BOOL
URL_CheckAccess (BYTE *abs_url, BYTE *domain, BYTE *path)
{
  URL parsed;

  if ((abs_url == NULL) || (domain == NULL))
    return FALSE;

  if (path == NULL)
    path = (BYTE *)"/";

  if (!URL_FromByteString (abs_url, &parsed))
    return FALSE;

  /* The URL must carry both a host and a non-empty path. */
  if (parsed.s[HOST_PART] == NULL)
    return FALSE;
  if (!(parsed.len[PATH_PART] > 0))
    return FALSE;

  if (!DomainIsSuffix (domain, B_STRINGLENGTH (domain),
                       parsed.s[HOST_PART], parsed.len[HOST_PART]))
    return FALSE;

  return (PathIsPrefix (path, B_STRINGLENGTH (path), parsed.s[PATH_PART],
                        parsed.len[PATH_PART]) != 0);
}
/*
 * Return a newly allocated copy of 'pbString' in which every character
 * that wae_isblank reports as blank has been replaced by a hexadecimal
 * escape sequence of the form "%xy".
 * Returns NULL if 'pbString' is NULL or if the allocation fails.
 * NOTE: it is the caller's responsibility to deallocate the returned string.
 */
BYTE*
b_EscapeBlanks (const BYTE* pbString)
{
  const BYTE *src;
  BYTE *dst, *result;
  UINT16 plain = 0;    /* Characters copied unchanged */
  UINT16 blanks = 0;   /* Characters that expand to three bytes */

  if (pbString == NULL)
    return NULL;

  /* First pass: determine how large the result must be. */
  for (src = pbString; *src; src++) {
    if (wae_isblank (*src))
      blanks++;
    else
      plain++;
  }

  result = NEWARRAY (BYTE, plain + 3 * blanks + 1);
  if (result == NULL)
    return NULL;

  /* Second pass: copy, expanding each blank to '%' plus the two
   * characters that ByteToHex stores at 'dst'. */
  dst = result;
  src = pbString;
  while (*src) {
    if (wae_isblank (*src)) {
      *dst++ = '%';
      ByteToHex (*src, dst);
      dst += 2;
    }
    else {
      *dst++ = *src;
    }
    src++;
  }
  *dst = '\0';

  return result;
}
/*
 * Take an incomplete URL, like "www.abc.com", and turn it into
 * a correct absolute URL using heuristic methods. This is not a
 * well-defined process, rather it makes a guess as to what the user means.
 * In the example above, the result would be "http://www.abc.com/".
 *
 * Blanks in the input are escaped first (see b_EscapeBlanks). A URL
 * whose scheme is something other than http(s) is returned as is
 * (that is, as the blank-escaped copy).
 *
 * Returns NULL if 'bs' is NULL, if memory cannot be allocated, if the
 * string cannot be parsed as a URL, or if no host can be deduced
 * (no authority part and the path is empty or absolute).
 * NOTE: it is the caller's responsibility to deallocate the returned string.
 */
BYTE *
b_CompleteURLHeuristically (BYTE *bs)
{
  URL url, new_url;
  BYTE *p, *path_end, *ns;

  if (bs == NULL)
    return NULL;

  /* From here on 'bs' is our own blank-escaped copy; it must either be
   * deallocated or handed over to the caller on every exit path. */
  bs = b_EscapeBlanks (bs);
  if (bs == NULL)
    return NULL;

  if (!URL_FromByteString (bs, &url)) {
    DEALLOC (&bs);
    return NULL;
  }
  new_url = url;

  /* The original URL has a scheme different from http(s); we simply
   * return a copy of the input string. */
  if ((url.s[SCHEME_PART] != NULL) &&
      (url.scheme_type != Scheme_http) &&
      (url.scheme_type != Scheme_https)) {
    return bs;
  }

  /* The original URL does not have a scheme; we attach 'http:' */
  if (url.s[SCHEME_PART] == NULL) {
    new_url.s[SCHEME_PART] = (BYTE *)"http";
    new_url.len[SCHEME_PART] = 4;
    new_url.scheme_type = Scheme_http;
  }

  /* The original URL does not have an authority part (starting with '//');
   * we promote the initial segment of the path, up to (but not including)
   * the first '/'-character, or the whole path in case it has no such
   * delimiter. If the path is an absolute path (starting with '/'),
   * we cannot guess what the host should have been. */
  if (url.s[AUTHORITY_PART] == NULL) {
    if (url.len[PATH_PART] == 0) {
      DEALLOC (&bs);
      return NULL;
    }

    /* Look for the first '/' inside the path part only. The parts of
     * a URL are delimited by their lengths, not by terminators, so an
     * unbounded search could run on into the query or fragment and
     * pick up a '/' there, which would make the path length computed
     * below underflow. */
    path_end = url.s[PATH_PART] + url.len[PATH_PART];
    for (p = url.s[PATH_PART]; (p < path_end) && (*p != '/'); p++)
      ;

    if (p == path_end) {
      /* No '/' in the path: the whole path becomes the host. */
      new_url.s[AUTHORITY_PART] = new_url.s[HOST_PART] = url.s[PATH_PART];
      new_url.len[AUTHORITY_PART] = new_url.len[HOST_PART]
        = url.len[PATH_PART];
      new_url.len[PATH_PART] = 0;
    }
    else if (p != url.s[PATH_PART]) {
      /* The segment in front of the first '/' becomes the host,
       * the rest (starting with that '/') remains the path. */
      new_url.s[AUTHORITY_PART] = new_url.s[HOST_PART] = url.s[PATH_PART];
      new_url.len[AUTHORITY_PART] = new_url.len[HOST_PART]
        = (UINT16)(p - url.s[PATH_PART]);
      new_url.s[PATH_PART] = p;
      new_url.len[PATH_PART] = (UINT16)(url.len[PATH_PART] -
                                        new_url.len[HOST_PART]);
    }
    else {
      /* The URL is simply an absolute path, we cannot deduce the host! */
      DEALLOC (&bs);
      return NULL;
    }
  }

  /* The URL (original or modified as above), which is now a http(s) URL,
   * has no path part; we attach '/', the root path. */
  if (new_url.len[PATH_PART] == 0) {
    new_url.s[PATH_PART] = (BYTE *)"/";
    new_url.len[PATH_PART] = 1;
  }

  /* Allocate a new buffer and copy all the parts to it. The parts of
   * 'new_url' still point into 'bs', so 'bs' is released only after
   * the new string has been assembled. */
  ns = AssembleURLString (&new_url);
  DEALLOC (&bs);

  return ns;
}
/*
 * Given two URLs, compute the minimum relative path, i.e., the shortest
 * relative path that together with 'bs1' can be resolved to yield
 * 'bs2'. If 'includeFragment' is TRUE any fragment part in 'bs2'
 * is included in the result, otherwise it is dropped.
 *
 * If the two URLs differ in scheme, host or port (as judged by
 * URL_Equal), the result is simply 'bs2' converted back to a string.
 * If the two paths differ already in their first segment, the result
 * carries the absolute path of 'bs2'.
 *
 * Returns NULL if either URL cannot be parsed, if either path is
 * missing, empty or not absolute, or if memory cannot be allocated.
 * NOTE: it is the caller's responsibility to deallocate the returned string.
 */
BYTE *
b_MinRelativePath (BYTE *bs1, BYTE *bs2, BOOL includeFragment)
{
  URL url1, url2;
  BYTE *s1, *s2, *buf, *p, *res;
  UINT16 len, len1, len2, l1, l2, n;
  INT16 i, i1, i2, k1, k2;

  if (!URL_FromByteString (bs1, &url1) ||
      !URL_FromByteString (bs2, &url2))
    return NULL;

  if (!URL_Equal (&url1, &url2, SCHEME_COMP | HOST_COMP | PORT_COMP)) {
    return URL_ToByteString (&url2);
  }

  s1 = url1.s[PATH_PART];
  s2 = url2.s[PATH_PART];
  if ((s1 == NULL) || (s2 == NULL))
    return NULL;

  len1 = url1.len[PATH_PART];
  len2 = url2.len[PATH_PART];
  if ((len1 == 0) || (len2 == 0))
    return NULL;

  if ((s1[0] != '/') || (s2[0] != '/'))
    return NULL;

  /* Drop everything after the last '/' in path 1. */
  for (i1 = len1 - 1; i1 > 0; i1--) {
    if (s1[i1] == '/')
      break;
  }
  len1 = i1 + 1;

  /* Now, path 1 ends with a '/'. Search past the initial, equal,
   * path segments in the two paths. A path segment starts right
   * after a '/' and ends on the next '/'. */
  for (k1 = 1, k2 = 1; (k1 < len1) && (k2 < len2);) {
    /* Here, k1 is the index following the last seen '/',
     * and i1 is moved up to the next '/', or the end of the string. */
    for (i1 = k1; (i1 < len1) && (s1[i1] != '/'); i1++);
    for (i2 = k2; (i2 < len2) && (s2[i2] != '/'); i2++);

    /* A segment of path 2 that is not closed by a '/' is the final
     * segment of that path, not a directory, so it must stay in the
     * result even if it is spelled like the directory at the same
     * depth in path 1 (base "/a/b/c", target "/a/b" gives "../b").
     * Skipping it would leave k2 at len2 + 1, making the length
     * computed below too small for what is written into the buffer. */
    if (i2 >= len2)
      break;

    l1 = i1 - k1;
    l2 = i2 - k2;
    if (!((l1 == l2) &&
          CompareStrings (&s1[k1], l1, &s2[k2], l2, FALSE))) {
      break;
    }
    k1 = i1 + 1;
    k2 = i2 + 1;
  }

  /* Count how many more '/' there are in path 1. */
  n = 0;
  for (i1 = k1; i1 < len1; i1++) {
    if (s1[i1] == '/')
      n++;
  }

  /* The path we create will consist of what ever part was left
   * in path 2 (i.e., that did not match a corresponding part in
   * path 1), plus n "../" segments. Note, that we might end up
   * with a string of length 0 here.
   * Special case: if the paths differ already in the first
   * segment, then just use the absolute path from url2. */
  if (k1 == 1) {
    len = len2;
    if ((buf = NEWARRAY (BYTE, len + 1)) == NULL) {
      return NULL;
    }
    p = buf;
    B_COPYSTRINGN (p, s2, len2);
    p += len2;
  }
  else {
    /* k2 <= len2 holds here, so the subtraction cannot go negative. */
    len = n * 3 + len2 - k2;
    if ((buf = NEWARRAY (BYTE, len + 1)) == NULL) {
      return NULL;
    }
    p = buf;
    for (i = 0; i < n; i++) {
      B_COPYSTRINGN (p, "../", 3);
      p += 3;
    }
    if (len2 > k2) {
      B_COPYSTRINGN (p, &s2[k2], len2 - k2);
      p += len2 - k2;
    }
  }
  *p = '\0';

  /* Reuse url2 as a template for the result: install the new path,
   * blank out scheme and authority, and keep query (and, optionally,
   * fragment) as parsed from 'bs2'. */
  url2.s[PATH_PART] = buf;
  url2.len[PATH_PART] = len;
  url2.s[AUTHORITY_PART] = url2.s[SCHEME_PART]
    = url2.s[HOST_PART] = url2.s[PORT_PART] = NULL;
  url2.len[AUTHORITY_PART] = url2.len[SCHEME_PART]
    = url2.len[HOST_PART] = url2.len[PORT_PART] = 0;
  url2.scheme_type = Scheme_empty;
  if (!includeFragment) {
    url2.s[FRAGMENT_PART] = NULL;
    url2.len[FRAGMENT_PART] = 0;
  }

  res = AssembleURLString (&url2);
  DEALLOC (&buf);

  return res;
}
/*
* Check that a URL is valid and has at least a scheme, host, and path
* component.
*/
BOOL
b_UrlIsCompleteValid (const BYTE* pbUrl)
{
URL url;
return
URL_Parse ((BYTE *)pbUrl, &url) &&
(url.s[SCHEME_PART] != NULL) &&
(url.s[HOST_PART] != NULL) &&
(url.len[PATH_PART] > 0);
}
/*
* Return a URL that is a copy of 'old_url', but with 'new_query'
* appended to the query part. If 'old_url' already has a query part,
* that old part will be separated from the new by a '&'-character.
* Returns NULL in case of error.
* NOTE: it is the responsibility of the caller to deallocate
* the returned string.
*/
BYTE *
b_AppendToQuery (BYTE *old_url, BYTE *new_query)
{
URL url;
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -