/* url.c */
/*
* Copyright (C) Ericsson Mobile Communications AB, 2000.
* Licensed to AU-System AB.
* All rights reserved.
*
* This software is covered by the license agreement between
* the end user and AU-System AB, and may be used and copied
* only in accordance with the terms of the said agreement.
*
* Neither Ericsson Mobile Communications AB nor AU-System AB
* assumes any responsibility or liability for any errors or inaccuracies in
* this software, or any consequential, incidental or indirect damage arising
* out of the use of the Generic WAP Client software.
*/
/*
* URL.c
*
* Library of routines for handling URLs.
*
* Created by Anders Edenbrandt, Mon Mar 29 08:21:14 1999.
*
* Revision history:
* 990818, AED: Complete rewrite of practically everything.
* 991123, AED: added new function, b_IsPrefix.
* 991123, AED: parsing all URIs as generic
* 000519, AED: escape blanks in b_CompleteURLHeuristically.
* 001017, IPN: the function b_EscapeBlanks is now also accessible from outside this file.
* 010516, IPN: added new function, w_wmlVariableEscape.
*
*/
#include "cmmnrsrc.h"
#include "wmldef.h"
#include "waectype.h"
#include "url.h"
/****************************
* Private utility routines:
****************************/
/*
 * Build a newly allocated, null-terminated copy of the first 'len'
 * bytes of 's'.
 * Returns NULL if 's' is NULL or if NEWARRAY yields NULL.
 * NOTE: it is the caller's responsibility to deallocate the returned
 * string.
 */
static BYTE *
newstring (const BYTE *s, UINT16 len)
{
  BYTE *copy;

  if (s == NULL)
    return NULL;

  /* One extra byte for the terminator. */
  copy = NEWARRAY (BYTE, len + 1);
  if (copy == NULL)
    return NULL;

  B_COPYSTRINGN (copy, s, len);
  copy[len] = '\0';
  return copy;
}
/*
 * Return the value of the unsigned decimal integer stored
 * in the first "len" bytes of the string "bs".
 * The bytes are not validated: each one is simply taken as the digit
 * (byte - '0'), and no overflow check is performed on the result.
 */
static UINT16
GetNum (const BYTE *bs, UINT16 len)
{
  UINT16 n = 0;
  /* Same type as 'len', so the index can reach every valid position
   * without overflowing a signed 16-bit counter. */
  UINT16 i;

  for (i = 0; i < len; i++)
    n = (UINT16)(n * 10 + (bs[i] - '0'));
  return n;
}
/*
 * String comparison routine that returns TRUE if the two
 * length-delimited strings are equal.
 * If 'noCase' is TRUE, every byte is passed through DowncaseByte
 * before it is compared, so the case of letters is insignificant.
 * A hex escape sequence of the form '%XY' that HexToByte accepts is
 * compared as the byte it encodes. An escaped byte that is reserved
 * (according to wae_isreserved) or is '#' only matches another escaped
 * byte; it never matches the same byte written unescaped.
 * An escape is decoded only when all three of its bytes lie within the
 * stated length; a '%' closer to the end is compared as a literal '%'.
 * The routine accepts NULL pointers as input, in which case the
 * result is TRUE only if both string pointers are NULL.
 */
static BOOL
CompareStrings (BYTE *bs1, UINT16 len1, BYTE *bs2, UINT16 len2, BOOL noCase)
{
  BYTE b1, b2, tmp;
  BOOL reservedEscape1, reservedEscape2;

  if (bs1 == NULL)
    return bs2 == NULL;
  else if (bs2 == NULL)
    return FALSE;

  for (; (len1 > 0) && (len2 > 0); len1--, len2--) {
    reservedEscape1 = reservedEscape2 = FALSE;

    b1 = *bs1++;
    /*
     * len1 still counts the '%' that was just read, so a complete
     * escape needs len1 >= 3. Accepting less would hand HexToByte
     * (which presumably reads two bytes) data beyond the stated length
     * and would drive the remaining length below zero.
     */
    if ((b1 == '%') && (len1 >= 3) && HexToByte (bs1, &tmp)) {
      bs1 += 2;
      len1 -= 2;
      b1 = tmp;
      if (wae_isreserved (tmp) || (tmp == '#')) {
        reservedEscape1 = TRUE;
      }
    }

    b2 = *bs2++;
    /* Same bound as above, for the second string. */
    if ((b2 == '%') && (len2 >= 3) && HexToByte (bs2, &tmp)) {
      bs2 += 2;
      len2 -= 2;
      b2 = tmp;
      if (wae_isreserved (tmp) || (tmp == '#')) {
        reservedEscape2 = TRUE;
      }
    }

    if (noCase) {
      b1 = DowncaseByte (b1);
      b2 = DowncaseByte (b2);
    }

    /* An escaped reserved byte must be escaped on both sides. */
    if ((b1 != b2) || (reservedEscape1 != reservedEscape2))
      return FALSE;
  }

  /* Equal only if both strings were consumed completely. */
  return (len1 == len2);
}
/************************************************************
* Public routines:
************************************************************/
/*
 * Table of the known URL schemes: for each entry the Scheme value,
 * its name as a string, and the length of that name in bytes.
 * GetSchemeType searches entries 0 .. LAST_SCHEME - 1 by name, and
 * Scheme_ToString indexes the table directly with a Scheme value, so
 * the entry order presumably mirrors the numeric values of the Scheme
 * constants -- confirm against their declaration before reordering.
 */
static struct {
const Scheme sch;
const BYTE *str;
const UINT8 slen;
} schemes[] = {
{Scheme_empty, (const BYTE *)"", 0},
{Scheme_http, (const BYTE *)"http", 4},
{Scheme_https, (const BYTE *)"https", 5},
{Scheme_file, (const BYTE *)"file", 4},
{Scheme_wtai, (const BYTE *)"wtai", 4},
{Scheme_about, (const BYTE *)"about", 5},
{Scheme_function, (const BYTE *)"function", 8},
{Scheme_wapdevice, (const BYTE *)"wapdevice", 9},
{Scheme_unknown, (const BYTE *)"unknown", 7}
};
/*
 * Index of the final ("unknown") entry of the table above, which is
 * also the number of entries that precede it.
 */
#define LAST_SCHEME 8
/*
 * Look up the scheme named by the first 'len' bytes of 'bs'.
 * The name is compared, ignoring case, against the first LAST_SCHEME
 * entries of the 'schemes' table, and the Scheme value of the first
 * match is returned. A NULL 'bs' gives Scheme_empty; a name that
 * matches no entry gives Scheme_unknown.
 */
static Scheme
GetSchemeType (BYTE *bs, UINT16 len)
{
  INT16 idx = 0;

  if (bs == NULL)
    return Scheme_empty;

  while (idx < LAST_SCHEME) {
    if (CompareStrings (bs, len,
                        (BYTE*)schemes[idx].str, schemes[idx].slen, TRUE))
      return schemes[idx].sch;
    idx++;
  }

  return Scheme_unknown;
}
/*
 * Return the scheme type named by the given null-terminated string.
 * Returns Scheme_unknown if it is not one of the predefined types,
 * and Scheme_empty if 'sch' is NULL.
 */
Scheme
Scheme_FromString (BYTE *sch)
{
  /*
   * Handle NULL here: the length must be computed before GetSchemeType
   * is called, so its own NULL check would come too late to protect
   * B_STRINGLENGTH.
   */
  if (sch == NULL)
    return Scheme_empty;

  return GetSchemeType (sch, (UINT16)B_STRINGLENGTH (sch));
}
/*
 * Return the name stored in the 'schemes' table for the given Scheme
 * value. Any value that is not below LAST_SCHEME yields the name of
 * the last table entry ("unknown").
 * NOTE: the caller must NOT modify or deallocate the returned string!
 */
const BYTE *
Scheme_ToString (Scheme scheme)
{
  if (scheme < LAST_SCHEME)
    return schemes[scheme].str;

  return schemes[LAST_SCHEME].str;
}
/*
 * Reset a URL struct: the scheme type becomes Scheme_empty, and every
 * one of the NUM_URL_PARTS parts gets a NULL pointer and length 0.
 * A NULL 'url' is ignored.
 */
void
URL_Clear (URL *url)
{
  UINT16 part;

  if (url != NULL) {
    url->scheme_type = Scheme_empty;
    for (part = 0; part < NUM_URL_PARTS; part++) {
      url->len[part] = 0;
      url->s[part] = NULL;
    }
  }
}
/************************************************************
* The parsing of a URL.
* First comes a number of private functions and macros,
* and then the main routine, URL_Parse.
************************************************************/
/*
 * Return TRUE if the first 'len' bytes of 'bs' are all decimal digits
 * (as judged by wae_isdigit) and 'len' is positive.
 * A zero or negative length gives FALSE.
 */
static BOOL
IsNumber (const BYTE *bs, INT16 len)
{
  const BYTE *end;

  if (len <= 0)
    return FALSE;

  for (end = bs + len; bs < end; bs++) {
    if (!wae_isdigit (*bs))
      return FALSE;
  }
  return TRUE;
}
/*
 * Check that the first 'len' bytes of 'bs' form a top-level label:
 * the first byte must be a letter, the last byte a letter or digit,
 * and every byte in between a letter, a digit or '-'.
 * An empty label is rejected.
 */
static BOOL
CheckTopLabel (const BYTE *bs, UINT16 len)
{
  /* Unsigned, like 'len', so that long labels cannot overflow the index. */
  UINT16 i;
  UINT16 last;

  if (len == 0)
    return FALSE;
  if (!wae_isalpha (bs[0]))
    return FALSE;

  last = (UINT16)(len - 1);
  for (i = 1; i < last; i++) {
    if (!wae_isalphanum (bs[i]) && (bs[i] != '-'))
      return FALSE;
  }
  return wae_isalphanum (bs[last]);
}
/*
 * Check that the first 'len' bytes of 'bs' form a domain label:
 * the first and last bytes must each be a letter or digit, and every
 * byte in between a letter, a digit or '-'.
 * An empty label is rejected.
 */
static BOOL
CheckDomainLabel (const BYTE *bs, UINT16 len)
{
  /* Unsigned, like 'len', so that long labels cannot overflow the index. */
  UINT16 i;
  UINT16 last;

  if (len == 0)
    return FALSE;
  if (!wae_isalphanum (bs[0]))
    return FALSE;

  last = (UINT16)(len - 1);
  for (i = 1; i < last; i++) {
    if (!wae_isalphanum (bs[i]) && (bs[i] != '-'))
      return FALSE;
  }
  return wae_isalphanum (bs[last]);
}
/*
 * Check that the first 'len' bytes of 'bs' form a host name: a
 * sequence of labels separated by '.', where the final label passes
 * CheckTopLabel and every earlier label passes CheckDomainLabel.
 * The name must not be empty and must not end with '.'; a caller that
 * wants to allow one trailing '.' has to strip it first.
 */
static BOOL
CheckHostName (const BYTE *bs, UINT16 len)
{
  /* Unsigned, like 'len', so that long names cannot overflow the indices. */
  UINT16 start, end;

  for (start = 0; start < len; start = (UINT16)(end + 1)) {
    /* Find the end of the label that begins at 'start'. */
    for (end = start; (end < len) && (bs[end] != '.'); end++)
      ;

    if (end >= len) {
      /* No further '.': this is the final label. */
      return CheckTopLabel (&bs[start], (UINT16)(end - start));
    }
    if (!CheckDomainLabel (&bs[start], (UINT16)(end - start)))
      return FALSE;
  }

  /*
   * Reached only when the name is empty or its last byte is '.'.
   * In both cases there is no top label, so the name is not valid.
   */
  return FALSE;
}
/*
 * Check that the first 'len' bytes of 'bs' form an IPv4 address:
 * exactly four components separated by '.', each of which passes
 * IsNumber. The numeric value of a component is not range-checked.
 * A '.' as the last byte is rejected.
 */
static BOOL
CheckIPv4address (const BYTE *bs, UINT16 len)
{
  /* Unsigned, like 'len', so that long inputs cannot overflow the indices. */
  UINT16 start = 0;
  UINT16 end;
  UINT16 n = 0;

  while (start < len) {
    /* Find the end of the component that begins at 'start'. */
    for (end = start; (end < len) && (bs[end] != '.'); end++)
      ;

    /* The loop stopped on a '.' that is the very last byte. */
    if (end == (UINT16)(len - 1))
      return FALSE;
    if (!IsNumber (&bs[start], (INT16)(end - start)))
      return FALSE;
    n++;

    /* Final component handled; stop here so that 'end + 1' is never
     * computed past the end of the input. */
    if (end >= len)
      break;
    start = (UINT16)(end + 1);
  }
  return (n == 4);
}
/*
 * Check that the string of length 'len' is a legal host name.
 * A host part is either a hostname or an IPv4 address.
 * In both cases, it is a string separated into parts by '.'
 * Returns FALSE for a NULL or empty string, and for a string that
 * starts with '.'.
 */
BOOL
ValidateHost (const BYTE *bs, UINT16 len)
{
  /* Unsigned, like 'len', so that long hosts cannot produce a
   * negative index. */
  UINT16 dot;
  UINT16 last;

  if ((bs == NULL) || (len == 0) || (bs[0] == '.'))
    return FALSE;

  /* Find right-most '.'; 'dot' ends up 0 if there is none. */
  last = (UINT16)(len - 1);
  for (dot = last; (dot > 0) && (bs[dot] != '.'); dot--)
    ;

  if (dot == 0) {
    /* Only one component, must be a top-level domain. */
    return CheckTopLabel (bs, len);
  }
  else if (dot == last) {
    /* A final '.' can be ignored in a hostname. */
    return CheckHostName (bs, last);
  }
  else if (wae_isdigit (bs[dot + 1])) {
    /* If the final component starts with a digit, it must be an IP-address */
    return CheckIPv4address (bs, len);
  }
  else {
    return CheckHostName (bs, len);
  }
}
/*
* Parsing routines.
* All parsing routines have the same function prototype.
* The parameter "pbs" is a pointer to a pointer indicating
* a position in a string where the parsing should start.
* Upon success, this parameter is updated to point beyond
* the segment of the string consumed by our parsing.
* The second parameter, "url", is a pointer to a URL struct
* where information about discovered components is stored,
* if a component of the specified type is found.
* In case of error, a parsing routine returns FALSE and does not update
* the "pbs" parameter.
*/
/*
 * Parse an optional scheme at *pbs.
 * The candidate runs up to the first ':', '/', '?', '#' or end of
 * string. It is taken as a scheme only if it is non-empty and is
 * terminated by ':'; otherwise the scheme type is set to Scheme_empty,
 * *pbs is left alone and TRUE is returned.
 * A scheme must start with a letter and continue with letters, digits,
 * '+', '-' or '.'; if it does not, FALSE is returned.
 * On success the scheme part and scheme type of 'url' are filled in
 * and *pbs is moved past the ':'.
 */
static BOOL
ParseScheme (BYTE **pbs, URL *url)
{
  BYTE *const first = *pbs;
  BYTE *stop = first;
  BYTE *cur;
  UINT16 schemeLen;

  url->s[SCHEME_PART] = NULL;
  url->len[SCHEME_PART] = 0;

  /* Advance to the first delimiter or the end of the string. */
  while ((*stop != '\0') && (*stop != ':') && (*stop != '/') &&
         (*stop != '?') && (*stop != '#'))
    stop++;

  schemeLen = (UINT16)(stop - first);
  if ((schemeLen == 0) || (*stop != ':')) {
    /* Nothing that looks like "name:" -- no scheme present. */
    url->scheme_type = Scheme_empty;
    return TRUE;
  }

  if (!wae_isalpha (*first))
    return FALSE;
  for (cur = first + 1; cur < stop; cur++) {
    if (!wae_isalphanum (*cur) && (*cur != '+') &&
        (*cur != '-') && (*cur != '.'))
      return FALSE;
  }

  url->s[SCHEME_PART] = first;
  url->len[SCHEME_PART] = schemeLen;
  url->scheme_type = GetSchemeType (first, schemeLen);
  *pbs = stop + 1;
  return TRUE;
}
/*
 * Parse an optional userinfo component at *pbs.
 * Userinfo is present only if an '@' occurs before the first '/', '?',
 * '#' or the end of the string; if not, TRUE is returned with the
 * userinfo part cleared and *pbs unchanged.
 * Every byte before the '@' must satisfy wae_ispchar or be ';',
 * otherwise FALSE is returned.
 * On success the userinfo part of 'url' is filled in and *pbs is moved
 * past the '@'.
 */
static BOOL
ParseUserinfo (BYTE **pbs, URL *url)
{
  BYTE *const first = *pbs;
  BYTE *at = first;
  BYTE *cur;

  url->s[USERINFO_PART] = NULL;
  url->len[USERINFO_PART] = 0;

  /* Look for '@'; give up at a delimiter or at the end of the string. */
  while (*at != '@') {
    if ((*at == '\0') || (*at == '/') || (*at == '?') || (*at == '#'))
      return TRUE;
    at++;
  }

  for (cur = first; cur < at; cur++) {
    if (!wae_ispchar (*cur) && (*cur != ';'))
      return FALSE;
  }

  url->s[USERINFO_PART] = first;
  url->len[USERINFO_PART] = (UINT16)(at - first);
  *pbs = at + 1;
  return TRUE;
}
/*
 * Parse an optional host component at *pbs.
 * The host runs up to the first ':', '/', '?', '#' or end of string.
 * An empty host is accepted: TRUE is returned with the host part
 * cleared and *pbs unchanged.
 * A non-empty host must pass ValidateHost, otherwise FALSE is
 * returned. On success the host part of 'url' is filled in and *pbs
 * is moved to the byte that ended the host.
 */
static BOOL
ParseHost (BYTE **pbs, URL *url)
{
  BYTE *const first = *pbs;
  BYTE *stop = first;
  UINT16 hostLen;

  url->s[HOST_PART] = NULL;
  url->len[HOST_PART] = 0;

  while ((*stop != '\0') && (*stop != ':') && (*stop != '/') &&
         (*stop != '?') && (*stop != '#'))
    stop++;

  hostLen = (UINT16)(stop - first);
  if (hostLen > 0) {
    if (!ValidateHost (first, hostLen))
      return FALSE;

    url->s[HOST_PART] = first;
    url->len[HOST_PART] = hostLen;
    *pbs = stop;
  }
  return TRUE;
}
/*
 * Parse an optional port component at *pbs.
 * A port is present only if *pbs points at ':'; if not, TRUE is
 * returned with the port part cleared and *pbs unchanged.
 * The port is the run of bytes after the ':' up to the first '/', '?',
 * '#' or end of string. Any non-digit in that run gives FALSE.
 * The run may be empty, in which case the port part points just past
 * the ':' and has length 0.
 * On success *pbs is moved to the byte that ended the port.
 */
static BOOL
ParsePort (BYTE **pbs, URL *url)
{
  BYTE *digits;
  BYTE *cur;

  url->s[PORT_PART] = NULL;
  url->len[PORT_PART] = 0;

  if (**pbs != ':')
    return TRUE;

  digits = *pbs + 1;
  cur = digits;
  while ((*cur != '\0') && (*cur != '/') && (*cur != '?') && (*cur != '#')) {
    if (!wae_isdigit (*cur))
      return FALSE;
    cur++;
  }

  url->s[PORT_PART] = digits;
  url->len[PORT_PART] = (UINT16)(cur - digits);
  *pbs = cur;
  return TRUE;
}
/*
 * Parse a host, followed by an optional port, at *pbs.
 * ParsePort is called only when ParseHost found a non-empty host.
 * Returns FALSE, leaving *pbs unchanged, if either routine fails;
 * otherwise *pbs is moved past whatever was consumed.
 */
static BOOL
ParseHostPort (BYTE **pbs, URL *url)
{
  BYTE *cur = *pbs;

  if (!ParseHost (&cur, url))
    return FALSE;
  if ((url->len[HOST_PART] > 0) && !ParsePort (&cur, url))
    return FALSE;

  *pbs = cur;
  return TRUE;
}
/*
 * Parse an authority component at *pbs: optional userinfo followed by
 * host and optional port.
 * Returns FALSE, leaving *pbs unchanged, if ParseUserinfo or
 * ParseHostPort fails. On success the authority part of 'url' covers
 * everything those routines consumed, and *pbs is moved past it.
 */
static BOOL
ParseAuthority (BYTE **pbs, URL *url)
{
  BYTE *const first = *pbs;
  BYTE *cur = first;

  if (!ParseUserinfo (&cur, url))
    return FALSE;
  if (!ParseHostPort (&cur, url))
    return FALSE;

  url->s[AUTHORITY_PART] = first;
  url->len[AUTHORITY_PART] = (UINT16)(cur - first);
  *pbs = cur;
  return TRUE;
}
static BOOL
ParseQuery (BYTE **pbs, URL *url)
{
BYTE *start = *pbs;
BYTE *p;
url->s[QUERY_PART] = NULL;
url->len[QUERY_PART] = 0;
if (*start != '?') {
return TRUE;
}
start++;
for (p = start; *p; p++) {
BYTE b = *p;
if (b == '#') {
break;
}
if (!wae_isuric (b)) {
if ((b == '%') && HexToByte (p + 1, &b)) {
p += 2;
}
else
return FALSE;
}
}
url->s[QUERY_PART] = start;
url->len[QUERY_PART] = (UINT16)(p - start);
*pbs = p;
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -