📄 url.c

📁 是一个手机功能的模拟程序
💻 C
📖 第 1 页 / 共 5 页
字号:
12 3 4 5 下一页
/*
 * Copyright (C) Ericsson Mobile Communications AB, 2000.
 * Licensed to AU-System AB.
 * All rights reserved.
 *
 * This software is covered by the license agreement between
 * the end user and AU-System AB, and may be used and copied
 * only in accordance with the terms of the said agreement.
 *
 * Neither Ericsson Mobile Communications AB nor AU-System AB
 * assumes any responsibility or liability for any errors or inaccuracies in
 * this software, or any consequential, incidental or indirect damage arising
 * out of the use of the Generic WAP Client software.
 */
/*
 * URL.c
 *
 * Library of routines for handling URLs.
 *
 * Created by Anders Edenbrandt, Mon Mar 29 08:21:14 1999.
 *
 * Revision history:
 *   990818, AED: Complete rewrite of practically everything.
 *   991123, AED: added new function, b_IsPrefix.
 *   991123, AED: parsing all URIs as generic
 *   000519, AED: escape blanks in b_CompleteURLHeuristically.
 *   001017, IPN: the function b_EscapeBlanks is now also accessible from outside this file.
 *   010516, IPN: added new function, w_wmlVariableEscape.
 *
 */

#include "cmmnrsrc.h"
#include "wmldef.h"
#include "waectype.h"
#include "url.h"


/****************************
 * Private utility routines:
 ****************************/

/*
 * Duplicate the first "len" bytes of "s" into a freshly allocated,
 * NUL-terminated buffer of len + 1 bytes.
 *
 * Returns NULL if "s" is NULL or if NEWARRAY yields NULL.
 * NOTE: the caller owns the returned string and is responsible for
 * deallocating it.
 */
static BYTE *
newstring (const BYTE *s, UINT16 len)
{
  BYTE *copy;

  if (s == NULL)
    return NULL;

  copy = NEWARRAY (BYTE, len + 1);
  if (copy == NULL)
    return NULL;

  B_COPYSTRINGN (copy, s, len);
  /* Terminate explicitly; only "len" bytes were requested from the copy. */
  copy[len] = '\0';

  return copy;
}

/*
 * Convert the first "len" bytes of "bs" to an unsigned decimal value.
 *
 * Each byte is taken as a decimal digit (byte - '0'); the bytes are not
 * checked for being digits and the accumulated value is not checked for
 * overflow, so the caller is expected to pass a validated digit string.
 * Returns 0 when "len" is 0.
 */
static UINT16
GetNum (const BYTE *bs, UINT16 len)
{
  UINT16 value = 0;
  INT16  pos = 0;

  while (pos < len) {
    value = value * 10 + (bs[pos] - '0');
    pos++;
  }

  return value;
}

/*
 * String comparison routine that returns TRUE if the strings are equal.
 *
 * bs1/len1, bs2/len2: the two strings with their lengths in bytes;
 *                     the strings need not be NUL-terminated.
 * noCase:             if TRUE, both bytes are passed through DowncaseByte
 *                     before being compared.
 *
 * Hex escape sequences of the form '%XY' are decoded (via HexToByte)
 * before comparison, so an escaped and an unescaped occurrence of the
 * same character compare equal. The exception is a character in the
 * "reserved" set (wae_isreserved) or '#': such a character only matches
 * if it is escaped in both strings or in neither.
 *
 * A '%' that is not followed by two bytes inside the string, or whose
 * two following bytes HexToByte rejects, is compared as a literal '%'.
 *
 * The routine accepts NULL pointers as input, in which case the
 * result is TRUE only if both string pointers are NULL.
 */
static BOOL
CompareStrings (BYTE *bs1, UINT16 len1, BYTE *bs2, UINT16 len2, BOOL noCase)
{
  BYTE b1, b2, tmp;
  BOOL reservedEscape1, reservedEscape2;

  if (bs1 == NULL)
    return bs2 == NULL;
  else if (bs2 == NULL)
    return FALSE;

  for (;(len1 > 0) && (len2 > 0); len1--, len2--) {
    reservedEscape1 = reservedEscape2 = FALSE;

    b1 = *bs1++;
    /*
     * len1 still includes the '%' just read (it is decremented by the
     * loop header), so a complete "%XY" needs at least 3 bytes left.
     * Requiring only 2 would let HexToByte read past the end of the
     * string and make "len1 -= 2" followed by "len1--" wrap around.
     */
    if ((b1 == '%') && (len1 >= 3) && HexToByte (bs1, &tmp)) {
      bs1 += 2;
      len1 -= 2;
      b1 = tmp;
      if (wae_isreserved (tmp) || (tmp == '#')) {
        reservedEscape1 = TRUE;
      }
    }

    b2 = *bs2++;
    /* Same bound as above: '%' plus two hex digits must all be in range. */
    if ((b2 == '%') && (len2 >= 3) && HexToByte (bs2, &tmp)) {
      bs2 += 2;
      len2 -= 2;
      b2 = tmp;
      if (wae_isreserved (tmp) || (tmp == '#')) {
        reservedEscape2 = TRUE;
      }
    }

    if (noCase) {
      b1 = DowncaseByte (b1);
      b2 = DowncaseByte (b2);
    }

    /* Mismatch if the bytes differ or only one side escaped a reserved char. */
    if ((b1 != b2) || (reservedEscape1 ^ reservedEscape2))
      return FALSE;
  }

  /* At least one string is exhausted here; equal only if both are. */
  return (len1 == len2);
}

/************************************************************
 * Public routines:
 ************************************************************/

/*
 * Table mapping each known Scheme value to its textual name.
 *   sch:  the Scheme value,
 *   str:  the scheme name as a NUL-terminated string,
 *   slen: the length of "str" in bytes, excluding the terminator.
 *
 * Scheme_ToString indexes this table directly with a Scheme value, so
 * the order of the entries presumably mirrors the numeric order of the
 * Scheme enumeration (declared outside this file) -- verify before
 * adding or reordering entries.
 */
static struct {
  const Scheme sch;
  const BYTE   *str;
  const UINT8  slen;
} schemes[] = {
  {Scheme_empty, (const BYTE *)"", 0},
  {Scheme_http, (const BYTE *)"http", 4},
  {Scheme_https, (const BYTE *)"https", 5},
  {Scheme_file, (const BYTE *)"file", 4},
  {Scheme_wtai, (const BYTE *)"wtai", 4},
  {Scheme_about, (const BYTE *)"about", 5},
  {Scheme_function, (const BYTE *)"function", 8},
  {Scheme_wapdevice, (const BYTE *)"wapdevice", 9},
  {Scheme_unknown, (const BYTE *)"unknown", 7}
};
/*
 * Index of the final ("unknown") entry in the table above. Lookups by
 * name search only the entries below this index; it must be updated
 * whenever an entry is added to or removed from the table.
 */
#define LAST_SCHEME 8

/*
 * Map a scheme name of "len" bytes to its Scheme value.
 *
 * The name is matched case-insensitively (via CompareStrings) against
 * the entries of the "schemes" table below index LAST_SCHEME.
 * Returns Scheme_empty if "bs" is NULL, and Scheme_unknown if no table
 * entry matches.
 */
static Scheme
GetSchemeType (BYTE *bs, UINT16 len)
{
  INT16 idx = 0;

  if (bs == NULL)
    return Scheme_empty;

  while (idx < LAST_SCHEME) {
    if (CompareStrings (bs, len,
                        (BYTE*)schemes[idx].str, schemes[idx].slen, TRUE)) {
      return schemes[idx].sch;
    }
    idx++;
  }

  return Scheme_unknown;
}

/*
 * Return the scheme type named by the given NUL-terminated string.
 * Returns Scheme_unknown if it is not one of the predefined types,
 * and Scheme_empty if "sch" is NULL.
 */
Scheme
Scheme_FromString (BYTE *sch)
{
  /*
   * Check for NULL before measuring the string: the length would
   * otherwise be computed on a NULL pointer before GetSchemeType gets
   * the chance to reject it.
   */
  if (sch == NULL)
    return Scheme_empty;

  return GetSchemeType (sch, B_STRINGLENGTH (sch));
}

/*
 * Return the name of a Scheme value as stored in the "schemes" table.
 * Any value that is not below LAST_SCHEME yields the name of the last
 * table entry ("unknown").
 * NOTE: the returned string belongs to the table; the caller must NOT
 * modify or deallocate it!
 */
const BYTE *
Scheme_ToString (Scheme scheme)
{
  if (scheme < LAST_SCHEME)
    return schemes[scheme].str;

  return schemes[LAST_SCHEME].str;
}

/*
 * Reset a URL struct: the scheme type becomes Scheme_empty and every
 * one of the NUM_URL_PARTS components gets a NULL pointer and length 0.
 * Does nothing if "url" is NULL.
 */
void
URL_Clear (URL *url)
{
  UINT16 part;

  if (url != NULL) {
    url->scheme_type = Scheme_empty;
    for (part = 0; part < NUM_URL_PARTS; part++) {
      url->len[part] = 0;
      url->s[part] = NULL;
    }
  }
}

/************************************************************
 * The parsing of a URL.
 * First comes a number of private functions and macros,
 * and then the main routine, URL_Parse.
 ************************************************************/

/*
 * Return TRUE if the first "len" bytes of "bs" are all decimal digits
 * (as judged by wae_isdigit). A length of zero or less gives FALSE.
 */
static BOOL
IsNumber (const BYTE *bs, INT16 len)
{
  const BYTE *end;

  if (len <= 0)
    return FALSE;

  for (end = bs + len; bs < end; bs++) {
    if (!wae_isdigit (*bs))
      return FALSE;
  }

  return TRUE;
}


/*
 * Check the last (top-level) label of a host name.
 * The label must be non-empty, start with a letter, end with a letter
 * or digit, and contain only letters, digits and '-' in between.
 */
static BOOL
CheckTopLabel (const BYTE *bs, UINT16 len)
{
  INT16 pos;

  if ((len == 0) || !wae_isalpha (bs[0]))
    return FALSE;

  /* Interior characters only; first and last are checked separately. */
  pos = 1;
  while (pos < len - 1) {
    if (!wae_isalphanum (bs[pos]) && (bs[pos] != '-'))
      return FALSE;
    pos++;
  }

  return wae_isalphanum (bs[len - 1]);
}

/*
 * Check a non-final label of a host name.
 * The label must be non-empty, start and end with a letter or digit,
 * and contain only letters, digits and '-' in between.
 */
static BOOL
CheckDomainLabel (const BYTE *bs, UINT16 len)
{
  INT16 pos;

  if ((len == 0) || !wae_isalphanum (bs[0]))
    return FALSE;

  /* Interior characters only; first and last are checked separately. */
  pos = 1;
  while (pos < len - 1) {
    if (!wae_isalphanum (bs[pos]) && (bs[pos] != '-'))
      return FALSE;
    pos++;
  }

  return wae_isalphanum (bs[len - 1]);
}

/*
 * Check a dotted host name of "len" bytes.
 * Every label that is followed by a '.' must pass CheckDomainLabel;
 * a final label that runs to the end of the string must pass
 * CheckTopLabel. If the string ends right after a '.', or is empty,
 * no top label is checked and the result is TRUE.
 */
static BOOL
CheckHostName (const BYTE *bs, UINT16 len)
{
  INT16 labelStart = 0;
  INT16 dot;

  while (labelStart < len) {
    /* Advance to the '.' that ends this label, or to the end of input. */
    dot = labelStart;
    while ((dot < len) && (bs[dot] != '.'))
      dot++;

    if (dot >= len) {
      /* No more dots: this is the last label. */
      return CheckTopLabel (&bs[labelStart], (UINT16)(dot - labelStart));
    }
    if (!CheckDomainLabel (&bs[labelStart], (UINT16)(dot - labelStart)))
      return FALSE;

    labelStart = dot + 1;
  }

  return TRUE;
}

/*
 * Check that the string of length "len" has the form of a dotted IPv4
 * address: exactly four non-empty groups of decimal digits separated
 * by '.', with no trailing '.'.
 * The numeric value of each group is not range-checked.
 */
static BOOL
CheckIPv4address (const BYTE *bs, UINT16 len)
{
  INT16 groupStart = 0;
  INT16 dot;
  UINT16 groups = 0;

  while (groupStart < len) {
    /* Advance to the '.' that ends this group, or to the end of input. */
    for (dot = groupStart; (dot < len) && (bs[dot] != '.'); dot++)
      ;

    /* A '.' in the very last position leaves an empty final group. */
    if (dot + 1 == len)
      return FALSE;
    if (!IsNumber (&bs[groupStart], (INT16)(dot - groupStart)))
      return FALSE;

    groups++;
    groupStart = dot + 1;
  }

  return (groups == 4);
}

/*
 * Check that the string of length 'len' is a legal host.
 * A host part is either a hostname or an IPv4 address.
 * In both cases, it is a string separated into parts by '.'
 *
 * An empty string, or one starting with '.', is rejected. Otherwise
 * the right-most '.' decides how the string is checked:
 *   - no '.' at all:               a single top-level label;
 *   - '.' is the last character:   a hostname, trailing '.' ignored;
 *   - last part starts with digit: an IPv4 address;
 *   - anything else:               a hostname.
 */
BOOL
ValidateHost (const BYTE *bs, UINT16 len)
{
  INT16 lastDot;

  if ((len == 0) || (bs[0] == '.'))
    return FALSE;

  /* Scan backwards for the right-most '.'; stops at index 0 if none. */
  lastDot = len - 1;
  while ((lastDot > 0) && (bs[lastDot] != '.'))
    lastDot--;

  /* bs[0] is known not to be '.', so index 0 means "no dot found". */
  if (lastDot == 0)
    return CheckTopLabel (bs, len);

  /* A final '.' can be ignored in a hostname. */
  if (lastDot == len - 1)
    return CheckHostName (bs, (UINT16)(len - 1));

  /* If the final component starts with a digit, it must be an IP-address */
  if (wae_isdigit (bs[lastDot + 1]))
    return CheckIPv4address (bs, len);

  return CheckHostName (bs, len);
}

/*
 * Parsing routines.
 * All parsing routines have the same function prototype.
 * The parameter "pbs" is a pointer to a pointer indicating
 * a position in a string where the parsing should start.
 * Upon success, this parameter is updated to point beyond
 * the segment of the string consumed by our parsing.
 * The second parameter, "url", is a pointer to a URL struct
 * where information about discovered components is stored,
 * if a component of the specified type is found.
 * In case of error, a parsing routine returns FALSE and does not update
 * the "pbs" parameter.
 */

/*
 * Parse an optional scheme ("name:") at *pbs.
 *
 * The candidate name is everything up to the first ':', '/', '?', '#'
 * or end of string. It counts as a scheme only if it is non-empty and
 * is terminated by ':'. In that case it must start with a letter and
 * otherwise consist of letters, digits, '+', '-' and '.'; if it does
 * not, FALSE is returned and *pbs is left unchanged.
 *
 * On success with a scheme present, SCHEME_PART and scheme_type of
 * "url" are filled in and *pbs is moved past the ':'. If no scheme is
 * present, SCHEME_PART stays NULL/0, scheme_type is set to
 * Scheme_empty, *pbs is unchanged and TRUE is returned.
 */
static BOOL
ParseScheme (BYTE **pbs, URL *url)
{
  BYTE *begin = *pbs;
  BYTE *end = begin;
  BYTE *cur;
  UINT16 schemeLen;

  url->s[SCHEME_PART] = NULL;
  url->len[SCHEME_PART] = 0;

  /* Find the first delimiter (or the end of the string). */
  while (*end && (*end != ':') && (*end != '/') &&
         (*end != '?') && (*end != '#')) {
    end++;
  }

  schemeLen = (UINT16)(end - begin);
  if ((schemeLen == 0) || (*end != ':')) {
    /* Nothing that looks like "name:" here. */
    url->scheme_type = Scheme_empty;
    return TRUE;
  }

  if (!wae_isalpha (*begin))
    return FALSE;

  for (cur = begin + 1; cur < end; cur++) {
    BYTE b = *cur;
    if (!wae_isalphanum (b) && (b != '+') && (b != '-') && (b != '.')) {
      return FALSE;
    }
  }

  url->s[SCHEME_PART] = begin;
  url->len[SCHEME_PART] = schemeLen;
  url->scheme_type = GetSchemeType (begin, schemeLen);
  *pbs = end + 1;

  return TRUE;
}

/*
 * Parse an optional userinfo component ("user@") at *pbs.
 *
 * Userinfo is present only if an '@' occurs before the first '/', '?',
 * '#' or end of string. If it is absent, USERINFO_PART stays NULL/0,
 * *pbs is unchanged and TRUE is returned.
 *
 * If present, every byte before the '@' must satisfy wae_ispchar or be
 * ';'; otherwise FALSE is returned and *pbs is unchanged. On success,
 * USERINFO_PART is set to the text before the '@' and *pbs is moved
 * past the '@'.
 */
static BOOL
ParseUserinfo (BYTE **pbs, URL *url)
{
  BYTE *begin = *pbs;
  BYTE *at = begin;
  BYTE *cur;

  url->s[USERINFO_PART] = NULL;
  url->len[USERINFO_PART] = 0;

  /* Search for '@'; give up at end of string or at '/', '?' or '#'. */
  while (*at != '@') {
    if ((*at == '\0') || (*at == '/') || (*at == '?') || (*at == '#'))
      return TRUE;
    at++;
  }

  for (cur = begin; cur < at; cur++) {
    BYTE b = *cur;
    if (!wae_ispchar (b) && (b != ';')) {
      return FALSE;
    }
  }

  url->s[USERINFO_PART] = begin;
  url->len[USERINFO_PART] = (UINT16)(at - begin);
  *pbs = at + 1;

  return TRUE;
}

/*
 * Parse an optional host component at *pbs.
 *
 * The host is everything up to the first ':', '/', '?', '#' or end of
 * string. An empty host is accepted: HOST_PART stays NULL/0, *pbs is
 * unchanged and TRUE is returned. A non-empty host must pass
 * ValidateHost, otherwise FALSE is returned and *pbs is unchanged.
 * On success, HOST_PART is set and *pbs points at the delimiter that
 * ended the host.
 */
static BOOL
ParseHost (BYTE **pbs, URL *url)
{
  BYTE *begin = *pbs;
  BYTE *end = begin;
  UINT16 hostLen;

  url->s[HOST_PART] = NULL;
  url->len[HOST_PART] = 0;

  while (*end && (*end != ':') && (*end != '/') &&
         (*end != '?') && (*end != '#')) {
    end++;
  }

  hostLen = (UINT16)(end - begin);
  if (hostLen > 0) {
    if (!ValidateHost (begin, hostLen))
      return FALSE;

    url->s[HOST_PART] = begin;
    url->len[HOST_PART] = hostLen;
    *pbs = end;
  }

  return TRUE;
}

/*
 * Parse an optional port component (":digits") at *pbs.
 *
 * If *pbs does not start with ':', there is no port: PORT_PART stays
 * NULL/0, *pbs is unchanged and TRUE is returned.
 *
 * Otherwise the port is everything after the ':' up to the first '/',
 * '?', '#' or end of string, and must consist of digits only; a
 * non-digit makes the routine return FALSE with *pbs unchanged.
 * A ':' followed by no digits is accepted and yields a non-NULL
 * PORT_PART of length 0. On success *pbs points at the delimiter that
 * ended the port.
 */
static BOOL
ParsePort (BYTE **pbs, URL *url)
{
  BYTE *digits;
  BYTE *end;

  url->s[PORT_PART] = NULL;
  url->len[PORT_PART] = 0;

  if (**pbs != ':')
    return TRUE;

  /* Skip the ':' and scan the digits that follow. */
  digits = *pbs + 1;
  end = digits;
  while (*end && (*end != '/') && (*end != '?') && (*end != '#')) {
    if (!wae_isdigit (*end))
      return FALSE;
    end++;
  }

  url->s[PORT_PART] = digits;
  url->len[PORT_PART] = (UINT16)(end - digits);
  *pbs = end;

  return TRUE;
}

/*
 * Parse "host[:port]" at *pbs using ParseHost and ParsePort.
 *
 * The port is only looked for when a non-empty host was found; with an
 * empty host, ParsePort is not called and PORT_PART is left untouched.
 * Returns FALSE, with *pbs unchanged, if either sub-parser fails;
 * otherwise *pbs is moved past whatever was consumed.
 */
static BOOL
ParseHostPort (BYTE **pbs, URL *url)
{
  BYTE *cur = *pbs;

  if (!ParseHost (&cur, url))
    return FALSE;

  if ((url->len[HOST_PART] > 0) && !ParsePort (&cur, url))
    return FALSE;

  *pbs = cur;
  return TRUE;
}

/*
 * Parse an authority component, "[userinfo@]host[:port]", at *pbs.
 *
 * Runs ParseUserinfo and then ParseHostPort. If either fails, FALSE is
 * returned, *pbs is unchanged and AUTHORITY_PART is not written.
 * On success AUTHORITY_PART covers everything the two sub-parsers
 * consumed (possibly nothing) and *pbs is moved past it.
 */
static BOOL
ParseAuthority (BYTE **pbs, URL *url)
{
  BYTE *begin = *pbs;
  BYTE *cur = begin;

  if (!ParseUserinfo (&cur, url))
    return FALSE;
  if (!ParseHostPort (&cur, url))
    return FALSE;

  url->len[AUTHORITY_PART] = (UINT16)(cur - begin);
  url->s[AUTHORITY_PART] = begin;

  *pbs = cur;
  return TRUE;
}

static BOOL
ParseQuery (BYTE **pbs, URL *url)
{
  BYTE *start = *pbs;
  BYTE *p;

  url->s[QUERY_PART] = NULL;
  url->len[QUERY_PART] = 0;

  if (*start != '?') {
    return TRUE;
  }
  start++;
  for (p = start; *p; p++) {
    BYTE b = *p;
    if (b == '#') {
      break;
    }
    if (!wae_isuric (b)) {
      if ((b == '%') && HexToByte (p + 1, &b)) {
        p += 2;
      }
      else
        return FALSE;
    }
  }
  url->s[QUERY_PART] = start;
  url->len[QUERY_PART] = (UINT16)(p - start);

  *pbs = p;
12 3 4 5 下一页
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -