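/*
 * url.c
 *
 * Part of a program that simulates mobile phone functions.
 * Language: C
 * Page 1 of 5 (as shown by the code viewer this listing was taken from).
 */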

/*
 * Build the string form of a URL struct and hand it back as a
 * wide-character string.
 * The URL is first rendered with URL_ToByteString and the result is then
 * converted with wip_byte2wchar; the intermediate byte string is released
 * before returning.
 * Returns NULL if the byte string could not be produced, or whatever
 * wip_byte2wchar returns otherwise.
 * NOTE: It is the caller's responsibility to deallocate the returned string.
 */
WCHAR *
URL_ToWideString (URL *url)
{
  BYTE  *bytes = URL_ToByteString (url);
  WCHAR *wide;

  if (bytes != NULL) {
    wide = wip_byte2wchar (bytes);
    DEALLOC (&bytes);
    return wide;
  }

  return NULL;
}


/*
 * Build the byte-string form of a URL struct via AssembleURLString.
 * Returns NULL when 'url' is NULL; otherwise returns the result of
 * AssembleURLString unchanged.
 */
BYTE *
URL_ToByteString (URL *url)
{
  return (url != NULL) ? AssembleURLString (url) : NULL;
}


/*
 * Path-scanning helpers. All three expect two names to be in scope at the
 * point of use: 'buf', the character buffer being scanned, and 'n', the
 * number of valid characters in it. NEXTSEGMENT additionally relies on
 * buf[n] not being '/' (for example a terminating NUL) to stop its
 * slash-skipping loop.
 *
 * Arguments are parenthesized and the statement macros are wrapped in
 * do { ... } while (0) so that they behave as a single statement, also in
 * an unbraced if/else.
 */

/* True if the three characters starting at index x are "../". */
#define IS_DOTDOTSEGMENT(x) (((x) + 2 < n) && (buf[(x)] == '.') &&\
    (buf[(x) + 1] == '.') && (buf[(x) + 2] == '/'))

/*
 * Advance index x to the start of the next segment: if x is on a '/',
 * skip the whole run of slashes; otherwise skip to just past the next '/'
 * (or to n if there is none). x must be a modifiable lvalue.
 */
#define NEXTSEGMENT(x) do {\
  if (buf[(x)] == '/') { do (x)++; while (buf[(x)] == '/'); }\
  else { while (((x) < n) && (buf[(x)] != '/')) (x)++; if ((x) < n) (x)++; }\
} while (0)

/*
 * Move index x, which is at the start of a segment, back to the start of
 * the preceding segment: step over the separating '/', scan backwards to
 * the previous '/' (or to index 0) and step one forward. For x <= 1 the
 * result is 0. x must be a modifiable lvalue.
 */
#define PREVSEGMENT(x) do {\
  if ((x) > 1) {\
    (x) -= 2;\
    while (((x) > 0) && (buf[(x)] != '/')) (x)--;\
    (x)++;\
  }\
  else {\
    (x) = 0;\
  }\
} while (0)

/*
 * Given two path names, an absolute path (starting with '/') in 'base',
 * and a relative path in 'rel', combine them into a new absolute path
 * removing all unnecessary './' and '../' segments.
 *
 * 'base_len' and 'rel_len' are the number of characters to use from
 * 'base' and 'rel'; neither input needs to be NUL-terminated, since only
 * that many characters are read. Everything in 'base' after its last '/'
 * is dropped before 'rel' is appended.
 *
 * Returns a newly allocated, NUL-terminated string with the new path,
 * or NULL if the allocation fails. The caller owns the returned buffer.
 *
 * NOTE(review): all indices are INT16, so the combined length is assumed
 * to fit in a signed 16-bit value -- confirm that callers guarantee this.
 */
static BYTE *
RemoveDots (BYTE *base, UINT16 base_len, BYTE *rel, UINT16 rel_len)
{
  UINT16 len;
  BYTE   *buf;
  INT16  i, j, k, m, n;

  /* Find the right-most '/' character in the base path.
   * If there is none, the loop ends with i == -1. */
  for (i = base_len - 1; i >= 0; i--) {
    if (base[i] == '/') {
      break;
    }
  }
  /* Number of base characters to keep: everything up to and including
   * the last '/', or 0 if the base contains no '/'. */
  len = i + 1;
  if ((buf = NEWARRAY (BYTE, len + rel_len + 1)) == NULL)
    return NULL;

  /* Append the relative path to the base path. */
  B_COPYSTRINGN (buf, base, len);
  B_COPYSTRINGN (buf + len, rel, rel_len);
  /* n is the number of valid characters in buf from here on. */
  n = len + rel_len;
  buf[n] = '\0';

  /* Remove all occurrences of './'
   * In-place compaction: j is the read index, i the write index.
   * A '.' is only treated as a segment of its own when it is at the very
   * start of the buffer or directly after a '/', and is followed by '/'. */
  for (i = j = 0; j < n; ) {
    if (((j == 0) || (buf[j - 1] == '/')) && (buf[j] == '.') &&
        (j < n - 1) && (buf[j + 1] == '/')) {
      j += 2;
      continue;
    }
    else if (i != j) {
      buf[i] = buf[j];
    }
    i++, j++;
  }
  n = i;

  /* If the string ends with '.', remove it.
   * This applies only when the '.' is a complete final segment, i.e. the
   * whole string is "." or the '.' directly follows a '/'. */
  if (((n == 1) || (n >= 2) && (buf[n - 2] == '/')) && (buf[n - 1] == '.')) {
    n--;
  }
  buf[n] = '\0';

  /* Remove all occurrences of '<segment>/../'.
   * i marks the start of the most recently kept segment and j the start
   * of the segment being examined; both are first moved past the leading
   * segment. n keeps its value throughout the loop and bounds the reads,
   * while m tracks the length the string will have after the removals. */
  i = 0;
  NEXTSEGMENT (i);
  j = i;
  NEXTSEGMENT (j);
  m = n;
  while (j < n) {
    if ((buf[i] != '/') && !IS_DOTDOTSEGMENT (i) && IS_DOTDOTSEGMENT (j)) {
      /* The segment at i is an ordinary one and is followed by '../':
       * drop both. (k - i) is the length of the segment at i including
       * its trailing '/', and 3 is the length of '../'. Then step i back
       * to the previously kept segment and move j past the '../'. */
      k = i;
      NEXTSEGMENT (k);
      m -= (k - i) + 3;
      PREVSEGMENT (i);
      NEXTSEGMENT (j);
    }
    else {
      /* Keep the segment at j: remember its start in k, advance j past
       * it, and advance i past the last kept segment so that i becomes
       * the position where this segment belongs. */
      k = j;
      NEXTSEGMENT (j);
      NEXTSEGMENT (i);
      /* Copy one segment from k to i */
      /* NOTE(review): source and destination are in the same buffer and
       * may overlap (destination below source) -- confirm B_COPYSTRINGN
       * tolerates that. */
      if (i != k) {
        B_COPYSTRINGN (&buf[i], &buf[k], j - k);
      }
    }
  }
  n = m;

  /* If the string ends with '<segment>/..', then remove that part.
   * i is set to the start of the segment preceding the final '..'; the
   * string is cut there unless that segment is itself a '..' segment. */
  if ((n > 3) && (buf[n - 3] == '/') && (buf[n - 2] == '.') &&
      (buf[n - 1] == '.')) {
    i = n - 2;
    PREVSEGMENT (i);
    if (!IS_DOTDOTSEGMENT (i)) {
      n = i;
    }
  }
  buf[n] = '\0';

  return buf;
}

/*
 * Given a base URL and a relative URL, resolve the relative reference
 * and store the result as an absolute URL string in "*abs".
 *
 * Resolution rules, in order:
 *   - 'rel' has a scheme: it is already absolute and is copied as is;
 *   - 'rel' has an authority: the scheme is taken from 'base', the rest
 *     from 'rel';
 *   - 'rel' has neither path nor query: 'base' is used with the fragment
 *     of 'rel';
 *   - otherwise: scheme and authority come from 'base', query and
 *     fragment from 'rel', and the path is either the path of 'rel' (if
 *     it starts with '/') or the base path merged with the path of 'rel'
 *     by RemoveDots.
 *
 * Returns TRUE on success, FALSE otherwise. FALSE is returned if any
 * argument is NULL, if 'base' has no scheme, or if a memory allocation
 * fails. When the final string could not be assembled, "*abs" is set to
 * NULL; on the other failures "*abs" is left untouched.
 *
 * Side effect: if 'base' has an empty path, its path is set to "/".
 *
 * NOTE: It is the callers responsibility to deallocate the returned string.
 */
BOOL
URL_Resolve (URL *base, URL *rel, BYTE **abs)
{
  URL  new_url;
  BYTE *result;

  if ((base == NULL) || (rel == NULL) || (abs == NULL))
    return FALSE;

  if (base->s[SCHEME_PART] == NULL)
    /* Not a correct absolute URL. */
    return FALSE;

  if (base->len[PATH_PART] == 0) {
    /* An empty base path is treated as the root path. */
    base->s[PATH_PART] = (BYTE *)"/";
    base->len[PATH_PART] = 1;
  }

  /* If rel has a scheme, then it is an absolute reference.
   * Just copy it. */
  if (rel->s[SCHEME_PART] != NULL) {
    result = AssembleURLString (rel);
  }

  /* If rel has an authority part, just use the scheme from the base part. */
  else if (rel->s[AUTHORITY_PART] != NULL) {
    new_url = *rel;
    new_url.s[SCHEME_PART] = base->s[SCHEME_PART];
    new_url.len[SCHEME_PART] = base->len[SCHEME_PART];
    new_url.scheme_type = base->scheme_type;

    result = AssembleURLString (&new_url);
  }
  else if ((rel->len[PATH_PART] == 0) && (rel->s[QUERY_PART] == NULL)) {
    /* 'rel' is empty or just a fragment */
    new_url = *base;
    new_url.s[FRAGMENT_PART] = rel->s[FRAGMENT_PART];
    new_url.len[FRAGMENT_PART] = rel->len[FRAGMENT_PART];

    result = AssembleURLString (&new_url);
  }
  else {
    /* 'rel' does not have a scheme nor an authority part,
     * but is more than just a fragment */
    BYTE *tmp;

    if (rel->len[PATH_PART] == 0)
      /* Query only: keep the directory part of the base path. */
      tmp = RemoveDots (base->s[PATH_PART], base->len[PATH_PART],
                        (BYTE *)"", 0);
    else if (*(rel->s[PATH_PART]) != '/')
      /* Relative path: merge it with the base path. */
      tmp = RemoveDots (base->s[PATH_PART], base->len[PATH_PART],
                        rel->s[PATH_PART], rel->len[PATH_PART]);
    else
      /* Absolute path: it replaces the base path. */
      tmp = newstring (rel->s[PATH_PART], rel->len[PATH_PART]);

    if (tmp == NULL) /* Memory allocation failed */
      return FALSE;

    new_url = *base;
    new_url.s[PATH_PART] = tmp;
    new_url.len[PATH_PART] = B_STRINGLENGTH (tmp);
    new_url.s[QUERY_PART] = rel->s[QUERY_PART];
    new_url.len[QUERY_PART] = rel->len[QUERY_PART];
    new_url.s[FRAGMENT_PART] = rel->s[FRAGMENT_PART];
    new_url.len[FRAGMENT_PART] = rel->len[FRAGMENT_PART];

    result = AssembleURLString (&new_url);
    /* The assembled string is a separate buffer; the temporary path is
     * no longer needed. */
    DEALLOC (&tmp);
  }

  /* Every branch reports a failed assembly as an error, so that TRUE
   * always comes with a usable string. */
  *abs = result;
  if (result == NULL)
    return FALSE;

  return TRUE;
}

/*
 * Wide-character front end to b_Resolve: convert 'base' and 'rel' to byte
 * strings, resolve 'rel' against 'base', and store the result, converted
 * back to a wide-character string, in "*abs".
 *
 * Returns TRUE on success. Returns FALSE if any argument is NULL, if a
 * conversion fails or reports an error, or if the resolution fails.
 * All intermediate byte strings are released on every path, including
 * the case where a conversion returns a buffer but flags an error.
 * NOTE: It is the callers responsibility to deallocate "*abs".
 */
BOOL
w_Resolve (const WCHAR *base, const WCHAR *rel, WCHAR **abs)
{
  BYTE *b_base = NULL, *b_rel = NULL, *b_abs = NULL;
  BOOL err = FALSE;
  BOOL ok = FALSE;

  if ((base == NULL) || (rel == NULL) || (abs == NULL))
    return FALSE;

  b_base = wip_wchar2byte (base, &err);
  if ((b_base != NULL) && !err) {
    b_rel = wip_wchar2byte (rel, &err);
    /* b_abs is also checked for NULL so that a "successful" resolution
     * without a result string is never passed on to the conversion. */
    if ((b_rel != NULL) && !err &&
        b_Resolve (b_base, b_rel, &b_abs) && (b_abs != NULL)) {
      *abs = wip_byte2wchar (b_abs);
      if (*abs != NULL) {
        ok = TRUE;
      }
    }
  }

  /* Single cleanup point; pointers that were never set are still NULL. */
  DEALLOC (&b_base);
  DEALLOC (&b_rel);
  DEALLOC (&b_abs);

  return ok;
}

/*
 * Byte-string front end to URL_Resolve: parse 'base' and 'rel' with
 * URL_FromByteString and resolve 'rel' against 'base'.
 * On success the resulting string is stored in "*abs" and TRUE is
 * returned. If either string cannot be parsed or the resolution fails,
 * "*abs" is set to NULL and FALSE is returned. If any argument is NULL,
 * FALSE is returned and nothing is stored.
 */
BOOL
b_Resolve (const BYTE *base, const BYTE *rel, BYTE **abs)
{
  URL base_url, rel_url;

  if ((base == NULL) || (rel == NULL) || (abs == NULL))
    return FALSE;

  URL_Clear (&rel_url);

  if (URL_FromByteString (base, &base_url) &&
      URL_FromByteString (rel, &rel_url) &&
      URL_Resolve (&base_url, &rel_url, abs)) {
    return TRUE;
  }

  *abs = NULL;
  return FALSE;
}


/*
 * Compare two URLs component by component.
 * "whichComponents" is a bitmap (SCHEME_COMP, USERINFO_COMP, HOST_COMP,
 * PORT_COMP, PATH_COMP, QUERY_COMP, FRAG_COMP) selecting the parts that
 * take part in the comparison; parts that are not selected are ignored.
 *
 * Scheme, userinfo and host are compared with the last argument of
 * CompareStrings set to TRUE; path, query and fragment with FALSE.
 * Ports are compared numerically; a missing port counts as 80 for
 * Scheme_http and 443 for Scheme_https. An empty path counts as "/".
 *
 * Returns TRUE if all selected components match, FALSE otherwise.
 * Two NULL URLs are equal; a NULL URL never equals a non-NULL one.
 */
BOOL
URL_Equal (URL *url1, URL *url2, BYTE whichComponents)
{
  BYTE   *port_str, *lhs_path, *rhs_path;
  UINT16 port_len, lhs_path_len, rhs_path_len;
  UINT16 lhs_port = 0, rhs_port = 0;

  if (url1 == NULL)
    return (url2 == NULL);
  if (url2 == NULL)
    return FALSE;

  /* Work out the effective port numbers first, if they are needed. */
  if (whichComponents & PORT_COMP) {
    port_str = url1->s[PORT_PART];
    port_len = url1->len[PORT_PART];
    if (port_str == NULL) {
      if (url1->scheme_type == Scheme_http) {
        port_str = (BYTE *)"80";
        port_len = 2;
      }
      else if (url1->scheme_type == Scheme_https) {
        port_str = (BYTE *)"443";
        port_len = 3;
      }
    }
    lhs_port = GetNum (port_str, port_len);

    port_str = url2->s[PORT_PART];
    port_len = url2->len[PORT_PART];
    if (port_str == NULL) {
      if (url2->scheme_type == Scheme_http) {
        port_str = (BYTE *)"80";
        port_len = 2;
      }
      else if (url2->scheme_type == Scheme_https) {
        port_str = (BYTE *)"443";
        port_len = 3;
      }
    }
    rhs_port = GetNum (port_str, port_len);
  }

  /* An empty path is compared as the root path. */
  lhs_path = url1->s[PATH_PART];
  lhs_path_len = url1->len[PATH_PART];
  if (lhs_path_len == 0) {
    lhs_path = (BYTE *)"/";
    lhs_path_len = 1;
  }

  rhs_path = url2->s[PATH_PART];
  rhs_path_len = url2->len[PATH_PART];
  if (rhs_path_len == 0) {
    rhs_path = (BYTE *)"/";
    rhs_path_len = 1;
  }

  /* Check the selected components one at a time, stopping at the first
   * mismatch. */
  if ((whichComponents & SCHEME_COMP) &&
      !CompareStrings (url1->s[SCHEME_PART], url1->len[SCHEME_PART],
                       url2->s[SCHEME_PART], url2->len[SCHEME_PART], TRUE))
    return FALSE;

  if ((whichComponents & USERINFO_COMP) &&
      !CompareStrings (url1->s[USERINFO_PART], url1->len[USERINFO_PART],
                       url2->s[USERINFO_PART], url2->len[USERINFO_PART],
                       TRUE))
    return FALSE;

  if ((whichComponents & HOST_COMP) &&
      !CompareStrings (url1->s[HOST_PART], url1->len[HOST_PART],
                       url2->s[HOST_PART], url2->len[HOST_PART], TRUE))
    return FALSE;

  if ((whichComponents & PORT_COMP) && (lhs_port != rhs_port))
    return FALSE;

  if ((whichComponents & PATH_COMP) &&
      !CompareStrings (lhs_path, lhs_path_len, rhs_path, rhs_path_len, FALSE))
    return FALSE;

  if ((whichComponents & QUERY_COMP) &&
      !CompareStrings (url1->s[QUERY_PART], url1->len[QUERY_PART],
                       url2->s[QUERY_PART], url2->len[QUERY_PART], FALSE))
    return FALSE;

  if ((whichComponents & FRAG_COMP) &&
      !CompareStrings (url1->s[FRAGMENT_PART], url1->len[FRAGMENT_PART],
                       url2->s[FRAGMENT_PART], url2->len[FRAGMENT_PART],
                       FALSE))
    return FALSE;

  return TRUE;
}

/*
 * Wide-character front end to b_EqualURL: convert both URL strings to
 * byte strings and compare the components selected by "whichComponents".
 *
 * Two NULL strings are equal; a NULL string never equals a non-NULL one.
 * Returns FALSE if a conversion fails or reports an error. Both
 * intermediate byte strings are released on every path, including the
 * case where a conversion returns a buffer but flags an error.
 */
BOOL
w_EqualURL (const WCHAR *url1, const WCHAR *url2, BYTE whichComponents)
{
  BYTE *b_url1 = NULL, *b_url2 = NULL;
  BOOL err = FALSE;
  BOOL res = FALSE;

  if (url1 == NULL)
    return (url2 == NULL);
  if (url2 == NULL)
    return FALSE;

  b_url1 = wip_wchar2byte (url1, &err);
  if ((b_url1 != NULL) && !err) {
    b_url2 = wip_wchar2byte (url2, &err);
    if ((b_url2 != NULL) && !err) {
      res = b_EqualURL (b_url1, b_url2, whichComponents);
    }
  }

  /* Single cleanup point; pointers that were never set are still NULL. */
  DEALLOC (&b_url1);
  DEALLOC (&b_url2);

  return res;
}

/*
 * Parse two byte-string URLs with URL_FromByteString and compare the
 * components selected by "whichComponents" using URL_Equal.
 * Two NULL strings are equal; a NULL string never equals a non-NULL one.
 * Returns FALSE if either string cannot be parsed.
 */
BOOL
b_EqualURL (const BYTE *bs1, const BYTE *bs2, BYTE whichComponents)
{
  URL lhs, rhs;

  if ((bs1 == NULL) || (bs2 == NULL))
    return (bs1 == NULL) && (bs2 == NULL);

  if (!URL_FromByteString (bs1, &lhs))
    return FALSE;
  if (!URL_FromByteString (bs2, &rhs))
    return FALSE;
  if (!URL_Equal (&lhs, &rhs, whichComponents))
    return FALSE;

  return TRUE;
}


/*
 * Check whether a wide-character URL string has a valid format.
 * The string is converted to a byte string and handed to b_IsValid.
 * Returns FALSE for a NULL string, and when the conversion yields no
 * buffer or reports an overflow; otherwise returns the verdict of
 * b_IsValid.
 */
BOOL
w_IsValid (const WCHAR* pchUrl)
{
  BYTE *bytes;
  BOOL overflow = FALSE;
  BOOL valid = FALSE;

  if (pchUrl == NULL)
    return FALSE;

  bytes = wip_wchar2byte (pchUrl, &overflow);
  if ((bytes != NULL) && !overflow)
    valid = b_IsValid (bytes);

  /* Released unconditionally, also when it was not used. */
  DEALLOC (&bytes);

  return valid;
}

/*
 * Check whether a byte-string URL has a valid format, i.e. whether
 * URL_Parse accepts it. The parsed components are discarded.
 */
BOOL
b_IsValid (const BYTE* pbUrl)
{
  URL  parsed;
  BOOL accepted = URL_Parse ((BYTE*)pbUrl, &parsed);

  return accepted;
}


/*
 * Retrieval of the different parts of a URL.
 */

/*
 * Return a copy of the scheme part of the URL, made with newstring.
 * Returns NULL if 'url' is NULL; the comment this replaces also states
 * that NULL is returned on error or if the URL has no scheme part.
 * NOTE: it is the responsibility of the caller to deallocate the string.
 */
BYTE*
URL_GetScheme (URL *url)
{
  return (url == NULL)
    ? NULL
    : newstring (url->s[SCHEME_PART], url->len[SCHEME_PART]);
}

/*
 * Return the scheme type stored in the URL,
 * or Scheme_empty if 'url' is NULL.
 */
Scheme
URL_GetSchemeType (URL *url)
{
  return (url != NULL) ? url->scheme_type : Scheme_empty;
}

/*
 * Extract one component of a wide-character URL string.
 * The string is converted to a byte string and parsed with URL_Parse;
 * the component with index 'whichpart' is then copied, widened character
 * by character, into a newly allocated NUL-terminated WCHAR string that
 * is stored in "*part".
 *
 * Returns TRUE on success; "*part" is NULL if the URL has no such
 * component. Returns FALSE if an argument is NULL, if the conversion
 * fails or reports an error, if the URL cannot be parsed, or if the
 * allocation fails; "*part" is NULL in all of these cases except when
 * 'part' itself is NULL. The intermediate byte string is released on
 * every path.
 * NOTE: it is the responsibility of the caller to deallocate "*part".
 * NOTE(review): 'whichpart' is used as an index without a range check;
 * the visible callers pass a *_PART constant.
 */
static BOOL
w_GetPart (const WCHAR* pchUrl, UINT16 whichpart, WCHAR **part)
{
  URL  url;
  BYTE *bs = NULL;
  BOOL err = FALSE;
  BOOL ok = FALSE;

  if ((pchUrl == NULL) || (part == NULL))
    return FALSE;
  *part = NULL;

  bs = wip_wchar2byte (pchUrl, &err);
  if ((bs != NULL) && !err && URL_Parse (bs, &url)) {
    if (url.s[whichpart] == NULL) {
      /* The URL is valid but lacks this component. */
      ok = TRUE;
    }
    else {
      UINT16     len = url.len[whichpart];
      const BYTE *src = url.s[whichpart];
      WCHAR      *buf = NEWARRAY (WCHAR, len + 1);
      UINT16     i;   /* same type as len, so it cannot overflow */

      if (buf != NULL) {
        /* The component is delimited by its length, not by a NUL, so it
         * is copied by count. */
        for (i = 0; i < len; i++) {
          buf[i] = (WCHAR)src[i];
        }
        buf[len] = (WCHAR)0;
        *part = buf;
        ok = TRUE;
      }
    }
  }

  /* Single cleanup point; also covers a buffer returned together with
   * an error flag. */
  DEALLOC (&bs);
  return ok;
}

/*
 * Extract one component of a byte-string URL.
 * The string is parsed with URL_Parse and the component with index
 * 'whichpart' is copied with newstring into "*part".
 *
 * Returns TRUE on success; "*part" is NULL if the URL has no such
 * component. Returns FALSE if an argument is NULL, if the URL cannot be
 * parsed, or if the copy cannot be allocated. As in w_GetPart, "*part"
 * is cleared before any work is done, so it is NULL on every failure
 * except when 'part' itself is NULL.
 * NOTE: it is the responsibility of the caller to deallocate "*part".
 * NOTE(review): 'whichpart' is used as an index without a range check.
 */
static BOOL
b_GetPart (const BYTE* pbUrl, UINT16 whichpart, BYTE **part)
{
  URL url;

  if ((pbUrl == NULL) || (part == NULL))
    return FALSE;
  *part = NULL;

  if (!URL_Parse ((BYTE*)pbUrl, &url)) {
    return FALSE;
  }
  if ((url.s[whichpart] != NULL) &&
      ((*part = newstring (url.s[whichpart], url.len[whichpart])) == NULL)) {
    return FALSE;
  }

  return TRUE;
}

/*
 * Extract the scheme of a URL.
 * Returns FALSE in case of error, including that the URL is not valid.
 * The out-parameter is set to NULL if the URL has no scheme component.
 * NOTE: the caller is responsible for deallocating the returned string
 * (applies to w_GetScheme and b_GetScheme).
 */
BOOL
w_GetScheme (const WCHAR* pchUrl, WCHAR **scheme)
{
  BOOL found = w_GetPart (pchUrl, SCHEME_PART, scheme);

  return found;
}

BOOL
w_GetSchemeType (const WCHAR* pchUrl, Scheme *scheme)
{
  URL  url;
  BYTE *bs = NULL;
  BOOL err = FALSE;
  BOOL ok = TRUE;

  if ((pchUrl == NULL) || (scheme == NULL))
    return FALSE;
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -