⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 cookies.c

📁 Wget很好的处理了http和ftp的下载,很值得学习的经典代码
💻 C
📖 第 1 页 / 共 3 页
字号:
/* Check whether ADDR matches <digits>.<digits>.<digits>.<digits>.   We don't want to call network functions like inet_addr() because   all we need is a check, preferrably one that is small, fast, and   well-defined.  */static boolnumeric_address_p (const char *addr){  const char *p = addr;  REQUIRE_DIGITS (p);           /* A */  REQUIRE_DOT (p);              /* . */  REQUIRE_DIGITS (p);           /* B */  REQUIRE_DOT (p);              /* . */  REQUIRE_DIGITS (p);           /* C */  REQUIRE_DOT (p);              /* . */  REQUIRE_DIGITS (p);           /* D */  if (*p != '\0')    return false;  return true;}/* Check whether COOKIE_DOMAIN is an appropriate domain for HOST.   Originally I tried to make the check compliant with rfc2109, but   the sites deviated too often, so I had to fall back to "tail   matching", as defined by the original Netscape's cookie spec.  */static boolcheck_domain_match (const char *cookie_domain, const char *host){  DEBUGP (("cdm: 1"));  /* Numeric address requires exact match.  It also requires HOST to     be an IP address.  */  if (numeric_address_p (cookie_domain))    return 0 == strcmp (cookie_domain, host);  DEBUGP ((" 2"));  /* For the sake of efficiency, check for exact match first. */  if (0 == strcasecmp (cookie_domain, host))    return true;  DEBUGP ((" 3"));  /* HOST must match the tail of cookie_domain. */  if (!match_tail (host, cookie_domain, true))    return false;  /* We know that COOKIE_DOMAIN is a subset of HOST; however, we must     make sure that somebody is not trying to set the cookie for a     subdomain shared by many entities.  For example, "company.co.uk"     must not be allowed to set a cookie for ".co.uk".  On the other     hand, "sso.redhat.de" should be able to set a cookie for     ".redhat.de".     The only marginally sane way to handle this I can think of is to     reject on the basis of the length of the second-level domain name     (but when the top-level domain is unknown), with the assumption     that those of three or less characters could be reserved.  For     example:          .co.org -> works because the TLD is known           .co.uk -> doesn't work because "co" is only two chars long          .com.au -> doesn't work because "com" is only 3 chars long          .cnn.uk -> doesn't work because "cnn" is also only 3 chars long (ugh)          .cnn.de -> doesn't work for the same reason (ugh!!)         .abcd.de -> works because "abcd" is 4 chars long      .img.cnn.de -> works because it's not trying to set the 2nd level domain       .cnn.co.uk -> works for the same reason    That should prevent misuse, while allowing reasonable usage.  If    someone knows of a better way to handle this, please let me    know.  */  {    const char *p = cookie_domain;    int dccount = 1;            /* number of domain components */    int ldcl  = 0;              /* last domain component length */    int nldcl = 0;              /* next to last domain component length */    int out;    if (*p == '.')      /* Ignore leading period in this calculation. */      ++p;    DEBUGP ((" 4"));    for (out = 0; !out; p++)      switch (*p)        {        case '\0':          out = 1;          break;        case '.':          if (ldcl == 0)            /* Empty domain component found -- the domain is invalid. */            return false;          if (*(p + 1) == '\0')            {              /* Tolerate trailing '.' by not treating the domain as                 one ending with an empty domain component.  */              out = 1;              break;            }          nldcl = ldcl;          ldcl  = 0;          ++dccount;          break;        default:          ++ldcl;        }    DEBUGP ((" 5"));    if (dccount < 2)      return false;    DEBUGP ((" 6"));    if (dccount == 2)      {        int i;        int known_toplevel = false;        static const char *known_toplevel_domains[] = {          ".com", ".edu", ".net", ".org", ".gov", ".mil", ".int"        };        for (i = 0; i < countof (known_toplevel_domains); i++)          if (match_tail (cookie_domain, known_toplevel_domains[i], true))            {              known_toplevel = true;              break;            }        if (!known_toplevel && nldcl <= 3)          return false;      }  }  DEBUGP ((" 7"));  /* Don't allow the host "foobar.com" to set a cookie for domain     "bar.com".  */  if (*cookie_domain != '.')    {      int dlen = strlen (cookie_domain);      int hlen = strlen (host);      /* cookie host:    hostname.foobar.com */      /* desired domain:             bar.com */      /* '.' must be here in host-> ^        */      if (hlen > dlen && host[hlen - dlen - 1] != '.')        return false;    }  DEBUGP ((" 8"));  return true;}static int path_matches (const char *, const char *);/* Check whether PATH begins with COOKIE_PATH. */static boolcheck_path_match (const char *cookie_path, const char *path){  return path_matches (path, cookie_path) != 0;}/* Prepend '/' to string S.  S is copied to fresh stack-allocated   space and its value is modified to point to the new location.  */#define PREPEND_SLASH(s) do {                                   \  char *PS_newstr = (char *) alloca (1 + strlen (s) + 1);       \  *PS_newstr = '/';                                             \  strcpy (PS_newstr + 1, s);                                    \  s = PS_newstr;                                                \} while (0)/* Process the HTTP `Set-Cookie' header.  This results in storing the   cookie or discarding a matching one, or ignoring it completely, all   depending on the contents.  */voidcookie_handle_set_cookie (struct cookie_jar *jar,                          const char *host, int port,                          const char *path, const char *set_cookie){  struct cookie *cookie;  cookies_now = time (NULL);  /* Wget's paths don't begin with '/' (blame rfc1808), but cookie     usage assumes /-prefixed paths.  Until the rest of Wget is fixed,     simply prepend slash to PATH.  */  PREPEND_SLASH (path);  cookie = parse_set_cookie (set_cookie, false);  if (!cookie)    goto out;  /* Sanitize parts of cookie. */  if (!cookie->domain)    {    copy_domain:      /* If the domain was not provided, we use the one we're talking         to, and set exact match.  */      cookie->domain = xstrdup (host);      cookie->domain_exact = 1;      /* Set the port, but only if it's non-default. */      if (port != 80 && port != 443)        cookie->port = port;    }  else    {      if (!check_domain_match (cookie->domain, host))        {          logprintf (LOG_NOTQUIET,                     _("Cookie coming from %s attempted to set domain to %s\n"),                     escnonprint (host), escnonprint (cookie->domain));          xfree (cookie->domain);          goto copy_domain;        }    }  if (!cookie->path)    {      /* The cookie doesn't set path: set it to the URL path, sans the         file part ("/dir/file" truncated to "/dir/").  */      char *trailing_slash = strrchr (path, '/');      if (trailing_slash)        cookie->path = strdupdelim (path, trailing_slash + 1);      else        /* no slash in the string -- can this even happen? */        cookie->path = xstrdup (path);    }  else    {      /* The cookie sets its own path; verify that it is legal. */      if (!check_path_match (cookie->path, path))        {          DEBUGP (("Attempt to fake the path: %s, %s\n",                   cookie->path, path));          goto out;        }    }  /* Now store the cookie, or discard an existing cookie, if     discarding was requested.  */  if (cookie->discard_requested)    {      discard_matching_cookie (jar, cookie);      goto out;    }  store_cookie (jar, cookie);  return; out:  if (cookie)    delete_cookie (cookie);}/* Support for sending out cookies in HTTP requests, based on   previously stored cookies.  Entry point is   `build_cookies_request'.  */   /* Return a count of how many times CHR occurs in STRING. */static intcount_char (const char *string, char chr){  const char *p;  int count = 0;  for (p = string; *p; p++)    if (*p == chr)      ++count;  return count;}/* Find the cookie chains whose domains match HOST and store them to   DEST.   A cookie chain is the head of a list of cookies that belong to a   host/domain.  Given HOST "img.search.xemacs.org", this function   will return the chains for "img.search.xemacs.org",   "search.xemacs.org", and "xemacs.org" -- those of them that exist   (if any), that is.   DEST should be large enough to accept (in the worst case) as many   elements as there are domain components of HOST.  */static intfind_chains_of_host (struct cookie_jar *jar, const char *host,                     struct cookie *dest[]){  int dest_count = 0;  int passes, passcnt;  /* Bail out quickly if there are no cookies in the jar.  */  if (!hash_table_count (jar->chains))    return 0;  if (numeric_address_p (host))    /* If host is an IP address, only check for the exact match. */    passes = 1;  else    /* Otherwise, check all the subdomains except the top-level (last)       one.  As a domain with N components has N-1 dots, the number of       passes equals the number of dots.  */    passes = count_char (host, '.');  passcnt = 0;  /* Find chains that match HOST, starting with exact match and     progressing to less specific domains.  For instance, given HOST     fly.srk.fer.hr, first look for fly.srk.fer.hr's chain, then     srk.fer.hr's, then fer.hr's.  */  while (1)    {      struct cookie *chain = hash_table_get (jar->chains, host);      if (chain)        dest[dest_count++] = chain;      if (++passcnt >= passes)        break;      host = strchr (host, '.') + 1;    }  return dest_count;}/* If FULL_PATH begins with PREFIX, return the length of PREFIX, zero   otherwise.  */static intpath_matches (const char *full_path, const char *prefix){  int len = strlen (prefix);  if (0 != strncmp (full_path, prefix, len))    /* FULL_PATH doesn't begin with PREFIX. */    return 0;  /* Length of PREFIX determines the quality of the match. */  return len + 1;}/* Return true iff COOKIE matches the provided parameters of the URL   being downloaded: HOST, PORT, PATH, and SECFLAG.   If PATH_GOODNESS is non-NULL, store the "path goodness" value   there.  That value is a measure of how closely COOKIE matches PATH,   used for ordering cookies.  */static boolcookie_matches_url (const struct cookie *cookie,                    const char *host, int port, const char *path,                    bool secflag, int *path_goodness){  int pg;  if (cookie_expired_p (cookie))    /* Ignore stale cookies.  Don't bother unchaining the cookie at       this point -- Wget is a relatively short-lived application, and       stale cookies will not be saved by `save_cookies'.  On the       other hand, this function should be as efficient as       possible.  */    return false;  if (cookie->secure && !secflag)    /* Don't transmit secure cookies over insecure connections.  */    return false;  if (cookie->port != PORT_ANY && cookie->port != port)    return false;  /* If exact domain match is required, verify that cookie's domain is     equal to HOST.  If not, assume success on the grounds of the     cookie's chain having been found by find_chains_of_host.  */  if (cookie->domain_exact      && 0 != strcasecmp (host, cookie->domain))    return false;  pg = path_matches (path, cookie->path);  if (pg == 0)    return false;  if (path_goodness)    /* If the caller requested path_goodness, we return it.  This is       an optimization, so that the caller doesn't need to call       path_matches() again.  */    *path_goodness = pg;  return true;}/* A structure that points to a cookie, along with the additional   information about the cookie's "goodness".  This allows us to sort   the cookies when returning them to the server, as required by the   spec.  */struct weighed_cookie {  struct cookie *cookie;  int domain_goodness;  int path_goodness;};/* Comparator used for uniquifying the list. */static intequality_comparator (const void *p1, const void *p2){  struct weighed_cookie *wc1 = (struct weighed_cookie *)p1;  struct weighed_cookie *wc2 = (struct weighed_cookie *)p2;  int namecmp  = strcmp (wc1->cookie->attr, wc2->cookie->attr);  int valuecmp = strcmp (wc1->cookie->value, wc2->cookie->value);  /* We only really care whether both name and value are equal.  We     return them in this order only for consistency...  */  return namecmp ? namecmp : valuecmp;}/* Eliminate duplicate cookies.  "Duplicate cookies" are any two   cookies with the same attr name and value.  Whenever a duplicate   pair is found, one of the cookies is removed.  */static inteliminate_dups (struct weighed_cookie *outgoing, int count){  struct weighed_cookie *h;     /* hare */  struct weighed_cookie *t;     /* tortoise */  struct weighed_cookie *end = outgoing + count;  /* We deploy a simple uniquify algorithm: first sort the array     according to our sort criteria, then copy it to itself, comparing     each cookie to its neighbor and ignoring the duplicates.  */  qsort (outgoing, count, sizeof (struct weighed_cookie), equality_comparator);  /* "Hare" runs through all the entries in the array, followed by     "tortoise".  If a duplicate is found, the hare skips it.     Non-duplicate entries are copied to the tortoise ptr.  */  for (h = t = outgoing; h < end; h++)    {      if (h != end - 1)        {          struct cookie *c0 = h[0].cookie;          struct cookie *c1 = h[1].cookie;          if (!strcmp (c0->attr, c1->attr) && !strcmp (c0->value, c1->value))            continue;           /* ignore the duplicate */        }      /* If the hare has advanced past the tortoise (because of         previous dups), make sure the values get copied.  Otherwise,         no copying is necessary.  */      if (h != t)        *t++ = *h;      else        t++;    }  return t - outgoing;}/* Comparator used for sorting by quality. */static intgoodness_comparator (const void *p1, const void *p2){  struct weighed_cookie *wc1 = (struct weighed_cookie *)p1;

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -