📄 cookies.c
字号:
/* Check whether ADDR matches <digits>.<digits>.<digits>.<digits>. We don't want to call network functions like inet_addr() because all we need is a check, preferrably one that is small, fast, and well-defined. */static boolnumeric_address_p (const char *addr){ const char *p = addr; REQUIRE_DIGITS (p); /* A */ REQUIRE_DOT (p); /* . */ REQUIRE_DIGITS (p); /* B */ REQUIRE_DOT (p); /* . */ REQUIRE_DIGITS (p); /* C */ REQUIRE_DOT (p); /* . */ REQUIRE_DIGITS (p); /* D */ if (*p != '\0') return false; return true;}/* Check whether COOKIE_DOMAIN is an appropriate domain for HOST. Originally I tried to make the check compliant with rfc2109, but the sites deviated too often, so I had to fall back to "tail matching", as defined by the original Netscape's cookie spec. */static boolcheck_domain_match (const char *cookie_domain, const char *host){ DEBUGP (("cdm: 1")); /* Numeric address requires exact match. It also requires HOST to be an IP address. */ if (numeric_address_p (cookie_domain)) return 0 == strcmp (cookie_domain, host); DEBUGP ((" 2")); /* For the sake of efficiency, check for exact match first. */ if (0 == strcasecmp (cookie_domain, host)) return true; DEBUGP ((" 3")); /* HOST must match the tail of cookie_domain. */ if (!match_tail (host, cookie_domain, true)) return false; /* We know that COOKIE_DOMAIN is a subset of HOST; however, we must make sure that somebody is not trying to set the cookie for a subdomain shared by many entities. For example, "company.co.uk" must not be allowed to set a cookie for ".co.uk". On the other hand, "sso.redhat.de" should be able to set a cookie for ".redhat.de". The only marginally sane way to handle this I can think of is to reject on the basis of the length of the second-level domain name (but when the top-level domain is unknown), with the assumption that those of three or less characters could be reserved. For example: .co.org -> works because the TLD is known .co.uk -> doesn't work because "co" is only two chars long .com.au -> doesn't work because "com" is only 3 chars long .cnn.uk -> doesn't work because "cnn" is also only 3 chars long (ugh) .cnn.de -> doesn't work for the same reason (ugh!!) .abcd.de -> works because "abcd" is 4 chars long .img.cnn.de -> works because it's not trying to set the 2nd level domain .cnn.co.uk -> works for the same reason That should prevent misuse, while allowing reasonable usage. If someone knows of a better way to handle this, please let me know. */ { const char *p = cookie_domain; int dccount = 1; /* number of domain components */ int ldcl = 0; /* last domain component length */ int nldcl = 0; /* next to last domain component length */ int out; if (*p == '.') /* Ignore leading period in this calculation. */ ++p; DEBUGP ((" 4")); for (out = 0; !out; p++) switch (*p) { case '\0': out = 1; break; case '.': if (ldcl == 0) /* Empty domain component found -- the domain is invalid. */ return false; if (*(p + 1) == '\0') { /* Tolerate trailing '.' by not treating the domain as one ending with an empty domain component. */ out = 1; break; } nldcl = ldcl; ldcl = 0; ++dccount; break; default: ++ldcl; } DEBUGP ((" 5")); if (dccount < 2) return false; DEBUGP ((" 6")); if (dccount == 2) { int i; int known_toplevel = false; static const char *known_toplevel_domains[] = { ".com", ".edu", ".net", ".org", ".gov", ".mil", ".int" }; for (i = 0; i < countof (known_toplevel_domains); i++) if (match_tail (cookie_domain, known_toplevel_domains[i], true)) { known_toplevel = true; break; } if (!known_toplevel && nldcl <= 3) return false; } } DEBUGP ((" 7")); /* Don't allow the host "foobar.com" to set a cookie for domain "bar.com". */ if (*cookie_domain != '.') { int dlen = strlen (cookie_domain); int hlen = strlen (host); /* cookie host: hostname.foobar.com */ /* desired domain: bar.com */ /* '.' must be here in host-> ^ */ if (hlen > dlen && host[hlen - dlen - 1] != '.') return false; } DEBUGP ((" 8")); return true;}static int path_matches (const char *, const char *);/* Check whether PATH begins with COOKIE_PATH. */static boolcheck_path_match (const char *cookie_path, const char *path){ return path_matches (path, cookie_path) != 0;}/* Prepend '/' to string S. S is copied to fresh stack-allocated space and its value is modified to point to the new location. */#define PREPEND_SLASH(s) do { \ char *PS_newstr = (char *) alloca (1 + strlen (s) + 1); \ *PS_newstr = '/'; \ strcpy (PS_newstr + 1, s); \ s = PS_newstr; \} while (0)/* Process the HTTP `Set-Cookie' header. This results in storing the cookie or discarding a matching one, or ignoring it completely, all depending on the contents. */voidcookie_handle_set_cookie (struct cookie_jar *jar, const char *host, int port, const char *path, const char *set_cookie){ struct cookie *cookie; cookies_now = time (NULL); /* Wget's paths don't begin with '/' (blame rfc1808), but cookie usage assumes /-prefixed paths. Until the rest of Wget is fixed, simply prepend slash to PATH. */ PREPEND_SLASH (path); cookie = parse_set_cookie (set_cookie, false); if (!cookie) goto out; /* Sanitize parts of cookie. */ if (!cookie->domain) { copy_domain: /* If the domain was not provided, we use the one we're talking to, and set exact match. */ cookie->domain = xstrdup (host); cookie->domain_exact = 1; /* Set the port, but only if it's non-default. */ if (port != 80 && port != 443) cookie->port = port; } else { if (!check_domain_match (cookie->domain, host)) { logprintf (LOG_NOTQUIET, _("Cookie coming from %s attempted to set domain to %s\n"), escnonprint (host), escnonprint (cookie->domain)); xfree (cookie->domain); goto copy_domain; } } if (!cookie->path) { /* The cookie doesn't set path: set it to the URL path, sans the file part ("/dir/file" truncated to "/dir/"). */ char *trailing_slash = strrchr (path, '/'); if (trailing_slash) cookie->path = strdupdelim (path, trailing_slash + 1); else /* no slash in the string -- can this even happen? */ cookie->path = xstrdup (path); } else { /* The cookie sets its own path; verify that it is legal. */ if (!check_path_match (cookie->path, path)) { DEBUGP (("Attempt to fake the path: %s, %s\n", cookie->path, path)); goto out; } } /* Now store the cookie, or discard an existing cookie, if discarding was requested. */ if (cookie->discard_requested) { discard_matching_cookie (jar, cookie); goto out; } store_cookie (jar, cookie); return; out: if (cookie) delete_cookie (cookie);}/* Support for sending out cookies in HTTP requests, based on previously stored cookies. Entry point is `build_cookies_request'. */ /* Return a count of how many times CHR occurs in STRING. */static intcount_char (const char *string, char chr){ const char *p; int count = 0; for (p = string; *p; p++) if (*p == chr) ++count; return count;}/* Find the cookie chains whose domains match HOST and store them to DEST. A cookie chain is the head of a list of cookies that belong to a host/domain. Given HOST "img.search.xemacs.org", this function will return the chains for "img.search.xemacs.org", "search.xemacs.org", and "xemacs.org" -- those of them that exist (if any), that is. DEST should be large enough to accept (in the worst case) as many elements as there are domain components of HOST. */static intfind_chains_of_host (struct cookie_jar *jar, const char *host, struct cookie *dest[]){ int dest_count = 0; int passes, passcnt; /* Bail out quickly if there are no cookies in the jar. */ if (!hash_table_count (jar->chains)) return 0; if (numeric_address_p (host)) /* If host is an IP address, only check for the exact match. */ passes = 1; else /* Otherwise, check all the subdomains except the top-level (last) one. As a domain with N components has N-1 dots, the number of passes equals the number of dots. */ passes = count_char (host, '.'); passcnt = 0; /* Find chains that match HOST, starting with exact match and progressing to less specific domains. For instance, given HOST fly.srk.fer.hr, first look for fly.srk.fer.hr's chain, then srk.fer.hr's, then fer.hr's. */ while (1) { struct cookie *chain = hash_table_get (jar->chains, host); if (chain) dest[dest_count++] = chain; if (++passcnt >= passes) break; host = strchr (host, '.') + 1; } return dest_count;}/* If FULL_PATH begins with PREFIX, return the length of PREFIX, zero otherwise. */static intpath_matches (const char *full_path, const char *prefix){ int len = strlen (prefix); if (0 != strncmp (full_path, prefix, len)) /* FULL_PATH doesn't begin with PREFIX. */ return 0; /* Length of PREFIX determines the quality of the match. */ return len + 1;}/* Return true iff COOKIE matches the provided parameters of the URL being downloaded: HOST, PORT, PATH, and SECFLAG. If PATH_GOODNESS is non-NULL, store the "path goodness" value there. That value is a measure of how closely COOKIE matches PATH, used for ordering cookies. */static boolcookie_matches_url (const struct cookie *cookie, const char *host, int port, const char *path, bool secflag, int *path_goodness){ int pg; if (cookie_expired_p (cookie)) /* Ignore stale cookies. Don't bother unchaining the cookie at this point -- Wget is a relatively short-lived application, and stale cookies will not be saved by `save_cookies'. On the other hand, this function should be as efficient as possible. */ return false; if (cookie->secure && !secflag) /* Don't transmit secure cookies over insecure connections. */ return false; if (cookie->port != PORT_ANY && cookie->port != port) return false; /* If exact domain match is required, verify that cookie's domain is equal to HOST. If not, assume success on the grounds of the cookie's chain having been found by find_chains_of_host. */ if (cookie->domain_exact && 0 != strcasecmp (host, cookie->domain)) return false; pg = path_matches (path, cookie->path); if (pg == 0) return false; if (path_goodness) /* If the caller requested path_goodness, we return it. This is an optimization, so that the caller doesn't need to call path_matches() again. */ *path_goodness = pg; return true;}/* A structure that points to a cookie, along with the additional information about the cookie's "goodness". This allows us to sort the cookies when returning them to the server, as required by the spec. */struct weighed_cookie { struct cookie *cookie; int domain_goodness; int path_goodness;};/* Comparator used for uniquifying the list. */static intequality_comparator (const void *p1, const void *p2){ struct weighed_cookie *wc1 = (struct weighed_cookie *)p1; struct weighed_cookie *wc2 = (struct weighed_cookie *)p2; int namecmp = strcmp (wc1->cookie->attr, wc2->cookie->attr); int valuecmp = strcmp (wc1->cookie->value, wc2->cookie->value); /* We only really care whether both name and value are equal. We return them in this order only for consistency... */ return namecmp ? namecmp : valuecmp;}/* Eliminate duplicate cookies. "Duplicate cookies" are any two cookies with the same attr name and value. Whenever a duplicate pair is found, one of the cookies is removed. */static inteliminate_dups (struct weighed_cookie *outgoing, int count){ struct weighed_cookie *h; /* hare */ struct weighed_cookie *t; /* tortoise */ struct weighed_cookie *end = outgoing + count; /* We deploy a simple uniquify algorithm: first sort the array according to our sort criteria, then copy it to itself, comparing each cookie to its neighbor and ignoring the duplicates. */ qsort (outgoing, count, sizeof (struct weighed_cookie), equality_comparator); /* "Hare" runs through all the entries in the array, followed by "tortoise". If a duplicate is found, the hare skips it. Non-duplicate entries are copied to the tortoise ptr. */ for (h = t = outgoing; h < end; h++) { if (h != end - 1) { struct cookie *c0 = h[0].cookie; struct cookie *c1 = h[1].cookie; if (!strcmp (c0->attr, c1->attr) && !strcmp (c0->value, c1->value)) continue; /* ignore the duplicate */ } /* If the hare has advanced past the tortoise (because of previous dups), make sure the values get copied. Otherwise, no copying is necessary. */ if (h != t) *t++ = *h; else t++; } return t - outgoing;}/* Comparator used for sorting by quality. */static intgoodness_comparator (const void *p1, const void *p2){ struct weighed_cookie *wc1 = (struct weighed_cookie *)p1;
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -