📄 cookies.c
字号:
{ c = *++p; state = S_VALUE_PRE; } else if (ISSPACE (c)) /* Ignore space and keep the state. */ c = *++p; else state = S_ERROR; break; case S_VALUE_PRE: if (!c || c == ';') { value_b = value_e = p; if (c == ';') c = *++p; state = S_ATTR_ACTION; } else if (c == '"') { c = *++p; value_b = p; state = S_QUOTED_VALUE; } else if (ISSPACE (c)) c = *++p; else { value_b = p; value_e = NULL; state = S_VALUE; } break; case S_VALUE: if (!c || c == ';' || ISSPACE (c)) { value_e = p; state = S_VALUE_TRAILSPACE; } else { value_e = NULL; /* no trailing space */ c = *++p; } break; case S_QUOTED_VALUE: if (c == '"') { value_e = p; c = *++p; state = S_VALUE_TRAILSPACE; } else if (!c) state = S_ERROR; else c = *++p; break; case S_VALUE_TRAILSPACE: if (c == ';') { c = *++p; state = S_ATTR_ACTION; } else if (!c) state = S_ATTR_ACTION; else if (ISSPACE (c)) c = *++p; else state = S_VALUE; break; case S_ATTR_ACTION: { int legal = callback (cookie, name_b, name_e, value_b, value_e); if (!legal) { if (!silent) { char *name; BOUNDED_TO_ALLOCA (name_b, name_e, name); logprintf (LOG_NOTQUIET, _("Error in Set-Cookie, field `%s'"), name); } state = S_ERROR; break; } state = S_START; } break; case S_DONE: case S_ERROR: /* handled by loop condition */ break; } } if (state == S_DONE) return cookie; delete_cookie (cookie); if (state != S_ERROR) abort (); if (!silent) logprintf (LOG_NOTQUIET, _("Syntax error in Set-Cookie: %s at position %d.\n"), sc, p - sc); return NULL;}/* Sanity checks. These are important, otherwise it is possible for mailcious attackers to destroy important cookie information and/or violate your privacy. */#define REQUIRE_DIGITS(p) do { \ if (!ISDIGIT (*p)) \ return 0; \ for (++p; ISDIGIT (*p); p++) \ ; \} while (0)#define REQUIRE_DOT(p) do { \ if (*p++ != '.') \ return 0; \} while (0)/* Check whether ADDR matches <digits>.<digits>.<digits>.<digits>. 
We don't want to call network functions like inet_addr() because all we need is a check, preferrably one that is small, fast, and well-defined. */static intnumeric_address_p (const char *addr){ const char *p = addr; REQUIRE_DIGITS (p); /* A */ REQUIRE_DOT (p); /* . */ REQUIRE_DIGITS (p); /* B */ REQUIRE_DOT (p); /* . */ REQUIRE_DIGITS (p); /* C */ REQUIRE_DOT (p); /* . */ REQUIRE_DIGITS (p); /* D */ if (*p != '\0') return 0; return 1;}/* Check whether COOKIE_DOMAIN is an appropriate domain for HOST. Originally I tried to make the check compliant with rfc2109, but the sites deviated too often, so I had to fall back to "tail matching", as defined by the original Netscape's cookie spec. */static intcheck_domain_match (const char *cookie_domain, const char *host){ DEBUGP (("cdm: 1")); /* Numeric address requires exact match. It also requires HOST to be an IP address. */ if (numeric_address_p (cookie_domain)) return 0 == strcmp (cookie_domain, host); DEBUGP ((" 2")); /* For the sake of efficiency, check for exact match first. */ if (0 == strcasecmp (cookie_domain, host)) return 1; DEBUGP ((" 3")); /* HOST must match the tail of cookie_domain. */ if (!match_tail (host, cookie_domain, 1)) return 0; /* We know that COOKIE_DOMAIN is a subset of HOST; however, we must make sure that somebody is not trying to set the cookie for a subdomain shared by many entities. For example, "company.co.uk" must not be allowed to set a cookie for ".co.uk". On the other hand, "sso.redhat.de" should be able to set a cookie for ".redhat.de". The only marginally sane way to handle this I can think of is to reject on the basis of the length of the second-level domain name (but when the top-level domain is unknown), with the assumption that those of three or less characters could be reserved. 
For example: .co.org -> works because the TLD is known .co.uk -> doesn't work because "co" is only two chars long .com.au -> doesn't work because "com" is only 3 chars long .cnn.uk -> doesn't work because "cnn" is also only 3 chars long (ugh) .cnn.de -> doesn't work for the same reason (ugh!!) .abcd.de -> works because "abcd" is 4 chars long .img.cnn.de -> works because it's not trying to set the 2nd level domain .cnn.co.uk -> works for the same reason That should prevent misuse, while allowing reasonable usage. If someone knows of a better way to handle this, please let me know. */ { const char *p = cookie_domain; int dccount = 1; /* number of domain components */ int ldcl = 0; /* last domain component length */ int nldcl = 0; /* next to last domain component length */ int out; if (*p == '.') /* Ignore leading period in this calculation. */ ++p; DEBUGP ((" 4")); for (out = 0; !out; p++) switch (*p) { case '\0': out = 1; break; case '.': if (ldcl == 0) /* Empty domain component found -- the domain is invalid. */ return 0; if (*(p + 1) == '\0') { /* Tolerate trailing '.' by not treating the domain as one ending with an empty domain component. */ out = 1; break; } nldcl = ldcl; ldcl = 0; ++dccount; break; default: ++ldcl; } DEBUGP ((" 5")); if (dccount < 2) return 0; DEBUGP ((" 6")); if (dccount == 2) { int i; int known_toplevel = 0; static char *known_toplevel_domains[] = { ".com", ".edu", ".net", ".org", ".gov", ".mil", ".int" }; for (i = 0; i < countof (known_toplevel_domains); i++) if (match_tail (cookie_domain, known_toplevel_domains[i], 1)) { known_toplevel = 1; break; } if (!known_toplevel && nldcl <= 3) return 0; } } DEBUGP ((" 7")); /* Don't allow the host "foobar.com" to set a cookie for domain "bar.com". */ if (*cookie_domain != '.') { int dlen = strlen (cookie_domain); int hlen = strlen (host); /* cookie host: hostname.foobar.com */ /* desired domain: bar.com */ /* '.' 
must be here in host-> ^ */ if (hlen > dlen && host[hlen - dlen - 1] != '.') return 0; } DEBUGP ((" 8")); return 1;}static int path_matches PARAMS ((const char *, const char *));/* Check whether PATH begins with COOKIE_PATH. */static intcheck_path_match (const char *cookie_path, const char *path){ return path_matches (path, cookie_path);}/* Process the HTTP `Set-Cookie' header. This results in storing the cookie or discarding a matching one, or ignoring it completely, all depending on the contents. */voidcookie_jar_process_set_cookie (struct cookie_jar *jar, const char *host, int port, const char *path, const char *set_cookie){ struct cookie *cookie; cookies_now = time (NULL); cookie = parse_set_cookies (set_cookie, update_cookie_field, 0); if (!cookie) goto out; /* Sanitize parts of cookie. */ if (!cookie->domain) { copy_domain: cookie->domain = xstrdup (host); cookie->port = port; } else { if (!check_domain_match (cookie->domain, host)) { logprintf (LOG_NOTQUIET, "Cookie coming from %s attempted to set domain to %s\n", host, cookie->domain); xfree (cookie->domain); goto copy_domain; } } if (!cookie->path) cookie->path = xstrdup (path); else { if (!check_path_match (cookie->path, path)) { DEBUGP (("Attempt to fake the path: %s, %s\n", cookie->path, path)); goto out; } } if (cookie->discard_requested) { discard_matching_cookie (jar, cookie); goto out; } store_cookie (jar, cookie); return; out: if (cookie) delete_cookie (cookie);}/* Support for sending out cookies in HTTP requests, based on previously stored cookies. Entry point is `build_cookies_request'. *//* Find the cookie chains whose domains match HOST and store them to DEST. A cookie chain is the head of a list of cookies that belong to a host/domain. Given HOST "img.search.xemacs.org", this function will return the chains for "img.search.xemacs.org", "search.xemacs.org", and "xemacs.org" -- those of them that exist (if any), that is. 
DEST should be large enough to accept (in the worst case) as many
   elements as there are domain components of HOST.

   Returns the number of chains stored to DEST.  */

static int
find_chains_of_host (struct cookie_jar *jar, const char *host,
                     struct cookie *dest[])
{
  const char *suffix = host;
  int found = 0;
  int remaining;

  /* Nothing to look up in an empty jar.  */
  if (hash_table_count (jar->chains) == 0)
    return 0;

  /* An IP address is looked up once, as is.  For a host name every
     suffix except the bare top-level domain is tried; a name with N
     components has N-1 dots, so the number of lookups equals the
     number of dots.  */
  remaining = numeric_address_p (host) ? 1 : count_char (host, '.');

  /* Go from the most specific name to the least specific one: for
     fly.srk.fer.hr that is fly.srk.fer.hr, then srk.fer.hr, then
     fer.hr.  At least one lookup is always performed.  */
  for (;;)
    {
      struct cookie *chain = hash_table_get (jar->chains, suffix);

      if (chain != NULL)
        dest[found++] = chain;

      if (--remaining <= 0)
        break;

      /* Lookups remain, so SUFFIX still contains a dot to strip at.  */
      suffix = strchr (suffix, '.') + 1;
    }

  return found;
}

/* If FULL_PATH begins with PREFIX, return a non-zero "goodness" value
   that grows with the length of PREFIX (its length counting the
   leading '/'); return zero otherwise, including when PREFIX does not
   start with '/'.  */

static int
path_matches (const char *full_path, const char *prefix)
{
  const char *tail;
  int tail_len;

  /* Wget's HTTP paths do not begin with '/' (the URL code treats it
     as a mere separator, inspired by rfc1808), but the '/' is assumed
     when matching against the cookie stuff.  */
  if (prefix[0] != '/')
    return 0;

  tail = prefix + 1;
  tail_len = strlen (tail);

  /* A longer PREFIX gives a higher-quality match; the +1 accounts for
     the '/' skipped above and keeps a match on "/" non-zero.  */
  return strncmp (full_path, tail, tail_len) == 0 ? tail_len + 1 : 0;
}

/* Return non-zero iff COOKIE matches the provided parameters of the
   URL being downloaded: HOST, PORT, PATH, and SECFLAG.

   If PATH_GOODNESS is non-NULL, store the "path goodness" value
   there.  That value is a measure of how closely COOKIE matches PATH,
   used for ordering cookies.
*/static intcookie_matches_url (const struct cookie *cookie, const char *host, int port, const char *path, int secflag, int *path_goodness){ int pg; if (COOKIE_EXPIRED_P (cookie)) /* Ignore stale cookies. Don't bother unchaining the cookie at this point -- Wget is a relatively short-lived application, and stale cookies will not be saved by `save_cookies'. On the other hand, this function should be as efficient as possible. */ return 0; if (cookie->secure && !secflag) /* Don't transmit secure cookies over insecure connections. */ return 0; if (cookie->port != PORT_ANY && cookie->port != port) return 0; /* If exact domain match is required, verify that cookie's domain is equal to HOST. If not, assume success on the grounds of the cookie's chain having been found by find_chains_of_host. */ if (cookie->domain_exact && 0 != strcasecmp (host, cookie->domain)) return 0; pg = path_matches (path, cookie->path); if (!pg) return 0; if (path_goodness) /* If the caller requested path_goodness, we return it. This is an optimization, so that the caller doesn't need to call path_matches() again. */ *path_goodness = pg; return 1;}/* A structure that points to a cookie, along with the additional information about the cookie's "goodness". This allows us to sort the cookies when returning them to the server, as required by the spec. */struct weighed_cookie { struct cookie *cookie; int domain_goodness; int path_goodness;};/* Comparator used for uniquifying the list. */static intequality_comparator (const void *p1, const void *p2){ struct weighed_cookie *wc1 = (struct weighed_cookie *)p1; struct weighed_cookie *wc2 = (struct weighed_cookie *)p2; int namecmp = strcmp (wc1->cookie->attr, wc2->cookie->attr); int valuecmp = strcmp (wc1->cookie->value, wc2->cookie->value); /* We only really care whether both name and value are equal. We return them in this order only for consistency... */ return namecmp ? namecmp : valuecmp;}/* Eliminate duplicate cookies. 
"Duplicate cookies" are any two cookies with the same attr name and value. Whenever a duplicate pair is found, one of the cookies is removed. */static inteliminate_dups (struct weighed_cookie *outgoing, int count){ struct weighed_cookie *h; /* hare */ struct weighed_cookie *t; /* tortoise */ struct weighed_cookie *end = outgoing + count; /* We deploy a simple uniquify algorithm: first sort the array according to our sort criteria, then copy it to itself, comparing each cookie to its neighbor and ignoring the duplicates. */ qsort (outgoing, count, sizeof (struct weighed_cookie), equality_comparator); /* "Hare" runs through all the entries in the array, followed by
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -