⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 cookies.c

📁 wget (command line browser) source code
💻 C
📖 第 1 页 / 共 3 页
字号:
	    {	      c = *++p;	      state = S_VALUE_PRE;	    }	  else if (ISSPACE (c))	    /* Ignore space and keep the state. */	    c = *++p;	  else	    state = S_ERROR;	  break;	case S_VALUE_PRE:	  if (!c || c == ';')	    {	      value_b = value_e = p;	      if (c == ';')		c = *++p;	      state = S_ATTR_ACTION;	    }	  else if (c == '"')	    {	      c = *++p;	      value_b = p;	      state = S_QUOTED_VALUE;	    }	  else if (ISSPACE (c))	    c = *++p;	  else	    {	      value_b = p;	      value_e = NULL;	      state = S_VALUE;	    }	  break;	case S_VALUE:	  if (!c || c == ';' || ISSPACE (c))	    {	      value_e = p;	      state = S_VALUE_TRAILSPACE;	    }	  else	    {	      value_e = NULL;	/* no trailing space */	      c = *++p;	    }	  break;	case S_QUOTED_VALUE:	  if (c == '"')	    {	      value_e = p;	      c = *++p;	      state = S_VALUE_TRAILSPACE;	    }	  else if (!c)	    state = S_ERROR;	  else	    c = *++p;	  break;	case S_VALUE_TRAILSPACE:	  if (c == ';')	    {	      c = *++p;	      state = S_ATTR_ACTION;	    }	  else if (!c)	    state = S_ATTR_ACTION;	  else if (ISSPACE (c))	    c = *++p;	  else	    state = S_VALUE;	  break;	case S_ATTR_ACTION:	  {	    int legal = callback (cookie, name_b, name_e, value_b, value_e);	    if (!legal)	      {		if (!silent)		  {		    char *name;		    BOUNDED_TO_ALLOCA (name_b, name_e, name);		    logprintf (LOG_NOTQUIET,			       _("Error in Set-Cookie, field `%s'"), name);		  }		state = S_ERROR;		break;	      }	    state = S_START;	  }	  break;	case S_DONE:	case S_ERROR:	  /* handled by loop condition */	  break;	}    }  if (state == S_DONE)    return cookie;  delete_cookie (cookie);  if (state != S_ERROR)    abort ();  if (!silent)    logprintf (LOG_NOTQUIET,	       _("Syntax error in Set-Cookie: %s at position %d.\n"),	       sc, p - sc);  return NULL;}/* Sanity checks.  These are important, otherwise it is possible for   mailcious attackers to destroy important cookie information and/or   violate your privacy.  */#define REQUIRE_DIGITS(p) do {			\  if (!ISDIGIT (*p))				\    return 0;					\  for (++p; ISDIGIT (*p); p++)			\    ;						\} while (0)#define REQUIRE_DOT(p) do {			\  if (*p++ != '.')				\    return 0;					\} while (0)/* Check whether ADDR matches <digits>.<digits>.<digits>.<digits>.  We don't want to call network functions like inet_addr() because all  we need is a check, preferrably one that is small, fast, and  well-defined.  */static intnumeric_address_p (const char *addr){  const char *p = addr;  REQUIRE_DIGITS (p);		/* A */  REQUIRE_DOT (p);		/* . */  REQUIRE_DIGITS (p);		/* B */  REQUIRE_DOT (p);		/* . */  REQUIRE_DIGITS (p);		/* C */  REQUIRE_DOT (p);		/* . */  REQUIRE_DIGITS (p);		/* D */  if (*p != '\0')    return 0;  return 1;}/* Check whether COOKIE_DOMAIN is an appropriate domain for HOST.   Originally I tried to make the check compliant with rfc2109, but   the sites deviated too often, so I had to fall back to "tail   matching", as defined by the original Netscape's cookie spec.  */static intcheck_domain_match (const char *cookie_domain, const char *host){  DEBUGP (("cdm: 1"));  /* Numeric address requires exact match.  It also requires HOST to     be an IP address.  */  if (numeric_address_p (cookie_domain))    return 0 == strcmp (cookie_domain, host);  DEBUGP ((" 2"));  /* For the sake of efficiency, check for exact match first. */  if (0 == strcasecmp (cookie_domain, host))    return 1;  DEBUGP ((" 3"));  /* HOST must match the tail of cookie_domain. */  if (!match_tail (host, cookie_domain, 1))    return 0;  /* We know that COOKIE_DOMAIN is a subset of HOST; however, we must     make sure that somebody is not trying to set the cookie for a     subdomain shared by many entities.  For example, "company.co.uk"     must not be allowed to set a cookie for ".co.uk".  On the other     hand, "sso.redhat.de" should be able to set a cookie for     ".redhat.de".     The only marginally sane way to handle this I can think of is to     reject on the basis of the length of the second-level domain name     (but when the top-level domain is unknown), with the assumption     that those of three or less characters could be reserved.  For     example:          .co.org -> works because the TLD is known           .co.uk -> doesn't work because "co" is only two chars long          .com.au -> doesn't work because "com" is only 3 chars long          .cnn.uk -> doesn't work because "cnn" is also only 3 chars long (ugh)          .cnn.de -> doesn't work for the same reason (ugh!!)         .abcd.de -> works because "abcd" is 4 chars long      .img.cnn.de -> works because it's not trying to set the 2nd level domain       .cnn.co.uk -> works for the same reason    That should prevent misuse, while allowing reasonable usage.  If    someone knows of a better way to handle this, please let me    know.  */  {    const char *p = cookie_domain;    int dccount = 1;		/* number of domain components */    int ldcl  = 0;		/* last domain component length */    int nldcl = 0;		/* next to last domain component length */    int out;    if (*p == '.')      /* Ignore leading period in this calculation. */      ++p;    DEBUGP ((" 4"));    for (out = 0; !out; p++)      switch (*p)	{	case '\0':	  out = 1;	  break;	case '.':	  if (ldcl == 0)	    /* Empty domain component found -- the domain is invalid. */	    return 0;	  if (*(p + 1) == '\0')	    {	      /* Tolerate trailing '.' by not treating the domain as		 one ending with an empty domain component.  */	      out = 1;	      break;	    }	  nldcl = ldcl;	  ldcl  = 0;	  ++dccount;	  break;	default:	  ++ldcl;	}    DEBUGP ((" 5"));    if (dccount < 2)      return 0;    DEBUGP ((" 6"));    if (dccount == 2)      {	int i;	int known_toplevel = 0;	static char *known_toplevel_domains[] = {	  ".com", ".edu", ".net", ".org", ".gov", ".mil", ".int"	};	for (i = 0; i < countof (known_toplevel_domains); i++)	  if (match_tail (cookie_domain, known_toplevel_domains[i], 1))	    {	      known_toplevel = 1;	      break;	    }	if (!known_toplevel && nldcl <= 3)	  return 0;      }  }  DEBUGP ((" 7"));  /* Don't allow the host "foobar.com" to set a cookie for domain     "bar.com".  */  if (*cookie_domain != '.')    {      int dlen = strlen (cookie_domain);      int hlen = strlen (host);      /* cookie host:    hostname.foobar.com */      /* desired domain:             bar.com */      /* '.' must be here in host-> ^        */      if (hlen > dlen && host[hlen - dlen - 1] != '.')	return 0;    }  DEBUGP ((" 8"));  return 1;}static int path_matches PARAMS ((const char *, const char *));/* Check whether PATH begins with COOKIE_PATH. */static intcheck_path_match (const char *cookie_path, const char *path){  return path_matches (path, cookie_path);}/* Process the HTTP `Set-Cookie' header.  This results in storing the   cookie or discarding a matching one, or ignoring it completely, all   depending on the contents.  */voidcookie_jar_process_set_cookie (struct cookie_jar *jar,			       const char *host, int port,			       const char *path, const char *set_cookie){  struct cookie *cookie;  cookies_now = time (NULL);  cookie = parse_set_cookies (set_cookie, update_cookie_field, 0);  if (!cookie)    goto out;  /* Sanitize parts of cookie. */  if (!cookie->domain)    {    copy_domain:      cookie->domain = xstrdup (host);      cookie->port = port;    }  else    {      if (!check_domain_match (cookie->domain, host))	{	  logprintf (LOG_NOTQUIET,		     "Cookie coming from %s attempted to set domain to %s\n",		     host, cookie->domain);	  xfree (cookie->domain);	  goto copy_domain;	}    }  if (!cookie->path)    cookie->path = xstrdup (path);  else    {      if (!check_path_match (cookie->path, path))	{	  DEBUGP (("Attempt to fake the path: %s, %s\n",		   cookie->path, path));	  goto out;	}    }  if (cookie->discard_requested)    {      discard_matching_cookie (jar, cookie);      goto out;    }  store_cookie (jar, cookie);  return; out:  if (cookie)    delete_cookie (cookie);}/* Support for sending out cookies in HTTP requests, based on   previously stored cookies.  Entry point is   `build_cookies_request'.  *//* Find the cookie chains whose domains match HOST and store them to   DEST.   A cookie chain is the head of a list of cookies that belong to a   host/domain.  Given HOST "img.search.xemacs.org", this function   will return the chains for "img.search.xemacs.org",   "search.xemacs.org", and "xemacs.org" -- those of them that exist   (if any), that is.   DEST should be large enough to accept (in the worst case) as many   elements as there are domain components of HOST.  */static intfind_chains_of_host (struct cookie_jar *jar, const char *host,		     struct cookie *dest[]){  int dest_count = 0;  int passes, passcnt;  /* Bail out quickly if there are no cookies in the jar.  */  if (!hash_table_count (jar->chains))    return 0;  if (numeric_address_p (host))    /* If host is an IP address, only check for the exact match. */    passes = 1;  else    /* Otherwise, check all the subdomains except the top-level (last)       one.  As a domain with N components has N-1 dots, the number of       passes equals the number of dots.  */    passes = count_char (host, '.');  passcnt = 0;  /* Find chains that match HOST, starting with exact match and     progressing to less specific domains.  For instance, given HOST     fly.srk.fer.hr, first look for fly.srk.fer.hr's chain, then     srk.fer.hr's, then fer.hr's.  */  while (1)    {      struct cookie *chain = hash_table_get (jar->chains, host);      if (chain)	dest[dest_count++] = chain;      if (++passcnt >= passes)	break;      host = strchr (host, '.') + 1;    }  return dest_count;}/* If FULL_PATH begins with PREFIX, return the length of PREFIX, zero   otherwise.  */static intpath_matches (const char *full_path, const char *prefix){  int len;  if (*prefix != '/')    /* Wget's HTTP paths do not begin with '/' (the URL code treats it       as a mere separator, inspired by rfc1808), but the '/' is       assumed when matching against the cookie stuff.  */    return 0;  ++prefix;  len = strlen (prefix);  if (0 != strncmp (full_path, prefix, len))    /* FULL_PATH doesn't begin with PREFIX. */    return 0;  /* Length of PREFIX determines the quality of the match. */  return len + 1;}/* Return non-zero iff COOKIE matches the provided parameters of the   URL being downloaded: HOST, PORT, PATH, and SECFLAG.   If PATH_GOODNESS is non-NULL, store the "path goodness" value   there.  That value is a measure of how closely COOKIE matches PATH,   used for ordering cookies.  */static intcookie_matches_url (const struct cookie *cookie,		    const char *host, int port, const char *path,		    int secflag, int *path_goodness){  int pg;  if (COOKIE_EXPIRED_P (cookie))    /* Ignore stale cookies.  Don't bother unchaining the cookie at       this point -- Wget is a relatively short-lived application, and       stale cookies will not be saved by `save_cookies'.  On the       other hand, this function should be as efficient as       possible.  */    return 0;  if (cookie->secure && !secflag)    /* Don't transmit secure cookies over insecure connections.  */    return 0;  if (cookie->port != PORT_ANY && cookie->port != port)    return 0;  /* If exact domain match is required, verify that cookie's domain is     equal to HOST.  If not, assume success on the grounds of the     cookie's chain having been found by find_chains_of_host.  */  if (cookie->domain_exact      && 0 != strcasecmp (host, cookie->domain))    return 0;  pg = path_matches (path, cookie->path);  if (!pg)    return 0;  if (path_goodness)    /* If the caller requested path_goodness, we return it.  This is       an optimization, so that the caller doesn't need to call       path_matches() again.  */    *path_goodness = pg;  return 1;}/* A structure that points to a cookie, along with the additional   information about the cookie's "goodness".  This allows us to sort   the cookies when returning them to the server, as required by the   spec.  */struct weighed_cookie {  struct cookie *cookie;  int domain_goodness;  int path_goodness;};/* Comparator used for uniquifying the list. */static intequality_comparator (const void *p1, const void *p2){  struct weighed_cookie *wc1 = (struct weighed_cookie *)p1;  struct weighed_cookie *wc2 = (struct weighed_cookie *)p2;  int namecmp  = strcmp (wc1->cookie->attr, wc2->cookie->attr);  int valuecmp = strcmp (wc1->cookie->value, wc2->cookie->value);  /* We only really care whether both name and value are equal.  We     return them in this order only for consistency...  */  return namecmp ? namecmp : valuecmp;}/* Eliminate duplicate cookies.  "Duplicate cookies" are any two   cookies with the same attr name and value.  Whenever a duplicate   pair is found, one of the cookies is removed.  */static inteliminate_dups (struct weighed_cookie *outgoing, int count){  struct weighed_cookie *h;	/* hare */  struct weighed_cookie *t;	/* tortoise */  struct weighed_cookie *end = outgoing + count;  /* We deploy a simple uniquify algorithm: first sort the array     according to our sort criteria, then copy it to itself, comparing     each cookie to its neighbor and ignoring the duplicates.  */  qsort (outgoing, count, sizeof (struct weighed_cookie), equality_comparator);  /* "Hare" runs through all the entries in the array, followed by

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -