📄 url.c

📁 wget (command line browser) source code
💻 C
📖 第 1 页 / 共 4 页
字号:
上一页 1 2 34
static intpath_length (const char *url){  const char *q = strpbrk_or_eos (url, "?;#");  return q - url;}/* Find the last occurrence of character C in the range [b, e), or   NULL, if none are present.  This is equivalent to strrchr(b, c),   except that it accepts an END argument instead of requiring the   string to be zero-terminated.  Why is there no memrchr()?  */static const char *find_last_char (const char *b, const char *e, char c){  for (; e > b; e--)    if (*e == c)      return e;  return NULL;}/* Resolve "." and ".." elements of PATH by destructively modifying   PATH and return non-zero if PATH has been modified, zero otherwise.   The algorithm is in spirit similar to the one described in rfc1808,   although implemented differently, in one pass.  To recap, path   elements containing only "." are removed, and ".." is taken to mean   "back up one element".  Single leading and trailing slashes are   preserved.   This function does not handle URL escapes explicitly.  If you're   passing paths from URLs, make sure to unquote "%2e" and "%2E" to   ".", so that this function can find the dots.  (Wget's URL parser   calls reencode_escapes, which see.)   For example, "a/b/c/./../d/.." will yield "a/b/".  More exhaustive   test examples are provided below.  If you change anything in this   function, run test_path_simplify to make sure you haven't broken a   test case.  */static intpath_simplify (char *path){  char *h, *t, *end;  /* Preserve the leading '/'. */  if (path[0] == '/')    ++path;  h = path;			/* hare */  t = path;			/* tortoise */  end = path + strlen (path);  while (h < end)    {      /* Hare should be at the beginning of a path element. */      if (h[0] == '.' && (h[1] == '/' || h[1] == '\0'))	{	  /* Ignore "./". */	  h += 2;	}      else if (h[0] == '.' && h[1] == '.' && (h[2] == '/' || h[2] == '\0'))	{	  /* Handle "../" by retreating the tortoise by one path	     element -- but not past beggining of PATH.  */	  if (t > path)	    {	      /* Move backwards until T hits the beginning of the		 previous path element or the beginning of path. */	      for (--t; t > path && t[-1] != '/'; t--)		;	    }	  h += 3;	}      else if (*h == '/')	{	  /* Ignore empty path elements.  Supporting them well is hard	     (where do you save "http://x.com///y.html"?), and they	     don't bring any practical gain.  Plus, they break our	     filesystem-influenced assumptions: allowing them would	     make "x/y//../z" simplify to "x/y/z", whereas most people	     would expect "x/z".  */	  ++h;	}      else	{	  /* A regular path element.  If H hasn't advanced past T,	     simply skip to the next path element.  Otherwise, copy	     the path element until the next slash.  */	  if (t == h)	    {	      /* Skip the path element, including the slash.  */	      while (h < end && *h != '/')		t++, h++;	      if (h < end)		t++, h++;	    }	  else	    {	      /* Copy the path element, including the final slash.  */	      while (h < end && *h != '/')		*t++ = *h++;	      if (h < end)		*t++ = *h++;	    }	}    }  if (t != h)    *t = '\0';  return t != h;}/* Merge BASE with LINK and return the resulting URI.   Either of the URIs may be absolute or relative, complete with the   host name, or path only.  This tries to reasonably handle all   foreseeable cases.  It only employs minimal URL parsing, without   knowledge of the specifics of schemes.   Perhaps this function should call path_simplify so that the callers   don't have to call url_parse unconditionally.  */char *uri_merge (const char *base, const char *link){  int linklength;  const char *end;  char *merge;  if (url_has_scheme (link))    return xstrdup (link);  /* We may not examine BASE past END. */  end = base + path_length (base);  linklength = strlen (link);  if (!*link)    {      /* Empty LINK points back to BASE, query string and all. */      return xstrdup (base);    }  else if (*link == '?')    {      /* LINK points to the same location, but changes the query	 string.  Examples: */      /* uri_merge("path",         "?new") -> "path?new"     */      /* uri_merge("path?foo",     "?new") -> "path?new"     */      /* uri_merge("path?foo#bar", "?new") -> "path?new"     */      /* uri_merge("path#foo",     "?new") -> "path?new"     */      int baselength = end - base;      merge = xmalloc (baselength + linklength + 1);      memcpy (merge, base, baselength);      memcpy (merge + baselength, link, linklength);      merge[baselength + linklength] = '\0';    }  else if (*link == '#')    {      /* uri_merge("path",         "#new") -> "path#new"     */      /* uri_merge("path#foo",     "#new") -> "path#new"     */      /* uri_merge("path?foo",     "#new") -> "path?foo#new" */      /* uri_merge("path?foo#bar", "#new") -> "path?foo#new" */      int baselength;      const char *end1 = strchr (base, '#');      if (!end1)	end1 = base + strlen (base);      baselength = end1 - base;      merge = xmalloc (baselength + linklength + 1);      memcpy (merge, base, baselength);      memcpy (merge + baselength, link, linklength);      merge[baselength + linklength] = '\0';    }  else if (*link == '/' && *(link + 1) == '/')    {      /* LINK begins with "//" and so is a net path: we need to	 replace everything after (and including) the double slash	 with LINK. */      /* uri_merge("foo", "//new/bar")            -> "//new/bar"      */      /* uri_merge("//old/foo", "//new/bar")      -> "//new/bar"      */      /* uri_merge("http://old/foo", "//new/bar") -> "http://new/bar" */      int span;      const char *slash;      const char *start_insert;      /* Look for first slash. */      slash = memchr (base, '/', end - base);      /* If found slash and it is a double slash, then replace	 from this point, else default to replacing from the	 beginning.  */      if (slash && *(slash + 1) == '/')	start_insert = slash;      else	start_insert = base;      span = start_insert - base;      merge = (char *)xmalloc (span + linklength + 1);      if (span)	memcpy (merge, base, span);      memcpy (merge + span, link, linklength);      merge[span + linklength] = '\0';    }  else if (*link == '/')    {      /* LINK is an absolute path: we need to replace everything	 after (and including) the FIRST slash with LINK.	 So, if BASE is "http://host/whatever/foo/bar", and LINK is	 "/qux/xyzzy", our result should be	 "http://host/qux/xyzzy".  */      int span;      const char *slash;      const char *start_insert = NULL; /* for gcc to shut up. */      const char *pos = base;      int seen_slash_slash = 0;      /* We're looking for the first slash, but want to ignore	 double slash. */    again:      slash = memchr (pos, '/', end - pos);      if (slash && !seen_slash_slash)	if (*(slash + 1) == '/')	  {	    pos = slash + 2;	    seen_slash_slash = 1;	    goto again;	  }      /* At this point, SLASH is the location of the first / after	 "//", or the first slash altogether.  START_INSERT is the	 pointer to the location where LINK will be inserted.  When	 examining the last two examples, keep in mind that LINK	 begins with '/'. */      if (!slash && !seen_slash_slash)	/* example: "foo" */	/*           ^    */	start_insert = base;      else if (!slash && seen_slash_slash)	/* example: "http://foo" */	/*                     ^ */	start_insert = end;      else if (slash && !seen_slash_slash)	/* example: "foo/bar" */	/*           ^        */	start_insert = base;      else if (slash && seen_slash_slash)	/* example: "http://something/" */	/*                           ^  */	start_insert = slash;      span = start_insert - base;      merge = (char *)xmalloc (span + linklength + 1);      if (span)	memcpy (merge, base, span);      memcpy (merge + span, link, linklength);      merge[span + linklength] = '\0';    }  else    {      /* LINK is a relative URL: we need to replace everything	 after last slash (possibly empty) with LINK.	 So, if BASE is "whatever/foo/bar", and LINK is "qux/xyzzy",	 our result should be "whatever/foo/qux/xyzzy".  */      int need_explicit_slash = 0;      int span;      const char *start_insert;      const char *last_slash = find_last_char (base, end, '/');      if (!last_slash)	{	  /* No slash found at all.  Append LINK to what we have,	     but we'll need a slash as a separator.	     Example: if base == "foo" and link == "qux/xyzzy", then	     we cannot just append link to base, because we'd get	     "fooqux/xyzzy", whereas what we want is	     "foo/qux/xyzzy".	     To make sure the / gets inserted, we set	     need_explicit_slash to 1.  We also set start_insert	     to end + 1, so that the length calculations work out	     correctly for one more (slash) character.  Accessing	     that character is fine, since it will be the	     delimiter, '\0' or '?'.  */	  /* example: "foo?..." */	  /*               ^    ('?' gets changed to '/') */	  start_insert = end + 1;	  need_explicit_slash = 1;	}      else if (last_slash && last_slash >= base + 2	       && last_slash[-2] == ':' && last_slash[-1] == '/')	{	  /* example: http://host"  */	  /*                      ^ */	  start_insert = end + 1;	  need_explicit_slash = 1;	}      else	{	  /* example: "whatever/foo/bar" */	  /*                        ^    */	  start_insert = last_slash + 1;	}      span = start_insert - base;      merge = (char *)xmalloc (span + linklength + 1);      if (span)	memcpy (merge, base, span);      if (need_explicit_slash)	merge[span - 1] = '/';      memcpy (merge + span, link, linklength);      merge[span + linklength] = '\0';    }  return merge;}#define APPEND(p, s) do {			\  int len = strlen (s);				\  memcpy (p, s, len);				\  p += len;					\} while (0)/* Use this instead of password when the actual password is supposed   to be hidden.  We intentionally use a generic string without giving   away the number of characters in the password, like previous   versions did.  */#define HIDDEN_PASSWORD "*password*"/* Recreate the URL string from the data in URL.   If HIDE is non-zero (as it is when we're calling this on a URL we   plan to print, but not when calling it to canonicalize a URL for   use within the program), password will be hidden.  Unsafe   characters in the URL will be quoted.  */char *url_string (const struct url *url, int hide_password){  int size;  char *result, *p;  char *quoted_user = NULL, *quoted_passwd = NULL;  int scheme_port  = supported_schemes[url->scheme].default_port;  char *scheme_str = supported_schemes[url->scheme].leading_string;  int fplen = full_path_length (url);  int brackets_around_host = 0;  assert (scheme_str != NULL);  /* Make sure the user name and password are quoted. */  if (url->user)    {      quoted_user = url_escape_allow_passthrough (url->user);      if (url->passwd)	{	  if (hide_password)	    quoted_passwd = HIDDEN_PASSWORD;	  else	    quoted_passwd = url_escape_allow_passthrough (url->passwd);	}    }  if (strchr (url->host, ':'))    brackets_around_host = 1;  size = (strlen (scheme_str)	  + strlen (url->host)	  + (brackets_around_host ? 2 : 0)	  + fplen	  + 1);  if (url->port != scheme_port)    size += 1 + numdigit (url->port);  if (quoted_user)    {      size += 1 + strlen (quoted_user);      if (quoted_passwd)	size += 1 + strlen (quoted_passwd);    }  p = result = xmalloc (size);  APPEND (p, scheme_str);  if (quoted_user)    {      APPEND (p, quoted_user);      if (quoted_passwd)	{	  *p++ = ':';	  APPEND (p, quoted_passwd);	}      *p++ = '@';    }  if (brackets_around_host)    *p++ = '[';  APPEND (p, url->host);  if (brackets_around_host)    *p++ = ']';  if (url->port != scheme_port)    {      *p++ = ':';      p = number_to_string (p, url->port);    }  full_path_write (url, p);  p += fplen;  *p++ = '\0';  assert (p - result == size);  if (quoted_user && quoted_user != url->user)    xfree (quoted_user);  if (quoted_passwd && !hide_password      && quoted_passwd != url->passwd)    xfree (quoted_passwd);  return result;}/* Return non-zero if scheme a is similar to scheme b.    Schemes are similar if they are equal.  If SSL is supported, schemes   are also similar if one is http (SCHEME_HTTP) and the other is https   (SCHEME_HTTPS).  */intschemes_are_similar_p (enum url_scheme a, enum url_scheme b){  if (a == b)    return 1;#ifdef HAVE_SSL  if ((a == SCHEME_HTTP && b == SCHEME_HTTPS)      || (a == SCHEME_HTTPS && b == SCHEME_HTTP))    return 1;#endif  return 0;}#if 0/* Debugging and testing support for path_simplify. *//* Debug: run path_simplify on PATH and return the result in a new   string.  Useful for calling from the debugger.  */static char *ps (char *path){  char *copy = xstrdup (path);  path_simplify (copy);  return copy;}static voidrun_test (char *test, char *expected_result, int expected_change){  char *test_copy = xstrdup (test);  int modified = path_simplify (test_copy);  if (0 != strcmp (test_copy, expected_result))    {      printf ("Failed path_simplify(\"%s\"): expected \"%s\", got \"%s\".\n",	      test, expected_result, test_copy);    }  if (modified != expected_change)    {      if (expected_change == 1)	printf ("Expected no modification with path_simplify(\"%s\").\n",		test);      else	printf ("Expected modification with path_simplify(\"%s\").\n",		test);    }  xfree (test_copy);}static voidtest_path_simplify (void){  static struct {    char *test, *result;    int should_modify;  } tests[] = {    { "",		"",		0 },    { ".",		"",		1 },    { "..",		"",		1 },    { "foo",		"foo",		0 },    { "foo/bar",	"foo/bar",	0 },    { "foo///bar",	"foo/bar",	1 },    { "foo/.",		"foo/",		1 },    { "foo/./",		"foo/",		1 },    { "foo./",		"foo./",	0 },    { "foo/../bar",	"bar",		1 },    { "foo/../bar/",	"bar/",		1 },    { "foo/bar/..",	"foo/",		1 },    { "foo/bar/../x",	"foo/x",	1 },    { "foo/bar/../x/",	"foo/x/",	1 },    { "foo/..",		"",		1 },    { "foo/../..",	"",		1 },    { "a/b/../../c",	"c",		1 },    { "./a/../b",	"b",		1 }  };  int i;  for (i = 0; i < countof (tests); i++)    {      char *test = tests[i].test;      char *expected_result = tests[i].result;      int   expected_change = tests[i].should_modify;      run_test (test, expected_result, expected_change);    }  /* Now run all the tests with a leading slash before the test case,     to prove that the slash is being preserved.  */  for (i = 0; i < countof (tests); i++)    {      char *test, *expected_result;      int expected_change = tests[i].should_modify;      test = xmalloc (1 + strlen (tests[i].test) + 1);      sprintf (test, "/%s", tests[i].test);      expected_result = xmalloc (1 + strlen (tests[i].result) + 1);      sprintf (expected_result, "/%s", tests[i].result);      run_test (test, expected_result, expected_change);      xfree (test);      xfree (expected_result);    }}#endif
上一页 1 2 34
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -