📄 url.c

📁 wget讓你可以在console介面下
💻 C
📖 第 1 页 / 共 4 页
字号:
上一页 1 2 34
/* Resolve "." and ".." elements of PATH by destructively modifying   PATH and return non-zero if PATH has been modified, zero otherwise.   The algorithm is in spirit similar to the one described in rfc1808,   although implemented differently, in one pass.  To recap, path   elements containing only "." are removed, and ".." is taken to mean   "back up one element".  Single leading and trailing slashes are   preserved.   For example, "a/b/c/./../d/.." will yield "a/b/".  More exhaustive   test examples are provided below.  If you change anything in this   function, run test_path_simplify to make sure you haven't broken a   test case.  */static intpath_simplify (char *path){  char *h = path;		/* hare */  char *t = path;		/* tortoise */  char *beg = path;		/* boundary for backing the tortoise */  char *end = path + strlen (path);  while (h < end)    {      /* Hare should be at the beginning of a path element. */      if (h[0] == '.' && (h[1] == '/' || h[1] == '\0'))	{	  /* Ignore "./". */	  h += 2;	}      else if (h[0] == '.' && h[1] == '.' && (h[2] == '/' || h[2] == '\0'))	{	  /* Handle "../" by retreating the tortoise by one path	     element -- but not past beggining.  */	  if (t > beg)	    {	      /* Move backwards until T hits the beginning of the		 previous path element or the beginning of path. */	      for (--t; t > beg && t[-1] != '/'; t--)		;	    }	  else	    {	      /* If we're at the beginning, copy the "../" literally		 move the beginning so a later ".." doesn't remove		 it.  */	      beg = t + 3;	      goto regular;	    }	  h += 3;	}      else	{	regular:	  /* A regular path element.  If H hasn't advanced past T,	     simply skip to the next path element.  Otherwise, copy	     the path element until the next slash.  */	  if (t == h)	    {	      /* Skip the path element, including the slash.  */	      while (h < end && *h != '/')		t++, h++;	      if (h < end)		t++, h++;	    }	  else	    {	      /* Copy the path element, including the final slash.  */	      while (h < end && *h != '/')		*t++ = *h++;	      if (h < end)		*t++ = *h++;	    }	}    }  if (t != h)    *t = '\0';  return t != h;}/* Return the length of URL's path.  Path is considered to be   terminated by one of '?', ';', '#', or by the end of the   string.  */static intpath_length (const char *url){  const char *q = strpbrk_or_eos (url, "?;#");  return q - url;}/* Find the last occurrence of character C in the range [b, e), or   NULL, if none are present.  We might want to use memrchr (a GNU   extension) under GNU libc.  */static const char *find_last_char (const char *b, const char *e, char c){  for (; e > b; e--)    if (*e == c)      return e;  return NULL;}/* Merge BASE with LINK and return the resulting URI.   Either of the URIs may be absolute or relative, complete with the   host name, or path only.  This tries to reasonably handle all   foreseeable cases.  It only employs minimal URL parsing, without   knowledge of the specifics of schemes.   I briefly considered making this function call path_simplify after   the merging process, as rfc1738 seems to suggest.  This is a bad   idea for several reasons: 1) it complexifies the code, and 2)   url_parse has to simplify path anyway, so it's wasteful to boot.  */char *uri_merge (const char *base, const char *link){  int linklength;  const char *end;  char *merge;  if (url_has_scheme (link))    return xstrdup (link);  /* We may not examine BASE past END. */  end = base + path_length (base);  linklength = strlen (link);  if (!*link)    {      /* Empty LINK points back to BASE, query string and all. */      return xstrdup (base);    }  else if (*link == '?')    {      /* LINK points to the same location, but changes the query	 string.  Examples: */      /* uri_merge("path",         "?new") -> "path?new"     */      /* uri_merge("path?foo",     "?new") -> "path?new"     */      /* uri_merge("path?foo#bar", "?new") -> "path?new"     */      /* uri_merge("path#foo",     "?new") -> "path?new"     */      int baselength = end - base;      merge = xmalloc (baselength + linklength + 1);      memcpy (merge, base, baselength);      memcpy (merge + baselength, link, linklength);      merge[baselength + linklength] = '\0';    }  else if (*link == '#')    {      /* uri_merge("path",         "#new") -> "path#new"     */      /* uri_merge("path#foo",     "#new") -> "path#new"     */      /* uri_merge("path?foo",     "#new") -> "path?foo#new" */      /* uri_merge("path?foo#bar", "#new") -> "path?foo#new" */      int baselength;      const char *end1 = strchr (base, '#');      if (!end1)	end1 = base + strlen (base);      baselength = end1 - base;      merge = xmalloc (baselength + linklength + 1);      memcpy (merge, base, baselength);      memcpy (merge + baselength, link, linklength);      merge[baselength + linklength] = '\0';    }  else if (*link == '/' && *(link + 1) == '/')    {      /* LINK begins with "//" and so is a net path: we need to	 replace everything after (and including) the double slash	 with LINK. */      /* uri_merge("foo", "//new/bar")            -> "//new/bar"      */      /* uri_merge("//old/foo", "//new/bar")      -> "//new/bar"      */      /* uri_merge("http://old/foo", "//new/bar") -> "http://new/bar" */      int span;      const char *slash;      const char *start_insert;      /* Look for first slash. */      slash = memchr (base, '/', end - base);      /* If found slash and it is a double slash, then replace	 from this point, else default to replacing from the	 beginning.  */      if (slash && *(slash + 1) == '/')	start_insert = slash;      else	start_insert = base;      span = start_insert - base;      merge = (char *)xmalloc (span + linklength + 1);      if (span)	memcpy (merge, base, span);      memcpy (merge + span, link, linklength);      merge[span + linklength] = '\0';    }  else if (*link == '/')    {      /* LINK is an absolute path: we need to replace everything	 after (and including) the FIRST slash with LINK.	 So, if BASE is "http://host/whatever/foo/bar", and LINK is	 "/qux/xyzzy", our result should be	 "http://host/qux/xyzzy".  */      int span;      const char *slash;      const char *start_insert = NULL; /* for gcc to shut up. */      const char *pos = base;      int seen_slash_slash = 0;      /* We're looking for the first slash, but want to ignore	 double slash. */    again:      slash = memchr (pos, '/', end - pos);      if (slash && !seen_slash_slash)	if (*(slash + 1) == '/')	  {	    pos = slash + 2;	    seen_slash_slash = 1;	    goto again;	  }      /* At this point, SLASH is the location of the first / after	 "//", or the first slash altogether.  START_INSERT is the	 pointer to the location where LINK will be inserted.  When	 examining the last two examples, keep in mind that LINK	 begins with '/'. */      if (!slash && !seen_slash_slash)	/* example: "foo" */	/*           ^    */	start_insert = base;      else if (!slash && seen_slash_slash)	/* example: "http://foo" */	/*                     ^ */	start_insert = end;      else if (slash && !seen_slash_slash)	/* example: "foo/bar" */	/*           ^        */	start_insert = base;      else if (slash && seen_slash_slash)	/* example: "http://something/" */	/*                           ^  */	start_insert = slash;      span = start_insert - base;      merge = (char *)xmalloc (span + linklength + 1);      if (span)	memcpy (merge, base, span);      memcpy (merge + span, link, linklength);      merge[span + linklength] = '\0';    }  else    {      /* LINK is a relative URL: we need to replace everything	 after last slash (possibly empty) with LINK.	 So, if BASE is "whatever/foo/bar", and LINK is "qux/xyzzy",	 our result should be "whatever/foo/qux/xyzzy".  */      int need_explicit_slash = 0;      int span;      const char *start_insert;      const char *last_slash = find_last_char (base, end, '/');      if (!last_slash)	{	  /* No slash found at all.  Replace what we have with LINK. */	  start_insert = base;	}      else if (last_slash && last_slash >= base + 2	       && last_slash[-2] == ':' && last_slash[-1] == '/')	{	  /* example: http://host"  */	  /*                      ^ */	  start_insert = end + 1;	  need_explicit_slash = 1;	}      else	{	  /* example: "whatever/foo/bar" */	  /*                        ^    */	  start_insert = last_slash + 1;	}      span = start_insert - base;      merge = (char *)xmalloc (span + linklength + 1);      if (span)	memcpy (merge, base, span);      if (need_explicit_slash)	merge[span - 1] = '/';      memcpy (merge + span, link, linklength);      merge[span + linklength] = '\0';    }  return merge;}#define APPEND(p, s) do {			\  int len = strlen (s);				\  memcpy (p, s, len);				\  p += len;					\} while (0)/* Use this instead of password when the actual password is supposed   to be hidden.  We intentionally use a generic string without giving   away the number of characters in the password, like previous   versions did.  */#define HIDDEN_PASSWORD "*password*"/* Recreate the URL string from the data in URL.   If HIDE is non-zero (as it is when we're calling this on a URL we   plan to print, but not when calling it to canonicalize a URL for   use within the program), password will be hidden.  Unsafe   characters in the URL will be quoted.  */char *url_string (const struct url *url, int hide_password){  int size;  char *result, *p;  char *quoted_host, *quoted_user = NULL, *quoted_passwd = NULL;  int scheme_port  = supported_schemes[url->scheme].default_port;  const char *scheme_str = supported_schemes[url->scheme].leading_string;  int fplen = full_path_length (url);  int brackets_around_host;  assert (scheme_str != NULL);  /* Make sure the user name and password are quoted. */  if (url->user)    {      quoted_user = url_escape_allow_passthrough (url->user);      if (url->passwd)	{	  if (hide_password)	    quoted_passwd = HIDDEN_PASSWORD;	  else	    quoted_passwd = url_escape_allow_passthrough (url->passwd);	}    }  /* In the unlikely event that the host name contains non-printable     characters, quote it for displaying to the user.  */  quoted_host = url_escape_allow_passthrough (url->host);  /* Undo the quoting of colons that URL escaping performs.  IPv6     addresses may legally contain colons, and in that case must be     placed in square brackets.  */  if (quoted_host != url->host)    unescape_single_char (quoted_host, ':');  brackets_around_host = strchr (quoted_host, ':') != NULL;  size = (strlen (scheme_str)	  + strlen (quoted_host)	  + (brackets_around_host ? 2 : 0)	  + fplen	  + 1);  if (url->port != scheme_port)    size += 1 + numdigit (url->port);  if (quoted_user)    {      size += 1 + strlen (quoted_user);      if (quoted_passwd)	size += 1 + strlen (quoted_passwd);    }  p = result = xmalloc (size);  APPEND (p, scheme_str);  if (quoted_user)    {      APPEND (p, quoted_user);      if (quoted_passwd)	{	  *p++ = ':';	  APPEND (p, quoted_passwd);	}      *p++ = '@';    }  if (brackets_around_host)    *p++ = '[';  APPEND (p, quoted_host);  if (brackets_around_host)    *p++ = ']';  if (url->port != scheme_port)    {      *p++ = ':';      p = number_to_string (p, url->port);    }  full_path_write (url, p);  p += fplen;  *p++ = '\0';  assert (p - result == size);  if (quoted_user && quoted_user != url->user)    xfree (quoted_user);  if (quoted_passwd && !hide_password && quoted_passwd != url->passwd)    xfree (quoted_passwd);  if (quoted_host != url->host)    xfree (quoted_host);  return result;}/* Return non-zero if scheme a is similar to scheme b.    Schemes are similar if they are equal.  If SSL is supported, schemes   are also similar if one is http (SCHEME_HTTP) and the other is https   (SCHEME_HTTPS).  */intschemes_are_similar_p (enum url_scheme a, enum url_scheme b){  if (a == b)    return 1;#ifdef HAVE_SSL  if ((a == SCHEME_HTTP && b == SCHEME_HTTPS)      || (a == SCHEME_HTTPS && b == SCHEME_HTTP))    return 1;#endif  return 0;}#if 0/* Debugging and testing support for path_simplify. *//* Debug: run path_simplify on PATH and return the result in a new   string.  Useful for calling from the debugger.  */static char *ps (char *path){  char *copy = xstrdup (path);  path_simplify (copy);  return copy;}static voidrun_test (char *test, char *expected_result, int expected_change){  char *test_copy = xstrdup (test);  int modified = path_simplify (test_copy);  if (0 != strcmp (test_copy, expected_result))    {      printf ("Failed path_simplify(\"%s\"): expected \"%s\", got \"%s\".\n",	      test, expected_result, test_copy);    }  if (modified != expected_change)    {      if (expected_change == 1)	printf ("Expected modification with path_simplify(\"%s\").\n",		test);      else	printf ("Expected no modification with path_simplify(\"%s\").\n",		test);    }  xfree (test_copy);}static voidtest_path_simplify (void){  static struct {    char *test, *result;    int should_modify;  } tests[] = {    { "",			"",		0 },    { ".",			"",		1 },    { "./",			"",		1 },    { "..",			"..",		0 },    { "../",			"../",		0 },    { "foo",			"foo",		0 },    { "foo/bar",		"foo/bar",	0 },    { "foo///bar",		"foo///bar",	0 },    { "foo/.",			"foo/",		1 },    { "foo/./",			"foo/",		1 },    { "foo./",			"foo./",	0 },    { "foo/../bar",		"bar",		1 },    { "foo/../bar/",		"bar/",		1 },    { "foo/bar/..",		"foo/",		1 },    { "foo/bar/../x",		"foo/x",	1 },    { "foo/bar/../x/",		"foo/x/",	1 },    { "foo/..",			"",		1 },    { "foo/../..",		"..",		1 },    { "foo/../../..",		"../..",	1 },    { "foo/../../bar/../../baz", "../../baz",	1 },    { "a/b/../../c",		"c",		1 },    { "./a/../b",		"b",		1 }  };  int i;  for (i = 0; i < countof (tests); i++)    {      char *test = tests[i].test;      char *expected_result = tests[i].result;      int   expected_change = tests[i].should_modify;      run_test (test, expected_result, expected_change);    }}#endif
上一页 1 2 34
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -