📄 url.c

📁 wget (command line browser) source code
💻 C
📖 第 1 页 / 共 4 页
字号:
full_path_length (const struct url *url){  int len = 0;#define FROB(el) if (url->el) len += 1 + strlen (url->el)  FROB (path);  FROB (params);  FROB (query);#undef FROB  return len;}/* Write out the full path. */static voidfull_path_write (const struct url *url, char *where){#define FROB(el, chr) do {			\  char *f_el = url->el;				\  if (f_el) {					\    int l = strlen (f_el);			\    *where++ = chr;				\    memcpy (where, f_el, l);			\    where += l;					\  }						\} while (0)  FROB (path, '/');  FROB (params, ';');  FROB (query, '?');#undef FROB}/* Public function for getting the "full path".  E.g. if u->path is   "foo/bar" and u->query is "param=value", full_path will be   "/foo/bar?param=value". */char *url_full_path (const struct url *url){  int length = full_path_length (url);  char *full_path = (char *)xmalloc(length + 1);  full_path_write (url, full_path);  full_path[length] = '\0';  return full_path;}/* Escape unsafe and reserved characters, except for the slash   characters.  */static char *url_escape_dir (const char *dir){  char *newdir = url_escape_1 (dir, urlchr_unsafe | urlchr_reserved, 1);  char *h, *t;  if (newdir == dir)    return (char *)dir;  /* Unescape slashes in NEWDIR. */  h = newdir;			/* hare */  t = newdir;			/* tortoise */  for (; *h; h++, t++)    {      /* url_escape_1 having converted '/' to "%2F" exactly. */      if (*h == '%' && h[1] == '2' && h[2] == 'F')	{	  *t = '/';	  h += 2;	}      else	*t = *h;    }  *t = '\0';  return newdir;}/* Sync u->path and u->url with u->dir and u->file.  Called after   u->file or u->dir have been changed, typically by the FTP code.  */static voidsync_path (struct url *u){  char *newpath, *efile, *edir;  xfree (u->path);  /* u->dir and u->file are not escaped.  URL-escape them before     reassembling them into u->path.  That way, if they contain     separators like '?' or even if u->file contains slashes, the     path will be correctly assembled.  (u->file can contain slashes     if the URL specifies it with %2f, or if an FTP server returns     it.)  */  edir = url_escape_dir (u->dir);  efile = url_escape_1 (u->file, urlchr_unsafe | urlchr_reserved, 1);  if (!*edir)    newpath = xstrdup (efile);  else    {      int dirlen = strlen (edir);      int filelen = strlen (efile);      /* Copy "DIR/FILE" to newpath. */      char *p = newpath = xmalloc (dirlen + 1 + filelen + 1);      memcpy (p, edir, dirlen);      p += dirlen;      *p++ = '/';      memcpy (p, efile, filelen);      p += filelen;      *p++ = '\0';    }  u->path = newpath;  if (edir != u->dir)    xfree (edir);  if (efile != u->file)    xfree (efile);  /* Regenerate u->url as well.  */  xfree (u->url);  u->url = url_string (u, 0);}/* Mutators.  Code in ftp.c insists on changing u->dir and u->file.   This way we can sync u->path and u->url when they get changed.  */voidurl_set_dir (struct url *url, const char *newdir){  xfree (url->dir);  url->dir = xstrdup (newdir);  sync_path (url);}voidurl_set_file (struct url *url, const char *newfile){  xfree (url->file);  url->file = xstrdup (newfile);  sync_path (url);}voidurl_free (struct url *url){  xfree (url->host);  xfree (url->path);  xfree (url->url);  FREE_MAYBE (url->params);  FREE_MAYBE (url->query);  FREE_MAYBE (url->fragment);  FREE_MAYBE (url->user);  FREE_MAYBE (url->passwd);  xfree (url->dir);  xfree (url->file);  xfree (url);}/* Create all the necessary directories for PATH (a file).  Calls   mkdirhier() internally.  */intmkalldirs (const char *path){  const char *p;  char *t;  struct stat st;  int res;  p = path + strlen (path);  for (; *p != '/' && p != path; p--)    ;  /* Don't create if it's just a file.  */  if ((p == path) && (*p != '/'))    return 0;  t = strdupdelim (path, p);  /* Check whether the directory exists.  */  if ((stat (t, &st) == 0))    {      if (S_ISDIR (st.st_mode))	{	  xfree (t);	  return 0;	}      else	{	  /* If the dir exists as a file name, remove it first.  This	     is *only* for Wget to work with buggy old CERN http	     servers.  Here is the scenario: When Wget tries to	     retrieve a directory without a slash, e.g.	     http://foo/bar (bar being a directory), CERN server will	     not redirect it too http://foo/bar/ -- it will generate a	     directory listing containing links to bar/file1,	     bar/file2, etc.  Wget will lose because it saves this	     HTML listing to a file `bar', so it cannot create the	     directory.  To work around this, if the file of the same	     name exists, we just remove it and create the directory	     anyway.  */	  DEBUGP (("Removing %s because of directory danger!\n", t));	  unlink (t);	}    }  res = make_directory (t);  if (res != 0)    logprintf (LOG_NOTQUIET, "%s: %s", t, strerror (errno));  xfree (t);  return res;}/* Functions for constructing the file name out of URL components.  *//* A growable string structure, used by url_file_name and friends.   This should perhaps be moved to utils.c.   The idea is to have a convenient and efficient way to construct a   string by having various functions append data to it.  Instead of   passing the obligatory BASEVAR, SIZEVAR and TAILPOS to all the   functions in questions, we pass the pointer to this struct.  */struct growable {  char *base;  int size;  int tail;};/* Ensure that the string can accept APPEND_COUNT more characters past   the current TAIL position.  If necessary, this will grow the string   and update its allocated size.  If the string is already large   enough to take TAIL+APPEND_COUNT characters, this does nothing.  */#define GROW(g, append_size) do {					\  struct growable *G_ = g;						\  DO_REALLOC (G_->base, G_->size, G_->tail + append_size, char);	\} while (0)/* Return the tail position of the string. */#define TAIL(r) ((r)->base + (r)->tail)/* Move the tail position by APPEND_COUNT characters. */#define TAIL_INCR(r, append_count) ((r)->tail += append_count)/* Append the string STR to DEST.  NOTICE: the string in DEST is not   terminated.  */static voidappend_string (const char *str, struct growable *dest){  int l = strlen (str);  GROW (dest, l);  memcpy (TAIL (dest), str, l);  TAIL_INCR (dest, l);}/* Append CH to DEST.  For example, append_char (0, DEST)   zero-terminates DEST.  */static voidappend_char (char ch, struct growable *dest){  GROW (dest, 1);  *TAIL (dest) = ch;  TAIL_INCR (dest, 1);}enum {  filechr_not_unix    = 1,	/* unusable on Unix, / and \0 */  filechr_not_windows = 2,	/* unusable on Windows, one of \|/<>?:*" */  filechr_control     = 4	/* a control character, e.g. 0-31 */};#define FILE_CHAR_TEST(c, mask) (filechr_table[(unsigned char)(c)] & (mask))/* Shorthands for the table: */#define U filechr_not_unix#define W filechr_not_windows#define C filechr_control#define UW U|W#define UWC U|W|C/* Table of characters unsafe under various conditions (see above).   Arguably we could also claim `%' to be unsafe, since we use it as   the escape character.  If we ever want to be able to reliably   translate file name back to URL, this would become important   crucial.  Right now, it's better to be minimal in escaping.  */const static unsigned char filechr_table[256] ={UWC,  C,  C,  C,   C,  C,  C,  C,   /* NUL SOH STX ETX  EOT ENQ ACK BEL */  C,  C,  C,  C,   C,  C,  C,  C,   /* BS  HT  LF  VT   FF  CR  SO  SI  */  C,  C,  C,  C,   C,  C,  C,  C,   /* DLE DC1 DC2 DC3  DC4 NAK SYN ETB */  C,  C,  C,  C,   C,  C,  C,  C,   /* CAN EM  SUB ESC  FS  GS  RS  US  */  0,  0,  W,  0,   0,  0,  0,  0,   /* SP  !   "   #    $   %   &   '   */  0,  0,  W,  0,   0,  0,  0, UW,   /* (   )   *   +    ,   -   .   /   */  0,  0,  0,  0,   0,  0,  0,  0,   /* 0   1   2   3    4   5   6   7   */  0,  0,  W,  0,   W,  0,  W,  W,   /* 8   9   :   ;    <   =   >   ?   */  0,  0,  0,  0,   0,  0,  0,  0,   /* @   A   B   C    D   E   F   G   */  0,  0,  0,  0,   0,  0,  0,  0,   /* H   I   J   K    L   M   N   O   */  0,  0,  0,  0,   0,  0,  0,  0,   /* P   Q   R   S    T   U   V   W   */  0,  0,  0,  0,   W,  0,  0,  0,   /* X   Y   Z   [    \   ]   ^   _   */  0,  0,  0,  0,   0,  0,  0,  0,   /* `   a   b   c    d   e   f   g   */  0,  0,  0,  0,   0,  0,  0,  0,   /* h   i   j   k    l   m   n   o   */  0,  0,  0,  0,   0,  0,  0,  0,   /* p   q   r   s    t   u   v   w   */  0,  0,  0,  0,   0,  0,  0,  0,   /* x   y   z   {    |   }   ~   DEL */  C, C, C, C,  C, C, C, C,  C, C, C, C,  C, C, C, C, /* 128-143 */  C, C, C, C,  C, C, C, C,  C, C, C, C,  C, C, C, C, /* 144-159 */  0, 0, 0, 0,  0, 0, 0, 0,  0, 0, 0, 0,  0, 0, 0, 0,  0, 0, 0, 0,  0, 0, 0, 0,  0, 0, 0, 0,  0, 0, 0, 0,  0, 0, 0, 0,  0, 0, 0, 0,  0, 0, 0, 0,  0, 0, 0, 0,  0, 0, 0, 0,  0, 0, 0, 0,  0, 0, 0, 0,  0, 0, 0, 0,  0, 0, 0, 0,  0, 0, 0, 0,  0, 0, 0, 0,  0, 0, 0, 0,  0, 0, 0, 0,  0, 0, 0, 0,  0, 0, 0, 0,  0, 0, 0, 0,};#undef U#undef W#undef C#undef UW#undef UWC/* FN_PORT_SEP is the separator between host and port in file names   for non-standard port numbers.  On Unix this is normally ':', as in   "www.xemacs.org:4001/index.html".  Under Windows, we set it to +   because Windows can't handle ':' in file names.  */#define FN_PORT_SEP  (opt.restrict_files_os != restrict_windows ? ':' : '+')/* FN_QUERY_SEP is the separator between the file name and the URL   query, normally '?'.  Since Windows cannot handle '?' as part of   file name, we use '@' instead there.  */#define FN_QUERY_SEP (opt.restrict_files_os != restrict_windows ? '?' : '@')/* Quote path element, characters in [b, e), as file name, and append   the quoted string to DEST.  Each character is quoted as per   file_unsafe_char and the corresponding table.   If ESCAPED_P is non-zero, the path element is considered to be   URL-escaped and will be unescaped prior to inspection.  */static voidappend_uri_pathel (const char *b, const char *e, int escaped_p,		   struct growable *dest){  const char *p;  int quoted, outlen;  int mask;  if (opt.restrict_files_os == restrict_unix)    mask = filechr_not_unix;  else    mask = filechr_not_windows;  if (opt.restrict_files_ctrl)    mask |= filechr_control;  /* Copy [b, e) to PATHEL and URL-unescape it. */  if (escaped_p)    {      char *unescaped;      BOUNDED_TO_ALLOCA (b, e, unescaped);      url_unescape (unescaped);      b = unescaped;      e = unescaped + strlen (unescaped);    }  /* Walk the PATHEL string and check how many characters we'll need     to add for file quoting.  */  quoted = 0;  for (p = b; p < e; p++)    if (FILE_CHAR_TEST (*p, mask))      ++quoted;  /* e-b is the string length.  Each quoted char means two additional     characters in the string, hence 2*quoted.  */  outlen = (e - b) + (2 * quoted);  GROW (dest, outlen);  if (!quoted)    {      /* If there's nothing to quote, we don't need to go through the	 string the second time.  */      memcpy (TAIL (dest), b, outlen);    }  else    {      char *q = TAIL (dest);      for (p = b; p < e; p++)	{	  if (!FILE_CHAR_TEST (*p, mask))	    *q++ = *p;	  else	    {	      unsigned char ch = *p;	      *q++ = '%';	      *q++ = XNUM_TO_DIGIT (ch >> 4);	      *q++ = XNUM_TO_DIGIT (ch & 0xf);	    }	}      assert (q - TAIL (dest) == outlen);    }  TAIL_INCR (dest, outlen);}/* Append to DEST the directory structure that corresponds the   directory part of URL's path.  For example, if the URL is   http://server/dir1/dir2/file, this appends "/dir1/dir2".   Each path element ("dir1" and "dir2" in the above example) is   examined, url-unescaped, and re-escaped as file name element.   Additionally, it cuts as many directories from the path as   specified by opt.cut_dirs.  For example, if opt.cut_dirs is 1, it   will produce "bar" for the above example.  For 2 or more, it will   produce "".   Each component of the path is quoted for use as file name.  */static voidappend_dir_structure (const struct url *u, struct growable *dest){  char *pathel, *next;  int cut = opt.cut_dirs;  /* Go through the path components, de-URL-quote them, and quote them     (if necessary) as file names.  */  pathel = u->path;  for (; (next = strchr (pathel, '/')) != NULL; pathel = next + 1)    {      if (cut-- > 0)	continue;      if (pathel == next)	/* Ignore empty pathels.  */	continue;      if (dest->tail)	append_char ('/', dest);      append_uri_pathel (pathel, next, 1, dest);    }}/* Return a unique file name that matches the given URL as good as   possible.  Does not create directories on the file system.  */char *url_file_name (const struct url *u){  struct growable fnres;  char *u_file, *u_query;  char *fname, *unique;  fnres.base = NULL;  fnres.size = 0;  fnres.tail = 0;  /* Start with the directory prefix, if specified. */  if (opt.dir_prefix)    append_string (opt.dir_prefix, &fnres);  /* If "dirstruct" is turned on (typically the case with -r), add     the host and port (unless those have been turned off) and     directory structure.  */  if (opt.dirstruct)    {      if (opt.add_hostdir)	{	  if (fnres.tail)	    append_char ('/', &fnres);	  append_string (u->host, &fnres);	  if (u->port != scheme_default_port (u->scheme))	    {	      char portstr[24];	      number_to_string (portstr, u->port);	      append_char (FN_PORT_SEP, &fnres);	      append_string (portstr, &fnres);	    }	}      append_dir_structure (u, &fnres);    }  /* Add the file name. */  if (fnres.tail)    append_char ('/', &fnres);  u_file = *u->file ? u->file : "index.html";  append_uri_pathel (u_file, u_file + strlen (u_file), 0, &fnres);  /* Append "?query" to the file name. */  u_query = u->query && *u->query ? u->query : NULL;  if (u_query)    {      append_char (FN_QUERY_SEP, &fnres);      append_uri_pathel (u_query, u_query + strlen (u_query), 1, &fnres);    }  /* Zero-terminate the file name. */  append_char ('\0', &fnres);  fname = fnres.base;  /* Check the cases in which the unique extensions are not used:     1) Clobbering is turned off (-nc).     2) Retrieval with regetting.     3) Timestamping is used.     4) Hierarchy is built.     The exception is the case when file does exist and is a     directory (see `mkalldirs' for explanation).  */  if ((opt.noclobber || opt.always_rest || opt.timestamping || opt.dirstruct)      && !(file_exists_p (fname) && !file_non_directory_p (fname)))    return fname;  unique = unique_name (fname, 1);  if (unique != fname)    xfree (fname);  return unique;}/* Return the length of URL's path.  Path is considered to be   terminated by one of '?', ';', '#', or by the end of the   string.  */
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -