url.c

来自「Wget很好的处理了http和ftp的下载,很值得学习的经典代码」· C语言代码 · 共 2,138 行 · 第 1/5 页
2,138 行
      /* If we suspect that a transformation has rendered what         url_string might return different from URL_ENCODED, rebuild         u->url using url_string.  */      u->url = url_string (u, URL_AUTH_SHOW);      if (url_encoded != url)        xfree ((char *) url_encoded);    }  else    {      if (url_encoded == url)        u->url = xstrdup (url);      else        u->url = url_encoded;    }  return u; error:  /* Cleanup in case of error: */  if (url_encoded && url_encoded != url)    xfree (url_encoded);  /* Transmit the error code to the caller, if the caller wants to     know.  */  if (error)    *error = error_code;  return NULL;}/* Return the error message string from ERROR_CODE, which should have   been retrieved from url_parse.  The error message is translated.  */const char *url_error (int error_code){  assert (error_code >= 0 && error_code < countof (parse_errors));  return _(parse_errors[error_code]);}/* Split PATH into DIR and FILE.  PATH comes from the URL and is   expected to be URL-escaped.   The path is split into directory (the part up to the last slash)   and file (the part after the last slash), which are subsequently   unescaped.  Examples:   PATH                 DIR           FILE   "foo/bar/baz"        "foo/bar"     "baz"   "foo/bar/"           "foo/bar"     ""   "foo"                ""            "foo"   "foo/bar/baz%2fqux"  "foo/bar"     "baz/qux" (!)   DIR and FILE are freshly allocated.  */static voidsplit_path (const char *path, char **dir, char **file){  char *last_slash = strrchr (path, '/');  if (!last_slash)    {      *dir = xstrdup ("");      *file = xstrdup (path);    }  else    {      *dir = strdupdelim (path, last_slash);      *file = xstrdup (last_slash + 1);    }  url_unescape (*dir);  url_unescape (*file);}/* Note: URL's "full path" is the path with the query string and   params appended.  The "fragment" (#foo) is intentionally ignored,   but that might be changed.  
For example, if the original URL was   "http://host:port/foo/bar/baz;bullshit?querystring#uselessfragment",   the full path will be "/foo/bar/baz;bullshit?querystring".  *//* Return the length of the full path, without the terminating   zero.  */static intfull_path_length (const struct url *url){  int len = 0;#define FROB(el) if (url->el) len += 1 + strlen (url->el)  FROB (path);  FROB (params);  FROB (query);#undef FROB  return len;}/* Write out the full path. */static voidfull_path_write (const struct url *url, char *where){#define FROB(el, chr) do {                      \  char *f_el = url->el;                         \  if (f_el) {                                   \    int l = strlen (f_el);                      \    *where++ = chr;                             \    memcpy (where, f_el, l);                    \    where += l;                                 \  }                                             \} while (0)  FROB (path, '/');  FROB (params, ';');  FROB (query, '?');#undef FROB}/* Public function for getting the "full path".  E.g. if u->path is   "foo/bar" and u->query is "param=value", full_path will be   "/foo/bar?param=value". */char *url_full_path (const struct url *url){  int length = full_path_length (url);  char *full_path = xmalloc (length + 1);  full_path_write (url, full_path);  full_path[length] = '\0';  return full_path;}/* Unescape CHR in an otherwise escaped STR.  Used to selectively   escaping of certain characters, such as "/" and ":".  Returns a   count of unescaped chars.  
*/static voidunescape_single_char (char *str, char chr){  const char c1 = XNUM_TO_DIGIT (chr >> 4);  const char c2 = XNUM_TO_DIGIT (chr & 0xf);  char *h = str;                /* hare */  char *t = str;                /* tortoise */  for (; *h; h++, t++)    {      if (h[0] == '%' && h[1] == c1 && h[2] == c2)        {          *t = chr;          h += 2;        }      else        *t = *h;    }  *t = '\0';}/* Escape unsafe and reserved characters, except for the slash   characters.  */static char *url_escape_dir (const char *dir){  char *newdir = url_escape_1 (dir, urlchr_unsafe | urlchr_reserved, 1);  if (newdir == dir)    return (char *)dir;  unescape_single_char (newdir, '/');  return newdir;}/* Sync u->path and u->url with u->dir and u->file.  Called after   u->file or u->dir have been changed, typically by the FTP code.  */static voidsync_path (struct url *u){  char *newpath, *efile, *edir;  xfree (u->path);  /* u->dir and u->file are not escaped.  URL-escape them before     reassembling them into u->path.  That way, if they contain     separators like '?' or even if u->file contains slashes, the     path will be correctly assembled.  (u->file can contain slashes     if the URL specifies it with %2f, or if an FTP server returns     it.)  */  edir = url_escape_dir (u->dir);  efile = url_escape_1 (u->file, urlchr_unsafe | urlchr_reserved, 1);  if (!*edir)    newpath = xstrdup (efile);  else    {      int dirlen = strlen (edir);      int filelen = strlen (efile);      /* Copy "DIR/FILE" to newpath. */      char *p = newpath = xmalloc (dirlen + 1 + filelen + 1);      memcpy (p, edir, dirlen);      p += dirlen;      *p++ = '/';      memcpy (p, efile, filelen);      p += filelen;      *p = '\0';    }  u->path = newpath;  if (edir != u->dir)    xfree (edir);  if (efile != u->file)    xfree (efile);  /* Regenerate u->url as well.  */  xfree (u->url);  u->url = url_string (u, URL_AUTH_SHOW);}/* Mutators.  Code in ftp.c insists on changing u->dir and u->file.   
This way we can sync u->path and u->url when they get changed.  */voidurl_set_dir (struct url *url, const char *newdir){  xfree (url->dir);  url->dir = xstrdup (newdir);  sync_path (url);}voidurl_set_file (struct url *url, const char *newfile){  xfree (url->file);  url->file = xstrdup (newfile);  sync_path (url);}voidurl_free (struct url *url){  xfree (url->host);  xfree (url->path);  xfree (url->url);  xfree_null (url->params);  xfree_null (url->query);  xfree_null (url->fragment);  xfree_null (url->user);  xfree_null (url->passwd);  xfree (url->dir);  xfree (url->file);  xfree (url);}/* Create all the necessary directories for PATH (a file).  Calls   make_directory internally.  */intmkalldirs (const char *path){  const char *p;  char *t;  struct_stat st;  int res;  p = path + strlen (path);  for (; *p != '/' && p != path; p--)    ;  /* Don't create if it's just a file.  */  if ((p == path) && (*p != '/'))    return 0;  t = strdupdelim (path, p);  /* Check whether the directory exists.  */  if ((stat (t, &st) == 0))    {      if (S_ISDIR (st.st_mode))        {          xfree (t);          return 0;        }      else        {          /* If the dir exists as a file name, remove it first.  This             is *only* for Wget to work with buggy old CERN http             servers.  Here is the scenario: When Wget tries to             retrieve a directory without a slash, e.g.             http://foo/bar (bar being a directory), CERN server will             not redirect it too http://foo/bar/ -- it will generate a             directory listing containing links to bar/file1,             bar/file2, etc.  Wget will lose because it saves this             HTML listing to a file `bar', so it cannot create the             directory.  To work around this, if the file of the same             name exists, we just remove it and create the directory             anyway.  
*/          DEBUGP (("Removing %s because of directory danger!\n", t));          unlink (t);        }    }  res = make_directory (t);  if (res != 0)    logprintf (LOG_NOTQUIET, "%s: %s", t, strerror (errno));  xfree (t);  return res;}/* Functions for constructing the file name out of URL components.  *//* A growable string structure, used by url_file_name and friends.   This should perhaps be moved to utils.c.   The idea is to have a convenient and efficient way to construct a   string by having various functions append data to it.  Instead of   passing the obligatory BASEVAR, SIZEVAR and TAILPOS to all the   functions in questions, we pass the pointer to this struct.  */struct growable {  char *base;  int size;  int tail;};/* Ensure that the string can accept APPEND_COUNT more characters past   the current TAIL position.  If necessary, this will grow the string   and update its allocated size.  If the string is already large   enough to take TAIL+APPEND_COUNT characters, this does nothing.  */#define GROW(g, append_size) do {                                       \  struct growable *G_ = g;                                              \  DO_REALLOC (G_->base, G_->size, G_->tail + append_size, char);        \} while (0)/* Return the tail position of the string. */#define TAIL(r) ((r)->base + (r)->tail)/* Move the tail position by APPEND_COUNT characters. */#define TAIL_INCR(r, append_count) ((r)->tail += append_count)/* Append the string STR to DEST.  NOTICE: the string in DEST is not   terminated.  */static voidappend_string (const char *str, struct growable *dest){  int l = strlen (str);  GROW (dest, l);  memcpy (TAIL (dest), str, l);  TAIL_INCR (dest, l);}/* Append CH to DEST.  For example, append_char (0, DEST)   zero-terminates DEST.  
*/

/* The buffer is grown by one character first, CH is stored at the
   current tail offset, and the tail then advances by one.  */

static void
append_char (char ch, struct growable *dest)
{
  GROW (dest, 1);
  dest->base[dest->tail] = ch;
  dest->tail += 1;
}

/* Bit flags describing why a character is unsuitable for a file
   name.  They are combined in filechr_table and tested with
   FILE_CHAR_TEST.  */
enum {
  filechr_not_unix    = 1,      /* unusable on Unix, / and \0 */
  filechr_not_windows = 2,      /* unusable on Windows, one of \|/<>?:*" */
  filechr_control     = 4       /* a control character, e.g. 0-31 */
};

/* Non-zero if character C has any of the MASK flags set in
   filechr_table.  C is converted to unsigned char before indexing,
   so a negative `char' value is safe to pass.  */
#define FILE_CHAR_TEST(c, mask) (filechr_table[(unsigned char)(c)] & (mask))

/* Shorthands for the table: */
#define U filechr_not_unix
#define W filechr_not_windows
#define C filechr_control

#define UW (U|W)
#define UWC (U|W|C)

/* Table of characters unsafe under various conditions (see above).
   It is indexed by the unsigned character value; each entry is a
   combination of the filechr_* flags.

   Arguably we could also claim `%' to be unsafe, since we use it as
   the escape character.  If we ever want to be able to reliably
   translate file name back to URL, this would become crucial.
   Right now, it's better to be minimal in escaping.  */

static const unsigned char filechr_table[256] =
{
UWC,  C,  C,  C,   C,  C,  C,  C,   /* NUL SOH STX ETX  EOT ENQ ACK BEL */
  C,  C,  C,  C,   C,  C,  C,  C,   /* BS  HT  LF  VT   FF  CR  SO  SI  */
  C,  C,  C,  C,   C,  C,  C,  C,   /* DLE DC1 DC2 DC3  DC4 NAK SYN ETB */
  C,  C,  C,  C,   C,  C,  C,  C,   /* CAN EM  SUB ESC  FS  GS  RS  US  */
  0,  0,  W,  0,   0,  0,  0,  0,   /* SP  !   "   #    $   %   &   '   */
  0,  0,  W,  0,   0,  0,  0, UW,   /* (   )   *   +    ,   -   .   /   */
  0,  0,  0,  0,   0,  0,  0,  0,   /* 0   1   2   3    4   5   6   7   */
  0,  0,  W,  0,   W,  0,  W,  W,   /* 8   9   :   ;    <   =   >   ?   */
  0,  0,  0,  0,   0,  0,  0,  0,   /* @   A   B   C    D   E   F   G   */
  0,  0,  0,  0,   0,  0,  0,  0,   /* H   I   J   K    L   M   N   O   */
  0,  0,  0,  0,   0,  0,  0,  0,   /* P   Q   R   S    T   U   V   W   */
  0,  0,  0,  0,   W,  0,  0,  0,   /* X   Y   Z   [    \   ]   ^   _   */
  0,  0,  0,  0,   0,  0,  0,  0,   /* `   a   b   c    d   e   f   g   */
  0,  0,  0,  0,   0,  0,  0,  0,   /* h   i   j   k    l   m   n   o   */
  0,  0,  0,  0,   0,  0,  0,  0,   /* p   q   r   s    t   u   v   w   */
  0,  0,  0,  0,   W,  0,  0,  C,   /* x   y   z   {    |   }   ~   DEL */

  C, C, C, C,  C, C, C, C,  C, C, C, C,  C, C, C, C, /* 128-143 */
  C, C, C, C,  C, C, C, C,  C, C, C, C,  C, C, C, C, /* 144-159 */
  0, 0, 0, 0,  0, 0, 0, 0,  0, 0, 0, 0,  0, 0, 0, 0, /* 160-175 */
  0, 0, 0, 0,  0, 0, 0, 0,  0, 0, 0, 0,  0, 0, 0, 0, /* 176-191 */

  0, 0, 0, 0,  0, 0, 0, 0,  0, 0, 0, 0,  0, 0, 0, 0, /* 192-207 */
  0, 0, 0, 0,  0, 0, 0, 0,  0, 0, 0, 0,  0, 0, 0, 0, /* 208-223 */
  0, 0, 0, 0,  0, 0, 0, 0,  0, 0, 0, 0,  0, 0, 0, 0, /* 224-239 */
  0, 0, 0, 0,  0, 0, 0, 0,  0, 0, 0, 0,  0, 0, 0, 0, /* 240-255 */
};

/* The shorthands are only meant for the table above.  */
#undef U
#undef W
#undef C
#undef UW
#undef UWC

/* FN_PORT_SEP is the separator between host and port in file names
   for non-standard port numbers.  On Unix this is normally ':', as in
   "www.xemacs.org:4001/index.html".  Under Windows, we set it to +
   because Windows can't handle ':' in file names.  */
url.c - 源码说明

本页面展示了「Wget很好的处理了http和ftp的下载,很值得学习的经典代码」中的 url.c 源码文件，采用 C语言编程语言编写，共 2,138 行代码。您可以在线阅读完整代码内容，也可以返回资源详情页下载完整源码包进行本地学习和开发。
虫虫下载站收录了大量与Wget相关的技术资源，包括源代码、技术文档、电路图等，是电子工程师和嵌入式开发者的专业学习平台。
⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?