⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 url.c

📁 Wget很好的处理了http和ftp的下载,很值得学习的经典代码
💻 C
📖 第 1 页 / 共 5 页
字号:
  return SCHEME_INVALID;}#define SCHEME_CHAR(ch) (ISALNUM (ch) || (ch) == '-' || (ch) == '+')/* Return 1 if the URL begins with any "scheme", 0 otherwise.  As   currently implemented, it returns true if URL begins with   [-+a-zA-Z0-9]+: .  */boolurl_has_scheme (const char *url){  const char *p = url;  /* The first char must be a scheme char. */  if (!*p || !SCHEME_CHAR (*p))    return false;  ++p;  /* Followed by 0 or more scheme chars. */  while (*p && SCHEME_CHAR (*p))    ++p;  /* Terminated by ':'. */  return *p == ':';}intscheme_default_port (enum url_scheme scheme){  return supported_schemes[scheme].default_port;}voidscheme_disable (enum url_scheme scheme){  supported_schemes[scheme].flags |= scm_disabled;}/* Skip the username and password, if present in the URL.  The   function should *not* be called with the complete URL, but with the   portion after the scheme.   If no username and password are found, return URL.  */static const char *url_skip_credentials (const char *url){  /* Look for '@' that comes before terminators, such as '/', '?',     '#', or ';'.  */  const char *p = (const char *)strpbrk (url, "@/?#;");  if (!p || *p != '@')    return url;  return p + 1;}/* Parse credentials contained in [BEG, END).  The region is expected   to have come from a URL and is unescaped.  */static boolparse_credentials (const char *beg, const char *end, char **user, char **passwd){  char *colon;  const char *userend;  if (beg == end)    return false;               /* empty user name */  colon = memchr (beg, ':', end - beg);  if (colon == beg)    return false;               /* again empty user name */  if (colon)    {      *passwd = strdupdelim (colon + 1, end);      userend = colon;      url_unescape (*passwd);    }  else    {      *passwd = NULL;      userend = end;    }  *user = strdupdelim (beg, userend);  url_unescape (*user);  return true;}/* Used by main.c: detect URLs written using the "shorthand" URL forms   originally popularized by Netscape and NcFTP.  HTTP shorthands look   like this:   www.foo.com[:port]/dir/file   -> http://www.foo.com[:port]/dir/file   www.foo.com[:port]            -> http://www.foo.com[:port]   FTP shorthands look like this:   foo.bar.com:dir/file          -> ftp://foo.bar.com/dir/file   foo.bar.com:/absdir/file      -> ftp://foo.bar.com//absdir/file   If the URL needs not or cannot be rewritten, return NULL.  */char *rewrite_shorthand_url (const char *url){  const char *p;  char *ret;  if (url_scheme (url) != SCHEME_INVALID)    return NULL;  /* Look for a ':' or '/'.  The former signifies NcFTP syntax, the     latter Netscape.  */  p = strpbrk (url, ":/");  if (p == url)    return NULL;  /* If we're looking at "://", it means the URL uses a scheme we     don't support, which may include "https" when compiled without     SSL support.  Don't bogusly rewrite such URLs.  */  if (p && p[0] == ':' && p[1] == '/' && p[2] == '/')    return NULL;  if (p && *p == ':')    {      /* Colon indicates ftp, as in foo.bar.com:path.  Check for         special case of http port number ("localhost:10000").  */      int digits = strspn (p + 1, "0123456789");      if (digits && (p[1 + digits] == '/' || p[1 + digits] == '\0'))        goto http;      /* Turn "foo.bar.com:path" to "ftp://foo.bar.com/path". */      ret = aprintf ("ftp://%s", url);      ret[6 + (p - url)] = '/';    }  else    {    http:      /* Just prepend "http://" to URL. */      ret = aprintf ("http://%s", url);    }  return ret;}static void split_path (const char *, char **, char **);/* Like strpbrk, with the exception that it returns the pointer to the   terminating zero (end-of-string aka "eos") if no matching character   is found.  */static inline char *strpbrk_or_eos (const char *s, const char *accept){  char *p = strpbrk (s, accept);  if (!p)    p = strchr (s, '\0');  return p;}/* Turn STR into lowercase; return true if a character was actually   changed. */static boollowercase_str (char *str){  bool changed = false;  for (; *str; str++)    if (ISUPPER (*str))      {        changed = true;        *str = TOLOWER (*str);      }  return changed;}static const char *init_seps (enum url_scheme scheme){  static char seps[8] = ":/";  char *p = seps + 2;  int flags = supported_schemes[scheme].flags;  if (flags & scm_has_params)    *p++ = ';';  if (flags & scm_has_query)    *p++ = '?';  if (flags & scm_has_fragment)    *p++ = '#';  *p++ = '\0';  return seps;}static const char *parse_errors[] = {#define PE_NO_ERROR                     0  N_("No error"),#define PE_UNSUPPORTED_SCHEME           1  N_("Unsupported scheme"),#define PE_INVALID_HOST_NAME            2  N_("Invalid host name"),#define PE_BAD_PORT_NUMBER              3  N_("Bad port number"),#define PE_INVALID_USER_NAME            4  N_("Invalid user name"),#define PE_UNTERMINATED_IPV6_ADDRESS    5  N_("Unterminated IPv6 numeric address"),#define PE_IPV6_NOT_SUPPORTED           6  N_("IPv6 addresses not supported"),#define PE_INVALID_IPV6_ADDRESS         7  N_("Invalid IPv6 numeric address")};/* Parse a URL.   Return a new struct url if successful, NULL on error.  In case of   error, and if ERROR is not NULL, also set *ERROR to the appropriate   error code. */struct url *url_parse (const char *url, int *error){  struct url *u;  const char *p;  bool path_modified, host_modified;  enum url_scheme scheme;  const char *seps;  const char *uname_b,     *uname_e;  const char *host_b,      *host_e;  const char *path_b,      *path_e;  const char *params_b,    *params_e;  const char *query_b,     *query_e;  const char *fragment_b,  *fragment_e;  int port;  char *user = NULL, *passwd = NULL;  char *url_encoded = NULL;  int error_code;  scheme = url_scheme (url);  if (scheme == SCHEME_INVALID)    {      error_code = PE_UNSUPPORTED_SCHEME;      goto error;    }  url_encoded = reencode_escapes (url);  p = url_encoded;  p += strlen (supported_schemes[scheme].leading_string);  uname_b = p;  p = url_skip_credentials (p);  uname_e = p;  /* scheme://user:pass@host[:port]... */  /*                    ^              */  /* We attempt to break down the URL into the components path,     params, query, and fragment.  They are ordered like this:       scheme://host[:port][/path][;params][?query][#fragment]  */  path_b     = path_e     = NULL;  params_b   = params_e   = NULL;  query_b    = query_e    = NULL;  fragment_b = fragment_e = NULL;  /* Initialize separators for optional parts of URL, depending on the     scheme.  For example, FTP has params, and HTTP and HTTPS have     query string and fragment. */  seps = init_seps (scheme);  host_b = p;  if (*p == '[')    {      /* Handle IPv6 address inside square brackets.  Ideally we'd         just look for the terminating ']', but rfc2732 mandates         rejecting invalid IPv6 addresses.  */      /* The address begins after '['. */      host_b = p + 1;      host_e = strchr (host_b, ']');      if (!host_e)        {          error_code = PE_UNTERMINATED_IPV6_ADDRESS;          goto error;        }#ifdef ENABLE_IPV6      /* Check if the IPv6 address is valid. */      if (!is_valid_ipv6_address(host_b, host_e))        {          error_code = PE_INVALID_IPV6_ADDRESS;          goto error;        }      /* Continue parsing after the closing ']'. */      p = host_e + 1;#else      error_code = PE_IPV6_NOT_SUPPORTED;      goto error;#endif      /* The closing bracket must be followed by a separator or by the         null char.  */      /* http://[::1]... */      /*             ^   */      if (!strchr (seps, *p))        {          /* Trailing garbage after []-delimited IPv6 address. */          error_code = PE_INVALID_HOST_NAME;          goto error;        }    }  else    {      p = strpbrk_or_eos (p, seps);      host_e = p;    }  ++seps;                       /* advance to '/' */  if (host_b == host_e)    {      error_code = PE_INVALID_HOST_NAME;      goto error;    }  port = scheme_default_port (scheme);  if (*p == ':')    {      const char *port_b, *port_e, *pp;      /* scheme://host:port/tralala */      /*              ^             */      ++p;      port_b = p;      p = strpbrk_or_eos (p, seps);      port_e = p;      /* Allow empty port, as per rfc2396. */      if (port_b != port_e)        for (port = 0, pp = port_b; pp < port_e; pp++)          {            if (!ISDIGIT (*pp))              {                /* http://host:12randomgarbage/blah */                /*               ^                  */                error_code = PE_BAD_PORT_NUMBER;                goto error;              }            port = 10 * port + (*pp - '0');            /* Check for too large port numbers here, before we have               a chance to overflow on bogus port values.  */            if (port > 0xffff)              {                error_code = PE_BAD_PORT_NUMBER;                goto error;              }          }    }  /* Advance to the first separator *after* '/' (either ';' or '?',     depending on the scheme).  */  ++seps;  /* Get the optional parts of URL, each part being delimited by     current location and the position of the next separator.  */#define GET_URL_PART(sepchar, var) do {                         \  if (*p == sepchar)                                            \    var##_b = ++p, var##_e = p = strpbrk_or_eos (p, seps);      \  ++seps;                                                       \} while (0)  GET_URL_PART ('/', path);  if (supported_schemes[scheme].flags & scm_has_params)    GET_URL_PART (';', params);  if (supported_schemes[scheme].flags & scm_has_query)    GET_URL_PART ('?', query);  if (supported_schemes[scheme].flags & scm_has_fragment)    GET_URL_PART ('#', fragment);#undef GET_URL_PART  assert (*p == 0);  if (uname_b != uname_e)    {      /* http://user:pass@host */      /*        ^         ^    */      /*     uname_b   uname_e */      if (!parse_credentials (uname_b, uname_e - 1, &user, &passwd))        {          error_code = PE_INVALID_USER_NAME;          goto error;        }    }  u = xnew0 (struct url);  u->scheme = scheme;  u->host   = strdupdelim (host_b, host_e);  u->port   = port;  u->user   = user;  u->passwd = passwd;  u->path = strdupdelim (path_b, path_e);  path_modified = path_simplify (u->path);  split_path (u->path, &u->dir, &u->file);  host_modified = lowercase_str (u->host);  /* Decode %HH sequences in host name.  This is important not so much     to support %HH sequences in host names (which other browser     don't), but to support binary characters (which will have been     converted to %HH by reencode_escapes).  */  if (strchr (u->host, '%'))    {      url_unescape (u->host);      host_modified = true;    }  if (params_b)    u->params = strdupdelim (params_b, params_e);  if (query_b)    u->query = strdupdelim (query_b, query_e);  if (fragment_b)    u->fragment = strdupdelim (fragment_b, fragment_e);  if (path_modified || u->fragment || host_modified || path_b == path_e)    {

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -