📄 url.c

📁 linux下的网络下载工具prozilla的源码
💻 C
📖 第 1 页 / 共 2 页
字号:
上一页 12
  if (!l)    return URLUNKNOWN;  /* Add protocol offset.  */  url += l;  /* Is there an `@' character?  */  for (p = url; *p && *p != '/'; p++)    if (*p == '@')      break;  /* If not, return.  */  if (*p != '@')    return URLOK;  /* Else find the username and password.  */  for (p = q = col = url; *p && *p != '/'; p++)  {    if (*p == ':' && !*user)    {      *user = kmalloc(p - url + 1);      memcpy(*user, url, p - url);      (*user)[p - url] = '\0';      col = p + 1;    }    if (*p == '@')      q = p;  }  /* Decide whether you have only the username or both.  */  where = *user ? passwd : user;  *where = kmalloc(q - col + 1);  memcpy(*where, col, q - col);  (*where)[q - col] = '\0';  return URLOK;}/* Return the URL as fine-formed string, with a proper protocol, optional port   number, directory and optional user/password.  If `hide' is non-zero (as it   is when we're calling this on a URL we plan to print, but not when calling it   to canonicalize a URL for use within the program), password will be hidden.   The forbidden characters in the URL will be cleansed.  */char *str_url(const urlinfo * u, int hide){  char *res, *host, *user, *passwd, *proto_name, *dir, *file;  int i, l, ln, lu, lh, lp, lf, ld;  unsigned short proto_default_port;  /* Look for the protocol name.  */  for (i = 0; i < ARRAY_SIZE(sup_protos); i++)    if (sup_protos[i].ind == u->proto)      break;  if (i == ARRAY_SIZE(sup_protos))    return NULL;  proto_name = sup_protos[i].name;  proto_default_port = sup_protos[i].port;  host = encode_string(u->host);  dir = encode_string(u->dir);  file = encode_string(u->file);  user = passwd = NULL;  if (u->user)    user = encode_string(u->user);  if (u->passwd)  {    if (hide)      /* Don't output the password, or someone might see it over the user's         shoulder (or in saved wget output).  Don't give away the number of         characters in the password, either, as we did in past versions of         this code, when we replaced the password characters with 'x's. */      passwd = kstrdup("<password>");    else      passwd = encode_string(u->passwd);  }  if (u->proto == URLFTP && *dir == '/')  {    char *tmp = kmalloc(strlen(dir) + 3);    /*sprintf (tmp, "%%2F%s", dir + 1); */    tmp[0] = '%';    tmp[1] = '2';    tmp[2] = 'F';    strcpy(tmp + 3, dir + 1);    kfree(dir);    dir = tmp;  }  ln = strlen(proto_name);  lu = user ? strlen(user) : 0;  lp = passwd ? strlen(passwd) : 0;  lh = strlen(host);  ld = strlen(dir);  lf = strlen(file);  res = kmalloc(ln + lu + lp + lh + ld + lf + 20);	/* safe sex */  /* sprintf (res, "%s%s%s%s%s%s:%d/%s%s%s", proto_name,     (user ? user : ""), (passwd ? ":" : ""),     (passwd ? passwd : ""), (user ? "@" : ""),     host, u->port, dir, *dir ? "/" : "", file); */  l = 0;  memcpy(res, proto_name, ln);  l += ln;  if (user)  {    memcpy(res + l, user, lu);    l += lu;    if (passwd)    {      res[l++] = ':';      memcpy(res + l, passwd, lp);      l += lp;    }    res[l++] = '@';  }  memcpy(res + l, host, lh);  l += lh;  if (u->port != proto_default_port)  {    res[l++] = ':';    sprintf(res + l, "%ld", (long) u->port);    l += numdigit(u->port);  }  res[l++] = '/';  memcpy(res + l, dir, ld);  l += ld;  if (*dir)    res[l++] = '/';  strcpy(res + l, file);  kfree(host);  kfree(dir);  kfree(file);  kfree(user);  kfree(passwd);  return res;}/* Extract the given URL of the form   (http:|ftp:)// (user (:password)?@)?hostname (:port)? (/path)?   1. hostname (terminated with `/' or `:')   2. port number (terminated with `/'), or chosen for the protocol   3. dirname (everything after hostname)   Most errors are handled.  No allocation is done, you must supply   pointers to allocated memory.   ...and a host of other stuff :-)   - Recognizes hostname:dir/file for FTP and     hostname (:portnum)?/dir/file for HTTP.   - Parses the path to yield directory and file   - Parses the URL to yield the username and passwd (if present)   - Decodes the strings, in case they contain "forbidden" characters   - Writes the result to struct urlinfo   If the argument STRICT is set, it recognizes only the canonical   form.  */uerr_t parseurl(const char *url, urlinfo * u, int strict){  int i, l, abs_ftp;  int recognizable;		/* Recognizable URL is the one where				   the protocol name was explicitly				   named, i.e. it wasn't deduced from				   the URL format.  */  uerr_t type;  memset(u, 0, sizeof(urlinfo));  recognizable = has_proto(url);  if (strict && !recognizable)    return URLUNKNOWN;  for (i = 0, l = 0; i < ARRAY_SIZE(sup_protos); i++)  {    l = strlen(sup_protos[i].name);    if (!strncasecmp(sup_protos[i].name, url, l))      break;  }  /* If protocol is recognizable, but unsupported, bail out, else     suppose unknown.  */  if (recognizable && i == ARRAY_SIZE(sup_protos))    return URLUNKNOWN;  else if (i == ARRAY_SIZE(sup_protos))    type = URLUNKNOWN;  else    u->proto = type = sup_protos[i].ind;  if (type == URLUNKNOWN)    l = 0;  /* Allow a username and password to be specified (i.e. just skip     them for now).  */  if (recognizable)    l += skip_uname(url + l);  for (i = l; url[i] && url[i] != ':' && url[i] != '/'; i++);  if (i == l)    return URLBADHOST;  /* Get the hostname.  */  u->host = strdupdelim(url + l, url + i);  /* Assume no port has been given.  */  u->port = 0;  if (url[i] == ':')  {    /* We have a colon delimiting the hostname.  It could mean that       a port number is following it, or a directory.  */    if (isdigit(url[++i]))	/* A port number */    {      if (type == URLUNKNOWN)	u->proto = type = URLHTTP;      for (; url[i] && url[i] != '/'; i++)	if (isdigit(url[i]))	  u->port = 10 * u->port + (url[i] - '0');	else	  return URLBADPORT;      if (!u->port)	return URLBADPORT;    } else if (type == URLUNKNOWN)	/* or a directory */      u->proto = type = URLFTP;    else			/* or just a misformed port number */      return URLBADPORT;  } else if (type == URLUNKNOWN)    u->proto = type = URLHTTP;  if (!u->port)  {    int ind;    for (ind = 0; ind < ARRAY_SIZE(sup_protos); ind++)      if (sup_protos[ind].ind == type)	break;    if (ind == ARRAY_SIZE(sup_protos))      return URLUNKNOWN;    u->port = sup_protos[ind].port;  }  /* Some delimiter troubles...  */  if (url[i] == '/' && url[i - 1] != ':')    ++i;  if (type == URLHTTP)    while (url[i] && url[i] == '/')      ++i;  u->path = kmalloc(strlen(url + i) + 8);  strcpy(u->path, url + i);  if (type == URLFTP)  {    u->ftp_type = process_ftp_type(u->path);    /* #### We don't handle type `d' correctly yet.  */    if (!u->ftp_type || toupper(u->ftp_type) == 'D')      u->ftp_type = 'I';  }  /* Parse the username and password (if existing).  */  parse_uname(url, &u->user, &u->passwd);  /* Decode the strings, as per RFC 1738.  */  decode_string(u->host);  decode_string(u->path);  if (u->user)    decode_string(u->user);  if (u->passwd)    decode_string(u->passwd);  /* Parse the directory.  */  parse_dir(u->path, &u->dir, &u->file);  /* Simplify the directory.  */  path_simplify(u->dir);  /* Remove the leading `/' in HTTP.  */  if (type == URLHTTP && *u->dir == '/')    strcpy(u->dir, u->dir + 1);  /* Strip trailing `/'.  */  l = strlen(u->dir);  if (l > 1 && u->dir[l - 1] == '/')    u->dir[l - 1] = '\0';  /* Re-create the path: */  abs_ftp = (u->proto == URLFTP && *u->dir == '/');  /*  sprintf (u->path, "%s%s%s%s", abs_ftp ? "%2F": "/",     abs_ftp ? (u->dir + 1) : u->dir, *u->dir ? "/" : "", u->file); */  strcpy(u->path, abs_ftp ? "%2F" : "/");  strcat(u->path, abs_ftp ? (u->dir + 1) : u->dir);  strcat(u->path, *u->dir ? "/" : "");  strcat(u->path, u->file);  ENCODE(u->path);  /* Create the clean URL.  */  u->url = str_url(u, 0);  return URLOK;}/****************************************************************************** This function constructs and returns a malloced copy of the relative link from two pieces of information: local name of the referring file (s1) and local name of the referred file (s2). So, if s1 is "jagor.srce.hr/index.html" and s2 is "jagor.srce.hr/images/news.gif", new name should be "images/news.gif". Alternately, if the s1 is "fly.cc.fer.hr/ioccc/index.html", and s2 is "fly.cc.fer.hr/images/fly.gif", new name should be "../images/fly.gif". Caveats: s1 should not begin with '/', unless s2 begins with '/' too. s1 should not contain things like ".." and such -- construct_relative("fly/ioccc/../index.html", "fly/images/fly.gif") will fail. (workaround is to call path_simplify on s1).******************************************************************************/char *construct_relative(const char *s1, const char *s2){  int i, cnt, sepdirs1;  char *res;  if (*s2 == '/')    return kstrdup(s2);  /* s1 should *not* be absolute, if s2 wasn't. */  assert(*s1 != '/');  i = cnt = 0;  /* Skip the directories common to both strings. */  while (1)  {    for (;	 s1[i] && s2[i] && s1[i] == s2[i] && s1[i] != '/'	 && s2[i] != '/'; i++);    if (s1[i] == '/' && s2[i] == '/')      cnt = ++i;    else      break;  }  for (sepdirs1 = 0; s1[i]; i++)    if (s1[i] == '/')      ++sepdirs1;  /* Now, construct the file as of:     - ../ repeated sepdirs1 time     - all the non-mutual directories of s2. */  res = kmalloc(3 * sepdirs1 + strlen(s2 + cnt) + 1);  for (i = 0; i < sepdirs1; i++)    memcpy(res + 3 * i, "../", 3);  strcpy(res + 3 * i, s2 + cnt);  return res;}/****************************************************************************** Add a URL to the list.******************************************************************************/urlpos *add_url(urlpos * l, const char *url, const char *file){  urlpos *t, *b;  t = kmalloc(sizeof(urlpos));  memset(t, 0, sizeof(*t));  t->url = kstrdup(url);  t->local_name = kstrdup(file);  if (!l)    return t;  b = l;  while (l->next)    l = l->next;  l->next = t;  return b;}/* Find the last occurrence of character C in the range [b, e), or   NULL, if none are present.  This is almost completely equivalent to   { *e = '\0'; return strrchr(b); }, except that it doesn't change   the contents of the string.  */const char *find_last_char(const char *b, const char *e, char c){  for (; e > b; e--)    if (*e == c)      return e;  return NULL;}/* Resolve the result of "linking" a base URI (BASE) to a   link-specified URI (LINK).   Either of the URIs may be absolute or relative, complete with the   host name, or path only.  This tries to behave "reasonably" in all   foreseeable cases.  It employs little specific knowledge about   protocols or URL-specific stuff -- it just works on strings.   The parameters LINKLENGTH is useful if LINK is not zero-terminated.   See uri_merge for a gentler interface to this functionality.   #### This function should handle `./' and `../' so that the evil   path_simplify can go.  */char *uri_merge_1(const char *base, const char *link, int linklength,		  int no_proto){  char *constr;  if (no_proto)  {    const char *end = base + urlpath_length(base);    if (*link != '/')    {      /* LINK is a relative URL: we need to replace everything         after last slash (possibly empty) with LINK.         So, if BASE is "whatever/foo/bar", and LINK is "qux/xyzzy",         our result should be "whatever/foo/qux/xyzzy".  */      int need_explicit_slash = 0;      int span;      const char *start_insert;      const char *last_slash = find_last_char(base, end, '/');      if (!last_slash)      {	/* No slash found at all.  Append LINK to what we have,	   but we'll need a slash as a separator.	   Example: if base == "foo" and link == "qux/xyzzy", then	   we cannot just append link to base, because we'd get	   "fooqux/xyzzy", whereas what we want is	   "foo/qux/xyzzy".	   To make sure the / gets inserted, we set	   need_explicit_slash to 1.  We also set start_insert	   to end + 1, so that the length calculations work out	   correctly for one more (slash) character.  Accessing	   that character is fine, since it will be the	   delimiter, '\0' or '?'.  */	/* example: "foo?..." */	/*               ^    ('?' gets changed to '/') */	start_insert = end + 1;	need_explicit_slash = 1;      } else if (last_slash && last_slash != base		 && *(last_slash - 1) == '/')      {	/* example: http://host"  */	/*                      ^ */	start_insert = end + 1;	need_explicit_slash = 1;      } else      {	/* example: "whatever/foo/bar" */	/*                        ^    */	start_insert = last_slash + 1;      }      span = start_insert - base;      constr = kmalloc(span + linklength + 1);      if (span)	memcpy(constr, base, span);      if (need_explicit_slash)	constr[span - 1] = '/';      if (linklength)	memcpy(constr + span, link, linklength);      constr[span + linklength] = '\0';    } else			/* *link == `/' */    {      /* LINK is an absolute path: we need to replace everything         after (and including) the FIRST slash with LINK.         So, if BASE is "http://host/whatever/foo/bar", and LINK is         "/qux/xyzzy", our result should be         "http://host/qux/xyzzy".  */      int span;      const char *slash;      const char *start_insert = NULL;	/* for gcc to shut up. */      const char *pos = base;      int seen_slash_slash = 0;      /* We're looking for the first slash, but want to ignore         double slash. */    again:      slash = memchr(pos, '/', end - pos);      if (slash && !seen_slash_slash)	if (*(slash + 1) == '/')	{	  pos = slash + 2;	  seen_slash_slash = 1;	  goto again;	}      /* At this point, SLASH is the location of the first / after         "//", or the first slash altogether.  START_INSERT is the         pointer to the location where LINK will be inserted.  When         examining the last two examples, keep in mind that LINK         begins with '/'. */      if (!slash && !seen_slash_slash)	/* example: "foo" */	/*           ^    */	start_insert = base;      else if (!slash && seen_slash_slash)	/* example: "http://foo" */	/*                     ^ */	start_insert = end;      else if (slash && !seen_slash_slash)	/* example: "foo/bar" */	/*           ^        */	start_insert = base;      else if (slash && seen_slash_slash)	/* example: "http://something/" */	/*                           ^  */	start_insert = slash;      span = start_insert - base;      constr = kmalloc(span + linklength + 1);      if (span)	memcpy(constr, base, span);      if (linklength)	memcpy(constr + span, link, linklength);      constr[span + linklength] = '\0';    }  } else			/* !no_proto */  {    constr = strdupdelim(link, link + linklength);  }  return constr;}/* Merge BASE with LINK and return the resulting URI.  This is an   interface to uri_merge_1 that assumes that LINK is a   zero-terminated string.  */char *uri_merge(const char *base, const char *link){  return uri_merge_1(base, link, strlen(link), !has_proto(link));}/****************************************************************************** Perform a "deep" free of the urlinfo structure. The structure should have been created with newurl, but need not have been used. If free_pointer is non-0, free the pointer itself.******************************************************************************/void freeurl(urlinfo * u, int complete){  assert(u != NULL);  if (u->url)    kfree(u->url);  if (u->host)    kfree(u->host);  if (u->path)    kfree(u->path);  if (u->file)    kfree(u->file);  if (u->dir)    kfree(u->dir);  if (u->user)    kfree(u->user);  if (u->passwd)    kfree(u->passwd);  if (u->referer)    kfree(u->referer);  if (complete)    kfree(u);}
上一页 12
💿 文件大小 218 K
👤 上传用户 zyhunicom
📂 所属分类网络
🏷️ 相关标签

#prozilla #linux #网络
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -