⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 url.c

📁 linux下的网络下载工具prozilla的源码
💻 C
📖 第 1 页 / 共 2 页
字号:
/* URL handling.   Copyright (C) 1995, 1996, 1997 Free Software Foundation, Inc.      This program is free software; you can redistribute it and/or modify   it under the terms of the GNU General Public License as published by   the Free Software Foundation; either version 2 of the License, or   (at your option) any later version.      This program is distributed in the hope that it will be useful,   but WITHOUT ANY WARRANTY; without even the implied warranty of   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the   GNU General Public License for more details.      You should have received a copy of the GNU General Public License   along with this program; if not, write to the Free Software   Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.  */#ifdef HAVE_CONFIG_H#  include <config.h>#endif				/*				 * HAVE_CONFIG_H 				 */#include <stdio.h>#include <stdlib.h>#include <string.h>#include <ctype.h>#include <sys/types.h>#include <unistd.h>#include <errno.h>#include <assert.h>#include "main.h"#include "url.h"#include "misc.h"/* Is X "."?  */#define DOTP(x) ((*(x) == '.') && (!*(x + 1)))/* Is X ".."?  */#define DDOTP(x) ((*(x) == '.') && (*(x + 1) == '.') && (!*(x + 2)))char *protostrings[] = {  "cid:",  "clsid:",  "file:",  "finger:",  "ftp:",  "gopher:",  "hdl:",  "http:",  "https:",  "ilu:",  "ior:",  "irc:",  "java:",  "javascript:",  "lifn:",  "mailto:",  "mid:",  "news:",  "nntp:",  "path:",  "prospero:",  "rlogin:",  "service:",  "shttp:",  "snews:",  "stanf:",  "telnet:",  "tn3270:",  "wais:",  "whois++:",  NULL};/* TODO remove this stupid things... *//* Similar to former, but for supported protocols: */proto_t sup_protos[] = {  {"http://", URLHTTP, DEFAULT_HTTP_PORT},  {"ftp://", URLFTP, DEFAULT_FTP_PORT}  /* { "file://", URLFILE, DEFAULT_FTP_PORT } */};/* Support for encoding and decoding of URL strings.  We determine   whether a character is unsafe through   table lookup.  This   code assumes ASCII character set and 8-bit chars.  */enum {  urlchr_reserved = 1,  urlchr_unsafe = 2};#define R  urlchr_reserved#define U  urlchr_unsafe#define RU R|U#define urlchr_test(c, mask) (urlchr_table[(unsigned char)(c)] & (mask))/* rfc1738 reserved chars.  We don't use this yet; preservation of   reserved chars will be implemented when I integrate the new   `reencode_string' function.  */#define RESERVED_CHAR(c) urlchr_test(c, urlchr_reserved)/* Unsafe chars:   - anything <= 32;   - stuff from rfc1738 ("<>\"#%{}|\\^~[]`");   - '@' and ':'; needed for encoding URL username and password.   - anything >= 127. */#define UNSAFE_CHAR(c) urlchr_test(c, urlchr_unsafe)/* Convert the ASCII character X to a hex-digit.  X should be between   '0' and '9', or between 'A' and 'F', or between 'a' and 'f'.  The   result is a number between 0 and 15.  If X is not a hexadecimal   digit character, the result is undefined.  */#define XCHAR_TO_XDIGIT(x)			\  (((x) >= '0' && (x) <= '9') ?			\   ((x) - '0') : (toupper(x) - 'A' + 10))/* The reverse of the above: convert a HEX digit in the [0, 15] range   to an ASCII character representing it.  The A-F characters are   always in upper case.  */#define XDIGIT_TO_XCHAR(x) (((x) < 10) ? ((x) + '0') : ((x) - 10 + 'A'))#define ARRAY_SIZE(array) (sizeof (array) / sizeof (*(array)))static const unsigned char urlchr_table[256] = {  U, U, U, U, U, U, U, U,	/* NUL SOH STX ETX  EOT ENQ ACK BEL */  U, U, U, U, U, U, U, U,	/* BS  HT  LF  VT   FF  CR  SO  SI  */  U, U, U, U, U, U, U, U,	/* DLE DC1 DC2 DC3  DC4 NAK SYN ETB */  U, U, U, U, U, U, U, U,	/* CAN EM  SUB ESC  FS  GS  RS  US  */  U, 0, U, U, 0, U, R, 0,	/* SP  !   "   #    $   %   &   '   */  0, 0, 0, R, 0, 0, 0, R,	/* (   )   *   +    ,   -   .   /   */  0, 0, 0, 0, 0, 0, 0, 0,	/* 0   1   2   3    4   5   6   7   */  0, 0, U, R, U, R, U, R,	/* 8   9   :   ;    <   =   >   ?   */  RU, 0, 0, 0, 0, 0, 0, 0,	/* @   A   B   C    D   E   F   G   */  0, 0, 0, 0, 0, 0, 0, 0,	/* H   I   J   K    L   M   N   O   */  0, 0, 0, 0, 0, 0, 0, 0,	/* P   Q   R   S    T   U   V   W   */  0, 0, 0, U, U, U, U, 0,	/* X   Y   Z   [    \   ]   ^   _   */  U, 0, 0, 0, 0, 0, 0, 0,	/* `   a   b   c    d   e   f   g   */  0, 0, 0, 0, 0, 0, 0, 0,	/* h   i   j   k    l   m   n   o   */  0, 0, 0, 0, 0, 0, 0, 0,	/* p   q   r   s    t   u   v   w   */  0, 0, 0, U, U, U, U, U,	/* x   y   z   {    |   }   ~   DEL */  U, U, U, U, U, U, U, U, U, U, U, U, U, U, U, U,  U, U, U, U, U, U, U, U, U, U, U, U, U, U, U, U,  U, U, U, U, U, U, U, U, U, U, U, U, U, U, U, U,  U, U, U, U, U, U, U, U, U, U, U, U, U, U, U, U,  U, U, U, U, U, U, U, U, U, U, U, U, U, U, U, U,  U, U, U, U, U, U, U, U, U, U, U, U, U, U, U, U,  U, U, U, U, U, U, U, U, U, U, U, U, U, U, U, U,  U, U, U, U, U, U, U, U, U, U, U, U, U, U, U, U,};/* Returns 1 if the URL begins with a protocol (supported or   unsupported), 0 otherwise.  */int has_proto(const char *url){  char **s;  for (s = protostrings; *s; s++)    if (strncasecmp(url, *s, strlen(*s)) == 0)      return 1;  return 0;}/* Skip the username and password, if present here.  The function   should be called *not* with the complete URL, but with the part   right after the protocol.   If no username and password are found, return 0.  */int skip_uname(const char *url){  const char *p;  const char *q = NULL;  for (p = url; *p && *p != '/'; p++)    if (*p == '@')      q = p;  /* If a `@' was found before the first occurrence of `/', skip     it.  */  if (q != NULL)    return q - url + 1;  else    return 0;}/* Decodes the forms %xy in a URL to the character the hexadecimal   code of which is xy.  xy are hexadecimal digits from   [0123456789ABCDEF] (case-insensitive).  If x or y are not   hex-digits or `%' precedes `\0', the sequence is inserted   literally.  */void decode_string(char *s){  char *t = s;			/* t - tortoise */  char *h = s;			/* h - hare     */  for (; *h; h++, t++)  {    if (*h != '%')    {    copychar:      *t = *h;    } else    {      /* Do nothing if '%' is not followed by two hex digits. */      if (!*(h + 1) || !*(h + 2)	  || !(isxdigit(*(h + 1)) && isxdigit(*(h + 2))))	goto copychar;      *t = (XCHAR_TO_XDIGIT(*(h + 1)) << 4) + XCHAR_TO_XDIGIT(*(h + 2));      h += 2;    }  }  *t = '\0';}/* Like encode_string, but return S if there are no unsafe chars.  */char *encode_string_maybe(const char *s){  const char *p1;  char *p2, *newstr;  int newlen;  int addition = 0;  for (p1 = s; *p1; p1++)    if (UNSAFE_CHAR(*p1))      addition += 2;		/* Two more characters (hex digits) */  if (!addition)    return (char *) s;  newlen = (p1 - s) + addition;  newstr = kmalloc(newlen + 1);  p1 = s;  p2 = newstr;  while (*p1)  {    if (UNSAFE_CHAR(*p1))    {      const unsigned char c = *p1++;      *p2++ = '%';      *p2++ = XDIGIT_TO_XCHAR(c >> 4);      *p2++ = XDIGIT_TO_XCHAR(c & 0xf);    } else      *p2++ = *p1++;  }  *p2 = '\0';  assert(p2 - newstr == newlen);  return newstr;}/* Encode the unsafe characters (as determined by UNSAFE_CHAR) in a   given string, returning a malloc-ed %XX encoded string.  */char *encode_string(const char *s){  char *encoded = encode_string_maybe(s);  if (encoded != s)    return encoded;  else    return kstrdup(s);}/* Encode unsafe characters in PTR to %xx.  If such encoding is done,   the old value of PTR is freed and PTR is made to point to the newly   allocated storage.  */#define ENCODE(ptr) do {			\  char *e_new = encode_string_maybe (ptr);	\  if (e_new != ptr)				\    {						\      kfree (ptr);				\      ptr = e_new;				\    }						\} while (0)/* Returns the protocol type if URL's protocol is supported, or   URLUNKNOWN if not.  */uerr_t urlproto(const char *url){  int i;  for (i = 0; i < ARRAY_SIZE(sup_protos); i++)    if (!strncasecmp(url, sup_protos[i].name, strlen(sup_protos[i].name)))      return sup_protos[i].ind;  for (i = 0; url[i] && url[i] != ':' && url[i] != '/'; i++);  if (url[i] == ':')  {    for (++i; url[i] && url[i] != '/'; i++)      if (!isdigit(url[i]))	return URLBADPORT;    if (url[i - 1] == ':')      return URLFTP;    else      return URLHTTP;  } else    return URLHTTP;}/* If PATH ends with `;type=X', return the character X.  */char process_ftp_type(char *path){  int len = strlen(path);  if (len >= 7 && !memcmp(path + len - 7, ";type=", 6))  {    path[len - 7] = '\0';    return path[len - 1];  } else    return '\0';}/* Canonicalize PATH, and return a new path.  The new path differs from PATH   in that:	Multple `/'s are collapsed to a single `/'.	Leading `./'s and trailing `/.'s are removed.	Trailing `/'s are removed.	Non-leading `../'s and trailing `..'s are handled by removing	portions of the path.   E.g. "a/b/c/./../d/.." will yield "a/b".  This function originates   from GNU Bash.   Changes for Wget:	Always use '/' as stub_char.	Don't check for local things using canon_stat.	Change the original string instead of strdup-ing.	React correctly when beginning with `./' and `../'.  */void path_simplify(char *path){  register int i, start, ddot;  char stub_char;  if (!*path)    return;  /*stub_char = (*path == '/') ? '/' : '.'; */  stub_char = '/';  /* Addition: Remove all `./'-s preceding the string.  If `../'-s     precede, put `/' in front and remove them too.  */  i = 0;  ddot = 0;  while (1)  {    if (path[i] == '.' && path[i + 1] == '/')      i += 2;    else if (path[i] == '.' && path[i + 1] == '.' && path[i + 2] == '/')    {      i += 3;      ddot = 1;    } else      break;  }  if (i)    strcpy(path, path + i - ddot);  /* Replace single `.' or `..' with `/'.  */  if ((path[0] == '.' && path[1] == '\0')      || (path[0] == '.' && path[1] == '.' && path[2] == '\0'))  {    path[0] = stub_char;    path[1] = '\0';    return;  }  /* Walk along PATH looking for things to compact.  */  i = 0;  while (1)  {    if (!path[i])      break;    while (path[i] && path[i] != '/')      i++;    start = i++;    /* If we didn't find any slashes, then there is nothing left to do.  */    if (!path[start])      break;    /* Handle multiple `/'s in a row.  */    while (path[i] == '/')      i++;    if ((start + 1) != i)    {      strcpy(path + start + 1, path + i);      i = start + 1;    }    /* Check for trailing `/'.  */    if (start && !path[i])    {    zero_last:      path[--i] = '\0';      break;    }    /* Check for `../', `./' or trailing `.' by itself.  */    if (path[i] == '.')    {      /* Handle trailing `.' by itself.  */      if (!path[i + 1])	goto zero_last;      /* Handle `./'.  */      if (path[i + 1] == '/')      {	strcpy(path + i, path + i + 1);	i = (start < 0) ? 0 : start;	continue;      }      /* Handle `../' or trailing `..' by itself.  */      if (path[i + 1] == '.' && (path[i + 2] == '/' || !path[i + 2]))      {	while (--start > -1 && path[start] != '/');	strcpy(path + start + 1, path + i + 2);	i = (start < 0) ? 0 : start;	continue;      }    }				/* path == '.' */  }				/* while */  if (!*path)  {    *path = stub_char;    path[1] = '\0';  }}/* Special versions of DOTP and DDOTP for parse_dir().  They work like   DOTP and DDOTP, but they also recognize `?' as end-of-string   delimiter.  This is needed for correct handling of query   strings.  */#define PD_DOTP(x)  ((*(x) == '.') && (!*((x) + 1) || *((x) + 1) == '?'))#define PD_DDOTP(x) ((*(x) == '.') && (*(x) == '.')		\		     && (!*((x) + 2) || *((x) + 2) == '?'))/* Like strlen(), but allow the URL to be ended with '?'.  */int urlpath_length(const char *url){  const char *q = strchr(url, '?');  if (q)    return q - url;  return strlen(url);}/* Build the directory and filename components of the path.  Both   components are *separately* malloc-ed strings!  It does not change   the contents of path.   If the path ends with "." or "..", they are (correctly) counted as   directories.  */void parse_dir(const char *path, char **dir, char **file){  int i, l;  l = urlpath_length(path);  for (i = l; i && path[i] != '/'; i--);  if (!i && *path != '/')	/* Just filename */  {    if (PD_DOTP(path) || PD_DDOTP(path))    {      *dir = strdupdelim(path, path + l);      *file = kstrdup(path + l);	/* normally empty, but could					   contain ?... */    } else    {      *dir = kstrdup("");	/* This is required because of FTP */      *file = kstrdup(path);    }  } else if (!i)		/* /filename */  {    if (PD_DOTP(path + 1) || PD_DDOTP(path + 1))    {      *dir = strdupdelim(path, path + l);      *file = kstrdup(path + l);	/* normally empty, but could					   contain ?... */    } else    {      *dir = kstrdup("/");      *file = kstrdup(path + 1);    }  } else			/* Nonempty directory with or without a filename */  {    if (PD_DOTP(path + i + 1) || PD_DDOTP(path + i + 1))    {      *dir = strdupdelim(path, path + l);      *file = kstrdup(path + l);	/* normally empty, but could					   contain ?... */    } else    {      *dir = strdupdelim(path, path + i);      *file = kstrdup(path + i + 1);    }  }}/* Skip the protocol part of the URL, e.g. `http://'.  If no protocol   part is found, returns 0.  */int skip_proto(const char *url){  char **s;  int l;  for (s = protostrings; *s; s++)    if (!strncasecmp(*s, url, strlen(*s)))      break;  if (!*s)    return 0;  l = strlen(*s);  /* HTTP and FTP protocols are expected to yield exact host names     (i.e. the `//' part must be skipped, too).  */  if (!strcmp(*s, "http:") || !strcmp(*s, "ftp:"))    l += 2;  return l;}/* Find the optional username and password within the URL, as per   RFC1738.  The returned user and passwd char pointers are   malloc-ed.  */static uerr_t parse_uname(const char *url, char **user, char **passwd){  int l;  const char *p, *q, *col;  char **where;  *user = NULL;  *passwd = NULL;  /* Look for the end of the protocol string.  */  l = skip_proto(url);

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -