📄 url.c

📁 prozgui是一款Linux下著名的下载工具
💻 C
📖 第 1 页 / 共 3 页
字号:
12 3 下一页
/* URL handling.
   Copyright (C) 1995, 1996, 1997 Free Software Foundation, Inc.

   This program is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 2 of the License, or
   (at your option) any later version.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.  */

/* $Id: url.c,v 1.23 2001/10/27 11:24:40 kalum Exp $ */

#include "common.h"
#include "prozilla.h"
#include "url.h"
#include "misc.h"

/* Is X "."?  */
#define DOTP(x) ((*(x) == '.') && (!*((x) + 1)))
/* Is X ".."?  */
#define DDOTP(x) ((*(x) == '.') && (*((x) + 1) == '.') && (!*((x) + 2)))

/* NULL-terminated list of strings to be recognized as prototypes (URL
   schemes). Note that recognized doesn't mean supported -- only HTTP
   and FTP are supported for now.

   However, a string that does not match anything in the list will be
   considered a relative URL.  Thus it's important that this list has
   anything anyone could think of being legal.

   There are wild things here. :-) Take a look at
   <URL:http://www.w3.org/pub/WWW/Addressing/schemes.html> to see more
   fun.  */
char *protostrings[] = {
  "cid:",
  "clsid:",
  "file:",
  "finger:",
  "ftp:",
  "gopher:",
  "hdl:",
  "http:",
  "https:",
  "ilu:",
  "ior:",
  "irc:",
  "java:",
  "javascript:",
  "lifn:",
  "mailto:",
  "mid:",
  "news:",
  "nntp:",
  "path:",
  "prospero:",
  "rlogin:",
  "service:",
  "shttp:",
  "snews:",
  "stanf:",
  "telnet:",
  "tn3270:",
  "wais:",
  "whois++:",
  NULL
};

/* TODO: remove the list above.  */

/* Similar to former, but for supported protocols: each entry pairs a
   scheme prefix with its protocol identifier and default port.  */
proto_t sup_protos[] = {
  {"http://", URLHTTP, DEFAULT_HTTP_PORT},
  {"ftp://", URLFTP, DEFAULT_FTP_PORT}
  /* { "file://", URLFILE, DEFAULT_FTP_PORT } */
};

/* Support for encoding and decoding of URL strings.  We determine
   whether a character is unsafe through table lookup.  This
   code assumes ASCII character set and 8-bit chars.  */

enum {
  urlchr_reserved = 1,
  urlchr_unsafe = 2
};

/* Short aliases used only to keep the table below readable.  */
#define R  urlchr_reserved
#define U  urlchr_unsafe
#define RU (R|U)

/* Nonzero if character C has any of the MASK bits set in urlchr_table.
   The cast keeps the index in [0, 255] even when char is signed.  */
#define urlchr_test(c, mask) (urlchr_table[(unsigned char)(c)] & (mask))

/* rfc1738 reserved chars.  We don't use this yet; preservation of
   reserved chars will be implemented when I integrate the new
   `reencode_string' function.  */
#define RESERVED_CHAR(c) urlchr_test(c, urlchr_reserved)

/* Unsafe chars:
   - anything <= 32;
   - stuff from rfc1738 ("<>\"#%{}|\\^~[]`");
   - '@' and ':'; needed for encoding URL username and password.
   - anything >= 127. */
#define UNSAFE_CHAR(c) urlchr_test(c, urlchr_unsafe)

/* Convert the ASCII character X to a hex-digit.  X should be between
   '0' and '9', or between 'A' and 'F', or between 'a' and 'f'.  The
   result is a number between 0 and 15.  If X is not a hexadecimal
   digit character, the result is undefined.  X is evaluated more than
   once, so it must not have side effects.  */
#define XCHAR_TO_XDIGIT(x)                              \
  (((x) >= '0' && (x) <= '9') ?                         \
   ((x) - '0') : (toupper((unsigned char)(x)) - 'A' + 10))

/* The reverse of the above: convert a HEX digit in the [0, 15] range
   to an ASCII character representing it.  The A-F characters are
   always in upper case.  */
#define XDIGIT_TO_XCHAR(x) (((x) < 10) ? ((x) + '0') : ((x) - 10 + 'A'))

/* Number of elements in ARRAY.  Only valid for true arrays, not for
   pointers.  */
#define ARRAY_SIZE(array) (sizeof (array) / sizeof (*(array)))

/* Per-character classification flags (urlchr_reserved / urlchr_unsafe),
   indexed by the character's value as an unsigned char.  */
static const unsigned char urlchr_table[256] = {
  U, U, U, U, U, U, U, U,	/* NUL SOH STX ETX  EOT ENQ ACK BEL */
  U, U, U, U, U, U, U, U,	/* BS  HT  LF  VT   FF  CR  SO  SI  */
  U, U, U, U, U, U, U, U,	/* DLE DC1 DC2 DC3  DC4 NAK SYN ETB */
  U, U, U, U, U, U, U, U,	/* CAN EM  SUB ESC  FS  GS  RS  US  */
  U, 0, U, U, 0, U, R, 0,	/* SP  !   "   #    $   %   &   '   */
  0, 0, 0, R, 0, 0, 0, R,	/* (   )   *   +    ,   -   .   /   */
  0, 0, 0, 0, 0, 0, 0, 0,	/* 0   1   2   3    4   5   6   7   */
  0, 0, U, R, U, R, U, R,	/* 8   9   :   ;    <   =   >   ?   */
  RU, 0, 0, 0, 0, 0, 0, 0,	/* @   A   B   C    D   E   F   G   */
  0, 0, 0, 0, 0, 0, 0, 0,	/* H   I   J   K    L   M   N   O   */
  0, 0, 0, 0, 0, 0, 0, 0,	/* P   Q   R   S    T   U   V   W   */
  0, 0, 0, U, U, U, U, 0,	/* X   Y   Z   [    \   ]   ^   _   */
  U, 0, 0, 0, 0, 0, 0, 0,	/* `   a   b   c    d   e   f   g   */
  0, 0, 0, 0, 0, 0, 0, 0,	/* h   i   j   k    l   m   n   o   */
  0, 0, 0, 0, 0, 0, 0, 0,	/* p   q   r   s    t   u   v   w   */
  0, 0, 0, U, U, U, U, U,	/* x   y   z   {    |   }   ~   DEL */

  /* Everything from 128 up is unsafe.  */
  U, U, U, U, U, U, U, U, U, U, U, U, U, U, U, U,
  U, U, U, U, U, U, U, U, U, U, U, U, U, U, U, U,
  U, U, U, U, U, U, U, U, U, U, U, U, U, U, U, U,
  U, U, U, U, U, U, U, U, U, U, U, U, U, U, U, U,
  U, U, U, U, U, U, U, U, U, U, U, U, U, U, U, U,
  U, U, U, U, U, U, U, U, U, U, U, U, U, U, U, U,
  U, U, U, U, U, U, U, U, U, U, U, U, U, U, U, U,
  U, U, U, U, U, U, U, U, U, U, U, U, U, U, U, U,
};

/* Returns 1 if the URL begins with a protocol (supported or
   unsupported), 0 otherwise.  The comparison against each entry of
   protostrings is case-insensitive and covers only the entry's own
   length, i.e. it is a prefix match.  */
int has_proto(const char *url)
{
  char **s;

  for (s = protostrings; *s; s++)
    if (strncasecmp(url, *s, strlen(*s)) == 0)
      return 1;
  return 0;
}

/* Skip the username and password, if present here.  The function
   should be called *not* with the complete URL, but with the part
   right after the protocol.
If no username and password are found, return 0.  */int skip_uname(const char *url){  const char *p;  const char *q = NULL;  for (p = url; *p && *p != '/'; p++)    if (*p == '@')      q = p;  /* If a `@' was found before the first occurrence of `/', skip     it.  */  if (q != NULL)    return q - url + 1;  else    return 0;}/* Decodes the forms %xy in a URL to the character the hexadecimal   code of which is xy.  xy are hexadecimal digits from   [0123456789ABCDEF] (case-insensitive).  If x or y are not   hex-digits or `%' precedes `\0', the sequence is inserted   literally.  */void decode_string(char *s){  char *t = s;			/* t - tortoise */  char *h = s;			/* h - hare     */  for (; *h; h++, t++)  {  if (*h != '%')//    if(1)    {    copychar:      *t = *h;    } else    {      /* Do nothing if '%' is not followed by two hex digits. */      if (!*(h + 1) || !*(h + 2)	  || !(isxdigit(*(h + 1)) && isxdigit(*(h + 2))))	goto copychar;      *t = (XCHAR_TO_XDIGIT(*(h + 1)) << 4) + XCHAR_TO_XDIGIT(*(h + 2));      h += 2;    }  }  *t = '\0';}/* Like encode_string, but return S if there are no unsafe chars.  */char *encode_string_maybe(const char *s){  const char *p1;  char *p2, *newstr;  int newlen;  int addition = 0;  /*Changes Grendel: (*p1!='%') added */    for (p1 = s; *p1; p1++)    if ((*p1!='%') && UNSAFE_CHAR(*p1))      addition += 2;	/* Two more characters (hex digits) */  if (!addition)    return (char *) s;  newlen = (p1 - s) + addition;  newstr = (char *) kmalloc(newlen + 1);  p1 = s;  p2 = newstr;  while (*p1)  {    //    if (UNSAFE_CHAR(*p1))if ((*p1!='%') && UNSAFE_CHAR(*p1))/*	  if(0)*/    {      const unsigned char c = *p1++;      *p2++ = '%';      *p2++ = XDIGIT_TO_XCHAR(c >> 4);      *p2++ = XDIGIT_TO_XCHAR(c & 0xf);    } else      *p2++ = *p1++;  }  *p2 = '\0';  assert(p2 - newstr == newlen);  return newstr;}/* Encode the unsafe characters (as determined by UNSAFE_CHAR) in a   given string, returning a malloc-ed %XX encoded string.  
*/char *encode_string(const char *s){  char *encoded = encode_string_maybe(s);  if (encoded != s)    return encoded;  else    return kstrdup(s);}/* Encode unsafe characters in PTR to %xx.  If such encoding is done,   the old value of PTR is freed and PTR is made to point to the newly   allocated storage.  */#define ENCODE(ptr) do {			\  char *e_new = encode_string_maybe (ptr);	\  if (e_new != ptr)				\    {						\      kfree (ptr);				\      ptr = e_new;				\    }						\} while (0)/* Returns the protocol type if URL's protocol is supported, or   URLUNKNOWN if not.  */uerr_t urlproto(const char *url){  int i;  for (i = 0; i < ARRAY_SIZE(sup_protos); i++)    if (!strncasecmp(url, sup_protos[i].name, strlen(sup_protos[i].name)))      return sup_protos[i].ind;  for (i = 0; url[i] && url[i] != ':' && url[i] != '/'; i++);  if (url[i] == ':')  {    for (++i; url[i] && url[i] != '/'; i++)      if (!isdigit(url[i]))	return URLBADPORT;    if (url[i - 1] == ':')      return URLFTP;    else      return URLHTTP;  } else    return URLHTTP;}/* If PATH ends with `;type=X', return the character X.  */char process_ftp_type(char *path){  int len = strlen(path);  if (len >= 7 && !memcmp(path + len - 7, ";type=", 6))  {    path[len - 7] = '\0';    return path[len - 1];  } else    return '\0';}/* Canonicalize PATH, and return a new path.  The new path differs from PATH   in that:	Multple `/'s are collapsed to a single `/'.	Leading `./'s and trailing `/.'s are removed.	Trailing `/'s are removed.	Non-leading `../'s and trailing `..'s are handled by removing	portions of the path.   E.g. "a/b/c/./../d/.." will yield "a/b".  This function originates   from GNU Bash.   Changes for Wget:	Always use '/' as stub_char.	Don't check for local things using canon_stat.	Change the original string instead of strdup-ing.	React correctly when beginning with `./' and `../'.  
*/void path_simplify(char *path){  register int i, start, ddot;  char stub_char;  if (!*path)    return;  /*stub_char = (*path == '/') ? '/' : '.'; */  stub_char = '/';  /* Addition: Remove all `./'-s preceding the string.  If `../'-s     precede, put `/' in front and remove them too.  */  i = 0;  ddot = 0;  while (1)  {    if (path[i] == '.' && path[i + 1] == '/')      i += 2;    else if (path[i] == '.' && path[i + 1] == '.' && path[i + 2] == '/')    {      i += 3;      ddot = 1;    } else      break;  }  if (i)    strcpy(path, path + i - ddot);  /* Replace single `.' or `..' with `/'.  */  if ((path[0] == '.' && path[1] == '\0')      || (path[0] == '.' && path[1] == '.' && path[2] == '\0'))  {    path[0] = stub_char;
12 3 下一页
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -