📄 url.c
字号:
full_path_length (const struct url *url){ int len = 0;#define FROB(el) if (url->el) len += 1 + strlen (url->el) FROB (path); FROB (params); FROB (query);#undef FROB return len;}/* Write out the full path. */static voidfull_path_write (const struct url *url, char *where){#define FROB(el, chr) do { \ char *f_el = url->el; \ if (f_el) { \ int l = strlen (f_el); \ *where++ = chr; \ memcpy (where, f_el, l); \ where += l; \ } \} while (0) FROB (path, '/'); FROB (params, ';'); FROB (query, '?');#undef FROB}/* Public function for getting the "full path". E.g. if u->path is "foo/bar" and u->query is "param=value", full_path will be "/foo/bar?param=value". */char *url_full_path (const struct url *url){ int length = full_path_length (url); char *full_path = (char *)xmalloc(length + 1); full_path_write (url, full_path); full_path[length] = '\0'; return full_path;}/* Escape unsafe and reserved characters, except for the slash characters. */static char *url_escape_dir (const char *dir){ char *newdir = url_escape_1 (dir, urlchr_unsafe | urlchr_reserved, 1); char *h, *t; if (newdir == dir) return (char *)dir; /* Unescape slashes in NEWDIR. */ h = newdir; /* hare */ t = newdir; /* tortoise */ for (; *h; h++, t++) { /* url_escape_1 having converted '/' to "%2F" exactly. */ if (*h == '%' && h[1] == '2' && h[2] == 'F') { *t = '/'; h += 2; } else *t = *h; } *t = '\0'; return newdir;}/* Sync u->path and u->url with u->dir and u->file. Called after u->file or u->dir have been changed, typically by the FTP code. */static voidsync_path (struct url *u){ char *newpath, *efile, *edir; xfree (u->path); /* u->dir and u->file are not escaped. URL-escape them before reassembling them into u->path. That way, if they contain separators like '?' or even if u->file contains slashes, the path will be correctly assembled. (u->file can contain slashes if the URL specifies it with %2f, or if an FTP server returns it.) 
*/ edir = url_escape_dir (u->dir); efile = url_escape_1 (u->file, urlchr_unsafe | urlchr_reserved, 1); if (!*edir) newpath = xstrdup (efile); else { int dirlen = strlen (edir); int filelen = strlen (efile); /* Copy "DIR/FILE" to newpath. */ char *p = newpath = xmalloc (dirlen + 1 + filelen + 1); memcpy (p, edir, dirlen); p += dirlen; *p++ = '/'; memcpy (p, efile, filelen); p += filelen; *p++ = '\0'; } u->path = newpath; if (edir != u->dir) xfree (edir); if (efile != u->file) xfree (efile); /* Regenerate u->url as well. */ xfree (u->url); u->url = url_string (u, 0);}/* Mutators. Code in ftp.c insists on changing u->dir and u->file. This way we can sync u->path and u->url when they get changed. */voidurl_set_dir (struct url *url, const char *newdir){ xfree (url->dir); url->dir = xstrdup (newdir); sync_path (url);}voidurl_set_file (struct url *url, const char *newfile){ xfree (url->file); url->file = xstrdup (newfile); sync_path (url);}voidurl_free (struct url *url){ xfree (url->host); xfree (url->path); xfree (url->url); FREE_MAYBE (url->params); FREE_MAYBE (url->query); FREE_MAYBE (url->fragment); FREE_MAYBE (url->user); FREE_MAYBE (url->passwd); xfree (url->dir); xfree (url->file); xfree (url);}/* Create all the necessary directories for PATH (a file). Calls mkdirhier() internally. */intmkalldirs (const char *path){ const char *p; char *t; struct stat st; int res; p = path + strlen (path); for (; *p != '/' && p != path; p--) ; /* Don't create if it's just a file. */ if ((p == path) && (*p != '/')) return 0; t = strdupdelim (path, p); /* Check whether the directory exists. */ if ((stat (t, &st) == 0)) { if (S_ISDIR (st.st_mode)) { xfree (t); return 0; } else { /* If the dir exists as a file name, remove it first. This is *only* for Wget to work with buggy old CERN http servers. Here is the scenario: When Wget tries to retrieve a directory without a slash, e.g. 
http://foo/bar (bar being a directory), CERN server will not redirect it too http://foo/bar/ -- it will generate a directory listing containing links to bar/file1, bar/file2, etc. Wget will lose because it saves this HTML listing to a file `bar', so it cannot create the directory. To work around this, if the file of the same name exists, we just remove it and create the directory anyway. */ DEBUGP (("Removing %s because of directory danger!\n", t)); unlink (t); } } res = make_directory (t); if (res != 0) logprintf (LOG_NOTQUIET, "%s: %s", t, strerror (errno)); xfree (t); return res;}/* Functions for constructing the file name out of URL components. *//* A growable string structure, used by url_file_name and friends. This should perhaps be moved to utils.c. The idea is to have a convenient and efficient way to construct a string by having various functions append data to it. Instead of passing the obligatory BASEVAR, SIZEVAR and TAILPOS to all the functions in questions, we pass the pointer to this struct. */struct growable { char *base; int size; int tail;};/* Ensure that the string can accept APPEND_COUNT more characters past the current TAIL position. If necessary, this will grow the string and update its allocated size. If the string is already large enough to take TAIL+APPEND_COUNT characters, this does nothing. */#define GROW(g, append_size) do { \ struct growable *G_ = g; \ DO_REALLOC (G_->base, G_->size, G_->tail + append_size, char); \} while (0)/* Return the tail position of the string. */#define TAIL(r) ((r)->base + (r)->tail)/* Move the tail position by APPEND_COUNT characters. */#define TAIL_INCR(r, append_count) ((r)->tail += append_count)/* Append the string STR to DEST. NOTICE: the string in DEST is not terminated. */static voidappend_string (const char *str, struct growable *dest){ int l = strlen (str); GROW (dest, l); memcpy (TAIL (dest), str, l); TAIL_INCR (dest, l);}/* Append CH to DEST. For example, append_char (0, DEST) zero-terminates DEST. 
*/static voidappend_char (char ch, struct growable *dest){ GROW (dest, 1); *TAIL (dest) = ch; TAIL_INCR (dest, 1);}enum { filechr_not_unix = 1, /* unusable on Unix, / and \0 */ filechr_not_windows = 2, /* unusable on Windows, one of \|/<>?:*" */ filechr_control = 4 /* a control character, e.g. 0-31 */};#define FILE_CHAR_TEST(c, mask) (filechr_table[(unsigned char)(c)] & (mask))/* Shorthands for the table: */#define U filechr_not_unix#define W filechr_not_windows#define C filechr_control#define UW U|W#define UWC U|W|C/* Table of characters unsafe under various conditions (see above). Arguably we could also claim `%' to be unsafe, since we use it as the escape character. If we ever want to be able to reliably translate file name back to URL, this would become important crucial. Right now, it's better to be minimal in escaping. */const static unsigned char filechr_table[256] ={UWC, C, C, C, C, C, C, C, /* NUL SOH STX ETX EOT ENQ ACK BEL */ C, C, C, C, C, C, C, C, /* BS HT LF VT FF CR SO SI */ C, C, C, C, C, C, C, C, /* DLE DC1 DC2 DC3 DC4 NAK SYN ETB */ C, C, C, C, C, C, C, C, /* CAN EM SUB ESC FS GS RS US */ 0, 0, W, 0, 0, 0, 0, 0, /* SP ! " # $ % & ' */ 0, 0, W, 0, 0, 0, 0, UW, /* ( ) * + , - . / */ 0, 0, 0, 0, 0, 0, 0, 0, /* 0 1 2 3 4 5 6 7 */ 0, 0, W, 0, W, 0, W, W, /* 8 9 : ; < = > ? 
*/ 0, 0, 0, 0, 0, 0, 0, 0, /* @ A B C D E F G */ 0, 0, 0, 0, 0, 0, 0, 0, /* H I J K L M N O */ 0, 0, 0, 0, 0, 0, 0, 0, /* P Q R S T U V W */ 0, 0, 0, 0, W, 0, 0, 0, /* X Y Z [ \ ] ^ _ */ 0, 0, 0, 0, 0, 0, 0, 0, /* ` a b c d e f g */ 0, 0, 0, 0, 0, 0, 0, 0, /* h i j k l m n o */ 0, 0, 0, 0, 0, 0, 0, 0, /* p q r s t u v w */ 0, 0, 0, 0, 0, 0, 0, 0, /* x y z { | } ~ DEL */ C, C, C, C, C, C, C, C, C, C, C, C, C, C, C, C, /* 128-143 */ C, C, C, C, C, C, C, C, C, C, C, C, C, C, C, C, /* 144-159 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,};#undef U#undef W#undef C#undef UW#undef UWC/* FN_PORT_SEP is the separator between host and port in file names for non-standard port numbers. On Unix this is normally ':', as in "www.xemacs.org:4001/index.html". Under Windows, we set it to + because Windows can't handle ':' in file names. */#define FN_PORT_SEP (opt.restrict_files_os != restrict_windows ? ':' : '+')/* FN_QUERY_SEP is the separator between the file name and the URL query, normally '?'. Since Windows cannot handle '?' as part of file name, we use '@' instead there. */#define FN_QUERY_SEP (opt.restrict_files_os != restrict_windows ? '?' : '@')/* Quote path element, characters in [b, e), as file name, and append the quoted string to DEST. Each character is quoted as per file_unsafe_char and the corresponding table. If ESCAPED_P is non-zero, the path element is considered to be URL-escaped and will be unescaped prior to inspection. 
*/

static void
append_uri_pathel (const char *b, const char *e, int escaped_p,
                   struct growable *dest)
{
  const char *cur;
  int n_quoted = 0;
  int outlen;

  /* Select which characters need quoting: the Unix set or the
     Windows set, optionally widened with the control characters.  */
  int mask = (opt.restrict_files_os == restrict_unix
              ? filechr_not_unix
              : filechr_not_windows);
  if (opt.restrict_files_ctrl)
    mask |= filechr_control;

  if (escaped_p)
    {
      /* Work on an URL-unescaped copy of [b, e) from here on.  */
      char *unescaped;
      BOUNDED_TO_ALLOCA (b, e, unescaped);
      url_unescape (unescaped);
      b = unescaped;
      e = unescaped + strlen (unescaped);
    }

  /* First pass: count the characters that will have to be quoted.  */
  for (cur = b; cur < e; cur++)
    {
      if (FILE_CHAR_TEST (*cur, mask))
        ++n_quoted;
    }

  /* A quoted character takes three bytes ("%XX") instead of one, so
     each of them adds two bytes to the plain length e-b.  */
  outlen = (e - b) + (2 * n_quoted);
  GROW (dest, outlen);

  if (n_quoted == 0)
    {
      /* Nothing to quote -- a plain copy does the job and spares us
         the second pass.  */
      memcpy (TAIL (dest), b, outlen);
    }
  else
    {
      /* Second pass: copy, replacing each unsafe character with '%'
         followed by its two hex digits.  */
      char *out = TAIL (dest);
      for (cur = b; cur < e; cur++)
        {
          if (FILE_CHAR_TEST (*cur, mask))
            {
              unsigned char ch = *cur;
              *out++ = '%';
              *out++ = XNUM_TO_DIGIT (ch >> 4);
              *out++ = XNUM_TO_DIGIT (ch & 0xf);
            }
          else
            *out++ = *cur;
        }
      assert (out - TAIL (dest) == outlen);
    }
  TAIL_INCR (dest, outlen);
}

/* Append to DEST the directory structure that corresponds to the
   directory part of URL's path.  For example, if the URL is
   http://server/dir1/dir2/file, this appends "dir1/dir2" (preceded
   by a '/' when DEST is not empty).  Each path element ("dir1" and
   "dir2" in the above example) is examined, url-unescaped, and
   re-escaped as file name element.

   Additionally, it cuts as many directories from the path as
   specified by opt.cut_dirs.  For example, if opt.cut_dirs is 1, it
   will produce "dir2" for the above example.  For 2 or more, it will
   produce "".

   Each component of the path is quoted for use as file name.
*/static voidappend_dir_structure (const struct url *u, struct growable *dest){ char *pathel, *next; int cut = opt.cut_dirs; /* Go through the path components, de-URL-quote them, and quote them (if necessary) as file names. */ pathel = u->path; for (; (next = strchr (pathel, '/')) != NULL; pathel = next + 1) { if (cut-- > 0) continue; if (pathel == next) /* Ignore empty pathels. */ continue; if (dest->tail) append_char ('/', dest); append_uri_pathel (pathel, next, 1, dest); }}/* Return a unique file name that matches the given URL as good as possible. Does not create directories on the file system. */char *url_file_name (const struct url *u){ struct growable fnres; char *u_file, *u_query; char *fname, *unique; fnres.base = NULL; fnres.size = 0; fnres.tail = 0; /* Start with the directory prefix, if specified. */ if (opt.dir_prefix) append_string (opt.dir_prefix, &fnres); /* If "dirstruct" is turned on (typically the case with -r), add the host and port (unless those have been turned off) and directory structure. */ if (opt.dirstruct) { if (opt.add_hostdir) { if (fnres.tail) append_char ('/', &fnres); append_string (u->host, &fnres); if (u->port != scheme_default_port (u->scheme)) { char portstr[24]; number_to_string (portstr, u->port); append_char (FN_PORT_SEP, &fnres); append_string (portstr, &fnres); } } append_dir_structure (u, &fnres); } /* Add the file name. */ if (fnres.tail) append_char ('/', &fnres); u_file = *u->file ? u->file : "index.html"; append_uri_pathel (u_file, u_file + strlen (u_file), 0, &fnres); /* Append "?query" to the file name. */ u_query = u->query && *u->query ? u->query : NULL; if (u_query) { append_char (FN_QUERY_SEP, &fnres); append_uri_pathel (u_query, u_query + strlen (u_query), 1, &fnres); } /* Zero-terminate the file name. */ append_char ('\0', &fnres); fname = fnres.base; /* Check the cases in which the unique extensions are not used: 1) Clobbering is turned off (-nc). 2) Retrieval with regetting. 3) Timestamping is used. 
4) Hierarchy is built. The exception is the case when file does exist and is a directory (see `mkalldirs' for explanation). */ if ((opt.noclobber || opt.always_rest || opt.timestamping || opt.dirstruct) && !(file_exists_p (fname) && !file_non_directory_p (fname))) return fname; unique = unique_name (fname, 1); if (unique != fname) xfree (fname); return unique;}/* Return the length of URL's path. Path is considered to be terminated by one of '?', ';', '#', or by the end of the string. */
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -