📄 url.c
字号:
/* If we suspect that a transformation has rendered what url_string might return different from URL_ENCODED, rebuild u->url using url_string. */ u->url = url_string (u, URL_AUTH_SHOW); if (url_encoded != url) xfree ((char *) url_encoded); } else { if (url_encoded == url) u->url = xstrdup (url); else u->url = url_encoded; } return u; error: /* Cleanup in case of error: */ if (url_encoded && url_encoded != url) xfree (url_encoded); /* Transmit the error code to the caller, if the caller wants to know. */ if (error) *error = error_code; return NULL;}/* Return the error message string from ERROR_CODE, which should have been retrieved from url_parse. The error message is translated. */const char *url_error (int error_code){ assert (error_code >= 0 && error_code < countof (parse_errors)); return _(parse_errors[error_code]);}/* Split PATH into DIR and FILE. PATH comes from the URL and is expected to be URL-escaped. The path is split into directory (the part up to the last slash) and file (the part after the last slash), which are subsequently unescaped. Examples: PATH DIR FILE "foo/bar/baz" "foo/bar" "baz" "foo/bar/" "foo/bar" "" "foo" "" "foo" "foo/bar/baz%2fqux" "foo/bar" "baz/qux" (!) DIR and FILE are freshly allocated. */static voidsplit_path (const char *path, char **dir, char **file){ char *last_slash = strrchr (path, '/'); if (!last_slash) { *dir = xstrdup (""); *file = xstrdup (path); } else { *dir = strdupdelim (path, last_slash); *file = xstrdup (last_slash + 1); } url_unescape (*dir); url_unescape (*file);}/* Note: URL's "full path" is the path with the query string and params appended. The "fragment" (#foo) is intentionally ignored, but that might be changed. For example, if the original URL was "http://host:port/foo/bar/baz;bullshit?querystring#uselessfragment", the full path will be "/foo/bar/baz;bullshit?querystring". *//* Return the length of the full path, without the terminating zero. */static intfull_path_length (const struct url *url){ int len = 0;#define FROB(el) if (url->el) len += 1 + strlen (url->el) FROB (path); FROB (params); FROB (query);#undef FROB return len;}/* Write out the full path. */static voidfull_path_write (const struct url *url, char *where){#define FROB(el, chr) do { \ char *f_el = url->el; \ if (f_el) { \ int l = strlen (f_el); \ *where++ = chr; \ memcpy (where, f_el, l); \ where += l; \ } \} while (0) FROB (path, '/'); FROB (params, ';'); FROB (query, '?');#undef FROB}/* Public function for getting the "full path". E.g. if u->path is "foo/bar" and u->query is "param=value", full_path will be "/foo/bar?param=value". */char *url_full_path (const struct url *url){ int length = full_path_length (url); char *full_path = xmalloc (length + 1); full_path_write (url, full_path); full_path[length] = '\0'; return full_path;}/* Unescape CHR in an otherwise escaped STR. Used to selectively escaping of certain characters, such as "/" and ":". Returns a count of unescaped chars. */static voidunescape_single_char (char *str, char chr){ const char c1 = XNUM_TO_DIGIT (chr >> 4); const char c2 = XNUM_TO_DIGIT (chr & 0xf); char *h = str; /* hare */ char *t = str; /* tortoise */ for (; *h; h++, t++) { if (h[0] == '%' && h[1] == c1 && h[2] == c2) { *t = chr; h += 2; } else *t = *h; } *t = '\0';}/* Escape unsafe and reserved characters, except for the slash characters. */static char *url_escape_dir (const char *dir){ char *newdir = url_escape_1 (dir, urlchr_unsafe | urlchr_reserved, 1); if (newdir == dir) return (char *)dir; unescape_single_char (newdir, '/'); return newdir;}/* Sync u->path and u->url with u->dir and u->file. Called after u->file or u->dir have been changed, typically by the FTP code. */static voidsync_path (struct url *u){ char *newpath, *efile, *edir; xfree (u->path); /* u->dir and u->file are not escaped. URL-escape them before reassembling them into u->path. That way, if they contain separators like '?' or even if u->file contains slashes, the path will be correctly assembled. (u->file can contain slashes if the URL specifies it with %2f, or if an FTP server returns it.) */ edir = url_escape_dir (u->dir); efile = url_escape_1 (u->file, urlchr_unsafe | urlchr_reserved, 1); if (!*edir) newpath = xstrdup (efile); else { int dirlen = strlen (edir); int filelen = strlen (efile); /* Copy "DIR/FILE" to newpath. */ char *p = newpath = xmalloc (dirlen + 1 + filelen + 1); memcpy (p, edir, dirlen); p += dirlen; *p++ = '/'; memcpy (p, efile, filelen); p += filelen; *p = '\0'; } u->path = newpath; if (edir != u->dir) xfree (edir); if (efile != u->file) xfree (efile); /* Regenerate u->url as well. */ xfree (u->url); u->url = url_string (u, URL_AUTH_SHOW);}/* Mutators. Code in ftp.c insists on changing u->dir and u->file. This way we can sync u->path and u->url when they get changed. */voidurl_set_dir (struct url *url, const char *newdir){ xfree (url->dir); url->dir = xstrdup (newdir); sync_path (url);}voidurl_set_file (struct url *url, const char *newfile){ xfree (url->file); url->file = xstrdup (newfile); sync_path (url);}voidurl_free (struct url *url){ xfree (url->host); xfree (url->path); xfree (url->url); xfree_null (url->params); xfree_null (url->query); xfree_null (url->fragment); xfree_null (url->user); xfree_null (url->passwd); xfree (url->dir); xfree (url->file); xfree (url);}/* Create all the necessary directories for PATH (a file). Calls make_directory internally. */intmkalldirs (const char *path){ const char *p; char *t; struct_stat st; int res; p = path + strlen (path); for (; *p != '/' && p != path; p--) ; /* Don't create if it's just a file. */ if ((p == path) && (*p != '/')) return 0; t = strdupdelim (path, p); /* Check whether the directory exists. */ if ((stat (t, &st) == 0)) { if (S_ISDIR (st.st_mode)) { xfree (t); return 0; } else { /* If the dir exists as a file name, remove it first. This is *only* for Wget to work with buggy old CERN http servers. Here is the scenario: When Wget tries to retrieve a directory without a slash, e.g. http://foo/bar (bar being a directory), CERN server will not redirect it too http://foo/bar/ -- it will generate a directory listing containing links to bar/file1, bar/file2, etc. Wget will lose because it saves this HTML listing to a file `bar', so it cannot create the directory. To work around this, if the file of the same name exists, we just remove it and create the directory anyway. */ DEBUGP (("Removing %s because of directory danger!\n", t)); unlink (t); } } res = make_directory (t); if (res != 0) logprintf (LOG_NOTQUIET, "%s: %s", t, strerror (errno)); xfree (t); return res;}/* Functions for constructing the file name out of URL components. *//* A growable string structure, used by url_file_name and friends. This should perhaps be moved to utils.c. The idea is to have a convenient and efficient way to construct a string by having various functions append data to it. Instead of passing the obligatory BASEVAR, SIZEVAR and TAILPOS to all the functions in questions, we pass the pointer to this struct. */struct growable { char *base; int size; int tail;};/* Ensure that the string can accept APPEND_COUNT more characters past the current TAIL position. If necessary, this will grow the string and update its allocated size. If the string is already large enough to take TAIL+APPEND_COUNT characters, this does nothing. */#define GROW(g, append_size) do { \ struct growable *G_ = g; \ DO_REALLOC (G_->base, G_->size, G_->tail + append_size, char); \} while (0)/* Return the tail position of the string. */#define TAIL(r) ((r)->base + (r)->tail)/* Move the tail position by APPEND_COUNT characters. */#define TAIL_INCR(r, append_count) ((r)->tail += append_count)/* Append the string STR to DEST. NOTICE: the string in DEST is not terminated. */static voidappend_string (const char *str, struct growable *dest){ int l = strlen (str); GROW (dest, l); memcpy (TAIL (dest), str, l); TAIL_INCR (dest, l);}/* Append CH to DEST. For example, append_char (0, DEST) zero-terminates DEST. */static voidappend_char (char ch, struct growable *dest){ GROW (dest, 1); *TAIL (dest) = ch; TAIL_INCR (dest, 1);}enum { filechr_not_unix = 1, /* unusable on Unix, / and \0 */ filechr_not_windows = 2, /* unusable on Windows, one of \|/<>?:*" */ filechr_control = 4 /* a control character, e.g. 0-31 */};#define FILE_CHAR_TEST(c, mask) (filechr_table[(unsigned char)(c)] & (mask))/* Shorthands for the table: */#define U filechr_not_unix#define W filechr_not_windows#define C filechr_control#define UW U|W#define UWC U|W|C/* Table of characters unsafe under various conditions (see above). Arguably we could also claim `%' to be unsafe, since we use it as the escape character. If we ever want to be able to reliably translate file name back to URL, this would become important crucial. Right now, it's better to be minimal in escaping. */static const unsigned char filechr_table[256] ={UWC, C, C, C, C, C, C, C, /* NUL SOH STX ETX EOT ENQ ACK BEL */ C, C, C, C, C, C, C, C, /* BS HT LF VT FF CR SO SI */ C, C, C, C, C, C, C, C, /* DLE DC1 DC2 DC3 DC4 NAK SYN ETB */ C, C, C, C, C, C, C, C, /* CAN EM SUB ESC FS GS RS US */ 0, 0, W, 0, 0, 0, 0, 0, /* SP ! " # $ % & ' */ 0, 0, W, 0, 0, 0, 0, UW, /* ( ) * + , - . / */ 0, 0, 0, 0, 0, 0, 0, 0, /* 0 1 2 3 4 5 6 7 */ 0, 0, W, 0, W, 0, W, W, /* 8 9 : ; < = > ? */ 0, 0, 0, 0, 0, 0, 0, 0, /* @ A B C D E F G */ 0, 0, 0, 0, 0, 0, 0, 0, /* H I J K L M N O */ 0, 0, 0, 0, 0, 0, 0, 0, /* P Q R S T U V W */ 0, 0, 0, 0, W, 0, 0, 0, /* X Y Z [ \ ] ^ _ */ 0, 0, 0, 0, 0, 0, 0, 0, /* ` a b c d e f g */ 0, 0, 0, 0, 0, 0, 0, 0, /* h i j k l m n o */ 0, 0, 0, 0, 0, 0, 0, 0, /* p q r s t u v w */ 0, 0, 0, 0, W, 0, 0, C, /* x y z { | } ~ DEL */ C, C, C, C, C, C, C, C, C, C, C, C, C, C, C, C, /* 128-143 */ C, C, C, C, C, C, C, C, C, C, C, C, C, C, C, C, /* 144-159 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,};#undef U#undef W#undef C#undef UW#undef UWC/* FN_PORT_SEP is the separator between host and port in file names for non-standard port numbers. On Unix this is normally ':', as in "www.xemacs.org:4001/index.html". Under Windows, we set it to + because Windows can't handle ':' in file names. */
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -