📄 convert.c
字号:
"...old-contents..." <--- size ---> (with quotes) OR: ...old-contents... <--- size --> (no quotes) */ if (*p == '\"' || *p == '\'') { quote_char = *p; quote_flag = 1; ++p; size -= 2; /* disregard opening and closing quote */ } putc (quote_char, fp); fputs (new_text, fp); /* Look for fragment identifier, if any. */ if (find_fragment (p, size, &frag_beg, &frag_end)) fwrite (frag_beg, 1, frag_end - frag_beg, fp); p += size; if (quote_flag) ++p; putc (quote_char, fp); return p;}/* The same as REPLACE_ATTR, but used when replacing <meta http-equiv=refresh content="new_text"> because we need to append "timeout_value; URL=" before the next_text. */static const char *replace_attr_refresh_hack (const char *p, int size, FILE *fp, const char *new_text, int timeout){ /* "0; URL=..." */ char *new_with_timeout = (char *)alloca (numdigit (timeout) + 6 /* "; URL=" */ + strlen (new_text) + 1); sprintf (new_with_timeout, "%d; URL=%s", timeout, new_text); return replace_attr (p, size, fp, new_with_timeout);}/* Find the first occurrence of '#' in [BEG, BEG+SIZE) that is not preceded by '&'. If the character is not found, return zero. If the character is found, return 1 and set BP and EP to point to the beginning and end of the region. This is used for finding the fragment indentifiers in URLs. */static intfind_fragment (const char *beg, int size, const char **bp, const char **ep){ const char *end = beg + size; int saw_amp = 0; for (; beg < end; beg++) { switch (*beg) { case '&': saw_amp = 1; break; case '#': if (!saw_amp) { *bp = beg; *ep = end; return 1; } /* fallthrough */ default: saw_amp = 0; } } return 0;}/* Quote FILE for use as local reference to an HTML file. We quote ? as %3F to avoid passing part of the file name as the parameter when browsing the converted file through HTTP. However, it is safe to do this only when `--html-extension' is turned on. 
This is because converting "index.html?foo=bar" to
   "index.html%3Ffoo=bar" would break local browsing, as the latter
   isn't even recognized as an HTML file!  However, converting
   "index.html?foo=bar.html" to "index.html%3Ffoo=bar.html" should be
   safe for both local and HTTP-served browsing.  */

static char *
local_quote_string (const char *file)
{
  const char *file_sans_qmark;
  int qm;				/* number of '?' in FILE */

  /* Without `--html-extension' the question marks must be left alone;
     see the comment above.  */
  if (!opt.html_extension)
    return html_quote_string (file);

  qm = count_char (file, '?');

  if (qm)
    {
      const char *from = file;
      char *to, *newname;

      /* qm * 2 because we replace each question mark with "%3F",
	 i.e. replace one char with three, hence two more.  */
      int fsqlen = strlen (file) + qm * 2;

      to = newname = (char *)alloca (fsqlen + 1);
      for (; *from; from++)
	{
	  if (*from != '?')
	    *to++ = *from;
	  else
	    {
	      *to++ = '%';
	      *to++ = '3';
	      *to++ = 'F';
	    }
	}
      assert (to - newname == fsqlen);
      *to = '\0';

      file_sans_qmark = newname;
    }
  else
    file_sans_qmark = file;

  return html_quote_string (file_sans_qmark);
}

/* Book-keeping code for dl_file_url_map, dl_url_file_map,
   downloaded_html_list, and downloaded_html_set.

   Other code calls these functions to let us know that a file has
   been downloaded.  */

/* Lazily create the two URL<->file hash tables on first use.  */
#define ENSURE_TABLES_EXIST do {			\
  if (!dl_file_url_map)					\
    dl_file_url_map = make_string_hash_table (0);	\
  if (!dl_url_file_map)					\
    dl_url_file_map = make_string_hash_table (0);	\
} while (0)

/* Return 1 if S1 and S2 are the same, except for "/index.html".  The
   cases in which it returns one are (substitute any substring for
   "foo"):

   m("foo/index.html", "foo/")  ==> 1
   m("foo/", "foo/index.html")  ==> 1
   m("foo", "foo/index.html")   ==> 1
   m("foo", "foo/")             ==> 1
   m("foo", "foo")              ==> 1  */

static int
match_except_index (const char *s1, const char *s2)
{
  int i;
  const char *lng;		/* the longer of the two strings */

  /* Skip common substring. */
  for (i = 0; *s1 && *s2 && *s1 == *s2; s1++, s2++, i++)
    ;
  if (i == 0)
    /* Strings differ at the very beginning -- bail out.  We need to
       check this explicitly to avoid `lng - 1' reading outside the
       array.
*/
    return 0;

  if (!*s1 && !*s2)
    /* Both strings hit EOF -- strings are equal. */
    return 1;
  else if (*s1 && *s2)
    /* Strings are randomly different, e.g. "/foo/bar" and "/foo/qux". */
    return 0;
  else if (*s1)
    /* S1 is the longer one. */
    lng = s1;
  else
    /* S2 is the longer one. */
    lng = s2;

  /* At this point LNG points at the suffix by which the longer string
     exceeds the shorter one, e.g.:

     foo
     foo/
     foo/index.html
        or
     foo/index.html
        ^
	   ^            */

  if (*lng != '/')
    /* The right-hand case: back up onto the '/' the shorter string
       ended with.  */
    --lng;

  if (*lng == '/' && *(lng + 1) == '\0')
    /* foo  */
    /* foo/ */
    return 1;

  return 0 == strcmp (lng, "/index.html");
}

/* hash_table_map callback: remove from dl_url_file_map every entry
   whose mapped file name equals ARG, freeing the strdup'ed key and
   value.  NOTE(review): this removes entries while the table is being
   mapped over -- verify that hash_table_map tolerates that.  */
static int
dissociate_urls_from_file_mapper (void *key, void *value, void *arg)
{
  char *mapping_url = (char *)key;
  char *mapping_file = (char *)value;
  char *file = (char *)arg;

  if (0 == strcmp (mapping_file, file))
    {
      hash_table_remove (dl_url_file_map, mapping_url);
      xfree (mapping_url);
      xfree (mapping_file);
    }

  /* Continue mapping. */
  return 0;
}

/* Remove all associations from various URLs to FILE from
   dl_url_file_map.  */

static void
dissociate_urls_from_file (const char *file)
{
  hash_table_map (dl_url_file_map, dissociate_urls_from_file_mapper,
		  (char *)file);
}

/* Register that URL has been successfully downloaded to FILE.  This
   is used by the link conversion code to convert references to URLs
   to references to local files.  It is also being used to check if a
   URL has already been downloaded.  */

void
register_download (const char *url, const char *file)
{
  char *old_file, *old_url;

  ENSURE_TABLES_EXIST;

  /* With some forms of retrieval, it is possible, although not likely
     or particularly desirable.  If both are downloaded, the second
     download will override the first one.  When that happens,
     dissociate the old file name from the URL.  */

  if (hash_table_get_pair (dl_file_url_map, file, &old_file, &old_url))
    {
      if (0 == strcmp (url, old_url))
	/* We have somehow managed to download the same URL twice.
	   Nothing to do.
*/
	return;

      if (match_except_index (url, old_url)
	  && !hash_table_contains (dl_url_file_map, url))
	/* The two URLs differ only in the "index.html" ending.  For
	   example, one is "http://www.server.com/", and the other is
	   "http://www.server.com/index.html".  Don't remove the old
	   one, just add the new one as a non-canonical entry.  */
	goto url_only;

      /* Dissociate the old URL from FILE before installing the new
	 mapping, freeing the strdup'ed pair owned by the table.  */
      hash_table_remove (dl_file_url_map, file);
      xfree (old_file);
      xfree (old_url);

      /* Remove all the URLs that point to this file.  Yes, there can
	 be more than one such URL, because we store redirections as
	 multiple entries in dl_url_file_map.  For example, if URL1
	 redirects to URL2 which gets downloaded to FILE, we map both
	 URL1 and URL2 to FILE in dl_url_file_map.  (dl_file_url_map
	 only points to URL2.)  When another URL gets loaded to FILE,
	 we want both URL1 and URL2 dissociated from it.

	 This is a relatively expensive operation because it performs
	 a linear search of the whole hash table, but it should be
	 called very rarely, only when two URLs resolve to the same
	 file name, *and* the "<file>.1" extensions are turned off.
	 In other words, almost never.  */
      dissociate_urls_from_file (file);
    }

  hash_table_put (dl_file_url_map, xstrdup (file), xstrdup (url));

 url_only:
  /* A URL->FILE mapping is not possible without a FILE->URL mapping.
     If the latter were present, it should have been removed by the
     above `if'.  So we could write:

	 assert (!hash_table_contains (dl_url_file_map, url));

     The above is correct when running in recursive mode where the
     same URL always resolves to the same file.  But if you do
     something like:

	 wget URL URL

     then the first URL will resolve to "FILE", and the other to
     "FILE.1".  In that case, FILE.1 will not be found in
     dl_file_url_map, but URL will still point to FILE in
     dl_url_file_map.
*/
  if (hash_table_get_pair (dl_url_file_map, url, &old_url, &old_file))
    {
      /* Drop the stale URL->file entry before installing the new
	 one.  */
      hash_table_remove (dl_url_file_map, url);
      xfree (old_url);
      xfree (old_file);
    }

  hash_table_put (dl_url_file_map, xstrdup (url), xstrdup (file));
}

/* Register that FROM has been redirected to TO.  This assumes that TO
   is successfully downloaded and already registered using
   register_download() above.  */

void
register_redirection (const char *from, const char *to)
{
  char *file;

  ENSURE_TABLES_EXIST;

  /* TO must already be registered, hence the assert.  */
  file = hash_table_get (dl_url_file_map, to);
  assert (file != NULL);
  if (!hash_table_contains (dl_url_file_map, from))
    hash_table_put (dl_url_file_map, xstrdup (from), xstrdup (file));
}

/* Register that the file has been deleted.  Removes both the
   FILE->URL entry and every URL->FILE entry pointing at it.  */

void
register_delete_file (const char *file)
{
  char *old_url, *old_file;

  ENSURE_TABLES_EXIST;

  if (!hash_table_get_pair (dl_file_url_map, file, &old_file, &old_url))
    return;

  hash_table_remove (dl_file_url_map, file);
  xfree (old_file);
  xfree (old_url);
  dissociate_urls_from_file (file);
}

/* Register that FILE is an HTML file that has been downloaded.
   (URL is currently unused here.)  */

void
register_html (const char *url, const char *file)
{
  if (!downloaded_html_set)
    downloaded_html_set = make_string_hash_table (0);
  else if (hash_table_contains (downloaded_html_set, file))
    return;

  /* The set and the list should use the same copy of FILE, but the
     slist interface insists on strduping the string it gets.  Oh
     well. */
  string_set_add (downloaded_html_set, file);
  downloaded_html_list = slist_prepend (downloaded_html_list, file);
}

/* Cleanup the data structures associated with recursive retrieving
   (the variables above).
*/voidconvert_cleanup (void){ if (dl_file_url_map) { free_keys_and_values (dl_file_url_map); hash_table_destroy (dl_file_url_map); dl_file_url_map = NULL; } if (dl_url_file_map) { free_keys_and_values (dl_url_file_map); hash_table_destroy (dl_url_file_map); dl_url_file_map = NULL; } if (downloaded_html_set) string_set_free (downloaded_html_set); slist_free (downloaded_html_list); downloaded_html_list = NULL;}/* Book-keeping code for downloaded files that enables extension hacks. *//* This table should really be merged with dl_file_url_map and downloaded_html_files. This was originally a list, but I changed it to a hash table beause it was actually taking a lot of time to find things in it. */static struct hash_table *downloaded_files_hash;/* We're storing "modes" of type downloaded_file_t in the hash table. However, our hash tables only accept pointers for keys and values. So when we need a pointer, we use the address of a downloaded_file_t variable of static storage. */ static downloaded_file_t *downloaded_mode_to_ptr (downloaded_file_t mode){ static downloaded_file_t v1 = FILE_NOT_ALREADY_DOWNLOADED, v2 = FILE_DOWNLOADED_NORMALLY, v3 = FILE_DOWNLOADED_AND_HTML_EXTENSION_ADDED, v4 = CHECK_FOR_FILE; switch (mode) { case FILE_NOT_ALREADY_DOWNLOADED: return &v1; case FILE_DOWNLOADED_NORMALLY: return &v2; case FILE_DOWNLOADED_AND_HTML_EXTENSION_ADDED: return &v3; case CHECK_FOR_FILE: return &v4; } return NULL;}/* Remembers which files have been downloaded. In the standard case, should be called with mode == FILE_DOWNLOADED_NORMALLY for each file we actually download successfully (i.e. not for ones we have failures on or that we skip due to -N). When we've downloaded a file and tacked on a ".html" extension due to -E, call this function with FILE_DOWNLOADED_AND_HTML_EXTENSION_ADDED rather than FILE_DOWNLOADED_NORMALLY. If you just want to check if a file has been previously added without adding it, call with mode == CHECK_FOR_FILE. 
Please be sure to call this function with local filenames, not remote URLs. */downloaded_file_tdownloaded_file (downloaded_file_t mode, const char *file){ downloaded_file_t *ptr; if (mode == CHECK_FOR_FILE) { if (!downloaded_files_hash) return FILE_NOT_ALREADY_DOWNLOADED; ptr = hash_table_get (downloaded_files_hash, file); if (!ptr) return FILE_NOT_ALREADY_DOWNLOADED; return *ptr; } if (!downloaded_files_hash) downloaded_files_hash = make_string_hash_table (0); ptr = hash_table_get (downloaded_files_hash, file); if (ptr) return *ptr; ptr = downloaded_mode_to_ptr (mode); hash_table_put (downloaded_files_hash, xstrdup (file), &ptr); return FILE_NOT_ALREADY_DOWNLOADED;}static intdf_free_mapper (void *key, void *value, void *ignored){ xfree (key); return 0;}voiddownloaded_files_free (void){ if (downloaded_files_hash) { hash_table_map (downloaded_files_hash, df_free_mapper, NULL); hash_table_destroy (downloaded_files_hash); downloaded_files_hash = NULL; }}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -