📄 convert.c
字号:
"...old-contents..." <--- size ---> (with quotes) OR: ...old-contents... <--- size --> (no quotes) */ if (*p == '\"' || *p == '\'') { quote_char = *p; quote_flag = 1; ++p; size -= 2; /* disregard opening and closing quote */ } putc (quote_char, fp); fputs (new_text, fp); /* Look for fragment identifier, if any. */ if (find_fragment (p, size, &frag_beg, &frag_end)) fwrite (frag_beg, 1, frag_end - frag_beg, fp); p += size; if (quote_flag) ++p; putc (quote_char, fp); return p;}/* The same as REPLACE_ATTR, but used when replacing <meta http-equiv=refresh content="new_text"> because we need to append "timeout_value; URL=" before the next_text. */static const char *replace_attr_refresh_hack (const char *p, int size, FILE *fp, const char *new_text, int timeout){ /* "0; URL=..." */ char *new_with_timeout = (char *)alloca (numdigit (timeout) + 6 /* "; URL=" */ + strlen (new_text) + 1); sprintf (new_with_timeout, "%d; URL=%s", timeout, new_text); return replace_attr (p, size, fp, new_with_timeout);}/* Find the first occurrence of '#' in [BEG, BEG+SIZE) that is not preceded by '&'. If the character is not found, return zero. If the character is found, return 1 and set BP and EP to point to the beginning and end of the region. This is used for finding the fragment indentifiers in URLs. */static intfind_fragment (const char *beg, int size, const char **bp, const char **ep){ const char *end = beg + size; int saw_amp = 0; for (; beg < end; beg++) { switch (*beg) { case '&': saw_amp = 1; break; case '#': if (!saw_amp) { *bp = beg; *ep = end; return 1; } /* fallthrough */ default: saw_amp = 0; } } return 0;}/* Quote FILE for use as local reference to an HTML file. We quote ? as %3F to avoid passing part of the file name as the parameter when browsing the converted file through HTTP. However, it is safe to do this only when `--html-extension' is turned on. 
This is because converting "index.html?foo=bar" to
   "index.html%3Ffoo=bar" would break local browsing, as the latter
   isn't even recognized as an HTML file!  However, converting
   "index.html?foo=bar.html" to "index.html%3Ffoo=bar.html" should be
   safe for both local and HTTP-served browsing.  */

static char *
local_quote_string (const char *file)
{
  const char *file_sans_qmark;
  int qm;				/* number of '?' in FILE */

  /* Without `--html-extension' the question marks must be left alone;
     see the comment above.  */
  if (!opt.html_extension)
    return html_quote_string (file);

  qm = count_char (file, '?');

  if (qm)
    {
      const char *from = file;
      char *to, *newname;

      /* qm * 2 because we replace each question mark with "%3F",
	 i.e. replace one char with three, hence two more.  */
      int fsqlen = strlen (file) + qm * 2;

      to = newname = (char *)alloca (fsqlen + 1);
      for (; *from; from++)
	{
	  if (*from != '?')
	    *to++ = *from;
	  else
	    {
	      *to++ = '%';
	      *to++ = '3';
	      *to++ = 'F';
	    }
	}
      assert (to - newname == fsqlen);
      *to = '\0';

      file_sans_qmark = newname;
    }
  else
    file_sans_qmark = file;

  return html_quote_string (file_sans_qmark);
}

/* Book-keeping code for dl_file_url_map, dl_url_file_map,
   downloaded_html_list, and downloaded_html_set.

   Other code calls these functions to let us know that a file has
   been downloaded.  */

/* Lazily create the two URL<->file hash tables on first use.  */
#define ENSURE_TABLES_EXIST do {			\
  if (!dl_file_url_map)					\
    dl_file_url_map = make_string_hash_table (0);	\
  if (!dl_url_file_map)					\
    dl_url_file_map = make_string_hash_table (0);	\
} while (0)

/* Return 1 if S1 and S2 are the same, except for "/index.html".  The
   cases in which it returns one are (substitute any substring for
   "foo"):

   m("foo/index.html", "foo/")  ==> 1
   m("foo/", "foo/index.html")  ==> 1
   m("foo", "foo/index.html")   ==> 1
   m("foo", "foo/")             ==> 1
   m("foo", "foo")              ==> 1  */

static int
match_except_index (const char *s1, const char *s2)
{
  int i;
  const char *lng;		/* the longer of the two strings */

  /* Skip common substring. */
  for (i = 0; *s1 && *s2 && *s1 == *s2; s1++, s2++, i++)
    ;
  if (i == 0)
    /* Strings differ at the very beginning -- bail out.  We need to
       check this explicitly to avoid `lng - 1' reading outside the
       array.
*/
    return 0;

  if (!*s1 && !*s2)
    /* Both strings hit EOF -- strings are equal. */
    return 1;
  else if (*s1 && *s2)
    /* Strings are randomly different, e.g. "/foo/bar" and "/foo/qux". */
    return 0;
  else if (*s1)
    /* S1 is the longer one. */
    lng = s1;
  else
    /* S2 is the longer one. */
    lng = s2;

  /* At this point LNG points at the suffix by which the longer string
     exceeds the shorter one, e.g.:

     foo
     foo/
     foo/index.html
        or
     foo/index.html
        ^
	   ^            */

  if (*lng != '/')
    /* The right-hand case: back up onto the '/' the shorter string
       ended with.  */
    --lng;

  if (*lng == '/' && *(lng + 1) == '\0')
    /* foo  */
    /* foo/ */
    return 1;

  return 0 == strcmp (lng, "/index.html");
}

/* hash_table_map callback: remove from dl_url_file_map every entry
   whose mapped file name equals ARG, freeing the strdup'ed key and
   value.  NOTE(review): this removes entries while the table is being
   mapped over -- verify that hash_table_map tolerates that.  */
static int
dissociate_urls_from_file_mapper (void *key, void *value, void *arg)
{
  char *mapping_url = (char *)key;
  char *mapping_file = (char *)value;
  char *file = (char *)arg;

  if (0 == strcmp (mapping_file, file))
    {
      hash_table_remove (dl_url_file_map, mapping_url);
      xfree (mapping_url);
      xfree (mapping_file);
    }

  /* Continue mapping. */
  return 0;
}

/* Remove all associations from various URLs to FILE from
   dl_url_file_map.  */

static void
dissociate_urls_from_file (const char *file)
{
  hash_table_map (dl_url_file_map, dissociate_urls_from_file_mapper,
		  (char *)file);
}

/* Register that URL has been successfully downloaded to FILE.  This
   is used by the link conversion code to convert references to URLs
   to references to local files.  It is also being used to check if a
   URL has already been downloaded.  */

void
register_download (const char *url, const char *file)
{
  char *old_file, *old_url;

  ENSURE_TABLES_EXIST;

  /* With some forms of retrieval, it is possible, although not likely
     or particularly desirable.  If both are downloaded, the second
     download will override the first one.  When that happens,
     dissociate the old file name from the URL.  */

  if (hash_table_get_pair (dl_file_url_map, file, &old_file, &old_url))
    {
      if (0 == strcmp (url, old_url))
	/* We have somehow managed to download the same URL twice.
	   Nothing to do.
*/
	return;

      if (match_except_index (url, old_url)
	  && !hash_table_contains (dl_url_file_map, url))
	/* The two URLs differ only in the "index.html" ending.  For
	   example, one is "http://www.server.com/", and the other is
	   "http://www.server.com/index.html".  Don't remove the old
	   one, just add the new one as a non-canonical entry.  */
	goto url_only;

      /* Dissociate the old URL from FILE before installing the new
	 mapping, freeing the strdup'ed pair owned by the table.  */
      hash_table_remove (dl_file_url_map, file);
      xfree (old_file);
      xfree (old_url);

      /* Remove all the URLs that point to this file.  Yes, there can
	 be more than one such URL, because we store redirections as
	 multiple entries in dl_url_file_map.  For example, if URL1
	 redirects to URL2 which gets downloaded to FILE, we map both
	 URL1 and URL2 to FILE in dl_url_file_map.  (dl_file_url_map
	 only points to URL2.)  When another URL gets loaded to FILE,
	 we want both URL1 and URL2 dissociated from it.

	 This is a relatively expensive operation because it performs
	 a linear search of the whole hash table, but it should be
	 called very rarely, only when two URLs resolve to the same
	 file name, *and* the "<file>.1" extensions are turned off.
	 In other words, almost never.  */
      dissociate_urls_from_file (file);
    }

  hash_table_put (dl_file_url_map, xstrdup (file), xstrdup (url));

 url_only:
  /* A URL->FILE mapping is not possible without a FILE->URL mapping.
     If the latter were present, it should have been removed by the
     above `if'.  So we could write:

	 assert (!hash_table_contains (dl_url_file_map, url));

     The above is correct when running in recursive mode where the
     same URL always resolves to the same file.  But if you do
     something like:

	 wget URL URL

     then the first URL will resolve to "FILE", and the other to
     "FILE.1".  In that case, FILE.1 will not be found in
     dl_file_url_map, but URL will still point to FILE in
     dl_url_file_map.
*/
  if (hash_table_get_pair (dl_url_file_map, url, &old_url, &old_file))
    {
      /* Drop the stale URL->file entry before installing the new
	 one.  */
      hash_table_remove (dl_url_file_map, url);
      xfree (old_url);
      xfree (old_file);
    }

  hash_table_put (dl_url_file_map, xstrdup (url), xstrdup (file));
}

/* Register that FROM has been redirected to TO.  This assumes that TO
   is successfully downloaded and already registered using
   register_download() above.  */

void
register_redirection (const char *from, const char *to)
{
  char *file;

  ENSURE_TABLES_EXIST;

  /* TO must already be registered, hence the assert.  */
  file = hash_table_get (dl_url_file_map, to);
  assert (file != NULL);
  if (!hash_table_contains (dl_url_file_map, from))
    hash_table_put (dl_url_file_map, xstrdup (from), xstrdup (file));
}

/* Register that the file has been deleted.  Removes both the
   FILE->URL entry and every URL->FILE entry pointing at it.  */

void
register_delete_file (const char *file)
{
  char *old_url, *old_file;

  ENSURE_TABLES_EXIST;

  if (!hash_table_get_pair (dl_file_url_map, file, &old_file, &old_url))
    return;

  hash_table_remove (dl_file_url_map, file);
  xfree (old_file);
  xfree (old_url);
  dissociate_urls_from_file (file);
}

/* Register that FILE is an HTML file that has been downloaded.
   (URL is currently unused here.)  */

void
register_html (const char *url, const char *file)
{
  if (!downloaded_html_set)
    downloaded_html_set = make_string_hash_table (0);
  else if (hash_table_contains (downloaded_html_set, file))
    return;

  /* The set and the list should use the same copy of FILE, but the
     slist interface insists on strduping the string it gets.  Oh
     well. */
  string_set_add (downloaded_html_set, file);
  downloaded_html_list = slist_prepend (downloaded_html_list, file);
}

/* Cleanup the data structures associated with recursive retrieving
   (the variables above).
*/voidconvert_cleanup (void){ if (dl_file_url_map) { free_keys_and_values (dl_file_url_map); hash_table_destroy (dl_file_url_map); dl_file_url_map = NULL; } if (dl_url_file_map) { free_keys_and_values (dl_url_file_map); hash_table_destroy (dl_url_file_map); dl_url_file_map = NULL; } if (downloaded_html_set) string_set_free (downloaded_html_set); slist_free (downloaded_html_list); downloaded_html_list = NULL;}/* Book-keeping code for downloaded files that enables extension hacks. *//* This table should really be merged with dl_file_url_map and downloaded_html_files. This was originally a list, but I changed it to a hash table beause it was actually taking a lot of time to find things in it. */static struct hash_table *downloaded_files_hash;/* We're storing "modes" of type downloaded_file_t in the hash table. However, our hash tables only accept pointers for keys and values. So when we need a pointer, we use the address of a downloaded_file_t variable of static storage. */ static downloaded_file_t *downloaded_mode_to_ptr (downloaded_file_t mode){ static downloaded_file_t v1 = FILE_NOT_ALREADY_DOWNLOADED, v2 = FILE_DOWNLOADED_NORMALLY, v3 = FILE_DOWNLOADED_AND_HTML_EXTENSION_ADDED, v4 = CHECK_FOR_FILE; switch (mode) { case FILE_NOT_ALREADY_DOWNLOADED: return &v1; case FILE_DOWNLOADED_NORMALLY: return &v2; case FILE_DOWNLOADED_AND_HTML_EXTENSION_ADDED: return &v3; case CHECK_FOR_FILE: return &v4; } return NULL;}/* Remembers which files have been downloaded. In the standard case, should be called with mode == FILE_DOWNLOADED_NORMALLY for each file we actually download successfully (i.e. not for ones we have failures on or that we skip due to -N). When we've downloaded a file and tacked on a ".html" extension due to -E, call this function with FILE_DOWNLOADED_AND_HTML_EXTENSION_ADDED rather than FILE_DOWNLOADED_NORMALLY. If you just want to check if a file has been previously added without adding it, call with mode == CHECK_FOR_FILE. 
Please be sure to call this function with local filenames, not remote URLs. */downloaded_file_tdownloaded_file (downloaded_file_t mode, const char *file){ downloaded_file_t *ptr; if (mode == CHECK_FOR_FILE) { if (!downloaded_files_hash) return FILE_NOT_ALREADY_DOWNLOADED; ptr = hash_table_get (downloaded_files_hash, file); if (!ptr) return FILE_NOT_ALREADY_DOWNLOADED; return *ptr; } if (!downloaded_files_hash) downloaded_files_hash = make_string_hash_table (0); ptr = hash_table_get (downloaded_files_hash, file); if (ptr) return *ptr; ptr = downloaded_mode_to_ptr (mode); hash_table_put (downloaded_files_hash, xstrdup (file), &ptr); return FILE_NOT_ALREADY_DOWNLOADED;}static intdf_free_mapper (void *key, void *value, void *ignored){ xfree (key); return 0;}voiddownloaded_files_free (void){ if (downloaded_files_hash) { hash_table_map (downloaded_files_hash, df_free_mapper, NULL); hash_table_destroy (downloaded_files_hash); downloaded_files_hash = NULL; }}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -