📄 retr.c

📁 wget讓你可以在console介面下
💻 C
📖 第 1 页 / 共 2 页
字号:
12 下一页
/* File retrieval.
   Copyright (C) 1995, 1996, 1997, 1998, 2000, 2001 Free Software Foundation, Inc.

This file is part of GNU Wget.

GNU Wget is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or (at
your option) any later version.

GNU Wget is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with Wget; if not, write to the Free Software
Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.

In addition, as a special exception, the Free Software Foundation
gives permission to link the code of its release of Wget with the
OpenSSL project's "OpenSSL" library (or with modified versions of it
that use the same license as the "OpenSSL" library), and distribute
the linked executables.  You must obey the GNU General Public License
in all respects for all of the code used other than "OpenSSL".  If you
modify this file, you may extend this exception to your version of the
file, but you are not obligated to do so.  If you do not wish to do
so, delete this exception statement from your version.  */

#include <config.h>

#include <stdio.h>
#include <stdlib.h>
#include <sys/types.h>
#ifdef HAVE_UNISTD_H
# include <unistd.h>
#endif /* HAVE_UNISTD_H */
#include <errno.h>
#ifdef HAVE_STRING_H
# include <string.h>
#else
# include <strings.h>
#endif /* HAVE_STRING_H */
#include <assert.h>

#include "wget.h"
#include "utils.h"
#include "retr.h"
#include "progress.h"
#include "url.h"
#include "recur.h"
#include "ftp.h"
#include "host.h"
#include "connect.h"
#include "hash.h"
#include "convert.h"
#include "ptimer.h"

#ifndef errno
extern int errno;
#endif

/* Total size of downloaded files.  Used to enforce quota.  */
SUM_SIZE_INT total_downloaded_bytes;

/* If non-NULL, the stream to which output should be written.  This
   stream is initialized when `-O' is used.  */
FILE *output_stream;

/* Whether output_document is a regular file we can manipulate,
   i.e. not `-' or a device file. */
int output_stream_regular;

/* Bookkeeping for bandwidth throttling.  CHUNK_BYTES is the number of
   bytes received since the current chunk began, CHUNK_START is the
   timer reading at which the chunk began, and SLEEP_ADJUST is the
   correction (in the timer's units, treated as milliseconds below)
   carried over from the previous sleep.  */
static struct {
  wgint chunk_bytes;
  double chunk_start;
  double sleep_adjust;
} limit_data;

/* Clear all throttling state.  fd_read_body calls this before each
   download for which opt.limit_rate is set.  */
static void
limit_bandwidth_reset (void)
{
  limit_data.chunk_bytes = 0;
  limit_data.chunk_start = 0;
  limit_data.sleep_adjust = 0;
}

/* Limit the bandwidth by pausing the download for an amount of time.
   BYTES is the number of bytes received from the network, and TIMER
   is the timer that started at the beginning of download.

   The caller must only invoke this when opt.limit_rate is non-zero,
   because it is used as a divisor.  */
static void
limit_bandwidth (wgint bytes, struct ptimer *timer)
{
  double delta_t = ptimer_read (timer) - limit_data.chunk_start;
  double expected;

  limit_data.chunk_bytes += bytes;

  /* Calculate the amount of time we expect downloading the chunk
     should take.  If in reality it took less time, sleep to
     compensate for the difference.  */
  expected = 1000.0 * limit_data.chunk_bytes / opt.limit_rate;

  if (expected > delta_t)
    {
      double slp = expected - delta_t + limit_data.sleep_adjust;
      double t0, t1;

      if (slp < 200)
        {
          /* Too short to be worth sleeping for; keep accumulating
             into the current chunk and try again on the next call.  */
          DEBUGP (("deferring a %.2f ms sleep (%s/%.2f).\n",
                   slp, number_to_static_string (limit_data.chunk_bytes),
                   delta_t));
          return;
        }
      DEBUGP (("\nsleeping %.2f ms for %s bytes, adjust %.2f ms\n",
               slp, number_to_static_string (limit_data.chunk_bytes),
               limit_data.sleep_adjust));

      t0 = ptimer_read (timer);
      xsleep (slp / 1000);
      t1 = ptimer_measure (timer);

      /* Due to scheduling, we probably slept slightly longer (or
         shorter) than desired.  Calculate the difference between the
         desired and the actual sleep, and adjust the next sleep by
         that amount.  */
      limit_data.sleep_adjust = slp - (t1 - t0);

      /* If sleep_adjust is very large, it's likely due to suspension
         and not clock inaccuracy.  Don't enforce those.  */
      if (limit_data.sleep_adjust > 500)
        limit_data.sleep_adjust = 500;
      else if (limit_data.sleep_adjust < -500)
        limit_data.sleep_adjust = -500;
    }

  /* Start a new chunk.  */
  limit_data.chunk_bytes = 0;
  limit_data.chunk_start = ptimer_read (timer);
}

#ifndef MIN
# define MIN(i, j) ((i) <= (j) ? (i) : (j))
#endif

/* Write data in BUF to OUT.  However, if *SKIP is non-zero, skip that
   amount of data and decrease *SKIP.  Increment *WRITTEN by the
   amount of data actually handed to the stream.

   Returns non-zero on success (including when OUT is NULL or when the
   whole buffer was skipped), zero if the stream is in an error state
   after the write and flush.  */
static int
write_data (FILE *out, const char *buf, int bufsize, wgint *skip,
            wgint *written)
{
  size_t nwritten;

  if (!out)
    return 1;

  if (*skip > bufsize)
    {
      /* The whole buffer lies inside the region to be skipped.  */
      *skip -= bufsize;
      return 1;
    }
  if (*skip)
    {
      /* Only the leading *SKIP bytes are to be dropped.  */
      buf += *skip;
      bufsize -= *skip;
      *skip = 0;
      if (bufsize == 0)
        return 1;
    }

  /* Credit only what fwrite reports as written, so that a short write
     does not inflate the caller's byte count.  */
  nwritten = fwrite (buf, 1, bufsize, out);
  *written += (wgint) nwritten;

  /* Immediately flush the downloaded data.  This should not hinder
     performance: fast downloads will arrive in large 16K chunks
     (which stdio would write out immediately anyway), and slow
     downloads wouldn't be limited by disk speed.  */
  fflush (out);
  return !ferror (out);
}

/* Read the contents of file descriptor FD until the connection
   terminates or a read error occurs.  The data is read in portions of
   up to 16K and written to OUT as it arrives.  If opt.verbose is set,
   the progress is shown.

   TOREAD is the amount of data expected to arrive, normally only used
   by the progress gauge.

   STARTPOS is the position from which the download starts, used by
   the progress gauge.  If QTYREAD is non-NULL, the value it points to
   is incremented by the amount of data read from the network.  If
   QTYWRITTEN is non-NULL, the value it points to is incremented by
   the amount of data written to disk.  The time it took to download
   the data (in milliseconds) is stored to ELAPSED, if non-NULL.

   FLAGS may contain rb_read_exactly (read exactly TOREAD bytes, so
   that TOREAD==0 means "read nothing") and rb_skip_startpos (discard
   the first STARTPOS bytes instead of writing them).

   The function exits and returns the result of the last read: zero
   on EOF.  In case of error while reading data, -1 is returned.  In
   case of error while writing data, -2 is returned.  */
int
fd_read_body (int fd, FILE *out, wgint toread, wgint startpos,
              wgint *qtyread, wgint *qtywritten, double *elapsed, int flags)
{
  int ret = 0;

  static char dlbuf[16384];
  int dlbufsize = sizeof (dlbuf);

  struct ptimer *timer = NULL;
  double last_successful_read_tm = 0;

  /* The progress gauge, set according to the user preferences. */
  void *progress = NULL;

  /* Non-zero if the progress gauge is interactive, i.e. if it can
     continually update the display.  When true, smaller timeout
     values are used so that the gauge can update the display when
     data arrives slowly. */
  int progress_interactive = 0;

  int exact = flags & rb_read_exactly;
  wgint skip = 0;

  /* How much data we've read/written.  */
  wgint sum_read = 0;
  wgint sum_written = 0;

  if (flags & rb_skip_startpos)
    skip = startpos;

  if (opt.verbose)
    {
      /* If we're skipping STARTPOS bytes, pass 0 as the INITIAL
         argument to progress_create because the indicator doesn't
         (yet) know about "skipping" data.  */
      progress = progress_create (skip ? 0 : startpos, startpos + toread);
      progress_interactive = progress_interactive_p (progress);
    }

  if (opt.limit_rate)
    limit_bandwidth_reset ();

  /* A timer is needed for tracking progress, for throttling, and for
     tracking elapsed time.  If any of these is requested, start
     the timer.  */
  if (progress || opt.limit_rate || elapsed)
    {
      timer = ptimer_new ();
      last_successful_read_tm = 0;
    }

  /* Use a smaller buffer for low requested bandwidths.  For example,
     with --limit-rate=2k, it doesn't make sense to slurp in 16K of
     data and then sleep for 8s.  With buffer size equal to the limit,
     we never have to sleep for more than one second.  */
  if (opt.limit_rate && opt.limit_rate < dlbufsize)
    dlbufsize = opt.limit_rate;

  /* Read from FD while there is data to read.  Normally toread==0
     means that it is unknown how much data is to arrive.  However, if
     EXACT is set, then toread==0 means what it says: that no data
     should be read.  */
  while (!exact || (sum_read < toread))
    {
      int rdsize = exact ? MIN (toread - sum_read, dlbufsize) : dlbufsize;
      double tmout = opt.read_timeout;

      if (progress_interactive)
        {
          /* For interactive progress gauges, always specify a ~1s
             timeout, so that the gauge can be updated regularly even
             when the data arrives very slowly or stalls.  */
          tmout = 0.95;
          if (opt.read_timeout)
            {
              double waittm;
              waittm = (ptimer_read (timer) - last_successful_read_tm) / 1000;
              if (waittm + tmout > opt.read_timeout)
                {
                  /* Don't let total idle time exceed read timeout. */
                  tmout = opt.read_timeout - waittm;
                  if (tmout < 0)
                    {
                      /* We've already exceeded the timeout. */
                      ret = -1, errno = ETIMEDOUT;
                      break;
                    }
                }
            }
        }

      ret = fd_read (fd, dlbuf, rdsize, tmout);

      if (progress_interactive && ret < 0 && errno == ETIMEDOUT)
        ret = 0;                /* interactive timeout, handled above */
      else if (ret <= 0)
        break;                  /* EOF or read error */

      /* Refresh the timer whenever one exists, i.e. also when only
         ELAPSED was requested.  The value stored to *ELAPSED below
         comes from ptimer_read, which the throttling code above treats
         as the reading as of the last ptimer_measure -- so without
         this a quiet, unthrottled download would never update it.  */
      if (timer)
        {
          ptimer_measure (timer);
          if (ret > 0)
            last_successful_read_tm = ptimer_read (timer);
        }

      if (ret > 0)
        {
          sum_read += ret;
          if (!write_data (out, dlbuf, ret, &skip, &sum_written))
            {
              ret = -2;
              goto out_;
            }
        }

      if (opt.limit_rate)
        limit_bandwidth (ret, timer);

      if (progress)
        progress_update (progress, ret, ptimer_read (timer));
#ifdef WINDOWS
      if (toread > 0 && !opt.quiet)
        ws_percenttitle (100.0 *
                         (startpos + sum_read) / (startpos + toread));
#endif
    }

  /* Collapse any read-side error code into -1; -2 is reserved for
     write errors, which jump straight to out_.  */
  if (ret < -1)
    ret = -1;

 out_:
  if (progress)
    progress_finish (progress, ptimer_read (timer));

  if (elapsed)
    *elapsed = ptimer_read (timer);
  if (timer)
    ptimer_destroy (timer);

  if (qtyread)
    *qtyread += sum_read;
  if (qtywritten)
    *qtywritten += sum_written;

  return ret;
}

/* Read a hunk of data from FD, up until a terminator.  The terminator
   is whatever the TERMINATOR function determines it to be; for
   example, it can be a line of data, or the head of an HTTP response.
   The function returns the data read allocated with malloc.

   In case of error, NULL is returned.  In case of EOF and no data
   read, NULL is returned and errno set to 0.  In case of EOF with
   data having been read, the data is returned, but it will
   (obviously) not contain the terminator.

   The idea is to be able to read a line of input, or otherwise a hunk
   of text, such as the head of an HTTP request, without crossing the
   boundary, so that the next call to fd_read etc. reads the data
   after the hunk.  To achieve that, this function does the following:

   1. Peek at available data.

   2. Determine whether the peeked data, along with the previously
      read data, includes the terminator.

      2a. If yes, read the data until the end of the terminator, and
          exit.

      2b. If no, read the peeked data and goto 1.

   The function is careful to assume as little as possible about the
   implementation of peeking.  For example, every peek is followed by
   a read.  If the read returns a different amount of data, the
   process is retried until all data arrives safely.

   SIZEHINT is the buffer size sufficient to hold all the data in the
   typical case (it is used as the initial buffer size).  MAXSIZE is
   the maximum amount of memory this function is allowed to allocate,
   or 0 if no upper limit is to be enforced.  When MAXSIZE is non-zero
   it must be at least SIZEHINT.  If the limit is reached without the
   terminator having been seen, NULL is returned and errno is set to
   ENOMEM.

   This function should be used as a building block for other
   functions -- see fd_read_line as a simple example.  */
char *
fd_read_hunk (int fd, hunk_terminator_t terminator, long sizehint, long maxsize)
{
  long bufsize = sizehint;
  char *hunk = xmalloc (bufsize);
  int tail = 0;                 /* tail position in HUNK */

  /* MAXSIZE of 0 means "unlimited", so only check the relation
     between the two sizes when a limit is actually in force.  */
  assert (!maxsize || maxsize >= bufsize);

  while (1)
    {
      const char *end;
      int pklen, rdlen, remain;

      /* First, peek at the available data. */
      pklen = fd_peek (fd, hunk + tail, bufsize - 1 - tail, -1.0);
      if (pklen < 0)
        {
          xfree (hunk);
          return NULL;
        }
      end = terminator (hunk, tail, pklen);
      if (end)
        {
          /* The data contains the terminator: we'll drain the data up
             to the end of the terminator.  */
          remain = end - (hunk + tail);
          if (remain == 0)
            {
              /* No more data needs to be read. */
              hunk[tail] = '\0';
              return hunk;
            }
          if (bufsize - 1 < tail + remain)
            {
              /* Make room for the rest of the terminator plus the
                 trailing NUL.  */
              bufsize = tail + remain + 1;
              hunk = xrealloc (hunk, bufsize);
            }
        }
      else
        /* No terminator: simply read the data we know is (or should
           be) available.  */
        remain = pklen;

      /* Now, read the data.  Note that we make no assumptions about
         how much data we'll get.  (Some TCP stacks are notorious for
         read returning less data than the previous MSG_PEEK.)  */
      rdlen = fd_read (fd, hunk + tail, remain, 0.0);
      if (rdlen < 0)
        {
          xfree (hunk);
          return NULL;
        }
      tail += rdlen;
      hunk[tail] = '\0';

      if (rdlen == 0)
        {
          if (tail == 0)
            {
              /* EOF without anything having been read */
              xfree (hunk);
              errno = 0;
              return NULL;
            }
          else
            /* EOF seen: return the data we've read. */
            return hunk;
        }
      if (end && rdlen == remain)
        /* The terminator was seen and the remaining data drained --
           we got what we came for.  */
        return hunk;

      /* Keep looping until all the data arrives. */

      if (tail == bufsize - 1)
        {
          /* Double the buffer size, but refuse to allocate more than
             MAXSIZE bytes.  */
          if (maxsize && bufsize >= maxsize)
            {
              xfree (hunk);
              errno = ENOMEM;
              return NULL;
            }
          bufsize <<= 1;
          if (maxsize && bufsize > maxsize)
            bufsize = maxsize;
          hunk = xrealloc (hunk, bufsize);
        }
    }
}

/* Terminator callback for fd_read_hunk that recognizes a single line.
   Searches the PEEKLEN freshly peeked bytes that start at HUNK+OLDLEN
   for a newline and returns a pointer just past it, or NULL if the
   peeked data contains no newline.  */
static const char *
line_terminator (const char *hunk, int oldlen, int peeklen)
{
  const char *p = memchr (hunk + oldlen, '\n', peeklen);
  if (p)
    /* p+1 because we want the line to include '\n' */
    return p + 1;
  return NULL;
}

/* The maximum size of the single line we agree to accept.  This is
   not meant to impose an arbitrary limit, but to protect the user
   from Wget slurping up available memory upon encountering malicious
   or buggy server output.  Define it to 0 to remove the limit.  */
#define FD_READ_LINE_MAX 4096

/* Read one line from FD and return it.  The line is allocated using
   malloc, but is never larger than FD_READ_LINE_MAX.

   If an error occurs, or if no data can be read, NULL is returned.
   In the former case errno indicates the error condition, and in the
   latter case, errno is 0.  */
char *
fd_read_line (int fd)
{
  return fd_read_hunk (fd, line_terminator, 128, FD_READ_LINE_MAX);
}
12 下一页
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -