⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 http.c

📁 harvest是一个下载html网页的机器人
💻 C
📖 第 1 页 / 共 2 页
字号:
					add_buffer (mimebuf, buf, strlen (buf));					sprintf (buf, "Last-Modified: %s\n",						 mkrfc850 (&(up->lmt)));					add_buffer (mimebuf, buf, strlen (buf));					sprintf (buf,						 "Content-Type: text/x-soif\n");					add_buffer (mimebuf, buf, strlen (buf));#if 0 /* kjl/15apr2002 we don't do anything with SOIF yet */					/*					 *  Pull the SOIF out of the production database					 *  and store it into the file					 */					urldb_writesoif (urlbuf->data, up->fp);#endif					in_http_data = 1;					break;	/* Don't parse any more of the stuff */				}				in_http_header = 1;				in_http_data = 0;				up->http_version = xstrdup ("HTTP/1.0");				bufp += strlen ("HTTP/1.0");				n -= strlen ("HTTP/1.0");				while (isspace (*bufp) && n > 0)					bufp++, n--;				up->http_status_code = atoi (bufp);				while (isspace (*bufp) && n > 0)					bufp++, n--;				if (!HTTP_VALID_STATUS (up->http_status_code)) {					errorlog					    ("Failed HTTP/1.0 transfer for %s: %s (status code: %d)\n",					     up->url, bufp,					     up->http_status_code);					close (s);					fclose (up->fp);					free_buffer (urlbuf);					return 10;				}				up->http_reason_line = xstrdup (bufp);				tmp = strchr (up->http_reason_line, '\n');				if (tmp != NULL)					*tmp = '\0';				if (HTTP_ERROR_STATUS (up->http_status_code)) {					errorlog					    ("HTTP/1.0 transfer error for %s: %s\n",					     up->url, up->http_reason_line);					close (s);					fclose (up->fp);					free_buffer (urlbuf);					return 10;				}				while (*bufp != '\n' && n > 0)					bufp++, n--;				bufp++, n--;	/* start of header */				mimebuf = create_buffer (BUFSIZ);			}		}		if (in_http_header) {			Debug (21, 9,			       ("in_http_header, line %d, n=%d\n", __LINE__,				n));			for (i = 0; i < n; i++) {				if (bufp[i] != '\r') {	/* Skip CR chars */					if (bufp[i] == '\n') {						if (last_char_was_eol) {							in_http_header = 0;							in_http_data = 1;							Debug (21, 9,							       ("Found end of header\n"));							break;						} else {							
last_char_was_eol = 1;						}					} else {						last_char_was_eol = 0;					}				}			}			Debug (21, 9, ("Adding %d bytes to mimebuf\n", i));			add_buffer (mimebuf, bufp, i);			i = i + 1;			bufp += i;			n -= i;		}#ifdef FOLLOW_REDIRECTS		if (in_http_data && mimebuf)			if (HTTP_REDIRECTION_STATUS (up->http_status_code) &&			    (p =			     strstr (mimebuf->data,				     "Location:")) != (char *) NULL) {				Debug (21, 1, ("Received HTTP Redirection\n"));				if (sscanf (p, "%s %s", junk, newURL) != 2)					goto redirect_fallthru;				Debug (21, 1, ("New URL: %s\n", newURL));				if ((new_up =				     url_open (newURL)) == (URL *) NULL)					goto redirect_fallthru;				if (new_up->type != URL_HTTP) {					url_close (new_up);					goto redirect_fallthru;				}				/*				 *  copy over URL info from the new URL				 *  into the original 'up' structure.				 *  Note that we leave  alone the				 *  up->filename field.				 */				if (up->redir_from_url == (char *) NULL)					up->redir_from_url = xstrdup (up->url);				xfree (up->url);				xfree (up->raw_pathname);				xfree (up->pathname);				xfree (up->host);				up->url = xstrdup (newURL);				up->type = new_up->type;				up->raw_pathname =				    xstrdup (new_up->raw_pathname);				up->pathname = xstrdup (new_up->pathname);				up->host = xstrdup (new_up->host);				up->port = new_up->port;				Debug (21, 5,				       ("http_get: redir: up->redir_from   %s\n",					up->redir_from_url));				Debug (21, 5,				       ("http_get: redir: up->type         %d\n",					up->type));				Debug (21, 5,				       ("http_get: redir: up->raw_pathname %s\n",					up->raw_pathname));				Debug (21, 5,				       ("http_get: redir: up->pathname     %s\n",					up->pathname));				Debug (21, 5,				       ("http_get: redir: up->host         %s\n",					up->host));				Debug (21, 5,				       ("http_get: redir: up->port         %d\n",					up->port));				url_close (new_up);				/*				 *  free memory allocated in this function				 *  which is going to be re-allocated by the				 *  
recursive function call				 */				xfree (up->http_reason_line);				up->http_reason_line = NULL;				xfree (up->http_version);				up->http_version = NULL;				free_buffer (mimebuf);				close (s);				fclose (up->fp);				up->fp = 0;				free_buffer (urlbuf);				return (-1);	/* Return redirect info */			}	      redirect_fallthru:#endif#ifdef HTTP_AUTHENTICATION		if (in_http_data && mimebuf) {			if (!HTTP_UNAUTHORIZED_STATUS (up->http_status_code))				goto auth_fallthru;			/*      Loop Detection          */			Debug (21, 1,			       ("Got Unauthorized, will try some passwords\n"));			if (up->auth_realm != (char *) NULL) {				Log ("http_get: %s UNAUTHORIZED with realm %s\n", up->url, up->auth_realm);				xfree (up->http_reason_line);				up->http_reason_line = NULL;				xfree (up->http_version);				up->http_version = NULL;				free_buffer (mimebuf);				free_buffer (urlbuf);				fclose (up->fp);				close (s);				return 10;	/* 'hard' error from httpd */			}			Debug (21, 1, ("Looking for auth line...\n"));			if ((p = strstr (mimebuf->data, "WWW-Authenticate:")) ||			    (p = strstr (mimebuf->data, "WWW-authenticate:"))) {				Debug (21, 1, ("%s\n", p));				if (sscanf				    (p, "%s %s %[^\n]", junk, type, realm) != 3)					goto auth_fallthru;				Debug (21, 1, ("junk=%s\n", junk));				Debug (21, 1, ("type=%s\n", type));				Debug (21, 1, ("realm=%s\n", realm));				u = realm;				if ((t = strchr (u, '=')))					u = t + 1;				if ((t = strchr (u, '"')))					u = t + 1;				if ((t = strrchr (u, '"')))					*t = '\0';				xfree (up->auth_type);				up->auth_type = 0;				xfree (up->auth_realm);				up->auth_realm = 0;				up->auth_realm = xstrdup (u);				up->auth_type = xstrdup (type);				Debug (21, 1, ("type=%s\n", up->auth_type));				Debug (21, 1, ("realm=%s\n", up->auth_realm));				/*				 * free memory allocated in this function				 * which is going to be re-allocated by the				 * recursive function call				 */				xfree (up->http_reason_line);				up->http_reason_line = NULL;				xfree 
(up->http_version);				up->http_version = NULL;				free_buffer (mimebuf);				free_buffer (urlbuf);				close (s);				fclose (up->fp);				return http_get (up);			} else {				Debug (21, 1, ("NO AUTH LINE\n"));			}		}	      auth_fallthru:#endif		if (in_http_data && n > 0) {			if ((nw = fwrite (bufp, 1, n, up->fp)) != n)				log_errno ("HTTP fwrite");			nbytes += nw;			if (nbytes > HTTP_MAX_TRANSFER) {				Log ("http_get: WARNING!: %s has exceeded %d bytes of data, aborting transfer.\n", up->url, HTTP_MAX_TRANSFER);				break;			}		}	}	if (mimebuf != NULL) {		Debug (21, 9,		       ("Copying mimebuf to up structure, length=%d\n",			mimebuf->length));		up->http_mime_hdr = xmalloc (mimebuf->length + 1);		memcpy (up->http_mime_hdr, mimebuf->data, mimebuf->length);		up->http_mime_hdr[mimebuf->length] = '\0';		free_buffer (mimebuf);	}	/* Clean up */	fclose (up->fp);	close (s);	free_buffer (urlbuf);	return (in_http_data == 1 ? 0 : 10);}/* *  do_read() - read that performs timeout, assumes theSocket is set *  for nonblocking I/O .  Based on MyRead() from ccache. 
*/static intdo_read (s, buf, sz)int s;char *buf;int sz;{	fd_set readDetect;	struct timeval timeout;	int err, readBytes = 0;	extern int select ();	/* read until timeout or amount of requested bytes read */	while (1) {		char *t = NULL;		int to = XFER_TIMEOUT;		if ((t = getenv ("HARVEST_XFER_TIMEOUT")) != NULL)			to = atol (t);		FD_ZERO (&readDetect);		FD_SET (s, &readDetect);		timeout.tv_sec = to;		timeout.tv_usec = 0;		/* wait for data for seconds */		err = select (s + 1, &readDetect, NULL, NULL, &timeout);		if (err < 0) {			if (errno == EINTR)				continue;			if (errno == ECONNRESET)				return (0);			log_errno ("select");			return (-1);		}		/* timeout on the read */		if (err == 0) {			read_timeout = 1;			return (-1);		}		if (FD_ISSET (s, &readDetect)) {			if ((readBytes = read (s, buf, sz)) < 0)				if (errno == ECONNRESET)					return (0);			break;		}	}	return (readBytes);}static voidhttp_init_auth (){	FILE *fp;	static char xbuf[4][BUFSIZ];	char *t = NULL;	struct http_auth *a = NULL;	HTTPAuth = NULL;	if ((t = getenv ("HARVEST_HTTP_AUTHENTICATIONS")) == NULL) {		Debug (21, 1,		       ("http_init_auth: No Authentication file passed in the environment.\n"));		return;	}	Debug (21, 1, ("http_init_auth: open %s\n", t));	if ((fp = fopen (t, "r")) != NULL) {		while (fscanf (fp, "%s %s %s %s\n",			       xbuf[0], xbuf[1], xbuf[2], xbuf[3]) == 4) {			Debug (21, 1,			       ("http_init_auth: got type=%s realm=%s username=%s passwd=%s\n",				xbuf[0], xbuf[1], xbuf[2], xbuf[3]));			rfc1738_unescape (xbuf[1]);			a = (struct http_auth *)			    xmalloc (sizeof (struct http_auth));			a->type = xstrdup (xbuf[0]);			a->realm = xstrdup (xbuf[1]);			a->username = xstrdup (xbuf[2]);			a->passwd = xstrdup (xbuf[3]);			a->next = 0;			HTTPAuth = a;		}	}	fclose (fp);}/* * *  ACKNOWLEDGEMENT: *       This code is taken from rpem distribution, and was originally *       written by Mark Riordan. * *  routines to convert a buffer of bytes to/from RFC 1113 *  printable encoding format. 
* *  allocates the buffer for the encoded string.  The caller must free it. */static char *ht_uuencode (inbuf)char *inbuf;{	static char six2pr[64] = {		'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M',		'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z',		'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm',		'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z',		'0', '1', '2', '3', '4', '5', '6', '7', '8', '9', '+', '/'	};#define ENC(c) six2pr[c]	char *outbuf, *s;	int i;	int nbytes = strlen (inbuf);	Debug (21, 5, ("ht_uuencode(): called.  inbuf=%s\n", inbuf));	s = outbuf = (char *) xmalloc (strlen (inbuf) * 2);	for (i = 0; i < nbytes; i += 3) {		*(outbuf++) = ENC (inbuf[0] >> 2);		*(outbuf++) =		    ENC (((inbuf[0] << 4) & 060) | ((inbuf[1] >> 4) & 017));		*(outbuf++) =		    ENC (((inbuf[1] << 2) & 074) | ((inbuf[2] >> 6) & 03));		*(outbuf++) = ENC (inbuf[2] & 077);		inbuf += 3;	}	if (i == nbytes + 1) {		/* There were only 2 bytes in that last group */		outbuf[-1] = '=';	} else if (i == nbytes + 2) {		/* There was only 1 byte in that last group */		outbuf[-1] = '=';		outbuf[-2] = '=';	}	outbuf[0] = '\0';	Debug (21, 5, ("ht_uuencode(): returning %s\n", s));	return s;}static char *http_make_auth (realm)char *realm;{	struct http_auth *a = NULL;	static char namepw[BUFSIZ];	static int inited = 0;	if (!inited) {		http_init_auth ();		inited = 1;	}	Debug (21, 5, ("http_make_auth: called.  realm=%s\n", realm));	for (a = HTTPAuth; a; a = a->next) {		if (!strcasecmp (realm, a->realm)) {			if (a->encoded == (char *) 0) {				sprintf (namepw, "%s:%s",					 a->username ? a->username : "",					 a->passwd ? a->passwd : "");				a->encoded = ht_uuencode (namepw);			}			return xstrdup (a->encoded);		}	}	Debug (21, 5,	       ("http_make_auth: No authentication for realm %s\n", realm));	return 0;}

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -