⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 ccache_util.c

📁 harvest是一个下载html网页的机器人
💻 C
字号:
static char rcsid[] = "ccache_util.c,v 1.15 1995/11/29 00:48:33 duane Exp";/* * ccache_util.c - common util functions for daemon and client stub * *      MyRead() *      AddURL() *      GetURL() *      GetParam() *      AddParam() *      SocketWrite() * *  David Merkel & Mark Peterson, University of Colorado - Boulder, July 1994   * *  ---------------------------------------------------------------------- *  Copyright (c) 1994, 1995.  All rights reserved. *   *    The Harvest software was developed by the Internet Research Task *    Force Research Group on Resource Discovery (IRTF-RD): *   *          Mic Bowman of Transarc Corporation. *          Peter Danzig of the University of Southern California. *          Darren R. Hardy of the University of Colorado at Boulder. *          Udi Manber of the University of Arizona. *          Michael F. Schwartz of the University of Colorado at Boulder. *          Duane Wessels of the University of Colorado at Boulder. *   *    This copyright notice applies to software in the Harvest *    ``src/'' directory only.  Users should consult the individual *    copyright notices in the ``components/'' subdirectories for *    copyright information about other software bundled with the *    Harvest source code distribution. *   *  TERMS OF USE *     *    The Harvest software may be used and re-distributed without *    charge, provided that the software origin and research team are *    cited in any use of the system.  Most commonly this is *    accomplished by including a link to the Harvest Home Page *    (http://harvest.cs.colorado.edu/) from the query page of any *    Broker you deploy, as well as in the query result pages.  These *    links are generated automatically by the standard Broker *    software distribution. *     *    The Harvest software is provided ``as is'', without express or *    implied warranty, and with no support nor obligation to assist *    in its use, correction, modification or enhancement.  
We assume *    no liability with respect to the infringement of copyrights, *    trade secrets, or any patents, and are not responsible for *    consequential damages.  Proper use of the Harvest software is *    entirely the responsibility of the user. *   *  DERIVATIVE WORKS *   *    Users may make derivative works from the Harvest software, subject  *    to the following constraints: *   *      - You must include the above copyright notice and these  *        accompanying paragraphs in all forms of derivative works,  *        and any documentation and other materials related to such  *        distribution and use acknowledge that the software was  *        developed at the above institutions. *   *      - You must notify IRTF-RD regarding your distribution of  *        the derivative work. *   *      - You must clearly notify users that your are distributing  *        a modified version and not the original Harvest software. *   *      - Any derivative product is also subject to these copyright  *        and use restrictions. *   *    Note that the Harvest software is NOT in the public domain.  We *    retain copyright, as specified above. *   *  HISTORY OF FREE SOFTWARE STATUS *   *    Originally we required sites to license the software in cases *    where they were going to build commercial products/services *    around Harvest.  In June 1995 we changed this policy.  We now *    allow people to use the core Harvest software (the code found in *    the Harvest ``src/'' directory) for free.  We made this change *    in the interest of encouraging the widest possible deployment of *    the technology.  The Harvest software is really a reference *    implementation of a set of protocols and formats, some of which *    we intend to standardize.  We encourage commercial *    re-implementations of code complying to this set of standards.   
*   */#include <stdio.h>#include <string.h>#include <stdlib.h>#include <unistd.h>#include <errno.h>#include <sys/types.h>#include <sys/time.h>#include <sys/socket.h>#include <netinet/in.h>#include <arpa/inet.h>#include <netdb.h>#include "ccache.h"/* * MyRead() - read that performs timeout, assumes theSocket is set * for nonblocking I/O  */int MyRead(theSocket, message, msgLen, seconds)     int theSocket;     char *message;     int msgLen;     int seconds;{	fd_set readDetect;	struct timeval timeout;	int err, readBytes = 0, justRead = 0;	memset(&timeout, '\0', sizeof(struct timeval));	/* read until timeout or amount of requested bytes read */	while (readBytes < msgLen) {		FD_ZERO(&readDetect);		FD_SET(theSocket, &readDetect);		timeout.tv_sec = seconds;		timeout.tv_usec = 0;		/* wait for data for seconds */		err = select(theSocket + 1, &readDetect, NULL, NULL, &timeout);		if (err < 0) {#if DEBUG > 3			printf("select error\n");#endif			if (errno == EINTR)				continue;			perror("select");			exit(1);		}		/* timeout on the read */		if (err == 0) {#if DEBUG > 3			printf("read timeout\n");#endif			return (readBytes);		}		if (FD_ISSET(theSocket, &readDetect)) {			justRead = read(theSocket, message, msgLen);			if (!justRead)				return (0);			readBytes += justRead;		}	}	return (readBytes);}/*  * AddURL() - adds URL struct to byte stream message for transmission */int AddURL(theURL, theMessage, bufSize, startIndex, lastParam)     URL *theURL;     char **theMessage;     int bufSize;     int startIndex;     Boolean lastParam;{	int index = 0, length = bufSize;	index = AddParam(theURL->url, STR, theMessage, &length, startIndex, FALSE);	index = AddParam((char *) &(theURL->type), INT, theMessage,	    &length, index, FALSE);	index = AddParam(theURL->pathname, STR, theMessage, &length, index, FALSE);	index = AddParam(theURL->host, STR, theMessage, &length, index, FALSE);	index = AddParam((char *) &(theURL->port), INT, theMessage,	    &length, index, FALSE);	index = 
AddParam(theURL->user, STR, theMessage,	    &length, index, FALSE);	index = AddParam(theURL->password, STR, theMessage,	    &length, index, FALSE);	index = AddParam((char *) &(theURL->gophertype), INT, theMessage,	    &length, index, FALSE);	index = AddParam(theURL->filename, STR, theMessage,	    &length, index, FALSE);#ifdef USE_MD5	index = AddParam((char *) &(theURL->fp), POINTER, theMessage,	    &length, index, FALSE);	index = AddParam(theURL->md5, STR, theMessage,	    &length, index, lastParam);#endif#ifndef USE_MD5	index = AddParam((char *) &(theURL->fp), POINTER, theMessage,	    &length, index, lastParam);#endif	return (index);}/* * GetURL() - retrieves and assembles a URL struct from a byte * stream */int GetURL(theURL, theSocket)     URL *theURL;     int theSocket;{	if (GetParam((char **) &(theURL->url), STR, theSocket) < 0)		return (-1);	if (GetParam((char **) &(theURL->type), INT, theSocket) < 0)		return (-1);	if (GetParam((char **) &(theURL->pathname), STR, theSocket) < 0)		return (-1);	if (GetParam((char **) &(theURL->host), STR, theSocket) < 0)		return (-1);	if (GetParam((char **) &(theURL->port), INT, theSocket) < 0)		return (-1);	if (GetParam((char **) &(theURL->user), STR, theSocket) < 0)		return (-1);	if (GetParam((char **) &(theURL->password), STR, theSocket) < 0)		return (-1);	if (GetParam((char **) &(theURL->gophertype), INT, theSocket) < 0)		return (-1);	if (GetParam((char **) &(theURL->filename), STR, theSocket) < 0)		return (-1);	if (GetParam((char **) &(theURL->fp), POINTER, theSocket) < 0)		return (-1);	if (GetParam((char **) &(theURL->md5), STR, theSocket) < 0)		return (-1);	return (1);}/*  * GetParam() - Get one parameter from a byte stream * NOTE: paramBuf must be **, and NOT pre malloc'd.  
this routine mallocs * for you */int GetParam(paramBuf, paramType, theSocket)     char **paramBuf;     PType paramType;     int theSocket;{	Boolean gotEndMark = FALSE;	Boolean gotCR = FALSE;	Boolean gotLF = FALSE;	char *tempBuf, *tempPtr, cast[sizeof(int)];	int index = 0, readLength, bufSize = INIT_PARAM_LEN;	tempBuf = (char *) malloc(bufSize);	if (!tempBuf)		return (-1);	/* read bytes until we find either '.<CR><LF>' or '!<CR><LF>'	 */	while (!gotLF) {		readLength = MyRead(theSocket, &tempBuf[index], 1, PARAM_TIMEOUT);		if (readLength <= 0) {			free(tempBuf);			return (-1);		}		if (index >= bufSize - 1) {			bufSize = index + REALLOC_BLK;			tempBuf = realloc(tempBuf, bufSize);			if (!tempBuf)				return (-1);		}		switch (tempBuf[index]) {		case BLOCK_END:		case PARAM_END:			gotEndMark = TRUE;			gotCR = FALSE;			break;		case CARRG_RET:			if (gotEndMark) {				gotEndMark = FALSE;				gotCR = TRUE;			} else {				gotEndMark = FALSE;				gotCR = FALSE;			}			break;		case LINE_FEED:			if ((!gotEndMark) && (gotCR))				gotLF = TRUE;			else {				gotEndMark = FALSE;				gotCR = FALSE;			}			break;		default:			gotEndMark = FALSE;			gotCR = FALSE;			break;		}		index++;	}	/* based on paramType, copy data into return buffer */	switch (paramType) {	case STR:	case POINTER:	case MD5:		*paramBuf = (char *) malloc(index - TERM_LEN);		memcpy(*paramBuf, tempBuf, index - TERM_LEN);		break;	case INT:		memcpy(cast, tempBuf, sizeof(int));		memcpy(*paramBuf, cast, sizeof(int));		break;	default:		return (-1);	}	free(tempBuf);	return (index - TERM_LEN);}/* * AddParam() - Add a paramter into a buffer, use urld protocol... 
* end of parameter gets '.<CR><LF>' and last param in block * gets '!<CR<LF>' */int AddParam(theParam, paramType, theMessage, msgLen, startIndex, lastParam)     char *theParam;     PType paramType;     char **theMessage;		/* pointer to message buffer */     int *msgLen;		/* current size of *theMessage */     int startIndex;		/* index for insertion into buffer */     Boolean lastParam;{	char *tempPtr;	Boolean nullParam = FALSE;	int paramSize;	/* NULL parameters must be represented in byte stream */	if (!theParam) {		nullParam = TRUE;		paramSize = 0;	}	/* determine size of parameter */	else {		switch (paramType) {		case INT:			paramSize = sizeof(int);			break;		case STR:			/* make sure to include '\0' */			paramSize = strlen(theParam) + 1;			break;		case POINTER:			paramSize = sizeof(FILE *);			break;		case MD5:			paramSize = MD5_LEN;			break;		}	}	/* realloc if buffer not big enough */	if (paramSize + startIndex + TERM_LEN > *msgLen) {		*theMessage = realloc(*theMessage, startIndex + paramSize + TERM_LEN);		if (!*theMessage)			return (-1);	}	/* null params get double separators to indicate nothing between	 * separators	 */	if (!nullParam)		memcpy(&((*theMessage)[startIndex]), theParam, paramSize);	else		(*theMessage)[startIndex] = PARAM_END;	if (lastParam)		(*theMessage)[startIndex + paramSize] = BLOCK_END;	else		(*theMessage)[startIndex + paramSize] = PARAM_END;	(*theMessage)[startIndex + paramSize + 1] = CARRG_RET;	(*theMessage)[startIndex + paramSize + 2] = LINE_FEED;#if DEBUG > 5	if (lastParam) {		FILE *theFile = fopen("./cutil_test.log", "w");		int dbugIndex = 0;		while (dbugIndex < startIndex + paramSize + 3) {			(void) fputc((*theMessage)[dbugIndex], theFile);			dbugIndex++;		}		(void) fputc(CARRG_RET, theFile);		(void) fclose(theFile);	}#endif	return (startIndex + paramSize + TERM_LEN);}/******************************************************************************** function name: SocketWrite()**** preconditions: theSocket is a valid socket, and outBuf is a 
preallocated**      char * holding data to be written to theSocket.**** postconditions: The data in outBuf has been written to theSocket, and the**      total number of bytes written to theSocket has been returned to the**      caller.**** author/credits: Code written by W. Richard Stevens,**      from UNIX Network Programming, c1990, Prentice Hall, pp. 279-280********************************************************************************/int SocketWrite(theSocket, outBuf, numOfBytes)     int theSocket;		/* socket to write to */     char *outBuf;		/* the data to write to the socket */     int numOfBytes;		/* number of bytes inside of outBuf */{	int bytesLeft, bytesWritten, i = 0;	bytesLeft = numOfBytes;	while (bytesLeft > 0) {		bytesWritten = write(theSocket, &outBuf[i], bytesLeft);		if (bytesWritten <= 0)			return (bytesWritten);		bytesLeft -= bytesWritten;		i += bytesWritten;	}	return (numOfBytes - bytesLeft);}void PrintURL(theURL)     URL *theURL;{	printf("url: %s\n", theURL->url);	printf("type: %d\n", theURL->type);	printf("pathname: %s\n", theURL->pathname);	printf("host: %s\n", theURL->host);	printf("port: %d\n", theURL->port);	printf("user: %s\n", theURL->user);	printf("password: %s\n", theURL->password);	printf("gophertype: %d\n", theURL->gophertype);	printf("filename: %s\n", theURL->filename);	if (theURL->fp)		printf("valid fp\n");	else		printf("null fp\n");	printf("md5: %s\n", theURL->md5);}/* * gethostinhex() takes as input either a hostname or a hostip. * it converts this to a long if at all possible.  If invalid * data is given, a -1 is returned. */unsigned long gethostinhex(char *host_data){	struct hostent *host_info;	/* Host entry */	u_long IPinHex;		/* Storage for the IP as a long */	/*	 * If the input is a hostname which is known put	 * the information in a struct and pull the IP	 * from that.  If it's not, assume it's a number	 * in dot format.  In the case that this is invalid	 * input it returns a -1.	 
*/	if ((host_info = gethostbyname(host_data)) == NULL)		IPinHex = htonl(inet_network((char *) host_data));	else		IPinHex = (*(int *) host_info->h_addr);	return (IPinHex);}

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -