⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 util.h

📁 harvest是一个下载html网页的机器人
💻 H
字号:
/* *  util.h - Common utilities for the Harvest Indexing system * *  $Id: util.h,v 2.2 1997/05/30 18:00:29 sxw Exp $ * *  AUTHOR: Harvest derived * *  Harvest Indexer http://www.tardis.ed.ac.uk/harvest/ *  --------------------------------------------------- * *  The Harvest Indexer is a continued development of code developed by *  the Harvest Project. Development is carried out by numerous individuals *  in the Internet community, and is not officially connected with the *  original Harvest Project or its funding sources. *  *  Please mail harvest@tardis.ed.ac.uk if you are interested in participating *  in the development effort. * *  This program is free software; you can redistribute it and/or modify *  it under the terms of the GNU General Public License as published by *  the Free Software Foundation; either version 2 of the License, or *  (at your option) any later version. *   *  This program is distributed in the hope that it will be useful, *  but WITHOUT ANY WARRANTY; without even the implied warranty of *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the *  GNU General Public License for more details. *   *  You should have received a copy of the GNU General Public License *  along with this program; if not, write to the Free Software *  Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. *//*  ---------------------------------------------------------------------- *  Copyright (c) 1994, 1995.  All rights reserved. *   *    The Harvest software was developed by the Internet Research Task *    Force Research Group on Resource Discovery (IRTF-RD): *   *          Mic Bowman of Transarc Corporation. *          Peter Danzig of the University of Southern California. *          Darren R. Hardy of the University of Colorado at Boulder. *          Udi Manber of the University of Arizona. *          Michael F. Schwartz of the University of Colorado at Boulder. *          Duane Wessels of the University of Colorado at Boulder. 
*   *    This copyright notice applies to software in the Harvest *    ``src/'' directory only.  Users should consult the individual *    copyright notices in the ``components/'' subdirectories for *    copyright information about other software bundled with the *    Harvest source code distribution. *   *  TERMS OF USE *     *    The Harvest software may be used and re-distributed without *    charge, provided that the software origin and research team are *    cited in any use of the system.  Most commonly this is *    accomplished by including a link to the Harvest Home Page *    (http://harvest.cs.colorado.edu/) from the query page of any *    Broker you deploy, as well as in the query result pages.  These *    links are generated automatically by the standard Broker *    software distribution. *     *    The Harvest software is provided ``as is'', without express or *    implied warranty, and with no support nor obligation to assist *    in its use, correction, modification or enhancement.  We assume *    no liability with respect to the infringement of copyrights, *    trade secrets, or any patents, and are not responsible for *    consequential damages.  Proper use of the Harvest software is *    entirely the responsibility of the user. *   *  DERIVATIVE WORKS *   *    Users may make derivative works from the Harvest software, subject  *    to the following constraints: *   *      - You must include the above copyright notice and these  *        accompanying paragraphs in all forms of derivative works,  *        and any documentation and other materials related to such  *        distribution and use acknowledge that the software was  *        developed at the above institutions. *   *      - You must notify IRTF-RD regarding your distribution of  *        the derivative work. *   *      - You must clearly notify users that your are distributing  *        a modified version and not the original Harvest software. 
*   *      - Any derivative product is also subject to these copyright  *        and use restrictions. *   *    Note that the Harvest software is NOT in the public domain.  We *    retain copyright, as specified above. *   *  HISTORY OF FREE SOFTWARE STATUS *   *    Originally we required sites to license the software in cases *    where they were going to build commercial products/services *    around Harvest.  In June 1995 we changed this policy.  We now *    allow people to use the core Harvest software (the code found in *    the Harvest ``src/'' directory) for free.  We made this change *    in the interest of encouraging the widest possible deployment of *    the technology.  The Harvest software is really a reference *    implementation of a set of protocols and formats, some of which *    we intend to standardize.  We encourage commercial *    re-implementations of code complying to this set of standards.   *   */#ifndef _UTIL_H_#define _UTIL_H_#include <stdio.h>#include "config.h"/* Buffer structure for buffer management routines */struct gbuf {			/* Growing and shrinking buffer */	char *data;		/* Data buffer */	int length;		/* Current length of data buffer */	int size;		/* Size allocated in the Data buffer */	int default_size;	/* Default size of the Data buffer */};typedef struct gbuf Buffer;	/* Growing buffer */#define stradd_buffer(b,s)	add_buffer((b), (s), strlen(s))#ifndef _PARAMS#if defined(__STDC__) || defined(__cplusplus) || defined(__STRICT_ANSI__)#define _PARAMS(ARGS) ARGS#else /* Traditional C */     #define _PARAMS(ARGS) ()      #endif /* __STDC__ */              #endif /* _PARAMS */   /* from buffer.c	Buffer manipulation routines */Buffer *create_buffer _PARAMS((int));		/* New buffer */void grow_buffer _PARAMS((Buffer *));		/* Increase buffer size */void increase_buffer _PARAMS((Buffer *, int));	/* Increase buffer size */void shrink_buffer _PARAMS((Buffer *));		/* Reduce buffer size */void add_buffer _PARAMS((Buffer *, char *, int));/* Add 
data to a buffer */void free_buffer _PARAMS((Buffer *));		/* Clean up a buffer *//* from host.c */char *getfullhostname _PARAMS(());		/* Fully qualified hostname */char *getmylogin _PARAMS(());			/* getlogin(3) clone */char *getrealhost _PARAMS((char *));		/* Real DNS hostname *//* from log.c */void init_log _PARAMS((FILE *, FILE *));	/* Initialize log routines */void init_log3 _PARAMS((char *,FILE *,FILE *));	/* Initialize log routines */void log_errno _PARAMS((char *));		/* Same as perror(3) */void log_errno2 _PARAMS((char *,int,char *));	/* Same as perror(3) file,line*/void fatal_errno _PARAMS((char *));		/* Same as perror(3) & exit */#ifdef __STRICT_ANSI__#include <stdarg.h>void Log _PARAMS((char *, ...));		/* Log a message */void errorlog _PARAMS((char *, ...));		/* Log an error message */void fatal _PARAMS((char *, ...));		/* Log error msg and exit */#elsevoid Log _PARAMS(());void errorlog _PARAMS(());void fatal _PARAMS(());#endif/* from strdup.c */#ifdef NO_STRDUPchar *strdup _PARAMS((char *));			/* Duplicate a string */#endifchar *xstrdup _PARAMS((char *));		/* Duplicate a string *//* from string.c */void parse_argv _PARAMS((char **, char *));	/* Parse a command string *//* from system.c */int do_system _PARAMS((char *));		/* Wrapper for system(3) */int run_cmd _PARAMS((char *));			/* Simple system(3) */int do_system_lifetime _PARAMS((char *, int));	/* Limited system(3) */void close_all_fds _PARAMS((int));		/* Closes all fd's */void close_all_fds_except _PARAMS((int,int*));	/* Closes all fd's except */void setsocket_linger _PARAMS((int,int));	/* set SO_LINGER *//* from xmalloc.c */void *xmalloc _PARAMS((size_t));		/* Wrapper for malloc(3) */void *xrealloc _PARAMS((void *, size_t));	/* Wrapper for realloc(3) */void xfree _PARAMS((void *));			/* Wrapper for free(3) *//* from harvest.c */char *harvest_bindir _PARAMS((void));char *harvest_libdir _PARAMS((void));char *harvest_topdir _PARAMS((void));void harvest_add_path _PARAMS((char *));#define 
harvest_add_gatherer_path() 	harvest_add_path("gatherer:")#define harvest_add_broker_path() 	harvest_add_path("broker:")#define harvest_add_cache_path() 	harvest_add_path("cache:")#define harvest_add_replicator_path()	harvest_add_path("replicator:")/* from debug.c */#ifndef MAX_DEBUG_LEVELS#define MAX_DEBUG_LEVELS 256#endif#ifndef MAINextern int Harvest_do_debug;extern int Harvest_debug_levels[];#endif#undef debug_ok_fast#ifdef USE_NO_DEBUGGING#define debug_ok_fast(S,L) 0 /* empty */#else#define debug_ok_fast(S,L) \	( \        (Harvest_do_debug) && \        ((Harvest_debug_levels[S] == -2) || \         ((Harvest_debug_levels[S] != -1) && \	   ((L) <= Harvest_debug_levels[S]))) \	)#endif#undef Debug#ifdef USE_NO_DEBUGGING#define Debug(section, level, X) /* empty */;#else#define Debug(section, level, X) \        {if (debug_ok_fast((section),(level))) {Log X;}} /* no parens */#endifextern void debug_reset _PARAMS((void));extern void debug_enable _PARAMS((int, int));extern void debug_disable _PARAMS((int));extern void debug_flag _PARAMS((char *));extern int  debug_ok _PARAMS((int, int));extern void debug_init _PARAMS((void));#include <string.h>#include <sys/types.h>#include <sys/wait.h>#include <ctype.h>#include <unistd.h>#if !defined(__ultrix) || !defined(SOCK_STREAM)  /* not protected */#include <sys/socket.h>#endif#include <sys/time.h>#include <netinet/in.h>#include <arpa/ftp.h>#include <arpa/inet.h>#if !defined(__ultrix) || !defined(HOST_NOT_FOUND)  /* not protected */#include <netdb.h>#endif#ifdef USE_HOST_CACHE#define HOST_CACHE_TTL 3600/* Apparently AIX defines MAXHOSTNAMELEN as 32.  
We have to compensate */#if defined(_HARVEST_AIX_)#undef MAXHOSTNAMELEN#define MAXHOSTNAMELEN 64#endif/* Some versions of Solaris with BIND don't define MAXHOSTNAMELEN */#ifndef MAXHOSTNAMELEN#define MAXHOSTNAMELEN 254#endiftypedef struct _host {    char	key[MAXHOSTNAMELEN];	/* www.bar.com */    char        fqdn[MAXHOSTNAMELEN];	/* real.bar.com */    char	dotaddr[16];		/* 128.138.213.10 */    char	ipaddr[4];    time_t      last_t;			/* last access of this info */    int         n;			/* # of requests for this host */    int         addrlen;		/* length of 'ipaddr', always 4 */    struct _host *next;} Host;extern Host   *thisHost;int   host_cache_init _PARAMS(());Host  *get_host _PARAMS((char *hostname));int   delete_host _PARAMS((Host *h));int   expire_host_cache _PARAMS((time_t timeout));void  dump_host_cache _PARAMS((int, int));#endif /* USE_HOST_CACHE */extern char *mkrfc850 _PARAMS((time_t *));extern time_t parse_rfc850 _PARAMS((char *));extern char *rfc1738_escape _PARAMS((char *));#endif /* _UTIL_H_ */

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -