⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 config.h

📁 harvest是一个下载html网页的机器人
💻 H
字号:
/* *  config.h - Master configuration file for the Harvest system. * *  $Id: config.h,v 2.3 2000/02/03 12:45:56 sxw Exp $ * *  AUTHOR: Harvest derived * *  Harvest Indexer http://harvest.sourceforge.net/ *  ----------------------------------------------- * *  The Harvest Indexer is a continued development of code developed by *  the Harvest Project. Development is carried out by numerous individuals *  in the Internet community, and is not officially connected with the *  original Harvest Project or its funding sources. * *  Please mail lee@arco.de if you are interested in participating *  in the development effort. * *  This program is free software; you can redistribute it and/or modify *  it under the terms of the GNU General Public License as published by *  the Free Software Foundation; either version 2 of the License, or *  (at your option) any later version. * *  This program is distributed in the hope that it will be useful, *  but WITHOUT ANY WARRANTY; without even the implied warranty of *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the *  GNU General Public License for more details. * *  You should have received a copy of the GNU General Public License *  along with this program; if not, write to the Free Software *  Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. *//* *  ---------------------------------------------------------------------- *  Copyright (c) 1994, 1995.  All rights reserved. * *    The Harvest software was developed by the Internet Research Task *    Force Research Group on Resource Discovery (IRTF-RD): * *          Mic Bowman of Transarc Corporation. *          Peter Danzig of the University of Southern California. *          Darren R. Hardy of the University of Colorado at Boulder. *          Udi Manber of the University of Arizona. *          Michael F. Schwartz of the University of Colorado at Boulder. *          Duane Wessels of the University of Colorado at Boulder. 
* *    This copyright notice applies to software in the Harvest *    ``src/'' directory only.  Users should consult the individual *    copyright notices in the ``components/'' subdirectories for *    copyright information about other software bundled with the *    Harvest source code distribution. * *  TERMS OF USE * *    The Harvest software may be used and re-distributed without *    charge, provided that the software origin and research team are *    cited in any use of the system.  Most commonly this is *    accomplished by including a link to the Harvest Home Page *    (http://harvest.cs.colorado.edu/) from the query page of any *    Broker you deploy, as well as in the query result pages.  These *    links are generated automatically by the standard Broker *    software distribution. * *    The Harvest software is provided ``as is'', without express or *    implied warranty, and with no support nor obligation to assist *    in its use, correction, modification or enhancement.  We assume *    no liability with respect to the infringement of copyrights, *    trade secrets, or any patents, and are not responsible for *    consequential damages.  Proper use of the Harvest software is *    entirely the responsibility of the user. * *  DERIVATIVE WORKS * *    Users may make derivative works from the Harvest software, subject *    to the following constraints: * *      - You must include the above copyright notice and these *        accompanying paragraphs in all forms of derivative works, *        and any documentation and other materials related to such *        distribution and use acknowledge that the software was *        developed at the above institutions. * *      - You must notify IRTF-RD regarding your distribution of *        the derivative work. * *      - You must clearly notify users that your are distributing *        a modified version and not the original Harvest software. 
* *      - Any derivative product is also subject to these copyright *        and use restrictions. * *    Note that the Harvest software is NOT in the public domain.  We *    retain copyright, as specified above. * *  HISTORY OF FREE SOFTWARE STATUS * *    Originally we required sites to license the software in cases *    where they were going to build commercial products/services *    around Harvest.  In June 1995 we changed this policy.  We now *    allow people to use the core Harvest software (the code found in *    the Harvest ``src/'' directory) for free.  We made this change *    in the interest of encouraging the widest possible deployment of *    the technology.  The Harvest software is really a reference *    implementation of a set of protocols and formats, some of which *    we intend to standardize.  We encourage commercial *    re-implementations of code complying to this set of standards. * */#ifndef _CONFIG_H_#define _CONFIG_H_#include "autoconf.h"	/* For GNU autoconf variables */#include "paths.h"	/* For GNU autoconf program/subst variables */#include "version.h"	/* For Harvest version number *//* *  USE_TMPDIR - default temporary directory into which files are extracted. 
*/#ifndef USE_TMPDIR#define USE_TMPDIR	"/tmp"#endif/* *  Y2K_TIME - define to log time in form YYYYMMDD rather than YYYMMDD */#ifndef Y2K_TIME#define Y2K_TIME#endif/* *  USE_LOCAL_CACHE - define to use the Gatherer's local disk cache */#ifndef USE_LOCAL_CACHE#define USE_LOCAL_CACHE#endif/* *  USE_HOST_CACHE - define if you want to use a DNS hostname/IP cache */#ifndef USE_HOST_CACHE#define USE_HOST_CACHE#endif/* *  USE_WAIS_RELAY - define if you want the Cache to support WAIS proxy */#ifndef USE_WAIS_RELAY#define USE_WAIS_RELAY#endif/**************************************************************************** *--------------------------------------------------------------------------* * DO *NOT* MAKE ANY CHANGES below here unless you know what you're doing...* *--------------------------------------------------------------------------* ****************************************************************************//* *  USE_T_URI - generate T_URI (URI attribute used by Glimpse) for each *  retrieved file. */#ifndef USE_T_URI#define USE_T_URI#endif/* *  USE_MD5 - generates MD5 (cryptographic checksums) for each retrieved file. */#ifndef USE_MD5#define USE_MD5#endif/* *  GDBM_GROWTH_BUG - define to workaround the GDBM replace bug/feature; *  this will cause Harvest to reorganize GDBM db's after to many replaces. */#ifndef GDBM_GROWTH_BUG#undef GDBM_GROWTH_BUG#endif/* *  REAL_FILE_URLS - causes the Gatherer to interpret 'file' URLs as *  specified by Mosaic.  If the hostname field is the same as the *  current host, then the URL is treated as a local file, otherwise, *  the 'file' URL is treated as an 'ftp' URL. */#ifndef REAL_FILE_URLS#undef REAL_FILE_URLS#endif/* *  LOG_TIMES - each log message is prepended with the current time. 
*/#ifndef LOG_TIMES#define LOG_TIMES#endif/* *  USE_LOG_SYNC - define to synchonize multiple processes writing to log file */#ifndef USE_LOG_SYNC#define USE_LOG_SYNC#endif/* *  XFER_TIMEOUT is the number of seconds that liburl will wait on a read() *  before giving up. */#ifndef XFER_TIMEOUT#define XFER_TIMEOUT	120	/* 2 minutes */#endif/* *  USE_PCINDEX - defines .unnest types for the PC software Gatherer */#ifndef USE_PCINDEX#define USE_PCINDEX#endif/* *  USE_NO_DEBUGGING - don't compile in debugging output */#ifndef USE_NO_DEBUGGING#undef USE_NO_DEBUGGING#endif/* *  HOLD_NNTP - Holds NNTP connections to do multiple articles per session */#ifndef HOLD_NNTP#define HOLD_NNTP 1#endif/* *  USE_CCACHE - define to use the FTP connection cache for liburl */#ifndef USE_CCACHE#undef USE_CCACHE#endif/* *  FOLLOW_REDIRECTS - define to follow HTTP redirect messages */#ifndef FOLLOW_REDIRECTS#define FOLLOW_REDIRECTS#endif/* *  HTTP_MAX_REDIRECTS - the maximum number of HTTP redirect hops to make */#ifndef HTTP_MAX_REDIRECTS#define HTTP_MAX_REDIRECTS       3#endif/* *  HTTP_AUTHENTICATION - define to support sending HTTP username/passwords */#ifndef HTTP_AUTHENTICATION#define HTTP_AUTHENTICATION#endif/* *  HTTP_MAX_TRANSFER - maxmimum size of an HTTP transfer */#ifndef HTTP_MAX_TRANSFER#define HTTP_MAX_TRANSFER        (10<<20)#endif/* *  DONT_RETRY_FAILS - define to mark failed URLs as visited, and so *  stop repeated attempts to fetch them */#ifndef DONT_RETRY_FAILS#define DONT_RETRY_FAILS#endif/* *  NO_STRDUP - define if standard C library doesn't have strdup(3). */#ifndef NO_STRDUP#ifndef HAVE_STRDUP#define NO_STRDUP#endif#endif/* *  NO_STRERROR - define if standard C library doesn't have strerror(3). */#ifndef NO_STRERROR#ifndef HAVE_STRERROR#define NO_STRERROR#endif#endif/* *  MAX_TYPES is the max # of types that the type recognition supports. */#ifndef MAX_TYPES#define MAX_TYPES	512#endif/* *  MAX_FILTERS is the max # of filters that the enumerators support. 
*/#ifndef MAX_FILTERS#define MAX_FILTERS     512#endif/* *  URL_WILDCARD_LIMIT is the maximum number of wildcards in a LocalMapping. */#ifndef URL_WILDCARD_LIMIT#define URL_WILDCARD_LIMIT 256#endif/* *  CMD_TAR - command for tar */#ifndef CMD_TAR#define CMD_TAR		"tar"#endif/* *  USE_BYNAME - name of the configuration file for by name type recog. */#ifndef USE_BYNAME#define USE_BYNAME	"byname.cf"#endif/* *  USE_BYCONTENET - name of the configuration file for file content type recog. */#ifndef USE_BYCONTENT#define USE_BYCONTENT	"bycontent.cf"#endif/* *  USE_BYURL - name of the configuration file for by URL type recog. */#ifndef USE_BYURL#define USE_BYURL	"byurl.cf"#endif/* *  USE_MAGIC - default name and location of the magic file. */#ifndef USE_MAGIC#define USE_MAGIC	"magic"#endif/* *  USE_STOPLIST - name of the stoplist configuration file */#ifndef USE_STOPLIST#define USE_STOPLIST 	"stoplist.cf"#endif#if defined(USE_POSIX_REGEX) || defined(USE_GNU_REGEX)#include <GNUregex.h>#elif HAVE_REGEX_H#include <regex.h>#endif#if defined(USE_BSD_REGEX)extern int re_comp(), re_exec();#endif#ifdef USE_POSIX_REGEX#ifndef USE_RE_SYNTAX#define USE_RE_SYNTAX	REG_EXTENDED	/* default Syntax */#endif#endif	/* internal quicksum needs good regex support */#ifdef USE_POSIX_REGEX#ifndef USE_QUICKSUM#define USE_QUICKSUM#endif#ifndef USE_QUICKSUM_FILE#define USE_QUICKSUM_FILE	"quick-sum.cf"#endif#endif#ifndef BLKDEV_IOSIZE#include <sys/param.h>		/* try to find it... */#endif#ifdef BLKDEV_IOSIZE#define MIN_XFER BLKDEV_IOSIZE	/* minimum number of bytes per disk xfer */#else#define MIN_XFER 512		/* make reasonable guess */#endif#ifndef BUFSIZ#include <stdio.h>		/* try to find it... 
*/#ifndef BUFSIZ#define BUFSIZ  4096		/* make reasonable guess */#endif#endif#if defined(SYSTYPE_SYSV) || defined(__svr4__) || defined(SYSTYPE_SVR4)#define _HARVEST_SYSV_#else#define _HARVEST_BSD_#endif/* define the _HARVEST_TYPE_ based on a guess of the OS */#if defined(__sun__)                            /* SUN */#define _HARVEST_SUN_#if defined(_HARVEST_SYSV_)                     /* SOLARIS */#define _HARVEST_SOLARIS_#else                                           /* SUNOS */#define _HARVEST_SUNOS_#endif#elif defined(__hpux)                           /* HP-UX - SysV-like? */#define _HARVEST_HPUX_#define _HARVEST_SYSV_#elif defined(__osf__)                          /* OSF/1 */#define _HARVEST_OSF_#elif defined(__ultrix)                         /* Ultrix */#define _HARVEST_ULTRIX_#elif defined(_AIX)                             /* AIX */#define _HARVEST_AIX_#elif defined(__linux__)                        /* Linux */#define _HARVEST_LINUX_#elif defined(__FreeBSD__)                      /* FreeBSD */#define _HARVEST_FREEBSD_#elif defined(__sgi__)                          /* SGI */#define _HARVEST_SGI_#elif defined(__CYGWIN__)#define _HARVEST_CYGWIN_                         /* Cygwin under Windows */#endif#endif /* _CONFIG_H_ */

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -