⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 gid.c

📁 harvest是一个下载html网页的机器人
💻 C
字号:
static char rcsid[] = "$Id: gid.c,v 2.1 1997/03/21 17:19:58 sxw Exp $";/*  *  gid.c -- Gatherer Id mgmt *   *  DEBUG:  section  79, level 1, 5, 9	Broker gatherer-id management *  AUTHOR: Harvest derived (Darren Hardy) * *  Harvest Indexer http://www.tardis.ed.ac.uk/harvest/ *  --------------------------------------------------- * *  The Harvest Indexer is a continued development of code developed by *  the Harvest Project. Development is carried out by numerous individuals *  in the Internet community, and is not officially connected with the *  original Harvest Project or its funding sources. *  *  Please mail harvest@tardis.ed.ac.uk if you are interested in participating *  in the development effort. * *  This program is free software; you can redistribute it and/or modify *  it under the terms of the GNU General Public License as published by *  the Free Software Foundation; either version 2 of the License, or *  (at your option) any later version. *   *  This program is distributed in the hope that it will be useful, *  but WITHOUT ANY WARRANTY; without even the implied warranty of *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the *  GNU General Public License for more details. *   *  You should have received a copy of the GNU General Public License *  along with this program; if not, write to the Free Software *  Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. *//*  ---------------------------------------------------------------------- *  Copyright (c) 1994, 1995.  All rights reserved. *   *    The Harvest software was developed by the Internet Research Task *    Force Research Group on Resource Discovery (IRTF-RD): *   *          Mic Bowman of Transarc Corporation. *          Peter Danzig of the University of Southern California. *          Darren R. Hardy of the University of Colorado at Boulder. *          Udi Manber of the University of Arizona. *          Michael F. Schwartz of the University of Colorado at Boulder. 
*          Duane Wessels of the University of Colorado at Boulder. *   *    This copyright notice applies to software in the Harvest *    ``src/'' directory only.  Users should consult the individual *    copyright notices in the ``components/'' subdirectories for *    copyright information about other software bundled with the *    Harvest source code distribution. *   *  TERMS OF USE *     *    The Harvest software may be used and re-distributed without *    charge, provided that the software origin and research team are *    cited in any use of the system.  Most commonly this is *    accomplished by including a link to the Harvest Home Page *    (http://harvest.cs.colorado.edu/) from the query page of any *    Broker you deploy, as well as in the query result pages.  These *    links are generated automatically by the standard Broker *    software distribution. *     *    The Harvest software is provided ``as is'', without express or *    implied warranty, and with no support nor obligation to assist *    in its use, correction, modification or enhancement.  We assume *    no liability with respect to the infringement of copyrights, *    trade secrets, or any patents, and are not responsible for *    consequential damages.  Proper use of the Harvest software is *    entirely the responsibility of the user. *   *  DERIVATIVE WORKS *   *    Users may make derivative works from the Harvest software, subject  *    to the following constraints: *   *      - You must include the above copyright notice and these  *        accompanying paragraphs in all forms of derivative works,  *        and any documentation and other materials related to such  *        distribution and use acknowledge that the software was  *        developed at the above institutions. *   *      - You must notify IRTF-RD regarding your distribution of  *        the derivative work. 
*   *      - You must clearly notify users that your are distributing  *        a modified version and not the original Harvest software. *   *      - Any derivative product is also subject to these copyright  *        and use restrictions. *   *    Note that the Harvest software is NOT in the public domain.  We *    retain copyright, as specified above. *   *  HISTORY OF FREE SOFTWARE STATUS *   *    Originally we required sites to license the software in cases *    where they were going to build commercial products/services *    around Harvest.  In June 1995 we changed this policy.  We now *    allow people to use the core Harvest software (the code found in *    the Harvest ``src/'' directory) for free.  We made this change *    in the interest of encouraging the widest possible deployment of *    the technology.  The Harvest software is really a reference *    implementation of a set of protocols and formats, some of which *    we intend to standardize.  We encourage commercial *    re-implementations of code complying to this set of standards.   *   */#include "broker.h"#include "log.h"/*********************************************************************** 		Registry GathererID support ***********************************************************************//* *  The GathererID support allows fast, efficient internal representation *  of GathererIDs.  Rather than each in-memory Registry entry *  containing the full text of the GathererID, it instead contains *  a GathererID number which is used in the GathererID cache. 
*//* Global variables */GathererID *gid_cache[MAX_GATHERER_ID];/* Local variables */static int ngid_cache;		/* Number of valid GathererIDs in the cache *//* Local functions */static int RG_gid_cache_add();static GathererID *RG_gid_cache_lookup_bynum();static GathererID *RG_gid_cache_lookup();static GathererID *RG_gid_cache_lookup3();/* *  RG_gid_init - Initializes the GathererID cache */void RG_gid_init(){	Debug(79,1,("RG_gid_init: Initializing the Gatherer ID mgmt\n"));	memset(gid_cache, '\0', sizeof(GathererID *) * MAX_GATHERER_ID);	ngid_cache = 0;}/* *  RG_gid_destroy - Frees the GathererID cache */void RG_gid_destroy(){	int i;	Debug(79,1,("RG_gid_free: Destroying the Gatherer ID mgmt\n"));	for (i = 0; i < MAX_GATHERER_ID; i++) {		RG_gid_free(gid_cache[i]);		gid_cache[i] = NULL;	}	ngid_cache = 0;}/* *  RG_gid_register - Registers the given GathererID with the cache. *  Returns the GID identifier. * *  The Collector calls this function to assign a GID identifier for *  the Registry entry. */int RG_gid_register(gid)GathererID *gid;{	if (gid == NULL) 		return -1;	Debug(79,5,("RG_gid_register: Registering %p\n", gid));	return(RG_gid_cache_add(gid));}/* *  RG_gid - Returns the GathererID for the given GID identifier. * *  Any module can call this to retrieve the (read-only) text associated  *  with the GID identifier. */GathererID *RG_gid(i)int i;{	Debug(79,5,("RG_gid: Accessing %d\n", i));	return(RG_gid_cache_lookup_bynum(i));}/* *  RG_gid_new - Generates a new GathererID with the given information. 
*/GathererID *RG_gid_new(gn, gh, gv)char *gn, *gh, *gv;{	static GathererID *p;	p = (GathererID *) xmalloc(sizeof(GathererID));	p->gn = xstrdup(gn);	p->gns = strlen(p->gn);	p->gh = xstrdup(gh);	p->ghs = strlen(p->gh);	p->gv = xstrdup(gv);	p->gvs = strlen(p->gv);	p->GID = -1;	Debug(79,9,("RG_gid_new: Created %p: %s %s %s\n", p, gn, gh, gv));	return(p);}/* *  RG_gid_free - Destroys the given GathererID */void RG_gid_free(p)GathererID *p;{	if (p != NULL) {		Debug(79,9,("RG_gid_free: Destroying %p %d\n", p, p->GID));		if (p->gn != NULL) xfree(p->gn);		if (p->gh != NULL) xfree(p->gh);		if (p->gv != NULL) xfree(p->gv);		xfree(p);	}}/* *  RG_gid_cache_add() - Adds the given GID to the cache.  Returns the gid. */static int RG_gid_cache_add(gid)GathererID *gid;{	GathererID *p;	Debug(79,5,("RG_gid_cache_add: Trying %p %d\n", gid, gid->GID));	/* See if it's already in the cache? */	if ((p = RG_gid_cache_lookup(gid)) == NULL) {		Debug(79,5,("RG_gid_cache_add: New %p %d\n", gid, gid->GID));		if (ngid_cache >= MAX_GATHERER_ID) {			errorlog("Registry: Increase MAX_GATHERER_ID: %d\n", 				MAX_GATHERER_ID);			return ERROR;		}		gid_cache[ngid_cache] = RG_gid_new(gid->gn, gid->gh, gid->gv);		gid_cache[ngid_cache]->GID = ngid_cache;		p = gid_cache[ngid_cache];		Debug(79,9,("RG_gid_cache_add: Added %d %s %d %s %d %s %d\n",			p->GID, p->gn, p->gns, p->gh, p->ghs, p->gv, p->gvs));		LOGGID(p->GID, p->gn, p->gh, p->gv);		ngid_cache++;	}	Debug(79,5,("RG_gid_cache_add: Found %p %d\n", p, p->GID));	return(p->GID);}/* *  RG_gid_cache_lookup - Returns NULL if no matching Gid is found. *  Otherwise, returns the GathererID of the matching Gid. 
*/static GathererID *RG_gid_cache_lookup3(gn, gh, gv)char *gn, *gh, *gv;{	int i, gns, ghs, gvs;	gns = strlen(gn);	ghs = strlen(gh);	gvs = strlen(gv);	for (i = 0; i < ngid_cache; i++) {		if (gid_cache[i]->gns != gns)			continue;		if (gid_cache[i]->ghs != ghs)			continue;		if (gid_cache[i]->gvs != gvs)			continue;		if (strcmp(gid_cache[i]->gn, gn) != 0)			continue;		if (strcmp(gid_cache[i]->gh, gh) != 0)			continue;		if (strcmp(gid_cache[i]->gv, gv) != 0)			continue;		return(gid_cache[i]);	/* match */	}	return(NULL);}/* *  RG_gid_cache_lookup_bygid - Returns NULL if no matching Gid is found. *  Otherwise, returns the GathererID of the matching Gid. */static GathererID *RG_gid_cache_lookup(p)GathererID *p;{	if (p == NULL)		return(NULL);	if (p->GID == -1)		return(RG_gid_cache_lookup3(p->gn, p->gh, p->gv));	return(RG_gid_cache_lookup_bynum(p->GID));}/* *  RG_gid_cache_lookup_bynum - Returns NULL if no matching Gid is found. *  Otherwise, returns the GathererID of the matching Gid. */static GathererID *RG_gid_cache_lookup_bynum(i)int i;{	if (i >= 0 && i < ngid_cache)		return(gid_cache[i]);	return(NULL);}

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -