⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 dumpregistry.c

📁 harvest是一个下载html网页的机器人
💻 C
字号:
static char rcsid[] = "$Id: dumpregistry.c,v 2.1 1997/03/21 17:19:54 sxw Exp $";/* *  dumpregistry.c - Prints out the Broker's registry file. *  Assumes that the Broker's admin directory is in the current directory. * *  Usage:  dumpregistry [-count] [BrokerDirectory] * *  DEBUG: none *  AUTHOR: Harvest derived (Darren Hardy) * *  Harvest Indexer http://harvest.sourceforge.net/ *  ----------------------------------------------- * *  The Harvest Indexer is a continued development of code developed by *  the Harvest Project. Development is carried out by numerous individuals *  in the Internet community, and is not officially connected with the *  original Harvest Project or its funding sources. * *  Please mail lee@arco.de if you are interested in participating *  in the development effort. * *  This program is free software; you can redistribute it and/or modify *  it under the terms of the GNU General Public License as published by *  the Free Software Foundation; either version 2 of the License, or *  (at your option) any later version. * *  This program is distributed in the hope that it will be useful, *  but WITHOUT ANY WARRANTY; without even the implied warranty of *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the *  GNU General Public License for more details. * *  You should have received a copy of the GNU General Public License *  along with this program; if not, write to the Free Software *  Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. *//*  ---------------------------------------------------------------------- *  Copyright (c) 1994, 1995.  All rights reserved. * *    The Harvest software was developed by the Internet Research Task *    Force Research Group on Resource Discovery (IRTF-RD): * *          Mic Bowman of Transarc Corporation. *          Peter Danzig of the University of Southern California. *          Darren R. Hardy of the University of Colorado at Boulder. *          Udi Manber of the University of Arizona. 
*          Michael F. Schwartz of the University of Colorado at Boulder. *          Duane Wessels of the University of Colorado at Boulder. * *    This copyright notice applies to software in the Harvest *    ``src/'' directory only.  Users should consult the individual *    copyright notices in the ``components/'' subdirectories for *    copyright information about other software bundled with the *    Harvest source code distribution. * *  TERMS OF USE * *    The Harvest software may be used and re-distributed without *    charge, provided that the software origin and research team are *    cited in any use of the system.  Most commonly this is *    accomplished by including a link to the Harvest Home Page *    (http://harvest.cs.colorado.edu/) from the query page of any *    Broker you deploy, as well as in the query result pages.  These *    links are generated automatically by the standard Broker *    software distribution. * *    The Harvest software is provided ``as is'', without express or *    implied warranty, and with no support nor obligation to assist *    in its use, correction, modification or enhancement.  We assume *    no liability with respect to the infringement of copyrights, *    trade secrets, or any patents, and are not responsible for *    consequential damages.  Proper use of the Harvest software is *    entirely the responsibility of the user. * *  DERIVATIVE WORKS * *    Users may make derivative works from the Harvest software, subject *    to the following constraints: * *      - You must include the above copyright notice and these *        accompanying paragraphs in all forms of derivative works, *        and any documentation and other materials related to such *        distribution and use acknowledge that the software was *        developed at the above institutions. * *      - You must notify IRTF-RD regarding your distribution of *        the derivative work. 
* *      - You must clearly notify users that your are distributing *        a modified version and not the original Harvest software. * *      - Any derivative product is also subject to these copyright *        and use restrictions. * *    Note that the Harvest software is NOT in the public domain.  We *    retain copyright, as specified above. * *  HISTORY OF FREE SOFTWARE STATUS * *    Originally we required sites to license the software in cases *    where they were going to build commercial products/services *    around Harvest.  In June 1995 we changed this policy.  We now *    allow people to use the core Harvest software (the code found in *    the Harvest ``src/'' directory) for free.  We made this change *    in the interest of encouraging the widest possible deployment of *    the technology.  The Harvest software is really a reference *    implementation of a set of protocols and formats, some of which *    we intend to standardize.  We encourage commercial *    re-implementations of code complying to this set of standards. * */#include "broker.h"#include <locale.h>static int docount = 0;char *DIRpath = ".";time_t Cur_Time;struct indexing_routines *INDEXER = NULL;	/* dummy */static int do_RG_Free_Entry();static void usage(){	fprintf(stderr, "Usage:  dumpregistry [-count] [BrokerDirectory]\n");	exit(1);}void print_reg_t(i, rt)int i;reg_t *rt;{	GathererID *gid;	if (docount)		return;	gid = RG_gid(rt->GID);	if (gid == NULL)		printf("ERROR: GID is NULL\n");	printf("---------------- %5d --------------\n", i);	printf("OID: %d\n", rt->FD);	printf("URL: %s\n", rt->url);	printf("Description: %s\n", rt->desc);	printf("Gatherer-Host: %s\n", gid->gh);	printf("Gatherer-Name: %s\n", gid->gn);	printf("Gatherer-Version: %s\n", gid->gv);	printf("Last-Modification-Time: %d\n", rt->lmt);	printf("MD5: %s\n", rt->md5 ? 
rt->md5 : "");	printf("Refresh-Rate: %d\n", rt->refresh_rate);	printf("Time-To-Live: %d\n", rt->ttl);	printf("Update-Time: %d\n", rt->update_time);}int main(argc, argv)int argc;char *argv[];{	int i, j, k;	reg_t *t;	REGISTRY_HEADER *rhdr;	(void) setlocale (LC_ALL, "");	Cur_Time = time(NULL);	if (argc > 1 && argv[1] != NULL && !strcmp(argv[1], "-count")) {		docount = 1;		argc--;		argv++;	}	if (argc > 1 && argv[1] != NULL) {		DIRpath = xstrdup(argv[1]);	}	if (init_registry_file() != SUCCESS) {		fprintf(stderr, "dumpregistry: Cannot open the Registry.\n");		usage();	}	if ((rhdr = read_header()) == NULL) {		fprintf(stderr, "dumpregistry: Bad Registry header.\n");		exit(1);	}	for (i = j = k = 0; TRUE; i++) {		t = xmalloc(sizeof(reg_t));		memset(t, '\0', sizeof(reg_t));		switch (get_record(t)) {		case ERROR:			do_RG_Free_Entry(t);			fprintf(stderr, "dumpregistry: Cannot read record number %d, aborting dump.\n", i);			finish_registry_file();			exit(1);		case ENTRY_DELETED:			do_RG_Free_Entry(t);			if (!docount) {				printf("------------- %5d -----------\n", i);				printf("ENTRY DELETED.\n");			}			j++;			break;		case REGISTRY_EOF:			do_RG_Free_Entry(t);			printf("----------------------------------------\n");			printf("SUMMARY\n");			printf("ACTUAL ENTRIES: %d, %d\n", i, rhdr->nrecords);			printf("DELETED ENTRIES: %d, %d\n", j, rhdr->nrecords_deleted);			printf("VALID ENTRIES: %d, %d\n", k, rhdr->nrecords_valid);			finish_registry_file();			exit(0);		case SUCCESS:			print_reg_t(i, t);			do_RG_Free_Entry(t);			k++;			break;		default:			fprintf(stderr, "dumpregistry: Unexpected error encountered, contact developers.\n");			do_RG_Free_Entry(t);			exit(2);		}	}	exit(0);}static int do_RG_Free_Entry(tmp)reg_t *tmp;{	if (tmp) {		if (tmp->url)			xfree(tmp->url);		if (tmp->md5)			xfree(tmp->md5);		if (tmp->desc)			xfree(tmp->desc);		xfree(tmp);	}	return SUCCESS;}int LOG_statlog(){	/* dummy */	return SUCCESS;}

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -