⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 mergedb.c

📁 harvest是一个下载html网页的机器人
💻 C
字号:
static char rcsid[] = "mergedb.c,v 1.20 1996/01/04 03:46:44 duane Exp";/* *  mergedb.c - Merge Automatic and Manual version of Essence database. * *  Usage: mergedb production automatic manual [manual ...] * *  This program allows administrators to manually add information about *  objects (URLs) to the Essence database.  In order to preserve all of *  the automatically generated information about objects, the manually- *  and automatically-generated information are merged into a new, *  production database.  All manually-generated information overrides *  any automatically-generated information. * *  Darren Hardy, hardy@cs.colorado.edu, March 1994 * *  ---------------------------------------------------------------------- *  Copyright (c) 1994, 1995.  All rights reserved. * *    The Harvest software was developed by the Internet Research Task *    Force Research Group on Resource Discovery (IRTF-RD): * *          Mic Bowman of Transarc Corporation. *          Peter Danzig of the University of Southern California. *          Darren R. Hardy of the University of Colorado at Boulder. *          Udi Manber of the University of Arizona. *          Michael F. Schwartz of the University of Colorado at Boulder. *          Duane Wessels of the University of Colorado at Boulder. * *    This copyright notice applies to software in the Harvest *    ``src/'' directory only.  Users should consult the individual *    copyright notices in the ``components/'' subdirectories for *    copyright information about other software bundled with the *    Harvest source code distribution. * *  TERMS OF USE * *    The Harvest software may be used and re-distributed without *    charge, provided that the software origin and research team are *    cited in any use of the system.  Most commonly this is *    accomplished by including a link to the Harvest Home Page *    (http://harvest.cs.colorado.edu/) from the query page of any *    Broker you deploy, as well as in the query result pages.  
These *    links are generated automatically by the standard Broker *    software distribution. * *    The Harvest software is provided ``as is'', without express or *    implied warranty, and with no support nor obligation to assist *    in its use, correction, modification or enhancement.  We assume *    no liability with respect to the infringement of copyrights, *    trade secrets, or any patents, and are not responsible for *    consequential damages.  Proper use of the Harvest software is *    entirely the responsibility of the user. * *  DERIVATIVE WORKS * *    Users may make derivative works from the Harvest software, subject *    to the following constraints: * *      - You must include the above copyright notice and these *        accompanying paragraphs in all forms of derivative works, *        and any documentation and other materials related to such *        distribution and use acknowledge that the software was *        developed at the above institutions. * *      - You must notify IRTF-RD regarding your distribution of *        the derivative work. * *      - You must clearly notify users that your are distributing *        a modified version and not the original Harvest software. * *      - Any derivative product is also subject to these copyright *        and use restrictions. * *    Note that the Harvest software is NOT in the public domain.  We *    retain copyright, as specified above. * *  HISTORY OF FREE SOFTWARE STATUS * *    Originally we required sites to license the software in cases *    where they were going to build commercial products/services *    around Harvest.  In June 1995 we changed this policy.  We now *    allow people to use the core Harvest software (the code found in *    the Harvest ``src/'' directory) for free.  We made this change *    in the interest of encouraging the widest possible deployment of *    the technology.  
The Harvest software is really a reference *    implementation of a set of protocols and formats, some of which *    we intend to standardize.  We encourage commercial *    re-implementations of code complying to this set of standards. * */#include <stdio.h>#include <stdlib.h>#include <string.h>#include <unistd.h>#include <gdbm.h>#include "util.h"#include "template.h"/* Local functions */static void usage();static void mergedb();/* Local variables */static int nreplace = 0;static void usage(){	fprintf(stderr, "Usage: mergedb production automatic manual [manual ...]\n");	exit(1);}static void process_manual_template(proddb, data, key)     GDBM_FILE proddb;     datum data, key;{	Template *mt;	datum k;	init_parse_template_string(data.dptr, data.dsize);	if ((mt = parse_template()) == NULL) {		errorlog("Manual SOIF object is corrupt: %s\n", key.dptr);		exit(1);	}	finish_parse_template();	k.dptr = strdup(mt->url);	k.dsize = strlen(k.dptr) + 1;	if (gdbm_store(proddb, k, data, GDBM_REPLACE))		errorlog("gdbm_store: %s: %s\n", k.dptr, gdbm_strerror(gdbm_errno));	free_template(mt);	free(k.dptr);}static void add_manual_to_production(filename, proddb)     char *filename;     GDBM_FILE proddb;{	GDBM_FILE dbf;	datum k, nk, d;	dbf = gdbm_open(filename, 0, GDBM_READER, 0644, NULL);	if (dbf == NULL) {		errorlog("gdbm_open: %s: %s\n", filename, gdbm_strerror(gdbm_errno));		return;	}	k = gdbm_firstkey(dbf);	while (k.dptr) {		nk = gdbm_nextkey(dbf, k);		d = gdbm_fetch(dbf, k);		process_manual_template(proddb, d, k);		free(k.dptr);		free(d.dptr);		k = nk;	}	gdbm_close(dbf);}/* *  merge_auto_data() - Converts ad and pd to templates, then merges the *  pd template into the ad template, then replaces the data for the *  proddb with the newly constructed template. 
*/static void merge_auto_data(proddb, k, ad, pd)     GDBM_FILE proddb;     datum k, ad, pd;{	Template *at, *pt;	Buffer *b;	datum d;	init_parse_template_string(ad.dptr, ad.dsize);	at = parse_template();	finish_parse_template();	init_parse_template_string(pd.dptr, pd.dsize);	pt = parse_template();	finish_parse_template();	if (at == NULL || pt == NULL)		return;	merge_AVList(at->list, pt->list);	b = init_print_template(NULL);	print_template(at);	d.dptr = b->data;	d.dsize = b->length;	gdbm_store(proddb, k, d, GDBM_REPLACE);	nreplace++;	finish_print_template();}/* *  merge_auto() - Merge the automatic information into the production *  database. */static void merge_auto(autodb, proddb)     GDBM_FILE autodb;     GDBM_FILE proddb;{	datum ad, pd, k, nk;	if (autodb == NULL)		return;	k = gdbm_firstkey(autodb);	while (k.dptr) {		nk = gdbm_nextkey(autodb, k);		ad = gdbm_fetch(autodb, k);		if (gdbm_exists(proddb, k)) {			pd = gdbm_fetch(proddb, k);			merge_auto_data(proddb, k, ad, pd);			free(pd.dptr);		} else {			gdbm_store(proddb, k, ad, GDBM_INSERT);		}		free(k.dptr);		free(ad.dptr);		k = nk;	}}/* *  To merge N manual databases with one automatic databases, the N *  manual databases are consolidated into the production database. *  (Entire Templates are overwritten if they were present in an earlier *  manual database).  Then each automatic template is merged into *  the production database. 
*/int main(argc, argv)     int argc;     char *argv[];{	GDBM_FILE proddb, autodb;	int value = 256;	if (argc < 4)		usage();	init_log3("mergedb", stdout, stderr);	proddb = gdbm_open(*++argv, 0, GDBM_NEWDB, 0644, NULL);	if (proddb == NULL) {		errorlog("gdbm_open: %s: %s\n", *argv, gdbm_strerror(gdbm_errno));		log_errno(*argv);		exit(1);	}	gdbm_setopt(proddb, GDBM_CACHESIZE, &value, sizeof(int));	autodb = gdbm_open(*++argv, 0, GDBM_READER, 0644, NULL);	if (autodb == NULL && gdbm_errno != GDBM_EMPTY_DATABASE) {		errorlog("gdbm_open: %s: %s\n", *argv, gdbm_strerror(gdbm_errno));		gdbm_close(proddb);		exit(1);	}	argc -= 2;	while (--argc > 0) {		add_manual_to_production(*++argv, proddb);	}	if (autodb != NULL) {		merge_auto(autodb, proddb);		gdbm_close(autodb);	}	if (nreplace > 64) {		gdbm_sync(proddb);		gdbm_reorganize(proddb);	}	gdbm_close(proddb);	exit(0);}

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -