📄 db.c
字号:
static char rcsid[] = "db.c,v 1.43 1996/01/05 20:28:53 duane Exp";/* * db.c - Storage Manager for the Essence system * * DEBUG: section 61, level 1 Gatherer essence database routines * * Darren Hardy, hardy@cs.colorado.edu, February 1994 * * ---------------------------------------------------------------------- * Copyright (c) 1994, 1995. All rights reserved. * * The Harvest software was developed by the Internet Research Task * Force Research Group on Resource Discovery (IRTF-RD): * * Mic Bowman of Transarc Corporation. * Peter Danzig of the University of Southern California. * Darren R. Hardy of the University of Colorado at Boulder. * Udi Manber of the University of Arizona. * Michael F. Schwartz of the University of Colorado at Boulder. * Duane Wessels of the University of Colorado at Boulder. * * This copyright notice applies to software in the Harvest * ``src/'' directory only. Users should consult the individual * copyright notices in the ``components/'' subdirectories for * copyright information about other software bundled with the * Harvest source code distribution. * * TERMS OF USE * * The Harvest software may be used and re-distributed without * charge, provided that the software origin and research team are * cited in any use of the system. Most commonly this is * accomplished by including a link to the Harvest Home Page * (http://harvest.cs.colorado.edu/) from the query page of any * Broker you deploy, as well as in the query result pages. These * links are generated automatically by the standard Broker * software distribution. * * The Harvest software is provided ``as is'', without express or * implied warranty, and with no support nor obligation to assist * in its use, correction, modification or enhancement. We assume * no liability with respect to the infringement of copyrights, * trade secrets, or any patents, and are not responsible for * consequential damages. Proper use of the Harvest software is * entirely the responsibility of the user. 
* * DERIVATIVE WORKS * * Users may make derivative works from the Harvest software, subject * to the following constraints: * * - You must include the above copyright notice and these * accompanying paragraphs in all forms of derivative works, * and any documentation and other materials related to such * distribution and use acknowledge that the software was * developed at the above institutions. * * - You must notify IRTF-RD regarding your distribution of * the derivative work. * * - You must clearly notify users that your are distributing * a modified version and not the original Harvest software. * * - Any derivative product is also subject to these copyright * and use restrictions. * * Note that the Harvest software is NOT in the public domain. We * retain copyright, as specified above. * * HISTORY OF FREE SOFTWARE STATUS * * Originally we required sites to license the software in cases * where they were going to build commercial products/services * around Harvest. In June 1995 we changed this policy. We now * allow people to use the core Harvest software (the code found in * the Harvest ``src/'' directory) for free. We made this change * in the interest of encouraging the widest possible deployment of * the technology. The Harvest software is really a reference * implementation of a set of protocols and formats, some of which * we intend to standardize. We encourage commercial * re-implementations of code complying to this set of standards. 
* */#include <stdio.h>#include <string.h>#include <stdlib.h>#include <unistd.h>#include <sys/param.h>#include <time.h>#include <gdbm.h>#include "util.h"#include "url.h"#include "template.h"#include "essence.h"/* Local variables */static char dbfile[MAXPATHLEN + 1]; /* WORKING.gdbm */static char prodbfile[MAXPATHLEN + 1]; /* PRODUCTION.gdbm */static char indexfile[MAXPATHLEN + 1]; /* INDEX.gdbm */static char md5file[MAXPATHLEN + 1]; /* MD5.gdbm */static char reffile[MAXPATHLEN + 1]; /* REFRESH.gdbm */static GDBM_FILE dbf = NULL; /* WORKING.gdbm */static GDBM_FILE pdbf = NULL; /* PRODUCTION.gdbm */static GDBM_FILE idbf = NULL; /* INDEX.gdbm */static GDBM_FILE mdbf = NULL; /* MD5.gdbm */static GDBM_FILE rdbf = NULL; /* REFRESH.gdbm */static int ndeletions = 0; /* num of deletion operations */static int null_filter = 0; /* dbcheck is nop? */static int max_deletions;/* Local functions */static void dbcheck_refresh();static Buffer *soif_to_buffer();/* * init_db() - Initialize database routines. n is the number of deletion * operations allowed before reorganizing the GDBM database. If n == 0, * don't reorganize GDBM database. */void init_db(dbdir, n) char *dbdir; int n;{ max_deletions = n; ndeletions = 0; sprintf(dbfile, "%s/WORKING.gdbm", dbdir ? dbdir : topdir); sprintf(prodbfile, "%s/PRODUCTION.gdbm", dbdir ? dbdir : topdir); sprintf(indexfile, "%s/INDEX.gdbm", dbdir ? dbdir : topdir); sprintf(md5file, "%s/MD5.gdbm", dbdir ? dbdir : topdir); sprintf(reffile, "%s/REFRESH.gdbm", dbdir ? 
dbdir : topdir); /* Initialize WORKING.gdbm */ dbf = gdbm_open(dbfile, 0, GDBM_NEWDB, 0644, NULL); if (dbf == NULL) { /* Cannot run without the working db */ log_errno(dbfile); fatal("gdbm_open: %s: %s\n", dbfile, gdbm_strerror(gdbm_errno)); } pdbf = gdbm_open(prodbfile, 0, GDBM_READER, 0644, NULL); idbf = gdbm_open(indexfile, 0, GDBM_READER, 0644, NULL); mdbf = gdbm_open(md5file, 0, GDBM_READER, 0644, NULL); rdbf = NULL; if (pdbf == NULL || idbf == NULL || mdbf == NULL) { /* Act as a nop filter */ Log("WARNING: Incremental Gatherering will NOT be supported on this run.\n"); Log("\tunable to locate these database(s) needed for incremental gatherering:\n"); if (pdbf == NULL) Log("\t%s\n", prodbfile); if (idbf == NULL) Log("\t%s\n", indexfile); if (mdbf == NULL) Log("\t%s\n", md5file); null_filter = 1; } if (null_filter) return; /* We don't need the refresh database if we have a null filter */ rdbf = gdbm_open(reffile, 0, GDBM_NEWDB, 0644, NULL); if (rdbf == NULL) { Log("WARNING: gdbm_open: %s: %s\n", reffile, gdbm_strerror(gdbm_errno)); log_errno(reffile); }}/* * finish_db() - Cleaned up after database routines. */void finish_db(){ if (dbf == NULL) return; gdbm_sync(dbf); /* sync to disk */#ifdef GDBM_GROWTH_BUG if (ndeletions > 0 && gdbm_reorganize(dbf)) Log("WARNING: gdbm_reorganize: %s: %s\n", dbfile, gdbm_strerror(gdbm_errno));#endif ndeletions = 0; if (dbf != NULL) { gdbm_close(dbf); dbf = NULL; } if (pdbf != NULL) { gdbm_close(pdbf); pdbf = NULL; } if (idbf != NULL) { gdbm_close(idbf); idbf = NULL; } if (mdbf != NULL) { gdbm_close(mdbf); mdbf = NULL; } if (rdbf != NULL) { gdbm_close(rdbf); rdbf = NULL; }}/* * duplicate_url() - Returns non-zero if the URL is already in the * database; zero otherwise. */int duplicate_url(url) char *url;{ datum k; int r; k.dptr = url; k.dsize = strlen(url) + 1; r = gdbm_exists(dbf, k); return (r);}/* * duplicate_url() - Returns non-zero if the URL is already in any of * the databases (WORKING or PRODUCTION); zero otherwise. 
*/
int duplicate_url_any(url)
     char *url;
{
    datum k;
    int r;

    /* The key is the URL string including its terminating null char */
    k.dptr = url;
    k.dsize = strlen(url) + 1;

    /* Check WORKING.gdbm first */
    r = gdbm_exists(dbf, k);

    /* Consult PRODUCTION.gdbm only on a miss, and only if it is open */
    if (r == 0 && pdbf != NULL)
	r = gdbm_exists(pdbf, k);
    return (r);
}

/*
 *  add_template() - Adds the template to the database, keyed by
 *  template->url.  If an entry for that URL already exists, object must
 *  carry the F_NESTED flag: the stored entry is then parsed and the new
 *  template is embedded in it.  An existing entry for a non-nested object
 *  is reported with errorlog() and the function returns without adding
 *  anything.
 */
void add_template(template, object)
     Template *template;
     DataObject *object;
{
    datum k, d;
    Buffer *b = NULL;
    Template *ct = NULL;	/* template parsed from the existing entry */
    int appending = 0;

    Debug(61, 1, ("add_template(%s)\n", template->url));

    /* Set the key */
    /* NOTE(review): the strdup() result is not checked before strlen() */
    k.dptr = strdup(template->url);
    k.dsize = strlen(k.dptr) + 1;	/* store terminating null char, too */

    if (gdbm_exists(dbf, k)) {
	datum curd;		/* the entry currently stored for this URL */

	/* If a template already exists, then check nested file. */
	if ((object->flags & F_NESTED) == 0) {
	    errorlog("Existing GDBM Entry for non-nested %s\n", template->url);
	    xfree(k.dptr);	/* release the duplicated key before bailing out */
	    return;
	}
	/* Grab the existing template and parse it into a Template */
	/* NOTE(review): curd.dptr is not checked for NULL before parsing */
	curd = gdbm_fetch(dbf, k);
	init_parse_template_string(curd.dptr, curd.dsize);
	ct = parse_template();
	finish_parse_template();
	free(curd.dptr);	/* the fetched copy is no longer needed once parsed */

	/* Verify that the template was parsable */
	if (ct == NULL) {
	    errorlog("Template for %s in %s is malformed.\n", k.dptr, dbfile);
	    xfree(k.dptr);	/* release the duplicated key before bailing out */
	    return;
	}
	/* Embed the current template within old template. */
	if (embed_template(template, ct) == NULL) {
	    errorlog("add_template: Failed to embed template: %s\n",
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -