📄 cleandb.c
字号:
static char rcsid[] = "cleandb.c,v 1.22 1996/01/17 10:07:46 duane Exp";/* * cleandb - Cleans up a GDBM database to prepare it for production use. * Verifies that each SOIF template is legal, verifies that each * SOIF template is printed with the libtemplate routine, and verifies * that each SOIF object contains an 'Update-Time', and 'Gatherer-*' attr. * * Usage: cleandb [-truncate] file * * Darren Hardy, hardy@cs.colorado.edu, May 1994 * * Copyright (c) 1994, 1995. All rights reserved. * * The Harvest software was developed by the Internet Research Task * Force Research Group on Resource Discovery (IRTF-RD): * * Mic Bowman of Transarc Corporation. * Peter Danzig of the University of Southern California. * Darren R. Hardy of the University of Colorado at Boulder. * Udi Manber of the University of Arizona. * Michael F. Schwartz of the University of Colorado at Boulder. * Duane Wessels of the University of Colorado at Boulder. * * This copyright notice applies to software in the Harvest * ``src/'' directory only. Users should consult the individual * copyright notices in the ``components/'' subdirectories for * copyright information about other software bundled with the * Harvest source code distribution. * * TERMS OF USE * * The Harvest software may be used and re-distributed without * charge, provided that the software origin and research team are * cited in any use of the system. Most commonly this is * accomplished by including a link to the Harvest Home Page * (http://harvest.cs.colorado.edu/) from the query page of any * Broker you deploy, as well as in the query result pages. These * links are generated automatically by the standard Broker * software distribution. * * The Harvest software is provided ``as is'', without express or * implied warranty, and with no support nor obligation to assist * in its use, correction, modification or enhancement. We assume * no liability with respect to the infringement of copyrights, * trade secrets, or any patents, and are not responsible for * consequential damages. Proper use of the Harvest software is * entirely the responsibility of the user. * * DERIVATIVE WORKS * * Users may make derivative works from the Harvest software, subject * to the following constraints: * * - You must include the above copyright notice and these * accompanying paragraphs in all forms of derivative works, * and any documentation and other materials related to such * distribution and use acknowledge that the software was * developed at the above institutions. * * - You must notify IRTF-RD regarding your distribution of * the derivative work. * * - You must clearly notify users that your are distributing * a modified version and not the original Harvest software. * * - Any derivative product is also subject to these copyright * and use restrictions. * * Note that the Harvest software is NOT in the public domain. We * retain copyright, as specified above. * * HISTORY OF FREE SOFTWARE STATUS * * Originally we required sites to license the software in cases * where they were going to build commercial products/services * around Harvest. In June 1995 we changed this policy. We now * allow people to use the core Harvest software (the code found in * the Harvest ``src/'' directory) for free. We made this change * in the interest of encouraging the widest possible deployment of * the technology. The Harvest software is really a reference * implementation of a set of protocols and formats, some of which * we intend to standardize. We encourage commercial * re-implementations of code complying to this set of standards. * */#include <stdio.h>#include <string.h>#include <ctype.h>#include <time.h>#include <gdbm.h>#include "util.h"#include "template.h"/* * MAX_BYTES - Maximum number of bytes allowed in a field during -truncate. */#ifndef MAX_BYTES#define MAX_BYTES (8 * 1024)#endif/* Local functions */static void usage();static void check_template();static int do_truncate = 0;static void usage(){ fprintf(stderr, "Usage: cleandb [-truncate] db\n"); exit(1);}static void remove_keywords(t) Template *t;{ AVList *walker = t->list; /* Remove any keyword data that's longer than MAX_BYTES */ while (walker) { if (strstr(walker->data->attribute, "eyword")) { if (walker->data->vsize > MAX_BYTES) { Log("Trimmed %d bytes from %s attribute (%s)\n", walker->data->vsize - MAX_BYTES, t->url, walker->data->attribute); walker->data->vsize = MAX_BYTES; } } walker = walker->next; }}static void check_template(dbf, k, d) GDBM_FILE dbf; datum k; datum d;{ Template *t = NULL; Buffer *b = NULL; datum nd; /* Parse the template to ensure correctness */ init_parse_template_string(d.dptr, d.dsize); t = parse_template(); finish_parse_template(); if (t == NULL) { /* Unparsable; delete it */ Log("Deleting invalid SOIF: Unparsable: %s\n", k.dptr); gdbm_delete(dbf, k); xfree(k.dptr); xfree(d.dptr); return; } if (extract_AVPair(t->list, T_UPDATE) == NULL) { Log("Deleting invalid SOIF: No %s: %s\n", T_UPDATE, k.dptr); gdbm_delete(dbf, k); xfree(k.dptr); xfree(d.dptr); free_template(t); return; } if (extract_AVPair(t->list, T_GHOST) == NULL) { Log("Deleting invalid SOIF: No %s: %s\n", T_GHOST, k.dptr); gdbm_delete(dbf, k); xfree(k.dptr); xfree(d.dptr); free_template(t); return; } if (extract_AVPair(t->list, T_GNAME) == NULL) { Log("Deleting invalid SOIF: No %s: %s\n", T_GNAME, k.dptr); gdbm_delete(dbf, k); xfree(k.dptr); xfree(d.dptr); free_template(t); return; } if (extract_AVPair(t->list, T_GVERSION) == NULL) { Log("Deleting invalid SOIF: No %s: %s\n", T_GVERSION, k.dptr); gdbm_delete(dbf, k); xfree(k.dptr); xfree(d.dptr); free_template(t); return; } if (do_truncate) remove_keywords(t); /* Verify that the stored data is the same as the parsed template */ b = init_print_template(NULL); print_template(t); nd.dptr = b->data; nd.dsize = b->length; if (d.dsize != nd.dsize) /* Different templates, replace */ (void) gdbm_store(dbf, k, nd, GDBM_REPLACE); /* Clean up */ xfree(k.dptr); xfree(d.dptr); finish_print_template(); free_template(t);}int main(argc, argv) int argc; char *argv[];{ GDBM_FILE dbf; datum d, k, nk; init_log3("cleandb", stdout, stderr); if (argc > 1 && !strcmp(argv[1], "-truncate")) { argc--; argv++; do_truncate = 1; } if (argc != 2) usage(); dbf = gdbm_open(argv[1], 0, GDBM_WRITER, 0644, NULL); if (dbf == NULL) { errorlog("gdbm_open: %s: %s\n", argv[1], gdbm_strerror(gdbm_errno)); log_errno(argv[1]); usage(); } k = gdbm_firstkey(dbf); while (k.dptr) { nk = gdbm_nextkey(dbf, k); d = gdbm_fetch(dbf, k); check_template(dbf, k, d); k = nk; } gdbm_close(dbf); exit(0);}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -