📄 candidate.c
字号:
static char rcsid[] = "candidate.c,v 1.19 1996/01/05 20:28:52 duane Exp";/* * candidate.c - Candidate Selection for the Essence system. * * Darren Hardy, hardy@cs.colorado.edu, February 1994 * * ---------------------------------------------------------------------- * Copyright (c) 1994, 1995. All rights reserved. * * The Harvest software was developed by the Internet Research Task * Force Research Group on Resource Discovery (IRTF-RD): * * Mic Bowman of Transarc Corporation. * Peter Danzig of the University of Southern California. * Darren R. Hardy of the University of Colorado at Boulder. * Udi Manber of the University of Arizona. * Michael F. Schwartz of the University of Colorado at Boulder. * Duane Wessels of the University of Colorado at Boulder. * * This copyright notice applies to software in the Harvest * ``src/'' directory only. Users should consult the individual * copyright notices in the ``components/'' subdirectories for * copyright information about other software bundled with the * Harvest source code distribution. * * TERMS OF USE * * The Harvest software may be used and re-distributed without * charge, provided that the software origin and research team are * cited in any use of the system. Most commonly this is * accomplished by including a link to the Harvest Home Page * (http://harvest.cs.colorado.edu/) from the query page of any * Broker you deploy, as well as in the query result pages. These * links are generated automatically by the standard Broker * software distribution. * * The Harvest software is provided ``as is'', without express or * implied warranty, and with no support nor obligation to assist * in its use, correction, modification or enhancement. We assume * no liability with respect to the infringement of copyrights, * trade secrets, or any patents, and are not responsible for * consequential damages. Proper use of the Harvest software is * entirely the responsibility of the user. * * DERIVATIVE WORKS * * Users may make derivative works from the Harvest software, subject * to the following constraints: * * - You must include the above copyright notice and these * accompanying paragraphs in all forms of derivative works, * and any documentation and other materials related to such * distribution and use acknowledge that the software was * developed at the above institutions. * * - You must notify IRTF-RD regarding your distribution of * the derivative work. * * - You must clearly notify users that your are distributing * a modified version and not the original Harvest software. * * - Any derivative product is also subject to these copyright * and use restrictions. * * Note that the Harvest software is NOT in the public domain. We * retain copyright, as specified above. * * HISTORY OF FREE SOFTWARE STATUS * * Originally we required sites to license the software in cases * where they were going to build commercial products/services * around Harvest. In June 1995 we changed this policy. We now * allow people to use the core Harvest software (the code found in * the Harvest ``src/'' directory) for free. We made this change * in the interest of encouraging the widest possible deployment of * the technology. The Harvest software is really a reference * implementation of a set of protocols and formats, some of which * we intend to standardize. We encourage commercial * re-implementations of code complying to this set of standards. * */#include <stdio.h>#include <string.h>#include <stdlib.h>#include "util.h"#include "url.h"#include "essence.h"static char *stoptypes[MAX_TYPES];static char *allowtypes[MAX_TYPES];/* * init_stoplist() - Initializes candidate selection step */void init_stoplist(){ FILE *fp; int i; char buf[BUFSIZ], *s; /* Grab the allow list */ i = 0; if (stoplist != NULL && (fp = fopen(stoplist, "r")) != NULL) { while (fgets(buf, BUFSIZ, fp)) { if (buf[0] == '#') continue; s = strtok(buf, " \t\n"); if (s != NULL) stoptypes[i++] = strdup(s); } fclose(fp); } for (; i < MAX_TYPES; i++) stoptypes[i] = NULL; /* Grab the allow list */ i = 0; if (allowlist != NULL && (fp = fopen(allowlist, "r")) != NULL) { while (fgets(buf, BUFSIZ, fp)) { if (buf[0] == '#') continue; s = strtok(buf, " \t\n"); if (s != NULL) allowtypes[i++] = strdup(s); } fclose(fp); } for (; i < MAX_TYPES; i++) allowtypes[i] = NULL;#ifdef NO_UNIX_RECURSE /* Add Directory by hand */ for (i = 0; i < MAX_TYPES && stoptypes[i]; i++) if (!strcmp(stoptypes[i], "Directory")) break; if (stoptypes[i] == NULL) stoptypes[i] = strdup("Directory");#endif}/* * finish_stoplist() - Cleans up after candidate selection step */void finish_stoplist(){ int i; for (i = 0; i < MAX_TYPES; i++) { if (stoptypes[i]) xfree(stoptypes[i]); if (allowtypes[i]) xfree(allowtypes[i]); }}/* * allow_bytype() - Candidate selection on an object determined by * its type. Only allows objects with matching types. Returns non-zero * if the object should be a candidate; returns zero otherwise. */int allow_bytype(object) DataObject *object;{ int i; if (!object || !object->type) return (0); for (i = 0; allowtypes[i] != NULL && i < MAX_TYPES; i++) { if (!strcmp(allowtypes[i], object->type)) return (1); } return (0);}/* * stop_bytype() - Candidate selection on an object determined by * its type. Returns non-zero if the object should be not be a * candidate; returns zero otherwise. */int stop_bytype(object) DataObject *object;{ int i; if (!object || !object->type) return (0); for (i = 0; stoptypes[i] != NULL && i < MAX_TYPES; i++) { if (!strcmp(stoptypes[i], object->type)) return (1); } return (0);}/* * stop_byname() - Candidate selection on an object determined by * its name. Returns non-zero if the object should be not be a * candidate; returns zero otherwise. */int stop_byname(object) DataObject *object;{ return (0);}/* * stop_byduplicate() - Candidate selection on an object determined by * a duplicate in the database. A duplicate need not be an exact match; * it could be another version of the object (like the compressed * version). Returns non-zero if the object should not be a candidate; * returns zero otherwise. */int stop_byduplicate(object) DataObject *object;{ char *s, *q, buf[BUFSIZ]; int r; /* * If the object is not nested, then check to see if it's in db */ if ((object->flags & F_NESTED) == 0) { r = duplicate_url(object->url->url); if (r) return (r); } /* * If the object is compressed then check to see if the * uncompressed version has already been done. */ if (!strcmp(object->type, "BZIP2Compressed") || !strcmp(object->type, "Compressed") || !strcmp(object->type, "GNUCompressed") || !strcmp(object->type, "BZIP2CompressedTar") || !strcmp(object->type, "CompressedTar") || !strcmp(object->type, "GNUCompressedTar")) { s = strdup(object->url->url); if ((q = strrchr(s, '.')) == NULL) { /* strip .Z, .bz2, .gz, etc */ xfree(s); return (0); } *q = '\0'; r = duplicate_url_any(s); xfree(s); return (r); } /* * Now check to see if the compressed version was already in the * database. */ sprintf(buf, "%s.Z", object->url->url); r = duplicate_url_any(buf); if (r) return (r); sprintf(buf, "%s.gz", object->url->url); r = duplicate_url_any(buf); if (r) return (r); /* * If we have a PostScript file, prefer the Dvi or Text version. * This is a hack and doesn't work in all cases. For example, * won't remove .ps.Z + .dvi.Z. */ if (!strcmp(object->type, "PostScript")) { s = strdup(object->url->url); if ((q = strrchr(s, '.')) == NULL) { /* strip .ps */ xfree(s); return (0); } *q = '\0'; sprintf(buf, "%s.dvi", s); /* use DVI instead */ r = duplicate_url_any(buf); sprintf(buf, "%s.txt", s); /* use Text instead */ xfree(s); if (r) return (r); r = duplicate_url_any(buf); if (r) return (r); } return (r);}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -