📄 prepurls.c
字号:
static char rcsid[] = "$Id: prepurls.c,v 2.2 1997/08/27 18:09:34 sxw Exp $";/* * prepurls.c - Prepares URLs using an enumerator (for Root node) * and a URL-stat (for Leaf nodes). * * Usage: prepurls [--root cmd] [--leaf cmd] * * The cmds for root and leaf take URLs as stdin and output URLs + stamp. * * Input: * ROOT\tURL Opt1 ... OptN * ... * LEAF\tURL * * Output: * URL MD5:xxxx * ... * URL Last-Modification-Time:xxxx * * * DEBUG: none * AUTHOR: Harvest derived * * Harvest Indexer http://www.tardis.ed.ac.uk/harvest/ * --------------------------------------------------- * * The Harvest Indexer is a continued development of code developed by * the Harvest Project. Development is carried out by numerous individuals * in the Internet community, and is not officially connected with the * original Harvest Project or its funding sources. * * Please mail harvest@tardis.ed.ac.uk if you are interested in participating * in the development effort. * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. *//* * ---------------------------------------------------------------------- * Copyright (c) 1994, 1995. All rights reserved. * * The Harvest software was developed by the Internet Research Task * Force Research Group on Resource Discovery (IRTF-RD): * * Mic Bowman of Transarc Corporation. * Peter Danzig of the University of Southern California. * Darren R. Hardy of the University of Colorado at Boulder. * Udi Manber of the University of Arizona. * Michael F. Schwartz of the University of Colorado at Boulder. * Duane Wessels of the University of Colorado at Boulder. * * This copyright notice applies to software in the Harvest * ``src/'' directory only. Users should consult the individual * copyright notices in the ``components/'' subdirectories for * copyright information about other software bundled with the * Harvest source code distribution. * * TERMS OF USE * * The Harvest software may be used and re-distributed without * charge, provided that the software origin and research team are * cited in any use of the system. Most commonly this is * accomplished by including a link to the Harvest Home Page * (http://harvest.cs.colorado.edu/) from the query page of any * Broker you deploy, as well as in the query result pages. These * links are generated automatically by the standard Broker * software distribution. * * The Harvest software is provided ``as is'', without express or * implied warranty, and with no support nor obligation to assist * in its use, correction, modification or enhancement. We assume * no liability with respect to the infringement of copyrights, * trade secrets, or any patents, and are not responsible for * consequential damages. Proper use of the Harvest software is * entirely the responsibility of the user. * * DERIVATIVE WORKS * * Users may make derivative works from the Harvest software, subject * to the following constraints: * * - You must include the above copyright notice and these * accompanying paragraphs in all forms of derivative works, * and any documentation and other materials related to such * distribution and use acknowledge that the software was * developed at the above institutions. * * - You must notify IRTF-RD regarding your distribution of * the derivative work. * * - You must clearly notify users that your are distributing * a modified version and not the original Harvest software. * * - Any derivative product is also subject to these copyright * and use restrictions. * * Note that the Harvest software is NOT in the public domain. We * retain copyright, as specified above. * * HISTORY OF FREE SOFTWARE STATUS * * Originally we required sites to license the software in cases * where they were going to build commercial products/services * around Harvest. In June 1995 we changed this policy. We now * allow people to use the core Harvest software (the code found in * the Harvest ``src/'' directory) for free. We made this change * in the interest of encouraging the widest possible deployment of * the technology. The Harvest software is really a reference * implementation of a set of protocols and formats, some of which * we intend to standardize. We encourage commercial * re-implementations of code complying to this set of standards. * */#include <stdio.h>#include <stdlib.h>#include <string.h>#include "util.h"static char *rootcmd = "enum";static char *leafcmd = "staturl";static FILE *rootnodes = NULL;static FILE *leafnodes = NULL;static void usage(){ fprintf(stderr, "Usage: prepurls [--root cmd] [--leaf cmd]\n"); exit(1);}int main(argc, argv) int argc; char *argv[];{ char buf[BUFSIZ], *s; int rpid, lpid, cpid, rootpipe[2], leafpipe[2], ncaught = 0; FILE *logfp = NULL; int leafSeen; if (getenv("HARVEST_GATHERER_LOGFILE") != (char *) NULL) logfp = fopen(getenv("HARVEST_GATHERER_LOGFILE"), "a+"); if (logfp == (FILE *) NULL) logfp = stderr; init_log3("prepurls", logfp, stderr); debug_init(); /* Parse the command line */ while (--argc > 0) { ++argv; if (!strcmp(*argv, "--root")) { if (--argc < 1) usage(); rootcmd = strdup(*++argv); } else if (!strcmp(*argv, "--leaf")) { if (--argc < 1) usage(); leafcmd = strdup(*++argv); } } if (!rootcmd || !leafcmd) usage(); /* Start up the root node processor */ if (pipe(rootpipe) < 0) { log_errno("pipe"); exit(1); } if ((rpid = fork()) < 0) { log_errno("fork"); exit(1); } if (rpid != 0) { /* parent */ close(rootpipe[0]); } else { /* child */ char *argv[64]; dup2(rootpipe[0], 0); close(rootpipe[1]); memset(argv, '\0', sizeof(argv)); parse_argv(argv, rootcmd); execvp(argv[0], argv); log_errno("execvp"); _exit(1); } if ((rootnodes = fdopen(rootpipe[1], "w")) == NULL) { log_errno("fdopen"); exit(1); } /* * Process the root nodes. */ leafSeen = 0; while (fgets(buf, BUFSIZ, stdin)) { if (!strncmp(buf, "ROOT", strlen("ROOT"))) { s = buf + strlen("ROOT"); while (isspace(*s)) s++; fprintf(rootnodes, "%s", s); fflush(rootnodes);#ifdef DEBUG Log("Passing Root: %s\n", s);#endif } else if (!strncmp(buf, "LEAF", strlen("LEAF"))) { leafSeen = 1; break; } else if (buf[0] == '#') { /* * Pass comments */ fprintf(rootnodes, "%s", buf); } else { Log("Illegal Input: %s\n", buf); } } fclose(rootnodes); close(rootpipe[1]); ncaught = 0; while (ncaught < 1) { cpid = wait(NULL); if (cpid == rpid) ncaught++; } /* Start up the leaf node processor */ if (pipe(leafpipe) < 0) { log_errno("pipe"); exit(1); } if ((lpid = fork()) < 0) { log_errno("fork"); exit(1); } if (lpid != 0) { /* parent */ close(leafpipe[0]); } else { /* child */ char *argv[64]; dup2(leafpipe[0], 0); close(leafpipe[1]); memset(argv, '\0', sizeof(argv)); parse_argv(argv, leafcmd); execvp(argv[0], argv); log_errno("execvp"); _exit(1); } if ((leafnodes = fdopen(leafpipe[1], "w")) == NULL) { log_errno("fdopen"); exit(1); } /* * Process the leafnodes */ while (leafSeen || fgets(buf, BUFSIZ, stdin)) { leafSeen = 0; if (!strncmp(buf, "LEAF", strlen("LEAF"))) { s = buf + strlen("LEAF"); while (isspace(*s)) s++; fprintf(leafnodes, "%s", s); fflush(leafnodes);#ifdef DEBUG Log("Passing Leaf: %s\n", s);#endif } else if (buf[0] == '#') { /* * Pass comments */ fprintf(leafnodes, "%s", buf); } else { Log("Illegal Input: %s\n", buf); } } fclose(leafnodes); close(leafpipe[1]); ncaught = 0; while (ncaught < 1) { lpid = wait(NULL); if (cpid == rpid) ncaught++; } exit(0);}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -