📄 fileenum.c
字号:
static char rcsid[] = "$Id: fileenum.c,v 2.1 1997/03/21 19:21:29 sxw Exp $";/* * fileenum.c - Enumerates file: RootNode URLs into LeafNode URLs * * Usage: fileenum [-n] pathname * -n == don't expand directory name using chdir/getwd * * Outputs the following format: * * URL of tree root * URL <tab> last-modification-time * ... * URL <tab> last-modification-time * * Jim Guyton, University of Colorado, Boulder, May 1994 * Duane Wessels, University of Colorado, Boulder, Sept 1995 * * DEBUG: section 46, level 1 Gatherer enumeration for file:// URLs * AUTHOR: Harvest derived * * Harvest Indexer http://www.tardis.ed.ac.uk/harvest/ * --------------------------------------------------- * * The Harvest Indexer is a continued development of code developed by * the Harvest Project. Development is carried out by numerous individuals * in the Internet community, and is not officially connected with the * original Harvest Project or its funding sources. * * Please mail harvest@tardis.ed.ac.uk if you are interested in participating * in the development effort. * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. *//* ---------------------------------------------------------------------- * Copyright (c) 1994, 1995. All rights reserved. * * The Harvest software was developed by the Internet Research Task * Force Research Group on Resource Discovery (IRTF-RD): * * Mic Bowman of Transarc Corporation. * Peter Danzig of the University of Southern California. * Darren R. Hardy of the University of Colorado at Boulder. * Udi Manber of the University of Arizona. * Michael F. Schwartz of the University of Colorado at Boulder. * Duane Wessels of the University of Colorado at Boulder. * * This copyright notice applies to software in the Harvest * ``src/'' directory only. Users should consult the individual * copyright notices in the ``components/'' subdirectories for * copyright information about other software bundled with the * Harvest source code distribution. * * TERMS OF USE * * The Harvest software may be used and re-distributed without * charge, provided that the software origin and research team are * cited in any use of the system. Most commonly this is * accomplished by including a link to the Harvest Home Page * (http://harvest.cs.colorado.edu/) from the query page of any * Broker you deploy, as well as in the query result pages. These * links are generated automatically by the standard Broker * software distribution. * * The Harvest software is provided ``as is'', without express or * implied warranty, and with no support nor obligation to assist * in its use, correction, modification or enhancement. We assume * no liability with respect to the infringement of copyrights, * trade secrets, or any patents, and are not responsible for * consequential damages. Proper use of the Harvest software is * entirely the responsibility of the user. * * DERIVATIVE WORKS * * Users may make derivative works from the Harvest software, subject * to the following constraints: * * - You must include the above copyright notice and these * accompanying paragraphs in all forms of derivative works, * and any documentation and other materials related to such * distribution and use acknowledge that the software was * developed at the above institutions. * * - You must notify IRTF-RD regarding your distribution of * the derivative work. * * - You must clearly notify users that your are distributing * a modified version and not the original Harvest software. * * - Any derivative product is also subject to these copyright * and use restrictions. * * Note that the Harvest software is NOT in the public domain. We * retain copyright, as specified above. * * HISTORY OF FREE SOFTWARE STATUS * * Originally we required sites to license the software in cases * where they were going to build commercial products/services * around Harvest. In June 1995 we changed this policy. We now * allow people to use the core Harvest software (the code found in * the Harvest ``src/'' directory) for free. We made this change * in the interest of encouraging the widest possible deployment of * the technology. The Harvest software is really a reference * implementation of a set of protocols and formats, some of which * we intend to standardize. We encourage commercial * re-implementations of code complying to this set of standards. * */#include <stdio.h>#include <string.h>#include <stdlib.h>#include <sys/types.h>#include <dirent.h>#include <signal.h>#include <sys/stat.h>#include <sys/socket.h>#include <sys/param.h>#include <netdb.h>#include "util.h"#include "url.h"#define PUBLIC extern#include "filter.h"/* Local variables */static char *hostname = NULL;static char *my_pgmname = NULL;static char tree_root[BUFSIZ];static int max_depth = 0;static int url_max = 0;static int nurls = 0;/* Local functions */static void sigdie();static void usage();static void doit();static void sigdie(){ exit(0);}static void usage(){ fprintf(stderr, "usage: %s File-URL\n", my_pgmname); exit(1);}static void doit(path, depth) char *path; int depth;{ int pathlen = 0; /* Length of source pathname */ struct stat statbuf; DIR *dirf = NULL; char *fullname = NULL; struct dirent *dp = NULL; int y = 0; fullname = xmalloc(MAXPATHLEN + 1); pathlen = strlen(path); if (max_depth > 0 && depth > max_depth) { Debug(46, 1, ("Maximum Depth of %d Reached: %s\n", max_depth, path)); xfree(fullname); return; } strcpy(fullname, path); if (fullname[pathlen - 1] != '/') { /* Add trailing / if needed */ strcat(fullname, "/"); pathlen++; } /* Do the enumeration */ if ((dirf = opendir(path)) == NULL) { log_errno2(__FILE__, __LINE__, path); xfree(fullname); return; } while ((dp = readdir(dirf)) != NULL) { if (strcmp(dp->d_name, ".") == 0) continue; if (strcmp(dp->d_name, "..") == 0) continue; fullname[pathlen] = '\0'; strcat(fullname, dp->d_name); /* Check to see if it passes the filter */ if ((y = filter_match(fullname, url_filter, nurl_filter))) { Debug(46, 1, ("Removing Candidate: [%s] %s\n", Filter_Type_Name[y], fullname)); continue; } if (stat(fullname, &statbuf) < 0) { log_errno2(__FILE__, __LINE__, fullname); continue; } if (S_ISREG(statbuf.st_mode)) { fprintf(stdout, "file://%s%s\t%lu\n", hostname, rfc1738_escape(fullname), statbuf.st_mtime); fflush(stdout); if (nurls++ >= url_max) { Log("Truncating RootNode %s at %d LeafNode URLs\n", tree_root, url_max); sigdie(); } } if ((statbuf.st_mode & S_IFMT) == S_IFDIR) { strcat(fullname, "/"); doit(fullname, depth + 1); } } closedir(dirf); xfree(fullname);}int main(argc, argv) int argc; char *argv[];{ char *startpath = NULL; char *s = NULL; FILE *logfp = NULL; URL *up = NULL; int cur_depth = 0;#ifdef USE_HOST_CACHE host_cache_init();#endif debug_init(); my_pgmname = xstrdup(argv[0]); while (argc > 1 && argv[1][0] == '-') { if (strncmp(argv[1], "-D", 2) == 0) { debug_flag(argv[1]); argv++; argc--; } else usage(); } if (argc != 2) usage(); signal(SIGTERM, sigdie); if (getenv("HARVEST_GATHERER_LOGFILE") != (char *) NULL) logfp = fopen(getenv("HARVEST_GATHERER_LOGFILE"), "a+"); if (logfp == (FILE *) NULL) logfp = stderr; init_log3(my_pgmname, logfp, stderr); Debug(46, 1, ("Running File Enumeration: %s\n", argv[1])); up = url_open(argv[1]); if (up == (URL *) NULL) { Log("Bad URL: %s\n", argv[1]); exit(1); } if (up->type != URL_FILE) { Log("Not a File URL: %s\n", argv[1]); exit(1); } hostname = xstrdup(up->host); /* argh. We should check here to see if the given host is the */ /* local machine. -DW */ max_depth = url_max = 0; if ((s = getenv("HARVEST_URL_MAX")) != NULL) url_max = atoi(s); if ((s = getenv("HARVEST_DEPTH_MAX")) != NULL) max_depth = atoi(s); if ((s = getenv("HARVEST_DEPTH_CUR")) != NULL) cur_depth = atoi(s); if (url_max < 1) url_max = 250; /* hard-coded maximum */ if (max_depth < 1) max_depth = 0; /* hard-coded maximum */ host_filterfile = NULL; url_filterfile = getenv("HARVEST_URL_FILTER"); filter_initialize(); startpath = up->pathname; /* first line is start of URL enumeration space */ sprintf(tree_root, "file://%s%s", hostname, rfc1738_escape(startpath)); fprintf(stdout, "%s\n", tree_root); fflush(stdout); doit(startpath, cur_depth); exit(0);}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -