📄 unnest.c
字号:
static char rcsid[] = "unnest.c,v 1.55 1996/03/26 04:12:12 wessels Exp";/* * unnest.c - Presentation Unnesting for the Essence system. * * DEBUG: section 65, level 1 Gatherer essence object unnesting * * Darren Hardy, hardy@cs.colorado.edu, February 1994 * * ---------------------------------------------------------------------- * Copyright (c) 1994, 1995. All rights reserved. * * The Harvest software was developed by the Internet Research Task * Force Research Group on Resource Discovery (IRTF-RD): * * Mic Bowman of Transarc Corporation. * Peter Danzig of the University of Southern California. * Darren R. Hardy of the University of Colorado at Boulder. * Udi Manber of the University of Arizona. * Michael F. Schwartz of the University of Colorado at Boulder. * Duane Wessels of the University of Colorado at Boulder. * * This copyright notice applies to software in the Harvest * ``src/'' directory only. Users should consult the individual * copyright notices in the ``components/'' subdirectories for * copyright information about other software bundled with the * Harvest source code distribution. * * TERMS OF USE * * The Harvest software may be used and re-distributed without * charge, provided that the software origin and research team are * cited in any use of the system. Most commonly this is * accomplished by including a link to the Harvest Home Page * (http://harvest.cs.colorado.edu/) from the query page of any * Broker you deploy, as well as in the query result pages. These * links are generated automatically by the standard Broker * software distribution. * * The Harvest software is provided ``as is'', without express or * implied warranty, and with no support nor obligation to assist * in its use, correction, modification or enhancement. We assume * no liability with respect to the infringement of copyrights, * trade secrets, or any patents, and are not responsible for * consequential damages. 
Proper use of the Harvest software is * entirely the responsibility of the user. * * DERIVATIVE WORKS * * Users may make derivative works from the Harvest software, subject * to the following constraints: * * - You must include the above copyright notice and these * accompanying paragraphs in all forms of derivative works, * and any documentation and other materials related to such * distribution and use acknowledge that the software was * developed at the above institutions. * * - You must notify IRTF-RD regarding your distribution of * the derivative work. * * - You must clearly notify users that your are distributing * a modified version and not the original Harvest software. * * - Any derivative product is also subject to these copyright * and use restrictions. * * Note that the Harvest software is NOT in the public domain. We * retain copyright, as specified above. * * HISTORY OF FREE SOFTWARE STATUS * * Originally we required sites to license the software in cases * where they were going to build commercial products/services * around Harvest. In June 1995 we changed this policy. We now * allow people to use the core Harvest software (the code found in * the Harvest ``src/'' directory) for free. We made this change * in the interest of encouraging the widest possible deployment of * the technology. The Harvest software is really a reference * implementation of a set of protocols and formats, some of which * we intend to standardize. We encourage commercial * re-implementations of code complying to this set of standards. 
 *
 */
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <string.h>
#include <memory.h>
#include <errno.h>
#include <sys/param.h>
#include <sys/types.h>
#include <fcntl.h>
#include <dirent.h>
#include <time.h>
#include "util.h"
#include "essence.h"

/*
 * Local functions.  All are static, so each must be defined in this
 * translation unit; they are declared K&R-style (no parameter lists).
 */
static int default_extract();	/* extraction routines: stored in the extract field of nested_types[]; presentation_unnest() calls them with the DataObject and treats a nonzero return as failure */
static int extract_BZIP2Compressed();
static int extract_BZIP2CompressedTar();
static int extract_Compressed();
static int extract_CompressedTar();
static int extract_GNUCompressed();
static int extract_GNUCompressedTar();
static int extract_ShellArchive();
static int extract_Tar();
static int extract_Uuencoded();
static int extract_PCZipped();
static int extract_MacBinHex();
/* Called with a file path; presentation_unnest() hands the result to create_dol() */
static char *filename_to_url();

/*
 * recursion routines: recurse_unix_directory() is used for URL_FILE
 * objects of type "Directory"; recurse_external() is used for exploder
 * types and receives the object plus the "<type>.unnest" command line.
 */
static DataObjectList *recurse_unix_directory();
static DataObjectList *recurse_external();
/* Called before extraction when do_timestamp is set; a nonzero return aborts the unnest.  Presumably records the current time in `timestamp' -- confirm. */
static int mk_timestamp();

/* Local variables */
/* Read by presentation_unnest() when do_timestamp is 0; presumably filled in by the extract routine -- confirm. */
static char extracted_filename[MAXPATHLEN + 1];
/* Directory presentation_unnest() chdir()s into before extracting and scans afterwards */
static char unnestdir[MAXPATHLEN + 1];
/* Files in unnestdir whose st_ctime is older than this are skipped as pre-existing */
static time_t timestamp;

/*
 * README for adding new Presentation Unnesting routines:
 *
 * Methods for unnesting various types. To add unnesting for a new type,
 * write a new extraction routine and add (type, proc) to this table.
 *
 * If extract is NULL, then the nested type is treated as an exploder.
 * The program type.unnest is run with the URL as the first argument,
 * and the filename that contains the data as the second argument.
 * The program generates a stream of SOIF templates that contain
 * manual information about the data. To add a new exploder, add
 * the type to this table, and set the extract field to NULL, then
 * write a SOIF generated and install it as type.unnest.
 */
/*
 * How presentation_unnest() consumes this table:
 *   - It scans the entries in order, comparing `type' to object->type
 *     with strcmp(), and stops at the first match or at the entry whose
 *     `type' is NULL.  Hitting that NULL entry is reported as "not a
 *     unnestable type", so the sentinel's extract pointer is never
 *     called from there.
 *   - extract == NULL: the object is retrieved and the command
 *     "<type>.unnest '<url>' '<filename>'" is passed to recurse_external().
 *   - do_timestamp != 0: mk_timestamp() is called before extraction and
 *     the unnest directory is scanned afterwards for new files.
 *   - do_timestamp == 0: the single result is taken from
 *     extracted_filename.
 */
struct nested_type {
    char *type;			/* Nested file type; a NULL type terminates the table */
    int (*extract) ();		/* how to extract the data; NULL marks an exploder */
    int do_timestamp;		/* nonzero: use the timestamp to determine extracted files */
} nested_types[] = {
/* normal unnesting that generates new objects to process */
    { "BZIP2Compressed", extract_BZIP2Compressed, 0 },
    { "BZIP2CompressedTar", extract_BZIP2CompressedTar, 1 },
    { "Compressed", extract_Compressed, 0 },
    { "CompressedTar", extract_CompressedTar, 1 },
    { "GNUCompressed", extract_GNUCompressed, 0 },
    { "GNUCompressedTar", extract_GNUCompressedTar, 1 },
    { "ShellArchive", extract_ShellArchive, 1 },
    { "Tar", extract_Tar, 1 },
    { "Uuencoded", extract_Uuencoded, 1 },
    { "PCZipped", extract_PCZipped, 1 },
/* exploders that generate a stream of SOIF templates to use as manual info */
    { "Exploder", NULL, 0 },
    { "IAFA", NULL, 0 },
    { "LSM", NULL, 0 },
#ifdef USE_PCINDEX
    { "PCIndex", NULL, 0 },
    { "Cica-PCIndex", NULL, 0 },
    { "Garbo-PCIndex", NULL, 0 },
    { "Garbo-Win-PCIndex", NULL, 0 },
    { "Hobbes-PCIndex", NULL, 0 },
    { "Lowell-PCIndex", NULL, 0 },
    { "Oakland-PCIndex", NULL, 0 },
    { "Umich-PCIndex", NULL, 0 },
#endif
    { "MacBinHex", extract_MacBinHex, 1 },
/* currently unsupported */
    { "PCCompressed", default_extract, 0 },
    { NULL, default_extract, 0 }
};

/*
 *  presentation_unnest() - Determines if the given object has
 *  any presentation nesting.  If the object is nested, then it
 *  unnests the object until no more nesting is uncovered.  Returns
 *  the list of unnested objects when the object is successfully
 *  unnested; otherwise, returns NULL.
 */
DataObjectList *presentation_unnest(object)
     DataObject *object;
{
    static DataObjectList *dol = NULL;	/* list handed back to the caller */
    DataObjectList *walker = NULL, *tdol = NULL;
    int cur;			/* index into nested_types[] */
    DIR *dirp;			/* open handle on unnestdir while scanning it */
    struct dirent *dp;
    struct stat sb;		/* lstat() result for each directory entry */
    char buf[MAXPATHLEN + 1], *s;	/* buf: "<unnestdir>/<entry>" path */
#if 0
    /* we don't need a new directory for each nested item, use a
     * temporary directory for all nested objects.
kjl/3oct2000 */ static char unnestsubdir[MAXPATHLEN + 1]; static int count = 0;#endif Debug(65, 1, ("Unnesting: %s\n", object->url->url)); /* Check to see if we simply need to recurse to unnest */ if (object->url->type == URL_FILE && !strcmp(object->type, "Directory")) { return (recurse_unix_directory(object)); } /* Locate the object's type in the unnesting configuration table */ for (cur = 0; nested_types[cur].type != NULL; cur++) { if (!strcmp(nested_types[cur].type, object->type)) { break; } } if (nested_types[cur].type == NULL) { errorlog("pn: %s not a unnestable type\n", object->type); return (NULL); } if (nested_types[cur].extract == NULL) { char cmd[BUFSIZ]; if (object_retrieve(object)) { return (NULL); } sprintf(cmd, "%s.unnest '%s' '%s'", nested_types[cur].type, object->url->url, object->url->filename); return (recurse_external(object, cmd)); } if (object->flags & F_NO_ACCESS) return (NULL); /* * We check for the newly created files, two ways: 1) we generate * the new filenames based on the old filename (eg. foo.Z -> foo), * or we check stat(2) to see which files were created recently. */ if (nested_types[cur].do_timestamp && mk_timestamp()) return (NULL);#if 0 /* we don't need subdirectory. 
kjl/10oct2000 */ sprintf(unnestsubdir, "%s/%05d", unnestdir, ++count); if (mkdir(unnestsubdir, 0775) < 0) { log_errno2(__FILE__, __LINE__, unnestsubdir); return (NULL); }#endif#if 0 /* We'll need to create new file(s) in the unnestdir, so first cd */ Debug(65, 1, ("presentation_unnest: chdir %s\n", unnestsubdir)); if (chdir(unnestsubdir) < 0) { log_errno2(__FILE__, __LINE__, unnestsubdir); return (NULL); }#endif /* We'll need to create new file(s) in the unnestdir, so first cd */ Debug(65, 1, ("presentation_unnest: chdir %s\n", unnestdir)); if (chdir(unnestdir) < 0) { log_errno2(__FILE__, __LINE__, unnestdir); return (NULL); } /* * Run the Extraction process, then gather the extracted * files and return as DataObjectList */ if ((*nested_types[cur].extract) (object)) { if (chdir(topdir) < 0) { /* go back to previous directory */ log_errno2(__FILE__, __LINE__, topdir); } return (NULL); /* extraction failed */ } /* Go back to previous directory */ Debug(65, 1, ("presentation_unnest: chdir %s\n", topdir)); if (chdir(topdir) < 0) { log_errno2(__FILE__, __LINE__, topdir); } /* We already know the extracted file */ if (nested_types[cur].do_timestamp == 0) { char *s = filename_to_url(extracted_filename); dol = create_dol(s, object->flags | F_TEMPORARY | F_NESTED); xfree(s); if (dol == NULL) return (NULL); if (object->parent_url) dol->object->parent_url = strdup(object->parent_url); else dol->object->parent_url = strdup(object->url->url); return (dol); } /* * We need to check the creation times to discover all of the * files generated. 
*/ dol = walker = NULL;#if 0 if ((dirp = opendir(unnestsubdir)) == NULL) { log_errno2(__FILE__, __LINE__, unnestsubdir); errorlog("presentation_unnest: Cannot open directory: %s\n", unnestsubdir); return (NULL); } while ((dp = readdir(dirp)) != NULL) { if (!strcmp(dp->d_name, ".") || !strcmp(dp->d_name, "..")) continue; sprintf(buf, "%s/%s", unnestsubdir, dp->d_name);#endif if ((dirp = opendir(unnestdir)) == NULL) { log_errno2(__FILE__, __LINE__, unnestdir); errorlog("presentation_unnest: Cannot open directory: %s\n", unnestdir); return (NULL); } while ((dp = readdir(dirp)) != NULL) { if (!strcmp(dp->d_name, ".") || !strcmp(dp->d_name, "..")) continue; sprintf(buf, "%s/%s", unnestdir, dp->d_name); if (lstat(buf, &sb) < 0) { log_errno2(__FILE__, __LINE__, buf); errorlog("presentation_unnest: Cannot stat: %s\n", buf); continue; } if (timestamp > sb.st_ctime) /* old file */ continue; if (!strcmp(object->basename, dp->d_name)) /* same file */ continue; s = filename_to_url(buf); tdol = create_dol(s, object->flags | F_TEMPORARY | F_NESTED);
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -