📄 main.c
字号:
sprintf(buf, "%s/%s", t, USE_STOPLIST); if ((fd = open(buf, O_RDONLY)) >= 0) { stoplist = strdup(buf); close(fd); break; } } free(s); } if (stoplist == NULL) { errorlog("Unable to locate %s in %s.\n", USE_STOPLIST, libpath); exit(1); } if (tmpdir == NULL) { tmpdir = getenv("TMPDIR") ? strdup(getenv("TMPDIR")) : strdup(USE_TMPDIR); } if (access(stoplist, R_OK) < 0) { log_errno(stoplist); exit(1); } if ((allowlist != NULL) && (access(allowlist, R_OK) < 0)) { log_errno(allowlist); exit(1); } if (input_file != NULL && strcmp(input_file, "-") != 0 && access(input_file, R_OK) < 0) { log_errno(input_file); usage(); } if (access(tmpdir, W_OK) < 0) { log_errno(tmpdir); exit(1); } do_startup(); /* NOTE: DO NOT catch SIGCHLD; we always do explict waits in Essence */ signal(SIGABRT, do_shutdown); /* die gracefully */ signal(SIGTERM, do_shutdown); signal(SIGINT, do_shutdown); /* Process */ if (input_file != NULL) { FILE *fp; char buf[BUFSIZ], tbuf[BUFSIZ], *s; int t; if (!strcmp(input_file, "-")) fp = stdin; else { if ((fp = fopen(input_file, "r")) == NULL) { log_errno(input_file); usage(); } } /* * The input looks like: * URL<tab>MD5:adfasdfasdfasdfasdfasd * URL<tab>Last-Modification-Time:12345 */ while (fgets(buf, BUFSIZ, fp) != NULL) { strcpy(tbuf, buf); /* make a copy */ Debug(62, 1, ("Input Line: %s", tbuf)); if (buf[0] == '#') { if (strncmp(buf, "#env:", 5) == 0) { char* ptr, *dup; int len; ptr = buf+5; while (isspace(*ptr)) { ++ptr; } len = strlen(ptr); while (len && isspace(ptr[len-1])) { ptr[--len] = '\0'; } if ((dup = xmalloc(len+1))) { strcpy(dup, ptr); putenv(dup); } } continue; /* skip rest */ } if ((s = strrchr(buf, '\n')) == NULL) { errorlog("Illegal input: %s\n", tbuf); continue; } *s = '\0'; /* strip newline */ if ((s = strchr(buf, '\t')) != NULL) { *s++ = '\0'; /* delineate at the tab */ } /* * For MD5's: check database and skip if unchanged * For LMT's: check database and skip if unchanged * For no meta data, just pass it through */ if (s && !strncasecmp(s, T_MD5, strlen(T_MD5))) { if (dbcheck_md5(buf, s + strlen(T_MD5) + 1)) { continue; } } else if (s && !strncasecmp(s, T_LMT, strlen(T_LMT))) { t = atoi(s + strlen(T_LMT) + 1); if (dbcheck_timestamp(buf, t)) { continue; } } obj = create_data_object(buf, object_flags); if (obj == NULL) { errorlog("Cannot create object for %s\n", tbuf); continue; } /* Type Recognition */ if (obj->type == NULL && type_recognize(obj)) { errorlog("Cannot recognize type for %s\n", obj->url->url); continue; } if (is_nested_type(obj->type)) { nested = 1; init_presentation_unnest(); } else { nested = 0; } process_object(obj); if (nested) { finish_presentation_unnest(); } free_data_object(obj); } fclose(fp); } else { for (; argc > 0; argc--, argv++) { obj = create_data_object(*argv, object_flags); if (obj == NULL) { errorlog("Cannot create object for %s\n", *argv); continue; } /* Type Recognition */ if (obj->type == NULL && type_recognize(obj)) { errorlog("Cannot recognize type for %s\n", obj->url->url); continue; } if (is_nested_type(obj->type)) { nested = 1; init_presentation_unnest(); } else { nested = 0; } process_object(obj); if (nested) { finish_presentation_unnest(); } free_data_object(obj); } } /* Clean up */ do_shutdown(0); exit(0);}static void do_startup(){ char *libpathbuf, *s, *t; int fd; char buf[BUFSIZ]; libpathbuf = xmalloc(strlen(libpath) + 64); memset(libpathbuf, '\0', strlen(libpath) + 64); sprintf(libpathbuf, "SUMMARIZER_LIBPATH=%s", libpath); if (putenv(libpathbuf) < 0) { log_errno("putenv"); } sprintf(byname, "%s/%s", default_libpath, USE_BYNAME); sprintf(byurl, "%s/%s", default_libpath, USE_BYURL); sprintf(bycontent, "%s/%s", default_libpath, USE_BYCONTENT); sprintf(magic, "%s/%s", default_libpath, USE_MAGIC); s = strdup(libpath); for (t = strtok(s, ":"); t; t = strtok(NULL, ":")) { sprintf(buf, "%s/%s", t, USE_BYNAME); if ((fd = open(buf, O_RDONLY)) >= 0) { strcpy(byname, buf); close(fd); break; } } free(s); s = strdup(libpath); for (t = strtok(s, ":"); t; t = strtok(NULL, ":")) { sprintf(buf, "%s/%s", t, USE_BYURL); if ((fd = open(buf, O_RDONLY)) >= 0) { strcpy(byurl, buf); close(fd); break; } } free(s); s = strdup(libpath); for (t = strtok(s, ":"); t; t = strtok(NULL, ":")) { sprintf(buf, "%s/%s", t, USE_BYCONTENT); if ((fd = open(buf, O_RDONLY)) >= 0) { strcpy(bycontent, buf); close(fd); break; } } free(s); s = strdup(libpath); for (t = strtok(s, ":"); t; t = strtok(NULL, ":")) { sprintf(buf, "%s/%s", t, USE_MAGIC); if ((fd = open(buf, O_RDONLY)) >= 0) { strcpy(magic, buf); close(fd); break; } } free(s); init_url(); init_gatherer_id(); if (init_type_recognize(byname, bycontent, byurl, magic)) { errorlog("init_type_recognize(%s, %s, %s, %s) failed.\n", byname, bycontent, byurl, magic); exit(1); } init_stoplist(); if (!do_typeonly) { init_summarize(); init_db(dbdir, max_deletions); }}static void print_memory_stats(){#if defined(DEBUG) && defined(_HARVEST_OSF_) struct mallinfo mi = mallinfo(); Log("malloc statistics:\n"); Log(" total space in arena: %d\n", mi.arena); Log(" number of ordinary blocks: %d\n", mi.ordblks); Log(" number of small blocks: %d\n", mi.smblks); Log(" number of holding blocks: %d\n", mi.hblks); Log(" space in holding blocks: %d\n", mi.hblkhd); Log(" space in small blocks in use: %d\n", mi.usmblks); Log(" space in free blocks: %d\n", mi.fsmblks); Log(" space in ordinary blocks in use: %d\n", mi.uordblks); Log(" space in free blocks: %d\n", mi.fordblks); Log(" cost of enabling keep option: %d\n", mi.keepcost);#endif return;}static void do_shutdown(x) int x;{ finish_url(); finish_type_recognize(); finish_stoplist(); if (!do_typeonly) { finish_summarize(); finish_db(); } if (x != 0) Log("Terminated abnormally (%d)...\n", x); else Log("Terminated normally.\n"); print_memory_stats(); exit(x);}static void init_gatherer_id(){ gatherer_id = xmalloc(sizeof(struct GID)); gatherer_id->name = strdup(gname ? gname : "Essence"); gatherer_id->version = strdup(gver ? gver : HARVEST_VERSION); if (ghost) { gatherer_id->host = strdup(ghost); } else { ghost = strdup(getfullhostname()); gatherer_id->host = strdup(ghost); } Log("Running Gatherer...\n"); Log("Gatherer-Name:\t%s\n", gatherer_id->name); Log("Gatherer-Host:\t%s\n", gatherer_id->host); Log("Gatherer-Version:\t%s\n", gatherer_id->version);}/* * process_object() - Main guts of Essence. First, types the object, * performs candidate selection, then either unnest it or summarizes it. */static void process_object(object) DataObject *object;{#ifdef DEBUG print_memory_stats();#endif Debug(62, 1, ("process_object(%s)\n", object->url->url)); /* Candidate Selection by Name */ if (allowlist == NULL && stop_byname(object)) { Log("Removing %s from candidate list -- name.\n", object->url->url); return; } /* Type Recognition */ if (object->type == NULL && type_recognize(object)) { errorlog("Cannot recognize type for %s\n", object->url->url); return; } /* Print the type and return if type's only; print directly to stdout */ if (do_typeonly) { printf("Type: %s %s\n", object->type, object->url->url); return; } /* print "URL <TAB> Type" */ /* print (L) if local mapping worked */ Log("%s\t%s%s\n", object->url->url, object->type, object->url->flags & URL_FLAG_LOCAL_MAPPED ? " [L]" : ""); /* Candidate Selection by Type and by Duplicate */ if (allowlist != NULL && !allow_bytype(object)) { Log("Removing %s (%s) from candidate list -- type.\n", object->url->url, object->type); return; } if (allowlist == NULL && stop_bytype(object)) { Log("Removing %s (%s) from candidate list -- type.\n", object->url->url, object->type); return; } if (allowlist == NULL && stop_byduplicate(object)) { if (do_dupremove) { db_delete_byurl(object->url->url); } else { Log("Removing %s (%s) from candidate list -- duplicate.\n", object->url->url, object->type); return; } } /* Summarize or Presentation Unnest */ if (object->flags & F_MANUAL) { summarize(object); } else if (is_nested_type(object->type)) { nested_feeder(object); } else if (!do_typeonly) { summarize(object); }}/* * nested_feeder() - Takes a nested object an unnests it. * XXX: Should re-write so that the unnester is an iterator. */static void nested_feeder(object) DataObject *object;{ DataObjectList *ol, *walker, *tol; int nc = 0, nmakefile = 0; /* Summarize it first */ summarize_nested_object(object); /* Unnest the object */ if ((ol = presentation_unnest(object)) == NULL) { errorlog("Cannot unnest %s\n", object->url->url); return; } /* Type the extracted data first */ for (walker = ol; walker; walker = walker->next) { if (walker->object == NULL) { errorlog("Fatal Internal: NULL object from unnest.\n"); exit(1); } Debug(62, 1, ("Extracted: %s %p\n", walker->object->url->url, walker->object->type)); if (walker->object->type == NULL) (void) type_recognize(walker->object); } /* Recognize bundles */ for (walker = ol; walker; walker = walker->next) { if (walker->object->type == NULL) continue; if (!strcmp(walker->object->type, "C")) nc++; else if (!strcmp(walker->object->type, "CHeader")) nc++; else if (!strcmp(walker->object->type, "Makefile")) nmakefile++; } if (nc > 1 && nmakefile > 0 && !strcmp(object->type, "Directory")) { xfree(object->type); object->type = strdup("SourceDistribution"); process_object(object); free_dol(ol); return; } /* Process the extracted files */ walker = ol; while (walker != NULL) { process_object(walker->object); tol = walker; walker = walker->next; free_data_object(tol->object); xfree(tol); }}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -