⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 unnest.c

📁 harvest是一个下载html网页的机器人
💻 C
📖 第 1 页 / 共 2 页
字号:
		xfree(s);		if (tdol == NULL)			continue;		if (object->parent_url)			tdol->object->parent_url = strdup(object->parent_url);		else			tdol->object->parent_url = strdup(object->url->url);		if (walker == NULL) {			dol = tdol;			walker = dol;		} else {			walker->next = tdol;			walker = walker->next;		}	}	closedir(dirp);	return (dol);}/* *  is_nested_type() - Determines if the given type is has any presentation *  nesting.  Returns a non-zero if true; zero otherwise. */int is_nested_type(t)     char *t;{	int i;	if (t == NULL)		return (0);	if (!strcmp("Directory", t))		return (1);	for (i = 0; nested_types[i].type; i++)		if (!strcmp(t, nested_types[i].type))			return (1);	return (0);}void init_presentation_unnest(){	char *s = tempnam(tmpdir, "unnest");	if (s == NULL) {		fatal("Cannot create tmp directory for unnest\n");	}	strcpy(unnestdir, s);	xfree(s);	if (mkdir(unnestdir, 0755) < 0) {		log_errno2(__FILE__, __LINE__, unnestdir);		fatal("init_presentation_unnest: Cannot create %s\n",		    unnestdir);	}	memset(extracted_filename, '\0', MAXPATHLEN + 1);}void finish_presentation_unnest(){	char buf[BUFSIZ];	sprintf(buf, "/bin/rm -rf '%s'", unnestdir);	Debug(65, 1, ("finish_presentation_unnest: %s\n", buf));	run_cmd(buf);#if 0	/* temporary files are deleted with remove_local_file(), kjl3oct2000 */	if (extracted_filename[0])		if (unlink(extracted_filename) < 0)			log_errno2(__FILE__, __LINE__, extracted_filename);#endif}static int mk_timestamp(){	timestamp = time(NULL);	(void) sleep(1);	return (0);}static DataObjectList *recurse_unix_directory(object)     DataObject *object;{	static DataObjectList *dol = NULL;	DataObjectList *walker, *tdol;	struct dirent *dp;	DIR *dirp;	char buf[MAXPATHLEN + 1];	if ((dirp = opendir(object->url->filename)) == NULL)		return (NULL);	dol = walker = NULL;	while ((dp = readdir(dirp)) != NULL) {		if (!strcmp(dp->d_name, ".") || !strcmp(dp->d_name, ".."))			continue;		sprintf(buf, "%s/%s", object->url->url, dp->d_name);		if ((tdol = create_dol(buf, 
object->flags)) == NULL)			continue;		if (object->parent_url)			tdol->object->parent_url = strdup(object->parent_url);		if (walker == NULL) {			dol = tdol;			walker = dol;		} else {			walker->next = tdol;			walker = walker->next;		}	}	closedir(dirp);	return (dol);}/* *  extraction routines * *  each function extracts the given object into the unnestdir *  and returns 0 on success, non-zero on error. */static int default_extract(object)     DataObject *object;{	errorlog("Extraction for %s is UNIMPLEMENTED.\n", object->type);	extracted_filename[0] = '\0';	return (1);}static int extract_BZIP2Compressed(object)     DataObject *object;{	char cmd[BUFSIZ], *s;	sprintf(extracted_filename, "%s/%s", unnestdir, object->basename);	if ((s = strrchr(extracted_filename, '.')) != NULL)		*s = '\0';	sprintf(cmd, "%s -dc \"%s\" > \"%s\"",	    CMD_BZIP2, object->url->filename, extracted_filename);	Debug(65, 1, ("extract_BZIP2Compressed: %s\n", cmd));	return (do_system(cmd));}static int extract_BZIP2CompressedTar(object)     DataObject *object;{	char cmd[BUFSIZ];	sprintf(cmd, "%s -dc \"%s\" | %s -xf - ", CMD_BZIP2,	    object->url->filename, CMD_TAR);	Debug(65, 1, ("extract_BZIP2CompressedTar: %s\n", cmd));	return (do_system(cmd));}static int extract_Compressed(object)     DataObject *object;{	char cmd[BUFSIZ], *s;	sprintf(extracted_filename, "%s/%s", unnestdir, object->basename);	if ((s = strrchr(extracted_filename, '.')) != NULL)		*s = '\0';	sprintf(cmd, "%s < \"%s\" > \"%s\"",	    CMD_UNCOMPRESS, object->url->filename, extracted_filename);	Debug(65, 1, ("extract_Compressed: %s\n", cmd));	return (do_system(cmd));}static int extract_CompressedTar(object)     DataObject *object;{	char cmd[BUFSIZ];	sprintf(cmd, "%s < \"%s\" | %s -xf - ", CMD_UNCOMPRESS,	    object->url->filename, CMD_TAR);	Debug(65, 1, ("extract_CompressedTar: %s\n", cmd));	return (do_system(cmd));}static int extract_GNUCompressed(object)     DataObject *object;{	char cmd[BUFSIZ], *s;	sprintf(extracted_filename, 
"%s/%s", unnestdir, object->basename);	if ((s = strrchr(extracted_filename, '.')) != NULL)		*s = '\0';	sprintf(cmd, "%s -dc \"%s\" > \"%s\"", CMD_GZIP,	    object->url->filename, extracted_filename);	Debug(65, 1, ("extract_GNUCompressed: %s\n", cmd));	return (do_system(cmd));}static int extract_GNUCompressedTar(object)     DataObject *object;{	char cmd[BUFSIZ];	sprintf(cmd, "%s -dc \"%s\" | %s -xf - ", CMD_GZIP,	    object->url->filename, CMD_TAR);	Debug(65, 1, ("extract_GNUCompressedTar: %s\n", cmd));	return (do_system(cmd));}static int extract_Tar(object)     DataObject *object;{	char cmd[BUFSIZ];	sprintf(cmd, "%s -xf - < \"%s\" ", CMD_TAR, object->url->filename);	Debug(65, 1, ("extract_Tar: %s\n", cmd));	return (do_system(cmd));}static int extract_Uuencoded(object)     DataObject *object;{	char cmd[BUFSIZ];	sprintf(cmd, "%s \"%s\" ", CMD_UUDECODE, object->url->filename);	Debug(65, 1, ("extract_Uuencoded: %s\n", cmd));	return (do_system(cmd));}static int extract_ShellArchive(object)     DataObject *object;{	char cmd[BUFSIZ];	sprintf(cmd, "%s < \"%s\" > /dev/null",		CMD_UNSHAR, object->url->filename);	Debug(65, 1, ("extract_ShellArchive: %s\n", cmd));	return (do_system(cmd));}static int extract_MacBinHex(object)     DataObject *object;{	char cmd[BUFSIZ];	sprintf(cmd, "%s < \"%s\" ", CMD_HEXBIN, object->url->filename);	Debug(65, 1, ("extract_MacBinHex: %s\n", cmd));	return (do_system(cmd));}/* *  extract-PCZippped - extracts a ZIP archive using UnZip version 5.40 *  written by the Info-ZIP workgroup (David Kirschbaum, consolidator). *  Software was posted to comp.sources.unix. 
*/static int extract_PCZipped(object)     DataObject *object;{	char cmd[BUFSIZ];	sprintf(cmd, "%s -qq -n -a \"%s\"", CMD_UNZIP, object->url->filename);	Debug(65, 1, ("extract_PCZipped: %s\n", cmd));	return (do_system(cmd));}static char *filename_to_url(fname)     char *fname;{	static char *p;	p = xmalloc(BUFSIZ);	sprintf(p, "file://%s%s", getfullhostname(), fname);	return (p);}/* *  unnest_dbcheck() - Returns non-zero if the template has not changed. *  If the template contains a Last-Modification-Time, or an MD5, *  then grab the template from the PRODUCTION.gdbm database (if it *  exists) and compare to see if the template has changed.  Otherwise, *  generate an MD5 based on the printed SOIF version of the template, *  and use that as a basis for the db check. */static int unnest_dbcheck(t)     Template *t;{	AVPair *avp;	int ts;	if ((avp = extract_AVPair(t->list, T_MD5)) != NULL) {		return (dbcheck_md5(t->url, avp->value));	}	if ((avp = extract_AVPair(t->list, T_LMT)) != NULL) {		ts = atoi(avp->value);		return (dbcheck_timestamp(t->url, ts));	}	return (0);}/* *  recurse_external() - Runs an external process that generates a stream *  of SOIF objects.  For each SOIF object, it converts it to an *  appropriate DataObject and adds it to an object list whose objects *  each have an AVList. */static DataObjectList *recurse_external(object, cmd)     DataObject *object;     char *cmd;{	static DataObjectList *dol = NULL;	DataObjectList *walker = NULL, *tdol = NULL;	Template *t = NULL;	FILE *fp = NULL;	int exit_status;	Debug(65, 1, ("recurse_external: %s: %s\n", object->url->url, cmd));	if ((fp = popen(cmd, "r")) == NULL) {		log_errno2(__FILE__, __LINE__, cmd);		return (NULL);	}	dol = walker = NULL;	while (1) {		/*		 *  Since the dbcheck routines need the SOIF library,		 *  we need to explicitly re-init the SOIF library		 *  on each call.		 
*/		init_parse_template_file(fp);		if ((t = parse_template()) == NULL) {			if (is_parse_end_of_input())				break;	/* EOF */			else				continue;	/* Error, try again */		}		finish_parse_template();		/*		 *  If the user wants fake-md5s then compute an		 *  MD5 based on the input SOIF template.		 */		if (do_fakemd5s && extract_AVPair(t->list, T_MD5) == NULL) {			Buffer *b;			char *newmd5;			extern char *get_md5_string();			b = init_print_template(NULL);			print_template(t);			newmd5 = get_md5_string(b->data, b->length);			finish_print_template();			FAST_add_AVList(t->list, T_MD5, newmd5, 32);			xfree(newmd5);		}		if (unnest_dbcheck(t)) {			free_template(t);			continue;		}		tdol = create_dol(t->url, F_NO_ACCESS | F_MANUAL);		if (tdol == NULL) {			free_template(t);			continue;		}		tdol->object->parent_url = NULL;		tdol->object->type = NULL;		tdol->object->ttype = strdup(t->template_type);		tdol->object->avl = t->list;		if (walker == NULL) {			walker = dol = tdol;		} else {			walker->next = tdol;			walker = walker->next;		}		/* free template manually, since we want to keep the list */		xfree(t->template_type);		xfree(t->url);		xfree(t);	}	exit_status = pclose(fp);	/* some OS's violate POSIX by returning -1 with ECHILD even	 * though wait() etc. haven't been called.  Ignore it. */	if (exit_status != 0 && errno != ECHILD)		errorlog("recurse_external: exit status %d for \"%s\"\n",			exit_status, cmd);	return (dol);}

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -