⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 enum.c

📁 harvest是一个下载html网页的机器人
💻 C
📖 第 1 页 / 共 2 页
字号:
		if ((dup2 (pfd[0], 0) < 0) ||	/* read:pipe -> stdin */		    (dup2 (fd, 1) < 0)) {	/* stdout -> tmpfile */			fatal_errno ("dup2");		}		/* Clean up all file descriptors except std{in,out,err} */		close_all_fds (3);		/* Need our own copy */		argv[0] = xstrdup (user_enum_pgm);		argv[1] = xstrdup (enum_arg);		argv[2] = NULL;		set_envs ();	/* pass per-enum ENV vars */		execvp (argv[0], argv);	/* run the script */		sprintf (buf, "execvp: %s", argv[0]);		log_errno (buf);		_exit (1);	}	/* parent */	(void) close (pfd[0]);	cpinfo = (cpi *) xmalloc (sizeof (cpi));	cpinfo->childpid = pid;	cpinfo->result_file = tmpfile;	cpinfo->wfp = fdopen (pfd[1], "w");	return (cpinfo);}static voidfinish_user_enum_pgm_pipes (cpinfo)cpi *cpinfo;{	FILE *fp;	char buf[BUFSIZ];	fclose (cpinfo->wfp);	/* give child EOF */	Debug (40, 9, ("waiting for user enum %d\n", cpinfo->childpid));	(void) waitpid (cpinfo->childpid, NULL, 0);	/* blocking wait */	/* results are ready */	Debug (40, 9, ("Reading User Enum results: %s\n", cpinfo->result_file));	if ((fp = fopen (cpinfo->result_file, "r")) != NULL) {		/* Collect results and put them into the todo list */		while (fgets (buf, sizeof (buf), fp) != NULL) {			Debug (40, 1, ("user_enum: Carryover URL: %s\n", buf));			add_to_todo (buf);		}		fclose (fp);	}	/* clean up */	(void) unlink (cpinfo->result_file);	xfree (cpinfo->result_file);	xfree (cpinfo);}/* *  get_data() - Processes the Enumeration results.  Returns the *  number of LeafNode URLs generated.  Reads results from in, *  sends generated URLs to out. 
*/
/*
 *  Details of get_data():
 *
 *  in  - stream carrying the enumerator's output.  The first line is
 *        copied into the global root; every later line is expected to
 *        be "URL<TAB>stamp".  Lines without a TAB are skipped.
 *  pid - id of the enumeration process; only used in debug output.
 *  out - optional stream (may be NULL) that receives each URL, one per
 *        line, flushed immediately.
 *
 *  A stamp starting with "Depth=" marks a carry-over URL: a complete
 *  RootNode record is built from the current global settings and queued
 *  with add_to_todo().  Any other pair goes to process_url_stamp() and
 *  add_to_newdb() when proddb is set, or to do_add() when it is not.
 *
 *  Returns the number of URL/stamp lines seen; 0 when the stream was
 *  empty or held only the root line.
 *
 *  NOTE(review): root is overwritten on every call; whether the
 *  previous value is released elsewhere is not visible here.
 */
static int
get_data (in, pid, out)
FILE *in;
int pid;
FILE *out;
{
	char buf[BUFSIZ], *name, *stamp, *t, buf2[BUFSIZ];
	int nleafs = 0;
	int len;

	Debug (40, 9, ("get_data: %p %d %p\n", in, pid, out));

	/* Grab the Root from the first line */
	if (fgets (buf, sizeof (buf), in) == NULL) {
		Log ("WARNING: Enumeration did not produce any results: %s.\n",
		     tree_root);
		return (nleafs);
	}
	if ((t = strchr (buf, '\n')) != NULL)
		*t = '\0';
	root = xstrdup (buf);

	while (fgets (buf, sizeof (buf), in) != NULL) {
		if ((t = strchr (buf, '\n')) != NULL)
			*t = '\0';
		Debug (40, 9, ("get_data: Read '%s'\n", buf));

		/* Parse the URL,Timestamp pair */
		if ((t = strchr (buf, '\t')) == NULL)
			continue;
		*t = '\0';
		name = buf;
		stamp = ++t;
		nleafs++;

		if (out != NULL) {
			Debug (40, 9, ("get_data: Writing result: %s\n", name));
			fprintf (out, "%s\n", name);
			(void) fflush (out);	/* ignore SIGPIPE */
		}

		if (!strncmp (stamp, "Depth=", strlen ("Depth="))) {
			Debug (40, 1, ("enum: Carryover URL: %s, %s\n",
				       name, stamp));
			/*
			 *  main() only accepts todo records that scan as ten
			 *  fields, the last one being the Search-Type, so it
			 *  has to be carried over as well; a nine-field
			 *  record is silently discarded there.  The write is
			 *  bounded because name alone may nearly fill
			 *  BUFSIZ.
			 */
			len = snprintf (buf2, sizeof (buf2),
					"%s %d %s %d %s %d %s %s %s %s\n",
					name, url_max, url_filter,
					host_max, host_filter, delay,
					stamp + strlen ("Depth="),
					access_types, user_enum_pgm,
					search_type);
			if (len < 0 || (size_t) len >= sizeof (buf2)) {
				errorlog ("Carryover URL too long, skipped: %s\n",
					  name);
				continue;
			}
			add_to_todo (buf2);
		} else if (proddb != NULL) {
			process_url_stamp (name, stamp);
			add_to_newdb (name, stamp);
		} else {
			do_add (name, stamp);
		}
	}

	if (nleafs == 0)
		Log ("WARNING: Enumeration did not produce any URLs: %s.\n",
		     tree_root);
	Debug (40, 9, ("get_data: returning %d\n", nleafs));
	return (nleafs);
}

/*
 *  do_kid - Runs the command using fd0 as stdin and fd1 as stdout.
*/static intdo_kid (fd0, fd1, cmd)int fd0, fd1;char *cmd;{	int pid;	char *argv[64], buf[BUFSIZ];	Debug (40, 5, ("Running Enumeration Command: %s\n", cmd));	/* need to use fork() since parse_argv() will cause mem leak */	if ((pid = fork ()) < 0) {		fatal_errno ("fork");	}	if (pid > 0)		return pid;	/* parent leaves now */	/* child */	/* Clean up all file descriptors except std{in,out,err} */	if ((dup2 (fd0, 0) < 0) ||	/* fd0 -> stdin      */	    (dup2 (fd1, 1) < 0)) {	/* fd1 -> stdout     */		fatal_errno ("dup2");	}	close_all_fds (3);	memset (argv, '\0', sizeof (argv));	parse_argv (argv, cmd);	/* parse the command */	set_envs ();		/* pass per-enum ENV vars */	execvp (argv[0], argv);	/* Run the enumeration command */	sprintf (buf, "execvp: %s", argv[0]);	log_errno (buf);	_exit (1);	return (0);}/* *  do_an_enumeration() - Performs an enumeration for the given command, *  and URL. */static intdo_an_enumeration (up, cmd)URL *up;char *cmd;{	cpi *cpinfo = NULL;	FILE *in = NULL;	int pfd[2], pid, r;	/*	 *  If the production database is not available, then we need a	 *  temporary database to store the Timestamps/MD5s.	 */	if (proddb != NULL) {		/* unlink (tmpdbfile); */		crremove (tmpdbfile);		newdb = gdbm_open (tmpdbfile, 0, GDBM_NEWDB, 0644, 0);		if (newdb == NULL) {			errorlog ("gdbm_open: %s: %s\n", tmpdbfile,				  gdbm_strerror (gdbm_errno));			log_errno (tmpdbfile);			fatal ("Internal enum error 4.\n");		}	}	/*	 *  Create a one-way pipe from the enumeration process to ourselves,	 *  then run the enumeration, and process the results.	 
*/	if (pipe (pfd) < 0) {		fatal_errno ("pipe");	}	pid = do_kid (0, pfd[1], cmd);	/* Run the enumeration */	close (pfd[1]);		/* Close write side */	in = fdopen (pfd[0], "r");	/* Fluff up read side */	/* open pipe to user_enum_pgm unless it is false (CMD_FALSE) */	if (strcmp (user_enum_pgm, CMD_FALSE))		cpinfo = create_user_enum_pgm_pipes (up->url);	/*	 *  process the results from <url>enum and pass the	 *  new URLs to the user_enum_pgm if set	 */	r = get_data (in, pid, cpinfo ? cpinfo->wfp : (FILE *) NULL);	/* cleanup user_enum_pgm if set */	if (cpinfo)		finish_user_enum_pgm_pipes (cpinfo);	Debug (40, 9, ("waiting for enumeration %d\n", pid));	(void) waitpid (pid, NULL, 0);	/* Wait for enumeration to finish */	fclose (in);		/* Close read side */	(void) close (pfd[0]);	/* just in case */	/* Clean up */	if (proddb != NULL) {		if (find_deleted)			enum_to_get_deleted ();		gdbm_close (newdb);	/* don't need this anymore */		/* (void) unlink (tmpdbfile); */		crremove (tmpdbfile);	}	return (r);}/* ---------------------------------------------------------------------- */static voidusage (){	fprintf (stderr, "\Usage:  enum [-db database] [-del deleted-file] [-log logfile]\n\             [-delete] [-tmpdb database] < url_enum_list\n");	exit (1);}intmain (argc, argv)int argc;char *argv[];{	URL *up = NULL;	static char buf[BUFSIZ];	static char cmd[BUFSIZ];	static char depth_str[BUFSIZ];	char *t = NULL;	int i;	int enumix;	nodespec *ns = NULL;	char *enumerator;	signal (SIGPIPE, SIG_IGN);	/* ignore ALL broken pipes */#ifdef USE_HOST_CACHE	host_cache_init ();#endif	init (argc, argv);	/* parse cmd line and opens */	init_url ();		/* must call to initialize the url cache */	Log ("Starting RootNode enumeration.\n");	/* Read the entire workload into the todo list */	while (fgets (buf, sizeof (buf), stdin) != NULL) {		if ((t = strchr (buf, '\n')))			*t = '\0';		Debug (40, 9, ("reading stdin: %s\n", buf));		add_to_todo (buf);	}/* *  Use a linked list to hold the rootnodes to enumerate.  
New nodespec's *  are appended at the tail of the list (ns_tail) in get_data().  As the *  list is traversed, the structures are free'd and ns_head always points *  to the current node in the list. */	while ((ns = ns_head) != NULL) {		if (ns->buf == (char *) NULL)			continue;		Debug (40, 1, ("Processing RootNode: %s\n", ns->buf));		if (sscanf (ns->buf, "%s %d %s %d %s %d %s %s %s %s\n",			    url, &url_max, url_filter, &host_max,			    host_filter, &delay, depth_str, access_types,			    user_enum_pgm, search_type) == 10) {			Debug (40, 9,			       ("Enum Received:\n\tURL: %s\n\tURL-Max: %d\n\tURL-Filter: %s\n\tHost-Max: %d\n\tHost-Filter: %s\n\tDelay: %d\n\tDepth: %s\n\tAccess-Types: %s\n\tUser-Enumerate-Program: %s\n\tSearch-Type: %s\n",				url, url_max, url_filter, host_max, host_filter,				delay, depth_str, access_types, user_enum_pgm,				search_type));			cur_depth = max_depth = 0;			if (strchr (depth_str, ':'))				sscanf (depth_str, "%d:%d", &cur_depth,					&max_depth);			else				sscanf (depth_str, "%d", &max_depth);			Debug (40, 5,			       ("Processing: [%d:%d] %s\n", cur_depth,				max_depth, url));			if ((up = url_open (url)) == NULL) {				errorlog ("Invalid URL: %s\n", url);				goto while_done;			}			/* find the enumerator for this type */			for (i = 0, enumix = -1;			     i < sizeof (enums) / sizeof (enums[0]); i++) {				if (up->type == enums[i].url_type) {					enumix = i;					break;				}			}			if (enumix < 0) {				errorlog ("No Enumerator available for: %s\n",					  up->url);				url_close (up);				goto while_done;			}			/* Prepare to run the enumerator */			cur_attr = enums[enumix].stamp_kind;			enumerator = NULL;			if (strcasecmp (search_type, "Depth") == 0)				enumerator = enums[enumix].url_enum_depth;			if (strcasecmp (search_type, "Breadth") == 0)				enumerator = enums[enumix].url_enum_breadth;			if (enumerator == NULL) {				errorlog ("Bad Search-Type supplied\n");				url_close (up);				goto while_done;			}			switch (enums[enumix].arg_types) {			case 
Arg_Path:				sprintf (cmd, "%s \"%s\"", enumerator,					 up->pathname);				break;			case Arg_URL:				sprintf (cmd, "%s \"%s\"", enumerator, up->url);				break;			case Arg_Host_File:				sprintf (cmd, "%s \"%s\" \"%s\"", enumerator,					 up->host, up->pathname);				break;			default:				fatal ("Internal enum error 1.\n");			}			tree_root = xstrdup (up->url);			(void) do_an_enumeration (up, cmd);			url_close (up);			xfree (tree_root);		} else if (ns->buf[0] == '#') {			/*			 * Pass comments			 */			printf ("%s\n", ns->buf);		}	      while_done:		xfree (ns->buf);		ns_head = ns->next;		xfree (ns);	}	Debug (40, 1, ("enum: exited normally.\n"));	finish_url ();	finish ();	exit (0);}static voidset_envs (){	static char buf[BUFSIZ];	/* Pass the parameters via the environment */	sprintf (buf, "HARVEST_URL_MAX=%d", url_max);	if (putenv (xstrdup (buf))) {		errorlog ("Cannot pass the parameters: %s\n", buf);	}	sprintf (buf, "HARVEST_URL_FILTER=%s", url_filter);	if (putenv (xstrdup (buf))) {		errorlog ("Cannot pass the parameters: %s\n", buf);	}	sprintf (buf, "HARVEST_HOST_MAX=%d", host_max);	if (putenv (xstrdup (buf))) {		errorlog ("Cannot pass the parameters: %s\n", buf);	}	sprintf (buf, "HARVEST_HOST_FILTER=%s", host_filter);	if (putenv (xstrdup (buf))) {		errorlog ("Cannot pass the parameters: %s\n", buf);	}	sprintf (buf, "HARVEST_URL_DELAY=%d", delay);	if (putenv (xstrdup (buf))) {		errorlog ("Cannot pass the parameters: %s\n", buf);	}	sprintf (buf, "HARVEST_DEPTH_MAX=%d", max_depth);	if (putenv (xstrdup (buf))) {		errorlog ("Cannot pass the parameters: %s\n", buf);	}	sprintf (buf, "HARVEST_DEPTH_CUR=%d", cur_depth);	if (putenv (xstrdup (buf))) {		errorlog ("Cannot pass the parameters: %s\n", buf);	}	sprintf (buf, "HARVEST_ACCESS_TYPES=%s", access_types);	if (putenv (xstrdup (buf))) {		errorlog ("Cannot pass the parameters: %s\n", buf);	}	sprintf (buf, "HARVEST_ENUMERATE_PROGRAM=%s", user_enum_pgm);	if (putenv (xstrdup (buf))) {		errorlog ("Cannot pass the parameters: %s\n", 
buf);	}}static voidadd_to_todo (s)char *s;{	nodespec *ns = NULL;	Debug (40, 9, ("add_to_todo: %s\n", s));	ns = (nodespec *) xmalloc (sizeof (nodespec));	ns->buf = xstrdup (s);	ns->next = NULL;	if (ns_tail)		ns_tail->next = ns;	ns_tail = ns;	if (!ns_head)		ns_head = ns;}

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -