⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 ftpenum.pl

📁 Harvest 是一个下载 HTML 网页的机器人
💻 PL
字号:
: # *-*-perl-*-*
    eval 'exec perl -S $0 "$@"'
    if $running_under_some_shell;
#
#  ftpenum.pl - Enumerate FTP directories
#
#  Usage: ftpenum.pl [--keep-userinfo] hostname directory login password
#
#  Description: FTP hostname, cd to directory, ls -lR, and return
#  a complete list of URL <tab> timestamp, where timestamp is the
#  UNIX time(3) in decimal.  Ignores all symbolic links.
#
#  If the directory contains an ls-lR.gz, or ls-lR.Z, or ls-lR file,
#  then ftpenum.pl uses that file rather than the remote LIST command
#  to retrieve the recursive directory listing.
#
#  Jim Guyton & Darren Hardy, hardy@cs.colorado.edu, April 1994
#
#  ftpenum.pl,v 1.23 1996/01/08 09:08:23 duane Exp
#
#######################################################################
#  Copyright (c) 1994, 1995.  All rights reserved.
#
#    The Harvest software was developed by the Internet Research Task
#    Force Research Group on Resource Discovery (IRTF-RD):
#
#          Mic Bowman of Transarc Corporation.
#          Peter Danzig of the University of Southern California.
#          Darren R. Hardy of the University of Colorado at Boulder.
#          Udi Manber of the University of Arizona.
#          Michael F. Schwartz of the University of Colorado at Boulder.
#          Duane Wessels of the University of Colorado at Boulder.
#
#    This copyright notice applies to software in the Harvest
#    ``src/'' directory only.  Users should consult the individual
#    copyright notices in the ``components/'' subdirectories for
#    copyright information about other software bundled with the
#    Harvest source code distribution.
#
#  TERMS OF USE
#
#    The Harvest software may be used and re-distributed without
#    charge, provided that the software origin and research team are
#    cited in any use of the system.  Most commonly this is
#    accomplished by including a link to the Harvest Home Page
#    (http://harvest.cs.colorado.edu/) from the query page of any
#    Broker you deploy, as well as in the query result pages.  These
#    links are generated automatically by the standard Broker
#    software distribution.
#
#    The Harvest software is provided ``as is'', without express or
#    implied warranty, and with no support nor obligation to assist
#    in its use, correction, modification or enhancement.  We assume
#    no liability with respect to the infringement of copyrights,
#    trade secrets, or any patents, and are not responsible for
#    consequential damages.  Proper use of the Harvest software is
#    entirely the responsibility of the user.
#
#  DERIVATIVE WORKS
#
#    Users may make derivative works from the Harvest software, subject
#    to the following constraints:
#
#      - You must include the above copyright notice and these
#        accompanying paragraphs in all forms of derivative works,
#        and any documentation and other materials related to such
#        distribution and use acknowledge that the software was
#        developed at the above institutions.
#
#      - You must notify IRTF-RD regarding your distribution of
#        the derivative work.
#
#      - You must clearly notify users that your are distributing
#        a modified version and not the original Harvest software.
#
#      - Any derivative product is also subject to these copyright
#        and use restrictions.
#
#    Note that the Harvest software is NOT in the public domain.  We
#    retain copyright, as specified above.
#
#  HISTORY OF FREE SOFTWARE STATUS
#
#    Originally we required sites to license the software in cases
#    where they were going to build commercial products/services
#    around Harvest.  In June 1995 we changed this policy.  We now
#    allow people to use the core Harvest software (the code found in
#    the Harvest ``src/'' directory) for free.  We made this change
#    in the interest of encouraging the widest possible deployment of
#    the technology.  The Harvest software is really a reference
#    implementation of a set of protocols and formats, some of which
#    we intend to standardize.  We encourage commercial
#    re-implementations of code complying to this set of standards.
#
#######################################################################
#
#  MODIFICATION NOTICE
#
#    This is a modified version and not the original Harvest software.
#    Changes relative to revision 1.23:
#      - an unknown --option now prints the usage message instead of
#        looping forever;
#      - --keep-userinfo URLs now contain the '@' between the
#        credentials and the host name;
#      - the LeafNode limit no longer emits one URL too many;
#      - the URL filter file is read exactly once, with chomp, and
#        malformed rules are skipped;
#      - runs of "/./" are removed completely from paths;
#      - '::' replaces the old "'" package separator.
#
#  NOTE: this script intentionally runs without "use strict".  It shares
#  package globals with ftp.pl / lsparse.pl and hands lsparse.pl a
#  filehandle *name* (a symbolic reference).
#

$| = 1;         # everything is written to stdout immediately

$ENV{'HARVEST_HOME'} = "/usr/local/harvest"
    if (!defined($ENV{'HARVEST_HOME'}));
unshift(@INC, "$ENV{'HARVEST_HOME'}/lib");      # use local installation
require 'ftp.pl';
require 'lsparse.pl';

@F = split(/\//, $0);
$progname = pop @F;

$debug = 0;
debug_init();

# NOTE(review): the temporary file name is predictable (PID based); it is
# kept because ftp.pl is handed a path to write to, not a handle.
$tmpfile = "/tmp/ftpenum.$$";
$tmpfile = $ENV{'TMPDIR'} . "/ftpenum.$$" if (defined($ENV{'TMPDIR'}));

#  Ignore all ftp.pl error messages.
if ($debug > 5) {
    $ftp::showfd = STDERR;
} else {
    open(DEVNULL, "> /dev/null") ||
        die "ftpenum.pl: Cannot write to /dev/null: $!\n";
    $ftp::showfd = DEVNULL;
}

# Option Flags
#
$keep_userinfo = 0;

# Option Processing
#
# Every leading "--xxx" argument is consumed.  The argument is shifted
# *before* it is inspected so that an unrecognised option ends in the
# usage message rather than in an endless loop.
#
while (@ARGV && $ARGV[0] =~ /^--(.*)/) {
    $option = $1;
    shift(@ARGV);
    if ($option eq 'keep-userinfo') {
        $keep_userinfo = 1;
    } else {
        usage();
    }
}

usage() if ($#ARGV != 3);
$host     = shift(@ARGV);
$dir      = shift(@ARGV);
$login    = shift(@ARGV);
$password = shift(@ARGV);

# Special case: don't keep userinfo if username is 'anonymous'
#
$keep_userinfo = 0 if ($login =~ /^anonymous$/io);

#
#  Enumeration parameters
#
$tree_root = "ftp://$host$dir";
$start_dir_depth = get_depth($dir);

$max_depth = 0;
if (defined($ENV{'HARVEST_DEPTH_MAX'}) && $ENV{'HARVEST_DEPTH_MAX'} > 0) {
    $max_depth = $ENV{'HARVEST_DEPTH_MAX'};
}

$start_depth = 0;
if (defined($ENV{'HARVEST_DEPTH_CUR'}) && $ENV{'HARVEST_DEPTH_CUR'} > 0) {
    $start_depth = $ENV{'HARVEST_DEPTH_CUR'};
}

$nurls = 0;
$url_max = 0;
$url_max = $ENV{'HARVEST_URL_MAX'} if (defined($ENV{'HARVEST_URL_MAX'}));
$url_max = 250 if ($url_max < 1);
$url_ffile = $ENV{'HARVEST_URL_FILTER'};

$ftp_port   = 21;
$retry_call = 1;
$attempts   = 5;

die "$progname: Cannot connect to $host\n"
    unless (ftp::open($host, $ftp_port, $retry_call, $attempts));
print STDERR "$progname: connected to $host, port $ftp_port\n"
    if ($debug);

die "$progname: Cannot login to $host\n"
    unless (ftp::login($login, $password));
print STDERR "$progname: logged in\n"
    if ($debug >= 5);

# A failed cwd exits quietly with status 1; the diagnostic below was
# already disabled in the original source.
#die "$progname: cwd to $host:$dir failed.\n"
exit(1) unless (ftp::cwd($dir));
$cwd = ftp::pwd();

#
#  First line is RootNode URL of the enumeration space
#
$url = path_to_url($host, $cwd);
print STDOUT "$url\n";

$lsparse::fstype = "unix";
$lsparse::name   = "$progname";

#
#  Now, get a recursive directory listing.  First try to retrieve a
#  ls-lR file to save the server from computing the ls-lR on-the-fly.
#  We can support GNU zipped, ucb compressed, and uncompress ls-lR files.
#  If no file is available, then perform the LIST -lR command.
#
$did_shortcut = 0;
ftp::type("I");
if (ftp::get("ls-lR.gz", $tmpfile, 0)) {
    print STDERR "$progname: Got gziped ls-lR\n" if ($debug);
    open(ftp::NS, "gzip -dc $tmpfile |") ||
        die "$progname: gzip -dc $tmpfile: $!\n";
    $did_shortcut = 1;
} elsif (ftp::get("ls-lR.Z", $tmpfile, 0)) {
    print STDERR "$progname: Got ucb compressed ls-lR\n" if ($debug);
    open(ftp::NS, "uncompress -c < $tmpfile |") ||
        die "$progname: uncompress -c < $tmpfile: $!\n";
    $did_shortcut = 1;
} elsif (ftp::get("ls-lR", $tmpfile, 0)) {
    print STDERR "$progname: Got standard ls-lR\n" if ($debug);
    open(ftp::NS, "< $tmpfile") ||
        die "$progname: Cannot read $tmpfile: $!\n";
    $did_shortcut = 1;
} elsif (ftp::dir_open("-lR")) {
    $did_shortcut = 0;
} else {
    die "$progname: Cannot get remote directory listing: $ftp::response\n";
}

# Name of the handle the listing is read from; it lives in package ftp
# and is passed around by name.
$rls = "ftp::NS";

if (!lsparse::reset($cwd)) {            # don't use $dir here
    die "$progname: lsparse reset failed";
}

while (!eof($rls)) {
    ($path, $size, $time, $type, $mode) = lsparse::line($rls);
    # Remove /./ components.  The loop is needed because one global
    # substitution cannot match overlapping runs such as "/././".
    1 while ($path =~ s/\/\.\//\//);
    last if ($path eq '');
    next if (filter_match($path));

    $depth = get_depth($path) + $start_depth - $start_dir_depth;
    printf STDERR "%s: Processing [%2d] %s\n", $progname, $depth, $path
        if ($debug);
    print STDERR "--> SIZE=$size TIME=$time TYPE=$type MODE=$mode\n"
        if ($debug >= 5);

    if ($type eq "f") {
        $url = path_to_url($host, $path);
        print STDOUT "$url\t$time\n";   # OK, pass along
        # Pre-increment: stop once exactly $url_max LeafNode URLs have
        # been written (a post-increment test lets one extra through).
        if (++$nurls >= $url_max) {
            print STDERR "$progname: Truncating RootNode $tree_root at $url_max LeafNode URLs\n";
            sigdie();
        }
    }
}
sigdie();       # END OF PROGRAM

#
# sigdie - common exit path: release the listing source, remove the
# temporary file, log out of the FTP server and exit with status 0.
#
sub sigdie {
    if ($did_shortcut) {
        close($rls);
    } else {
        ftp::dir_close();
    }
    # A failed ls-lR download attempt may have left a partial file behind
    # even when the live LIST command was used in the end.
    unlink($tmpfile) if (-e $tmpfile);
    ftp::quit();
    exit(0);
}

#
# path_to_url - very simple pathname to ftp-style URL
#
#   $host  host name to place in the URL
#   $path  remote path; escaped with cleanup_path() and given a
#          leading slash if it has none
#
# Returns "ftp://host/path", or "ftp://login:password@host/path" when
# $keep_userinfo is set.
#
sub path_to_url {
    my ($host, $path) = @_;

    $host = "$login:$password\@$host"   # add user:pw@ to URL
        if ($keep_userinfo);
    $path = cleanup_path($path);
    1 while ($path =~ s/\/\.\//\//);    # remove /./ components
    $path = '/' . $path                 # add leading slash
        unless ($path =~ /^\//);
    return "ftp://$host$path";
}

#
# cleanup_path - if path contains any weird characters, convert 'em to
# hex as per the draft URL document.
#
# The escaped set is exactly:  < > " # % { } | \ ^ ~ [ ] ` ' and space.
# Each such character becomes %xx with two lower-case hex digits.
# Returns the escaped copy; the argument itself is not modified.
#
sub cleanup_path {
    my ($path) = @_;

    $path =~ s/([<>"#%{}|\\^~\[\]`' ])/sprintf("%%%02x", ord($1))/ge;
    return $path;
}

#
# usage - print the command line synopsis on STDERR and exit(1).
#
sub usage {
    print STDERR "Usage: ftpenum.pl [--keep-userinfo] hostname directory login password\n";
    exit(1);
}

#
# filter_match - decide whether a path must be skipped.
#
# Returns 1 when the path lies deeper than $max_depth (if a maximum is
# set), otherwise the verdict of compute_filter() when a URL filter file
# is configured, otherwise 0.
#
sub filter_match {
    my ($path) = @_;
    my $depth = get_depth($path) + $start_depth - $start_dir_depth;

    if ($max_depth > 0 && $depth > $max_depth) {
        print STDERR "$progname: Maximum depth of $max_depth reached: $path\n" if ($debug);
        return 1;
    }
    return compute_filter($path) if (defined($url_ffile));
    return 0;
}

#
# read_filter - load the rules from $url_ffile into @URLFilter.
#
# Lines that are empty or start with '#' are skipped.  Dies when the
# file cannot be opened.  Always returns 0.
#
sub read_filter {
    open(FILTER, "< $url_ffile") ||
        die "$progname: Cannot read $url_ffile: $!\n";
    while (<FILTER>) {
        next if (/^\n/o || /^#/o);
        chomp;          # chop would eat a real character on a last
                        # line that has no trailing newline
        push(@URLFilter, $_);
    }
    close(FILTER);
    return 0;
}

#
# compute_filter - apply the allow/deny rules to $data.
#
# Each rule is "<allow|deny> <regex>".  The first rule whose regex
# matches decides: deny returns 1 (skip), allow returns 0 (keep).
# Lines that do not have that shape are ignored.  Returns 0 when no
# rule matches.
#
sub compute_filter {
    my ($data) = @_;

    # Read the file once only, even if it turns out to hold no rules.
    unless ($URLFilter_loaded) {
        read_filter();
        $URLFilter_loaded = 1;
    }

    foreach my $line (@URLFilter) {
        next unless ($line =~ /^\s*(\S+)\s+(\S+)\s*$/);
        my ($allow_deny, $re) = ($1, $2);
        my $rvalue;
        if ($allow_deny =~ /deny/i) {           # deny wins if both appear
            $rvalue = 1;
        } elsif ($allow_deny =~ /allow/i) {
            $rvalue = 0;
        } else {
            next;                               # neither keyword: no verdict
        }
        return $rvalue if ($data =~ /$re/);
    }
    return 0;
}

#sub get_depth {
#	local($path) = @_;
#	$tmp = $path;
#	$tmp =~ s/[^\/]+//g;
#	$r = length($tmp);
#	undef $tmp;
#	return $r;
#}

#
# get_depth - number of components in a slash separated path.
#
# Leading and trailing slashes are ignored and repeated slashes count
# as one separator, so "/", "" -> 0 and "/pub//gnu/" -> 2.
#
sub get_depth {
    my ($path) = @_;

    $path =~ s/^\/+//;
    $path =~ s/\/+$//;
    my @parts = split(/\/+/, $path);
    return scalar(@parts);
}

#
# debug_init - set $debug from the HARVEST_DEBUG environment variable.
#
# The variable is split on whitespace; a word containing "-D45,<digit>"
# sets $debug to that digit (the last such word wins).
#
sub debug_init {
    my $dbg = $ENV{'HARVEST_DEBUG'};
    return unless (defined($dbg) && $dbg ne '');

    foreach my $d (split(/\s+/, $dbg)) {
        $debug = $1 if ($d =~ /-D45,(\d)/);
    }
}

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -