⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 lsparse.pl

📁 harvest是一个下载html网页得机器人
💻 PL
📖 第 1 页 / 共 2 页
字号:
		}		else {			printf( "Unmatched line: %s", $_ );			return( '', 0, 0, 0, 0 );		}	}	return( '', 0, 0, 0, 0 );}# Convert NetWare file access mode chars at the start of a DIR entry # into a Unix access number.sub netware_to_mode{	local( $chars ) = @_;	local( @kind, $c );	# Split and remove first three characters	@kind = split( //, $kind );	shift( @kind );		# omit directory "d" field	shift( @kind );		# omit space separator	shift( @kind );		# omit left square bracket	$mode = 0;		# init $mode to no access	foreach $c ( @kind ){		if( $c eq 'R' )	{$mode |= 0x644;}	## r/w r r		if( $c eq 'W' ) {$mode |= 0x222;}	## w   w w		if( $c eq 'F' ) {$mode |= 0x444;}	## r   r r		}	return $mode;}# --------------------- parse VMS dir output# for each file or directory line found return a tuple of# (pathname, size, time, type, mode)# pathname is a full pathname relative to the directory set by reset()# size is the size in bytes (this is always 0 for directories)# time is a Un*x time value for the file# type is "f" for a file, "d" for a directory and#         "l linkname" for a symlinksub lsparse'line_vms{	local( $fh ) = @_;	local( $non_crud, $perm_denied );	if( eof( $fh ) ){		return( "", 0, 0, 0 );	}	while( <$fh> ){		# Stomp on carriage returns		s/\015//g;		# I'm about to look at this at lot		study;		if( /^\s*$/ ){			next;		}		if( /^\s*Total of/i ){			# Just a size report ignore			next;		}		if( /\%RMS-E-PRV|insufficient privilege/i ){			# A permissions error - skip the line			next;		}		# Upper case is so ugly		if( ! $lsparse'vms_keep_case ){			tr/A-Z/a-z/;		}		# DISK$ANON:[ANONYMOUS.UNIX]		if( /^([^:]+):\[([^\]+]+)\]\s*$/ ){			# The directory name			# Use the Unix convention of /'s in filenames not			# .'s			$currdir = '/' . $2;			$currdir =~ s,\.,/,g;			$currdir =~ s,/+,/,g;			$currdir =~ s,^/$vms_strip,,;			if( $currdir eq '' ){				next;			}			$match = $currdir;			$match =~ s/([\+\(\)\[\]\*\?])/\\$1/g;#print ">>>match=$match currdir=$currdir\n";			return( substr( $currdir, 1 ), 0, 0, 'd', 0 );		}			# MultiNet FTP	# DSPD.MAN;1  9   1-APR-1991 12:55 [SG,ROSENBLUM] (RWED,RWED,RE,RE)	# CMU/VMS-IP FTP	# [VMSSERV.FILES]ALARM.DIR;1      1/3          5-MAR-1993 18:09		local( $dir, $file, $vers, $size, $lsdate, $got );		$got = 0;		# For now ignore user and mode		if( /^((\S+);(\d+))?\s+(\d+)\s+(\d+-\S+-\d+\s+\d+:\d+)/ ){			($file, $vers, $size, $lsdate) = ($2,$3,$4,$5);			$got = 1;		}		elsif( /^(\[([^\]]+)\](\S+);(\d+))?\s+\d+\/\d+\s+(\d+-\S+-\d+\s+\d+:\d+)\s*$/ ){			($dir,$file,$vers,$lsdate) = ($2,$3,$4,$5);			$got = 1;		}		# The sizes mean nothing under unix...		$size = 0;				if( $got ){			local( $time ) = &main'lstime_to_time( $lsdate );			local( $type ) = 'f';			local( $mode ) = 0444;			# Handle wrapped long filenames			if( $filename ne '' ){				$file = $filename;				$vers = $version;				if( $directory ){					$dir = $directory;				}			}			if( defined( $dir ) ){				$dir =~ s/\./\//g;				$file = $dir . '/' . $file;			}			$filename = '';			if( $file =~ /^(.*)\.dir(;\d+)?$/ ){				if( ! $vms_keep_dotdir ){					$file = $1 . $2;				}				$type = 'd';				$mode = 0555;			}			$lsparse'vers = $vers;#print "file=|$file| match=|$match| vms_strip=|$vms_strip|\n";			$file =~ s,^,/,;			$file =~ s,^/$match,,;			if( ! defined( $dir ) ){				$file = "$currdir/$file";			}			$file =~ s,^$vms_strip,,;			$file =~ s,/+,/,g;#print  "file=|$file|\n";			return( substr( $file, 1 ), $size, $time, $type, $mode );		}		elsif( /^\[([^\]]+)\](\S+);(\d+)\s*$/ ){			# If a filename is long then it is on a line by itself			# with the details on the next line			local( $d, $f, $v ) = ($1, $2, $3);			$d =~ s/\./\//g;			$directory = $d;			$filename = $f;			$version = $v;		}		elsif( /^(\S+);(\d+)\s*$/ ){			# If a filename is long then it is on a line by itself			# with the details on the next line			$filename = $1;			$version = $2;		}		else {			printf( "Unmatched line: %s", $_ );		}	}	return( '', 0, 0, 0, 0 );}# --------------------- parse output from dos ftp server# for each file or directory line found return a tuple of# (pathname, size, time, type, mode)# pathname is a full pathname relative to the directory set by reset()# size is the size in bytes (this is always 0 for directories)# time is a Un*x time value for the file# type is "f" for a file, "d" for a directory and#         "l linkname" for a symlinksub lsparse'line_dosftp{	local( $fh ) = @_;	while( 1 ){		if( $pending ){			$_ = $pending;			$pending = '';		}		else {			if( eof( $fh ) ){				return( "", 0, 0, 0 );			}			$_ = <$fh>;			# Ignore the summary at the end and blank lines			if( /^\d+ files?\./ || /^\s+$/ ){				next;			}		}		# Stomp on carriage returns		s/\015//g;		# I'm about to look at this at lot		study;		if( m|(\S+)\s+(\S+)?\s+(\d+):(\d+)\s+(\d+)/(\d+)/(\d+)\s*(.*)| ){			local( $file, $commasize, $hrs, $min, $mon, $day, $yr ) =				($1, $2, $3, $4, $5, $6, $7);			$pending = $8;			# TODO: fix hacky 19$yr			local( $lsdate ) = "$day-$mon-19$yr $hrs:$min";			local( $time ) = &main'lstime_to_time( $lsdate );			local( $type ) = '?';			local( $mode ) = 0;			local( $size ) = $commasize;			$size =~ s/,//g;			if( $file =~ m:(.*)/$: ){				$file = $1;				$type = 'd';					$size = 0;   # Don't believe the report size			}			else {				# (hopefully) a regular file				$type = 'f';			}						$currdir =~ s,/+,/,g;			$file =~ s,^/$match,,;			$file = "/$currdir/$file";			$file =~ s,/+,/,g;			return( substr( $file, 1 ), $size, $time, $type, $mode );		}		else {			printf( "Unmatched line: %s", $_ );		}	}	return( '', 0, 0, 0, 0 );}# --------------------- parse standard MACOS Unix-like ls output# for each file or directory line found return a tuple of# (pathname, size, time, type, mode)# pathname is a full pathname relative to the directory set by reset()# size is the size in bytes (this is always 0 for directories)# time is a Un*x time value for the file# type is "f" for a file, "d" for a directory and#         "l linkname" for a symlinksub lsparse'line_macos{	local( $fh ) = @_;	local( $non_crud, $perm_denied );	if( eof( $fh ) ){		return( "", 0, 0, 0 );	}	while( <$fh> ){		# Stomp on carriage returns		s/\015//g;		# I'm about to look at this at lot		study;		if( /^([\-rwxd]{10}).*\s(\d+\s+)?(\S+)\s+\d+\s*(\w\w\w\s+\d+\s*(\d+:\d+|\d\d\d\d))\s+(.*)\n/ ){			local( $kind, $size, $lsdate, $file ) = ($1, $3, $4, $6);						local( $time ) = &main'lstime_to_time( $lsdate );			local( $type ) = '?';			local( $mode ) = 0;			if( $kind =~ /^-/ ){				# (hopefully) a regular file				$type = 'f';			}			elsif( $kind =~ /^d/ ){				$type = 'd';					$size = 0;   # Don't believe the report size			}						$currdir =~ s,/+,/,g;			$file =~ s,^/$match,,;			$file = "/$currdir/$file";			$file =~ s,/+,/,g;			return( substr( $file, 1 ), $size, $time, $type, $mode );		}		else {			printf( "Unmatched line: %s", $_ );		}	}	return( '', 0, 0, 0, 0 );}# --------------------- parse lsparse log file format# lsparse'line_lsparse() is for input in lsparse's internal form,# as it might have been written to a log file during a previous# run of a program that uses lsparse.  The format is:#     filename size time type mode# where size and time are in decimal, mode is in decimal or octal,# and type is one or two words.sub lsparse'line_lsparse{	local( $fh ) = @_;	if( $lsparse'readtime ){		alarm( $lsparse'readtime );	}	if( eof( $fh ) ){		alarm( 0 );		return( "", 0, 0, 0 );	}	while( <$fh> ){		if( /^(\S+)\s+(\d+)\s+(\d+)\s+((l\s+)?\S+)\s+(\d+)\n$/ ){			# looks good.			# note that $type is two words iff it starts with 'l'			local( $name, $size, $time, $type, $mode )				= ( $1, $2, $3, $4, $6 );						$mode = oct($mode) if $mode =~ /^0/;			return( $name, $size, $time, $type, $mode );		}		else {			printf( "Unmatched line: %s\n", $_ );		}	}	alarm( 0 );	return( '', 0, 0, 0, 0 );}# --------------------- Info-Mac all-files# -r     1974 Jul 21 00:06 00readme.txt# lr        3 Sep  8 08:34 AntiVirus -> vir# ...# This is the format used at sumex-aim.stanford.edu for the info-mac area.# (see info-mac/help/all-files.txt.gz).#sub lsparse'line_infomac{	local( $fh ) = @_;	if( $lsparse'readtime ){		alarm( $lsparse'readtime );	}	if( eof( $fh ) ){		alarm( 0 );		return( "", 0, 0, 0 );	}	while( <$fh> ){		next if /^;/;		if( /^([l-].)\s*(\d+)\s*(\w\w\w\s+\d+\s*(\d+:\d+|\d\d\d\d))\s+(.*)\n/ ){			local( $kind, $size, $lsdate, $file ) = ($1, $2, $3, $5);						local( $time ) = &main'lstime_to_time( $lsdate );			# This should be a symlink			if( $kind =~ /^l/ && $file =~ /(.*) -> (.*)/ ){				$file = $1;				$type = "l $2";			}			elsif( $kind =~ /^[\-F]/ ){				# (hopefully) a regular file				$type = 'f';			}			else {				printf( "Unparsable info-mac line: %s\n", $_ );				next;			}						return( $file, $size, $time, $type, 0444 );		}		else {			printf( "Unmatched line: %s\n", $_ );		}	}	alarm( 0 );	return( '', 0, 0, 0, 0 );}# --------------------- CTAN files list#    22670 Mon Jul 20 12:36:34 1992 pub/tex/biblio/bibtex/contrib/aaai-named.bst#sub lsparse'line_ctan{	local( $fh ) = @_;	if( $lsparse'readtime ){		alarm( $lsparse'readtime );	}	if( eof( $fh ) ){		alarm( 0 );		return( "", 0, 0, 0 );	}	while( <$fh> ){		if( /^\s*(\d+)\s+(\w\w\w\s+\w\w\w\s+\d+\s+\d+:\d+:\d+\s+\d+)\s+(.*)\n/ ){			local( $size, $lsdate, $file ) = ($1, $2, $3);						local( $time ) = &main'lstime_to_time( $lsdate );			return( $file, $size, $time, 'f', 0444 );		}		else {			printf( "Unmatched line: %s\n", $_ );		}	}	alarm( 0 );	return( '', 0, 0, 0, 0 );}# -----1;

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -