⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 creategatherer

📁 harvest是一个下载html网页的机器人
💻
📖 第 1 页 / 共 2 页
字号:
# NOTE(review): the statements from here down to the first closing brace
# are the tail of a sub whose header lies above the visible source
# (presumably add_url_point, which build_gatherer calls with the config
# file name -- confirm).  They are reproduced unchanged apart from restored
# line breaks; $file and $result are expected to be set up by the unseen
# head of that sub.

# EnumSpec with optionals file
# this hashtable store RootNode options
my %EnumSpec = (
	'Collection-Spec-Url' => {
		'syntax' => 'URL=URL-Max[,URL-Filter-filename]',
		'name' => 'URL-Max',
		'paraDefault' => '1000',
		'optionalFile' => 1
		},
	'Collection-Spec-Host' => {
		'syntax' => 'Host=Host-Max[,Host-Filter-filename]',
		'name' => 'Host-Max',
		'paraDefault' => '1',
		'optionalFile' => 1
		},
	'Collection-Spec-Access' => {
		'syntax' => 'Access=HTTP|FTP|Gopher',
		'name' => 'Access',
		'paraDefault' => 'HTTP',
		},
	'Collection-Spec-Delay' => {
		'syntax' => 'Delay=Seconds',
		'name' => 'Delay',
		'paraDefault' => '3',
		},
	'Collection-Spec-Depth' => {
		'syntax' => 'Depth=Number',
		'name' => 'Depth',
		'paraDefault' => '0',
		},
	'Collection-Spec-Search' => {
		'syntax' => 'Search=Breadth|Depth',
		'name' => 'Search',
		'paraDefault' => 'Breadth',
		}
	);

#foreach my $spec (keys(%EnumSpec)){
#print <<EOT;
#DEBUG $spec
#	name: $EnumSpec{$spec}{'name'}
#	syntax: $EnumSpec{$spec}{'syntax'}
#	paraDefault: $EnumSpec{$spec}{'paraDefault'}
#EOT
#print <<EOT if(defined($EnumSpec{$spec}{'optionalFile'}));
#	optionalFile: $EnumSpec{$spec}{'optionalFile'}
#EOT
#}

foreach my $spec (sort {$b cmp $a} (keys(%EnumSpec))){
  $vals{$spec} = 'no';
  &get_ans("Do you want to use ".$EnumSpec{$spec}{'syntax'},
	   $spec);
  next if($vals{$spec} =~/no/io);
  my $name = $EnumSpec{$spec}{'name'};
  my $code = 'Collection-'.$name;
  $vals{$code} = $EnumSpec{$spec}{'paraDefault'};
  &get_ans("Enter un value for ".$name,
	   $code);
  # result construction
  $result .= "\t$name=$vals{$code}";
  next unless(defined($EnumSpec{$spec}{'optionalFile'}));
  my $file = $code.'-File';
  &get_ans("Enter un valid file or return to skip",
	   $file);
  next unless(defined($vals{$file}));
  while(! -r $vals{$file}){
    last if(-r $vals{$file});
    print "The file $vals{$file} doesn't exist\n";
	&get_ans("Enter a valid file name or return to skip",
		$file);
  }
  # result construction
  $result .= ",$vals{$file}";
}
&hr;
print <<EOT;
   RootNode specifications
$result
EOT
$vals{'YorN'} = 'yes';
	&get_ans("This line is it correct",
		'YorN');
# NOTE(review): /[yes]/ is a character class (any of y, e, s), not the
# word "yes" -- left as-is because the head of this sub is not visible.
return unless($vals{'YorN'} =~/[yes]/io);
open(OUT,">>$file") || die("can't add url point in $file : $!");
print OUT "$result\n";
close(OUT);
print <<EOT;
   Added to $file
EOT
}

# print_info - print every collected setting held in %vals.
#
# Skips the scratch answer "YorN", the "Skeleton-Directory" entry and any
# key containing "mkdir".  A value equal to "No_Default" is shown as "none".
# Keys are listed in sorted order so the output is stable between runs.
sub print_info {
    hr();
    print <<EOT;
   List of All variables
EOT
    foreach my $key (sort keys %vals) {
        next if $key eq "YorN";
        next if $key eq "Skeleton-Directory";
        next if $key =~ /mkdir/i;
        printf "%21s: ", $key;
        print(($vals{$key} eq "No_Default") ? "none" : $vals{$key});
        print "\n";
    }
}

# _copy_into - copy the contents of $src into $dst without using the shell.
#
#   $src   file to read
#   $dst   file to write
#   $mode  '>' to overwrite $dst, '>>' to append to it
#
# Dies if either file cannot be opened or if the write cannot be completed.
sub _copy_into {
    my ($src, $dst, $mode) = @_;

    open(my $in, '<', $src)    || die("can't read $src : $!");
    open(my $out, $mode, $dst) || die("can't write in $dst : $!");
    while (my $line = <$in>) {
        print {$out} $line;
    }
    # Buffered write errors only surface when the handle is closed.
    close($out) || die("can't write in $dst : $!");
    close($in);
    return;
}

# build_gatherer - create the gatherer directory, scripts and config file.
#
# After confirmation from the user it:
#   1. creates the directories named by $vals{'mkdir-group'} / $vals{'mkdir'}
#      when those entries are set;
#   2. instantiates each skeleton "<name>.in" file from
#      $vals{'Skeleton-Directory'} into $vals{'Gatherer-Directory'},
#      replacing @Gatherer-Directory@, @Gatherer-Name@, @Gatherer-Port@ and
#      @HARVEST_HOME@ markers;
#   3. assembles "<Gatherer-Name>.cf" from collection.cf.head, the URL points
#      added interactively through add_url_point, and collection.cf.tail,
#      then deletes the head/tail pieces;
#   4. when $vals{'Gatherer-Group'} is set, adds the gatherer to the group
#      scripts RunGatherers / RunGatherds through new_script.
#
# Returns nothing; dies on any file-system failure.
sub build_gatherer {
    my @files = ("RunGatherer", "RunGatherd",
                 "collection.cf.head", "collection.cf.tail");
    my @template_vars = ("Gatherer-Directory", "Gatherer-Name", "Gatherer-Port");

    hr();
    print <<EOT;
    Build the Gatherer
	1. Create the directory $vals{'Gatherer-Directory'} if necessary
	2. Add scripts to run-update the gatherer
	3. Generate the $vals{'Gatherer-Name'}.cf config file
EOT
    $vals{"YorN"} = "yes";
    get_ans("Should I build this Gatherer now?", "YorN");
    return if $vals{"YorN"} !~ /^y/i;

    my $gatherer_dir = $vals{'Gatherer-Directory'};
    my $harvest_home = defined($ENV{'HARVEST_HOME'}) ? $ENV{'HARVEST_HOME'} : '';

    ### Directories (group directory first, then the gatherer's own)
    foreach my $key ("mkdir-group", "mkdir") {
        next unless $vals{$key};
        mkdir($vals{$key}, 0755)
            || die "Cannot make $vals{$key}: $!\n";
    }

    ### Skeleton files: copy each template, filling in the @...@ markers
    foreach my $file (@files) {
        my $src = "$vals{'Skeleton-Directory'}/$file.in";
        my $dst = "$gatherer_dir/$file";

        open(my $in, '<', $src)  || die "can't read $src : $!";
        open(my $out, '>', $dst) || die "can't write $dst : $!";
        while (my $line = <$in>) {
            foreach my $var (@template_vars) {
                # \Q..\E: the variable name is data, not a pattern.
                $line =~ s/\@\Q$var\E\@/$vals{$var}/g;
            }
            $line =~ s/\@HARVEST_HOME\@/$harvest_home/g;
            print {$out} $line;
        }
        close($out) || die "can't write $dst : $!";
        close($in);
    }
    if (chmod(0755, "$gatherer_dir/RunGatherer", "$gatherer_dir/RunGatherd") != 2) {
        die("can't change executable mode : $!");
    }

    ### Configuration file
    my $file = "$gatherer_dir/$vals{'Gatherer-Name'}.cf";

    # header of xxx.cf
    _copy_into("$gatherer_dir/collection.cf.head", $file, '>');

    $vals{"YorN"} = "yes";
    while (1) {
        print "\n";
        get_ans("Would you like to add a URL point to the Gatherer now?", "YorN");
        last if $vals{"YorN"} =~ /^n/i;
        add_url_point("$file");
        # After the first URL point the default answer becomes "no".
        $vals{"YorN"} = "no";
    }

    # footer of xxx.cf
    _copy_into("$gatherer_dir/collection.cf.tail", $file, '>>');

    foreach my $part ("collection.cf.tail", "collection.cf.head") {
        unlink("$gatherer_dir/$part")
            || die("can't delete $gatherer_dir/$part : $!");
    }

    if ($vals{'Gatherer-Group'}) {
        # The group directory is the gatherer directory minus its last
        # component (the gatherer name).
        my $name_re = quotemeta($vals{'Gatherer-Name'});
        my $dir     = $gatherer_dir;
        $dir =~ s/^(.*)\/$name_re$/$1/;

        hr();
        print <<EOT;
    Group files in $dir
    Add informations in specific files for group Gatherers
EOT
        #   file                    cmd
        my %commands = (
            "$dir/RunGatherers" => 'RunGatherer',
            "$dir/RunGatherds"  => 'RunGatherd',
        );
        foreach my $script (sort keys %commands) {
            new_script($script, $commands{$script});
        }
    }
    return;
}

# new_script - create or extend a shell script that starts $cmd in background.
#
#   $file  path of the shell script to create or append to
#   $cmd   command name, run as "$vals{'Gatherer-Directory'}/$cmd &"
#
# When $file does not exist yet it is created with a header that refuses to
# run unless the invoking user is "harvest"; otherwise the command line is
# simply appended.  The script is then made executable (mode 0775).
# Dies when an argument is missing or on any file-system failure.
sub new_script {
    my $file = shift;
    my $cmd  = shift;
    die("function new_script must have 2 arguments") unless $file && $cmd;

    print <<EOT;
   . file $file
EOT
    my $date = `date`;
    chomp($date);

    my $out;
    if (-f $file) {
        open($out, '>>', $file) || die("can't write in file $file : $!");
    }
    else {
        open($out, '>', $file) || die("can't write in file $file : $!");
        # The user check must run in the generated script, so $USER is
        # escaped here and expanded by the shell; "!=" is the string
        # inequality operator of test(1) ("ne" is not an operator).
        print {$out} <<EOT;
#!/bin/sh
### automatic genereted from $0 v$VERSION
### $date
if [ "\$USER" != 'harvest' ];then
        cat <<End_of_text;
    You execute this script under user \$USER !!!!!
    You MUST run it with the user harvest
End_of_text
exit 1
fi
EOT
    }
    print {$out} <<EOT;
$vals{'Gatherer-Directory'}/$cmd &
EOT
    close($out) || die("can't write in file $file : $!");
    chmod(0775, $file) || die("can't chmod(775) file $file : $!");
    return;
}

# save_data - offer to save reusable settings to $HARVEST_HOME/.CreateGatherer.
#
# $vals{'Gatherer-Port'} is incremented before the values are displayed and
# saved.  Only the keys of @list_key that hold a true value are shown and
# written, one per line as "key<TAB><TAB>value".  Nothing is written if the
# user declines.  Dies if the file cannot be written.
sub save_data {
    my $fileconf = "$ENV{'HARVEST_HOME'}/.CreateGatherer";
    my @list_key = (
        'Gatherer-Host',
        'Gatherer-Group',
        'Gatherer-Port',
        'Contact-Email',
    );
    my $keynb = 0;

    ### increase the Port
    $vals{'Gatherer-Port'}++;

    hr();
    print <<EOT;
   Save those datas in $fileconf:
EOT
    foreach my $key (@list_key) {
        next unless $vals{$key};
        printf("%21s : %s\n", $key, $vals{$key});
    }
    print "\n";

    $vals{"YorN"} = "yes";
    get_ans("Should I save those datas ?", "YorN");
    return if $vals{"YorN"} !~ /^y/i;

    open(my $out, '>', $fileconf) || die("can't write in file $fileconf : $!");
    foreach my $key (@list_key) {
        next unless $vals{$key};
        print {$out} "$key\t\t" . $vals{$key} . "\n";
        $keynb++;
    }
    close($out) || die("can't write in file $fileconf : $!");

    print <<EOT;
    $keynb Data saved !!!!
EOT
    return;
}

# hr - print a horizontal rule used to separate sections of the dialogue.
sub hr {
    print <<EOT;
   ------------------------------------------------
EOT
}

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -