check-~1.pl
#! /usr/bin/perl -w
#
# Copyright 1999/2000 August Hoerandl (august.hoerandl@gmx.at)
# http://elina.htlw16.ac.at/~hoerandl
#
# parts from search.pl which is
# Copyright 1999 Jose M. Vidal
# Jose M. Vidal, vidal@multiagent.com, http://jmvidal.ece.sc.edu
#
# This program is free software. You can redistribute it and/or modify
# it under the terms of the GNU General Public License
#
# create database with meta info
#
# actions
#   read database
#     read to @allRecords
#   read metadata
#     read to $meta{$URL}, $date{$URL}, $error{$URL}
#   check all (expired) urls
#     based on $date{$URL}
#   dump metadata
#   dump database with meta info
#   dump errordata
#
# History
# 0.1
#  4. Sept. 1999 - first version
# 0.2
# 12. Sept. 1999 - little bug with multiple line meta corrected
#                  added option & getoptions
# 0.3
# 21. Sept. 1999 - merged back file detection from Jose
#
# 0.4
# 26. Oct 1999  - added code for: meta Classification
# 31. Oct 1999  - added noproxy, showrecheck, $time, error.html
#
# 0.5
# 12. Nov 1999  - added checkheadline
#                 strip metainfo (multiple blanks, html)
#                 convert iso
#
# 26. Nov. 1999 - added $urlto, $urlmaxsize
# 29. Nov. 1999 - added $agent
# 23. Jan. 2000 - fixed bug which used up all memory
#                 added <meta value="..."> and <meta contents="...">
# 10. Feb. 2000 - use LWP::Parallel::UserAgent
#                 http://theoryx5.uwinnipeg.ca/CPAN/data/ParallelUserAgent/Bundle/ParallelUA.html
#                 install: perl -MCPAN -e 'install Bundle::ParallelUA'
#                 --maxpar  number of hosts to query in parallel
#
# 11. Feb. 2000 - fixed dump_database (replaces $error)
#
# 20. Feb. 2000 - fixed make_url
#                 (thanks to Andreas Hille for beta testing)
#                 urlmaxsize to handle very big pages
#                 urlmaxresponse to handle endless small answers
# 26. Feb. 2000 - added sorting (errorcode) to dump_errordata_html
#
#  4. Mar. 2000 - added $req->previous / $urldata to allow redirection
#
# 22. Mar. 2000 - added sorting to dump_errordata_html (errorcount)

$version = "0.65";

$database = "/usr/local/httpd/cgi-bin/urls.db"; # Name of db file.
$newdb = "";             # result: database
$metadata = "meta.db";   # result: metainfo
$errordata = "error.db"; # result: urls with error
$error_recheck = 10;     # number of days for recheck after error
$age_recheck = 40;       # number of days for recheck
# add some randomness - so we won't fetch all urls on the same day
$random = .5;            # random number of days added (factor)
$dumpfreq = 10;          # when to dump metadata (number of urls read)
$maxurls = 2;            # max urls to fetch recursively (frames)
$maxcheck = 1000;        # max number of urls to check in one run
$sleep = 2;              # time to sleep between url requests - not supported !!!
$debug = 0;              # set to 1 for debug output
$showrecheck = 0;        # set to 1 to see next recheck
$maxpar = 10;            # maximum requests in parallel
$showhash = 1;           # show hashes on progress
$checkerror = 0;         # check all urls with errorcount > 0
$reseterror = 0;         # reset all error counters - only with $checkerror
$checknometa = 0;        # check all urls with meta data = ""
$checkheadline = 0;      # add <h1>, <h2>, ... to meta info - see $whichheadline
$whichheadline = "1-2";  # which headlines to check
$urlto = 0;              # timeout for getting url (seconds)
$urlmaxsize = 0;         # max size of page to get
$urlmaxresponse = 500;   # max number of responses (parts)
# useragent to send in http request ("spider name")
$agent = "check-meta - spider";
$noproxy = 0;            # don't use proxy from environment
# lines shouldn't get too long
$maxlinelength = 4*1024; # max line length during get meta
$test1 = 0;              # simple test
$test2 = 0;              # more tests
$test3 = 0;              # even more tests
$test4 = 0;              # even more tests

# ------------------------------------------------------------------
# no user serviceable parts below
# ------------------------------------------------------------------

use LWP;
use HTTP::Request::Common;
use Getopt::Long;
require LWP::Parallel::UserAgent;
use HTTP::Request;
use LWP::Parallel::UserAgent qw(:CALLBACK);
#use LWP::Debug qw(+);
#require LWP::Debug;

# ------------------------------------------------------------------
$numlines_bk2site = 9;   # number of lines per record - standard
$numlines_metadata = 10; # number of lines per record - with metadata
# ------------------------------------------------------------------
$countmeta = 0;
$counthead = 0;

# ------------------------------------------------------------------
# normalize an url via HTTP::Request
sub norm_url {
    local($url) = @_;
    my $req = HTTP::Request->new('GET', $url);
    return $req->url;
}

# ------------------------------------------------------------------
# read the bk2site urls.db into @allRecords and initialize the
# per-url hashes %error, %date and %meta
sub read_database {
    open(DB, "<$database") || die ("Can't open $database; $!");
    $db_line1 = <DB>;
    chop($db_line1);
    if ($db_line1 eq "#bk2site urls.db generated file") {
        printf "bk2site urls.db generated file\n" if $debug;
        $numlines = $numlines_bk2site;
    } else {
        printf "bk2site urls.db has already metadata\n" if $debug;
        $numlines = $numlines_metadata;
    };
    $db_line2 = <DB>;
    $db_line3 = <DB>;
    $db_line4 = <DB>;
    @allRecords = <DB>; # there are $numlines lines per record,
                        # so multiply the record index by $numlines
    close(DB);
    ## 0 is the type
    ## 1 is the ParentTitle
    ## 2 is the url or relative dir
    ## 3 is the Title
    ## 4 is the comment
    ## 5 is the creation time
    ## 6 is the last modified time
    ## 7 is the last visit time
    ## 8 is the number of user hits
    ## 9 is the meta info
    $typeN = 0;
    # $parentTitleN = 1;
    $urlN = 2;
    # $titleN = 3;
    # $commentN = 4;
    # $creationtimeN = 5;
    # $modifiedtimeN = 6;
    # $visittimeN = 7;
    # $urlhitsN = 8;
    # $numlines = 10; ## 0-9, as above
    # my $numRecords = 15; # test only
    my $numRecords = ($#allRecords + 1)/$numlines;
    my $i;
    for ($i = 0; $i < $numRecords; ++$i) { # initialize meta
        my $cr = ($i * $numlines);
        next if (($allRecords[$cr + $typeN]) eq "FOLDER\n");
        my $url = $allRecords[$cr + $urlN];
        chop($url);
        $url = &norm_url($url);
        print "url $url added\n" if $debug;
        $error{$url} = 0;
        $date{$url} = 0;
        $meta{$url} = "";
    }
}
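# Illustration only (not part of the original script): under the 10-line
# record layout documented in read_database above, a single urls.db entry
# might look like the block below. The concrete values are invented; the
# only property the code relies on is that FOLDER records are skipped.
#
#   LINK                                 <- type (anything but FOLDER)
#   Bookmarks                            <- ParentTitle
#   http://www.example.org/index.html    <- url
#   Example Page                         <- Title
#   a short comment                      <- comment
#   936396000                            <- creation time
#   936396000                            <- last modified time
#   936396000                            <- last visit time
#   0                                    <- number of user hits
#   keywords and description ...         <- meta info (10th line, if present)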
# ------------------------------------------------------------------
# write the database back to $newdb, appending the meta info line
sub dump_database {
    print "\ndumping database $newdb\n";
    open(DB, ">$newdb") || die ("Can't open $newdb; $!");
    # print DB "#bk2site urls.db generated file metadata";
    if ($db_line1 eq "#bk2site urls.db generated file") {
        print DB "#bk2site urls.db generated file metadata\n";
    } else {
        print DB "$db_line1\n";
    }
    print DB "$db_line2";
    print DB "$db_line3";
    print DB "$db_line4";
    my $numRecords = ($#allRecords + 1)/$numlines;
    for ($i = 0; $i < $numRecords; $i++) {
        my $cr = ($i * $numlines);
        my $url = $allRecords[$cr + $urlN];
        chop($url);
        for ($j = 0; $j < $numlines_bk2site; $j++) {
            print DB "$allRecords[$cr + $j]";
        }
        if (defined $error{$url} && $error{$url} > 0) {
            print DB "\n";
        } elsif (defined $meta{$url}) {
            print DB "$meta{$url}\n";
        } else {
            print DB "\n";
        }
    }
    close DB;
}

# ------------------------------------------------------------------
# add url + filename
sub make_url {
    local($url, $file) = @_;
    print "\nmake url [$url][$file]\n" if $debug;
    if ( $file =~ /^http/) {            # full url
        return $file;
    } elsif ( $file =~ /^\//) {         # filename starting with /
        $url =~ /(.*\/\/[^\/]*)\//;     # get only the host part
        return $1 . $file;
    }
    if ( $url =~ /\/$/ ) {              # url ends with /
        return $url . $file;
    } elsif ( $url =~ /(.*\/)[^\/]+/ ) { # url with ..../file.ext
        # print "+\n";
        return $1 . $file;
    } else {
        return $url . "/" . $file;
    }
}
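# Illustration only (these calls are not in the original script) - the
# relative-link resolution above behaves roughly as follows:
#
#   make_url("http://host.example/dir/page.html", "http://other.example/x")
#       -> "http://other.example/x"                (already absolute)
#   make_url("http://host.example/dir/page.html", "/img/logo.gif")
#       -> "http://host.example/img/logo.gif"      (root-relative)
#   make_url("http://host.example/dir/", "frame.html")
#       -> "http://host.example/dir/frame.html"    (base ends with /)
#   make_url("http://host.example/dir/page.html", "frame.html")
#       -> "http://host.example/dir/frame.html"    (last path segment stripped)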

sub conv_iso {
    local($line) = @_;
    $line =~ s/Æ/
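# ------------------------------------------------------------------
# Illustration only - none of the following is part of the original
# script.  The header comments above mention fetching the urls with
# LWP::Parallel::UserAgent (--maxpar hosts in parallel); a minimal,
# hypothetical sketch of that register/wait pattern, using the module's
# documented interface and the configuration variables defined above
# (@urls_to_check is an assumed name), would look roughly like this:
#
#   my $pua = LWP::Parallel::UserAgent->new();
#   $pua->agent($agent);                  # "spider name" sent to servers
#   $pua->max_hosts($maxpar);             # hosts queried in parallel
#   $pua->timeout($urlto) if $urlto;      # per-request timeout
#   foreach my $url (@urls_to_check) {
#       $pua->register(HTTP::Request->new(GET => $url));
#   }
#   my $entries = $pua->wait();           # block until all requests finish
#   foreach my $key (keys %$entries) {
#       my $res = $entries->{$key}->response;
#       # ... inspect $res->code, extract <meta> info, update %meta/%error
#   }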