⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 manip.pm

📁 harvest是一个下载html网页的机器人
💻 PM
📖 第 1 页 / 共 5 页
字号:
    # A special regular expression for /YYYY:HH:MN:SS used by Apache    my($apachetime)='(/\d{4}):' . "$DD$hm$DD$ms$DD";    my($time)="";    $ampm="";    $date="";    # Substitute all special time expressions.    if (/(^|[^a-z])$timeexp($|[^a-z])/i) {      $tmp=$2;      $tmp=$Lang{$L}{"TimesH"}{lc($tmp)};      s/(^|[^a-z])$timeexp($|[^a-z])/$1 $tmp $3/i;    }    # Remove some punctuation    s/[,]/ /g;    # Make sure that ...7EST works (i.e. a timezone immediately following    # a digit.    s/(\d)$zone(\s+|$|[0-9])/$1 $2$3/i;    $zone = '\s+'.$zone;    # Remove the time    $iso=1;    $midnight=0;    $from="24${hm}00(?:${ms}00)?";    $falsefrom="${hm}24${ms}00";   # Don't trap XX:24:00    $to="00${hm}00${ms}00";    $midnight=1  if (!/$falsefrom/  &&  s/$from/$to/);    $h=$mn=$s=0;    if (/$D$mnsec/i || /$ampmexp/i) {      $iso=0;      $tmp=0;      $tmp=1  if (/$mnsec$zone2?\s*$/i);  # or /$mnsec$zone/ ??      $tmp=0  if (/$ampmexp/i);      if (s/$apachetime$zone()/$1 /i  ||          s/$apachetime$zone2?/$1 /i  ||          s/(^|[^a-z])$at\s*$D$mnsec$zone()/$1 /i  ||          s/(^|[^a-z])$at\s*$D$mnsec$zone2?/$1 /i  ||          s/(^|[^0-9])(\d)$mnsec$zone()/$1 /i ||          s/(^|[^0-9])(\d)$mnsec$zone2?/$1 /i ||          (s/(t)$D$mnsec$zone()/$1 /i and (($iso=-$tmp) || 1))  ||          (s/(t)$D$mnsec$zone2?/$1 /i and (($iso=-$tmp) || 1))  ||          (s/()$DD$mnsec$zone()/ /i and (($iso=$tmp) || 1)) ||          (s/()$DD$mnsec$zone2?/ /i and (($iso=$tmp) || 1))  ||          s/(^|$at\s*|\s+)$D()()\s*$ampmexp$zone()/ /i  ||          s/(^|$at\s*|\s+)$D()()\s*$ampmexp$zone2?/ /i  ||          0         ) {        ($h,$mn,$s,$ampm,$z,$z2)=($2,$3,$4,$5,$6,$7);        if (defined ($z)) {          if ($z =~ /^[+-]\d{2}:\d{2}$/) {            $z=~ s/://;          } elsif ($z =~ /^[+-]\d{2}$/) {            $z .= "00";          }        }        $time=1;        &Date_TimeCheck(\$h,\$mn,\$s,\$ampm);        $y=$m=$d="";        # We're going to be calling TimeCheck again below (when 
we check the        # final date), so get rid of $ampm so that we don't have an error        # due to "15:30:00 PM".  It'll get reset below.        $ampm="";        if (/^\s*$/) {          &Date_Init()  if (! $Cnf{"UpdateCurrTZ"});          last PARSE;        }      }    }    $time=0  if ($time ne "1");    s/\s+$//;    s/^\s+//;    # dateTtime ISO 8601 formats    my($orig)=$_;    s/t$//i  if ($iso<0);            # Parse ISO 8601 dates now (which may still have a zone stuck to it).    if ( ($iso && /^[0-9-]+(W[0-9-]+)?$zone?$/i)  ||         ($iso && /^[0-9-]+(W[0-9-]+)?$zone2?$/i)  ||         ($iso && /^[0-9-]+(T[0-9-]+)?$zone?$/i)  ||         ($iso && /^[0-9-]+(T[0-9-]+)?$zone2?$/i)  ||         0) {      # ISO 8601 dates      s,-, ,g;            # Change all ISO8601 seps to spaces      s/^\s+//;      s/\s+$//;      if (/^$D4\s*$DD\s*$DD\s*t?$DD(?:$DD(?:$DD\d*)?)?$zone2?$/i  ||          /^$D4\s*$DD\s*$DD\s*t?$DD(?:$DD(?:$DD\d*)?)?$zone?()$/i ||          /^$DD\s+$DD\s*$DD\s*t?$DD(?:$DD(?:$DD\d*)?)?$zone2?$/i  ||          /^$DD\s+$DD\s*$DD\s*t?$DD(?:$DD(?:$DD\d*)?)?$zone?()$/i ||          0         ) {        # ISO 8601 Dates with times        #    YYYYMMDDHHMNSSFFFF        #    YYYYMMDDHHMNSS        #    YYYYMMDDHHMN        #    YYYYMMDDHH        #    YY MMDDHHMNSSFFFF        #    YY MMDDHHMNSS        #    YY MMDDHHMN        #    YY MMDDHH        ($y,$m,$d,$h,$mn,$s,$tmp,$z2)=($1,$2,$3,$4,$5,$6,$7,$8);        if ($h==24 && (! defined $mn || $mn==0) && (! defined $s || $s==0)) {          $h=0;          $midnight=1;        }        $z=""    if (! 
$h);        return ""  if ($tmp  and  $z);        $z=$tmp    if ($tmp  and  $tmp);        return ""  if ($time);        last PARSE;      } elsif (/^$D4(?:\s*$DD(?:\s*$DD)?)?$/  ||               /^$DD(?:\s+$DD(?:\s*$DD)?)?$/) {        # ISO 8601 Dates        #    YYYYMMDD        #    YYYYMM        #    YYYY        #    YY MMDD        #    YY MM        #    YY        ($y,$m,$d)=($1,$2,$3);        last PARSE;      } elsif (/^$YY\s+$D\s+$D/) {        # YY-M-D        ($y,$m,$d)=($1,$2,$3);        last PARSE;      } elsif (/^$YY\s*W$DD\s*(\d)?$/i) {        # YY-W##-D        ($y,$wofm,$dofw)=($1,$2,$3);        ($y,$m,$d)=&Date_NthWeekOfYear($y,$wofm,$dofw);        last PARSE;      } elsif (/^$D4\s*(\d{3})$/ ||               /^$DD\s*(\d{3})$/) {        # YYDOY        ($y,$which)=($1,$2);        ($y,$m,$d)=&Date_NthDayOfYear($y,$which);        last PARSE;      } elsif ($iso<0) {        # We confused something like 1999/August12:00:00        # with a dateTtime format        $_=$orig;      } else {        return "";      }    }    # All deltas that are not ISO-8601 dates are NOT dates.    return ""  if ($Curr{"InCalc"}  &&  $delta);    if ($delta) {      &Date_Init()  if (! 
$Cnf{"UpdateCurrTZ"});      return &DateCalc_DateDelta($Curr{"Now"},$delta);    }    # Check for some special types of dates (next, prev)    foreach $from (keys %{ $Lang{$L}{"Repl"} }) {      $to=$Lang{$L}{"Repl"}{$from};      s/(^|[^a-z])$from($|[^a-z])/$1$to$2/i;    }    if (/$wom/i  ||  /$future/i  ||  /$later/i  ||  /$past/i  ||        /$next/i  ||  /$prev/i  ||  /^$week$/i  ||  /$wkabb/i) {      $tmp=0;      if (/^$wom\s*$week$of\s*$month\s*$YY?$/i) {        # last friday in October 95        ($wofm,$dofw,$m,$y)=($1,$2,$3,$4);        # fix $m, $y        return ""  if (&Date_DateCheck(\$y,\$m,\$d,\$h,\$mn,\$s,\$ampm,\$wk));        $dofw=$week{lc($dofw)};        $wofm=$wom{lc($wofm)};        # Get the first day of the month        $date=&Date_Join($y,$m,1,$h,$mn,$s);        if ($wofm==-1) {          $date=&DateCalc_DateDelta($date,"+0:1:0:0:0:0:0",\$err,0);          $date=&Date_GetPrev($date,$dofw,0);        } else {          for ($i=0; $i<$wofm; $i++) {            if ($i==0) {              $date=&Date_GetNext($date,$dofw,1);            } else {              $date=&Date_GetNext($date,$dofw,0);            }          }        }        last PARSE;      } elsif (/^$last$day$of\s*$month(?:$of?\s*$YY)?/i) {        # last day in month        ($m,$y)=($1,$2);        &Date_Init()  if (! $Cnf{"UpdateCurrTZ"});        $y=&Date_FixYear($y)  if (! defined $y  or  length($y)<4);        $m=$month{lc($m)};        $d=&Date_DaysInMonth($m,$y);        last PARSE;      } elsif (/^$week$/i) {        # friday        ($dofw)=($1);        &Date_Init()  if (! $Cnf{"UpdateCurrTZ"});        $date=&Date_GetPrev($Curr{"Now"},$Cnf{"FirstDay"},1);        $date=&Date_GetNext($date,$dofw,1,$h,$mn,$s);        last PARSE;      } elsif (/^$next\s*$week$/i) {        # next friday        ($dofw)=($1);        &Date_Init()  if (! 
$Cnf{"UpdateCurrTZ"});        $date=&Date_GetNext($Curr{"Now"},$dofw,0,$h,$mn,$s);        last PARSE;      } elsif (/^$prev\s*$week$/i) {        # last friday        ($dofw)=($1);        &Date_Init()  if (! $Cnf{"UpdateCurrTZ"});        $date=&Date_GetPrev($Curr{"Now"},$dofw,0,$h,$mn,$s);        last PARSE;      } elsif (/^$next$wkabb$/i) {        # next week        &Date_Init()  if (! $Cnf{"UpdateCurrTZ"});        $date=&DateCalc_DateDelta($Curr{"Now"},"+0:0:1:0:0:0:0",\$err,0);        $date=&Date_SetTime($date,$h,$mn,$s)  if (defined $h);        last PARSE;      } elsif (/^$prev$wkabb$/i) {        # last week        &Date_Init()  if (! $Cnf{"UpdateCurrTZ"});        $date=&DateCalc_DateDelta($Curr{"Now"},"-0:0:1:0:0:0:0",\$err,0);        $date=&Date_SetTime($date,$h,$mn,$s)  if (defined $h);        last PARSE;      } elsif (/^$next$mabb$/i) {        # next month        &Date_Init()  if (! $Cnf{"UpdateCurrTZ"});        $date=&DateCalc_DateDelta($Curr{"Now"},"+0:1:0:0:0:0:0",\$err,0);        $date=&Date_SetTime($date,$h,$mn,$s)  if (defined $h);        last PARSE;      } elsif (/^$prev$mabb$/i) {        # last month        &Date_Init()  if (! $Cnf{"UpdateCurrTZ"});        $date=&DateCalc_DateDelta($Curr{"Now"},"-0:1:0:0:0:0:0",\$err,0);        $date=&Date_SetTime($date,$h,$mn,$s)  if (defined $h);        last PARSE;      } elsif (/^$future\s*(\d+)$day$/i  ||               /^(\d+)$day$later$/i) {        # in 2 days        # 2 days later        ($num)=($1);        &Date_Init()  if (! $Cnf{"UpdateCurrTZ"});        $date=&DateCalc_DateDelta($Curr{"Now"},"+0:0:0:$num:0:0:0",                                  \$err,0);        $date=&Date_SetTime($date,$h,$mn,$s)  if (defined $h);        last PARSE;      } elsif (/^(\d+)$day$past$/i) {        # 2 days ago        ($num)=($1);        &Date_Init()  if (! 
$Cnf{"UpdateCurrTZ"});        $date=&DateCalc_DateDelta($Curr{"Now"},"-0:0:0:$num:0:0:0",                                 \$err,0);        $date=&Date_SetTime($date,$h,$mn,$s)  if (defined $h);        last PARSE;      } elsif (/^$future\s*(\d+)$wkabb$/i  ||               /^(\d+)$wkabb$later$/i) {        # in 2 weeks        # 2 weeks later        ($num)=($1);        &Date_Init()  if (! $Cnf{"UpdateCurrTZ"});        $date=&DateCalc_DateDelta($Curr{"Now"},"+0:0:$num:0:0:0:0",                                  \$err,0);        $date=&Date_SetTime($date,$h,$mn,$s)  if (defined $h);        last PARSE;      } elsif (/^(\d+)$wkabb$past$/i) {        # 2 weeks ago        ($num)=($1);        &Date_Init()  if (! $Cnf{"UpdateCurrTZ"});        $date=&DateCalc_DateDelta($Curr{"Now"},"-0:0:$num:0:0:0:0",                                 \$err,0);        $date=&Date_SetTime($date,$h,$mn,$s)  if (defined $h);        last PARSE;      } elsif (/^$future\s*(\d+)$mabb$/i  ||               /^(\d+)$mabb$later$/i) {        # in 2 months        # 2 months later        ($num)=($1);        &Date_Init()  if (! $Cnf{"UpdateCurrTZ"});        $date=&DateCalc_DateDelta($Curr{"Now"},"+0:$num:0:0:0:0:0",                                  \$err,0);        $date=&Date_SetTime($date,$h,$mn,$s)  if (defined $h);        last PARSE;      } elsif (/^(\d+)$mabb$past$/i) {        # 2 months ago        ($num)=($1);        &Date_Init()  if (! 
$Cnf{"UpdateCurrTZ"});        $date=&DateCalc_DateDelta($Curr{"Now"},"-0:$num:0:0:0:0:0",                                  \$err,0);        $date=&Date_SetTime($date,$h,$mn,$s)  if (defined $h);        last PARSE;      } elsif (/^$week$future\s*(\d+)$wkabb$/i  ||               /^$week\s*(\d+)$wkabb$later$/i) {        # friday in 2 weeks        # friday 2 weeks later        ($dofw,$num)=($1,$2);        $tmp="+";      } elsif (/^$week\s*(\d+)$wkabb$past$/i) {        # friday 2 weeks ago        ($dofw,$num)=($1,$2);        $tmp="-";      } elsif (/^$future\s*(\d+)$wkabb$on$week$/i  ||               /^(\d+)$wkabb$later$on$week$/i) {        # in 2 weeks on friday        # 2 weeks later on friday        ($num,$dofw)=($1,$2);        $tmp="+"      } elsif (/^(\d+)$wkabb$past$on$week$/i) {        # 2 weeks ago on friday        ($num,$dofw)=($1,$2);        $tmp="-";      } elsif (/^$week\s*$wkabb$/i) {        # monday week    (British date: in 1 week on monday)        $dofw=$1;        $num=1;        $tmp="+";      } elsif (/^$now\s*$wkabb$/i) {        # today week     (British date: 1 week from today)        &Date_Init()  if (! $Cnf{"UpdateCurrTZ"});        $date=&DateCalc_DateDelta($Curr{"Now"},"+0:0:1:0:0:0:0",\$err,0);        $date=&Date_SetTime($date,$h,$mn,$s)  if (defined $h);        last PARSE;      } elsif (/^$offset\s*$wkabb$/i) {        # tomorrow week  (British date: 1 week from tomorrow)        ($offset)=($1);        &Date_Init()  if (! $Cnf{"UpdateCurrTZ"});        $offset=$Lang{$L}{"OffsetH"}{lc($offset)};        $date=&DateCalc_DateDelta($Curr{"Now"},$offset,\$err,0);        $date=&DateCalc_DateDelta($date,"+0:0:1:0:0:0:0",\$err,0);        if ($time) {          return ""            if (&Date_DateCheck(\$y,\$m,\$d,\$h,\$mn,\$s,\$ampm,\$wk));          $date=&Date_SetTime($date,$h,$mn,$s);        }        last PARSE;      }      if ($tmp) {        &Date_Init()  if (! 
$Cnf{"UpdateCurrTZ"});        $date=&DateCalc_DateDelta($Curr{"Now"},                                  $tmp . "0:0:$num:0:0:0:0",\$err,0);        $date=&Date_GetPrev($date,$Cnf{"FirstDay"},1);        $date=&Date_GetNext($date,$dofw,1,$h,$mn,$s);        last PARSE;      }    }    # Change (2nd, second) to 2    $tmp=0;    if (/(^|[^a-z0-9])$dom($|[^a-z0-9])/i) {      if (/^\s*$dom\s*$/) {        ($d)=($1);        $d=$dom{lc($d)};        $m=$Curr{"M"};        last PARSE;      }      $tmp=lc($2);      $tmp=$dom{"$tmp"};      s/(^|[^a-z])$dom($|[^a-z])/$1 $tmp $3/i;      s/^\s+//;      s/\s+$//;    }    # Another set of special dates (Nth week)    if (/^$D\s*$week(?:$of?\s*$YY)?$/i) {      # 22nd sunday in 1996      ($which,$dofw,$y)=($1,$2,$3);      $y=$Curr{"Y"}  if (! $y);      $tmp=&Date_GetNext("$y-01-01",$dofw,0);      if ($which>1) {        $tmp=&DateCalc_DateDelta($tmp,"+0:0:".($which-1).":0:0:0:0",\$err,0);      }      ($y,$m,$d)=(&Date_Split($tmp))[0..2];      last PARSE;    } elsif (/^$week$wkabb\s*$D(?:$of?\s*$YY)?$/i  ||             /^$week\s*$D$wkabb(?:$of?\s*$YY)?$/i) {      # sunday week 22 in 1996      # sunday 22nd week in 1996      ($dofw,$which,$y)=($1,$2,$3);      ($y,$m,$d)=&Date_NthWeekOfYear($y,$which,$dofw);      last PARSE;    }    # Get rid of day of week    if (/(^|[^a-z])$week($|[^a-z])/i) {      $wk=$2;      (s/(^|[^a-z])$week,/$1 /i) ||        s/(^|[^a-z])$week($|[^a-z])/$1 $3/i;      s/^\s+//;      s/\s+$//;    }    {      # So that we can handle negative epoch times, let's convert      # things like "epoch -" to "epochNEGATIVE " before we strip out      # the $sep chars, which include '-'.      s,epoch\s*-,epochNEGATIVE ,g;      # Non-ISO8601 dates      s,\s*$sep\s*, ,g;     # change all non-ISO8601 seps to spaces      s,^\s*,,;             # remove leading/trailing space      s,\s*$,,;

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -