⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 sgmls.pl

📁 harvest是一个下载html网页的机器人
💻 PL
字号:
#! /usr/bin/perl

# This is a skeleton of a perl script for processing the output of
# sgmls.  You must change the parts marked with "XXX".
#
# The script reads records from STDIN, one per line.  The first
# character of each line is a command code; the remainder of the line
# is that command's argument.  Each record is dispatched by
# process_line, which maintains the global tables listed below and
# calls the user-supplied handlers (start_element, end_element, data,
# pi, entity, start_subdoc, end_subdoc).
#
# Global state maintained while reading:
#   $level                           current element nesting depth
#   %attribute_type/%attribute_value keyed by ($level, attribute name)
#   %data_attribute_type/_value      keyed by the first two fields of a D record
#   %notation, %notation_sysid, %notation_pubid
#   %entity_type, %entity_text, %entity_code, %entity_notation,
#   %entity_sysid, %entity_pubid     keyed by entity name
#   %entity_filename                 keyed by (entity name, file index)
#   %environment                     keyed by the first field of a V record
#   $lineno, $filename               taken from the most recent L record
#   $conforming                      set to 1 when a C record is seen
#   @files, $sysid, $pubid           pending values from f/s/p records; they
#                                    are consumed by the next E or S record
#                                    (and, for $sysid/$pubid, N records)

# XXX This is for troff: in data, turn \ into \e (which prints as \).
# Backslashes in SDATA entities are left as backslashes.
$backslash_in_data = "\\e";

$prog = $0;
$prog =~ s|.*/||;

$level = 0;

while (<STDIN>) {
    # chomp (not chop) so that a final line lacking a newline does not
    # lose its last real character.
    chomp;
    &process_line($_);
}

# Process one record.  $_[0] is the line with its line terminator
# already removed.  The line is copied into a localized $_ so the
# caller's value is not modified.
sub process_line {
    local($_) = $_[0];

    # Split off the one-character command code; $_ keeps the argument.
    $command = substr($_, 0, 1);
    substr($_, 0, 1) = "";

    if ($command eq '(') {
	# Attributes for this element were stored under the current
	# $level by preceding A records, so the handler runs before the
	# level is incremented.
	&start_element($_);
	$level++;
    }
    elsif ($command eq ')') {
	$level--;
	&end_element($_);
	# Discard the attributes that belonged to the element just closed.
	foreach $key (keys %attribute_value) {
	    @splitkey = split($;, $key);
	    if ($splitkey[0] == $level) {
		delete $attribute_value{$key};
		delete $attribute_type{$key};
	    }
	}
    }
    elsif ($command eq '-') {
	&unescape_data($_);
	&data($_);
    }
    elsif ($command eq 'A') {
	# Fields: name, type, value (the value may contain spaces).
	@field = split(/ /, $_, 3);
	$attribute_type{$level,$field[0]} = $field[1];
	&unescape_data($field[2]);
	$attribute_value{$level,$field[0]} = $field[2];
    }
    elsif ($command eq '&') {
	&entity($_);
    }
    elsif ($command eq 'D') {
	# Four fields; the last one (the value) may contain spaces.
	@field = split(/ /, $_, 4);
	$data_attribute_type{$field[0], $field[1]} = $field[2];
	&unescape_data($field[3]);
	$data_attribute_value{$field[0], $field[1]} = $field[3];
    }
    elsif ($command eq 'N') {
	$notation{$_} = 1;
	if (defined($sysid)) {
	    $notation_sysid{$_} = $sysid;
	    undef($sysid);
	}
	if (defined($pubid)) {
	    $notation_pubid{$_} = $pubid;
	    undef($pubid);
	}
    }
    elsif ($command eq 'I') {
	# Fields: entity name, type, replacement text.
	@field = split(/ /, $_, 3);
	$entity_type{$field[0]} = $field[1];
	&unescape($field[2]);
	# You may want to substitute \e for \ if the type is CDATA.
	$entity_text{$field[0]} = $field[2];
	$entity_code{$field[0]} = 'I';
    }
    elsif ($command eq 'E') {
	# Fields: entity name, type, notation name.
	@field = split(/ /, $_);
	$entity_code{$field[0]} = 'E';
	$entity_type{$field[0]} = $field[1];
	$entity_notation{$field[0]} = $field[2];
	# Attach any file names collected from preceding f records.
	# (Plain truth test: defined(@array) is a fatal error in modern perl.)
	if (@files) {
	    foreach $i (0..$#files) {
		# Index with $i; a bareword i would always select element 0.
		$entity_filename{$field[0], $i} = $files[$i];
	    }
	    @files = ();
	}
	if (defined($sysid)) {
	    $entity_sysid{$field[0]} = $sysid;
	    undef($sysid);
	}
	if (defined($pubid)) {
	    $entity_pubid{$field[0]} = $pubid;
	    undef($pubid);
	}
    }
    elsif ($command eq 'S') {
	$entity_code{$_} = 'S';
	# Attach any file names collected from preceding f records.
	if (@files) {
	    foreach $i (0..$#files) {
		$entity_filename{$_, $i} = $files[$i];
	    }
	    @files = ();
	}
	if (defined($sysid)) {
	    $entity_sysid{$_} = $sysid;
	    undef($sysid);
	}
	if (defined($pubid)) {
	    $entity_pubid{$_} = $pubid;
	    undef($pubid);
	}
    }
    elsif ($command eq '?') {
	&unescape($_);
	&pi($_);
    }
    elsif ($command eq 'L') {
	# Fields: line number and, optionally, a file name.
	@field = split(/ /, $_);
	$lineno = $field[0];
	if ($#field >= 1) {
	    &unescape($field[1]);
	    $filename = $field[1];
	}
    }
    elsif ($command eq 'V') {
	@field = split(/ /, $_, 2);
	&unescape($field[1]);
	$environment{$field[0]} = $field[1];
    }
    elsif ($command eq '{') {
	&start_subdoc($_);
    }
    elsif ($command eq '}') {
	&end_subdoc($_);
    }
    elsif ($command eq 'f') {
	&unescape($_);
	push(@files, $_);
    }
    elsif ($command eq 'p') {
	&unescape($_);
	$pubid = $_;
    }
    elsif ($command eq 's') {
	&unescape($_);
	$sysid = $_;
    }
    elsif ($command eq 'C') {
	$conforming = 1;
    }
    else {
	warn "$prog:$ARGV:$.: unrecognized command \`$command'\n";
    }
}

# Replace, in place, every backslash escape in $_[0] with the text
# returned by esc.  An escape is a backslash followed by either one to
# three octal digits or any single character.
sub unescape {
    $_[0] =~ s/\\([0-7][0-7]?[0-7]?|.)/&esc($1)/eg;
}

# Return the replacement for one escape; $_[0] is the text that
# followed the backslash.
sub esc {
    local($_) = $_[0];
    if ($_ eq '012' || $_ eq '12') {
	"";			# ignore RS
    }
    elsif (/^[0-7]/) {
	# Octal character code.
	sprintf("%c", oct);
    }
    elsif ($_ eq 'n') {
	"\n";
    }
    elsif ($_ eq '|') {
	# SDATA delimiter: dropped here.
	"";
    }
    elsif ($_ eq "\\") {
	"\\";
    }
    else {
	$_;
    }
}

# Like unescape, but for data and attribute values: a backslash is
# replaced by $backslash_in_data unless it occurs between a pair of
# \| delimiters, in which case it stays a plain backslash.
sub unescape_data {
    # esc_data sees $sdata through dynamic scoping; it records whether
    # we are currently between \| delimiters.
    local($sdata) = 0;
    $_[0] =~ s/\\([0-7][0-7]?[0-7]?|.)/&esc_data($1)/eg;
}

# Return the replacement for one escape found by unescape_data.
sub esc_data {
    local($_) = $_[0];
    if ($_ eq '012' || $_ eq '12') {
	"";			# ignore RS
    }
    elsif (/^[0-7]/) {
	sprintf("%c", oct);
    }
    elsif ($_ eq 'n') {
	"\n";
    }
    elsif ($_ eq '|') {
	# Toggle the "inside SDATA" state; the delimiter itself is dropped.
	$sdata = !$sdata;
	"";
    }
    elsif ($_ eq "\\") {
	$sdata ? "\\" : $backslash_in_data;
    }
    else {
	$_;
    }
}

# Called for each '(' record; $_[0] is the record's argument.
sub start_element {
    local($gi) = $_[0];
    # XXX
}

# Called for each ')' record; $_[0] is the record's argument.
sub end_element {
    local($gi) = $_[0];
    # XXX
}

# Called for each '-' record with the unescaped data.
sub data {
    local($data) = $_[0];
    # XXX
}

# A processing instruction.
sub pi {
    local($data) = $_[0];
    # XXX
}

# A reference to an external entity.
sub entity {
    local($name) = $_[0];
    # XXX
}

# Called for each '{' record; $_[0] is the record's argument.
sub start_subdoc {
    local($name) = $_[0];
    # XXX
}

# Called for each '}' record; $_[0] is the record's argument.
sub end_subdoc {
    local($name) = $_[0];
    # XXX
}

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -