📄 sgml.sum
字号:
foreach $att (sort keys %ATT) { $val = $ATT{$att}; chop $val if ($val =~ /\n$/); # chop last newline if present next if ($val eq ""); next if ($att =~ /[ \t]/); # don't output atts with whitespace print $att . "{" . length($val) . "}:\t"; print $val . "\n";}@t = times;$dt = time - $start_t;printf STDERR ("SGML.sum $doctype: %d tags, time: %.2fu %.2fs (%.2fu %.2fs), %02d:%02d\n", $ntags, $t[0], $t[1], $t[2], $t[3], $dt / 60, $dt % 60) if ($print_times);exit 0;#========================================================================# SUBROUTINESsub parse_sgml { local ($tag, %VARS) = @_; local ($buf); # holds data content for current tag local (%NEWVARS); # holds SGML attrs for next tag local ($pass); local (@atts, $att); local ($k); $r_depth++; print STDERR "parse_sgml: Recursion Level $r_depth\n" if ($debug); foreach $k (keys %VARS) { foreach $att (&select_vars ("$tag:$k", %VARS)) { &add_to_attribute ($VARS{$k}, $att); } } while (<SGMLS>) { # Hrvoje Stipetic HS - few changes in this block # Optional: convert octal char codes to 8-bit chars # (helps on systems without locale support) s/\\{0}\\(\d\d\d)/sprintf("%c", oct $1)/ge; print STDERR "sgmls> $_" if ($debug); if (/^\((.*)$/) { # SGML start GI $buf .= &parse_sgml ($1, %NEWVARS); $ntags++; undef %NEWVARS; # no undef - bug - HS } last if (/^\)$tag$/); # SGML end GI if (/^A(\S+) (\S+) ?(.*)$/) { # SGML attr # There is no second space if IMPLIED - HS next if ($2 eq 'IMPLIED'); $NEWVARS{$1} = $3; } $buf .= $1 if (/^-(.*)$/); # SGML data } # end tag found, choose SOIF attrs # @atts = &select_content ($tag, %VARS); local ($pass) = $#atts == $[-1 ? 1 : 0; foreach $att (@atts) { print STDERR "parse_sgml: Adding text to attribute '$att'\n" if ($debug); ($pass = 1, next) if ($att eq 'parent'); ($pass = 0, last) if ($att eq 'ignore'); &add_to_attribute ($buf, $att); } print STDERR "parse_sgml: Returning from level $r_depth\n" if ($debug); $r_depth--; return undef unless ($pass); $buf .= "\n"; $buf;}# Selects output SOIF attribute based on SGML tag and variables# Only used to print _CONTENT_ between tags, ie: <B>content</B># Would not be called on HTML tags such as IMG, META which have no content#sub select_content { local ($tag, %VARS) = @_; local ($t); local (@s); local (@r) = (); local ($s); local ($av); local ($a); local ($v); print STDERR "tag=$tag\n" if ($debug); tagloop: foreach $t (@soifkeys) { print STDERR "select_content: checking SOIF{$t}...\n" if ($debug); @s = split (',', $t); # s[0] is tag, s[1..n] are att=val $s = shift @s; print STDERR "select_content: checking '$tag' eq '$s'\n" if ($debug); next unless ($tag eq $s); foreach $av (@s) { ($a,$v) = split (/=/, $av); print STDERR "A=$a\tV=$v\tVARS{$a}=$VARS{$a}\n" if ($debug); next tagloop unless ($VARS{$a} eq $v); } @r = split (/,/, $SOIF{$t}); last; } print STDERR "select_content: Returning (@r)\n" if ($debug); return @r;}# Selects output SOIF attribute based on SGML tag, variables# Only used to find entries of the form TAG:VARIABLE in the table#sub select_vars { local ($tagvar, %VARS) = @_; local (@atts) = (); local ($att); local ($t); local (@s); local ($s); local ($a); local ($v); local ($av); print STDERR "tagvar=$tagvar\n" if ($debug); # Support for HTML.sum table entry like this one: # <TAG:ATTRX,ATTRY=VALUE> soifattr1,soifattr2 # Primary goal is to support Dublin Core. (HS) tagloop: foreach $t (@soifkeys) { @s = split (',', $t); # s[0] is tag:var, s[1..n] are att=val $s = shift @s; if ($s eq $tagvar) { foreach $av (@s) { ($a,$v) = split (/=/, $av); print STDERR "A=$a\tV=$v\tVARS{$a}=$VARS{$a}\n" if ($debug); next tagloop unless ($VARS{$a} eq $v); } foreach $att (split (/,/, $SOIF{$t})) { if ($att =~ /\$/) { # $NAME, $HTTP-EQUIV $att =~ s/\$//; if (defined $VARS{$att}) { $att = $VARS{$att}; $att =~ tr/A-Z/a-z/; # Fix SOIF attribute names $att =~ s/[^\w-]/_/g if ($fix_attrib_names); # Add custom prefix for META # generated SOIF attributes $att = $meta_prefix . $att if ($tagvar =~ /^META/); push (@atts, $att); } } else { push (@atts, $att); } } last; } } @atts;}# Add a string to an attribute#sub add_to_attribute { local ($buf, $a) = @_; return if ($a eq 'ignore' || $a eq ''); print STDERR "ADDING: $buf\n" if ($debug); print STDERR " TO: $a\n" if ($debug); $buf = &wrapline ($buf) if (length($buf) < $MAXWRAPSIZE); $ATT{$a} .= $buf . "\n";}# Build a temp filename#sub tempnam { srand time; local ($f) = sprintf ("sgml%d%06x", $$, rand (0xFFFFFF)); return "$ENV{'TMPDIR'}/$f" if (defined $ENV{'TMPDIR'} && -d $ENV{'TMPDIR'}); return "/var/tmp/$f" if (-d "/var/tmp"); return "/tmp/$f";}sub temperrnam { srand time; local ($f) = sprintf ("err%d%06x", $$, rand (0xFFFFFF)); return "$ENV{'TMPDIR'}/$f" if (defined $ENV{'TMPDIR'} && -d $ENV{'TMPDIR'}); return "/var/tmp/$f" if (-d "/var/tmp"); return "/tmp/$f";}# Parse cmdline options. Taken from /usr/local/lib/perl/getopts.pl#sub Getopts { local($argumentative) = @_; local(@args,$_,$first,$rest); local($errs) = 0; local($[) = 0; local($pos) = 0; @args = split( / */, $argumentative ); while(@ARGV && ($_ = $ARGV[0]) =~ /^-(.)(.*)/) { ($first,$rest) = ($1,$2); $pos = index($argumentative,$first); if($pos >= $[) { if($args[$pos+1] eq ':') { shift(@ARGV); if($rest eq '') { ++$errs unless @ARGV; $rest = shift(@ARGV); } eval "\$opt_$first = \$rest;"; } else { eval "\$opt_$first = 1"; if($rest eq '') { shift(@ARGV); } else { $ARGV[0] = "-$rest"; } } } else { print STDERR "Unknown option: $first\n"; ++$errs; if($rest ne '') { $ARGV[0] = "-$rest"; } else { shift(@ARGV); } } } $errs == 0;}# Do wraparound for long lines.# NOTE, this function can severely munge the spacing of a line. The first# thing it does is split all words on whitespace and then re-joins# the words separated by single spaces. But we think this is# probably OK. SOIF is intendted to be used as indexing data,# and not necessarily displayable for human consumption.#sub wrapline { local ($_) = @_; return $_ if length $_ < 72; $_ = join (" ", split) . "\n"; local ($l) = length $_; local ($i) = 0; local ($j); local ($s1); local ($s2); while (($l - $i) > 72) { $j = rindex ($_, ' ', $i + 72); $j = index ($_, ' ', $i) if ($j <= $i); $j = $l if ($j < $i); $s1 = substr ($_, 0, $j); $s2 = substr ($_, $j+1); $_ = $s1 . "\n" . $s2; $i = $j; } $_;}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -