📄 html.pm
字号:
} elsif ($tagname eq 'item-text' or $tagname =~ m/^head\d$/s) { print $fh $tagmap->{$tagname} || next; my @to_unget; while(1) { push @to_unget, $self->get_token; last if $to_unget[-1]->is_end and $to_unget[-1]->tagname eq $tagname; # TODO: support for X<...>'s found in here? (maybe hack into linearize_tokens) } my $name = $self->linearize_tokens(@to_unget); print $fh "<a "; print $fh "class='u' href='#___top' title='click to go to top of document'\n" if $tagname =~ m/^head\d$/s; if(defined $name) { my $esc = esc( $self->section_name_tidy( $name ) ); print $fh qq[name="$esc"]; DEBUG and print "Linearized ", scalar(@to_unget), " tokens as \"$name\".\n"; push @{ $self->{'PSHTML_index_points'} }, [$tagname, $name] if $ToIndex{ $tagname }; # Obviously, this discards all formatting codes (saving # just their content), but ahwell. } else { # ludicrously long, so nevermind DEBUG and print "Linearized ", scalar(@to_unget), " tokens, but it was too long, so nevermind.\n"; } print $fh "\n>"; $self->unget_token(@to_unget); } elsif ($tagname eq 'Data') { my $next = $self->get_token; next unless defined $next; unless( $next->type eq 'text' ) { $self->unget_token($next); next; } DEBUG and print " raw text ", $next->text, "\n"; printf $fh "\n" . $next->text . "\n"; next; } else { if( $tagname =~ m/^over-/s ) { push @stack, ''; } elsif( $tagname =~ m/^item-/s and @stack and $stack[-1] ) { print $fh $stack[-1]; $stack[-1] = ''; } print $fh $tagmap->{$tagname} || next; ++$dont_wrap if $tagname eq 'Verbatim' or $tagname eq "VerbatimFormatted" or $tagname eq 'X'; } # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - } elsif( $type eq 'end' ) { if( ($tagname = $token->tagname) =~ m/^over-/s ) { if( my $end = pop @stack ) { print $fh $end; } } elsif( $tagname =~ m/^item-/s and @stack) { $stack[-1] = $tagmap->{"/$tagname"}; if( $tagname eq 'item-text' and defined(my $next = $self->get_token) ) { $self->unget_token($next); if( $next->type eq 'start' and $next->tagname !~ m/^item-/s ) { print $fh $tagmap->{"/item-text"},$tagmap->{"item-body"}; $stack[-1] = $tagmap->{"/item-body"}; } } next; } print $fh $tagmap->{"/$tagname"} || next; --$dont_wrap if $tagname eq 'Verbatim' or $tagname eq 'X'; # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - } elsif( $type eq 'text' ) { esc($type = $token->text); # reuse $type, why not $type =~ s/([\?\!\"\'\.\,]) /$1\n/g unless $dont_wrap; print $fh $type; } } return 1;}############################################################################sub do_link { my($self, $token) = @_; my $type = $token->attr('type'); if(!defined $type) { $self->whine("Typeless L!?", $token->attr('start_line')); } elsif( $type eq 'pod') { return $self->do_pod_link($token); } elsif( $type eq 'url') { return $self->do_url_link($token); } elsif( $type eq 'man') { return $self->do_man_link($token); } else { $self->whine("L of unknown type $type!?", $token->attr('start_line')); } return 'FNORG'; # should never get called}# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -sub do_url_link { return $_[1]->attr('to') }sub do_man_link { return undef } # But subclasses are welcome to override this if they have man # pages somewhere URL-accessible.sub do_pod_link { # And now things get really messy... my($self, $link) = @_; my $to = $link->attr('to'); my $section = $link->attr('section'); return undef unless( # should never happen (defined $to and length $to) or (defined $section and length $section) ); $section = $self->section_escape($section) if defined $section and length($section .= ''); # (stringify) DEBUG and printf "Resolving \"%s\" \"%s\"...\n", $to || "(nil)", $section || "(nil)"; { # An early hack: my $complete_url = $self->resolve_pod_link_by_table($to, $section); if( $complete_url ) { DEBUG > 1 and print "resolve_pod_link_by_table(T,S) gives ", $complete_url, "\n (Returning that.)\n"; return $complete_url; } else { DEBUG > 4 and print " resolve_pod_link_by_table(T,S)", " didn't return anything interesting.\n"; } } if(defined $to and length $to) { # Give this routine first hack again my $there = $self->resolve_pod_link_by_table($to); if(defined $there and length $there) { DEBUG > 1 and print "resolve_pod_link_by_table(T) gives $there\n"; } else { $there = $self->resolve_pod_page_link($to, $section); # (I pass it the section value, but I don't see a # particular reason it'd use it.) DEBUG > 1 and print "resolve_pod_page_link gives ", $to || "(nil)", "\n"; unless( defined $there and length $there ) { DEBUG and print "Can't resolve $to\n"; return undef; } # resolve_pod_page_link returning undef is how it # can signal that it gives up on making a link } $to = $there; } #DEBUG and print "So far [", $to||'nil', "] [", $section||'nil', "]\n"; my $out = (defined $to and length $to) ? $to : ''; $out .= "#" . $section if defined $section and length $section; unless(length $out) { # sanity check DEBUG and printf "Oddly, couldn't resolve \"%s\" \"%s\"...\n", $to || "(nil)", $section || "(nil)"; return undef; } DEBUG and print "Resolved to $out\n"; return $out; }# . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .sub section_escape { my($self, $section) = @_; return $self->section_url_escape( $self->section_name_tidy($section) );}sub section_name_tidy { my($self, $section) = @_; $section =~ tr/ /_/; $section =~ tr/\x00-\x1F\x80-\x9F//d if 'A' eq chr(65); # drop crazy characters $section = $self->unicode_escape_url($section); $section = '_' unless length $section; return $section;}sub section_url_escape { shift->general_url_escape(@_) }sub pagepath_url_escape { shift->general_url_escape(@_) }sub general_url_escape { my($self, $string) = @_; $string =~ s/([^\x00-\xFF])/join '', map sprintf('%%%02X',$_), unpack 'C*', $1/eg; # express Unicode things as urlencode(utf(orig)). # A pretty conservative escaping, behoovey even for query components # of a URL (see RFC 2396) $string =~ s/([^-_\.!~*()abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789])/sprintf('%%%02X',ord($1))/eg; # Yes, stipulate the list without a range, so that this can work right on # all charsets that this module happens to run under. # Altho, hmm, what about that ord? Presumably that won't work right # under non-ASCII charsets. Something should be done # about that, I guess? return $string;}#--------------------------------------------------------------------------## Oh look, a yawning portal to Hell! Let's play touch football right by it!#sub resolve_pod_page_link { # resolve_pod_page_link must return a properly escaped URL my $self = shift; return $self->batch_mode() ? $self->resolve_pod_page_link_batch_mode(@_) : $self->resolve_pod_page_link_singleton_mode(@_) ;}sub resolve_pod_page_link_singleton_mode { my($self, $it) = @_; return undef unless defined $it and length $it; my $url = $self->pagepath_url_escape($it); $url =~ s{::$}{}s; # probably never comes up anyway $url =~ s{::}{/}g unless $self->perldoc_url_prefix =~ m/\?/s; # sane DWIM? return undef unless length $url; return $self->perldoc_url_prefix . $url . $self->perldoc_url_postfix;}sub resolve_pod_page_link_batch_mode { my($self, $to) = @_; DEBUG > 1 and print " During batch mode, resolving $to ...\n"; my @path = grep length($_), split m/::/s, $to, -1; unless( @path ) { # sanity DEBUG and print "Very odd! Splitting $to gives (nil)!\n"; return undef; } $self->batch_mode_rectify_path(\@path); my $out = join('/', map $self->pagepath_url_escape($_), @path) . $HTML_EXTENSION; DEBUG > 1 and print " => $out\n"; return $out;}sub batch_mode_rectify_path { my($self, $pathbits) = @_; my $level = $self->batch_mode_current_level; $level--; # how many levels up to go to get to the root if($level < 1) { unshift @$pathbits, '.'; # just to be pretty } else { unshift @$pathbits, ('..') x $level; } return;}#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~sub resolve_pod_link_by_table { # A crazy hack to allow specifying custom L<foo> => URL mappings return unless $_[0]->{'podhtml_LOT'}; # An optimizy shortcut my($self, $to, $section) = @_; # TODO: add a method that actually populates podhtml_LOT from a file? if(defined $section) { $to = '' unless defined $to and length $to; return $self->{'podhtml_LOT'}{"$to#$section"}; # quite possibly undef! } else { return $self->{'podhtml_LOT'}{$to}; # quite possibly undef! } return;}###########################################################################sub linearize_tokens { # self, tokens my $self = shift; my $out = ''; my $t; while($t = shift @_) { if(!ref $t or !UNIVERSAL::can($t, 'is_text')) { $out .= $t; # a string, or some insane thing } elsif($t->is_text) { $out .= $t->text; } elsif($t->is_start and $t->tag eq 'X') { # Ignore until the end of this X<...> sequence: my $x_open = 1; while($x_open) { next if( ($t = shift @_)->is_text ); if( $t->is_start and $t->tag eq 'X') { ++$x_open } elsif($t->is_end and $t->tag eq 'X') { --$x_open } } } } return undef if length $out > $Linearization_Limit; return $out;}#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~sub unicode_escape_url { my($self, $string) = @_; $string =~ s/([^\x00-\xFF])/'('.ord($1).')'/eg; # Turn char 1234 into "(1234)" return $string;}#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~sub esc { # a function. if(defined wantarray) { if(wantarray) { @_ = splice @_; # break aliasing } else { my $x = shift; $x =~ s/([^-\n\t !\#\$\%\(\)\*\+,\.\~\/\:\;=\?\@\[\\\]\^_\`\{\|\}abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789])/'&#'.(ord($1)).';'/eg; return $x; } } foreach my $x (@_) { # Escape things very cautiously: $x =~ s/([^-\n\t !\#\$\%\(\)\*\+,\.\~\/\:\;=\?\@\[\\\]\^_\`\{\|\}abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789])/'&#'.(ord($1)).';'/eg if defined $x; # Leave out "- so that "--" won't make it thru in X-generated comments # with text in them. # Yes, stipulate the list without a range, so that this can work right on # all charsets that this module happens to run under. # Altho, hmm, what about that ord? Presumably that won't work right # under non-ASCII charsets. Something should be done about that. } return @_;}#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~1;__END__=head1 NAMEPod::Simple::HTML - convert Pod to HTML=head1 SYNOPSIS perl -MPod::Simple::HTML -e Pod::Simple::HTML::go thingy.pod=head1 DESCRIPTIONThis class is for making an HTML rendering of a Pod document.This is a subclass of L<Pod::Simple::PullParser> and inherits all itsmethods (and options).Note that if you want to do a batch conversion of a lot of Poddocuments to HTML, you should see the module L<Pod::Simple::HTMLBatch>.=head1 CALLING FROM THE COMMAND LINETODO perl -MPod::Simple::HTML -e Pod::Simple::HTML::go Thing.pod Thing.html=head1 CALLING FROM PERLTODO make a new object, set any options, and use parse_from_file=head1 METHODSTODOall (most?) accessorized methods=head1 SUBCLASSINGTODO can just set any of: html_css html_javascript title_prefix 'html_header_before_title', 'html_header_after_title', 'html_footer',maybe override do_pod_linkmaybe override do_beginning do_end=head1 SEE ALSOL<Pod::Simple>, L<Pod::Simple::HTMLBatch>TODO: a corpus of sample Pod input and HTML output? Or commonidioms?=head1 COPYRIGHT AND DISCLAIMERSCopyright (c) 2002-2004 Sean M. Burke. All rights reserved.This library is free software; you can redistribute it and/or modify itunder the same terms as Perl itself.This program is distributed in the hope that it will be useful, butwithout any warranty; without even the implied warranty ofmerchantability or fitness for a particular purpose.=head1 AUTHORSean M. Burke C<sburke@cpan.org>=cut
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -