html.pm

来自「ARM上的如果你对底层感兴趣」· PM 代码 · 共 1,572 行 · 第 1/3 页

PM
1,572
字号
package Pod::Html;

use Pod::Functions;
use Getopt::Long;	# package for handling command-line parameters
require Exporter;
use vars qw($VERSION);
$VERSION = 1.01;
@ISA = Exporter;
@EXPORT = qw(pod2html htmlify);
use Cwd;

use Carp;

use strict;

use Config;

=head1 NAME

Pod::Html - module to convert pod files to HTML

=head1 SYNOPSIS

    use Pod::Html;
    pod2html([options]);

=head1 DESCRIPTION

Converts files from pod format (see L<perlpod>) to HTML format.  It
can automatically generate indexes and cross-references, and it keeps
a cache of things it knows how to cross-reference.

=head1 ARGUMENTS

Pod::Html takes the following arguments:

=over 4

=item help

    --help

Displays the usage message.

=item htmlroot

    --htmlroot=name

Sets the base URL for the HTML files.  When cross-references are made,
the HTML root is prepended to the URL.

=item infile

    --infile=name

Specify the pod file to convert.  Input is taken from STDIN if no
infile is specified.

=item outfile

    --outfile=name

Specify the HTML file to create.  Output goes to STDOUT if no outfile
is specified.

=item podroot

    --podroot=name

Specify the base directory for finding library pods.

=item podpath

    --podpath=name:...:name

Specify which subdirectories of the podroot contain pod files whose
HTML converted forms can be linked-to in cross-references.

=item libpods

    --libpods=name:...:name

List of page names (eg, "perlfunc") which contain linkable C<=item>s.

=item netscape

    --netscape

Use Netscape HTML directives when applicable.

=item nonetscape

    --nonetscape

Do not use Netscape HTML directives (default).

=item index

    --index

Generate an index at the top of the HTML file (default behaviour).

=item noindex

    --noindex

Do not generate an index at the top of the HTML file.


=item recurse

    --recurse

Recurse into subdirectories specified in podpath (default behaviour).

=item norecurse

    --norecurse

Do not recurse into subdirectories specified in podpath.

=item title

    --title=title

Specify the title of the resulting HTML file.

=item verbose

    --verbose

Display progress messages.

=back

=head1 EXAMPLE

    pod2html("pod2html",
	     "--podpath=lib:ext:pod:vms", 
	     "--podroot=/usr/src/perl",
	     "--htmlroot=/perl/nmanual",
	     "--libpods=perlfunc:perlguts:perlvar:perlrun:perlop",
	     "--recurse",
	     "--infile=foo.pod",
	     "--outfile=/perl/nmanual/foo.html");

=head1 AUTHOR

Tom Christiansen, E<lt>tchrist@perl.comE<gt>.

=head1 BUGS

Has trouble with C<> etc in = commands.

=head1 SEE ALSO

L<perlpod>

=head1 COPYRIGHT

This program is distributed under the Artistic License.

=cut

my $dircache = "pod2html-dircache";
my $itemcache = "pod2html-itemcache";

my @begin_stack = ();		# begin/end stack

my @libpods = ();	    	# files to search for links from C<> directives
my $htmlroot = "/";	    	# http-server base directory from which all
				#   relative paths in $podpath stem.
my $htmlfile = "";		# write to stdout by default
my $podfile = "";		# read from stdin by default
my @podpath = ();		# list of directories containing library pods.
my $podroot = ".";		# filesystem base directory from which all
				#   relative paths in $podpath stem.
my $recurse = 1;		# recurse on subdirectories in $podpath.
my $verbose = 0;		# not verbose by default
my $doindex = 1;   	    	# non-zero if we should generate an index
my $listlevel = 0;		# current list depth
my @listitem = ();		# stack of HTML commands to use when a =item is
				#   encountered.  the top of the stack is the
				#   current list.
my @listdata = ();		# similar to @listitem, but for the text after
				#   an =item
my @listend = ();		# similar to @listitem, but the text to use to
				#   end the list.
my $ignore = 1;			# whether or not to format text.  we don't
				#   format text until we hit our first pod
				#   directive.

my %items_named = ();		# for the multiples of the same item in perlfunc
my @items_seen = ();
my $netscape = 0;		# whether or not to use netscape directives.
my $title;			# title to give the pod(s)
my $top = 1;			# true if we are at the top of the doc.  used
				#   to prevent the first <HR> directive.
my $paragraph;			# which paragraph we're processing (used
				#   for error messages)
my %pages = ();			# associative array used to find the location
				#   of pages referenced by L<> links.
my %sections = ();		# sections within this page
my %items = ();			# associative array used to find the location
				#   of =item directives referenced by C<> links
my $Is83;                       # is dos with short filenames (8.3)

sub init_globals {
$dircache = "pod2html-dircache";
$itemcache = "pod2html-itemcache";

@begin_stack = ();		# begin/end stack

@libpods = ();	    	# files to search for links from C<> directives
$htmlroot = "/";	    	# http-server base directory from which all
				#   relative paths in $podpath stem.
$htmlfile = "";		# write to stdout by default
$podfile = "";		# read from stdin by default
@podpath = ();		# list of directories containing library pods.
$podroot = ".";		# filesystem base directory from which all
				#   relative paths in $podpath stem.
$recurse = 1;		# recurse on subdirectories in $podpath.
$verbose = 0;		# not verbose by default
$doindex = 1;   	    	# non-zero if we should generate an index
$listlevel = 0;		# current list depth
@listitem = ();		# stack of HTML commands to use when a =item is
				#   encountered.  the top of the stack is the
				#   current list.
@listdata = ();		# similar to @listitem, but for the text after
				#   an =item
@listend = ();		# similar to @listitem, but the text to use to
				#   end the list.
$ignore = 1;			# whether or not to format text.  we don't
				#   format text until we hit our first pod
				#   directive.

@items_seen = ();
%items_named = ();
$netscape = 0;		# whether or not to use netscape directives.
$title = '';			# title to give the pod(s)
$top = 1;			# true if we are at the top of the doc.  used
				#   to prevent the first <HR> directive.
$paragraph = '';			# which paragraph we're processing (used
				#   for error messages)
%sections = ();		# sections within this page

# These are not reinitialised here but are kept as a cache.
# See get_cache and related cache management code.
#%pages = ();			# associative array used to find the location
				#   of pages referenced by L<> links.
#%items = ();			# associative array used to find the location
				#   of =item directives referenced by C<> links
$Is83=$^O eq 'dos';
}

sub pod2html {
    local(@ARGV) = @_;
    local($/);
    local $_;

    init_globals();

    $Is83 = 0 if (defined (&Dos::UseLFN) && Dos::UseLFN());

    # cache of %pages and %items from last time we ran pod2html

    #undef $opt_help if defined $opt_help;

    # parse the command-line parameters
    parse_command_line();

    # set some variables to their default values if necessary
    local *POD;
    unless (@ARGV && $ARGV[0]) { 
	$podfile  = "-" unless $podfile;	# stdin
	open(POD, "<$podfile")
		|| die "$0: cannot open $podfile file for input: $!\n";
    } else {
	$podfile = $ARGV[0];  # XXX: might be more filenames
	*POD = *ARGV;
    } 
    $htmlfile = "-" unless $htmlfile;	# stdout
    $htmlroot = "" if $htmlroot eq "/";	# so we don't get a //

    # read the pod a paragraph at a time
    warn "Scanning for sections in input file(s)\n" if $verbose;
    $/ = "";
    my @poddata  = <POD>;
    close(POD);

    # scan the pod for =head[1-6] directives and build an index
    my $index = scan_headings(\%sections, @poddata);

    unless($index) {
	warn "No pod in $podfile\n" if $verbose;
	return;
    }

    # open the output file
    open(HTML, ">$htmlfile")
	    || die "$0: cannot open $htmlfile file for output: $!\n";

    # put a title in the HTML file
    $title = '';
    TITLE_SEARCH: {
	for (my $i = 0; $i < @poddata; $i++) { 
	    if ($poddata[$i] =~ /^=head1\s*NAME\b/m) {
		for my $para ( @poddata[$i, $i+1] ) { 
		    last TITLE_SEARCH if ($title) = $para =~ /(\S+\s+-+.*\S)/s;
		}
	    } 

	} 
    } 
    if (!$title and $podfile =~ /\.pod$/) {
	# probably a split pod so take first =head[12] as title
	for (my $i = 0; $i < @poddata; $i++) { 
	    last if ($title) = $poddata[$i] =~ /^=head[12]\s*(.*)/;
	} 
	warn "adopted '$title' as title for $podfile\n"
	    if $verbose and $title;
    } 
    if ($title) {
	$title =~ s/\s*\(.*\)//;
    } else {
	warn "$0: no title for $podfile";
	$podfile =~ /^(.*)(\.[^.\/]+)?$/;
	$title = ($podfile eq "-" ? 'No Title' : $1);
	warn "using $title" if $verbose;
    }
    print HTML <<END_OF_HEAD;
<HTML>
<HEAD>
<TITLE>$title</TITLE>
<LINK REV="made" HREF="mailto:$Config{perladmin}">
</HEAD>

<BODY>

END_OF_HEAD

    # load/reload/validate/cache %pages and %items
    get_cache($dircache, $itemcache, \@podpath, $podroot, $recurse);

    # scan the pod for =item directives
    scan_items("", \%items, @poddata);

    # put an index at the top of the file.  note, if $doindex is 0 we
    # still generate an index, but surround it with an html comment.
    # that way some other program can extract it if desired.
    $index =~ s/--+/-/g;
    print HTML "<!-- INDEX BEGIN -->\n";
    print HTML "<!--\n" unless $doindex;
    print HTML $index;
    print HTML "-->\n" unless $doindex;
    print HTML "<!-- INDEX END -->\n\n";
    print HTML "<HR>\n" if $doindex;

    # now convert this file
    warn "Converting input file\n" if $verbose;
    foreach my $i (0..$#poddata) {
	$_ = $poddata[$i];
	$paragraph = $i+1;
	if (/^(=.*)/s) {	# is it a pod directive?
	    $ignore = 0;
	    $_ = $1;
	    if (/^=begin\s+(\S+)\s*(.*)/si) {# =begin
		process_begin($1, $2);
	    } elsif (/^=end\s+(\S+)\s*(.*)/si) {# =end
		process_end($1, $2);
	    } elsif (/^=cut/) {			# =cut
		process_cut();
	    } elsif (/^=pod/) {			# =pod
		process_pod();
	    } else {
		next if @begin_stack && $begin_stack[-1] ne 'html';

		if (/^=(head[1-6])\s+(.*\S)/s) {	# =head[1-6] heading
		    process_head($1, $2);
		} elsif (/^=item\s*(.*\S)/sm) {	# =item text
		    process_item($1);
		} elsif (/^=over\s*(.*)/) {		# =over N
		    process_over();
		} elsif (/^=back/) {		# =back
		    process_back();
		} elsif (/^=for\s+(\S+)\s+(.*)/si) {# =for
		    process_for($1,$2);
		} else {
		    /^=(\S*)\s*/;
		    warn "$0: $podfile: unknown pod directive '$1' in "
		       . "paragraph $paragraph.  ignoring.\n";
		}
	    }
	    $top = 0;
	}
	else {
	    next if $ignore;
	    next if @begin_stack && $begin_stack[-1] ne 'html';
	    my $text = $_;
	    process_text(\$text, 1);
	    print HTML "<P>\n$text";
	}
    }

    # finish off any pending directives
    finish_list();
    print HTML <<END_OF_TAIL;
</BODY>

</HTML>
END_OF_TAIL

    # close the html file
    close(HTML);

    warn "Finished\n" if $verbose;
}

##############################################################################

my $usage;			# see below
sub usage {
    my $podfile = shift;
    warn "$0: $podfile: @_\n" if @_;
    die $usage;
}

$usage =<<END_OF_USAGE;
Usage:  $0 --help --htmlroot=<name> --infile=<name> --outfile=<name>
           --podpath=<name>:...:<name> --podroot=<name>
           --libpods=<name>:...:<name> --recurse --verbose --index
           --netscape --norecurse --noindex

  --flush      - flushes the item and directory caches.
  --help       - prints this message.
  --htmlroot   - http-server base directory from which all relative paths
                 in podpath stem (default is /).
  --index      - generate an index at the top of the resulting html
                 (default).
  --infile     - filename for the pod to convert (input taken from stdin
                 by default).
  --libpods    - colon-separated list of pages to search for =item pod
                 directives in as targets of C<> and implicit links (empty
                 by default).  note, these are not filenames, but rather
                 page names like those that appear in L<> links.
  --netscape   - will use netscape html directives when applicable.
  --nonetscape - will not use netscape directives (default).
  --outfile    - filename for the resulting html file (output sent to
                 stdout by default).
  --podpath    - colon-separated list of directories containing library
                 pods.  empty by default.
  --podroot    - filesystem base directory from which all relative paths
                 in podpath stem (default is .).
  --noindex    - don't generate an index at the top of the resulting html.
  --norecurse  - don't recurse on those subdirectories listed in podpath.
  --recurse    - recurse on those subdirectories listed in podpath
                 (default behavior).
  --title      - title that will appear in resulting html file.
  --verbose    - self-explanatory

END_OF_USAGE

sub parse_command_line {
    my ($opt_flush,$opt_help,$opt_htmlroot,$opt_index,$opt_infile,$opt_libpods,$opt_netscape,$opt_outfile,$opt_podpath,$opt_podroot,$opt_norecurse,$opt_recurse,$opt_title,$opt_verbose);
    my $result = GetOptions(
			    'flush'      => \$opt_flush,
			    'help'       => \$opt_help,
			    'htmlroot=s' => \$opt_htmlroot,
			    'index!'     => \$opt_index,
			    'infile=s'   => \$opt_infile,
			    'libpods=s'  => \$opt_libpods,
			    'netscape!'  => \$opt_netscape,
			    'outfile=s'  => \$opt_outfile,
			    'podpath=s'  => \$opt_podpath,
			    'podroot=s'  => \$opt_podroot,
			    'norecurse'  => \$opt_norecurse,
			    'recurse!'   => \$opt_recurse,
			    'title=s'    => \$opt_title,
			    'verbose'    => \$opt_verbose,
			   );
    usage("-", "invalid parameters") if not $result;

    usage("-") if defined $opt_help;	# see if the user asked for help
    $opt_help = "";			# just to make -w shut-up.

    $podfile  = $opt_infile if defined $opt_infile;
    $htmlfile = $opt_outfile if defined $opt_outfile;

    @podpath  = split(":", $opt_podpath) if defined $opt_podpath;
    @libpods  = split(":", $opt_libpods) if defined $opt_libpods;

    warn "Flushing item and directory caches\n"
	if $opt_verbose && defined $opt_flush;
    unlink($dircache, $itemcache) if defined $opt_flush;

    $htmlroot = $opt_htmlroot if defined $opt_htmlroot;
    $podroot  = $opt_podroot if defined $opt_podroot;

    $doindex  = $opt_index if defined $opt_index;
    $recurse  = $opt_recurse if defined $opt_recurse;
    $title    = $opt_title if defined $opt_title;
    $verbose  = defined $opt_verbose ? 1 : 0;
    $netscape = $opt_netscape if defined $opt_netscape;
}


my $saved_cache_key;

sub get_cache {
    my($dircache, $itemcache, $podpath, $podroot, $recurse) = @_;
    my @cache_key_args = @_;

    # A first-level cache:
    # Don't bother reading the cache files if they still apply
    # and haven't changed since we last read them.

    my $this_cache_key = cache_key(@cache_key_args);

    return if $saved_cache_key and $this_cache_key eq $saved_cache_key;

    # load the cache of %pages and %items if possible.  $tests will be
    # non-zero if successful.
    my $tests = 0;
    if (-f $dircache && -f $itemcache) {

⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?