html.pm

来自「UNIX下perl实现代码」· PM 代码 · 共 2,026 行 · 第 1/4 页
2,026 行
	warn "$0: $podfile: unterminated list at =head in paragraph $paragraph.  ignoring.\n";        while( $listlevel ){            process_back();        }    }    print HTML "<P>\n";    if( $level == 1 && ! $top ){	print HTML "<A HREF=\"#__index__\"><SMALL>$backlink</SMALL></A>\n"	    if $hasindex and $backlink;	print HTML "<HR>\n"    }    my $name = htmlify( depod( $heading ) );    my $convert = process_text( \$heading );    print HTML "<H$level><A NAME=\"$name\">$convert</A></H$level>\n";}## emit_item_tag - print an =item's text# Note: The global $EmittedItem is used for inhibiting self-references.#my $EmittedItem;sub emit_item_tag($$$){    my( $otext, $text, $compact ) = @_;    my $item = fragment_id( $text );    $EmittedItem = $item;    ### print STDERR "emit_item_tag=$item ($text)\n";    print HTML '<STRONG>';    if ($items_named{$item}++) {	print HTML process_text( \$otext );    } else {	my $name = 'item_' . $item;	print HTML qq{<A NAME="$name">}, process_text( \$otext ), '</A>';    }    print HTML "</STRONG><BR>\n";    undef( $EmittedItem );}sub emit_li {    my( $tag ) = @_;    if( $items_seen[$listlevel]++ == 0 ){	push( @listend, "</$tag>" );	print HTML "<$tag>\n";    }    print HTML $tag eq 'DL' ? '<DT>' : '<LI>';}## process_item - convert a pod item tag and convert it to HTML format.#sub process_item {    my( $otext ) = @_;    # lots of documents start a list without doing an =over.  this is    # bad!  but, the proper thing to do seems to be to just assume    # they did do an =over.  so warn them once and then continue.    if( $listlevel == 0 ){	warn "$0: $podfile: unexpected =item directive in paragraph $paragraph.  ignoring.\n";	process_over();    }    # formatting: insert a paragraph if preceding item has >1 paragraph    if( $after_lpar ){	print HTML "<P></P>\n";	$after_lpar = 0;    }    # remove formatting instructions from the text    my $text = depod( $otext );    # all the list variants:    if( $text =~ /\A\*/ ){ # bullet        emit_li( 'UL' );	if ($text =~ /\A\*\s+(.+)\Z/s ) { # with additional text	    my $tag = $1;	    $otext =~ s/\A\*\s+//;	    emit_item_tag( $otext, $tag, 1 );	}    } elsif( $text =~ /\A\d+/ ){ # numbered list	emit_li( 'OL' );	if ($text =~ /\A(?>\d+\.?)\s*(.+)\Z/s ) { # with additional text	    my $tag = $1;	    $otext =~ s/\A\d+\.?\s*//;	    emit_item_tag( $otext, $tag, 1 );	}    } else {			# definition list	emit_li( 'DL' );	if ($text =~ /\A(.+)\Z/s ){ # should have text	    emit_item_tag( $otext, $text, 1 );	}       print HTML '<DD>';    }    print HTML "\n";}## process_over - process a pod over tag and start a corresponding HTML list.#sub process_over {    # start a new list    $listlevel++;    push( @items_seen, 0 );    $after_lpar = 0;}## process_back - process a pod back tag and convert it to HTML format.#sub process_back {    if( $listlevel == 0 ){	warn "$0: $podfile: unexpected =back directive in paragraph $paragraph.  ignoring.\n";	return;    }    # close off the list.  note, I check to see if $listend[$listlevel] is    # defined because an =item directive may have never appeared and thus    # $listend[$listlevel] may have never been initialized.    $listlevel--;    if( defined $listend[$listlevel] ){	print HTML '<P></P>' if $after_lpar;	print HTML $listend[$listlevel];        print HTML "\n";        pop( @listend );    }    $after_lpar = 0;    # clean up item count    pop( @items_seen );}## process_cut - process a pod cut tag, thus start ignoring pod directives.#sub process_cut {    $ignore = 1;}## process_pod - process a pod pod tag, thus stop ignoring pod directives# until we see a corresponding cut.#sub process_pod {    # no need to set $ignore to 0 cause the main loop did it}## process_for - process a =for pod tag.  if it's for html, spit# it out verbatim, if illustration, center it, otherwise ignore it.#sub process_for {    my($whom, $text) = @_;    if ( $whom =~ /^(pod2)?html$/i) {	print HTML $text;    } elsif ($whom =~ /^illustration$/i) {        1 while chomp $text;	for my $ext (qw[.png .gif .jpeg .jpg .tga .pcl .bmp]) {	  $text .= $ext, last if -r "$text$ext";	}        print HTML qq{<p align = "center"><img src = "$text" alt = "$text illustration"></p>};    }}## process_begin - process a =begin pod tag.  this pushes# whom we're beginning on the begin stack.  if there's a# begin stack, we only print if it us.#sub process_begin {    my($whom, $text) = @_;    $whom = lc($whom);    push (@begin_stack, $whom);    if ( $whom =~ /^(pod2)?html$/) {	print HTML $text if $text;    }}## process_end - process a =end pod tag.  pop the# begin stack.  die if we're mismatched.#sub process_end {    my($whom, $text) = @_;    $whom = lc($whom);    if ($begin_stack[-1] ne $whom ) {	die "Unmatched begin/end at chunk $paragraph\n"    }     pop( @begin_stack );}## process_pre - indented paragraph, made into <PRE></PRE>#sub process_pre {    my( $text ) = @_;    my( $rest );    return if $ignore;    $rest = $$text;    # insert spaces in place of tabs    $rest =~ s#.*#	    my $line = $&;	    1 while $line =~ s/\t+/' ' x (length($&) * 8 - length($`) % 8)/e;	    $line;	#eg;    # convert some special chars to HTML escapes    $rest =~ s/&/&amp;/g;    $rest =~ s/</&lt;/g;    $rest =~ s/>/&gt;/g;    $rest =~ s/"/&quot;/g;    # try and create links for all occurrences of perl.* within    # the preformatted text.    $rest =~ s{	         (\s*)(perl\w+)	      }{		 if ( defined $pages{$2} ){	# is a link		     qq($1<A HREF="$htmlroot/$pages{$2}">$2</A>);		 } elsif (defined $pages{dosify($2)}) {	# is a link		     qq($1<A HREF="$htmlroot/$pages{dosify($2)}">$2</A>);		 } else {		     "$1$2";		 }	      }xeg;     $rest =~ s{		 (<A\ HREF="?) ([^>:]*:)? ([^>:]*) \.pod: ([^>:]*:)?               }{                  my $url ;                  if ( $htmlfileurl ne '' ){		     # Here, we take advantage of the knowledge 		     # that $htmlfileurl ne '' implies $htmlroot eq ''.		     # Since $htmlroot eq '', we need to prepend $htmldir		     # on the fron of the link to get the absolute path		     # of the link's target. We check for a leading '/'		     # to avoid corrupting links that are #, file:, etc.		     my $old_url = $3 ;		     $old_url = "$htmldir$old_url" if $old_url =~ m{^\/}; 		     $url = relativize_url( "$old_url.html", $htmlfileurl );	          } else {		     $url = "$3.html" ;		  }		  "$1$url" ;	       }xeg;    # Look for embedded URLs and make them into links.  We don't    # relativize them since they are best left as the author intended.    my $urls = '(' . join ('|', qw{                http                telnet		mailto		news                gopher                file                wais                ftp            } )         . ')';      my $ltrs = '\w';    my $gunk = '/#~:.?+=&%@!\-';    my $punc = '.:?\-';    my $any  = "${ltrs}${gunk}${punc}";    $rest =~ s{        \b                          # start at word boundary        (                           # begin $1  {          $urls     :               # need resource and a colon	  (?!:)                     # Ignore File::, among others.          [$any] +?                 # followed by on or more                                    #  of any valid character, but                                    #  be conservative and take only                                    #  what you need to....        )                           # end   $1  }        (?=                         # look-ahead non-consumptive assertion                [$punc]*            # either 0 or more puntuation                [^$any]             #   followed by a non-url char            |                       # or else                $                   #   then end of the string        )      }{<A HREF="$1">$1</A>}igox;    # text should be as it is (verbatim)    $$text = $rest;}## pure text processing## pure_text/inIS_text: differ with respect to automatic C<> recognition.# we don't want this to happen within IS#sub pure_text($){    my $text = shift();    process_puretext( $text, \$ptQuote, 1 );}sub inIS_text($){    my $text = shift();    process_puretext( $text, \$ptQuote, 0 );}## process_puretext - process pure text (without pod-escapes) converting#  double-quotes and handling implicit C<> links.#sub process_puretext {    my($text, $quote, $notinIS) = @_;    ## Guessing at func() or [$@%&]*var references in plain text is destined    ## to produce some strange looking ref's. uncomment to disable:    ## $notinIS = 0;    my(@words, $lead, $trail);    # convert double-quotes to single-quotes    if( $$quote && $text =~ s/"/''/s ){        $$quote = 0;    }    while ($text =~ s/"([^"]*)"/``$1''/sg) {};    $$quote = 1 if $text =~ s/"/``/s;    # keep track of leading and trailing white-space    $lead  = ($text =~ s/\A(\s+)//s ? $1 : "");    $trail = ($text =~ s/(\s+)\Z//s ? $1 : "");    # split at space/non-space boundaries    @words = split( /(?<=\s)(?=\S)|(?<=\S)(?=\s)/, $text );    # process each word individually    foreach my $word (@words) {	# skip space runs 	next if $word =~ /^\s*$/;	# see if we can infer a link	if( $notinIS && $word =~ /^(\w+)\((.*)\)$/ ) {	    # has parenthesis so should have been a C<> ref            ## try for a pagename (perlXXX(1))?            my( $func, $args ) = ( $1, $2 );            if( $args =~ /^\d+$/ ){                my $url = page_sect( $word, '' );                if( defined $url ){                    $word = "<A HREF=\"$url\">the $word manpage</A>";                    next;                }            }            ## try function name for a link, append tt'ed argument list            $word = emit_C( $func, '', "($args)");#### disabled. either all (including $\W, $\w+{.*} etc.) or nothing.##      } elsif( $notinIS && $word =~ /^[\$\@%&*]+\w+$/) {##	    # perl variables, should be a C<> ref##	    $word = emit_C( $word );	} elsif ($word =~ m,^\w+://\w,) {	    # looks like a URL            # Don't relativize it: leave it as the author intended	    $word = qq(<A HREF="$word">$word</A>);	} elsif ($word =~ /[\w.-]+\@[\w-]+\.\w/) {	    # looks like an e-mail address	    my ($w1, $w2, $w3) = ("", $word, "");	    ($w1, $w2, $w3) = ("(", $1, ")$2") if $word =~ /^\((.*?)\)(,?)/;	    ($w1, $w2, $w3) = ("&lt;", $1, "&gt;$2") if $word =~ /^<(.*?)>(,?)/;	    $word = qq($w1<A HREF="mailto:$w2">$w2</A>$w3);	} elsif ($word !~ /[a-z]/ && $word =~ /[A-Z]/) {  # all uppercase?	    $word = html_escape($word) if $word =~ /["&<>]/;	    $word = "\n<FONT SIZE=-1>$word</FONT>" if $netscape;	} else { 	    $word = html_escape($word) if $word =~ /["&<>]/;	}    }    # put everything back together    return $lead . join( '', @words ) . $trail;}## process_text - handles plaintext that appears in the input pod file.# there may be pod commands embedded within the text so those must be# converted to html commands.#sub process_text1($$;$$);sub pattern ($) { $_[0] ? '[^\S\n]+'.('>' x ($_[0] + 1)) : '>' }sub closing ($) { local($_) = shift; (defined && s/\s+$//) ? length : 0 }sub process_text {    return if $ignore;    my( $tref ) = @_;    my $res = process_text1( 0, $tref );    $$tref = $res;}sub process_text1($$;$$){    my( $lev, $rstr, $func, $closing ) = @_;    my $res = '';    unless (defined $func) {	$func = '';	$lev++;    }    if( $func eq 'B' ){	# B<text> - boldface	$res = '<STRONG>' . process_text1( $lev, $rstr ) . '</STRONG>';    } elsif( $func eq 'C' ){	# C<code> - can be a ref or <CODE></CODE>	# need to extract text	my $par = go_ahead( $rstr, 'C', $closing );	## clean-up of the link target        my $text = depod( $par );	### my $x = $par =~ /[BI]</ ? 'yes' : 'no' ;        ### print STDERR "-->call emit_C($par) lev=$lev, par with BI=$x\n"; 	$res = emit_C( $text, $lev > 1 || ($par =~ /[BI]</) );    } elsif( $func eq 'E' ){	# E<x> - convert to character	$$rstr =~ s/^([^>]*)>//;	my $escape = $1;	$escape =~ s/^(\d+|X[\dA-F]+)$/#$1/i;	$res = "&$escape;";    } elsif( $func eq 'F' ){	# F<filename> - italizice	$res = '<EM>' . process_text1( $lev, $rstr ) . '</EM>';    } elsif( $func eq 'I' ){	# I<text> - italizice	$res = '<EM>' . process_text1( $lev, $rstr ) . '</EM>';    } elsif( $func eq 'L' ){	# L<link> - link	## L<text|cross-ref> => produce text, use cross-ref for linking 	## L<cross-ref> => make text from cross-ref	## need to extract text	my $par = go_ahead( $rstr, 'L', $closing );        # some L<>'s that shouldn't be:	# a) full-blown URL's are emitted as-is        if( $par =~ m{^\w+://}s ){	    return make_URL_href( $par );	}        # b) C<...> is stripped and treated as C<>        if( $par =~ /^C<(.*)>$/ ){	    my $text = depod( $1 ); 	    return emit_C( $text, $lev > 1 || ($par =~ /[BI]</) );	}	# analyze the contents	$par =~ s/\n/ /g;   # undo word-wrapped tags        my $opar = $par;	my $linktext;	if( $par =~ s{^([^|]+)\|}{} ){	    $linktext = $1;	}    	# make sure sections start with a /	$par =~ s{^"}{/"};	my( $page, $section, $ident );	# check for link patterns	if( $par =~ m{^([^/]+?)/(?!")(.*?)$} ){     # name/ident            # we've got a name/ident (no quotes)             ( $page, $ident ) = ( $1, $2 );            ### print STDERR "--> L<$par> to page $page, ident $ident\n";	} elsif( $par =~ m{^(.*?)/"?(.*?)"?$} ){ # [name]/"section"            # even though this should be a "section", we go for ident first	    ( $page, $ident ) = ( $1, $2 );            ### print STDERR "--> L<$par> to page $page, section $section\n";	} elsif( $par =~ /\s/ ){  # this must be a section with missing quotes	    ( $page, $section ) = ( '', $par );            ### print STDERR "--> L<$par> to void page, section $section\n";        } else {	    ( $page, $section ) = ( $par, '' );            ### print STDERR "--> L<$par> to page $par, void section\n";	}        # now, either $section or $ident is defined. the convoluted logic        # below tries to resolve L<> according to what the user specified.        # failing this, we try to find the next best thing...        my( $url, $ltext, $fid );        RESOLVE: {            if( defined $ident ){                ## try to resolve $ident as an item	        ( $url, $fid ) = coderef( $page, $ident );                if( $url ){                    if( ! defined( $linktext ) ){                        $linktext = $ident;                        $linktext .= " in " if $ident && $page;
html.pm - 源码说明

本页面展示了「UNIX下perl实现代码」中的 html.pm 源码文件，采用 PM 编程语言编写，共 2,026 行代码。您可以在线阅读完整代码内容，也可以返回资源详情页下载完整源码包进行本地学习和开发。
虫虫下载站收录了大量与UNIX相关的技术资源，包括源代码、技术文档、电路图等，是电子工程师和嵌入式开发者的专业学习平台。
⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?