📄 rdsrc.pl
字号:
#!/usr/bin/perl

# Read the source-form of the NASM manual and generate the various
# output forms.

# TODO:
#
# Ellipsis support would be nice.

# Source-form features:
# ---------------------
#
# Bullet \b
#   Bullets the paragraph. Rest of paragraph is indented to cope. In
#   HTML, consecutive groups of bulleted paragraphs become unordered
#   lists.
#
# Emphasis \e{foobar}
#   produces `_foobar_' in text and italics in HTML, PS, RTF
#
# Inline code \c{foobar}
#   produces ``foobar'' in text, and fixed-pitch font in HTML, PS, RTF
#
# Display code
# \c line one
# \c line two
#   produces fixed-pitch font where appropriate, and doesn't break
#   pages except sufficiently far into the middle of a display.
#
# Chapter, appendix, header and subheader
# \C{intro} Introduction
# \A{appx} An Appendix
# \H{whatsnasm} What is NASM?
# \S{free} NASM Is Free
#   dealt with as appropriate. Chapters begin on new sides, possibly
#   even new _pages_. (Sub)?headers are good places to begin new
#   pages. Just _after_ a (sub)?header isn't.
#   Appendices continue the chapter counter, but lettered from `A'.
#   The keywords can be substituted with \K and \k.
#
# Keyword \K{cintro} \k{cintro}
#   Expands to `Chapter 1', `Section 1.1', `Section 1.1.1'. \K has an
#   initial capital whereas \k doesn't. In HTML, will produce
#   hyperlinks.
#
# Web link \W{http://foobar/}{text} or \W{mailto:me@here}\c{me@here}
#   the \W prefix is ignored except in HTML; in HTML the last part
#   becomes a hyperlink to the first part.
#
# Literals \{ \} \\
#   In case it's necessary, they expand to the real versions.
#
# Nonbreaking hyphen \-
#   Need more be said?
#
# Source comment \#
#   Causes everything after it on the line to be ignored by the
#   source-form processor.
#
# Indexable word \i{foobar} (or \i\e{foobar} or \i\c{foobar}, equally)
#   makes word appear in index, referenced to that point
#   \i\c comes up in code style even in the index; \i\e doesn't come
#   up in emphasised style.
#
# Indexable non-displayed word \I{foobar} or \I\c{foobar}
#   just as \i{foobar} except that nothing is displayed for it
#
# Index rewrite
# \IR{foobar} \c{foobar} operator, uses of
#   tidies up the appearance in the index of something the \i or \I
#   operator was applied to
#
# Index alias
# \IA{foobar}{bazquux}
#   aliases one index tag (as might be supplied to \i or \I) to
#   another, so that \I{foobar} has the effect of \I{bazquux}, and
#   \i{foobar} has the effect of \I{bazquux}foobar
#
# Metadata
# \M{key}{something}
#   defines document metadata, such as authorship, title and copyright;
#   different output formats use this differently.
#

# NOTE: this script deliberately runs without `use strict': paragraphs
# are stored in package arrays whose *names* are generated at run time
# ("para000000", "para000001", ...) and reached via symbolic references.

# "-d" as the first argument requests the index-diagnostic file.
if (@ARGV && $ARGV[0] eq "-d") {
    $diag = 1;
    shift @ARGV;
}

$| = 1;

$tstruct_previtem = $node = "Top";
# NOTE(review): this assigns the scalar $nodes, not the array @nodes that
# indexdiag iterates over; it looks like a typo for `@nodes = ($node);'.
# Left unchanged because the other consumers of @nodes are not visible
# here -- confirm before changing.
$nodes = ($node);
$tstruct_level{$tstruct_previtem} = 0;
$tstruct_last[$tstruct_level{$tstruct_previtem}] = $tstruct_previtem;

$MAXLEVEL = 10;  # really 3, but play safe ;-)

# Read the file; pass a paragraph at a time to the paragraph processor.
print "Reading input...";
$pname = "para000000";
@pnames = @pflags = ();
$para = undef;
while (<>) {
    chomp;
    if (!/\S/ || /^\\(IA|IR|M)/) {
        # special case: \IA \IR \M imply new-paragraph
        &got_para($para);
        $para = undef;
    }
    if (/\S/) {
        s/\\#.*$//;   # strip comments
        $para .= " " . $_;
    }
}
&got_para($para);
print "done.\n";

# Now we've read in the entire document and we know what all the
# heading keywords refer to. Go through and fix up the \k references.
print "Fixing up cross-references...";
&fixup_xrefs;
print "done.\n";

# Sort the index tags, according to the slightly odd order I've decided on.
print "Sorting index tags...";
&indexsort;
print "done.\n";

if ($diag) {
    print "Writing index-diagnostic file...";
    &indexdiag;
    print "done.\n";
}

# OK. Write out the various output files.
print "Producing text output: ";
&write_txt;
print "done.\n";

print "Producing HTML output: ";
&write_html;
print "done.\n";

print "Producing Texinfo output: ";
&write_texi;
print "done.\n";

print "Producing WinHelp output: ";
&write_hlp;
print "done.\n";

print "Producing Documentation Intermediate Paragraphs: ";
&write_dip;
print "done.\n";

# got_para(PARAGRAPH)
#
# Process one source paragraph (its lines joined with single spaces).
# Does nothing if PARAGRAPH contains no non-blank character.
#
# Depending on the leading directive this
#   - records a code paragraph (\c ...) as a list of raw lines;
#   - allocates a chapter/appendix/section number (\C \A \H \S) and
#     records the keyword in %refs and %xrefs, the node in %xrefnodes
#     and %nodexrefs, and calls add_item();
#   - records an index alias (\IA) in %idxalias or metadata (\M) in
#     %metadata and returns at once;
#   - otherwise tokenises the text word by word.
#
# The resulting tokens are stored in the package array named by the
# global $pname (a symbolic reference).  For an index rewrite (\IR) the
# tokens are handed to addidx() and discarded; for everything else the
# array name and the paragraph flags are appended to @pnames / @pflags
# and $pname is advanced to the next name.
#
# Dies with a "badly formatted ..." message on malformed directives.
sub got_para {
    local ($_) = @_;
    my $pflags = "";
    # These used to be written `my $pflags = "", $i, $w, $l, $t;', which
    # makes only the first variable lexical; declare them all properly.
    my ($i, $w, $l, $t);

    return if !/\S/;

    @$pname = ();

    # Strip off _leading_ spaces, then determine type of paragraph.
    s/^\s*//;

    $irewrite = undef;

    # Literal braces in the patterns below are written \{ and \}: newer
    # Perls are stricter about an unescaped literal `{' in a regex.
    if (/^\\c[^{]/) {
        # A code paragraph. The paragraph-array will contain the simple
        # strings which form each line of the paragraph.
        $pflags = "code";
        while (/^\\c (([^\\]|\\[^c])*)(.*)$/) {
            $l = $1;
            $_ = $3;
            # \- is left alone in display code.
            push @$pname, _unescape_literals($l, 1);
        }
        $_ = ''; # suppress word-by-word code
    } elsif (/^\\C/) {
        # A chapter heading. Define the keyword and allocate a chapter
        # number.
        $cnum++;
        $hnum = 0;
        $snum = 0;
        $xref = "chapter-$cnum";
        $pflags = "chap $cnum :$xref";
        die "badly formatted chapter heading: $_\n"
            if !/^\\C\{([^}]*)\}\s*(.*)$/;
        $refs{$1} = "chapter $cnum";
        $node = "Chapter $cnum";
        &add_item($node, 1);
        $xrefnodes{$node} = $xref;
        $nodexrefs{$xref} = $node;
        $xrefs{$1} = $xref;
        $_ = $2;
        # the standard word-by-word code will happen next
    } elsif (/^\\A/) {
        # An appendix heading. Define the keyword and allocate an appendix
        # letter.
        $cnum++;
        # First appendix after the numbered chapters: switch to letters.
        # Later increments rely on Perl's string increment ('A' -> 'B').
        $cnum = 'A' if $cnum =~ /[0-9]+/;
        $hnum = 0;
        $snum = 0;
        $xref = "appendix-$cnum";
        $pflags = "appn $cnum :$xref";
        die "badly formatted appendix heading: $_\n"
            if !/^\\A\{([^}]*)\}\s*(.*)$/;
        $refs{$1} = "appendix $cnum";
        $node = "Appendix $cnum";
        &add_item($node, 1);
        $xrefnodes{$node} = $xref;
        $nodexrefs{$xref} = $node;
        $xrefs{$1} = $xref;
        $_ = $2;
        # the standard word-by-word code will happen next
    } elsif (/^\\H/) {
        # A major heading. Define the keyword and allocate a section number.
        $hnum++;
        $snum = 0;
        $xref = "section-$cnum.$hnum";
        $pflags = "head $cnum.$hnum :$xref";
        die "badly formatted heading: $_\n"
            if !/^\\[HP]\{([^}]*)\}\s*(.*)$/;
        $refs{$1} = "section $cnum.$hnum";
        $node = "Section $cnum.$hnum";
        &add_item($node, 2);
        $xrefnodes{$node} = $xref;
        $nodexrefs{$xref} = $node;
        $xrefs{$1} = $xref;
        $_ = $2;
        # the standard word-by-word code will happen next
    } elsif (/^\\S/) {
        # A sub-heading. Define the keyword and allocate a section number.
        $snum++;
        $xref = "section-$cnum.$hnum.$snum";
        $pflags = "subh $cnum.$hnum.$snum :$xref";
        die "badly formatted subheading: $_\n"
            if !/^\\S\{([^}]*)\}\s*(.*)$/;
        $refs{$1} = "section $cnum.$hnum.$snum";
        $node = "Section $cnum.$hnum.$snum";
        &add_item($node, 3);
        $xrefnodes{$node} = $xref;
        $nodexrefs{$xref} = $node;
        $xrefs{$1} = $xref;
        $_ = $2;
        # the standard word-by-word code will happen next
    } elsif (/^\\IR/) {
        # An index-rewrite.
        die "badly formatted index rewrite: $_\n"
            if !/^\\IR\{([^}]*)\}\s*(.*)$/;
        $irewrite = $1;
        $_ = $2;
        # the standard word-by-word code will happen next
    } elsif (/^\\IA/) {
        # An index-alias.
        die "badly formatted index alias: $_\n"
            if !/^\\IA\{([^}]*)\}\{([^}]*)\}\s*$/;
        $idxalias{$1} = $2;
        return; # avoid word-by-word code
    } elsif (/^\\M/) {
        # Metadata
        die "badly formed metadata: $_\n"
            if !/^\\M\{([^}]*)\}\{([^}]*)\}\s*$/;
        $metadata{$1} = $2;
        return; # avoid word-by-word code
    } elsif (/^\\b/) {
        # A bulleted paragraph. Strip off the initial \b and let the
        # word-by-word code take care of the rest.
        $pflags = "bull";
        s/^\\b\s*//;
    } else {
        # A normal paragraph. Just set $pflags: the word-by-word code does
        # the rest.
        $pflags = "norm";
    }

    # The word-by-word code: unless @$pname is already defined (which it
    # will be in the case of a code paragraph), split the paragraph up
    # into words and push each on @$pname.
    #
    # Each thing pushed on @$pname should have a two-character type
    # code followed by the text.
    #
    # Type codes are:
    # "n " for normal
    # "da" for a dash
    # "es" for first emphasised word in emphasised bit
    # "e " for emphasised in mid-emphasised-bit
    # "ee" for last emphasised word in emphasised bit
    # "eo" for single (only) emphasised word
    # "c " for code
    # "k " for cross-ref
    # "kK" for capitalised cross-ref
    # "w " for Web link
    # "wc" for code-type Web link
    # "x " for beginning of resolved cross-ref; generates no visible output,
    #      and the text is the cross-reference code
    # "xe" for end of resolved cross-ref; text is same as for "x ".
    # "i " for point to be indexed: the text is the internal index into the
    #      index-items arrays
    # "sp" for space
    while (/\S/) {
        if (/^\s/) {
            s/^\s*//;
            push @$pname, "sp";
        }
        $indexing = $qindex = 0;
        if (/^(\\[iI])?\\c/) {
            # Inline code, optionally indexed (\i\c) or index-only (\I\c).
            $qindex = 1 if $1 eq "\\I";
            if ($1) {
                $indexing = 1;
                s/^\\[iI]//;
            }
            s/^\\c//;
            die "badly formatted \\c: \\c$_\n"
                if !/\{(([^\\}]|\\.)*)\}(.*)$/;
            $w = $1;
            $_ = $3;
            $w = _unescape_literals($w);
            if ($indexing) {
                # Reserve a slot; it is filled in with the index tag below.
                push @$pname, "i";
                $lastp = $#$pname;
            }
            push @$pname, "c $w" if !$qindex;
            $$pname[$lastp] = &addidx($node, $w, "c $w") if $indexing;
        } elsif (/^\\[iIe]/) {
            # Emphasis and/or index entry; may span several words.
            /^(\\[iI])?(\\e)?/;
            $emph = 0;
            $qindex = 1 if $1 eq "\\I";
            if ($1) {
                $indexing = 1;
                $type = "\\i";
            }
            if ($2) {
                $emph = 1;
                $type = "\\e";
            }
            s/^(\\[iI])?(\\e?)//;
            die "badly formatted $type: $type$_\n"
                if !/\{(([^\\}]|\\.)*)\}(.*)$/;
            $w = $1;
            $_ = $3;
            $w = _unescape_literals($w);
            $t = $emph ? "es" : "n ";
            @ientry = ();
            if ($indexing) {
                push @$pname, "i";
                $lastp = $#$pname;
            }
            foreach $i (split /\s+/, $w) {  # \e and \i can be multiple words
                push @$pname, "$t$i", "sp" if !$qindex;
                if ($indexing) {
                    # Index entries are stored lower-cased.
                    ($ii = $i) =~ tr/A-Z/a-z/;
                    push @ientry, "n $ii", "sp";
                }
                $t = $emph ? "e " : "n ";
            }
            if ($indexing) {
                $w =~ tr/A-Z/a-z/;
                pop @ientry;   # remove final space
            }
            $$pname[$lastp] = &addidx($node, $w, @ientry) if $indexing;
            pop @$pname if !$qindex; # remove final space
            if (substr($$pname[$#$pname],0,2) eq "es" && !$qindex) {
                substr($$pname[$#$pname],0,2) = "eo";
            } elsif ($emph && !$qindex) {
                substr($$pname[$#$pname],0,2) = "ee";
            }
        } elsif (/^\\[kK]/) {
            # Cross-reference keyword; resolved later by fixup_xrefs.
            $t = "k ";
            $t = "kK" if /^\\K/;
            s/^\\[kK]//;
            # (The message used to say "\c" here, which was misleading.)
            die "badly formatted \\k: \\k$_\n" if !/\{([^}]*)\}(.*)$/;
            $_ = $2;
            push @$pname, "$t$1";
        } elsif (/^\\W/) {
            # Web link: \W{url}{text}, \W{url}\c{text}, optionally with \i.
            s/^\\W//;
            die "badly formatted \\W: \\W$_\n"
                if !/\{([^}]*)\}(\\i)?(\\c)?\{(([^\\}]|\\.)*)\}(.*)$/;
            $l = $1;
            $w = $4;
            $_ = $6;
            $t = "w ";
            $t = "wc" if $3 eq "\\c";
            $indexing = 1 if $2;
            $w = _unescape_literals($w);
            if ($indexing) {
                push @$pname, "i";
                $lastp = $#$pname;
            }
            push @$pname, "$t<$l>$w";
            $$pname[$lastp] = &addidx($node, $w, "c $w") if $indexing;
        } else {
            # An ordinary word (up to and including one breaking hyphen).
            die "what the hell? $_\n"
                if !/^(([^\s\\\-]|\\[\\{}\-])*-?)(.*)$/;
            die "painful death! $_\n" if !length $1;
            $w = $1;
            $_ = $3;
            $w = _unescape_literals($w);
            if ($w eq "-") {
                push @$pname, "da";
            } else {
                push @$pname, "n $w";
            }
        }
    }

    # Same truth table as the old `$irewrite ne undef': an undefined or
    # empty rewrite tag is not a rewrite.
    if (defined $irewrite && $irewrite ne "") {
        &addidx(undef, $irewrite, @$pname);
        @$pname = ();
    } else {
        push @pnames, $pname;
        push @pflags, $pflags;
        $pname++;   # string increment: "para000000" -> "para000001"
    }
}

# _unescape_literals(TEXT [, KEEP_HYPHEN_ESCAPE])
#
# Return TEXT with the source-form escapes \{ \} \- \\ replaced by the
# characters they stand for, in that order.  If KEEP_HYPHEN_ESCAPE is
# true, \- is left untouched (as done for display-code lines).
sub _unescape_literals {
    my ($text, $keep_hyphen_escape) = @_;

    $text =~ s/\\\{/{/g;
    $text =~ s/\\\}/}/g;
    $text =~ s/\\-/-/g unless $keep_hyphen_escape;
    $text =~ s/\\\\/\\/g;
    return $text;
}

# addidx(NODE, TEXT, ENTRY...)
#
# Register index tag TEXT (after %idxalias translation) with display
# tokens ENTRY.
#
# With an undefined or empty NODE this is an index rewrite: the display
# tokens for TEXT are always replaced and nothing useful is returned.
# Otherwise the tokens are stored only if TEXT has none yet, the tag is
# marked as occurring in NODE via %idxnodes, and the paragraph token
# "i TEXT" is returned.
sub addidx {
    my ($node, $text, @ientry) = @_;
    my $is_rewrite = !defined $node || $node eq "";

    $text = $idxalias{$text} || $text;
    if ($is_rewrite || !$idxmap{$text}) {
        # Store a private copy.  This used to go through a global $ientry
        # that was autovivified and then incremented, so that numified
        # reference addresses ended up as symbolic array names.
        $idxmap{$text} = [@ientry];
    }
    if ($node) {
        $idxnodes{$node,$text} = 1;
        return "i $text";
    }
    return;
}

# indexsort()
#
# Fill @itags with the keys of %idxmap, ordered by
#   1. the letters (only) of the first display token, upper-cased;
#   2. the type code of that first token;
#   3. the tag itself.
# Then split entries at the first comma in a normal ("n ") token and
# record, in %commapos, %commanext and %commaafter, where the comma is
# and whether neighbouring tags share the same text before the comma.
sub indexsort {
    my ($iitem, $ientry, $piitem, $pcval, $cval, $clrcval);

    @itags = map {
        # get back the original data as the 1st elt of each list
        $_->[0]
    } sort {
        # compare auxiliary (non-first) elements of lists
        $a->[1] cmp $b->[1] ||
        $a->[2] cmp $b->[2] ||
        $a->[0] cmp $b->[0]
    } map {
        # transform array into list of 3-element lists
        my $entry = $idxmap{$_};
        my $letters = substr($$entry[0],2);
        $letters =~ tr/A-Za-z//cd;
        [$_, uc($letters), substr($$entry[0],0,2)]
    } keys %idxmap;

    # Having done that, check for comma-hood.
    $cval = 0;
    foreach $iitem (@itags) {
        $ientry = $idxmap{$iitem};
        $clrcval = 1;
        $pcval = $cval;
        foreach my $i (0 .. $#$ientry) {
            next unless $$ientry[$i] =~ /^(n .*,)(.*)/;
            # Keep everything up to and including the last comma in this
            # token; anything after it becomes a new token of its own.
            $$ientry[$i] = $1;
            splice @$ientry,$i+1,0,"n $2" if length $2;
            $commapos{$iitem} = $i+1;
            $cval = join("\002", @$ientry[0..$i]);
            $clrcval = 0;
            last;
        }
        $cval = undef if $clrcval;
        $commanext{$iitem} = $commaafter{$piitem} = 1
            if $cval and ($cval eq $pcval);
        $piitem = $iitem;
    }
}

# indexdiag()
#
# Write "index.diag": one line per index tag in @itags, giving the tag,
# its display text as rendered by word_txt(), and the nodes (from
# @nodes) in which it occurs.  Dies if the file cannot be written.
sub indexdiag {
    my ($iitem, $ientry, $w, $ww, $foo, $node);

    open my $diagfh, '>', 'index.diag'
        or die "cannot open index.diag for writing: $!\n";
    foreach $iitem (@itags) {
        $ientry = $idxmap{$iitem};
        print $diagfh "<$iitem> ";
        foreach $w (@$ientry) {
            $ww = &word_txt($w);
            print $diagfh $ww unless $ww eq "\001";
        }
        print $diagfh ":";
        $foo = " ";
        foreach $node (@nodes) {
            if ($idxnodes{$node,$iitem}) {
                print $diagfh $foo,$node;
                $foo = ", ";
            }
        }
        print $diagfh "\n";
    }
    close $diagfh
        or die "error writing index.diag: $!\n";
}

# fixup_xrefs()
#
# Go through every non-code paragraph and replace each \k / \K token
# with its resolved cross-reference text.
sub fixup_xrefs {
  my $pname, $p, $i, $j, $k, $caps, @repl;

  for ($p=0; $p<=$#pnames; $p++) {
    next if $pflags[$p] eq "code";
    $pname = $pnames[$p];
    for ($i=$#$pname; $i >= 0; $i--) {
      if ($$pname[$i] =~ /^k/) {
        $k = $$pname[$i];
        $caps = ($k =~ /^kK/);
        $k = substr($k,2);
        $repl = $refs{$k};
        die "undefined keyword `$k'\n" unless $repl;
        substr($repl,0,1) =~ tr/a-z/A-Z/ if $caps;
        @repl = ();
        push @repl,"x $xrefs{$k}";
        foreach $j (split /\s+/,$repl) {
          push @repl,"n $j";
          push @repl,"sp";
        }
        pop @repl; # remove final space
        push @repl,"xe$xrefs{$k}";
        splice @$pname,$i,1,@repl;
      }
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -