📄 genps.pl
字号:
#!/usr/bin/perl
#
# Format the documentation as PostScript
#
require 'psfonts.ph'; # The fonts we want to use
require 'pswidth.ph'; # PostScript string width
use Fcntl;
#
# PostScript configurables; these values are also available to the
# PostScript code itself
#
%psconf = (
    # Overall page geometry (PostScript points)
    'pagewidth'      => 595,    # Page width
    'pageheight'     => 792,    # Page height
    'lmarg'          => 100,    # Left margin
    'rmarg'          => 50,     # Right margin
    'topmarg'        => 100,    # Top margin
    'botmarg'        => 100,    # Bottom margin

    # Page number placement
    'plmarg'         => 50,     # Position relative to left margin
    'prmarg'         => 0,      # Position relative to right margin
    'pymarg'         => 50,     # Position relative to bottom margin

    # Distance of the copyright notice above the bottom margin
    'startcopyright' => 75,

    # Bullet paragraphs
    'bulladj'        => 12,     # Indentation of a bullet paragraph

    # Table of contents
    'tocind'         => 12,     # Indentation per TOC level
    'tocpnz'         => 24,     # Width of the page-number-only zone
    'tocdots'        => 8,      # Spacing between TOC dots

    # Index
    'idxspace'       => 24,     # Minimum space between title and page number
    'idxindent'      => 24,     # Indentation of a subindex entry
    'idxgutter'      => 24,     # Space between index columns
    'idxcolumns'     => 2,      # Number of index columns
);
# Boolean switches; each can be turned on with -name and off with -no-name
%psbool = (
    'colorlinks' => 0,          # Set links in blue rather than black
);
# Known paper sizes
# Each entry maps a paper name to [width, height] in PostScript points
%papersizes = (
    # ISO sizes
    'a5'     => [ 421,  595 ],  # ISO half paper size
    'b5'     => [ 501,  709 ],  # ISO small paper size
    'a4'     => [ 595,  842 ],  # ISO standard paper size
    'b4'     => [ 709, 1002 ],  # ISO intermediate paper size
    'a3'     => [ 842, 1190 ],  # ISO double paper size

    # US sizes
    'letter' => [ 612,  792 ],  # US common paper size
    'legal'  => [ 612, 1008 ],  # US intermediate paper size
    '11x17'  => [ 792, 1224 ],  # US double paper size

    # Compromise ("portable a4")
    'pa4'    => [ 595,  792 ],
);
#
# Parse the command line
#
#
# Recognized arguments:
#   -<papersize>            select a known paper size (sets page width/height)
#   -<bool> / -no-<bool>    set or clear a boolean from %psbool
#   -<psconf key> <value>   override a PostScript configurable
#   -title|-subtitle|-year|-author|-license <value>
#                           set a metadata item
#   anything else           taken as the input file name (the last one wins)
#
undef $input;
# Test with defined() so that an argument of "0" or "" does not end
# option processing prematurely.
while ( defined($arg = shift(@ARGV)) ) {
    if ( $arg =~ /^\-(|no\-)(.*)$/ ) {
        $parm = $2;
        $true = ($1 eq '') ? 1 : 0;
        if ( $true && defined($papersizes{$parm}) ) {
            $psconf{pagewidth}  = $papersizes{$parm}->[0];
            $psconf{pageheight} = $papersizes{$parm}->[1];
        } elsif ( defined($psbool{$parm}) ) {
            $psbool{$parm} = $true;
        } elsif ( $true && defined($psconf{$parm}) ) {
            # A missing value would store undef and make the key look
            # unknown from then on; reject it instead.
            die "$0: Option $arg requires a value\n" unless ( @ARGV );
            $psconf{$parm} = shift(@ARGV);
        } elsif ( $parm =~ /^(title|subtitle|year|author|license)$/ ) {
            die "$0: Option $arg requires a value\n" unless ( @ARGV );
            $metadata{$parm} = shift(@ARGV);
        } else {
            die "$0: Unknown option: $arg\n";
        }
    } else {
        $input = $arg;
    }
}
#
# Document formatting parameters
#
$paraskip  = 6;                 # Space between paragraphs
$chapstart = 30;                # Space before a chapter heading
$chapskip  = 24;                # Space after a chapter heading
$tocskip   = 6;                 # Space between TOC entries

# Post-paragraph skip for each kind of paragraph.
# NOTE(review): 'appn' uses $chapstart while 'chap' uses $chapskip;
# this is kept exactly as found -- confirm that it is intentional.
%skiparray = (
    'chap' => $chapskip,
    'appn' => $chapstart,
    (map { $_ => $paraskip } qw(head subh norm bull code)),
    (map { $_ => $tocskip  } qw(toc0 toc1 toc2)),
);
# Custom encoding vector.  This is basically the same as
# ISOLatin1Encoding (a level 2 feature, so we don't want to use it),
# but with the "naked" accents at \200-\237 moved to the \000-\037
# range (ASCII control characters), and a few extra characters thrown
# in.  It is basically a modified Windows 1252 codepage, minus, for
# now, the euro sign (\200 is reserved for euro.)
#
# The array is indexed by byte value (0-255); each element is the
# PostScript glyph name for that byte, or undef if the slot is unused.
@NASMEncoding =
(
    undef, undef, undef, undef, undef, undef, undef, undef, undef, undef,
    undef, undef, undef, undef, undef, undef, 'dotlessi', 'grave',
    'acute', 'circumflex', 'tilde', 'macron', 'breve', 'dotaccent',
    'dieresis', undef, 'ring', 'cedilla', undef, 'hungarumlaut',
    'ogonek', 'caron', 'space', 'exclam', 'quotedbl', 'numbersign',
    'dollar', 'percent', 'ampersand', 'quoteright', 'parenleft',
    'parenright', 'asterisk', 'plus', 'comma', 'minus', 'period',
    'slash', 'zero', 'one', 'two', 'three', 'four', 'five', 'six',
    'seven', 'eight', 'nine', 'colon', 'semicolon', 'less', 'equal',
    'greater', 'question', 'at', 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H',
    'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V',
    'W', 'X', 'Y', 'Z', 'bracketleft', 'backslash', 'bracketright',
    'asciicircum', 'underscore', 'quoteleft', 'a', 'b', 'c', 'd', 'e',
    'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's',
    't', 'u', 'v', 'w', 'x', 'y', 'z', 'braceleft', 'bar', 'braceright',
    'asciitilde', undef, undef, undef, 'quotesinglbase', 'florin',
    'quotedblbase', 'ellipsis', 'dagger', 'dbldagger', 'circumflex',
    'perthousand', 'Scaron', 'guilsinglleft', 'OE', undef, 'Zcaron',
    undef, undef, 'grave', 'quotesingle', 'quotedblleft',
    'quotedblright', 'bullet', 'endash', 'emdash', 'tilde', 'trademark',
    'scaron', 'guilsinglright', 'oe', undef, 'zcaron', 'Ydieresis',
    'space', 'exclamdown', 'cent', 'sterling', 'currency', 'yen',
    'brokenbar', 'section', 'dieresis', 'copyright', 'ordfeminine',
    'guillemotleft', 'logicalnot', 'hyphen', 'registered', 'macron',
    'degree', 'plusminus', 'twosuperior', 'threesuperior', 'acute', 'mu',
    'paragraph', 'periodcentered', 'cedilla', 'onesuperior',
    'ordmasculine', 'guillemotright', 'onequarter', 'onehalf',
    'threequarters', 'questiondown', 'Agrave', 'Aacute', 'Acircumflex',
    'Atilde', 'Adieresis', 'Aring', 'AE', 'Ccedilla', 'Egrave', 'Eacute',
    'Ecircumflex', 'Edieresis', 'Igrave', 'Iacute', 'Icircumflex',
    'Idieresis', 'Eth', 'Ntilde', 'Ograve', 'Oacute', 'Ocircumflex',
    'Otilde', 'Odieresis', 'multiply', 'Oslash', 'Ugrave', 'Uacute',
    'Ucircumflex', 'Udieresis', 'Yacute', 'Thorn', 'germandbls',
    'agrave', 'aacute', 'acircumflex', 'atilde', 'adieresis', 'aring',
    'ae', 'ccedilla', 'egrave', 'eacute', 'ecircumflex', 'edieresis',
    'igrave', 'iacute', 'icircumflex', 'idieresis', 'eth', 'ntilde',
    'ograve', 'oacute', 'ocircumflex', 'otilde', 'odieresis', 'divide',
    'oslash', 'ugrave', 'uacute', 'ucircumflex', 'udieresis', 'yacute',
    'thorn', 'ydieresis'
);
# Name-to-byte lookup hash.  Unused (undef) slots are skipped so that
# no bogus '' key is created.  A glyph name that occurs more than once
# in the vector (e.g. 'space' at 32 and 160) maps to its highest code,
# because later entries overwrite earlier ones.
%charcode = ();
for ( $i = 0 ; $i < 256 ; $i++ ) {
    next unless ( defined($NASMEncoding[$i]) );
    $charcode{$NASMEncoding[$i]} = chr($i);
}
#
# First, format the stuff coming from the front end into
# a cleaner representation
#
# Read from the named input file if one was given, otherwise from a
# duplicate of standard input.
if ( defined($input) ) {
    sysopen(PARAS, $input, O_RDONLY)
        or die "$0: cannot open $input: $!\n";
} else {
    open(PARAS, "<&STDIN")
        or die "$0: $!\n";
}

# The input is consumed two lines at a time: a line naming the kind of
# record, followed by a data line whose fields are separated by \037.
while ( defined($line = <PARAS>) ) {
    chomp $line;
    $data = <PARAS>;
    chomp $data;

    if ( $line =~ /^meta :(.*)$/ ) {
        # Metadata item: the key is on the type line, the value is the data
        $metakey = $1;
        $metadata{$metakey} = $data;
    } elsif ( $line =~ /^indx :(.*)$/ ) {
        # Index entry: remember it in input order along with its terms
        $ixentry = $1;
        push(@ixentries, $ixentry);
        $ixterms{$ixentry} = [split(/\037/, $data)];

        # Commas are easier to find in the string form of the data,
        # so look for them here.
        if ( $data =~ /^(.*)\,\037sp\037/ ) {
            # Drop a possible font change at the end of the prefix
            ($ixprefix = $1) =~ s/\037n $//;
            $ixhasprefix{$ixentry} = $ixprefix;
            # Flag the entry when its prefix has not been seen before
            $ixcommafirst{$ixentry}++ unless ( $ixprefixes{$ixprefix} );
            $ixprefixes{$ixprefix}++;
        } else {
            # A complete term can also serve as a prefix
            $ixprefixes{$data}++;
        }
    } else {
        # Ordinary paragraph: keep its type and its split-up contents
        push(@ptypes, $line);
        push(@paras, [split(/\037/, $data)]);
    }
}

close(PARAS);
#
# Convert an integer to a chosen base
#
# int2base(value, base)
#
# Returns the string representation of the integer `value' in the
# given base, using the digits 0-9, a-z, A-Z (in that order) and a
# leading '-' for negative values.  Zero is returned as '0'.
#
# The base must be an integer from 2 up to the number of available
# digit characters (62); anything else is a caller error and dies,
# since a base below 2 would never terminate (or divide by zero) and a
# base above 62 would silently drop digits.
sub int2base($$) {
    my($value, $base) = @_;
    my $digits  = '0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ';
    my $maxbase = length($digits);

    die "int2base: base must be an integer in the range 2..$maxbase"
        unless ( defined($base) && $base == int($base) &&
                 $base >= 2 && $base <= $maxbase );

    return '0' if ( $value == 0 );

    my $sign = '';
    if ( $value < 0 ) {
        $sign  = '-';
        $value = -$value;
    }

    # Peel off the least significant digit until nothing is left
    my $out = '';
    while ( $value ) {
        $out   = substr($digits, $value % $base, 1) . $out;
        $value = int($value / $base);
    }
    return $sign . $out;
}
#
# Convert a string to a rendering array
#
# string2array(text)
#
# Splits `text' into alternating runs of whitespace and non-whitespace
# and returns them as a list of [0, run] pairs.  Before splitting,
# every " - " is replaced by an en dash (taken from %charcode)
# surrounded by spaces.
# NOTE(review): the pattern has no /s modifier, so splitting stops at
# an embedded newline -- presumably the input never contains one.
sub string2array($)
{
    my($text) = @_;
    my @chunks;
    my $endash = $charcode{'endash'};

    $text =~ s/ \- / $endash /g;        # Replace " - " with en dash

    # Repeatedly strip the leading run off the front of the string
    while ( my($head, $tail) = $text =~ /^(\s+|\S+)(.*)$/ ) {
        push(@chunks, [0, $head]);
        $text = $tail;
    }
    return @chunks;
}
#
# Take a crossreference name and generate the PostScript name for it.
#
# This hack produces a somewhat smaller PDF...
#%ps_xref_list = ();
#$ps_xref_next = 0;
#sub ps_xref($) {
# my($s) = @_;
# my $q = $ps_xref_list{$s};
# return $q if ( defined($ps_xref_list{$s}) );
# $q = 'X'.int2base($ps_xref_next++, 52);
# $ps_xref_list{$s} = $q;
# return $q;
#}
# Somewhat bigger PDF, but one which obeys # URLs
# ps_xref(name)
#
# Returns the PostScript name for a cross-reference; in this
# implementation the name is passed through unchanged.
sub ps_xref($) {
    return $_[0];       # Scalar element, not the one-element slice @_[0]
}
#
# Flow lines according to a particular font set and width
#
# A "font set" is represented as an array containing
# arrays of pairs: [<size>, <metricref>]
#
# Each line is represented as:
# [ [type,first|last,aux,fontset,page,ypos,optional col],
# [rendering array] ]
#
# A space character may be "squeezed" by up to this much
# (as a fraction of the normal width of a space.)
#
$ps_space_squeeze = 0.00;       # Min space width 100%

#
# ps_flow_lines(width, fontset, type, chunks...)
#
#   width    maximum line width
#   fontset  hash reference; $fontset->{fonts} is an array of
#            [<size>, <metricref>] pairs indexed by font number
#   type     paragraph type, stored in the header of every line
#   chunks   rendering array: each element is [n, text], where n >= 0
#            is a font number and n < 0 marks zero-width metadata
#
# Returns the list of lines; each line is
#   [ [type, first|last flags, undef, fontset, 0, 0], [rendering array] ]
# with bit 1 of the flags set on the first line of the paragraph and
# bit 2 set on the last.
#
# Widths come from ps_width() scaled by <size>/1000 (ps_width
# presumably reports widths in 1/1000 of the font size -- see
# pswidth.ph).
#
sub ps_flow_lines($$$@) {
    my($wid, $fontset, $type, @data) = @_;
    my($fonts) = $$fontset{fonts};
    my($width) = 0;             # Width of current line
    my($spacewidth) = 0;        # Width of current line due to spaces
    my(@line) = ();             # Current line
    my(@lines) = ();            # Accumulated output lines
    my(@pending) = ();          # Metadata that goes with subsequent text
    my $hasmarker = 0;          # Paragraph has a -6 marker
    my $pastmarker = 0;         # -6 marker found

    # Returns (total width, width due to spaces) of one text chunk
    my $measure = sub {
        my($chunk) = @_;
        my $metrics = $fonts->[$$chunk[0]][1];
        my $scale   = $fonts->[$$chunk[0]][0]/1000;
        my $spaces  = $$chunk[1];
        # tr has no character classes; /c complements the search list,
        # so this deletes everything that is not a space.
        $spaces =~ tr/ //cd;
        return (ps_width($$chunk[1], $metrics, \@NASMEncoding) * $scale,
                ps_width($spaces, $metrics, \@NASMEncoding) * $scale);
    };

    # If there is a -6 marker anywhere in the paragraph,
    # *each line* output needs to have a -6 marker
    foreach my $e ( @data ) {
        $hasmarker = 1 if ( $$e[0] == -6 );
    }

    foreach my $e ( @data ) {
        if ( $$e[0] < 0 ) {
            # Type is metadata.  Zero width.
            $pastmarker = 1 if ( $$e[0] == -6 );
            if ( $$e[0] == -1 || $$e[0] == -6 ) {
                # -1 (end anchor) or -6 (marker) goes with the preceding
                # text, otherwise with the subsequent text
                push(@line, $e);
            } else {
                push(@pending, $e);
            }
            next;
        }

        my($ew, $esw) = $measure->($e);
        if ( ($width+$ew) - $ps_space_squeeze*($spacewidth+$esw) > $wid ) {
            # Begin new line
            # Search backwards for previous space chunk
            my $lx = scalar(@line)-1;
            my @rm = ();
            while ( $lx >= 0 ) {
                while ( $lx >= 0 && $line[$lx]->[0] < 0 ) {
                    # Skip metadata
                    $pastmarker = 0 if ( $line[$lx]->[0] == -6 );
                    $lx--;
                }
                if ( $lx >= 0 ) {
                    if ( $line[$lx]->[1] eq ' ' ) {
                        splice(@line, $lx, 1);          # Drop the space
                        @rm = splice(@line, $lx);       # Rest moves down
                        last;   # Found place to break
                    } else {
                        $lx--;
                    }
                }
            }

            # Now @line contains the stuff to remain on the old line
            # If we broke the line inside a link, then split the link
            # into two.
            my $lkref = undef;
            foreach my $lc ( @line ) {
                if ( $$lc[0] == -2 || $$lc[0] == -3 || $$lc[0] == -7 ) {
                    $lkref = $lc;
                } elsif ( $$lc[0] == -1 ) {
                    undef $lkref;
                }
            }
            if ( defined($lkref) ) {
                push(@line, [-1,undef]);  # Terminate old reference
                unshift(@rm, $lkref);     # Duplicate reference on new line
            }

            if ( $hasmarker ) {
                if ( $pastmarker ) {
                    unshift(@rm,[-6,undef]);    # New line starts with marker
                } else {
                    push(@line,[-6,undef]);     # Old line ends with marker
                }
            }

            push(@lines, [[$type,0,undef,$fontset,0,0],[@line]]);
            @line = @rm;

            # Compute the width of the remainder array
            $width = $spacewidth = 0;
            for my $le ( @line ) {
                if ( $$le[0] >= 0 ) {
                    my($xew, $xsw) = $measure->($le);
                    $width += $xew; $spacewidth += $xsw;
                }
            }
        }

        push(@line, @pending);  # Accumulated metadata
        @pending = ();
        if ( $$e[1] ne '' ) {
            push(@line, $e);
            $width += $ew; $spacewidth += $esw;
        }
    }

    push(@line, @pending);
    if ( scalar(@line) ) {
        push(@lines, [[$type,0,undef,$fontset,0,0],[@line]]);   # Final line
    }

    # Mark the first line as first and the last line as last
    if ( scalar(@lines) ) {
        $lines[0]->[0]->[1]  |= 1;      # First in para
        $lines[-1]->[0]->[1] |= 2;      # Last in para
    }
    return @lines;
}
#
# Once we have broken things into lines, having multiple chunks
# with the same font index is no longer meaningful. Merge
# adjacent chunks to keep down the size of the whole file.
#
sub ps_merge_chunks(@) {
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -