genps.pl

来自「nasm的全套源代码,有些我做了些修改,以方便您更方便更容易调试成功,方便学习做」· PL 代码 · 共 1,190 行 · 第 1/3 页
1,190 行
#!/usr/bin/perl
#
# Format the documentation as PostScript
#

require 'psfonts.ph';		# The fonts we want to use
require 'pswidth.ph';		# PostScript string width

use Fcntl;

#
# PostScript configurables; these values are also available to the
# PostScript code itself.  Every entry can be overridden from the
# command line as "-<name> <value>".
#
%psconf = (
    # Page geometry, in PostScript points
    'pagewidth'      => 595,    # Page width
    'pageheight'     => 792,    # Page height
    'lmarg'          => 100,    # Left margin
    'rmarg'          => 50,     # Right margin
    'topmarg'        => 100,    # Top margin
    'botmarg'        => 100,    # Bottom margin

    # Page number placement
    'plmarg'         => 50,     # Position relative to left margin
    'prmarg'         => 0,      # Position relative to right margin
    'pymarg'         => 50,     # Position relative to bottom margin

    # How much above the bottom margin the copyright notice stuff is
    'startcopyright' => 75,

    # Paragraph layout
    'bulladj'        => 12,     # How much to indent a bullet paragraph

    # Table of contents
    'tocind'         => 12,     # Indentation per level
    'tocpnz'         => 24,     # Width of page number only zone
    'tocdots'        => 8,      # Spacing between dots

    # Index
    'idxspace'       => 24,     # Minimum space between title and pg#
    'idxindent'      => 24,     # How much to indent a subindex entry
    'idxgutter'      => 24,     # Space between columns
    'idxcolumns'     => 2,      # Number of columns
);

# Boolean switches; set with "-<name>" and cleared with "-no-<name>"
%psbool = (
    'colorlinks' => 0,          # Set links in blue rather than black
);

# Known paper sizes.  Each name maps to [width, height] in PostScript
# points; "-<name>" on the command line selects that size.
%papersizes = ();
foreach my $size (
    [ 'a5',     421,  595 ],    # ISO half paper size
    [ 'b5',     501,  709 ],    # ISO small paper size
    [ 'a4',     595,  842 ],    # ISO standard paper size
    [ 'letter', 612,  792 ],    # US common paper size
    [ 'pa4',    595,  792 ],    # Compromise ("portable a4")
    [ 'b4',     709, 1002 ],    # ISO intermediate paper size
    [ 'legal',  612, 1008 ],    # US intermediate paper size
    [ 'a3',     842, 1190 ],    # ISO double paper size
    [ '11x17',  792, 1224 ],    # US double paper size
) {
    my($name, $width, $height) = @$size;
    $papersizes{$name} = [ $width, $height ];
}

#
# Parse the command line
#
# Every argument starting with "-" is an option, optionally negated
# as "-no-<name>":
#   -<papersize>       set pagewidth/pageheight from %papersizes
#   -<bool>, -no-<bool>  set or clear an entry of %psbool
#   -<psconf> <value>  override an entry of %psconf
#   -title|-subtitle|-year|-author|-license <value>
#                      store the value in %metadata
# Any other argument is taken as the input file name; if several are
# given the last one wins.  Unknown options are fatal.
#
undef $input;
# Test with defined() so that an argument of "0" or "" does not end
# the loop early and silently discard the remaining arguments.
while ( defined($arg = shift(@ARGV)) ) {
    if ( $arg =~ /^\-(|no\-)(.*)$/ ) {
	$parm = $2;
	$true = ($1 eq '') ? 1 : 0;
	if ( $true && defined($papersizes{$parm}) ) {
	    $psconf{pagewidth}  = $papersizes{$parm}->[0];
	    $psconf{pageheight} = $papersizes{$parm}->[1];
	} elsif ( defined($psbool{$parm}) ) {
	    $psbool{$parm} = $true;
	} elsif ( $true && defined($psconf{$parm}) ) {
	    # Refuse to store undef when the value is missing
	    die "$0: Option $arg requires a value\n" unless ( @ARGV );
	    $psconf{$parm} = shift(@ARGV);
	} elsif ( $parm =~ /^(title|subtitle|year|author|license)$/ ) {
	    die "$0: Option $arg requires a value\n" unless ( @ARGV );
	    $metadata{$parm} = shift(@ARGV);
	} else {
	    die "$0: Unknown option: $arg\n";
	}
    } else {
	$input = $arg;
    }
}

#
# Document formatting parameters
#
$paraskip  = 6;			# Space between paragraphs
$chapstart = 30;		# Space before a chapter heading
$chapskip  = 24;		# Space after a chapter heading
$tocskip   = 6;			# Space between TOC entries

# Configure post-paragraph skips for each kind of paragraph.
# The table is keyed by paragraph type.
%skiparray = ();
$skiparray{'chap'} = $chapskip;
$skiparray{'appn'} = $chapstart;
foreach my $ptype ( 'head', 'subh', 'norm', 'bull', 'code' ) {
    $skiparray{$ptype} = $paraskip;
}
foreach my $ptype ( 'toc0', 'toc1', 'toc2' ) {
    $skiparray{$ptype} = $tocskip;
}

# Custom encoding vector.  This is basically the same as
# ISOLatin1Encoding (a level 2 feature, so we don't want to use it),
# but with the "naked" accents at \200-\237 moved to the \000-\037
# range (ASCII control characters), and a few extra characters thrown
# in.  It is basically a modified Windows 1252 codepage, minus, for
# now, the euro sign (\200 is reserved for euro.)
#
# The array has exactly 256 entries, indexed by byte value; each
# entry is a PostScript glyph name, or undef for an unassigned code.

@NASMEncoding =
(
 undef, undef, undef, undef, undef, undef, undef, undef, undef, undef,
 undef, undef, undef, undef, undef, undef, 'dotlessi', 'grave',
 'acute', 'circumflex', 'tilde', 'macron', 'breve', 'dotaccent',
 'dieresis', undef, 'ring', 'cedilla', undef, 'hungarumlaut',
 'ogonek', 'caron', 'space', 'exclam', 'quotedbl', 'numbersign',
 'dollar', 'percent', 'ampersand', 'quoteright', 'parenleft',
 'parenright', 'asterisk', 'plus', 'comma', 'minus', 'period',
 'slash', 'zero', 'one', 'two', 'three', 'four', 'five', 'six',
 'seven', 'eight', 'nine', 'colon', 'semicolon', 'less', 'equal',
 'greater', 'question', 'at', 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H',
 'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V',
 'W', 'X', 'Y', 'Z', 'bracketleft', 'backslash', 'bracketright',
 'asciicircum', 'underscore', 'quoteleft', 'a', 'b', 'c', 'd', 'e',
 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's',
 't', 'u', 'v', 'w', 'x', 'y', 'z', 'braceleft', 'bar', 'braceright',
 'asciitilde', undef, undef, undef, 'quotesinglbase', 'florin',
 'quotedblbase', 'ellipsis', 'dagger', 'dbldagger', 'circumflex',
 'perthousand', 'Scaron', 'guilsinglleft', 'OE', undef, 'Zcaron',
 undef, undef, 'grave', 'quotesingle', 'quotedblleft',
 'quotedblright', 'bullet', 'endash', 'emdash', 'tilde', 'trademark',
 'scaron', 'guilsinglright', 'oe', undef, 'zcaron', 'Ydieresis',
 'space', 'exclamdown', 'cent', 'sterling', 'currency', 'yen',
 'brokenbar', 'section', 'dieresis', 'copyright', 'ordfeminine',
 'guillemotleft', 'logicalnot', 'hyphen', 'registered', 'macron',
 'degree', 'plusminus', 'twosuperior', 'threesuperior', 'acute', 'mu',
 'paragraph', 'periodcentered', 'cedilla', 'onesuperior',
 'ordmasculine', 'guillemotright', 'onequarter', 'onehalf',
 'threequarters', 'questiondown', 'Agrave', 'Aacute', 'Acircumflex',
 'Atilde', 'Adieresis', 'Aring', 'AE', 'Ccedilla', 'Egrave', 'Eacute',
 'Ecircumflex', 'Edieresis', 'Igrave', 'Iacute', 'Icircumflex',
 'Idieresis', 'Eth', 'Ntilde', 'Ograve', 'Oacute', 'Ocircumflex',
 'Otilde', 'Odieresis', 'multiply', 'Oslash', 'Ugrave', 'Uacute',
 'Ucircumflex', 'Udieresis', 'Yacute', 'Thorn', 'germandbls',
 'agrave', 'aacute', 'acircumflex', 'atilde', 'adieresis', 'aring',
 'ae', 'ccedilla', 'egrave', 'eacute', 'ecircumflex', 'edieresis',
 'igrave', 'iacute', 'icircumflex', 'idieresis', 'eth', 'ntilde',
 'ograve', 'oacute', 'ocircumflex', 'otilde', 'odieresis', 'divide',
 'oslash', 'ugrave', 'uacute', 'ucircumflex', 'udieresis', 'yacute',
 'thorn', 'ydieresis'
);

# Name-to-byte lookup hash: maps a glyph name from @NASMEncoding to
# the one-character string carrying its code.  Codes are visited in
# ascending order, so when a glyph name occurs more than once in the
# encoding the highest code wins.
%charcode = ();
for my $code ( 0 .. 255 ) {
    my $glyph = $NASMEncoding[$code];
    # Unassigned codes are undef; do not let them create a '' key
    next unless ( defined($glyph) );
    $charcode{$glyph} = chr($code);
}

#
# First, format the stuff coming from the front end into
# a cleaner representation
#
# The input (the file named by $input, or standard input when no
# file was given) is consumed two lines at a time: a header line
# followed by a data line whose fields are separated by \037.
#
#   "meta :<key>"   - the data line is stored in $metadata{<key>}
#   "indx :<entry>" - an index entry; the data fields are its terms
#   anything else   - the header is a paragraph type and the data
#                     fields are the paragraph contents
#
if ( !defined($input) ) {
    open(PARAS, "<&STDIN") or die "$0: $!\n";
} else {
    sysopen(PARAS, $input, O_RDONLY) or
	die "$0: cannot open $input: $!\n";
}

RECORD:
while ( defined($line = <PARAS>) ) {
    chomp $line;
    $data = <PARAS>;
    chomp $data;

    # Metadata record
    if ( $line =~ /^meta :(.*)$/ ) {
	$metakey = $1;
	$metadata{$metakey} = $data;
	next RECORD;
    }

    # Ordinary paragraph: remember its type and its split contents
    unless ( $line =~ /^indx :(.*)$/ ) {
	push(@ptypes, $line);
	push(@paras, [split(/\037/, $data)]);
	next RECORD;
    }

    # Index record
    $ixentry = $1;
    push(@ixentries, $ixentry);
    $ixterms{$ixentry} = [split(/\037/, $data)];

    # Look for commas.  This is easier done on the string
    # representation, so do it now.
    unless ( $data =~ /^(.*)\,\037sp\037/ ) {
	# A complete term can also be used as a prefix
	$ixprefixes{$data}++;
	next RECORD;
    }

    $ixprefix = $1;
    $ixprefix =~ s/\037n $//; # Discard possible font change at end
    $ixhasprefix{$ixentry} = $ixprefix;
    # Flag the entry if its prefix has not been seen before
    $ixcommafirst{$ixentry}++ unless ( $ixprefixes{$ixprefix} );
    $ixprefixes{$ixprefix}++;
}
close(PARAS);

#
# Convert an integer to a chosen base
#
# Arguments: the integer to convert and the base (2..62).
# Returns the digits as a string, most significant first, using
# 0-9, a-z, A-Z as digit characters; negative numbers get a leading
# "-".  Dies if the base is outside the supported range, since a
# base below 2 would never terminate and a base above 62 has no
# digit characters to represent it.
#
sub int2base($$) {
    my($value, $base) = @_;
    my $digits = '0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ';
    my $sign   = '';
    my $out    = '';

    if ( !defined($base) || $base < 2 || $base > length($digits) ) {
	die "int2base: base must be between 2 and ".length($digits)."\n";
    }

    return '0' if ( $value == 0 );
    if ( $value < 0 ) { $sign = '-'; $value = -$value; }
    while ( $value ) {
	# Peel off the least significant digit and prepend it
	$out = substr($digits, $value % $base, 1) . $out;
	$value = int($value/$base);
    }
    return $sign.$out;
}

#
# Convert a string to a rendering array
#
# Every " - " in the string is first replaced by the character
# stored in $charcode{'endash'}, with the surrounding spaces kept.
# The string is then cut into alternating runs of whitespace and
# non-whitespace, and each run becomes a chunk [0, <run>].
# Returns the list of chunks; an empty string gives an empty list.
#
sub string2array($)
{
    my($s) = @_;
    my(@a) = ();

    $s =~ s/ \- / $charcode{'endash'} /g;	# Replace " - " with en dash

    # Walk the string once with /g.  Every character is either \s or
    # \S, so the runs cover the whole string, including any embedded
    # newline (which a "(.*)$" tail match would silently drop).
    while ( $s =~ /(\s+|\S+)/g ) {
	push(@a, [0,$1]);
    }

    return @a;
}

#
# Take a crossreference name and generate the PostScript name for it.
#
# This hack produces a somewhat smaller PDF...
#%ps_xref_list = ();
#$ps_xref_next = 0;
#sub ps_xref($) {
#    my($s) = @_;
#    my $q = $ps_xref_list{$s};
#    return $q if ( defined($ps_xref_list{$s}) );
#    $q = 'X'.int2base($ps_xref_next++, 52);
#    $ps_xref_list{$s} = $q;
#    return $q;
#}

# Somewhat bigger PDF, but one which obeys # URLs
#
# Take a crossreference name and return the PostScript name for it,
# which is the crossreference name itself, unchanged.
sub ps_xref($) {
    # Scalar element access; "@_[0]" would be a one-element slice
    return $_[0];
}

#
# Flow lines according to a particular font set and width
#
# Arguments:
#   $wid     - maximum width of a line
#   $fontset - hash reference; $fontset->{fonts} is an array of
#              pairs [<size>, <metricref>], indexed by chunk type
#   $type    - stored as the first element of every line header
#   @data    - rendering array: a list of chunks [<type>, <text>].
#              A negative <type> is zero-width metadata; a type >= 0
#              is text set in the font with that index.
#
# Returns the list of lines.  Each line is represented as:
# [ [type,first|last,aux,fontset,page,ypos,optional col],
#   [rendering array] ]
# where bit 0 of first|last is set on the first line of the
# paragraph and bit 1 on the last line.
#
# Lines are only broken at a chunk that is exactly one space; that
# chunk is dropped at the break.
#
# A space character may be "squeezed" by up to this much
# (as a fraction of the normal width of a space.)
#
$ps_space_squeeze = 0.00;	# Min space width 100%

#
# Helper for ps_flow_lines: return (total width, width due to spaces)
# for one text chunk [<fontindex>, <text>] in the given font set.
#
sub ps_flow_chunk_widths {
    my($chunk, $fontset) = @_;
    my $font   = $fontset->{fonts}->[$$chunk[0]];
    my $scale  = $$font[0]/1000;
    my $spaces = $$chunk[1];
    # tr has no character classes: /c complements the list, so this
    # deletes everything that is not a space.
    $spaces =~ tr/ //cd;
    my $width  = ps_width($$chunk[1], $$font[1], \@NASMEncoding) * $scale;
    my $swidth = ps_width($spaces, $$font[1], \@NASMEncoding) * $scale;
    return ($width, $swidth);
}

sub ps_flow_lines($$$@) {
    my($wid, $fontset, $type, @data) = @_;
    my($w)  = 0;		# Width of current line
    my($sw) = 0;		# Width of current line due to spaces
    my(@l)  = ();		# Current line
    my(@ls) = ();		# Accumulated output lines
    my(@xd) = ();		# Metadata that goes with subsequent text
    my $hasmarker = 0;		# Line has -6 marker
    my $pastmarker = 0;		# -6 marker found

    # If there is a -6 marker anywhere in the paragraph,
    # *each line* output needs to have a -6 marker
    foreach my $e ( @data ) {
	$hasmarker = 1 if ( $$e[0] == -6 );
    }

    foreach my $e ( @data ) {
	if ( $$e[0] < 0 ) {
	    # Type is metadata.  Zero width.
	    if ( $$e[0] == -6 ) {
		$pastmarker = 1;
	    }
	    if ( $$e[0] == -1 || $$e[0] == -6 ) {
		# -1 (end anchor) or -6 (marker) goes with the preceding
		# text, otherwise with the subsequent text
		push(@l, $e);
	    } else {
		push(@xd, $e);
	    }
	} else {
	    my($ew, $esw) = ps_flow_chunk_widths($e, $fontset);

	    if ( ($w+$ew) - $ps_space_squeeze*($sw+$esw) > $wid ) {
		# Begin new line
		# Search backwards for previous space chunk
		my $lx = scalar(@l)-1;
		my @rm = ();
		while ( $lx >= 0 ) {
		    while ( $lx >= 0 && $l[$lx]->[0] < 0 ) {
			# Skip metadata
			$pastmarker = 0 if ( $l[$lx]->[0] == -6 );
			$lx--;
		    };
		    if ( $lx >= 0 ) {
			if ( $l[$lx]->[1] eq ' ' ) {
			    splice(@l, $lx, 1);	   # Drop the space itself
			    @rm = splice(@l, $lx); # Rest moves to new line
			    last; # Found place to break
			} else {
			    $lx--;
			}
		    }
		}

		# Now @l contains the stuff to remain on the old line
		# If we broke the line inside a link, then split the link
		# into two.  A -2, -3 or -7 chunk opens a reference and
		# a -1 chunk closes it.
		my $lkref = undef;
		foreach my $lc ( @l ) {
		    if ( $$lc[0] == -2 || $$lc[0] == -3 || $$lc[0] == -7 ) {
			$lkref = $lc;
		    } elsif ( $$lc[0] == -1 ) {
			undef $lkref;
		    }
		}

		if ( defined($lkref) ) {
		    push(@l, [-1,undef]); # Terminate old reference
		    unshift(@rm, $lkref); # Duplicate reference on new line
		}

		if ( $hasmarker ) {
		    if ( $pastmarker ) {
			unshift(@rm,[-6,undef]); # New line starts with marker
		    } else {
			push(@l,[-6,undef]); # Old line ends with marker
		    }
		}

		push(@ls, [[$type,0,undef,$fontset,0,0],[@l]]);
		@l = @rm;

		$w = $sw = 0;
		# Compute the width of the remainder array
		for my $le ( @l ) {
		    if ( $$le[0] >= 0 ) {
			my($xew, $xsw) = ps_flow_chunk_widths($le, $fontset);
			$w += $xew;  $sw += $xsw;
		    }
		}
	    }
	    push(@l, @xd);	# Accumulated metadata
	    @xd = ();
	    if ( $$e[1] ne '' ) {
		push(@l, $e);
		$w += $ew; $sw += $esw;
	    }
	}
    }
    push(@l,@xd);
    if ( scalar(@l) ) {
	push(@ls, [[$type,0,undef,$fontset,0,0],[@l]]);	# Final line
    }

    # Mark the first line as first and the last line as last
    if ( scalar(@ls) ) {
	$ls[0]->[0]->[1] |= 1;	   # First in para
	$ls[-1]->[0]->[1] |= 2;    # Last in para
    }
    return @ls;
}

#
# Once we have broken things into lines, having multiple chunks
# with the same font index is no longer meaningful.  Merge
# adjacent chunks to keep down the size of the whole file.
#
sub ps_merge_chunks(@) {
genps.pl - 源码说明

本页面展示了「nasm的全套源代码,有些我做了些修改,以方便您更方便更容易调试成功,方便学习做编译器」中的 genps.pl 源码文件，采用 PL 编程语言编写，共 1,190 行代码。您可以在线阅读完整代码内容，也可以返回资源详情页下载完整源码包进行本地学习和开发。
虫虫下载站收录了大量与nasm相关的技术资源，包括源代码、技术文档、电路图等，是电子工程师和嵌入式开发者的专业学习平台。
⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?