📄 reflow.pm
字号:
print_lines($indent1 . $line);
} elsif ($line =~ /^($pin|\t).*\S/) {
    # current line may be poetry, check next line:
    $last = $line;
    $line = get_line();
    if (!defined($line)) {
        process($last);
        last;
    }
    if ($line =~ /^($pin|\t).*\S/) {
        # found some poetry, skip indented lines until end of input
        # or a non-indented line found:
        reflow_para();
        print_lines($indent1 . $last);
        print_lines($indent1 . $line);
        while (defined($line = get_line())) {
            last unless (($line =~ /^($pin|\t).*\S/)
                         || ($noreflow ne "" && $line =~ /$noreflow/));
            print_lines($indent1 . $line);
        }
        last unless (defined($line)); # poetry at end of document
        # $line is a non-poetic line
    } else {
        # $last had a poetry indent, but current line doesn't.
        # Process last line:
        process($last);
    }
} # end of first poetry test
# current line is non-poetic, so process it:
process($line);
}
}
# reflow any remaining @words:
reflow_para();
}

# Process a non-poetry line by pushing its words onto @words.
# If the line is blank, then reflow the paragraph collected in @words
# (unless $oneparagraph matches /[Yy]/, in which case blank lines are
# ignored).
#
# Parameter:
#   $line - one line of input text.
# Globals read:    $oneparagraph, $indent1
# Globals written: @linewords, @words, @tmp
sub process($) {
    my ($line) = @_;
    # current line is non-poetry
    # remove spaces around dashes:
    $line =~ s/([^-])[ \t]*--[ \t]*([^-])/$1--$2/g;
    # protect ". . ." ellipses: the spaces inside them are replaced by
    # \377 so that split() below keeps the ellipsis in one word
    # (reflow_para turns \377 back into a space before printing):
    $line =~ s/ \. \. \./\377\.\377\.\377\./g;
    $line =~ s/\. \. \./\.\377\.\377\./g;
    @linewords = split(/\s+/, $line);
    shift(@linewords) if (@linewords && ($linewords[0] eq ""));

    # Decide whether the line is blank BEFORE the hyphen join below.
    # The join can consume the only word on the line, and an empty
    # @linewords must not then be mistaken for a paragraph break.
    my $blank_line = !@linewords;

    # If last word of previous line ends in a single hyphen,
    # then append first word of this line:
    if (@linewords && @words && ($words[$#words] =~ /[a-zA-Z0-9]-$/)) {
        $words[$#words] .= shift(@linewords);
    }

    if ($blank_line) {
        # No words on this line
        if ($oneparagraph !~ /[Yy]/) {
            # end of paragraph
            reflow_para();
            print_lines("$indent1\n");
        }
    } elsif (@linewords) {
        # add @linewords to @words,
        # split on em dashes, ie word--word
        # Move "--" from beginning of current word to end of last word:
        if (@words && ($linewords[0] =~ s/^(--[^a-zA-Z0-9]*)//)) {
            $words[$#words] .= $1;
            shift(@linewords) if ($linewords[0] eq "");
        }
        foreach my $word (@linewords) {
            if ($word =~ /[^-]--[a-zA-Z0-9]/) {
                # A limit of -1 keeps trailing empty fields, so a word
                # such as "a--b--" does not lose its final "--":
                @tmp = split(/--/, $word, -1);
                my $tail = pop(@tmp);
                # restore the hyphens on every piece but the last:
                $_ .= "--" foreach (@tmp);
                push(@tmp, $tail) if ($tail ne "");
                # append @tmp to @words:
                push(@words, @tmp);
            } else {
                # append $word to @words:
                push(@words, $word);
            }
        }
    }
}

# Reflow the paragraph collected in @words, print it via print_lines()
# and empty @words. Does nothing when @words is empty.
#
# The integer arrays are handed to reflow_trial() as hex strings
# (pack "i*" then unpack "H*") and the result comes back the same way.
# The first integer of the result is taken as $lastbreak and the rest
# as @linkbreak, which compute_output() uses to build @output.
sub reflow_para {
    return() unless (@words);
    reflow_penalties();
    $lastbreak = 0;
    $linkbreak[$wordcount] = 0;
    # Create space for the result:
    my $result = " " x (($wordcount + 2) * 8);
    $result = reflow_trial(unpack("H*", pack("i*", @$optimum)),
                           $maximum, $wordcount,
                           $penaltylimit, $semantic, $shortlast,
                           unpack("H*", pack("i*", @word_len)),
                           unpack("H*", pack("i*", @space_len)),
                           unpack("H*", pack("i*", @extra)),
                           $result);
    @linkbreak = unpack("i*", pack("H*", $result));
    $lastbreak = shift(@linkbreak);
    compute_output();
    # Turn the protected ellipsis spaces (see process) back into spaces:
    s/\377/ /g foreach (@output);
    print_lines(@output);
    @words = ();
}

# Add spaces to ends of sentences and calculate @extra array of penalties.
#
# Sets $wordcount, prepends $indent1 to the first word, fills
# @extra[0..$wordcount] with the per-word break penalties and fills
# @word_len / @space_len with the length of each word and of the space
# that follows it.
sub reflow_penalties {
    $wordcount = $#words + 1;
    # Add paragraph indentation to first word:
    $words[0] = $indent1 . $words[0] if ($wordcount);
    # One slot more than there are words, since the code below
    # also adjusts $extra[$j+1]:
    for my $j (0 .. $wordcount) {
        $extra[$j] = 0;
    }
    for my $j (0 .. $wordcount - 1) {
        if ($words[$j] =~ /^(\w+)["')]*([\.\:])["')]*$/) {
            # Period or colon
            if (!defined($abbrev{$1}) || ($2 eq ":")) {
                # End of sentence
                $extra[$j] += $sentence / 2;
                $extra[$j-1] -= $sentence if ($j > 0);
                $extra[$j+1] -= $sentence;
                $words[$j] = $words[$j] . " " unless ($frenchspacing =~ /[Yy]/);
            } else {
                # Don't break "Mr. X"
                $extra[$j] -= $namebreak if ($abbrev{$1} == 1);
            }
        }
        if (($words[$j] =~ /[\?\!]["')]*$/) # !? after word
            && (($j >= $#words) || ($words[$j+1] =~ /^[^a-zA-Z]*[A-Z]/))) {
            $extra[$j] += $sentence / 2;
            $extra[$j-1] -= $sentence if ($j > 0);
            $extra[$j+1] -= $sentence;
            $words[$j] = $words[$j] . " " unless ($frenchspacing =~ /[Yy]/);
        }
        if ($words[$j] =~ /\,$/) {
            # Comma after word
            $extra[$j] += $dependent / 2;
            $extra[$j-1] -= $dependent if ($j > 0);
            $extra[$j+1] -= $dependent;
        }
        if ($words[$j] =~ /[\;\"\'\)]$|--$/) {
            # Punctuation after word
            $extra[$j] += $independent / 2;
            $extra[$j-1] -= $independent if ($j > 0);
            $extra[$j+1] -= $independent;
        }
        if (($j < $#words) && ($words[$j+1] =~ /^\(/)) {
            # Next word has opening parenthesis
            $extra[$j] += $independent / 2;
            $extra[$j-1] -= $independent if ($j > 0);
            $extra[$j+1] -= $independent;
        }
        if (($j < $#words) && ($words[$j] =~ /[A-Z]/
                               && $words[$j] !~ /\./
                               && $words[$j+1] =~ /[A-Z]/)) {
            $extra[$j] -= $namebreak; # Don't break "United States"
        }
        $extra[$j] -= $connectives{$words[$j]} * $connpenalty
            if (defined($connectives{$words[$j]}));
    }
    @word_len = ();  # Length of each word (excluding spaces)
    @space_len = (); # Length of the space after this word
    for my $j (0 .. $wordcount - 1) {
        if ($words[$j] =~ /--$/) {
            # No space follows a word that ends in an em dash:
            $word_len[$j] = length($words[$j]);
            $space_len[$j] = 0;
        } elsif ($words[$j] =~ / $/) {
            # Sentence end: the word carries one trailing space already:
            $word_len[$j] = length($words[$j]) - 1;
            $space_len[$j] = 2;
        } else {
            $word_len[$j] = length($words[$j]);
            $space_len[$j] = 1;
        }
    }
    # First word already has $indent1 added and will not be indented further:
    $word_len[0] -= length($indent2) if ($wordcount);
}

# compute @output from $wordcount, @words, $lastbreak and @linkbreak
#
# The lines are built last-to-first by following the @linkbreak chain
# from $lastbreak, then reversed. $indent2 is prepended to every line
# except the first.
sub compute_output {
    my ($j, $terminus);
    @output = ();
    $terminus = $wordcount-1;
    for ($j = 0; $terminus >= 0; $j++) {
        $output[$j] = join(' ', @words[$lastbreak+1..$terminus])."\n";
        #print "j = $j, lastbreak = $lastbreak:\noutput = $output[$j]\n";
        $terminus = $lastbreak;
        $lastbreak = $linkbreak[$lastbreak];
    }
    @output = reverse(@output);
    # trim spaces after hyphens (the join above puts a space after
    # every word, including words that end in "--"):
    s/([^-])[ \t]*--[ \t]*([^-])/$1--$2/g foreach (@output);
    # Add the indent to all but the first line:
    $_ = $indent2 . $_ foreach (@output[1..$#output]);
}

1;
__END__
# Below is stub documentation for your module. You better edit it!

=head1 NAME

Text::Reflow - Perl module for reflowing text files using Knuth's paragraphing algorithm.

=head1 SYNOPSIS

  use Text::Reflow qw(reflow_file reflow_string reflow_array);

  reflow_file($infile, $outfile, key => value, ...);

  $output = reflow_string($input, key => value, ...);

  $output = reflow_array(\@input, key => value, ...);

=head1 DESCRIPTION

These routines will reflow the paragraphs in the given file,
filehandle, string or array using Knuth's paragraphing algorithm
(as used in TeX) to pick "good" places to break the lines.

Each routine takes ascii text data with paragraphs separated
by blank lines and reflows the paragraphs. If two or more lines
in a row are "indented" then they are assumed to be a quoted poem
and are passed through unchanged (but see below).

The reflow algorithm tries to keep the lines the same length
but also tries to break at punctuation, and avoid breaking within
a proper name or after certain I<connectives> ("a", "the", etc.). The
result is a file with a more "ragged" right margin than is produced
by C<fmt> or C<Text::Wrap> but it is easier to read since fewer
phrases are broken across line breaks.

For C<reflow_file>, if $infile is the empty string, then the input
is taken from STDIN and if $outfile is the empty string, the output
is written to STDOUT.
Otherwise, $infile and $outfile may be a string,
a FileHandle reference or a FileHandle glob.

A typical invocation is:

  reflow_file("myfile", "");

which reflows the whole of F<myfile> and prints the result to STDOUT.

=head2 KEYWORD OPTIONS

The behaviour of Reflow can be adjusted by setting various keyword options.
These can be set globally by referencing the appropriate
variable in the Text::Reflow package, for example:

  $Text::Reflow::maximum = 80;
  $Text::Reflow::optimum = 75;

will set the maximum line length to 80 characters and the optimum
line length to 75 characters for all subsequent reflow operations.
Or they can be passed to a reflow_ function as a keyword parameter,
for example:

  $out = reflow_string($in, maximum => 80, optimum => 75);

in which case the new options only apply to this call.

The following options are currently implemented, with their default values:

=over 4

=item optimum => [65]

The optimum line length in characters. This can be either a number
or a reference to an array of numbers: in the latter case,
each optimal line length is tried in turn for each paragraph,
and the one which leads to the best overall paragraph is chosen.
This results in less ragged paragraphs, but some paragraphs will
be wider or narrower overall than others.

=item maximum => 75

The maximum allowed line length.

=item indent => ""

Each line of output has this string prepended. C<indent =E<gt> string>
is equivalent to C<indent1 =E<gt> string, indent2 =E<gt> string>.

=item indent1 => ""

A string which is used to indent the first line in any paragraph.

=item indent2 => ""

A string which is used to indent the second and subsequent lines in any paragraph.

=item quote => ""

Characters to strip from the beginning of a line before processing.
To reflow a quoted email message and then restore the quotes you
might want to use

  quote => "> ", indent => "> "

=item skipto => ""

Skip to the first line starting with the given pattern before starting
to reflow.
This is useful for skipping Project Gutenberg headers
or contents tables.

=item skipindented => 2

If C<skipindented> = 0 then all indented lines are flowed
in with the surrounding paragraph. If C<skipindented> = 1 then
any indented line will not be reflowed. If C<skipindented> = 2
then any two or more adjacent indented lines will not be reflowed.
The purpose of the default value is to allow poetry to pass
through unchanged, but not to let a paragraph indentation
prevent the first line of the paragraph from being reflowed.

=item noreflow => ""

A pattern to indicate that certain lines should not be reflowed.
For example, a table of contents might have a line of dots.
The option:

  noreflow => '(\.\s*){4}\.'

will not reflow any lines containing five or more consecutive dots.

=item frenchspacing => 'n'

Normally two spaces are put at the end of a sentence or a clause.
The C<frenchspacing> option (taken from the TeX macro of the same name)
disables this feature.

=item oneparagraph => 'n'

Set this to 'y' if you want the whole input to be flowed into a single
paragraph, ignoring blank lines in the input.

=item semantic => 30

This parameter indicates the extent to which semantic factors matter
(breaking on punctuation, avoiding a break within a clause etc.).
Set this to zero to minimise the raggedness of the right margin,
at the expense of readability.

=item namebreak => 10

Penalty for splitting up a name

=item sentence => 20

Penalty for sentence widows and orphans (ie splitting a line
immediately after the first word in a sentence, or before
the last word in a sentence)

=item independent => 10

Penalty for independent clause widows and orphans.

=item dependent => 6

Penalty for dependent clause widows and orphans.

=item shortlast => 5

Penalty for a short last line in a paragraph (one or two words).

=item connpenalty => 1

Multiplier for the "negative penalty" for breaking at a connective.
In other words, increasing this value makes connectives an even
more attractive place to break a line.

=back

=head2 EXPORT

None by
default.

=head1 AUTHOR

Original C<reflow> perl script written by Michael Larsen, larsen@edu.upenn.math.

Modified, enhanced and converted to a perl module with XSUB
by Martin Ward, Martin.Ward@durham.ac.uk

=head1 SEE ALSO

perl(1).

See "TeX: The Program" by Donald Knuth for a description of the algorithm used.

=cut
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -