📄 html2texi.pl

📁 python s60 1.4.5版本的源代码
💻 PL
📖 第 1 页 / 共 4 页
字号:
上一页 1 2 34
    { return; }

  if ($he->tag() eq "p")
    { my $ref_pcontent = $he->content();
      if (defined $ref_pcontent)
	{ my @pcontent = @{$ref_pcontent};
	  # print "reorder_dt_and_dl found a <p>\n"; $he->dump();
	  if ((scalar(@pcontent) >= 1)
	      && (ref $pcontent[0]) && ($pcontent[0]->tag() eq "dl")
	      && $pcontent[0]->implicit())
	    { my $ref_dlcontent = $pcontent[0]->content();
	      # print "reorder_dt_and_dl found a <p> and implicit <dl>\n";
	      if (defined $ref_dlcontent)
		{ my @dlcontent = @{$ref_dlcontent};
		  if ((scalar(@dlcontent) >= 1)
		      && (ref $dlcontent[0]) && ($dlcontent[0]->tag() eq "dt"))
		    { my $ref_dtcontent = $dlcontent[0]->content();
		      # print "reorder_dt_and_dl found a <p>, implicit <dl>, and <dt>\n";
		      if (defined $ref_dtcontent)
			{ my @dtcontent = @{$ref_dtcontent};
			  if ((scalar(@dtcontent) > 0)
			      && (ref $dtcontent[$#dtcontent])
			      && ($dtcontent[$#dtcontent]->tag() eq "dl"))
			    { my $ref_dl2content = $dtcontent[$#dtcontent]->content();
			      # print "reorder_dt_and_dl found a <p>, implicit <dl>, <dt>, and <dl>\n";
			      if (defined $ref_dl2content)
				{ my @dl2content = @{$ref_dl2content};
				  if ((scalar(@dl2content) > 0)
				      && (ref ($dl2content[0]))
				      && ($dl2content[0]->tag() eq "dd"))
			    {
			      # print "reorder_dt_and_dl found a <p>, implicit <dl>, <dt>, <dl>, and <dd>\n";
			      # print STDERR "CHANGING\n"; $he->dump();
			      html_replace_by_ignore($dtcontent[$#dtcontent]);
			      splice(@{$ref_dlcontent}, 1, 0, @dl2content);
			      # print STDERR "CHANGED TO:\n"; $he->dump();
			      return 0; # don't traverse children
			    } } } } } } } } }
  return 1;
}


# If we find a paragraph that looks like
# <P>
#   <HR>
#   <UL>
# then accumulate its links into a contents_list and delete the paragraph.
sub process_if_child_links ( $$$ )
{ # Traversal callback: arguments are the element, the start flag, and a
  # depth that is not used here.
  my ($he, $startflag) = (check_args(3, @_))[0,1];
  return unless $startflag;

  # Only a <P> holding exactly an <HR> followed by a <UL> is of interest;
  # anything else is left alone and 1 is returned.
  return 1 unless $he->tag() eq "p";
  my $children = $he->content();
  return 1 unless defined $children;
  return 1 unless scalar(@{$children}) == 2;

  my ($rule, $list) = @{$children};
  return 1 unless (ref $rule) && $rule->tag() eq "hr";
  return 1 unless (ref $list) && $list->tag() eq "ul";

  # Hand the paragraph to process_child_links, then remove it from the tree.
  process_child_links($he);
  $he->delete();
  return 0;
}


# If we find
#     <H4>
#       "Footnotes"
#     <DL>
#       <DT>
#         <A NAME="foot560">
#           "...borrow"
#         <A HREF="refcountsInPython.html#tex2html2" NAME="foot560">
#           "1.2"
#       <DD>
#         "The metaphor of ``borrowing'' a reference is not completely correct: the owner still has a copy of the reference. "
#       ...
# then record the footnote information and delete the section and list.

my $process_if_footnotes_expect_dl_next = 0;

# Traversal callback that records footnotes and removes them from the tree.
# Arguments (validated by check_args): the element, the start flag, and a
# depth argument that is ignored.
# Returns nothing when the start flag is false, 0 after consuming the
# "Footnotes" heading or its <DL>, and 1 otherwise.
# Dies if the element following the heading is not a well-formed footnote
# <DL> (alternating <DT>/<DD>, each <DT> holding two anchors with the same
# name).
sub process_if_footnotes ( $$$ )
{ my ($he, $startflag) = (check_args(3, @_))[0,1]; #  ignore depth argument
  if (!$startflag)
    { return; }

  # The <H4> "Footnotes" heading: blank it out and remember that the next
  # element visited must be the footnote list.
  if (($he->tag() eq "h4")
      && has_single_content_string($he)
      && ($ {$he->content}[0] eq "Footnotes"))
    { html_replace_by_ignore($he);
      $process_if_footnotes_expect_dl_next = 1;
      return 0; }

  if ($process_if_footnotes_expect_dl_next && ($he->tag() eq "dl"))
    { my $ref_content = $he->content();
      if (defined $ref_content)
	{ $process_if_footnotes_expect_dl_next = 0;
	  my @content = @{$ref_content};
	  # Walk the list two items at a time: <DT> then its <DD>.
	  for (my $i=0; $i<$#content; $i+=2)
	    { my $he_dt = $content[$i];
	      my $he_dd = $content[$i+1];
	      # The ref tests make plain-text children reach the intended
	      # diagnostic instead of failing on the method call.
	      if (!(ref $he_dt) || !(ref $he_dd)
		  || ($he_dt->tag ne "dt") || ($he_dd->tag ne "dd"))
		{ $he->dump;
		  die "expected <DT> and <DD> at positions $i and ", $i+1; }
	      # An empty <DT> yields an empty list and fails the check below.
	      my @dt_content = @{$he_dt->content() || []};
	      if ((scalar(@dt_content) != 2)
		  || !(ref $dt_content[0]) || !(ref $dt_content[1])
		  || ($dt_content[0]->tag ne "a")
		  || ($dt_content[1]->tag ne "a"))
		{ $he_dt->dump;
		  die "Expected 2 anchors as content of <DT>"; }
	      # Only the first value returned by anchor_info (the name) is used.
	      my ($dt1_name) = anchor_info($dt_content[0]);
	      # Inspect the SECOND anchor here; looking at the first one twice
	      # would make the comparison below always succeed.
	      my ($dt2_name) = anchor_info($dt_content[1]);
	      if ($dt1_name ne $dt2_name)
		{ $he_dt->dump;
		  die "Expected identical names for anchors"; }
	      html_replace_by_ignore($he_dd);
	      $he_dd->tag("div"); # has no effect
	      $footnotes{$dt1_name} = $he_dd; }
	  html_replace_by_ignore($he);
	  return 0; } }

  # The heading was seen but this element is not the expected <DL>.
  if ($process_if_footnotes_expect_dl_next)
    { $he->dump;
      die "Expected <DL> for footnotes next"; }

  return 1;
}



## Merge two adjacent paragraphs containing <DL> items, such as:
#     <P>
#       <DL>
#         <DT>
#           ...
#         <DD>
#           ...
#     <P>
#       <DL>
#         <DT>
#           ...
#         <DD>
#           ...

sub merge_dl ( $$$ )
{ # Traversal callback: element, start flag, and an unused depth argument.
  my ($he, $startflag) = (check_args(3, @_))[0,1];
  return unless $startflag;

  my $children = $he->content;
  return unless defined $children;

  # True when the node is a <P> element accepted by
  # has_single_content_with_tag(..., "dl").
  my $is_dl_paragraph = sub
    { my ($node) = @_;
      return 0 unless ref $node;
      return 0 unless $node->tag eq "p";
      return has_single_content_with_tag($node, "dl"); };

  # The upper bound is re-read on every test because the inner loop
  # shortens the child list while merging.
  for (my $idx = 0; $idx < $#{$children}; $idx++)
    { next unless $is_dl_paragraph->($children->[$idx]);
      my $target_dl = $children->[$idx]->content->[0];

      # Absorb each immediately following <P><DL> into the first list.
      while (($idx < $#{$children})
	     && $is_dl_paragraph->($children->[$idx+1]))
	{ my $absorbed = splice(@{$children}, $idx+1, 1);
	  my $source_dl = $absorbed->content->[0];
	  $target_dl->push_content(@{$source_dl->content}); }

      # Skip the element after the merged run as well: it has just been
      # rejected (or does not exist), so it cannot start a new run.
      $idx++; }
  return 1;
}



###########################################################################
### Testing
###

# Developer test driver.
# Arguments (validated by check_args): an action name and an HTML file.
#   "view" or ""  build the tree with file_to_tree and dump it
#   "raw"         parse with a bare HTML::TreeBuilder and dump the result
#   "section"     run process_section_file on the file
#   "index"       run process_index_file, then print_index_info
# Any other action dies with "Unrecognized action".
sub test ( $$ )
{ my ($action, $file) = check_args(2, @_);

  # General testing
  if (($action eq "view") || ($action eq ""))
    { # # $file = "/homes/gws/mernst/www/links.html";
      # # $file = "/homes/gws/mernst/www/index.html";
      # # $file = "/homes/fish/mernst/java/gud/doc/manual.html";
      # # $file = "/projects/cecil/cecil/doc/manuals/stdlib-man/stdlib/stdlib.html";
      # # $file = "/homes/fish/mernst/tmp/python-doc/html/index.html";
      # $file = "/homes/fish/mernst/tmp/python-doc/html/api/complexObjects.html";
      my $tree = file_to_tree($file);

      ## Testing
      # print STDERR $tree->as_HTML;
      $tree->dump();

      # print STDERR $tree->tag(), "\n";
      # print STDERR @{$tree->content()}, "\n";
      # 
      # for (@{ $tree->extract_links(qw(a img)) }) {
      #   my ($link, $linkelem) = @$_;
      #   print STDERR "$link ", $linkelem->as_HTML;
      #   }
      # 
      # print STDERR @{$tree->extract_links()}, "\n";

      # my @top_level_elts = @{$tree->content()};

      # if scalar(@{$tree->content()})
      return;
    }

  elsif ($action eq "raw")
    { # Direct method call instead of the ambiguous indirect-object form
      # "new HTML::TreeBuilder".
      my $tree = HTML::TreeBuilder->new;
      $tree->ignore_unknown(1);
      # $tree->warn(1);
      $tree->parse_file($file);

      $tree->dump();

      # cleanup_parse_tree($tree);
      # $tree->dump();
      return;
    }

  # Test dealing with a section.
  elsif ($action eq "section")
    { # my $file;
      # $file = "/homes/fish/mernst/tmp/python-doc/html/api/intro.html";
      # $file = "/homes/fish/mernst/tmp/python-doc/html/api/includes.html";
      # $file = "/homes/fish/mernst/tmp/python-doc/html/api/complexObjects.html";
      process_section_file($file, 0, "Title");
    }

  # Test dealing with many sections
  # (disabled: the condition is the constant 0, so this branch never runs)
  elsif (0)
    { my @files = ("/homes/fish/mernst/tmp/python-doc/html/api/about.html",
		   "/homes/fish/mernst/tmp/python-doc/html/api/abstract.html",
		   "/homes/fish/mernst/tmp/python-doc/html/api/api.html",
		   "/homes/fish/mernst/tmp/python-doc/html/api/cObjects.html",
		   "/homes/fish/mernst/tmp/python-doc/html/api/complexObjects.html",
		   "/homes/fish/mernst/tmp/python-doc/html/api/concrete.html",
		   # "/homes/fish/mernst/tmp/python-doc/html/api/contents.html",
		   "/homes/fish/mernst/tmp/python-doc/html/api/countingRefs.html",
		   "/homes/fish/mernst/tmp/python-doc/html/api/debugging.html",
		   "/homes/fish/mernst/tmp/python-doc/html/api/dictObjects.html",
		   "/homes/fish/mernst/tmp/python-doc/html/api/embedding.html",
		   "/homes/fish/mernst/tmp/python-doc/html/api/exceptionHandling.html",
		   "/homes/fish/mernst/tmp/python-doc/html/api/exceptions.html",
		   "/homes/fish/mernst/tmp/python-doc/html/api/fileObjects.html",
		   "/homes/fish/mernst/tmp/python-doc/html/api/floatObjects.html",
		   "/homes/fish/mernst/tmp/python-doc/html/api/front.html",
		   "/homes/fish/mernst/tmp/python-doc/html/api/fundamental.html",
		   # "/homes/fish/mernst/tmp/python-doc/html/api/genindex.html",
		   "/homes/fish/mernst/tmp/python-doc/html/api/importing.html",
		   "/homes/fish/mernst/tmp/python-doc/html/api/includes.html",
		   "/homes/fish/mernst/tmp/python-doc/html/api/index.html",
		   "/homes/fish/mernst/tmp/python-doc/html/api/initialization.html",
		   "/homes/fish/mernst/tmp/python-doc/html/api/intObjects.html",
		   "/homes/fish/mernst/tmp/python-doc/html/api/intro.html",
		   "/homes/fish/mernst/tmp/python-doc/html/api/listObjects.html",
		   "/homes/fish/mernst/tmp/python-doc/html/api/longObjects.html",
		   "/homes/fish/mernst/tmp/python-doc/html/api/mapObjects.html",
		   "/homes/fish/mernst/tmp/python-doc/html/api/mapping.html",
		   "/homes/fish/mernst/tmp/python-doc/html/api/newTypes.html",
		   "/homes/fish/mernst/tmp/python-doc/html/api/node24.html",
		   "/homes/fish/mernst/tmp/python-doc/html/api/noneObject.html",
		   "/homes/fish/mernst/tmp/python-doc/html/api/number.html",
		   "/homes/fish/mernst/tmp/python-doc/html/api/numericObjects.html",
		   "/homes/fish/mernst/tmp/python-doc/html/api/object.html",
		   "/homes/fish/mernst/tmp/python-doc/html/api/objects.html",
		   "/homes/fish/mernst/tmp/python-doc/html/api/os.html",
		   "/homes/fish/mernst/tmp/python-doc/html/api/otherObjects.html",
		   "/homes/fish/mernst/tmp/python-doc/html/api/processControl.html",
		   "/homes/fish/mernst/tmp/python-doc/html/api/refcountDetails.html",
		   "/homes/fish/mernst/tmp/python-doc/html/api/refcounts.html",
		   "/homes/fish/mernst/tmp/python-doc/html/api/sequence.html",
		   "/homes/fish/mernst/tmp/python-doc/html/api/sequenceObjects.html",
		   "/homes/fish/mernst/tmp/python-doc/html/api/standardExceptions.html",
		   "/homes/fish/mernst/tmp/python-doc/html/api/stringObjects.html",
		   "/homes/fish/mernst/tmp/python-doc/html/api/threads.html",
		   "/homes/fish/mernst/tmp/python-doc/html/api/tupleObjects.html",
		   "/homes/fish/mernst/tmp/python-doc/html/api/typeObjects.html",
		   "/homes/fish/mernst/tmp/python-doc/html/api/types.html",
		   "/homes/fish/mernst/tmp/python-doc/html/api/utilities.html",
		   "/homes/fish/mernst/tmp/python-doc/html/api/veryhigh.html");
      for my $file (@files)
	{ print STDERR "\n", "=" x 75, "\n", "$file:\n";
	  process_section_file($file, 0, "Title");
	}
    }

  # Test dealing with index.
  elsif ($action eq "index")
    { # my $file;
      # $file = "/homes/fish/mernst/tmp/python-doc/html/api/genindex.html";

      process_index_file($file, "\@cindex");
      print_index_info();
    }

  else
    { die "Unrecognized action `$action'"; }
}


###########################################################################
### Main loop
###

# Convert a whole HTML document, starting from its main/contents page, into
# one Texinfo file.
# Argument (validated by check_args): the path of the main/contents page.
# The output file is named after the input: a trailing ".html" (and a
# preceding "index") and all leading directories are stripped, and ".texi"
# is appended; if nothing is left, the name of the current directory is
# used.  Sets the globals $html_directory and $current_ref_tdf and drains
# @contents_list.
# Dies if the output file cannot be opened or closed.
sub process_contents_file ( $ )
{ my ($file) = check_args(1, @_);

  # could also use File::Basename
  my $info_file = $file;
  $info_file =~ s/(\/?index)?\.html$//;
  if ($info_file eq "")
    { chomp($info_file = `pwd`); }
  $info_file =~ s/^.*\///;	# not the most efficient way to remove dirs

  # Keep only the directory part of $file, including the trailing slash.
  $html_directory = $file;
  $html_directory =~ s/(\/|^)[^\/]+$/$1/;

  my $texi_file = "$info_file.texi";
  # Three-argument open keeps the file name from being read as a mode, and
  # the check prevents every later print from being silently discarded.
  # TEXI stays a bareword handle on the assumption that other parts of the
  # program print to it.
  open(TEXI, ">", $texi_file)
    or die "Cannot open $texi_file for writing: $!";

  print TEXI "\\input texinfo   \@c -*-texinfo-*-\n";
  print TEXI "\@c %**start of header\n";
  print TEXI "\@setfilename $info_file\n";

  # 2. Summary Description and Copyright
  #      The "Summary Description and Copyright" segment describes the
  #      document and contains the copyright notice and copying permissions
  #      for the Info file.  The segment must be enclosed between `@ifinfo'
  #      and `@end ifinfo' commands so that the formatters place it only in
  #      the Info file.
  # 
  # The summary description and copyright segment does not appear in the
  # printed document.
  # 
  #      @ifinfo
  #      This is a short example of a complete Texinfo file.
  #      
  #      Copyright @copyright{} 1990 Free Software Foundation, Inc.
  #      @end ifinfo


  # 3. Title and Copyright
  #      The "Title and Copyright" segment contains the title and copyright
  #      pages and copying permissions for the printed manual.  The segment
  #      must be enclosed between `@titlepage' and `@end titlepage'
  #      commands.  The title and copyright page appear only in the printed
  #      manual.
  # 
  # The titlepage segment does not appear in the Info file.
  # 
  #      @titlepage
  #      @sp 10
  #      @comment The title is printed in a large font.
  #      @center @titlefont{Sample Title}
  #      
  #      @c The following two commands start the copyright page.
  #      @page
  #      @vskip 0pt plus 1filll
  #      Copyright @copyright{} 1990 Free Software Foundation, Inc.
  #      @end titlepage


  # 4. `Top' Node and Master Menu
  #      The "Master Menu" contains a complete menu of all the nodes in the
  #      whole Info file.  It appears only in the Info file, in the `Top'
  #      node.
  # 
  # The `Top' node contains the master menu for the Info file.  Since a
  # printed manual uses a table of contents rather than a menu, the master
  # menu appears only in the Info file.
  # 
  #      @node    Top,       First Chapter, ,         (dir)
  #      @comment node-name, next,          previous, up
  # 
  #      @menu
  #      * First Chapter::    The first chapter is the
  #                           only chapter in this sample.
  #      * Concept Index::    This index has two entries.
  #      @end menu



  # Process the top page first, then every section queued in @contents_list.
  # Each queue entry is a reference to a [title, depth, file] triple.
  $current_ref_tdf = [ "Top", 0, $ARGV[0] ];
  process_section_file($file, 0, "Top");
  while (scalar(@contents_list))
  { $current_ref_tdf = shift @contents_list;
    process_section_file($ {$current_ref_tdf}[2], $ {$current_ref_tdf}[1], $ {$current_ref_tdf}[0]);
  }

  # Emit one unnumbered node per collected index.
  print TEXI "\n";
  for my $indextitle (@index_titles)
    { print TEXI "\@node $indextitle\n";
      print TEXI "\@unnumbered $indextitle\n";
      print TEXI "\@printindex $ {$index_info{$indextitle}}[1]\n";
      print TEXI "\n"; }

  print TEXI "\@contents\n";
  print TEXI "\@bye\n";
  # Buffered write errors only surface at close, so check it.
  close(TEXI)
    or die "Error closing $texi_file: $!";
}

# This needs to be last so global variable initializations are reached.

# Command-line entry point: either "-test [action] [file]" or a single
# argument naming the main/contents page.
die "No arguments supplied to html2texi.pl" unless scalar(@ARGV);

if ($ARGV[0] eq "-test")
{ # Everything after the "-test" flag belongs to the test driver.
  my @test_args = @ARGV[1..$#ARGV];
  my $supplied = scalar(@test_args);
  die "Too many test arguments passed to html2texi: ", join(" ", @ARGV)
    if $supplied > 2;

  # With two arguments the first is the action; otherwise the action is
  # empty.  The file is the last argument, or "index.html" if none given.
  my $action = ($supplied == 2) ? $test_args[0] : "";
  my $file = ($supplied == 0) ? "index.html" : $test_args[$#test_args];
  test($action, $file);
  exit();
}

die "Pass one argument, the main/contents page" unless scalar(@ARGV) == 1;

process_contents_file($ARGV[0]);

# end of html2texi.pl
上一页 1 2 34
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -