📄 cvs2cl.pl

📁 java写的html的解析器parser
💻 PL
📖 第 1 页 / 共 5 页
字号:
      foreach my $tag (@{$$qunkref{'tags'}}) {        $non_unanimous_tags{$tag} = 1;      }    }  }  # Any tag held by all qunks will be printed specially... but only if  # there are multiple qunks in the first place!  if ((scalar (@qunkrefs)) > 1) {    foreach my $tag (keys (%non_unanimous_tags)) {      my $everyone_has_this_tag = 1;      foreach my $qunkref (@qunkrefs) {        if ((! (defined ($$qunkref{'tags'})))            or (! (grep ($_ eq $tag, @{$$qunkref{'tags'}})))) {          $everyone_has_this_tag = 0;        }      }      if ($everyone_has_this_tag) {        $unanimous_tags{$tag} = 1;        delete $non_unanimous_tags{$tag};      }    }  }  if ($XML_Output)  {    # If outputting XML, then our task is pretty simple, because we    # don't have to detect common dir, common tags, branch prefixing,    # etc.  We just output exactly what we have, and don't worry about    # redundancy or readability.    foreach my $qunkref (@qunkrefs)    {      my $filename    = $$qunkref{'filename'};      my $cvsstate    = $$qunkref{'cvsstate'};      my $revision    = $$qunkref{'revision'};      my $tags        = $$qunkref{'tags'};      my $branch      = $$qunkref{'branch'};      my $branchroots = $$qunkref{'branchroots'};      $filename = &xml_escape ($filename);   # probably paranoia      $revision = &xml_escape ($revision);   # definitely paranoia      $beauty .= "<file>\n";      $beauty .= "<name>${filename}</name>\n";      $beauty .= "<cvsstate>${cvsstate}</cvsstate>\n";      $beauty .= "<revision>${revision}</revision>\n";      if ($branch) {        $branch   = &xml_escape ($branch);     # more paranoia        $beauty .= "<branch>${branch}</branch>\n";      }      foreach my $tag (@$tags) {        $tag = &xml_escape ($tag);  # by now you're used to the paranoia        $beauty .= "<tag>${tag}</tag>\n";      }      foreach my $root (@$branchroots) {        $root = &xml_escape ($root);  # which is good, because it will continue        $beauty .= "<branchroot>${root}</branchroot>\n";      }      $beauty .= "</file>\n";    }    # Theoretically, we could go home now.  But as long as we're here,    # let's print out the common_dir and utags, as a convenience to    # the receiver (after all, earlier code calculated that stuff    # anyway, so we might as well take advantage of it).    if ((scalar (keys (%unanimous_tags))) > 1) {      foreach my $utag ((keys (%unanimous_tags))) {        $utag = &xml_escape ($utag);   # the usual paranoia        $beauty .= "<utag>${utag}</utag>\n";      }    }    if ($common_dir) {      $common_dir = &xml_escape ($common_dir);      $beauty .= "<commondir>${common_dir}</commondir>\n";    }    # That's enough for XML, time to go home:    return $beauty;  }  # Else not XML output, so complexly compactify for chordate  # consumption.  At this point we have enough global information  # about all the qunks to organize them non-redundantly for output.  if ($common_dir) {    # Note that $common_dir still has its trailing slash    $beauty .= "$common_dir: ";  }  if ($Show_Branches)  {    # For trailing revision numbers.    my @brevisions;    foreach my $branch (keys (%all_branches))    {      foreach my $qunkref (@qunkrefs)      {        if ((defined ($$qunkref{'branch'}))            and ($$qunkref{'branch'} eq $branch))        {          if ($fbegun) {            # kff todo: comma-delimited in XML too?  Sure.            $beauty .= ", ";          }          else {            $fbegun = 1;          }          my $fname = substr ($$qunkref{'filename'}, length ($common_dir));          $beauty .= $fname;          $$qunkref{'printed'} = 1;  # Just setting a mark bit, basically          if ($Show_Tags && (defined @{$$qunkref{'tags'}})) {            my @tags = grep ($non_unanimous_tags{$_}, @{$$qunkref{'tags'}});            if (@tags) {              $beauty .= " (tags: ";              $beauty .= join (', ', @tags);              $beauty .= ")";            }          }          if ($Show_Revisions) {            # Collect the revision numbers' last components, but don't            # print them -- they'll get printed with the branch name            # later.            $$qunkref{'revision'} =~ /.+\.([\d]+)$/;            push (@brevisions, $1);            # todo: we're still collecting branch roots, but we're not            # showing them anywhere.  If we do show them, it would be            # nifty to just call them revision "0" on a the branch.            # Yeah, that's the ticket.          }        }      }      $beauty .= " ($branch";      if (@brevisions) {        if ((scalar (@brevisions)) > 1) {          $beauty .= ".[";          $beauty .= (join (',', @brevisions));          $beauty .= "]";        }        else {          # Square brackets are spurious here, since there's no range to          # encapsulate          $beauty .= ".$brevisions[0]";        }      }      $beauty .= ")";    }  }  # Okay; any qunks that were done according to branch are taken care  # of, and marked as printed.  Now print everyone else.  my %fileinfo_printed;  foreach my $qunkref (@qunkrefs)  {    next if (defined ($$qunkref{'printed'}));   # skip if already printed    my $b = substr ($$qunkref{'filename'}, length ($common_dir));    # todo: Shlomo's change was this:    # $beauty .= substr ($$qunkref{'filename'},    #              (($common_dir eq "./") ? "" : length ($common_dir)));    $$qunkref{'printed'} = 1;  # Set a mark bit.    if ($Show_Revisions || $Show_Tags || $Show_Dead)    {      my $started_addendum = 0;      if ($Show_Revisions) {        $started_addendum = 1;        $b .= " (";        $b .= "$$qunkref{'revision'}";      }      if ($Show_Dead && $$qunkref{'cvsstate'} =~ /dead/)      {	# Deliberately not using $started_addendum. Keeping it simple.	$b .= "[DEAD]";      }      if ($Show_Tags && (defined $$qunkref{'tags'})) {        my @tags = grep ($non_unanimous_tags{$_}, @{$$qunkref{'tags'}});        if ((scalar (@tags)) > 0) {          if ($started_addendum) {            $b .= ", ";          }          else {            $b .= " (tags: ";          }          $b .= join (', ', @tags);          $started_addendum = 1;        }      }      if ($started_addendum) {        $b .= ")";      }    }    unless ( exists $fileinfo_printed{$b} ) {      if ($fbegun) {        $beauty .= ", ";      } else {        $fbegun = 1;      }      $beauty .= $b, $fileinfo_printed{$b} = 1;    }  }  # Unanimous tags always come last.  if ($Show_Tags && %unanimous_tags)  {    $beauty .= " (utags: ";    $beauty .= join (', ', sort keys (%unanimous_tags));    $beauty .= ")";  }  # todo: still have to take care of branch_roots?  $beauty = "* $beauty:";  return $beauty;}sub min ($$) { $_[0] < $_[1] ? $_[0] : $_[1] }sub common_path_prefix ($$){  my ($path1, $path2) = @_;  # For compatibility (with older versions of cvs2cl.pl), we think in UN*X  # terms, and mould windoze filenames to match.  Is this really appropriate?  # If a file is checked in under UN*X, and cvs log run on windoze, which way  # do the path separators slope?  Can we use fileparse as per the local  # conventions?  If so, we should probably have a user option to specify an  # OS to emulate to handle stdin-fed logs.  If we did this, we could avoid  # the nasty \-/ transmogrification below.  my ($dir1, $dir2) = map +(fileparse($_))[1], $path1, $path2;  # Transmogrify Windows filenames to look like Unix.  # (It is far more likely that someone is running cvs2cl.pl under  # Windows than that they would genuinely have backslashes in their  # filenames.)  tr!\\!/!    for $dir1, $dir2;  my ($accum1, $accum2, $last_common_prefix) = ('') x 3;  my @path1 = grep length($_), split qr!/!, $dir1;  my @path2 = grep length($_), split qr!/!, $dir2;  my @common_path;  for (0..min($#path1,$#path2)) {    if ( $path1[$_] eq $path2[$_]) {      push @common_path, $path1[$_];    } else {      last;    }  }  return join '', map "$_/", @common_path;}sub preprocess_msg_text (){  my $text = shift;  # Strip out carriage returns (as they probably result from DOSsy editors).  $text =~ s/\r\n/\n/g;  # If it *looks* like two newlines, make it *be* two newlines:  $text =~ s/\n\s*\n/\n\n/g;  if ($XML_Output)  {    $text = &xml_escape ($text);    $text = "<msg>${text}</msg>\n";  }  elsif (! $No_Wrap)  {    # Strip off lone newlines, but only for lines that don't begin with    # whitespace or a mail-quoting character, since we want to preserve    # that kind of formatting.  Also don't strip newlines that follow a    # period; we handle those specially next.  And don't strip    # newlines that precede an open paren.    1 while ($text =~ s/(^|\n)([^>\s].*[^.\n])\n([^>\n])/$1$2 $3/g);    # If a newline follows a period, make sure that when we bring up the    # bottom sentence, it begins with two spaces.    1 while ($text =~ s/(^|\n)([^>\s].*)\n([^>\n])/$1$2  $3/g);  }  return $text;}sub last_line_len (){  my $files_list = shift;  my @lines = split (/\n/, $files_list);  my $last_line = pop (@lines);  return length ($last_line);}# A custom wrap function, sensitive to some common constructs used in# log entries.sub wrap_log_entry (){  my $text = shift;                  # The text to wrap.  my $left_pad_str = shift;          # String to pad with on the left.  # These do NOT take left_pad_str into account:  my $length_remaining = shift;      # Amount left on current line.  my $max_line_length  = shift;      # Amount left for a blank line.  my $wrapped_text = "";             # The accumulating wrapped entry.  my $user_indent = "";              # Inherited user_indent from prev line.  my $first_time = 1;                # First iteration of the loop?  my $suppress_line_start_match = 0; # Set to disable line start checks.  my @lines = split (/\n/, $text);  while (@lines)   # Don't use `foreach' here, it won't work.  {    my $this_line = shift (@lines);    chomp $this_line;    if ($this_line =~ /^(\s+)/) {      $user_indent = $1;    }    else {      $user_indent = "";    }    # If it matches any of the line-start regexps, print a newline now...    if ($suppress_line_start_match)    {      $suppress_line_start_match = 0;    }    elsif (($this_line =~ /^(\s*)\*\s+[a-zA-Z0-9]/)           || ($this_line =~ /^(\s*)\* [a-zA-Z0-9_\.\/\+-]+/)           || ($this_line =~ /^(\s*)\([a-zA-Z0-9_\.\/\+-]+(\)|,\s*)/)           || ($this_line =~ /^(\s+)(\S+)/)           || ($this_line =~ /^(\s*)- +/)           || ($this_line =~ /^()\s*$/)           || ($this_line =~ /^(\s*)\*\) +/)           || ($this_line =~ /^(\s*)[a-zA-Z0-9](\)|\.|\:) +/))    {      # Make a line break immediately, unless header separator is set      # and this line is the first line in the entry, in which case      # we're getting the blank line for free already and shouldn't      # add an extra one.      unless (($After_Header ne " ") and ($first_time))      {        if ($this_line =~ /^()\s*$/) {          $suppress_line_start_match = 1;          $wrapped_text .= "\n${left_pad_str}";        }        $wrapped_text .= "\n${left_pad_str}";      }      $length_remaining = $max_line_length - (length ($user_indent));    }    # Now that any user_indent has been preserved, strip off leading    # whitespace, so up-folding has no ugly side-effects.    $this_line =~ s/^\s*//;    # Accumulate the line, and adjust parameters for next line.    my $this_len = length ($this_line);    if ($this_len == 0)    {      # Blank lines should cancel any user_indent level.      $user_indent = "";      $length_remaining = $max_line_length;    }    elsif ($this_len >= $length_remaining) # Line too long, try breaking it.    {      # Walk backwards from the end.  At first acceptable spot, break      # a new line.      my $idx = $length_remaining - 1;      if ($idx < 0) { $idx = 0 };      while ($idx > 0)      {        if (substr ($this_line, $idx, 1) =~ /\s/)        {          my $line_now = substr ($this_line, 0, $idx);          my $next_line = substr ($this_line, $idx);          $this_line = $line_now;          # Clean whitespace off the end.          chomp $this_line;          # The current line is ready to be printed.          $this_line .= "\n${left_pad_str}";          # Make sure the next line is allowed full room.          $length_remaining = $max_line_length - (length ($user_indent));          # Strip next_line, but then preserve any user_indent.          $next_line =~ s/^\s*//;          # Sneak a peek at the user_indent of the upcoming line, so          # $next_line (which will now precede it) can inherit that          # indent level.  Otherwise, use whatever user_indent level          # we currently have, which might be none.          my $next_next_line = shift (@lines);          if ((defined ($next_next_line)) && ($next_next_line =~ /^(\s+)/)) {            $next_line = $1 . $next_line if (defined ($1));            # $length_remaining = $max_line_length - (length ($1));            $next_next_line =~ s/^\s*//;          }          else {            $next_line = $user_indent . $next_line;          }          if (defined ($next_next_line)) {            unshift (@lines, $next_next_line);          }          unshift (@lines, $next_line);          # Our new next line might, coincidentally, begin with one of          # the line-start regexps, so we temporarily turn off          # sensitivity to that until we're past the line.
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -