📄 cvs2cl.pl

📁 java写的html的解析器parser
💻 PL
📖 第 1 页 / 共 5 页
字号:
12 3 4 5 下一页
#!/bin/shexec perl -w -x $0 ${1+"$@"} # -*- mode: perl; perl-indent-level: 2; -*-#!perl -w#################################################################                                                        ###### cvs2cl.pl: produce ChangeLog(s) from `cvs log` output. ######                                                        ################################################################### $Revision: 1.1 $## $Date: 2003/09/23 03:41:34 $## $Author: derrickoswald $####   (C) 2001,2002,2003 Martyn J. Pearce <fluffy@cpan.org>, under the GNU GPL.##   (C) 1999 Karl Fogel <kfogel@red-bean.com>, under the GNU GPL.####   (Extensively hacked on by Melissa O'Neill <oneill@cs.sfu.ca>.)##   (Gecos hacking by Robin Johnson <robbat2@orbis-terrarum.net>.)#### cvs2cl.pl is free software; you can redistribute it and/or modify## it under the terms of the GNU General Public License as published by## the Free Software Foundation; either version 2, or (at your option)## any later version.#### cvs2cl.pl is distributed in the hope that it will be useful,## but WITHOUT ANY WARRANTY; without even the implied warranty of## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the## GNU General Public License for more details.#### You may have received a copy of the GNU General Public License## along with cvs2cl.pl; see the file COPYING.  If not, write to the## Free Software Foundation, Inc., 59 Temple Place - Suite 330,## Boston, MA 02111-1307, USA.use strict;use Text::Wrap;use Time::Local;use File::Basename qw( fileparse );use User::pwent;# The Plan:## Read in the logs for multiple files, spit out a nice ChangeLog that# mirrors the information entered during `cvs commit'.## The problem presents some challenges. In an ideal world, we could# detect files with the same author, log message, and checkin time --# each <filelist, author, time, logmessage> would be a changelog entry.# We'd sort them; and spit them out.  
# Unfortunately, CVS is *not atomic*
# so checkins can span a range of times.  Also, the directory structure
# could be hierarchical.
#
# Another question is whether we really want to have the ChangeLog
# exactly reflect commits. An author could issue two related commits,
# with different log entries, reflecting a single logical change to the
# source. GNU style ChangeLogs group these under a single author/date.
# We try to do the same.
#
# So, we parse the output of `cvs log', storing log messages in a
# multilevel hash that stores the mapping:
#   directory => author => time => message => filelist
# As we go, we notice "nearby" commit times and store them together
# (i.e., under the same timestamp), so they appear in the same log
# entry.
#
# When we've read all the logs, we twist this mapping into
# a time => author => message => filelist mapping for each directory.
#
# If we're not using the `--distributed' flag, the directory is always
# considered to be `./', even as we descend into subdirectories.

############### Globals ################

# Path stored as the MAILNAME constant.
# NOTE(review): presumably read to determine the mail domain -- its use is
# not visible in this part of the file; confirm.
use constant MAILNAME => "/etc/mailname";

# What we run to generate it:
my $Log_Source_Command = "cvs log";

# In case we have to print it out:
my $VERSION = '$Revision: 1.1 $';
# Reduce the RCS keyword string to its middle field (the revision number).
$VERSION =~ s/\S+\s+(\S+)\s+\S+/$1/;

## Vars set by options:

# Print debugging messages?
my $Debug = 0;

# Just show version and exit?
my $Print_Version = 0;

# Just print usage message and exit?
my $Print_Usage = 0;

# Single top-level ChangeLog, or one per subdirectory?
my $Distributed = 0;

# What file should we generate (defaults to "ChangeLog")?
my $Log_File_Name = "ChangeLog";

# Grab most recent entry date from existing ChangeLog file, just add
# to that ChangeLog.
my $Cumulative = 0;

# NOTE(review): the opening of the following comment appears to be
# missing; presumably it describes an update mode that feeds the most
# recent ChangeLog date to `cvs log -d` -- confirm against the option
# parser.
# `cvs log -d`, this will repeat the last entry in the old log.  This is OK,
# as it guarantees at least one entry in the update changelog, which means
# that there will always be a date to extract for the next update.  The repeat
# entry can be removed in postprocessing, if necessary.
my $Update = 0;

# Expand usernames to email addresses based on a map file?
my $User_Map_File = "";
# NOTE(review): presumably a passwd-style file used for the same username
# expansion, and the mail domain appended to addresses; their uses are not
# visible in this part of the file -- confirm.
my $User_Passwd_File;
my $Mail_Domain;

# Output log in chronological order? [default is reverse chronological order]
my $Chronological_Order = 0;

# Grab user details via gecos
my $Gecos = 0;

# User domain for gecos email addresses
my $Domain = "";

# Output to a file or to stdout?
my $Output_To_Stdout = 0;

# Eliminate empty log messages?
my $Prune_Empty_Msgs = 0;

# Tags for which output is suppressed
my %ignore_tags;

# Show only revisions with Tags
my %show_tags;

# Don't call Text::Wrap on the body of the message
my $No_Wrap = 0;

# Don't do any pretty print processing
my $Summary = 0;

# Separates header from log message.  Code assumes it is either " " or
# "\n\n", so if there's ever an option to set it to something else,
# make sure to go through all conditionals that use this var.
my $After_Header = " ";

# XML Encoding
my $XML_Encoding = '';

# Format more for programs than for humans.
my $XML_Output = 0;

# Do some special tweaks for log data that was written in FSF
# ChangeLog style.
my $FSF_Style = 0;

# Show times in UTC instead of local time
my $UTC_Times = 0;

# Show times in output?
my $Show_Times = 1;

# Show day of week in output?
my $Show_Day_Of_Week = 0;

# Show revision numbers in output?
my $Show_Revisions = 0;

# Show dead files in output?
my $Show_Dead = 0;

# Show tags (symbolic names) in output?
my $Show_Tags = 0;

# Show tags separately in output?
my $Show_Tag_Dates = 0;

# Show branches by symbolic name in output?
my $Show_Branches = 0;

# Show only revisions on these branches or their ancestors.
my @Follow_Branches;

# Don't bother with files matching these regexps.
my @Ignore_Files;

# How exactly we match entries.  We definitely want "o",
# and user might add "i" by using --case-insensitive option.
my $Case_Insensitive = 0;

# Maybe only show log messages matching a certain regular expression.
my $Regexp_Gate = "";

# Pass this global option string along to cvs, to the left of `log':
my $Global_Opts = "";

# Pass this option string along to the cvs log subcommand:
my $Command_Opts = "";

# Read log output from stdin instead of invoking cvs log?
my $Input_From_Stdin = 0;

# Don't show filenames in output.
my $Hide_Filenames = 0;

# Don't shorten directory names from filenames.
my $Common_Dir = 1;

# Max checkin duration. CVS checkin is not atomic, so we may have checkin
# times that span a range of time. We assume that checkins will last no
# longer than $Max_Checkin_Duration seconds, and that similarly, no
# checkins will happen from the same users with the same message less
# than $Max_Checkin_Duration seconds apart.
my $Max_Checkin_Duration = 180;

# What to put at the front of [each] ChangeLog.
my $ChangeLog_Header = "";

# Whether to enable 'delta' mode, and for what start/end tags.
my $Delta_Mode = 0;
my $Delta_From = "";
my $Delta_To = "";

# Perl source text; when defined it is eval'd by the main dispatch below
# instead of deriving the change log.
my $TestCode;

# Whether to parse filenames from the RCS filename, and if so what
# prefix to strip.
my $RCS_Mode = 0;
my $RCS_Root = "";

## end vars set by options.

# latest observed times for the start/end tags in delta mode
my $Delta_StartTime = 0;
my $Delta_EndTime = 0;

# In 'cvs log' output, one long unbroken line of equal signs separates
# files:
my $file_separator = "======================================="
                   . "======================================";

# In 'cvs log' output, a shorter line of dashes separates log messages
# within a file:
my $logmsg_separator = "----------------------------";

# NOTE(review): purpose not visible in this part of the file; presumably
# set by an option that restricts output to the followed branches
# themselves, excluding their ancestors -- confirm.
my $No_Ancestors = 0;

############### End globals ############

# Main dispatch: parse the command line, then either run the supplied test
# code (dying with the eval error if it fails) or build the ChangeLog.
&parse_options ();
if ( defined $TestCode ) {
  eval $TestCode;
  die "Eval failed: '$@'\n"
    if $@;
} else {
  &derive_change_log ();
}

### Everything below is subroutine definitions.
###

# Run an external command through the shell, capturing stdout and stderr
# together.
#
# $cmd is either a command string or an array ref of words; the words are
# joined with single spaces and passed to qx, so any quoting is the
# caller's responsibility.
#
# Returns the list ($output, $exit_status, $signal, $core_flag), the last
# three decoded from the child's wait status.
sub run_ext {
  my ($cmd) = @_;
  $cmd = [$cmd]
    unless ref $cmd;
  local $" = ' ';
  my $out = qx"@$cmd 2>&1";
  # Snapshot the wait status once so all three fields come from one value.
  my $rv  = $?;
  my ($sig, $core, $exit) = ($rv & 127, $rv & 128, $rv >> 8);
  return $out, $exit, $sig, $core;
}

# If accumulating (or updating), grab the boundary date from the
# pre-existing ChangeLog.
#
# Returns the first "YYYY-MM-DD HH:MM" stamp found at the start of a line
# in $Log_File_Name, or "" when neither mode is enabled or no such line
# exists.  Dies if the file cannot be opened.
sub maybe_grab_accumulation_date ()
{
  # Parenthesised on purpose: `! $Cumulative || $Update' binds as
  # `(! $Cumulative) || $Update', which made update mode bail out here.
  if (! ($Cumulative || $Update)) {
    return "";
  }

  # Three-arg open with a lexical handle, so that characters in the file
  # name can never be interpreted as an open mode or a pipe.
  open (my $log_fh, '<', $Log_File_Name)
      or die ("trouble opening $Log_File_Name for reading ($!)");

  my $boundary_date = "";
  while (<$log_fh>)
  {
    if (/^(\d\d\d\d-\d\d-\d\d\s+\d\d:\d\d)/)
    {
      $boundary_date = "$1";
      last;
    }
  }

  close ($log_fh);

  return $boundary_date;
}

# Fills up a ChangeLog structure in the current directory.
sub derive_change_log ()
{
  # See "The Plan" above for a full explanation.

  my %grand_poobah;

  my $file_full_path;
  my $time;
  my $revision;
  my $author;
  my $state;
  my $lines;
  my $cvsstate;
  my $msg_txt;
  my $detected_file_separator;

  my %tag_date_printed;

  # Might be adding to an existing ChangeLog
  my $accumulation_date = &maybe_grab_accumulation_date ();
  if ($accumulation_date) {
    # Insert -d immediately after 'cvs log'
    my $Log_Date_Command = "-d\'>${accumulation_date}\'";
    $Log_Source_Command =~ s/(^.*log\S*)/$1 $Log_Date_Command/;
    &debug ("(adding log msg starting from $accumulation_date)\n");
  }

  # We might be expanding usernames
  my %usermap;

  # In general, it's probably not very maintainable to use state
  # variables like this to tell the loop what it's doing at any given
  # moment, but this is only the first one, and if we never have more
  # than a few of these, it's okay.
  my $collecting_symbolic_names = 0;
  my %symbolic_names;    # Where tag names get stored.
  my %branch_names;      # We'll grab branch names while we're at it.
  my %branch_numbers;    # Save some revisions for @Follow_Branches
  my @branch_roots;      # For showing which files are branch ancestors.

  # Bleargh.  Compensate for a deficiency of custom wrapping.
if (($After_Header ne " ") and $FSF_Style)  {    $After_Header .= "\t";  }  if (! $Input_From_Stdin) {    &debug ("(run \"${Log_Source_Command}\")\n");    open (LOG_SOURCE, "$Log_Source_Command |")        or die "unable to run \"${Log_Source_Command}\"";  }  else {    open (LOG_SOURCE, "-") or die "unable to open stdin for reading";  }  binmode LOG_SOURCE;  %usermap = &maybe_read_user_map_file ();  while (<LOG_SOURCE>)  {    # Canonicalize line endings    s/\r$//;    my $new_full_path;    # If on a new file and don't see filename, skip until we find it, and    # when we find it, grab it.    if (! (defined $file_full_path))    {      if (/^Working file: (.*)/) {        $new_full_path = $1;      } elsif ($RCS_Mode && m|^RCS file: $RCS_Root/(.*),v$|) {        $new_full_path = $1;      }    }    if (defined $new_full_path)    {      $file_full_path = $new_full_path;      if (@Ignore_Files)      {        my $base;        ($base, undef, undef) = fileparse ($file_full_path);        # Ouch, I wish trailing operators in regexps could be        # evaluated on the fly!        if ($Case_Insensitive) {          if (grep ($file_full_path =~ m|$_|i, @Ignore_Files)) {            undef $file_full_path;          }        }        elsif (grep ($file_full_path =~ m|$_|, @Ignore_Files)) {          undef $file_full_path;        }      }      next;    }    # Just spin wheels if no file defined yet.    next if (! $file_full_path);    # Collect tag names in case we're asked to print them in the output.    if (/^symbolic names:$/) {      $collecting_symbolic_names = 1;      next;  # There's no more info on this line, so skip to next    }    if ($collecting_symbolic_names)    {      # All tag names are listed with whitespace in front in cvs log      # output; so if see non-whitespace, then we're done collecting.      
if (/^\S/) {        $collecting_symbolic_names = 0;      }      else    # we're looking at a tag name, so parse & store it      {        # According to the Cederqvist manual, in node "Tags", tag        # names must start with an uppercase or lowercase letter and        # can contain uppercase and lowercase letters, digits, `-',        # and `_'.  However, it's not our place to enforce that, so        # we'll allow anything CVS hands us to be a tag:        /^\s+([^:]+): ([\d.]+)$/;
12 3 4 5 下一页
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -