cvs2cl.pl
        if ($lst) {
          @branch_roots = split (/;\s+/, $lst);
        }
        else {
          undef @branch_roots;
        }
        next;
      }
      else {
        # Ugh. This really bothers me. Suppose we see a log entry
        # like this:
        #
        #    ----------------------------
        #    revision 1.1
        #    date: 1999/10/17 03:07:38; author: jrandom; state: Exp;
        #    branches: 1.1.2;
        #    Intended first line of log message begins here.
        #    ----------------------------
        #
        # The question is, how we can tell the difference between that
        # log message and a *two*-line log message whose first line is
        #
        #    "branches: 1.1.2;"
        #
        # See the problem? The output of "cvs log" is inherently
        # ambiguous.
        #
        # For now, we punt: we liberally assume that people don't
        # write log messages like that, and just toss a "branches:"
        # line if we see it but are not showing branches. I hope no
        # one ever loses real log data because of this.
        next;
      }
    }

    # If have file name, time, and author, then we're just grabbing
    # log message texts:
    $detected_file_separator = /^$file_separator$/o;
    if ($detected_file_separator && ! (defined $revision)) {
      # No revisions for this file; can happen, e.g. "cvs log -d DATE"
      goto CLEAR;
    }
    unless ($detected_file_separator || /^$logmsg_separator$/o) {
      $msg_txt .= $_;   # Normally, just accumulate the message...
      next;
    }

    # ... until a msg separator is encountered:
    # Ensure the message contains something:
    if ((! $msg_txt)
        || ($msg_txt =~ /^\s*\.\s*$|^\s*$/)
        || ($msg_txt =~ /\*\*\* empty log message \*\*\*/)) {
      if ($Prune_Empty_Msgs) {
        goto CLEAR;
      }
      # else
      $msg_txt = "[no log message]\n";
    }

    ### Store it all in the Grand Poobah:
    {
      my $dir_key;        # key into %grand_poobah
      my %qunk;           # complicated little jobbie, see below

      # Each revision of a file has a little data structure (a `qunk')
      # associated with it. That data structure holds not only the
      # file's name, but any additional information about the file
      # that might be needed in the output, such as the revision
      # number, tags, branches, etc. The reason to have these things
      # arranged in a data structure, instead of just appending them
      # textually to the file's name, is that we may want to do a
      # little rearranging later as we write the output. For example,
      # all the files on a given tag/branch will go together, followed
      # by the tag in parentheses (so trunk or otherwise non-tagged
      # files would go at the end of the file list for a given log
      # message). This rearrangement is a lot easier to do if we
      # don't have to reparse the text.
      #
      # A qunk looks like this:
      #
      #   {
      #     filename    => "hello.c",
      #     revision    => "1.4.3.2",
      #     time        => a timegm() return value (moment of commit)
      #     tags        => [ "tag1", "tag2", ... ],
      #     branch      => "branchname"   # There should be only one, right?
      #     branchroots => [ "branchtag1", "branchtag2", ... ]
      #   }

      if ($Distributed) {
        # Just the basename, don't include the path.
        ($qunk{'filename'}, $dir_key, undef) = fileparse ($file_full_path);
      }
      else {
        $dir_key = "./";
        $qunk{'filename'} = $file_full_path;
      }

      # This may someday be used in a more sophisticated calculation
      # of what other files are involved in this commit. For now, we
      # don't use it, because the common-commit-detection algorithm is
      # hypothesized to be "good enough" as it stands.
      $qunk{'time'} = $time;

      # We might be including revision numbers and/or tags and/or
      # branch names in the output. Most of the code from here to
      # loop-end deals with organizing these in qunk.
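
      # (Illustrative note, not part of the original script: for a revision
      # string such as "1.4.2.3", the branch-grabbing regexp just below
      # captures "1.4.2." and the trailing dot is stripped, leaving the
      # branch prefix "1.4.2". If %branch_names happens to map "1.4.2" to a
      # symbolic name -- say, a hypothetical branch tag "my-feature-branch" --
      # that name is recorded in the qunk instead of the raw revision number.)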
      $qunk{'revision'} = $revision;

      # Grab the branch, even though we may or may not need it:
      $qunk{'revision'} =~ /((?:\d+\.)+)\d+/;
      my $branch_prefix = $1;
      $branch_prefix =~ s/\.$//;   # strip off final dot
      if ($branch_names{$branch_prefix}) {
        $qunk{'branch'} = $branch_names{$branch_prefix};
      }

      # If there's anything in the @branch_roots array, then this
      # revision is the root of at least one branch. We'll display
      # them as branch names instead of revision numbers, the
      # substitution for which is done directly in the array:
      if (@branch_roots) {
        my @roots = map { $branch_names{$_} } @branch_roots;
        $qunk{'branchroots'} = \@roots;
      }

      # Save tags too.
      if (defined ($symbolic_names{$revision})) {
        $qunk{'tags'} = $symbolic_names{$revision};
        delete $symbolic_names{$revision};
      }

      # Add this file to the list
      # (We use many spoonfuls of autovivification magic. Hashes and arrays
      # will spring into existence if they aren't there already.)
      &debug ("(pushing log msg for ${dir_key}$qunk{'filename'})\n");

      # Store with the files in this commit. Later we'll loop through
      # again, making sure that revisions with the same log message
      # and nearby commit times are grouped together as one commit.
      push (@{$grand_poobah{$dir_key}{$author}{$time}{$msg_txt}}, \%qunk);
    }

   CLEAR:
    # Make way for the next message
    undef $msg_txt;
    undef $time;
    undef $revision;
    undef $author;
    undef @branch_roots;

    # Maybe even make way for the next file:
    if ($detected_file_separator) {
      undef $file_full_path;
      undef %branch_names;
      undef %branch_numbers;
      undef %symbolic_names;
    }
  }

  close (LOG_SOURCE);

  ### Process each ChangeLog

  while (my ($dir,$authorhash) = each %grand_poobah)
  {
    &debug ("DOING DIR: $dir\n");

    # Here we twist our hash around, from being
    #   author => time => message => filelist
    # in %$authorhash to
    #   time => author => message => filelist
    # in %changelog.
    #
    # This is also where we merge entries. The algorithm proceeds
    # through the timeline of the changelog with a sliding window of
    # $Max_Checkin_Duration seconds; within that window, entries that
    # have the same log message are merged.
    #
    # (To save space, we zap %$authorhash after we've copied
    # everything out of it.)
    my %changelog;
    while (my ($author,$timehash) = each %$authorhash)
    {
      my $lasttime;
      my %stamptime;
      foreach my $time (sort {$main::a <=> $main::b} (keys %$timehash))
      {
        my $msghash = $timehash->{$time};
        while (my ($msg,$qunklist) = each %$msghash)
        {
          my $stamptime = $stamptime{$msg};
          if ((defined $stamptime)
              and (($time - $stamptime) < $Max_Checkin_Duration)
              and (defined $changelog{$stamptime}{$author}{$msg}))
          {
            push(@{$changelog{$stamptime}{$author}{$msg}}, @$qunklist);
          }
          else {
            $changelog{$time}{$author}{$msg} = $qunklist;
            $stamptime{$msg} = $time;
          }
        }
      }
    }
    undef (%$authorhash);

    ### Now we can write out the ChangeLog!

    my ($logfile_here, $logfile_bak, $tmpfile);
    if (! $Output_To_Stdout) {
      $logfile_here = $dir . $Log_File_Name;
      $logfile_here =~ s/^\.\/\//\//;   # fix any leading ".//" problem
      $tmpfile     = "${logfile_here}.cvs2cl$$.tmp";
      $logfile_bak = "${logfile_here}.bak";
      open (LOG_OUT, ">$tmpfile") or die "Unable to open \"$tmpfile\"";
    }
    else {
      open (LOG_OUT, ">-") or die "Unable to open stdout for writing";
    }

    print LOG_OUT $ChangeLog_Header;

    if ($XML_Output) {
      print LOG_OUT "<?xml version=\"1.0\"?>\n\n" .
"<changelog xmlns=\"http://www.red-bean.com/xmlns/cvs2cl/\">\n\n"; } foreach my $time (sort {$main::b <=> $main::a} (keys %changelog)) { my $authorhash = $changelog{$time}; while (my ($author,$mesghash) = each %$authorhash) { # If XML, escape in outer loop to avoid compound quoting: if ($XML_Output) { $author = &xml_escape ($author); } while (my ($msg,$qunklist) = each %$mesghash) { my $files = &pretty_file_list ($qunklist); my $header_line; # date and author my $body; # see below my $wholething; # $header_line + $body # Set up the date/author line. # kff todo: do some more XML munging here, on the header # part of the entry: my ($ignore,$min,$hour,$mday,$mon,$year,$wday) = $UTC_Times ? gmtime($time) : localtime($time); # XML output includes everything else, we might as well make # it always include Day Of Week too, for consistency. if ($Show_Day_Of_Week or $XML_Output) { $wday = ("Sunday", "Monday", "Tuesday", "Wednesday", "Thursday", "Friday", "Saturday")[$wday]; $wday = ($XML_Output) ? "<weekday>${wday}</weekday>\n" : " $wday"; } else { $wday = ""; } if ($XML_Output) { $header_line = sprintf ("<date>%4u-%02u-%02u</date>\n" . "${wday}" . "<time>%02u:%02u</time>\n" . "<author>%s</author>\n", $year+1900, $mon+1, $mday, $hour, $min, $author); } else { $header_line = sprintf ("%4u-%02u-%02u${wday} %02u:%02u %s\n\n", $year+1900, $mon+1, $mday, $hour, $min, $author); } # Reshape the body according to user preferences. if ($XML_Output) { $msg = &preprocess_msg_text ($msg); $body = $files . $msg; } elsif ($No_Wrap) { $msg = &preprocess_msg_text ($msg); $files = wrap ("\t", " ", "$files"); $msg =~ s/\n(.*)/\n\t$1/g; unless ($After_Header eq " ") { $msg =~ s/^(.*)/\t$1/g; } $body = $files . $After_Header . $msg; } else # do wrapping, either FSF-style or regular { if ($FSF_Style) { $files = wrap ("\t", " ", "$files"); my $files_last_line_len = 0; if ($After_Header eq " ") { $files_last_line_len = &last_line_len ($files); $files_last_line_len += 1; # for $After_Header } $msg = &wrap_log_entry ($msg, "\t", 69 - $files_last_line_len, 69); $body = $files . $After_Header . $msg; } else # not FSF-style { $msg = &preprocess_msg_text ($msg); $body = $files . $After_Header . $msg; $body = wrap ("\t", " ", "$body"); } } $wholething = $header_line . $body; if ($XML_Output) { $wholething = "<entry>\n${wholething}</entry>\n"; } # One last check: make sure it passes the regexp test, if the # user asked for that. We have to do it here, so that the # test can match against information in the header as well # as in the text of the log message. # How annoying to duplicate so much code just because I # can't figure out a way to evaluate scalars on the trailing # operator portion of a regular expression. Grrr. if ($Case_Insensitive) { unless ($Regexp_Gate && ($wholething !~ /$Regexp_Gate/oi)) { print LOG_OUT "${wholething}\n"; } } else { unless ($Regexp_Gate && ($wholething !~ /$Regexp_Gate/o)) { print LOG_OUT "${wholething}\n"; } } } } } if ($XML_Output) { print LOG_OUT "</changelog>\n"; } close (LOG_OUT); if (! 
$Output_To_Stdout) { if (-f $logfile_here) { rename ($logfile_here, $logfile_bak); } rename ($tmpfile, $logfile_here); } }}sub parse_date_and_author (){ # Parses the date/time and author out of a line like: # # date: 1999/02/19 23:29:05; author: apharris; state: Exp; my $line = shift; my ($year, $mon, $mday, $hours, $min, $secs, $author) = $line =~ m#(\d+)/(\d+)/(\d+)\s+(\d+):(\d+):(\d+);\s+author:\s+([^;]+);# or die "Couldn't parse date ``$line''"; die "Bad date or Y2K issues" unless ($year > 1969 and $year < 2258); # Kinda arbitrary, but useful as a sanity check my $time = timegm($secs,$min,$hours,$mday,$mon-1,$year-1900); return ($time, $author);}# Here we take a bunch of qunks and convert them into printed# summary that will include all the information the user asked for.sub pretty_file_list (){ if ($Hide_Filenames and (! $XML_Output)) { return ""; } my $qunksref = shift; my @qunkrefs = @$qunksref; my @filenames; my $beauty = ""; # The accumulating header string for this entry. my %non_unanimous_tags; # Tags found in a proper subset of qunks my %unanimous_tags; # Tags found in all qunks my %all_branches; # Branches found in any qunk my $common_dir = undef; # Dir prefix common to all files ("" if none) my $fbegun = 0; # Did we begin printing filenames yet? # First, loop over the qunks gathering all the tag/branch names. # We'll put them all in non_unanimous_tags, and take out the # unanimous ones later. foreach my $qunkref (@qunkrefs) { # Keep track of whether all the files in this commit were in the # same directory, and memorize it if so. We can make the output a # little more compact by mentioning the directory only once. if ((scalar (@qunkrefs)) > 1) { if (! (defined ($common_dir))) { my ($base, $dir); ($base, $dir, undef) = fileparse ($$qunkref{'filename'}); if ((! (defined ($dir))) # this first case is sheer paranoia or ($dir eq "") or ($dir eq "./") or ($dir eq ".\\")) { $common_dir = ""; } else { $common_dir = $dir; } } elsif ($common_dir ne "") { # Already have a common dir prefix, so how much of it can we preserve? $common_dir = &common_path_prefix ($$qunkref{'filename'}, $common_dir); } } else # only one file in this entry anyway, so common dir not an issue { $common_dir = ""; } if (defined ($$qunkref{'branch'})) { $all_branches{$$qunkref{'branch'}} = 1; }
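
    # (Illustrative note, not part of the original script: the
    # common-directory logic above assumes common_path_prefix() returns the
    # leading directory components shared by its two arguments. Under that
    # assumption, a commit touching "lib/parse/a.c" and "lib/parse/b.c" would
    # keep $common_dir as "lib/parse/", while adding "doc/README" to the same
    # entry would shrink it to "", so no shared directory prefix gets factored
    # out of the printed file list.)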