Home | History | Annotate | Download | only in scripts
      1 #!/usr/bin/perl -w
      2 # -*- Mode: perl; indent-tabs-mode: nil; c-basic-offset: 2  -*-
      3 
      4 # Perl script to create a ChangeLog entry with names of files
      5 # and functions from a cvs diff.
      6 #
      7 # Darin Adler <darin (at] eazel.com>, started 20 April 2000
      8 # Java support added by Maciej Stachowiak <mjs (at] eazel.com>
      9 # Multiple ChangeLog support added by Laszlo (Laca) Peter <laca (at] sun.com>
     10 # last updated 23 May 2006
     11 #
     12 # (Someone put a license in here, like maybe GPL.)
     13 #
     14 # TODO:
     15 #   Provide option to put new ChangeLog into a separate file
     16 #     instead of editing the ChangeLog.
     17 #   For new files, just say "New file" instead of listing
     18 #     function names.
     19 #   List functions that have been removed too.
     20 #   Decide what a good logical order is for the changed files
     21 #     other than a normal text "sort" (top level first?)
     22 #     (group directories?) (.h before .c?)
     23 #   Leave a diff file behind if asked, but in unified format.
     24 #   Handle C++ and yacc source files too (other languages?).
     25 #   Help merge when there are ChangeLog conflicts or if there's
     26 #     already a partly written ChangeLog entry.
     27 #   Add command line option to put the ChangeLog into a separate
     28 #     file or just spew it out stdout.
     29 #   Figure out how to allow -z options from .cvsrc to work without
     30 #     letting other bad options work. Currently the -f disables
     31 #     everything from the .cvsrc.
     32 #   Add CVS version numbers for each file too (can't do that until
     33 #     the changes are checked in, though).
     34 #   Work around diff stupidity where deleting a function that starts
     35 #     with a comment makes diff think that the following function
     36 #     has been changed (if the following function starts with a comment
     37 #     with the same first line, such as /**)
     38 #   Work around diff stupidity where deleting an entire function and
     39 #     the blank lines before it makes diff think you've changed the
     40 #     previous function.
     41 
     42 use diagnostics;
     43 use strict;
     44 
     45 use English;
     46 use Text::Wrap;
     47 use File::Basename;
     48 
     49 # Check for cvs or svn system
     50 my $command;
     51 if (-e "CVS/Root")
     52   {
     53     $command = "cvs";
     54   }
     55 elsif (-e ".svn/entries")
     56   {
     57     $command = "svn";
     58   }
     59 else
     60   {
     61     die "There is not known revision system.\n"
     62   }
     63 
     64 # Update the change log file.
     65 sub update_change_log ($) {
     66     my $logname = shift;
     67     if ($command eq "cvs") {
     68         print STDERR "  Updating $logname from cvs repository.\n";
     69         open ERRORS, "cvs update $logname |" or die "The cvs update of ChangeLog failed: $OS_ERROR.\n";
     70     } else {
     71         print STDERR "  Updating $logname from svn repository.\n";
     72         open ERRORS, "svn update $logname |" or die "The cvs update of ChangeLog failed: $OS_ERROR.\n";
     73     }
     74     print STDERR "    $ARG" while <ERRORS>;
     75     close ERRORS;
     76 }
     77 
     78 # For each file, build a list of modified lines.
     79 # Use line numbers from the "after" side of each diff.
     80 my %changed_line_ranges;
     81 my $file;
     82 if ($command eq "cvs")
     83   {
     84     print STDERR "  Running cvs diff to find changes.\n";
     85     open DIFF, "cvs -fq diff -N |" or die "The cvs diff failed: $OS_ERROR.\n";
     86   }
     87 else
     88   {
     89     print STDERR "  Running svn diff to find changes.\n";
     90     open DIFF, "svn --non-interactive diff --diff-cmd diff -x \"-b\" |" or die "The cvs diff failed: $OS_ERROR.\n";
     91   }
     92 
     93 while (<DIFF>)
     94   {
     95     $file = $1 if /^Index: (\S+)$/;
     96     my $basename = basename ($file);
     97     if (defined $file
     98         and $basename ne "ChangeLog"
     99         and (/^\d+(,\d+)?[acd](\d+)(,(\d+))?/ or /^Binary files/ or /^Cannot display: file marked as a binary type./) )
    100       {
    101         push @{$changed_line_ranges{$file}}, [ $2, $4 || $2 ];
    102       }
    103   }
    104 close DIFF;
    105 if (!%changed_line_ranges)
    106   {
    107     print STDERR "  No changes found.\n";
    108     exit;
    109   }
    110 
    111 # For each ".c" file, convert line range to function list.
    112 print STDERR "  Extracting affected function names from C source files.\n";
    113 my %function_lists;
    114 foreach my $file (keys %changed_line_ranges)
    115   {
    116     # An empty function list still indicates that something changed.
    117     $function_lists{$file} = "";
    118 
    119     # Only look for function names in .c files.
    120     next unless $file =~ /\.(c|java|cs)/;
    121 
    122     # Find all the functions in the file.
    123     open SOURCE, $file or next;
    124     my @function_ranges = get_function_line_ranges(\*SOURCE, $file);
    125     close SOURCE;
    126 
    127     # Find all the modified functions.
    128     my @functions;
    129     my %saw_function;
    130     my @change_ranges = (@{$changed_line_ranges{$file}}, []);
    131     my @change_range = (0, 0);
    132     FUNCTION: foreach my $function_range_ref (@function_ranges)
    133       {
    134         my @function_range = @$function_range_ref;
    135 
    136         # Advance to successive change ranges.
    137         for (;; @change_range = @{shift @change_ranges})
    138           {
    139             last FUNCTION unless @change_range;
    140 
    141             # If past this function, move on to the next one.
    142             next FUNCTION if $change_range[0] > $function_range[1];
    143             
    144             # If an overlap with this function range, record the function name.
    145             if ($change_range[1] >= $function_range[0]
    146                 and $change_range[0] <= $function_range[1])
    147               {
    148                 if (!$saw_function{$function_range[2]})
    149                   {
    150                     $saw_function{$function_range[2]} = 1;
    151                     push @functions, $function_range[2]; 
    152                   }
    153                 next FUNCTION;
    154               }
    155           }
    156       }
    157 
    158     # Format the list of functions now.
    159     $function_lists{$file} = " (" . join("), (", @functions) . "):" if @functions;
    160   }
    161 
    162 # Write out a new ChangeLog file.
    163 print STDERR "  Finding ChangeLog files:\n";
    164 my %changelogs;
    165 foreach my $file (sort keys %function_lists) {
    166     $file = dirname ($file);
    167     while ($file ne '.' and $file ne '/') {
    168         if (-f "$file/ChangeLog") {
    169             $changelogs{"./$file"} = 1;
    170             last;
    171         }
    172         $file = dirname ($file);
    173     }
    174 }
    175 $changelogs{'.'} = 1;
    176 
    177 foreach my $chl (reverse sort keys %changelogs) {
    178     print STDERR "\t${chl}/ChangeLog\n";
    179 }
    180 
    181 print STDERR "  Editing the ChangeLog file(s).\n";
    182 my $date = sprintf "%d-%02d-%02d",
    183   1900 + (localtime $BASETIME)[5], # year
    184   1 + (localtime $BASETIME)[4], # month
    185   (localtime $BASETIME)[3]; # day within month
    186 my $name = $ENV{CHANGE_LOG_NAME}
    187   || $ENV{REAL_NAME}
    188   || (getpwuid $REAL_USER_ID)[6]
    189   || "set REAL_NAME environment variable";
    190 my $email_address = $ENV{CHANGE_LOG_EMAIL_ADDRESS}
    191   || $ENV{EMAIL_ADDRESS}
    192   || "set EMAIL_ADDRESS environment variable";
    193 
    194 foreach my $chlog (reverse sort keys %changelogs) {
    195     update_change_log ("$chlog/ChangeLog");
    196     # It's less efficient to read the whole thing into memory than it would be
    197     # to read it while we prepend to it later, but I like doing this part first.
    198     open OLD_CHANGE_LOG, "${chlog}/ChangeLog" or die "Could not open ChangeLog file: $OS_ERROR.\n";
    199     my @old_change_log = <OLD_CHANGE_LOG>;
    200     close OLD_CHANGE_LOG;
    201     open CHANGE_LOG, "> ${chlog}/ChangeLog" or die "Could not write ChangeLog\n.";
    202     print CHANGE_LOG "$date  $name  <$email_address>\n\n";
    203     foreach my $file (sort keys %function_lists) {
    204         my $fname = "./$file";
    205         if ($fname =~ /^${chlog}\//) {
    206             $fname =~ s/^${chlog}\///;
    207             my $lines = wrap("\t", "\t", "XX$fname:$function_lists{$file}");
    208             $lines =~ s/^\tXX/\t* /;
    209             print CHANGE_LOG "$lines\n";
    210             delete ($function_lists{$file});
    211         }
    212     }
    213     print CHANGE_LOG "\n", @old_change_log;
    214     close CHANGE_LOG;
    215     
    216     # Done.
    217     print STDERR "  Done editing ${chlog}/ChangeLog.\n";
    218     last if not (keys %function_lists);
    219 }
    220 
    221 exit;
    222 
    223 
    224 sub get_function_line_ranges
    225   {
    226     my ($file_handle, $file_name) = @_;
    227 
    228     if ($file_name =~ /\.c$/) {
    229         return get_function_line_ranges_for_c ($file_handle, $file_name);
    230     } elsif ($file_name =~ /\.java$/) {
    231         return get_function_line_ranges_for_java ($file_handle, $file_name);
    232     } elsif ($file_name =~ /\.cs$/) {
    233 	#FIXME write a function to extract from .cs files
    234         return get_function_line_ranges_for_java ($file_handle, $file_name);
    235     }
    236     return ();
    237   }
    238 
    239 # Read a file and get all the line ranges of the things that look like C functions.
    240 # A function name is the last word before an open parenthesis before the outer
    241 # level open brace. A function starts at the first character after the last close
    242 # brace or semicolon before the function name and ends at the close brace.
    243 # Comment handling is simple-minded but will work for all but pathological cases.
    244 #
    245 # Result is a list of triples: [ start_line, end_line, function_name ].
    246 
    247 sub get_function_line_ranges_for_c
    248   {
    249     my ($file_handle, $file_name) = @_;
    250 
    251     my @ranges;
    252 
    253     my $in_comment = 0;
    254     my $in_macro = 0;
    255     my $in_parentheses = 0;
    256     my $in_braces = 0;
    257     
    258     my $word = "";
    259 
    260     my $potential_start = 0;
    261     my $potential_name = "";
    262     
    263     my $start = 0;
    264     my $name = "";
    265     
    266     while (<$file_handle>)
    267       {
    268         # Handle continued multi-line comment.
    269         if ($in_comment)
    270           {
    271             next unless s-.*\*/--;
    272             $in_comment = 0;
    273           }
    274 
    275         # Handle continued macro.
    276         if ($in_macro)
    277           {
    278             $in_macro = 0 unless /\\$/;
    279             next;
    280           }
    281 
    282         # Handle start of macro (or any preprocessor directive).
    283         if (/^\s*\#/)
    284           {
    285             $in_macro = 1 if /^([^\\]|\\.)*\\$/;
    286             next;
    287           }
    288 
    289         # Handle comments and quoted text.
    290         while (m-(/\*|//|\'|\")-) # \' and \" keep emacs perl mode happy
    291           {
    292             my $match = $1;
    293             if ($match eq "/*")
    294               {
    295                 if (!s-/\*.*?\*/--)
    296                   {
    297                     s-/\*.*--;
    298                     $in_comment = 1;
    299                   }
    300               }
    301             elsif ($match eq "//")
    302               {
    303                 s-//.*--;
    304               }
    305             else # ' or "
    306               {
    307                 if (!s-$match([^\\]|\\.)*?$match--)
    308                   {
    309                     warn "mismatched quotes at line $INPUT_LINE_NUMBER in $file_name\n";
    310                     s-$match.*--;
    311                   }
    312               }
    313           }
    314         
    315         # Find function names.
    316         while (m-(\w+|[(){};])-g)
    317           {
    318             # Open parenthesis.
    319             if ($1 eq "(")
    320               {
    321                 $potential_name = $word unless $in_parentheses;
    322                 $in_parentheses++;
    323                 next;
    324               }
    325 
    326             # Close parenthesis.
    327             if ($1 eq ")")
    328               {
    329                 $in_parentheses--;
    330                 next;
    331               }
    332 
    333             # Open brace.
    334             if ($1 eq "{")
    335               {
    336                 # Promote potiential name to real function name at the
    337                 # start of the outer level set of braces (function body?).
    338                 if (!$in_braces and $potential_start)
    339                   {
    340                     $start = $potential_start;
    341                     $name = $potential_name;
    342                   }
    343 
    344                 $in_braces++;
    345                 next;
    346               }
    347 
    348             # Close brace.
    349             if ($1 eq "}")
    350               {
    351                 $in_braces--;
    352 
    353                 # End of an outer level set of braces.
    354                 # This could be a function body.
    355                 if (!$in_braces and $name)
    356                   {
    357                     push @ranges, [ $start, $INPUT_LINE_NUMBER, $name ];
    358                     $name = "";
    359                   }
    360 
    361                 $potential_start = 0;
    362                 $potential_name = "";
    363                 next;
    364               }
    365 
    366             # Semicolon.
    367             if ($1 eq ";")
    368               {
    369                 $potential_start = 0;
    370                 $potential_name = "";
    371                 next;
    372               }
    373 
    374             # Word.
    375             $word = $1;
    376             if (!$in_parentheses)
    377               {
    378                 $potential_start = 0;
    379                 $potential_name = "";
    380               }
    381             if (!$potential_start)
    382               {
    383                 $potential_start = $INPUT_LINE_NUMBER;
    384                 $potential_name = "";
    385               }
    386           }
    387       }
    388 
    389     warn "mismatched braces in $file_name\n" if $in_braces;
    390     warn "mismatched parentheses in $file_name\n" if $in_parentheses;
    391 
    392     return @ranges;
    393   }
    394 
    395 
    396 
    397 # Read a file and get all the line ranges of the things that look like Java
    398 # classes, interfaces and methods.
    399 #
    400 # A class or interface name is the word that immediately follows
    401 # `class' or `interface' when followed by an open curly brace and not
    402 # a semicolon. It can appear at the top level, or inside another class
    403 # or interface block, but not inside a function block
    404 #
    405 # A class or interface starts at the first character after the first close
    406 # brace or after the function name and ends at the close brace.
    407 #
    408 # A function name is the last word before an open parenthesis before
    409 # an open brace rather than a semicolon. It can appear at top level or
    410 # inside a class or interface block, but not inside a function block.
    411 #
    412 # A function starts at the first character after the first close
    413 # brace or after the function name and ends at the close brace.
    414 #
    415 # Comment handling is simple-minded but will work for all but pathological cases.
    416 #
    417 # Result is a list of triples: [ start_line, end_line, function_name ].
    418 
    419 sub get_function_line_ranges_for_java
    420   {
    421     my ($file_handle, $file_name) = @_;
    422 
    423     my @current_scopes;
    424 
    425     my @ranges;
    426 
    427     my $in_comment = 0;
    428     my $in_macro = 0;
    429     my $in_parentheses = 0;
    430     my $in_braces = 0;
    431     my $in_non_block_braces = 0;
    432     my $class_or_interface_just_seen = 0;
    433 
    434     my $word = "";
    435 
    436     my $potential_start = 0;
    437     my $potential_name = "";
    438     my $potential_name_is_class_or_interface = 0;
    439     
    440     my $start = 0;
    441     my $name = "";
    442     my $current_name_is_class_or_interface = 0;
    443     
    444     while (<$file_handle>)
    445       {
    446         # Handle continued multi-line comment.
    447         if ($in_comment)
    448           {
    449             next unless s-.*\*/--;
    450             $in_comment = 0;
    451           }
    452 
    453         # Handle continued macro.
    454         if ($in_macro)
    455           {
    456             $in_macro = 0 unless /\\$/;
    457             next;
    458           }
    459 
    460         # Handle start of macro (or any preprocessor directive).
    461         if (/^\s*\#/)
    462           {
    463             $in_macro = 1 if /^([^\\]|\\.)*\\$/;
    464             next;
    465           }
    466 
    467         # Handle comments and quoted text.
    468         while (m-(/\*|//|\'|\")-) # \' and \" keep emacs perl mode happy
    469           {
    470             my $match = $1;
    471             if ($match eq "/*")
    472               {
    473                 if (!s-/\*.*?\*/--)
    474                   {
    475                     s-/\*.*--;
    476                     $in_comment = 1;
    477                   }
    478               }
    479             elsif ($match eq "//")
    480               {
    481                 s-//.*--;
    482               }
    483             else # ' or "
    484               {
    485                 if (!s-$match([^\\]|\\.)*?$match--)
    486                   {
    487                     warn "mismatched quotes at line $INPUT_LINE_NUMBER in $file_name\n";
    488                     s-$match.*--;
    489                   }
    490               }
    491           }
    492         
    493         # Find function names.
    494         while (m-(\w+|[(){};])-g)
    495           {
    496             # Open parenthesis.
    497             if ($1 eq "(")
    498               {
    499                 if (!$in_parentheses) {
    500                     $potential_name = $word;
    501                     $potential_name_is_class_or_interface = 0;
    502                 }
    503                 $in_parentheses++;
    504                 next;
    505               }
    506 
    507             # Close parenthesis.
    508             if ($1 eq ")")
    509               {
    510                 $in_parentheses--;
    511                 next;
    512               }
    513 
    514             # Open brace.
    515             if ($1 eq "{")
    516               {
    517                 # Promote potiential name to real function name at the
    518                 # start of the outer level set of braces (function/class/interface body?).
    519                 if (!$in_non_block_braces
    520                     and (!$in_braces or $current_name_is_class_or_interface) 
    521                     and $potential_start)
    522                   {
    523                     if ($name)
    524                       {
    525                           push @ranges, [ $start, ($INPUT_LINE_NUMBER - 1), 
    526                                           join ('.', @current_scopes) ];
    527                       }
    528 
    529 
    530                     $current_name_is_class_or_interface = $potential_name_is_class_or_interface;
    531                     
    532                     $start = $potential_start;
    533                     $name = $potential_name;
    534 
    535                     push (@current_scopes, $name);
    536                   } else {
    537                       $in_non_block_braces++;
    538                   }
    539 
    540                 $potential_name = "";
    541                 $potential_start = 0;
    542  
    543                 $in_braces++;
    544                 next;
    545               }
    546 
    547             # Close brace.
    548             if ($1 eq "}")
    549               {
    550                 $in_braces--;
    551                 
    552                 # End of an outer level set of braces.
    553                 # This could be a function body.
    554                 if (!$in_non_block_braces) 
    555                   {
    556                     if ($name)
    557                       {
    558                         push @ranges, [ $start, $INPUT_LINE_NUMBER, 
    559                                         join ('.', @current_scopes) ];
    560                         
    561                         pop (@current_scopes);                        
    562 
    563                         if (@current_scopes) 
    564                           {
    565                             $current_name_is_class_or_interface = 1;
    566                             
    567                             $start = $INPUT_LINE_NUMBER + 1;
    568                             $name =  $current_scopes[$#current_scopes-1];
    569                           }
    570                         else
    571                           {
    572                             $current_name_is_class_or_interface = 0;
    573                             $start = 0;
    574                             $name =  "";
    575                           } 
    576                     }
    577                   }
    578                 else
    579                   {
    580                     $in_non_block_braces-- if $in_non_block_braces;
    581                   }
    582 
    583                 $potential_start = 0;
    584                 $potential_name = "";
    585                 next;
    586               }
    587 
    588             # Semicolon.
    589             if ($1 eq ";")
    590               {
    591                 $potential_start = 0;
    592                 $potential_name = "";
    593                 next;
    594               }
    595             
    596             if ($1 eq "class" or $1 eq "interface") {
    597                 $class_or_interface_just_seen = 1;
    598                 next;
    599             }
    600 
    601             # Word.
    602             $word = $1;
    603             if (!$in_parentheses)
    604               {
    605                 if ($class_or_interface_just_seen) {
    606                     $potential_name = $word;
    607                     $potential_start = $INPUT_LINE_NUMBER;
    608                     $class_or_interface_just_seen = 0;
    609                     $potential_name_is_class_or_interface = 1;
    610                     next;
    611                 }
    612               }
    613             if (!$potential_start)
    614               {
    615                 $potential_start = $INPUT_LINE_NUMBER;
    616                 $potential_name = "";
    617               }
    618             $class_or_interface_just_seen = 0;
    619           }
    620       }
    621 
    622     warn "mismatched braces in $file_name\n" if $in_braces;
    623     warn "mismatched parentheses in $file_name\n" if $in_parentheses;
    624 
    625     return @ranges;
    626   }
    627