Home | History | Annotate | Download | only in scripts
      1 #!/usr/perl5/bin/perl
      2 #
      3 # Script for extracting copyright and licensing information from source code
      4 #
      5 # CDDL HEADER START
      6 #
      7 # The contents of this file are subject to the terms of the
      8 # Common Development and Distribution License, Version 1.0 only
      9 # (the "License").  You may not use this file except in compliance
     10 # with the License.
     11 #
     12 # You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
     13 # or http://www.opensolaris.org/os/licensing.
     14 # See the License for the specific language governing permissions
     15 # and limitations under the License.
     16 #
     17 # When distributing Covered Code, include this CDDL HEADER in each
     18 # file and include the License file at usr/src/OPENSOLARIS.LICENSE.
     19 # If applicable, add the following below this CDDL HEADER, with the
     20 # fields enclosed by brackets "[]" replaced with your own identifying
     21 # information: Portions Copyright [yyyy] [name of copyright owner]
     22 #
     23 # CDDL HEADER END
     24 #
     25 #
     26 # Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
     27 # Use is subject to license terms.
     28 #
     29 
     30 use strict;
     31 use warnings;
     32 use Cwd;
     33 use Getopt::Long qw(:config gnu_compat no_auto_abbrev bundling pass_through);
     34 
     35 my $min_merge = 10;
     36 
     37 sub usage() {
     38     print "copyright-extractor [options] <source directory>\n";
     39     print "\n";
     40     print "Options:\n";
     41     print "  -r, --raw\n";
     42     print "         Print raw comments only, do not attempt to merge,\n";
     43     print "         only unify identical comments.\n";
     44     print "  -c, --copyright-first\n";
     45     print "         Attempt to move copyright statements to the start of the\n";
     46     print "         comment block.\n";
     47     print "         Note: when using this option, there is a chance that\n";
     48     print "         lines get mixed up if a copyright statement extends to\n";
     49     print "         more than one line.\n";
     50     print "  -m n --min=n\n";
     51     print "         only merge if there are at least n consecutive identical\n";
     52     print "         lines.  default: $min_merge\n";
     53     print "  -g, --gpl\n";
     54     print "         Add the disclaimer about GPLv2 to the beginning of the\n";
     55     print "         output if any of the comments look like GPL/LGPL\n";
     56     print "  -O, --omitted\n";
     57     print "         Print a list of files that were not checked\n";
     58     print "  -h, --help\n";
     59     print "         Print this usage information\n";
     60     print "  -d n, --debug=n\n";
     61     print "         Turn on debug output.\n";
     62 }
     63 
     64 my %blurbs;
     65 
     66 my $gpl_found = 0;
     67 
     68 my @files_omitted;
     69 my $debug = 0;
     70 my $dumb_mode = 0;
     71 my $copyright_first = 0;
     72 my $gpl_disclaimer = 0;
     73 my $print_omitted = 0;
     74 
     75 my @dirs;
     76 sub process_args {
     77     my $arg = shift;
     78     
     79     if ($arg =~ /^-/) {
     80 	print "Unknown option: $arg\n";
     81 	print "Try --help for usage.\n";
     82 	exit (1);
     83     }
     84 
     85     push (@dirs, $arg);
     86 }
     87 
     88 sub process_options {
     89     
     90     Getopt::Long::Configure ("bundling");
     91       
     92     GetOptions ('d|debug=n' => sub { shift; $debug = shift; },
     93 		'm|min=n' => sub { shift; $min_merge = shift; },
     94 		'r|raw' => sub { $dumb_mode = 1; },
     95 		'c|copyright-first' => sub { $copyright_first = 1; },
     96 		'O|omitted' => sub { $print_omitted = 1; },
     97 		'g|gpl' => sub { $gpl_disclaimer = 1; },
     98 		'h|help' => sub { usage (); exit (0); },
     99 		'<>' => \&process_args);
    100 }
    101 
    102 use constant FTYPE_IGNORE => 0;
    103 use constant FTYPE_C => 1;
    104 use constant FTYPE_PERL => 2;
    105 use constant FTYPE_PYTHON => 3;
    106 use constant FTYPE_SHELL => 4;
    107 use constant FTYPE_JAVA => 5;
    108 
    109 # a very simple file type check based on the file name
    110 # fname: the file name to classify
    111 # Returns: one of the above contants
    112 sub get_file_type ($) {
    113     my $fname = shift;
    114 
    115     if ($fname =~ /([~]$|\/(ChangeLog|configure\.in|Makefile|ltmain\.sh|README|NEWS|INSTALL|HACKING|configure$|config\.)$)/) {
    116 	# some file names to ignore
    117 	push (@files_omitted, $fname);
    118 	return FTYPE_IGNORE;
    119     } elsif ($fname =~ /\.(am|ac|o|lo|ps|la|cache|diff|out|log|guess|spec)$/) {
    120 	# some more file names to ignore
    121 	push (@files_omitted, $fname);
    122 	return FTYPE_IGNORE;
    123     } elsif ($fname =~ /\.(c|h|hpp|cpp|C|CPP|cc|CC)$/) {
    124 	return FTYPE_C;
    125     } elsif ($fname =~ /\.pl$/) {
    126 	return FTYPE_PERL;
    127     } elsif ($fname =~ /\.py$/) {
    128 	return FTYPE_PYTHON;
    129     } elsif ($fname =~ /\.(sh|ksh|csh)$/) {
    130 	return FTYPE_SHELL;
    131     } elsif ($fname =~ /\.(java)$/) {
    132 	return FTYPE_JAVA;
    133     } else {
    134 	# FIXME: could do something smart here
    135 	push (@files_omitted, $fname);
    136 	return FTYPE_IGNORE;
    137     }
    138 }
    139 
    140 # return 1 if the string includes words that suggest that the string
    141 # is some sort of legal text.  If none of these words appear in the
    142 # string, this program will ignore it and assume that it's some other
    143 # comment that happens to be at the beginning of the file
    144 sub is_legalese ($) {
    145     my $str = shift;
    146     
    147     $str = lc ($str);
    148     if ($str =~ /(licen[cs]|legal|terms|condition|copyright|rights|\(c\)|copying|usage|binary|distribut|gpl)/) {
    149 	return 1;
    150     }
    151 
    152     return 0;
    153 }
    154 
    155 # extract the comments 
    156 sub extract_comments_shell($) {
    157     my $fname = shift;
    158 
    159     my $blurb;
    160     my $line;
    161     open SRCFILE, "<$fname" or die "failed to open file $fname";
    162     while ($line = <SRCFILE>) {
    163 	chomp ($line);
    164 	next if $line =~ /^#!/;
    165 	last if $line =~ /^[^#]/;
    166 	$line =~ s/^#//;
    167 	# delete certain types of comments, like emacs mode spec, etc
    168 	$line =~ s/^\s*-\*-.*-\*-\s*$//;
    169 	$line =~ s/^\s\$Id:.*\$\s*$//;
    170 	$line =~ s/^\s*vim(:\S+=\S+)+\s*$//;
    171 
    172 	chomp ($line);
    173 
    174 	if (defined $blurb) {
    175 	    $blurb = $blurb . "\n" . $line;
    176 	} elsif ($line ne '') {
    177 	    $blurb = $line;
    178 	}
    179 	$line = undef;
    180     }
    181     close SRCFILE;
    182 
    183     if (defined ($blurb) and is_legalese ($blurb)) {
    184 	$blurbs{$fname} = $blurb;
    185     }
    186 }
    187 
    188 sub extract_comments_c($) {
    189     my $fname = shift;
    190 
    191     my $blurb;
    192     my $in_comment_block = 0;
    193     open SRCFILE, "<$fname" or die "failed to open file $fname";
    194     my $line;
    195     while ($line = <SRCFILE>) {
    196 	chomp ($line);
    197 	if ($in_comment_block) {
    198 	    if ($line =~ /\*\//) {
    199 		$line =~ s/\*\/.*//;
    200 		$in_comment_block = 0;
    201 	    } elsif ($line =~ /^\/\//) {
    202 		$line =~ s/^\/\///;
    203 	    } elsif ($line =~ /^( \*|\*)/) {
    204 		$line =~ s/^( \*|\*)//;
    205 	    }
    206 	} else {
    207 	    if ($line =~ /^\s*\/\*(.*)\*\//) {
    208 		$line =~ s/^\s*\/\*(.*)\*\//$1/g;
    209 	    } elsif ($line =~ /^\s*\/\*/) {
    210 		$in_comment_block = 1;
    211 		$line =~ s/^\s*\/\*//;
    212 	    } elsif ($line =~ /^\/\//) {
    213 		$line =~ s/^\s*\/\///;
    214 	    } elsif ($line eq '') {
    215 		# add to blurb if not the start of the blurb
    216 	    } else {
    217 		# end of comments, stop processing
    218 		last;
    219 	    }
    220 	}
    221 	# delete certain types of comments, like emacs mode spec, etc
    222 	$line =~ s/^\s*-\*-.*-\*-\s*$//;
    223 	$line =~ s/^\s*vim(:\S+=\S+)+\s*$//;
    224 	$line =~ s/^\s\$Id:.*\$\s*$//;
    225 	$line =~ s/^\s*\**\s*\\ingroup\s*.*$//;
    226 	$line =~ s/^\s*\**\s*\\file\s*.*$//;
    227 	$line =~ s/^\s*\**\s*\@-type\@\s*$//;
    228 
    229 	chomp ($line);
    230 
    231 	if (defined $blurb) {
    232 	    $blurb = $blurb . "\n" . $line;
    233 	} elsif ($line ne '') {
    234 	    $blurb = $line;
    235 	}
    236 	$line = undef;
    237     }
    238     close SRCFILE;
    239     if (defined ($blurb) and is_legalese ($blurb)) {
    240 	$blurbs{$fname} = $blurb;
    241     }
    242 }
    243 
    244 sub extract_comments_python($) {
    245     my $fname = shift;
    246 
    247     my $blurb;
    248     my $in_comment_block = 0;
    249     open SRCFILE, "<$fname" or die "failed to open file $fname";
    250     my $line;
    251     while ($line = <SRCFILE>) {
    252 	chomp ($line);
    253 	if ($in_comment_block) {
    254 	    if ($line =~ /"""/) {
    255 		$line =~ s/"""//;
    256 		$in_comment_block = 0;
    257 	    } elsif ($line =~ /#/) {
    258 		$line =~ s/^#//;
    259 	    }
    260 	} else {
    261 	    if ($line =~ /^\s*"""(.*)"""/) {
    262 		$line =~ s/^\s*"""(.*)"""/$1/g;
    263 	    } elsif ($line =~ /^\s*"""/) {
    264 		$in_comment_block = 1;
    265 		$line =~ s/^\s*"""//;
    266 	    } elsif ($line =~ /^\/\//) {
    267 		$line =~ s/^\s*"""//;
    268 	    } elsif ($line eq '') {
    269 		# add to blurb if not the start of the blurb
    270 	    } else {
    271 		# end of comments, stop processing
    272 		last;
    273 	    }
    274 	}
    275 	# delete certain types of comments, like emacs mode spec, etc
    276 	$line =~ s/^\s*-\*-.*-\*-\s*$//;
    277 	$line =~ s/^\s*vim(:\S+=\S+)+\s*$//;
    278 	$line =~ s/^\s\$Id:.*\$\s*$//;
    279 
    280 	chomp ($line);
    281 
    282 	if (defined $blurb) {
    283 	    $blurb = $blurb . "\n" . $line;
    284 	} elsif ($line ne '') {
    285 	    $blurb = $line;
    286 	}
    287 	$line = undef;
    288     }
    289     close SRCFILE;
    290     if (defined ($blurb) and is_legalese ($blurb)) {
    291 	$blurbs{$fname} = $blurb;
    292     }
    293 }
    294 
    295 sub extract_comments($);
    296 
    297 # process a directory or a file recursively: extract the comments
    298 # from the beginning of each file and save them in @blurbs
    299 sub extract_comments($) {
    300     my $fname = shift;
    301     if (-d $fname) {
    302 	# directory -> process recursively
    303 	opendir(DIR, $fname) || die("Cannot open directory $fname");
    304 	my @thefiles= readdir(DIR);
    305 	closedir(DIR);
    306 	foreach my $f (@thefiles) {
    307 	    next if $f eq '.';
    308 	    next if $f eq '..';
    309 	    next if $f eq '.libs';
    310 	    next if $f eq 'intl';
    311 	    extract_comments ("$fname/$f");
    312 	}
    313     } elsif (-f $fname) {
    314 	# regular file -> identify file type and read comments
    315 	my $ftype = get_file_type ($fname);
    316 	return if $ftype == FTYPE_IGNORE;
    317 	if ($ftype == FTYPE_C) {
    318 	    extract_comments_c ($fname);
    319 	} elsif ($ftype == FTYPE_PERL) {
    320 	    extract_comments_shell ($fname);
    321 	} elsif ($ftype == FTYPE_SHELL) {
    322 	    extract_comments_shell ($fname);
    323 	} elsif ($ftype == FTYPE_PYTHON) {
    324 	    extract_comments_python ($fname);
    325 	} elsif ($ftype == FTYPE_JAVA) {
    326 	    extract_comments_c ($fname);
    327 	}
    328     } else {
    329 	print STDERR "ERROR: $fname: no such file or directory\n";
    330     }
    331 }
    332 
    333 # like uniq(1)
    334 sub uniq (@) {
    335     my @list = @_;
    336     my $prev;
    337     if (not @list) {
    338 	return @list;
    339     }
    340     $prev = $list[0];
    341     my @uniq_list = ($prev);
    342     foreach my $str (@list) {
    343 	next if $str eq $prev;
    344 	push (@uniq_list, $str);
    345 	$prev = $str;
    346     }
    347     return @uniq_list;
    348 }
    349 
    350 # return the number of lines in str
    351 sub line_count ($) {
    352     my $str = shift;
    353 
    354     return ($str =~ tr/\n//) + 1;
    355 }
    356 
    357 # return 1 if str is a member of the list, 0 otherwise
    358 sub is_member ($@) {
    359     my $str = shift;
    360     my @list = @_;
    361 
    362     foreach my $s (@list) {
    363 	if ($str eq $s) {
    364 	    return 1;
    365 	}
    366     }
    367     
    368     return 0;
    369 }
    370 
    371 sub do_merge_comments ($$$$$);
    372 
    373 # Args: references to lists of strings (lines of the texts)
    374 #
    375 # ml1: lines from the first text already processed
    376 # l1:  remaining lines of the 1st text
    377 # nl1: remaining normalised lines of the 1st text
    378 # l2:  remaining lines of the 2nd text
    379 # nl2: remaining normalised lines of the 1st text
    380 #
    381 # Return: list of merged lines
    382 sub do_merge_comments ($$$$$) {
    383     my $ml1_ref = shift;
    384     my $l1_ref = shift;
    385     my $nl1_ref = shift;
    386     my $l2_ref = shift;
    387     my $nl2_ref = shift;
    388 
    389     my @mlines1 = @$ml1_ref;
    390     my @nmlines1;
    391     my @lines1 = @$l1_ref;
    392     my @norm_lines1 = @$nl1_ref;
    393     my @lines2 = @$l2_ref;
    394     my @norm_lines2 = @$nl2_ref;
    395     my @nmlines2;
    396     my @mlines2;
    397 
    398     my @merged_lines;
    399     my $line1;
    400     my $norm_line1;
    401     my $line2;
    402     my $norm_line2;
    403 
    404     if ($debug > 2) {
    405 	print "DEBUG: attempting to merge\n";
    406 	if (@mlines1) {
    407 	    print "DEBUG: lines already processed from 1st text:\n";
    408 	    print "DEBUG: >>>>>>>>>>>>>>>>>>>>>>>>>>>>>\n";
    409 	    foreach my $l (@mlines1) {
    410 		print "DEBUG: $l\n";
    411 	    }
    412 	}
    413 	print "DEBUG: 1st text:\n";
    414 	print "DEBUG: >>>>>>>>>>>>>>>>>>>>>>>>>>>>>\n";
    415 	foreach my $l (@lines1) {
    416 	    print "DEBUG: $l\n";
    417 	}
    418 	print "DEBUG: 2nd text:\n";
    419 	print "DEBUG: >>>>>>>>>>>>>>>>>>>>>>>>>>>>>\n";
    420 	foreach my $l (@lines2) {
    421 	    print "DEBUG: $l\n";
    422 	}
    423 	print "DEBUG: <<<<<<<<<<<<<<<<<<<<<<<<<<<<<\n";
    424     }
    425 
    426     if (not @lines1) {
    427 	push (@merged_lines, @mlines1);
    428 	push (@merged_lines, @lines2);
    429 	return @merged_lines;
    430     }
    431 
    432     if (not @lines2) {
    433 	push (@merged_lines, @mlines1);
    434 	push (@merged_lines, @lines1);
    435 	return @merged_lines;
    436     }
    437 
    438     # first save the lines only appearing in lines1,
    439     # stop at the first 2 common lines that are not empty
    440     while (@lines1) {
    441 	$line1 = shift (@lines1);
    442 	$norm_line1 = shift (@norm_lines1);
    443 	if (($norm_line1 ne '') and
    444 	    is_member ($norm_line1, @norm_lines2)) {
    445 	    last;
    446 	} else {
    447 	    push (@mlines1, $line1);
    448 	    push (@nmlines1, $norm_line1);
    449 	}
    450     }
    451     # now save the lines appearing in lines2 before the common line
    452     while (@lines2) {
    453 	$line2 = shift (@lines2);
    454 	$norm_line2 = shift (@norm_lines2);
    455 
    456 	if ($norm_line2 ne $norm_line1) {
    457 	    push (@mlines2, $line2);
    458 	    push (@nmlines2, $line2);
    459 	} else {
    460             last;
    461         }
    462     }
    463     my @common_lines;
    464     my @ncommon_lines;
    465     # now save the first common line
    466     if ($norm_line1 eq $norm_line2) {
    467 	if ($debug > 3) {
    468 	    print "DEBUG: 1st common line:\n";
    469 	    print "DEBUG: $line1\n";
    470 	}
    471 	@common_lines = ($line1);
    472 	@ncommon_lines = ($norm_line2);
    473     } else {
    474 	# no common lines were found
    475 	# lines1 should be empty, all lines moved to mlines1
    476 	push (@merged_lines, @mlines1);
    477 	push (@merged_lines, @mlines2);
    478 	return @merged_lines;
    479     }
    480     # save all common lines
    481     while (@lines1 and @lines2) {
    482 	$line1 = shift (@lines1);
    483 	$norm_line1 = shift (@norm_lines1);
    484 	$line2 = shift (@lines2);
    485 	$norm_line2 = shift (@norm_lines2);
    486 	if ($norm_line1 ne $norm_line2) {
    487 	    if ($debug > 3) {
    488 		print "DEBUG: no more common lines.\n";
    489 	    }
    490 	    unshift (@lines1, $line1);
    491 	    unshift (@norm_lines1, $norm_line1);
    492 	    unshift (@lines2, $line2);
    493 	    unshift (@norm_lines2, $norm_line2);
    494 	    last;
    495 	} else {
    496 	    if ($debug > 3) {
    497 		print "DEBUG: common line:\n";
    498 		print "DEBUG: $line1\n";
    499 	    }
    500 	    push (@common_lines, $line1);
    501 	    push (@ncommon_lines, $norm_line1);
    502 	}
    503     }
    504 
    505     # only merge if the number of common lines is at least $min_merge
    506     # or we are at the end of one of the texts or if at the
    507     # beginning of the 2nd text
    508     if (($#common_lines >= $min_merge) or 
    509 	(not @lines1) or (not @lines2) or
    510 	(not @mlines2)) {
    511 	if ($debug > 1) {
    512 	    print "DEBUG: common lines:\n";
    513 	    foreach my $l (@common_lines) {
    514 		print "DEBUG: $l\n";
    515 	    }
    516 	    print "DEBUG: ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n";
    517 	}
    518 	# first the lines from the 1st text
    519 	push (@merged_lines, @mlines1);
    520 	# then the lines from the 2nd text
    521 	push (@merged_lines, @mlines2);
    522 	# finally the common lines
    523 	push (@merged_lines, @common_lines);
    524     } else {
    525 	# don't merge
    526 
    527 	# add the common lines to the processed part of the 1st text
    528 	push (@mlines1, @common_lines);
    529 
    530 	# add the common lines back to the unprocessed part of the 2nd text
    531 	unshift (@lines2, @common_lines);
    532 	# add the lines before the common lines back to the unprocessed
    533 	# part of the 2nd text
    534 	unshift (@lines2, @mlines2);
    535 	# add the normalised common lines back to
    536 	# the unprocessed part of the 2nd text
    537 	unshift (@norm_lines2, @ncommon_lines);
    538 	# add the normalised lines before the common lines back to
    539         # the unprocessed part of the 2nd text
    540 	unshift (@norm_lines2, @nmlines2);
    541 
    542 	# add the normalised common lines back to 
    543 	# try to merge the rest of the texts
    544 	my @more_merged_lines = do_merge_comments (\@mlines1,
    545 	    \@lines1, \@norm_lines1, \@lines2, \@norm_lines2);
    546 	push (@merged_lines, @more_merged_lines);	
    547 	return @merged_lines;
    548     }
    549 
    550     if (not @lines1) {
    551 	push (@merged_lines, @lines2);
    552     } elsif (not @lines2) {
    553 	push (@merged_lines, @lines1);
    554     } else {
    555 	# repeat the process for the remaining lines
    556 	my @l1;
    557 	my @more_merged_lines = do_merge_comments (\@l1,
    558 	    \@lines1, \@norm_lines1, \@lines2, \@norm_lines2);
    559 	push (@merged_lines, @more_merged_lines);
    560     }
    561 
    562     return @merged_lines;
    563 }
    564 
    565 sub merge_comments ($$) {
    566     my $str1 = shift;
    567     my $str2 = shift;
    568     my @lines1 = split /\n/, $str1;
    569     my @lines2 = split /\n/, $str2;
    570     my @norm_lines1;
    571     my @norm_lines2;
    572 
    573     foreach my $l0 (@lines1) {
    574 	# ignore whitespace differences
    575 	my $l1 = "$l0";
    576 	$l1 =~ s/\s+/ /g;
    577 	$l1 =~ s/^ //g;
    578 	chomp ($l1);
    579 	$l1 =~ s/ $//g;
    580 	$l1 = lc ($l1);
    581 	push (@norm_lines1, $l1);
    582     }
    583     foreach my $l0 (@lines2) {
    584 	# ignore whitespace differences
    585 	my $l2 = "$l0";
    586 	$l2 =~ s/\s+/ /g;
    587 	$l2 =~ s/^ //g;
    588 	chomp ($l2);
    589 	$l2 =~ s/ $//g;
    590 	$l2 = lc ($l2);
    591 	push (@norm_lines2, $l2);
    592     }
    593 
    594     my @l0;
    595     my @merged_lines = do_merge_comments (\@l0, \@lines1, \@norm_lines1,
    596 					  \@lines2, \@norm_lines2);
    597     my $merged_str;
    598     if ($copyright_first) {
    599 	my @copyright_lines;
    600 	my @non_cr_lines;
    601 
    602 	foreach my $line (@merged_lines) {
    603 	    if ($line =~ /^\s*(copyright|\(c\)||author:|all rights reserved)/i) {
    604 		push (@copyright_lines, $line);
    605 	    } else {
    606 		push (@non_cr_lines, $line);
    607 	    }
    608 	}
    609 	@copyright_lines = sort (@copyright_lines);
    610 	@copyright_lines = uniq (@copyright_lines);
    611 	$merged_str = join ("\n", (@copyright_lines, @non_cr_lines));
    612     } else {
    613 	$merged_str = join ("\n", @merged_lines);
    614     }
    615     return $merged_str;
    616 }
    617 
    618 my @all_comments;
    619 my %comments;
    620 
    621 sub unify_comments () {
    622     foreach my $fname (keys %blurbs) {
    623 	if ($blurbs{$fname} =~ /\b(gpl|lgpl|gnu\s+(library\s+|lesser\s+|)general\s+public\s+license)\b/si) {
    624 	    # looks like GNU GPL/LGPL
    625 	    $gpl_found = 1;
    626 	}
    627 	if (defined ($comments{$blurbs{$fname}})) {
    628 	    $comments{$blurbs{$fname}} = $comments{$blurbs{$fname}} .
    629 		", $fname";
    630 	} else {
    631 	    $comments{$blurbs{$fname}} = $fname;
    632 	}
    633     }
    634     @all_comments = (keys %comments);
    635 }
    636 
    637 sub smart_merge_comments () {
    638     my @temp_all_comments = @all_comments;
    639     @all_comments = ();
    640 
    641     my $i = 0;
    642     while ($i <= $#temp_all_comments) {
    643 	my $did_merge = 0;
    644 	my $c1 = $temp_all_comments[$i];
    645 	for (my $j = $i+1; $j <= $#temp_all_comments; $j++) {
    646 	    my $c2 = $temp_all_comments[$j];
    647 	    my $c1_lc = line_count ($c1);
    648 	    my $c2_lc = line_count ($c2);
    649 	    my $c12_merged = merge_comments ($c1, $c2);
    650 	    my $c12_lc = line_count ($c12_merged);
    651 	    # if more than 10 lines or more than 25% saved then
    652 	    # keep the merged comments
    653 	    my $diff_lc = $c1_lc + $c2_lc - $c12_lc;
    654 	    if (($diff_lc > 10) or ($c12_lc <= ($c1_lc + $c2_lc)*0.75)) {
    655 		if ($debug > 0) {
    656 		    print "DEBUG*****************************************\n";
    657 		    print "$c1\n";
    658 		    print "++++++++++++++++++++++++++++++++++++++++++++++\n";
    659 		    print "$c2\n";
    660 		    print "==============================================\n";
    661 		    print "$c12_merged\n";
    662 		    print "*****************************************DEBUG\n";
    663 		}
    664 		$temp_all_comments[$j] = $c12_merged;
    665 		$did_merge = 1;
    666 		$comments{$c12_merged} = "$comments{$c1}, $comments{$c2}";
    667 		last;
    668 	    }
    669 	}
    670 	if (not $did_merge) {
    671 	    push (@all_comments, $c1);
    672 	}
    673 	$i++;
    674     }
    675 }
    676 
    677 sub print_comments () {
    678     if ($gpl_found and $gpl_disclaimer) {
    679 	print << "__EOF"
    680 For the avoidance of doubt, except that if any license choice other
    681 than GPL or LGPL is available it will apply instead, Sun elects to
    682 use only the General Public License version 2 (GPLv2) at this time
    683 for any software where a choice of GPL license versions is made
    684 available with the language indicating that GPLv2 or any later
    685 version may be used, or where a choice of which version of the GPL
    686 is applied is otherwise unspecified.
    687 
    688 --------------------------------------------------------------------
    689 
    690 __EOF
    691     }
    692     foreach my $comment (@all_comments) {
    693 	print "$comments{$comment}:\n";
    694 	print $comment;
    695 	print "\n\n" .
    696 	    "--------------------------------------------------------------------" .
    697 	    "\n\n";
    698     }
    699 }
    700 
    701 sub main() {
    702     my $srcdir;
    703 
    704     process_options ();
    705 
    706     if (not @dirs) {
    707 	usage();
    708 	exit (1);
    709     }
    710 
    711     foreach my $srcdir (@dirs) {
    712 	if ($srcdir =~ /^\./) {
    713 	    $srcdir = getcwd();
    714 	}
    715 	extract_comments ($srcdir);
    716     }
    717 
    718     unify_comments ();
    719     if (not $dumb_mode) {
    720 	smart_merge_comments ();
    721     }
    722 
    723     print_comments ();
    724 
    725     if ($print_omitted and @files_omitted) {
    726 	print "\nThe following files were not checked:\n\n";
    727 	foreach my $fname (@files_omitted) {
    728 	    print "    $fname\n";
    729 	}
    730     }
    731 }
    732 
    733 main();
    734