handle modules/ outside clscan
[gnulib.git] / debian / clscan / clscan
1 #!/usr/bin/perl -w
2 # Copyright 2009 Ian Beckwith <ianb@erislabs.net>
3 # License: GPL v2 or later.
4
5 use strict;
6 use vars qw($me);
7 $me=($0=~/(?:.*\/)?(.*)/)[0];
8
9 use Getopt::Long;
10 use YAML::Any;
11 use Digest::SHA qw(sha256_hex);
12 use File::Find;
13 use File::Copy;
14
15 our $CLSCANDIR="debian/clscan";
16 our $FILESCACHE="$CLSCANDIR/files.yaml";
17 our $NEWFILES="$CLSCANDIR/new.txt";
18 our $COPYRIGHTSTUB="$CLSCANDIR/copyright.in";
19
20 our @filenames=();
21 our $files={};
22 our $new={};
23
24 my $scan=0;
25 my $merge=0;
26 my $writecopyright=0;
27
28 usage() unless(@ARGV);
29 usage() unless GetOptions("scan|s" => \$scan,
30                           "merge|m" => \$merge,
31                           "write|w" => \$writecopyright,
32                           "help|h" => sub { usage(); });
33
34 load_cache();
35 scan() if($scan);
36 merge() if($merge);
37 write_copyright() if ($merge || $writecopyright);
38
39 sub scan
40 {
41     get_filenames();
42     for my $file (@filenames)
43     {
44         scan_file($file);
45     }
46     write_new();
47     find_deleted();
48 }
49
50 sub merge
51 {
52     merge_new();
53     save_cache();
54 }
55
56 sub write_copyright
57 {
58     unless(keys(%$files))
59     {
60         die("$me: no files known, run $0 --scan\n");
61     }
62     unless(copy($COPYRIGHTSTUB, "debian/copyright"))
63     {
64         die("$me: cannot copy $COPYRIGHTSTUB to debian/copyright: $!\n");
65     }
66     unless(open(COPYRIGHT, ">>debian/copyright"))
67     {
68         die("$me: cannot append to debian/copyright: $!\n");
69     }
70
71     # group files by license/license_text/copyright
72     my $licenses={};
73     for my $file (sort keys(%$files))
74     {
75         my $license=$files->{$file}->{license};
76         my $copyright=$files->{$file}->{copyright};
77         my $license_text=$files->{$file}->{license_text};
78         push(@{$licenses->{$license}->{$license_text}->{$copyright}}, $file);
79     }
80     my %refs=();
81     my $refnum=0;
82     for my $license (sort keys(%$licenses))
83     {
84         for my $license_text (sort keys(%{$licenses->{$license}}))
85         {
86             my $licensestr=$license;
87             if(length($license_text))
88             {
89                 $refnum++;
90                 $licensestr .= " (REF$refnum)";
91                 $refs{$licensestr}=$license_text;
92             }
93             for my $copyright (sort keys(%{$licenses->{$license}->{$license_text}}))
94             {
95                 next if(!length($license) && !length($copyright) && !length($license_text));
96                 my @filelist=sort @{$licenses->{$license}->{$license_text}->{$copyright}};
97                 print COPYRIGHT "Files: ", join(', ', @filelist), "\n";
98                 print COPYRIGHT "Copyright: $copyright\n" if length($copyright);
99                 print COPYRIGHT "License: $licensestr\n" if length($licensestr);
100                 print COPYRIGHT "\n";
101             }
102         }
103     }
104     for my $ref (keys(%refs))
105     {
106         print COPYRIGHT "License: $ref\n";
107         my @text=split(/\n/, $refs{$ref});
108         print COPYRIGHT map { "    " . $_ . "\n" } @text;
109         print COPYRIGHT "\n";
110     }
111     print COPYRIGHT license_trailer(sort keys(%$licenses));
112     close(COPYRIGHT);
113 }
114
115 sub license_trailer
116 {
117     my @licenses_used=@_;
118     my $license_data = {
119         "Apache-2.0" => "Apache License Version 2.0",
120         "Artistic" => "Artistic License",
121         "GFDL-1.2" => "GNU Free Documentation License Version 1.2",
122         "GFDL-1.3" => "GNU Free Documentation License Version 1.3",
123         "GFDL" => "GNU Free Documentation License",
124         "GPL-2" => "GNU General Public License Version 2",
125         "GPL-3" => "GNU General Public License Version 3",
126         "GPL" => "GNU General Public License",
127         "LGPL-2" => "GNU Library General Public License Version 2",
128         "LGPL-2.1" => "GNU Lesser General Public License Version 2.1",
129         "LGPL-3" => "GNU Lesser General Public License Version 3",
130         "LGPL" => "GNU Lesser General Public License",
131     };
132
133     my %types_found=();
134 TYPE: for my $type (keys(%$license_data))
135     {
136         for my $license (@licenses_used)
137         {
138             if($license =~ /$type(\+|\b)/i)
139             {
140                 $types_found{$type}=1;
141                 # avoid matching eg GPL-2 *and* GPL
142                 next TYPE;
143             }
144         }
145     }
146     my $text="\n";
147     for my $type (sort keys(%types_found))
148     {
149         $text .= "The complete text of the " . $license_data->{$type} ." can be\n";
150         $text .= "found in /usr/share/common-licenses/$type\n";
151     }
152     return $text;
153 }
154
155
156 sub guess_license
157 {
158     my $file=shift;
159     my $license='';
160     my $copyright='';
161     if(open(LICENSECHECK, "licensecheck --copyright \"$file\"|"))
162     {
163         while(<LICENSECHECK>)
164         {
165             chomp;
166             if(/^\s+\[(.*)\]$/)
167             {
168                 $copyright=$1;
169             }
170             elsif(/.*:\s+(.*)/)
171             {
172                 $license=$1;
173             }
174         }
175         close(LICENSECHECK);
176         $copyright =~ s/^\s*Copyright\s*:\s*//;
177         $license =~ s/.*UNKNOWN.*//;
178         $license =~ s/(L?GPL) \(v([\.\d]+) or later\)/$1-$2+/i;
179         $license =~ s/(L?GPL) \(v([\.\d]+)\)/$1-$2/i;
180         $license =~ s/G?FDL \(v([\.\d]+) or later\)/GFDL-$1+/i;
181         $license =~ s/G?FDL \(v([\.\d]+)\)/GFDL-$1/i;
182         $license =~ s/Apache \(v([\.\d]+) or later\)/Apache-$1+/i;
183         $license =~ s/Apache \(v([\.\d]+)\)/Apache-$1+/i;
184     }
185     return($license, $copyright);
186 }
187
188 sub scan_file
189 {
190     my $filename=shift;
191     unless(open(FILE, $filename))
192     {
193         warn("$me: $filename: cannot open: $!\n");
194         return;
195     }
196     my $header='';
197     for(my $i=0; $i < 15; $i++)
198     {
199         my $line=<FILE>;
200         last unless($line);
201         $header .= $line;
202     }
203     close(FILE);
204     my $hash=sha256_hex($header);
205     if( (!exists($files->{$filename})) ||
206         ($files->{$filename}->{hash} ne $hash))
207     {
208         filechanged($filename, $hash, $header);
209     }
210 }
211
212
213 sub filechanged
214 {
215     my($filename, $hash, $header)=@_;
216     my($license_guess, $copyright_guess)=guess_license($filename);
217     $new->{$filename}={
218         hash=>$hash,
219         license=>$license_guess,
220         copyright=>$copyright_guess,
221         header=>$header,
222     };
223     if(exists($files->{$filename}))
224     {
225         if(exists($files->{$filename}->{copyright}))
226         {
227             $new->{$filename}->{copyright_old}=$files->{$filename}->{copyright};
228         }
229         if(exists($files->{$filename}->{license}))
230         {
231             $new->{$filename}->{license_old}=$files->{$filename}->{license};
232         }
233         if(exists($files->{$filename}->{license_text}))
234         {
235             $new->{$filename}->{license_text_old}=$files->{$filename}->{license_text};
236         }
237     }
238 }
239
240 sub wanted
241 {
242     if(/^\.git/ || /^\.cvs/ || /^debian/ || /^modules$/)
243     {
244         $File::Find::prune=1;
245     }
246     elsif(-f)
247     {
248         push(@filenames, $File::Find::name);
249     }
250 }
251
252 sub get_filenames
253 {
254     find(\&wanted, ".");
255 }
256
257 sub load_cache
258 {
259     unless(open(YAML,$FILESCACHE))
260     {
261         warn("$me: cannot load cache $FILESCACHE: $!\n");
262         return;
263     }
264     my $yaml;
265     {
266         local $/=undef;
267         $yaml=<YAML>;
268     }
269     close(YAML);
270     $files=Load($yaml);
271 }
272
273 sub save_cache
274 {
275     backup($FILESCACHE);
276     unless(open(YAML,">$FILESCACHE"))
277     {
278         warn("$me: cannot save cache $FILESCACHE: $!\n");
279         return;
280     }
281     print YAML Dump($files);
282     close(YAML);
283 }
284
285 sub write_new
286 {
287     backup($NEWFILES);
288     unless(keys(%$new))
289     {
290         warn("$me: no new/changed files found\n");
291         return;
292     }
293     unless(open(NEW,">$NEWFILES"))
294     {
295         die("$me: cannot write to $NEWFILES: $!\n");
296     }
297     for my $file (sort keys %$new)
298     {
299         print NEW "File: $file\n";
300         print NEW "Hash: ", $new->{$file}->{hash}, "\n";
301         print NEW "Copyright: ", $new->{$file}->{copyright}, "\n";
302         print NEW "License: ", $new->{$file}->{license}, "\n";
303         print NEW "Licence_Text: \n";
304         if($new->{$file}->{license_old})
305         {
306             print NEW "#License_old: ", $new->{$file}->{license_old}, "\n";
307         }
308         if($new->{$file}->{copyright_old})
309         {
310             print NEW "#Copyright_old: ", $new->{$file}->{copyright_old}, "\n";
311         }
312         if($new->{$file}->{licence_text_old})
313         {
314             print NEW "#License_text_old: ", $new->{$file}->{licence_text_old}, "\n";
315         }
316         print NEW "#Header: \n";
317         my @headerlines=split(/\n/, $new->{$file}->{header});
318         @headerlines=map { "#" . $_ } grep { defined $_; } @headerlines;
319         print NEW join("\n", @headerlines);
320         print NEW "\n\n";
321     }
322     close NEW;
323 }
324
325 sub merge_new
326 {
327     unless(open(NEW, $NEWFILES))
328     {
329         die("$me: $NEWFILES: cannot open: $!\n");
330     }
331     my $license='';
332     my $copyright='';
333     my $hash='';
334     my $file='';
335     my $license_text='';
336     my $in_license_text=0;
337     my $line=0;
338     while(<NEW>)
339     {
340         $line++;
341         chomp;
342         next if(/^\s*$/);
343         next if(/^\s*\#/);
344         if($in_license_text && /^\s+(.*)/)
345         {
346             $license_text .= "\n" . $1;
347         }
348         elsif(/^File:\s*(.*)/)
349         {
350             my $newfile=$1;
351             # save previous entry
352             if(length($file) && length($hash))
353             {
354                 $files->{$file}={ hash=>$hash,
355                                   copyright=>$copyright,
356                                   license=>$license,
357                                   license_text=>$license_text };
358             }
359             $file=$newfile;
360             $license='';
361             $copyright='';
362             $hash='';
363             $license_text='';
364         }
365         elsif(/^Hash:\s*(.*)/)
366         {
367             $hash=$1;
368         }
369         elsif(/^Copyright:\s*(.*)/)
370         {
371             $copyright=$1;
372         }
373         elsif(/^License:\s*(.*)/)
374         {
375             $license=$1;
376         }
377         elsif(/^License_text:\s*(.*)/)
378         {
379             $in_license_text=1;
380             $license_text=$1;
381         }
382         elsif($in_license_text && /^\s+(.*)/)
383         {
384             $license_text .= "\n" . $1;
385         }
386         else
387         {
388             warn("$me: $NEWFILES: line $line not recognized\n");
389         }
390     }
391     close(NEW);
392     # save last entry
393     if(length($file) && length($hash))
394     {
395         $files->{$file}={ hash=>$hash,
396                           copyright=>$copyright,
397                           license=>$license,
398                           license_text=>$license_text };
399     }
400 }
401
402 sub find_deleted
403 {
404     my @deleted=();
405     my %newnames = map { $_ => 1 } @filenames;
406     for my $file (sort keys(%$files))
407     {
408         unless(exists($newnames{$file}))
409         {
410             push(@deleted, $file);
411         }
412     }
413     if(@deleted)
414     {
415         print "Removed files:\n";
416         print join("\n", @deleted),"\n";
417     }
418 }
419
420 sub backup
421 {
422     my $base=shift;
423     my $old="$base.old";
424     if(-f $base)
425     {
426         unlink($base);
427         move($base, $old);
428     }
429 }
430
431 sub usage
432 {
433     die("usage: $me [--scan] [--merge]\n",
434         "scans for changed copyright/licenses\n",
435         "  -s|-scan      Scan for new files & files with changed copyright headers\n",
436         "                Writes to debian/clscan/new.txt for manual review.\n",
437         "  -m|--merge    Merges new data from debian/clscan/new.txt\n",
438         "  -w|--write    Writes updated debian/copyright.\n",
439         "                --merge implies --write.\n");
440 }