From f57b9b87f8c213c516bc1fbc2dea3ea70f7f7395 Mon Sep 17 00:00:00 2001 From: zak Date: Sun, 12 Nov 2006 20:59:20 +0000 Subject: [PATCH] Fixes to update.pl and tidying up --- scripts/mirror-scripts/update.pl | 219 +++++++++++++++++++++------------------ 1 file changed, 120 insertions(+), 99 deletions(-) diff --git a/scripts/mirror-scripts/update.pl b/scripts/mirror-scripts/update.pl index ee21c0bb..6292c7d1 100755 --- a/scripts/mirror-scripts/update.pl +++ b/scripts/mirror-scripts/update.pl @@ -32,6 +32,7 @@ use strict; use Cwd; use Date::Parse; use Date::Format; +use Getopt::Long; use LWP::UserAgent; use Time::Local; @@ -40,10 +41,10 @@ use Time::Local; ################## # by default, this is not a test -my $istest = 0; +my $verbose = 0; # by default, this is the working directory -my $workingdir = getcwd(); +my $workingdir = "."; # get the time and date - using UTC my $timenow = time(); @@ -52,8 +53,6 @@ my $timenow = time(); # otherwise we do our normal processing my $listchangefiles = 0; -my $outputfile; - # The root URL to fetch files from my $remoteroot; @@ -61,23 +60,31 @@ my $remoteroot; # process command line arguments ################################ -foreach my $argnum (0 .. $#ARGV) { - my $argument = $ARGV[$argnum]; - if ($argument eq "--test") { - $istest = 1; - } elsif ($argument =~ /^--workingdir=(.*)$/) { - $workingdir = $1; - } elsif ($argument =~ /^--now=(.*)$/) { - $timenow = str2time($1); - } elsif ($argument =~ /^--output=(.*)$/) { - $outputfile = $1; - } elsif ($argument =~ /^--remoteroot=(.*)$/) { - $remoteroot = $1; - } elsif ($argument =~ /^--list-change-files/) { - $listchangefiles = 1; - } +sub usage() { + print STDERR < \$verbose, + "workingdir=s" => \$workingdir, + "now=s" => sub($) { $timenow = str2time($_[0]); }, + "remoteroot=s" => \$remoteroot, + "list-change-files!" => \$listchangefiles, + "help" => sub() { usage(); } ); + +usage() if not defined $remoteroot; + # the directory where all files with state are kept my $statedir = "$workingdir/state"; if (! -e $statedir) { @@ -100,75 +107,6 @@ my $ua = LWP::UserAgent->new(agent => "MirMirror/0.1", keep_alive => 1); -################## -# the program flow -################## - -# first work out when the last time we were up to date is and -# find present time. -my $timeoflastupdate = findLastUpdateTime(); - -if ($istest) { - print STDERR "timenow is ".time2str("%c",$timenow)." \n"; - print STDERR "timeoflastupdate is ".time2str("%c",$timeoflastupdate)." \n\n"; -} - -# Now we know which days' changes we need to get from the server -my @changesfiles = getChangesFileList(); -if ($istest or $listchangefiles) { - foreach my $file (@changesfiles) { print STDERR "using changes file $file\n"; } - exit 0 if $listchangefiles; -} - -# get the changes files -my @changesfilecontent; -foreach my $file (@changesfiles) { push @changesfilecontent, getChangesFile($file); } - -# if the file has not changed (response code 304) then ignore it - -# iterate over all the fetched files, building up a list of files -# to fetch/delete -my %files; -foreach my $changes (@changesfilecontent) { - my @changes = split /[\r\n]+/, $changes; - foreach my $change (@changes) { - my ($time, $op, $path) = split ' ', $change; - # TODO: Ignore changes prior to $timeoflastupdate - # TODO: Ignore malformed lines, especially wacky paths that could be malicious - $files{$path} = $op; - print STDERR "Marked $path as '$op'\n" if $istest; - } -} - -# Fetch all files whose last operation was "add" or "change" -# Delete all files whose last operation was "delete" -while (my ($file, $op) = each %files) { - if ($op eq "delete") { - if (-e "$workingdir/$file") { - # delete: if the file exists, remove it - print STDERR "deleting $workingdir/$file\n" if $istest; - unlink "$workingdir/$file" or die "Can't delete $workingdir/$file ($!)"; - } - else { - print STDERR "not deleting $workingdir/$file beacuse it doesn't exist\n" if $istest; - } - } - elsif ($op eq "add" or $op eq "change") { - # add/change: re-fetch the file - my $content = fetchFile("$remoteroot/$file","$workingdir/$file"); - } - else { - die "Unknown operation '$op'"; - } -} - - -# update the last "up-to-date" time -saveLastUpdateTime() unless $istest; - -# finish -exit 0; - ############### # SUBROUTINES # ############### @@ -203,11 +141,12 @@ sub saveLastUpdateTime() { } # return an array of filename -sub getChangesFileList() +sub getChangesFileList($$) { + my ($fromtime, $totime) = @_; my @files; - for (my $time = str2time(time2str("%Y:%m:%dT00:00:00", $timeoflastupdate)); - $time < $timenow; + for (my $time = str2time(time2str("%Y:%m:%dT00:00:00", $fromtime)); + $time < $totime; $time += 86400) { push @files, time2str("changes%Y%m%d.txt", $time); } @@ -217,15 +156,25 @@ sub getChangesFileList() # get the directory part of a filename sub dirPart($) { my $dir = $_[0]; - $dir =~ s{/[^/]*$}{}; + $dir =~ s{/+[^/]*$}{}; return $dir; } +# ensure the given directory exists (like mkdir -p) +sub ensureDir($) { + my $dir = $_[0]; + if (! -e $dir) { + my $parent = dirPart($dir); + &ensureDir($parent) if ($parent); + mkdir $dir or die "Can't create directory $dir ($!)"; + } +} + # get a file, optionally saving it locally sub fetchFile($;$) { my ($remotefile, $localfile) = @_; - if ($istest) { + if ($verbose) { print STDERR "fetching $remotefile"; print STDERR " as $localfile" if $localfile; } @@ -235,11 +184,8 @@ sub fetchFile($;$) { my $resp = $ua->request(HTTP::Request->new(GET => "$remotefile")); if ($resp->is_success) { if ($localfile) { - print STDERR " -> success\n" if $istest; - my $localdir = dirPart($localfile); - if (! -e $localdir) { - mkdir $localdir or die "Can't create directory $localdir ($!)"; - } + print STDERR " -> success\n" if $verbose; + ensureDir(dirPart($localfile)); open (LOCAL, ">", "$localfile") or die "Can't open $localfile for writing ($!)"; print LOCAL $resp->content or die "Error writing $localfile ($!)"; @@ -249,7 +195,7 @@ sub fetchFile($;$) { return $resp->content; } else { - print STDERR " -> failed\n" if $istest; + print STDERR " -> failed\n" if $verbose; die "Can't fetch $remotefile (".$resp->status_line.")"; } } @@ -258,3 +204,78 @@ sub fetchFile($;$) { sub getChangesFile($) { return fetchFile($changesroot."/".$_[0], $changesdir."/".$_[0]); } + + + +################## +# the program flow +################## + +# first work out when the last time we were up to date is and +# find present time. +my $timeoflastupdate = findLastUpdateTime(); + +if ($verbose) { + print STDERR "timenow is ".time2str("%c",$timenow)." \n"; + print STDERR "timeoflastupdate is ".time2str("%c",$timeoflastupdate)." \n\n"; +} + +# Now we know which days' changes we need to get from the server +my @changesfiles = getChangesFileList($timeoflastupdate, $timenow); +if ($verbose or $listchangefiles) { + foreach my $file (@changesfiles) { print STDERR "using changes file $file\n"; } + exit 0 if $listchangefiles; +} + +# get the changes files +my @changesfilecontent; +foreach my $file (@changesfiles) { push @changesfilecontent, getChangesFile($file); } + +# if the file has not changed (response code 304) then ignore it + +# iterate over all the fetched files, building up a list of files +# to fetch/delete +my %files; +foreach my $changes (@changesfilecontent) { + my @changes = split /[\r\n]+/, $changes; + foreach my $change (@changes) { + my ($time, $op, $path) = split ' ', $change; + # Strip scheme and host from absolute URLs + $path =~ s{^[a-z]+://[a-z0-9\-\.]+/}{/}; + # TODO: Ignore changes prior to $timeoflastupdate + # TODO: Ignore malformed lines, especially wacky paths that could be malicious + $files{$path} = $op; + print STDERR "Marked $path as '$op'\n" if $verbose; + } +} + +# Fetch all files whose last operation was "add" or "change" +# Delete all files whose last operation was "delete" +while (my ($file, $op) = each %files) { + if ($op eq "delete") { + if (-e "$workingdir/$file") { + # delete: if the file exists, remove it + print STDERR "deleting $workingdir/$file\n" if $verbose; + unlink "$workingdir/$file" or die "Can't delete $workingdir/$file ($!)"; + } + else { + print STDERR "not deleting $workingdir/$file beacuse it doesn't exist\n" if $verbose; + } + } + elsif ($op eq "add" or $op eq "change" or $op eq "Modification") { + # add/change: re-fetch the file + my $content = fetchFile("$remoteroot/$file","$workingdir/$file"); + } + else { + die "Unknown operation '$op'"; + } +} + + +# update the last "up-to-date" time +saveLastUpdateTime(); + +# finish +exit 0; + + -- 2.11.0