From 76425eef84dcefa6731d85280c3d08cfa4892d07 Mon Sep 17 00:00:00 2001 From: zak Date: Sun, 12 Nov 2006 23:17:06 +0000 Subject: [PATCH] Use Date::Calc for date math Remove Date::Parse and Date::Format which conflict with HTTP::Date Don't fail if some changes files don't exist --- scripts/mirror-scripts/update.pl | 89 ++++++++++++++++++++++------------------ 1 file changed, 48 insertions(+), 41 deletions(-) diff --git a/scripts/mirror-scripts/update.pl b/scripts/mirror-scripts/update.pl index 97473dcc..b6690bb9 100755 --- a/scripts/mirror-scripts/update.pl +++ b/scripts/mirror-scripts/update.pl @@ -29,24 +29,19 @@ use strict; -use File::stat; -use Getopt::Long; -use HTTP::Status; -use LWP::UserAgent; -use Time::Local; +use Date::Calc::Object; # For date/time math +use File::stat; # For getting mtimes +use Getopt::Long; # For parsing command-line options +use HTTP::Status; # For HTTP status codes +use LWP::UserAgent; # For HTTP client functionality +use HTTP::Date; # For HTTP-compatible str2time and time2str -use Date::Parse; -use Date::Format; - -# This has name conflicts with Date::Parse and -# Date::Format; don't import its names. -use HTTP::Date (); ################## # Global variables ################## -# by default, this is not a test +# Verbose? (Debugging messages) my $verbose = 0; # by default, this is the working directory @@ -121,28 +116,20 @@ my $ua = new LWP::UserAgent( agent => "MirMirror/0.1", # if the file with the update time has disappeared, alert the admin # and use the datestamp on the startpage file ( /en/index.html ) sub findLastUpdateTime() { - open (UPDATETIME, "<", $lastupdatefile) or return $timenow; + open (UPDATETIME, "<", $lastupdatefile) or return Date::Calc->today()->date2time(); my $lastupdatetimestr = ; close (UPDATETIME); chomp ($lastupdatetimestr); + my $lastupdatetime = str2time ($lastupdatetimestr); + die "Can't parse last update time" if not $lastupdatetime; return str2time ($lastupdatetimestr); } -# convert the date into a correctly formatted string -sub date2ISOstr($) { - return time2str ("%Y:%m:%dT%T", $_[0]); -} - -# convert the date into RFC2616 format -sub date2HTTPstr($) { - return time2str ("%Y:%m:%dT%T", $_[0]); -} - # write the time now into the last update file sub saveLastUpdateTime() { open (UPDATETIME, ">", $lastupdatefile) or die "Can't open $lastupdatefile for writing ($!)"; - print UPDATETIME date2ISOstr($timenow); + print UPDATETIME time2str($timenow); close (UPDATETIME); } @@ -151,10 +138,11 @@ sub getChangesFileList($$) { my ($fromtime, $totime) = @_; my @files; - for (my $time = str2time(time2str("%Y:%m:%dT00:00:00", $fromtime)); - $time < $totime; - $time += 86400) { - push @files, time2str("changes%Y%m%d.txt", $time); + my $time = Date::Calc->time2date($fromtime); + my $maxtime = Date::Calc->time2date($totime); + for (; $time <= $maxtime; ++$time) { + my ($y,$m,$d) = $time->date; + push @files, sprintf("changes%04d%02d%02d.txt",$y,$m,$d); } return @files; } @@ -190,15 +178,15 @@ sub fetchFile($;$) { if ($localfile and -e $localfile) { # Don't fetch unless more recent than local copy my $stat = stat($localfile); - $req->header("If-Modified-Since" => HTTP::Date::time2str($stat->mtime)); + $req->header("If-Modified-Since" => time2str($stat->mtime)); } my $resp = $ua->request($req); if ($resp->is_success) { # 2xx codes - my $mtime = HTTP::Date::str2time($resp->header("Last-Modified")); + my $mtime = str2time($resp->header("Last-Modified")); if ($localfile) { if ($verbose) { print STDERR " -> success"; - print STDERR "; mtime ".time2str("%c", $mtime) if $mtime; + print STDERR "; mtime ".time2str($mtime) if $mtime; print STDERR "\n"; } ensureDir(dirPart($localfile)); @@ -226,6 +214,11 @@ sub fetchFile($;$) { die "Can't fetch $remotefile (got redirect, not yet handled)"; } else { + if ($resp->code == RC_NOT_FOUND) { # 404 + print STDERR " -> not found\n" if $verbose; + return undef; + } + print STDERR " -> failed (".$resp->code.")\n" if $verbose; die "Can't fetch $remotefile (".$resp->status_line.")"; } @@ -247,15 +240,19 @@ sub getChangesFile($) { my $timeoflastupdate = findLastUpdateTime(); if ($verbose) { - print STDERR "timenow is ".time2str("%c",$timenow)." \n"; - print STDERR "timeoflastupdate is ".time2str("%c",$timeoflastupdate)." \n\n"; + print STDERR "timenow is ".time2str($timenow)." \n"; + print STDERR "timeoflastupdate is ".time2str($timeoflastupdate)." \n\n"; } # Now we know which days' changes we need to get from the server my @changesfiles = getChangesFileList($timeoflastupdate, $timenow); -if ($verbose or $listchangefiles) { +if ($verbose) { foreach my $file (@changesfiles) { print STDERR "using changes file $file\n"; } - exit 0 if $listchangefiles; +} + +if ($listchangefiles) { + foreach my $file (@changesfiles) { print "$file\n"; } + exit 0; } # get the changes files @@ -267,21 +264,29 @@ foreach my $file (@changesfiles) { $changesfilecontent{$file} = getChangesFile($ # iterate over all the fetched files, building up a list of files # to fetch/delete my %files; -foreach my $changes (@changesfiles) { - my $date = $changes; - $date =~ s{^(?:.*/)?changes([0-9]+)\.txt$}{$1} or die "Can't extract date from changes filename $changes"; +foreach my $changesfile (@changesfiles) { + my $changesfilecontent = $changesfilecontent{$changesfile}; + if (not defined $changesfilecontent) { + print STDERR "Skipping changes file $changesfile; not present at remote end\n" if $verbose; + next; + } + + my $date = $changesfile; + $date =~ s{^(?:.*/)?changes([0-9]+)\.txt$}{$1} or die "Can't extract date from changes filename $changesfile"; + + print STDERR "Processing changes file $changesfile\n" if $verbose; - my @changes = split /[\r\n]+/, $changesfilecontent{$changes}; + my @changes = split /[\r\n]+/, $changesfilecontent; foreach my $change (@changes) { my ($time, $op, $path) = split ' ', $change; # Ignore malformed lines, especially wacky paths that could be malicious if ($time =~ /[^0-9:]/) { - die "Invalid time $time in $changes"; + die "Invalid time $time in $changesfile"; } if ($path =~ m{(?:^|/)\.\.(?:/|$)}) { - die "Invalid path $path (contains ..) in $changes"; + die "Invalid path $path (contains ..) in $changesfile"; } # Strip scheme and host from absolute URLs @@ -316,7 +321,9 @@ while (my ($file, $op) = each %files) { } elsif ($op eq "add" or $op eq "change" or $op eq "Modification") { # add/change: re-fetch the file + # FIXME: don't insist on reading entire file into memory my $content = fetchFile("$remoteroot/$file","$workingdir/$file"); + die "File $remoteroot/$file not found" if not defined $content; } else { die "Unknown operation '$op'"; -- 2.11.0