#! /usr/bin/perl -W # we'll need the following external programs # find mail # TODO # - check more (e.g.: ar x foo.deb && zcat control.tar.gz && zcat data.tar.gz) # - if requested: add a fall back using the md5sum binary require 5.006_000; # XXX Really? use strict; use Getopt::Long; use Digest::MD5; BEGIN { unshift @INC, $ENV{DMS_BASE} if $ENV{DMS_BASE} && -d $ENV{DMS_BASE}.'/DMS';} use DMS::Common; # options my $RsyncLog; my $Dir; my $FileCheck = 0; my $MD5All = 0; my $MD5Part = 0; my $Include; my $Exclude; my $prog_md5sum = 'md5sum'; # IIRC, it's called "md5" on FreeBSD?!? sub usage() { print <) { chomp; my $pfile = $_; open(C, "gzip -dc $pfile |") || die "Can't gunzip!"; while () { chomp; if (/^Filename: (.+)$/) { msg(2, "Scanned $count files (Missing ".(scalar @missing_all).") ". "[".int($count / (time - $start + 0.0001))." files/s]"."\n") if ++$count % 5000 == 0; unless (-f "$dir/$1" || (-l "$dir/$1" && -f readlink("$dir/$1"))) { push @missing_all, $1; push @{$missing{$pfile}}, $1; } } } close C; } close P; if (@missing_all) { print "Missing files:\n"; for my $i (keys %missing) { print "$i:\n ", join("\n ", @{$missing{$i}}), "\n"; } } } my $md5 = Digest::MD5->new; my %md5sums; my $check_start = 0; my $check_count = 0; my $check_size = 0; my $debfiles = 0; sub do_md5_check_with_perl($) { my $file = shift; open(A, "$file") || do { msg(0, "Can't open $$file!\n"); return 0; }; $md5->reset(); my $s = $md5->addfile(*A)->hexdigest(); close A; $s; } sub do_md5_check_with_md5sum($) { my $file = shift; unless (-e $file) { errmsg("File '$file' does not exist!\n"); return 0; } my $output = `$prog_md5sum $file`; if ($?) { errmsg("Error calling '$prog_md5sum $file': $!\n"); return 0; } my ($a, @r) = split /\s+/, $output; if (length($a) != 32) { errmsg("Error from $prog_md5sum for $file: $a @r!\n"); return 0; } $a; } # return 1 if md5sum matches, 0 if not, 2 if file missing sub check_md5sum_file($$$) { my ($dir, $file, $md5sum) = @_; my $ret; if (!defined $md5sums{$file}) { msg(0, "No md5sum for $file in Package files!\n"); return 0; } $check_start = time if $check_start == 0; #my $s = do_md5_check_with_perl("$dir/$file"); my $s = do_md5_check_with_md5sum("$dir/$file"); if ($s eq $md5sums{$file}) { $ret = 1; # statistics stuff (we all like that, don't we? :) $check_size += (stat("$dir/$file"))[7]; msg(2, "MD5 check: Processed $check_count files [". int($check_count/(time-$check_start+0.0001))." files/s, ". int($check_size/(time-$check_start+0.0001)/1024)." kbytes/s". (($MD5All && !defined $Include && !defined $Exclude) ? (", ".int($check_count/$debfiles*100)."%") : "")."]\n") if ++$check_count % 100 == 0; } elsif ($s == 0) { # file does not exist $ret = 2; } else{ msg(1, "Wrong $file ($s != $md5sums{$file})\n"); $ret = 0; } msg(3, "$file: ".(($ret == 1)?"Ok":($ret == 0 ? "Failed" : "Missing")).".\n"); $ret; } my $slurp_start = 0; sub slurp_package_infos($) { my $dir = shift; my $ecount = 0; my ($Filename, $MD5sum); $slurp_start = time if $slurp_start == 0; msg(1, "Reading package info in.\n"); open(P, "find $dir/dists -name Packages.gz | xargs -n 1 gzip -dc |") || die "Cannot fork: $!!"; while (

) { chomp; # we normally have an order of entries but are they constant in the # future? $Filename = $1 if /^Filename: (.+)$/; $MD5sum = $1 if /^MD5sum: (.+)$/; if ($_ eq '') { if (defined $Filename && defined $MD5sum) { if (defined $md5sums{$Filename}) { if ($md5sums{$Filename} ne $MD5sum) { msg(0, "Different MD5 sum for the same file in the packages files!\n"); } } else { $md5sums{$Filename} = $MD5sum; $debfiles++; } ($Filename, $MD5sum) = (undef, undef); msg(2, "Get info: Processed $ecount entries ($debfiles files) [".int(($ecount/(time-$slurp_start+0.0001)))." entries/s].\n") if (++$ecount % 5000 == 0); } else { print "Uuuhhh!\n"; } } } close P || die "Can't close: $!!"; } sub display_wrong_files(@) { msg(0, "Wrong checksums: \n".join("\n", @_)."\n") if @_; } sub display_missing_files(@) { msg(0, "Missing files: \n".join("\n", @_)."\n") if @_; } sub md5_check_all($) { my $dir = shift; my @wrong; my @missing; # check all files in %md5sums foreach my $f (keys %md5sums) { next if defined $Include && $f !~ /$Include/; next if defined $Exclude && $f =~ /$Exclude/; my $r = check_md5sum_file($dir, $f, $md5sums{$f}); push @wrong, $f if $r == 0; push @missing, $f if $r == 2; } display_wrong_files(@wrong); display_missing_files(@missing); } sub md5_check_partly($$) { my ($dir, $rsynclog) = @_; my @wrong; my @missing; open(R, $rsynclog) || die "Can't open $rsynclog: $!!"; while () { chomp; next unless /^\S+\.deb$/; next if defined $Include && $_ !~ /$Include/; next if defined $Exclude && $_ =~ /$Exclude/; # there are files transmited during updates which aren't referenced # in any Packages file next unless defined $md5sums{$_}; my $r = check_md5sum_file($dir, $_, $md5sums{$_}); push @wrong, $_ if $r == 0; push @missing, $_ if $r == 2; } close R || die "Can't close $rsynclog: $!!"; display_wrong_files(@wrong); display_missing_files(@missing); } parse_args(); if (!defined $Dir) { usage(); exit(1); } if (! -d "$Dir/dists") { msg(0, "$Dir doesn't seem to be a Debian archive!\n"); usage(); exit(1); } unless ($FileCheck || $MD5All || $MD5Part) { msg(0, "Need to give one of -f, -a or -p!\n"); usage(); exit(1); } # do the work if ($FileCheck) { file_check($Dir); } if ($MD5All && $MD5Part) { usage(); exit(1); } if ($MD5All || $MD5Part) { if ($MD5Part && (!defined $RsyncLog || ! -r $RsyncLog)) { msg(0, "Need to supply a rsync sync log!\n"); usage(); exit(1); } slurp_package_infos($Dir); md5_check_all($Dir) if $MD5All; md5_check_partly($Dir, $RsyncLog) if $MD5Part; } exit 0;