#!/usr/local/bin/perl # # This program attemps to clear out the cruft that # accumulates over the years when you've installed # 47 versions of 300 different ports... # # The output of this program is a shell script. It # takes no actions itself except to build a database # so that subsequent runs are faster. # # One BIG assumption: you build your ports by hand and # keep the list of what you want to be installed in # /usr/ports/makefile. # # Copyright (C) 2001,2002 David Muir Sharnoff. License hereby # granted for anyone to use, modify or redistribute this program at # their own risk. Please feed useful changes back to muir@idiom.com. # # VERSION: 2003.12.12 # Nomenclature. # port - in /usr/ports # package - in /var/db/pkg # Tunable knobs... my $pkgdb = "/var/db/pkg"; # where to store database of checksums & such. # largish. my $cksumdb = "/var/db/portdata.db"; # file where list of ports to install is kept my $request_list = "/usr/ports/makefile"; # prefix for lines in that file my $request_prefix = 'SUBDIR +='; # size of DBFile cache. I have lot's of RAM... my $cachesize = 400_000; # delete packages that weren't requested? my $delete_nonrequested = 0; # delete old packages? Define old in days... 0 for disable-feature my $too_old = 0; # for debugging purposes, limit the scope of # to just packages that match the regular expression... my $restrict; #$restrict = qr"^weblint"; my $portsdir = "/usr/ports"; use File::Slurp; use File::Flock; use Digest::MD5 qw(md5_hex); use DB_File; use POSIX; use File::Basename; #my $fileorigindb = "/var/db/fileorigin.db"; use strict; my %pkginfo; # this is the main runtime data structure. # keyed by port name in /var/db/pkg # fields: # origin - the claimed source directory in /var/db/pkg # mkorgin - the directory w/Makefile that might build this # uorigin - mkorigin or origin if no mkorigin # files - list of files included # depends_on - list of dependencies # directories - list of directories involved # match - number of files that match their md5s # mismatch - number of files that do not match their md5s # notchecked - number of files that do have md5s # namever - portname-portversion # health - rating 0-100 of match vs mismatch vs notchecked # recent - time (in days) since last file was accessed # version - the version of this package # current_version - the current version of this port (frm Makefiles) # new_depends - dependencies as per Makefile # overlapsummary - description of strongest file overlap # dodelete - this package should be deleted # doupgrade - this package should be upgraded # comment - string to emit # possible_request- this package isn't requested but maybe it should be my %filesfrom; # for each file that any package has installed, a list of # ports that use it. mismatches are not included my %mismatch; # for each file that any package has installed, a list of # ports that use it. only mismatches included my %indexdata; # data from /usr/ports/INDEX # keyed by port directory name # fields: # dependencies - list of dependencies # namever - portname-portversion my %simplenames;# from /usr/ports/INDEX, a translation from # ports with unambigeous names to their # directories. my %requested; # tracking dependencies from the point-of-view # of the requested package. # keyed by portname-portversion my %drequest; # tracking dependencies from the point-of-view # of the requested package. # keyed by directory my %namever2dir;# from /usr/ports/INDEX, a translation from # portname-portversion to port directory my %versions; # for each port directory, a listing (by version) # of packages installed from that directory $DB_HASH->{cachesize} = $cachesize; lock($cksumdb); my %portdata; tie %portdata, 'DB_File', $cksumdb, O_RDWR|O_CREAT, 0640, $DB_HASH or die "cannot open $cksumdb: $!"; #unlink($fileorigindb); #my %fileorigin; #tie %fileorigin, 'DB_File', $fileorigindb, O_RDWR|O_CREAT, 0640, $DB_HASH # or die "cannot open $fileorigindb: $!"; synchronize_name_and_version(); for my $pkg (sort &read_dir($pkgdb)) { next unless $pkg =~ /$restrict/; next if $pkg =~ /^\./; next unless -d "$pkgdb/$pkg"; pass1($pkg); } for my $pkg (sort keys %pkginfo) { pass2($pkg); } for my $pkg (sort keys %pkginfo) { pass3($pkg); } my $trace_requests = -e $request_list; trace_requests(); for my $dir (sort keys %versions) { pass4($dir); } for my $pkg (sort keys %pkginfo) { pass5($pkg); } for my $pkg (sort keys %pkginfo) { passN($pkg); } exit 0; # # This is where we emit the shell script # %filesfrom & %mismatch are destroyed # my %upgrade_requested; sub passN { my ($p) = @_; my $pkg = $pkginfo{$p}; print "# -------------------------------- $p\n"; printf "# health: %d%%\n", $pkg->{health}; printf "# last used: %0.2f days ago\n", $pkg->{recent}; print "# overlaps: $pkg->{overlapsummary}\n" if $pkg->{overlapsummary}; if ($pkg->{version} eq $pkg->{current_version}) { print "# this is the current version\n"; } else { print "# current version: $pkg->{current_version}\n"; } print "# requested by $drequest{$pkg->{uorigin}}\n" if $trace_requests && $pkg->{uorigin} && $drequest{$pkg->{uorigin}}; print $pkg->{comment} if $pkg->{comment}; print "# origin: $pkg->{uorigin}\n"; if ($pkg->{dodelete}) { print "# DELETE\n"; my $buf; my $keeper; print "# deleting $p - $pkg->{dodelete}\n"; for my $file (@{$pkg->{files}}) { my @ff = grep($_ ne $p, @{$filesfrom{$file}}, @{$mismatch{$file}}); delete $mismatch{$file}; if (@ff) { # $buf .= sprintf "# %s still used in %d ports (eg %s)\n", # $file, scalar(@ff), $ff[0]; $filesfrom{$file} = \@ff; $keeper++; } else { $buf .= "rm '$file'\n" if -e $file; delete $filesfrom{$file}; } } if ($keeper) { print $buf; for my $pf (sort &read_dir("$pkgdb/$p")) { print "rm $pkgdb/$p/$pf\n"; } for my $dir (sort { length($b) <=> length($a) } @{$pkg->{directories}}) { print "rmdir '$dir'\n" if -d $dir; } print "rmdir $pkgdb/$p\n"; } else { print "# none of $p's files are used elsewhere...\n"; print "pkg_delete -f $p\n"; } } if ($pkg->{doupgrade}) { print "# UPGRADE\n"; if ($requested{$pkg->{namever}} ne $request_list) { print "# $p requested by $requested{$pkg->{namever}}\n"; if ($pkg->{uorigin} && ! $upgrade_requested{$pkg->{uorigin}}++) { print "echo '$request_prefix $pkg->{uorigin}' >> $request_list\n"; } } } if ($pkg->{possible_request} && ! $pkg->{doupgrade} && $pkg->{uorigin} && ! $upgrade_requested{$pkg->{uorigin}}) { print "echo '# $request_prefix $pkg->{uorigin}' >> $request_list\n"; } } # # Use the request tracing to set some packages up for deletion # and others up for upgrading. # sub pass5 { my ($p) = @_; my $pkg = $pkginfo{$p}; my $reqby = $requested{"$pkg->{name}-$pkg->{version}"} || $drequest{$pkg->{uorigin}} || $drequest{$pkg->{origin}}; if ($reqby) { $pkg->{requested_by} = $reqby; } elsif ($trace_requests) { print "# no request for $p\n"; if ($delete_nonrequested) { $pkg->{dodelete} = "no request" unless $pkg->{dodelete}; } $pkg->{possible_request} = 1; } } # # we attempt to figure out which of the non-current versions # of something that we should keep... # # Use the health! # # This is not perfect and suffers when the health is equal. A # dewey-comparison is needed in that situation. An exercise # for the reader! # sub pass4 { my ($dir) = @_; my (@v) = sort { $pkginfo{$versions{$dir}{$b}}{health} <=> $pkginfo{$versions{$dir}{$a}}{health} || $b <=> $a || $b cmp $a } keys %{$versions{$dir}}; return unless @v > 1; my $most = $pkginfo{$versions{$dir}{shift @v}} || die; if (grep($_ eq $most->{current_version}, @v)) { # the current version is installed, we're okay print "# current version from $dir is installed\n"; return; } $most->{comment} .= "# most healthy of peers\n"; for my $v (@v) { $pkginfo{$versions{$dir}{$v}}{dodelete} = "defer to $most->{version}"; } } # # Look to see if it's obvious that a package should be deleted # because the current version is installed and this version isn't # the current version... # # Sets $pkginfo{$package}{uorigin} # Sets $versions{$directory}{$version} = $package # sub pass3 { my ($p) = @_; my $pkg = $pkginfo{$p}; my ($name, $version, $origin, $current_version, $depends) = pkgname2name($p); $pkg->{name} = $name; $pkg->{version} = $version; $pkg->{mkorigin} = $origin; $pkg->{current_version} = $current_version; $pkg->{new_depends} = $depends; unless ($pkg->{mkorigin} || $pkg->{origin}) { print "# cannot find original port directory for $p\n"; $pkg->{dodelete} = "cannot find original port directory" unless $pkg->{dodelete}; } if ($pkg->{version} ne $pkg->{current_version}) { if ($pkginfo{"$pkg->{name}-$pkg->{current_version}"}) { print "# $pkg->{name}: $pkg->{version} installed, $pkg->{current_version} also installed\n"; $pkg->{dodelete} = "a more recent version is installed"; } elsif ($drequest{$p} && $trace_requests) { print "# $pkg->{name}: $pkg->{version} installed, $pkg->{current_version} available\n"; $pkg->{doupgrade} = "a more recent version is available"; } elsif ($trace_requests) { print "# $pkg->{name}: $pkg->{version} installed, $pkg->{current_version} available - NO REQUEST\n"; $pkg->{possible_request} = 1; } else { print "# $pkg->{name}: $pkg->{version} installed, $pkg->{current_version} available\n"; } } my $o = $pkg->{origin} || $pkg->{mkorigin}; if ($o) { $pkg->{uorigin} = $o; $versions{$o}{$pkg->{version}} = $p; } } # # Examins which packages overlap with each other. This isn't # really used. # # Sets $pkginfo{$package}{overlapsummary} # sub pass2 { my ($p) = @_; my $pkg = $pkginfo{$p}; my %overlapwith; # check for overlaps for my $file (@{$pkg->{files}}) { for my $port (@{$filesfrom{$file}}) { $overlapwith{$port}++; } } delete $overlapwith{$p}; my $myfiles = @{$pkg->{files}}; for my $port (sort { $overlapwith{$b} <=> $overlapwith{$a} } keys %overlapwith) { next if $overlapwith{$port} / $myfiles * 100 < 5; $pkg->{overlap}{$port} = $overlapwith{$port}; next if $pkg->{health} > $pkginfo{$port}{health}; $pkg->{overlapsummary} = sprintf "%d%% with %s (%d%%)", $overlapwith{$port} / $myfiles * 100, $port, $pkginfo{$port}{health}; printf "# %s (%d%%) overlaps %s\n", $p, $pkg->{health}, $pkg->{overlapsummary}; last; } } # # Read the +CONTENTS file from /var/db/pkg/$package. # # Sets most of %pkginfo. # sub pass1 { my ($p) = @_; my $cf = "$pkgdb/$p/+CONTENTS"; open(CONTENTS, "<$cf") or die "cannot open $cf: $!"; my $cwd = '/'; my $lastfile; my $name; my %checksum; my @files; my @pkgdep; my $pkgformatrev; my $origin; my @dirs; while () { chop; if (/^@/) { if (/^\@comment MD5:(\S+)/) { $checksum{$lastfile} = $1; } elsif (/^\@cwd (\S+)/) { $cwd = $1; } elsif (/^\@comment PKG_FORMAT_REVISION:(\S+)/) { $pkgformatrev = $1; } elsif (/^\@comment ORIGIN:(\S+)/) { $origin = $1; } elsif (/^\@comment/) { # ignore it } elsif (/^\@name (\S+)/) { $name = $1; } elsif (/^\@(?:un)?exec/) { # ignore it } elsif (/^\@(mode|owner|group)/) { # ignore it } elsif (/^\@ignore(?:_inst)?/) { # ignore it } elsif (/^\@mtree/) { # warn "found mtree directive in $cf"; } elsif (/^\@dirrm\s(\S+)/) { push(@dirs, $1); } elsif (/^\@display/) { # ignore it } elsif (/^\@pkgdep\s+(\S+)/) { push(@pkgdep, $1); } else { warn "unknown \@ directive: $_ in $cf"; } next; } my $file = "$cwd/$_"; push(@files, $file); $lastfile = $file; } my $match = 0; my $mismatch = 0; my $notchecked = 0; my $recent; for my $f (@files) { my $mm; $filesfrom{$f} = [] unless $filesfrom{$f}; $mismatch{$f} = [] unless $mismatch{$f}; if ($checksum{$f}) { my $md5 = checksum($f); if ($md5 eq $checksum{$f}) { $match++; push(@{$filesfrom{$f}}, $p); # $fileorigin{$f} .= " +$p"; } else { $mismatch++; $mm++; # $fileorigin{$f} .= " -$p"; push(@{$mismatch{$f}}, $p); } } else { $notchecked++; push(@{$filesfrom{$f}}, $p); # $fileorigin{$f} .= " =$p"; } if (-e $f && ! $mm) { my $a = -A $f; $recent = $a unless defined $recent; $recent = $a if $a < $recent; } } my $health = scalar(@files) ? 50 + 50 * ($match - $mismatch) / scalar(@files) : 100; $pkginfo{$p} = { checksums => \%checksum, origin => $origin, depends_on => \@pkgdep, directories => \@dirs, files => \@files, match => $match, mismatch => $mismatch, notchecked => $notchecked, health => $health, recent => $recent, }; if ($too_old && -M $cf > $too_old) { $pkginfo{$p}{dodelete} = sprintf "old age (%d days)", -M $cf; } printf "# $p scanned (%d%% healthy, used %0.2f days ago)\n", $health, $recent; } # # Return the checksum of a file on the system. Cache the # checksum so that later runs go faster. # # It pulls the entire file into memory. This could be faster # and use less memory if it didn't do that... # sub checksum { my ($file) = @_; if (-e $file) { my ($fatime, $fmtime) = (stat(_))[8,9]; if ($portdata{$file}) { my ($mtime, $md5) = split(' ', $portdata{$file}); return $md5 if $mtime eq $fmtime; } my $c = read_file($file); my $md5 = md5_hex($c); $portdata{$file} = "$fmtime $md5"; utime($fatime, $fmtime, $file); return $md5; } else { return "no such file"; } } # # If /usr/ports/INDEX has been modified since the last run, # re-read all the Makefiles to see what /usr/ports/INDEX # missed... # sub synchronize_name_and_version { read_index(); if ($portdata{'makefiles read'} && stat("$portsdir/INDEX") && $portdata{'makefiles read'} > (stat(_))[9] && ! $restrict) { # everything should be a-okay } else { read_makefiles(); } } # # Read /usr/ports/INDEX. # Also read /usr/ports/INDEX.missing - I don't know # why /usr/ports/INDEX isn't complete, but I'll work # around it :} # sub read_index { my $index = "$portsdir/INDEX"; open(INDEX, "<$index") or die "open $index: $!"; while () { index_line($_); } close(INDEX); my $index_extra = "$portsdir/INDEX.missing"; return unless -e "$portsdir/INDEX.missing"; open(INDEX, "<$index_extra") or die "open $index: $!"; while () { index_line($_); } close(INDEX); } # # Process a line from /usr/ports/INDEX # # Sets $simlenames{$portname} # Sets $indexdata{$directory} # Sets $namever2dir{$package} # sub index_line { my ($iline) = @_; my ($namever, $dir, $root, $desc, $descfile, $maintainer, $categories, $dependencies, $url) = split(/\|/, $iline); $dir =~ s,^\Q$portsdir\E/,,; return if $indexdata{$dir}; return unless -e "$portsdir/$dir/Makefile"; $namever2dir{$namever} = $dir; $indexdata{$dir} = { dependencies => [ split(' ', $dependencies) ], namever => $namever, }; if ($namever =~ m,^(.*)-(.*)$,) { my $simplename = $1; my $simpleversion = $2; $indexdata{$dir}{name} = $simplename; $indexdata{$dir}{version} = $simpleversion; if (exists $simplenames{$simplename}) { $simplenames{$simplename} = undef; } else { $simplenames{$simplename} = $dir; } } return 1; } # # Read the Makefiles to fill in some details... # Much of the time /usr/ports/INDEX has all the required # information, but sometimes it doesn't. Cache the times # it doesn't for later runs. # # Attempt to process the Makefile internally, but when that # fails, use make to process the Makefile :-) # # Pay special attention to the situation where PORTNAME # is duplicated between ports. # sub read_makefiles { open(IEXTRA, ">>$portsdir/INDEX.missing") || die; for my $category (sort { $a cmp $b } read_dir($portsdir)) { next unless -d "$portsdir/$category"; for my $port (sort { $a cmp $b } read_dir("$portsdir/$category")) { next unless $port =~ /$restrict/; my $dir = "$portsdir/$category/$port"; my $makefile = "$dir/Makefile"; next unless -e $makefile; if (! $indexdata{"$category/$port"}) { chdir($dir) || die "chdir $dir: $!"; my $indexline = `make describe`; if (read_index($indexline)) { print IEXTRA $indexline; print "# No index entry for $category/$port (corrected)\n"; } else { print "# No index entry for $category/$port (unable to correct)\n"; } } my $vars = read_makefile($makefile); my $portname = $vars->{PORTNAME}; 1 while ($portname =~ s/\$\{([^{}]+)\}/$vars->{$1}/); my $portversion = $vars->{PORTVERSION}; 1 while ($portversion =~ s/\$\{([^{}]+)\}/$vars->{$1}/); my $portrevision = $vars->{PORTREVISION}; 1 while ($portrevision =~ s/\$\{([^{}]+)\}/$vars->{$1}/); if ($vars->{BROKEN}) { print "# Port $category/$port is marked broken\n"; next; } unless ($portname) { chdir($dir) || die "chdir $dir: $!"; $portname = `make -V PORTNAME`; chop($portname); print "# No portname defined in $makefile\n" unless $portname; } unless ($portversion) { chdir($dir) || die "chdir $dir: $!"; $portversion = `make -V PORTVERSION`; chop($portversion); print "# No portversion defined in $makefile\n" unless $portversion; } unless ($portrevision) { chdir($dir) || die "chdir $dir: $!"; $portrevision = `make -V PORTREVISION`; chop($portrevision); } if ($portrevision) { $portrevision = "_$portrevision"; } else { $portrevision = ''; } # print "pn: $portname\n"; # print "pv: $portversion\n"; # print "pr: $portrevision\n"; next unless $portname; next unless $portversion; if ($simplenames{$portname} && $indexdata{$simplenames{$portname}}{simplename} eq $portname && $indexdata{$simplenames{$portname}}{simpleversion} eq $portname) { # hey! The INDEX file is enough. delete $portdata{$portname}; } elsif ($portdata{$portname}) { my ($d, $ver) = split(' ', $portdata{$portname}); if ($d eq 'multiple') { $portdata{"$category/$port"} = "$portversion$portrevision" unless $portdata{"$category/$port"} eq "$portversion$portrevision"; # print "Apd{$category/$port} = $portversion$portrevision\n"; } elsif ($d eq "$category/$port" || ! -e "$portsdir/$d/Makefile") { $portdata{$portname} = "$category/$port $portversion$portrevision"; # print "Bportdata{$portname} = $category/$port $portversion$portrevision\n"; } else { # now we've got an interesting situation.. # multiple ports claiming the same name. $portdata{$portname} = "multiple"; $portdata{$d} = $ver; $portdata{"$category/$port"} = "$portversion$portrevision"; # print "Cpd{$portname} = multiple\n"; # print "portdata{$d} = $ver\n"; # print "pd{$category/$port} = $portversion$portrevision\n"; } } else { $portdata{$portname} = "$category/$port $portversion$portrevision" unless $portdata{$portname} eq "$category/$port $portversion$portrevision" # print "Dportdata{$portname} = $category/$port $portversion$portrevision\n"; } } } close(IEXTRA); $portdata{"makefiles read"} = time; } # # Read a single makefile, seeting variables as you go. # This isn't really that accurate but it seems to work. # sub read_makefile { my ($makefile, $vars) = @_; $vars = {} unless $vars; $vars->{".CURDIR"} ||= dirname($makefile); chdir($vars->{".CURDIR"}); local(*MAKEFILE); open(MAKEFILE, "<$makefile") or warn "open $makefile: $!"; while() { if (/^\.include "(.*?)"/) { my $if = $1; 1 while ($if =~ s/\$\{([^{}]+)\}/$vars->{$1}/); read_makefile($if, $vars); next; } next unless /^(\w+)=\s*(\S+)/; $vars->{$1} = $2; } close(MAKEFILE); return $vars; } # # Read the list of requested ports # sub read_request_list { my %requested; return unless -e $request_list; local(*RL); open(RL, "<$request_list") or die "open $request_list: $!"; while() { s,^\Q$request_prefix\E,,o || next; s,^\s+,,; s,\s+$,,; $requested{$_} = $request_list; } close(RL); return %requested; } # # Look up the port name, port version, directory, and # dependencies from a package name ($portname-$portversion). # # Possible improvement: handle multiples where they've changed # directories. # sub pkgname2name { my ($p) = @_; my @name = split('-', $p); my @version = pop(@name); while (@name) { my $name = join('-', @name); if (exists $portdata{$name}) { my ($dir, $version) = split(' ',$portdata{$name}); if ($dir eq "multiple") { if ($pkginfo{$p} && $pkginfo{$p}{origin}) { $version = $portdata{$pkginfo{$p}{origin}}; return ($name, join('-', @version), $pkginfo{$p}{origin}, $version, $indexdata{$dir}{dependencies}); } elsif (exists $simplenames{$name}) { my $dir = $simplenames{$name}; return ( $name, join('-', @version), $dir, $indexdata{$dir}{version}, $indexdata{$dir}{dependencies}); } else { return ($name, "$name-?", "?", "?", $indexdata{$dir}{dependencies}); } } return ($name, join('-', @version), $dir, $version, $indexdata{$dir}{dependencies}); } elsif (exists $simplenames{$name}) { my $dir = $simplenames{$name}; return ( $name, join('-', @version), $dir, $indexdata{$dir}{version}, $indexdata{$dir}{dependencies}); } unshift(@version, pop(@name)); } return; } # # Trace dependencies. # # Sets $drequest{$directory} # Sets $requested{$portname-$portversion} # sub trace_requests { my @todo; %drequest = read_request_list(); my %done; my @todo; for my $request (keys %drequest) { if ($indexdata{$request}) { my $namever = $indexdata{$request}{namever}; $requested{$namever} = $request_list; for my $dep (@{$indexdata{$request}{dependencies}}) { next if $requested{$dep}; $requested{$dep} = $namever; push(@todo, $dep); } $done{$namever}++; } else { print "# $request: not a valid port\n"; } } while (@todo) { my $namever = shift(@todo); if ($namever =~ m,/usr/ports/([^/]+)/([^/]+)$,) { my $dir = "$1/$2"; my $nv = $indexdata{$dir}{namever}; $drequest{$dir} = $requested{$namever} || "?"; if ($nv) { $requested{$nv} = $requested{$namever}; delete $requested{$namever}; $namever = $nv; } else { print "# Could not translate $namever to a name-version\n"; } } next if $done{$namever}++; if ($namever2dir{$namever}) { $drequest{$namever2dir{$namever}} = "??" unless $drequest{$namever2dir{$namever}}; } else { print "# Could not translate $namever to a directory\n"; } if ($namever2dir{$namever} && $indexdata{$namever2dir{$namever}}) { for my $dep (@{$indexdata{$namever2dir{$namever}}{dependencies}}) { next if $requested{$dep}; $requested{$dep} = $namever; push(@todo, $dep); } } else { my ($name, $verreq, $dir, $curver, $deps) = pkgname2name($namever); print "# $requested{$namever} requests $namever but $name-$curver is current\n"; for my $dep (split(' ', $deps)) { next if $requested{$dep}; $requested{$dep} = $namever; push(@todo, $dep); } } } #for my $i (sort keys %requested) { # print "# requested $i - $requested{$i}\n"; #} #for my $i (sort keys %drequest) { # print "# drequest $i - $drequest{$i}\n"; #} }