#!/usr/bin/perl
# Copyright 2003-2007 Vlado Keselj http://www.cs.dal.ca/~vlado

# Print the usage text, including the program version, on the currently
# selected output handle.  The text is a here-document terminated by "#EOT";
# every line of it starts with "# ", and that prefix is part of what is
# printed.  $VERSION is interpolated into the first line.
sub help { print <<"#EOT" }
# Find equal files in a directory tree, version $VERSION
#
# Relies on diff.
#
# Usage: find-equal-files [switches] [directories]
#  -n  find equal files even if they have different names
#  -i  report equal files as found, beside the final report
#  -h  Print help and exit.
#  -r pathp Remove redundant files, not with path prefix pathp
#      Removes only if there is at least one file not ~pathp
#  -t  Test mode, used with -r, but only report what would be
#      removed instead of removing it.
#  -v  Print version of the program and exit.
#EOT

use strict;
use warnings;
use vars qw( $VERSION %Tab );
$VERSION = sprintf "%d.%d", q$Revision: 1.7 $ =~ /(\d+)/g;

use Getopt::Std;
use vars qw($opt_v $opt_h $opt_n $opt_i $opt_r $opt_t);

# getopts returns false when it meets a switch it does not know.  Stop in
# that case instead of scanning (and, with -r, deleting) on the basis of a
# command line that was not understood.
getopts("vhnitr:") or do { help(); exit 1; };

if ($opt_v) { print "$VERSION\n"; exit; }
if ($opt_h || !@ARGV) { help(); exit; }

$| = 1;                                   # autoflush STDOUT
find_equal_files(@ARGV);

# %Tab maps a key to a list of groups; each group is a hash with the first
# file seen (file0) and the files found equal to it (otherfiles).  Only
# groups that actually have duplicates are reported.  Keys are sorted so
# that the report comes out in the same order on every run.
print "FINAL REPORT:\n";
foreach my $key (sort keys %Tab) {
    foreach my $group (@{ $Tab{$key} }) {
        next unless @{ $group->{otherfiles} };
        print "equal files: $group->{file0}\n";
        print "        and: $_\n" foreach @{ $group->{otherfiles} };
    }
}

# $opt_r is undefined when -r was not given.
remove_files() if defined $opt_r && length $opt_r;

# Delete the redundant copies selected by the -r switch.
#
# Reads the globals %Tab (groups of equal files), $opt_r (path prefix) and
# $opt_t (test mode).  For every group, the files whose path starts with
# $opt_r are removed, but only when the group also contains at least one
# file that does not start with $opt_r, so one copy always survives.
# In test mode the candidates are only listed.  A failed unlink is reported
# with warn and does not stop the run.
sub remove_files {
    print "REMOVING FILES with prefix \"$opt_r\"";
    print " (TEST MODE)" if $opt_t;
    print ":\n";

    # Sorted keys give the same processing order on every run.
    foreach my $key (sort keys %Tab) {
        foreach my $group (@{ $Tab{$key} }) {
            # Split the group into files under the prefix and files outside it.
            my (@with_prefix, @without_prefix);
            foreach my $file ($group->{file0}, @{ $group->{otherfiles} }) {
                if (index($file, $opt_r) == 0) { push @with_prefix, $file }
                else                           { push @without_prefix, $file }
            }

            # Nothing to remove, or nothing would be left: skip the group.
            next unless @with_prefix && @without_prefix;

            print "Keep: @without_prefix\n";
            foreach my $file (@with_prefix) {
                if ($opt_t) {
                    print " TO RM   $file\n";
                    next;
                }
                print "   Removing $file\n";
                unlink($file) or warn "   Could not remove $file: $!\n";
            }
        }
    }
}

# Return true when the external "diff" program exits with status 0 for the
# two given files.  While diff runs, this process's STDOUT and STDERR are
# pointed at /dev/null so that diff's output is discarded; both handles are
# put back before the sub returns or dies.
sub _diff_reports_equal {
    my ($file_a, $file_b) = @_;

    open(my $saved_out, '>&', \*STDOUT) or die "Can't save stdout: $!";
    open(my $saved_err, '>&', \*STDERR) or die "Can't save stderr: $!";

    # Remember a redirect failure instead of dying right away, so that the
    # original handles are restored first and the message is not lost.
    my $problem;
    if (!open(STDOUT, '>', '/dev/null')) {
        $problem = "Can't redirect stdout to /dev/null: $!";
    }
    elsif (!open(STDERR, '>', '/dev/null')) {
        $problem = "Can't redirect stderr to /dev/null: $!";
    }

    my $status;
    $status = system('diff', $file_a, $file_b) unless defined $problem;

    my $err_restored = open(STDERR, '>&', $saved_err);
    my $out_restored = open(STDOUT, '>&', $saved_out);
    close($saved_err);
    close($saved_out);

    die $problem if defined $problem;
    die "Can't restore stdout/stderr: $!" unless $err_restored && $out_restored;

    # system returns 0 only when diff ran and exited with status 0.
    return defined $status && $status == 0;
}

# Walk the given paths and record groups of equal files in the global %Tab.
#
# Arguments: a list of file or directory paths.  Symbolic links and paths
# that do not exist are ignored.  Directories are entered recursively.
#
# For every plain file a key is built: the size alone when $opt_n is set,
# otherwise "basename size".  $Tab{key} is a list of groups, each a hash
# { file0 => first file seen, otherfiles => [ files equal to file0 ] }.
# A new file is compared (with diff) against file0 of each group under its
# key; it joins the first group it equals, or starts a new group.
# With $opt_i set, each match is printed as soon as it is found.
# Dies if a directory cannot be opened.
sub find_equal_files {
    while (@_) {
        my $path = shift;

        # symbolic link or does not exist: ignore it
        next if -l $path || !-e $path;

        if (-d $path) {                       # recursively enter directory
            opendir(my $dh, $path) || die "can't opendir $path: $!";
            # Exact comparison: only the "." and ".." entries are skipped.
            my @entries = grep { $_ ne '.' && $_ ne '..' } readdir($dh);
            # All names are in memory, so the handle need not stay open
            # while the subtree is processed.
            closedir($dh);
            foreach my $entry (@entries) {
                find_equal_files("$path/$entry");
            }
            next;
        }

        # a file
        my $size     = (stat $path)[7];
        my $basename = $path =~ m{/([^/]+)$} ? $1 : $path;
        my $key      = $opt_n ? $size : "$basename $size";

        my $groups = $Tab{$key} ||= [];

        # Could be equal to the first file of one of the known groups.
        my $match;
        foreach my $group (@$groups) {
            next unless _diff_reports_equal($group->{file0}, $path);
            $match = $group;
            last;
        }

        if ($match) {
            push @{ $match->{otherfiles} }, $path;
            print "equal files:$match->{file0}\n        and:$path\n" if $opt_i;
        }
        else {
            push @$groups, { file0 => $path, otherfiles => [] };
        }
    }
}