# $Header: /monk/rcs/parts/sccs/simplify.pl,v 9.1.1.2 1992/07/03 00:28:13 muir Exp muir $

#############################################################################
#
#  Copyright (c) 1992 Comdisco Systems Inc.
#  All rights reserved.
#
#  Redistribution and use in source and binary forms, with or without
#  modification, are permitted provided that the following conditions
#  are met:
#  1. Redistributions of source code must retain the above copyright
#     notice, this list of conditions and the following disclaimer.
#  2. Redistributions in binary form must reproduce the above copyright
#     notice, this list of conditions and the following disclaimer in the
#     documentation and/or other materials provided with the distribution.
#  3. All advertising materials mentioning features or use of this software
#     must display the following acknowledgement:
#       This product includes software developed by the Comdisco Systems Inc.
#  4. The name of Comdisco may not be used to endorse or promote products
#     derived from this software without specific prior written permission.
#
#  THIS SOFTWARE IS PROVIDED BY THE COMDISCO SYSTEMS INC ``AS IS'' AND
#  ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
#  IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
#  ARE DISCLAIMED.  IN NO EVENT SHALL COMDISCO SYSTEMS INC BE LIABLE
#  FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
#  DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
#  OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
#  HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
#  LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
#  OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
#  SUCH DAMAGE.
#
# This copyright notice derrived from material copyrighted by the Regents
# of the University of California.
#
#############################################################################


#
# path simplifcation routines.
#
# A "cononical form" of a path is one which has no extra characters
# and traverses no links.
#
# &different_places(path1,path2) 	returns true if path1 and path2
#					lead to the same place
# &simplify'absolutify(path)		(apollo only) returns path with
#					//hostname added if not already
#					present
# &simplify_path(paths)			returns canonical form of paths
# &simplify(paths)			modifies paths to remove extra
#					/, /., and */.. sequences
# &trace_links(paths)			returns paths after expanding
#					all symlinks
# &relative_link(path_from,path_to)	returns the cononical relative path
#					(suitable for making a link)
#					from path_from to path_to.

# external resources required:
#	
# $main'hostname is required by &absolutify
# $main'cwd is required by &trace_links
# 
# on apollo domain os systems, set:
# 	$os'slashslash = '/',
#	$on_apollo = 1,
#	$os'varsym = 1
#	


#
# The array %simplify'ignore_link is used to specify that certain links
# should *not* be followed.  Its use is somewhat dangerous.
#

package main;

CONFIG: {
	print "! simplify.pl\n" if $main'vload;
	print $main'countdown--,' ' if $main'countdown;
	require "os.pl";
}

#

sub different_places
{
	local($a,$b) = @_;
	($a,$b) = &simplify_path($a,$b);
	if ($os'slashslash) {
		if ($a =~ m,^//, || $b =~ m,^//,) {
			$a = &simplify'absolutify($a);
			$b = &simplify'absolutify($b);
		}
	}
	return $a cmp $b;
}

sub simplify'absolutify 
{
	local($p) = @_;

	return $p unless $os'slashslash;
	return $p if $p =~ m,^//,;
	return "//$main'hostname$p" if $p =~ m,^/,;
	return &absolutify("$main'cwd/$p");
}

sub simplify_path
{
	local(@l) = @_;

	@l = &trace_links(@l);
	&simplify(@l);
	return @l;
}

sub simplify 
{
	local(@comps,$comp);
	local(@new,@pre);
	local($s,$ss);
	local($leadingSlash);
	local($leadingSlashSlash);
	local($trailingSlash);
	for $_ (@_) {
		next unless $_;

		print "simplify $_:" if $simplify'debug;

		$trailingSlash = m=[^/].*/$=;
		$leadingSlash = m=^/=;
		$leadingSlashSlash = ($os'slashslash && m=^//=);

		@comps = (split(/\/+/,$_));
		shift(@comps) if $leadingSlash;

		@new = ();
		@pre = ();

		while ($comp = shift(@comps)) {
			next if $comp eq '.';
			if ($comp eq '..') {
				if (@new) {
					pop(@new);
				} elsif ($os'slashslash && $leadingSlash && !$leadingSlashSlash) {
					$leadingSlashSlash = 1;
				} elsif (! $leadingSlash) {
					push(@pre,$comp);
				}
				next;
			}
			push(@new,$comp);
			next;
		}

		$_ = ($leadingSlash ? "/" : "") . ($leadingSlashSlash ? "/" : "") . join('/',@pre,@new);
		$_ = '.' unless $_;

		print "$_\n" if $simplify'debug;
	}
}

#&testsimplify();
#sub testsimplify
#{	
#	local($k);
#	for $t (
#	    "model::a/c",
#		"a/./c", "a/c/.", "a/b/../c", "a/c/d/..", "a/c/d.new/..", "a/b/e/./../../c",
#		"a/c/d/e/./../..", "a/b.new/../c", "a/b/e/../.././c", "a/c/d/e/../../.",
#	        "a//c/d/e/./..//..", "a/b/e//../.././c", "a/c///d/e//../../.",
#	    "model:/a/c",
#		"/./a/d/../c",
#		"//./a/d/../c", "/../a/d/../c",
#	    "model:/",
#	        "/", "/.", "/foo/..",
#	    "model://",
#	        "//", 
#		"//.", "/..", "//..", "///",
#	    "model:a",
#	    	"a", "a/.",
#		"a/..", 
#	    "model:../../a/b",
#		"x/../y/../z/../../../a/x/../b/.", 
#	    "specials",
#		"..", ".", "../..",
#		"") {
#		$k = $t;
#		&simplify($k);
#		print "\t\t\t\t$t:".(" "x(20-length($t)))."$k\n";
#	}
#	exit(2);
#}


#
# Trace links does what the unix kernel does as it
# follows links.  It will return a path to the actual
# file that the links point to.
#

sub trace_links
{
	local(@l) = @_;
	local($e,$x);
	local(@r);
	local($last);
	local($tail);

	$last = shift(@l) if $l[0] eq '-skipLast';

	for $l (@l) {
		if ($l !~ m,^/,) {
			if ($main'cwd =~ m,[^/],) {
			    $l = "$main'cwd/$l";	# cwd must already be set
			} else {
			    $l = "$main'cwd$l";
			}
		}


		if ($l !~ m,^/, && $main'cwd =~ m,[^/],) {
			$l = "$main'cwd/$l";		# cwd must already be set
		}

		if ($os'slashslash) {
			while ($l =~ s!///!//!g) {;}
			if ($l eq '//' || $l eq '/') {
				push(@r,$l);
				next;
			}
		} else {
			while ($l =~ s!//!/!g) {;}
			if ($l eq '/') {
				push(@r,$l);
				next;
			}
		}

		print "TRACE: =$l=\n" if $simplify'debug;
		print "TRACE: =$l=\n" if $simplify'debug;

		if ($last) {
			if ($l =~ s,(/[^/]+)$,,) {	# $l:t
				$tail = $1;
			} else {
				warn("could not grab tail from $l");
			}
		} elsif ($l =~ s,(..)/$,\1,) {		# remove trailing /
			$tail = '/';
		}
		$x = "";
		$e = $l;
		while ($e ne '') {
			print "T1 =$x=$e=\n" if $simplify'debug;
			#
			# add a component to $x
			#
			if ($e =~ m,^(//?[^/]+)(/.*)?$,) {    # /X
				$x = "/." if ($x eq "/" && $os'slashslash);
				$x .= $1;
				$e = $2;
			} else {
				die "malformed path ($l: $x, $e)!";
			}
			print "T2: =$x=$e=\n" if $simplify'debug;
			&simplify($x);
			print "T3: =$x=$e=\n" if $simplify'debug;

			while (-l $x && !defined($simplify'ignore_link{$x})) {
				local($nl);
				($nl = readlink($x)) 
					|| do {
						die "could not read link $x: $!";
						last;
					};

				#
				# apollo soft-links can have environment 
				# variables in them!!!  
				#
				print "TW0: =$x=$e=$nl=\n" if $simplify'debug;
				if ($os'varsyms) {
					$on_apollo while $nl =~ s,\$\(([^)]+)\),defined($ENV{$1}) ? $ENV{$1} : '',e;
				}
				#
				# if the new link contains multiple parts, 
				# put most of it into $e, and just the
				# first part into $x.
				#
				print "TW1: =$x=$e=$nl=\n" if $simplify'debug;
				if ($nl =~ m,^(/?/?[^/]+)(/.+)$,) {
					$nl = $1;
					$e = $2.$e;
				}
				#
				# relative link?
				#
				print "TW2: =$x=$e=$nl=\n" if $simplify'debug;
				if ($nl =~ m,^/,) {
					$x = $nl;
				} else {
					$x =~ s,(/[^/]+)$,/$nl,
						|| die "internal error: $x, $nl";
				}
				print "TW3: =$x=$e=$nl=\n" if $simplify'debug;
			}
			&simplify($x);
			print "T4: =$x=$e=\n" if $simplify'debug;
		}
		print "TP: =$x=$tail=\n" if $simplify'debug;
		push(@r,$x.$tail);
	}
	return @r;
}

#
# /a/b -> /b/c => a/b -> ../b/c
#
sub relative_link
{
	local($from,$to) = @_;
	local($a,$b);

	die "bad path: $from" unless $from =~ m!^/!;
	die "bad path: $to" unless $to =~ m!^/!;

#print "Starting: $from, $to.\n";

	if ($os'slashslash) {
		#
		# make sure both or neither start with //
		#
		if ($from =~ m!^//! || $to =~ m!^//!) {
			$from = &simplify'absolutify($from);
			$to = &simplify'absolutify($to);
		}
	} else {
		while ($to =~ s!^//!/!g) { ; };
	}
	local($fromdir) = &simplify_path(&dirname($from));
	$fromdir .= '/' unless $fromdir =~ m!/$!;
	$from = $fromdir . &basename($from);

	#
	# /a/b/c/d
	# /a/x/y/w
	#
	for (;;) {
		$from =~ m!^(/+|[^/]+)!;
		$a = $1;
		$to =~ m!^(/+|[^/]+)!;
		$b = $1;
#print "f $from t $to.\n";
#print "a $a b $b.\n";
		last unless $a;
		last unless $a eq $b;
		$from = substr($from,length($a),length($from));
		$to = substr($to,length($b),length($to));
	}
	#
	# b/c/d
	# x/y/w
	#
	#
#print "f $from t $to.\n";
	$from = &dirname($from);
	return $to if $from eq '.';
	#
	# x/y
	#
	local(@f) = split('/',$from);
	for $i (@f) {
		$i = "..";
	}
	# ../..
	#
	local($j) = join('/',@f);
	local($fp);
	if ($j) { 
		$fp = "$j/$to";
	} else {
		$fp = $to;
	}
#print "final path = $fp, join = $j\n";

	return $fp;
}

1;