#!/usr/local/bin/perl

$VERSION = "1.12";

# CGI code.pl
# Version 1.11
# Part of "WWW Cyrillic Encoding Suite"
# Get docs and newest version from
#	http://www.neystadt.org/cyrillic/
#
# Copyright (c) 1997-98, John Neystadt <http://www.neystadt.org/john/>
# You may install this script on your web site for free
# To obtain permision for redistribution or any other usage
#	contact john@neystadt.org.
#
# Drop me a line if you deploy this script on your site.

# This script translates WEB pages from one Russian code to another.
# Developed by Leonid Neishtadt (http://www.neystadt.org/leonid/)
# e-mail: leonid@neystadt.org
#
# Currently the following codes are supported:
# DOS (alternate) code page CP866 (dos)
# Windows code page CP1251 (win).
# UNIX code KOI8-r (koi8 or nocs for supressing charset Metatag),
# ISO-8859-5 (iso),
# Macintosh (mac),
# Volapuk (transliteration) (vol) - only as output code.
# AUTO - auto selection of output encoding according to platform where browser 
#        runs (Windows, UNIX, MACINTOSH, OS/2)
#
# Usage: Copy this script into cgi-bin directory,
#        refer to it as ..../cgi-bin/code.pl/"tab"/"URL to be translated"
# where "tab" is one of the above encodings or 'rus' for displaying menu 
# with available codes.
# It is also can be coded as 'fromcode-tocode' for explicit definition of
# the original file encoding. 
# "URL" is absolute URL from the server root (Don't forget to set $path).
# or full URL like http://cnn.com. 
# All relative references from this page to other WEB pages will be also
# translated through the same code table (isn't supported yet for full URLs).
#
# Source encoding is taken from Metatag like:
# <META HTTP-EQUIV="Content-Type" CONTENT="text/plain; charset=win">
# The tag is changed during translation or deleted for 'vol' and 'nocs'.
# If the tag is absent default encoding is taken from variable $defcode.
#
# It is recommended that you put <META HTTP-EQUIV="Content-Type" ...> on all 
# your pages, and choose only destination encoding in urls. Do no worry for
# old buggy browsers which can't display correctly pages with this meta-tag
# NOCS encoding converts page to koi8 and deletes the meta-tag
#
# READABLE URLS
# -------------
# If you use APPACHE you can add the lines similar to those to your webserver 
# configuration files:
#
# ScriptAlias /koi8       /home/www/neystadt/cgi-bin/code.pl/koi8
# ScriptAlias /win        /home/www/neystadt/cgi-bin/code.pl/win
# ScriptAlias /dos        /home/www/neystadt/cgi-bin/code.pl/dos
# ScriptAlias /mac        /home/www/neystadt/cgi-bin/code.pl/mac
# ScriptAlias /iso        /home/www/neystadt/cgi-bin/code.pl/iso
# ScriptAlias /vol        /home/www/neystadt/cgi-bin/code.pl/vol
# ScriptAlias /lat        /home/www/neystadt/cgi-bin/code.pl/vol
# ScriptAlias /nocs       /home/www/neystadt/cgi-bin/code.pl/nocs
#
# From now you will be able to translate urls like http://www.neystadt.org/russia/
# simply by prefixing the url with encoding: http://www.neystadt.org/koi8/russia/ 
# or http://www.neystadt.org/lat/russia/.
# 
# Note that code.pl automatically finds index.html if directory names is given 
# (like in example above). The index file name can be changed by $IndexFileName
# parameters in the script.


=head1 NAME

code.pl - CGI script to convert on-the-fly html pages across cyrillic charsets

=cut

use Convert::Cyrillic;
use LWP::UserAgent;
use HTTP::Headers::UserAgent;

$path="..";     # <==== path from cgi-bin to the server root.
$defcode="WIN"; # <==== default source encoding
$maxsize=500000; # maximum file size
$IndexFileName = 'index.html';
$UserAgent=$ENV{HTTP_USER_AGENT};
$scrname=$ENV{SCRIPT_NAME};
$file=$ENV{PATH_INFO};
$file=~s/^$scrname//;
$file=~s/\+/ /go;
$file=~s/%(..)/pack("c",hex($1))/ge;
if ($file=~/[\.\/\\]([^\.\/\\]+)$/o) {$ext=lc($1);} else {$ext='html';}
$file=~s%^\/([^\/]*)%%o;
$lang=uc($+);
if ($lang eq 'RUS') {
	print "Content-type: text/html\n
	<html><body><h3>Select Russian encoding:</h3>
	<ul>
	<li><a href=\"$scrname/koi8$file\">KOI8-r</a> ���������
	<li><a href=\"$scrname/win$file\">CP1251</a> MS-Windows ���������
	<li><a href=\"$scrname/iso$file\">ISO-8859-5</a> ���������
	<li><a href=\"$scrname/dos$file\">CP866</a> DOS (alternative) ����a����
	<li><a href=\"$scrname/mac$file\">MAC</a> Macintosh
	<li><a href=\"$scrname/vol$file\">volapuk</a> transliteraciya
	<li><a href=\"$scrname/nocs$file\">KOI8-r without Metatag</a> ���������
	</ul></body></html>";
	goto end;
}
if ($lang=~/(.*)-(.*)/o) { $charset=$1; $lang=$2; }
if (!(',ISO,KOI8,KOI,DOS,WIN,VOL,MAC,NOCS,AUTO,' =~ /,$lang,/i)) {
	$err = "Unsupported code - $lang"; 
	goto error;
}

$file =~ s|http:/([^/])|http://$1|oi; # Some vers of Ms-IIS merge '//' into '/' in Urls

if ($file =~ s|^/(http://)|$1|oi) {
	$url=$ENV {'QUERY_STRING'}; 
	if ($url) { $url= "?" . $url; }
	$url = $file . $url;

	my $ua = new LWP::UserAgent;
	$ua->agent("code.pl/1.2 " . $ua->agent);
	$ua->from ('leonid@neystadt.org');
	
	my $req = new HTTP::Request (GET => $url);
	my $res = $ua->request ($req);

	if (!$res->is_success) {
		my $err = $res->error_as_HTML();
		print <<"EOF";
Content-Type: text/html

<h1>Failure</h1>
Failed to retrive url: <b>$url</b>.
Remote server returned the following reponse:
<hr>
$err
EOF
		goto end;
	}

	$type = $res->content_type;
	$buffer = $res->content;

	#neystadt::http_rtr::Http_Retrieve ($url, $buffer, $hdrs);
	#$hdrs=~/Content-Type: (.*)\n/io; $type = $1;
} else {
	if ($file=~/cgi-bin/io) {
		$err = "Incorrect file name"; 
		goto error;
	}

	$file = "$path$file";
	if (-d $file) {
		$file = "$file/$IndexFileName"; 
		$ext = 'htm';
	}
	if (open In,"$file") {
		binmode In; read (In, $buffer, $maxsize); close In;
	} else {
		print "Content-type: text/html

<title>HTTP Error</title><h2>Error: 404 Not Found</h2>
<HR>
The requested URI $file does not exist.
<HR>";
		goto end;
	}
}

if ($lang=~/auto/io){
	$platform = HTTP::Headers::UserAgent::GetPlatform ($UserAgent);
	$lang='koi';
	$lang='win' if $platform=~/WIN/io;
	$lang='mac' if $platform eq 'MAC';
	$lang='koi' if $platform eq 'UNIX';
	$lang='dos' if $platform eq 'OS2';
	$lang='nocs' if $platform eq 'Linux';
}

$newcharset = "koi8-r" if $lang=~/koi|nocs/io; 
$newcharset = "windows-1251" if $lang=~/win/io; 
$newcharset = "x-mac-cyrillic" if $lang=~/mac/io; 
$newcharset = "ibm866" if $lang=~/dos/io;
$newcharset = "ISO-8859-5" if $lang=~/iso/io;

if ($buffer=~s/<\s*META\s+HTTP-EQUIV\s*=\s*"?Content-Type"?\s+CONTENT\s*=\s*"?(.*);\s+charset\s*=\s*(.*)"?\s*>/<META HTTP-EQUIV="Content-Type" CONTENT="$1; charset=$newcharset">/io) {
	$type=$1; $charset=$2 if !$charset;
	if ($lang=~/nocs|vol/io){
		$buffer=~s/<\s*META\s+HTTP-EQUIV\s*=\s*"?Content-Type"?\s+CONTENT\s*=\s*"?(.*);\s+charset\s*=\s*(.*)"?\s*>//io;
	}
}
else {
	$type="text/html"  if $ext eq 'html' || $ext eq 'htm';
	$type="text/plain"  if $ext eq 'txt';
	$type="image/gif"  if $ext eq 'gif';
	$type="image/jpeg" if $ext eq 'jpg' || $ext eq 'jpeg';
}

$lang="koi8" if $lang=~/nocs/io;
$type="text/html" if  !$type;
$slang=$defcode;
$slang="KOI8" if $charset=~/koi/io;
$slang="WIN" if $charset=~/1251/io;
$slang="ISO" if $charset=~/iso/io;
$slang="DOS" if $charset=~/alt/io;
$slang="MAC" if $charset=~/mac/io;

# translate the page
$buffer = Convert::Cyrillic::cstocs ($slang,$lang,$buffer)
	if $type =~ /text/o; 

if ($hdrs) {
	binmode STDOUT; 
	print $hdrs;
} else {
	print("Content-type: $type\n\n");
	binmode STDOUT; 
}

print $buffer;
goto end;
error:
	ermsg($err);
end:;

sub ermsg {
	if (!$sw) {$sw=1; print "Content-type: text/plain\n\n";}
	print "@_[0]\n";
}

__END__

=head1 DESCRIPTION

See the comments on the top of the script.

=head1 PREREQUISITES

This script requires the C<LWP>, C<Convert::Cyrillic> and C<HTTP::Headers::UserAgent> 
modules available from CPAN.

=pod OSNAMES

any

=pod SCRIPT CATEGORIES

CGI

=cut