#!/usr/bin/perl -w
# amarank -- rank amazon books
# tchrist@perl.com 
# version 1.1, Mon Aug 31 06:48:06 MDT 1998

use strict;

use Getopt::Std;
use Text::Wrap qw($columns &wrap);
use HTML::FormatText;
use HTML::Parse;
use HTML::TreeBuilder;
use HTTP::Request;
use HTTP::Request::Common qw(GET POST);
use HTTP::Response;
use LWP::UserAgent;
use URI::URL;

$columns = 75;

sub UNKNOWN { 999999999 }

sub usage {
    print STDERR "$0: @_\n" if @_;
    die <<EO_USAGE
usage: $0 [-s] [-t] [-a] [-v] [-r] [-w columns] [-n count] title
   -s	    search on subject
   -t	    search on title [DEFAULT]
   -a	    search on author
   -r	    raw output only, don't rand first (faster output)
   -n num   show only the first n books
   -w cols  wrap at this many columns (DEFAULT: $columns)
   -v 	    debugging
EO_USAGE
}

$| = 1;

my(
    $browser,		# the virtual browser we'll use as user agent
    $curreq,		# the current HTTP request object
    $response,          # the current HTTP response object

    $content,		# raw HTML of 
    $which_search,       # subject, author, or title
    $form,
    $formatter,
    $his_base,
    $hitlist,
    $bookcount,
    $next_screen,
    $search_string,
    $url,

    %opts,

    %Desc,
    %ISBN_Rank,
    %ISBN_Title,
    %Seen_ISBN,
);


sub fdie   { die sprintf @_ }
sub dprint { print STDERR @_ if $opts{'v'} } 

@opts{qw!s t a!} = (0,0,0);
getopts("rstavn:w:", \%opts) || usage("argument error");

if ($opts{"w"}) {
    $columns = $opts{"w"};
} 


if ( $opts{"s"} + $opts{"t"} + $opts{"a"} > 1 ) {
    usage("only one of opts s, t, and s allowed.");
} 

$which_search = $opts{"t"} ?  'title'
	      : $opts{"s"} ?  'subject'
	      : $opts{"a"} ?  'author'
	      :               'title';

$search_string = "@ARGV" || "perl";

$url = "http://www.amazon.com/";

$browser = LWP::UserAgent->new();
$browser->agent("amarank/1.0");    

$curreq = GET($url);
$curreq->referer("http://wizard.yellowbrick.oz");  


if (($response = $browser->request($curreq))->is_error()) {
    fdie "Failed to lookup $url: %s\n", $response->status_line;
}

$his_base = $response->base;

dprint "His base is $his_base\n";

unless ($response->content =~ /Full search: <a href\s*=\s*"([^"]*)">/) {
    die "couldn't find full search\n";
}
$url = url($1, $his_base);
dprint "New url is $url\n";

$curreq = GET($url);
$curreq->referer($his_base->as_string);

if (($response = $browser->request($curreq))->is_error()) {
    fdie "Failed to lookup $url: %s\n", $response->status_line;
}

$his_base = $response->base;
dprint "base is $his_base";

die "no title search: $content" unless $response->content() =~ 
    m#Enter\s*Author.*?Title.*?(<form[^>]*?action\s*=\s*"([^"]+)".*?)</form>#is;

$form   = $1;
$url = url($2, $his_base);

dprint "Search is at $url\n";

$curreq = POST $url, [ 
    "author" 	    => $which_search eq 'author'  && $search_string,
    "author-mode"   => "full",

    "title" 	    => $which_search eq 'title'   && $search_string,
    "title-mode"    => "word",

    "subject" 	    => $which_search eq 'subject' && $search_string,
    "subject-mode"  => "word",

    "submit" => "Search Now",
];

$curreq->referer($his_base->as_string);


if (($response = $browser->request($curreq))->is_error()) {
    fdie "Failed to lookup $url: %s\n", $response->status_line;
}

$hitlist = $response->content();
dprint "base is $his_base";

while ($hitlist =~ m{
			<b> \s* 
			<a \s+ href \s* = \s*
			    " (/exec/obidos/ASIN/(\d+))/[^"]+" \s* >
			(.*?)
			</a></b>(.*)
		    }xig ) 
{
    my($bookurl, $isbn, $title, $text) = ($1,$2,$3,$4);

    next if $Seen_ISBN{$isbn}++;  # top few are dups

    $bookcount++;
    last if $opts{'n'} && $bookcount > $opts{'n'};

    for ($title) {
	s/&amp;/&/g;
	s/&lt;/</g;
	s/&gt;/>/g;
	s/&quot;/"/g;
    } 

    $ISBN_Title{$isbn} = $title;
    print STDERR "[ISBN $isbn: $title]\n" if $opts{'v'} || !$opts{'r'};

    $url = url($bookurl, $his_base)->abs . "/t";
    $curreq = GET $url;
    $response = $browser->request($curreq);
    if ($response->is_error()) {
	printf "Failed to lookup $url: %s\n", $response->status_line;
	exit(1);
    }

    my $data = $response->content;
    $data =~ s/<\/?t[rhd].*?>//isg;   # they have bad html in here
    my $html = parse_html($data);
    my $formatter = HTML::FormatText->new(leftmargin => 0, rightmargin => 500);
    my $ascii = $formatter->format($html);
    for ($ascii) { 
	my($rank) = /Amazon\.com\s+Sales\s+Rank:\s*([\d,]+)/;
	($ISBN_Rank{$isbn} = $rank || UNKNOWN) =~ s/,//g;

	s/.*\|\s*\n//s;
	s/\[(TABLE|FORM) NOT SHOWN\]//gs;
	s/Learn more about.*?ordering//si;
	s/-----.*$//s;
	s/\r//g;
	s/^ +$//g;
	s/^Our Price.*//m;
	s/^You Save.*//m;
	s/(?=List Price)/\n/;
	s/\n{2,}/\n/g;
	unless ($opts{'r'}) {
	    $Desc{$isbn} = $_;
	} else { 
	    print map { wrap("", "    ", $_) . "\n" } split /\n/;
	    print "\n";
	}
    } 

} 

exit if $opts{'r'};

for my $isbn ( sort { $ISBN_Rank{$a} <=>  $ISBN_Rank{$b} } keys %ISBN_Rank ) {
    $_ = $Desc{$isbn};
    s/(Amazon\.com\s+Sales\s+Rank:.*\n)//;
    print "Rank: ", ($ISBN_Rank{$isbn} == UNKNOWN)
			? "Unknown"
			: commify($ISBN_Rank{$isbn}),
	    "\n";
    print map { wrap("", "    ", $_) . "\n" } split /\n/;
    print "\n";

} 


sub commify {
    my $text = reverse $_[0];
    $text =~ s/(\d\d\d)(?=\d)(?!\d*\.)/$1,/g;
    return scalar reverse $text;
}