# ----------
# aviManager
# ----------
#
# imdb grabber
#
# Author: thrawn,M.Riester
# Web : http://avimanager.sourceforge.net
#
# Copyright (c) 2002 thrawn ;-), M.Riester.
#
# This program is distributed under the terms of the GPL v2.
#
# Fetch $url and return the response text.
#
# Parameters:
#   $url   - address to fetch
#   $quiet - when true, the "... OK" progress marker is not printed
#
# Globals read (defined elsewhere in the program):
#   $socks_use, $socks_url - when $socks_use is true the page is fetched by
#                            an external curl process and its stdout returned
#   $proxy_use, $proxy_url - optional HTTP proxy for the LWP path
#   mt()                   - called on message strings before printing
#                            (presumably a translation helper; confirm)
#
# LWP path: the request is attempted up to 5 times with a 5 second pause
# between attempts.  A 404 status, or a failure on the fifth attempt, prints
# a fatal error and terminates the program via exit.  The return value is
# the response rendered with as_string, not only the body.
sub download_page
{
    my ($url, $quiet) = @_;

    if ($socks_use) {
        # Run curl through a list-form pipe so no shell is involved: the URL
        # and the SOCKS address are passed as plain arguments and need no
        # escaping or character stripping (which used to remove '&', '%',
        # '+', '-' and so corrupted query strings).
        my @cmd = ('curl', '--socks', $socks_url, $url);
        print "socket use: curl --socks '$socks_url' \"$url\"\n";

        my $contl = '';
        if (open(my $curl, '-|', @cmd)) {
            local $/;    # slurp the whole output at once
            my $out = <$curl>;
            $contl = $out if defined $out;
            close($curl);    # sets $? to the exit status of curl
            print mt("Error") . ": curl exit status " . ($? >> 8) if $?;
        } else {
            print mt("Error") . ": $!";
        }
        return $contl;
    }

    my $ua = LWP::UserAgent->new;
    # pretend we are very capable browser from a very nice operating system
    $ua->agent("Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.7.10) Gecko/20050825 Firefox/1.0.4 (Debian package 1.0.4-2sarge3)");
    $ua->proxy('http', $proxy_url) if $proxy_use;

    my $contl;
    for my $attempt (1 .. 5) {
        my $req = HTTP::Request->new(GET => $url);
        $req->header('Accept' => 'text/html');

        # send request
        my $res = $ua->request($req);
        $contl = $res->as_string;

        # check the outcome
        if ($res->is_success) {
            print "... OK\t" unless $quiet;
            last;
        }

        print "... ", mt("Error") . ": " . $res->status_line . "\n\n";

        # A missing page will not appear on retry, so 404 is fatal at once.
        if ($attempt == 5 || $res->status_line =~ /^404/) {
            print "Fatal Error: can't get ", $url;
            exit;
        }

        print "\t", mt("Waiting 5 seconds for retry"), " $attempt/5";
        sleep 5;
    }
    return $contl;
}
# Decode HTML numeric character references in a string.
#
# Parameter: the text to decode (undef is returned unchanged).
# Returns:   a copy with every "&#NNN;" (decimal) or "&#xHH;" (hexadecimal)
#            reference replaced by the character with that code point.
#
# Only complete references are touched; plain text such as "10;" is left
# alone.  Decoding is done in a single pass so the output of one
# replacement is never decoded a second time.
sub convertHTMLSpecialChars {
    my $result = shift;
    return $result unless defined $result;

    $result =~ s{&\#(?:(\d+)|[xX]([0-9a-fA-F]+));}
                {defined $1 ? chr($1) : chr(hex($2))}eg;
    return $result;
}
# NOTE(review): this definition does not parse as written.  The pattern
# opened by `grep /` on the first body line is cut off, and the block closes
# more braces than it opens.  The remaining statements accumulate text in
# $results under "Alternate Plot N:" headings, which does not match the
# sub's name -- presumably the end of extract_imdb_title and the start of a
# plot extractor were lost together with markup-like text.  Restore the
# missing lines from a pristine copy before changing anything here.
sub extract_imdb_title {
my @rat_a = grep /
/i) { $start = 1; $cnt++; my $i = $cnt - 1; $results .= "\nAlternate Plot $i:\n" if $i; } s/^.*
//gi;
$start = 0 if $start && /\A \z/xms;
$results .= $_ . "\n" if $start;
#$start = 0;
}
chomp($results);
return $results;
}
# Pull the short plot outline out of a page.
#
# Parameter: the page text.
# Returns:   everything between the end of the line that contains
#            "Plot Outline" and the next "</div>" (so the value starts with
#            the line break), or undef when the page has no such section.
sub extract_imdb_plot_short {
    my $page = $_[0];

    my $outline;
    if ($page =~ /Plot \s Outline.*?$(.*?)<\/div>/xms) {
        $outline = $1;
    }
    return $outline;
}
# Read the average user rating from a page.
#
# Parameter: the page text.
# Returns:   the "N.N" figure from the phrase
#            'Ratings: ... an average vote of N.N/10"', or undef when the
#            phrase is not present.
sub extract_imdb_rating {
    my $page = $_[0];

    #my ( $rat_a ) = $cont =~ /User \s Rating:<\/h5>.*?(\d+\.\d)\/10<\/b>/xms;
    my $score = $page =~ /Ratings:.*?an \s average \s vote \s of \s (\d+\.\d)\/10\"/xms
        ? $1
        : undef;
    return $score;
}
# Read the number of users who rated a title.
#
# Parameter: the page text.
# Returns:   the vote count as a string of digits with all thousands
#            separators removed, taken from the first line that contains
#            itemprop="ratingCount" ... users; "5" when no positive count
#            can be found.
sub extract_imdb_rating_cnt {
    my ($cont) = @_;
    $cont = '' unless defined $cont;

    # Only the first matching line is considered.
    my ($line) = grep { /itemprop="ratingCount".*users/ } split("\n", $cont);
    return "5" unless defined $line;

    my ($count) = $line =~ /([\d,]+)<\/span> \s users/xms;
    return "5" unless defined $count;

    # Strip every comma, not just the first one ("1,234,567" -> "1234567").
    $count =~ tr/,//d;

    return ($count ne '' && $count > 0) ? $count : "5";
}
# Reduce the runtime information on a page to a bare figure.
#
# Parameter: the page text.
# Returns:   the cleaned-up runtime taken from a line containing " min".
#            A line that starts with a digit is preferred; otherwise the
#            first line containing " min" is used.
sub extract_imdb_runtime {
    my @candidates = grep { / min/g } split("\n", shift);

    # Default to the first candidate, but prefer one that begins with a digit.
    my $runtime = $candidates[0];
    for my $line (@candidates) {
        next unless $line =~ /^\d/;
        $runtime = $line;
        last;
    }

    # The clean-up steps are order dependent; keep them in this sequence.
    for ($runtime) {
        s/^\W*//g;
        s/^.*> //g;
        s/ min.?$//g;
        s/ min//g;
        s/ \/ / /g;
        s/ .*//g;
    }

    # Count how many runtimes are left once bracketed notes are ignored;
    # when there is exactly one, reduce the result to its digits.
    (my $bare = $runtime) =~ s/\(.*?\)//g;
    $bare =~ s/\s+/ /g;
    my @parts = split(" ", $bare);
    if (@parts == 1) {
        $runtime =~ s/\(.+\)//g;
        $runtime =~ s/\D//g;
    }

    return $runtime;
}
# Collect the genre names found on a page.
#
# Parameter: the page text.
# Returns:   the genre names joined into one string, each followed by a
#            single space (e.g. "Action Drama "); the empty string when no
#            genre is found.
#
# The page is split on whitespace and every token that carries
# itemprop="genre"> is reduced to the text between that attribute and the
# closing </a>.  Tokens that still contain "CLASS" or "IMDb" afterwards are
# skipped.  The result is built in a lexical variable so nothing leaks into
# package globals.
sub extract_imdb_genres {
    my ($cont) = @_;
    $cont = '' unless defined $cont;

    my $genres = "";
    #my @gen_a=grep /Genres\//g,split(" ",shift);
    for my $token (split(" ", $cont)) {
        next unless $token =~ /itemprop\=\"genre\">/;

        (my $genre = $token) =~ s/itemprop\=\"genre\">//g;
        $genre =~ s/<\/A>.*$//gi;

        next if $genre =~ /CLASS/ || $genre =~ /IMDb/i;
        $genres .= $genre . " ";
    }
    return $genres;
}
# NOTE(review): this region does not parse as written.  extract_imdb_year
# breaks off inside the `grep /` pattern, and the statements that follow use
# $cmd, $source and $dest, none of which is declared in the visible text.
# They fetch $source into the file $dest through curl or LWP -- presumably
# the tail of the download_image routine that extract_imdb_headshot calls.
# The lines in between appear to have been lost together with markup-like
# text; restore them from a pristine copy before changing anything here.
sub extract_imdb_year {
my (@year_a,@year_b) = "";
@year_a=@year_b=grep /
";
my $contl = `$cmd`;
# print mt("Error") . ": $!" if $?;
return $contl;
}
my $ua = new LWP::UserAgent;
$ua->agent("Mozilla/8.0"); # pretend we are very capable browser
my $url = new HTTP::Request ('GET', $source);
if ( -e $dest ) {
# $url->header('If-Modified-Since' => HTTP::Date::time2str((stat($dest))[8]));
unlink($dest);
}
$ua->proxy('http', $proxy_url) if $proxy_use;
$ua->request($url, $dest);
}
# Extract the director and the leading actors from a title page.
#
# Parameter: the page text.
# Returns:   a list whose element 0 is the director (when one was found)
#            and whose elements 1..n are actor names.
#
# Globals read (defined elsewhere in the program):
#   $grab_actors - upper bound on the number of actors collected; at least
#                  one actor is taken whenever one is available
#
# Director: the line following the first line that contains
# itemprop="director" is stripped of markup and surrounding whitespace.
#
# Actors: the line containing meta name="description" is split on ", ".
# The first piece is skipped and each following piece is cleaned up and
# stored.  Only pieces that actually exist are used, so no undefined entry
# is appended when the description holds fewer names than $grab_actors.
sub extract_imdb_director_actors {
    # first actor is always director
    my @lines = split("\n", shift);
    my @act = ();
    my $director_found = 0;

    my $idx = 0;
    while ($idx <= $#lines) {
        if (!$director_found && $lines[$idx] =~ /itemprop\=\"director\"/) {
            # The name sits on the line after the marker.
            $idx++;
            my $director = $idx <= $#lines ? $lines[$idx] : '';
            $director =~ s/<\/A>.*$//gi;
            $director =~ s/^.*\">//g;
            $director = convertHTMLSpecialChars($director);
            $director =~ s/\s+$//;
            $director =~ s/^\s+//;
            $director =~ s/>//g;
            $act[0] = $director;
            $director_found = 1;
        }

        # Checked against the current line, which is the director's line
        # when the branch above has just advanced the index.
        if ($idx <= $#lines && $lines[$idx] =~ /meta\ name\=\"description\"/i) {
            my @pieces = split(/\,\ /i, $lines[$idx]);
            my $slot = 1;
            for my $piece (@pieces[1 .. $#pieces]) {
                my $actor = $piece;
                $actor =~ s/^.*?\">//g;
                $actor =~ s/.* \s With/\s With\,/g;
                $actor =~ s/\.\ .*//g;
                $actor =~ s/^\n//g;
                $act[$slot] = convertHTMLSpecialChars($actor);
                $slot += 1;
                last if $slot > $grab_actors;
            }
        }

        $idx++;
    }
    return @act;
}
# NOTE(review): this definition does not parse as written.  The pattern
# opened by `split(/` is never terminated and runs straight into
# `$grab_actors;`, and the substitution applied to $act[0] has an empty
# pattern -- presumably the markup-like text both once contained was
# stripped, taking the actor loop with it.  Restore the missing lines from
# a pristine copy before changing anything here.
sub extract_imdb_director_actors_url {
# first actor is always director;
my @ac_a=split("\n",shift);
my @act = ();
my $imdb_t=shift;
my $director_found = 0;
for($i=0;$i!=$#ac_a;$i++) {
#if ($ac_a[$i]=~/Director\:/g && $director_found == 0) {
if ($ac_a[$i]=~/Credited cast\:/g && $director_found == 0) {
$act[0]=$ac_a[++$i];
$act[0]=~s///g;
$director_found = 1;
}
if ($ac_a[$i]=~/Cast overview/gi||$ac_a[$i]=~/Credited cast/gi ) {
my @actors = split(/ $grab_actors;
}
}
}
return @act;
}
# NOTE(review): this definition does not parse as written.  The string
# opened in `split("` runs directly into the "Downloading Headshot" message,
# and the `} else {` further down has no visible matching `if` --
# presumably the split separator, the clean-up of $ima_a[0] and the opening
# of the outer condition were lost together with markup-like text.  What
# remains shows that, when $ima_a[0] is set, it is handed to download_image
# with the target "$headshots_path/$name.jpg", and that $ima_a[0] is
# returned.  Restore the missing lines before changing anything here.
sub extract_imdb_headshot
{
my ($cont_t, $name, $name_url) = @_;
my @ima_a=grep /name=\"headshot\"/gi,split("Downloading Headshot
";
if ($ima_a[0]) {
download_image( $ima_a[0], $headshots_path . "/$name.jpg");
}
} else {
print "no photo submitted
";
}
return $ima_a[0];
}
# Find the numeric IMDb id (the digits after "tt") of a title page.
#
# Parameter: the page text.
# Returns:   the id digits, or nothing when none of the three known
#            locations is present.
#
# The locations are tried in this order, each over the whole page before
# the next one is considered:
#   1. the "More at IMDb Pro" link to pro.imdb.com
#   2. the "/AddRecommendation?const=" link
#   3. any "ttNNN/ratings" link
sub extract_imdb_id {
    my ($page) = @_;
    my @lines = split("\n", $page);

    for my $line (@lines) {
        next unless $line =~ /More at IMDb Pro/i;
        return $1 if $line =~ /http:\/\/pro\.imdb\.com\/title\/tt(\d+)\/"/i;
    }

    for my $line (@lines) {
        next unless $line =~ /\"\/AddRecommendation/i;
        return $1 if $line =~ /a href=\"\/AddRecommendation\?const=(\d+)\">/i;
    }

    for my $line (@lines) {
        return $1 if $line =~ /tt(\d+)\/ratings/i;
    }

    return;
}
# NOTE(review): this definition does not parse as written.  The pattern in
# the last `push ... if (/` is never terminated; $r_tag_e, $imdb_num and
# $return are used without a visible definition; the result-collecting
# pattern has two capture groups although the statement it feeds uses $3;
# and $r_tag_a is built from a lone space, mt("Add to requests") and an
# empty string -- presumably the markup those strings and patterns once
# contained was stripped, together with the code between the duplicate
# filter and the final return.  Restore the missing lines from a pristine
# copy before changing anything here.
sub extract_imdb_search_results
{
my $cont_t=shift;
my $title = extract_imdb_title($cont_t);
my $r_tag_a = " " . mt("Add to requests") . "";
; my $debug = 1;
if ($title =~ /IMDb \s+ Title \s+ Search/gxi) {
#print header ; #. "DEBUG:\n $cont_t \n $title \n ENDDEBUG" if $debug;
my @tmp_results;
my @results;
while ( $cont_t =~ m{(.*?).*?(\d{4})}msg ) {
push @tmp_results, "<\/a> $2 ($3)${r_tag_a}$1&title=$2$r_tag_e";
}
my %delete_dup = ();
foreach (@tmp_results) {
push(@results,$_) if (/^\s*<\/a> $title" . $r_tag_a . $imdb_num . "&title=" . $title . $r_tag_e;
return $return;
}
}
1;