use HTML::TreeBuilder::XPath;
use strict;
use warnings;
use LWP::Simple;
use LWP::UserAgent;
use HTTP::Request;
use HTTP::Response;
# Names of the wiki file pages to look up. Each entry is appended to the
# "File:" page URL by the loops below.
my @myFileName=('');
$myFileName[0]="Kniro-Lippies V.6 Concept.JPG";
$myFileName[1]="Kniro concept thing.png";
$myFileName[2]="Kniro og.png";
# ... (redacted because there are a lot)
$myFileName[700]="Theta's redesign.jpg";
$myFileName[701]="THETA..jpg";
$myFileName[702]="Lippies Book 8 Page 10.jpg";
# Identification string for this script. Built by concatenation so that no
# line break (or wrap marker) ends up inside the value.
my $agentName="User:Nyro_the_Leopard "
    ."(http://lippies.shoutwiki.com/wiki/User:Nyro_the_Leopard) "
    ."grabbing some data using ExtractImages.pl";

# Shared HTTP client used by every request below.
my $browser = LWP::UserAgent->new();
# Send the identification string as the User-Agent header; it was declared
# for that purpose but was never handed to the client.
$browser->agent($agentName);
# Per-request timeout, in seconds.
$browser->timeout(500);

# Marker text searched for in each page; the wanted text starts right after it.
my $string='crappyfartsgohome/images/';
# Character that terminates the wanted text.
my $endString='"';
# Scratch offsets into the page contents, updated on every iteration.
my $position=0;
my $endPosition=0;
#my $prefix='http://vignette.wikia.nocookie.net/crappyfartsgohome/images/;
# Currently empty; only referenced by a commented-out print statement.
my $prefix='';
# Appended after every generated output line.
my $delimiter="\n";
# Extracted names that must not be reported.
my $reject1='OKAY_I_SERIOUSLY_CANNOT.png);';
my $reject2='Yum_yum.jpg';
# Index used for the next generated '$myFileName[N]=...' output line.
my $newArrayIndex=0;
# Pass 1: fetch each "File:" page, cut out the text between the first
# occurrence of $string and the next $endString, and print it as a Perl
# assignment line ('$myFileName[N]="...";') unless it equals a rejected name.
# NOTE(review): the upper bound 417 is hard-coded while the table above holds
# indices up to 702 -- confirm whether the whole table should be processed.
for (my $count=0; $count<=417; $count++){
    my $url="http://crappyfartsgohome.wikia.com/wiki/File:".$myFileName[$count];
    my $request = HTTP::Request->new(GET => $url);
    my $response = $browser->request($request);
    if ($response->is_error()) {
        printf "%s\n", $response->status_line;
        # An error page does not carry the wanted image path; skip it.
        next;
    }
    my $contents = $response->content();

    # Test the raw index() result BEFORE adding the marker length; once the
    # length has been added, a "not found" result can no longer equal -1.
    my $markerAt=index($contents,$string,0);
    next if $markerAt==-1;
    $position=$markerAt+length($string);

    # Without a terminator the length passed to substr() would be negative.
    $endPosition=index($contents,$endString,$position);
    next if $endPosition==-1;

    my $fileName=substr($contents,$position,$endPosition-$position);
    if ($fileName ne $reject1 && $fileName ne $reject2){
        #print $prefix.$fileName.$delimiter;
        print '$myFileName['.$newArrayIndex.']="'.$fileName.'";'.$delimiter;
        $newArrayIndex++;
    }
}
# Pass 2: fetch each "File:" page, parse it as HTML and read the href of the
# first <a> inside <div class="fullImageLink">. The file name and URL are
# printed, then URLs containing a rejected name are skipped.
# NOTE(review): the upper bound 417 is hard-coded while the table above holds
# indices up to 702 -- confirm whether the whole table should be processed.
for my $count ( 0 .. 417 ) {
    my $url = "http://crappyfartsgohome.wikia.com/wiki/File:".$myFileName[$count];
    my $request = HTTP::Request->new(GET => $url);
    my $response = $browser->request($request);
    if ($response->is_error()) {
        print $response->status_line, "\n";
        # Nothing useful to parse on an error page.
        next;
    }
    my $contents = $response->content();

    my $tree = HTML::TreeBuilder::XPath->new;
    $tree->parse($contents);
    $tree->eof;
    my @links = $tree->findnodes('//div[@class="fullImageLink"]/a');
    # Read the attribute first, then release the tree so that one parsed
    # document per iteration does not pile up in memory.
    my $image_url = @links ? $links[0]->attr('href') : undef;
    $tree->delete;

    # A page without the expected link would otherwise die on an undef
    # method call (or warn on an undef URL).
    if (!defined $image_url) {
        warn "$myFileName[$count]: no full-size image link found\n";
        next;
    }

    print "$myFileName[$count]\n$image_url\n\n";

    # \Q...\E makes the reject strings match literally. Unquoted, the ')' in
    # $reject1 is an unbalanced regex group and aborts the script, and every
    # '.' would match any character.
    # NOTE(review): $reject1 as declared ends in ');', so it only matches URLs
    # containing that exact text -- confirm this is intended for URL matching.
    next if $image_url =~ m/\Q$reject1\E/;
    next if $image_url =~ m/\Q$reject2\E/;
    # do stuff with image URL
}
I basically just took my code and pasted it in place of the "# ..." placeholder in tangent's code, because I guessed that was what I was supposed to do.
Also, if he meant that the "next if $image_url =~ m/$reject1/;" line was supposed to have the literal "OKAY_I_SERIOUSLY_CANNOT.png" text in place of $reject1, I tried that (and replaced $reject2 with the Yum_yum.jpg one), but it didn't work.