use strict;
use warnings;

use Data::Dumper;         # Dump the raw data
use URI;                  # Resolve <option> values against the front page URL
use CGI;
use LWP::Simple;          # Fetch the page itself
use LWP::UserAgent;       # Create a proper User-Agent header
use HTML::TreeBuilder;    # Find the attributes of the tag

# Comic spider.
#
# Fetches the front page at $page, collects every non-empty
# <option value="..."> found there, and treats each value as the URL of a
# strip page. For each strip page it prints one line
#     "<page title>, <img src>"
# per distinct <img> whose src contains "/comics/".

# Not referenced anywhere else in this script; construction is kept as-is.
my $cgi = CGI->new();

my $ua = LWP::UserAgent->new;
# $ua->agent('pps Plucker Perl Spider, v0.1.83 [comics]');
$ua->agent('Opera/7.54 (Windows NT 5.0; U) [de]');

my $page = "http://www.ucomics.com/";

my $response = $ua->get($page);
die "Cannot fetch $page: " . $response->status_line . "\n"
    unless $response->is_success;

my $root = HTML::TreeBuilder->new_from_content($response->content);

# Keyed by strip URL so that duplicate <option> entries are fetched only once.
my %strips;
for my $node ($root->find_by_tag_name('option')) {
    my $value = $node->attr('value');

    # Only add the non-empty elements in
    next unless $value;

    # Absolute URLs pass through unchanged; relative ones are resolved
    # against the front page so the user agent can actually request them.
    $strips{ URI->new_abs($value, $page)->as_string }++;
}

# Release the parse tree explicitly once we are done with it.
$root->delete;

# print Dumper(%strips);

for my $strip (sort keys %strips) {
    fetch_comic($strip);
}

# fetch_comic($strip)
#
# Downloads the page at URL $strip and prints, for every distinct <img> src
# containing "/comics/", a line of the form "<title>, <src>". Also prints a
# "DEBUG:" line with the text of each <font class="comictitle"> element.
#
# A failed request is reported with warn() and skipped. If the page has no
# <title> element, the strip URL is used as the label instead.
#
# Uses the file-level user agent $ua. Returns nothing.
sub fetch_comic {
    my ($strip) = @_;

    # printf "Sleeping for: %s seconds..", sleep int(rand(3) + 5);
    # print "Requesting $strip\n";
    my $response = $ua->get($strip);
    unless ($response->is_success) {
        warn "Skipping $strip: " . $response->status_line . "\n";
        return;
    }

    my $root = HTML::TreeBuilder->new_from_content($response->content);

    # Hash keys de-duplicate images that appear more than once on the page.
    my %images;
    for my $node ($root->find_by_tag_name('img')) {
        my $src = $node->attr('src');
        $images{$src}++ if defined $src;    # <img> without src: nothing to report
    }

    # Debug for now
    for my $heading ($root->look_down(_tag => 'font', class => 'comictitle')) {
        printf "DEBUG: %s\n\n", $heading->as_text;
    }

    # look_down returns undef in scalar context when nothing matches, so
    # guard before calling as_text.
    my $title_node = $root->look_down(_tag => 'title');
    my $title      = $title_node ? $title_node->as_text : $strip;

    for my $comic (sort keys %images) {
        print "$title, $comic\n" if $comic =~ m|/comics/|;
    }

    # Release the parse tree explicitly before returning.
    $root->delete;

    return;
}