# Fetch every page queued in @urls, echo its HTML to STDOUT, and hand the
# HTML to a parser whose start-tag handler (start, below) mirrors the images
# the page references.
#
# @urls and $browser are set up earlier in the script (not visible here);
# $browser is presumably an LWP::UserAgent -- it must provide request() and
# mirror().
while ( my $url = shift @urls ) {
    print "URL is $url\n";

    my $request = HTTP::Request->new( GET => $url );

    # A fresh parser is built for each page, so the per-page state stashed
    # on it below (base URL, user agent) starts out empty every time.
    my $parser = HTML::Parser->new( api_version => 3 );
    $parser->handler( start => \&start, 'self,tagname,attr' );

    my $response = $browser->request($request);
    if ( $response->is_success ) {
        print $response->content();

        # Stash what the start-tag handler needs on the parser object
        # itself; the handler receives the parser as its first argument.
        $parser->{base}    ||= $response->base;
        $parser->{browser} ||= $browser;

        $parser->parse( $response->content );
        $parser->eof();
    }
    else {
        print "ERROR: " . $response->status_line . "\n";
    }
}

# start($parser, $tagname, $attr)
#
# HTML::Parser start-tag handler (registered with the argspec
# 'self,tagname,attr').  For every <img> tag carrying a src attribute it
# resolves the image URL against $parser->{base} and mirrors the image to a
# local file named "<host><path>", creating the directories as needed.
#
#   $parser  - the parser object; {base} and {browser} are read from it
#   $tagname - name of the tag that was opened
#   $attr    - hash ref of the tag's attributes
#
# Returns nothing.  Progress goes to STDOUT, the outcome of each mirror
# request to STDERR.
sub start {
    my ( $parser, $tagname, $attr ) = @_;

    return unless $tagname eq 'img';
    return unless $attr->{src};

    my $img_url     = $attr->{src};
    my $remote_name = URI->new_abs( $img_url, $parser->{base} );

    # URI classes for schemes such as data: have no host() method; calling
    # it would die inside the parser callback and abort the whole crawl.
    unless ( $remote_name->can('host') ) {
        print STDERR "YYY-$img_url: skipped, URL has no host part\n";
        return;
    }

    #my ($local_name) = $img_url =~ m!([^/]+)$!;
    my $local_name = $remote_name->host . $remote_name->path;
    #my $local_name = "/dev/null";

    # An empty path or one ending in '/' names a directory, not a file, so
    # there is nothing sensible to mirror it into.
    if ( $remote_name->path eq '' or $local_name =~ m{/\z} ) {
        print STDERR "YYY-$local_name: skipped, URL path does not name a file\n";
        return;
    }

    mkpath( dirname($local_name), 0, 0711 );

    print "Getting imagefile: $img_url\n";

    # mirror() sends a conditional request when the local file already
    # exists, so "304 Not Modified" on a repeat run is the normal outcome
    # for an unchanged image, not an error.
    my $response = $parser->{browser}->mirror( $remote_name, $local_name );

    print STDERR "YYY-$local_name: ", $response->message, "\n";

    return;
}
Here is the output when I run it the second time:
Getting imagefile: images/logo.gif
LWP::UserAgent::mirror: ()
LWP::UserAgent::request: ()
HTTP::Cookies::add_cookie_header: Checking www.google.com for cookies
HTTP::Cookies::add_cookie_header: Checking .google.com for cookies
HTTP::Cookies::add_cookie_header: - checking cookie path=/
HTTP::Cookies::add_cookie_header: - checking cookie PREF=ID=0f9d8bbb3b0ee898:TM=1036535059:LM=1036535059:S=2ea2eKPQlO4uYAN6
HTTP::Cookies::add_cookie_header: it's a match
HTTP::Cookies::add_cookie_header: Checking google.com for cookies
HTTP::Cookies::add_cookie_header: Checking .com for cookies
LWP::UserAgent::send_request: GET http://www.google.com/images/logo.gif
LWP::UserAgent::_need_proxy: Not proxied
LWP::Protocol::http::request: ()
LWP::UserAgent::request: Simple response: Not Modified
YYY-www.google.com/images/logo.gif: 304 Not Modified