#!/usr/bin/perl -w
#
# Recursively mirror an HTTP-served directory tree into the current working
# directory.
#
# A small pool of POE sessions (called "threads" in the log output) shares one
# work queue of URLs.  Each session owns its own user-agent component and
# repeats the same cycle: take a URL from the queue, fetch it, then either
#   * URL ends in "/": parse the body as XML and queue the href of every
#     <dir> and <file> element, or
#   * otherwise: save the body under a path derived from the URL
#     (the URL with its leading "http://" removed).
# A session that finds the queue empty waits once and, if the queue is still
# empty afterwards, shuts its user agent down.

use strict;
use warnings;

use File::Path qw(make_path);
use POE;
use POE::Component::Client::UserAgent;
use XML::Sablotron::DOM;
use URI;

# NOTE(review): HTTP::Request and XML::Sablotron::Situation are used below
# without being loaded explicitly; presumably the modules above pull them in.
# Confirm before trimming or reordering these imports.

# Work queue shared by every session.  Entries are taken from the end (pop),
# so the most recently discovered URL is fetched first.
my @urls = qw(http://dellah.org/svn/perl/);

# Number of sessions started so far; gives each one a number for log output.
my $thread = 0;

# How many sessions to run, and how long an idle one waits for new work.
my $WORKER_COUNT = 3;
my $IDLE_SECONDS = 10;

# POE "_start" handler: set up per-session state and ask for the first URL.
#
# ARG0 is the value passed through "args" at session creation; it is only
# used to build a distinct user-agent alias for this session.
sub _start {
    my ($kernel, $heap, $id) = @_[KERNEL, HEAP, ARG0];

    $heap->{situa}  = XML::Sablotron::Situation->new();
    $heap->{alias}  = "useragent" . $id;
    $heap->{thread} = ++$thread;

    POE::Component::Client::UserAgent->new(alias => $heap->{alias});
    $kernel->yield("next");
    return;
}

# POE "next" handler: fetch the next queued URL, or go idle if there is none.
#
# The sub is deliberately not called "next": that name collides with Perl's
# loop-control keyword and cannot be invoked with ordinary call syntax.
sub fetch_next {
    my ($kernel, $heap, $session) = @_[KERNEL, HEAP, SESSION];

    my $url = pop @urls;
    if (!$url) {
        print "Thread $heap->{thread} waiting ${IDLE_SECONDS}s for more work\n";
        $kernel->delay(waiting => $IDLE_SECONDS);
        return;
    }

    # Remembered so that got_it knows which URL its response belongs to.
    $heap->{url} = URI->new($url);
    print "Thread $heap->{thread} fetching $url\n";
    $kernel->post(
        $heap->{alias} => "request",
        {
            request  => HTTP::Request->new(GET => $url),
            response => $session->postback('got_it'),
        },
    );
    return;
}

# POE "waiting" handler: runs after the idle delay.  Resumes fetching if
# work has appeared in the meantime, otherwise shuts the user agent down.
sub waiting {
    my ($kernel, $heap) = @_[KERNEL, HEAP];

    # Another session may have queued URLs while this one was idle.
    return fetch_next(@_) if @urls;

    print "Thread $heap->{thread} shutting down\n";
    $kernel->post($heap->{alias} => "shutdown");
    return;
}

# Create the directory part of $filename (everything before the last "/")
# if it does not exist yet.  Failures are reported with warn and otherwise
# ignored; a later attempt to open the file will report its own error.
sub _ensure_parent_dir {
    my ($filename) = @_;

    my ($dirname) = $filename =~ m{(.*)/[^/]*};

    # No "/" in the name (or nothing before it): nothing to create.
    return if !defined $dirname || $dirname eq '';
    return if -d $dirname;

    # make_path is used instead of shelling out to "mkdir -p" because the
    # path is derived from remote data and must never reach a shell.
    make_path($dirname, { error => \my $errors });
    for my $failure (@{ $errors || [] }) {
        my ($path, $message) = %{$failure};
        $path = $dirname if !defined $path || $path eq '';
        warn "failed to create directory $path; $message\n";
    }
    return;
}

# Parse a directory listing and append the absolute URL of every <dir> and
# <file> element's href to the shared queue (all dirs first, then all files).
sub _queue_listing {
    my ($heap, $url, $xml) = @_;

    print "Thread $heap->{thread} scanning $url\n";

    # A listing that cannot be parsed must not take the whole crawl down.
    # NOTE(review): whether parseBuffer dies or returns a false value on bad
    # input is not visible here, so both outcomes are handled.
    my $doc = eval {
        XML::Sablotron::DOM::parseBuffer($heap->{situa}, $xml);
    };
    my $error = $@;
    if (!$doc) {
        $error = "unknown error" if !defined $error || $error eq '';
        $error =~ s/\s+\z//;
        warn "Thread $heap->{thread} could not parse listing $url; $error\n";
        return;
    }
    $heap->{doc} = $doc;

    for my $query ("//dir", "//file") {
        for my $node (@{ $doc->xql($query) || [] }) {
            my $href = $node->getAttribute("href");

            # An empty reference resolves to the listing itself and would be
            # fetched over and over.
            next if !defined $href || $href eq '';

            push @urls, URI->new_abs($href, $url);
        }
    }
    return;
}

# Write $content to $filename, reporting (but not dying on) I/O failures.
sub _save_file {
    my ($heap, $url, $filename, $content) = @_;

    my $out;
    if (!open $out, '>', $filename) {
        warn "failed to open $filename for writing; $!";
        return;
    }
    binmode $out;    # the response body is written as-is, byte for byte

    print "Thread $heap->{thread} saving $url to $filename\n";
    print {$out} $content or warn "failed to write $filename; $!";

    # Buffered write errors only surface at close time.
    close $out or warn "failed to close $filename; $!";
    return;
}

# POE postback handler for a finished request.
#
# ARG1 holds the values the user agent passed to the postback; the second
# one is used as the response object and the first is ignored.  Whatever the
# outcome, the session asks for its next URL before returning.
sub got_it {
    my ($kernel, $heap) = @_[KERNEL, HEAP];
    my (undef, $response) = @{ $_[ARG1] };
    my $url = $heap->{url};

    if (!$response->is_success) {
        warn "Thread $heap->{thread} got error loading $url; "
            . $response->status_line . "\n";
    }
    else {
        # Pick a filename like wget does: the URL without its scheme.
        my $filename = "$url";
        $filename =~ s{^http://}{};

        # Only successful fetches leave anything on disk.
        _ensure_parent_dir($filename);

        if ($url =~ m{/$}) {
            _queue_listing($heap, $url, $response->content);
        }
        else {
            _save_file($heap, $url, $filename, $response->content);
        }
    }

    $kernel->yield("next");
    return;
}

for my $id (1 .. $WORKER_COUNT) {
    POE::Session->create(
        inline_states => {
            _start  => \&_start,
            next    => \&fetch_next,
            got_it  => \&got_it,
            waiting => \&waiting,
        },
        args => [$id],    # becomes ARG0 of _start
    );
}

$poe_kernel->run();