#!/usr/bin/perl
#
# Incrementally mirrors PerlMonks nodes as XML files.
#
# Reads the id of the last stored node from the state file, requests the
# following node ids one at a time from www.perlmonks.org (displaytype=xml),
# writes each response to "$output_dir/<id>.xml", and finally records the id
# of the last node actually stored back into the state file.
#
# A run ends when:
#   * the site answers with its "Not found" superdoc page,
#   * a request fails (the node is retried on the next run), or
#   * $max_per_run nodes have been stored.

use strict;
use warnings;
use LWP::UserAgent;

my $state_file  = "/home/charlesc/scripts/lastnode";
my $output_dir  = "/srv/www/htdocs/perlmonks";
my $max_per_run = 25;    # just in case, don't take more than 25 in a run.

my $ua = LWP::UserAgent->new(agent => "g0ns node grabber/0.01");

# --- Load the id of the last node stored by a previous run ----------------
open(my $ln, "<", $state_file)
    or die "Can't open $state_file for reading because $!";
my $lastnode = <$ln>;
close $ln;

defined $lastnode
    or die "$state_file is empty; expected a node id\n";
$lastnode =~ s/\A\s+|\s+\z//g;    # tolerate stray whitespace / CRLF
$lastnode =~ /\A\d+\z/
    or die "$state_file does not contain a numeric node id\n";

# Disabled alternative kept from the original script; it appears to derive
# the last node from the files already present in the output directory.
#opendir(my $dh,"/srv/www/htdocs/perlmonks/");
#my @files = readdir($dh);
#closedir $dh;
#for (@files)
#{
#    if ($_ eq "index.html"){$_=""}
#    $_ =~s/\.xml//;
#}
#my @sortedfiles = sort {$a <=> $b} @files;

my $nextfile = $lastnode + 1;
print "nextfile = $nextfile\n";

# Id of the most recent node written to disk. Starts at the value read from
# the state file so that a run which stores nothing leaves the state as is.
my $last_stored = $lastnode;
my $counter     = 0;

# --- Fetch loop -----------------------------------------------------------
while ($counter < $max_per_run) {
    my $starttime = time();
    my $url = "http://www.perlmonks.org/?displaytype=xml;node_id=$nextfile";
    print "Searching $url\n";

    my $result  = $ua->get($url);
    my $endtime = time();

    # A failed request ends the run instead of retrying immediately: an
    # unthrottled retry would hammer the server and never terminate while
    # the failure persists. The same node is attempted again next run.
    if (!$result->is_success) {
        warn "Request for $url failed: " . $result->status_line . "\n";
        last;
    }

    my $content = $result->content;

    # The site's "Not found" superdoc is treated as the end of the
    # available nodes.
    last if $content =~ /title="Not found"/ && $content =~ /superdoc/;

    open(my $fh, ">", "$output_dir/$nextfile.xml")
        or die "Can't open $nextfile.xml for writing because $!";
    print {$fh} $content;
    # Buffered write errors only surface at close, so check it.
    close $fh
        or die "Can't finish writing $nextfile.xml because $!";

    $last_stored = $nextfile;
    $nextfile++;
    $counter++;

    # Be polite: wait proportionally to how long the server took to answer.
    my $pause = 2 * ($endtime - $starttime) + 1;
    print "Sleeping for $pause seconds\n";
    sleep $pause;
}

# --- Persist progress -----------------------------------------------------
# Record the last node that was really stored, so the next run resumes at
# the node after it regardless of why this run ended.
open($ln, ">", $state_file)
    or die "Can't open $state_file for writing because $!";
print {$ln} "$last_stored\n";
close $ln
    or die "Can't finish writing $state_file because $!";