#!/usr/bin/perl -w
use strict;
use diagnostics;
use LWP::RobotUA;
use URI::URL;
#use HTML::Parser ();
use HTML::SimpleLinkExtor;

my $a = 0;    # counter for the saved-page files in /var/www/data/
my $base;     # optional base URL passed to HTML::SimpleLinkExtor
my $u;

# Breadth-first crawl: pass $u reads the URLs queued in file$u.txt and
# queues every new link it finds into file($u+1).txt for the next pass.
for ( $u = 1 ; $u < 1000000000 ; $u++ ) {
    open( FILE1, "</var/www/links/file$u.txt" ) || die "file$u.txt: $!";
    while (<FILE1>) {
        chomp;    # drop the trailing newline so $_ is a clean URL

        my $ua = LWP::RobotUA->new( 'theusefulbot', 'bot@theusefulnet.com' );
        #my $p = HTML::Parser->new();
        $ua->delay( 10 / 6000 );    # delay() takes minutes: 0.1 s between requests
        my $content = $ua->get($_)->content;
        #my $text = $p->parse($content)->parse;

        # Record this URL as visited.
        open( VISITED, ">>/var/www/links/visited.txt" ) || die "visited.txt: $!";
        print VISITED "$_\n";
        close(VISITED);

        # Save the fetched page under a sequential file name.
        open( OUTPUT, ">/var/www/data/$a.txt" ) || die "data/$a.txt: $!";
        print OUTPUT "$_\n\n";
        print OUTPUT $content;
        close(OUTPUT);

        # Pull every <a href="..."> out of the page.
        my $extor = HTML::SimpleLinkExtor->new($base);
        $extor->parse($content);
        my @links = $extor->a;

        # Re-read the visited list so the duplicate check sees this pass's URLs.
        open( VISITED, "</var/www/links/visited.txt" ) || die "visited.txt: $!";
        chomp( my @visited = <VISITED> );
        close(VISITED);

        # Queue each unvisited link (as an absolute URL) for the next pass.
        $u++;
        open( FILE2, ">>/var/www/links/file$u.txt" ) || die "file$u.txt: $!";
        foreach my $link (@links) {
            my $abs = url($link)->abs($_)->as_string;
            if ( grep { $_ eq $abs } @visited ) {
                print "Duplicate found: $abs\n";
            }
            else {
                print FILE2 "$abs\n";
            }
        }
        close(FILE2);
        $u--;

        $a++;
    }
    close(FILE1);
    print "File #: $a\n";
}
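
The loop never creates its own seed: the first pass expects /var/www/links/file1.txt to already exist, with one start URL per line. A minimal sketch for seeding the queue, assuming the same directory layout (the URL is a placeholder):

    #!/usr/bin/perl
    use strict;

    # Write a single start URL into the pass-1 queue file.
    open( SEED, ">/var/www/links/file1.txt" ) || die "file1.txt: $!";
    print SEED "http://www.example.com/\n";
    close(SEED);

With that file in place, the crawler's first iteration reads it, fetches each URL, and appends the extracted links to /var/www/links/file2.txt for the next pass.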