Category: | Web Stuff |
Author/Contact Info | wizbancp |
Description: | A script for exploring a site and catching links: simply specify the starting URL and the search depth (sorry for my English! :-)). At the end, the script produces a text file with the addresses it caught. |
After the criticism (:-)) I modified the script to catch only link addresses and no longer email addresses.... =:-(
usage: "script.pl url depth" or simply "script.pl"
#!/usr/bin/perl -w
require LWP::UserAgent;
# Output file that receives every captured address, one per line. The
# bareword handle LINK is kept because catturalink() prints to it by name.
open LINK, ">", "link.txt"
    or die "Cannot open link.txt for writing: $!\n";

# Starting URL and searching depth: taken from the command line when
# arguments are given, otherwise asked for interactively.
my $indirizzo;
my $profond;
if (!@ARGV)
{
    print "Insert starting URL: ";
    $indirizzo = <STDIN>;
    print "\nInsert searching depth: ";
    $profond = <STDIN>;
}
else
{
    ($indirizzo, $profond) = @ARGV;
}

# <STDIN> yields undef at end of input and $ARGV[1] is undef when only the
# URL is passed, so both values are normalised before they are used.
# Trimming whitespace also removes the trailing newline of interactive input.
$indirizzo = '' unless defined $indirizzo;
$indirizzo =~ s/\A\s+|\s+\z//g;

$profond = '' unless defined $profond;
$profond =~ s/\A\s+|\s+\z//g;
if ($profond !~ /\A\d+\z/)
{
    # A missing depth silently means "start page only"; anything else that
    # is not a non-negative integer is reported, because a negative or
    # fractional depth would never count down to exactly zero.
    warn "Searching depth must be a non-negative integer; using 0.\n"
        if length $profond;
    $profond = 0;
}

# Add the default scheme only when the user did not type one, so that
# "http://example.com" is not turned into "http://http://example.com".
my $indirizzohttp = $indirizzo =~ m{\Ahttps?://}i
                  ? $indirizzo
                  : "http://".$indirizzo;

# Queue of every address seen so far; element 0 is the starting page.
my @elencolink = ($indirizzohttp);

# Shared user agent used by visitapagina() for all requests.
my $ua = LWP::UserAgent->new;
$ua->agent('WizCaptureBot/1.11');
$ua->timeout(10);       # seconds
$ua->env_proxy;         # take proxy settings from the environment
# pausa()
#
# Keeps the script alive until the user confirms: prints a prompt and then
# blocks on one line of standard input. The line that was read is returned,
# although the caller in this file ignores it.
sub pausa {
    print "\nPress Enter to exit.\n";
    return scalar <STDIN>;
}
# catturalink($codice)
#
# Scans the text in $codice (the body of a fetched page when called from
# visitapagina) for absolute http:// and https:// URLs. Every match,
# duplicates included, is written to the LINK filehandle one per line and
# appended to the file-level @elencolink list. Finally the number of matches
# is printed to standard output.
sub catturalink
{
    my $codice = shift;
    my $cont   = 0;

    # The pattern is written under /x on separate logical lines so that no
    # literal newline can end up inside a character class; a newline in the
    # class would let a match run on from the end of one line into the next.
    my $url_re = qr{
        (                                # capture 1: the complete URL
            https?://                    # scheme
            [\w\-]+ (?: \. [\w\-]+ )+    # host name containing at least one dot
            (?:
                [\w\-.,\@?^=%&:/~+\#]*   # path, query and fragment characters
                [\w\-\@?^=%&/~+\#]       # final character: not a dot, comma or colon
            )?
        )
    }x;

    while ($codice =~ /$url_re/g)
    {
        # Use the capture group rather than the match variable $&, which
        # can slow down every regex in the program on older perls.
        my $indirizzolink = $1;
        $cont++;
        print LINK "$indirizzolink\n";
        push @elencolink, $indirizzolink;
    }
    print "Find $cont links\n";
}
# visitapagina($pagina)
#
# Requests the address in $pagina through the file-level user agent $ua and
# prints a " -- address --" banner for it. When the response is successful
# its content is passed to catturalink(); otherwise the response's status
# line is printed instead.
sub visitapagina
{
    my $pagina   = shift;
    my $response = $ua->get($pagina);

    # The banner is the same for both outcomes, so it is printed once here,
    # after the request has completed.
    print "\n -- $pagina --\n";

    if ($response->is_success)
    {
        # The content is handed over directly instead of being parked in an
        # undeclared package variable.
        catturalink($response->content);
    }
    else
    {
        print $response->status_line."\n";
    }
}
# Level-by-level crawl. @elencolink grows while pages are visited; $inizio
# and $fine delimit the slice of it that was discovered on the previous
# level and therefore has to be visited on the current one.
my $inizio = 0;
my $fine   = 0;

# Level 0: the starting page.
visitapagina($elencolink[0]);

# "> 0" rather than "!= 0": a negative or fractional depth never reaches
# exactly zero and would keep this loop running forever.
while ($profond > 0)
{
    $profond--;
    $inizio = $fine + 1;
    $fine   = scalar(@elencolink) - 1;

    # Nothing new was found on the previous level, so no deeper level can
    # add anything either.
    last if $inizio > $fine;

    # The range is fixed before the level starts: links found while visiting
    # this level are queued for the next one.
    for my $c ($inizio .. $fine)
    {
        print "\n$inizio $c $fine";
        visitapagina($elencolink[$c]);
    }
}

# Close the output before announcing the end, so link.txt is complete on
# disk while the script waits for Enter; buffered write errors show up here.
close LINK or warn "Problem while closing link.txt: $!\n";

print"\n Operation ended! \n";
pausa();
|
|
---|
Replies are listed 'Best First'. | |
---|---|
Re: Link & Email Hunter
by merlyn (Sage) on Feb 13, 2007 at 16:33 UTC | |
by Limbic~Region (Chancellor) on Feb 14, 2007 at 14:20 UTC | |
Re: Link & Email Hunter
by blue_cowdawg (Monsignor) on Feb 13, 2007 at 16:19 UTC | |
| |
Re: Link Hunter
by wizbancp (Sexton) on Feb 14, 2007 at 08:23 UTC |
Back to
Code Catacombs