Beefy Boxes and Bandwidth Generously Provided by pair Networks
No such thing as a small change
 
PerlMonks  

Re: WebTimeLoad 0.15 scan and time website using LWP::Ua

by Discipulus (Canon)
on May 10, 2012 at 13:15 UTC ( [id://969808]=note: print w/replies, xml ) Need Help??


in reply to WebTimeLoad 0.15 scan and time website using LWP::Ua

Here is a 0.20 version with a minor but important improvement: as explained by Corion walking in the cloister, you can force the UserAgent to ask for a domain name at a specified IP address.

I find this feature very handy, so I added it to my code: you can pass a new option
--force_ip_resolution 1.2.3.4
to achieve this. At all verbosity levels of the program this is underlined after the URL requested.

#!/usr/bin/perl
# WebTimeLoad 0.20 - scan and time a website (main page, frames, and all
# linked content) using LWP::UserAgent, reporting sizes and load times at
# four verbosity levels. Optionally forces name resolution to a given IP
# by requesting http://IP/ with an explicit Host: header.
use strict;
use warnings;
use LWP::UserAgent;
use Time::HiRes qw(gettimeofday tv_interval);
use Getopt::Long;
use HTML::Parse;

## v16: fixed display bug for broken links
## problem with URLs like
## http://213.203.143.54/ada_portale/index.php?option=com_frontpage&Itemid=1&lang=italian
$|++;    # unbuffer STDOUT so progressive reports appear immediately

# command-line options
my (@url, $count, $sleep, $timeout, $showhead, $protocol, $verbosity, $forced_ip);
# other globals: separators and the per-URL statistics hash
my ($ssep, $bsep, %stat);
$bsep = ('=' x 68) . "\n";
$ssep = ('-' x 68) . "\n";

# Bail out with usage if option parsing fails or no --url was given.
helpme() if (
    (! GetOptions(
        'count=s'               => \$count,
        'url=s'                 => \@url,
        'sleep=n'               => \$sleep,
        'timeout=n'             => \$timeout,
        'header'                => \$showhead,
        'protocol=s'            => \$protocol,
        'verbosity=n'           => \$verbosity,
        'force_ip_resolution=s' => \$forced_ip,
    ))
    or (!$url[0])
);

# some default values
$count   ||= 1;
$sleep   ||= 1;
$timeout ||= 15;
$verbosity = defined $verbosity ? $verbosity : 1;    # 0 can be a valid level
$protocol ||= 'http';
$protocol =~ s![:/]+!!g;    # accept "http://" or "http:" and keep only the scheme name

##############################################################################
# LWP::UA initialize
my $ua = LWP::UserAgent->new;
$ua->agent("libwww-perl/5.10.1");
$ua->timeout($timeout);
##############################################################################

foreach my $repetition (1 .. $count) {
    foreach my $u (@url) {
        unless ($u =~ m/^$protocol/) { $u = $protocol . '://' . $u }
        $u =~ s/\/$//;      # remove a possible trailing slash
        $u = lc($u);
        undef %stat;
        %stat = (
            sbody      => 0,      # size of the body skeleton of each frame
            sintlink   => 0,      # size of internal linked content
            sextlink   => 0,      # size of external linked content
            tbody      => 0,      # time taken by the body skeleton of each frame
            tintlink   => 0,      # time taken by internal content
            textlink   => 0,      # time taken by external content
            cintlink   => 0,      # count of internal links
            cextlink   => 0,      # count of external links
            brokenlink => [],     # broken links
            mainurl    => $u,     # url given as argument
            pages      => [$u],   # main page, plus every frame discovered
            cache      => {},     # cache for included resources (url => [bytes, secs])
            respcode   => undef,
            respmess   => undef,  # some responses
            resptitle  => undef,
            respserver => undef,
            resplength => undef,
            resphead   => undef,
        );
        # NOTE: get_page() pushes discovered frames onto @{$stat{pages}} while
        # we iterate it; Perl's foreach sees the new elements, so frames get
        # fetched too. This is deliberate.
        foreach my $page (@{ $stat{'pages'} }) { get_page($page, $forced_ip) }
        foreach my $content_link (keys %{ $stat{'cache'} }) { time_link($content_link) }
        report();
    }
    sleep($sleep) unless $repetition == $count;
}

##############################################################################
# Fetch one page (or frame), accumulate its size/time into %stat, then parse
# the HTML and record every linked resource in the cache (frames are queued
# as further pages instead). If $forced_ip is defined, the request is sent to
# that IP with the original hostname supplied via the Host: header.
sub get_page {
    my $urltoget  = shift;
    my $forced_ip = shift;
    my $resp;
    my $t0 = [gettimeofday];
    if (defined $forced_ip) {
        $urltoget =~ s/\w+:\/\///g;                   # strip the scheme; Host: wants the bare name
        $forced_ip = $protocol . '://' . $forced_ip;
        $resp = $ua->get($forced_ip, Host => $urltoget);
    }
    else { $resp = $ua->get($urltoget); }
    $stat{tbody} += tv_interval($t0, [gettimeofday]);
    # add the Content-Length declared for EACH frame/page loaded
    $stat{resplength} += ($resp->content_length || 0);
    # add the real byte length obtained for EACH frame/page loaded
    $stat{sbody} += (length($resp->content) || 0);
    # record some more info only for the first response (not for later frames)
    $stat{respcode}   = $stat{respcode}   || $resp->code;
    $stat{respmess}   = $stat{respmess}   || $resp->message;
    $stat{resptitle}  = $stat{resptitle}  || $resp->title;
    $stat{respserver} = $stat{respserver} || $resp->server;
    $stat{resphead}   = $stat{resphead}   || $resp->headers_as_string;
    # now parse the HTML
    my $parsed_html = parse_html($resp->content);
    foreach my $link_found (@{ $parsed_html->extract_links(qw(body img src frame)) }) {
        next if $$link_found[0] =~ /#/;               # brutally skip anchors
        my $uriobj = URI->new($$link_found[0]);
        my $absurl = $uriobj->abs($urltoget);
        # if it is a frame, add it to pages, adding an iteration to this sub
        if ($$link_found[3] eq 'frame') { push @{ $stat{'pages'} }, "$absurl"; next }    # stringify $absurl
        # else it is content: add it to the cache hash; length and time stored later
        else { $stat{cache}{$absurl} = [] }
    }
}

##############################################################################
# Fetch one linked resource, time it, and classify it as internal or external
# to the main URL's domain; failed fetches are collected as broken links.
sub time_link {
    my $url      = shift;
    my $t0       = [gettimeofday];
    my $resp     = $ua->get($url);
    my $ttaken   = tv_interval($t0, [gettimeofday]);
    my $bytesrec = length($resp->content);
    if ($resp->is_success()) {
        @{ $stat{'cache'}{$url} } = ($bytesrec, $ttaken);
        # official regex from http://search.cpan.org/~gaas/URI-1.58/URI.pm
        my ($scheme, $domain, $path, $query, $fragment) =
            $url =~ m|(?:([^:/?#]+):)?(?://([^/?#]*))?([^?#]*)(?:\?([^#]*))?(?:#(.*))?|;
        # tell internal from external content.
        # FIX: quote $domain with \Q...\E so dots and other metacharacters in
        # the hostname are matched literally (previously any char matched ".")
        if (defined $domain and $stat{mainurl} =~ /\Q$domain\E/) {
            $stat{sintlink} += $bytesrec;
            $stat{tintlink} += $ttaken;
            $stat{cintlink}++;
        }
        else {
            $stat{sextlink} += $bytesrec;
            $stat{textlink} += $ttaken;
            $stat{cextlink}++;
        }
    }
    else { push @{ $stat{'brokenlink'} }, $url; }
}

##############################################################################
# Human-readable size: scale bytes down by 1024 until < 1024 and attach the
# matching unit. Undefined or non-positive sizes report as "0 bytes".
sub Arrotonda_Mega {
    my ($size, $n) = (shift, 0);
    return "0 bytes" unless defined $size;
    return "0 bytes" unless $size > 0;
    ++$n and $size /= 1024 until $size < 1024;
    return sprintf "%.4f %s", $size, (qw[ bytes Kb Mb Gb ])[$n];
}

##############################################################################
# Safe transfer-rate formatter.
# FIX: previously every rate was computed as size/time with no guard, dying
# with "Illegal division by zero" whenever the elapsed time was 0 (cached or
# instantaneous responses) - as noted in the old in-line comment. A zero or
# negative time now reports as "0 bytes" per second instead of crashing.
sub safe_rate {
    my ($bytes, $secs) = @_;
    return $secs > 0 ? Arrotonda_Mega($bytes / $secs) : Arrotonda_Mega(0);
}

##############################################################################
# Print the collected statistics for the current URL at the configured
# verbosity (0 = one tab-separated line, 1 = short summary, >1 = full detail,
# >2 adds per-resource listings). Unknown levels fall back to 1.
sub report {
    my $totsize   = $stat{'sbody'} + $stat{'sintlink'} + $stat{'sextlink'};
    my $tottime   = $stat{'tbody'} + $stat{'tintlink'} + $stat{'textlink'};
    my $forcedtxt = defined $forced_ip ? "FORCED ON $forced_ip\t" : '';
    ####################
    if ($verbosity == 0) {
        print scalar localtime(time), "\t",
              $stat{mainurl}, "\t", $forcedtxt, $stat{respcode}, "\t",
              $totsize, "\t",
              $tottime, "\t",
              safe_rate($totsize, $tottime), "/sec", "\n";
    }
    ####################
    elsif ($verbosity == 1) {
        print $bsep, $stat{mainurl}, "\t", $forcedtxt, scalar localtime(time), "\n", $ssep,
              $stat{respcode}, "\t", $stat{respmess} || 'UNDEF', "\t", $stat{resptitle} || 'UNDEF', "\n",
              $ssep,
              "downloaded ", Arrotonda_Mega($totsize), " ($totsize bytes) in ",
              $tottime, " seconds (",
              safe_rate($totsize, $tottime), "/s)\n", $bsep;
    }
    ####################
    elsif ($verbosity > 1) {
        print $bsep, $stat{mainurl}, "\t", $forcedtxt, scalar localtime(time), "\n", $ssep,
              "Response code:             ", $stat{respcode}   || 'UNDEF', "\n",
              "Response message:          ", $stat{respmess}   || 'UNDEF', "\n",
              "Response server:           ", $stat{respserver} || 'UNDEF', "\n",
              "Response declared length:  ", $stat{resplength} || 'UNDEF', "\n",
              "Response title:            ", $stat{resptitle}  || 'UNDEF', "\n",
              $ssep,
              "main page content (", scalar @{ $stat{pages} }, "):\t", Arrotonda_Mega($stat{sbody}),
              " in ", $stat{tbody}, " seconds \@ ",
              safe_rate($stat{sbody}, $stat{tbody}), "/s\n";
        if ($verbosity > 2) {
            print $ssep, "\tdetail of loaded pages (url):\n", $ssep,
                  "\t", join("\n\t", @{ $stat{pages} }), "\n", $ssep;
        }
        ### report about extra downloaded content local to $url
        if ($stat{cintlink} > 0) {
            print "included content ($stat{cintlink}):\t", Arrotonda_Mega($stat{sintlink}),
                  " in ", $stat{tintlink}, " seconds \@ ",
                  safe_rate($stat{sintlink}, $stat{tintlink}), "/s\n";
        }
        else { print "no included content found.\n" }
        ### report about extra downloaded content external to $url
        if ($stat{cextlink} > 0) {
            print "external content ($stat{cextlink}):\t", Arrotonda_Mega($stat{sextlink}),
                  " in ", $stat{textlink}, " seconds \@ ",
                  safe_rate($stat{sextlink}, $stat{textlink}), "/s\n";
        }
        else { print "no external content found.\n" }
        ### report about broken links
        if (scalar @{ $stat{brokenlink} } > 0) {
            print "broken links found:\t", scalar @{ $stat{brokenlink} }, "\n", $ssep;
            if ($verbosity > 2) {
                print "\tdetail of broken links (url):\n", $ssep,
                      (map { "\t" . $_ . "\n" } @{ $stat{brokenlink} }), $ssep;
            }
        }
        else { print "no broken links found.\n", $ssep }
        if (($verbosity > 2) && keys %{ $stat{cache} } > 0) {
            print "\tdetail of loaded content (url bytes seconds):\n", $ssep,
                  (map { "\t$_ @{$stat{cache}{$_}}\n" } sort keys %{ $stat{cache} }),
                  $ssep;
        }
        ## total
        print "downloaded ", Arrotonda_Mega($totsize), " ($totsize bytes) in ",
              $tottime, " seconds (",
              safe_rate($totsize, $tottime), "/s)\n", $bsep;
    }
    # verbosity set to something strange: switch to the default and retry
    else { $verbosity = 1; report() }
    # eventually print headers
    if ($showhead) { print "headers received:\n", $ssep, $stat{resphead}, "\n", $bsep; }
}

##############################################################################
# Print usage (in Italian, as in the original) and exit.
sub helpme {
    print <<EOH;
Utilizzo di $0
$0 -u URL [-u URL2 -u URL3 --verbosity [0-3] --header --sleep n --count n --timeout n --protocol s --force_ip_resolution s]
EOH
    exit 0;
}
there are no rules, there are no thumbs..

Log In?
Username:
Password:

What's my password?
Create A New User
Domain Nodelet?
Node Status?
node history
Node Type: note [id://969808]
help
Chatterbox?
and the web crawler heard nothing...

How do I use this?Last hourOther CB clients
Other Users?
Others about the Monastery: (2)
As of 2024-04-26 04:09 GMT
Sections?
Information?
Find Nodes?
Leftovers?
    Voting Booth?

    No recent polls found