Here is a 0.20 version with a minor but important improvement: as explained by
Corion walking in the cloister, you can force the UserAgent to request a domain name at a specified IP address.
I find this feature very handy, so I added it to my code: you can pass the new option
--force_ip_resolution 1.2.3.4
to achieve this. At every verbosity level of the program this is highlighted after the requested URL.
#!/usr/bin/perl
use strict;
use warnings;
use LWP::UserAgent;
use Time::HiRes qw(gettimeofday tv_interval);
use Getopt::Long;
use HTML::Parse;
## v 16: fixed a display bug with broken links
## problem with URLs like http://213.203.143.54/ada_portale/index.php?option=com_frontpage&Itemid=1&lang=italian
$|++;    # autoflush STDOUT so progress is visible immediately

# Command-line options.
my ( @url, $count, $sleep, $timeout, $showhead, $protocol, $verbosity, $forced_ip );

# Other globals: report separators and the per-URL statistics hash.
my ( $ssep, $bsep, %stat );
$bsep = ( '=' x 68 ) . "\n";
$ssep = ( '-' x 68 ) . "\n";

# Parse options; show usage when parsing fails or no URL was given.
# NOTE: the numeric specs used to be '=n', which is not a valid Getopt::Long
# type (only s/i/o/f exist) and made GetOptions die with "Error in option
# spec"; they are '=i' now, and count is '=i' too since it feeds 1..$count.
helpme()
    if !GetOptions(
        'count=i'               => \$count,
        'url=s'                 => \@url,
        'sleep=i'               => \$sleep,
        'timeout=i'             => \$timeout,
        'header'                => \$showhead,
        'protocol=s'            => \$protocol,
        'verbosity=i'           => \$verbosity,
        'force_ip_resolution=s' => \$forced_ip,
    )
    or !$url[0];

# Default values ($forced_ip intentionally stays undef when not given).
$count   ||= 1;
$sleep   ||= 1;
$timeout ||= 15;
$verbosity = defined $verbosity ? $verbosity : 1;
$protocol ||= 'http';
$protocol =~ s![:/]+!!g;    # accept forms like "http://" or "http:"
################################################################################
# LWP::UserAgent initialization
my $ua = LWP::UserAgent->new;
$ua->agent("libwww-perl/5.10.1");
$ua->timeout($timeout);
foreach my $repetition ( 1 .. $count ) {
    foreach my $u (@url) {
        # Normalize the URL: ensure a scheme prefix, drop a trailing slash,
        # lowercase the whole thing.
        $u = $protocol . '://' . $u unless $u =~ m/^$protocol/;
        $u =~ s{/$}{};
        $u = lc $u;

        # Fresh per-URL statistics (assignment empties the hash first).
        %stat = (
            sbody      => 0,       # size of the body skeleton of each frame
            sintlink   => 0,       # size of internally linked content
            sextlink   => 0,       # size of externally linked content
            tbody      => 0,       # time taken by the body skeleton of each frame
            tintlink   => 0,       # time taken by internal content
            textlink   => 0,       # time taken by external content
            cintlink   => 0,       # count of internal links
            cextlink   => 0,       # count of external links
            brokenlink => [],      # broken links
            mainurl    => $u,      # url got as arg
            pages      => [$u],    # main page or all the frames
            cache      => {},      # cache for included resources
            respcode   => undef,
            respmess   => undef,   # some responses
            resptitle  => undef,
            respserver => undef,
            resplength => undef,
            resphead   => undef,
        );

        # get_page() may push frame URLs onto pages while we iterate, so the
        # loop naturally follows frames as they are discovered.
        foreach my $page ( @{ $stat{pages} } ) {
            get_page( $page, $forced_ip );
        }
        foreach my $content_link ( keys %{ $stat{cache} } ) {
            time_link($content_link);
        }
        report();
    }
    sleep $sleep unless $repetition == $count;
}
######################################################################
+##########
sub get_page {
    # Fetch one page (the main URL or a frame), add its timing/size data to
    # %stat, and queue discovered frames (into pages) and embedded resources
    # (into cache) for later processing.
    my $urltoget  = shift;
    my $forced_ip = shift;    # optional: IP address that must serve the request
    my $resp;
    my $t0 = [gettimeofday];
    if ( defined $forced_ip ) {
        # Force name resolution: send the request to $forced_ip while keeping
        # the original host name in the Host header. The old code stripped the
        # scheme with s/\w+:\/\///g, put host+path into the Host header and
        # dropped the path from the request URL (so any URL with a path
        # fetched the IP's root page); splitting via URI fixes both, and
        # leaves $urltoget intact for the ->abs() calls below.
        my $uri  = URI->new($urltoget);
        my $host = $uri->host;
        my $path = $uri->path_query;    # '' for a bare http://host
        $resp = $ua->get( "$protocol://$forced_ip$path", Host => $host );
    }
    else { $resp = $ua->get($urltoget); }
    $stat{tbody} += tv_interval( $t0, [gettimeofday] );

    # Add the Content-Length declared for EACH frame/page loaded.
    $stat{resplength} += ( $resp->content_length || 0 );
    # Add the real byte count obtained for EACH frame/page loaded.
    $stat{sbody} += ( length( $resp->content ) || 0 );

    # Keep response details of the principal page only (first value wins,
    # frames loaded afterwards do not overwrite them).
    $stat{respcode}   = $stat{respcode}   || $resp->code;
    $stat{respmess}   = $stat{respmess}   || $resp->message;
    $stat{resptitle}  = $stat{resptitle}  || $resp->title;
    $stat{respserver} = $stat{respserver} || $resp->server;
    $stat{resphead}   = $stat{resphead}   || $resp->headers_as_string;

    # Now parse the HTML looking for frames and embedded resources.
    my $parsed_html = parse_html( $resp->content );
    foreach my $link_found ( @{ $parsed_html->extract_links(qw(body img src frame )) } ) {
        next if $$link_found[0] =~ /#/;    # brutally skip anchors
        my $uriobj = URI->new( $$link_found[0] );
        my $absurl = $uriobj->abs($urltoget);
        # If it is a frame, add it to pages, adding an iteration to this sub.
        if ( $$link_found[3] eq 'frame' ) { push @{ $stat{pages} }, "$absurl"; next }
        # Else it is content: cache it, length and time are stored later on.
        else { $stat{cache}{$absurl} = [] }
    }
}
######################################################################
+##########
sub time_link {
    # Fetch one embedded resource, record its size and download time in the
    # cache, and classify it as internal or external to the main URL's domain;
    # failed fetches are recorded as broken links.
    my $url = shift;
    my $t0  = [gettimeofday];
    my $resp     = $ua->get($url);
    my $ttaken   = tv_interval( $t0, [gettimeofday] );
    my $bytesrec = length( $resp->content );
    if ( $resp->is_success() ) {
        @{ $stat{'cache'}{$url} } = ( $bytesrec, $ttaken );
        # Official regex from http://search.cpan.org/~gaas/URI-1.58/URI.pm
        my ( $scheme, $domain, $path, $query, $fragment ) =
            $url =~ m|(?:([^:/?#]+):)?(?://([^/?#]*))?([^?#]*)(?:\?([^#]*))?(?:#(.*))?|;
        # Tell internal from external content. \Q..\E stops the dots in the
        # domain acting as regex wildcards (the old bare /$domain/ also warned
        # on relative URLs where $domain came back undef).
        if ( defined $domain && $stat{mainurl} =~ /\Q$domain\E/ ) {
            $stat{sintlink} += $bytesrec;
            $stat{tintlink} += $ttaken;
            $stat{cintlink}++;
        }
        else {
            $stat{sextlink} += $bytesrec;
            $stat{textlink} += $ttaken;
            $stat{cextlink}++;
        }
    }
    else {
        push @{ $stat{'brokenlink'} }, $url;
    }
}
######################################################################
+##########
sub Arrotonda_Mega {
    # Human-readable size: repeatedly scale a byte count down by 1024 and
    # append the matching unit. Undefined or non-positive sizes render as
    # "0 bytes".
    my $size = shift;
    return "0 bytes" if !defined $size || $size <= 0;
    my @units = qw( bytes Kb Mb Gb );
    my $idx   = 0;
    while ( $size >= 1024 ) {
        $size /= 1024;
        $idx++;
    }
    return sprintf "%.4f %s", $size, $units[$idx];
}
######################################################################
+##########
sub report {
    # Print the statistics collected in %stat for the current URL, with a
    # level of detail chosen by $verbosity (0 = one line, 1 = short summary,
    # >1 = full report, >2 = also per-URL detail lists).
    my $totsize   = $stat{'sbody'} + $stat{'sintlink'} + $stat{'sextlink'};
    my $tottime   = $stat{'tbody'} + $stat{'tintlink'} + $stat{'textlink'};
    my $forcedtxt = defined $forced_ip ? "FORCED ON $forced_ip\t" : '';
    # Guard every throughput computation against a zero elapsed time: the old
    # code died with "Illegal division by zero" on very fast (e.g. cached or
    # empty) responses.
    my $rate = $tottime > 0 ? $totsize / $tottime : 0;
    ####################
    if ( $verbosity == 0 ) {
        print scalar localtime(time), "\t",
            $stat{mainurl}, "\t", $forcedtxt, $stat{respcode}, "\t",
            $totsize, "\t",
            $tottime, "\t",
            ( Arrotonda_Mega($rate) ), "/sec", "\n";
    }
    ####################
    elsif ( $verbosity == 1 ) {
        print $bsep, $stat{mainurl}, "\t", $forcedtxt, scalar localtime(time), "\n", $ssep,
            $stat{respcode}, "\t", $stat{respmess} || 'UNDEF', "\t", $stat{resptitle} || 'UNDEF', "\n",
            $ssep,
            "downloaded ", Arrotonda_Mega($totsize), " ($totsize bytes) in ",
            $tottime, " seconds (",
            Arrotonda_Mega($rate), "/s)\n", $bsep;
    }
    ####################
    elsif ( $verbosity > 1 ) {
        print $bsep, $stat{mainurl}, "\t", $forcedtxt, scalar localtime(time), "\n", $ssep,
            "Response code:             ", $stat{respcode}   || 'UNDEF', "\n",
            "Response message:          ", $stat{respmess}   || 'UNDEF', "\n",
            "Response server:           ", $stat{respserver} || 'UNDEF', "\n",
            "Response declared length:  ", $stat{resplength} || 'UNDEF', "\n",
            "Response title:            ", $stat{resptitle}  || 'UNDEF', "\n",
            $ssep,
            "main page content (", scalar @{ $stat{pages} }, "):\t",
            Arrotonda_Mega( $stat{sbody} ),
            " in ", $stat{tbody}, " seconds \@ ",
            Arrotonda_Mega( $stat{tbody} > 0 ? $stat{sbody} / $stat{tbody} : 0 ), "/s\n";
        if ( $verbosity > 2 ) {
            print $ssep, "\tdetail of loaded pages (url):\n", $ssep,
                "\t", join( "\n\t", @{ $stat{pages} } ), "\n", $ssep;
        }
        ### report about extra downloaded content local to $url
        if ( $stat{cintlink} > 0 ) {
            print "included content ($stat{cintlink}):\t",
                Arrotonda_Mega( $stat{sintlink} ),
                " in ", $stat{tintlink}, " seconds \@ ",
                Arrotonda_Mega( $stat{tintlink} > 0 ? $stat{sintlink} / $stat{tintlink} : 0 ), "/s\n";
        }
        else { print "no included content found.\n" }
        ### report about extra downloaded content external to $url
        if ( $stat{cextlink} > 0 ) {
            print "external content ($stat{cextlink}):\t",
                Arrotonda_Mega( $stat{sextlink} ),
                " in ", $stat{textlink}, " seconds \@ ",
                Arrotonda_Mega( $stat{textlink} > 0 ? $stat{sextlink} / $stat{textlink} : 0 ), "/s\n";
        }
        else { print "no external content found.\n" }
        ### report about broken links
        if ( scalar @{ $stat{brokenlink} } > 0 ) {
            print "broken links found:\t", scalar @{ $stat{brokenlink} }, "\n", $ssep;
            if ( $verbosity > 2 ) {
                print "\tdetail of broken links (url):\n", $ssep,
                    ( map { "\t" . $_ . "\n" } @{ $stat{brokenlink} } ), $ssep;
            }
        }
        else { print "no broken links found.\n", $ssep }
        if ( ( $verbosity > 2 ) && keys %{ $stat{cache} } > 0 ) {
            print "\tdetail of loaded content (url bytes seconds):\n", $ssep,
                ( map { "\t$_ @{$stat{cache}{$_}}\n" } sort keys %{ $stat{cache} } ),
                $ssep;
        }
        ## total
        print "downloaded ", Arrotonda_Mega($totsize), " ($totsize bytes) in ",
            $tottime, " seconds (",
            Arrotonda_Mega($rate), "/s)\n", $bsep;
    }
    # verbosity set to something strange: switch to the default and retry
    else { $verbosity = 1; report() }
    # eventually print headers
    if ($showhead) {
        print "headers received:\n", $ssep, $stat{resphead}, "\n", $bsep;
    }
}
######################################################################
+##########
sub helpme {
    # Print the usage message (interpolating the script name) and exit cleanly.
    print <<"EOH";
Utilizzo di $0
$0 -u URL [-u URL2 -u URL3 --verbosity [0-3] --header --sleep n --count n --timeout n --protocol s --force_ip_resolution s]
EOH
    exit 0;
}
there are no rules, there are no thumbs..