A new version! 0.23
The new parameter -r --render saves the content to a temporary file and opens it at the end of the program.
It creates only a similar page, not an identical one, just to give you an idea of what you are fetching.
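For example, on Windows (sitecheck.pl is just a placeholder for whatever name you saved the script under):

    perl sitecheck.pl -u www.perl.org -r

This fetches the page once, saves an approximate local copy under %TEMP% and opens it with the default browser when the program ends.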
Also created a decent help screen and intercepted some possible "Illegal division by zero" errors.
enjoy:
UPDATE January 31 2014: removed $u = lc($u) because it was buggy: you CAN have uppercase characters in URLs.
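A quick demonstration of why that line had to go (a minimal sketch; the URL is made up): only the scheme and the host of a URL are case-insensitive, the path is not, so lowercasing the whole thing can turn a valid link into a 404 on a case-sensitive server.

    use URI;
    my $u = URI->new('http://Example.Com/Docs/Index.HTML');
    print lc("$u"), "\n";       # http://example.com/docs/index.html <- path case destroyed
    print $u->canonical, "\n";  # http://example.com/Docs/Index.HTML <- only scheme and host lowercased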
#!/usr/bin/perl
use strict;
use warnings;
use LWP::UserAgent;
use Time::HiRes qw(gettimeofday tv_interval);
use Getopt::Long;
use HTML::Parse;
use URI; # URI->new is called directly below, so load it explicitly
## v 16 fixed the broken links display bug
##      and a problem with URLs like http://213.203.143.54/ada_portale/index.php?option=com_frontpage&Itemid=1&lang=italian
# v 20 added force_ip_resolution
# v 22 rendering OK (in time_link and get_page) and %0.6f formatting for $tottime in report
# v 23 help added
$|++;
# options
my (@url, $count, $sleep, $timeout, $showhead, $protocol, $verbosity, $forced_ip, $render);
# other globals
my ($ssep, $bsep,%stat);
$bsep = ('=' x 68)."\n";
$ssep = ('-' x 68)."\n";
&helpme() if ( (! GetOptions(
                'count=i'                => \$count,
                'url=s'                  => \@url,
                'sleep=i'                => \$sleep,
                'timeout=i'              => \$timeout,
                'header'                 => \$showhead,
                'protocol=s'             => \$protocol,
                'verbosity=i'            => \$verbosity,
                'force_ip_resolution=s'  => \$forced_ip,
                'render'                 => \$render,
              )) or (!$url[0]));
# some default values
$count   ||= 1;
$sleep   ||= 1;
$timeout ||= 15;
$verbosity = defined $verbosity ? $verbosity : 1;
$protocol ||= 'http';
$protocol =~ s![:/]+!!g;
$forced_ip ||= undef;
$render    ||= undef;
if ($render) { $count = 1; @url = ($url[0]) } # render implies a single fetch of the first url only
################################################################################
# initialize LWP::UserAgent
my $ua = LWP::UserAgent->new;
$ua->agent("libwww-perl/5.10.1");
$ua->timeout($timeout);
################################################################################
foreach my $repetition(1..$count) {
foreach my $u (@url) {
unless ( $u =~ m/^$protocol/ ){$u=$protocol.'://'.$u};
$u =~ s/\/$//; # remove a trailing / if present
#$u=lc($u);####THIS IS BUGGED, dunno why i put it...
undef %stat;
%stat = (
    sbody      => 0,     # size of the body skeleton of each frame
    sintlink   => 0,     # size of internal linked content
    sextlink   => 0,     # size of external linked content
    tbody      => 0,     # time taken by the body skeleton of each frame
    tintlink   => 0,     # time taken by internal content
    textlink   => 0,     # time taken by external content
    cintlink   => 0,     # count of internal links
    cextlink   => 0,     # count of external links
    brokenlink => [],    # broken links
    mainurl    => $u,    # url got as arg
    pages      => [$u],  # main page or all the frames
    cache      => {},    # cache for included resources
    respcode   => undef, #
    respmess   => undef, # some data from the response
    resptitle  => undef, #
    respserver => undef, #
    resplength => undef, #
    resphead   => undef, #
);
foreach my $page ( @{$stat{'pages'}})
{ &get_page($page, $forced_ip) }
foreach my $content_link (keys %{$stat{'cache'}})
{ &time_link($content_link) }
&report;
}
sleep ($sleep) unless $repetition == $count;
}
################################################################################
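# get_page: fetch one page (frames end up here too), optionally via a forced
# IP; accumulate size and time in %stat, queue any frame in $stat{pages} and
# any linked resource in $stat{cache}; with --render also save a local copy.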
sub get_page {
my $urltoget = shift;
my $forced_ip = shift;
my $resp;
my $t0 = [gettimeofday];
    if (defined $forced_ip) {
        # force the resolution: request the page from the given IP,
        # passing the original hostname in the Host header
        $urltoget =~ s/\w+:\/\///g;
        $forced_ip = $protocol.'://'.$forced_ip;
        $resp = $ua->get( $forced_ip, Host => $urltoget );
    }
    else { $resp = $ua->get($urltoget); }
$stat{tbody} += tv_interval ($t0, [gettimeofday]);
    # add the content_length declared for each frame/page loaded
    $stat{resplength} += ($resp->content_length || 0);
    # add the real byte length obtained for each frame/page loaded
    $stat{sbody} += (length($resp->content) || 0);
    # some more info, kept only for the principal page (not for any frame loaded later)
    $stat{respcode}   = $stat{respcode}   || $resp->code;
    $stat{respmess}   = $stat{respmess}   || $resp->message;
    $stat{resptitle}  = $stat{resptitle}  || $resp->title;
    $stat{respserver} = $stat{respserver} || $resp->server;
    $stat{resphead}   = $stat{resphead}   || $resp->headers_as_string;
    # now parse the HTML
    my $parsed_html = parse_html($resp->content);
    foreach my $link_found (@{ $parsed_html->extract_links(qw(body img src frame)) }) {
        next if $$link_found[0] =~ /#/;   # brutally skip anchors
        my $uriobj = URI->new($$link_found[0]);
        my $absurl = $uriobj->abs($urltoget);
        # if it is a frame, add it to pages: this adds an iteration of this sub
        if ($$link_found[3] eq 'frame') { push @{$stat{'pages'}}, "$absurl"; next } # "" stringifies $absurl
        # else it is content: add it to the cache hash (length and time will be stored there later on)
        else { $stat{cache}{ $absurl } = [] }
    }
    if ($render) {
        # save an approximate local copy of the page under %TEMP%
        mkdir "$ENV{TEMP}\\_temp_files" or die "unable to create $ENV{TEMP}\\_temp_files: $!"
            unless -d "$ENV{TEMP}\\_temp_files";
        open my $render_fh, '>', "$ENV{TEMP}/_temp.html"
            or die "unable to write to %TEMP%\\_temp.html: $!";
        # point the src attributes at the local _temp_files directory
        (my $localcont = $resp->content) =~ s/src="([^"]*)\//src=".\/_temp_files\//gm;
        print $render_fh $localcont;
        close $render_fh;
        #system ("start $ENV{TEMP}/_temp.html");
    }
}
################################################################################
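# time_link: fetch one linked resource, record its size and download time in
# $stat{cache}, classify it as internal or external content (by comparing its
# domain with the main url) and collect broken links; with --render also save
# the resource under %TEMP%\_temp_files.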
sub time_link {
my $url = shift;
my $t0 = [gettimeofday];
my $resp = $ua->get($url);
my $ttaken = tv_interval ($t0, [gettimeofday]);
#$ttaken = sprintf ("%0.6f", $ttaken);
my $bytesrec = length($resp->content);
    if ($resp->is_success()) {
        @{$stat{'cache'}{$url}} = ($bytesrec, $ttaken);
        # official regex from http://search.cpan.org/~gaas/URI-1.58/URI.pm
        my ($scheme, $domain, $path, $query, $fragment) =
            $url =~ m|(?:([^:/?#]+):)?(?://([^/?#]*))?([^?#]*)(?:\?([^#]*))?(?:#(.*))?|;
        # tell internal from external content (\Q..\E: the domain is a literal string, not a pattern)
        if ($stat{mainurl} =~ /\Q$domain\E/) {
            $stat{sintlink} += $bytesrec;
            $stat{tintlink} += $ttaken;
            $stat{cintlink}++;
        }
        else {
            $stat{sextlink} += $bytesrec;
            $stat{textlink} += $ttaken;
            $stat{cextlink}++;
        }
        if ($render) {
            (my $ele = $url) =~ s/^.*\///;
            # would be better to convert HTML escapes, as in WITH%20SPACES.gif
            open my $render_fh, '>', "$ENV{TEMP}\\_temp_files\\$ele"
                or die "unable to write to %TEMP%\\_temp_files\\$ele: $!";
            binmode $render_fh;
            print $render_fh $resp->content;
            close $render_fh;
        }
}
else {
push @{$stat{'brokenlink'}},$url;
}
}
################################################################################
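# Arrotonda_Mega ("round to mega"): turn a byte count into a human readable
# string, dividing by 1024 until the value drops below 1024.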
sub Arrotonda_Mega
{
my( $size, $n ) =( shift, 0 );
return "0 bytes" unless defined $size;
return "0 bytes" unless $size > 0;
++$n and $size /= 1024 until $size < 1024;
return sprintf "%.4f %s",
$size, ( qw[ bytes Kb Mb Gb ] )[ $n ];
}
################################################################################
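# report: print the statistics collected for the current url, with the amount
# of detail driven by --verbosity (0 = one line, up to 3 = per-resource detail).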
sub report {
    my $totsize = $stat{'sbody'} + $stat{'sintlink'} + $stat{'sextlink'};
    my $tottime = $stat{'tbody'} + $stat{'tintlink'} + $stat{'textlink'};
    $tottime = sprintf("%0.6f", $tottime);
    my $forcedtxt = defined $forced_ip ? " \@ $forced_ip\t" : "\t";
####################
    if ($verbosity == 0) {
        print scalar localtime(time), "\t",
              $stat{mainurl}, $forcedtxt, $stat{respcode}, "\t",
              $totsize, "\t",
              $tottime, "\t",
              ( $tottime ? &Arrotonda_Mega($totsize / $tottime)."/s" : 'NA' ),
              "\n";
    }
####################
    elsif ($verbosity == 1) {
        print $bsep, $stat{mainurl}, $forcedtxt, scalar localtime(time), "\n", $ssep,
              $stat{respcode}, "\t", $stat{respmess}||'UNDEF', "\t", $stat{resptitle}||'UNDEF', "\n",
              $ssep,
              "downloaded ", &Arrotonda_Mega($totsize), " ($totsize bytes) in ",
              $tottime, " seconds (",
              ( $tottime ? &Arrotonda_Mega($totsize / $tottime)."/s)" : 'NA)' ),
              "\n", $bsep;
    }
####################
    elsif ($verbosity > 1) {
        print $bsep, $stat{mainurl}, $forcedtxt, scalar localtime(time), "\n", $ssep,
              "Response code:            ", $stat{respcode}  ||'UNDEF', "\n",
              "Response message:         ", $stat{respmess}  ||'UNDEF', "\n",
              "Response server:          ", $stat{respserver}||'UNDEF', "\n",
              "Response declared length: ", $stat{resplength}||'UNDEF', "\n",
              "Response title:           ", $stat{resptitle} ||'UNDEF', "\n",
              $ssep,
              "main page content (", scalar @{$stat{pages}}, "):\t", &Arrotonda_Mega($stat{sbody}),
              " in ", $stat{tbody}, " seconds \@ ",
              ( $stat{tbody} ? &Arrotonda_Mega($stat{sbody} / $stat{tbody})."/s" : 'NA' ),
              "\n";
        if ($verbosity > 2) {
            print $ssep, "\tdetail of loaded pages (url):\n", $ssep,
                  "\t", join("\n\t", @{$stat{pages}}), "\n", $ssep;
        }
        ### report about extra downloaded content local to $url
        if ($stat{cintlink} > 0) {
            print "included content ($stat{cintlink}):\t", &Arrotonda_Mega($stat{sintlink}),
                  " in ", $stat{tintlink}, " seconds \@ ",
                  ( $stat{tintlink} ? &Arrotonda_Mega($stat{sintlink} / $stat{tintlink})."/s" : 'NA' ),
                  "\n";
        }
        else { print "no included content found.\n" }
        ### report about extra downloaded content external to $url
        if ($stat{cextlink} > 0) {
            print "external content ($stat{cextlink}):\t", &Arrotonda_Mega($stat{sextlink}),
                  " in ", $stat{textlink}, " seconds \@ ",
                  ( $stat{textlink} ? &Arrotonda_Mega($stat{sextlink} / $stat{textlink})."/s" : 'NA' ),
                  "\n";
        }
        else { print "no external content found.\n" }
        ### report about broken links
        if (scalar @{$stat{brokenlink}} > 0) {
            print "broken links found:\t", scalar @{$stat{brokenlink}}, "\n", $ssep;
            if ($verbosity > 2) {
                print "\tdetail of broken links (url):\n", $ssep,
                      (map { "\t".$_."\n" } @{$stat{brokenlink}}), $ssep;
            }
        }
        else { print "no broken links found.\n", $ssep }
        if ( ($verbosity > 2) && keys %{$stat{cache}} > 0 ) {
            print "\tdetail of loaded content (url bytes seconds):\n", $ssep,
                  (map { "\t$_ @{$stat{cache}{$_}}\n" } sort keys %{$stat{cache}}),
                  $ssep;
        }
        ## total
        print "downloaded ", &Arrotonda_Mega($totsize), " ($totsize bytes) in ",
              $tottime, " seconds (",
              ( $tottime ? &Arrotonda_Mega($totsize / $tottime)."/s)" : 'NA)' ),
              "\n", $bsep;
}
    # verbosity set to something strange: fall back to the default
else{ $verbosity = 1; &report }
    # print the received headers if requested
    if ($showhead) { print "headers received:\n", $ssep, $stat{resphead}, "\n", $bsep; }
    # 'start' opens the saved copy with the default browser (Windows)
    if ($render) { system("start $ENV{TEMP}/_temp.html"); }
}
################################################################################
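# helpme: print usage information and exit.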
sub helpme {
print <<EOH;
USAGE of $0:
--url URL [-u URL2 -u URL3 -v [0-3] -h -s n -c n -t n -p s -f IP -r]
-c --count                how many times to repeat the check. default is 1
-f --force_ip_resolution  given a valid dotted decimal IP, force the resolution of the url to it.
-h --header               print the received headers too. off by default
-p --protocol             http is assumed unless specified.
-r --render               render the page as it was parsed (giving just an idea of the original)
-s --sleep                seconds to sleep between checks. default is 1.
-t --timeout              the timeout for the connection. default is 15 seconds
-u --url                  the url or urls we want to scan
-v --verbosity            from 0 (one line of output for each check) to 3. default is 1
NOTES:
--render     also forces --count to 1 and considers the first url only. It does not replicate
             the original page exactly, because it does not take CSS and other things into account.
--verbosity
         0   one line of output containing this info: date, url, response code, bytes downloaded,
             seconds elapsed, speed.
         1   a more descriptive multiline output, with the title of the page and the bytes expressed
             in human readable form too.
         2   as 1, but with general info about external or internal loaded content and broken links.
         3   as 2, with detailed info for each loaded content.
--protocol   http is the default; you can specify ftp too. https is supported only if you have
             the Crypt::SSLeay module installed.
EXAMPLES:
-u www.perl.org -c 10 -s 5
check it 10 times sleeping 5 seconds between checks
-u www.perl.org -u search.cpan.org -c 10 -s 5
as above but checks two sites
-u www.perl.org -c 5000 -s 60 -v 0
a long running check with one line output
-u www.perl.org -h -v 3
a detailed check with header dumped
-u www.perl.org -f 127.0.0.1
try my local copy of this site..
-u 127.0.0.1 -p ftp
access the ftp running locally.
BUGS:
If your connection is too fast you may experience an 'Illegal division by zero'
error in the speed calculation.. ;)
The process of downloading content and following links will anyway differ from that
of a real browser. You cannot trust the size at byte resolution: there is no cache effect
between checks, and no css nor favicons are downloaded. You'll still get results very
close to the real figure, and speeds quite close to real-world ones.
The rendering of the page with the -r option is fairly inaccurate, but it serves to give
an overall idea of the page.
AUTHOR:
dis
cip
ulus a
t pmonks org
DISCLAIMER:
use at your own risk.
EOH
exit 0;
}
there are no rules, there are no thumbs..