Beefy Boxes and Bandwidth Generously Provided by pair Networks
go ahead... be a heretic
 
PerlMonks  

Gilimanjaro's scratchpad

by Gilimanjaro (Hermit)
on Jun 14, 2004 at 10:24 UTC ( [id://366473]=scratchpad: print w/replies, xml ) Need Help??

Code for graq:
#!/usr/bin/perl
# Decode a feed item whose text is UTF-8 that has been escaped: the teaser
# carries the UTF-8 bytes as HTML numeric entities, the title carries them
# as raw \x{..} byte escapes. Prints the decoded title and teaser.
use strict;
use warnings;

# Tell perl to use the $LANG environment encoding for STDOUT/IN/ERR.
# Check 'man 3 open' for details; this is quite important.
use open ':locale';

use HTML::Entities;       # used to decode HTML &...; entities
use Encode qw(decode);    # used to decode utf8/iso into perl's internal
                          # representation (which is utf8)

my $data = {
    'href'   => 'http://www.accountancyage.com/accountancyage/news/2159769/kpmg-sets-retail-think-tank',
    'teaser' => '<p><small>AccountancyAge.com, <a href="http://www.accountancyage.com/">Accountancy Age</a>, Thursday 6 July 2006 at 00:00:00</small></p><p><i> Firm forms partnership with retail research group </i></p><p>KPMG has launched the &#226;&#128;&#152;Retail Think Tank&#226;&#128;&#153; (RTT) aimed at establishing &#226;&#128;&#152;the true health and status\'&#194;&#160;of the retail sector. The Big Four firm has joined forces with retail research group...</p><p><small>&gt;&nbsp;<a href="http://www.accountancyage.com/accountancyage/news/2159769/kpmg-sets-retail-think-tank"><i>Read the full article</i></a></small></p>',
    'title'  => "KPMG sets up retail \x{e2}\x{80}\x{98}think tank\x{e2}\x{80}\x{99}",
};

# Teaser: the entities have to be resolved first (in place, since
# decode_entities is called in void context); only then do we have the
# byte values that make up the UTF-8 sequences.
# NOTE(review): the named entity &nbsp; resolves to a single U+00A0, which is
# not a valid UTF-8 sequence on its own; decode() presumably replaces it with
# U+FFFD -- confirm whether that matters for the output.
my $html = $data->{teaser};
decode_entities($html);            # because it's html, we need to do this first
$html = decode('utf8', $html);     # now 'parse' the utf8

# Title: this is 'raw' utf8 (the \x{e2} sequences indicate this), so it only
# needs the decode step.
my $title = $data->{title};
$title = decode('utf8', $title);

print "** $title:\n";
print "$html\n";
This generates a nice hash keyed by region name, where each value is an array of hashes holding all the data for that region...
#!/usr/bin/perl
# Parse the table rows of IDQ60606.shtml into %data, keyed by region name,
# where each value is an array of per-row hashes (one entry per column name).
# Then dump the structure and print the average 'rain' value for each region.
use warnings;
use strict;

use HTML::TreeBuilder;
use Data::Dumper;

my $tree = HTML::TreeBuilder->new_from_file('IDQ60606.shtml');

# Column names, assigned to the <td> cells of a data row in order.
# NOTE(review): 'gistknt' looks like a typo for 'gustknt'; left unchanged
# because it is a key in the emitted data.
my @cellnames = qw(
    station time temperature dewpoint relhumidity deltat wind_dir
    speedkmh gustkmh speedknt gistknt pressure rain
);

my $region;    # text of the most recent single-cell row
my %data;      # region name => [ \%row, ... ]

for my $row ($tree->look_down('_tag' => 'tr')) {
    my @cells = $row->look_down('_tag' => 'td');
    print scalar @cells, "\n";

    # A row with exactly one cell sets the current region name.
    if (@cells == 1) {
        $region = $cells[0]->as_trimmed_text;
    }

    # A row with one cell per column name is stored under the current region.
    if (@cells == @cellnames) {
        my %row;
        @row{@cellnames} = map { $_->as_trimmed_text } @cells;
        push @{ $data{$region} }, \%row;
    }
}

print "$_\n" for keys %data;
print Dumper \%data;

while (my ($region_name, $rows) = each %data) {
    my $raintotal = 0;
    for my $entry (@$rows) {
        my $rain = $entry->{rain};
        $rain = 0 if $rain eq '-';    # '-' is counted as zero
        $raintotal += $rain;
    }

    # Guard against dividing by zero for a region with no rows.
    my $rainaverage = @$rows ? ($raintotal / @$rows) : undef;
    print "$region_name: $rainaverage\n";
}

#!/usr/bin/perl
# Print the trimmed text of every <td class="rowlevel1"> element found in
# IDQ60606.shtml, one per line.
use warnings;
use strict;

use HTML::TreeBuilder;

my $tree = HTML::TreeBuilder->new_from_file('IDQ60606.shtml');

my @cells = $tree->look_down(
    '_tag'  => 'td',
    'class' => 'rowlevel1',
);

print $_->as_trimmed_text, "\n" for @cells;
gives
PENINSULA GULF COUNTRY NORTHERN GOLDFIELDS and UPPER FLINDERS NORTH TROPICAL COAST and TABLELANDS HERBERT and LOWER BURDEKIN CENTRAL COAST - WHITSUNDAYS CAPRICORNIA CENTRAL HIGHLANDS - COALFIELDS CENTRAL WEST NORTHWEST CHANNEL COUNTRY MARANOA and WARREGO DARLING DOWNS and GRANITE BELT WIDE BAY and BURNETT SOUTHEAST COAST CORAL SEA
Some of my snippets...

# All directories in our parent's path.
# Builds %dirs mapping each directory's base name (the text after the last
# '/') to the path returned by glob. In list context the match returns its
# single capture, which becomes the hash key.
%dirs = map  { /^.*\/(.*)/ => $_ }
        grep { -d }
        glob "../*";
# De-crapper (for use after Word HTML idiocy).
# Reads all input (files named on the command line, or STDIN), strips the
# attributes from <li>, <p> and <h2> tags, removes <o:...>, <!...>, <span>
# and attributed <div ...> tags entirely, and prints the result.
my $file = join '', <>;

# \b keeps these from matching longer tag names that merely start with the
# same letters (e.g. <pre>, <param>, <link>), which would otherwise be
# rewritten into <p> / <li>.
$file =~ s/<li\b.*?>/<li>/gs;
$file =~ s/<p\b.*?>/<p>/gs;
$file =~ s/<\/?o:.*?>//gs;
$file =~ s/<!.*?>//gs;
$file =~ s/<h2\b.*?>/<h2>/gs;

# NOTE(review): only opening <div ...> tags that carry attributes are
# removed; bare <div> and closing </div> are left in place -- confirm that
# is intended.
$file =~ s/<div .*?>//gs;
$file =~ s/<\/?span\b.*?>//gs;

print $file;
# Password generator.
# Builds $pw from 8 to 11 characters picked at random from @chars.
# Uses the built-in rand(), which is not cryptographically secure.
my @chars  = ('.', '!', '#', '@', '$', '/', 0 .. 9, 'A' .. 'Z', 'a' .. 'z');
my $length = 8 + int rand 4;    # integer in 8 .. 11
my $pw     = join '', map { $chars[ rand @chars ] } 1 .. $length;
# Java namestyle to SQL namestyle regex.
# Operates on $_: every run of capital letters that does not begin at the
# start of the string is lower-cased and prefixed with an underscore
# (e.g. "fooBarBaz" becomes "foo_bar_baz"). The first character of the
# string is never changed.
s/(?<!^)([A-Z]+)/_\L$1\E/g;
# Environment dumper.
# Prints every environment variable as "NAME=>value", one per line, in the
# order each() returns them.
while (my ($name, $value) = each %ENV) {
    printf "%s=>%s\n", $name, $value;
}
Log In?
Username:
Password:

What's my password?
Create A New User
Domain Nodelet?
Chatterbox?
and the web crawler heard nothing...

How do I use this?Last hourOther CB clients
Other Users?
Others avoiding work at the Monastery: (2)
As of 2024-04-20 03:58 GMT
Sections?
Information?
Find Nodes?
Leftovers?
    Voting Booth?

    No recent polls found