#!/usr/bin/perl -w
use strict;
#--
#-- Script: SETIStat.pl
#-- Purpose: Displays the information for the SETI@Home PM group
#--
#-- Author: Robert(Bob) Smith
#-- Date: December 11, 2001
#--
#-- Wish List: Add error handling if the requested HTML page is not retrieved
#--
#-- Rev Hist: 00.00.a 2001-12-11 rws Initial version
#--
#-- Notes: This script was created as a learning example for
#--        HTML::TableExtract
#--
#-- Use modules
use HTML::TableExtract;
use HTTP::Request::Common;
use LWP::UserAgent;
#-- Define constants
use constant VERSION     => '00.00.a';  # Script revision (see Rev Hist above)
use constant FIELD_DELIM => ',';        # Separator printed between output fields
#-- Team statistics page to fetch. The URL has to be one unbroken string
#-- literal: a line break inside the quotes becomes part of the URL.
use constant SETI_URL    =>
    'http://setiathome.ssl.berkeley.edu/stats/team/team_86606.html';
#-- Process exit codes, looked up by name
use constant ERR =>
{
    'ok' => 0,
};
#-- Define variables
my $gExtractedTable;    # HTML::TableExtract object used to parse the page
my $gHTMLPage;          # Response returned by the user agent's request
my $gName;              # Member's name
my $gRank;              # Member's ranking
my $gRow;               # Pointer to rows in extracted tables
my $gTable;             # Pointer to extracted tables
my $gUserAgent;         # LWP::UserAgent
#-- Retrieve the HTML page
$gUserAgent = LWP::UserAgent->new;
$gHTMLPage = $gUserAgent->request(GET(SETI_URL));
#-- Stop here on a failed request; otherwise an error page (or an empty
#-- body) would be handed to the table parser as if it were the statistics.
die 'Unable to retrieve ', SETI_URL, ': ', $gHTMLPage->status_line, "\n"
    unless $gHTMLPage->is_success;
#-- Extract the table
#--
#-- Note: TableExtract will handle tables nested within tables (outermost
#--       table is depth==0) as well as multiple tables within the same
#--       HTML document (first table is count==0).
#--       Since the information I wish to extract is not nested, depth
#--       will be 0, and since it is the second table on the page (the
#--       first table is the group description, web site, number of
#--       members, etc...), the count will be 1.
#--
$gExtractedTable = HTML::TableExtract->new(depth => 0, count => 1);
$gExtractedTable->parse($gHTMLPage->content);
#-- Display information
#--
#-- Every row of the extracted table is printed as one delimited line.
#-- A data row starts with a cell of the form "<digits>) <name>"; its rank
#-- and name are printed as two separate fields. Any other row is treated
#-- as the header row and gets a literal 'Rank' field in front.
foreach my $table ($gExtractedTable->table_states)
{
    foreach my $row ($table->rows)
    {
        #-- Work on a four-cell copy with '' in place of undef, so a short
        #-- row or an undefined cell does not trigger warnings under -w.
        my @cell = map { defined $_ ? $_ : '' } @{$row}[0 .. 3];

        #-- Print data row
        if ($cell[0] =~ /^(\d+)\)\s*(.*)/s)
        {
            my ($rank, $name) = ($1, $2);
            $name    =~ s/\s+$//;   # strip trailing whitespace from the name
            $cell[2] =~ s/^\s+//;   # strip leading whitespace from the third cell
            print join(FIELD_DELIM, $rank, $name, @cell[1 .. 3]), "\n";
        }
        #-- Print header row
        else
        {
            #-- Keep multi-line header captions on a single output line
            $cell[1] =~ tr/\n/ /;
            $cell[3] =~ tr/\n/ /;
            print join(FIELD_DELIM, 'Rank', @cell), "\n";
        }
    }
}
#-- Exit
exit(ERR->{ok});
#-- End of script