#!/usr/bin/perl -w
use strict;
#--
#-- Script: SETIStat.pl
#-- Purpose: Displays the information for the SETI@Home PM group
#--
#-- Author: Robert(Bob) Smith
#-- Date: December 11, 2001
#--
#-- Wish List: Add error handling if the requested HTML page is not retrieved
#--
#-- Rev Hist: 00.00.a 2001-12-11 rws Initial version
#--
#-- Notes: This script was created as a learning example for HTML::TableExtract
#--
#-- Use modules
use HTML::TableExtract;
use HTTP::Request::Common;
use LWP::UserAgent;
#-- Define constants
#--
#-- VERSION     : revision identifier of this script
#-- FIELD_DELIM : separator written between the columns of each output line
#-- SETI_URL    : address of the team statistics page that is downloaded.
#--               The literal must stay on a single line: any line break
#--               inside the quotes becomes part of the URL.
#-- ERR         : named exit codes handed to exit()
use constant VERSION     => '00.00.a';
use constant FIELD_DELIM => ',';
use constant SETI_URL    =>
  'http://setiathome.ssl.berkeley.edu/stats/team/team_86606.html';
use constant ERR         =>
  {
  'ok' => 0,
  };
#-- Main script body
#--
#-- Downloads the page named by SETI_URL, extracts one table from it and
#-- prints every row of that table to STDOUT as a FIELD_DELIM separated
#-- line.  Variables are declared where they are first needed instead of
#-- in a file-wide block.
#--
#-- Retrieve the HTML page
#--
#-- Note: An unsuccessful response is fatal.  Parsing the body of an error
#--       response would only produce empty or misleading output, so the
#--       script stops and reports the HTTP status line on STDERR.
#--
my $user_agent = LWP::UserAgent->new;
my $response   = $user_agent->request(GET(SETI_URL));
die 'Unable to retrieve ', SETI_URL, ': ', $response->status_line, "\n"
  unless $response->is_success;
#-- Extract the table
#--
#-- Note: TableExtract will handle tables nested within tables (outermost
#--       table is depth==0) as well as multiple tables within the same
#--       HTML document (first table is count==0).
#--       Since the information I wish to extract is not nested, depth
#--       will be 0, and since it is the second table on the page (the
#--       first table is the group description, web site, number of
#--       members, etc...,) the count will be 1.
#--
my $extractor = HTML::TableExtract->new(depth => 0, count => 1);
$extractor->parse($response->content);
#-- Display information
#--
#-- NOTE(review): newer HTML::TableExtract releases appear to document
#-- tables() as the preferred name for table_states() - confirm against
#-- the installed version before renaming the call.
#--
foreach my $table ($extractor->table_states)
{
  foreach my $row ($table->rows)
  {
    #-- Work on a copy of the first four cells.  Empty or missing cells
    #-- become '' so that printing them does not raise "uninitialized"
    #-- warnings under -w.
    my @cell = map { defined $$row[$_] ? $$row[$_] : '' } 0 .. 3;
    #-- Print data row (first cell looks like "<rank>) <name>")
    if ($cell[0] =~ /^(\d+)\)\s*(.*)/s)
    {
      #-- The explicit second capture replaces $' (and the s/// below
      #-- replaces $`); /s lets it span embedded newlines just as $' did.
      my ($rank, $name) = ($1, $2);
      $name    =~ s/\s+\z//;    # drop trailing whitespace from the name
      $cell[2] =~ s/^\s+//;     # drop leading whitespace from the third cell
      print join(FIELD_DELIM, $rank, $name, @cell[1 .. 3]), "\n";
    }
    #-- Print header row (any row whose first cell is not "<rank>) ...")
    else
    {
      #-- Multi-line captions are flattened onto a single output line
      $cell[1] =~ tr/\n/ /;
      $cell[3] =~ tr/\n/ /;
      print join(FIELD_DELIM, 'Rank', @cell), "\n";
    }
  }
}
#-- Exit
exit(ERR->{ok});
#-- End of script
-
Are you posting in the right place? Check out Where do I post X? to know for sure.
-
Posts may use any of the Perl Monks Approved HTML tags. Currently these include the following:
<code> <a> <b> <big>
<blockquote> <br /> <dd>
<dl> <dt> <em> <font>
<h1> <h2> <h3> <h4>
<h5> <h6> <hr /> <i>
<li> <nbsp> <ol> <p>
<small> <strike> <strong>
<sub> <sup> <table>
<td> <th> <tr> <tt>
<u> <ul>
-
Snippets of code should be wrapped in
<code> tags not
<pre> tags. In fact, <pre>
tags should generally be avoided. If they must
be used, extreme care should be
taken to ensure that their contents do not
have long lines (keep each line under 70 chars), in order to prevent
horizontal scrolling (and possible janitor
intervention).
-
Want more info? How to link
or How to display code and escape characters
are good places to start.