http://qs321.pair.com?node_id=224763
Category: Web Stuff
Author/Contact Info /msg Mr. Muskrat
Description:

I am still working on my top Seti@home teams script.

This uses: (in no particular order)
grantm's XML::Simple,
samtregar's HTML::Template,
mojotoad's HTML::TableExtract,
gbarr's Net::FTP,
LWP::UserAgent,
HTML::Entities
and DBI.

Many thanks to the authors of these wonderful modules!

Output is available at Top 40 Seti@home Teams.

The URL that Berkeley uses for the XML team stats may change once they "officially" announce it. They still need to fix some things... See my Malformed XML thread at the Seti@home Bulletin Board for more infomation.update 4

Updated the ParseTeamXML subroutine so that it no longer uses a hash as a reference. Thanks tye and thunders for helping me resolve this issue.

Update 2 added color coding for rise/fall in rank.

Update 3 Updated code to work around duplicate teams in the top teams page.

Update 4 Thread is AWOL.

#!/usr/bin/perl -w
use strict;
use XML::Simple;
use LWP::UserAgent;
use HTML::Entities;
use HTML::TableExtract;
use HTML::Template;
use DBI;
use Net::FTP;

# --- Config section --- #

# !!!
# You will need to fill out the DB and FTP info sections.
# !!!

# once Berkeley fixes their malformed XML, set this to 0
my $malformed_xml = 1;

# Seti@home URL info
my %seti = (
  baseurl => 'http://setiathome.ssl.berkeley.edu/',
  teamlookup => 'fcgi-bin/fcgi?cmd=team_lookup_xml&name=',
  topteams => 'stats/team/team_type_0.html',
);

# DB info
my %db = (
  type => 'mysql',
  name => 'seti',
  host => '',
  user => '',
  pass => '',
);

# FTP info
my %ftp = (
  host => '',
  user => '',
  pass => '',
  dir  => '',
  file => 'teams.html',
);

# Number of teams to track
my $num_teams = 40;

# --- End of config section --- #

my $dbh = DBI->connect("DBI:$db{type}:$db{name}:$db{host}", $db{user},
+ $db{pass},
  { PrintError => 0}) || die $DBI::errstr;

# To enable debugging, set an environment variable named debug to a tr
+ue value
# or add a command line parameter that has a true value
my $debug = $ARGV[0] || $ENV{debug} || 0;

print "Getting Top Teams...\n" if $debug;

my @teams = GetTeams();

print "Parsing Teams...\n" if $debug;

my ($static, $dynamic) = ParseTeams(@teams);

$dbh->disconnect();

# Begin creation of HTML file that will be uploaded

print "Creating HTML file...\n" if $debug;

my $time = gmtime();

my (@stemp, @dtemp);
for my $num (0..$#teams) {
  my $teamname = $$dynamic{$num};
  my $bteam = $$static{$teamname};
  my $team = $$dynamic{$teamname};
  my %class = (
    rank    => '',
    members => '',
    results => '',
  );
  if ($$team{rank} > $$bteam{rank}) {
    $class{rank} = 'red';
  } elsif ($$team{rank} < $$bteam{rank}) {
    $class{rank} = 'green';
  }
  if ($$team{members} > $$bteam{members}) {
    $class{members} = 'green';
  } elsif ($$team{members} < $$bteam{members}) {
    $class{members} = 'red';
  }
  if ($$team{results} > $$bteam{results}) {
    $class{results} = 'green';
  } elsif ($$team{results} < $$bteam{results}) {
    $class{results} = 'red';
  }
  push @dtemp, { rank => $$team{rank}, name => $teamname, members => $
+$team{members}, results => $$team{results}, c => $class{rank}, cm => 
+$class{members}, cr => $class{results} };  
  $teamname = $$static{$num};
  $bteam = $$static{$teamname};
  push @stemp, { rank => $$bteam{rank}, name => $teamname, members => 
+$$bteam{members}, results => $$bteam{results} };
}

# open the html template for the Rank Standings Page
my $top = HTML::Template->new(filename => 'top.tmpl');
# fill in the parameters
$top->param(NUM_TEAMS => $num_teams);
$top->param(TIME => $time);
$top->param(STEMP => [@stemp]);
$top->param(DTEMP => [@dtemp]);

print "Writing the HTML file...\n" if $debug;

# create the Top Teams Page
open(HTML, '>', $ftp{file});
print HTML $top->output;
close(HTML);

print "Uploading the HTML file...\n" if $debug;

UploadTeams($ftp{host}, $ftp{user}, $ftp{pass}, $ftp{dir}, $ftp{file})
+;

exit;

# Teams processing subroutines

sub GetTeams {
  my $url = $seti{baseurl}.$seti{topteams};
  print "Grabbing top teams: $url\n" if $debug;
  my $html = GetURL($url);
  my $te  = new HTML::TableExtract( headers => [qw(Name Members Result
+s Total Average)]);
  $te->parse($html);
  my @teams;
  my $lastteam = '';
  my $rank = 0;
  foreach my $ts ($te->table_states) {
    foreach my $row ($ts->rows) {
      my ($team) = $$row[0] =~ /\d+\)\s+(.*)\s*$/;
      next if ($team eq $lastteam);
      $rank++;
      print "$rank $team\n" if ($debug && $rank <= $num_teams);
      push(@teams, $team) if ($rank <= $num_teams);
      $lastteam = $team;
    }
  }
  return @teams;
}

sub ParseTeams {
  my @teams = @_;
 
  # @static will hold the static data
  # @dynamic will hold the dynamic data
  # using arrays until sorting complete
  my (@static, @dynamic);
  for my $num (0..$#teams) {
    sleep(15);
    my $name = $teams[$num]; # team name
    my $url = $seti{baseurl}.$seti{teamlookup}.$name;
    print "Team URL: '$url'\n" if $debug;
    my $file = GetURL($url);
    $file = FixXML($file) if $malformed_xml;
    WriteXML("$name.xml", $file) if $debug;
    my ($bmembers, $bresults, $members, $results) = ParseTeamXML($file
+); 
    push(@static, [$name, $bmembers, $bresults]);
    push(@dynamic, [$name, $members, $results]);
  }

  # sort on the results in @dynamic
  @dynamic = sort { $b->[2] <=> $a->[2] } @dynamic;

  # sorting complete, move the data into hashes for easier lookup
  my (%static, %dynamic);
  for my $num (0..$#teams) {
    my ($bname, $bmembers, $bresults) = @{$static[$num]};
    my ($name, $members, $results) = @{$dynamic[$num]};
    my $rank = $num + 1;
    $static{$bname} = {rank => $rank, members => $bmembers, results =>
+ $bresults};
    $dynamic{$name} = {rank => $rank, members => $members, results => 
+$results};
    $static{$num} = $bname;
    $dynamic{$num} = $name;
    InsertTeamValues($dbh, "bteams", $rank, $bname, $bmembers, $bresul
+ts);
    InsertTeamValues($dbh, "teams", $rank, $name, $members, $results);
  }

  return (\%static, \%dynamic);
}

# LWP subroutines

sub GetURL {
  my $url = shift;
  my $ua = new LWP::UserAgent(
    keep_alive => 1,
    timeout => 240,
  );
  $ua->agent('KWSNStats/0.4');
  my $file;
  my $pagelen;
  until ($file and $pagelen > 0) {
    sleep(15);
    print "Grabbing $url\n" if $debug;
    my $req = HTTP::Request->new(GET => $url);
    my $res = $ua->request($req);
    print $res->code, " ", $res->message,"\n" if $debug;
     if ($res->is_success) {
      $file = $res->content;
      $pagelen = length($file);
    } else {
      warn "Unable to get $url";
      $pagelen = -1
    }
    print "page length: $pagelen\n" if $debug;
  }
  return $file;
}

# XML subroutines

sub FixXML {
  # fix Berkeley's malformed XML and remove some unnecessary info
  my $xml = shift;
  $xml =~ s!<profile>.*?</profile>!!sg;
  $xml =~ s!<url>.*?</url>!!sg;
  $xml =~ s!<totalcpu>.*?</totalcpu>!!sg;
  $xml =~ s!<avecpu>.*?</avecpu>!!sg;
  $xml =~ s!<datelastresult>.*?</datelastresult>!!sg;
  $xml =~ s!<country>.*?</country>!!sg;
  $xml =~ s/\n+/\n/g;
  $xml =~ s/[|]/ /g; # turn chr(127) and chr(27) into spaces
  my @xml = split('\n', $xml);
  $xml = '';
  for my $line (@xml) {
    my ($name) = $line =~ m!<name>(.*)</name>!;
    if (defined $name) {
      print $name,$/ if $debug;
      encode_entities($name, '<&>³');
      $line = "<name>$name<\/name>";
    }
    $xml .= $line."\n";
  }
  $xml =~ s!&sup3;!3!g;
  return $xml;
}

sub ParseTeamXML {
  my $xml = shift;
  my $xs = new XML::Simple( keyattr => 'topmembers');
  my $ref = $xs->XMLin($xml);
  my $bresults = $$ref{numresults};
  my $bmembers = $$ref{nummembers};
  my $results = 0;
  my @members = @{ $ref->{topmembers}{member} };
  my $members = scalar @members;
  for my $member (@members) {
    $results += $$member{numresults};
  }
  if ($debug) {
    print "Berkeley Members: $bmembers\n";
    print "Berkeley Results: $bresults\n";
    print "XML Members: $members\n";
    print "XML Results: $results\n";
  }
  return ($bmembers, $bresults, $members, $results);
}

sub WriteXML {
  my $file = shift;
  my $xml = shift;
  print "Writing XML out to $file.\n";
  open(XML, '>', $file);
  print XML $xml;
  close(XML);
}

# DB subroutines

sub InsertTeamValues {
  my ($dbh, $table, $rank, $team, $members, $results) = @_;
  my $sth = $dbh->prepare("INSERT INTO $table (rank, team, members, re
+sults, datetime) VALUES (?,?,?,?,NOW())") or die $dbh->errstr;
   $sth->execute($rank, $team, $members, $results) or die $sth->errstr
+;
}

# FTP subroutines

sub UploadTeams {
  my ($host, $user, $pass, $dir, $file) = @_;
  my $ftp = Net::FTP->new($host, user => $user, pass => $pass, Debug =
+> $debug);
  $ftp->login($user, $pass);
  $ftp->cwd($dir);                       
  $ftp->put($file);
  $ftp->quit;
}


Template (top.tmpl)

<html>
<head>
<title>Sir Muskrat's Top <TMPL_VAR NAME=NUM_TEAMS> Seti@home Teams</ti
+tle>
<style type="text/css">
      body {       color: #FFFFFF;
        background-color: #000000; }
      td {         color: #FFFFFF; }
      td.green {   color: #00FF00; }
      td.red {     color: #FF0000; }
      a:link {     color: #CC99FF; }
      a:visited {  color: #CC99FF; }
</style>
</head>
<body bgcolor="#000000" text="#FFFFFF" link="#CC99FF" vlink="#CC99FF" 
+alink="#CC99FF">
<h1 align="center">Sir Muskrat's Top <TMPL_VAR NAME=NUM_TEAMS> Seti@ho
+me Teams</h1>
<p>Last updated: <TMPL_VAR NAME=TIME></p>
<p><table>
<tr><td>
<table border="1" align="center" valign="middle" cellspacing="2" cellp
+adding="2">
<caption>Static Top <TMPL_VAR NAME=NUM_TEAMS> Teams</caption>
<tr><th>Rank</th><th>Team Name</th><th>Members</th><th>Results</th></t
+r>
<TMPL_LOOP NAME=STEMP>
  <tr>
    <td><TMPL_VAR NAME=RANK></td>
    <td><TMPL_VAR NAME=NAME></td>
    <td><TMPL_VAR NAME=MEMBERS></td>
    <td><TMPL_VAR NAME=RESULTS></td>
  </tr>
</TMPL_LOOP>
</table></td>
<td><table border="1" align="center" valign="middle" cellspacing="2" c
+ellpadding="2">
<caption>Dynamic Top <TMPL_VAR NAME=NUM_TEAMS> Teams</caption>
<tr><th>Rank</th><th>Team Name</th><th>Members</th><th>Results</th></t
+r>
<TMPL_LOOP NAME=DTEMP>
  <tr>
    <td class='<TMPL_VAR NAME=C>'><TMPL_VAR NAME=RANK></td>
    <td><TMPL_VAR NAME=NAME></td>
    <td class='<TMPL_VAR NAME=CM>'><TMPL_VAR NAME=MEMBERS></td>
    <td class='<TMPL_VAR NAME=CR>'><TMPL_VAR NAME=RESULTS></td>
  </tr>
</TMPL_LOOP>
</table>
</td></tr>
</table></p>
<p>Static is calculated by Berkeley.  Dynamic is calculated by adding 
+up the results for all members.<br />
<font color="#ff0000">Red</font> means the dynamic number is lower tha
+n the static while <font color="#00ff00">green</font> indicates a hig
+her number.</p>
</body>
</html>