#!/usr/bin/perl -w
#
# Scan a main HTML report for rows whose second extracted column matches
# "failed", follow the link found in the third column of each such row,
# and print the rows of the table found in the linked failure report.
#
# Usage: <script> MAIN_REPORT.html

use strict;
use warnings;

use LWP::Simple;
use HTML::TableExtract;

# Replace this with LWP::Simple get() or somesuch for fetching the main
# report. For now the report is read from the file named on the command line.
my $report_path = shift(@ARGV);
die "usage: $0 main_report.html\n"
    unless defined $report_path && length $report_path;

my $html_report = do {
    open(my $fh, '<', $report_path)
        or die "cannot open $report_path: $!\n";
    local $/;    # slurp mode: read the whole file in one go
    my $content = <$fh>;
    close($fh);
    $content;
};

foreach my $row (rows_from_main_report($html_report)) {
    my ($status, $link_cell) = @{$row}[1, 2];

    # Empty table cells may come back undefined; treat them as "not failed".
    next unless defined $status && $status =~ /failed/i;

    $link_cell = '' unless defined $link_cell;

    # The main report is parsed with keep_html, so the cell still contains
    # the anchor markup. Accept double-quoted, single-quoted and unquoted
    # attribute values, and stop at the end of the value rather than
    # swallowing the rest of the tag.
    my ($link) = $link_cell =~ /href\s*=\s*["']?([^"'\s>]+)/i;
    unless ($link) {
        print STDERR "no link from row ($link_cell)\n";
        next;
    }

    # Printed once before the fetch ...
    print "$link\n";

    my $html = get($link);
    unless ($html) {
        print STDERR "no html from link $link\n";
        next;
    }

    # ... and once more after the fetch succeeded (output kept as-is).
    print "$link\n";

    foreach my $fail_row (rows_from_fail_report($html)) {
        # do whatever here
        print join(' : ', map { defined $_ ? $_ : '' } @$fail_row), "\n";
    }
}

# rows_from_main_report($html)
#
# Parse $html and return the rows of the first table whose header cells
# match "computer", "data" and "time". Cell markup is kept (keep_html) so
# that the caller can pull the href out of a cell.
#
# Returns a list of array references, one per row; an empty list when no
# matching table is found. Dies if $html is missing or empty.
sub rows_from_main_report {
    my $html = shift or die "HTML string required\n";

    my $te = HTML::TableExtract->new(
        headers   => [qw(computer data time)],
        keep_html => 1,
    );

    # Parse the argument, not the file-level $html_report variable.
    $te->parse($html);

    my $ts = $te->first_table_state_found;
    return unless $ts;    # no table with these headers
    return $ts->rows;
}

# rows_from_fail_report($html)
#
# Parse $html and return the rows of the first table whose header cells
# match "job", "date", "client", "class", "schedule", "master" and "desc".
#
# Returns a list of array references, one per row; an empty list when no
# matching table is found. Dies if $html is missing or empty.
sub rows_from_fail_report {
    my $html = shift or die "HTML string required\n";

    my $te = HTML::TableExtract->new(
        headers => [qw(job date client class schedule master desc)],
    );
    $te->parse($html);

    my $ts = $te->first_table_state_found;
    return unless $ts;    # no table with these headers
    return $ts->rows;
}