..tried to use HTML::Parser, but it ended up pretty ugly,
What didn't you like about using HTML::Parser?
#!/usr/bin/perl
use warnings;
use strict;
use HTML::Parser;
# Parser state shared with the start/end/text handlers defined below.
my %inside = ();                 # tag name => 1 after its start tag, 0 after its end tag
my $tbl = -1; my $col; my $row;  # indices of the table / column / row being filled
my @table = ();                  # $table[table][row][col] = header cell text
# Register the three event handlers; each one receives the tag name
# (start/end) or the raw text of the event (text).
my $p = HTML::Parser->new(
    handlers => {
        start => [ \&start, 'tagname' ],
        end   => [ \&end,   'tagname' ],
        text  => [ \&text,  'text' ],
    }
);
# parse_file returns undef (with the reason in $!) when it is handed a
# filename that cannot be opened, so do not carry on with empty results.
$p->parse_file(\*DATA) # or filename
    or die "Cannot parse input: $!\n";
# output
for my $t (0..$#table){
    print "\nTable $t\n";
    # A table (or a row) that contained no <th> cell has no entry in @table;
    # fall back to an empty list instead of dereferencing undef.
    my $rows = $table[$t] || [];
    for my $r (0..$#{$rows}){
        my $cells = $rows->[$r] || [];
        my $line = join "\t",$r,@{$cells};
        print "$line\n";
    }
}
# Start-tag handler. Receives the tag name, marks that tag as open in
# %inside and advances the table / row / column counters:
#   <table> starts a new table and resets the row counter,
#   <tr>    starts a new row and resets the column counter,
#   <th>    starts a new cell, initialised to the empty string.
sub start {
    my $tag = shift;
    $inside{$tag} = 1;
    if ($tag eq 'table'){
        ++$tbl; $row = -1;
    } elsif ($tag eq 'tr'){
        ++$row; $col = -1;
    } elsif ($tag eq 'th'){
        # A <th> outside any <table>/<tr> would index @table with -1,
        # which is fatal on an empty array; skip such stray cells.
        return if $tbl < 0 || $row < 0;
        ++$col;
        $table[$tbl][$row][$col] = ''; # or undef
    }
    return;
}
# End-tag handler: flags the tag that was just closed as no longer open,
# so the text handler stops collecting once </th> has been seen.
sub end {
    my ($closed) = @_;
    $inside{$closed} = 0;
}
# Text handler. Receives a piece of document text and, while a <th> is
# open, adds it to the cell currently being filled.
sub text {
    my $str = shift;
    return unless $inside{'th'};
    # No cell has been opened inside a table row yet (stray <th>):
    # there is nowhere valid to store the text.
    return if $tbl < 0 || $row < 0 || $col < 0;
    # The parser may report the text of one cell in several pieces (input
    # chunk boundaries, inline markup inside the cell), so append rather
    # than overwrite, otherwise only the last piece would survive.
    $table[$tbl][$row][$col] .= $str;
    return;
}
__DATA__
</table></body><body bgcolor="black"><h1>
Summary</h1><table border="1"><tr><th>Employee A</th><th>-0.82</th>
</tr><tr><th>Employee B</th><th>-5.02</th>
</tr><tr><th>Employee C</th><th>19</th>
</tr></table></body><body bgcolor="black"><h1>
Summary</h1><table border="1"><tr><th>Employee A</th><th></th>
</tr><tr><th>Employee B</th><th></th>
</tr><tr><th>Employee C</th><th></th>
poj