#!/usr/bin/perl
use warnings;
use strict;
use HTML::Parser;
# Parser state shared with the start/end/text handlers defined below.
my %inside = ();    # tag name => 1 while the tag is open, 0 once it is closed
my $tbl = -1;       # index of the current table; -1 until the first <table>
my $col;            # index of the current <th> cell within the current row
my $row;            # index of the current row within the current table
my @table = ();     # $table[$tbl][$row][$col] holds the text of one <th> cell

# Event-driven parse: each handler receives the argument named in its argspec.
my $p = HTML::Parser->new(
    handlers => {
        start => [ \&start, 'tagname' ],
        end   => [ \&end,   'tagname' ],
        text  => [ \&text,  'text' ],
    }
);

# parse_file returns undef when given a filename that cannot be opened,
# so fail loudly instead of silently printing nothing.
$p->parse_file(\*DATA)    # or filename
    or die "Cannot parse input: $!\n";

# output: one section per table, one tab-separated line per row,
# each line prefixed with its row index.
for my $t (0 .. $#table) {
    print "\nTable $t\n";

    # A table or row that produced no <th> cells has no array behind it;
    # fall back to an empty list rather than dereferencing undef, which is
    # fatal under "use strict".
    my $rows = $table[$t] || [];
    for my $r (0 .. $#{$rows}) {
        my $cells = $rows->[$r] || [];
        my $line  = join "\t", $r, @{$cells};
        print "$line\n";
    }
}
# Start-tag handler (called by the parser with the tag name).
#
# Advances the table/row/column cursors kept in the file-level variables
# $tbl, $row and $col, and creates an empty slot in @table for every <th>
# cell so the text handler has somewhere to store the cell's text.
# Also records the tag as currently open in %inside.
#
# A <th> that appears before any <table> or <tr> has no valid position;
# it is ignored (and not marked as open) instead of indexing @table with
# a negative subscript, which would be a fatal error.
sub start {
    my $tag = shift;
    if ($tag eq 'table') {
        ++$tbl;
        $row = -1;    # the next <tr> becomes row 0 of this table
    } elsif ($tag eq 'tr') {
        ++$row;
        $col = -1;    # the next <th> becomes column 0 of this row
    } elsif ($tag eq 'th') {
        # $tbl is tested first so an undefined $row is never compared.
        return if $tbl < 0 || $row < 0;
        ++$col;
        $table[$tbl][$row][$col] = '';    # or undef
    }
    $inside{$tag} = 1;
}
# End-tag handler: flags the named tag as closed in %inside.
sub end {
    my ($closed_tag) = @_;
    return $inside{$closed_tag} = 0;
}
# Text handler: stores text found inside a <th> cell into the current
# slot of @table.
#
# The text is appended rather than assigned because one cell can produce
# several text events (for example when the cell contains inline markup,
# or when the parser delivers a long run of text in pieces); assigning
# would keep only the last fragment.
sub text {
    my $str = shift;
    return unless $inside{'th'};

    # The cursors can point outside any cell while a <th> is still flagged
    # open (e.g. a <table> or <tr> started inside an unclosed <th>);
    # skip such text instead of writing to a negative subscript.
    return if $tbl < 0 || $row < 0 || $col < 0;

    $table[$tbl][$row][$col] .= $str;
}
__DATA__
Summary
Employee A | -0.82 |
---|
Employee B | -5.02 |
---|
Employee C | 19 |
---|
Summary
Employee A | |
---|
Employee B | |
---|
Employee C | |
---|