Every time I invoke HTML::TableExtract's
parse() method, it doesn't re-initialise the object; instead it appends the newly parsed tables to the ones the object already holds. See the example below.
I want to use it on a new table every time (iterating through a paged website with a scraper), and it doesn't make sense to keep the previous table.
My workaround is to just re-initialise the object with new(), but that feels wrong. I've read through the POD for TableExtract and I'm baffled. There doesn't seem to be an option to control this behaviour, and there doesn't seem to be a method to re-initialise the object, either in TableExtract or in HTML::Parser.
use strict;
use warnings;
use diagnostics;
use HTML::TableExtract;
# Two small HTML documents holding one table each. The literal text
# (including the leading newline) is passed to the parser unchanged.
my $table_1 = q{
<table><tr><td>foo</td><td>bar</td></tr>
<tr><td>baz</td><td>quux</td></tr></table>};
my $table_2 = q{
<table><tr><td>bof</td><td>xyzzy</td></tr>
<tr><td>bat</td><td>gazonk</td></tr></table>};

# Print every table the given extractor currently reports: a header line
# with the table's coordinates, followed by one comma-joined line per row.
sub dump_tables {
    my ($extractor) = @_;

    for my $table ($extractor->tables) {
        my $coords = join ',', $table->coords;
        print "Table ($coords):\n";

        for my $cells ($table->rows) {
            print join(',', @{$cells}), "\n";
        }
    }

    return;
}

# One extractor is reused for both documents on purpose: this script
# demonstrates what a second parse() call does to an already-used object.
my $te = HTML::TableExtract->new();

$te->parse($table_1);
dump_tables($te);

## what goes here if I want to dump table_1 ?
$te->parse($table_2);
dump_tables($te);
# Signature: each letter in b..v is replaced by the letter before it, and the
# /s flag squeezes the resulting doubled letters into one. Spaces are left
# alone. The decoded text is then printed without a trailing newline.
$_ = 'kkvvttuu bbooppuuiiffss qqffssmm iibbddllffss';
tr/b-v/a-z/s;
print;