use warnings;
use strict;
use Data::Dump;
use Web::Scraper::LibXML;
# Sample document: one <p class="myClass"> element whose text spans several
# lines. The single-quoted heredoc keeps the content literal (no interpolation).
my $sample_html = <<'END_HTML';
<p class="myClass">Blah
Blah
Blah
</p>
END_HTML

# Scraper that stores the text of the <p> matched by the XPath expression
# (class attribute containing "myClass") under the 'paragraph' key.
my $paragraph_scraper = scraper {
    process(
        '//p[contains(@class, "myClass")]',
        paragraph => 'TEXT',
    );
};

# scrape() is handed a reference to the HTML string; dd dumps the result.
dd $paragraph_scraper->scrape(\$sample_html);
__END__
{ paragraph => "Blah\n\nBlah\n\nBlah\n" }
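Alternatively, keep the default Web::Scraper parser and monkey-patch Web::Scraper::build_tree (after Web::Scraper has been loaded) so that the tree is built with no_space_compacting enabled: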
{
    # Assigning a code ref to the glob of an already-defined sub emits a
    # "Subroutine ... redefined" warning under `use warnings`; the override
    # is intentional, so silence only that category, only in this block.
    no warnings 'redefine';

    # Replacement for Web::Scraper::build_tree.
    #
    # Arguments:
    #   $self - the invocant (unused here)
    #   $html - the HTML text handed to the parser
    #
    # Returns the HTML::TreeBuilder::XPath object after parse() and eof()
    # have been called on it.
    *Web::Scraper::build_tree = sub {
        my ($self, $html) = @_;

        my $tree = HTML::TreeBuilder::XPath->new;

        # store_comments is only called when the parser provides it.
        $tree->store_comments(1) if $tree->can('store_comments');

        # The point of this override: per the HTML::TreeBuilder docs this
        # option stops whitespace in text from being compacted, so line
        # breaks inside the scraped text should survive.
        $tree->no_space_compacting(1);
        $tree->ignore_unknown(0);

        $tree->parse($html);
        $tree->eof;

        return $tree;
    };
}
|