use strict;
use warnings;
use HTML::Parser;
# Two sample documents: $html[0] is fed to the parser as HTML and $html[1]
# as XHTML (see the parse calls further down).
# NOTE(review): the here-doc openers and the markup inside both documents
# appear to have been stripped from this copy (only the text and the EOT
# terminators remain) -- restore them from the original source; as written
# this statement presumably does not compile.
my @html = (<
One
Five
Seven
EOT
, <
One
Five
*/ console.log(' Six '); /*
]]>*/
Eight
]]>
EOT
);
# True while the parser is inside an <a> element whose href was accepted.
my $state = 0;

# Event-driven parser that prints one "href<TAB>link text" line per anchor.
my $p = HTML::Parser->new(
    api_version => 3,

    # Start tags: for <a href="..."> print the URL and start capturing text.
    start_h => [
        sub {
            my ($tagname, $attr, $self) = @_;
            return unless $tagname eq 'a';

            # Anchors without an href (or with a false one such as "" or
            # "0") are skipped entirely.
            my $href = $attr->{href} or return;

            $state = 1;
            print "$href\t";

            # While inside the anchor, print every decoded text chunk with
            # its surrounding whitespace removed.
            $self->handler(
                text => sub {
                    my ($dtext) = @_;
                    print trim($dtext);
                },
                'dtext, self',
            );
            return;
        },
        'tagname, attr, self',
    ],

    # End tags: on </a> for an anchor we are tracking, finish the output
    # line and stop capturing text.
    end_h => [
        sub {
            my ($tagname, $self) = @_;
            return unless $tagname eq 'a' && $state;

            $state = 0;
            print "\n";

            # Clear the text handler so text outside anchors is not printed.
            $self->handler(text => '');
            return;
        },
        'tagname, self',
    ],
);

print "HTML:\n";
$p->parse($html[0]);

# Signal end of document: this flushes anything the parser is still
# buffering and resets it, so leftovers from the first document cannot
# leak into the second one.
$p->eof;

print "XHTML:\n";
$p->xml_mode(1);
$p->marked_sections(1);
$p->parse($html[1]);
$p->eof;
# trim($text)
# Returns a copy of $text with leading and trailing whitespace removed.
# The caller's value is left untouched.
sub trim {
    my ($text) = @_;
    $text =~ s/^\s+|\s+$//g;
    return $text;
}