use strict;
use warnings;
use HTML::Parser;

# Prints one "href<TAB>link text" line for every <a href="..."> element found
# in two sample documents. The first is parsed with the parser's default
# settings, the second after enabling xml_mode and marked_sections.
#
# NOTE(review): the markup of both samples was reconstructed. Only the literal
# text fragments (One, Five, Six, Seven, Eight, the console.log call and the
# CDATA terminators) survived in the copy this file was restored from. Confirm
# against the original sample documents before relying on the exact output.
#
# The heredocs are single-quoted so that '$' or '@' in markup is never
# interpolated.
my @html = (<<'HTML_DOC', <<'XHTML_DOC');
<html>
<body>
<a href="one.html">One</a>
<a href="five.html">Five</a>
<a href="seven.html">Seven</a>
</body>
</html>
HTML_DOC
<html xmlns="http://www.w3.org/1999/xhtml">
<body>
<a href="one.html">One</a>
<a href="five.html">Five</a>
<script type="text/javascript">/*<![CDATA[*/ console.log('<a href="six.html">Six</a>'); /* ]]>*/</script>
<![CDATA[ <a href="eight.html">Eight</a> ]]>
</body>
</html>
XHTML_DOC

# True while the parser is between an accepted <a href="..."> start tag and
# its matching end tag.
my $state = 0;

my $p = HTML::Parser->new(
    api_version => 3,

    # Start tags: on <a> with a usable href, print the href and install a
    # text handler that echoes the (trimmed) link text.
    start_h => [
        sub {
            my ($tagname, $attr, $self) = @_;
            return unless $tagname eq 'a';

            # Test definedness/length rather than truth so href="0" is kept.
            my $href = $attr->{href};
            return unless defined $href && length $href;

            $state = 1;
            print "$href\t";
            $self->handler(text => sub { print trim(shift) }, 'dtext');
            return;
        },
        'tagname, attr, self',
    ],

    # End tags: on </a> that closes an accepted link, finish the output line
    # and remove the text handler again so text outside links is ignored.
    end_h => [
        sub {
            my ($tagname, $self) = @_;
            return unless $tagname eq 'a' && $state;

            $state = 0;
            print "\n";
            $self->handler(text => '');
            return;
        },
        'tagname, self',
    ],
);

print "HTML:\n";
$p->parse($html[0]);
$p->eof;    # flush buffered text and end this document before reusing $p

print "XHTML:\n";
$p->xml_mode(1);
$p->marked_sections(1);
$p->parse($html[1]);
$p->eof;

# trim($str): returns a copy of $str with leading and trailing whitespace
# removed. The argument itself is not modified.
sub trim {
    my ($str) = @_;
    $str =~ s/^\s+|\s+$//g;
    return $str;
}