#!/usr/bin/perl -w
use strict;
use HTML::Parser;
# Build an event-driven parser. Every handler receives the parser object
# itself ('self') as its first argument, so the callbacks below can keep
# their "currently inside <pre>" flag on it.
my $parser = HTML::Parser->new(
    start_h => [ \&_starttag, 'self, tagname, attr' ],
    end_h   => [ \&_endtag,   'self, tagname' ],
    text_h  => [ \&_text,     'self, dtext' ],
);

# By default HTML::Parser may report one run of text to the text handler in
# several pieces. _text() trims and stores every piece on its own, so a long
# paragraph or <pre> body could end up fragmented with the whitespace at the
# break points lost. Ask the parser to buffer text until a full segment is
# available, so each run of text produces exactly one chunk.
$parser->unbroken_text(1);

# Labelled text chunks, appended by _text() in document order.
my @chunks;

# The HTML to parse is the sample document after __END__ in this file.
$parser->parse_file(\*DATA);

print "----------\n$_\n----------\n\n" for @chunks;
# Start-tag handler: raises the '_pre' flag on the parser object when a <pre>
# element opens. Any other tag leaves the flag untouched.
#
#   $html_parser - the object the handler was invoked for (hash-based)
#   $tagname     - name of the tag that was opened
#
# The attribute hash that is passed as a third argument is not needed here.
sub _starttag {
    my ($html_parser, $tagname) = @_;

    if ($tagname eq 'pre') {
        $html_parser->{'_pre'} = 1;
    }
}
# End-tag handler: clears the '_pre' flag on the parser object when a <pre>
# element closes. The key is kept but set to undef, which is what _text()
# tests for; any other closing tag leaves the flag as it is.
#
#   $html_parser - the object the handler was invoked for (hash-based)
#   $tagname     - name of the tag that was closed
sub _endtag {
    my ($html_parser, $tagname) = @_;

    if ($tagname eq 'pre') {
        $html_parser->{'_pre'} = undef;
    }
}
# Text handler: appends every non-blank run of decoded text to @chunks,
# labelled "PRE: " when it occurs inside a <pre> element and "TEXT: "
# otherwise.
#
#   $self  - the parser object; $self->{'_pre'} is defined while a <pre>
#            element is open (maintained by _starttag/_endtag)
#   $dtext - the text with entities already decoded ('dtext' argspec)
#
# Ordinary text is trimmed on both sides. Preformatted text keeps its leading
# indentation, because whitespace is significant inside <pre>; only the single
# newline that may directly follow the <pre> tag and any trailing whitespace
# are removed. Runs that are empty after trimming are ignored.
sub _text {
    my ($self, $dtext) = @_;
    my $in_pre = defined $self->{'_pre'};

    if ($in_pre) {
        # A newline right after <pre> is not part of the content, but the
        # indentation of the first line is.
        $dtext =~ s/\A\r?\n//;
    }
    else {
        $dtext =~ s/\A\s+//;
    }
    $dtext =~ s/\s+\z//;

    # Whitespace-only input is empty by now in both modes.
    return unless length $dtext;

    push @chunks, ($in_pre ? 'PRE: ' : 'TEXT: ') . $dtext;
    return;
}
__END__
<p>This is a bad try to display text then code
<pre>#! usr/bin/perl
use strict;
use warnings;
print "Hello World!";</pre>
and then plain text again</p>