package Filter;
use strict;
use base 'HTML::Parser';
# State shared by the parser callbacks defined in this file.
#   $filter  - accumulates the filtered output; starts as an empty string so
#              it is always defined, even when nothing gets captured.
#   $want_it - true while text events should be copied into $filter.
my ($filter, $want_it) = ('', 0);

# Whitelist of tag names whose start and end tags are copied to the output.
my @ok_tags = qw( h1 h2 h3 h4 p br );

# Lookup table built from @ok_tags so membership can be tested with exists().
my %ok_tags = map { $_ => 1 } @ok_tags;
# Start-tag callback (HTML::Parser subclass method).
#
# Arguments: the parser object, the tag name, the attribute hash, the
# attribute order list, and the tag's original source text.
#
# A tag found in %ok_tags has its original text (attributes included) appended
# to $filter and switches text capture on. Any other tag is dropped and
# switches text capture off; capture stays off until the next whitelisted
# start tag is seen, because nothing else in this file sets the flag.
#
# Returns the new value of $want_it (1 or 0).
sub start {
    my ($self, $tag, $attr, $attrseq, $origtext) = @_;

    # Guard clause: unknown tags only turn capturing off.
    if (!exists $ok_tags{$tag}) {
        return $want_it = 0;
    }

    $filter .= $origtext;
    return $want_it = 1;
}
# Text callback (HTML::Parser subclass method).
#
# Appends the text chunk to $filter, but only while $want_it is true, i.e.
# while the most recently seen start tag was a whitelisted one.
sub text {
    my ($self, $chunk) = @_;

    if ($want_it) {
        $filter .= $chunk;
    }
}
# Comment callback (HTML::Parser subclass method).
#
# Deliberately does nothing, so HTML comments are stripped from the output.
# To keep comments instead, enable the two lines below. NOTE(review): the
# callback presumably receives the comment text without its delimiters, so
# they are put back here - confirm against the HTML::Parser documentation.
#
#   my ($self, $comment) = @_;
#   $filter .= "<!--$comment-->";
sub comment {
    return;
}
# End-tag callback (HTML::Parser subclass method).
#
# Copies the closing tag's original text to $filter when the tag name is in
# %ok_tags; closing tags of any other element are dropped. The capture flag
# $want_it is intentionally left untouched here.
sub end {
    my ($self, $tag, $origtext) = @_;

    if (exists $ok_tags{$tag}) {
        $filter .= $origtext;
    }
}
# Demo driver: run the sample document stored after __DATA__ through the
# filter, then print the untouched input followed by the filtered result.

# Direct method call instead of indirect-object syntax ("new Filter").
my $parser = Filter->new;

# Read the entire __DATA__ section into one string. The <DATA> read is
# required: joining an empty list would leave $html empty and the parser
# would never see any input.
my $html = join '', <DATA>;

$parser->parse($html);
$parser->eof;    # signal end of document so pending text is delivered

print $html;
print "\n\n------------------------\n\n";

# $filter may still be undefined if no callback ever appended to it.
print defined $filter ? $filter : '';
__DATA__
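<html>
<head>
<title>Title</title>
</head>
<body>
<h1>Hello Parser</h1>
<p>You need HTML::Parser</p>
<h2>Parser rocks!</h2>
<a href="http://html.parser.com">html.parser.com</a>
<pre>
use HTML::Parser;
</pre>
</body>
</html>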