use strict;
use warnings;

use HTML::Parser;

# Copies an HTML document from the DATA handle to STDOUT. Markup outside list
# items is printed as it is parsed; everything between an <li> start tag and
# the next </li> end tag is buffered so it can be post-processed as a single
# string before being printed.

# True while the parser is between an <li> start tag and an </li> end tag.
my $inside_li = 0;

# Raw text and markup collected for the list item currently being read.
my $list_item = '';

# Prints the buffered list-item content and resets the buffer.
sub _flush_list_item {

    # Post-process the item content here; for now only leading whitespace
    # is removed.
    $list_item =~ s/^\s+//;
    print $list_item;
    $list_item = '';
    return;
}

# Start-tag handler.
#   $tag  - tag name as reported by the parser ("tagname" argspec)
#   $text - original source text of the tag ("text" argspec)
# Tags seen inside a list item are buffered; an <li> seen outside one is
# printed and switches buffering on.
sub start {
    my ($tag, $text) = @_;

    if ($inside_li) {
        $list_item .= $text;
        return;
    }

    $inside_li = 1 if $tag eq 'li';

    print $text;
    return;
}

# Text handler, also registered as the default handler.
#   $text - original source text of the event
# Buffers the text while inside a list item, prints it otherwise.
sub text {
    my ($text) = @_;

    if ($inside_li) {
        $list_item .= $text;
        return;
    }

    print $text;
    return;
}

# End-tag handler.
#   $tag  - tag name as reported by the parser ("tagname" argspec)
#   $text - original source text of the tag ("text" argspec)
# An </li> switches buffering off and emits the processed item content,
# followed by the </li> itself. Any other end tag is buffered while inside a
# list item and printed otherwise.
sub end {
    my ($tag, $text) = @_;

    if ($tag eq 'li') {
        $inside_li = 0;
        _flush_list_item();
    }

    if ($inside_li) {
        $list_item .= $text;
        return;
    }

    print $text;
    return;
}

my $parser = HTML::Parser->new(
    api_version => 3,
    start_h     => [ \&start, "tagname, text" ],
    text_h      => [ \&text,  "text" ],
    end_h       => [ \&end,   "tagname, text" ],
    default_h   => [ \&text,  "text" ],
);

$parser->parse_file(\*DATA);

# If the input ended inside an <li> that was never closed, the buffered
# content has not been printed yet; emit it rather than dropping it.
_flush_list_item() if $inside_li;