I agree. This uses HTML::TokeParser. I have found that it is easily adapted to any HTML-parsing chore you may have. Since I started using it I have never used a regex on HTML; a regex is never worth the effort.
#!/bin/perl5
use strict;
use warnings;
use HTML::TokeParser;
# Read form.html, drop every <form>...</form> element (including nested
# forms and everything inside them), and print the remaining markup.
#
# Token shapes returned by HTML::TokeParser->get_token:
#   ["S",  $tag, $attr, $attrseq, $text]
#   ["E",  $tag, $text]
#   ["T",  $text, $is_data]
#   ["C",  $text]
#   ["D",  $text]
#   ["PI", $token0, $text]

# Position of the $text field inside each token type (see shapes above).
# Declarations and processing instructions are included so that things
# like <!DOCTYPE ...> are copied through instead of being silently lost.
my %text_index_for = (
    S  => 4,
    E  => 2,
    T  => 1,
    C  => 1,
    D  => 1,
    PI => 2,
);

my $file = 'form.html';
open my $html_fh, '<', $file or die "Cannot open '$file': $!";
my $tp = HTML::TokeParser->new($html_fh)
    or die "Cannot create HTML::TokeParser for '$file'";

my $html       = '';    # start empty so an empty input prints without a warning
my $form_depth = 0;     # how many <form> elements we are currently inside

TOKEN:
while ( my $token = $tp->get_token ) {
    my $type = $token->[0];

    if ( $type eq 'S' and $token->[1] eq 'form' ) {
        $form_depth++;
        next TOKEN;
    }

    if ( $type eq 'E' and $token->[1] eq 'form' ) {
        # A stray </form> must not push the depth below zero: a negative
        # depth is true and would suppress all output that follows.
        $form_depth-- if $form_depth > 0;
        next TOKEN;
    }

    # Everything inside a form is discarded.
    next TOKEN if $form_depth;

    my $index = $text_index_for{$type};
    $html .= $token->[$index] if defined $index;
}

close $html_fh;
print "$html\n";
wfsp |