use HTML::TokeParser::Simple;
# assumes that $text is a scalar containing the actual HTML
# Extract the first <p>...</p> element from $text into $new_text, keeping the
# markup as written but normalising whitespace inside text nodes.
#
# Result:
#   $new_text - the paragraph from its opening <p> through its closing </p>
#               (inclusive). If the paragraph is never closed, everything up
#               to the end of the input is kept. If there is no <p> at all,
#               $new_text is the empty string.
my $p = HTML::TokeParser::Simple->new( \$text );

# Skip ahead to the first <p> start tag. get_token returns undef once the
# input is exhausted, so test for that before calling a method on the token.
my $token;
while ( defined( $token = $p->get_token ) ) {
    last if $token->is_start_tag('p');
}

my $new_text = '';
if ( defined $token ) {

    # as_is is the current name for the deprecated return_text: the exact
    # source text of the token.
    $new_text = $token->as_is;

    while ( defined( $token = $p->get_token ) ) {
        my $temp = $token->as_is;
        if ( $token->is_text ) {
            $temp =~ s/\s+/ /g;    # collapse whitespace
            $temp =~ s/^\s//;      # remove initial whitespace
            $temp =~ s/\s$//;      # remove trailing whitespace

            # NOTE(review): trimming each text token also removes the space
            # between text and an adjacent inline tag ("a <b>b</b>" becomes
            # "a<b>b</b>") -- confirm this is intended.
        }
        $new_text .= $temp;

        # The closing tag has just been appended above, so stop here; it
        # must not be appended a second time after the loop.
        last if $token->is_end_tag('p');
    }
}