Just for fun :)
It only takes one s/// (with a little extra state)
#!/usr/bin/perl
use strict; # https://perlmonks.org/?node_id=11144309
use warnings;
# Sample input: plain words mixed with nested and sibling <tagN>...</tagN>
# sections.  The literal is joined from two pieces so that no line break
# (or line-wrap marker) ends up inside the string itself.
my $str = 'word1 <tag0> word2 <tag1>word3 word4</tag1> word5 </tag0> '
    . 'word6 <tag2>word7 word8</tag2>word9 <tag3>word10</tag3> word11';

# Each call passes the string followed by the tag names to treat as allowed.
print parsestringwithtags( $str, 'tag0', 'tag1' ), "\n";
print parsestringwithtags( $str, 'tag3' ), "\n";
print parsestringwithtags( $str, 'tag1', 'tag2', 'tag3' ), "\n";
# parsestringwithtags( $string, @allowed_tags )
#
# Returns a copy of $string with every <name> and </name> marker removed.
# Text outside all tags is kept.  Text inside a tag is kept only when that
# tag and every tag enclosing it are listed in @allowed_tags; a tag that is
# not listed suppresses everything up to its close tag, nested allowed tags
# included.
#
# Tag names are runs of \w characters with nothing else between the angle
# brackets, so a tag carrying attributes is reported as a rogue bracket.
#
# Dies on a stray "<" or ">", on a close tag that does not match the
# innermost open tag, and on a tag still open at the end of the string.
sub parsestringwithtags
{
    my ( $text, @allowed ) = @_;
    my %tags   = map { $_ => 1 } @allowed;
    my $active = 1;                  # 1 while text is kept, 0 while dropped
    my @state  = ( [ '', 1 ] );      # stack of [ tag name, $active before it ]

    # The three alternatives between them consume every character, so a
    # single global pass sees the whole string in order.
    $text =~ s{ <(/?)(\w+)> | ([^<>]+) | ([<>]) }{
        defined($4)
            ? die "rogue angle bracket $4 at $-[4]"
        : defined($2)                # a tag; defined, not truth, so <0> counts
            ? ( $1                   # the slash was captured: close tag
                ? do {
                    $state[-1][0] eq $2
                        ? do { $active = ( pop @state )->[1]; '' }
                        : die "mismatched tags $state[-1][0] vs $2";
                  }
                : do {
                    push @state, [ $2, $active ];
                    $tags{$2} or $active = 0;
                    '';
                  } )
        : $3 x $active               # plain text survives only while active
    }gex;

    # Only the bottom sentinel entry may remain once all tags are closed.
    @state > 1 and die "missing close tag for $state[-1][0]";
    return $text;
}
Outputs (exact spacing; the spaces on both sides of a removed tag are kept, so some gaps are doubled):
word1  word2 word3 word4 word5  word6 word9  word11
word1  word6 word9 word10 word11
word1  word6 word7 word8word9 word10 word11