# Setup for the inline-markup extractor.
#
# Imports extract_multiple and gen_extract_tagged from Text::Balanced, then
# builds two file-level lexicals:
#   $codes     - compiled alternation of the upper-case code names
#                (A, ABBR, ... VAR) that may precede a '<' in the input.
#   $extractor - array ref of two extractors made by gen_extract_tagged:
#                the first with empty open and close delimiters, the second
#                with "$codes<" as the opening pattern, '>' as the closing
#                pattern and an empty prefix pattern.
#
# NOTE(review): this looks like a multi-line snippet collapsed onto a single
# physical line. As written, the '#' before "Join all plain text segments"
# turns the remainder of the line into a comment, so the `sub inline` text
# is not compiled. That text also ends mid-statement ("unless (/^';").
# Restore the original line breaks and the missing tail before relying on it.
#
# NOTE(review): the first gen_extract_tagged call has empty delimiters, while
# the existing comment mentions "removing comments". The comment delimiters
# were possibly lost when the snippet was extracted; confirm against the
# original source.
use Text::Balanced qw(extract_multiple gen_extract_tagged); my $codes = qr(A|ABBR|ACRONYM|B|BIG|CITE|CODE|DFN|EM|I|KBD|SAMP|SMALL|SPAN|STRONG|SUB|SUP|TT|VAR); my $extractor = [ gen_extract_tagged('', ''), gen_extract_tagged("$codes<", '>', '') ]; # Join all plain text segments, and element substitutions; removing comments sub inline { my $text = shift; my $result = ''; for (extract_multiple $text, $extractor) { $_ = element(lc $1,$2) if /^($codes)<(.*)>$/; $result .= $_ unless (/^';