# Set-up for splitting text into plain segments and CODE<...> style inline
# elements using Text::Balanced:
#   $codes     - alternation of the recognised element codes (A, ABBR, ... VAR).
#   $extractor - array ref of two extractors built with gen_extract_tagged;
#                the second one opens on "<code><" and closes on ">".
# NOTE(review): the first gen_extract_tagged call has empty open/close tags.
#   Presumably these were comment delimiters lost when the snippet was copied
#   (the trailing remark mentions "removing comments") - confirm against the
#   original before relying on it.
# NOTE(review): everything after the first '#' on the line below is a comment
#   as far as Perl is concerned. That includes the text of `sub inline`, which
#   is cut off mid-regex, and the quoted sample output after '####'. Neither is
#   executed; restore them from the original source if they are needed.
use Text::Balanced qw(extract_multiple gen_extract_tagged); my $codes = qr(A|ABBR|ACRONYM|B|BIG|CITE|CODE|DFN|EM|I|KBD|SAMP|SMALL|SPAN|STRONG|SUB|SUP|TT|VAR); my $extractor = [ gen_extract_tagged('', ''), gen_extract_tagged("$codes<", '>', '') ]; # Join all plain text segments, and element substitutions; removing comments sub inline { my $text = shift; my $result = ''; for (extract_multiple $text, $extractor) { $_ = element(lc $1,$2) if /^($codes)<(.*)>$/; $result .= $_ unless (/^'; #### 'Anyone who watches the Syfy channel knows that on Monday nights they aired three television series ' 'Ika|href="Movies_by_series.pl?series=EWA#EUReKA">|class="title">' ', ' 'I>' ', and ' 'I>' '. Some might not be aware that these three series have formed a crossover cosmology which I call ' 'A' ''