# Precompiled pattern for one <font ...>content</font> pair.
#
# Built with qr{} so the variable holds a reusable regex object; m{} would
# instead run a match against the default variable right away and store only
# the true/false result.
#
# NOTE(review): the tag name inside the capture group was missing from the
# original text (it read "(+)"); "font" is assumed from the closing tag --
# confirm. The original content group also had an empty second alternative,
# possibly a lost reference for nested tags; nested markup inside the pair
# is NOT matched by this pattern.
my $font_tag_match = qr{
    # Opening tag: "<", the tag name, then zero or more name="value" or
    # name='value' attributes, then ">".
    \< \s* (font)
    (?:
        \s* [a-z:]* \s* = \s*
        (?: ' [^']* ' | " [^"]* " )
    )*
    \s* \>

    # Content: any run of characters that contains no angle brackets.
    [^<>]*

    # Closing tag, tolerating whitespace around the slash and the name.
    \< \s* / \s* font \s* \>
}x;

# Delete every font pair (tags and enclosed content) that begins at the
# start of a line: /m lets ^ match after each newline, /g repeats the
# substitution across the whole string.
# NOTE(review): the previous comment here said "remove all text that is not
# between a pair of tags", which this statement does not do -- confirm the
# intended behaviour.
$input_data =~ s/^$font_tag_match//mg;

####
# Under Construction