$xml =~ s{(?:()|(<[^\s?!>]+))}{ $1 or do {(my $tagname = $2 ) =~ tr/.//d;$tagname;} }seg; # or ... sligthly more efficient for XMLs with most tags without the dots $xml =~ s{(?:()|(<[^\s?!>\.]+\.[^\s?!>]+))}{ $1 or do {(my $tagname = $2 ) =~ tr/.//d;$tagname;} }seg; # and now even more efficient thanks to moving the < outside the or $xml =~ s{<(?:(!\[CDATA\[.*?\]\]>)|(<[^\s?!>\.]+\.[^\s?!>]+))}{ '<'. ($1 or do {(my $tagname = $2 ) =~ tr/.//d;$tagname;} ) }seg; #### my $XML = <<'__XML__'; BLAH ]]> STUFF T BLAH BLAH ]]> __XML__ use Benchmark; sub one { my $xml = $XML; $xml =~ s{(?:()|(<[^\s?!>]+))}{ $1 or do {(my $tagname = $2 ) =~ tr/.//d;$tagname;} }seg; $xml =~ s{(?:()|(<[^\s?!>]+))}{ $1 or do {(my $tagname = $2 ) =~ tr/.//d;$tagname;} }seg; $xml =~ s{(?:()|(<[^\s?!>]+))}{ $1 or do {(my $tagname = $2 ) =~ tr/.//d;$tagname;} }seg; return; } sub two { my $xml = $XML; $xml =~ s{(?:()|(<[^\s?!>\.]+\.[^\s?!>]+))}{ $1 or do {(my $tagname = $2 ) =~ tr/.//d;$tagname;} }seg; $xml =~ s{(?:()|(<[^\s?!>\.]+\.[^\s?!>]+))}{ $1 or do {(my $tagname = $2 ) =~ tr/.//d;$tagname;} }seg; $xml =~ s{(?:()|(<[^\s?!>\.]+\.[^\s?!>]+))}{ $1 or do {(my $tagname = $2 ) =~ tr/.//d;$tagname;} }seg; return; } sub three { my $xml = $XML; $xml =~ s{<(?:(!\[CDATA\[.*?\]\]>)|(<[^\s?!>\.]+\.[^\s?!>]+))}{ '<'. ($1 or do {(my $tagname = $2 ) =~ tr/.//d;$tagname;} ) }seg; $xml =~ s{<(?:(!\[CDATA\[.*?\]\]>)|(<[^\s?!>\.]+\.[^\s?!>]+))}{ '<'. ($1 or do {(my $tagname = $2 ) =~ tr/.//d;$tagname;} ) }seg; $xml =~ s{<(?:(!\[CDATA\[.*?\]\]>)|(<[^\s?!>\.]+\.[^\s?!>]+))}{ '<'. ($1 or do {(my $tagname = $2 ) =~ tr/.//d;$tagname;} ) }seg; return; } sub four { my $xml = $XML; $xml =~ s{<(?:(!\[CDATA\[.*?\]\]>)|(<[^\s?!>]+))}{ '<'. ($1 or do {(my $tagname = $2 ) =~ tr/.//d;$tagname;} ) }seg; $xml =~ s{<(?:(!\[CDATA\[.*?\]\]>)|(<[^\s?!>]+))}{ '<'. ($1 or do {(my $tagname = $2 ) =~ tr/.//d;$tagname;} ) }seg; $xml =~ s{<(?:(!\[CDATA\[.*?\]\]>)|(<[^\s?!>]+))}{ '<'. ($1 or do {(my $tagname = $2 ) =~ tr/.//d;$tagname;} ) }seg; return; } timethese( 100000, { one => \&one, two => \&two, three => \&three, four => \&four, }); __END__ Benchmark: timing 100000 iterations of four, one, three, two... four: 2 wallclock secs ( 1.89 usr + 0.00 sys = 1.89 CPU) @ 52882.07/s (n=100000) one: 10 wallclock secs ( 9.53 usr + 0.00 sys = 9.53 CPU) @ 10492.08/s (n=100000) three: 2 wallclock secs ( 1.86 usr + 0.00 sys = 1.86 CPU) @ 53792.36/s (n=100000) two: 6 wallclock secs ( 6.27 usr + 0.00 sys = 6.27 CPU) @ 15959.14/s (n=100000)