Well then, something like the code below might fit.
#!/usr/bin/perl
use strict;
use warnings;
use Benchmark;
# Lookup table used by both benchmarked subs: maps each entity string
# "&ent00000;" .. "&ent09999;" to its replacement value (the entity's number).
my %entities = ();
my $counter = 10000;
while ( $counter-- > 0 ) {
    $entities{ sprintf( '&ent%05d;', $counter ) } = $counter;
}

# Sample input.  The heredoc interpolates, so each "\&" below ends up as a
# plain "&" in the text; those sequences are not keys of %entities.
my $text_to_be_changed = <<EOT;
This is some text containing five (&ent00005;) entities that
will be changed:
&ent00029;, &ent00129;, &ent00229;, &ent00329; and &ent00429;
The pseudo_entities below should rest unchanged:
\&dent00029;, \&dont_change;, \& qwerty ;, 12345;.
EOT

# Time 1000 runs of each replacement strategy and print the results.
timethese( 1000, {
    'with_splitting' => \&with_splitting,
    'regexish'       => \&regexish,
});
exit;
# Replace every known "&name;" entity in $text_to_be_changed by splitting the
# text on semicolons and inspecting the tail of each piece.
#
# Takes no arguments; reads the file-level $text_to_be_changed and %entities
# and modifies neither.
# Returns the text with every entity that is a key of %entities replaced by
# its mapped value.  "&word;" sequences that are not keys are kept as is.
sub with_splitting {
    # A negative LIMIT makes split keep trailing empty fields, so the last
    # element is always the text that follows the final semicolon (possibly
    # the empty string) and runs of semicolons at the end are not lost.
    my @pieces = split( /;/, $text_to_be_changed, -1 );

    # The last piece was not followed by a semicolon in the input, so it
    # must be passed through without one being appended.
    my $tail = pop @pieces;
    $tail = '' unless defined $tail;    # split of an empty string yields ()

    my @modified_parts = ();
    foreach my $piece (@pieces) {
        # \z (not $) so that "&name\n;" is not mistaken for an entity.
        if ( $piece =~ m/(&\w+)\z/ and exists $entities{"$1;"} ) {
            # Known entity: swap it for its value; its semicolon is dropped.
            $piece =~ s/(&\w+)\z/$entities{"$1;"}/;
        }
        else {
            # Not an entity: give back the semicolon consumed by split.
            $piece .= ';';
        }
        push @modified_parts, $piece;
    }

    my $result = join( '', @modified_parts, $tail );
    #print "RESULT: \n", $result;
    return $result;
}
# Replace every known entity in $text_to_be_changed with one global
# substitution driven by a single large alternation of all keys of %entities.
#
# Takes no arguments; reads the file-level $text_to_be_changed and %entities
# and modifies neither.
# Returns the text with every entity that is a key of %entities replaced by
# its mapped value.
sub regexish {
    # Work on a copy: substituting into the shared text would leave nothing
    # to replace after the first call, for this sub and for any other sub
    # that reads the same text later.
    my $result = $text_to_be_changed;

    # With no entities the alternation would be an empty pattern that matches
    # at every position, so there is nothing sensible to substitute.
    return $result unless %entities;

    # quotemeta keeps keys literal even if one contains regex metacharacters.
    my $huge_entity = join '|', map { quotemeta } keys %entities;
    $result =~ s/($huge_entity)/$entities{$1}/g;
    #print "RES2: \n", $result;
    return $result;
}