If I understand your input data, then the code below contains two working solutions.
The 'tagsub' solution is very similar to Joost's concept.
Even on an old 200MHz box, both solutions seem quite fast, so perhaps I misunderstood your issue with Joost's solution. Throughput of each solution:
- 5500 tags/sec - taglist
- 4800 tags/sec - tagsub
#!/usr/bin/perl -w
use strict;
# Sample input: one bracketed tag pair per line, covering plain tags
# ([b]..[/b]) and tags carrying a quoted attribute ([url="..."]..[/url]).
# The heredoc is single-quoted, so nothing inside it is interpolated.
my $data = <<'EOF';
[url="http://www.foo.bar.com"]foobar[/url]
[url]http://www.foo.bar.com[/url]
[img]http://example.com/prettylady.gif[/img]
[img="http://example.com/prettylady.gif"]Pretty Lady[/img]
[email="bruce.gray@acm.org"]Util[/email]
[b]bold[/b]
[i]ital[/i]
[u]under[/u]
[em]emph[/em]
[strong]smelly[/strong]
EOF
# Dispatch table of tag handlers.
#   First key : '' for a tag written without an attribute, '=' for a tag
#               written as [tag=attribute].
#   Second key: the tag name.
# Every handler is called as $handler->($attribute, $body) and returns the
# HTML that replaces the whole tag pair.
# NOTE(review): $attribute and $body are interpolated verbatim; no HTML
# escaping is applied here.
my %taglist = (
    q{} => {    # Simple: [tag]body[/tag]
        # Tags that map onto an HTML element of the same name.
        (
            map {
                my $element = $_;
                ( $element => sub { my ( undef, $body ) = @_; qq{<$element>$body</$element>} } )
            } qw(b i u em strong)
        ),
        # Without an attribute the body doubles as the link target / source.
        url => sub { my ( undef, $body ) = @_; qq{<a href="$body">$body</a>} },
        img => sub { my ( undef, $body ) = @_; qq{<img src="$body" alt="$body" />} },
    },
    q{=} => {    # Complex: [tag=attribute]body[/tag]
        email => sub { my ( $attribute, $body ) = @_; qq{<a href="mailto:$attribute">$body</a>} },
        url   => sub { my ( $attribute, $body ) = @_; qq{<a href="$attribute">$body</a>} },
        img   => sub { my ( $attribute, $body ) = @_; qq{<img src="$attribute" alt="$body" />} },
    },
);
# Convert one tag pair to HTML without a lookup table.
#   $tag         - tag name, e.g. 'b' or 'url'
#   $equals_sign - true when the tag was written as [tag=attribute]
#   $attribute   - the attribute text (only used for the '=' form)
#   $body        - the text enclosed by the tag pair
# Returns the HTML string, or an empty string when the tag/form combination
# is not one of those listed below.
sub tagsub {
    my ( $tag, $equals_sign, $attribute, $body ) = @_;

    if ($equals_sign) {    # Complex: [tag=attribute]body[/tag]
        return
            $tag eq 'email' ? qq{<a href="mailto:$attribute">$body</a>}
          : $tag eq 'url'   ? qq{<a href="$attribute">$body</a>}
          : $tag eq 'img'   ? qq{<img src="$attribute" alt="$body" />}
          :                   q{};
    }

    # Simple: [tag]body[/tag]
    return
        $tag eq 'b'      ? qq{<b>$body</b>}
      : $tag eq 'i'      ? qq{<i>$body</i>}
      : $tag eq 'u'      ? qq{<u>$body</u>}
      : $tag eq 'em'     ? qq{<em>$body</em>}
      : $tag eq 'strong' ? qq{<strong>$body</strong>}
      : $tag eq 'url'    ? qq{<a href="$body">$body</a>}
      : $tag eq 'img'    ? qq{<img src="$body" alt="$body" />}
      :                    q{};
}
# Matches one complete tag pair and captures:
#   $1  tag name
#   $2  '=' when an attribute follows the name, otherwise ''
#   $3  the attribute text, minus one optional surrounding quote on each side
#   $4  the text enclosed by the tag pair
# The \1 backreference requires the closing tag to repeat the opening name.
my $pat = qr{\[(\w+)(=?)['"]?([^\]]*?)['"]?\](.+?)\[/\1\]};

# Run 1: dispatch through %taglist.
# A tag/form combination with no handler (e.g. [email]x[/email]) used to call
# an undefined code reference and die; such markup is now left unchanged.
# $& is the whole matched text.  On perls older than 5.20, mentioning $&
# anywhere slows every regex match in the program.
$_ = $data;
s{$pat}{
    exists( $taglist{$2}{$1} ) ? $taglist{$2}{$1}->($3, $4) : $&
}esg;
print "Run # 1:\n$_\n";

# Run 2: dispatch through tagsub().
# tagsub() yields nothing usable for a tag it does not know, which used to
# erase the matched text; keep the original markup in that case instead.
$_ = $data;
s{$pat}{
    my $original = $&;
    my $html     = tagsub($1, $2, $3, $4);
    ( defined($html) and length($html) ) ? $html : $original
}esg;
print "Run # 2:\n$_\n";
# Boost size of $data to 10240 lines.
#$data .= $data for 1 .. 10;
#
#use Benchmark qw(cmpthese);
#cmpthese(
# -300,
# {
# hash_sub => sub{
# $_ = $data;
# s/$pat/$taglist{$2}{$1}->($3,$4)/esg;
# },
# one_sub => sub{
# $_ = $data;
# s/$pat/tagsub($1,$2,$3,$4)/esg;
# },
# }
#);