See HTML::TokeParser::Simple:
#!/usr/bin/perl
use strict;
use warnings;
use HTML::TokeParser::Simple;
# Glossary auto-linker: copies the HTML read from DATA to STDOUT, wrapping the
# first occurrence of $match in each text token with a link to $uri. Text that
# sits inside an <a> element is left alone so links are never nested.
my $open  = 0;                  # number of <a> elements currently open
my $match = 'Globalization';    # literal term to link (case-sensitive)
my $uri   = 'http://example.com/glossary?globalization';

# Built once, outside the loop. The term is a literal string, not a pattern,
# so it is quoted before being compiled into a regex.
# NOTE(review): $uri and $match are placed into the markup verbatim; escape
# them first if they ever come from an untrusted source.
my $match_re = qr/\Q$match\E/;
my $href     = qq{<a href="$uri">$match</a>};

my $p = HTML::TokeParser::Simple->new(*DATA);

while (my $t = $p->get_token) {
    my $out = $t->as_is;

    if ($t->is_start_tag('a')) {
        $open++;
    }
    elsif ($t->is_end_tag('a')) {
        # A stray </a> must not push the counter below zero: a negative
        # count is true, which would suppress linking for the rest of the
        # document.
        $open-- if $open > 0;
    }
    elsif ($t->is_text and not $open) {
        # No /g: only the first occurrence in this text token is linked.
        $out =~ s/$match_re/$href/;
    }

    # Every token is echoed, modified or not, so the document passes through
    # otherwise unchanged.
    print $out;
}
__DATA__
Oh no, <a href="/encyclopedia/Anti-Globalization/index.html">Anti-glob
+alization</a> activists are coming! Globalization is rejected by...
Running the script prints:
Oh no, <a href="/encyclopedia/Anti-Globalization/index.html">Anti-glob
+alization</a> activists are coming!
<a href="http://example.com/glossary?globalization">Globalization</a>
+is rejected by...
This should be enough to get you started :)