Re: Need to speed up many regex substitutions and somehow make them a here-doc list

The dynamic alternation building technique described here and exemplified here is good for handling multiple stringA -> stringB replacements, but it's not a general solution for an application such as you seem to have: multiple regex -> string replacements. Here's something that may address your needs more closely. As always, the fine details of regex definition are critical. I still have no idea as to relative speed :)

Win8 Strawberry 5.8.9.5 (32)  Sat 10/01/2022 19:44:07
C:\@Work\Perl\monks
>perl

use strict;
use warnings;

use Data::Dump qw(dd);  # for debug

# Matches a spec line that carries no regex/replacement entry: an empty or
# whitespace-only line, or a line holding nothing but a '#' comment.
use constant IGNORE => qr{ \A \s* (?: [#] .*)? \z }xms;

# Sample input to be transformed.
my $text = <<'TEXT';
Regexes have been tAlKed about as being abstract for
1000's of years.
TEXT
print "before ---$text--- \n";

# Replacement spec, one entry per line: a regex (later compiled under /xms,
# so literal whitespace in it is insignificant), then the single-quoted
# replacement string, then an optional '#' comment.  Blank lines and
# comment-only lines are skipped by build_search().
my $regex_replacement_string = <<'REGEX';
a 'A'

i 'I'
# comment line w/o leading spaces
e 'E' # optional entry comment
^ [0-9] .*? \s  ''# optional comment
   # comment line with leading spaces
(?i) \S*? talk \S* \s  ' SPOKEN '
REGEX

my ($rx_search, @replacelist) = build_search($regex_replacement_string);
dd $rx_search;     # for debug
dd \@replacelist;  # for debug
print "\n";        # for debug

# Every alternative of $rx_search ends in a (?{ N }) code block, so after a
# successful match $^R holds the index N of the alternative that matched;
# that index selects the corresponding replacement string.
$text =~ s{ $rx_search }{$replacelist[$^R]}xmsg;
print "after +++$text+++ \n";

# Build one combined search regex from a multi-line regex/replacement spec.
#
# Parameter:
#   $rx_replace_string - text with one entry per line, each of the form
#                            REGEX 'REPLACEMENT'  # optional comment
#                        Lines matching IGNORE (blank or comment-only) are
#                        skipped.
#
# Returns a list: the combined qr// object, followed by the replacement
# strings in entry order.  Alternative number N of the combined regex ends
# in (?{ N }), so after a match $^R is the index into the returned
# replacement strings.  If the spec holds no entries, a regex that can never
# match is returned together with an empty replacement list.
#
# Dies with "bad regex/replacement '...'" on a line that is neither ignored
# nor of the expected form.
#
# NOTE: the regex part of each entry is compiled under "use re 'eval'", so
# any (?{ ... }) code embedded in the spec will be executed; only pass specs
# from a trusted source.
sub build_search {

    my ($rx_replace_string,
        ) = @_;

    # Body of a single-quoted string: runs of non-quote, non-backslash
    # characters, optionally interleaved with backslash-escaped characters.
    my $rx_sq_body        = qr{ [^\\']* (?: \\. [^\\']*)* }xms;
    my $rx_comment_to_eol = qr{ [#] .* }xms;

    my @regexes;
    my @replacements;

    # Needed because the (?{ N }) blocks reach the regex compiler through
    # run-time string interpolation.
    use re 'eval';

    # Split into lines, dropping trailing whitespace on each line.
    my @regexlist = split qr{ \s* \n }xms, $rx_replace_string;

    REGEX_REPLACEMENT:
    for my $rx_replace (@regexlist) {

        next REGEX_REPLACEMENT if $rx_replace =~ IGNORE;

        # The list assignment evaluated in scalar context yields the number
        # of captures returned: 2 on a successful parse, 0 on failure.
        my $got_rx_replace =
        my ($rx, $replace) =
        $rx_replace =~ m{
            \A \s*
            (.*?)             \s*  # everything before '-pair is regex
            ' ($rx_sq_body) ' \s*  # capture body of '-pair
            $rx_comment_to_eol?    # ignore optional trailing comment
            \s* \z
            }xms;

        die "bad regex/replacement '$rx_replace'" unless $got_rx_replace;

        # Tag this alternative with its own index so that $^R identifies it
        # after a match.  The replacement body is stored exactly as written;
        # backslash escapes inside the '-pair are not processed.
        my $n = @regexes;
        $rx = "$rx (?{ $n })";
        push @regexes, qr{ $rx }xms;
        push @replacements, $replace;

        }  # end for REGEX_REPLACEMENT

    # No entries at all: an empty alternation would match the empty string
    # at every position without recording any (?{ N }) index, so hand back a
    # pattern that can never match instead.
    return qr{ (?!) }xms if not @regexes;

    my $alternation = join ' | ', @regexes;
    my $rx_combined = qr{ $alternation }xms;

    return $rx_combined, @replacements;

    }  # end sub build_search()

^Z
before ---Regexes have been tAlKed about as being abstract for
1000's of years.
---
qr/ (?msx-i: a (?{ 0 }) ) | (?msx-i: i (?{ 1 }) ) | (?msx-i: e (?{ 2 }) ) | (?msx-i: ^ [0-9] .*? \s
(?{ 3 }) ) | (?msx-i: (?i) \S*? talk \S* \s (?{ 4 }) ) /msx
["A", "I", "E", "", " SPOKEN "]

after +++REgExEs hAvE bEEn  SPOKEN About As bEIng AbstrAct for
of yEArs.
+++
[download]

Give a man a fish: <%-{-{-{-<

Comment on Re: Need to speed up many regex substitutions and somehow make them a here-doc list Select or Download Code


good chemistry is complicated, and a little bit messy -LW
	PerlMonks