Beefy Boxes and Bandwidth Generously Provided by pair Networks
"be consistent"
 
PerlMonks  

How to access last capture groups outside regex using interpolation but w/o using re 'eval'?

by rsFalse (Chaplain)
on Jun 16, 2020 at 20:10 UTC ( [id://11118150]=perlquestion: print w/replies, xml ) Need Help??

rsFalse has asked for the wisdom of the Perl Monks concerning the following question:

Hello,

I want solve this problem using regexes:
Given a "sorted" target string and a sequence of sizes of groups of equal consecutive characters, find and output characters from a string which corresponds to each group.

I wrote a code, which glues smaller regexes to a bigger one, and uses re-eval ((?{ ... })). I use a "stack" array for saving captured characters, and use push/pop to manipulate it.
Also I used $^N variable interpolating it inside double-quotes. This fails to run without "use re 'eval'". How can I overcome it? Any alternatives to $^N for access a last captured group outside of regex?
Any ideas of alternative solutions for a given problem? Code:
#!/usr/bin/perl use warnings; use strict; use re 'eval'; $\ = $/; while(<DATA>){ print '-' x 15; chomp; my $target = $_; my @groups = split ' ', <DATA>; print "target:[$target]"; print "groups:[@groups]"; my @chars_seq; my $re = join " (?!\\g{-1}).*? \n", map { sprintf "(?: " . "( (.)\\g{-1}{%d} ) " . "(?{ push \@chars_seq, \$\^N; }) " . "(?: (?=) | (?{ pop \@chars_seq }) (*FAIL) ) " . "(?{ print join ' ', \@chars_seq; })" . ")" , $_ - 1 } @groups; print for "regex:[", $re, "]"; $re =~ s/\n//g; $target =~ / $re /x or print "FAIL!"; print "character sequence:[@chars_seq]"; } __DATA__ cbaa 1 2 cba 1 bbaa 1 2 cccqrrtaaa 2 2 2 cccqrrtaaa 1 1 2 2 cccqrrtaaa 1 1 1 1 3
OUTPUT:
--------------- target:[cbaa] groups:[1 2] regex:[ (?: ( (.)\g{-1}{0} ) (?{ push @chars_seq, $^N; }) (?: (?=) | (?{ pop @ +chars_seq }) (*FAIL) ) (?{ print join ' ', @chars_seq; })) (?!\g{-1}) +.*? (?: ( (.)\g{-1}{1} ) (?{ push @chars_seq, $^N; }) (?: (?=) | (?{ pop @ +chars_seq }) (*FAIL) ) (?{ print join ' ', @chars_seq; })) ] c c aa character sequence:[c aa] --------------- target:[cba] groups:[1] regex:[ (?: ( (.)\g{-1}{0} ) (?{ push @chars_seq, $^N; }) (?: (?=) | (?{ pop @ +chars_seq }) (*FAIL) ) (?{ print join ' ', @chars_seq; })) ] c character sequence:[c] --------------- target:[bbaa] groups:[1 2] regex:[ (?: ( (.)\g{-1}{0} ) (?{ push @chars_seq, $^N; }) (?: (?=) | (?{ pop @ +chars_seq }) (*FAIL) ) (?{ print join ' ', @chars_seq; })) (?!\g{-1}) +.*? (?: ( (.)\g{-1}{1} ) (?{ push @chars_seq, $^N; }) (?: (?=) | (?{ pop @ +chars_seq }) (*FAIL) ) (?{ print join ' ', @chars_seq; })) ] b b b aa character sequence:[b aa] --------------- target:[cccqrrtaaa] groups:[2 2 2] regex:[ (?: ( (.)\g{-1}{1} ) (?{ push @chars_seq, $^N; }) (?: (?=) | (?{ pop @ +chars_seq }) (*FAIL) ) (?{ print join ' ', @chars_seq; })) (?!\g{-1}) +.*? (?: ( (.)\g{-1}{1} ) (?{ push @chars_seq, $^N; }) (?: (?=) | (?{ pop @ +chars_seq }) (*FAIL) ) (?{ print join ' ', @chars_seq; })) (?!\g{-1}) +.*? (?: ( (.)\g{-1}{1} ) (?{ push @chars_seq, $^N; }) (?: (?=) | (?{ pop @ +chars_seq }) (*FAIL) ) (?{ print join ' ', @chars_seq; })) ] cc cc cc rr cc rr aa character sequence:[cc rr aa] --------------- target:[cccqrrtaaa] groups:[1 1 2 2] regex:[ (?: ( (.)\g{-1}{0} ) (?{ push @chars_seq, $^N; }) (?: (?=) | (?{ pop @ +chars_seq }) (*FAIL) ) (?{ print join ' ', @chars_seq; })) (?!\g{-1}) +.*? (?: ( (.)\g{-1}{0} ) (?{ push @chars_seq, $^N; }) (?: (?=) | (?{ pop @ +chars_seq }) (*FAIL) ) (?{ print join ' ', @chars_seq; })) (?!\g{-1}) +.*? (?: ( (.)\g{-1}{1} ) (?{ push @chars_seq, $^N; }) (?: (?=) | (?{ pop @ +chars_seq }) (*FAIL) ) (?{ print join ' ', @chars_seq; })) (?!\g{-1}) +.*? (?: ( (.)\g{-1}{1} ) (?{ push @chars_seq, $^N; }) (?: (?=) | (?{ pop @ +chars_seq }) (*FAIL) ) (?{ print join ' ', @chars_seq; })) ] c c c c q c q rr c q rr aa character sequence:[c q rr aa] --------------- target:[cccqrrtaaa] groups:[1 1 1 1 3] regex:[ (?: ( (.)\g{-1}{0} ) (?{ push @chars_seq, $^N; }) (?: (?=) | (?{ pop @ +chars_seq }) (*FAIL) ) (?{ print join ' ', @chars_seq; })) (?!\g{-1}) +.*? (?: ( (.)\g{-1}{0} ) (?{ push @chars_seq, $^N; }) (?: (?=) | (?{ pop @ +chars_seq }) (*FAIL) ) (?{ print join ' ', @chars_seq; })) (?!\g{-1}) +.*? (?: ( (.)\g{-1}{0} ) (?{ push @chars_seq, $^N; }) (?: (?=) | (?{ pop @ +chars_seq }) (*FAIL) ) (?{ print join ' ', @chars_seq; })) (?!\g{-1}) +.*? (?: ( (.)\g{-1}{0} ) (?{ push @chars_seq, $^N; }) (?: (?=) | (?{ pop @ +chars_seq }) (*FAIL) ) (?{ print join ' ', @chars_seq; })) (?!\g{-1}) +.*? (?: ( (.)\g{-1}{2} ) (?{ push @chars_seq, $^N; }) (?: (?=) | (?{ pop @ +chars_seq }) (*FAIL) ) (?{ print join ' ', @chars_seq; })) ] c c c c q c q r c q r c q r t c q r t aaa character sequence:[c q r t aaa]

Replies are listed 'Best First'.
Re: How to access last capture groups outside regex using interpolation but w/o using re 'eval'?
by tybalt89 (Monsignor) on Jun 16, 2020 at 23:24 UTC
    #!/usr/bin/perl use strict; # https://perlmonks.org/?node_id=11118150 use warnings; while( <DATA> ) { my @sizes = split ' ', <DATA>; my $regex = join '', map { "(?=(.))(\\g{-2}{$_})\\g{-2}*.*?" } @size +s; my @groups = ( /$regex/ )[ map 2 * $_ + 1, 0 .. $#sizes ]; print "[@groups]\n"; } __DATA__ cbaa 1 2 cba 1 bbaa 1 2 cccqrrtaaa 2 2 2 cccqrrtaaa 1 1 2 2 cccqrrtaaa 1 1 1 1 3

    Outputs:

    [c aa] [c] [b aa] [cc rr aa] [c q rr aa] [c q r t aaa]
      Thanks! Crafty solution!
Re: How to access last capture groups outside regex using interpolation but w/o using re 'eval'?
by AnomalousMonk (Archbishop) on Jun 16, 2020 at 22:16 UTC

    One way:

    c:\@Work\Perl\monks>perl -wMstrict -le "use 5.010; ;; use warnings; use strict; ;; use Test::More 'no_plan'; use Test::NoWarnings; ;; use Data::Dump qw(dd pp); ;; my @TESTS = ( [ '', [ 1, ], [ ], ], [ 'c', [ 1, ], [ qw(c) ], ], [ 'c', [ 2, ], [ ], ], [ 'cba', [ 1, ], [ qw(c) ], ], [ 'cbaa', [ 1, 2, ], [ qw(c aa) ], ], [ 'bbaa', [ 1, 2, ], [ qw(b aa) ], ], [ 'cccqrrtaaa', [ 1, 1, 1, 1, 3, ], [ qw(c q r t aaa) ], ], [ 'cccqrrtaaa', [ 2, 2, 2, ], [ qw(cc rr aa) ], ], [ 'cccqrrtaaa', [ 1, 1, 2, 2, ], [ qw(c q rr aa) ], ], ); ;; VECTOR: for my $ar_vector (@TESTS) { if (not ref $ar_vector) { note $ar_vector; next VECTOR; } ;; my ($string, $ar_groups, $ar_expected) = @$ar_vector; ;; my $rx = build_regex($ar_groups); ;; my @groups = extract_groups($string, $rx); ;; is_deeply \@groups, $ar_expected, qq{'$string' <- (@$ar_groups) -> (@$ar_expected)} } ;; done_testing; ;; exit; ;; ;; sub build_regex { my ($ar_groups) = @_; ;; my ($rx) = map qr{ $_ }xms, join ' .*? ', map qr{ ((.) \g-1{$_} (?! \g-1)) }xms, map $_-1, @$ar_groups ; ;; return $rx; } ;; sub extract_groups { my ($string, $rx) = @_; ;; my $p; return grep $p = !$p, $string =~ $rx ; } " ok 1 - '' <- (1) -> () ok 2 - 'c' <- (1) -> (c) ok 3 - 'c' <- (2) -> () ok 4 - 'cba' <- (1) -> (c) ok 5 - 'cbaa' <- (1 2) -> (c aa) ok 6 - 'bbaa' <- (1 2) -> (b aa) ok 7 - 'cccqrrtaaa' <- (1 1 1 1 3) -> (c q r t aaa) ok 8 - 'cccqrrtaaa' <- (2 2 2) -> (cc rr aa) ok 9 - 'cccqrrtaaa' <- (1 1 2 2) -> (c q rr aa) 1..9 ok 10 - no warnings 1..10
    Now let's see the rest of the specification. :)

    Update: This code needs Perl version 5.10+ only because it uses the \g{n} relative backreference.


    Give a man a fish:  <%-{-{-{-<

      Thanks! Nice simplification and improvement!

Log In?
Username:
Password:

What's my password?
Create A New User
Domain Nodelet?
Node Status?
node history
Node Type: perlquestion [id://11118150]
Approved by marto
Front-paged by haukex
help
Chatterbox?
and the web crawler heard nothing...

How do I use this?Last hourOther CB clients
Other Users?
Others admiring the Monastery: (6)
As of 2024-03-28 16:23 GMT
Sections?
Information?
Find Nodes?
Leftovers?
    Voting Booth?

    No recent polls found