http://qs321.pair.com?node_id=11113014


in reply to wiki regex reprocessing replacement

Here's my take. One thing I don't understand is the inclusion of  > < characters in the pre- and post-markup tag delimiters (update: e.g.,  my $pre  = qr/(^|\s|>)/; here), probably because I'm not familiar with wikisyntax. Can you link me to a discussion of the role of these characters? I prepared two versions, one using  (?(DEFINE) ...) and one based purely on  qr// interpolation. Maybe one is faster, but I haven't done any Benchmark-ing (nor am I likely to).

File W2H_1.pm:
# W2H_1.pm 15feb20waw package W2H_1; use 5.010; # need regex extensions use strict; use warnings; # use Data::Dump qw(pp dd); # for debug use constant DEBUG => 0; use constant { DBPR_rx => 0 && DEBUG, }; my %w2h_map = qw(* b / i _ u); # wiki markup -> HTML tag mapping my $mq_wiki_mark = quotemeta join '', keys %w2h_map; # meta-quoted wi +ki my $rx_wml = qr{ ((?&PRE_OPEN) (?&WIKI_MARK)) # $1: openin +g wikimark ((?: (?: (?: (?! \g-2 (?&POST_CLOSE)) .)*+) | (?R) )*) # $2: marked + body \g-2 (?&POST_CLOSE) # closing wi +kimark (?(DEFINE) (?<PRE_OPEN> (?<! [^>\s$mq_wiki_mark])) # after string start, > + or \s or wml (?<POST_CLOSE> (?! [^<\s$mq_wiki_mark])) # before string end, < + or \s or wml (?<WIKI_MARK> [\Q$mq_wiki_mark\E]) ) }xms; DBPR_rx and print $rx_wml, "\n"; # only used for development diagnostics my $rx_pre_open = qr{ (?<! [^\s>]) }xms; my $rx_post_close = qr{ (?! [^\s<]) }xms; my $rx_wiki_mark = qr{ [$mq_wiki_mark] }xms; # only used for development diagnostics die q{bad '\A wm' match} unless '_' =~ m{ $rx_pre_open $rx_wiki_mar +k }xms; die q{bad '\s wm' match} unless ' _' =~ m{ $rx_pre_open $rx_wiki_mar +k }xms; die q{bad '> wm' match} unless '>_' =~ m{ $rx_pre_open $rx_wiki_mar +k }xms; die q{bad 'wm \z' match} unless '_' =~ m{ $rx_wiki_mark $rx_post_clo +se }xms; die q{bad 'wm \s' match} unless '_ ' =~ m{ $rx_wiki_mark $rx_post_clo +se }xms; die q{bad 'wm <' match} unless '_<' =~ m{ $rx_wiki_mark $rx_post_clo +se }xms; # subroutines ###################################################### sub w2h { # not exported; invoke fully-qualified my ($wiki, # string possibly containing wiki markup ) = @_; (my $html = $wiki) =~ s{ $rx_wml # captures to $1 $2 } { my ($html_tag, $body) = ($w2h_map{$1}, w2h($2)); "<$html_tag>$body</$html_tag>"; }xmsge; return $html; } 1;
File W2H_2.pm:
# W2H_2.pm 15feb20waw package W2H_2; use 5.010; # need regex extensions use strict; use warnings; # use Data::Dump qw(pp dd); # for debug use constant DEBUG => 0; use constant { DBPR_rx => 0 && DEBUG, }; my %w2h_map = qw(* b / i _ u); # wiki markup -> HTML tag mapping my $mq_wiki_mark = quotemeta join '', keys %w2h_map; # meta-quoted wi +ki my $rx_pre_open = qr{ (?<! [^>\s$mq_wiki_mark]) }xms; my $rx_post_close = qr{ (?! [^<\s$mq_wiki_mark]) }xms; my $rx_wiki_mark = qr{ [$mq_wiki_mark] }xms; my $rx_wml = qr{ ($rx_pre_open $rx_wiki_mark) # $1: openin +g wikimark ((?: (?: (?: (?! \g-2 $rx_post_close) .)*+) | (?R) )*) # $2: marked + body \g-2 $rx_post_close # closing wi +kimark }xms; DBPR_rx and print $rx_wml, "\n"; # only used for development diagnostics die q{bad '\A wm' match} unless '_' =~ m{ $rx_pre_open $rx_wiki_mar +k }xms; die q{bad '\s wm' match} unless ' _' =~ m{ $rx_pre_open $rx_wiki_mar +k }xms; die q{bad '> wm' match} unless '>_' =~ m{ $rx_pre_open $rx_wiki_mar +k }xms; die q{bad 'wm \z' match} unless '_' =~ m{ $rx_wiki_mark $rx_post_clo +se }xms; die q{bad 'wm \s' match} unless '_ ' =~ m{ $rx_wiki_mark $rx_post_clo +se }xms; die q{bad 'wm <' match} unless '_<' =~ m{ $rx_wiki_mark $rx_post_clo +se }xms; # subroutines ###################################################### sub w2h { # not exported; invoke fully-qualified my ($wiki, # string possibly containing wiki markup ) = @_; (my $html = $wiki) =~ s{ $rx_wml # captures to $1 $2 } { my ($html_tag, $body) = ($w2h_map{$1}, w2h($2)); "<$html_tag>$body</$html_tag>"; }xmsge; return $html; } 1;
File W2H.t:
# W2H.t 15feb20waw use strict; use warnings; # use Data::Dump qw(pp dd); # for debug use Test::More 'no_plan'; use Test::NoWarnings; BEGIN { use_ok 'W2H_1'; use_ok 'W2H_2'; } use constant DEBUG => 1; use constant { DBPR_m_d1 => 0 && DEBUG, }; my @Tests = ( [ '_abcd_' => '<u>abcd</u>', ], [ '_/abcd/_' => '<u><i>abcd</i></u>', ], [ '*_/abcd/_*' => '<b><u><i>abcd</i></u></b>', ], [ '*_/a*b_c/d/_*' => '<b><u><i>a*b_c/d</i></u></b>', ], [ ' *_/a*b_c/d/_* ' => ' <b><u><i>a*b_c/d</i></u></b> ', ], [ '_abc__def_' => '<u>abc</u><u>def</u>', ], [ '_abc__def_*ghi**jkl*/mno//pqr/' => '<u>abc</u><u>def</u><b>ghi</b><b>jkl</b><i>mno</i><i>pqr</i>', ], [ ' _abc__def_*ghi**jkl*/mno//pqr/ ' => ' <u>abc</u><u>def</u><b>ghi</b><b>jkl</b><i>mno</i><i>pqr</i> ', ], [ '_abc_ _def_ *ghi* *jkl* /mno/ /pqr/' => '<u>abc</u> <u>def</u> <b>ghi</b> <b>jkl</b> <i>mno</i> <i>pqr</i> +', ], [ '_abc_ /def/' => '<u>abc</u> <i>def</i>', ], [ ' _abc_ /def/ ' => ' <u>abc</u> <i>def</i> ', ], [ 'x _abc_ /def/ x' => 'x <u>abc</u> <i>def</i> x', ], [ '_abc /xyz/ cba_ /def/' => '<u>abc <i>xyz</i> cba</u> <i>def</i>', + ], [ '_/one *t/w*o*/ th/r_ee_ null' => '<u><i>one <b>t/w*o</b></i> th/r +_ee</u> null', ], [ '_/one *two*/ th/ree_ null _/f*ur *five*/ six_ null _/se_ven *eig +ht*/ nine_ *fail_', => '<u><i>one <b>two</b></i> th/ree</u> null <u><i>f*ur <b>five</b></ +i> six</u> null <u><i>se_ven <b>eight</b></i> nine</u> *fail_', 'from pm#11112991' ], ); # testing, testing... FUNT: for my $func_name ( 'W2H_1::w2h', 'W2H_2::w2h', ) { note "\n=== testing $func_name() ===\n\n"; *w2h = do { no strict 'refs'; *$func_name; }; VECTOR: for my $ar_vector (@Tests) { if (not ref $ar_vector) { note $ar_vector; next VECTOR; } my ($wiki, $expected, $comment) = @$ar_vector; $comment = defined $comment ? "$comment: " : ''; my $got = w2h($wiki); DBPR_m_d1 and diag ":$wiki:"; DBPR_m_d1 and diag ":$got:"; is $got, $expected, "${comment}'$wiki' -> '$expected'"; } # end for VECTOR } # end for FUNT done_testing; exit; # subroutines ###################################################### # none for now


Give a man a fish:  <%-{-{-{-<