Beefy Boxes and Bandwidth Generously Provided by pair Networks
Welcome to the Monastery
 
PerlMonks  

comment on

( #3333=superdoc: print w/replies, xml ) Need Help??

Here's my take. One thing I don't understand is the inclusion of  > < characters in the pre- and post-markup tag delimiters (update: e.g.,  my $pre  = qr/(^|\s|>)/; here), probably because I'm not familiar with wikisyntax. Can you link me to a discussion of the role of these characters? I prepared two versions, one using  (?(DEFINE) ...) and one based purely on  qr// interpolation. Maybe one is faster, but I haven't done any Benchmark-ing (nor am I likely to).

File W2H_1.pm:
# W2H_1.pm 15feb20waw package W2H_1; use 5.010; # need regex extensions use strict; use warnings; # use Data::Dump qw(pp dd); # for debug use constant DEBUG => 0; use constant { DBPR_rx => 0 && DEBUG, }; my %w2h_map = qw(* b / i _ u); # wiki markup -> HTML tag mapping my $mq_wiki_mark = quotemeta join '', keys %w2h_map; # meta-quoted wi +ki my $rx_wml = qr{ ((?&PRE_OPEN) (?&WIKI_MARK)) # $1: openin +g wikimark ((?: (?: (?: (?! \g-2 (?&POST_CLOSE)) .)*+) | (?R) )*) # $2: marked + body \g-2 (?&POST_CLOSE) # closing wi +kimark (?(DEFINE) (?<PRE_OPEN> (?<! [^>\s$mq_wiki_mark])) # after string start, > + or \s or wml (?<POST_CLOSE> (?! [^<\s$mq_wiki_mark])) # before string end, < + or \s or wml (?<WIKI_MARK> [\Q$mq_wiki_mark\E]) ) }xms; DBPR_rx and print $rx_wml, "\n"; # only used for development diagnostics my $rx_pre_open = qr{ (?<! [^\s>]) }xms; my $rx_post_close = qr{ (?! [^\s<]) }xms; my $rx_wiki_mark = qr{ [$mq_wiki_mark] }xms; # only used for development diagnostics die q{bad '\A wm' match} unless '_' =~ m{ $rx_pre_open $rx_wiki_mar +k }xms; die q{bad '\s wm' match} unless ' _' =~ m{ $rx_pre_open $rx_wiki_mar +k }xms; die q{bad '> wm' match} unless '>_' =~ m{ $rx_pre_open $rx_wiki_mar +k }xms; die q{bad 'wm \z' match} unless '_' =~ m{ $rx_wiki_mark $rx_post_clo +se }xms; die q{bad 'wm \s' match} unless '_ ' =~ m{ $rx_wiki_mark $rx_post_clo +se }xms; die q{bad 'wm <' match} unless '_<' =~ m{ $rx_wiki_mark $rx_post_clo +se }xms; # subroutines ###################################################### sub w2h { # not exported; invoke fully-qualified my ($wiki, # string possibly containing wiki markup ) = @_; (my $html = $wiki) =~ s{ $rx_wml # captures to $1 $2 } { my ($html_tag, $body) = ($w2h_map{$1}, w2h($2)); "<$html_tag>$body</$html_tag>"; }xmsge; return $html; } 1;
File W2H_2.pm:
# W2H_2.pm 15feb20waw package W2H_2; use 5.010; # need regex extensions use strict; use warnings; # use Data::Dump qw(pp dd); # for debug use constant DEBUG => 0; use constant { DBPR_rx => 0 && DEBUG, }; my %w2h_map = qw(* b / i _ u); # wiki markup -> HTML tag mapping my $mq_wiki_mark = quotemeta join '', keys %w2h_map; # meta-quoted wi +ki my $rx_pre_open = qr{ (?<! [^>\s$mq_wiki_mark]) }xms; my $rx_post_close = qr{ (?! [^<\s$mq_wiki_mark]) }xms; my $rx_wiki_mark = qr{ [$mq_wiki_mark] }xms; my $rx_wml = qr{ ($rx_pre_open $rx_wiki_mark) # $1: openin +g wikimark ((?: (?: (?: (?! \g-2 $rx_post_close) .)*+) | (?R) )*) # $2: marked + body \g-2 $rx_post_close # closing wi +kimark }xms; DBPR_rx and print $rx_wml, "\n"; # only used for development diagnostics die q{bad '\A wm' match} unless '_' =~ m{ $rx_pre_open $rx_wiki_mar +k }xms; die q{bad '\s wm' match} unless ' _' =~ m{ $rx_pre_open $rx_wiki_mar +k }xms; die q{bad '> wm' match} unless '>_' =~ m{ $rx_pre_open $rx_wiki_mar +k }xms; die q{bad 'wm \z' match} unless '_' =~ m{ $rx_wiki_mark $rx_post_clo +se }xms; die q{bad 'wm \s' match} unless '_ ' =~ m{ $rx_wiki_mark $rx_post_clo +se }xms; die q{bad 'wm <' match} unless '_<' =~ m{ $rx_wiki_mark $rx_post_clo +se }xms; # subroutines ###################################################### sub w2h { # not exported; invoke fully-qualified my ($wiki, # string possibly containing wiki markup ) = @_; (my $html = $wiki) =~ s{ $rx_wml # captures to $1 $2 } { my ($html_tag, $body) = ($w2h_map{$1}, w2h($2)); "<$html_tag>$body</$html_tag>"; }xmsge; return $html; } 1;
File W2H.t:
# W2H.t 15feb20waw use strict; use warnings; # use Data::Dump qw(pp dd); # for debug use Test::More 'no_plan'; use Test::NoWarnings; BEGIN { use_ok 'W2H_1'; use_ok 'W2H_2'; } use constant DEBUG => 1; use constant { DBPR_m_d1 => 0 && DEBUG, }; my @Tests = ( [ '_abcd_' => '<u>abcd</u>', ], [ '_/abcd/_' => '<u><i>abcd</i></u>', ], [ '*_/abcd/_*' => '<b><u><i>abcd</i></u></b>', ], [ '*_/a*b_c/d/_*' => '<b><u><i>a*b_c/d</i></u></b>', ], [ ' *_/a*b_c/d/_* ' => ' <b><u><i>a*b_c/d</i></u></b> ', ], [ '_abc__def_' => '<u>abc</u><u>def</u>', ], [ '_abc__def_*ghi**jkl*/mno//pqr/' => '<u>abc</u><u>def</u><b>ghi</b><b>jkl</b><i>mno</i><i>pqr</i>', ], [ ' _abc__def_*ghi**jkl*/mno//pqr/ ' => ' <u>abc</u><u>def</u><b>ghi</b><b>jkl</b><i>mno</i><i>pqr</i> ', ], [ '_abc_ _def_ *ghi* *jkl* /mno/ /pqr/' => '<u>abc</u> <u>def</u> <b>ghi</b> <b>jkl</b> <i>mno</i> <i>pqr</i> +', ], [ '_abc_ /def/' => '<u>abc</u> <i>def</i>', ], [ ' _abc_ /def/ ' => ' <u>abc</u> <i>def</i> ', ], [ 'x _abc_ /def/ x' => 'x <u>abc</u> <i>def</i> x', ], [ '_abc /xyz/ cba_ /def/' => '<u>abc <i>xyz</i> cba</u> <i>def</i>', + ], [ '_/one *t/w*o*/ th/r_ee_ null' => '<u><i>one <b>t/w*o</b></i> th/r +_ee</u> null', ], [ '_/one *two*/ th/ree_ null _/f*ur *five*/ six_ null _/se_ven *eig +ht*/ nine_ *fail_', => '<u><i>one <b>two</b></i> th/ree</u> null <u><i>f*ur <b>five</b></ +i> six</u> null <u><i>se_ven <b>eight</b></i> nine</u> *fail_', 'from pm#11112991' ], ); # testing, testing... FUNT: for my $func_name ( 'W2H_1::w2h', 'W2H_2::w2h', ) { note "\n=== testing $func_name() ===\n\n"; *w2h = do { no strict 'refs'; *$func_name; }; VECTOR: for my $ar_vector (@Tests) { if (not ref $ar_vector) { note $ar_vector; next VECTOR; } my ($wiki, $expected, $comment) = @$ar_vector; $comment = defined $comment ? "$comment: " : ''; my $got = w2h($wiki); DBPR_m_d1 and diag ":$wiki:"; DBPR_m_d1 and diag ":$got:"; is $got, $expected, "${comment}'$wiki' -> '$expected'"; } # end for VECTOR } # end for FUNT done_testing; exit; # subroutines ###################################################### # none for now


Give a man a fish:  <%-{-{-{-<


In reply to Re: wiki regex reprocessing replacement by AnomalousMonk
in thread wiki regex reprocessing replacement by LanX

Title:
Use:  <p> text here (a paragraph) </p>
and:  <code> code here </code>
to format your post; it's "PerlMonks-approved HTML":



  • Posts are HTML formatted. Put <p> </p> tags around your paragraphs. Put <code> </code> tags around your code and data!
  • Titles consisting of a single word are discouraged, and in most cases are disallowed outright.
  • Read Where should I post X? if you're not absolutely sure you're posting in the right place.
  • Please read these before you post! —
  • Posts may use any of the Perl Monks Approved HTML tags:
    a, abbr, b, big, blockquote, br, caption, center, col, colgroup, dd, del, div, dl, dt, em, font, h1, h2, h3, h4, h5, h6, hr, i, ins, li, ol, p, pre, readmore, small, span, spoiler, strike, strong, sub, sup, table, tbody, td, tfoot, th, thead, tr, tt, u, ul, wbr
  • You may need to use entities for some characters, as follows. (Exception: Within code tags, you can put the characters literally.)
            For:     Use:
    & &amp;
    < &lt;
    > &gt;
    [ &#91;
    ] &#93;
  • Link using PerlMonks shortcuts! What shortcuts can I use for linking?
  • See Writeup Formatting Tips and other pages linked from there for more info.
  • Log In?
    Username:
    Password:

    What's my password?
    Create A New User
    Chatterbox?
    and the web crawler heard nothing...

    How do I use this? | Other CB clients
    Other Users?
    Others avoiding work at the Monastery: (4)
    As of 2020-08-05 17:02 GMT
    Sections?
    Information?
    Find Nodes?
    Leftovers?
      Voting Booth?
      Which rocket would you take to Mars?










      Results (36 votes). Check out past polls.

      Notices?