use v5.12;
use warnings;
use Test::More;

# Tokenizer test script.
#
# Breaks a string into whitespace-separated tokens while keeping quoted
# runs ('...' or "...") together, even when they contain whitespace.  A
# backslash always protects the character that follows it, and a quote that
# is never closed runs to the end of the line.

# An "escape pair": a backslash followed by any one character.
my $escaped = qr/\\./;

# A quoted run: opening quote, lazily consumed body, then either the same
# quote character again or the end of the line.
my $quoted = qr/
    (['"])              # --- start-quote
    (?:                 # --- inside
        $escaped        # any escape-pair
    |   .               # anything else
    )*?                 # non-greedy
    (?:                 # --- end
        \g{-1}          # same quote
    |   $               # EOL ends missing pair
    )
/x;

# One token: any mix of escape pairs, quoted runs and non-blank characters.
my $re = qr/
    (?:
        $escaped        # any escape pair
    |   $quoted         # any quoted string
    |   \S              # any non-whitespace
    )+                  # at least once
/x;

# tokens_of($text)
#
# Returns the list of tokens found in $text, in order of appearance; the
# list is empty when $text holds no token at all.
sub tokens_of {
    my ($text) = @_;

    my @tokens;

    # The whole token is captured explicitly instead of being read from $&,
    # which slows down every regex in the program on perls before 5.20.
    # The extra group is harmless: the quote back-reference inside $quoted
    # is relative (\g{-1}), so it still points at the opening quote.
    while ($text =~ /($re)/g) {
        push @tokens, $1;
    }

    return @tokens;
}

# Each entry is [ input string, [ expected tokens ] ].
my @tests = (
    # q{all '- and "-quotes properly balanced},
    [ q{This is simple.},          [ q{This}, q{is}, q{simple.} ] ],
    [ q{ This is simple. },        [ q{This}, q{is}, q{simple.} ] ],
    [ q{This is "so very simple".}, [ q{This}, q{is}, q{"so very simple".} ] ],
    [ q{This "is so" very simple.}, [ q{This}, q{"is so"}, q{very}, q{simple.} ] ],
    [ q{This 'isn\'t nice.'},      [ q{This}, q{'isn\'t nice.'} ] ],
    [ q{This "isn\"t nice."},      [ q{This}, q{"isn\"t nice."} ] ],
    [ q{This 'isn\\\\'t nice.'},   [ q{This}, q{'isn\\\\'t}, q{nice.'} ] ],
    [ q{This "isn\\\\"t nice."},   [ q{This}, q{"isn\\\\"t}, q{nice."} ] ],
    [ q{This 'is not unnice.'},    [ q{This}, q{'is not unnice.'} ] ],
    [ q{This "is not unnice."},    [ q{This}, q{"is not unnice."} ] ],
    [ q{a "bb cc" d},              [ q{a}, q{"bb cc"}, q{d} ] ],

    # q{UNbalanced '- and "-quotes at absolute end of string},
    [ q{This is "so very simple},  [ q{This}, q{is}, q{"so very simple} ] ],
    [ q{This 'isn\'t nice.},       [ q{This}, q{'isn\'t nice.} ] ],
    [ q{This "isn\"t nice.},       [ q{This}, q{"isn\"t nice.} ] ],
    [ q{This 'isn\\\\'t nice.},    [ q{This}, q{'isn\\\\'t}, q{nice.} ] ],
    [ q{This "isn\\\\"t nice.},    [ q{This}, q{"isn\\\\"t}, q{nice.} ] ],
    [ q{This 'is not unnice.},     [ q{This}, q{'is not unnice.} ] ],
    [ q{This "is not unnice.},     [ q{This}, q{"is not unnice.} ] ],

    # 'what about these questionable cases?',
    [ q{is this"really so"simple now?}, [ q{is}, q{this"really so"simple}, q{now?} ] ],
    [ q{is this"really so" now?},       [ q{is}, q{this"really so"}, q{now?} ] ],
    [ q{is "really so"simple now?},     [ q{is}, q{"really so"simple}, q{now?} ] ],
    [ q{is this'really so'simple now?}, [ q{is}, q{this'really so'simple}, q{now?} ] ],
    [ q{is this'really so' now?},       [ q{is}, q{this'really so'}, q{now?} ] ],
    [ q{is 'really so'simple now?},     [ q{is}, q{'really so'simple}, q{now?} ] ],
);

plan tests => scalar @tests;

for my $test (@tests) {
    my ($input, $expected) = @{$test};

    # A real array (rather than an autovivified reference) means an input
    # without any token compares as [] instead of undef.
    my @got = tokens_of($input);

    is_deeply(\@got, $expected, qq{<$input>: } . join('|', @{$expected}));
}

####
# Recorded output of one run of this script.
# NOTE(review): the "<input>" part of most test names is missing below; it
# appears to have been stripped when the log was pasted -- confirm.
#
# -*- mode: compilation; default-directory: "d:/tmp/pm/" -*-
# Compilation started at Sun Oct 24 14:00:21
#
# C:/Strawberry/perl/bin\perl.exe -w d:/tmp/pm/break_not_quoted.pl
# 1..24
# ok 1 - : This|is|simple.
# ok 2 - < This is simple. >: This|is|simple.
# ok 3 - : This|is|"so very simple".
# ok 4 - : This|"is so"|very|simple.
# ok 5 - : This|'isn\'t nice.'
# ok 6 - : This|"isn\"t nice."
# ok 7 - : This|'isn\\'t|nice.'
# ok 8 - : This|"isn\\"t|nice."
# ok 9 - : This|'is not unnice.'
# ok 10 - : This|"is not unnice."
# ok 11 - : a|"bb cc"|d
# ok 12 - : This|is|"so very simple
# ok 13 - : This|'isn\'t nice.
# ok 14 - : This|"isn\"t nice.
# ok 15 - : This|'isn\\'t|nice.
# ok 16 - : This|"isn\\"t|nice.
# ok 17 - : This|'is not unnice.
# ok 18 - : This|"is not unnice.
# ok 19 - : is|this"really so"simple|now?
# ok 20 - : is|this"really so"|now?
# ok 21 - : is|"really so"simple|now?
# ok 22 - : is|this'really so'simple|now?
# ok 23 - : is|this'really so'|now?
# ok 24 - : is|'really so'simple|now?
#
# Compilation finished at Sun Oct 24 14:00:21