Building on kcott's approach (and his test cases and their underlying assumptions), here's a regex-based solution. I've added a few test cases of my own, but their validity is questionable because I don't fully understand perl-diddler's requirements. No attempt has been made to compare performance.
Win8 Strawberry 5.8.9.5 (32) Sun 10/24/2021 3:14:25
C:\@Work\Perl\monks
>perl
use strict;
use warnings;
use Test::More;
use Test::NoWarnings;
# Pretty-print an array for test names and diagnostics.
#
# Takes one argument, a reference to an array of strings, and returns a
# single string in which every element is delimited by '|' characters and
# elements are separated by a space: [ 'a', 'b c' ] becomes '|a| |b c|'.
# The delimiters make leading/trailing whitespace in an element visible.
sub pp {
    my ($ar_items) = @_;

    # join() gives the same result as interpolating the array with a
    # localized $" of '| |', without touching a global variable.
    return '|' . join('| |', @{$ar_items}) . '|';
}
# Test vectors.
#
# A plain string is a section heading that is only echoed to the test
# output.  An array ref is one test vector of the form
#     [ input string, [ expected extracted substrings ] ]
#
# Note that inside q{...} a doubled backslash denotes a single backslash,
# so q{isn\\\\'t} holds the text  isn\\'t  (an escaped backslash followed
# by a quote that therefore is NOT escaped).
my @tests = (
    q{all '- and "-quotes properly balanced},
    [ q{This is simple.},           [ q{This}, q{is}, q{simple.} ] ],
    [ q{ This is simple. },         [ q{This}, q{is}, q{simple.} ] ],
    [ q{This is "so very simple".}, [ q{This}, q{is}, q{"so very simple".} ] ],
    [ q{This "is so" very simple.}, [ q{This}, q{"is so"}, q{very}, q{simple.} ] ],
    [ q{This 'isn\'t nice.'},       [ q{This}, q{'isn\'t nice.'} ] ],
    [ q{This "isn\"t nice."},       [ q{This}, q{"isn\"t nice."} ] ],
    [ q{This 'isn\\\\'t nice.'},    [ q{This}, q{'isn\\\\'t}, q{nice.'} ] ],
    [ q{This "isn\\\\"t nice."},    [ q{This}, q{"isn\\\\"t}, q{nice."} ] ],
    [ q{This 'is not unnice.'},     [ q{This}, q{'is not unnice.'} ] ],
    [ q{This "is not unnice."},     [ q{This}, q{"is not unnice."} ] ],
    [ q{a "bb cc" d},               [ q{a}, q{"bb cc"}, q{d} ] ],

    q{UNbalanced '- and "-quotes at absolute end of string},
    [ q{This is "so very simple},   [ q{This}, q{is}, q{"so very simple} ] ],
    [ q{This 'isn\'t nice.},        [ q{This}, q{'isn\'t nice.} ] ],
    [ q{This "isn\"t nice.},        [ q{This}, q{"isn\"t nice.} ] ],
    [ q{This 'isn\\\\'t nice.},     [ q{This}, q{'isn\\\\'t}, q{nice.} ] ],
    [ q{This "isn\\\\"t nice.},     [ q{This}, q{"isn\\\\"t}, q{nice.} ] ],
    [ q{This 'is not unnice.},      [ q{This}, q{'is not unnice.} ] ],
    [ q{This "is not unnice.},      [ q{This}, q{"is not unnice.} ] ],

    'what about these questionable cases?',
    [ q{is this"really so"simple now?}, [ q{is}, q{this"really so"simple}, q{now?} ] ],
    [ q{is this"really so" now?},       [ q{is}, q{this"really so"},       q{now?} ] ],
    [ q{is "really so"simple now?},     [ q{is}, q{"really so"simple},     q{now?} ] ],
    [ q{is this'really so'simple now?}, [ q{is}, q{this'really so'simple}, q{now?} ] ],
    [ q{is this'really so' now?},       [ q{is}, q{this'really so'},       q{now?} ] ],
    [ q{is 'really so'simple now?},     [ q{is}, q{'really so'simple},     q{now?} ] ],
);
# Modules that contribute one extra test each on top of the vectors.
my @additional = qw(Test::NoWarnings);

# Only the array-ref entries of @tests are real test vectors; the plain
# strings are headings and do not count toward the plan.
my $vector_count = grep { ref($_) eq 'ARRAY' } @tests;
plan tests => $vector_count + @additional;
# Quoted-substring patterns.  A backslash escapes ANY single character
# (including a quote or another backslash; /s lets '.' match a newline).
# Each pattern matches an opening quote, then any run of non-backslash,
# non-quote characters and backslash-escaped pairs, and finally either the
# matching closing quote or the absolute end of the string (\z), so a
# quote left open at the end of the string is still treated as quoted.
#
# NOTE(review): an unterminated quote whose very last character is a lone
# backslash satisfies neither the closing quote nor \z, so the quoted
# alternative fails there and the \S+ fallback splits that text on
# whitespace -- confirm whether this is the intended behaviour.
my $rx_dq = qr{ " [^\\"]* (?: \\. [^\\"]*)* (?: " | \z) }xms;
my $rx_sq = qr{ ' [^\\']* (?: \\. [^\\']*)* (?: ' | \z) }xms;
my $rx_q = qr{ $rx_dq | $rx_sq }xms;

# Match quoted or non-space substrings.  Alternation order is critical:
# the quoted form (optionally with quote-free, space-free text glued to
# either side) must be tried before the plain \S+ fallback.
# Simpler variant, sufficient for the non-questionable cases only:
# my $rx_extract = qr{ $rx_q \S* | \S+ }xms;
my $rx_extract = qr{ [^'"\s]* $rx_q [^'"\s]* | \S+ }xms;
# Run every test vector: extract all substrings from the input with
# $rx_extract and compare them, in order, against the expected list.
CASE:
foreach my $case (@tests) {
    # A non-reference entry is a section heading: echo it and move on.
    unless (ref $case) {
        note($case);
        next CASE;
    }

    my $input    = $case->[0];
    my $expected = $case->[1];

    # List-context /g match returns every matched substring.
    my @extracted = ($input =~ m{ $rx_extract }xmsg);

    my $test_name = "|$input| -> " . pp($expected);
    is_deeply(\@extracted, $expected, $test_name);
}
^Z
1..25
# all '- and "-quotes properly balanced
ok 1 - |This is simple.| -> |This| |is| |simple.|
ok 2 - | This is simple. | -> |This| |is| |simple.|
ok 3 - |This is "so very simple".| -> |This| |is| |"so very simple".|
ok 4 - |This "is so" very simple.| -> |This| |"is so"| |very| |simple.|
ok 5 - |This 'isn\'t nice.'| -> |This| |'isn\'t nice.'|
ok 6 - |This "isn\"t nice."| -> |This| |"isn\"t nice."|
ok 7 - |This 'isn\\'t nice.'| -> |This| |'isn\\'t| |nice.'|
ok 8 - |This "isn\\"t nice."| -> |This| |"isn\\"t| |nice."|
ok 9 - |This 'is not unnice.'| -> |This| |'is not unnice.'|
ok 10 - |This "is not unnice."| -> |This| |"is not unnice."|
ok 11 - |a "bb cc" d| -> |a| |"bb cc"| |d|
# UNbalanced '- and "-quotes at absolute end of string
ok 12 - |This is "so very simple| -> |This| |is| |"so very simple|
ok 13 - |This 'isn\'t nice.| -> |This| |'isn\'t nice.|
ok 14 - |This "isn\"t nice.| -> |This| |"isn\"t nice.|
ok 15 - |This 'isn\\'t nice.| -> |This| |'isn\\'t| |nice.|
ok 16 - |This "isn\\"t nice.| -> |This| |"isn\\"t| |nice.|
ok 17 - |This 'is not unnice.| -> |This| |'is not unnice.|
ok 18 - |This "is not unnice.| -> |This| |"is not unnice.|
# what about these questionable cases?
ok 19 - |is this"really so"simple now?| -> |is| |this"really so"simple| |now?|
ok 20 - |is this"really so" now?| -> |is| |this"really so"| |now?|
ok 21 - |is "really so"simple now?| -> |is| |"really so"simple| |now?|
ok 22 - |is this'really so'simple now?| -> |is| |this'really so'simple| |now?|
ok 23 - |is this'really so' now?| -> |is| |this'really so'| |now?|
ok 24 - |is 'really so'simple now?| -> |is| |'really so'simple| |now?|
ok 25 - no warnings
Give a man a fish: <%-{-{-{-<