#!/usr/bin/env perl

use strict;
use warnings;

use Test::More;

# Table-driven test of split_quoted().
# Each entry is [ input string, [ expected tokens ] ].  All literals use q{},
# so a backslash pair (\\) in the source is a single backslash in the data,
# while \' and \" are kept as two characters.
my @tests = (
    [q{This is simple.}, [q{This}, q{is}, q{simple.}]],
    [q{ This is simple. }, [q{This}, q{is}, q{simple.}]],
    [q{This is "so very simple".}, [q{This}, q{is}, q{"so very simple".}]],
    [q{This "is so" very simple.}, [q{This}, q{"is so"}, q{very}, q{simple.}]],
    [q{This 'isn\'t nice.'}, [q{This}, q{'isn\'t nice.'}]],
    [q{This "isn\"t nice."}, [q{This}, q{"isn\"t nice."}]],
    [q{This 'isn\\'t nice.'}, [q{This}, q{'isn\\'t nice.'}]],
    [q{This "isn\\"t nice."}, [q{This}, q{"isn\\"t nice."}]],
    [q{This 'isn\\\\'t nice.'}, [q{This}, q{'isn\\\\'t}, q{nice.'}]],
    [q{This "isn\\\\"t nice."}, [q{This}, q{"isn\\\\"t}, q{nice."}]],
    [q{This 'is not unnice.'}, [q{This}, q{'is not unnice.'}]],
    [q{This "is not unnice."}, [q{This}, q{"is not unnice."}]],
    [q{a "bb cc" d}, [q{a}, q{"bb cc"}, q{d}]],
);

plan tests => scalar @tests;

for my $test (@tests) {
    my ($raw_str, $exp) = @$test;
    my $got = split_quoted($raw_str);

    is_deeply($got, $exp, qq{<$raw_str>: } . join('|', @$exp));
}

# split_quoted($raw_str)
#
# Split a string into tokens on runs of unquoted, unescaped space characters.
#
#   * Leading and trailing whitespace is trimmed before tokenizing.
#   * Only the literal space character separates tokens.
#   * A backslash escapes the following character; both the backslash and
#     the escaped character are kept in the token.
#   * A single or double quote opens a quoted section that runs until the
#     same (unescaped) quote character; the quote characters are kept in the
#     token, and the other kind of quote is literal inside the section.
#   * An unterminated quote simply runs to the end of the string.
#
# Returns an array reference of tokens; an empty or all-whitespace input
# yields an empty array.
sub split_quoted {
    my ($raw_str) = @_;

    # \A..\z with /s always matches, even when the input contains newlines.
    my ($str) = $raw_str =~ /\A\s*(.*?)\s*\z/s;

    my @tokens;
    my $token    = '';
    my $in_quote = '';    # the quote character currently open, or ''
    my $escape   = 0;     # true when the previous character was a backslash

    for my $char (split //, $str) {
        if ($escape) {
            $token .= $char;
            $escape = 0;
            next;
        }

        if ($char eq q{\\}) {
            $escape = 1;
            $token .= $char;
            next;
        }

        if ($char eq q{'} or $char eq q{"}) {
            if ($in_quote eq '') {
                $in_quote = $char;
            }
            elsif ($in_quote eq $char) {
                $in_quote = '';
            }
            $token .= $char;
            next;
        }

        if ($char eq ' ' and $in_quote eq '') {
            # A separator ends the current token.  Tokens always contain at
            # least one character (quotes and backslashes are kept), so the
            # token's length is a reliable "currently inside a token" test;
            # a separate flag would go stale after quote/escape characters.
            if (length $token) {
                push @tokens, $token;
                $token = '';
            }
            next;
        }

        $token .= $char;
    }

    push @tokens, $token if length $token;

    return \@tokens;
}

__END__

####

$ ./pm_11137926_str_parse.pl
1..13
ok 1 - <This is simple.>: This|is|simple.
ok 2 - < This is simple. >: This|is|simple.
ok 3 - <This is "so very simple".>: This|is|"so very simple".
ok 4 - <This "is so" very simple.>: This|"is so"|very|simple.
ok 5 - <This 'isn\'t nice.'>: This|'isn\'t nice.'
ok 6 - <This "isn\"t nice.">: This|"isn\"t nice."
ok 7 - <This 'isn\'t nice.'>: This|'isn\'t nice.'
ok 8 - <This "isn\"t nice.">: This|"isn\"t nice."
ok 9 - <This 'isn\\'t nice.'>: This|'isn\\'t|nice.'
ok 10 - <This "isn\\"t nice.">: This|"isn\\"t|nice.'
ok 11 - <This 'is not unnice.'>: This|'is not unnice.'
ok 12 - <This "is not unnice.">: This|"is not unnice."
ok 13 - <a "bb cc" d>: a|"bb cc"|d