use strict;
use warnings;

# Tokenize every line of the DATA section and print the line together with
# the tokens found in it.
#
# A token is one of:
#   * a double-quoted string, quotes included: "..."
#   * a parenthesized group, parentheses included: (...)
#     Everything between the parentheses, whitespace included, is kept
#     verbatim as part of the single token.
#   * a run of characters that contains neither whitespace nor ')'
# Whitespace outside of parentheses only separates tokens and is discarded.
#
# Known limits of this simple state machine:
#   * Nested parentheses are not tracked; the first ')' closes the group.
#   * A ')' without a preceding '(' still emits an (empty) group token.
#   * A '(' that is never closed causes the collected text to be dropped.
#   * An unquoted '(' in the middle of a bare token stays part of that token,
#     because the bare-token character class only excludes ')' and whitespace.
while (my $line = <DATA>) {
    chomp $line;

    # Tokens other than separators, in the order they appear in the line.
    my @tokens;

    # State: are we inside or outside of a parenthesized token?
    my $in_parens  = 0;
    my $paren_text = '';

    # The alternatives are tried in order, so a quoted string wins over the
    # parenthesis and bare-token alternatives. That is what lets (")") be read
    # as a group containing the quoted string ")" instead of closing early.
    while ($line =~ /("[^"]+"|\(|\)|[^)\s]+|\s+)/g) {
        my $token = $1;

        if ($token eq '(') {
            # Starting a parenthesized token.
            $in_parens = 1;
        }
        elsif ($token eq ')') {
            # Ending a parenthesized token: add it to the list.
            $in_parens = 0;
            push @tokens, "($paren_text)";
            $paren_text = '';
        }
        elsif ($in_parens) {
            # In the middle of a parenthesized token: keep everything,
            # including whitespace, exactly as written.
            $paren_text .= $token;
        }
        elsif ($token =~ /^\S/) {
            # Not a parenthesized token: either a quoted or an unquoted
            # non-whitespace token. Pure whitespace matches fall through
            # and are dropped.
            push @tokens, $token;
        }
    }

    # Each token is wrapped in <...>. The trailing newline keeps the reports
    # of successive input lines from running into each other.
    printf "input : %s\n%s\n",
        "<$line>",
        'tokens: <' . join('> <', @tokens) . '>';
}

__DATA__
xxx "()" ("charset" "ISO-8859-1") (")") "xxx"