#!/usr/bin/env perl
use 5.022;    # \b{wb} (Unicode word-boundary assertion) needs Perl 5.22+
use strict;
use warnings;

# Split the text in the DATA section into tokens and print each token on
# its own line, wrapped in braces, with spaces and newlines made visible.

# Slurp the whole book in one read ($/ is undefined only inside the block).
my $book = do { local $/; <DATA> } // '';

show($_) for tokenize($book);
print "\n";

# tokenize($text): split $text into a list of tokens, in order.
#
# A token is one of:
#   * a run of newlines,
#   * a run of other non-word characters (spaces, punctuation),
#   * a word, delimited by \b{wb} so that "one's" stays in one piece.
#
# The newline alternative has to come first, and the separator class has
# to exclude "\n".  With a plain \W+ tried first, newlines were absorbed
# into the neighbouring punctuation, giving groups such as {--$}, {?"$$}
# and {.$$_________} instead of separate newline tokens.
#
# Returns the list of tokens (empty list for undefined or empty input).
sub tokenize {
    my ($text) = @_;
    return if !defined $text;

    # One capture group only: in list context m//g then yields exactly one
    # element per token, with no undef placeholders for unused groups.
    my @tokens = $text =~ /
        (
            \n+                   # run of newlines, kept as its own token
          | [^\w\n]+              # spaces and punctuation, never a newline
          | \b{wb} .+? \b{wb}     # shortest span between word boundaries
        )
    /xg;

    return @tokens;
}

# show($str): print $str inside braces followed by a newline, with every
# newline in $str shown as '$' and every space shown as '_'.
sub show {
    my ($str) = @_;

    # tr does not interpolate, so '$' and '_' here are literal characters.
    $str =~ tr/\n /$_/;

    print "{$str}\n";
    return;
}

__DATA__
--First paragraph--
Second one's followed by only one newline. "Hello," she said, "How's tricks?"

Third paragraph doesn't end with any punctuation ... and the splitting works

4th one is separated by two newlines.

         The End.