use strict;
use warnings;

# Print every distinct word found in the input, lowercased and sorted,
# one word per line.  Input is read through the null filehandle <>, i.e.
# from the files named in @ARGV or from STDIN when none are given.
# A "word" is a maximal run of \w characters.

# _words_in($line)
#   Returns the list of lowercased words contained in $line, in order of
#   appearance (duplicates included).  Returns an empty list when the line
#   holds no word characters at all.
sub _words_in {
    my ($line) = @_;

    my $text = lc $line;
    $text =~ s/\W+/ /g;    # every run of non-word characters becomes one space

    # split ' ' (awk mode) skips leading whitespace, so a line that starts
    # with blanks or punctuation does not yield an empty first "word" the
    # way split /\s+/ does.
    return split ' ', $text;
}

# main()
#   Collects the distinct words of all input lines and prints them sorted
#   (default string order), each terminated by a newline.
sub main {
    my %seen;

    while ( my $line = <> ) {
        # Hash slice assignment: only the keys matter, values stay undef.
        @seen{ _words_in($line) } = ();
    }

    # Terminate every word with "\n" explicitly instead of setting the
    # global $, separator, which would leave the last line unterminated.
    print map { "$_\n" } sort keys %seen;

    return;
}

# Run only when executed as a script, not when loaded via require/do.
main() unless caller;