As the one suggesting Mo-Do as another datapoint, I feel obliged to add something productive, so here's the (deparsed) oneliner I used to look at the statistics of lyrics:
# Word-frequency statistics for text (e.g. song lyrics) read from the files
# named on the command line, or from STDIN when none are given.
#
# Output, in order:
#   1. one "word: count" line per distinct word, sorted by string order;
#   2. "<distinct> distinct words in <total> total";
#   3. the distinct/total ratio with two decimals.
#
# 'say' is needed for the output; 'unicode_strings' keeps lc() using Unicode
# rules on every string, which the original (commented-out) feature list
# also enabled.
use strict;
use warnings;
use feature qw(say unicode_strings);

my %count;        # lower-cased word => number of occurrences
my $total = 0;    # number of words seen, duplicates included

while (my $line = <>) {
    chomp $line;

    # A "word" is any maximal run of \w characters; /u asks for Unicode
    # semantics when classifying characters.
    for my $word ($line =~ /(\w+)/gu) {
        ++$total;
        ++$count{ lc $word };
    }
}

my $distinct = keys %count;

for my $word (sort keys %count) {
    say "${word}: $count{$word}";
}
say sprintf('%d distinct words in %d total', $distinct, $total);

# Guard the ratio: with empty input $total is 0 and the division would die
# with "Illegal division by zero". Report 0.00 in that case instead.
say sprintf('%0.2f', $total ? $distinct / $total : 0);