I made a parallel demonstration. The results are taken from a 32-core Linux box. On Windows, run Cygwin's Perl for best performance.
Basically, the workers process the letters "a" through "z" and gather their results in order. I was hoping to gather an array of dualvars, but I had forgotten that serialization removes the numeric part.
#!/usr/bin/env perl
# https://www.perlmonks.org/?node_id=11148465
use warnings;
use strict;
use feature qw{ say };
use Scalar::Util qw{ dualvar };
use MCE;
# At least one input file must be named on the command line.
unless (@ARGV) {
    die "Usage: $0 input1 [ input2 ... ]\n";
}

# Move the file names out of @ARGV (leaving it empty) and make sure every
# one of them is readable before any work is started.
my @infiles = splice @ARGV;
for (@infiles) {
    next if -r $_;
    die "Cannot open '$_'";
}
# MCE gather and parallel routines.
# Accumulated results: one dualvar per gathered pair, with the count as the
# numeric part and the word as the string part.
our @DATA;

# Gather callback.
#
# Takes a reference to a flat list of (number, string) pairs and appends one
# dualvar per pair to @DATA. The referenced array is consumed: it is empty
# when this routine returns. The pairs arrive flattened rather than as
# ready-made dualvars; per the author's note, the numeric part of a dualvar
# does not survive serialization.
sub gather_routine {
    my ($data_ref) = @_;

    # Peel off two elements at a time until nothing is left.
    while (my ($count, $word) = splice @{ $data_ref }, 0, 2) {
        push @DATA, dualvar($count, $word);
    }
}
# MCE user function; $_ holds the letter this call is responsible for.
#
# Every file in @infiles is scanned for lines whose first character equals
# that letter. Each matching line is split on a tab into (word, count) and
# the counts are summed per word. The totals are then ordered by word and
# handed to the gather callback as a flat (count, word, count, word, ...)
# list.
#
# A file that cannot be opened is reported on STDERR and skipped, so the
# worker still reaches its MCE::relay call.
sub parallel_routine {
    my $char = $_;
    my %by_word;

    for my $file (@infiles) {
        my $fh;
        if (!open $fh, '<', $file) {
            warn "Cannot open '$file': $!\n";
            next;
        }

        # Read into a lexical so the global $_ is left alone.
        while (my $line = <$fh>) {
            next unless substr($line, 0, 1) eq $char;
            chomp $line;
            my ($k, $v) = split /\t/, $line;
            $by_word{$k} += $v;
        }
        close $fh;
    }

    # One dualvar per word: numeric part = summed count, string part = word.
    my @data;
    while (my ($k, $v) = each %by_word) {
        push @data, dualvar($v, $k);
    }

    # The default sort compares the string parts, i.e. it orders by word.
    # Each dualvar is then flattened into an explicit (number, string) pair.
    my @ret;
    push(@ret, 0+$_, "$_") for sort @data;

    # NOTE(review): the relay block presumably makes the gather calls happen
    # in input (letter) order - confirm against the MCE::Relay documentation.
    MCE::relay { MCE->gather(\@ret) };
}
# Run parallel using MCE: one parallel_routine() call per letter, results
# collected by gather_routine() into @DATA.
warn "start\n";

# The built-in time() counts whole seconds, so a short phase can report 0.
my $tstart1 = time;
MCE->new(
    input_data  => ['a'..'z'],
    max_workers => 7,
    chunk_size  => 1,
    init_relay  => 1,
    posix_exit  => 1,
    gather      => \&gather_routine,
    user_func   => \&parallel_routine,
    use_threads => 0,
)->run(1);
my $tend1 = time;
warn "get properties + pre-sort: ", $tend1 - $tstart1, " secs\n";

# Output dualvar data, sorted by count (highest first). Each line is the
# string part, a tab, then the numeric part.
# NOTE(review): entries with equal counts are expected to stay in gathered
# order (by word), which relies on Perl's sort being stable - confirm.
$| = 0; # enable output buffering
my $tstart2 = time;
say "$_\t".(0+$_) for sort { $b <=> $a } @DATA;
my $tend2 = time;
warn "final sort + output: ", $tend2 - $tstart2, " secs\n";
warn "total: ", $tend2 - $tstart1, " secs\n";