Hi vr and all,
This is a parallel version for UNIX and Windows. The lengths piddle is shared using PDL::IO::FastRaw.
Update 1: Construct seqs_c from inside workers to consume lesser memory consumption.
Update 2: Applied vr's bug fix. Plus added 2nd example that runs parallel on Windows.
Update 3: Output not 100% consistent (1e7 diff). Not suited for parallelism due to cache miss. The non-PDL solutions here and here handle cache miss. But not yet here.
9,12c9,12
< [ 616 7532665] # correct
< [ 613 5649499]
< [ 611 8474249]
< [ 608 6355687]
---
> [ 615 7532665] # wrong
> [ 612 5649499]
> [ 610 8474249]
> [ 607 6355687]
14c14
< [ 606 9533531] # correct
---
> [ 605 9533531] # wrong
UNIX:
use strict;
use warnings;
use feature 'say';
BEGIN {
# Does not work on Windows unfortunately.
die "Sorry, this script requires a UNIX based OS, exiting...\n"
if $^O eq 'MSWin32';
}
use PDL;
use File::Map; # ensure that Perl has File::Map before loading FastRaw
use PDL::IO::FastRaw;
use MCE::Signal '$tmp_dir';
use MCE::Flow;
{ no warnings 'once'; $PDL::BIGPDL = 1; }
use List::Util;
BEGIN { *_min = \&List::Util::min; # collision
*_max = \&List::Util::max } # with PDL
use constant MAX => shift || 1e7;
use constant TOP => _min( 20, MAX );
use constant CHUNK => _min( 40000, MAX ); # but keep it even
use constant MAXLEN => MAX * 1; # ?? # x(1..2)
use Time::HiRes 'time';
# create a raw file for lengths
writefraw( ones( short, 3 + MAXLEN ), "$tmp_dir/lengths" );
# memory map the raw file
my $lengths = mapfraw( "$tmp_dir/lengths" );
$lengths-> inplace-> setvaltobad( 1 );
$lengths-> set( 1, 1 );
$lengths-> set( 2, 2 );
$lengths-> set( 4, 3 );
my $t = time;
mce_flow_s {
max_workers => MCE::Util::get_ncpu(),
chunk_size => CHUNK + 1,
bounds_only => 1,
},
sub {
my ( $mce, $chunk_ref, $chunk_id ) = @_;
my ( $from, $to ) = @{ $chunk_ref };
my $seqs_c = $from + sequence( longlong, $to - $from + 1 );
if ( $chunk_id == 1 ) {
$seqs_c-> setbadat( 0 );
$seqs_c-> setbadat( 1 );
$seqs_c-> badvalue( 2 );
}
else {
$seqs_c-> setbadat( $from % 2 ? 1 : 0 );
$seqs_c-> slice([ $from % 2 ? 1 : 0, $to - $from, 2 ]) .= 2
+;
$seqs_c-> badvalue( 2 );
}
my $lengths_c = $lengths-> slice([ $from, $to ]);
my $current = zeroes( short, nelem( $seqs_c ));
while ( any $seqs_c-> isgood ) {
my ( $seqs_c_odd, $current_odd_masked )
= where( $seqs_c, $current, $seqs_c & 1 );
$current_odd_masked ++;
$current ++;
( $seqs_c_odd *= 3 ) ++;
$seqs_c >>= 1;
my ( $seqs_cap, $lengths_cap, $current_cap )
= where( $seqs_c, $lengths_c, $current,
$seqs_c <= MAXLEN );
my $lut = $lengths-> index( $seqs_cap );
# "_f" is for "finished"
my ( $seqs_f, $lengths_f, $lut_f, $current_f )
= where( $seqs_cap, $lengths_cap, $lut, $current_cap,
$lut-> isgood );
$lengths_f .= $lut_f + $current_f;
$seqs_f .= 2; # i.e. BAD
}
# "_e" is for "at even positions, ahead" ##
##
# my $from_e = _max( $from * 2, $to ) + 2; # bug ##
my $from_e = $from == 0 ? $to + 2 : $from * 2; # fixed ##
my $to_e = _min( $to * 2, MAXLEN ); ##
##
( $lengths-> slice([ $from_e, $to_e, 2 ]) ##
.= $lengths-> slice([ $from_e / 2, $to_e / 2 ])) ++ ##
if $from_e <= MAXLEN; ##
}, 0, MAX;
MCE::Flow->finish;
say {*STDERR} "compute time: ", time - $t;
# same finale
$lengths-> badflag( 0 );
$lengths = $lengths-> slice([ 1, MAX ]);
my $sorted_i = $lengths-> qsorti;
my $sorted = $lengths-> index( $sorted_i );
my $value = $sorted-> at( MAX - TOP );
my $pos = vsearch_insert_leftmost( $value, $sorted );
my $top_i = $sorted_i-> slice([ MAX - 1, $pos ]);
( my $result = $lengths
-> index( $top_i )
-> longlong
-> bitnot
-> cat( $top_i + 1 )
-> transpose
-> qsortvec
-> slice([], [ 0, TOP - 1 ])
)-> slice([ 0 ], [])
-> inplace
-> bitnot;
say $result;
say {*STDERR} "total time: ", time - $t;
Windows:
The following script works on Windows with up to 8 workers. Specifying higher than 8 workers causes PDL to emit, "PDL::Internal Error: data structure recursion limit exceeded (max 1000 levels)". I also tested on Linux. No problems there including 32 workers.
use strict;
use warnings;
use feature 'say';
use PDL;
use File::Map; # ensure that Perl has File::Map before loading FastRaw
use PDL::IO::FastRaw;
use MCE::Signal '$tmp_dir';
use MCE::Flow;
{ no warnings 'once'; $PDL::BIGPDL = 1; }
use List::Util;
BEGIN { *_min = \&List::Util::min; # collision
*_max = \&List::Util::max } # with PDL
use constant MAX => shift || 1e7;
use constant TOP => _min( 20, MAX );
use constant CHUNK => _min( 40000, MAX ); # but keep it even
use constant MAXLEN => MAX * 1; # ?? # x(1..2)
use Time::HiRes 'time';
# create a raw file for lengths
writefraw( ones( short, 3 + MAXLEN ), "$tmp_dir/lengths" );
my $t = time;
my $lengths;
MCE::Flow->init(
max_workers => _min( 8, MCE::Util::get_ncpu() ),
chunk_size => CHUNK + 1,
bounds_only => 1,
init_relay => 1,
user_begin => sub {
$lengths = mapfraw( "$tmp_dir/lengths" );
if ( MCE->wid == 1 ) {
$lengths-> inplace-> setvaltobad( 1 );
$lengths-> set( 1, 1 );
$lengths-> set( 2, 2 );
$lengths-> set( 4, 3 );
}
MCE->sync;
},
);
mce_flow_s sub {
my ( $mce, $chunk_ref, $chunk_id ) = @_;
my ( $from, $to ) = @{ $chunk_ref };
my $seqs_c = $from + sequence( longlong, $to - $from + 1 );
if ( $chunk_id == 1 ) {
$seqs_c-> setbadat( 0 );
$seqs_c-> setbadat( 1 );
$seqs_c-> badvalue( 2 );
}
else {
$seqs_c-> setbadat( $from % 2 ? 1 : 0 );
$seqs_c-> slice([ $from % 2 ? 1 : 0, $to - $from, 2 ]) .= 2
+;
$seqs_c-> badvalue( 2 );
}
my $lengths_c = $lengths-> slice([ $from, $to ]);
my $current = zeroes( short, nelem( $seqs_c ));
while ( any $seqs_c-> isgood ) {
my ( $seqs_c_odd, $current_odd_masked )
= where( $seqs_c, $current, $seqs_c & 1 );
$current_odd_masked ++;
$current ++;
( $seqs_c_odd *= 3 ) ++;
$seqs_c >>= 1;
my ( $seqs_cap, $lengths_cap, $current_cap )
= where( $seqs_c, $lengths_c, $current,
$seqs_c <= MAXLEN );
my $lut = $lengths-> index( $seqs_cap );
# "_f" is for "finished"
my ( $seqs_f, $lengths_f, $lut_f, $current_f )
= where( $seqs_cap, $lengths_cap, $lut, $current_cap,
$lut-> isgood );
$lengths_f .= $lut_f + $current_f;
$seqs_f .= 2; # i.e. BAD
}
# "_e" is for "at even positions, ahead" ##
##
# my $from_e = _max( $from * 2, $to ) + 2; # bug ##
my $from_e = $from == 0 ? $to + 2 : $from * 2; # fixed ##
my $to_e = _min( $to * 2, MAXLEN ); ##
##
MCE::relay { ##
( $lengths-> slice([ $from_e, $to_e, 2 ]) ##
.= $lengths-> slice([ $from_e / 2, $to_e / 2 ])) ++ ##
if $from_e <= MAXLEN; ##
};
}, 0, MAX;
MCE::Flow->finish;
say {*STDERR} "compute time: ", time - $t;
# same finale
$lengths = mapfraw( "$tmp_dir/lengths" );
$lengths-> badflag( 0 );
$lengths = $lengths-> slice([ 1, MAX ]);
my $sorted_i = $lengths-> qsorti;
my $sorted = $lengths-> index( $sorted_i );
my $value = $sorted-> at( MAX - TOP );
my $pos = vsearch_insert_leftmost( $value, $sorted );
my $top_i = $sorted_i-> slice([ MAX - 1, $pos ]);
( my $result = $lengths
-> index( $top_i )
-> longlong
-> bitnot
-> cat( $top_i + 1 )
-> transpose
-> qsortvec
-> slice([], [ 0, TOP - 1 ])
)-> slice([ 0 ], [])
-> inplace
-> bitnot;
say $result;
say {*STDERR} "total time: ", time - $t;
Results:
Compute time excludes initial piddle creation and final sorting/output as these run serially. Total time is captured by running time perl script.pl.
1e7:
serial total 15.272s, compute 14.674s 1 core
parallel total 8.327s, compute 7.478s 2 cores
parallel total 4.726s, compute 3.912s 4 cores
parallel total 2.855s, compute 2.025s 8 cores
parallel total 1.911s, compute 1.085s 16 cores
parallel total 1.484s, compute 0.835s 32 cores
[
[ 686 8400511]
[ 668 8865705]
[ 665 6649279]
[ 663 9973919]
[ 621 6674175]
[ 616 7332399]
[ 616 7532665]
[ 613 5649499]
[ 611 8474249]
[ 608 6355687]
[ 606 8847225]
[ 606 9533531]
[ 603 6635419]
[ 601 9953129]
[ 598 7464846]
[ 598 7464847]
[ 597 3732423]
[ 595 5598635]
[ 593 8397953]
[ 590 6298465]
]
1e8:
parallel total 11.779s, compute 5.631s 32 cores
Regards, Mario
Posts are HTML formatted. Put <p> </p> tags around your paragraphs. Put <code> </code> tags around your code and data!
Titles consisting of a single word are discouraged, and in most cases are disallowed outright.
Read Where should I post X? if you're not absolutely sure you're posting in the right place.
Please read these before you post! —
Posts may use any of the Perl Monks Approved HTML tags:
- a, abbr, b, big, blockquote, br, caption, center, col, colgroup, dd, del, div, dl, dt, em, font, h1, h2, h3, h4, h5, h6, hr, i, ins, li, ol, p, pre, readmore, small, span, spoiler, strike, strong, sub, sup, table, tbody, td, tfoot, th, thead, tr, tt, u, ul, wbr
You may need to use entities for some characters, as follows. (Exception: Within code tags, you can put the characters literally.)
|
For: |
|
Use: |
| & | | & |
| < | | < |
| > | | > |
| [ | | [ |
| ] | | ] |
Link using PerlMonks shortcuts! What shortcuts can I use for linking?
See Writeup Formatting Tips and other pages linked from there for more info.