Greetings,
The prior demonstration involves sorting (qsorti) using one core, taking ~ 5 extra seconds for 1e8. That led me to try parallel sorting inside user_end. Code for both UNIX and Windows are provided.
Update 1: Output not 100% consistent. Not suited for parallelism due to cache miss. The non-PDL solutions here and here handle cache miss. But not yet here.
Update 2: Output now 100% consistent, possible with MCE::relay.
UNIX:
use strict;
use warnings;
use feature 'say';
BEGIN {
# Does not work on Windows unfortunately.
die "Sorry, this script requires a UNIX based OS, exiting...\n"
if $^O eq 'MSWin32';
}
use PDL;
use File::Map; # ensure that Perl has File::Map before loading FastRaw
use PDL::IO::FastRaw;
use MCE::Signal '$tmp_dir';
use MCE::Flow;
use MCE::Candy;
{
no warnings 'once'; $PDL::BIGPDL = 1;
eval q{ PDL::set_autopthread_targ(1) };
}
use List::Util;
BEGIN { *_min = \&List::Util::min; # collision
*_max = \&List::Util::max } # with PDL
use constant MAX => shift || 1e7;
use constant TOP => _min( 20, MAX );
use constant CHUNK => _min( 40000, MAX ); # but keep it even
use constant MAXLEN => MAX * 1; # ?? # x(1..2)
use Time::HiRes 'time';
my $t = time;
# create a raw file for lengths
writefraw( ones( short, 3 + MAXLEN ), "$tmp_dir/lengths" );
# memory map the raw file
my $lengths = mapfraw( "$tmp_dir/lengths" );
$lengths-> inplace-> setvaltobad( 1 );
$lengths-> set( 1, 1 );
$lengths-> set( 2, 2 );
$lengths-> set( 4, 3 );
my @top_seqs;
MCE::Flow->init(
max_workers => MCE::Util::get_ncpu(),
chunk_size => CHUNK + 1,
bounds_only => 1,
init_relay => 1,
gather => MCE::Candy::out_iter_array(\@top_seqs),
user_end => sub {
# wait for any remaining workers to complete processing
MCE->sync;
my $size = MAX / MCE->max_workers + 1;
my $from = ( MCE->wid - 1 ) * $size + 1;
my $to = $from + $size;
$from++ if $from > 1;
$to = MAX if $to > MAX;
my $lengths_c = $lengths-> slice([ $from, $to ]);
$lengths_c-> badflag( 0 );
my $sorted_i = $lengths_c-> qsorti;
my $sorted = $lengths_c-> index( $sorted_i );
my $value = $sorted-> at( $to - $from - TOP );
my $pos = vsearch_insert_leftmost( $value, $sorted );
my $top_i = $sorted_i-> slice([ $to - $from, $pos ]);
( my $result = $lengths_c
-> index( $top_i )
-> longlong
-> bitnot
-> cat( $top_i + $from )
-> transpose
-> qsortvec
-> slice([], [ 0, TOP - 1 ])
)-> slice([ 0 ], [])
-> inplace
-> bitnot;
# From PDL to Perl: [ 0 1 ] becomes [ 1, 0 ],
my $str = $result->string;
$str =~ s/(\d+)\s+(\d+)(.*)/$2,$1$3,/g;
my $ret = eval $str;
MCE->gather( MCE->wid, @$ret );
},
);
mce_flow_s sub {
my ( $mce, $chunk_ref, $chunk_id ) = @_;
my ( $from, $to ) = @{ $chunk_ref };
my $seqs_c = $from + sequence( longlong, $to - $from + 1 );
if ( $chunk_id == 1 ) {
$seqs_c-> setbadat( 0 );
$seqs_c-> setbadat( 1 );
$seqs_c-> badvalue( 2 );
}
else {
$seqs_c-> setbadat( $from % 2 ? 1 : 0 );
$seqs_c-> slice([ $from % 2 ? 1 : 0, $to - $from, 2 ]) .= 2
+;
$seqs_c-> badvalue( 2 );
}
my $lengths_c = $lengths-> slice([ $from, $to ]);
my $current = zeroes( short, nelem( $seqs_c ));
while ( any $seqs_c-> isgood ) {
my ( $seqs_c_odd, $current_odd_masked )
= where( $seqs_c, $current, $seqs_c & 1 );
$current_odd_masked ++;
$current ++;
( $seqs_c_odd *= 3 ) ++;
$seqs_c >>= 1;
my ( $seqs_cap, $lengths_cap, $current_cap )
= where( $seqs_c, $lengths_c, $current,
$seqs_c <= MAXLEN );
my $lut = $lengths-> index( $seqs_cap );
# "_f" is for "finished"
my ( $seqs_f, $lengths_f, $lut_f, $current_f )
= where( $seqs_cap, $lengths_cap, $lut, $current_cap,
$lut-> isgood );
$lengths_f .= $lut_f + $current_f;
$seqs_f .= 2; # i.e. BAD
}
# "_e" is for "at even positions, ahead" ##
##
# my $from_e = _max( $from * 2, $to ) + 2; # bug ##
my $from_e = $from == 0 ? $to + 2 : $from * 2; # fixed ##
my $to_e = _min( $to * 2, MAXLEN ); ##
##
MCE::relay { ##
( $lengths-> slice([ $from_e, $to_e, 2 ]) ##
.= $lengths-> slice([ $from_e / 2, $to_e / 2 ])) ++ ##
if $from_e <= MAXLEN; ##
}; ##
}, 0, MAX;
MCE::Flow->finish;
@top_seqs = ( sort { $b->[1] <=> $a->[1]} @top_seqs )[ 0..19 ];
printf "Collatz(%5d) has sequence length of %3d steps\n", @$_
for @top_seqs;
say {*STDERR} time - $t;
Windows:
The following script works on Windows with up to 8 workers. Specifying higher than 8 workers causes PDL to emit, "PDL::Internal Error: data structure recursion limit exceeded (max 1000 levels)". I also tested on Linux. No problems there including 32 workers.
use strict;
use warnings;
use feature 'say';
use PDL;
use File::Map; # ensure that Perl has File::Map before loading FastRaw
use PDL::IO::FastRaw;
use MCE::Signal '$tmp_dir';
use MCE::Flow;
use MCE::Candy;
{
no warnings 'once'; $PDL::BIGPDL = 1;
eval q{ PDL::set_autopthread_targ(1) };
}
use List::Util;
BEGIN { *_min = \&List::Util::min; # collision
*_max = \&List::Util::max } # with PDL
use constant MAX => shift || 1e7;
use constant TOP => _min( 20, MAX );
use constant CHUNK => _min( 40000, MAX ); # but keep it even
use constant MAXLEN => MAX * 1; # ?? # x(1..2)
use Time::HiRes 'time';
my $t = time;
# create a raw file for lengths
writefraw( ones( short, 3 + MAXLEN ), "$tmp_dir/lengths" );
my $max_workers = $^O eq 'MSWin32' ? 8 : MCE::Util::get_ncpu();
my @top_seqs;
my $lengths;
MCE::Flow->init(
max_workers => _min( $max_workers, MCE::Util::get_ncpu() ),
chunk_size => CHUNK + 1,
bounds_only => 1,
init_relay => 1,
gather => MCE::Candy::out_iter_array(\@top_seqs),
user_begin => sub {
$lengths = mapfraw( "$tmp_dir/lengths" );
if ( MCE->wid == 1 ) {
$lengths-> inplace-> setvaltobad( 1 );
$lengths-> set( 1, 1 );
$lengths-> set( 2, 2 );
$lengths-> set( 4, 3 );
}
MCE->sync;
},
user_end => sub {
# wait for any remaining workers to complete processing
MCE->sync;
my $size = MAX / MCE->max_workers + 1;
my $from = ( MCE->wid - 1 ) * $size + 1;
my $to = $from + $size;
$from++ if $from > 1;
$to = MAX if $to > MAX;
my $lengths_c = $lengths-> slice([ $from, $to ]);
$lengths_c-> badflag( 0 );
my $sorted_i = $lengths_c-> qsorti;
my $sorted = $lengths_c-> index( $sorted_i );
my $value = $sorted-> at( $to - $from - TOP );
my $pos = vsearch_insert_leftmost( $value, $sorted );
my $top_i = $sorted_i-> slice([ $to - $from, $pos ]);
( my $result = $lengths_c
-> index( $top_i )
-> longlong
-> bitnot
-> cat( $top_i + $from )
-> transpose
-> qsortvec
-> slice([], [ 0, TOP - 1 ])
)-> slice([ 0 ], [])
-> inplace
-> bitnot;
# From PDL to Perl: [ 0 1 ] becomes [ 1, 0 ],
my $str = $result->string;
$str =~ s/(\d+)\s+(\d+)(.*)/$2,$1$3,/g;
my $ret = eval $str;
MCE->gather( MCE->wid, @$ret );
},
);
mce_flow_s sub {
my ( $mce, $chunk_ref, $chunk_id ) = @_;
my ( $from, $to ) = @{ $chunk_ref };
my $seqs_c = $from + sequence( longlong, $to - $from + 1 );
if ( $chunk_id == 1 ) {
$seqs_c-> setbadat( 0 );
$seqs_c-> setbadat( 1 );
$seqs_c-> badvalue( 2 );
}
else {
$seqs_c-> setbadat( $from % 2 ? 1 : 0 );
$seqs_c-> slice([ $from % 2 ? 1 : 0, $to - $from, 2 ]) .= 2
+;
$seqs_c-> badvalue( 2 );
}
my $lengths_c = $lengths-> slice([ $from, $to ]);
my $current = zeroes( short, nelem( $seqs_c ));
while ( any $seqs_c-> isgood ) {
my ( $seqs_c_odd, $current_odd_masked )
= where( $seqs_c, $current, $seqs_c & 1 );
$current_odd_masked ++;
$current ++;
( $seqs_c_odd *= 3 ) ++;
$seqs_c >>= 1;
my ( $seqs_cap, $lengths_cap, $current_cap )
= where( $seqs_c, $lengths_c, $current,
$seqs_c <= MAXLEN );
my $lut = $lengths-> index( $seqs_cap );
# "_f" is for "finished"
my ( $seqs_f, $lengths_f, $lut_f, $current_f )
= where( $seqs_cap, $lengths_cap, $lut, $current_cap,
$lut-> isgood );
$lengths_f .= $lut_f + $current_f;
$seqs_f .= 2; # i.e. BAD
}
# "_e" is for "at even positions, ahead" ##
##
# my $from_e = _max( $from * 2, $to ) + 2; # bug ##
my $from_e = $from == 0 ? $to + 2 : $from * 2; # fixed ##
my $to_e = _min( $to * 2, MAXLEN ); ##
##
MCE::relay { ##
( $lengths-> slice([ $from_e, $to_e, 2 ]) ##
.= $lengths-> slice([ $from_e / 2, $to_e / 2 ])) ++ ##
if $from_e <= MAXLEN; ##
}; ##
}, 0, MAX;
MCE::Flow->finish;
@top_seqs = ( sort { $b->[1] <=> $a->[1]} @top_seqs )[ 0..19 ];
printf "Collatz(%5d) has sequence length of %3d steps\n", @$_
for @top_seqs;
say {*STDERR} time - $t;
Results:
time perl script.pl
1e7:
serial 15.311s 1 core
parallel 7.898s 2 cores
parallel 4.229s 4 cores
parallel 2.244s 8 cores
parallel 1.265s 16 cores
parallel 0.815s 32 cores
1e8:
serial 2m38.645s 1 core
parallel 11.779s 32 cores before: serial qsorti
parallel 6.652s 32 cores after : parallel qsorti
parallel 2.656s 32 cores non-PDL solution
parallel 1.644s 32 cores non-PDL solution with Inline::C
https://perlmonks.org/?node_id=11115780
Collatz(63728127) has sequence length of 950 steps
Collatz(95592191) has sequence length of 948 steps
Collatz(96883183) has sequence length of 811 steps
Collatz(86010015) has sequence length of 798 steps
Collatz(98110761) has sequence length of 749 steps
Collatz(73583070) has sequence length of 746 steps
Collatz(73583071) has sequence length of 746 steps
Collatz(36791535) has sequence length of 745 steps
Collatz(55187303) has sequence length of 743 steps
Collatz(56924955) has sequence length of 743 steps
Collatz(82780955) has sequence length of 741 steps
Collatz(85387433) has sequence length of 741 steps
Collatz(63101607) has sequence length of 738 steps
Collatz(64040575) has sequence length of 738 steps
Collatz(93128574) has sequence length of 736 steps
Collatz(93128575) has sequence length of 736 steps
Collatz(94652411) has sequence length of 736 steps
Collatz(96060863) has sequence length of 736 steps
Collatz(46564287) has sequence length of 735 steps
Collatz(69846431) has sequence length of 733 steps
Regards, Mario
-
Are you posting in the right place? Check out Where do I post X? to know for sure.
-
Posts may use any of the Perl Monks Approved HTML tags. Currently these include the following:
<code> <a> <b> <big>
<blockquote> <br /> <dd>
<dl> <dt> <em> <font>
<h1> <h2> <h3> <h4>
<h5> <h6> <hr /> <i>
<li> <nbsp> <ol> <p>
<small> <strike> <strong>
<sub> <sup> <table>
<td> <th> <tr> <tt>
<u> <ul>
-
Snippets of code should be wrapped in
<code> tags not
<pre> tags. In fact, <pre>
tags should generally be avoided. If they must
be used, extreme care should be
taken to ensure that their contents do not
have long lines (<70 chars), in order to prevent
horizontal scrolling (and possible janitor
intervention).
-
Want more info? How to link
or How to display code and escape characters
are good places to start.