Hi,
There was one thing left to try and that is using Inline::C. I captured the top 20 for 1e9 and 1e10.
Update 1: Set chunk_size to reduce IPC. Tried staying within CPU cache by keeping @ret small. 1e10 now completes in 4 minutes (20% improvement).
Update 2: Added collatz_count_c2 using GCC compiler intrinsics.
use strict;
use warnings;
use feature 'say';
use MCE::Flow;
use Inline C => Config =>
CCFLAGSEX => '-O2 -fomit-frame-pointer', clean_after_build => 0;
use Inline C => <<'END_OF_C_CODE';
#include <stdlib.h>
#include <stdint.h>
#if defined(_WIN32)
#define strtoull _strtoui64
#endif
void collatz_count_c1( SV* _n )
{
uint64_t n, steps = 1; /* include 1 */
Inline_Stack_Vars;
#ifdef __LP64__
n = SvUV( _n );
#else
n = strtoull( SvPV_nolen( _n ), NULL, 10 );
#endif
/* count using the T(x) notation */
do {
n % 2 ? ( steps += 2, n = (3 * n + 1) >> 1 )
: ( steps += 1, n = n >> 1 );
} while ( n != 1 );
Inline_Stack_Reset;
Inline_Stack_Push( sv_2mortal( newSVuv( steps ) ) );
Inline_Stack_Done;
}
void collatz_count_c2( SV* _n )
{
uint64_t n, steps = 1; /* include 1 */
Inline_Stack_Vars;
#ifdef __LP64__
n = SvUV( _n );
#else
n = strtoull( SvPV_nolen( _n ), NULL, 10 );
#endif
/* based on GCC compiler intrinsics demonstration by Alex Shirley
+*/
/* https://stackoverflow.com/questions/38114205/c-collatz-conjectu
+re-optimization */
/* https://www.go4expert.com/articles/builtin-gcc-functions-builti
+nclz-t29238 */
if ( n % 2 == 0 ) {
steps += __builtin_ctz(n); /* account for all evens */
n >>= __builtin_ctz(n); /* always returns an odd */
}
/* when we enter we're always working on an odd number */
do {
n = 3 * n + 1;
steps += __builtin_ctz(n) + 1; /* account for odd and even */
n >>= __builtin_ctz(n); /* always returns an odd */
} while ( n != 1 );
Inline_Stack_Reset;
Inline_Stack_Push( sv_2mortal( newSVuv( steps ) ) );
Inline_Stack_Done;
}
END_OF_C_CODE
sub collatz_count {
my ($n) = @_;
my $steps = 1; # count 1
# count using the T(x) notation
$n % 2 ? ( $steps += 2, $n = (3 * $n + 1) >> 1 )
: ( $steps += 1, $n = $n >> 1 )
while $n != 1;
return $steps;
}
no warnings 'once';
#*collatz = \&collatz_count; # choose collatz here
#*collatz = \&collatz_count_c1; # using the T(x) notation
*collatz = \&collatz_count_c2; # using compiler intrinsics
my $m = shift || 1e6;
my ( @sizes, $chunk_size );
$chunk_size = int( $m / MCE::Util::get_ncpu() / 40 + 1 );
$chunk_size += 1 if $chunk_size % 2;
MCE::Flow->init(
max_workers => MCE::Util::get_ncpu(),
chunk_size => $chunk_size,
gather => \@sizes,
bounds_only => 1,
);
mce_flow_s sub {
my ( $start, $end ) = @{ $_[1] }; my @ret;
for ( $start .. $end ) {
push @ret, [ $_, collatz($_) ];
@ret = ( sort { $b->[1] <=> $a->[1] } @ret )[ 0..19 ]
if @ret > 100;
}
( @ret > 20 )
? MCE->gather( ( sort { $b->[1] <=> $a->[1] } @ret )[ 0..19 ]
+)
: MCE->gather( @ret );
}, 1, $m;
MCE::Flow->finish;
say "@$_"
for ( sort { $b->[1] <=> $a->[1] } @sizes )[ 0..19 ];
Output
1..1e9
collatz_count 6m10.877s 32 cores Perl
collatz_count_c1 11m20.134s 1 core Inline C
collatz_count_c2 8m56.683s 1 core Inline C
collatz_count_c1 0m23.443s 32 cores 29.0x
collatz_count_c2 0m18.507s 32 cores 29.0x
670617279 987
848749995 977
954843745 972
954843751 972
716132809 969
537099606 966
537099607 966
268549803 965
805649409 964
805649410 964
805649411 964
805649415 964
402824705 963
604237057 961
604237058 961
604237059 961
302118529 960
906355586 959
906355587 959
906355588 959
1..1e10, 32 cores, Inline C
collatz_count_c1 4m00.323s
collatz_count_c2 3m09.774s
9780657630 1133
9780657631 1133
4890328815 1132
7335493223 1130
6189322407 1122
9283983611 1120
8140184737 1094
6105138553 1091
9157707830 1089
9157707831 1089
4578853915 1088
6868280873 1086
5151210655 1083
7726815983 1081
9282648843 1058
6961986633 1055
5221489974 1052
5221489975 1052
2610744987 1051
7832234962 1050
1..1e11, 32 cores, Inline C
collatz_count_c1 40m50.177s
collatz_count_c2 32m17.319s
75128138247 1229
84519155529 1224
85833820801 1224
63389366646 1221
63389366647 1221
64375365601 1221
31694683323 1220
95084049969 1219
95084049970 1219
95084049971 1219
96563048402 1219
96563048403 1219
47542024985 1218
48281524201 1218
71313037476 1216
71313037477 1216
71313037478 1216
71313037479 1216
72422286302 1216
72422286303 1216
Look at C go!
Regards, Mario
-
Are you posting in the right place? Check out Where do I post X? to know for sure.
-
Posts may use any of the Perl Monks Approved HTML tags. Currently these include the following:
<code> <a> <b> <big>
<blockquote> <br /> <dd>
<dl> <dt> <em> <font>
<h1> <h2> <h3> <h4>
<h5> <h6> <hr /> <i>
<li> <nbsp> <ol> <p>
<small> <strike> <strong>
<sub> <sup> <table>
<td> <th> <tr> <tt>
<u> <ul>
-
Snippets of code should be wrapped in
<code> tags not
<pre> tags. In fact, <pre>
tags should generally be avoided. If they must
be used, extreme care should be
taken to ensure that their contents do not
have long lines (<70 chars), in order to prevent
horizontal scrolling (and possible janitor
intervention).
-
Want more info? How to link
or How to display code and escape characters
are good places to start.