I finally got around to benchmarking. (Removing the nulls left by the bitwise & in place, using tr, is the saving grace!):
#! perl -slw
use strict;
use Time::HiRes qw[ time ];
# Table of benchmark factories; the driver picks one by index.
#
# Each factory takes the mask string (one '0' or '1' per input column),
# prints its own label, performs its one-off setup and returns a closure.
# That closure takes an open filehandle, applies the mask to every line it
# reads from it and returns the number of lines processed. The masked
# string itself is built and then discarded -- only the work is measured.
#
# All four variants keep the characters whose mask digit is '1' and drop
# those whose mask digit is '0'.
my @benches = (

    # [0] unpack: turn the mask into an unpack template.
    sub {
        printf 'unpack: ';
        my $mask = shift;

        # One template item per run of identical digits:
        # "x<n>" skips a run of 0s, "a<n>" extracts a run of 1s.
        my $templ = '';
        while ( $mask =~ /((.)\2*)/g ) {
            $templ .= ( qw(x a) )[$2] . length $1;
        }

        return sub {
            my $fh    = shift;
            my $count = 0;
            my $out;
            while ( <$fh> ) {
                # join is a list operator: the parentheses end its list
                # here, so the counter is not appended to the result.
                $out = join( '', unpack( $templ, $_ ) );
                ++$count;
            }
            return $count;
        };
    },

    # [1] substr: cut every run of 0-masked columns out with 4-arg substr.
    sub {
        printf 'substr: ';
        my $mask = shift;

        # [ offset, length ] of each run of 0s, in left-to-right order.
        my @mask;
        while ( $mask =~ /0+/g ) {
            push @mask, [ $-[0], $+[0] - $-[0] ];
        }

        return sub {
            my $fh    = shift;
            my $count = 0;
            my $out;
            while ( defined( $out = <$fh> ) ) {
                # Walk the runs from last to first so that removing one
                # run does not shift the offsets of those still to go.
                substr( $out, $mask[-$_][0], $mask[-$_][1], '' )
                    for 1 .. @mask;
                ++$count;
            }
            return $count;
        };
    },

    # [2] substrref: pre-built lvalue references into a reusable buffer.
    sub {
        printf 'substrref: ';
        my $mask = shift;

        # The buffer is sized from the mask so the substr references
        # below are always taken inside the string.
        my $buf = chr(0) x length($mask);

        # One lvalue reference per run of 1s (the columns to keep),
        # matching what the other three variants retain.
        my @refs;
        push @refs, \substr( $buf, $-[0], $+[0] - $-[0] )
            while $mask =~ /1+/g;

        return sub {
            my $fh    = shift;
            my $count = 0;
            my $out;
            while ( <$fh> ) {
                substr( $buf, 0 ) = $_;    # overwrite the buffer in place
                $out = join '', map $$_, @refs;
                ++$count;
            }
            return $count;
        };
    },

    # [3] bitops: string-AND with a byte mask, then delete the NULs.
    sub {
        printf "bitops: ";
        my $mask = shift;

        # '0' => "\x00" (clears the byte), '1' => "\xff" (keeps the byte).
        $mask =~ tr[01][\x00\xff];

        return sub {
            my $fh    = shift;
            my $count = 0;
            while ( <$fh> ) {
                # String & yields a result as long as the shorter operand,
                # so anything past the end of the mask is dropped too.
                $_ &= $mask;

                # Squeeze out the cleared bytes; note that NUL bytes that
                # were already present in the input are removed as well.
                tr[\x00][]d;
                ++$count;
            }
            return $count;
        };
    },
);
# ---------------------------------------------------------------------
# Driver. Settings come from perl's -s switch parsing, e.g. -OPT=2:
#   -OPT        index into @benches of the variant to run (default 0)
#   -FLUSHFILE  file read and discarded before the timed run
#   -TESTFILE   file whose lines are masked during the timed run
#   -S          seed for the random mask, so runs are repeatable
# ---------------------------------------------------------------------
$|++;    # unbuffered STDOUT: the label appears before the long run starts
our $OPT       //= 0;
our $FLUSHFILE //= '10gb.csv';
our $TESTFILE  //= '1023727.dat';
our $S         //= 1;

# Fail with a readable message rather than "Can't use an undefined value
# as a subroutine reference" when -OPT does not select a benchmark.
my $bench = $benches[ $OPT ]
    or die "No benchmark at index '$OPT' (valid: 0 .. $#benches)\n";

# Same seed => same 400,000-digit mask for every variant.
srand $S;
my $mask = join '', map int( rand 2 ), 1 .. 400_000;

# Read the flush file end to end and throw the data away.
# NOTE(review): presumably this pushes the test file out of the OS file
# cache so that each variant starts cold -- confirm.
open my $flush, '<', $FLUSHFILE or die "Cannot open '$FLUSHFILE': $!";
1 while <$flush>;
close $flush;

# The timed region covers the variant's setup as well as the file pass.
my $start = time;
my $run   = $bench->( $mask );
open my $in, '<', $TESTFILE or die "Cannot open '$TESTFILE': $!";
my $records = $run->( $in );
close $in;
my $stop = time;

# Guard the rate against a zero interval on a coarse-grained clock.
my $elapsed = $stop - $start;
printf "Took %f seconds for %u records (%f recs/second)\n",
    $elapsed, $records, $elapsed > 0 ? $records / $elapsed : 0;
__END__
C:\test>for /l %n in (0,1,3) do @1023727 -OPT=%n
unpack: Took 164.702357 seconds for 2606 records (15.822482 recs/second)
substr: Took 2971.481218 seconds for 2606 records (0.877004 recs/second)
substrref: Took 154.501948 seconds for 2606 records (16.867101 recs/second)
bitops: Took 12.534998 seconds for 2606 records (207.897916 recs/second)
With the rise and rise of 'Social' network sites: 'Computers are making people easier to use everyday'
Examine what is said, not who speaks -- Silence betokens consent -- Love the truth but pardon error.
"Science is about questioning the status quo. Questioning authority".
In the absence of evidence, opinion is indistinguishable from prejudice.
div class=