BrowserUk has asked for the wisdom of the Perl Monks concerning the following question:
Given a (long) string, ineterleave a fixed byte value after every existing byte.
Simple task with many solutions, but is there one that's quickest?
$buf = chr(1) x 1e6;;
$s = time;
$out = join( chr(0), unpack '(A1)*', $buf ) .chr(0);
print time() - $s;;
11.3910000324249
Examine what is said, not who speaks -- Silence betokens consent -- Love the truth but pardon error.
"Science is about questioning the status quo. Questioning authority".
In the absence of evidence, opinion is indistinguishable from prejudice.
Re: Interleaving bytes in a string quickly
by jmcnamara (Monsignor) on Feb 26, 2010 at 14:20 UTC
|
For the specific example of interleaving a null byte this should be ~50 times faster than the original solution:
$out = pack 'v*', unpack 'C*', $buf;
--
John.
| [reply] [d/l] |
|
| [reply] |
|
Then maybe something like this. It should still be ~25 faster than the original for the general case:
sub interleave {
my $str = shift;
my $filler = 64; # Char number.
my $hi_bytes = pack 'v*', unpack 'C*', $str;
my $lo_bytes = pack 'n*', ($filler) x length $str;
return $hi_bytes | $lo_bytes;
}
--
John.
| [reply] [d/l] |
|
Re: Interleaving bytes in a string quickly
by shmem (Chancellor) on Feb 26, 2010 at 12:00 UTC
|
For a string with only one byte value, flipping bits
$out = $buf x 2;
vec($out,($_<<4)+8,1) = 0 for 0..length($buf)-1;
seems to be fastest. | [reply] [d/l] |
|
| [reply] [d/l] |
|
Still vec can be used... differences are minimal:
update: included jmcnamara's code, which is really fast.
#!/usr/bin/perl
use Benchmark qw(cmpthese);
my $buf = chr(1) x 2e6;
cmpthese(-10,
{
unpack => sub { # BrowserUk
join( chr(0), unpack '(A1)*', $buf ) .chr(0)
},
substr => sub { # salva
$out = chr(0) x (length($buf) * 2);
substr($out, $_ * 2, 1, substr($buf, $_, 1)) for 0..length
+($buf);
$out;
},
vec => sub { # shmem
$out = chr(0) x (length($buf) * 2);
vec($out,$_<<1,8) = vec($buf,$_,8)
for 0..length($buf)-1;
$out;
},
vec2 => sub { # jmcnamara
my $out = pack 'v*', unpack 'C*', $buf;
},
}
);
__END__
s/iter vec unpack substr vec2
vec 1.44 -- -1% -3% -81%
unpack 1.42 1% -- -2% -81%
substr 1.40 3% 2% -- -80%
vec2 0.275 423% 417% 408% --
But your unpack routine doesn't seem to work correctly.
update2: added jmcnamara's code to the stuff inside readmore tags | [reply] [d/l] [select] |
|
Re: Interleaving bytes in a string quickly
by Fletch (Bishop) on Feb 26, 2010 at 14:14 UTC
|
Inline::C?
Update: Bleh this won't work with nulls because it's returning a vanilla C char*. You'd need to take and return an SV* instead, but you get the idea.
$ perl interleave.plx
0.0027470588684082
0.410820007324219
#!/usr/bin/env perl
use strict;
use warnings;
use Inline 'C';
use Time::HiRes qw( time );
my $buf = chr(1) x 1e6;;
my $s = time;
my $out = interleave( $buf, chr(0) );
print time() - $s, "\n";
$buf = chr(1) x 1e6;;
$s = time;
$out = join( chr(0), unpack '(A1)*', $buf ) .chr(0);
print time() - $s, "\n";
__END__
__C__
char *interleave(char *in, char other) {
char *outbuf = malloc( sizeof(char) * strlen( in ) * 2 + 1);
char *outp = outbuf;
char *p = in;
while( *p ) {
*outp++ = *p++;
*outp++ = other;
}
*outp = '\0';
return outbuf;
}
The cake is a lie.
The cake is a lie.
The cake is a lie.
| [reply] [d/l] |
|
#!/usr/bin/env perl
use strict;
use warnings;
use Inline C => <<'__EOI__';
/* sv_in and sv_pad may not be NULL */
/* That won't happen when called from Perl */
SV* interleave_bytes(SV* sv_in, SV* sv_pad) {
STRLEN l_in; char* p_in = SvPVbyte(sv_in, l_in);
STRLEN l_pad; char* p_pad = SvPVbyte(sv_pad, l_pad);
char pad;
SV* sv_out;
char* p_out;
if (l_pad != 1)
croak("usage");
pad = *p_pad;
sv_out = newSVpvn("", 0);
p_out = SvGROW(sv_out, l_in*2+1); /* XXX Could overflow */
SvCUR_set(sv_out, l_in*2);
while (l_in--) {
*(p_out++) = *(p_in++);
*(p_out++) = pad;
}
*p_out = '\0';
return sv_out;
}
__EOI__
use Time::HiRes qw( time );
my $buf = chr(1) x 1e6;;
my $s = time;
my $out = interleave_bytes( $buf, chr(0) );
print time() - $s, "\n";
#use Devel::Peek;
#Dump $out;
$buf = chr(1) x 1e6;;
$s = time;
$out = join( chr(0), unpack '(A1)*', $buf ) .chr(0);
print time() - $s, "\n";
0.00459790229797363
0.446739912033081
Update: Fixed bugs mentioned elsewhere in thread.
| [reply] [d/l] [select] |
Re: Interleaving bytes in a string quickly
by BrowserUk (Patriarch) on Feb 26, 2010 at 16:11 UTC
|
I declare lanX the winner (with jmacnamara a close second) for a pure perl solution.
fletch's Inline C for absolute speed.
#! perl -slw
use 5.010;
use strict;
use Inline C => Config => BUILD_NOISY => 1;
use Inline C => <<'END_C', NAME => '_825494', CLEAN_AFTER_BUILD => 0;
SV *interleave( SV *in, char other ) {
size_t len = SvCUR( in ), i;
SV *out = newSV( sizeof(char) * len * 2 );
char *outp = SvPVX( out );
char *p = SvPVX( in );
for( i = 0; i < len; ++i ) {
*outp++ = *p++;
*outp++ = other;
}
*outp = '\0';
SvPOK_on( out );
SvCUR_set( out, len * 2 );
return out;
}
END_C
use Devel::Peek;
use Benchmark qw[ cmpthese ];
use constant DEBUG => 0; ## Set to 1 to see first 20 bytes of results;
our $in = chr(1) x 1e6;
our $mask = ( chr( 0 ) . chr( 64 ) ) x 1e6;
cmpthese DEBUG ? 1 : -3, {
buk => q[
my $out = join( chr(64), unpack '(A1)*', $in ) . chr(64);
print 'buk ', unpack 'C20', $out if DEBUG;
],
shmem => q[
my $out = chr(64) x ( length( $in ) * 2 );
vec( $out, $_<<1, 8 ) = vec( $in, $_, 8 ) for 0..length( $in )
+ -1;
print 'shmem ', unpack 'C20', $out if DEBUG;
],
salva => q[
my $out = chr(64) x ( length( $in ) * 2 );
substr( $out, $_ * 2, 1, substr( $in, $_, 1 ) ) for 0..length(
+ $in )-1;
print 'salva ', unpack 'C20', $out if DEBUG;
],
lanX => q[
my $out = pack 'S*', unpack 'C*', $in;
$out |= $mask;
print 'lanX ', unpack 'C20', $out if DEBUG;;
],
jmac => q[
my $hi_bytes = pack 'v*', unpack 'C*', $in;
my $lo_bytes = pack 'n*', ( 64 ) x length $in;
my $out = $hi_bytes | $lo_bytes;
print 'jmac ', unpack 'C20', $out if DEBUG;;
],
fletch => q[
my $out = interleave( $in, chr( 64 ) );
print 'fletch', unpack 'C20', $out if DEBUG;;
],
};
__END__
C:\test>825494
buk 164164164164164164164164164164
(warning: too few iterations for a reliable count)
fletch164164164164164164164164164164
(warning: too few iterations for a reliable count)
jmac 164164164164164164164164164164
(warning: too few iterations for a reliable count)
lanX 164164164164164164164164164164
(warning: too few iterations for a reliable count)
salva 164164164164164164164164164164
(warning: too few iterations for a reliable count)
shmem 164164164164164164164164164164
(warning: too few iterations for a reliable count)
C:\test>825494
Rate buk shmem salva jmac lanX fletch
buk 2.10/s -- -26% -37% -76% -81% -99%
shmem 2.84/s 36% -- -14% -68% -74% -99%
salva 3.32/s 58% 17% -- -62% -69% -99%
jmac 8.84/s 321% 211% 166% -- -18% -97%
lanX 10.8/s 415% 279% 225% 22% -- -97%
fletch 334/s 15849% 11662% 9968% 3685% 3000% --
-
-
Examine what is said, not who speaks -- Silence betokens consent -- Love the truth but pardon error.
"Science is about questioning the status quo. Questioning authority".
In the absence of evidence, opinion is indistinguishable from prejudice.
| [reply] [d/l] |
|
| [reply] [d/l] [select] |
|
| [reply] |
|
|
|
|
Were you looking for a pure perl or an inline solution and which one are you going to use?
| [reply] |
|
The problem arose from Re^3: encoding hdmi video byte. How to efficiently interleave the constant bytes into the tr/// encoded 100MB buffer loads. So I was looking for a pure Perl solution.
If it was for me, I'd probably do the whole thing in C.
Examine what is said, not who speaks -- Silence betokens consent -- Love the truth but pardon error.
"Science is about questioning the status quo. Questioning authority".
In the absence of evidence, opinion is indistinguishable from prejudice.
| [reply] |
|
|
Re: Interleaving bytes in a string quickly
by salva (Canon) on Feb 26, 2010 at 12:05 UTC
|
This is slightly faster on my computer:
$s = time;
$out1 = chr(0) x 2e6;
substr($out1, $_ * 2, 1, substr($buf, $_, 1)) for 0..1e6;
print time() - $s, "\n";
| [reply] [d/l] |
Re: Interleaving bytes in a string quickly
by LanX (Saint) on Feb 26, 2010 at 13:58 UTC
|
Hi
It's a repeated task for many buf-strings of fixed length to convert?
If yes, you may wanna try to pack(unpack $string) such that each byte is transformed to the highbyte of a 16-bit word and the lowbyte is 0.
Then just "or" with constant string with the "fixed byte" at lowbyte positions (that are 1,3,5..., the odd indexes).
Should be faster since you avoid looping bytewise on the perl level... but I have no time to try it out now.
Update: corrected | [reply] [d/l] |
|
roughly 5-7 times faster!
$mult=1e7;
$buf = chr(1) x $mult;
$s = time;
$out = join( chr(0), unpack '(A1)*', $buf ) .chr(0);
print "TIME0:",time() - $s,"\n";
$del=pack('(B16)*','00000000'.'11111111')x $mult;
$buf="ABCDEFGHIJ"x($mult/10);
$s = time();
#$buf2=pack('(B16)*',unpack('(B8)*',$buf));
$buf2=pack 'v*', unpack 'C*', $buf; # jmcnamaras pack-unpac
+k-ing is much faster !!!
$out = $del|$buf2;
print "TIME1:",time() - $s,"\n";
print join ",",unpack '(B8)20',$out; # check result
OUT:
TIME0:14
TIME1:2
01000001,11111111,01000010,11111111,01000011,11111111,01000100,1111111
+1,01000101,11111111,01000110,11111111,01000111,11111111,01001000,1111
+1111,01001001,11111111,01001010,11111111
UPDATE: even if it's a one-time task you can profit from this approach by chunking the string into eqally sized pieces which are "or"ed against the precalculated $del string.
UPDATE: even if you include calculating $del it's about 3 times faster! | [reply] [d/l] [select] |
Re: Interleaving bytes in a string quickly
by ikegami (Patriarch) on Feb 26, 2010 at 17:52 UTC
|
s/.\K/\x01/sg; # 5.010+
s/(?<=.)/\x01/sg; # 5.006+
perform?
| [reply] [d/l] [select] |
|
ike510 => q[
( my $out = $in ) =~ s/.\K/\x40/sg;
print 'ike510', unpack 'C20', $out if DEBUG;;
],
ike5_6 => q[
( my $out = $in ) =~ s/(.)/$1\x40/sg;
print 'ike5.6', unpack 'C20', $out if DEBUG;;
],
But the answer to the quesion is 'not well':
Rate ike5_6 ike510 buk shmem salva jmac lanX fletch
ike5_6 0.882/s -- -30% -56% -68% -73% -90% -91% -100%
ike510 1.26/s 43% -- -37% -54% -61% -85% -88% -100%
buk 1.99/s 126% 58% -- -28% -39% -76% -81% -99%
shmem 2.77/s 214% 120% 39% -- -15% -67% -73% -99%
salva 3.25/s 269% 158% 63% 17% -- -61% -69% -99%
jmac 8.42/s 855% 567% 322% 204% 159% -- -19% -97%
lanX 10.4/s 1076% 721% 420% 274% 219% 23% -- -97%
fletch 309/s 34974% 24395% 15418% 11055% 9407% 3575% 2883% --
I suspect that constantly reallocating the results string every 8 insertions does for them.
Examine what is said, not who speaks -- Silence betokens consent -- Love the truth but pardon error.
"Science is about questioning the status quo. Questioning authority".
In the absence of evidence, opinion is indistinguishable from prejudice.
| [reply] [d/l] [select] |
|
$ perl -le'$_="abc"; s/(.)/$1\x40/sg; print'
a@b@c@
$ perl -le'$_="abc"; s/(?<=.)/\x40/sg; print'
a@b@c@
$ perl -le'$_="abc"; s/(?!^)/\x40/sg; print'
a@b@c@
$ perl -e'
use Benchmark qw( cmpthese );
our $i = chr(1) x 1e6;
cmpthese(-3, {
cap => sub { (my $o=our $i) =~ s/(.)/$1\x40/sg; },
peek1 => sub { (my $o=our $i) =~ s/(?<=.)/\x40/sg; },
peek2 => sub { (my $o=our $i) =~ s/(?!^)/\x40/sg; },
});
'
Rate cap peek1 peek2
cap 1.39/s -- -25% -27%
peek1 1.85/s 33% -- -3%
peek2 1.91/s 37% 4% --
| [reply] [d/l] |
|
And the "Deepest Thread Award" goes to ...
by LanX (Saint) on Mar 12, 2010 at 13:16 UTC
|
| [reply] |
|
| [reply] |
|
|