Beefy Boxes and Bandwidth Generously Provided by pair Networks
Perl Monk, Perl Meditation
 
PerlMonks  

Re: Fastest byteswap (little endian to big endian (eg. 34127856 -> 12345678)

by FreeBeerReekingMonk (Deacon)
on Apr 15, 2015 at 22:14 UTC ( [id://1123571]=note: print w/replies, xml ) Need Help??


in reply to Fastest byteswap (little endian to big endian (eg. 34127856 -> 12345678)

I am sure that dd is faster than XS, but here is an XS snippet anyway. (I am not sure about signed versus unsigned chars, or whether I actually need to detect Unicode strings; this assumes plain 8-bit bytes.)

# swapstring(str): exchange the high and low nibble of every byte of str.
# The argument is modified in place and a copy of the result is returned.
# Note: this is a per-byte nibble swap, not a 16-bit byteswap.
SV *
swapstring(str)
    SV *str
  PREINIT:
    STRLEN len;
    STRLEN i;
    unsigned char *buf;
  CODE:
    /* Force a private, writable byte buffer before writing into it.
     * Plain SvPVbyte() may hand back a shared (copy-on-write) or
     * read-only buffer that must not be modified. */
    buf = (unsigned char *) SvPVbyte_force(str, len);
    for (i = 0; i < len; i++) {
        /* Rotating an 8-bit value by 4 swaps its two nibbles; this is the
         * portable C equivalent of the original inline-asm `ror $4`. */
        buf[i] = (unsigned char) ((buf[i] << 4) | (buf[i] >> 4));
    }
    /* Return a length-counted copy so that embedded NUL bytes survive;
     * a plain char * return value is converted with strlen() semantics. */
    RETVAL = newSVpvn((char *) buf, len);
  OUTPUT:
    RETVAL
the ror idea comes from stackoverflow.
Edit: Doh! Never mind — this produces 56781234 -> 12345678, which is not what was asked.

  • Comment on Re: Fastest byteswap (little endian to big endian (eg. 34127856 -> 12345678)
  • Download Code

Replies are listed 'Best First'.
Re^2: Fastest byteswap (little endian to big endian (eg. 34127856 -> 12345678)
by oiskuu (Hermit) on Apr 16, 2015 at 09:19 UTC

    Ror can do byteswaps all right, but it's not very remarkable at that.

    #! /usr/bin/perl
    # Benchmark two in-place 16-bit byteswap ("swab") routines written in C:
    #   swab_ror - one 16-bit rotate (rorw $8) per word, via inline asm
    #   swab_sse - SSSE3 byte shuffle, 16 bytes per iteration
    # Both swap each adjacent pair of bytes in the scalar they are given.
    # A trailing odd byte is left untouched.
    use strict;
    use warnings;

    use Inline C => Config =>
        CC       => 'gcc',
        OPTIMIZE => '-O3 -mssse3 -funroll-all-loops';

    use Inline C => <<'__CUT__', NAME => 'swab';
    #include <stdint.h>
    #include <x86intrin.h>

    /* Swap the two bytes of every 16-bit word of v, in place, using rorw. */
    void swab_ror(SV *v)
    {
        STRLEN slen;
        /* SvPV_force: make sure we own a writable buffer before modifying it
         * (plain SvPV may return a shared or read-only buffer). */
        char *s = SvPV_force(v, slen);
        uint16_t *w = (uint16_t*) s;
        size_t n = slen >> 1;           /* number of complete 16-bit words */

        for (; n; n--) {
            /* Rotating a 16-bit value by 8 exchanges its two bytes. */
            asm("rorw $8, %0" : "+r,m" (w[n-1]) : : "cc");
        }
    }

    /* Swap the two bytes of every 16-bit word of v, in place, using pshufb. */
    void swab_sse(SV *v)
    {
        STRLEN slen;
        char *s = SvPV_force(v, slen);  /* writable buffer, see swab_ror */
        __m128i x, t;
        size_t n = slen & ~(size_t)1;   /* even byte count; odd tail byte ignored */

        /* Handle trailing words one at a time until the remaining
         * byte count is a multiple of 16. */
        for (; (n & 0xe); n -= 2) {
            uint16_t *w = (uint16_t*) &s[n-2];
            *w = __rorw(*w, 8);
        }

        /* Shuffle mask: destination byte i takes source byte i^1.
         * _mm_set_epi8 lists bytes from most to least significant. */
        t = _mm_set_epi8(14,15,12,13,10,11,8,9,6,7,4,5,2,3,0,1);
        for (; n; n -= 16) {
            /* Unaligned load/store: the string buffer has no 16-byte alignment guarantee. */
            x = _mm_lddqu_si128((__m128i*)&s[n-16]);
            x = _mm_shuffle_epi8(x, t);
            _mm_storeu_si128((__m128i*)&s[n-16], x);
        }
    }
    __CUT__

    # Package variable (not "my") so the string-eval'd benchmark code can see it.
    our $str = pack "C*", map rand(256), 1..34567;

    use Benchmark 'cmpthese';
    cmpthese -5, {
        swab_ror => q( swab_ror $str ),
        swab_sse => q( swab_sse $str ),
    };

      That is a big performance difference! Thanks for showing that.
      Rate swab_ror swab_sse
      swab_ror 155077/s -- -82%
      swab_sse 885905/s 471% --

Log In?
Username:
Password:

What's my password?
Create A New User
Domain Nodelet?
Node Status?
node history
Node Type: note [id://1123571]
help
Chatterbox?
and the web crawler heard nothing...

How do I use this?Last hourOther CB clients
Other Users?
Others pondering the Monastery: (5)
As of 2024-04-19 07:46 GMT
Sections?
Information?
Find Nodes?
Leftovers?
    Voting Booth?

    No recent polls found