use strict; use warnings; use List::MoreUtils qw(natatime); # See [id://861938] sub chunk_array { my ( $n, @vals ) = @_; my $str; my $iter = natatime( $n, @vals ); while ( my @line = $iter->() ) { $str .= join( " ", @line ) . "\n"; } return $str; } sub dump_blocks { my $file = shift; # bytemap file to read my $nblock = shift; # number of blocks to dump local $/ = \128; # Make <> operator read 128 bytes open( my $fh, '<', $file ) or die "error: open '$file': $!"; binmode($fh); for my $n ( 1 .. $nblock ) { my $block = <$fh>; length($block) == 128 or die "oops"; print "block $n:\n"; print chunk_array( 16, map { sprintf "%3d", ord } split //, $block ); } close($fh); } # A 4GB bytemap consists of four 1GB physical files (suffix: "-0" .. "-3"). # (See [id://1086481] for a program that generates a bytemap). my $bytemap_root = "lookzpM.byte"; my $fname = $bytemap_root . "-0"; dump_blocks( $fname, 16 ); #### block 1: 0 118 0 126 0 126 0 102 0 102 0 94 0 94 0 86 0 86 0 94 0 94 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 block 2: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 75 55 51 55 59 63 35 47 43 39 35 39 27 31 19 15 11 23 19 23 27 31 block 3: 0 111 0 103 0 103 0 127 0 79 0 119 0 119 0 127 0 111 0 103 0 103 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 block 4: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 114 0 122 0 114 0 106 0 98 0 90 0 66 0 74 0 82 0 90 0 82 44 42 40 34 36 58 56 2 12 0 8 50 52 58 56 50 44 42 40 34 36 block 5: 0 88 0 76 0 72 0 84 0 88 0 108 0 104 0 100 0 120 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 block 6: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 109 0 97 0 101 0 65 0 77 0 113 0 117 0 113 0 109 0 97 0 101 5 1 1 0 0 17 17 21 21 17 17 45 45 33 33 37 0 65 0 77 0 block 7: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 block 8: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 68 0 72 0 76 0 88 0 84 0 104 0 108 0 24 26 4 2 8 0 12 0 120 0 116 0 104 0 108 0 120 0 68 0 72 0 76 0 88 0 84 0 block 9: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 block 10: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 83 0 91 0 67 0 75 0 67 63 59 55 51 55 43 47 51 63 59 7 3 7 11 15 3 31 27 23 19 23 0 111 0 95 0 71 0 71 0 79 0 127 0 119 0 119 0 111 0 127 0 block 11: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 block 12: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 124 0 112 0 116 0 112 0 124 0 64 0 68 0 64 0 92 0 80 0 84 22 16 30 28 30 0 6 4 6 0 62 60 62 48 54 52 54 48 62 60 62 0 70 0 70 0 94 0 94 0 86 0 0 0 0 0 0 0 0 0 0 0 block 13: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 block 14: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 85 0 89 0 93 0 73 0 69 0 121 0 125 0 105 0 117 0 121 0 125 1 9 5 5 1 25 29 29 17 105 0 85 0 89 0 93 0 73 0 69 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 block 15: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 block 16: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 78 0 70 0 70 0 94 44 46 40 22 20 22 24 30 12 14 8 6 4 6 0 126 0 110 0 118 0 118 0 126 0 78 0 70 0 70 0 94 0 110 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 #### #define LAST_CHUNK_IDX_4MB 33554431 #define LAST_CHUNK_IDX_8MB (33554432*2 - 1) #define BIT_MAP_SIZE_BYTES ((LAST_CHUNK_IDX_8MB+1) >> 3) typedef unsigned char bytev_t; void make_bitmapA( const char* bitfname, // in: bit map file name const bytev_t* bytevec, // in: byte vector unsigned char* bitvec // out: bitvec put here (assumed already zeroed) ) { unsigned int uu; unsigned int uu2; unsigned int byte; unsigned int bit; const size_t* psz; const bytev_t* psy; int j; unsigned int neven; unsigned int nodd; unsigned int totzero = 0; unsigned int toteven = 0; unsigned int totodd = 0; for (uu = 0; uu <= LAST_CHUNK_IDX_4MB; ++uu) { psy = &bytevec[uu << 7]; psz = (size_t*)psy; // This is the zero case. if (psz[0] == 0 && psz[1] == 0 && psz[2] == 0 && psz[3] == 0 && psz[4] == 0 && psz[5] == 0 && psz[6] == 0 && psz[7] == 0 && psz[8] == 0 && psz[9] == 0 && psz[10] == 0 && psz[11] == 0 && psz[12] == 0 && psz[13] == 0 && psz[14] == 0 && psz[15] == 0) { // zero: both bits are zero, nothing to do ++totzero; continue; } // All non-zero elements in block will be all even or all odd! neven = 0; nodd = 0; for (j = 0; j < 128; ++j) { if (psy[j] != 0) { if (psy[j] % 2 == 0) { ++neven; } else { ++nodd; } } } if (neven > 0 && nodd > 0) { fprintf(stderr, "oops 1\n"); exit(1); } if (neven == 0 && nodd == 0) { fprintf(stderr, "oops 2\n"); exit(1); } uu2 = uu * 2; // nonzero: set first bit byte = uu2 >> 3; bit = uu2 & 7; bitvec[byte] |= (1 << (bit+1)); if (neven > 0) { // even: second bit is zero, nothing to do ++toteven; continue; } // odd: set second bit ++totodd; bitvec[byte] |= (1 << bit); } printf("zero=%u even=%u odd=%u from %u (%u)\n", totzero, toteven, totodd, LAST_CHUNK_IDX_4MB+1, uu); FILE* fh; printf("write %s\n", bitfname); fh = fopen(bitfname, "wb"); if (fh == NULL) { fprintf(stderr, "oops open '%s' failed: errno=%d\n", bitfname, errno); exit(1); } if (fwrite(bitvec, BIT_MAP_SIZE_BYTES, 1, fh) != 1) { fprintf(stderr, "oops write '%s' failed: errno=%d\n", bitfname, errno); exit(1); } fclose(fh); } #### #include #include #define H_PRIME 1000003 #define HASH_LEN 11 #define MOD_TARG 1001 #define M_TARG 1000 #define D_TARG 500 #define C_TARG 100 #define L_TARG 50 #define X_TARG 10 #define GET_TWO_BITS(x, p) (((x) >> (p)) & ((size_t)3)) #define UNROLL(qx) qqm = bytevecM[m7 ^ qx]; \ if (qqm != 0 \ && qqm == bytevecD[d7 ^ qx]) { \ qk[nqk++] = qqm; qk[nqk++] = qx; } #define UNROLL_M7A(qx) \ m7arr[qx] = (unsigned int)( (m6 ^ qx) * H_PRIME ); \ m7arr_r12[qx] = m7arr[qx] >> 12; \ _mm_prefetch((const char*)(&pbvM[m7arr_r12[qx]]), _MM_HINT_T0); #define UNROLL_M7B(qx) \ if ( (mceo = GET_TWO_BITS(pbvM[m7arr_r12[qx]], ((m7arr[qx] >> 6) & 62)) ) != 0) { \ d7 = (unsigned int)( (d6 ^ qx) * H_PRIME ); \ d7_r12 = d7 >> 12; \ _mm_prefetch((const char*)(&pbvD[d7_r12]), _MM_HINT_T0); \ qj[nqj++] = qx; \ qj[nqj++] = d7; \ qj[nqj++] = d7_r12; \ qj[nqj++] = mceo; } // ------------------------------------------------------------------- int inner( const unsigned char* bitvecM, const unsigned char* bitvecD, const unsigned char* bitvecC, const unsigned char* bitvecL, const unsigned char* bitvecX, const unsigned char* bitvecV, const unsigned char* bitvecI, const unsigned char* bytevecM, const unsigned char* bytevecD, int startval, int endval, int m2, int d2, int c2, int l2, int x2, int v2, int i2, __m128i* ps ) { __m128i s2 = _mm_set_epi32(i2, v2, x2, l2); __m128i hp = _mm_set1_epi32(H_PRIME); __m128i s3, s4, s5, s6; int m3, m4, m5, m6; int d3, d4, d5, d6; int c3, c4, c5, c6; unsigned int m7, d7, d7_r12, c7, c7_r12, l7, l7_r12, x7, x7_r12, v7, v7_r12, i7, i7_r12; int q3, q4, q5, q6, q7; int c8, l8, x8; int c9, l9, x9; unsigned int qqm; unsigned int q8; int jj; int iret = 0; int cnt; int qz; int nqj; int nqk; unsigned int qj[128*8]; unsigned int qk[128*8]; unsigned int m7arr[128]; unsigned int m7arr_r12[128]; unsigned int mceo, dceo, cceo, lceo, xceo, vceo, iceo; const size_t* pbvM = (size_t*)bitvecM; const size_t* pbvD = (size_t*)bitvecD; const size_t* pbvC = (size_t*)bitvecC; const size_t* pbvL = (size_t*)bitvecL; const size_t* pbvX = (size_t*)bitvecX; const size_t* pbvV = (size_t*)bitvecV; const size_t* pbvI = (size_t*)bitvecI; for (q3 = startval; q3 < endval; ++q3) { if (q3 == 10 || q3 == 13) continue; m3 = (m2 ^ q3) * H_PRIME; d3 = (d2 ^ q3) * H_PRIME; c3 = (c2 ^ q3) * H_PRIME; s3 = _mm_mullo_epi32(_mm_xor_si128(s2, _mm_set1_epi32(q3)), hp); for (q4 = 1; q4 < 128; ++q4) { if (q4 == 10 || q4 == 13) continue; m4 = (m3 ^ q4) * H_PRIME; d4 = (d3 ^ q4) * H_PRIME; c4 = (c3 ^ q4) * H_PRIME; s4 = _mm_mullo_epi32(_mm_xor_si128(s3, _mm_set1_epi32(q4)), hp); for (q5 = 1; q5 < 128; ++q5) { if (q5 == 10 || q5 == 13) continue; m5 = (m4 ^ q5) * H_PRIME; d5 = (d4 ^ q5) * H_PRIME; c5 = (c4 ^ q5) * H_PRIME; s5 = _mm_mullo_epi32(_mm_xor_si128(s4, _mm_set1_epi32(q5)), hp); for (q6 = 1; q6 < 128; ++q6) { if (q6 == 10 || q6 == 13) continue; m6 = (m5 ^ q6) * H_PRIME; d6 = (d5 ^ q6) * H_PRIME; c6 = (c5 ^ q6) * H_PRIME; s6 = _mm_mullo_epi32(_mm_xor_si128(s5, _mm_set1_epi32(q6)), hp); UNROLL_M7A(1) UNROLL_M7A(2) UNROLL_M7A(3) UNROLL_M7A(4) UNROLL_M7A(5) UNROLL_M7A(6) UNROLL_M7A(7) UNROLL_M7A(8) UNROLL_M7A(9) UNROLL_M7A(11) UNROLL_M7A(12) for (q7 = 14; q7 < 128; ++q7) { UNROLL_M7A(q7) } nqj = 0; UNROLL_M7B(1) UNROLL_M7B(2) UNROLL_M7B(3) UNROLL_M7B(4) UNROLL_M7B(5) UNROLL_M7B(6) UNROLL_M7B(7) UNROLL_M7B(8) UNROLL_M7B(9) UNROLL_M7B(11) UNROLL_M7B(12) for (q7 = 14; q7 < 128; ++q7) { // UNROLL_M7B(q7) mceo = GET_TWO_BITS(pbvM[m7arr_r12[q7]], ((m7arr[q7] >> 6) & 62)); if (mceo == 0) continue; d7 = (unsigned int)( (d6 ^ q7) * H_PRIME ); d7_r12 = d7 >> 12; _mm_prefetch((const char*)(&pbvD[d7_r12]), _MM_HINT_T0); qj[nqj++] = q7; qj[nqj++] = d7; qj[nqj++] = d7_r12; qj[nqj++] = mceo; } nqk = 0; cnt = 0; while (cnt < nqj) { q7 = qj[cnt++]; d7 = qj[cnt++]; d7_r12 = qj[cnt++]; mceo = qj[cnt++]; dceo = GET_TWO_BITS(pbvD[d7_r12], ((d7 >> 6) & 62)); if (dceo != mceo) continue; c7 = (unsigned int)( (c6 ^ q7) * H_PRIME ); c7_r12 = c7 >> 12; _mm_prefetch((const char*)(&pbvC[c7_r12]), _MM_HINT_T0); qk[nqk++] = q7; qk[nqk++] = d7; qk[nqk++] = c7; qk[nqk++] = c7_r12; qk[nqk++] = mceo; } nqj = 0; cnt = 0; while (cnt < nqk) { q7 = qk[cnt++]; d7 = qk[cnt++]; c7 = qk[cnt++]; c7_r12 = qk[cnt++]; mceo = qk[cnt++]; cceo = GET_TWO_BITS(pbvC[c7_r12], ((c7 >> 6) & 62)); if (cceo != mceo) continue; l7 = (unsigned int)( (s6.m128i_i32[0] ^ q7) * H_PRIME ); l7_r12 = l7 >> 12; _mm_prefetch((const char*)(&pbvL[l7_r12]), _MM_HINT_T0); qj[nqj++] = q7; qj[nqj++] = d7; qj[nqj++] = c7; qj[nqj++] = l7; qj[nqj++] = l7_r12; qj[nqj++] = mceo; } nqk = 0; cnt = 0; while (cnt < nqj) { q7 = qj[cnt++]; d7 = qj[cnt++]; c7 = qj[cnt++]; l7 = qj[cnt++]; l7_r12 = qj[cnt++]; mceo = qj[cnt++]; lceo = GET_TWO_BITS(pbvL[l7_r12], ((l7 >> 6) & 62)); if (lceo != mceo) continue; x7 = (unsigned int)( (s6.m128i_i32[1] ^ q7) * H_PRIME ); x7_r12 = x7 >> 12; _mm_prefetch((const char*)(&pbvX[x7_r12]), _MM_HINT_T0); qk[nqk++] = q7; qk[nqk++] = d7; qk[nqk++] = c7; qk[nqk++] = l7; qk[nqk++] = x7; qk[nqk++] = x7_r12; qk[nqk++] = mceo; } nqj = 0; cnt = 0; while (cnt < nqk) { q7 = qk[cnt++]; d7 = qk[cnt++]; c7 = qk[cnt++]; l7 = qk[cnt++]; x7 = qk[cnt++]; x7_r12 = qk[cnt++]; mceo = qk[cnt++]; xceo = GET_TWO_BITS(pbvX[x7_r12], ((x7 >> 6) & 62)); if (xceo != mceo) continue; v7 = (unsigned int)( (s6.m128i_i32[2] ^ q7) * H_PRIME ); v7_r12 = v7 >> 12; _mm_prefetch((const char*)(&pbvV[v7_r12]), _MM_HINT_T0); qj[nqj++] = q7; qj[nqj++] = d7; qj[nqj++] = c7; qj[nqj++] = l7; qj[nqj++] = x7; qj[nqj++] = v7; qj[nqj++] = v7_r12; qj[nqj++] = mceo; } nqk = 0; cnt = 0; while (cnt < nqj) { q7 = qj[cnt++]; d7 = qj[cnt++]; c7 = qj[cnt++]; l7 = qj[cnt++]; x7 = qj[cnt++]; v7 = qj[cnt++]; v7_r12 = qj[cnt++]; mceo = qj[cnt++]; vceo = GET_TWO_BITS(pbvV[v7_r12], ((v7 >> 6) & 62)); if (vceo != mceo) continue; i7 = (unsigned int)( (s6.m128i_i32[3] ^ q7) * H_PRIME ); i7_r12 = i7 >> 12; _mm_prefetch((const char*)(&pbvI[i7_r12]), _MM_HINT_T0); qk[nqk++] = q7; qk[nqk++] = d7; qk[nqk++] = c7; qk[nqk++] = l7; qk[nqk++] = x7; qk[nqk++] = i7; qk[nqk++] = i7_r12; qk[nqk++] = mceo; } nqj = 0; cnt = 0; while (cnt < nqk) { q7 = qk[cnt++]; d7 = qk[cnt++]; c7 = qk[cnt++]; l7 = qk[cnt++]; x7 = qk[cnt++]; i7 = qk[cnt++]; i7_r12 = qk[cnt++]; mceo = qk[cnt++]; iceo = GET_TWO_BITS(pbvI[i7_r12], ((i7 >> 6) & 62)); if (iceo != mceo) continue; _mm_prefetch(&bytevecM[m7arr[q7] & 0xffffff80], _MM_HINT_T0); _mm_prefetch(&bytevecM[64+(m7arr[q7] & 0xffffff80)], _MM_HINT_T0); _mm_prefetch(&bytevecD[d7 & 0xffffff80], _MM_HINT_T0); _mm_prefetch(&bytevecD[64+(d7 & 0xffffff80)], _MM_HINT_T0); qj[nqj++] = q7; qj[nqj++] = d7; qj[nqj++] = c7; qj[nqj++] = l7; qj[nqj++] = x7; } if (nqj == 0) continue; qz = 0; while (qz < nqj) { q7 = qj[qz++]; d7 = qj[qz++]; c7 = qj[qz++]; l7 = qj[qz++]; x7 = qj[qz++]; m7 = m7arr[q7]; nqk = 0; UNROLL(1) UNROLL(2) UNROLL(3) UNROLL(4) UNROLL(5) UNROLL(6) UNROLL(7) UNROLL(8) UNROLL(9) UNROLL(11) UNROLL(12) UNROLL(14) UNROLL(15) UNROLL(16) UNROLL(17) UNROLL(18) UNROLL(19) UNROLL(20) UNROLL(21) UNROLL(22) UNROLL(23) UNROLL(24) UNROLL(25) UNROLL(26) UNROLL(27) UNROLL(28) UNROLL(29) UNROLL(30) UNROLL(31) UNROLL(32) UNROLL(33) UNROLL(34) UNROLL(35) UNROLL(36) UNROLL(37) UNROLL(38) UNROLL(39) UNROLL(40) UNROLL(41) UNROLL(42) UNROLL(43) UNROLL(44) UNROLL(45) UNROLL(46) UNROLL(47) UNROLL(48) UNROLL(49) UNROLL(50) UNROLL(51) UNROLL(52) UNROLL(53) UNROLL(54) UNROLL(55) UNROLL(56) UNROLL(57) UNROLL(58) UNROLL(59) UNROLL(60) UNROLL(61) UNROLL(62) UNROLL(63) UNROLL(64) UNROLL(65) UNROLL(66) UNROLL(67) UNROLL(68) UNROLL(69) UNROLL(70) UNROLL(71) UNROLL(72) UNROLL(73) UNROLL(74) UNROLL(75) UNROLL(76) UNROLL(77) UNROLL(78) UNROLL(79) UNROLL(80) UNROLL(81) UNROLL(82) UNROLL(83) UNROLL(84) UNROLL(85) UNROLL(86) UNROLL(87) UNROLL(88) UNROLL(89) UNROLL(90) UNROLL(91) UNROLL(92) UNROLL(93) UNROLL(94) UNROLL(95) UNROLL(96) UNROLL(97) UNROLL(98) UNROLL(99) UNROLL(100) UNROLL(101) UNROLL(102) UNROLL(103) UNROLL(104) UNROLL(105) UNROLL(106) UNROLL(107) UNROLL(108) UNROLL(109) UNROLL(110) UNROLL(111) UNROLL(112) UNROLL(113) UNROLL(114) UNROLL(115) UNROLL(116) UNROLL(117) UNROLL(118) UNROLL(119) UNROLL(120) UNROLL(121) UNROLL(122) UNROLL(123) UNROLL(124) UNROLL(125) UNROLL(126) UNROLL(127) if (nqk == 0) continue; cnt = 0; while (cnt < nqk) { qqm = qk[cnt++]; q8 = qk[cnt++]; // Calculate instead of lookup to reduce memory footprint. c8 = ((int)c7 ^ q8) * H_PRIME; jj = (c8 ^ qqm) ^ HASH_LEN; c9 = jj % MOD_TARG; if (c9 < 0) c9 += MOD_TARG; if (c9 == C_TARG) { l8 = ((int)l7 ^ q8) * H_PRIME; jj = (l8 ^ qqm) ^ HASH_LEN; l9 = jj % MOD_TARG; if (l9 < 0) l9 += MOD_TARG; if (l9 == L_TARG) { x8 = ((int)x7 ^ q8) * H_PRIME; jj = (x8 ^ qqm) ^ HASH_LEN; x9 = jj % MOD_TARG; if (x9 < 0) x9 += MOD_TARG; if (x9 == X_TARG) { ps[iret++] = _mm_set_epi16(0, qqm, q8, q7, q6, q5, q4, q3); } } } } } } } } } return iret; } #### 1 23 105 120 47 93 48 104 16 39: 1000 500 100 50 10 5 734 1 61 61 73 46 107 4 103 114 54: 1000 500 100 50 10 916 1 1 68 125 58 6 81 39 76 57 34: 1000 500 100 50 10 956 1 1 77 91 35 121 50 78 4 40 117: 1000 500 100 50 10 5 95 2 24 70 80 56 33 49 14 84 38: 1000 500 100 50 10 5 720 2 55 7 58 19 95 66 106 123 76: 1000 500 100 50 10 5 223 2 78 45 126 107 78 4 125 50 39: 1000 500 100 50 10 996 1 2 119 112 12 43 73 103 42 63 82: 1000 500 100 50 10 5 197 3 3 110 96 21 116 76 79 62 126: 1000 500 100 50 10 5 886 3 48 91 63 25 24 4 1 60 21: 1000 500 100 50 10 37 1 3 48 91 63 25 24 4 1 61 21: 1000 500 100 50 10 37 1 3 82 4 111 53 42 62 67 50 32: 1000 500 100 50 10 910 1 3 87 44 79 85 80 45 6 107 93: 1000 500 100 50 10 5 448 3 120 99 112 40 69 102 8 114 14: 1000 500 100 50 10 530 1 3 124 119 88 127 67 22 113 67 123: 1000 500 100 50 10 954 1 4 20 27 28 11 64 88 81 120 126: 1000 500 100 50 10 5 191 4 43 14 62 25 58 27 36 68 79: 1000 500 100 50 10 5 29 4 89 72 27 70 115 122 7 50 29: 1000 500 100 50 10 5 289 4 89 72 27 70 115 122 7 51 29: 1000 500 100 50 10 5 273 5 8 124 117 9 72 5 111 96 110: 1000 500 100 50 10 5 878 5 8 124 117 9 72 5 111 112 110: 1000 500 100 50 10 5 622 5 42 77 27 93 51 126 85 59 97: 1000 500 100 50 10 5 936 5 52 94 25 97 87 9 76 114 20: 1000 500 100 50 10 147 1 5 61 120 123 86 20 62 35 42 119: 1000 500 100 50 10 5 315 6 14 32 112 84 121 98 26 116 19: 1000 500 100 50 10 277 1 6 25 53 101 4 114 61 55 56 59: 1000 500 100 50 10 5 247 6 40 29 66 1 11 41 110 84 15: 1000 500 100 50 10 5 183 6 59 34 115 24 123 71 86 85 80: 1000 500 100 50 10 5 922 6 90 30 49 53 77 9 43 49 109: 1000 500 100 50 10 864 1 6 90 40 43 27 11 2 111 83 31: 1000 500 100 50 10 5 335 6 127 122 67 20 12 106 17 47 90: 1000 500 100 50 10 5 89 7 82 45 71 6 60 20 45 122 42: 1000 500 100 50 10 147 1 7 91 114 61 33 121 14 33 127 8: 1000 500 100 50 10 5 960 8 24 74 68 29 46 54 28 45 122: 1000 500 100 50 10 157 1 8 81 99 126 73 33 84 39 23 58: 1000 500 100 50 10 3 1 8 109 37 7 102 98 19 25 27 60: 1000 500 100 50 10 5 143 9 8 60 1 94 6 51 38 78 100: 1000 500 100 50 10 810 1 9 21 3 59 26 7 72 92 114 1: 1000 500 100 50 10 5 107 9 97 39 6 74 69 102 33 28 20: 1000 500 100 50 10 5 307 9 99 60 53 116 57 54 109 36 63: 1000 500 100 50 10 177 1 9 100 85 27 24 79 114 88 2 83: 1000 500 100 50 10 722 1 11 31 6 74 111 80 49 21 5 106: 1000 500 100 50 10 750 1 11 112 65 42 28 21 91 105 107 97: 1000 500 100 50 10 235 1 11 119 73 69 89 121 102 103 82 5: 1000 500 100 50 10 908 1 12 19 63 11 15 120 67 58 102 65: 1000 500 100 50 10 5 167 12 30 27 7 52 91 33 77 80 59: 1000 500 100 50 10 5 574 12 34 117 60 98 3 116 40 62 59: 1000 500 100 50 10 476 1 12 54 14 99 51 123 48 42 84 1: 1000 500 100 50 10 313 1 12 93 75 47 78 84 12 60 83 32: 1000 500 100 50 10 954 1 14 36 71 80 101 39 123 49 33 85: 1000 500 100 50 10 57 1 14 37 20 37 26 91 96 8 51 14: 1000 500 100 50 10 5 448 14 76 18 127 16 48 79 1 27 24: 1000 500 100 50 10 185 1 14 79 111 28 29 81 7 68 121 49: 1000 500 100 50 10 5 301 14 87 50 108 7 117 30 72 61 1: 1000 500 100 50 10 5 942 14 97 109 101 47 78 124 106 98 61: 1000 500 100 50 10 519 1 14 104 76 107 79 22 79 75 112 56: 1000 500 100 50 10 750 1 14 108 43 91 86 71 39 63 93 56: 1000 500 100 50 10 900 1 14 113 51 97 31 112 94 80 67 123: 1000 500 100 50 10 926 1 15 19 103 89 2 124 124 48 50 89: 1000 500 100 50 10 483 1 16 17 30 115 63 98 20 51 55 14: 1000 500 100 50 10 5 13 16 122 8 28 24 106 121 34 69 33: 1000 500 100 50 10 5 608 17 11 119 60 47 44 78 103 125 48: 1000 500 100 50 10 5 1 17 32 37 28 15 44 79 63 34 102: 1000 500 100 50 10 652 1 17 38 34 46 117 66 91 43 93 76: 1000 500 100 50 10 5 115 17 39 32 53 7 127 114 8 77 22: 1000 500 100 50 10 115 1 17 57 37 112 36 73 78 53 39 119: 1000 500 100 50 10 5 63 17 57 107 60 113 117 72 64 56 17: 1000 500 100 50 10 988 1 17 107 81 48 51 8 106 7 62 69: 1000 500 100 50 10 105 1 17 123 66 76 11 24 57 58 7 65: 1000 500 100 50 10 5 974 #### 1: 4 2: 4 3: 7 4: 4 5: 5 6: 7 7: 2 8: 3 9: 5 11: 3 12: 5 14: 9 15: 1 16: 2 17: 8 Min: 1 Max: 9 Total: 69 number of 5 (V) hits found: 35 number of 1 (I) hits found: 35 Ave: 4.60 #### 1: 64 3: 58 5: 73 7: 63 9: 46 11: 72 13: 58 15: 72 17: 61 19: 70 21: 63 23: 64 25: 62 27: 60 29: 66 31: 49 33: 64 35: 67 37: 42 39: 74 41: 86 43: 60 45: 52 47: 64 49: 49 51: 67 53: 68 55: 59 57: 60 59: 56 61: 72 63: 68 65: 56 67: 66 69: 59 71: 73 73: 59 75: 61 77: 64 79: 64 81: 52 83: 72 85: 45 87: 52 89: 68 91: 65 93: 55 95: 51 97: 72 99: 66 nhit = 50, nodd=50, neven=0, sum = 3109, min = 42, max = 86, ave=62.18 50 numbers hit 101: 50 103: 70 105: 62 107: 51 109: 49 111: 54 113: 55 115: 59 117: 72 119: 67 121: 58 123: 61 125: 64 127: 55 129: 42 131: 65 133: 59 135: 64 137: 43 139: 54 141: 46 143: 42 145: 62 147: 49 149: 42 151: 53 153: 49 155: 48 157: 47 159: 34 161: 45 163: 45 165: 44 167: 54 169: 51 171: 53 173: 60 175: 51 177: 45 179: 50 181: 40 183: 51 185: 51 187: 45 189: 48 191: 65 193: 51 195: 36 197: 51 199: 43 nhit = 50, nodd=50, neven=0, sum = 2605, min = 34, max = 72, ave=52.1 800: 61 *EVEN* 802: 48 *EVEN* 804: 48 *EVEN* 806: 41 *EVEN* 808: 45 *EVEN* 810: 55 *EVEN* 812: 37 *EVEN* 814: 51 *EVEN* 816: 57 *EVEN* 818: 52 *EVEN* 820: 48 *EVEN* 822: 40 *EVEN* 824: 56 *EVEN* 826: 57 *EVEN* 828: 46 *EVEN* 830: 48 *EVEN* 832: 38 *EVEN* 834: 59 *EVEN* 836: 43 *EVEN* 838: 61 *EVEN* 840: 61 *EVEN* 842: 59 *EVEN* 844: 55 *EVEN* 846: 58 *EVEN* 848: 58 *EVEN* 850: 60 *EVEN* 852: 50 *EVEN* 854: 51 *EVEN* 856: 54 *EVEN* 858: 60 *EVEN* 860: 54 *EVEN* 862: 53 *EVEN* 864: 60 *EVEN* 866: 51 *EVEN* 868: 58 *EVEN* 870: 56 *EVEN* 872: 58 *EVEN* 874: 49 *EVEN* 876: 64 *EVEN* 878: 64 *EVEN* 880: 61 *EVEN* 882: 62 *EVEN* 884: 63 *EVEN* 886: 52 *EVEN* 888: 67 *EVEN* 890: 61 *EVEN* 892: 55 *EVEN* 894: 56 *EVEN* 896: 70 *EVEN* 898: 57 *EVEN* nhit = 50, nodd=0, neven=50, sum = 2728, min = 37, max = 70, ave=54.56 50 numbers hit 900: 55 *EVEN* 902: 49 *EVEN* 904: 47 *EVEN* 906: 76 *EVEN* 908: 43 *EVEN* 910: 65 *EVEN* 912: 57 *EVEN* 914: 52 *EVEN* 916: 74 *EVEN* 918: 79 *EVEN* 920: 60 *EVEN* 922: 69 *EVEN* 924: 57 *EVEN* 926: 68 *EVEN* 928: 57 *EVEN* 930: 80 *EVEN* 932: 54 *EVEN* 934: 84 *EVEN* 936: 67 *EVEN* 938: 64 *EVEN* 940: 59 *EVEN* 942: 73 *EVEN* 944: 64 *EVEN* 946: 72 *EVEN* 948: 68 *EVEN* 950: 64 *EVEN* 952: 58 *EVEN* 954: 65 *EVEN* 956: 62 *EVEN* 958: 62 *EVEN* 960: 66 *EVEN* 962: 59 *EVEN* 964: 60 *EVEN* 966: 64 *EVEN* 968: 71 *EVEN* 970: 59 *EVEN* 972: 68 *EVEN* 974: 73 *EVEN* 976: 56 *EVEN* 978: 64 *EVEN* 980: 56 *EVEN* 982: 57 *EVEN* 984: 62 *EVEN* 986: 61 *EVEN* 988: 80 *EVEN* 990: 62 *EVEN* 992: 51 *EVEN* 994: 63 *EVEN* 996: 74 *EVEN* 998: 64 *EVEN* 1000: 57 *EVEN* nhit = 51, nodd=0, neven=51, sum = 3231, min = 43, max = 84, ave=63.3529411764706 #### magic = chr(17)+chr(11)+chr(119)+chr(60)+chr(47)+chr(44)+chr(78)+chr(103)+chr(125)+chr(48) for r in ["M", "D", "C", "L", "X", "V", "I"]: n = hash(r + magic) % 1001 print r, n #### M 1000 D 500 C 100 L 50 X 10 V 5 I 1