// Variant 1: single pass over q7. Each iteration derives m7/s7 from m6/s6,
// issues two prefetches into bytevecM, and then immediately runs the UNROLL
// sequence for that same q7.
// q7 takes the values 1..127 except 10 and 13.
// NOTE(review): 10 and 13 are the ASCII codes for LF and CR; presumably q7 is
// a candidate character and line terminators are excluded - confirm.
for (q7 = 1; q7 < 128; ++q7) {
if (q7 == 10 || q7 == 13) continue;
// Scalar step: XOR q7 into m6, then multiply by H_PRIME.
m7 = (m6 ^ q7) * H_PRIME;
// Vector step: the same XOR-then-multiply applied to every 32-bit lane of s6,
// keeping the low 32 bits of each product.
// NOTE(review): hp is presumably H_PRIME broadcast to all lanes - confirm.
s7 = _mm_mullo_epi32(_mm_xor_si128(s6, _mm_set1_epi32(q7)), hp);
// Clear the low 7 bits of m7 (as unsigned int) to get an index that is a
// multiple of 128, then prefetch the element at that index and the one 64
// elements later, both with the T0 hint.
// NOTE(review): assumes bytevecM has 1-byte elements and 64-byte cache lines,
// so these two prefetches cover one 128-byte span - confirm.
_mm_prefetch(&bytevecM[(unsigned int)m7 & 0xffffff80], _MM_HINT_T0);
_mm_prefetch(&bytevecM[64+((unsigned int)m7 & 0xffffff80)], _MM_HINT_T0);
// UNROLL is a macro defined outside this view. It is invoked with the
// constants 1..9, 11 and 12 (10 is omitted), then in a loop for 14..127
// (13 is omitted) - the same set of values that q7 ranges over.
// The prefetches above are issued in the same iteration as this work, so
// there is little distance between prefetch and any use of the data.
UNROLL(1)
UNROLL(2)
UNROLL(3)
UNROLL(4)
UNROLL(5)
UNROLL(6)
UNROLL(7)
UNROLL(8)
UNROLL(9)
UNROLL(11)
UNROLL(12)
for (q8 = 14; q8 < 128; ++q8) {
UNROLL(q8)
}
}
####
// Variant 2: the work is split into two passes over q7 (1..127, skipping 10
// and 13) so that every prefetch is issued before any UNROLL work starts.
// NOTE(review): presumably this gives the prefetches time to complete before
// the data is touched - confirm with measurements.
//
// Pass 1: compute m7 for each q7, save it in m7arr[q7], and issue the two
// prefetches for it. m7arr is declared outside this view and is indexed up to
// 127, so it needs at least 128 elements; entries 0, 10 and 13 are not
// written here.
for (q7 = 1; q7 < 128; ++q7) {
if (q7 == 10 || q7 == 13) continue;
// Scalar step: XOR q7 into m6, then multiply by H_PRIME.
m7 = (m6 ^ q7) * H_PRIME;
m7arr[q7] = m7;
// Index is m7 (as unsigned int) with its low 7 bits cleared, i.e. a multiple
// of 128; prefetch at that index and 64 elements later, T0 hint.
_mm_prefetch(&bytevecM[(unsigned int)m7 & 0xffffff80], _MM_HINT_T0);
_mm_prefetch(&bytevecM[64+((unsigned int)m7 & 0xffffff80)], _MM_HINT_T0);
}
// Pass 2: for the same q7 values, reload m7 from m7arr, compute s7, and run
// the UNROLL sequence.
for (q7 = 1; q7 < 128; ++q7) {
if (q7 == 10 || q7 == 13) continue;
m7 = m7arr[q7];
// Vector step: XOR q7 into every 32-bit lane of s6, then multiply lane-wise
// by hp, keeping the low 32 bits of each product.
s7 = _mm_mullo_epi32(_mm_xor_si128(s6, _mm_set1_epi32(q7)), hp);
// UNROLL is a macro defined outside this view. It is invoked with the
// constants 1..9, 11 and 12, then in a loop for 14..127 - so 10 and 13 are
// skipped here as well.
UNROLL(1)
UNROLL(2)
UNROLL(3)
UNROLL(4)
UNROLL(5)
UNROLL(6)
UNROLL(7)
UNROLL(8)
UNROLL(9)
UNROLL(11)
UNROLL(12)
for (q8 = 14; q8 < 128; ++q8) {
UNROLL(q8)
}
}
##
##
// Variant 3: two passes over q7 (1..127, skipping 10 and 13). Pass 1 does all
// per-q7 precomputation - both the scalar m7 and the vector s7 - plus the
// prefetches; pass 2 only reloads the saved values before the UNROLL work.
//
// Pass 1: fill m7arr[q7] and s7arr[q7] and issue the prefetches. Both arrays
// are declared outside this view and are indexed up to 127, so each needs at
// least 128 elements; entries 0, 10 and 13 are not written here.
for (q7 = 1; q7 < 128; ++q7) {
if (q7 == 10 || q7 == 13) continue;
// Scalar step: XOR q7 into m6, then multiply by H_PRIME.
m7 = (m6 ^ q7) * H_PRIME;
m7arr[q7] = m7;
// Index is m7 (as unsigned int) with its low 7 bits cleared, i.e. a multiple
// of 128; prefetch at that index and 64 elements later, T0 hint.
_mm_prefetch(&bytevecM[(unsigned int)m7 & 0xffffff80], _MM_HINT_T0);
_mm_prefetch(&bytevecM[64+((unsigned int)m7 & 0xffffff80)], _MM_HINT_T0);
// Vector step: XOR q7 into every 32-bit lane of s6, then multiply lane-wise
// by hp, keeping the low 32 bits of each product.
s7 = _mm_mullo_epi32(_mm_xor_si128(s6, _mm_set1_epi32(q7)), hp);
s7arr[q7] = s7;
}
// Pass 2: reload the m7/s7 pair saved for each q7 and run the UNROLL work.
for (q7 = 1; q7 < 128; ++q7) {
if (q7 == 10 || q7 == 13) continue;
m7 = m7arr[q7];
s7 = s7arr[q7];
// ... same UNROLL as before
// (body elided in this listing; in the earlier variants it is UNROLL(1)..
// UNROLL(9), UNROLL(11), UNROLL(12), then UNROLL(q8) for q8 = 14..127)
}