I tried to shift the parities left instead of shifting the operands right: unsurprisingly, the differences are negligible.
With
Unsigned bit = 1;
for ( ; bit <= rhs && bit != 0 ; bit <<= 1) {
long both = lhs_p&rhs_p;
res ^= both;
lhs_p = (lhs_p ^ both ^ lhs) << 1;
rhs_p = ((rhs_p ^ both ^ rhs) & bit) << 1;
}
if (0 == rhs_p)
return res;
lhs_p &= bit;
if (0 != lhs_p)
return res ^= lhs_p;
bit = lhs ^ (lhs & (lhs-bit));
return res ^ (bit << 1);
With determination, the bit-hack variants of population count and parity can be coded without undue regard to type sizes:
/** bit masks */
const uintmax_t
ALL_ONES = ~(uintmax_t)0,
O_ONES = ALL_ONES/0xff, //0x...10101
FIVES = ALL_ONES/3, //0x...33333
THREES = ALL_ONES/5, //0x...55555
OFOFS = ALL_ONES/17; //0x...F0F0F
/** return number of bits set to one in <code>bits</code> */
// fast multiplication variant
uintmax_tint popCount(longuintmax_t bits) {
bits -= (bits >> 1) & FIVES;
bits = (bits & THREES) + ((bits >> 2) & THREES);
// of course, O7O7s would do - but 255 doesn't divide by 7
bits = bits + (bits >> 4) & OFOFS;
// needs one more "fold operation" and "ones spaced 16"
// for 256 <= #bits < 65536
return (bits * O_ONES) >> (sizeof bits * 8);
}
/** return (odd) parity of <code>bits</code> */
uintmax_tint parity(longuintmax_t bits) {
long shifted =uintmax_t bits;shifted;
for (int shift = 4 ; 0 < ((shifted >>>== bits) >>= shift) ; shift *= 2)
shifted = bits ^= shifted;
return (0x6996 >> ((int)bits & 0xf)) & 1;
}