Skip to main content
added "shift-left variant", improved type mess
Source Link
greybeard
  • 7.7k
  • 3
  • 21
  • 56

I tried to shift the parities left instead of shifting the operands right: unsurprisingly, the differences are negligible.
With

    Unsigned bit = 1;
    for ( ; bit <= rhs && bit != 0 ; bit <<= 1) {
        long both = lhs_p&rhs_p;
        res ^= both;
        lhs_p = (lhs_p ^ both ^ lhs) << 1;
        rhs_p = ((rhs_p ^ both ^ rhs) & bit) << 1;  
    }
    if (0 == rhs_p)
        return res;
    lhs_p &= bit;
    if (0 != lhs_p)
        return res ^= lhs_p;
    bit = lhs ^ (lhs & (lhs-bit));
    return res ^ (bit << 1);

With determination, the bit-hack variants of population count and parity can be coded without undue regard to type sizes:

/** bit masks */
const uintmax_t
    ALL_ONES = ~(uintmax_t)0,
    O_ONES = ALL_ONES/0xff, //0x...10101
    FIVES  = ALL_ONES/3,    //0x...33333
    THREES = ALL_ONES/5,    //0x...55555
    OFOFS  = ALL_ONES/17;   //0x...F0F0F

/** return number of bits set to one in <code>bits</code> */
// fast multiplication variant
uintmax_tint popCount(longuintmax_t bits) {
    bits -= (bits >> 1) & FIVES;
    bits = (bits & THREES) + ((bits >> 2) & THREES);
// of course, O7O7s would do - but 255 doesn't divide by 7
    bits = bits + (bits >> 4) & OFOFS;
// needs one more "fold operation" and "ones spaced 16"
//  for 256 <= #bits < 65536
    return (bits * O_ONES) >> (sizeof bits * 8);
}

/** return (odd) parity of <code>bits</code> */
uintmax_tint parity(longuintmax_t bits) {
    long shifted =uintmax_t bits;shifted;
    for (int shift = 4 ; 0 < ((shifted >>>== bits) >>= shift) ; shift *= 2)
        shifted = bits ^= shifted;
    return (0x6996 >> ((int)bits & 0xf)) & 1;
}

I tried to shift the parities left instead of shifting the operands right: unsurprisingly, the differences are negligible.
With determination, the bit-hack variants of population count and parity can be coded without undue regard to type sizes:

/** bit masks */
const uintmax_t
    ALL_ONES = ~(uintmax_t)0,
    O_ONES = ALL_ONES/0xff, //0x...10101
    FIVES  = ALL_ONES/3,    //0x...33333
    THREES = ALL_ONES/5,    //0x...55555
    OFOFS  = ALL_ONES/17;   //0x...F0F0F

/** return number of bits set to one in <code>bits</code> */
// fast multiplication variant
uintmax_t popCount(long bits) {
    bits -= (bits >> 1) & FIVES;
    bits = (bits & THREES) + ((bits >> 2) & THREES);
// of course, O7O7s would do - but 255 doesn't divide by 7
    bits = bits + (bits >> 4) & OFOFS;
// needs one more "fold operation" and "ones spaced 16"
//  for 256 <= #bits < 65536
    return (bits * O_ONES) >> (sizeof bits * 8);
}

/** return (odd) parity of <code>bits</code> */
uintmax_t parity(long bits) {
    long shifted = bits;
    for (int shift = 4 ; 0 < (shifted >>>= shift) ; shift *= 2)
        shifted = bits ^= shifted;
    return (0x6996 >> (bits & 0xf)) & 1;
}

I tried to shift the parities left instead of shifting the operands right: unsurprisingly, the differences are negligible.

    Unsigned bit = 1;
    for ( ; bit <= rhs && bit != 0 ; bit <<= 1) {
        long both = lhs_p&rhs_p;
        res ^= both;
        lhs_p = (lhs_p ^ both ^ lhs) << 1;
        rhs_p = ((rhs_p ^ both ^ rhs) & bit) << 1;  
    }
    if (0 == rhs_p)
        return res;
    lhs_p &= bit;
    if (0 != lhs_p)
        return res ^= lhs_p;
    bit = lhs ^ (lhs & (lhs-bit));
    return res ^ (bit << 1);

With determination, the bit-hack variants of population count and parity can be coded without undue regard to type sizes:

/** bit masks */
const uintmax_t
    ALL_ONES = ~(uintmax_t)0,
    O_ONES = ALL_ONES/0xff, //0x...10101
    FIVES  = ALL_ONES/3,    //0x...33333
    THREES = ALL_ONES/5,    //0x...55555
    OFOFS  = ALL_ONES/17;   //0x...F0F0F

/** return number of bits set to one in <code>bits</code> */
// fast multiplication variant
int popCount(uintmax_t bits) {
    bits -= (bits >> 1) & FIVES;
    bits = (bits & THREES) + ((bits >> 2) & THREES);
// of course, O7O7s would do - but 255 doesn't divide by 7
    bits = bits + (bits >> 4) & OFOFS;
// needs one more "fold operation" and "ones spaced 16"
//  for 256 <= #bits < 65536
    return (bits * O_ONES) >> (sizeof bits * 8);
}

/** return (odd) parity of <code>bits</code> */
int parity(uintmax_t bits) {
    uintmax_t shifted;
    for (int shift = 4 ; 0 < ((shifted = bits) >>= shift) ; shift *= 2)
        bits ^= shifted;
    return (0x6996 >> ((int)bits & 0xf)) & 1;
}
Source Link
greybeard
  • 7.7k
  • 3
  • 21
  • 56

Some operations can be shaved from the main loop
(there are machines without "AndNot-instruction"):

// process bits from least significant to most in shorter operand
while (0 < rhs) {
    Unsigned res_i = lhs_p & rhs_p;
    res ^= res_i << i;

    lhs_p ^= res_i ^ lhs;
    rhs_p = (rhs_p ^ res_i ^ rhs) & 1;

    ++i;
    lhs >>= 1u;
    rhs >>= 1u;
}
// remaining bits of longer operand
if (0 == rhs_p)
    return res;
lhs_p &= 1;

(While expected run lengths for uniformly distributed bit patterns are (1 +) smaller than 1,) There is no need to process the remaining bits in a loop:

    if (0 != lhs_p)
        return res ^= lhs_p << i;
    Unsigned bit = lhs ^ (lhs & (lhs-1));
    return res ^ (bit << (i+1));

I tried to shift the parities left instead of shifting the operands right: unsurprisingly, the differences are negligible.
With determination, the bit-hack variants of population count and parity can be coded without undue regard to type sizes:

/** bit masks */
const uintmax_t
    ALL_ONES = ~(uintmax_t)0,
    O_ONES = ALL_ONES/0xff, //0x...10101
    FIVES  = ALL_ONES/3,    //0x...33333
    THREES = ALL_ONES/5,    //0x...55555
    OFOFS  = ALL_ONES/17;   //0x...F0F0F

/** return number of bits set to one in <code>bits</code> */
// fast multiplication variant
uintmax_t popCount(long bits) {
    bits -= (bits >> 1) & FIVES;
    bits = (bits & THREES) + ((bits >> 2) & THREES);
// of course, O7O7s would do - but 255 doesn't divide by 7
    bits = bits + (bits >> 4) & OFOFS;
// needs one more "fold operation" and "ones spaced 16"
//  for 256 <= #bits < 65536
    return (bits * O_ONES) >> (sizeof bits * 8);
}

/** return (odd) parity of <code>bits</code> */
uintmax_t parity(long bits) {
    long shifted = bits;
    for (int shift = 4 ; 0 < (shifted >>>= shift) ; shift *= 2)
        shifted = bits ^= shifted;
    return (0x6996 >> (bits & 0xf)) & 1;
}