Introduction
I have this GitHub repository (version 1.0.0.). It implements a rank(i) operation in \$\Theta(1)\$ time, and select(i) operation in \$\Theta(\log n)\$ time.
(This post has an continuation post, version 1.0.1.)
Code
com.github.coderodde.util.RankSelectBitVector.java:
/**
* This class defines a packed bit vector that supports {@code rank()} operation
* in {@code O(1)} time, and {@code select()} in {@code O(log n)} time.
*
* @version 1.0.0
* @since 1.0.0
*/
public final class RankSelectBitVector {
/**
* Indicates whether some bits were changed since the previous building of
* the index data structures.
*/
private boolean hasDirtyState = true;
/**
* The actual bit storage array.
*/
private final byte[] bytes;
/**
* The actual requested number of bits in this bit vector. Will be smaller
* than the total capacity.
*/
private final int numberOfRequestedBits;
/**
* Denotes index of the most rightmost meaningful bit. Will be set to
* {@code numberOfRequestedBits - 1}.
*/
private final int maximumBitIndex;
/**
* Caches the number of bits set to one (1).
*/
private int numberOfSetBits;
/**
* The block size in the {@code first} table.
*/
private int ell;
/**
* The block size in the {@code second} table.
*/
private int k;
// The following three tables hold the index necessary for efficient rank
// operation. According to internet, has space
// O(sgrt(n) * log log n * log n.
private int[] first;
private int[] second;
private int[][] third;
/**
* Constructs a new bit vector.
*
* @param numberOfRequestedBits the actual number of bits to support.
*/
public RankSelectBitVector(int numberOfRequestedBits) {
checkNumberOfRequestedBits(numberOfRequestedBits);
this.numberOfRequestedBits = numberOfRequestedBits;
// Calculate the actual number of storage bytes:
int numberOfBytes = numberOfRequestedBits / Byte.SIZE +
(numberOfRequestedBits % Byte.SIZE != 0 ? 1 : 0);
numberOfBytes++; // Padding tail byte in order to simplify the last
// rank/select.
bytes = new byte[numberOfBytes];
// Set the rightmost, valid index:
this.maximumBitIndex = this.bytes.length * Byte.SIZE - 1;
}
@Override
public String toString() {
StringBuilder sb = new StringBuilder().append("[Bit vector, size = ");
sb.append(getNumberOfSupportedBits())
.append(" bits, data = ");
int bitNumber = 0;
for (int i = 0; i < getNumberOfSupportedBits(); i++) {
sb.append(readBitImpl(i) ? "1" : "0");
bitNumber++;
if (bitNumber % 8 == 0) {
sb.append(" ");
}
}
return sb.append("]").toString();
}
/**
* Preprocesses the internal data structures in {@code O(n)}.
*/
public void buildIndices() {
if (hasDirtyState == false) {
// Nothing to do.
return;
}
//// Deal with the 'first'.
// n - total number of bit slots:
int n = bytes.length * Byte.SIZE;
// elll - the l value:
this.ell = (int) Math.pow(Math.ceil(log2(n) / 2.0), 2.0);
this.first = new int[n / ell + 1];
for (int i = ell; i < n; i++) {
if (i % ell == 0) {
int firstArraySlotIndex = i / ell;
int startIndex = i - ell;
int endIndex = i - 1;
first[firstArraySlotIndex] =
first[firstArraySlotIndex - 1] +
bruteForceRank(startIndex,
endIndex);
}
}
//// Deal with the 'second'.
this.k = (int) Math.ceil(log2(n) / 2.0);
this.second = new int[n / k + 1];
for (int i = k; i < n; i++) {
if (i % k == 0) {
second[i/k] = bruteForceRank(ell * (i / ell), i - 1);
}
}
//// Deal with the 'third': four Russians' technique:
this.third = new int[(int) Math.pow(2.0, k - 1)][];
for (int selectorIndex = 0;
selectorIndex < third.length;
selectorIndex++) {
third[selectorIndex] = new int[k - 1];
// third[selectorIndex][0] is always zero (0).
third[selectorIndex][0] = (bitIsSet(selectorIndex, k - 2) ? 1 : 0);
for (int j = 1; j < k - 1; j++) {
third[selectorIndex][j] =
third[selectorIndex][j - 1] +
(bitIsSet(selectorIndex, k - j - 2) ? 1 : 0);
}
}
hasDirtyState = false;
}
/**
* Returns the number of bits that are set (have value of one (1)).
*
* @return the number of set bits.
*/
public int getNumberOfSetBits() {
return numberOfSetBits;
}
/**
* Returns the number of bits this bit vector supports.
*
* @return the number of bits supported.
*/
public int getNumberOfSupportedBits() {
return numberOfRequestedBits;
}
/**
* Sets the {@code index}th bit to one (1).
*
* @param index the index of the target bit.
*/
public void writeBitOn(int index) {
writeBit(index, true);
}
/**
* Sets the {@code index}th bit to zero (0).
*
* @param index the index of the target bit.
*/
public void writeBitOff(int index) {
writeBit(index, false);
}
/**
* Writes the {@code index}th bit to {@code on}.
*
* @param index the index of the target bit.
* @param on the selector of the bit: if {@code true}, the bit will be
* set to one, otherwise set zero.
*/
public void writeBit(int index, boolean on) {
checkBitAccessIndex(index);
writeBitImpl(index, on);
}
/**
* Reads the {@code index}th bit where indexation starts from zero (0).
*
* @param index the bit index.
* @return {@code true} if and only if the {@code index}th bit is set.
*/
public boolean readBit(int index) {
checkBitAccessIndex(index);
return readBitImpl(index);
}
/**
* Returns the rank of {@code index}, i.e., the number of set bits in the
* subvector {@code vector[1..index]}. Runs in {@code O((log n)^2)} time.
*
* @param index the target index.
* @return the rank for the input target.
*/
public int rankFirst(int index) {
checkBitIndexForRank(index);
makeSureStateIsCompiled();
int startIndex = ell * (index / ell);
int endIndex = index - 1;
return first[index / ell] + bruteForceRank(startIndex, endIndex);
}
/**
* Returns the {@code index}th rank. Runs in {@code O(log n)} time.
*
* @param index the target index.
* @return the rank of the input index.
*/
public int rankSecond(int index) {
checkBitIndexForRank(index);
makeSureStateIsCompiled();
int startIndex = k * (index / k);
int endIndex = index - 1;
return first[index / ell] +
second[index / k] +
bruteForceRank(startIndex,
endIndex);
}
/**
* Returns the {@code index}th rank. Runs in {@code O(1)} time.
*
* @param index the target index.
* @return the rank of the input index.
*/
public int rankThird(int index) {
checkBitIndexForRank(index);
makeSureStateIsCompiled();
int f = first[index / ell];
int s = second[index / k];
int thirdEntryIndex = index % k - 1;
if (thirdEntryIndex == -1) {
return f + s;
}
int selectorIndex =
extractBitVector(index)
.toInteger(k - 1);
return f + s + third[selectorIndex][thirdEntryIndex];
}
/**
* Returns the index of the {@code index}th 1-bit. Relies on
* {@link #rankFirst(int)}, which runs in {@code O((log n)^2)}, which yields
* {@code O((log n)^3)} running time for the {@code selectFirst}.
*
* @param bitIndex the target index.
* @return the index of the {@code index}th 1-bit.
*/
public int selectFirst(int bitIndex) {
checkBitIndexForSelect(bitIndex);
return selectImplFirst(bitIndex, 0, getNumberOfSupportedBits());
}
/**
* Returns the index of the {@code index}th 1-bit. Relies on
* {@link #rankSecond(int)}, which runs in {@code O(log n)}, which yields
* {@code O((log n)^2)} running time for the {@code selectSecond}.
*
* @param bitIndex the target index.
* @return the index of the {@code index}th 1-bit.
*/
public int selectSecond(int bitIndex) {
checkBitIndexForSelect(bitIndex);
return selectImplSecond(bitIndex, 0, getNumberOfSupportedBits());
}
/**
* Returns the index of the {@code index}th 1-bit. Relies on
* {@link #rankThird(int)}, which runs in {@code O(1)}, which yields
* {@code O(log n)} running time for the {@code selectThird}.
*
* @param bitIndex the target index.
* @return the index of the {@code index}th 1-bit.
*/
public int selectThird(int bitIndex) {
checkBitIndexForSelect(bitIndex);
return selectImplThird(bitIndex, 0, getNumberOfSupportedBits());
}
private int selectImplFirst(int bitIndex,
int rangeStartIndex,
int rangeLength) {
if (rangeLength == 1) {
return rangeStartIndex;
}
int halfRangeLength = rangeLength / 2;
int r = rankFirst(halfRangeLength + rangeStartIndex);
if (r >= bitIndex) {
return selectImplFirst(bitIndex,
rangeStartIndex,
halfRangeLength);
} else {
return selectImplFirst(bitIndex,
rangeStartIndex + halfRangeLength,
rangeLength - halfRangeLength);
}
}
private int selectImplSecond(int bitIndex,
int rangeStartIndex,
int rangeLength) {
if (rangeLength == 1) {
return rangeStartIndex;
}
int halfRangeLength = rangeLength / 2;
int r = rankSecond(halfRangeLength + rangeStartIndex);
if (r >= bitIndex) {
return selectImplSecond(bitIndex,
rangeStartIndex,
halfRangeLength);
} else {
return selectImplSecond(bitIndex,
rangeStartIndex + halfRangeLength,
rangeLength - halfRangeLength);
}
}
private int selectImplThird(int bitIndex,
int rangeStartIndex,
int rangeLength) {
if (rangeLength == 1) {
return rangeStartIndex;
}
int halfRangeLength = rangeLength / 2;
int r = rankThird(halfRangeLength + rangeStartIndex);
if (r >= bitIndex) {
return selectImplThird(bitIndex,
rangeStartIndex,
halfRangeLength);
} else {
return selectImplThird(bitIndex,
rangeStartIndex + halfRangeLength,
rangeLength - halfRangeLength);
}
}
/**
* The delegate for manipulating bits.
*
* @param index the index of the target bit.
* @param on the flag deciding the value of the bit in question.
*/
private void writeBitImpl(int index, boolean on) {
boolean previousBitValue = readBit(index);
if (on) {
if (previousBitValue == false) {
hasDirtyState = true;
numberOfSetBits++;
}
turnBitOn(index);
} else {
if (previousBitValue == true) {
hasDirtyState = true;
numberOfSetBits--;
}
turnBitOff(index);
}
}
/**
* Implements the actual reading of a bit.
*
* @param index the index of the target bit to read.
* @return the value of the target bit.
*/
boolean readBitImpl(int index) {
int byteIndex = index / Byte.SIZE;
int targetByteBitIndex = index % Byte.SIZE;
byte targetByte = bytes[byteIndex];
return (targetByte & (1 << targetByteBitIndex)) != 0;
}
/**
* Makes sure that the state of the internal data structures is up to date.
*/
private void makeSureStateIsCompiled() {
if (hasDirtyState) {
buildIndices();
hasDirtyState = false;
}
}
/**
* Turns the {@code index}th bit on. Indexation is zero-based.
*
* @param index the target bit index.
*/
private void turnBitOn(int index) {
int byteIndex = index / Byte.SIZE;
int bitIndex = index % Byte.SIZE;
byte mask = 1;
mask <<= bitIndex;
bytes[byteIndex] |= mask;
}
/**
* Turns the {@code index}th bit off. Indexation is zero-based.
*
* @param index the target bit index.
*/
private void turnBitOff(int index) {
int byteIndex = index / Byte.SIZE;
int bitIndex = index % Byte.SIZE;
byte mask = 1;
mask <<= bitIndex;
bytes[byteIndex] &= ~mask;
}
private void checkBitIndexForSelect(int selectionIndex) {
if (selectionIndex < 0) {
throw new IndexOutOfBoundsException(
String.format(
"The input selection index is negative: " +
"(%d). Must be within range [1..%d].\n",
selectionIndex,
numberOfSetBits));
}
if (selectionIndex == 0) {
throw new IndexOutOfBoundsException(
String.format(
"The input selection index is zero (0). " +
"Must be within range [1..%d].\n",
numberOfSetBits));
}
if (selectionIndex > numberOfSetBits) {
throw new IndexOutOfBoundsException(
String.format(
"The input selection index is too large (%d). " +
"Must be within range [1..%d].\n",
selectionIndex,
numberOfSetBits));
}
}
private void checkBitIndexForRank(int index) {
if (index < 0) {
throw new IndexOutOfBoundsException(
String.format("Negative bit index: %d.", index));
}
if (index > numberOfRequestedBits) {
throw new IndexOutOfBoundsException(
String.format(
"Too large bit index (%d), number of bits " +
"supported is %d.",
index,
numberOfRequestedBits));
}
}
private void checkBitAccessIndex(int accessIndex) {
if (accessIndex < 0) {
throw new IndexOutOfBoundsException(
String.format(
"Negative bit access index: %d.",
accessIndex));
}
if (accessIndex >= getNumberOfSupportedBits()) {
throw new IndexOutOfBoundsException(
String.format(
"Too large bit access index (%d), number of bits " +
"supported is %d.",
accessIndex,
getNumberOfSupportedBits()));
}
}
/**
* Returns {@code true} if and only if the {@code bitIndex}th bit in
* {@code value} is set.
*
* @param value the value of which to inspect the bit.
* @param bitIndex the bit index.
* @return {@code true} if and only if the specified bit is set.
*/
private boolean bitIsSet(int value, int bitIndex) {
return (value & (1 << bitIndex)) != 0;
}
int toInteger(int numberOfBitsToRead) {
int integer = 0;
for (int i = 0; i < numberOfBitsToRead; i++) {
boolean bit = readBitImpl(i);
if (bit == true) {
integer |= 1 << i;
}
}
return integer;
}
private RankSelectBitVector extractBitVector(int i) {
int startIndex = k * (i / k);
int endIndex = Math.min(k * (i / k + 1) - 2, maximumBitIndex);
int extractedBitVectorLength = endIndex - startIndex + 1;
RankSelectBitVector extractedBitVector =
new RankSelectBitVector(extractedBitVectorLength);
for (int index = extractedBitVectorLength - 1,
j = startIndex;
j <= endIndex;
j++, index--) {
extractedBitVector.writeBitImpl(index, this.readBitImpl(j));
}
return extractedBitVector;
}
private int bruteForceRank(int startIndex, int endIndex) {
int rank = 0;
for (int i = startIndex; i <= endIndex; i++) {
if (readBitImpl(i)) {
rank++;
}
}
return rank;
}
private void checkNumberOfRequestedBits(int numberOfRequestedBits) {
if (numberOfRequestedBits == 0) {
throw new IllegalArgumentException("Requested zero (0) bits.");
}
if (numberOfRequestedBits < 0) {
throw new IllegalArgumentException(
String.format(
"Requested negative number of bits (%d).\n",
numberOfRequestedBits));
}
}
private static double log2(double v) {
return Math.log(v) / Math.log(2.0);
}
}
com.github.coderodde.util.RankSelectBitVectorBenchmark.java:
package com.github.coderodde.util.benchmark;
import com.github.coderodde.util.RankSelectBitVector;
import java.util.Random;
public final class RankSelectBitVectorBenchmark {
/**
* The number of bits in the benchmark bit vector.
*/
private static final int BIT_VECTOR_LENGTH = 4_000_000;
public static void main(String[] args) {
long seed = parseSeed(args);
System.out.printf("Seed = %d\n", seed);
Random random = new Random(seed);
long st = System.currentTimeMillis();
RankSelectBitVector rankSelectBitVector = createRandomBitVector(random);
System.out.printf("Built the bit vector in %d milliseconds.\n",
System.currentTimeMillis() - st);
st = System.currentTimeMillis(); // st - start time.
rankSelectBitVector.buildIndices();
System.out.printf("Preprocessed the bit vector in %d milliseconds.\n",
System.currentTimeMillis() - st);
System.out.println("--- Benchmarking rank operation ---");
benchmarkRanks(rankSelectBitVector);
System.out.println("--- Benchmarking select operation ---");
benchmarkSelects(rankSelectBitVector);
}
private static RankSelectBitVector createRandomBitVector(Random random) {
RankSelectBitVector rankSelectBitVector =
new RankSelectBitVector(BIT_VECTOR_LENGTH);
for (int bitIndex = 0;
bitIndex != rankSelectBitVector.getNumberOfSupportedBits();
bitIndex++) {
if (random.nextBoolean()) {
rankSelectBitVector.writeBitOn(bitIndex);
}
}
return rankSelectBitVector;
}
private static boolean rankArraysEqual(int[] rankArray1, int[] rankArray2) {
if (rankArray1.length != rankArray2.length) {
throw new IllegalArgumentException("Rank array length mismatch.");
}
int n = Math.max(rankArray1.length, rankArray2.length);
for (int i = 0; i != n; i++) {
int rank1 = rankArray1[i];
int rank2 = rankArray2[i];
if (rank1 != rank2) {
System.err.printf(
"ERROR: Mismatch at index = %d, " +
"rank1 = %d, rank2 = %d.\n",
i,
rank1,
rank2);
return false;
}
}
return true;
}
private static void
benchmarkRanks(RankSelectBitVector rankSelectBitVector) {
int numberOfBits = rankSelectBitVector.getNumberOfSupportedBits();
int[] answers1 = new int[numberOfBits];
int[] answers2 = new int[numberOfBits];
int[] answers3 = new int[numberOfBits];
long st = System.currentTimeMillis(); // st - start time.
for (int i = 0; i != numberOfBits; i++) {
answers1[i] = rankSelectBitVector.rankFirst(i);
}
long answersDuration1 = System.currentTimeMillis() - st;
System.out.printf(
"rankFirst() ran for %d milliseconds.\n",
answersDuration1);
st = System.currentTimeMillis();
for (int i = 0; i != numberOfBits; i++) {
answers2[i] = rankSelectBitVector.rankSecond(i);
}
long answersDuration2 = System.currentTimeMillis() - st;
System.out.printf(
"rankSecond() ran for %d milliseconds.\n",
answersDuration2);
st = System.currentTimeMillis();
for (int i = 0; i != numberOfBits; i++) {
answers3[i] = rankSelectBitVector.rankThird(i);
}
long answersDuration3 = System.currentTimeMillis() - st;
System.out.printf(
"rankThird() ran for %d milliseconds.\n",
answersDuration3);
if (!rankArraysEqual(answers1, answers2)) {
System.err.println("Failed on rankFirst vs. rankSecond.");
return;
}
if (!rankArraysEqual(answers1, answers3)) {
System.err.println("Failed on rankFirst vs. rankThird.");
}
}
private static void
benchmarkSelects(RankSelectBitVector rankSelectBitVector) {
int numberOfSetBits = rankSelectBitVector.getNumberOfSetBits();
int[] answers1 = new int[numberOfSetBits + 1];
int[] answers2 = new int[numberOfSetBits + 1];
int[] answers3 = new int[numberOfSetBits + 1];
long st = System.currentTimeMillis();
for (int i = 1; i <= numberOfSetBits; i++) {
answers1[i] = rankSelectBitVector.selectFirst(i);
}
long answersDuration1 = System.currentTimeMillis() - st;
System.out.printf(
"selectFirst() ran for %d milliseconds.\n",
answersDuration1);
st = System.currentTimeMillis();
for (int i = 1; i <= numberOfSetBits; i++) {
answers2[i] = rankSelectBitVector.selectSecond(i);
}
long answersDuration2 = System.currentTimeMillis() - st;
System.out.printf(
"selectSecond() ran for %d milliseconds.\n",
answersDuration2);
st = System.currentTimeMillis();
for (int i = 1; i <= numberOfSetBits; i++) {
answers3[i] = rankSelectBitVector.selectThird(i);
}
long answersDuration3 = System.currentTimeMillis() - st;
System.out.printf(
"selectThird() ran for %d milliseconds.\n",
answersDuration3);
if (!rankArraysEqual(answers1, answers2)) {
System.err.println("Failed on selectFirst vs. selectSecond.");
return;
}
if (!rankArraysEqual(answers1, answers3)) {
System.err.println("Failed on selectFirst vs. selectThird.");
}
}
private static long parseSeed(String[] args) {
if (args.length == 0) {
return System.currentTimeMillis();
}
try {
return Long.parseLong(args[0]);
} catch (NumberFormatException ex) {
System.err.printf(
"WARNING: Could not parse '%s' as an long value.", args[0]);
return System.currentTimeMillis();
}
}
}
com.github.coderodde.util.RankSelectBitVectorTest.java:
package com.github.coderodde.util;
import java.util.Random;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertFalse;
import static org.junit.Assert.assertTrue;
import static org.junit.Assert.fail;
import org.junit.Test;
public final class RankSelectBitVectorTest {
@Test
public void lastBitRank() {
RankSelectBitVector bv = new RankSelectBitVector(8);
bv.writeBitOn(2);
bv.writeBitOn(6);
bv.writeBitOn(7);
assertEquals(3, bv.rankFirst(8));
assertEquals(3, bv.rankSecond(8));
assertEquals(3, bv.rankThird(8));
}
@Test
public void smallSelect() {
RankSelectBitVector bv = new RankSelectBitVector(8);
bv.writeBitOn(2);
bv.writeBitOn(4);
bv.writeBitOn(5);
bv.writeBitOn(7);
// 00101101
// select(1) = 2
// select(2) = 4
// select(3) = 5
// select(4) = 7
assertEquals(2, bv.selectFirst(1));
assertEquals(4, bv.selectFirst(2));
assertEquals(5, bv.selectFirst(3));
assertEquals(7, bv.selectFirst(4));
}
@Test
public void debugTest1() {
// 00101101
RankSelectBitVector bv = new RankSelectBitVector(8);
bv.writeBitOn(2);
bv.writeBitOn(4);
bv.writeBitOn(5);
bv.writeBitOn(7);
assertEquals(0, bv.rankThird(0));
assertEquals(0, bv.rankThird(1));
assertEquals(0, bv.rankThird(2));
assertEquals(1, bv.rankThird(3));
assertEquals(1, bv.rankThird(4));
assertEquals(2, bv.rankThird(5));
assertEquals(3, bv.rankThird(6));
assertEquals(3, bv.rankThird(7));
assertEquals(4, bv.rankThird(8));
assertEquals(2, bv.selectFirst(1));
assertEquals(4, bv.selectFirst(2));
assertEquals(5, bv.selectFirst(3));
assertEquals(7, bv.selectFirst(4));
}
@Test
public void debugTest2() {
// 00101101 10101101
RankSelectBitVector bv = new RankSelectBitVector(16);
bv.writeBitOn(2);
bv.writeBitOn(4);
bv.writeBitOn(5);
bv.writeBitOn(7);
bv.writeBitOn(8);
bv.writeBitOn(10);
bv.writeBitOn(12);
bv.writeBitOn(13);
bv.writeBitOn(15);
assertEquals(0, bv.rankThird(0));
assertEquals(0, bv.rankThird(1));
assertEquals(0, bv.rankThird(2));
assertEquals(1, bv.rankThird(3));
assertEquals(1, bv.rankThird(4));
assertEquals(2, bv.rankThird(5));
assertEquals(3, bv.rankThird(6));
assertEquals(3, bv.rankThird(7));
assertEquals(4, bv.rankThird(8));
assertEquals(5, bv.rankThird(9));
assertEquals(5, bv.rankThird(10));
assertEquals(6, bv.rankThird(11));
assertEquals(6, bv.rankThird(12));
assertEquals(7, bv.rankThird(13));
assertEquals(8, bv.rankThird(14));
assertEquals(8, bv.rankThird(15));
assertEquals(9, bv.rankThird(16));
assertEquals(2, bv.selectFirst(1));
assertEquals(4, bv.selectFirst(2));
assertEquals(5, bv.selectFirst(3));
assertEquals(7, bv.selectFirst(4));
assertEquals(8, bv.selectFirst(5));
assertEquals(10, bv.selectFirst(6));
assertEquals(12, bv.selectFirst(7));
assertEquals(13, bv.selectFirst(8));
assertEquals(15, bv.selectFirst(9));
}
@Test
public void debugTest3() {
// 00101101 10101101 00010010
RankSelectBitVector bv = new RankSelectBitVector(24);
bv.writeBitOn(2);
bv.writeBitOn(4);
bv.writeBitOn(5);
bv.writeBitOn(7);
bv.writeBitOn(8);
bv.writeBitOn(10);
bv.writeBitOn(12);
bv.writeBitOn(13);
bv.writeBitOn(15);
bv.writeBitOn(19);
bv.writeBitOn(22);
assertEquals(0, bv.rankThird(0));
assertEquals(0, bv.rankThird(1));
assertEquals(0, bv.rankThird(2));
assertEquals(1, bv.rankThird(3));
assertEquals(1, bv.rankThird(4));
assertEquals(2, bv.rankThird(5));
assertEquals(3, bv.rankThird(6));
assertEquals(3, bv.rankThird(7));
assertEquals(4, bv.rankThird(8));
assertEquals(5, bv.rankThird(9));
assertEquals(5, bv.rankThird(10));
assertEquals(6, bv.rankThird(11));
assertEquals(6, bv.rankThird(12));
assertEquals(7, bv.rankThird(13));
assertEquals(8, bv.rankThird(14));
assertEquals(8, bv.rankThird(15));
assertEquals(9, bv.rankThird(16));
// 00010010
assertEquals(9, bv.rankThird(17));
assertEquals(9, bv.rankThird(18));
assertEquals(9, bv.rankThird(19));
assertEquals(10, bv.rankThird(20));
assertEquals(10, bv.rankThird(21));
assertEquals(10, bv.rankThird(22));
assertEquals(11, bv.rankThird(23));
assertEquals(11, bv.rankThird(24));
// select():
assertEquals(2, bv.selectFirst(1));
assertEquals(4, bv.selectFirst(2));
assertEquals(5, bv.selectFirst(3));
assertEquals(7, bv.selectFirst(4));
assertEquals(8, bv.selectFirst(5));
assertEquals(10, bv.selectFirst(6));
assertEquals(12, bv.selectFirst(7));
assertEquals(13, bv.selectFirst(8));
assertEquals(15, bv.selectFirst(9));
assertEquals(19, bv.selectFirst(10));
assertEquals(22, bv.selectFirst(11));
}
@Test
public void bruteForceTest() {
long seed = System.currentTimeMillis();
seed = 1706163778488L;
Random random = new Random(seed);
System.out.println("-- bruteForceTest, seed = " + seed);
RankSelectBitVector bv = getRandomBitVector(random);
BruteForceBitVector referenceBv = copy(bv);
bv.buildIndices();
int numberOfOneBits = bv.rankThird(bv.getNumberOfSupportedBits());
for (int i = 0; i < bv.getNumberOfSupportedBits(); i++) {
int actualRank = referenceBv.rank(i);
int rank1 = bv.rankFirst(i);
int rank2 = bv.rankSecond(i);
int rank3 = bv.rankThird(i);
int selectIndex = random.nextInt(numberOfOneBits) + 1;
int actualSelect = referenceBv.select(selectIndex);
int select1 = bv.selectFirst(selectIndex);
if (select1 != actualSelect) {
System.out.printf(
"ERROR: i = %d, actualSelect = %d, select1 = %d.\n",
i,
actualSelect,
select1);
}
if (rank3 != actualRank) {
System.out.printf(
"ERROR: i = %d, actual rank = %d, rank1 = %d, " +
"rank2 = %d, rank3 = %d.\n",
i,
actualRank,
rank1,
rank2,
rank3);
}
assertEquals(actualRank, rank1);
assertEquals(actualRank, rank2);
assertEquals(actualRank, rank3);
assertEquals(actualSelect, select1);
}
}
private static RankSelectBitVector getRandomBitVector(Random random) {
RankSelectBitVector bv = new RankSelectBitVector(5973);
for (int i = 0; i < bv.getNumberOfSupportedBits(); i++) {
if (random.nextDouble() < 0.3) {
bv.writeBitOn(i);
}
}
return bv;
}
private static BruteForceBitVector copy(RankSelectBitVector bv) {
BruteForceBitVector referenceBv =
new BruteForceBitVector(bv.getNumberOfSupportedBits());
for (int i = 0; i < bv.getNumberOfSupportedBits(); i++) {
if (bv.readBit(i)) {
referenceBv.writeBitOn(i);
}
}
return referenceBv;
}
@Test
public void toInteger() {
RankSelectBitVector bitVector = new RankSelectBitVector(31);
assertEquals(0, bitVector.toInteger(20));
bitVector.writeBit(1, true);
assertEquals(2, bitVector.toInteger(20));
bitVector.writeBit(2, true);
assertEquals(6, bitVector.toInteger(20));
bitVector.writeBit(4, true);
assertEquals(22, bitVector.toInteger(20));
}
@Test
public void readWriteBit() {
RankSelectBitVector bitVector = new RankSelectBitVector(30);
bitVector.writeBit(12, true);
assertTrue(bitVector.readBit(12));
bitVector.writeBit(12, false);
assertFalse(bitVector.readBit(12));
assertFalse(bitVector.readBit(13));
}
// @Test
public void bruteForceBitVectorSelect() {
BruteForceBitVector bv = new BruteForceBitVector(8);
bv.writeBitOn(2);
bv.writeBitOn(4);
bv.writeBitOn(6);
bv.writeBitOn(7);
assertEquals(2, bv.select(1));
assertEquals(4, bv.select(2));
assertEquals(6, bv.select(3));
assertEquals(7, bv.select(4));
}
@Test
public void countSetBits() {
RankSelectBitVector bv = new RankSelectBitVector(11);
assertEquals(0, bv.getNumberOfSetBits());
bv.writeBitOn(10);
assertEquals(1, bv.getNumberOfSetBits());
bv.writeBitOn(5);
assertEquals(2, bv.getNumberOfSetBits());
bv.writeBitOff(10);
assertEquals(1, bv.getNumberOfSetBits());
bv.writeBitOff(5);
assertEquals(0, bv.getNumberOfSetBits());
}
}
(The missing class BruteForceBitVector is here.)
Typical benchmark demo output
Seed = 1706175245835
Built the bit vector in 74 milliseconds.
Preprocessed the bit vector in 117 milliseconds.
--- Benchmarking rank operation ---
rankFirst() ran for 623 milliseconds.
rankSecond() ran for 110 milliseconds.
rankThird() ran for 625 milliseconds.
--- Benchmarking select operation ---
selectFirst() ran for 7593 milliseconds.
selectSecond() ran for 1399 milliseconds.
selectThird() ran for 6006 milliseconds.
Critique request
I would like to hear about the following issues:
- Efficiency.
rankThird(int)in particular. - Unit tests. Did I cover all the possible corner cases?
- Anything else?



rankThirddoesn't look familiar to me. Where did you get that from? Actually this whole thing, which paper did you base this on? It's different from the rank/select techniques I know about. \$\endgroup\$