Skip to main content
deleted 169 characters in body
Source Link
toto2
  • 5.5k
  • 17
  • 21
public static class PositionAndHash {
    private final int base;
    private final int queueSize;
    private final int basePowerSize;
    private final Queue<Integer> boundedQueue;basePowerPatternSize;
    private final int position;
    private final int hash;

    public PositionAndHash(int base, int queueSizepatternSize) {
        this.base = base;
        this.queueSize = queueSize;
        this.basePowerSizebasePowerPatternSize = Collections.nCopies(queueSizepatternSize, base).stream().reduce(1, (i1, i2) -> i1 * i2);
        this.boundedQueue = new ArrayDeque<>();
        this.position = -1;
        this.hash = 0;
    }

    private PositionAndHash(int base, int queueSize, int basePowerSize, Queue<Integer> boundedQueue, int position, int hash) {
        this.base = base;
        this.queueSize = queueSize;
        this.basePowerSizebasePowerPatternSize = basePowerSize;
        this.boundedQueue = boundedQueue;
        this.position = position;
        this.hash = hash;
    }

    public int getPosition() {
        return position;
    }

    public int getHash() {
        return hash;
    }

    public PositionAndHash increment(int newChar, Optional<Integer> removedChar) {
        int newHash = hash * base + newChar;
        Queue<Integer> newBoundedQueue = new ArrayDeque<>(boundedQueue);
        {
            newBoundedQueue.add(newChar);
            if (newBoundedQueue.size() > queueSize) {
                int oldChar =- newBoundedQueueremovedChar.pollorElse(0);
                newHash -= oldChar * basePowerSize;
            }
        }basePowerPatternSize;
        return new PositionAndHash(base, queueSize, basePowerSize, newBoundedQueuebasePowerPatternSize, position + 1, newHash);
    }

    public static int computeSingleHash(String string, int hashBase) {
        return computeRollingHash(string, string.length(), hashBase)
                .skip(string.length() - 1)
                .mapToInt(PositionAndHash::getHash)
                .findFirst().getAsInt();
    }

    public static Stream<PositionAndHash> computeRollingHash(String searchableString, int subStringSizepatternSize, int hashBase) {
        // Kind of messed up to use an AtomicReference here, but Java's Stream does not have fold operations.
        AtomicReference<PositionAndHash> positionAndHashRef = new AtomicReference<>(new PositionAndHash(hashBase, subStringSizepatternSize));
        return searchableString.chars()
                .mapToObj(charIntnewChar -> {
                    PositionAndHash newHashprevPositionAndHash = positionAndHashRef.get();
                    int removedCharIndex = prevPositionAndHash.getPosition() - patternSize + 1;
                    Optional<Integer> removedChar = (removedCharIndex >= 0) 
                            ? Optional.of((int) searchableString.charAt(removedCharIndex))
                            : Optional.empty();
                    PositionAndHash newHash = prevPositionAndHash.increment(charIntnewChar, removedChar);
                    positionAndHashRef.set(newHash);
                    return newHash;
                });
    }

    @Override
    public String toString() {
        return "PositionAndHash{" + "position=" + position + ", hash=" + hash + '}';
    }

}

public static void main(String[] args) {
    int hashBase = 103;
    String searchableString = "aaasfaaaaiofjwenvaiefowjfaaalsdfl";
    String pattern = "aaa";
    int subStringSize = pattern.length();
    int patternHash = PositionAndHash.computeSingleHash(pattern, hashBase);
    System.out.println("pattern hash: " + patternHash);

    Stream<PositionAndHash> rollingHash = PositionAndHash.computeRollingHash(searchableString, subStringSize, hashBase);
    // computeRollingHashrollingHash.forEach(System.out::println);

    IntStream hashMatchPositions = rollingHash.skip(subStringSize - 1)
            .filter(posAndHash -> posAndHash.getHash() == patternHash)
            .mapToInt(PositionAndHash::getPosition)
            .map(pos -> pos - subStringSize + 1);
     // hashMatchPositions.forEach(System.out::println);

    IntStream trueMatchPositions = hashMatchPositions
            .filter(pos -> searchableString.substring(pos, pos + subStringSize).equals(pattern));
    trueMatchPositions.forEach(System.out::println);
 
      
    // Or to only print the first match instead of all matches:
    // (Note that Stream is "intelligent" ("lazily evaluated") and 
    //  will not process the whole Stream when only the first element is fetched (findFirst()).)
    
    // trueMatchPositions.findFirst().ifPresent(System.out::println);
}
public static class PositionAndHash {
    private final int base;
    private final int queueSize;
    private final int basePowerSize;
    private final Queue<Integer> boundedQueue;
    private final int position;
    private final int hash;

    public PositionAndHash(int base, int queueSize) {
        this.base = base;
        this.queueSize = queueSize;
        this.basePowerSize = Collections.nCopies(queueSize, base).stream().reduce(1, (i1, i2) -> i1 * i2);
        this.boundedQueue = new ArrayDeque<>();
        this.position = -1;
        this.hash = 0;
    }

    private PositionAndHash(int base, int queueSize, int basePowerSize, Queue<Integer> boundedQueue, int position, int hash) {
        this.base = base;
        this.queueSize = queueSize;
        this.basePowerSize = basePowerSize;
        this.boundedQueue = boundedQueue;
        this.position = position;
        this.hash = hash;
    }

    public int getPosition() {
        return position;
    }

    public int getHash() {
        return hash;
    }

    public PositionAndHash increment(int newChar) {
        int newHash = hash * base + newChar;
        Queue<Integer> newBoundedQueue = new ArrayDeque<>(boundedQueue);
        {
            newBoundedQueue.add(newChar);
            if (newBoundedQueue.size() > queueSize) {
                int oldChar = newBoundedQueue.poll();
                newHash -= oldChar * basePowerSize;
            }
        }
        return new PositionAndHash(base, queueSize, basePowerSize, newBoundedQueue, position + 1, newHash);
    }

    public static int computeSingleHash(String string, int hashBase) {
        return computeRollingHash(string, string.length(), hashBase)
                .skip(string.length() - 1)
                .mapToInt(PositionAndHash::getHash)
                .findFirst().getAsInt();
    }

    public static Stream<PositionAndHash> computeRollingHash(String searchableString, int subStringSize, int hashBase) {
        // Kind of messed up to use an AtomicReference here, but Java's Stream does not have fold operations.
        AtomicReference<PositionAndHash> positionAndHashRef = new AtomicReference<>(new PositionAndHash(hashBase, subStringSize));
        return searchableString.chars()
                .mapToObj(charInt -> {
                    PositionAndHash newHash = positionAndHashRef.get().increment(charInt);
                    positionAndHashRef.set(newHash);
                    return newHash;
                });
    }

    @Override
    public String toString() {
        return "PositionAndHash{" + "position=" + position + ", hash=" + hash + '}';
    }

}

public static void main(String[] args) {
    int hashBase = 103;
    String searchableString = "aaasfaaaaiofjwenvaiefowjfaaalsdfl";
    String pattern = "aaa";
    int subStringSize = pattern.length();
    int patternHash = PositionAndHash.computeSingleHash(pattern, hashBase);
    System.out.println("pattern hash: " + patternHash);

    Stream<PositionAndHash> rollingHash = PositionAndHash.computeRollingHash(searchableString, subStringSize, hashBase);
    // computeRollingHash.forEach(System.out::println);

    IntStream hashMatchPositions = rollingHash.skip(subStringSize - 1)
            .filter(posAndHash -> posAndHash.getHash() == patternHash)
            .mapToInt(PositionAndHash::getPosition)
            .map(pos -> pos - subStringSize + 1);
    // hashMatchPositions.forEach(System.out::println);

    IntStream trueMatchPositions = hashMatchPositions
            .filter(pos -> searchableString.substring(pos, pos + subStringSize).equals(pattern));
    trueMatchPositions.forEach(System.out::println);
 
      
    // Or to only print the first match instead of all matches:
    // (Note that Stream is "intelligent" ("lazily evaluated") and 
    //  will not process the whole Stream when only the first element is fetched (findFirst()).)
    
    // trueMatchPositions.findFirst().ifPresent(System.out::println);
}
public static class PositionAndHash {
    private final int base;
    private final int basePowerPatternSize;
    private final int position;
    private final int hash;

    public PositionAndHash(int base, int patternSize) {
        this.base = base;
        this.basePowerPatternSize = Collections.nCopies(patternSize, base).stream().reduce(1, (i1, i2) -> i1 * i2);
        this.position = -1;
        this.hash = 0;
    }

    private PositionAndHash(int base, int basePowerSize, int position, int hash) {
        this.base = base;
        this.basePowerPatternSize = basePowerSize;
        this.position = position;
        this.hash = hash;
    }

    public int getPosition() {
        return position;
    }

    public int getHash() {
        return hash;
    }

    public PositionAndHash increment(int newChar, Optional<Integer> removedChar) {
        int newHash = hash * base + newChar - removedChar.orElse(0) * basePowerPatternSize;
        return new PositionAndHash(base, basePowerPatternSize, position + 1, newHash);
    }

    public static int computeSingleHash(String string, int hashBase) {
        return computeRollingHash(string, string.length(), hashBase)
                .skip(string.length() - 1)
                .mapToInt(PositionAndHash::getHash)
                .findFirst().getAsInt();
    }

    public static Stream<PositionAndHash> computeRollingHash(String searchableString, int patternSize, int hashBase) {
        // Kind of messed up to use an AtomicReference here, but Java's Stream does not have fold operations.
        AtomicReference<PositionAndHash> positionAndHashRef = new AtomicReference<>(new PositionAndHash(hashBase, patternSize));
        return searchableString.chars()
                .mapToObj(newChar -> {
                    PositionAndHash prevPositionAndHash = positionAndHashRef.get();
                    int removedCharIndex = prevPositionAndHash.getPosition() - patternSize + 1;
                    Optional<Integer> removedChar = (removedCharIndex >= 0) 
                            ? Optional.of((int) searchableString.charAt(removedCharIndex))
                            : Optional.empty();
                    PositionAndHash newHash = prevPositionAndHash.increment(newChar, removedChar);
                    positionAndHashRef.set(newHash);
                    return newHash;
                });
    }

    @Override
    public String toString() {
        return "PositionAndHash{" + "position=" + position + ", hash=" + hash + '}';
    }

}

public static void main(String[] args) {
    int hashBase = 103;
    String searchableString = "aaasfaaaaiofjwenvaiefowjfaaalsdfl";
    String pattern = "aaa";
    int subStringSize = pattern.length();
    int patternHash = PositionAndHash.computeSingleHash(pattern, hashBase);
    System.out.println("pattern hash: " + patternHash);

    Stream<PositionAndHash> rollingHash = PositionAndHash.computeRollingHash(searchableString, subStringSize, hashBase);
    // rollingHash.forEach(System.out::println);

    IntStream hashMatchPositions = rollingHash.skip(subStringSize - 1)
            .filter(posAndHash -> posAndHash.getHash() == patternHash)
            .mapToInt(PositionAndHash::getPosition)
            .map(pos -> pos - subStringSize + 1);
     //hashMatchPositions.forEach(System.out::println);

    IntStream trueMatchPositions = hashMatchPositions
            .filter(pos -> searchableString.substring(pos, pos + subStringSize).equals(pattern));
    trueMatchPositions.forEach(System.out::println);
    
    // Or to only print the first match instead of all matches:
    // (Note that Stream is "intelligent" ("lazily evaluated") and 
    //  will not process the whole Stream when only the first element is fetched (findFirst()).)
    
    // trueMatchPositions.findFirst().ifPresent(System.out::println);
}
Source Link
toto2
  • 5.5k
  • 17
  • 21

  • Your code is not OO at all. Having a bunch of static methods is closer to C or Fortran than an OO language. (@rolfl did not mention OO directly, but talked of "statefulness".)

  • Not only it's not OO, but you are using very long methods. You should break things down in "logical units". The code is easier to read and maintain that way. My rule of thumb is that a method should not be more than 10 lines.

  • Watch out for variable names. In Java verbose names are preferred. phash and thash are a bit short, and out is outright confusing. Usually out would have something to do with output.

Here is my implementation in Java 8, since you were curious about that:

public static class PositionAndHash {
    private final int base;
    private final int queueSize;
    private final int basePowerSize;
    private final Queue<Integer> boundedQueue;
    private final int position;
    private final int hash;

    public PositionAndHash(int base, int queueSize) {
        this.base = base;
        this.queueSize = queueSize;
        this.basePowerSize = Collections.nCopies(queueSize, base).stream().reduce(1, (i1, i2) -> i1 * i2);
        this.boundedQueue = new ArrayDeque<>();
        this.position = -1;
        this.hash = 0;
    }

    private PositionAndHash(int base, int queueSize, int basePowerSize, Queue<Integer> boundedQueue, int position, int hash) {
        this.base = base;
        this.queueSize = queueSize;
        this.basePowerSize = basePowerSize;
        this.boundedQueue = boundedQueue;
        this.position = position;
        this.hash = hash;
    }

    public int getPosition() {
        return position;
    }

    public int getHash() {
        return hash;
    }

    public PositionAndHash increment(int newChar) {
        int newHash = hash * base + newChar;
        Queue<Integer> newBoundedQueue = new ArrayDeque<>(boundedQueue);
        {
            newBoundedQueue.add(newChar);
            if (newBoundedQueue.size() > queueSize) {
                int oldChar = newBoundedQueue.poll();
                newHash -= oldChar * basePowerSize;
            }
        }
        return new PositionAndHash(base, queueSize, basePowerSize, newBoundedQueue, position + 1, newHash);
    }

    public static int computeSingleHash(String string, int hashBase) {
        return computeRollingHash(string, string.length(), hashBase)
                .skip(string.length() - 1)
                .mapToInt(PositionAndHash::getHash)
                .findFirst().getAsInt();
    }

    public static Stream<PositionAndHash> computeRollingHash(String searchableString, int subStringSize, int hashBase) {
        // Kind of messed up to use an AtomicReference here, but Java's Stream does not have fold operations.
        AtomicReference<PositionAndHash> positionAndHashRef = new AtomicReference<>(new PositionAndHash(hashBase, subStringSize));
        return searchableString.chars()
                .mapToObj(charInt -> {
                    PositionAndHash newHash = positionAndHashRef.get().increment(charInt);
                    positionAndHashRef.set(newHash);
                    return newHash;
                });
    }

    @Override
    public String toString() {
        return "PositionAndHash{" + "position=" + position + ", hash=" + hash + '}';
    }

}

public static void main(String[] args) {
    int hashBase = 103;
    String searchableString = "aaasfaaaaiofjwenvaiefowjfaaalsdfl";
    String pattern = "aaa";
    int subStringSize = pattern.length();
    int patternHash = PositionAndHash.computeSingleHash(pattern, hashBase);
    System.out.println("pattern hash: " + patternHash);

    Stream<PositionAndHash> rollingHash = PositionAndHash.computeRollingHash(searchableString, subStringSize, hashBase);
    // computeRollingHash.forEach(System.out::println);

    IntStream hashMatchPositions = rollingHash.skip(subStringSize - 1)
            .filter(posAndHash -> posAndHash.getHash() == patternHash)
            .mapToInt(PositionAndHash::getPosition)
            .map(pos -> pos - subStringSize + 1);
    // hashMatchPositions.forEach(System.out::println);

    IntStream trueMatchPositions = hashMatchPositions
            .filter(pos -> searchableString.substring(pos, pos + subStringSize).equals(pattern));
    trueMatchPositions.forEach(System.out::println);

      
    // Or to only print the first match instead of all matches:
    // (Note that Stream is "intelligent" ("lazily evaluated") and 
    //  will not process the whole Stream when only the first element is fetched (findFirst()).)
    
    // trueMatchPositions.findFirst().ifPresent(System.out::println);
}

I actually don't like my implementation that much. Most of all is that the class PositionAndHash does too many things.