Intro
(This post has a continuation at Get histogram of bytes in any set of files in Java - take II.)
This time I decided to rewrite the C++ byte histogram counters [1][2][3] in Java. Usage is the same: if no arguments are supplied, the program reads from the standard input. Otherwise, it treats every argument as a file name and attempts to count the bytes in those files.
Code
com.github.coderodde.file.util.ByteHistogram.java:
package com.github.coderodde.file.util;
/**
* This class implements the byte histogram.
*
* @version 1.0.0 (Nov 13, 2024)
* @since 1.0.0 (Nov 13, 2024)
*/
public final class ByteHistogram {
private static final int HISTOGRAM_CAPACITY = 256;
private static final int SCREEN_WIDTH = 80;
private static final int LINE_PREAMBLE_WIDTH = 11;
private final long[] data = new long[HISTOGRAM_CAPACITY];
/**
* Account the byte {@code b}.
*
* @param b the byte to account.
*/
public void insert(final byte b) {
data[Byte.toUnsignedInt(b)]++;
}
/**
* Converts this byte histogram to an ASCII art.
*
* @return ASCII art version of this byte histogram.
*/
@Override
public String toString() {
final StringBuilder sb = new StringBuilder();
final long maximumCount = computeMaximumCount();
final int countStringLength =
computeCountStringLength(maximumCount);
final String lineFormat = getLineFormat(countStringLength);
for (int i = 0; i < data.length; i++) {
loadLine(sb,
lineFormat,
countStringLength,
i,
maximumCount);
}
return sb.toString();
}
/**
* Builds the format for printing the lines in the output.
*
* @param countStringLength the count string length in characters.
*
* @return the format for printing the lines in the output.
*/
private static String getLineFormat(final int countStringLength) {
return String.format("0x%%02x [%%c]: %% %dd %%s\n", countStringLength);
}
/**
* Loads a single line to the total output of this byte histogram.
*
* @param sb the string builder.
* @param lineFormat the format of the line.
* @param countStringLength the length of the count string.
* @param index the byte index.
* @param maximumCount the maximum count in the histogram.
*/
private void loadLine(final StringBuilder sb,
final String lineFormat,
final int countStringLength,
final int index,
final long maximumCount) {
sb.append(
String.format(
lineFormat,index,
Character.isLetterOrDigit((char) index) ?
(char) index :
'?',
data[index],
computeBarAscii(data[index],
maximumCount,
countStringLength)));
}
/**
* Computes and returns the bar ASCII art.
*
* @param count the count of the line we are processing.
* @param maximumCount the maximum count in the byte histogram.
* @param countStringLength the count string length.
*
* @return the bar ASCII art.
*/
private static String computeBarAscii(final long count,
final long maximumCount,
final int countStringLength) {
final float ratio = ((float) count) / ((float) maximumCount);
final int maximumBarLength = SCREEN_WIDTH
- LINE_PREAMBLE_WIDTH
- countStringLength;
final int barLength = (int)(ratio * maximumBarLength);
final StringBuilder sb = new StringBuilder(barLength);
for (int i = 0; i < barLength; i++) {
sb.append("*");
}
return sb.toString();
}
/**
* Computes the maximum count in this byte histogram.
*
* @return the maximum count.
*/
private long computeMaximumCount() {
long m = 0L;
for (final long count : data) {
m = Math.max(m, count);
}
return m;
}
/**
* Computes and returns the length of the widest length string.
*
* @param maximumLength the maximum length of the byte histogram.
*
* @return the widest length of the count string in characters.
*/
private static int computeCountStringLength(final long maximumLength) {
return Long.toString(maximumLength).length();
}
}
com.github.coderodde.file.util.ByteHistogramApp.java:
package com.github.coderodde.file.util;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStream;
import java.util.ArrayList;
import java.util.List;
/**
 * This class implements a program for counting byte histograms in files. With
 * no command line arguments the bytes are read from the standard input;
 * otherwise every argument is treated as the name of a file to read.
 *
 * @version 1.0.0 (Nov 13, 2024)
 * @since 1.0.0 (Nov 13, 2024)
 */
public final class ByteHistogramApp {

    /**
     * The number of bytes requested from a stream per read call. Reading in
     * chunks avoids one read call per byte on unbuffered file streams.
     */
    private static final int BUFFER_SIZE = 8192;

    /**
     * Builds one shared histogram over all the inputs and prints it to the
     * standard output. Exits with status {@code -1} if any file could not be
     * opened and with status {@code -2} if reading any stream failed; in both
     * cases the individual messages are printed to the standard error.
     *
     * @param args the names of the files to process; may be empty.
     */
    public static void main(String[] args) {
        List<InputStream> inputStreamList = null;

        // Prepare the input streams from which to build the (shared) byte
        // histogram:
        try {
            inputStreamList = getInputStreams(args);
        } catch (final MultipleFileNotFoundException ex) {
            ex.getExceptionList()
              .forEach((e) -> System.err.println(e.getMessage()));

            System.exit(-1);
        }

        // Once here, we have valid input streams. Request the histogram and
        // print it in the console:
        try {
            System.out.println(processInputStreamList(inputStreamList));
        } catch (final MultipleIOException ex) {
            ex.getExceptionList()
              .forEach((e) -> System.err.println(e.getMessage()));

            System.exit(-2);
        }
    }

    /**
     * Converts the input argument list to the list of input streams. If any
     * file cannot be opened, the streams that were opened successfully are
     * closed again before the exception is thrown.
     *
     * @param args the names of the files to process.
     *
     * @return the input stream list; contains only the standard input if
     *         {@code args} is empty.
     *
     * @throws MultipleFileNotFoundException if any file failed.
     */
    private static List<InputStream> getInputStreams(final String[] args)
            throws MultipleFileNotFoundException {

        if (args.length == 0) {
            return List.of(System.in);
        }

        final List<InputStream> inputStreamList = new ArrayList<>(args.length);
        final MultipleFileNotFoundException exceptionList =
                new MultipleFileNotFoundException();

        for (final String fileName : args) {
            try {
                inputStreamList.add(new FileInputStream(new File(fileName)));
            } catch (final FileNotFoundException ex) {
                // Keep going so that every missing file gets reported:
                exceptionList.add(ex);
            }
        }

        if (!exceptionList.isEmpty()) {
            // Once here, something went wrong. Do not leak the streams that
            // did open, then throw:
            closeAll(inputStreamList);
            throw exceptionList;
        }

        return inputStreamList;
    }

    /**
     * Builds the shared histogram from the input streams in the argument.
     * Every stream is closed once it has been processed, whether or not
     * reading it succeeded.
     *
     * @param inputStreamList the list of input stream supplying the bytes.
     *
     * @return the shared byte histogram.
     *
     * @throws MultipleIOException if any stream threw.
     */
    private static ByteHistogram
            processInputStreamList(final List<InputStream> inputStreamList)
            throws MultipleIOException {

        final ByteHistogram histogram = new ByteHistogram();
        final MultipleIOException ex = new MultipleIOException();

        for (final InputStream is : inputStreamList) {
            // try-with-resources closes the stream; a failure to close is
            // caught by the same handler as a failure to read.
            try (InputStream in = is) {
                processInputStream(in, histogram);
            } catch (final IOException e) {
                // Add the new I/O exception e to ex:
                ex.add(e);
            }
        }

        if (!ex.isEmpty()) {
            // Once here, something went wrong. Throw:
            throw ex;
        }

        return histogram;
    }

    /**
     * Processes the input stream reading bytes from it until end of file is
     * reached. The stream is not closed by this method.
     *
     * @param is        the input stream.
     * @param histogram the target histogram.
     *
     * @throws IOException if I/O fails.
     */
    private static void processInputStream(final InputStream is,
                                           final ByteHistogram histogram)
            throws IOException {

        final byte[] buffer = new byte[BUFFER_SIZE];
        int bytesRead;

        while ((bytesRead = is.read(buffer)) != -1) {
            for (int i = 0; i < bytesRead; i++) {
                histogram.insert(buffer[i]);
            }
        }
    }

    /**
     * Closes all the given streams on a best-effort basis.
     *
     * @param inputStreamList the streams to close.
     */
    private static void closeAll(final List<InputStream> inputStreamList) {
        for (final InputStream is : inputStreamList) {
            try {
                is.close();
            } catch (final IOException ignored) {
                // Only called while already reporting another failure; a
                // failed close must not mask that failure.
            }
        }
    }
}
com.github.coderodde.file.util.MultipleFileNotFoundException.java:
package com.github.coderodde.file.util;
import java.io.FileNotFoundException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.List;
import java.util.Objects;
/**
 * This class holds a list of actual exception objects of type
 * {@link java.io.FileNotFoundException}, so that several missing files can be
 * reported with a single throw.
 *
 * @version 1.0.0 (Nov 13, 2024)
 * @since 1.0.0 (Nov 13, 2024)
 */
public final class MultipleFileNotFoundException extends Exception {

    private static final long serialVersionUID = 1L;

    private final List<FileNotFoundException> exceptionList = new ArrayList<>();

    /**
     * Appends an exception to this exception list.
     *
     * @param ex the exception to append.
     *
     * @throws NullPointerException if {@code ex} is {@code null}.
     */
    public void add(final FileNotFoundException ex) {
        exceptionList.add(
                Objects.requireNonNull(
                        ex,
                        "The input exception is null."));
    }

    /**
     * Tells whether any exception has been added.
     *
     * @return {@code true} if and only if no exception has been added.
     */
    public boolean isEmpty() {
        return exceptionList.isEmpty();
    }

    /**
     * Returns a read-only view of the collected exceptions in the order they
     * were added.
     *
     * @return the unmodifiable collection of the collected exceptions.
     */
    public Collection<FileNotFoundException> getExceptionList() {
        return Collections.unmodifiableCollection(exceptionList);
    }

    /**
     * Returns a summary naming every collected exception, so that generic
     * handlers and logs that only print the message still show what failed.
     *
     * @return the number of collected exceptions followed by their messages.
     */
    @Override
    public String getMessage() {
        final StringBuilder sb = new StringBuilder();
        sb.append(exceptionList.size())
          .append(" file(s) could not be opened");

        String separator = ": ";

        for (final FileNotFoundException ex : exceptionList) {
            final String message = ex.getMessage();

            // Fall back to the exception's string form if it has no message.
            sb.append(separator)
              .append(message != null ? message : ex.toString());

            separator = "; ";
        }

        return sb.toString();
    }
}
com.github.coderodde.file.util.MultipleIOException.java:
package com.github.coderodde.file.util;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.List;
import java.util.Objects;
/**
 * This class holds a list of actual exception objects of type
 * {@link java.io.IOException}, so that several I/O failures can be reported
 * with a single throw.
 *
 * @version 1.0.0 (Nov 13, 2024)
 * @since 1.0.0 (Nov 13, 2024)
 */
public final class MultipleIOException extends Exception {

    private static final long serialVersionUID = 1L;

    private final List<IOException> exceptionList = new ArrayList<>();

    /**
     * Appends an exception to this exception list.
     *
     * @param ex the exception to append.
     *
     * @throws NullPointerException if {@code ex} is {@code null}.
     */
    public void add(final IOException ex) {
        exceptionList.add(
                Objects.requireNonNull(
                        ex,
                        "The input exception is null."));
    }

    /**
     * Tells whether any exception has been added.
     *
     * @return {@code true} if and only if no exception has been added.
     */
    public boolean isEmpty() {
        return exceptionList.isEmpty();
    }

    /**
     * Returns a read-only view of the collected exceptions in the order they
     * were added.
     *
     * @return the unmodifiable collection of the collected exceptions.
     */
    public Collection<IOException> getExceptionList() {
        return Collections.unmodifiableCollection(exceptionList);
    }

    /**
     * Returns a summary naming every collected exception, so that generic
     * handlers and logs that only print the message still show what failed.
     *
     * @return the number of collected exceptions followed by their messages.
     */
    @Override
    public String getMessage() {
        final StringBuilder sb = new StringBuilder();
        sb.append(exceptionList.size())
          .append(" I/O operation(s) failed");

        String separator = ": ";

        for (final IOException ex : exceptionList) {
            final String message = ex.getMessage();

            // Fall back to the exception's string form if it has no message.
            sb.append(separator)
              .append(message != null ? message : ex.toString());

            separator = "; ";
        }

        return sb.toString();
    }
}
Typical output
C:\Users\rodio\OneDrive\Documents\NetBeansProjects\ByteHistogram.java\target\classes>echo hello world | java com.github.
coderodde.file.util.ByteHistogramApp
0x00 [?]: 0
0x01 [?]: 0
0x02 [?]: 0
0x03 [?]: 0
0x04 [?]: 0
0x05 [?]: 0
0x06 [?]: 0
0x07 [?]: 0
0x08 [?]: 0
0x09 [?]: 0
0x0a [?]: 1 **********************
0x0b [?]: 0
0x0c [?]: 0
0x0d [?]: 1 **********************
0x0e [?]: 0
0x0f [?]: 0
0x10 [?]: 0
0x11 [?]: 0
0x12 [?]: 0
0x13 [?]: 0
0x14 [?]: 0
0x15 [?]: 0
0x16 [?]: 0
0x17 [?]: 0
0x18 [?]: 0
0x19 [?]: 0
0x1a [?]: 0
0x1b [?]: 0
0x1c [?]: 0
0x1d [?]: 0
0x1e [?]: 0
0x1f [?]: 0
0x20 [?]: 2 *********************************************
0x21 [?]: 0
0x22 [?]: 0
0x23 [?]: 0
0x24 [?]: 0
0x25 [?]: 0
0x26 [?]: 0
0x27 [?]: 0
0x28 [?]: 0
0x29 [?]: 0
0x2a [?]: 0
0x2b [?]: 0
0x2c [?]: 0
0x2d [?]: 0
0x2e [?]: 0
0x2f [?]: 0
0x30 [0]: 0
0x31 [1]: 0
0x32 [2]: 0
0x33 [3]: 0
0x34 [4]: 0
0x35 [5]: 0
0x36 [6]: 0
0x37 [7]: 0
0x38 [8]: 0
0x39 [9]: 0
0x3a [?]: 0
0x3b [?]: 0
0x3c [?]: 0
0x3d [?]: 0
0x3e [?]: 0
0x3f [?]: 0
0x40 [?]: 0
0x41 [A]: 0
0x42 [B]: 0
0x43 [C]: 0
0x44 [D]: 0
0x45 [E]: 0
0x46 [F]: 0
0x47 [G]: 0
0x48 [H]: 0
0x49 [I]: 0
0x4a [J]: 0
0x4b [K]: 0
0x4c [L]: 0
0x4d [M]: 0
0x4e [N]: 0
0x4f [O]: 0
0x50 [P]: 0
0x51 [Q]: 0
0x52 [R]: 0
0x53 [S]: 0
0x54 [T]: 0
0x55 [U]: 0
0x56 [V]: 0
0x57 [W]: 0
0x58 [X]: 0
0x59 [Y]: 0
0x5a [Z]: 0
0x5b [?]: 0
0x5c [?]: 0
0x5d [?]: 0
0x5e [?]: 0
0x5f [?]: 0
0x60 [?]: 0
0x61 [a]: 0
0x62 [b]: 0
0x63 [c]: 0
0x64 [d]: 1 **********************
0x65 [e]: 1 **********************
0x66 [f]: 0
0x67 [g]: 0
0x68 [h]: 1 **********************
0x69 [i]: 0
0x6a [j]: 0
0x6b [k]: 0
0x6c [l]: 3 ********************************************************************
0x6d [m]: 0
0x6e [n]: 0
0x6f [o]: 2 *********************************************
0x70 [p]: 0
0x71 [q]: 0
0x72 [r]: 1 **********************
0x73 [s]: 0
0x74 [t]: 0
0x75 [u]: 0
0x76 [v]: 0
0x77 [w]: 1 **********************
0x78 [x]: 0
0x79 [y]: 0
0x7a [z]: 0
0x7b [?]: 0
0x7c [?]: 0
0x7d [?]: 0
0x7e [?]: 0
0x7f [?]: 0
0x80 [?]: 0
0x81 [?]: 0
0x82 [?]: 0
0x83 [?]: 0
0x84 [?]: 0
0x85 [?]: 0
0x86 [?]: 0
0x87 [?]: 0
0x88 [?]: 0
0x89 [?]: 0
0x8a [?]: 0
0x8b [?]: 0
0x8c [?]: 0
0x8d [?]: 0
0x8e [?]: 0
0x8f [?]: 0
0x90 [?]: 0
0x91 [?]: 0
0x92 [?]: 0
0x93 [?]: 0
0x94 [?]: 0
0x95 [?]: 0
0x96 [?]: 0
0x97 [?]: 0
0x98 [?]: 0
0x99 [?]: 0
0x9a [?]: 0
0x9b [?]: 0
0x9c [?]: 0
0x9d [?]: 0
0x9e [?]: 0
0x9f [?]: 0
0xa0 [?]: 0
0xa1 [?]: 0
0xa2 [?]: 0
0xa3 [?]: 0
0xa4 [?]: 0
0xa5 [?]: 0
0xa6 [?]: 0
0xa7 [?]: 0
0xa8 [?]: 0
0xa9 [?]: 0
0xaa [ª]: 0
0xab [?]: 0
0xac [?]: 0
0xad [?]: 0
0xae [?]: 0
0xaf [?]: 0
0xb0 [?]: 0
0xb1 [?]: 0
0xb2 [?]: 0
0xb3 [?]: 0
0xb4 [?]: 0
0xb5 [µ]: 0
0xb6 [?]: 0
0xb7 [?]: 0
0xb8 [?]: 0
0xb9 [?]: 0
0xba [º]: 0
0xbb [?]: 0
0xbc [?]: 0
0xbd [?]: 0
0xbe [?]: 0
0xbf [?]: 0
0xc0 [À]: 0
0xc1 [Á]: 0
0xc2 [Â]: 0
0xc3 [Ã]: 0
0xc4 [Ä]: 0
0xc5 [Å]: 0
0xc6 [Æ]: 0
0xc7 [Ç]: 0
0xc8 [È]: 0
0xc9 [É]: 0
0xca [Ê]: 0
0xcb [Ë]: 0
0xcc [Ì]: 0
0xcd [Í]: 0
0xce [Î]: 0
0xcf [Ï]: 0
0xd0 [Ð]: 0
0xd1 [Ñ]: 0
0xd2 [Ò]: 0
0xd3 [Ó]: 0
0xd4 [Ô]: 0
0xd5 [Õ]: 0
0xd6 [Ö]: 0
0xd7 [?]: 0
0xd8 [Ø]: 0
0xd9 [Ù]: 0
0xda [Ú]: 0
0xdb [Û]: 0
0xdc [Ü]: 0
0xdd [Ý]: 0
0xde [Þ]: 0
0xdf [ß]: 0
0xe0 [à]: 0
0xe1 [á]: 0
0xe2 [â]: 0
0xe3 [ã]: 0
0xe4 [ä]: 0
0xe5 [å]: 0
0xe6 [æ]: 0
0xe7 [ç]: 0
0xe8 [è]: 0
0xe9 [é]: 0
0xea [ê]: 0
0xeb [ë]: 0
0xec [ì]: 0
0xed [í]: 0
0xee [î]: 0
0xef [ï]: 0
0xf0 [ð]: 0
0xf1 [ñ]: 0
0xf2 [ò]: 0
0xf3 [ó]: 0
0xf4 [ô]: 0
0xf5 [õ]: 0
0xf6 [ö]: 0
0xf7 [?]: 0
0xf8 [ø]: 0
0xf9 [ù]: 0
0xfa [ú]: 0
0xfb [û]: 0
0xfc [ü]: 0
0xfd [ý]: 0
0xfe [þ]: 0
0xff [ÿ]: 0
Critique request
I would love to hear any commentary regarding my attempt.
References
[1] Get histogram of bytes in any set of files in C++14
[2] Get histogram of bytes in any set of files in C++14 - take II
`isLetterOrDigit` isn't nearly the set of useful printable characters. Consider printing everything for which `isISOControl` returns false.