EDIT
@Peilonrayz I attempted to fix the issues you pointed out and came up with the following (this is the whole module, with some additional functionality I'd been working on):
from itertools import islice, takewhile, tee, chain
from re import match, split
from copy import copy
from colorama import init, Style
# Rebinding ``tee`` to itself has no effect on its value.
# NOTE(review): presumably kept so the imported name counts as "used" or is
# re-exported from this module -- confirm before removing.
tee = tee
# Initialise colorama (imported at the top of the file) before any ``Style``
# sequences are emitted by the highlighting helpers.
init()
# Format spec applied to the address at the start of every dump row.
COL0 = '08X'
# Format spec applied to each byte in the hex column.
COL1 = '02X'
# Placeholder for a missing byte cell.  Derived from COL1's width, exactly as
# ``change_format`` does, so padded rows line up with fully populated ones
# (a single space is narrower than the two characters '02X' produces).
PAD = ' ' * int(COL1[:-1])
# Row template: address, hex column, character column.
FMT = '{} {} {}'
# Character that follows the address field in FMT.
DLM = ' '
# Numeric base used to parse hex-column cells back into integers; matches the
# 'X' type code of COL1.
BASE = 16
def change_format(col0, col1, fmt=''):
    """Reconfigure the module-wide dump layout.

    Args:
        col0: format spec for the address column (stored in ``COL0``).
        col1: format spec for a byte cell, e.g. ``'02X'``.  Everything but
            the last character must parse as the integer cell width; the
            last character selects the numeric base.
        fmt: optional row template.  When non-empty it replaces ``FMT`` and
            the character right after its first ``'}'`` becomes ``DLM``.

    Raises:
        ValueError: the width part of *col1* is not an integer, or *fmt*
            contains no ``'}'``.
        IndexError: nothing follows the first ``'}'`` in *fmt*.
        KeyError: the type code of *col1* is not one of ``b o d x X``.
    """
    global COL0, COL1, PAD, FMT, DLM, BASE
    # Work out every derived value before touching the globals, so an invalid
    # argument raises without leaving the module half-reconfigured.
    pad = ' ' * int(col1[:-1])
    dlm = fmt[fmt.index('}') + 1] if fmt else None
    base = {'b': 2, 'o': 8, 'd': 10, 'x': 16, 'X': 16}[col1[-1]]

    COL0 = col0
    COL1 = col1
    PAD = pad
    if fmt:
        FMT = fmt
        DLM = dlm
    BASE = base
def fix(it, offset, start, stop, step):
    """Prepare an iterable for row-wise dumping.

    Returns a 3-tuple:
      * ``islice(it, start, stop)``,
      * how far ``offset + start`` lies past a multiple of *step*,
      * the position at which the first row ends, i.e. *start* advanced to
        the next row boundary.
    """
    lead = (offset + start) % step
    window = islice(it, start, stop)
    first_row_end = start + step - lead
    return window, lead, first_row_end
def mk_row(it, nxt, start):
    """Consume up to ``nxt - start`` items from *it* and return them as a bytearray."""
    width = nxt - start
    return bytearray(islice(it, width))
def to_hex(i):
    """Render the integer *i* using the module-level ``COL1`` format spec."""
    return f'{i:{COL1}}'
def to_chr(i):
    """Return the character with code point *i*, or ``'.'`` if it is not printable."""
    char = chr(i)
    if char.isprintable():
        return char
    return '.'
def pad_gen(fn, pad, it, n, step):
    """Yield a row of cells: leading pads, converted items, trailing pads.

    *pad* is yielded *n* times, then ``fn(item)`` for every item of *it*,
    then *pad* again until the converted items plus trailing pads number
    ``step - n``.
    """
    yield from [pad] * n

    converted = 0
    for converted, item in enumerate(it, 1):
        yield fn(item)

    # Fill whatever is left of the row after the real items.
    for _ in range(converted, step - n):
        yield pad
def hexdump_gen(it, offset=0, start=0, stop=None, step=16, sep='\b'):
    """Yield one formatted dump row per *step* bytes of *it*.

    Args:
        it: iterable of byte values; only ``it[start:stop]`` is dumped.
        offset: added to the position when computing the displayed address.
        start, stop: slice bounds handed to ``fix``.
        step: number of byte cells per row.
        sep: extra cell inserted into the hex column at index ``step // 2``.

    The first row is left-padded so that its address is a multiple of
    *step*; every later row starts on a row boundary.
    """
    stream, lead, row_end = fix(it, offset, start, stop, step)
    row_start = start
    middle = step // 2

    row = mk_row(stream, row_end, row_start)
    while row:
        address = format(offset + row_start - lead, COL0)
        cells = list(pad_gen(to_hex, PAD, row, lead, step))
        text = ''.join(pad_gen(to_chr, ' ', row, lead, step))
        cells.insert(middle, sep)
        yield FMT.format(address, ' '.join(cells), text)

        # Only the first row can start mid-way; later rows have no lead pad.
        row_start, row_end, lead = row_end, row_end + step, 0
        row = mk_row(stream, row_end, row_start)
def compress_hexdump_gen(*args, **kwargs):
    """Yield ``hexdump_gen`` rows with runs of repeated rows collapsed.

    All arguments are forwarded to ``hexdump_gen``.  A row whose content
    (everything after the address) equals the previously emitted row is
    replaced by a single ``'*'`` for the whole run.  If the dump ends inside
    a run of more than one repeat, the last repeated row is emitted with its
    real address so the length of the run can be recovered; a run of exactly
    one trailing repeat is represented by the ``'*'`` alone.
    """
    # hexdump_gen's positional order is (it, offset, start, stop, step, sep),
    # so a positionally supplied step is args[4]; looking only at kwargs
    # would silently fall back to 16 and produce a wrong closing address.
    step = args[4] if len(args) > 4 else kwargs.get('step', 16)

    previous = ''
    duplicates = 0
    for line in hexdump_gen(*args, **kwargs):
        # Compare the rows without their address column.
        if previous.split()[1:] == line.split()[1:]:
            if not duplicates:
                yield '*'
            duplicates += 1
        else:
            yield line
            previous = line
            duplicates = 0

    if duplicates > 1:
        cut = previous.index(DLM)
        # NOTE(review): the address is parsed as base 16 regardless of COL0.
        address = int(previous[:cut], 16) + duplicates * step
        yield format(address, COL0) + previous[cut:]
def predicate(i):
    """Return True unless *i* starts with three or more whitespace characters.

    Used as a ``takewhile`` predicate over row tokens: a long whitespace run
    is the first token that fails it.
    """
    # Raw string: '\s' in a plain literal is an invalid escape sequence.
    return not match(r'(\s{3,})', i)
def highlight(row, sep, ba):
    """Wrap selected cells of *row* in bright-style escape sequences, in place.

    Tokens are examined until the first one rejected by ``predicate``.
    Tokens containing a space, and the *sep* token, are skipped.  A cell is
    highlighted when its value is in *ba*; when *ba* is empty, every cell
    whose character is printable is highlighted instead.
    """
    # Iterate over a snapshot because cells of ``row`` are replaced below.
    for position, token in enumerate(takewhile(predicate, copy(row))):
        if ' ' in token or token == sep:
            continue
        value = int(token, BASE)
        selected = value in ba
        if selected or (not ba and chr(value).isprintable()):
            row[position] = Style.BRIGHT + token + Style.RESET_ALL
def highlight_hexdump_gen(it, step=16, sep='\b', ba=b''):
    """Yield the rows of *it* with selected hex cells highlighted.

    Args:
        it: iterable of dump rows (strings).
        step: number of byte cells per row.
        sep: separator cell used when the rows were produced; an empty
            value means the rows contain no separator token.
        ba: byte values to highlight; see ``highlight`` for the behaviour
            when it is empty.
    """
    ba = bytearray(ba)
    # Token index just past the last hex cell: each cell is followed by a
    # whitespace token, and a separator contributes two more tokens.
    index = step * 2 + (3 if sep else 1)
    for line in it:
        # Raw string: '\s' in a plain literal is an invalid escape sequence.
        # The capturing group keeps the whitespace so the row can be rebuilt.
        tokens = split(r'(\s+)', line)
        cells = tokens[2:index]
        highlight(cells, sep, ba)
        yield ''.join(tokens[:2] + cells + tokens[index:])
def to_bytes(row, step):
    """Decode the hex cells of a tokenised dump row into a bytearray.

    Only ``row[2:step * 2 + 1]`` is inspected, and decoding stops at the
    first token rejected by ``predicate``.  Bright-style escape sequences
    are removed before parsing, and tokens that are not purely alphanumeric
    are ignored.
    """
    decoded = bytearray()
    cells = row[2:step * 2 + 1]
    for cell in takewhile(predicate, cells):
        plain = cell.replace(Style.BRIGHT, '').replace(Style.RESET_ALL, '')
        if not plain.isalnum():
            continue
        decoded.append(int(plain, BASE))
    return decoded
def decompress_gen(row0, row1, step):
    """Yield the bytes of *row0* once per row address skipped before *row1*.

    Addresses are read from the first token of each row (with any trailing
    ``DLM`` stripped) as base-16 integers.  One bytearray is yielded for
    every multiple of *step* strictly between the two addresses.
    """
    address = int(row0[0].rstrip(DLM), 16) + step
    limit = int(row1[0].rstrip(DLM), 16)
    while address < limit:
        head = (format(address, COL0) + DLM).rstrip(' ')
        yield to_bytes([head] + row0[1:], step)
        address += step
def from_hexdump_gen(it, step=16, sep='\b'):
    """Yield one bytearray per dump row, expanding ``'*'`` markers.

    Args:
        it: iterable of dump rows (strings), e.g. an open text file.
        step: number of byte cells per row.
        sep: separator cell used when the dump was produced; its first
            occurrence is removed from each row before tokenising.

    A ``'*'`` row (with or without a trailing newline) stands for repeats of
    the previous data row up to the address of the row that follows it.
    When ``'*'`` is the last row, the previous data row is decoded once more.
    """
    # Accept any iterable, not just iterators: next() is used below.
    it = iter(it)
    last_row = pending = ''
    while True:
        row = pending if pending else next(it, None)
        if row is None:
            break
        if row in ('*', '*\n'):
            # The row after the marker bounds the run; it is kept in
            # ``pending`` so the next pass decodes it as ordinary data.
            pending = next(it, ''.join(last_row))
            yield from decompress_gen(last_row, pending.split(), step)
        else:
            # Strip the separator only when there is one to strip: slicing
            # around find()'s -1 (separator absent) or 0 (empty separator)
            # would duplicate the row or eat the first address character.
            if sep:
                row = row.replace(sep, '', 1)
            # Raw string: '\s' in a plain literal is an invalid escape.
            last_row = split(r'(\s+)', row)
            pending = ''
            yield to_bytes(last_row, step)
def test(it0, it1, offset=0, start=0, stop=None, step=16, sep='\b'):
    """Check that the dump *it0* decodes to the bytes of *it1*.

    Args:
        it0: iterable of dump rows, decoded with ``from_hexdump_gen``.
        it1: iterable of the reference byte values.
        offset, start, stop, step: the values used when the dump was made;
            they determine how the reference is sliced into rows.
        sep: separator cell used when the dump was made.

    Returns:
        True when every decoded row equals the matching slice of the
        reference and the reference has no bytes left over; False otherwise.
    """
    it1, _, nxt = fix(it1, offset, start, stop, step)
    for decoded in from_hexdump_gen(it0, step=step, sep=sep):
        if mk_row(it1, nxt, start) != decoded:
            # Explicit False rather than falling off the end with None.
            return False
        start = nxt
        nxt += step
    # Bytes still left in the reference mean the dump stopped short of it.
    return not mk_row(it1, nxt, start)
def read_binary_gen(file):
    """Yield the contents of *file* one byte value (int) at a time.

    The file is read in fixed-size chunks.  Iterating a binary file object
    directly splits on newline bytes, so data containing few or no newlines
    would be buffered in arbitrarily large pieces.
    """
    with open(file, 'rb') as f:
        # iter() with a sentinel stops at the empty read that signals EOF.
        for chunk in iter(lambda: f.read(65536), b''):
            yield from chunk
def write(file, gen):
    """Write every string produced by *gen* to the text file *file*, one per line."""
    with open(file, 'w') as out:
        out.writelines(line + '\n' for line in gen)
def read_gen(file):
    """Yield the lines of the text file *file*, line endings included."""
    with open(file, 'r') as source:
        for line in source:
            yield line
def write_binary(file, gen):
    """Write every bytes-like chunk produced by *gen* to the binary file *file*."""
    with open(file, 'wb') as out:
        out.writelines(gen)