Revisions to Python 3.x Hexdump

added 5517 characters in body

Source Link

edited Jul 26, 2017 at 17:38

user133955

EDIT

@Peilonrayz I attempted to fix the issues you pointed out and came up with the following (this is the whole module, with some additional functionality I'd been working on):

from itertools import islice, takewhile, tee, chain
from re import match, split
from copy import copy
from colorama import init, Style

# Rebinding the imported name to itself has no runtime effect.
# NOTE(review): presumably kept so `tee` counts as used / is exposed as a
# module attribute -- confirm with the author.
tee = tee
# Initialise colorama once, at import time.
init()

# Format spec for the first column (the address; see hexdump_gen).
COL0 = '08X'
# Format spec applied to every byte value (see to_hex).
COL1 = '02X'
# Filler placed in the byte-value column where a row has no byte.
PAD = '  '
# Row template: address column, byte-value column, character column.
FMT = '{}   {}  {}'
# Character that directly follows the first field of FMT
# (change_format derives it the same way).
DLM = ' '
# Radix used when byte values are parsed back out of a dump row.
BASE = 16


def change_format(col0, col1, fmt=''):
    """Reconfigure the module-level dump format.

    Args:
        col0: Format spec for the address column (stored in COL0).
        col1: Format spec for each byte value, e.g. '02X'.  Everything but
            the last character must parse as an integer (it becomes the
            width of PAD); the last character must be one of
            'b', 'o', 'd', 'x', 'X' and selects BASE.
        fmt: Optional row template.  When non-empty it replaces FMT and the
            character right after its first '}' becomes DLM.

    Raises:
        ValueError: if the width part of *col1* is not an integer, or *fmt*
            is non-empty but contains no '}'.
        IndexError: if nothing follows the first '}' in *fmt*.
        KeyError: if the last character of *col1* is not a known type code.

    All derived values are computed before any global is assigned, so a
    rejected argument leaves the previous configuration fully intact.
    """
    global COL0, COL1, PAD, FMT, DLM, BASE
    # Validate first (same order as the exceptions listed above) ...
    pad = ' ' * int(col1[:-1])
    dlm = fmt[fmt.index('}') + 1] if fmt else None
    base = {'b': 2, 'o': 8, 'd': 10, 'x': 16, 'X': 16}[col1[-1]]
    # ... then commit, so the globals never end up half-updated.
    COL0 = col0
    COL1 = col1
    PAD = pad
    if fmt:
        FMT = fmt
        DLM = dlm
    BASE = base


def fix(it, offset, start, stop, step):
    """Prepare an iterable for row-wise dumping.

    Returns a 3-tuple:
      * ``islice(it, start, stop)`` -- the window of items to dump,
      * how far ``offset + start`` lies past a multiple of *step*,
      * the position (counted like *start*) at which the first row ends.
    """
    misalignment = (offset + start) % step
    window = islice(it, start, stop)
    first_row_end = start + step - misalignment
    return window, misalignment, first_row_end


def mk_row(it, nxt, start):
    """Consume up to ``nxt - start`` items from *it* and return them as a bytearray."""
    width = nxt - start
    chunk = islice(it, width)
    return bytearray(chunk)


def to_hex(i):
    """Render the value *i* using the module-level COL1 format spec."""
    spec = COL1
    return format(i, spec)


def to_chr(i):
    """Return the character for code point *i*, or '.' if it is not printable."""
    char = chr(i)
    if char.isprintable():
        return char
    return '.'


def pad_gen(fn, pad, it, n, step):
    """Yield the cells of one row, padded on both sides.

    Emits *pad* ``n`` times, then ``fn(item)`` for every item of *it*, then
    *pad* again until the items plus trailing pads number ``step - n``.
    No trailing pad is produced when *it* already holds that many items.
    """
    for _ in range(n):
        yield pad
    emitted = 0
    for emitted, item in enumerate(it, 1):
        yield fn(item)
    missing = step - n - emitted
    while missing > 0:
        yield pad
        missing -= 1


def hexdump_gen(it, offset=0, start=0, stop=None, step=16, sep='\b'):
    """Yield one formatted dump row at a time for the items of *it*.

    Args:
        it: Iterable of byte values.
        offset: Added to the position when computing the displayed address.
        start, stop: Window of *it* to dump (handed to islice via fix()).
        step: Number of cells per row.
        sep: String inserted into the byte-value cells at index
            ``step // 2`` before they are joined with single spaces.

    The first row is shortened and left-padded when ``offset + start`` is
    not a multiple of *step*; every later row starts with no leading pad.
    Generation stops at the first empty row.
    """
    data, lead, boundary = fix(it, offset, start, stop, step)
    position = start
    chunk = mk_row(data, boundary, position)
    while chunk:
        address = format(offset + position - lead, COL0)
        cells = list(pad_gen(to_hex, PAD, chunk, lead, step))
        text = ''.join(pad_gen(to_chr, ' ', chunk, lead, step))
        cells.insert(step // 2, sep)
        yield FMT.format(address, ' '.join(cells), text)
        # Only the first row can have leading padding.
        position, boundary, lead = boundary, boundary + step, 0
        chunk = mk_row(data, boundary, position)


def compress_hexdump_gen(*args, **kwargs):
    """Yield hexdump_gen() rows with runs of repeated rows collapsed.

    Accepts exactly the arguments of hexdump_gen().  Two rows count as
    repeats when everything after their first whitespace-separated token
    (the address) is equal.  The first repeat of a run is replaced by a
    single '*' line and further repeats are dropped.

    If the dump ends inside a run of more than one repeat, a closing row is
    emitted: the repeated row's content with the address advanced by
    ``duplicates * step``.  The address of the kept row is parsed as base 16.
    """
    # `step` is hexdump_gen's fifth parameter; honour it whether the caller
    # passed it positionally or by keyword (the keyword-only lookup used the
    # default 16 for positional callers and produced a wrong final address).
    step = args[4] if len(args) > 4 else kwargs.get('step', 16)
    row = ''
    row_tail = []  # row.split()[1:], cached so it is not re-split per line
    duplicates = 0
    for i in hexdump_gen(*args, **kwargs):
        tail = i.split()[1:]
        if tail == row_tail:
            if not duplicates:
                yield '*'
            duplicates += 1
        else:
            yield i
            row = i
            row_tail = tail
            duplicates = 0
    if duplicates > 1:
        index = row.index(DLM)
        col0 = int(row[:index], 16) + duplicates * step
        yield format(col0, COL0) + row[index:]


def predicate(i):
    """Return True unless *i* begins with three or more whitespace characters.

    Used with takewhile() to stop at the first wide run of whitespace in a
    tokenised dump row.
    """
    # Raw string: a backslash-s in a normal literal is an invalid escape
    # sequence that newer Python versions warn about.
    return not match(r'(\s{3,})', i)


def highlight(row, sep, ba):
    """Wrap selected value tokens of *row* in bright styling, in place.

    Tokens are visited in order until predicate() rejects one.  Tokens that
    contain a space or equal *sep* are skipped; every other token is parsed
    as an integer in base BASE.  A value is highlighted when it occurs in
    *ba*, or -- if *ba* is empty -- when its character is printable.
    """
    snapshot = copy(row)  # iterate a copy because row is edited in the loop
    for pos, token in enumerate(takewhile(predicate, snapshot)):
        if ' ' in token or token == sep:
            continue
        value = int(token, BASE)
        selected = value in ba
        if not selected and not ba:
            selected = chr(value).isprintable()
        if selected:
            row[pos] = Style.BRIGHT + token + Style.RESET_ALL


def highlight_hexdump_gen(it, step=16, sep='\b', ba=b''):
    """Yield the dump rows of *it* with selected byte values highlighted.

    Args:
        it: Iterable of dump rows (strings).
        step: Number of cells per row used when the dump was produced.
        sep: Separator string used when the dump was produced.
        ba: Byte values to highlight; when empty, highlight() falls back to
            highlighting values whose character is printable.

    Each row is split into alternating text and whitespace tokens.  The
    first two tokens are left untouched, the tokens up to *index* are passed
    to highlight(), and the row is re-joined unchanged otherwise.
    """
    ba = bytearray(ba)
    # A non-empty separator contributes two extra tokens (itself plus the
    # whitespace after it) to the byte-value section.
    index = step * 2 + (3 if sep else 1)
    for i in it:
        # Raw string avoids the invalid-escape warning of a plain literal.
        row0 = split(r'(\s+)', i)
        row1 = row0[2:index]
        highlight(row1, sep, ba)
        yield ''.join(row0[:2] + row1 + row0[index:])


def to_bytes(row, step):
    """Parse the byte values of a tokenised dump row into a bytearray.

    Looks at ``row[2:step * 2 + 1]`` and stops at the first token rejected
    by predicate().  Bright/reset style codes are stripped from each token;
    tokens that are alphanumeric afterwards are parsed in base BASE.
    """
    cells = takewhile(predicate, row[2:step * 2 + 1])
    plain = (
        cell.replace(Style.BRIGHT, '').replace(Style.RESET_ALL, '')
        for cell in cells
    )
    return bytearray(int(cell, BASE) for cell in plain if cell.isalnum())


def decompress_gen(row0, row1, step):
    """Yield the bytes of the rows elided between *row0* and *row1*.

    Both arguments are tokenised rows whose first token is an address,
    parsed as base 16 after trailing DLM characters are stripped.  For every
    address from ``row0 + step`` up to, but not including, the address of
    *row1*, a row with *row0*'s content is rebuilt and decoded by to_bytes().
    """
    current = int(row0[0].rstrip(DLM), 16) + step
    limit = int(row1[0].rstrip(DLM), 16)
    content = row0[1:]
    while current < limit:
        address = (format(current, COL0) + DLM).rstrip(' ')
        yield to_bytes([address] + content, step)
        current += step


def from_hexdump_gen(it, step=16, sep='\b'):
    """Yield one bytearray per dump row read from *it*.

    Args:
        it: Iterable of dump rows (strings), e.g. the output of
            hexdump_gen() / compress_hexdump_gen() or the lines of a file.
        step: Number of cells per row used when the dump was produced.
        sep: Separator string used when the dump was produced.

    A '*' line (with or without a trailing newline) stands for repeated
    rows: the next line is read, the rows between the previous row and that
    line are regenerated with decompress_gen(), and the line itself is then
    decoded as a normal row.  If '*' is the last line, the previous row is
    decoded once more instead.
    """
    it = iter(it)  # allow lists and other re-iterables, not only iterators
    i = j = ''
    while True:
        row = j if j else next(it, None)
        if row is None:
            break
        elif row == '*' or row == '*\n':
            j = next(it, ''.join(i))
            yield from decompress_gen(i, j.split(), step)
        else:
            # Drop the first occurrence of the separator, if there is one.
            # Slicing around find() mangled the row when the separator was
            # absent (-1) or empty (0).
            if sep:
                row = row.replace(sep, '', 1)
            i = split(r'(\s+)', row)
            j = ''
            yield to_bytes(i, step)


def test(it0, it1, offset=0, start=0, stop=None, step=16, sep='\b'):
    """Check that the dump rows *it0* decode to the data in *it1*.

    Args:
        it0: Iterable of dump rows, decoded with from_hexdump_gen().
        it1: Iterable of the original byte values.
        offset, start, stop, step: The values used to produce the dump; they
            determine how *it1* is windowed and cut into rows.
        sep: Separator string used when the dump was produced.

    Returns:
        True if every decoded row equals the corresponding slice of *it1*
        and no data is left over afterwards, otherwise False.
    """
    it1, _, nxt = fix(it1, offset, start, stop, step)
    for i in from_hexdump_gen(it0, step=step, sep=sep):
        if mk_row(it1, nxt, start) != i:
            return False
        start = nxt
        nxt += step
    # A dump that stops early must not pass: the data has to be used up too.
    return not mk_row(it1, nxt, start)


def read_binary_gen(file):
    """Yield the content of the binary file *file* one byte (int) at a time.

    The file stays open until the generator is exhausted or closed.
    """
    with open(file, 'rb') as f:
        # Read fixed-size blocks.  Iterating a binary file yields "lines"
        # split on newline bytes, so a file without any newline byte would be
        # read into memory in one piece.
        for block in iter(lambda: f.read(65536), b''):
            yield from block


def write(file, gen):
    """Write every string of *gen* to the text file *file*, one per line."""
    with open(file, 'w') as out:
        out.writelines(line + '\n' for line in gen)


def read_gen(file):
    """Yield the lines of the text file *file*, line endings included."""
    with open(file, 'r') as handle:
        for line in handle:
            yield line


def write_binary(file, gen):
    """Write every bytes-like chunk of *gen* to the binary file *file*."""
    with open(file, 'wb') as out:
        out.writelines(gen)

EDIT

@Peilonrayz I attempted to fix the issues you pointed out and came up with the following (this is the whole module, with some additional functionality I'd been working on):

from itertools import islice, takewhile, tee, chain
from re import match, split
from copy import copy
from colorama import init, Style

# Rebinding the imported name to itself has no runtime effect.
# NOTE(review): presumably kept so `tee` counts as used / is exposed as a
# module attribute -- confirm with the author.
tee = tee
# Initialise colorama once, at import time.
init()

# Format spec for the first column (the address; see hexdump_gen).
COL0 = '08X'
# Format spec applied to every byte value (see to_hex).
COL1 = '02X'
# Filler placed in the byte-value column where a row has no byte.
PAD = '  '
# Row template: address column, byte-value column, character column.
FMT = '{}   {}  {}'
# Character that directly follows the first field of FMT
# (change_format derives it the same way).
DLM = ' '
# Radix used when byte values are parsed back out of a dump row.
BASE = 16


def change_format(col0, col1, fmt=''):
    """Reconfigure the module-level dump format.

    Args:
        col0: Format spec for the address column (stored in COL0).
        col1: Format spec for each byte value, e.g. '02X'.  Everything but
            the last character must parse as an integer (it becomes the
            width of PAD); the last character must be one of
            'b', 'o', 'd', 'x', 'X' and selects BASE.
        fmt: Optional row template.  When non-empty it replaces FMT and the
            character right after its first '}' becomes DLM.

    Raises:
        ValueError: if the width part of *col1* is not an integer, or *fmt*
            is non-empty but contains no '}'.
        IndexError: if nothing follows the first '}' in *fmt*.
        KeyError: if the last character of *col1* is not a known type code.

    All derived values are computed before any global is assigned, so a
    rejected argument leaves the previous configuration fully intact.
    """
    global COL0, COL1, PAD, FMT, DLM, BASE
    # Validate first (same order as the exceptions listed above) ...
    pad = ' ' * int(col1[:-1])
    dlm = fmt[fmt.index('}') + 1] if fmt else None
    base = {'b': 2, 'o': 8, 'd': 10, 'x': 16, 'X': 16}[col1[-1]]
    # ... then commit, so the globals never end up half-updated.
    COL0 = col0
    COL1 = col1
    PAD = pad
    if fmt:
        FMT = fmt
        DLM = dlm
    BASE = base


def fix(it, offset, start, stop, step):
    """Prepare an iterable for row-wise dumping.

    Returns a 3-tuple:
      * ``islice(it, start, stop)`` -- the window of items to dump,
      * how far ``offset + start`` lies past a multiple of *step*,
      * the position (counted like *start*) at which the first row ends.
    """
    misalignment = (offset + start) % step
    window = islice(it, start, stop)
    first_row_end = start + step - misalignment
    return window, misalignment, first_row_end


def mk_row(it, nxt, start):
    """Consume up to ``nxt - start`` items from *it* and return them as a bytearray."""
    width = nxt - start
    chunk = islice(it, width)
    return bytearray(chunk)


def to_hex(i):
    """Render the value *i* using the module-level COL1 format spec."""
    spec = COL1
    return format(i, spec)


def to_chr(i):
    """Return the character for code point *i*, or '.' if it is not printable."""
    char = chr(i)
    if char.isprintable():
        return char
    return '.'


def pad_gen(fn, pad, it, n, step):
    """Yield the cells of one row, padded on both sides.

    Emits *pad* ``n`` times, then ``fn(item)`` for every item of *it*, then
    *pad* again until the items plus trailing pads number ``step - n``.
    No trailing pad is produced when *it* already holds that many items.
    """
    for _ in range(n):
        yield pad
    emitted = 0
    for emitted, item in enumerate(it, 1):
        yield fn(item)
    missing = step - n - emitted
    while missing > 0:
        yield pad
        missing -= 1


def hexdump_gen(it, offset=0, start=0, stop=None, step=16, sep='\b'):
    """Yield one formatted dump row at a time for the items of *it*.

    Args:
        it: Iterable of byte values.
        offset: Added to the position when computing the displayed address.
        start, stop: Window of *it* to dump (handed to islice via fix()).
        step: Number of cells per row.
        sep: String inserted into the byte-value cells at index
            ``step // 2`` before they are joined with single spaces.

    The first row is shortened and left-padded when ``offset + start`` is
    not a multiple of *step*; every later row starts with no leading pad.
    Generation stops at the first empty row.
    """
    data, lead, boundary = fix(it, offset, start, stop, step)
    position = start
    chunk = mk_row(data, boundary, position)
    while chunk:
        address = format(offset + position - lead, COL0)
        cells = list(pad_gen(to_hex, PAD, chunk, lead, step))
        text = ''.join(pad_gen(to_chr, ' ', chunk, lead, step))
        cells.insert(step // 2, sep)
        yield FMT.format(address, ' '.join(cells), text)
        # Only the first row can have leading padding.
        position, boundary, lead = boundary, boundary + step, 0
        chunk = mk_row(data, boundary, position)


def compress_hexdump_gen(*args, **kwargs):
    """Yield hexdump_gen() rows with runs of repeated rows collapsed.

    Accepts exactly the arguments of hexdump_gen().  Two rows count as
    repeats when everything after their first whitespace-separated token
    (the address) is equal.  The first repeat of a run is replaced by a
    single '*' line and further repeats are dropped.

    If the dump ends inside a run of more than one repeat, a closing row is
    emitted: the repeated row's content with the address advanced by
    ``duplicates * step``.  The address of the kept row is parsed as base 16.
    """
    # `step` is hexdump_gen's fifth parameter; honour it whether the caller
    # passed it positionally or by keyword (the keyword-only lookup used the
    # default 16 for positional callers and produced a wrong final address).
    step = args[4] if len(args) > 4 else kwargs.get('step', 16)
    row = ''
    row_tail = []  # row.split()[1:], cached so it is not re-split per line
    duplicates = 0
    for i in hexdump_gen(*args, **kwargs):
        tail = i.split()[1:]
        if tail == row_tail:
            if not duplicates:
                yield '*'
            duplicates += 1
        else:
            yield i
            row = i
            row_tail = tail
            duplicates = 0
    if duplicates > 1:
        index = row.index(DLM)
        col0 = int(row[:index], 16) + duplicates * step
        yield format(col0, COL0) + row[index:]


def predicate(i):
    """Return True unless *i* begins with three or more whitespace characters.

    Used with takewhile() to stop at the first wide run of whitespace in a
    tokenised dump row.
    """
    # Raw string: a backslash-s in a normal literal is an invalid escape
    # sequence that newer Python versions warn about.
    return not match(r'(\s{3,})', i)


def highlight(row, sep, ba):
    """Wrap selected value tokens of *row* in bright styling, in place.

    Tokens are visited in order until predicate() rejects one.  Tokens that
    contain a space or equal *sep* are skipped; every other token is parsed
    as an integer in base BASE.  A value is highlighted when it occurs in
    *ba*, or -- if *ba* is empty -- when its character is printable.
    """
    snapshot = copy(row)  # iterate a copy because row is edited in the loop
    for pos, token in enumerate(takewhile(predicate, snapshot)):
        if ' ' in token or token == sep:
            continue
        value = int(token, BASE)
        selected = value in ba
        if not selected and not ba:
            selected = chr(value).isprintable()
        if selected:
            row[pos] = Style.BRIGHT + token + Style.RESET_ALL


def highlight_hexdump_gen(it, step=16, sep='\b', ba=b''):
    """Yield the dump rows of *it* with selected byte values highlighted.

    Args:
        it: Iterable of dump rows (strings).
        step: Number of cells per row used when the dump was produced.
        sep: Separator string used when the dump was produced.
        ba: Byte values to highlight; when empty, highlight() falls back to
            highlighting values whose character is printable.

    Each row is split into alternating text and whitespace tokens.  The
    first two tokens are left untouched, the tokens up to *index* are passed
    to highlight(), and the row is re-joined unchanged otherwise.
    """
    ba = bytearray(ba)
    # A non-empty separator contributes two extra tokens (itself plus the
    # whitespace after it) to the byte-value section.
    index = step * 2 + (3 if sep else 1)
    for i in it:
        # Raw string avoids the invalid-escape warning of a plain literal.
        row0 = split(r'(\s+)', i)
        row1 = row0[2:index]
        highlight(row1, sep, ba)
        yield ''.join(row0[:2] + row1 + row0[index:])


def to_bytes(row, step):
    """Parse the byte values of a tokenised dump row into a bytearray.

    Looks at ``row[2:step * 2 + 1]`` and stops at the first token rejected
    by predicate().  Bright/reset style codes are stripped from each token;
    tokens that are alphanumeric afterwards are parsed in base BASE.
    """
    cells = takewhile(predicate, row[2:step * 2 + 1])
    plain = (
        cell.replace(Style.BRIGHT, '').replace(Style.RESET_ALL, '')
        for cell in cells
    )
    return bytearray(int(cell, BASE) for cell in plain if cell.isalnum())


def decompress_gen(row0, row1, step):
    """Yield the bytes of the rows elided between *row0* and *row1*.

    Both arguments are tokenised rows whose first token is an address,
    parsed as base 16 after trailing DLM characters are stripped.  For every
    address from ``row0 + step`` up to, but not including, the address of
    *row1*, a row with *row0*'s content is rebuilt and decoded by to_bytes().
    """
    current = int(row0[0].rstrip(DLM), 16) + step
    limit = int(row1[0].rstrip(DLM), 16)
    content = row0[1:]
    while current < limit:
        address = (format(current, COL0) + DLM).rstrip(' ')
        yield to_bytes([address] + content, step)
        current += step


def from_hexdump_gen(it, step=16, sep='\b'):
    """Yield one bytearray per dump row read from *it*.

    Args:
        it: Iterable of dump rows (strings), e.g. the output of
            hexdump_gen() / compress_hexdump_gen() or the lines of a file.
        step: Number of cells per row used when the dump was produced.
        sep: Separator string used when the dump was produced.

    A '*' line (with or without a trailing newline) stands for repeated
    rows: the next line is read, the rows between the previous row and that
    line are regenerated with decompress_gen(), and the line itself is then
    decoded as a normal row.  If '*' is the last line, the previous row is
    decoded once more instead.
    """
    it = iter(it)  # allow lists and other re-iterables, not only iterators
    i = j = ''
    while True:
        row = j if j else next(it, None)
        if row is None:
            break
        elif row == '*' or row == '*\n':
            j = next(it, ''.join(i))
            yield from decompress_gen(i, j.split(), step)
        else:
            # Drop the first occurrence of the separator, if there is one.
            # Slicing around find() mangled the row when the separator was
            # absent (-1) or empty (0).
            if sep:
                row = row.replace(sep, '', 1)
            i = split(r'(\s+)', row)
            j = ''
            yield to_bytes(i, step)


def test(it0, it1, offset=0, start=0, stop=None, step=16, sep='\b'):
    """Check that the dump rows *it0* decode to the data in *it1*.

    Args:
        it0: Iterable of dump rows, decoded with from_hexdump_gen().
        it1: Iterable of the original byte values.
        offset, start, stop, step: The values used to produce the dump; they
            determine how *it1* is windowed and cut into rows.
        sep: Separator string used when the dump was produced.

    Returns:
        True if every decoded row equals the corresponding slice of *it1*
        and no data is left over afterwards, otherwise False.
    """
    it1, _, nxt = fix(it1, offset, start, stop, step)
    for i in from_hexdump_gen(it0, step=step, sep=sep):
        if mk_row(it1, nxt, start) != i:
            return False
        start = nxt
        nxt += step
    # A dump that stops early must not pass: the data has to be used up too.
    return not mk_row(it1, nxt, start)


def read_binary_gen(file):
    """Yield the content of the binary file *file* one byte (int) at a time.

    The file stays open until the generator is exhausted or closed.
    """
    with open(file, 'rb') as f:
        # Read fixed-size blocks.  Iterating a binary file yields "lines"
        # split on newline bytes, so a file without any newline byte would be
        # read into memory in one piece.
        for block in iter(lambda: f.read(65536), b''):
            yield from block


def write(file, gen):
    """Write every string of *gen* to the text file *file*, one per line."""
    with open(file, 'w') as out:
        out.writelines(line + '\n' for line in gen)


def read_gen(file):
    """Yield the lines of the text file *file*, line endings included."""
    with open(file, 'r') as handle:
        for line in handle:
            yield line


def write_binary(file, gen):
    """Write every bytes-like chunk of *gen* to the binary file *file*."""
    with open(file, 'wb') as out:
        out.writelines(gen)