def print_names(block_col_width: int, *names: str) -> None:
    """Print the bold, underlined header row.

    The row holds the block-number title centred in ``block_col_width``
    columns, followed by every file name ellipsized and centred to
    ``NAME_MAX_LENGTH`` columns.
    """
    column_gap = " " * (FILE_SPACING + LEFT_PADDING + RIGHT_PADDING)

    print(end=color.BOLD + color.UNDERLINE)
    print(BLOCK_NUM_HEADER.center(block_col_width), end=" " * BLOCK_NUM_PADDING_RIGHT)

    titles = [ellipsize(name, NAME_MAX_LENGTH).center(NAME_MAX_LENGTH) for name in names]
    names_row = " " * LEFT_PADDING + column_gap.join(titles) + " " * RIGHT_PADDING
    print(names_row)

    print(end=color.RESET)


def colorize(string: str, byte: int, colormap: Mapping[int, str]) -> str:
    """Wrap ``string`` in the colour mapped to ``byte``; return it untouched when unmapped."""
    if byte not in colormap:
        return string
    return color.colors[colormap[byte]] + string + color.RESET
def get_color_map(intgroup: Sequence[Optional[int]]) -> Mapping[int, str]:
    """Map the differing values of one byte column, and their half-bytes, to colour names.

    Each value is split into a low half (``value & 0x0F``) and a high half
    (``value & 0x1F0``). A half carried by every value is left uncoloured;
    every other half gets a colour name drawn from a shuffled copy of
    ``color.colors``, and each value takes the colour of whichever of its two
    halves occurs least often.

    :param intgroup: one value per file; ``None`` entries (no byte available
        for that file) are ignored.
    :return: ``{value_or_half: colour_name}``; empty when fewer than two
        distinct values are present.
    """
    # Files without a byte at this position have nothing to colour.
    present = [value for value in intgroup if value is not None]
    if len(set(present)) <= 1:
        return {}

    palette = list(color.colors)
    if not palette:
        return {}
    random.shuffle(palette)

    # Every value that carries a given half, keyed by that half.
    holders = {}
    for value in present:
        for half in (value & 0x0F, value & 0x1F0):
            holders.setdefault(half, []).append(value)

    colormap = {}
    for half, values in holders.items():
        if len(values) == len(present):
            # This half is shared by every value, so it does not change.
            continue
        # Cycle through the palette instead of running out of colours.
        colormap[half] = palette[len(colormap) % len(palette)]

    for value in set(present):
        lo = value & 0x0F
        hi = value & 0x1F0
        # The value gets the colour of the half with the fewest occurrences
        # (ties resolve to the high half, which is listed first).
        rarest = min((hi, lo), key=lambda half: len(holders[half]))
        shade = colormap.get(rarest)
        if shade is not None:
            colormap[value] = shade

    return colormap
def get_block_col_width(*filenames: str) -> int:
    """Return the character width of the block-number column.

    The width is the number of base-``BLOCK_NUM_BASE`` digits needed to write
    the block count of the largest file, but never less than the length of
    the column header.

    :param filenames: paths of the files being compared (at least one).
    :raises OSError: if a file cannot be stat'ed.
    :raises ValueError: if no file name is given.
    """
    largest = max(os.stat(filename).st_size for filename in filenames)

    # Same value as (size + B - size % B) / B, kept in exact integer arithmetic.
    block_count = largest // BLOCK_BYTES + 1

    digits = 0
    while block_count > 0:
        block_count //= BLOCK_NUM_BASE
        digits += 1

    return max(digits, len(BLOCK_NUM_HEADER))
def gen_files(*filenames: str) -> Generator[Tuple[str, BinaryIO], None, None]:
    """Yield ``(basename, open binary file)`` for each path, in order.

    The caller owns the yielded handles and is responsible for closing them.
    If opening a later path fails, the handles already handed out are closed
    before the error propagates, so a bad path does not leak the earlier files.

    :raises OSError: if a file cannot be opened.
    """
    opened = []
    try:
        for filename in filenames:
            handle = open(filename, "rb")
            opened.append(handle)
            yield os.path.basename(filename), handle
    except OSError:
        for handle in opened:
            handle.close()
        raise
def get_other_halfbyte(half: int, byte: int) -> int:
    """Return the half of ``byte`` that is not ``half``.

    ``byte`` is split the same way as everywhere else in this module: the low
    half is ``byte & 0x0F`` and the high half is ``byte & 0x1F0``.

    :param half: one of the two halves of ``byte``.
    :param byte: the value the half was taken from.
    :return: the low half when ``half`` is the high half, otherwise the high
        half. For a zero ``byte`` both halves are 0, so 0 is returned.
    """
    lo = byte & 0x0F
    hi = byte & 0x1F0

    # Compare against the byte's own high half rather than testing whether the
    # low nibble of ``half`` is zero: a low half of 0 also has a zero low
    # nibble and would otherwise be mistaken for the high half.
    if half == hi:
        return lo
    return hi
def print_line(block_col_width, block_num: int, line: Sequence[Sequence[Optional[bytes]]]) -> None:
    """Print one row of the dump: the block number, then hex and ASCII columns per file.

    :param block_col_width: width of the block-number column.
    :param block_num: number of the block, printed in base ``BLOCK_NUM_BASE``.
    :param line: for every file, the block's cells: single-byte ``bytes``
        objects, or ``None`` where the file has no byte at that position.
    """
    print(
        color.BOLD + str_base(block_num, BLOCK_NUM_BASE).rjust(block_col_width),
        end=" " * BLOCK_NUM_PADDING_RIGHT + color.RESET
    )

    # A file whose block holds only None has no data here: its column stays blank.
    hex_reprs = ["" if set(block) != {None} else None for block in line]
    ascii_reprs = hex_reprs.copy()

    for byte_count, bgroup in enumerate(zip(*line), start=1):
        # OR-ing 0x100 keeps the high-half keys (& 0x1F0) disjoint from the
        # low-half keys (& 0x0F). Missing bytes stay None instead of being
        # fed to int.from_bytes, which would raise TypeError.
        intgroup = [
            None if byte is None else int.from_bytes(byte, 'little') | 0x100
            for byte in bgroup
        ]
        present = [value for value in intgroup if value is not None]

        # A key that was coloured on an earlier row keeps that colour.
        colormap = {}
        for key, shade in get_color_map(present).items():
            colormap[key] = global_colormap.setdefault(key, shade)

        for i, value in enumerate(intgroup):
            if hex_reprs[i] is None:
                continue

            if value is None:
                # A hex byte is two characters wide, an ASCII cell one.
                hex_reprs[i] += " " * 2
                ascii_reprs[i] += " "
            else:
                lo = value & 0x0F
                hi = value & 0x1F0

                hi_str = colorize("%01X" % ((value & 0xF0) >> 4), hi, colormap)
                lo_str = colorize("%01X" % lo, lo, colormap)

                hex_reprs[i] += hi_str + lo_str
                ascii_reprs[i] += colorize(unicode_ascii_repr(value & 0xFF), value, colormap)

            if byte_count < BLOCK_BYTES:
                hex_reprs[i] += " " * BYTE_SPACING

    reprs = []
    for hexr, asciir in zip(hex_reprs, ascii_reprs):
        if not hexr:
            reprs.append(" " * TOTAL_BLOCK_LENGTH)
            continue
        reprs.append(BLOCK_TEMPLATE.format(hexr=hexr, asciir=asciir))

    print((" " * FILE_SPACING).join(reprs))


def print_identical_line_bogus(block_col_width: int, last_line: Sequence[Sequence[Optional[bytes]]]):
    """Print the greyed-out placeholder row that stands in for repeated identical rows.

    Files that have data in ``last_line`` get a block drawn with
    ``NOT_ASCII_CHAR``; files without data get a blank block.
    """
    print(end=color.BRIGHT_BLACK)
    print(str(VERT_ELLIPSIS).rjust(block_col_width), end=" " * BLOCK_NUM_PADDING_RIGHT)

    bogus_blocks = []
    for block in last_line:
        bogus_char = NOT_ASCII_CHAR if any(block) else " "
        bogus_blocks.append(
            BLOCK_TEMPLATE.replace("|", bogus_char).format(
                hexr=(" " * BYTE_SPACING).join([bogus_char * 2] * BLOCK_BYTES),
                asciir=bogus_char * BLOCK_BYTES
            )
        )

    print((" " * FILE_SPACING).join(bogus_blocks))
    print(end=color.RESET)


def _read_block(stream: BinaryIO) -> Sequence[Optional[bytes]]:
    """Read up to ``BLOCK_BYTES`` bytes and return them as single-byte cells padded with ``None``."""
    chunk = stream.read(BLOCK_BYTES)
    cells = [chunk[i:i + 1] for i in range(len(chunk))]
    return cells + [None] * (BLOCK_BYTES - len(chunk))


def print_hexdiff(*filenames: str):
    """Print a side-by-side hex/ASCII dump of the given files.

    Rows identical to the previous row are collapsed into a single placeholder
    row. A final block shorter than ``BLOCK_BYTES`` is printed too, with the
    missing positions left blank. All files are closed on exit, including when
    rendering fails.

    :param filenames: paths of the files to compare (at least one).
    """
    names, files = zip(*gen_files(*filenames))
    try:
        block_col_width = get_block_col_width(*filenames)
        print_names(block_col_width, *names)

        identical_lines = 0
        prev_line = None
        block_num = 0

        while True:
            line = [_read_block(f) for f in files]
            if not any(cell is not None for block in line for cell in block):
                # Every file is exhausted.
                break

            if prev_line == line:
                identical_lines += 1
                if identical_lines == 1:
                    print_identical_line_bogus(block_col_width, line)
            else:
                identical_lines = 0
                print_line(block_col_width, block_num, line)

            prev_line = line
            block_num += 1
    finally:
        for f in files:
            f.close()


def main():
    """Command-line entry point: dump the files named in ``sys.argv``."""
    if len(sys.argv) < 2:
        raise SystemExit("At least one file needs to be specified")

    print_hexdiff(*sys.argv[1:])
specified") + + print_hexdiff(*sys.argv[1:]) + diff --git a/setup.py b/setup.py new file mode 100644 index 0000000..b781518 --- /dev/null +++ b/setup.py @@ -0,0 +1,13 @@ +from setuptools import setup, find_packages + +setup( + name='rebindiff', + version='0.1', + packages=find_packages(), + license='GPL-3.0', + author='Davide Depau', + author_email='davide@depau.eu', + entry_points = { + 'console_scripts': ['rebindiff=rebindiff.main:main'], + } +)