From 74e0cfb40de4ed07b04e32cc52111283e6691db0 Mon Sep 17 00:00:00 2001
From: Davide Depau
Date: Wed, 13 Nov 2019 22:24:09 +0100
Subject: [PATCH] Add code

---
 rebindiff/__init__.py       |   0
 rebindiff/__main__.py       | 304 ++++++++++++++++++++++++++++++++++++
 rebindiff/utils/__init__.py |   0
 rebindiff/utils/color.py    |  44 ++++++
 rebindiff/utils/strings.py  |  22 +++
 rebindiff/utils/values.py   |  46 ++++++
 6 files changed, 416 insertions(+)
 create mode 100644 rebindiff/__init__.py
 create mode 100644 rebindiff/__main__.py
 create mode 100644 rebindiff/utils/__init__.py
 create mode 100644 rebindiff/utils/color.py
 create mode 100644 rebindiff/utils/strings.py
 create mode 100644 rebindiff/utils/values.py

diff --git a/rebindiff/__init__.py b/rebindiff/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/rebindiff/__main__.py b/rebindiff/__main__.py
new file mode 100644
index 0000000..5620a19
--- /dev/null
+++ b/rebindiff/__main__.py
@@ -0,0 +1,304 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+"""Print a colored, side-by-side hex comparison of several binary files."""
+
+import os
+import os.path
+import random
+import sys
+from typing import Sequence, Generator, Tuple, BinaryIO, Optional, Mapping
+
+from rebindiff.utils import color
+from rebindiff.utils.strings import ellipsize
+from rebindiff.utils.values import str_base, unicode_ascii_repr
+
+BLOCK_NUM_HEADER = "blk"
+BLOCK_NUM_PADDING_RIGHT = 2
+BLOCK_NUM_BASE = 16
+
+BLOCK_BYTES = 4
+
+LEFT_PADDING = 1
+BYTE_SPACING = 1
+HEX_ASCII_SPACING = 2
+FILE_SPACING = 4
+RIGHT_PADDING = 0
+
+NOT_ASCII_CHAR = "‧"
+HORZ_ELLIPSIS = "…"
+VERT_ELLIPSIS = "⋮"
+
+BLOCK_TEMPLATE = " " * LEFT_PADDING + "{hexr}" + " " * HEX_ASCII_SPACING + "|{asciir}|" + " " * RIGHT_PADDING
+
+HEX_REPR_LENGTH = (2 + BYTE_SPACING) * BLOCK_BYTES - BYTE_SPACING
+ASCII_REPR_LENGTH = BLOCK_BYTES + 2
+
+NAME_MAX_LENGTH = HEX_REPR_LENGTH + HEX_ASCII_SPACING + ASCII_REPR_LENGTH
+TOTAL_BLOCK_LENGTH = NAME_MAX_LENGTH + LEFT_PADDING + RIGHT_PADDING
+
+# Colormap keys share one int namespace: a low half is its bare value
+# (0x0-0xF), a high half carries HIGH_HALF_FLAG and a whole byte additionally
+# carries WHOLE_BYTE_FLAG, so byte 0x10 cannot collide with high half 0x1.
+HIGH_HALF_FLAG = 0x100
+HIGH_HALF_MASK = HIGH_HALF_FLAG | 0xF0
+LOW_HALF_MASK = 0x0F
+WHOLE_BYTE_FLAG = 0x200
+
+# Digits are 0-9 followed by A-Z, so bases 2 through 36 are valid.
+if BLOCK_NUM_BASE > ord('Z') - ord('A') + 1 + 10 or BLOCK_NUM_BASE < 2:
+    raise AssertionError("Invalid block number base")
+
+# Colors already handed out, keyed like get_color_map() results, so a value
+# keeps its color on every row.
+global_colormap = {}
+
+
+def print_names(block_col_width: int, *names: str) -> None:
+    """Print the bold, underlined header row.
+
+    The row holds BLOCK_NUM_HEADER centered in *block_col_width* columns,
+    then every name passed through ellipsize() and centered over its block.
+    """
+    print(end=color.BOLD + color.UNDERLINE)
+    print(BLOCK_NUM_HEADER.center(block_col_width), end=" " * BLOCK_NUM_PADDING_RIGHT)
+
+    column_gap = " " * (FILE_SPACING + LEFT_PADDING + RIGHT_PADDING)
+    titles = [ellipsize(name, NAME_MAX_LENGTH).center(NAME_MAX_LENGTH) for name in names]
+    print(" " * LEFT_PADDING + column_gap.join(titles) + " " * RIGHT_PADDING)
+
+    print(end=color.RESET)
+
+
+def colorize(string: str, byte: int, colormap: Mapping[int, str]) -> str:
+    """Wrap *string* in the color mapped to key *byte*, if there is one."""
+    if byte in colormap:
+        return color.colors[colormap[byte]] + string + color.RESET
+    return string
+
+
+def get_other_halfbyte(half: int, byte: int) -> int:
+    """Return the half of *byte* that *half* is not.
+
+    A *half* with any high-half bit set is taken as the high half; testing
+    only the low nibble would mistake a low half of 0 for a high half.
+    """
+    if byte == 0:
+        return 0
+    if half & HIGH_HALF_MASK:
+        return byte & LOW_HALF_MASK
+    return byte & HIGH_HALF_MASK
+
+
+def get_color_map(intgroup: Sequence[Optional[int]]) -> Mapping[int, str]:
+    """Pick color names for the values that differ at one byte offset.
+
+    *intgroup* holds one entry per file: the byte flagged with
+    HIGH_HALF_FLAG, or None when that file has no byte at this offset.
+    The result maps low halves, flagged high halves and WHOLE_BYTE_FLAG-ed
+    bytes to keys of color.colors; it is empty when all entries are equal.
+    """
+    if len(set(intgroup)) == 1:
+        return {}
+
+    present = [byte for byte in intgroup if byte is not None]
+
+    ncolors = list(color.colors)
+    random.shuffle(ncolors)
+
+    # For each half, the bytes that contain it.
+    halfbytes = {}
+    for byte in present:
+        halfbytes.setdefault(byte & LOW_HALF_MASK, []).append(byte)
+        halfbytes.setdefault(byte & HIGH_HALF_MASK, []).append(byte)
+
+    colormap = {}
+    for halfbyte, owners in halfbytes.items():
+        if len(owners) == len(intgroup):
+            # Halfbyte does not change
+            continue
+        # Wrap around rather than fail if the halves outnumber the colors.
+        colormap[halfbyte] = ncolors[len(colormap) % len(ncolors)]
+
+    for byte in set(present):
+        lo = byte & LOW_HALF_MASK
+        hi = byte & HIGH_HALF_MASK
+
+        # The byte gets the color of the half with the least occurrences
+        least_common = min((hi, lo), key=lambda half: len(halfbytes[half]))
+        if least_common in colormap:
+            colormap[byte | WHOLE_BYTE_FLAG] = colormap[least_common]
+
+    return colormap
+
+
+def print_line(block_col_width: int, block_num: int, line: Sequence[Sequence[Optional[bytes]]]) -> None:
+    """Print one row: the block number, then every file's hex and ASCII block.
+
+    *line* holds one sequence per file with an entry per byte offset: a
+    single-byte ``bytes`` object, or None where the file has no data. Files
+    with no data in the whole block get a blank column.
+    """
+    print(
+        color.BOLD + str_base(block_num, BLOCK_NUM_BASE).rjust(block_col_width),
+        end=" " * BLOCK_NUM_PADDING_RIGHT + color.RESET
+    )
+
+    # None marks a file whose block is entirely missing.
+    hex_reprs = [None if all(byte is None for byte in block) else "" for block in line]
+    ascii_reprs = hex_reprs.copy()
+
+    for byte_count, bgroup in enumerate(zip(*line), start=1):
+        # A file that already ended contributes None, not a byte to convert.
+        intgroup = [
+            None if byte is None else int.from_bytes(byte, 'little') | HIGH_HALF_FLAG
+            for byte in bgroup
+        ]
+
+        colormap = get_color_map(intgroup)
+        # Reuse the color a key received on an earlier row, if any.
+        colormap = {key: global_colormap.get(key, name) for key, name in colormap.items()}
+        global_colormap.update(colormap)
+
+        for i, byte in enumerate(intgroup):
+            if hex_reprs[i] is None:
+                continue
+            if byte is None:
+                hex_reprs[i] += "  "
+                ascii_reprs[i] += " "
+            else:
+                asuni = unicode_ascii_repr(byte & 0xFF)
+
+                lo = byte & LOW_HALF_MASK
+                hi = byte & HIGH_HALF_MASK
+
+                hi_str = colorize("%01X" % ((byte & 0xF0) >> 4), hi, colormap)
+                lo_str = colorize("%01X" % lo, lo, colormap)
+
+                hex_reprs[i] += hi_str + lo_str
+                ascii_reprs[i] += colorize(asuni, byte | WHOLE_BYTE_FLAG, colormap)
+
+            if byte_count < BLOCK_BYTES:
+                hex_reprs[i] += " " * BYTE_SPACING
+
+    reprs = []
+    for hexr, asciir in zip(hex_reprs, ascii_reprs):
+        if not hexr:
+            reprs.append(" " * TOTAL_BLOCK_LENGTH)
+            continue
+        reprs.append(BLOCK_TEMPLATE.format(hexr=hexr, asciir=asciir))
+
+    print((" " * FILE_SPACING).join(reprs))
+
+
+def print_identical_line_bogus(block_col_width: int, last_line: Sequence[Sequence[Optional[bytes]]]) -> None:
+    """Print a dimmed placeholder row standing in for repeated rows.
+
+    Columns of files with data in *last_line* are filled with
+    NOT_ASCII_CHAR; columns of files without data are left blank.
+    """
+    print(end=color.BRIGHT_BLACK)
+    print(str(VERT_ELLIPSIS).rjust(block_col_width), end=" " * BLOCK_NUM_PADDING_RIGHT)
+
+    bogus_blocks = []
+    for block in last_line:
+        bogus_char = NOT_ASCII_CHAR if any(block) else " "
+
+        bogus_blocks.append(
+            BLOCK_TEMPLATE.replace("|", bogus_char).format(
+                hexr=(" " * BYTE_SPACING).join(bogus_char * 2 for _ in range(BLOCK_BYTES)),
+                asciir=bogus_char * BLOCK_BYTES
+            )
+        )
+
+    print((" " * FILE_SPACING).join(bogus_blocks))
+    print(end=color.RESET)
+
+
+def get_block_col_width(*filenames: str) -> int:
+    """Return the width of the block-number column for *filenames*.
+
+    It fits both BLOCK_NUM_HEADER and the highest block number of the
+    largest file, written in base BLOCK_NUM_BASE.
+    """
+    max_size = max(os.stat(filename).st_size for filename in filenames)
+
+    # Integer math only; blocks are numbered from 0.
+    block_count = (max_size + BLOCK_BYTES - 1) // BLOCK_BYTES
+    last_block = max(block_count - 1, 0)
+
+    return max(len(str_base(last_block, BLOCK_NUM_BASE)), len(BLOCK_NUM_HEADER))
+
+
+def _read_block(f: BinaryIO) -> Sequence[Optional[bytes]]:
+    """Read the next block of *f* as BLOCK_BYTES single-byte entries.
+
+    Missing bytes are None. A short read is treated as end of file and
+    closes *f*; a closed file yields only None entries.
+    """
+    chunk = b"" if f.closed else f.read(BLOCK_BYTES)
+    if len(chunk) < BLOCK_BYTES and not f.closed:
+        f.close()
+
+    block = [chunk[i:i + 1] for i in range(len(chunk))]
+    block.extend([None] * (BLOCK_BYTES - len(chunk)))
+    return block
+
+
+def print_hexdiff(*filenames: str) -> None:
+    """Print the files side by side, one BLOCK_BYTES-sized block per row.
+
+    Rows equal to the previous printed row are collapsed into a single
+    placeholder row. The trailing partial block is printed too, and every
+    opened file is closed even if printing fails.
+    """
+    if not filenames:
+        return
+
+    opened = []
+    try:
+        for entry in gen_files(*filenames):
+            opened.append(entry)
+        names = [name for name, _ in opened]
+        files = [f for _, f in opened]
+
+        block_col_width = get_block_col_width(*filenames)
+        print_names(block_col_width, *names)
+
+        identical_lines = 0
+        prev_line = None
+        block_num = 0
+
+        while any(not f.closed for f in files):
+            line = [_read_block(f) for f in files]
+            if all(byte is None for block in line for byte in block):
+                # Every file ended exactly on the previous block boundary.
+                break
+
+            if prev_line == line:
+                identical_lines += 1
+                if identical_lines == 1:
+                    print_identical_line_bogus(block_col_width, line)
+            else:
+                identical_lines = 0
+                print_line(block_col_width, block_num, line)
+                prev_line = line
+
+            block_num += 1
+    finally:
+        for _, f in opened:
+            f.close()
+
+
+def gen_files(*filenames: str) -> Generator[Tuple[str, BinaryIO], None, None]:
+    """Yield (basename, binary file object) for every path in *filenames*."""
+    for filename in filenames:
+        f = open(filename, "rb")
+        basename = os.path.basename(filename)
+        yield (basename, f)
+
+
+if __name__ == "__main__":
+    if len(sys.argv) < 2:
+        raise SystemExit("At least one file needs to be specified")
+
+    print_hexdiff(*sys.argv[1:])
diff --git a/rebindiff/utils/__init__.py b/rebindiff/utils/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/rebindiff/utils/color.py b/rebindiff/utils/color.py
new file mode 100644
index 0000000..345be4b
--- /dev/null
+++ b/rebindiff/utils/color.py
@@ -0,0 +1,44 @@
+"""ANSI escape sequences used to style terminal output."""
+
+BLACK = '\033[30m'
+RED = '\033[31m'
+GREEN = '\033[32m'
+YELLOW = '\033[33m'
+BLUE = '\033[34m'
+PURPLE = '\033[35m'
+CYAN = '\033[36m'
+WHITE = '\033[37m'
+
+# Named palette to pick highlight colors from; black and white are left out.
+colors = {
+    # 'BLACK': '\033[30m',
+    'RED': '\033[31m',
+    'GREEN': '\033[32m',
+    'YELLOW': '\033[33m',
+    'BLUE': '\033[34m',
+    'PURPLE': '\033[35m',
+    'CYAN': '\033[36m',
+    # 'WHITE': '\033[37m',
+    # 'BRIGHT_BLACK': '\033[30;1m',
+    'BRIGHT_RED': '\033[31;1m',
+    'BRIGHT_GREEN': '\033[32;1m',
+    'BRIGHT_YELLOW': '\033[33;1m',
+    'BRIGHT_BLUE': '\033[34;1m',
+    'BRIGHT_PURPLE': '\033[35;1m',
+    'BRIGHT_CYAN': '\033[36;1m',
+    # 'BRIGHT_WHITE': '\033[37;1m',
+}
+
+BRIGHT_BLACK = '\033[30;1m'
+BRIGHT_RED = '\033[31;1m'
+BRIGHT_GREEN = '\033[32;1m'
+BRIGHT_YELLOW = '\033[33;1m'
+BRIGHT_BLUE = '\033[34;1m'
+BRIGHT_PURPLE = '\033[35;1m'
+BRIGHT_CYAN = '\033[36;1m'
+BRIGHT_WHITE = '\033[37;1m'
+
+
+BOLD = '\033[1m'
+UNDERLINE = '\033[4m'
+RESET = '\033[0m'
diff --git a/rebindiff/utils/strings.py b/rebindiff/utils/strings.py
new file mode 100644
index 0000000..b78dabc
--- /dev/null
+++ b/rebindiff/utils/strings.py
@@ -0,0 +1,22 @@
+import os
+
+
+def ellipsize(string: str, max_length: int, ellipsis="…") -> str:
+    """Shorten *string* to at most *max_length* characters.
+
+    Characters are dropped from the end of the name and replaced by
+    *ellipsis*; a file extension, if any, is kept when it fits.
+    """
+    if len(string) <= max_length:
+        return string
+
+    name, ext = os.path.splitext(string)
+    keep = max_length - len(ellipsis) - len(ext)
+    if keep < 0:
+        # The extension does not fit: fall back to cutting the whole string.
+        name, ext = string, ""
+        keep = max_length - len(ellipsis)
+    if keep < 0:
+        # Not even the ellipsis fits.
+        return ellipsis[:max(max_length, 0)]
+    return name[:keep] + ellipsis + ext
diff --git a/rebindiff/utils/values.py b/rebindiff/utils/values.py
new file mode 100644
index 0000000..8340cb2
--- /dev/null
+++ b/rebindiff/utils/values.py
@@ -0,0 +1,46 @@
+from typing import Iterable
+
+
+def digit_to_char(digit):
+    """Return the character for *digit*: 0-9, then A, B, ... from 10 on."""
+    if digit < 10:
+        return str(digit)
+    return chr(ord('A') + digit - 10)
+
+
+def str_base(number, base):
+    """Return *number* written in *base*, with a leading '-' if negative."""
+    if number < 0:
+        return '-' + str_base(-number, base)
+    (d, m) = divmod(number, base)
+    if d > 0:
+        return str_base(d, base) + digit_to_char(m)
+    return digit_to_char(m)
+
+
+def unicode_ascii_repr(ascii: int) -> str:
+    """Return a printable one-character stand-in for byte value *ascii*.
+
+    Control codes 0-31 and 127 map to code points starting at U+2400,
+    printable ASCII maps to itself and every other value maps to "‧".
+    """
+    if 0 <= ascii < 32:
+        return chr(ascii + 0x2400)
+    elif 32 <= ascii < 127:
+        return chr(ascii)
+    elif ascii == 127:
+        return chr(33 + 0x2400)
+    # Values outside the ASCII range have no dedicated symbol.
+    return "‧"
+
+
+def int_distance(lhs: int, rhs: int) -> int:
+    """Return the number of bit positions in which *lhs* and *rhs* differ."""
+    # XOR leaves a 1 exactly where the operands differ. Stopping as soon as
+    # one operand runs out of bits would miss the other's higher bits.
+    return bin(lhs ^ rhs).count("1")
+
+
+def int_distance_sort_key(item: int, all_items: Iterable[int]) -> int:
+    """Return the summed bit distance between *item* and all of *all_items*."""
+    return sum(int_distance(i, item) for i in all_items)