#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""Print a colored, side-by-side hex/ASCII comparison of several files.

Files are read in blocks of ``BLOCK_BYTES`` bytes.  Every block is rendered as
one output row containing, for each file, the hex digits of the block followed
by a character column.  Byte positions whose value is not identical across all
files are highlighted, and runs of repeated rows are collapsed into a single
placeholder row.
"""
import contextlib
import itertools
import os
import os.path
import random
import sys
from typing import BinaryIO, Dict, Generator, List, Mapping, Optional, Sequence, Tuple

from rebindiff.utils import color
from rebindiff.utils.strings import ellipsize
from rebindiff.utils.values import str_base, unicode_ascii_repr

# --- Layout configuration ---------------------------------------------------
BLOCK_NUM_HEADER = "blk"
BLOCK_NUM_PADDING_RIGHT = 2
BLOCK_NUM_BASE = 16
BLOCK_BYTES = 4
LEFT_PADDING = 1
BYTE_SPACING = 1
HEX_ASCII_SPACING = 2
FILE_SPACING = 4
RIGHT_PADDING = 0
NOT_ASCII_CHAR = "‧"
HORZ_ELLIPSIS = "…"
VERT_ELLIPSIS = "⋮"

# Template of one file's cell in a row: hex digits, a gap, then |chars|.
BLOCK_TEMPLATE = " " * LEFT_PADDING + "{hexr}" + " " * HEX_ASCII_SPACING + "|{asciir}|" + " " * RIGHT_PADDING
# Two hex digits per byte plus the spacing between (not after) bytes.
HEX_REPR_LENGTH = (2 + BYTE_SPACING) * BLOCK_BYTES - BYTE_SPACING
# One character per byte plus the two "|" delimiters.
ASCII_REPR_LENGTH = BLOCK_BYTES + 2
NAME_MAX_LENGTH = HEX_REPR_LENGTH + HEX_ASCII_SPACING + ASCII_REPR_LENGTH
TOTAL_BLOCK_LENGTH = NAME_MAX_LENGTH + LEFT_PADDING + RIGHT_PADDING

if BLOCK_NUM_BASE > ord('Z') - ord('A') + 10 or BLOCK_NUM_BASE < 2:
    raise AssertionError("Invalid block number base")

# Colors already handed out by print_line, keyed like the maps returned by
# get_color_map, so a value keeps its color from one row to the next.
global_colormap = {}


def print_names(block_col_width: int, *names: str) -> None:
    """Print the bold, underlined header row.

    :param block_col_width: width of the block-number column.
    :param names: one label per file column; each is passed through
        ``ellipsize(name, NAME_MAX_LENGTH)`` and centered in its column.
    """
    print(end=color.BOLD + color.UNDERLINE)
    print(BLOCK_NUM_HEADER.center(block_col_width), end=" " * BLOCK_NUM_PADDING_RIGHT)
    column_gap = " " * (FILE_SPACING + LEFT_PADDING + RIGHT_PADDING)
    labels = [ellipsize(name, NAME_MAX_LENGTH).center(NAME_MAX_LENGTH) for name in names]
    print(" " * LEFT_PADDING + column_gap.join(labels) + " " * RIGHT_PADDING)
    print(end=color.RESET)


def colorize(string: str, byte: int, colormap: Mapping[int, str]) -> str:
    """Wrap ``string`` in the color registered for ``byte``, if any.

    :param string: text to emit.
    :param byte: lookup key into ``colormap``.
    :param colormap: maps keys to names that index ``color.colors``.
    :return: ``string`` surrounded by the color code and ``color.RESET`` when
        ``byte`` is in ``colormap``; otherwise ``string`` unchanged.
    """
    if byte in colormap:
        return color.colors[colormap[byte]] + string + color.RESET
    return string


def get_other_halfbyte(half: int, byte: int) -> int:
    """Return the half of ``byte`` selected by the low nibble of ``half``.

    :return: ``0`` when ``byte`` is ``0``; ``byte & 0x0F`` when the low four
        bits of ``half`` are all zero; ``byte & 0x1F0`` otherwise.
    """
    if byte == 0:
        return 0
    if half & 0x0F == 0:
        return byte & 0x0F
    return byte & 0x1F0


def get_color_map(intgroup: Sequence[Optional[int]]) -> Mapping[int, str]:
    """Assign colors to the values of one byte position that differ across files.

    Each value is split into a low half (``value & 0x0F``) and a high half
    (``value & 0x1F0``).  Every half that does not occur in all entries of
    ``intgroup`` gets a color from a shuffled copy of ``color.colors``' keys,
    and every distinct value inherits the color of its rarer half.

    :param intgroup: one entry per file; ``None`` marks a missing byte (file
        already ended).  ``None`` entries receive no color themselves but
        still count towards "occurs in all entries".
    :return: mapping from half keys and whole values to color names; empty
        when all entries are identical.
    """
    # NOTE(review): a whole value whose low nibble is 0 equals its own high
    # half key (value & 0x1F0 == value), so the whole-value entry written in
    # the last loop can overwrite or create the high-half entry.  Left as-is
    # because resolving it changes the key scheme of the returned mapping.
    intset = set(intgroup)
    if len(intset) == 1:
        return {}

    palette = list(color.colors.keys())
    if not palette:
        # Nothing to color with; colorize() then leaves every string untouched.
        return {}
    random.shuffle(palette)
    # Reuse colors round-robin instead of running out of them when more
    # halves differ than there are palette entries.
    next_color = itertools.cycle(palette)

    colormap: Dict[int, str] = {}
    halfbytes: Dict[int, List[int]] = {}
    for byte in intgroup:
        if byte is None:
            continue
        halfbytes.setdefault(byte & 0x0F, []).append(byte)
        halfbytes.setdefault(byte & 0x1F0, []).append(byte)

    for halfbyte, members in halfbytes.items():
        if len(members) == len(intgroup):
            # Halfbyte does not change
            continue
        colormap[halfbyte] = next(next_color)

    for byte in intset:
        if byte is None:
            continue
        lo = byte & 0x0F
        hi = byte & 0x1F0
        # The byte gets the color of the half with the least occurrences
        # (the high half wins ties, as min() keeps the first minimum).
        least_common = min((hi, lo), key=lambda half: len(halfbytes[half]))
        chosen = colormap.get(least_common)
        if chosen is not None:
            colormap[byte] = chosen
    return colormap


def print_line(block_col_width: int, block_num: int, line: Sequence[Sequence[Optional[bytes]]]) -> None:
    """Print one row: the block number followed by every file's block.

    :param block_col_width: width of the block-number column.
    :param block_num: block index, rendered via ``str_base`` in
        ``BLOCK_NUM_BASE``.
    :param line: one block per file; each block holds single-byte ``bytes``
        objects, with ``None`` for positions past the end of that file.
    """
    print(
        color.BOLD + str_base(block_num, BLOCK_NUM_BASE).rjust(block_col_width),
        end=" " * BLOCK_NUM_PADDING_RIGHT + color.RESET
    )
    # A block consisting solely of None belongs to a file that has ended; it
    # is marked with None here and rendered as blank space below.
    hex_reprs = ["" if set(block) != {None} else None for block in line]
    ascii_reprs = hex_reprs.copy()

    byte_count = 0
    for bgroup in zip(*line):
        byte_count += 1
        # Bit 0x100 is set on every present byte so that its high half
        # (value & 0x1F0) never shares a key with a low half (value & 0x0F).
        # Missing bytes stay None instead of being fed to int.from_bytes.
        intgroup = [
            None if byte is None else int.from_bytes(byte, 'little') | 0x100
            for byte in bgroup
        ]
        colormap = get_color_map(intgroup)
        # Prefer a color that was already assigned on an earlier row.
        colormap = {key: global_colormap.get(key, name) for key, name in colormap.items()}
        global_colormap.update(colormap)

        for i, byte in enumerate(intgroup):
            if hex_reprs[i] is None:
                continue
            if byte is None:
                # Two blanks stand in for the two hex digits of a byte.
                hex_reprs[i] += " " * 2
                ascii_reprs[i] += " "
            else:
                asuni = unicode_ascii_repr(byte & 0xFF)
                lo = byte & 0x0F
                hi = byte & 0x1F0
                hi_str = colorize("%01X" % ((byte & 0xF0) >> 4), hi, colormap)
                lo_str = colorize("%01X" % lo, lo, colormap)
                hex_reprs[i] += hi_str + lo_str
                ascii_reprs[i] += colorize(asuni, byte, colormap)
            if byte_count < BLOCK_BYTES:
                hex_reprs[i] += " " * BYTE_SPACING

    reprs = []
    for hexr, asciir in zip(hex_reprs, ascii_reprs):
        if not hexr:
            reprs.append(" " * TOTAL_BLOCK_LENGTH)
            continue
        reprs.append(BLOCK_TEMPLATE.format(hexr=hexr, asciir=asciir))
    print((" " * FILE_SPACING).join(reprs))


def print_identical_line_bogus(block_col_width: int, last_line: Sequence[Sequence[Optional[bytes]]]):
    """Print the placeholder row that stands for a run of repeated rows.

    :param block_col_width: width of the block-number column.
    :param last_line: the repeated row; a file column is filled with
        ``NOT_ASCII_CHAR`` when its block holds at least one byte and with
        blanks otherwise.
    """
    print(end=color.BRIGHT_BLACK)
    print(str(VERT_ELLIPSIS).rjust(block_col_width), end=" " * BLOCK_NUM_PADDING_RIGHT)
    bogus_blocks = []
    for block in last_line:
        # None placeholders are falsy; single-byte bytes objects are truthy.
        bogus_char = NOT_ASCII_CHAR if any(block) else " "
        bogus_blocks.append(
            BLOCK_TEMPLATE.replace("|", bogus_char).format(
                hexr=(" " * BYTE_SPACING).join(bogus_char * 2 for _ in range(BLOCK_BYTES)),
                asciir=bogus_char * BLOCK_BYTES
            )
        )
    print((" " * FILE_SPACING).join(bogus_blocks))
    print(end=color.RESET)


def get_block_col_width(*filenames):
    """Return the width of the block-number column for the given files.

    The width is the number of base-``BLOCK_NUM_BASE`` digits of an upper
    bound on the block count of the largest file, but never less than the
    length of ``BLOCK_NUM_HEADER``.

    :param filenames: paths of the files to be compared.
    :raises ValueError: if no filename is given.
    :raises OSError: if a file cannot be stat'ed.
    """
    max_size = max(os.stat(filename).st_size for filename in filenames)
    # Integer arithmetic keeps the count exact for arbitrarily large files.
    max_block = (max_size + BLOCK_BYTES - (max_size % BLOCK_BYTES)) // BLOCK_BYTES
    digits = 0
    while max_block > 0:
        max_block //= BLOCK_NUM_BASE
        digits += 1
    return max(digits, len(BLOCK_NUM_HEADER))


def _read_block(f: BinaryIO) -> List[Optional[bytes]]:
    """Read up to BLOCK_BYTES bytes from ``f`` as single-byte objects, padded with None."""
    chunk = f.read(BLOCK_BYTES)
    block: List[Optional[bytes]] = [chunk[i:i + 1] for i in range(len(chunk))]
    block.extend([None] * (BLOCK_BYTES - len(chunk)))
    return block


def print_hexdiff(*filenames: str):
    """Print the side-by-side hex comparison of the given files.

    A row that equals the previous row is not printed again; the first
    repetition of a run prints a placeholder row instead.  A trailing block
    shorter than ``BLOCK_BYTES`` is printed with its missing positions left
    blank.  All files are closed on return, including when an error occurs.

    :param filenames: paths of the files to compare.
    :raises ValueError: if no filename is given.
    :raises OSError: if a file cannot be opened or stat'ed.
    """
    with contextlib.ExitStack() as stack:
        names = []
        files = []
        for name, f in gen_files(*filenames):
            stack.enter_context(f)
            names.append(name)
            files.append(f)

        block_col_width = get_block_col_width(*filenames)
        print_names(block_col_width, *names)

        identical_lines = 0
        prev_line = None
        block_num = 0
        while True:
            line = [_read_block(f) for f in files]
            if not any(any(byte is not None for byte in block) for block in line):
                # Every file is exhausted.
                break
            if prev_line == line:
                identical_lines += 1
                if identical_lines == 1:
                    print_identical_line_bogus(block_col_width, line)
            else:
                identical_lines = 0
                print_line(block_col_width, block_num, line)
            prev_line = line
            block_num += 1


def gen_files(*filenames: str) -> Generator[Tuple[str, BinaryIO], None, None]:
    """Open each file for binary reading and yield ``(basename, file)`` pairs.

    Files are opened lazily, one per iteration step.  The caller owns the
    yielded file objects and is responsible for closing them.
    """
    for filename in filenames:
        f = open(filename, "rb")
        basename = os.path.basename(filename)
        yield (basename, f)


def main():
    """Command-line entry point: compare the files named in ``sys.argv[1:]``."""
    if len(sys.argv) < 2:
        raise SystemExit("At least one file needs to be specified")
    print_hexdiff(*sys.argv[1:])