#!/usr/bin/env python3 """Z-Machine bytecode disassembler and opcode tracer. Performs recursive-descent disassembly of a z-machine story file, following all reachable code paths to catalog every opcode used. Cross-references findings against zvm implementation status. """ import argparse import struct import sys from collections import defaultdict from dataclasses import dataclass from pathlib import Path @dataclass class OpcodeInfo: """Metadata about an opcode.""" name: str stores: bool = False branches: bool = False terminal: bool = False inline_string: bool = False is_call: bool = False # Complete V3 opcode definitions OP2_OPCODES = { 1: OpcodeInfo("je", branches=True), 2: OpcodeInfo("jl", branches=True), 3: OpcodeInfo("jg", branches=True), 4: OpcodeInfo("dec_chk", branches=True), 5: OpcodeInfo("inc_chk", branches=True), 6: OpcodeInfo("jin", branches=True), 7: OpcodeInfo("test", branches=True), 8: OpcodeInfo("or", stores=True), 9: OpcodeInfo("and", stores=True), 10: OpcodeInfo("test_attr", branches=True), 11: OpcodeInfo("set_attr"), 12: OpcodeInfo("clear_attr"), 13: OpcodeInfo("store"), 14: OpcodeInfo("insert_obj"), 15: OpcodeInfo("loadw", stores=True), 16: OpcodeInfo("loadb", stores=True), 17: OpcodeInfo("get_prop", stores=True), 18: OpcodeInfo("get_prop_addr", stores=True), 19: OpcodeInfo("get_next_prop", stores=True), 20: OpcodeInfo("add", stores=True), 21: OpcodeInfo("sub", stores=True), 22: OpcodeInfo("mul", stores=True), 23: OpcodeInfo("div", stores=True), 24: OpcodeInfo("mod", stores=True), } OP1_OPCODES = { 0: OpcodeInfo("jz", branches=True), 1: OpcodeInfo("get_sibling", stores=True, branches=True), 2: OpcodeInfo("get_child", stores=True, branches=True), 3: OpcodeInfo("get_parent", stores=True), 4: OpcodeInfo("get_prop_len", stores=True), 5: OpcodeInfo("inc"), 6: OpcodeInfo("dec"), 7: OpcodeInfo("print_addr"), 8: OpcodeInfo("call_1s", stores=True, is_call=True), 9: OpcodeInfo("remove_obj"), 10: OpcodeInfo("print_obj"), 11: OpcodeInfo("ret", terminal=True), 12: OpcodeInfo("jump", terminal=True), 13: OpcodeInfo("print_paddr"), 14: OpcodeInfo("load", stores=True), 15: OpcodeInfo("not", stores=True), } OP0_OPCODES = { 0: OpcodeInfo("rtrue", terminal=True), 1: OpcodeInfo("rfalse", terminal=True), 2: OpcodeInfo("print", inline_string=True), 3: OpcodeInfo("print_ret", inline_string=True, terminal=True), 4: OpcodeInfo("nop"), 5: OpcodeInfo("save", branches=True), 6: OpcodeInfo("restore", branches=True), 7: OpcodeInfo("restart", terminal=True), 8: OpcodeInfo("ret_popped", terminal=True), 9: OpcodeInfo("pop"), 10: OpcodeInfo("quit", terminal=True), 11: OpcodeInfo("new_line"), 12: OpcodeInfo("show_status"), 13: OpcodeInfo("verify", branches=True), } VAR_OPCODES = { 0: OpcodeInfo("call_vs", stores=True, is_call=True), 1: OpcodeInfo("storew"), 2: OpcodeInfo("storeb"), 3: OpcodeInfo("put_prop"), 4: OpcodeInfo("sread"), 5: OpcodeInfo("print_char"), 6: OpcodeInfo("print_num"), 7: OpcodeInfo("random", stores=True), 8: OpcodeInfo("push"), 9: OpcodeInfo("pull"), 10: OpcodeInfo("split_window"), 11: OpcodeInfo("set_window"), 19: OpcodeInfo("output_stream"), 20: OpcodeInfo("input_stream"), 21: OpcodeInfo("sound_effect"), 22: OpcodeInfo("read_char", stores=True), } # ZVM implementation status (opcodes with real logic, not stubs) ZVM_IMPLEMENTED = { ("2OP", 1), ("2OP", 2), ("2OP", 4), ("2OP", 5), ("2OP", 8), ("2OP", 9), ("2OP", 13), ("2OP", 14), ("2OP", 15), ("2OP", 16), ("2OP", 17), ("2OP", 20), ("2OP", 21), ("2OP", 22), ("2OP", 23), ("1OP", 0), ("1OP", 2), ("1OP", 3), ("1OP", 5), ("1OP", 8), ("1OP", 12), ("1OP", 13), ("0OP", 0), ("0OP", 1), ("0OP", 2), ("0OP", 3), ("VAR", 0), ("VAR", 1), ("VAR", 3), ("VAR", 5), ("VAR", 7), ("VAR", 8), ("VAR", 10), ("VAR", 11), ("VAR", 19), ("VAR", 22), } class ZMachine: """Z-Machine story file reader and disassembler.""" def __init__(self, story_path: Path, verbose: bool = False): self.story_path = story_path self.verbose = verbose self.data = story_path.read_bytes() self.version = self.data[0] self.entry_point = self.read_word(0x06) self.static_mem_base = self.read_word(0x0E) # Disassembly state self.visited_addrs = set() self.visited_routines = set() self.worklist = [] self.opcode_counts = defaultdict(int) self.instruction_count = 0 self.routines_from_entrypoint = 0 self.routines_from_scan = 0 def read_byte(self, addr: int) -> int: """Read a single byte.""" return self.data[addr] def read_word(self, addr: int) -> int: """Read a 16-bit big-endian word.""" return struct.unpack_from(">H", self.data, addr)[0] def read_signed_word(self, addr: int) -> int: """Read a 16-bit signed big-endian word.""" val = self.read_word(addr) return val if val < 0x8000 else val - 0x10000 def unpack_routine_addr(self, packed: int) -> int: """Convert packed routine address to byte address.""" if self.version <= 3: return packed * 2 elif self.version <= 5: return packed * 4 else: return packed * 8 def parse_operands( self, pc: int, opcode_byte: int ) -> tuple[list[tuple[int, bool]], int]: """Parse operands and return (operands, bytes_consumed). Each operand is (value, is_constant) tuple. """ operands = [] pos = pc if opcode_byte < 0x80: # Long form 2OP op1_type = (opcode_byte >> 6) & 1 op2_type = (opcode_byte >> 5) & 1 if op1_type == 0: # small constant operands.append((self.read_byte(pos), True)) pos += 1 else: # variable operands.append((self.read_byte(pos), False)) pos += 1 if op2_type == 0: # small constant operands.append((self.read_byte(pos), True)) pos += 1 else: # variable operands.append((self.read_byte(pos), False)) pos += 1 elif opcode_byte < 0xB0: # Short form 1OP or 0OP op_type = (opcode_byte >> 4) & 3 if op_type == 0: # large constant operands.append((self.read_word(pos), True)) pos += 2 elif op_type == 1: # small constant operands.append((self.read_byte(pos), True)) pos += 1 elif op_type == 2: # variable operands.append((self.read_byte(pos), False)) pos += 1 # op_type == 3: 0OP, no operands else: # Variable form types_byte = self.read_byte(pos) pos += 1 for i in range(4): op_type = (types_byte >> (6 - i * 2)) & 3 if op_type == 3: # omitted break elif op_type == 0: # large constant operands.append((self.read_word(pos), True)) pos += 2 elif op_type == 1: # small constant operands.append((self.read_byte(pos), True)) pos += 1 elif op_type == 2: # variable operands.append((self.read_byte(pos), False)) pos += 1 return operands, pos - pc def parse_zstring(self, addr: int) -> int: """Parse z-string and return length in bytes.""" pos = addr while True: word = self.read_word(pos) pos += 2 if word & 0x8000: break return pos - addr def parse_branch(self, pc: int) -> tuple[int | None, int]: """Parse branch data and return (target_addr, bytes_consumed).""" branch_byte = self.read_byte(pc) short_form = (branch_byte & 0x40) != 0 if short_form: offset = branch_byte & 0x3F bytes_consumed = 1 else: offset = ((branch_byte & 0x3F) << 8) | self.read_byte(pc + 1) if offset >= 0x2000: offset -= 0x4000 bytes_consumed = 2 # Compute target if offset == 0 or offset == 1: # Return true/false - terminal for this path return None, bytes_consumed else: target = pc + bytes_consumed + offset - 2 return target, bytes_consumed def decode_instruction(self, addr: int) -> tuple[str, str, int, list[int]]: """Decode instruction at addr. Returns (opclass, name, next_addr, targets). targets is a list of addresses to visit next. """ if addr in self.visited_addrs: return "", "", addr, [] self.visited_addrs.add(addr) self.instruction_count += 1 opcode_byte = self.read_byte(addr) pc = addr + 1 # Determine form and opcode if opcode_byte < 0x80: # Long form 2OP opcode_num = opcode_byte & 0x1F opclass = "2OP" info = OP2_OPCODES.get(opcode_num) elif opcode_byte < 0xC0: # Short form (includes 0xB0-0xBF which are 0OP) op_type = (opcode_byte >> 4) & 3 if op_type == 3: # 0OP opcode_num = opcode_byte & 0x0F opclass = "0OP" info = OP0_OPCODES.get(opcode_num) else: # 1OP opcode_num = opcode_byte & 0x0F opclass = "1OP" info = OP1_OPCODES.get(opcode_num) elif opcode_byte < 0xE0: # Variable form 2OP (0xC0-0xDF) opcode_num = opcode_byte & 0x1F opclass = "2OP" info = OP2_OPCODES.get(opcode_num) else: # Variable form VAR (0xE0-0xFF) opcode_num = opcode_byte & 0x1F opclass = "VAR" info = VAR_OPCODES.get(opcode_num) if info is None: # Unknown opcode return opclass, f"unknown_{opcode_num}", pc, [] # Track opcode self.opcode_counts[(opclass, opcode_num)] += 1 if self.verbose: print( f" {addr:05x}: {opclass}:{opcode_num:02d} {info.name}", file=sys.stderr, ) # Parse operands operands, operand_bytes = self.parse_operands(pc, opcode_byte) pc += operand_bytes # Handle inline z-string if info.inline_string: string_bytes = self.parse_zstring(pc) pc += string_bytes # Handle store byte if info.stores: pc += 1 # Handle branch targets = [] if info.branches: branch_target, branch_bytes = self.parse_branch(pc) pc += branch_bytes if branch_target is not None: targets.append(branch_target) # Handle call (follow routine + continue after call) if info.is_call and operands: packed_addr, is_const = operands[0] if is_const and packed_addr != 0: routine_addr = self.unpack_routine_addr(packed_addr) if routine_addr not in self.visited_routines and routine_addr < len( self.data ): self.visited_routines.add(routine_addr) targets.append(routine_addr) # Handle jump (terminal but has target) if opcode_num == 12 and opclass == "1OP" and operands: offset, _ = operands[0] if offset >= 0x8000: offset -= 0x10000 jump_target = pc + offset - 2 targets.append(jump_target) return opclass, info.name, pc, targets # Add fall-through if not terminal if not info.terminal: targets.append(pc) return opclass, info.name, pc, targets def disassemble_routine(self, addr: int): """Disassemble a routine starting at addr.""" if addr >= len(self.data): return # Parse routine header num_locals = self.read_byte(addr) pc = addr + 1 # Skip local variable initial values (V3 only) if self.version <= 3: pc += num_locals * 2 # Add first instruction to worklist self.worklist.append(pc) def scan_data_for_routines(self): """Scan dynamic memory for packed addresses pointing to routines. Globals and object property tables live in dynamic memory and contain packed routine addresses. This catches routines reachable only through indirect calls (variable operands in CALL opcodes). """ high_mem = self.read_word(0x04) found = 0 # Scan every word in dynamic memory (globals, property tables) for offset in range(0, self.static_mem_base - 1, 2): packed = self.read_word(offset) if packed == 0: continue addr = self.unpack_routine_addr(packed) # Must point into the code region if addr < high_mem or addr >= len(self.data) - 1: continue # Must not already be a known routine if addr in self.visited_routines: continue # Must look like a valid routine header (local count 0-15) num_locals = self.read_byte(addr) if num_locals > 15: continue # First instruction must be within bounds first_instr = addr + 1 if self.version <= 3: first_instr += num_locals * 2 if first_instr >= len(self.data): continue self.visited_routines.add(addr) self.disassemble_routine(addr) found += 1 return found def _process_worklist(self): """Process the instruction worklist until empty.""" while self.worklist: addr = self.worklist.pop() if addr in self.visited_addrs or addr >= len(self.data): continue opclass, name, next_addr, targets = self.decode_instruction(addr) for target in targets: if target >= len(self.data) or target < 0: continue if target in self.visited_routines: num_locals = self.read_byte(target) first_instr = target + 1 if self.version <= 3: first_instr += num_locals * 2 if first_instr not in self.visited_addrs: self.worklist.append(first_instr) else: if target not in self.visited_addrs: self.worklist.append(target) def disassemble_all(self): """Perform complete recursive-descent disassembly.""" # Entry point is a byte address of the first instruction (V1-5) self.worklist.append(self.entry_point) # Phase 1: recursive descent from entry point self._process_worklist() self.reachable_routines = len(self.visited_routines) # Phase 2: scan dynamic memory for packed routine addresses self.scanned_routines = self.scan_data_for_routines() self._process_worklist() def generate_report(self) -> str: """Generate analysis report.""" lines = [] lines.append("=" * 70) lines.append("Z-MACHINE OPCODE TRACE REPORT") lines.append("=" * 70) lines.append("") lines.append(f"Story file: {self.story_path}") lines.append(f"Version: {self.version}") lines.append(f"Entry point: ${self.entry_point:04x}") lines.append(f"Story size: {len(self.data)} bytes") lines.append("") lines.append("DISASSEMBLY STATISTICS") lines.append("-" * 70) lines.append( f"Routines found (entry-point reachable): {self.reachable_routines}" ) lines.append(f"Routines found (data scan): {self.scanned_routines}") lines.append(f"Total routines: {len(self.visited_routines)}") lines.append(f"Instructions decoded: {self.instruction_count}") lines.append(f"Unique opcodes: {len(self.opcode_counts)}") lines.append("") # Opcodes by class lines.append("OPCODES FOUND IN STORY") lines.append("-" * 70) for opclass_name, opcode_dict in [ ("2OP", OP2_OPCODES), ("1OP", OP1_OPCODES), ("0OP", OP0_OPCODES), ("VAR", VAR_OPCODES), ]: lines.append(f"\n{opclass_name} opcodes:") found = [ (num, opcode_dict[num].name, self.opcode_counts[(opclass_name, num)]) for num in sorted(opcode_dict.keys()) if (opclass_name, num) in self.opcode_counts ] if found: for num, name, count in found: lines.append(f" {num:2d} {name:20s} (used {count} times)") else: lines.append(" (none)") lines.append("") lines.append("GAP ANALYSIS: ZVM IMPLEMENTATION STATUS") lines.append("-" * 70) implemented = [] missing = [] for (opclass, opcode_num), count in sorted(self.opcode_counts.items()): if opclass == "2OP": name = OP2_OPCODES[opcode_num].name elif opclass == "1OP": name = OP1_OPCODES[opcode_num].name elif opclass == "0OP": name = OP0_OPCODES[opcode_num].name elif opclass == "VAR": name = VAR_OPCODES[opcode_num].name else: name = "unknown" key = (opclass, opcode_num) if key in ZVM_IMPLEMENTED: implemented.append((opclass, opcode_num, name, count)) else: missing.append((opclass, opcode_num, name, count)) lines.append("\nImplemented in zvm:") for opclass, num, name, count in implemented: lines.append(f" {opclass}:{num:2d} {name:20s} (used {count} times)") lines.append("\nMissing from zvm (need porting):") for opclass, num, name, count in missing: lines.append(f" {opclass}:{num:2d} {name:20s} (used {count} times)") lines.append("") lines.append("SUMMARY") lines.append("-" * 70) total = len(self.opcode_counts) impl_count = len(implemented) missing_count = len(missing) lines.append( f"{total} unique opcodes found in story, " f"{impl_count} already in zvm, {missing_count} need porting" ) lines.append("") return "\n".join(lines) def main(): parser = argparse.ArgumentParser( description="Trace z-machine opcodes in a story file" ) parser.add_argument( "story", nargs="?", default="content/stories/zork1.z3", help="Path to z-machine story file (default: content/stories/zork1.z3)", ) parser.add_argument( "--verbose", "-v", action="store_true", help="Print each opcode as it's found", ) args = parser.parse_args() story_path = Path(args.story) if not story_path.exists(): print(f"Error: Story file not found: {story_path}", file=sys.stderr) sys.exit(1) if args.verbose: print(f"Disassembling {story_path}...", file=sys.stderr) zm = ZMachine(story_path, verbose=args.verbose) zm.disassemble_all() print(zm.generate_report()) if __name__ == "__main__": main()