Add z-machine opcode tracing script

This commit is contained in:
Jared Miller 2026-02-09 18:29:58 -05:00
parent 47ef606e7f
commit 677ddac89f
Signed by: shmup
GPG key ID: 22B5C6D66A38B06C

621
scripts/trace_opcodes.py Normal file
View file

@ -0,0 +1,621 @@
#!/usr/bin/env python3
"""Z-Machine bytecode disassembler and opcode tracer.
Performs recursive-descent disassembly of a z-machine story file,
following all reachable code paths to catalog every opcode used.
Cross-references findings against zvm implementation status.
"""
import argparse
import struct
import sys
from collections import defaultdict
from dataclasses import dataclass
from pathlib import Path
@dataclass
class OpcodeInfo:
    """Static decoding traits of a single Z-machine opcode.

    The flags tell the disassembler which extra bytes follow the operands
    and whether control flow continues past the instruction.
    """

    # Mnemonic shown in reports and verbose traces.
    name: str
    # A one-byte store-variable reference follows the operands.
    stores: bool = False
    # Branch data (one or two bytes) follows the operands / store byte.
    branches: bool = False
    # Execution never falls through to the next instruction.
    terminal: bool = False
    # An inline z-string follows the operands (print / print_ret).
    inline_string: bool = False
    # First operand is a packed routine address to be followed.
    is_call: bool = False
# Complete V3 opcode definitions
#
# Two-operand opcodes, keyed by the opcode number taken from the low five
# bits of the opcode byte (long form 0x00-0x7F and variable form 0xC0-0xDF).
# branches=True means branch data follows the operands; stores=True means a
# store-variable byte follows the operands.
OP2_OPCODES = {
    # Comparisons and tests: all branch, none store.
    1: OpcodeInfo("je", branches=True),
    2: OpcodeInfo("jl", branches=True),
    3: OpcodeInfo("jg", branches=True),
    4: OpcodeInfo("dec_chk", branches=True),
    5: OpcodeInfo("inc_chk", branches=True),
    6: OpcodeInfo("jin", branches=True),
    7: OpcodeInfo("test", branches=True),
    # Bitwise operations.
    8: OpcodeInfo("or", stores=True),
    9: OpcodeInfo("and", stores=True),
    # Object attributes and tree manipulation.
    10: OpcodeInfo("test_attr", branches=True),
    11: OpcodeInfo("set_attr"),
    12: OpcodeInfo("clear_attr"),
    13: OpcodeInfo("store"),
    14: OpcodeInfo("insert_obj"),
    # Memory and property reads.
    15: OpcodeInfo("loadw", stores=True),
    16: OpcodeInfo("loadb", stores=True),
    17: OpcodeInfo("get_prop", stores=True),
    18: OpcodeInfo("get_prop_addr", stores=True),
    19: OpcodeInfo("get_next_prop", stores=True),
    # Arithmetic.
    20: OpcodeInfo("add", stores=True),
    21: OpcodeInfo("sub", stores=True),
    22: OpcodeInfo("mul", stores=True),
    23: OpcodeInfo("div", stores=True),
    24: OpcodeInfo("mod", stores=True),
}
# One-operand opcodes, keyed by the low four bits of a short-form opcode
# byte whose operand-type bits are not 3 (i.e. 0x80-0xAF).
# NOTE(review): call_1s (8) is a V4+ opcode per the Z-Machine Standard even
# though the header comment says "V3" - confirm it is included on purpose.
OP1_OPCODES = {
    0: OpcodeInfo("jz", branches=True),
    # get_sibling / get_child both store a result and branch.
    1: OpcodeInfo("get_sibling", stores=True, branches=True),
    2: OpcodeInfo("get_child", stores=True, branches=True),
    3: OpcodeInfo("get_parent", stores=True),
    4: OpcodeInfo("get_prop_len", stores=True),
    5: OpcodeInfo("inc"),
    6: OpcodeInfo("dec"),
    7: OpcodeInfo("print_addr"),
    8: OpcodeInfo("call_1s", stores=True, is_call=True),
    9: OpcodeInfo("remove_obj"),
    10: OpcodeInfo("print_obj"),
    11: OpcodeInfo("ret", terminal=True),
    # jump is terminal here; its target is computed separately by the decoder.
    12: OpcodeInfo("jump", terminal=True),
    13: OpcodeInfo("print_paddr"),
    14: OpcodeInfo("load", stores=True),
    15: OpcodeInfo("not", stores=True),
}
# Zero-operand opcodes, keyed by the low four bits of a short-form opcode
# byte whose operand-type bits are 3 (i.e. 0xB0-0xBF).
OP0_OPCODES = {
    0: OpcodeInfo("rtrue", terminal=True),
    1: OpcodeInfo("rfalse", terminal=True),
    # print / print_ret are followed by an inline z-string.
    2: OpcodeInfo("print", inline_string=True),
    3: OpcodeInfo("print_ret", inline_string=True, terminal=True),
    4: OpcodeInfo("nop"),
    # save / restore are modelled as branch instructions here.
    5: OpcodeInfo("save", branches=True),
    6: OpcodeInfo("restore", branches=True),
    7: OpcodeInfo("restart", terminal=True),
    8: OpcodeInfo("ret_popped", terminal=True),
    9: OpcodeInfo("pop"),
    10: OpcodeInfo("quit", terminal=True),
    11: OpcodeInfo("new_line"),
    12: OpcodeInfo("show_status"),
    13: OpcodeInfo("verify", branches=True),
}
# Variable-operand opcodes, keyed by the low five bits of a variable-form
# opcode byte in the range 0xE0-0xFF. Numbers 12-18 are intentionally absent
# from this table; the decoder reports them as "unknown_<n>".
# NOTE(review): read_char (22) is a V4+ opcode per the Z-Machine Standard -
# confirm it is included on purpose.
VAR_OPCODES = {
    # The first operand of call_vs is a packed routine address.
    0: OpcodeInfo("call_vs", stores=True, is_call=True),
    1: OpcodeInfo("storew"),
    2: OpcodeInfo("storeb"),
    3: OpcodeInfo("put_prop"),
    4: OpcodeInfo("sread"),
    5: OpcodeInfo("print_char"),
    6: OpcodeInfo("print_num"),
    7: OpcodeInfo("random", stores=True),
    8: OpcodeInfo("push"),
    9: OpcodeInfo("pull"),
    10: OpcodeInfo("split_window"),
    11: OpcodeInfo("set_window"),
    19: OpcodeInfo("output_stream"),
    20: OpcodeInfo("input_stream"),
    21: OpcodeInfo("sound_effect"),
    22: OpcodeInfo("read_char", stores=True),
}
# ZVM implementation status (opcodes with real logic, not stubs).
# Stored as a set of (opcode class, opcode number) pairs; the numbers are
# grouped per class below and flattened into the set the report looks up.
ZVM_IMPLEMENTED = {
    (opclass, number)
    for opclass, numbers in (
        ("2OP", (1, 2, 4, 5, 8, 9, 13, 14, 15, 16, 17, 20, 21, 22, 23)),
        ("1OP", (0, 2, 3, 5, 8, 12, 13)),
        ("0OP", (0, 1, 2, 3)),
        ("VAR", (0, 1, 3, 5, 7, 8, 10, 11, 19, 22)),
    )
    for number in numbers
}
class ZMachine:
"""Z-Machine story file reader and disassembler."""
def __init__(self, story_path: Path, verbose: bool = False):
self.story_path = story_path
self.verbose = verbose
self.data = story_path.read_bytes()
self.version = self.data[0]
self.entry_point = self.read_word(0x06)
self.static_mem_base = self.read_word(0x0E)
# Disassembly state
self.visited_addrs = set()
self.visited_routines = set()
self.worklist = []
self.opcode_counts = defaultdict(int)
self.instruction_count = 0
self.routines_from_entrypoint = 0
self.routines_from_scan = 0
def read_byte(self, addr: int) -> int:
"""Read a single byte."""
return self.data[addr]
def read_word(self, addr: int) -> int:
"""Read a 16-bit big-endian word."""
return struct.unpack_from(">H", self.data, addr)[0]
def read_signed_word(self, addr: int) -> int:
"""Read a 16-bit signed big-endian word."""
val = self.read_word(addr)
return val if val < 0x8000 else val - 0x10000
def unpack_routine_addr(self, packed: int) -> int:
"""Convert packed routine address to byte address."""
if self.version <= 3:
return packed * 2
elif self.version <= 5:
return packed * 4
else:
return packed * 8
def parse_operands(
self, pc: int, opcode_byte: int
) -> tuple[list[tuple[int, bool]], int]:
"""Parse operands and return (operands, bytes_consumed).
Each operand is (value, is_constant) tuple.
"""
operands = []
pos = pc
if opcode_byte < 0x80:
# Long form 2OP
op1_type = (opcode_byte >> 6) & 1
op2_type = (opcode_byte >> 5) & 1
if op1_type == 0: # small constant
operands.append((self.read_byte(pos), True))
pos += 1
else: # variable
operands.append((self.read_byte(pos), False))
pos += 1
if op2_type == 0: # small constant
operands.append((self.read_byte(pos), True))
pos += 1
else: # variable
operands.append((self.read_byte(pos), False))
pos += 1
elif opcode_byte < 0xB0:
# Short form 1OP or 0OP
op_type = (opcode_byte >> 4) & 3
if op_type == 0: # large constant
operands.append((self.read_word(pos), True))
pos += 2
elif op_type == 1: # small constant
operands.append((self.read_byte(pos), True))
pos += 1
elif op_type == 2: # variable
operands.append((self.read_byte(pos), False))
pos += 1
# op_type == 3: 0OP, no operands
else:
# Variable form
types_byte = self.read_byte(pos)
pos += 1
for i in range(4):
op_type = (types_byte >> (6 - i * 2)) & 3
if op_type == 3: # omitted
break
elif op_type == 0: # large constant
operands.append((self.read_word(pos), True))
pos += 2
elif op_type == 1: # small constant
operands.append((self.read_byte(pos), True))
pos += 1
elif op_type == 2: # variable
operands.append((self.read_byte(pos), False))
pos += 1
return operands, pos - pc
def parse_zstring(self, addr: int) -> int:
"""Parse z-string and return length in bytes."""
pos = addr
while True:
word = self.read_word(pos)
pos += 2
if word & 0x8000:
break
return pos - addr
def parse_branch(self, pc: int) -> tuple[int | None, int]:
"""Parse branch data and return (target_addr, bytes_consumed)."""
branch_byte = self.read_byte(pc)
short_form = (branch_byte & 0x40) != 0
if short_form:
offset = branch_byte & 0x3F
bytes_consumed = 1
else:
offset = ((branch_byte & 0x3F) << 8) | self.read_byte(pc + 1)
if offset >= 0x2000:
offset -= 0x4000
bytes_consumed = 2
# Compute target
if offset == 0 or offset == 1:
# Return true/false - terminal for this path
return None, bytes_consumed
else:
target = pc + bytes_consumed + offset - 2
return target, bytes_consumed
def decode_instruction(self, addr: int) -> tuple[str, str, int, list[int]]:
"""Decode instruction at addr.
Returns (opclass, name, next_addr, targets).
targets is a list of addresses to visit next.
"""
if addr in self.visited_addrs:
return "", "", addr, []
self.visited_addrs.add(addr)
self.instruction_count += 1
opcode_byte = self.read_byte(addr)
pc = addr + 1
# Determine form and opcode
if opcode_byte < 0x80:
# Long form 2OP
opcode_num = opcode_byte & 0x1F
opclass = "2OP"
info = OP2_OPCODES.get(opcode_num)
elif opcode_byte < 0xC0:
# Short form (includes 0xB0-0xBF which are 0OP)
op_type = (opcode_byte >> 4) & 3
if op_type == 3:
# 0OP
opcode_num = opcode_byte & 0x0F
opclass = "0OP"
info = OP0_OPCODES.get(opcode_num)
else:
# 1OP
opcode_num = opcode_byte & 0x0F
opclass = "1OP"
info = OP1_OPCODES.get(opcode_num)
elif opcode_byte < 0xE0:
# Variable form 2OP (0xC0-0xDF)
opcode_num = opcode_byte & 0x1F
opclass = "2OP"
info = OP2_OPCODES.get(opcode_num)
else:
# Variable form VAR (0xE0-0xFF)
opcode_num = opcode_byte & 0x1F
opclass = "VAR"
info = VAR_OPCODES.get(opcode_num)
if info is None:
# Unknown opcode
return opclass, f"unknown_{opcode_num}", pc, []
# Track opcode
self.opcode_counts[(opclass, opcode_num)] += 1
if self.verbose:
print(
f" {addr:05x}: {opclass}:{opcode_num:02d} {info.name}",
file=sys.stderr,
)
# Parse operands
operands, operand_bytes = self.parse_operands(pc, opcode_byte)
pc += operand_bytes
# Handle inline z-string
if info.inline_string:
string_bytes = self.parse_zstring(pc)
pc += string_bytes
# Handle store byte
if info.stores:
pc += 1
# Handle branch
targets = []
if info.branches:
branch_target, branch_bytes = self.parse_branch(pc)
pc += branch_bytes
if branch_target is not None:
targets.append(branch_target)
# Handle call (follow routine + continue after call)
if info.is_call and operands:
packed_addr, is_const = operands[0]
if is_const and packed_addr != 0:
routine_addr = self.unpack_routine_addr(packed_addr)
if routine_addr not in self.visited_routines and routine_addr < len(
self.data
):
self.visited_routines.add(routine_addr)
targets.append(routine_addr)
# Handle jump (terminal but has target)
if opcode_num == 12 and opclass == "1OP" and operands:
offset, _ = operands[0]
if offset >= 0x8000:
offset -= 0x10000
jump_target = pc + offset - 2
targets.append(jump_target)
return opclass, info.name, pc, targets
# Add fall-through if not terminal
if not info.terminal:
targets.append(pc)
return opclass, info.name, pc, targets
def disassemble_routine(self, addr: int):
"""Disassemble a routine starting at addr."""
if addr >= len(self.data):
return
# Parse routine header
num_locals = self.read_byte(addr)
pc = addr + 1
# Skip local variable initial values (V3 only)
if self.version <= 3:
pc += num_locals * 2
# Add first instruction to worklist
self.worklist.append(pc)
def scan_data_for_routines(self):
"""Scan dynamic memory for packed addresses pointing to routines.
Globals and object property tables live in dynamic memory and
contain packed routine addresses. This catches routines reachable
only through indirect calls (variable operands in CALL opcodes).
"""
high_mem = self.read_word(0x04)
found = 0
# Scan every word in dynamic memory (globals, property tables)
for offset in range(0, self.static_mem_base - 1, 2):
packed = self.read_word(offset)
if packed == 0:
continue
addr = self.unpack_routine_addr(packed)
# Must point into the code region
if addr < high_mem or addr >= len(self.data) - 1:
continue
# Must not already be a known routine
if addr in self.visited_routines:
continue
# Must look like a valid routine header (local count 0-15)
num_locals = self.read_byte(addr)
if num_locals > 15:
continue
# First instruction must be within bounds
first_instr = addr + 1
if self.version <= 3:
first_instr += num_locals * 2
if first_instr >= len(self.data):
continue
self.visited_routines.add(addr)
self.disassemble_routine(addr)
found += 1
return found
def _process_worklist(self):
"""Process the instruction worklist until empty."""
while self.worklist:
addr = self.worklist.pop()
if addr in self.visited_addrs or addr >= len(self.data):
continue
opclass, name, next_addr, targets = self.decode_instruction(addr)
for target in targets:
if target >= len(self.data) or target < 0:
continue
if target in self.visited_routines:
num_locals = self.read_byte(target)
first_instr = target + 1
if self.version <= 3:
first_instr += num_locals * 2
if first_instr not in self.visited_addrs:
self.worklist.append(first_instr)
else:
if target not in self.visited_addrs:
self.worklist.append(target)
def disassemble_all(self):
"""Perform complete recursive-descent disassembly."""
# Entry point is a byte address of the first instruction (V1-5)
self.worklist.append(self.entry_point)
# Phase 1: recursive descent from entry point
self._process_worklist()
self.reachable_routines = len(self.visited_routines)
# Phase 2: scan dynamic memory for packed routine addresses
self.scanned_routines = self.scan_data_for_routines()
self._process_worklist()
def generate_report(self) -> str:
"""Generate analysis report."""
lines = []
lines.append("=" * 70)
lines.append("Z-MACHINE OPCODE TRACE REPORT")
lines.append("=" * 70)
lines.append("")
lines.append(f"Story file: {self.story_path}")
lines.append(f"Version: {self.version}")
lines.append(f"Entry point: ${self.entry_point:04x}")
lines.append(f"Story size: {len(self.data)} bytes")
lines.append("")
lines.append("DISASSEMBLY STATISTICS")
lines.append("-" * 70)
lines.append(
f"Routines found (entry-point reachable): {self.reachable_routines}"
)
lines.append(f"Routines found (data scan): {self.scanned_routines}")
lines.append(f"Total routines: {len(self.visited_routines)}")
lines.append(f"Instructions decoded: {self.instruction_count}")
lines.append(f"Unique opcodes: {len(self.opcode_counts)}")
lines.append("")
# Opcodes by class
lines.append("OPCODES FOUND IN STORY")
lines.append("-" * 70)
for opclass_name, opcode_dict in [
("2OP", OP2_OPCODES),
("1OP", OP1_OPCODES),
("0OP", OP0_OPCODES),
("VAR", VAR_OPCODES),
]:
lines.append(f"\n{opclass_name} opcodes:")
found = [
(num, opcode_dict[num].name, self.opcode_counts[(opclass_name, num)])
for num in sorted(opcode_dict.keys())
if (opclass_name, num) in self.opcode_counts
]
if found:
for num, name, count in found:
lines.append(f" {num:2d} {name:20s} (used {count} times)")
else:
lines.append(" (none)")
lines.append("")
lines.append("GAP ANALYSIS: ZVM IMPLEMENTATION STATUS")
lines.append("-" * 70)
implemented = []
missing = []
for (opclass, opcode_num), count in sorted(self.opcode_counts.items()):
if opclass == "2OP":
name = OP2_OPCODES[opcode_num].name
elif opclass == "1OP":
name = OP1_OPCODES[opcode_num].name
elif opclass == "0OP":
name = OP0_OPCODES[opcode_num].name
elif opclass == "VAR":
name = VAR_OPCODES[opcode_num].name
else:
name = "unknown"
key = (opclass, opcode_num)
if key in ZVM_IMPLEMENTED:
implemented.append((opclass, opcode_num, name, count))
else:
missing.append((opclass, opcode_num, name, count))
lines.append("\nImplemented in zvm:")
for opclass, num, name, count in implemented:
lines.append(f" {opclass}:{num:2d} {name:20s} (used {count} times)")
lines.append("\nMissing from zvm (need porting):")
for opclass, num, name, count in missing:
lines.append(f" {opclass}:{num:2d} {name:20s} (used {count} times)")
lines.append("")
lines.append("SUMMARY")
lines.append("-" * 70)
total = len(self.opcode_counts)
impl_count = len(implemented)
missing_count = len(missing)
lines.append(
f"{total} unique opcodes found in story, "
f"{impl_count} already in zvm, {missing_count} need porting"
)
lines.append("")
return "\n".join(lines)
def main():
    """Command-line entry point: disassemble a story file and print the report."""
    cli = argparse.ArgumentParser(
        description="Trace z-machine opcodes in a story file"
    )
    cli.add_argument(
        "story",
        nargs="?",
        default="content/stories/zork1.z3",
        help="Path to z-machine story file (default: content/stories/zork1.z3)",
    )
    cli.add_argument(
        "--verbose",
        "-v",
        action="store_true",
        help="Print each opcode as it's found",
    )
    options = cli.parse_args()

    target = Path(options.story)
    if not target.exists():
        # Report the missing file on stderr and exit with a failure status.
        print(f"Error: Story file not found: {target}", file=sys.stderr)
        raise SystemExit(1)

    if options.verbose:
        print(f"Disassembling {target}...", file=sys.stderr)

    machine = ZMachine(target, verbose=options.verbose)
    machine.disassemble_all()
    print(machine.generate_report())


if __name__ == "__main__":
    main()