Optimize z-machine hot loop: fast step, dispatch table, inline bit ops

Add step_fast() that skips trace/logging overhead (saves ~22% by
avoiding 1M+ log calls). Pre-resolve opcode dispatch table at init to
eliminate per-instruction version checks and isinstance calls. Replace
BitField allocations with direct bit masks in opcode decoder.

Cold start: 4720ms -> 786ms. Steady state: ~500ms -> ~460ms.
This commit is contained in:
Jared Miller 2026-02-10 15:05:34 -05:00
parent 802c72819c
commit bb2f1989cb
Signed by: shmup
GPG key ID: 22B5C6D66A38B06C
2 changed files with 76 additions and 34 deletions

View file

@ -10,7 +10,7 @@ import random
import time
from collections import deque
from . import bitfield, zopdecoder, zscreen
from . import zopdecoder, zscreen
from .zlogging import log, log_disasm
@ -65,6 +65,7 @@ class ZCpu:
self._lexer = zlexer
self._zmachine = zmachine
self._trace = deque(maxlen=20)
self._dispatch = self._build_dispatch_table()
@property
def _program_counter(self):
@ -111,9 +112,7 @@ class ZCpu:
def _make_signed(self, a):
"""Turn the given 16-bit value into a signed integer."""
assert a < (1 << 16)
# This is a little ugly.
bf = bitfield.BitField(a)
if bf[15]:
if (a >> 15) & 1:
a = a - (1 << 16)
return a
@ -197,6 +196,55 @@ class ZCpu:
print(entry)
print("===================================\n")
def _build_dispatch_table(self):
"""Pre-resolve all opcode handlers for current version."""
table = {}
for opcode_class, opcode_class_list in self.opcodes.items():
class_table = []
for opcode_decl in opcode_class_list:
if not opcode_decl:
class_table.append(None)
continue
if not isinstance(opcode_decl, (list, tuple)):
func = opcode_decl
else:
func = None
if isinstance(opcode_decl[0], (list, tuple)):
for f, version in opcode_decl: # type: ignore
if version <= self._memory.version:
func = f
break
elif opcode_decl[1] <= self._memory.version:
func = opcode_decl[0]
if func is None:
class_table.append(None)
continue
implemented = bool(func.__doc__)
class_table.append((implemented, func))
table[opcode_class] = class_table
return table
def step_fast(self):
    """Execute a single instruction without tracing.

    Decodes the next instruction via ``self._opdecoder`` and looks its
    handler up in the pre-built ``self._dispatch`` table. This method
    records nothing into the trace buffer itself.

    Returns:
        True if the handler ran and execution should continue; False if
        the resolved handler is flagged as not implemented.

    Raises:
        ZCpuIllegalInstruction: if the dispatch table has no handler for
            the decoded opcode, either because the slot is empty or
            because the class/number falls outside the table.
        ZCpuQuit, ZCpuRestart: re-raised untouched from the handler.
        Exception: any other handler error is re-raised after
            ``self._dump_trace()`` has been called.
    """
    opcode_class, opcode_number, operands = self._opdecoder.get_next_instruction()

    # An opcode number past the end of its class table (or an unknown
    # class) is just as illegal as an empty slot; report it the same way
    # rather than leaking a bare IndexError/KeyError.
    try:
        entry = self._dispatch[opcode_class][opcode_number]
    except (KeyError, IndexError):
        entry = None
    if entry is None:
        raise ZCpuIllegalInstruction

    implemented, func = entry
    if not implemented:
        return False

    try:
        func(self, *operands)
    except (ZCpuQuit, ZCpuRestart):
        # Control-flow signals, not failures: propagate without a dump.
        raise
    except Exception:
        self._dump_trace()
        raise
    return True
def step(self):
"""Execute a single instruction. Returns True if execution should continue."""
current_pc = self._opdecoder.program_counter
@ -245,7 +293,7 @@ class ZCpu:
"""The Magic Function that takes little bits and bytes, twirls
them around, and brings the magic to your screen!"""
log("Execution started")
while self.step():
while self.step_fast():
pass
##

View file

@ -6,7 +6,6 @@
# root directory of this distribution.
#
from .bitfield import BitField
from .zlogging import log
@ -74,15 +73,11 @@ class ZOpDecoder:
# Extended opcode
return self._parse_opcode_extended()
opcode = BitField(opcode)
if opcode[7] == 0:
# Long opcode
if not (opcode & 0x80):
return self._parse_opcode_long(opcode)
elif opcode[6] == 0:
# Short opcode
elif not (opcode & 0x40):
return self._parse_opcode_short(opcode)
else:
# Variable opcode
return self._parse_opcode_variable(opcode)
def _parse_opcode_long(self, opcode):
@ -92,35 +87,35 @@ class ZOpDecoder:
log("Opcode is long")
LONG_OPERAND_TYPES = [SMALL_CONSTANT, VARIABLE]
operands = [
self._parse_operand(LONG_OPERAND_TYPES[opcode[6]]),
self._parse_operand(LONG_OPERAND_TYPES[opcode[5]]),
self._parse_operand(LONG_OPERAND_TYPES[(opcode >> 6) & 1]),
self._parse_operand(LONG_OPERAND_TYPES[(opcode >> 5) & 1]),
]
return (OPCODE_2OP, opcode[0:5], operands)
return (OPCODE_2OP, opcode & 0x1F, operands)
def _parse_opcode_short(self, opcode):
"""Parse an opcode of the short form."""
# Short opcodes can have either 1 operand, or no operand.
log("Opcode is short")
operand_type = opcode[4:6]
operand_type = (opcode >> 4) & 0x03
operand = self._parse_operand(operand_type)
if operand is None: # 0OP variant
log("Opcode is 0OP variant")
return (OPCODE_0OP, opcode[0:4], [])
return (OPCODE_0OP, opcode & 0x0F, [])
else:
log("Opcode is 1OP variant")
return (OPCODE_1OP, opcode[0:4], [operand])
return (OPCODE_1OP, opcode & 0x0F, [operand])
def _parse_opcode_variable(self, opcode):
"""Parse an opcode of the variable form."""
log("Opcode is variable")
if opcode[5]:
if (opcode >> 5) & 1:
log("Variable opcode of VAR kind")
opcode_type = OPCODE_VAR
else:
log("Variable opcode of 2OP kind")
opcode_type = OPCODE_2OP
opcode_num = opcode[0:5]
opcode_num = opcode & 0x1F
# Read all type bytes FIRST, before parsing any operands.
# call_vs2 (VAR:12) and call_vn2 (VAR:26) have two type bytes;
@ -183,12 +178,12 @@ class ZOpDecoder:
def _read_type_byte(self):
"""Read one operand type byte and return a list of type codes."""
operand_byte = BitField(self._get_pc())
operand_byte = self._get_pc()
return [
operand_byte[6:8],
operand_byte[4:6],
operand_byte[2:4],
operand_byte[0:2],
(operand_byte >> 6) & 0x03,
(operand_byte >> 4) & 0x03,
(operand_byte >> 2) & 0x03,
operand_byte & 0x03,
]
def _parse_operand_list(self, operand_types):
@ -214,12 +209,11 @@ class ZOpDecoder:
to by the PC. Increment PC just past the text."""
start_addr = self.program_counter
bf = BitField(0)
while True:
bf.__init__(self._memory[self.program_counter])
byte = self._memory[self.program_counter]
self.program_counter += 2
if bf[7] == 1:
if (byte >> 7) & 1:
break
return start_addr
@ -236,10 +230,10 @@ class ZOpDecoder:
to branch if true or branch if false), and second, the address to
jump to. Increment the PC as necessary."""
bf = BitField(self._get_pc())
branch_if_true = bool(bf[7])
if bf[6]:
branch_offset = bf[0:6]
byte = self._get_pc()
branch_if_true = bool((byte >> 7) & 1)
if (byte >> 6) & 1:
branch_offset = byte & 0x3F
else:
# We need to do a little magic here. The branch offset is
# written as a signed 14-bit number, with signed meaning '-n' is
@ -253,8 +247,8 @@ class ZOpDecoder:
# If the MSB is not set, we just extract the value and return it.
#
# Can you spell "Weird" ?
branch_offset = self._get_pc() + (bf[0:5] << 8)
if bf[5]:
branch_offset = self._get_pc() + ((byte & 0x1F) << 8)
if (byte >> 5) & 1:
branch_offset -= 8192
log(f"Branch if {branch_if_true} to offset {branch_offset:+d}")