Optimize z-machine hot loop: fast step, dispatch table, inline bit ops

Add step_fast(), which skips trace/logging overhead (saves ~22% when
1M+ log calls are avoided). Pre-resolve the opcode dispatch table at
init to eliminate per-instruction version checks and isinstance calls.
Replace BitField allocations with direct bit masks in the opcode decoder.

Cold start: 4720ms -> 786ms. Steady state: ~500ms -> ~460ms.
This commit is contained in:
Jared Miller 2026-02-10 15:05:34 -05:00
parent 802c72819c
commit bb2f1989cb
Signed by: shmup
GPG key ID: 22B5C6D66A38B06C
2 changed files with 76 additions and 34 deletions

View file

@ -10,7 +10,7 @@ import random
import time import time
from collections import deque from collections import deque
from . import bitfield, zopdecoder, zscreen from . import zopdecoder, zscreen
from .zlogging import log, log_disasm from .zlogging import log, log_disasm
@ -65,6 +65,7 @@ class ZCpu:
self._lexer = zlexer self._lexer = zlexer
self._zmachine = zmachine self._zmachine = zmachine
self._trace = deque(maxlen=20) self._trace = deque(maxlen=20)
self._dispatch = self._build_dispatch_table()
@property @property
def _program_counter(self): def _program_counter(self):
@ -111,9 +112,7 @@ class ZCpu:
def _make_signed(self, a): def _make_signed(self, a):
"""Turn the given 16-bit value into a signed integer.""" """Turn the given 16-bit value into a signed integer."""
assert a < (1 << 16) assert a < (1 << 16)
# This is a little ugly. if (a >> 15) & 1:
bf = bitfield.BitField(a)
if bf[15]:
a = a - (1 << 16) a = a - (1 << 16)
return a return a
@ -197,6 +196,55 @@ class ZCpu:
print(entry) print(entry)
print("===================================\n") print("===================================\n")
def _build_dispatch_table(self):
"""Pre-resolve all opcode handlers for current version."""
table = {}
for opcode_class, opcode_class_list in self.opcodes.items():
class_table = []
for opcode_decl in opcode_class_list:
if not opcode_decl:
class_table.append(None)
continue
if not isinstance(opcode_decl, (list, tuple)):
func = opcode_decl
else:
func = None
if isinstance(opcode_decl[0], (list, tuple)):
for f, version in opcode_decl: # type: ignore
if version <= self._memory.version:
func = f
break
elif opcode_decl[1] <= self._memory.version:
func = opcode_decl[0]
if func is None:
class_table.append(None)
continue
implemented = bool(func.__doc__)
class_table.append((implemented, func))
table[opcode_class] = class_table
return table
def step_fast(self):
    """Execute a single instruction without tracing.

    Decodes the next instruction, looks its handler up in the
    pre-resolved ``self._dispatch`` table and calls it with the decoded
    operands. No trace entry or log line is produced on this path.

    Returns True if execution should continue, False when the resolved
    handler is flagged as not implemented.

    Raises ZCpuIllegalInstruction when the table has no handler for the
    decoded opcode. ZCpuQuit and ZCpuRestart raised by a handler pass
    through untouched; any other exception triggers a trace dump and is
    then re-raised.
    """
    opcode_class, opcode_number, operands = self._opdecoder.get_next_instruction()
    try:
        entry = self._dispatch[opcode_class][opcode_number]
    except (KeyError, IndexError):
        # An opcode number beyond the end of its class table (or an
        # unknown class) is just as illegal as an explicit None slot;
        # report it the same way instead of leaking a lookup error.
        entry = None
    if entry is None:
        raise ZCpuIllegalInstruction
    implemented, func = entry
    if not implemented:
        return False
    try:
        func(self, *operands)
    except (ZCpuQuit, ZCpuRestart):
        # Control-flow signals, not failures: no trace dump.
        raise
    except Exception:
        self._dump_trace()
        raise
    return True
def step(self): def step(self):
"""Execute a single instruction. Returns True if execution should continue.""" """Execute a single instruction. Returns True if execution should continue."""
current_pc = self._opdecoder.program_counter current_pc = self._opdecoder.program_counter
@ -245,7 +293,7 @@ class ZCpu:
"""The Magic Function that takes little bits and bytes, twirls """The Magic Function that takes little bits and bytes, twirls
them around, and brings the magic to your screen!""" them around, and brings the magic to your screen!"""
log("Execution started") log("Execution started")
while self.step(): while self.step_fast():
pass pass
## ##

View file

@ -6,7 +6,6 @@
# root directory of this distribution. # root directory of this distribution.
# #
from .bitfield import BitField
from .zlogging import log from .zlogging import log
@ -74,15 +73,11 @@ class ZOpDecoder:
# Extended opcode # Extended opcode
return self._parse_opcode_extended() return self._parse_opcode_extended()
opcode = BitField(opcode) if not (opcode & 0x80):
if opcode[7] == 0:
# Long opcode
return self._parse_opcode_long(opcode) return self._parse_opcode_long(opcode)
elif opcode[6] == 0: elif not (opcode & 0x40):
# Short opcode
return self._parse_opcode_short(opcode) return self._parse_opcode_short(opcode)
else: else:
# Variable opcode
return self._parse_opcode_variable(opcode) return self._parse_opcode_variable(opcode)
def _parse_opcode_long(self, opcode): def _parse_opcode_long(self, opcode):
@ -92,35 +87,35 @@ class ZOpDecoder:
log("Opcode is long") log("Opcode is long")
LONG_OPERAND_TYPES = [SMALL_CONSTANT, VARIABLE] LONG_OPERAND_TYPES = [SMALL_CONSTANT, VARIABLE]
operands = [ operands = [
self._parse_operand(LONG_OPERAND_TYPES[opcode[6]]), self._parse_operand(LONG_OPERAND_TYPES[(opcode >> 6) & 1]),
self._parse_operand(LONG_OPERAND_TYPES[opcode[5]]), self._parse_operand(LONG_OPERAND_TYPES[(opcode >> 5) & 1]),
] ]
return (OPCODE_2OP, opcode[0:5], operands) return (OPCODE_2OP, opcode & 0x1F, operands)
def _parse_opcode_short(self, opcode): def _parse_opcode_short(self, opcode):
"""Parse an opcode of the short form.""" """Parse an opcode of the short form."""
# Short opcodes can have either 1 operand, or no operand. # Short opcodes can have either 1 operand, or no operand.
log("Opcode is short") log("Opcode is short")
operand_type = opcode[4:6] operand_type = (opcode >> 4) & 0x03
operand = self._parse_operand(operand_type) operand = self._parse_operand(operand_type)
if operand is None: # 0OP variant if operand is None: # 0OP variant
log("Opcode is 0OP variant") log("Opcode is 0OP variant")
return (OPCODE_0OP, opcode[0:4], []) return (OPCODE_0OP, opcode & 0x0F, [])
else: else:
log("Opcode is 1OP variant") log("Opcode is 1OP variant")
return (OPCODE_1OP, opcode[0:4], [operand]) return (OPCODE_1OP, opcode & 0x0F, [operand])
def _parse_opcode_variable(self, opcode): def _parse_opcode_variable(self, opcode):
"""Parse an opcode of the variable form.""" """Parse an opcode of the variable form."""
log("Opcode is variable") log("Opcode is variable")
if opcode[5]: if (opcode >> 5) & 1:
log("Variable opcode of VAR kind") log("Variable opcode of VAR kind")
opcode_type = OPCODE_VAR opcode_type = OPCODE_VAR
else: else:
log("Variable opcode of 2OP kind") log("Variable opcode of 2OP kind")
opcode_type = OPCODE_2OP opcode_type = OPCODE_2OP
opcode_num = opcode[0:5] opcode_num = opcode & 0x1F
# Read all type bytes FIRST, before parsing any operands. # Read all type bytes FIRST, before parsing any operands.
# call_vs2 (VAR:12) and call_vn2 (VAR:26) have two type bytes; # call_vs2 (VAR:12) and call_vn2 (VAR:26) have two type bytes;
@ -183,12 +178,12 @@ class ZOpDecoder:
def _read_type_byte(self): def _read_type_byte(self):
"""Read one operand type byte and return a list of type codes.""" """Read one operand type byte and return a list of type codes."""
operand_byte = BitField(self._get_pc()) operand_byte = self._get_pc()
return [ return [
operand_byte[6:8], (operand_byte >> 6) & 0x03,
operand_byte[4:6], (operand_byte >> 4) & 0x03,
operand_byte[2:4], (operand_byte >> 2) & 0x03,
operand_byte[0:2], operand_byte & 0x03,
] ]
def _parse_operand_list(self, operand_types): def _parse_operand_list(self, operand_types):
@ -214,12 +209,11 @@ class ZOpDecoder:
to by the PC. Increment PC just past the text.""" to by the PC. Increment PC just past the text."""
start_addr = self.program_counter start_addr = self.program_counter
bf = BitField(0)
while True: while True:
bf.__init__(self._memory[self.program_counter]) byte = self._memory[self.program_counter]
self.program_counter += 2 self.program_counter += 2
if bf[7] == 1: if (byte >> 7) & 1:
break break
return start_addr return start_addr
@ -236,10 +230,10 @@ class ZOpDecoder:
to branch if true or branch if false), and second, the address to to branch if true or branch if false), and second, the address to
jump to. Increment the PC as necessary.""" jump to. Increment the PC as necessary."""
bf = BitField(self._get_pc()) byte = self._get_pc()
branch_if_true = bool(bf[7]) branch_if_true = bool((byte >> 7) & 1)
if bf[6]: if (byte >> 6) & 1:
branch_offset = bf[0:6] branch_offset = byte & 0x3F
else: else:
# We need to do a little magic here. The branch offset is # We need to do a little magic here. The branch offset is
# written as a signed 14-bit number, with signed meaning '-n' is # written as a signed 14-bit number, with signed meaning '-n' is
@ -253,8 +247,8 @@ class ZOpDecoder:
# If the MSB is not set, we just extract the value and return it. # If the MSB is not set, we just extract the value and return it.
# #
# Can you spell "Weird" ? # Can you spell "Weird" ?
branch_offset = self._get_pc() + (bf[0:5] << 8) branch_offset = self._get_pc() + ((byte & 0x1F) << 8)
if bf[5]: if (byte >> 5) & 1:
branch_offset -= 8192 branch_offset -= 8192
log(f"Branch if {branch_if_true} to offset {branch_offset:+d}") log(f"Branch if {branch_if_true} to offset {branch_offset:+d}")