Optimize z-machine hot loop: fast step, dispatch table, inline bit ops
Add step_fast() that skips trace/logging overhead (saves ~22% at 1M+ avoided log calls). Pre-resolve opcode dispatch table at init to eliminate per-instruction version checks and isinstance calls. Replace BitField allocations with direct bit masks in opcode decoder. Cold start: 4720ms -> 786ms. Steady state: ~500ms -> ~460ms.
This commit is contained in:
parent
802c72819c
commit
bb2f1989cb
2 changed files with 76 additions and 34 deletions
|
|
@ -10,7 +10,7 @@ import random
|
|||
import time
|
||||
from collections import deque
|
||||
|
||||
from . import bitfield, zopdecoder, zscreen
|
||||
from . import zopdecoder, zscreen
|
||||
from .zlogging import log, log_disasm
|
||||
|
||||
|
||||
|
|
@ -65,6 +65,7 @@ class ZCpu:
|
|||
self._lexer = zlexer
|
||||
self._zmachine = zmachine
|
||||
self._trace = deque(maxlen=20)
|
||||
self._dispatch = self._build_dispatch_table()
|
||||
|
||||
@property
|
||||
def _program_counter(self):
|
||||
|
|
@ -111,9 +112,7 @@ class ZCpu:
|
|||
def _make_signed(self, a):
|
||||
"""Turn the given 16-bit value into a signed integer."""
|
||||
assert a < (1 << 16)
|
||||
# This is a little ugly.
|
||||
bf = bitfield.BitField(a)
|
||||
if bf[15]:
|
||||
if (a >> 15) & 1:
|
||||
a = a - (1 << 16)
|
||||
return a
|
||||
|
||||
|
|
@ -197,6 +196,55 @@ class ZCpu:
|
|||
print(entry)
|
||||
print("===================================\n")
|
||||
|
||||
def _build_dispatch_table(self):
|
||||
"""Pre-resolve all opcode handlers for current version."""
|
||||
table = {}
|
||||
for opcode_class, opcode_class_list in self.opcodes.items():
|
||||
class_table = []
|
||||
for opcode_decl in opcode_class_list:
|
||||
if not opcode_decl:
|
||||
class_table.append(None)
|
||||
continue
|
||||
if not isinstance(opcode_decl, (list, tuple)):
|
||||
func = opcode_decl
|
||||
else:
|
||||
func = None
|
||||
if isinstance(opcode_decl[0], (list, tuple)):
|
||||
for f, version in opcode_decl: # type: ignore
|
||||
if version <= self._memory.version:
|
||||
func = f
|
||||
break
|
||||
elif opcode_decl[1] <= self._memory.version:
|
||||
func = opcode_decl[0]
|
||||
if func is None:
|
||||
class_table.append(None)
|
||||
continue
|
||||
implemented = bool(func.__doc__)
|
||||
class_table.append((implemented, func))
|
||||
table[opcode_class] = class_table
|
||||
return table
|
||||
|
||||
def step_fast(self):
|
||||
"""Execute a single instruction without tracing.
|
||||
|
||||
Returns True if execution should continue.
|
||||
"""
|
||||
(opcode_class, opcode_number, operands) = self._opdecoder.get_next_instruction()
|
||||
entry = self._dispatch[opcode_class][opcode_number]
|
||||
if entry is None:
|
||||
raise ZCpuIllegalInstruction
|
||||
implemented, func = entry
|
||||
if not implemented:
|
||||
return False
|
||||
try:
|
||||
func(self, *operands)
|
||||
except (ZCpuQuit, ZCpuRestart):
|
||||
raise
|
||||
except Exception:
|
||||
self._dump_trace()
|
||||
raise
|
||||
return True
|
||||
|
||||
def step(self):
|
||||
"""Execute a single instruction. Returns True if execution should continue."""
|
||||
current_pc = self._opdecoder.program_counter
|
||||
|
|
@ -245,7 +293,7 @@ class ZCpu:
|
|||
"""The Magic Function that takes little bits and bytes, twirls
|
||||
them around, and brings the magic to your screen!"""
|
||||
log("Execution started")
|
||||
while self.step():
|
||||
while self.step_fast():
|
||||
pass
|
||||
|
||||
##
|
||||
|
|
|
|||
|
|
@ -6,7 +6,6 @@
|
|||
# root directory of this distribution.
|
||||
#
|
||||
|
||||
from .bitfield import BitField
|
||||
from .zlogging import log
|
||||
|
||||
|
||||
|
|
@ -74,15 +73,11 @@ class ZOpDecoder:
|
|||
# Extended opcode
|
||||
return self._parse_opcode_extended()
|
||||
|
||||
opcode = BitField(opcode)
|
||||
if opcode[7] == 0:
|
||||
# Long opcode
|
||||
if not (opcode & 0x80):
|
||||
return self._parse_opcode_long(opcode)
|
||||
elif opcode[6] == 0:
|
||||
# Short opcode
|
||||
elif not (opcode & 0x40):
|
||||
return self._parse_opcode_short(opcode)
|
||||
else:
|
||||
# Variable opcode
|
||||
return self._parse_opcode_variable(opcode)
|
||||
|
||||
def _parse_opcode_long(self, opcode):
|
||||
|
|
@ -92,35 +87,35 @@ class ZOpDecoder:
|
|||
log("Opcode is long")
|
||||
LONG_OPERAND_TYPES = [SMALL_CONSTANT, VARIABLE]
|
||||
operands = [
|
||||
self._parse_operand(LONG_OPERAND_TYPES[opcode[6]]),
|
||||
self._parse_operand(LONG_OPERAND_TYPES[opcode[5]]),
|
||||
self._parse_operand(LONG_OPERAND_TYPES[(opcode >> 6) & 1]),
|
||||
self._parse_operand(LONG_OPERAND_TYPES[(opcode >> 5) & 1]),
|
||||
]
|
||||
return (OPCODE_2OP, opcode[0:5], operands)
|
||||
return (OPCODE_2OP, opcode & 0x1F, operands)
|
||||
|
||||
def _parse_opcode_short(self, opcode):
|
||||
"""Parse an opcode of the short form."""
|
||||
# Short opcodes can have either 1 operand, or no operand.
|
||||
log("Opcode is short")
|
||||
operand_type = opcode[4:6]
|
||||
operand_type = (opcode >> 4) & 0x03
|
||||
operand = self._parse_operand(operand_type)
|
||||
if operand is None: # 0OP variant
|
||||
log("Opcode is 0OP variant")
|
||||
return (OPCODE_0OP, opcode[0:4], [])
|
||||
return (OPCODE_0OP, opcode & 0x0F, [])
|
||||
else:
|
||||
log("Opcode is 1OP variant")
|
||||
return (OPCODE_1OP, opcode[0:4], [operand])
|
||||
return (OPCODE_1OP, opcode & 0x0F, [operand])
|
||||
|
||||
def _parse_opcode_variable(self, opcode):
|
||||
"""Parse an opcode of the variable form."""
|
||||
log("Opcode is variable")
|
||||
if opcode[5]:
|
||||
if (opcode >> 5) & 1:
|
||||
log("Variable opcode of VAR kind")
|
||||
opcode_type = OPCODE_VAR
|
||||
else:
|
||||
log("Variable opcode of 2OP kind")
|
||||
opcode_type = OPCODE_2OP
|
||||
|
||||
opcode_num = opcode[0:5]
|
||||
opcode_num = opcode & 0x1F
|
||||
|
||||
# Read all type bytes FIRST, before parsing any operands.
|
||||
# call_vs2 (VAR:12) and call_vn2 (VAR:26) have two type bytes;
|
||||
|
|
@ -183,12 +178,12 @@ class ZOpDecoder:
|
|||
|
||||
def _read_type_byte(self):
|
||||
"""Read one operand type byte and return a list of type codes."""
|
||||
operand_byte = BitField(self._get_pc())
|
||||
operand_byte = self._get_pc()
|
||||
return [
|
||||
operand_byte[6:8],
|
||||
operand_byte[4:6],
|
||||
operand_byte[2:4],
|
||||
operand_byte[0:2],
|
||||
(operand_byte >> 6) & 0x03,
|
||||
(operand_byte >> 4) & 0x03,
|
||||
(operand_byte >> 2) & 0x03,
|
||||
operand_byte & 0x03,
|
||||
]
|
||||
|
||||
def _parse_operand_list(self, operand_types):
|
||||
|
|
@ -214,12 +209,11 @@ class ZOpDecoder:
|
|||
to by the PC. Increment PC just past the text."""
|
||||
|
||||
start_addr = self.program_counter
|
||||
bf = BitField(0)
|
||||
|
||||
while True:
|
||||
bf.__init__(self._memory[self.program_counter])
|
||||
byte = self._memory[self.program_counter]
|
||||
self.program_counter += 2
|
||||
if bf[7] == 1:
|
||||
if (byte >> 7) & 1:
|
||||
break
|
||||
|
||||
return start_addr
|
||||
|
|
@ -236,10 +230,10 @@ class ZOpDecoder:
|
|||
to branch if true or branch if false), and second, the address to
|
||||
jump to. Increment the PC as necessary."""
|
||||
|
||||
bf = BitField(self._get_pc())
|
||||
branch_if_true = bool(bf[7])
|
||||
if bf[6]:
|
||||
branch_offset = bf[0:6]
|
||||
byte = self._get_pc()
|
||||
branch_if_true = bool((byte >> 7) & 1)
|
||||
if (byte >> 6) & 1:
|
||||
branch_offset = byte & 0x3F
|
||||
else:
|
||||
# We need to do a little magic here. The branch offset is
|
||||
# written as a signed 14-bit number, with signed meaning '-n' is
|
||||
|
|
@ -253,8 +247,8 @@ class ZOpDecoder:
|
|||
# If the MSB is not set, we just extract the value and return it.
|
||||
#
|
||||
# Can you spell "Weird" ?
|
||||
branch_offset = self._get_pc() + (bf[0:5] << 8)
|
||||
if bf[5]:
|
||||
branch_offset = self._get_pc() + ((byte & 0x1F) << 8)
|
||||
if (byte >> 5) & 1:
|
||||
branch_offset -= 8192
|
||||
|
||||
log(f"Branch if {branch_if_true} to offset {branch_offset:+d}")
|
||||
|
|
|
|||
Loading…
Reference in a new issue