mud/src/mudlib/zmachine/zstring.py

480 lines
16 KiB
Python

#
# A ZString-to-Unicode Universal Translator.
#
# For the license of this file, please consult the LICENSE file in the
# root directory of this distribution.
#
import itertools
from .zlogging import log
class ZStringEndOfString(Exception):
    """Signal that a Z-string has been read to its end.

    Raised while stepping through a string once the last block has been
    consumed; used internally to terminate the read loop.
    """
class ZStringIllegalAbbrevInString(Exception):
    """Signal an abbreviation reference where abbreviations are forbidden.

    Raised when a string that is being decoded with abbreviations
    disallowed (for instance, the text of an abbreviation itself) contains
    an abbreviation code.
    """
class ZStringTranslator:
    """Unpack a Z-string stored in memory into its raw 5-bit Z-characters.

    A read position is a 3-tuple ``(address, word, index)``: the address
    of the current 16-bit block, that block wrapped in a ``BitField``, and
    the index (0-2) of the current character within the block.
    """

    def __init__(self, zmem):
        self._mem = zmem

    def get(self, addr):
        """Return the list of Z-characters of the string starting at *addr*."""
        from .bitfield import BitField

        zchars = []
        cursor = (addr, BitField(self._mem.read_word(addr)), 0)
        try:
            # Runs until _next_pos signals that the final block is used up.
            while True:
                zchars.append(self._read_char(cursor))
                cursor = self._next_pos(cursor)
        except ZStringEndOfString:
            pass
        return zchars

    def _read_char(self, pos):
        """Extract the 5-bit character selected by *pos* from its block."""
        _, word, index = pos
        # Character 0 sits in the highest of the three 5-bit groups.
        low_bit = 5 * (2 - index)
        return word[low_bit : low_bit + 5]

    def _is_final(self, pos):
        """Tell whether the block of *pos* has bit 15 (end marker) set."""
        word = pos[1]
        return word[15] == 1

    def _next_pos(self, pos):
        """Return the position following *pos*.

        Raises ZStringEndOfString when *pos* is the third character of a
        block whose end marker is set.
        """
        from .bitfield import BitField

        addr, word, index = pos
        index += 1
        if index != 3:
            # Still inside the current block: only the counter moves.
            return (addr, word, index)
        if self._is_final(pos):
            # The block just finished was the last one of the string.
            raise ZStringEndOfString
        # Move on to the first character of the following block.
        following = addr + 2
        return (following, BitField(self._mem.read_word(following)), 0)
class ZCharTranslator:
    """Translate sequences of 5-bit Z-characters into ZSCII codes.

    The translator keeps three alphabet rows (A0, A1, A2), a table of
    per-version handlers for the special Z-characters 1-5, and the decoded
    abbreviation strings of the story file.
    """

    # The default alphabet tables for ZChar translation.
    # As the codes 0-5 are special, alphabets start with code 0x6.
    DEFAULT_A0 = [ord(x) for x in "abcdefghijklmnopqrstuvwxyz"]
    DEFAULT_A1 = [ord(x) for x in "ABCDEFGHIJKLMNOPQRSTUVWXYZ"]
    # A2 also has 0x6 as special char, so they start at 0x7.
    DEFAULT_A2 = [ord(x) for x in "\n0123456789.,!?_#'\"/\\-:()"]
    DEFAULT_A2_V5 = [ord(x) for x in "\n0123456789.,!?_#'\"/\\-:()"]
    # Version 1 has its own A2 row: no newline (Z-char 1 is the newline
    # there) and an extra '<'. It also starts at 0x7.
    DEFAULT_A2_V1 = [ord(x) for x in "0123456789.,!?_#'\"/\\<-:()"]
    ALPHA = (DEFAULT_A0, DEFAULT_A1, DEFAULT_A2)
    ALPHA_V5 = (DEFAULT_A0, DEFAULT_A1, DEFAULT_A2_V5)
    ALPHA_V1 = (DEFAULT_A0, DEFAULT_A1, DEFAULT_A2_V1)

    def __init__(self, zmem):
        self._mem = zmem
        # Initialize the alphabets
        version = self._mem.version
        if version >= 5:
            self._alphabet = self._load_custom_alphabet() or self.ALPHA_V5
        elif version == 1:
            self._alphabet = self.ALPHA_V1
        else:
            self._alphabet = self.ALPHA
        # Initialize the special state handlers
        self._load_specials()
        # Initialize the abbreviations (if supported)
        self._load_abbrev_tables()

    def _load_custom_alphabet(self):
        """Check for the existence of a custom alphabet, and load it
        if it does exist. Return the custom alphabet if it was found,
        None otherwise.

        The returned A2 row uses the same 25-entry layout as DEFAULT_A2
        (first entry is Z-char 7), so that get() can index every A2 row
        the same way.
        """
        # The custom alphabet table address is the word at 0x34. The whole
        # word must be tested: an address below 0x100 has a zero high byte.
        alph_addr = self._mem.read_word(0x34)
        if alph_addr == 0:
            return None
        table = self._mem[alph_addr : alph_addr + 78]
        # The table holds 26 entries per row, for Z-chars 6-31. In A2,
        # Z-char 6 is always the ZSCII escape and Z-char 7 is always a
        # newline, whatever the table says, so those two entries are
        # dropped and the newline is supplied from the default row.
        a2 = [self.DEFAULT_A2[0]] + list(table[54:78])
        return [list(table[0:26]), list(table[26:52]), a2]

    def _load_abbrev_tables(self):
        """Decode the story's abbreviation strings into self._abbrevs.

        Keys are ``(subtable, index)`` pairs, values are lists of ZSCII
        codes. The dict is left empty for version 1 and for stories whose
        abbreviation table address (header word 0x18) is zero.
        """
        self._abbrevs = {}
        # If the ZM doesn't do abbrevs, just return an empty dict.
        if self._mem.version == 1:
            return
        abbrev_base = self._mem.read_word(0x18)
        if abbrev_base == 0:
            # No abbreviation table: don't decode header bytes as strings.
            return
        # Build ourselves a ZStringTranslator for the abbrevs.
        xlator = ZStringTranslator(self._mem)

        def _load_subtable(num, base):
            # Each subtable is 32 word-sized entries, 64 bytes in total.
            for i in range(32):
                zoff = base + (num * 64) + (i * 2)
                zaddr = self._mem.read_word(zoff)
                zstr = xlator.get(self._mem.word_address(zaddr))
                zchr = self.get(zstr, allow_abbreviations=False)
                self._abbrevs[(num, i)] = zchr

        _load_subtable(0, abbrev_base)
        # Does this ZM support the extended abbrev tables?
        if self._mem.version >= 3:
            _load_subtable(1, abbrev_base)
            _load_subtable(2, abbrev_base)

    def _load_specials(self):
        """Load the special character code handlers for the current
        machine version.

        Fills self._specials with a mapping from special Z-character
        (1-5) to a one-argument callable taking the decoding state dict.
        """

        # The following three functions define the three possible
        # special character code handlers.
        def newline(state):
            """Append ZSCII 13 (newline) to the output."""
            state["zscii"].append(13)

        def shift_alphabet(state, direction, lock):
            """Shift the current alphabet up or down. If lock is
            False, the alphabet will revert to the previous alphabet
            after outputting 1 character. Else, the alphabet will
            remain unchanged until the next shift.
            """
            state["curr_alpha"] = (state["curr_alpha"] + direction) % 3
            if lock:
                state["prev_alpha"] = state["curr_alpha"]

        def abbreviation(state, abbrev):
            """Insert the given abbreviation from the given table into
            the output stream.

            This character was an abbreviation table number. The next
            character will be the offset within that table of the
            abbreviation. Set up a state handler to intercept the next
            character and output the right abbreviation."""

            def write_abbreviation(state, c, subtable):
                state["zscii"] += self._abbrevs[(subtable, c)]
                del state["state_handler"]

            # If we're parsing an abbreviation, there should be no
            # nested abbreviations. So this is just a sanity check for
            # people feeding us bad stories.
            if not state["allow_abbreviations"]:
                raise ZStringIllegalAbbrevInString
            state["state_handler"] = lambda s, c: write_abbreviation(s, c, abbrev)

        # Register the specials handlers depending on machine version.
        if self._mem.version == 1:
            self._specials = {
                1: lambda s: newline(s),
                2: lambda s: shift_alphabet(s, +1, False),
                3: lambda s: shift_alphabet(s, -1, False),
                4: lambda s: shift_alphabet(s, +1, True),
                5: lambda s: shift_alphabet(s, -1, True),
            }
        elif self._mem.version == 2:
            self._specials = {
                1: lambda s: abbreviation(s, 0),
                2: lambda s: shift_alphabet(s, +1, False),
                3: lambda s: shift_alphabet(s, -1, False),
                4: lambda s: shift_alphabet(s, +1, True),
                5: lambda s: shift_alphabet(s, -1, True),
            }
        else:  # ZM v3 and later
            self._specials = {
                1: lambda s: abbreviation(s, 0),
                2: lambda s: abbreviation(s, 1),
                3: lambda s: abbreviation(s, 2),
                4: lambda s: shift_alphabet(s, +1, False),
                5: lambda s: shift_alphabet(s, -1, False),
            }

    def _special_zscii(self, state, char):
        """State handler for the A2/6 escape: assemble a 10-bit ZSCII code.

        Called once for each of the two Z-characters following the escape.
        The first call stores the top 5 bits; the second combines them
        with the bottom 5 bits, emits the code and unregisters itself.
        """
        if "zscii_char" not in state:
            state["zscii_char"] = char
        else:
            zchar = (state["zscii_char"] << 5) + char
            state["zscii"].append(zchar)
            del state["zscii_char"]
            del state["state_handler"]
            # The escape counts as the one character produced by a
            # temporary shift, so drop back to the previous alphabet just
            # like an ordinary character does. Without this the next
            # Z-character would still be decoded in A2.
            state["curr_alpha"] = state["prev_alpha"]

    def get(self, zstr, allow_abbreviations=True):
        """Decode the Z-characters in *zstr* into a list of ZSCII codes.

        If *allow_abbreviations* is False, an abbreviation code inside
        *zstr* raises ZStringIllegalAbbrevInString.
        """
        state = {
            "curr_alpha": 0,
            "prev_alpha": 0,
            "zscii": [],
            "allow_abbreviations": allow_abbreviations,
        }
        for c in zstr:
            if "state_handler" in state:
                # If a special handler has registered itself, then hand
                # processing over to it.
                state["state_handler"](state, c)  # type: ignore[call-non-callable]
            elif c in self._specials:
                # Hand off per-ZM version special char handling.
                self._specials[c](state)
            elif state["curr_alpha"] == 2 and c == 6:
                # Handle the strange A2/6 character
                state["state_handler"] = self._special_zscii
            else:
                # Do the usual Thing: append a zscii code to the
                # decoded sequence and revert to the "previous"
                # alphabet (or not, if it hasn't recently changed or
                # was locked)
                if c == 0:
                    # Append a space.
                    z = 32
                elif state["curr_alpha"] == 2:
                    # The symbol alphabet table only has 25 chars
                    # because of the A2/6 special char, so we need to
                    # adjust differently.
                    z = self._alphabet[state["curr_alpha"]][c - 7]
                else:
                    z = self._alphabet[state["curr_alpha"]][c - 6]
                state["zscii"].append(z)
                state["curr_alpha"] = state["prev_alpha"]
        return state["zscii"]
class ZsciiTranslator:
    """Translate between ZSCII codes and Unicode characters.

    Two lookup tables are built at construction time: an output table
    (ZSCII code -> Unicode string) used by ztou()/get(), and an input
    table (Unicode character or special key code -> ZSCII code) used by
    utoz().
    """

    # The default Unicode Translation Table that maps to ZSCII codes
    # starting at 155. The codes are unicode codepoints for a host of
    # strange characters.
    # fmt: off
    DEFAULT_UTT = [
        chr(x)
        for x in (
            0xE4, 0xF6, 0xFC, 0xC4, 0xD6, 0xDC, 0xDF, 0xBB,
            0xAB, 0xEB, 0xEF, 0xFF, 0xCB, 0xCF, 0xE1, 0xE9,
            0xED, 0xF3, 0xFA, 0xFD, 0xC1, 0xC9, 0xCD, 0xD3,
            0xDA, 0xDD, 0xE0, 0xE8, 0xEC, 0xF2, 0xF9, 0xC0,
            0xC8, 0xCC, 0xD2, 0xD9, 0xE2, 0xEA, 0xEE, 0xF4,
            0xFB, 0xC2, 0xCA, 0xCE, 0xD4, 0xDB, 0xE5, 0xC5,
            0xF8, 0xD8, 0xE3, 0xF1, 0xF5, 0xC3, 0xD1, 0xD5,
            0xE6, 0xC6, 0xE7, 0xC7, 0xFE, 0xF0, 0xDE, 0xD0,
            0xA3, 0x153, 0x152, 0xA1, 0xBF,
        )
    ]
    # fmt: on
    # And here is the offset at which the Unicode Translation Table
    # starts.
    UTT_OFFSET = 155

    # This nested class just lists all the "special" character codes that
    # are capturable from an input stream. They're just there so that
    # the user of the virtual machine can give them a nice name.
    class Input:
        DELETE = 8
        ESCAPE = 27
        # The cursor pad
        CUR_UP = 129
        CUR_DOWN = 130
        CUR_LEFT = 131
        CUR_RIGHT = 132
        # The Function keys
        F1 = 133
        F2 = 134
        F3 = 135
        F4 = 136
        F5 = 137
        F6 = 138
        F7 = 139
        F8 = 140
        F9 = 141
        F10 = 142
        F11 = 143
        F12 = 144
        # The numpad (keypad) keys.
        KP_0 = 145
        KP_1 = 146
        KP_2 = 147
        KP_3 = 148
        KP_4 = 149
        KP_5 = 150
        KP_6 = 151
        KP_7 = 152
        KP_8 = 153
        KP_9 = 154

    def __init__(self, zmem):
        self._mem = zmem
        # ZSCII 13 is the newline code emitted by the string decoder's
        # newline handler; 10 is what the default A2 alphabet row stores
        # for its newline entry. Both must print as a line break.
        self._output_table = {0: "", 10: "\n", 13: "\n"}
        self._input_table = {"\n": 10}
        self._load_unicode_table()
        # Populate the input and output tables with the printable ASCII
        # characters.
        for code in range(32, 127):
            char = chr(code)
            self._output_table[code] = char
            self._input_table[char] = code
        # Populate the input table with the extra "special" input
        # codes. The cool trick we use here, is that all these values
        # are in fact numbers, so their key will be available in both
        # dicts, and utoz will provide the correct code if you pass it
        # a special symbol instead of a character to translate!
        #
        # Oh and we also pull the items from the nested class into this
        # instance, so as to make reference to these special codes
        # easier.
        for name, code in vars(self.Input).items():
            if name.startswith("__"):
                continue
            self._input_table[code] = code
            setattr(self, name, code)
        # The only special support required for ZSCII: ZM v5 and later
        # define an extra character code to represent a mouse click.
        if self._mem.version >= 5:
            self.MOUSE_CLICK = 254
            self._input_table[254] = 254

    def _load_unicode_table(self):
        """Register the Unicode translation table in both lookup tables.

        DEFAULT_UTT is used for every machine version. On version 5 and
        later it is replaced by the story's own table when the header
        extension table provides one.
        """
        utt = self.DEFAULT_UTT
        if self._mem.version >= 5:
            # Read the header extension table address
            ext_table_addr = self._mem.read_word(0x36)
            # If:
            #  - The extension header's address is non-null
            #  - There are at least 3 words in the extension header
            #    (the unicode translation table is the third word)
            #  - The 3rd word (unicode translation table address) is
            #    non-null
            #
            # Then there is a unicode translation table other than the
            # default that needs loading.
            if ext_table_addr != 0 and self._mem.read_word(ext_table_addr) >= 3:
                utt_addr = self._mem.read_word(ext_table_addr + 6)
                if utt_addr != 0:
                    # The first byte is the number of unicode characters
                    # in the table; each entry after it is one word.
                    utt_len = self._mem[utt_addr]
                    first = utt_addr + 1
                    utt = [
                        chr(self._mem.read_word(entry))
                        for entry in range(first, first + (utt_len * 2), 2)
                    ]
        # One way or another, we have a unicode translation
        # table. Add all the characters in it to the input and
        # output translation tables.
        for zscii, unichar in zip(itertools.count(self.UTT_OFFSET), utt):
            self._output_table[zscii] = unichar
            self._input_table[unichar] = zscii

    def ztou(self, index):
        """Translate the given ZSCII code into the corresponding
        output Unicode character and return it.

        Codes without a translation do not raise: unmapped codes below
        32 yield the empty string, any other unmapped code is logged and
        yields "?".
        """
        try:
            return self._output_table[index]
        except KeyError:
            # Unmapped control characters print as nothing.
            if index < 32:
                return ""
            # For undefined or unmapped characters, return a placeholder
            log(f"Warning: undefined ZSCII character code {index}, using '?'")
            return "?"

    def utoz(self, char):
        """Translate the given Unicode character (or special input code)
        into the corresponding input ZSCII code and return it.

        Raises IndexError if the requested character has no translation.
        """
        try:
            return self._input_table[char]
        except KeyError:
            raise IndexError("No such input character") from None

    def get(self, zscii):
        """Return the Unicode string for an iterable of ZSCII codes."""
        return "".join([self.ztou(c) for c in zscii])
class ZStringFactory:
    """Decode strings stored in Z-machine memory into Python text.

    Bundles the three translation stages of this module: packed string
    to Z-characters, Z-characters to ZSCII codes, ZSCII codes to Unicode.
    """

    def __init__(self, zmem):
        self._mem = zmem
        # One translator per decoding stage, all built on the same memory.
        self.zstr = ZStringTranslator(zmem)
        self.zchr = ZCharTranslator(zmem)
        self.zscii = ZsciiTranslator(zmem)

    def get(self, addr):
        """Return the decoded text of the string located at *addr*."""
        packed = self.zstr.get(addr)
        return self.zscii.get(self.zchr.get(packed))