#
# A ZString-to-Unicode Universal Translator.
#
# For the license of this file, please consult the LICENSE file in the
# root directory of this distribution.
#

import itertools

from .zlogging import log


class ZStringEndOfString(Exception):
    """No more data left in string."""


class ZStringIllegalAbbrevInString(Exception):
    """String abbreviation encountered within a string in a context
    where it is not allowed."""


class ZStringTranslator:
    """Unpack a Z-string stored in memory into a list of 5-bit Z-characters.

    A read position is a 3-tuple ``(addr, block, index)``: the address of
    the current 16-bit word, that word wrapped in a BitField, and the
    index (0..2) of the Z-character within the word.
    """

    def __init__(self, zmem):
        self._mem = zmem

    def get(self, addr):
        """Return the list of Z-characters of the string starting at addr.

        Words are read with ``self._mem.read_word`` until a word whose
        bit 15 is set has been fully consumed.
        """
        # Function-scope import kept from the original.
        # NOTE(review): presumably avoids an import cycle -- confirm.
        from .bitfield import BitField

        pos = (addr, BitField(self._mem.read_word(addr)), 0)
        s = []
        try:
            while True:
                s.append(self._read_char(pos))
                pos = self._next_pos(pos)
        except ZStringEndOfString:
            return s

    def _read_char(self, pos):
        """Return the Z-character at the given position.

        Index 0 selects bits 10-14, index 1 bits 5-9, index 2 bits 0-4
        (assuming BitField slices count from the least significant bit).
        """
        offset = (2 - pos[2]) * 5
        return pos[1][offset : offset + 5]

    def _is_final(self, pos):
        """Return True if the current word has its bit 15 (end marker) set."""
        return pos[1][15] == 1

    def _next_pos(self, pos):
        """Return the position following pos.

        Raises ZStringEndOfString when pos was the last Z-character of
        the word carrying the end marker.
        """
        from .bitfield import BitField

        offset = pos[2] + 1
        # Overflowing from current block?
        if offset == 3:
            # Was last block?
            if self._is_final(pos):
                # Kill processing.
                raise ZStringEndOfString
            # Get and return the next block.
            return (pos[0] + 2, BitField(self._mem.read_word(pos[0] + 2)), 0)

        # Just increment the intra-block counter.
        return (pos[0], pos[1], offset)


class ZCharTranslator:
    """Translate a sequence of Z-characters into a list of ZSCII codes.

    Handles alphabet shifts, abbreviations and the two-Z-character
    (10-bit) ZSCII escape, according to the story file version.
    """

    # The default alphabet tables for ZChar translation.
    # As the codes 0-5 are special, alphabets start with code 0x6.
    DEFAULT_A0 = [ord(x) for x in "abcdefghijklmnopqrstuvwxyz"]
    DEFAULT_A1 = [ord(x) for x in "ABCDEFGHIJKLMNOPQRSTUVWXYZ"]
    # A2 also has 0x6 as special char, so they start at 0x7.
    DEFAULT_A2 = [ord(x) for x in "\n0123456789.,!?_#'\"/\\-:()"]
    DEFAULT_A2_V5 = [ord(x) for x in "\n0123456789.,!?_#'\"/\\-:()"]
    # Version 1 has its own A2 row: no newline at 0x7 (Z-char 1 is the
    # newline there) and an extra '<' (Z-Machine Standard, section 3.5.4).
    DEFAULT_A2_V1 = [ord(x) for x in "0123456789.,!?_#'\"/\\<-:()"]

    ALPHA = (DEFAULT_A0, DEFAULT_A1, DEFAULT_A2)
    ALPHA_V5 = (DEFAULT_A0, DEFAULT_A1, DEFAULT_A2_V5)
    ALPHA_V1 = (DEFAULT_A0, DEFAULT_A1, DEFAULT_A2_V1)

    def __init__(self, zmem):
        self._mem = zmem

        # Initialize the alphabets
        if self._mem.version == 5:
            self._alphabet = self._load_custom_alphabet() or self.ALPHA_V5
        elif self._mem.version == 1:
            self._alphabet = self.ALPHA_V1
        else:
            self._alphabet = self.ALPHA

        # Initialize the special state handlers
        self._load_specials()

        # Initialize the abbreviations (if supported). This decodes
        # strings with self.get(), so it must run after the alphabet
        # and the special handlers are in place.
        self._load_abbrev_tables()

    def _load_custom_alphabet(self):
        """Check for the existence of a custom alphabet, and load it
        if it does exist. Return the custom alphabet if it was found,
        None otherwise.

        The returned A2 row has 25 entries starting at Z-char 7, like
        the default A2 tables, because get() indexes A2 with ``c - 7``.
        """
        # The custom alphabet table address is the *word* at 0x34 in
        # the header; test the whole word, not just its high byte.
        alph_addr = self._mem.read_word(0x34)
        if alph_addr == 0:
            return None

        # 78 bytes: three rows of 26 codes for Z-chars 6..31.
        alphabet = self._mem[alph_addr : alph_addr + 78]

        # In A2 the entries for Z-chars 6 and 7 are ignored: 6 is always
        # the ZSCII escape and 7 is always newline (Z-Machine Standard,
        # section 3.5.5). Drop the slot for 6 and force the slot for 7
        # to the same newline code the default tables use.
        a2 = [ord("\n")] + list(alphabet[54:78])
        return [alphabet[0:26], alphabet[26:52], a2]

    def _load_abbrev_tables(self):
        """Decode every abbreviation string into self._abbrevs.

        Keys are ``(subtable, index)`` tuples, values are lists of ZSCII
        codes. Version 1 has no abbreviations, version 2 has one
        subtable of 32 entries, later versions have three.
        """
        self._abbrevs = {}

        # If the ZM doesn't do abbrevs, just return an empty dict.
        if self._mem.version == 1:
            return

        # Build ourselves a ZStringTranslator for the abbrevs.
        xlator = ZStringTranslator(self._mem)

        def _load_subtable(num, base):
            # Each subtable holds 32 word-sized entries (64 bytes).
            for i in range(32):
                zoff = base + (num * 64) + (i * 2)
                zaddr = self._mem.read_word(zoff)
                zstr = xlator.get(self._mem.word_address(zaddr))
                zchr = self.get(zstr, allow_abbreviations=False)
                self._abbrevs[(num, i)] = zchr

        # The abbreviation table address is the word at 0x18.
        abbrev_base = self._mem.read_word(0x18)
        _load_subtable(0, abbrev_base)

        # Does this ZM support the extended abbrev tables?
        if self._mem.version >= 3:
            _load_subtable(1, abbrev_base)
            _load_subtable(2, abbrev_base)

    def _load_specials(self):
        """Load the special character code handlers for the current
        machine version.

        Populates self._specials, a dict mapping Z-characters 1..5 to
        one-argument callables that mutate the decoding state.
        """

        # The following three functions define the three possible
        # special character code handlers.
        def newline(state):
            """Append ZSCII 13 (newline) to the output."""
            state["zscii"].append(13)

        def shift_alphabet(state, direction, lock):
            """Shift the current alphabet up or down. If lock is
            False, the alphabet will revert to the previous alphabet
            after outputting 1 character. Else, the alphabet will
            remain unchanged until the next shift.
            """
            state["curr_alpha"] = (state["curr_alpha"] + direction) % 3
            if lock:
                state["prev_alpha"] = state["curr_alpha"]

        def abbreviation(state, abbrev):
            """Insert the given abbreviation from the given table into
            the output stream.

            This character was an abbreviation table number. The next
            character will be the offset within that table of the
            abbreviation. Set up a state handler to intercept the next
            character and output the right abbreviation."""

            def write_abbreviation(state, c, subtable):
                state["zscii"] += self._abbrevs[(subtable, c)]
                del state["state_handler"]

            # If we're parsing an abbreviation, there should be no
            # nested abbreviations. So this is just a sanity check for
            # people feeding us bad stories.
            if not state["allow_abbreviations"]:
                raise ZStringIllegalAbbrevInString

            state["state_handler"] = lambda s, c: write_abbreviation(s, c, abbrev)

        # Register the specials handlers depending on machine version.
        if self._mem.version == 1:
            self._specials = {
                1: lambda s: newline(s),
                2: lambda s: shift_alphabet(s, +1, False),
                3: lambda s: shift_alphabet(s, -1, False),
                4: lambda s: shift_alphabet(s, +1, True),
                5: lambda s: shift_alphabet(s, -1, True),
            }
        elif self._mem.version == 2:
            self._specials = {
                1: lambda s: abbreviation(s, 0),
                2: lambda s: shift_alphabet(s, +1, False),
                3: lambda s: shift_alphabet(s, -1, False),
                4: lambda s: shift_alphabet(s, +1, True),
                5: lambda s: shift_alphabet(s, -1, True),
            }
        else:  # ZM v3-5
            self._specials = {
                1: lambda s: abbreviation(s, 0),
                2: lambda s: abbreviation(s, 1),
                3: lambda s: abbreviation(s, 2),
                4: lambda s: shift_alphabet(s, +1, False),
                5: lambda s: shift_alphabet(s, -1, False),
            }

    def _special_zscii(self, state, char):
        """State handler for the A2/6 escape.

        Called for each of the two Z-characters following the escape:
        the first is stashed as the top 5 bits, the second completes
        the 10-bit ZSCII code, which is appended to the output before
        the handler unregisters itself.
        """
        if "zscii_char" not in state:
            state["zscii_char"] = char
        else:
            zchar = (state["zscii_char"] << 5) + char
            state["zscii"].append(zchar)
            del state["zscii_char"]
            del state["state_handler"]

    def get(self, zstr, allow_abbreviations=True):
        """Decode the Z-characters in zstr and return a list of ZSCII codes.

        If allow_abbreviations is False, meeting an abbreviation
        Z-character raises ZStringIllegalAbbrevInString.
        """
        state = {
            "curr_alpha": 0,
            "prev_alpha": 0,
            "zscii": [],
            "allow_abbreviations": allow_abbreviations,
        }

        for c in zstr:
            if "state_handler" in state:
                # If a special handler has registered itself, then hand
                # processing over to it.
                state["state_handler"](state, c)  # type: ignore[call-non-callable]
            elif c in self._specials:
                # Hand off per-ZM version special char handling.
                self._specials[c](state)
            elif state["curr_alpha"] == 2 and c == 6:
                # Handle the strange A2/6 character
                state["state_handler"] = self._special_zscii
                # The escape consumes any temporary shift, exactly like
                # an ordinary character would. Without this the text
                # following the escape would still be decoded in A2.
                state["curr_alpha"] = state["prev_alpha"]
            else:
                # Do the usual Thing: append a zscii code to the
                # decoded sequence and revert to the "previous"
                # alphabet (or not, if it hasn't recently changed or
                # was locked)
                if c == 0:
                    # Append a space.
                    z = 32
                elif state["curr_alpha"] == 2:
                    # The symbol alphabet table only has 25 chars
                    # because of the A2/6 special char, so we need to
                    # adjust differently.
                    z = self._alphabet[state["curr_alpha"]][c - 7]
                else:
                    z = self._alphabet[state["curr_alpha"]][c - 6]
                state["zscii"].append(z)
                state["curr_alpha"] = state["prev_alpha"]

        return state["zscii"]


class ZsciiTranslator:
    """Translate between ZSCII codes and Unicode characters.

    Each name defined in the nested Input class is also set as an
    attribute on every instance, and ``MOUSE_CLICK`` is added when the
    story file is version 5.
    """

    # The default Unicode Translation Table that maps to ZSCII codes
    # 155-251. The codes are unicode codepoints for a host of strange
    # characters.
    DEFAULT_UTT = [
        chr(x)
        for x in (
            0xE4, 0xF6, 0xFC, 0xC4, 0xD6, 0xDC, 0xDF, 0xBB, 0xAB, 0xEB,
            0xEF, 0xFF, 0xCB, 0xCF, 0xE1, 0xE9, 0xED, 0xF3, 0xFA, 0xFD,
            0xC1, 0xC9, 0xCD, 0xD3, 0xDA, 0xDD, 0xE0, 0xE8, 0xEC, 0xF2,
            0xF9, 0xC0, 0xC8, 0xCC, 0xD2, 0xD9, 0xE2, 0xEA, 0xEE, 0xF4,
            0xFB, 0xC2, 0xCA, 0xCE, 0xD4, 0xDB, 0xE5, 0xC5, 0xF8, 0xD8,
            0xE3, 0xF1, 0xF5, 0xC3, 0xD1, 0xD5, 0xE6, 0xC6, 0xE7, 0xC7,
            0xFE, 0xF0, 0xDE, 0xD0, 0xA3, 0x153, 0x152, 0xA1, 0xBF,
        )
    ]
    # And here is the offset at which the Unicode Translation Table
    # starts.
    UTT_OFFSET = 155

    # This subclass just lists all the "special" character codes that
    # are capturable from an input stream. They're just there so that
    # the user of the virtual machine can give them a nice name.
    class Input:
        DELETE = 8
        ESCAPE = 27
        # The cursor pad
        CUR_UP = 129
        CUR_DOWN = 130
        CUR_LEFT = 131
        CUR_RIGHT = 132
        # The Function keys
        F1 = 133
        F2 = 134
        F3 = 135
        F4 = 136
        F5 = 137
        F6 = 138
        F7 = 139
        F8 = 140
        F9 = 141
        F10 = 142
        F11 = 143
        F12 = 144
        # The numpad (keypad) keys.
        KP_0 = 145
        KP_1 = 146
        KP_2 = 147
        KP_3 = 148
        KP_4 = 149
        KP_5 = 150
        KP_6 = 151
        KP_7 = 152
        KP_8 = 153
        KP_9 = 154

    def __init__(self, zmem):
        self._mem = zmem
        # 10 is what the alphabet tables above emit for "\n"; 13 is the
        # ZSCII newline proper (and what the v1 newline Z-char emits),
        # so both have to print as a line break.
        self._output_table = {0: "", 10: "\n", 13: "\n"}
        self._input_table = {"\n": 10}

        self._load_unicode_table()

        # Populate the input and output tables with the ASCII and UTT
        # characters.
        for code in range(32, 127):
            char = chr(code)
            self._output_table[code] = char
            self._input_table[char] = code

        # Populate the input table with the extra "special" input
        # codes. The cool trick we use here, is that all these values
        # are in fact numbers, so their key will be available in both
        # dicts, and ztoa will provide the correct code if you pass it
        # a special symbol instead of a character to translate!
        #
        # Oh and we also pull the items from the subclass into this
        # instance, so as to make reference to these special codes
        # easier.
        for name, code in self.Input.__dict__.items():
            if name.startswith("__"):
                continue
            self._input_table[code] = code
            setattr(self, name, code)

        # The only special support required for ZSCII: ZM v5 defines
        # an extra character code to represent a mouse click. If we're
        # booting a v5 ZM, define this.
        if self._mem.version == 5:
            self.MOUSE_CLICK = 254
            self._input_table[254] = 254

    def _load_unicode_table(self):
        """Register the Unicode translation table in both lookup tables.

        The default table is used for every version; a version 5 story
        may replace it through its header extension table.
        """
        utt = self.DEFAULT_UTT

        if self._mem.version == 5:
            # Read the header extension table address
            ext_table_addr = self._mem.read_word(0x36)

            # If:
            #  - The extension header's address is non-null
            #  - There are at least 3 words in the extension header
            #    (the unicode translation table is the third word)
            #  - The 3rd word (unicode translation table address) is
            #    non-null
            #
            # Then there is a unicode translation table other than the
            # default that needs loading.
            if (
                ext_table_addr != 0
                and self._mem.read_word(ext_table_addr) >= 3
                and self._mem.read_word(ext_table_addr + 6) != 0
            ):
                # Get the unicode translation table address
                utt_addr = self._mem.read_word(ext_table_addr + 6)

                # The first byte is the number of unicode characters
                # in the table.
                utt_len = self._mem[utt_addr]

                # Build the range of addresses to load from, and build
                # the unicode translation table as a list of unicode
                # chars.
                utt_range = range(utt_addr + 1, utt_addr + 1 + (utt_len * 2), 2)
                utt = [chr(self._mem.read_word(i)) for i in utt_range]

        # One way or another, we have a unicode translation
        # table. Add all the characters in it to the input and
        # output translation tables.
        for zscii, unichar in zip(itertools.count(self.UTT_OFFSET), utt):
            self._output_table[zscii] = unichar
            self._input_table[unichar] = zscii

    def ztou(self, index):
        """Translate the given ZSCII code into the corresponding
        output Unicode character and return it.

        Codes without a translation never raise: unmapped codes below
        32 yield an empty string, any other unmapped code is logged
        and yields "?".
        """
        try:
            return self._output_table[index]
        except KeyError:
            # Unmapped control characters are silently dropped.
            if index < 32:
                return ""
            # For undefined or unmapped characters, return a placeholder
            log(f"Warning: undefined ZSCII character code {index}, using '?'")
            return "?"

    def utoz(self, char):
        """Translate the given Unicode code into the corresponding
        input ZSCII character and return it, or raise IndexError
        if the requested character has no translation."""
        try:
            return self._input_table[char]
        except KeyError:
            raise IndexError("No such input character") from None

    def get(self, zscii):
        """Return the string obtained by translating every code in zscii."""
        return "".join([self.ztou(c) for c in zscii])


class ZStringFactory:
    """Decode a Z-string in memory straight to a Python string by
    chaining the three translators above."""

    def __init__(self, zmem):
        self._mem = zmem
        self.zstr = ZStringTranslator(zmem)
        self.zchr = ZCharTranslator(zmem)
        self.zscii = ZsciiTranslator(zmem)

    def get(self, addr):
        """Return the decoded text of the Z-string stored at addr."""
        zstr = self.zstr.get(addr)
        zchr = self.zchr.get(zstr)
        return self.zscii.get(zchr)