X-Git-Url: https://git.stderr.nl/gitweb?a=blobdiff_plain;ds=sidebyside;f=interpreters%2Ffrotz%2Ftext.c;fp=interpreters%2Ffrotz%2Ftext.c;h=8145cfea29d6307c3969d0eb246c741e540eeaff;hb=b1f1dc50b22b30c4d7176e1ff7c0805e80fe0724;hp=0000000000000000000000000000000000000000;hpb=50176172d18ae72d019181725c5629d45d21c548;p=projects%2Fchimara%2Fchimara.git diff --git a/interpreters/frotz/text.c b/interpreters/frotz/text.c new file mode 100644 index 0000000..8145cfe --- /dev/null +++ b/interpreters/frotz/text.c @@ -0,0 +1,1109 @@ +/* text.c - Text manipulation functions + * Copyright (c) 1995-1997 Stefan Jokisch + * + * This file is part of Frotz. + * + * Frotz is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * Frotz is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA + */ + +#include "frotz.h" + +enum string_type { + LOW_STRING, ABBREVIATION, HIGH_STRING, EMBEDDED_STRING, VOCABULARY +}; + +extern zword object_name (zword); + +static zchar decoded[10]; +static zword encoded[3]; + +/* + * According to Matteo De Luigi , + * 0xab and 0xbb were in each other's proper positions. + * Sat Apr 21, 2001 + */ +static zchar zscii_to_latin1[] = { + 0xe4, 0xf6, 0xfc, 0xc4, 0xd6, 0xdc, 0xdf, 0xbb, + 0xab, 0xeb, 0xef, 0xff, 0xcb, 0xcf, 0xe1, 0xe9, + 0xed, 0xf3, 0xfa, 0xfd, 0xc1, 0xc9, 0xcd, 0xd3, + 0xda, 0xdd, 0xe0, 0xe8, 0xec, 0xf2, 0xf9, 0xc0, + 0xc8, 0xcc, 0xd2, 0xd9, 0xe2, 0xea, 0xee, 0xf4, + 0xfb, 0xc2, 0xca, 0xce, 0xd4, 0xdb, 0xe5, 0xc5, + 0xf8, 0xd8, 0xe3, 0xf1, 0xf5, 0xc3, 0xd1, 0xd5, + 0xe6, 0xc6, 0xe7, 0xc7, 0xfe, 0xf0, 0xde, 0xd0, + 0xa3, 0x00, 0x00, 0xa1, 0xbf +}; + +/* + * translate_from_zscii + * + * Map a ZSCII character onto the ISO Latin-1 alphabet. + * + */ + +zchar translate_from_zscii (zbyte c) +{ + + if (c == 0xfc) + return ZC_MENU_CLICK; + if (c == 0xfd) + return ZC_DOUBLE_CLICK; + if (c == 0xfe) + return ZC_SINGLE_CLICK; + + if (c >= 0x9b && story_id != BEYOND_ZORK) { + + if (hx_unicode_table != 0) { /* game has its own Unicode table */ + + zbyte N; + + LOW_BYTE (hx_unicode_table, N) + + if (c - 0x9b < N) { + + zword addr = hx_unicode_table + 1 + 2 * (c - 0x9b); + zword unicode; + + LOW_WORD (addr, unicode) + + return (unicode < 0x100) ? (zchar) unicode : '?'; + + } else return '?'; + + } else /* game uses standard set */ + + if (c <= 0xdf) { + + if (c == 0xdc || c == 0xdd) /* Oe and oe ligatures */ + return '?'; /* are not ISO-Latin 1 */ + + return zscii_to_latin1[c - 0x9b]; + + } else return '?'; + } + + return c; + +}/* translate_from_zscii */ + +/* + * translate_to_zscii + * + * Map an ISO Latin-1 character onto the ZSCII alphabet. + * + */ + +zbyte translate_to_zscii (zchar c) +{ + int i; + + if (c == ZC_SINGLE_CLICK) + return 0xfe; + if (c == ZC_DOUBLE_CLICK) + return 0xfd; + if (c == ZC_MENU_CLICK) + return 0xfc; + + if (c >= ZC_LATIN1_MIN) { + + if (hx_unicode_table != 0) { /* game has its own Unicode table */ + + zbyte N; + int i; + + LOW_BYTE (hx_unicode_table, N) + + for (i = 0x9b; i < 0x9b + N; i++) { + + zword addr = hx_unicode_table + 1 + 2 * (i - 0x9b); + zword unicode; + + LOW_WORD (addr, unicode) + + if (c == unicode) + return (zbyte) i; + + } + + return '?'; + + } else { /* game uses standard set */ + + for (i = 0x9b; i <= 0xdf; i++) + if (c == zscii_to_latin1[i - 0x9b]) + return (zbyte) i; + + return '?'; + + } + } + + if (c == 0) /* Safety thing from David Kinder */ + c = '?'; /* regarding his Unicode patches */ + /* Sept 15, 2002 */ + + return c; + +}/* translate_to_zscii */ + +/* + * alphabet + * + * Return a character from one of the three character sets. + * + */ + +static zchar alphabet (int set, int index) +{ + + if (h_alphabet != 0) { /* game uses its own alphabet */ + + zbyte c; + + zword addr = h_alphabet + 26 * set + index; + LOW_BYTE (addr, c) + + return translate_from_zscii (c); + + } else /* game uses default alphabet */ + + if (set == 0) + return 'a' + index; + else if (set == 1) + return 'A' + index; + else if (h_version == V1) + return " 0123456789.,!?_#'\"/\\<-:()"[index]; + else + return " ^0123456789.,!?_#'\"/\\-:()"[index]; + +}/* alphabet */ + +/* + * load_string + * + * Copy a ZSCII string from the memory to the global "decoded" string. + * + */ + +static void load_string (zword addr, zword length) +{ + int resolution = (h_version <= V3) ? 2 : 3; + int i = 0; + + while (i < 3 * resolution) + + if (i < length) { + + zbyte c; + + LOW_BYTE (addr, c) + addr++; + + decoded[i++] = translate_from_zscii (c); + + } else decoded[i++] = 0; + +}/* load_string */ + +/* + * encode_text + * + * Encode the Unicode text in the global "decoded" string then write + * the result to the global "encoded" array. (This is used to look up + * words in the dictionary.) Up to V3 the vocabulary resolution is + * two, since V4 it is three words. Because each word contains three + * Z-characters, that makes six or nine Z-characters respectively. + * Longer words are chopped to the proper size, shorter words are are + * padded out with 5's. For word completion we pad with 0s and 31s, + * the minimum and maximum Z-characters. + * + */ + +static void encode_text (int padding) +{ + static zchar again[] = { 'a', 'g', 'a', 'i', 'n', 0 }; + static zchar examine[] = { 'e', 'x', 'a', 'm', 'i', 'n', 'e', 0 }; + static zchar wait[] = { 'w', 'a', 'i', 't', 0 }; + + zbyte zchars[12]; + const zchar *ptr = decoded; + zchar c; + int resolution = (h_version <= V3) ? 2 : 3; + int i = 0; + + /* Expand abbreviations that some old Infocom games lack */ + + if (f_setup.expand_abbreviations) + + if (padding == 0x05 && decoded[1] == 0) + + switch (decoded[0]) { + case 'g': ptr = again; break; + case 'x': ptr = examine; break; + case 'z': ptr = wait; break; + } + + /* Translate string to a sequence of Z-characters */ + + while (i < 3 * resolution) + + if ((c = *ptr++) != 0) { + + int index, set; + zbyte c2; + + /* Search character in the alphabet */ + + for (set = 0; set < 3; set++) + for (index = 0; index < 26; index++) + if (c == alphabet (set, index)) + goto letter_found; + + /* Character not found, store its ZSCII value */ + + c2 = translate_to_zscii (c); + + zchars[i++] = 5; + zchars[i++] = 6; + zchars[i++] = c2 >> 5; + zchars[i++] = c2 & 0x1f; + + continue; + + letter_found: + + /* Character found, store its index */ + + if (set != 0) + zchars[i++] = ((h_version <= V2) ? 1 : 3) + set; + + zchars[i++] = index + 6; + + } else zchars[i++] = padding; + + /* Three Z-characters make a 16bit word */ + + for (i = 0; i < resolution; i++) + + encoded[i] = + (zchars[3 * i + 0] << 10) | + (zchars[3 * i + 1] << 5) | + (zchars[3 * i + 2]); + + encoded[resolution - 1] |= 0x8000; + +}/* encode_text */ + +/* + * z_check_unicode, test if a unicode character can be read and printed. + * + * zargs[0] = Unicode + * + */ + +void z_check_unicode (void) +{ + zword c = zargs[0]; + + if (c >= 0x20 && c <= 0x7e) + store (3); + else if (c == 0xa0) + store (1); + else if (c >= 0xa1 && c <= 0xff) + store (3); + else + store (0); + +}/* z_check_unicode */ + +/* + * z_encode_text, encode a ZSCII string for use in a dictionary. + * + * zargs[0] = address of text buffer + * zargs[1] = length of ASCII string + * zargs[2] = offset of ASCII string within the text buffer + * zargs[3] = address to store encoded text in + * + * This is a V5+ opcode and therefore the dictionary resolution must be + * three 16bit words. + * + */ + +void z_encode_text (void) +{ + int i; + + load_string ((zword) (zargs[0] + zargs[2]), zargs[1]); + + encode_text (0x05); + + for (i = 0; i < 3; i++) + storew ((zword) (zargs[3] + 2 * i), encoded[i]); + +}/* z_encode_text */ + +/* + * decode_text + * + * Convert encoded text to Unicode. The encoded text consists of 16bit + * words. Every word holds 3 Z-characters (5 bits each) plus a spare + * bit to mark the last word. The Z-characters translate to ZSCII by + * looking at the current current character set. Some select another + * character set, others refer to abbreviations. + * + * There are several different string types: + * + * LOW_STRING - from the lower 64KB (byte address) + * ABBREVIATION - from the abbreviations table (word address) + * HIGH_STRING - from the end of the memory map (packed address) + * EMBEDDED_STRING - from the instruction stream (at PC) + * VOCABULARY - from the dictionary (byte address) + * + * The last type is only used for word completion. + * + */ + +#define outchar(c) if (st==VOCABULARY) *ptr++=c; else print_char(c) + +static void decode_text (enum string_type st, zword addr) +{ + zchar *ptr; + long byte_addr; + zchar c2; + zword code; + zbyte c, prev_c = 0; + int shift_state = 0; + int shift_lock = 0; + int status = 0; + + ptr = NULL; /* makes compilers shut up */ + byte_addr = 0; + + /* Calculate the byte address if necessary */ + + if (st == ABBREVIATION) + + byte_addr = (long) addr << 1; + + else if (st == HIGH_STRING) { + + if (h_version <= V3) + byte_addr = (long) addr << 1; + else if (h_version <= V5) + byte_addr = (long) addr << 2; + else if (h_version <= V7) + byte_addr = ((long) addr << 2) + ((long) h_strings_offset << 3); + else /* h_version == V8 */ + byte_addr = (long) addr << 3; + + if (byte_addr >= story_size) + runtime_error (ERR_ILL_PRINT_ADDR); + + } + + /* Loop until a 16bit word has the highest bit set */ + + if (st == VOCABULARY) + ptr = decoded; + + do { + + int i; + + /* Fetch the next 16bit word */ + + if (st == LOW_STRING || st == VOCABULARY) { + LOW_WORD (addr, code) + addr += 2; + } else if (st == HIGH_STRING || st == ABBREVIATION) { + HIGH_WORD (byte_addr, code) + byte_addr += 2; + } else + CODE_WORD (code) + + /* Read its three Z-characters */ + + for (i = 10; i >= 0; i -= 5) { + + zword abbr_addr; + zword ptr_addr; + + c = (code >> i) & 0x1f; + + switch (status) { + + case 0: /* normal operation */ + + if (shift_state == 2 && c == 6) + status = 2; + + else if (h_version == V1 && c == 1) + new_line (); + + else if (h_version >= V2 && shift_state == 2 && c == 7) + new_line (); + + else if (c >= 6) + outchar (alphabet (shift_state, c - 6)); + + else if (c == 0) + outchar (' '); + + else if (h_version >= V2 && c == 1) + status = 1; + + else if (h_version >= V3 && c <= 3) + status = 1; + + else { + + shift_state = (shift_lock + (c & 1) + 1) % 3; + + if (h_version <= V2 && c >= 4) + shift_lock = shift_state; + + break; + + } + + shift_state = shift_lock; + + break; + + case 1: /* abbreviation */ + + ptr_addr = h_abbreviations + 64 * (prev_c - 1) + 2 * c; + + LOW_WORD (ptr_addr, abbr_addr) + decode_text (ABBREVIATION, abbr_addr); + + status = 0; + break; + + case 2: /* ZSCII character - first part */ + + status = 3; + break; + + case 3: /* ZSCII character - second part */ + + c2 = translate_from_zscii ((prev_c << 5) | c); + outchar (c2); + + status = 0; + break; + + } + + prev_c = c; + + } + + } while (!(code & 0x8000)); + + if (st == VOCABULARY) + *ptr = 0; + +}/* decode_text */ + +#undef outchar + +/* + * z_new_line, print a new line. + * + * no zargs used + * + */ + +void z_new_line (void) +{ + + new_line (); + +}/* z_new_line */ + +/* + * z_print, print a string embedded in the instruction stream. + * + * no zargs used + * + */ + +void z_print (void) +{ + + decode_text (EMBEDDED_STRING, 0); + +}/* z_print */ + +/* + * z_print_addr, print a string from the lower 64KB. + * + * zargs[0] = address of string to print + * + */ + +void z_print_addr (void) +{ + + decode_text (LOW_STRING, zargs[0]); + +}/* z_print_addr */ + +/* + * z_print_char print a single ZSCII character. + * + * zargs[0] = ZSCII character to be printed + * + */ + +void z_print_char (void) +{ + + print_char (translate_from_zscii (zargs[0])); + +}/* z_print_char */ + +/* + * z_print_form, print a formatted table. + * + * zargs[0] = address of formatted table to be printed + * + */ + +void z_print_form (void) +{ + zword count; + zword addr = zargs[0]; + + bool first = TRUE; + + for (;;) { + + LOW_WORD (addr, count) + addr += 2; + + if (count == 0) + break; + + if (!first) + new_line (); + + while (count--) { + + zbyte c; + + LOW_BYTE (addr, c) + addr++; + + print_char (translate_from_zscii (c)); + + } + + first = FALSE; + + } + +}/* z_print_form */ + +/* + * print_num + * + * Print a signed 16bit number. + * + */ + +void print_num (zword value) +{ + int i; + + /* Print sign */ + + if ((short) value < 0) { + print_char ('-'); + value = - (short) value; + } + + /* Print absolute value */ + + for (i = 10000; i != 0; i /= 10) + if (value >= i || i == 1) + print_char ('0' + (value / i) % 10); + +}/* print_num */ + +/* + * z_print_num, print a signed number. + * + * zargs[0] = number to print + * + */ + +void z_print_num (void) +{ + + print_num (zargs[0]); + +}/* z_print_num */ + +/* + * print_object + * + * Print an object description. + * + */ + +void print_object (zword object) +{ + zword addr = object_name (object); + zword code = 0x94a5; + zbyte length; + + LOW_BYTE (addr, length) + addr++; + + if (length != 0) + LOW_WORD (addr, code) + + if (code == 0x94a5) { /* encoded text 0x94a5 == empty string */ + + print_string ("object#"); /* supply a generic name */ + print_num (object); /* for anonymous objects */ + + } else decode_text (LOW_STRING, addr); + +}/* print_object */ + +/* + * z_print_obj, print an object description. + * + * zargs[0] = number of object to be printed + * + */ + +void z_print_obj (void) +{ + + print_object (zargs[0]); + +}/* z_print_obj */ + +/* + * z_print_paddr, print the string at the given packed address. + * + * zargs[0] = packed address of string to be printed + * + */ + +void z_print_paddr (void) +{ + + decode_text (HIGH_STRING, zargs[0]); + +}/* z_print_paddr */ + +/* + * z_print_ret, print the string at PC, print newline then return true. + * + * no zargs used + * + */ + +void z_print_ret (void) +{ + + decode_text (EMBEDDED_STRING, 0); + new_line (); + ret (1); + +}/* z_print_ret */ + +/* + * print_string + * + * Print a string of ASCII characters. + * + */ + +void print_string (const char *s) +{ + char c; + + while ((c = *s++) != 0) + + if (c == '\n') + new_line (); + else + print_char (c); + +}/* print_string */ + +/* + * z_print_unicode + * + * zargs[0] = Unicode + * + */ + +void z_print_unicode (void) +{ + + print_char ((zargs[0] <= 0xff) ? zargs[0] : '?'); + +}/* z_print_unicode */ + +/* + * lookup_text + * + * Scan a dictionary searching for the given word. The first argument + * can be + * + * 0x00 - find the first word which is >= the given one + * 0x05 - find the word which exactly matches the given one + * 0x1f - find the last word which is <= the given one + * + * The return value is 0 if the search fails. + * + */ + +static zword lookup_text (int padding, zword dct) +{ + zword entry_addr; + zword entry_count; + zword entry; + zword addr; + zbyte entry_len; + zbyte sep_count; + int resolution = (h_version <= V3) ? 2 : 3; + int entry_number; + int lower, upper; + int i; + bool sorted; + + encode_text (padding); + + LOW_BYTE (dct, sep_count) /* skip word separators */ + dct += 1 + sep_count; + LOW_BYTE (dct, entry_len) /* get length of entries */ + dct += 1; + LOW_WORD (dct, entry_count) /* get number of entries */ + dct += 2; + + if ((short) entry_count < 0) { /* bad luck, entries aren't sorted */ + + entry_count = - (short) entry_count; + sorted = FALSE; + + } else sorted = TRUE; /* entries are sorted */ + + lower = 0; + upper = entry_count - 1; + + while (lower <= upper) { + + if (sorted) /* binary search */ + entry_number = (lower + upper) / 2; + else /* linear search */ + entry_number = lower; + + entry_addr = dct + entry_number * entry_len; + + /* Compare word to dictionary entry */ + + addr = entry_addr; + + for (i = 0; i < resolution; i++) { + LOW_WORD (addr, entry) + if (encoded[i] != entry) + goto continuing; + addr += 2; + } + + return entry_addr; /* exact match found, return now */ + + continuing: + + if (sorted) /* binary search */ + + if (encoded[i] > entry) + lower = entry_number + 1; + else + upper = entry_number - 1; + + else lower++; /* linear search */ + + } + + /* No exact match has been found */ + + if (padding == 0x05) + return 0; + + entry_number = (padding == 0x00) ? lower : upper; + + if (entry_number == -1 || entry_number == entry_count) + return 0; + + return dct + entry_number * entry_len; + +}/* lookup_text */ + +/* + * tokenise_text + * + * Translate a single word to a token and append it to the token + * buffer. Every token consists of the address of the dictionary + * entry, the length of the word and the offset of the word from + * the start of the text buffer. Unknown words cause empty slots + * if the flag is set (such that the text can be scanned several + * times with different dictionaries); otherwise they are zero. + * + */ + +static void tokenise_text (zword text, zword length, zword from, zword parse, zword dct, bool flag) +{ + zword addr; + zbyte token_max, token_count; + + LOW_BYTE (parse, token_max) + parse++; + LOW_BYTE (parse, token_count) + + if (token_count < token_max) { /* sufficient space left for token? */ + + storeb (parse++, token_count + 1); + + load_string ((zword) (text + from), length); + + addr = lookup_text (0x05, dct); + + if (addr != 0 || !flag) { + + parse += 4 * token_count; + + storew ((zword) (parse + 0), addr); + storeb ((zword) (parse + 2), length); + storeb ((zword) (parse + 3), from); + + } + + } + +}/* tokenise_text */ + +/* + * tokenise_line + * + * Split an input line into words and translate the words to tokens. + * + */ + +void tokenise_line (zword text, zword token, zword dct, bool flag) +{ + zword addr1; + zword addr2; + zbyte length; + zbyte c; + + length = 0; /* makes compilers shut up */ + + /* Use standard dictionary if the given dictionary is zero */ + + if (dct == 0) + dct = h_dictionary; + + /* Remove all tokens before inserting new ones */ + + storeb ((zword) (token + 1), 0); + + /* Move the first pointer across the text buffer searching for the + beginning of a word. If this succeeds, store the position in a + second pointer. Move the first pointer searching for the end of + the word. When it is found, "tokenise" the word. Continue until + the end of the buffer is reached. */ + + addr1 = text; + addr2 = 0; + + if (h_version >= V5) { + addr1++; + LOW_BYTE (addr1, length) + } + + do { + + zword sep_addr; + zbyte sep_count; + zbyte separator; + + /* Fetch next ZSCII character */ + + addr1++; + + if (h_version >= V5 && addr1 == text + 2 + length) + c = 0; + else + LOW_BYTE (addr1, c) + + /* Check for separator */ + + sep_addr = dct; + + LOW_BYTE (sep_addr, sep_count) + sep_addr++; + + do { + + LOW_BYTE (sep_addr, separator) + sep_addr++; + + } while (c != separator && --sep_count != 0); + + /* This could be the start or the end of a word */ + + if (sep_count == 0 && c != ' ' && c != 0) { + + if (addr2 == 0) + addr2 = addr1; + + } else if (addr2 != 0) { + + tokenise_text ( + text, + (zword) (addr1 - addr2), + (zword) (addr2 - text), + token, dct, flag ); + + addr2 = 0; + + } + + /* Translate separator (which is a word in its own right) */ + + if (sep_count != 0) + + tokenise_text ( + text, + (zword) (1), + (zword) (addr1 - text), + token, dct, flag ); + + } while (c != 0); + +}/* tokenise_line */ + +/* + * z_tokenise, make a lexical analysis of a ZSCII string. + * + * zargs[0] = address of string to analyze + * zargs[1] = address of token buffer + * zargs[2] = address of dictionary (optional) + * zargs[3] = set when unknown words cause empty slots (optional) + * + */ + +void z_tokenise (void) +{ + + /* Supply default arguments */ + + if (zargc < 3) + zargs[2] = 0; + if (zargc < 4) + zargs[3] = 0; + + /* Call tokenise_line to do the real work */ + + tokenise_line (zargs[0], zargs[1], zargs[2], zargs[3] != 0); + +}/* z_tokenise */ + +/* + * completion + * + * Scan the vocabulary to complete the last word on the input line + * (similar to "tcsh" under Unix). The return value is + * + * 2 ==> completion is impossible + * 1 ==> completion is ambiguous + * 0 ==> completion is successful + * + * The function also returns a string in its second argument. In case + * of 2, the string is empty; in case of 1, the string is the longest + * extension of the last word on the input line that is common to all + * possible completions (for instance, if the last word on the input + * is "fo" and its only possible completions are "follow" and "folly" + * then the string is "ll"); in case of 0, the string is an extension + * to the last word that results in the only possible completion. + * + */ + +int completion (const zchar *buffer, zchar *result) +{ + zword minaddr; + zword maxaddr; + zchar *ptr; + zchar c; + int len; + int i; + + *result = 0; + + /* Copy last word to "decoded" string */ + + len = 0; + + while ((c = *buffer++) != 0) + + if (c != ' ') { + + if (len < 9) + decoded[len++] = c; + + } else len = 0; + + decoded[len] = 0; + + /* Search the dictionary for first and last possible extensions */ + + minaddr = lookup_text (0x00, h_dictionary); + maxaddr = lookup_text (0x1f, h_dictionary); + + if (minaddr == 0 || maxaddr == 0 || minaddr > maxaddr) + return 2; + + /* Copy first extension to "result" string */ + + decode_text (VOCABULARY, minaddr); + + ptr = result; + + for (i = len; (c = decoded[i]) != 0; i++) + *ptr++ = c; + *ptr = 0; + + /* Merge second extension with "result" string */ + + decode_text (VOCABULARY, maxaddr); + + for (i = len, ptr = result; (c = decoded[i]) != 0; i++, ptr++) + if (*ptr != c) break; + *ptr = 0; + + /* Search was ambiguous or successful */ + + return (minaddr == maxaddr) ? 0 : 1; + +}/* completion */