1 /* text.c - Text manipulation functions
2 * Copyright (c) 1995-1997 Stefan Jokisch
4 * This file is part of Frotz.
6 * Frotz is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation; either version 2 of the License, or
9 * (at your option) any later version.
11 * Frotz is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
16 * You should have received a copy of the GNU General Public License
17 * along with this program; if not, write to the Free Software
18 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
24 LOW_STRING, ABBREVIATION, HIGH_STRING, EMBEDDED_STRING, VOCABULARY
27 extern zword object_name (zword);
29 static zchar decoded[10];
30 static zword encoded[3];
33 * According to Matteo De Luigi <matteo.de.luigi@libero.it>,
34 * 0xab and 0xbb were in each other's proper positions.
37 static zchar zscii_to_latin1[] = {
38 0xe4, 0xf6, 0xfc, 0xc4, 0xd6, 0xdc, 0xdf, 0xbb,
39 0xab, 0xeb, 0xef, 0xff, 0xcb, 0xcf, 0xe1, 0xe9,
40 0xed, 0xf3, 0xfa, 0xfd, 0xc1, 0xc9, 0xcd, 0xd3,
41 0xda, 0xdd, 0xe0, 0xe8, 0xec, 0xf2, 0xf9, 0xc0,
42 0xc8, 0xcc, 0xd2, 0xd9, 0xe2, 0xea, 0xee, 0xf4,
43 0xfb, 0xc2, 0xca, 0xce, 0xd4, 0xdb, 0xe5, 0xc5,
44 0xf8, 0xd8, 0xe3, 0xf1, 0xf5, 0xc3, 0xd1, 0xd5,
45 0xe6, 0xc6, 0xe7, 0xc7, 0xfe, 0xf0, 0xde, 0xd0,
46 0xa3, 0x00, 0x00, 0xa1, 0xbf
50 * translate_from_zscii
52 * Map a ZSCII character onto the ISO Latin-1 alphabet.
56 zchar translate_from_zscii (zbyte c)
62 return ZC_DOUBLE_CLICK;
64 return ZC_SINGLE_CLICK;
66 if (c >= 0x9b && story_id != BEYOND_ZORK) {
68 if (hx_unicode_table != 0) { /* game has its own Unicode table */
72 LOW_BYTE (hx_unicode_table, N)
76 zword addr = hx_unicode_table + 1 + 2 * (c - 0x9b);
79 LOW_WORD (addr, unicode)
81 return (unicode < 0x100) ? (zchar) unicode : '?';
85 } else /* game uses standard set */
89 if (c == 0xdc || c == 0xdd) /* Oe and oe ligatures */
90 return '?'; /* are not ISO-Latin 1 */
92 return zscii_to_latin1[c - 0x9b];
99 }/* translate_from_zscii */
104 * Map an ISO Latin-1 character onto the ZSCII alphabet.
108 zbyte translate_to_zscii (zchar c)
112 if (c == ZC_SINGLE_CLICK)
114 if (c == ZC_DOUBLE_CLICK)
116 if (c == ZC_MENU_CLICK)
119 if (c >= ZC_LATIN1_MIN) {
121 if (hx_unicode_table != 0) { /* game has its own Unicode table */
126 LOW_BYTE (hx_unicode_table, N)
128 for (i = 0x9b; i < 0x9b + N; i++) {
130 zword addr = hx_unicode_table + 1 + 2 * (i - 0x9b);
133 LOW_WORD (addr, unicode)
142 } else { /* game uses standard set */
144 for (i = 0x9b; i <= 0xdf; i++)
145 if (c == zscii_to_latin1[i - 0x9b])
153 if (c == 0) /* Safety thing from David Kinder */
154 c = '?'; /* regarding his Unicode patches */
159 }/* translate_to_zscii */
164 * Return a character from one of the three character sets.
168 static zchar alphabet (int set, int index)
171 if (h_alphabet != 0) { /* game uses its own alphabet */
175 zword addr = h_alphabet + 26 * set + index;
178 return translate_from_zscii (c);
180 } else /* game uses default alphabet */
186 else if (h_version == V1)
187 return " 0123456789.,!?_#'\"/\\<-:()"[index];
189 return " ^0123456789.,!?_#'\"/\\-:()"[index];
196 * Copy a ZSCII string from the memory to the global "decoded" string.
200 static void load_string (zword addr, zword length)
202 int resolution = (h_version <= V3) ? 2 : 3;
205 while (i < 3 * resolution)
214 decoded[i++] = translate_from_zscii (c);
216 } else decoded[i++] = 0;
223 * Encode the Unicode text in the global "decoded" string then write
224 * the result to the global "encoded" array. (This is used to look up
225 * words in the dictionary.) Up to V3 the vocabulary resolution is
226 * two, since V4 it is three words. Because each word contains three
227 * Z-characters, that makes six or nine Z-characters respectively.
228 * Longer words are chopped to the proper size, shorter words are
229 * padded out with 5's. For word completion we pad with 0s and 31s,
230 * the minimum and maximum Z-characters.
234 static void encode_text (int padding)
236 static zchar again[] = { 'a', 'g', 'a', 'i', 'n', 0 };
237 static zchar examine[] = { 'e', 'x', 'a', 'm', 'i', 'n', 'e', 0 };
238 static zchar wait[] = { 'w', 'a', 'i', 't', 0 };
241 const zchar *ptr = decoded;
243 int resolution = (h_version <= V3) ? 2 : 3;
246 /* Expand abbreviations that some old Infocom games lack */
248 if (f_setup.expand_abbreviations)
250 if (padding == 0x05 && decoded[1] == 0)
252 switch (decoded[0]) {
253 case 'g': ptr = again; break;
254 case 'x': ptr = examine; break;
255 case 'z': ptr = wait; break;
258 /* Translate string to a sequence of Z-characters */
260 while (i < 3 * resolution)
262 if ((c = *ptr++) != 0) {
267 /* Search character in the alphabet */
269 for (set = 0; set < 3; set++)
270 for (index = 0; index < 26; index++)
271 if (c == alphabet (set, index))
274 /* Character not found, store its ZSCII value */
276 c2 = translate_to_zscii (c);
280 zchars[i++] = c2 >> 5;
281 zchars[i++] = c2 & 0x1f;
287 /* Character found, store its index */
290 zchars[i++] = ((h_version <= V2) ? 1 : 3) + set;
292 zchars[i++] = index + 6;
294 } else zchars[i++] = padding;
296 /* Three Z-characters make a 16bit word */
298 for (i = 0; i < resolution; i++)
301 (zchars[3 * i + 0] << 10) |
302 (zchars[3 * i + 1] << 5) |
305 encoded[resolution - 1] |= 0x8000;
310 * z_check_unicode, test if a unicode character can be read and printed.
316 void z_check_unicode (void)
320 if (c >= 0x20 && c <= 0x7e)
324 else if (c >= 0xa1 && c <= 0xff)
329 }/* z_check_unicode */
332 * z_encode_text, encode a ZSCII string for use in a dictionary.
334 * zargs[0] = address of text buffer
335 * zargs[1] = length of ASCII string
336 * zargs[2] = offset of ASCII string within the text buffer
337 * zargs[3] = address to store encoded text in
339 * This is a V5+ opcode and therefore the dictionary resolution must be
344 void z_encode_text (void)
348 load_string ((zword) (zargs[0] + zargs[2]), zargs[1]);
352 for (i = 0; i < 3; i++)
353 storew ((zword) (zargs[3] + 2 * i), encoded[i]);
360 * Convert encoded text to Unicode. The encoded text consists of 16bit
361 * words. Every word holds 3 Z-characters (5 bits each) plus a spare
362 * bit to mark the last word. The Z-characters translate to ZSCII by
363 * looking at the current character set. Some select another
364 * character set, others refer to abbreviations.
366 * There are several different string types:
368 * LOW_STRING - from the lower 64KB (byte address)
369 * ABBREVIATION - from the abbreviations table (word address)
370 * HIGH_STRING - from the end of the memory map (packed address)
371 * EMBEDDED_STRING - from the instruction stream (at PC)
372 * VOCABULARY - from the dictionary (byte address)
374 * The last type is only used for word completion.
378 #define outchar(c) if (st==VOCABULARY) *ptr++=c; else print_char(c)
380 static void decode_text (enum string_type st, zword addr)
391 ptr = NULL; /* makes compilers shut up */
394 /* Calculate the byte address if necessary */
396 if (st == ABBREVIATION)
398 byte_addr = (long) addr << 1;
400 else if (st == HIGH_STRING) {
403 byte_addr = (long) addr << 1;
404 else if (h_version <= V5)
405 byte_addr = (long) addr << 2;
406 else if (h_version <= V7)
407 byte_addr = ((long) addr << 2) + ((long) h_strings_offset << 3);
408 else /* h_version == V8 */
409 byte_addr = (long) addr << 3;
411 if (byte_addr >= story_size)
412 runtime_error (ERR_ILL_PRINT_ADDR);
416 /* Loop until a 16bit word has the highest bit set */
418 if (st == VOCABULARY)
425 /* Fetch the next 16bit word */
427 if (st == LOW_STRING || st == VOCABULARY) {
428 LOW_WORD (addr, code)
430 } else if (st == HIGH_STRING || st == ABBREVIATION) {
431 HIGH_WORD (byte_addr, code)
436 /* Read its three Z-characters */
438 for (i = 10; i >= 0; i -= 5) {
443 c = (code >> i) & 0x1f;
447 case 0: /* normal operation */
449 if (shift_state == 2 && c == 6)
452 else if (h_version == V1 && c == 1)
455 else if (h_version >= V2 && shift_state == 2 && c == 7)
459 outchar (alphabet (shift_state, c - 6));
464 else if (h_version >= V2 && c == 1)
467 else if (h_version >= V3 && c <= 3)
472 shift_state = (shift_lock + (c & 1) + 1) % 3;
474 if (h_version <= V2 && c >= 4)
475 shift_lock = shift_state;
481 shift_state = shift_lock;
485 case 1: /* abbreviation */
487 ptr_addr = h_abbreviations + 64 * (prev_c - 1) + 2 * c;
489 LOW_WORD (ptr_addr, abbr_addr)
490 decode_text (ABBREVIATION, abbr_addr);
495 case 2: /* ZSCII character - first part */
500 case 3: /* ZSCII character - second part */
502 c2 = translate_from_zscii ((prev_c << 5) | c);
514 } while (!(code & 0x8000));
516 if (st == VOCABULARY)
524 * z_new_line, print a new line.
530 void z_new_line (void)
538 * z_print, print a string embedded in the instruction stream.
547 decode_text (EMBEDDED_STRING, 0);
552 * z_print_addr, print a string from the lower 64KB.
554 * zargs[0] = address of string to print
558 void z_print_addr (void)
561 decode_text (LOW_STRING, zargs[0]);
566 * z_print_char, print a single ZSCII character.
568 * zargs[0] = ZSCII character to be printed
572 void z_print_char (void)
575 print_char (translate_from_zscii (zargs[0]));
580 * z_print_form, print a formatted table.
582 * zargs[0] = address of formatted table to be printed
586 void z_print_form (void)
589 zword addr = zargs[0];
595 LOW_WORD (addr, count)
611 print_char (translate_from_zscii (c));
624 * Print a signed 16bit number.
628 void print_num (zword value)
634 if ((short) value < 0) {
636 value = - (short) value;
639 /* Print absolute value */
641 for (i = 10000; i != 0; i /= 10)
642 if (value >= i || i == 1)
643 print_char ('0' + (value / i) % 10);
648 * z_print_num, print a signed number.
650 * zargs[0] = number to print
654 void z_print_num (void)
657 print_num (zargs[0]);
664 * Print an object description.
668 void print_object (zword object)
670 zword addr = object_name (object);
674 LOW_BYTE (addr, length)
678 LOW_WORD (addr, code)
680 if (code == 0x94a5) { /* encoded text 0x94a5 == empty string */
682 print_string ("object#"); /* supply a generic name */
683 print_num (object); /* for anonymous objects */
685 } else decode_text (LOW_STRING, addr);
690 * z_print_obj, print an object description.
692 * zargs[0] = number of object to be printed
696 void z_print_obj (void)
699 print_object (zargs[0]);
704 * z_print_paddr, print the string at the given packed address.
706 * zargs[0] = packed address of string to be printed
710 void z_print_paddr (void)
713 decode_text (HIGH_STRING, zargs[0]);
718 * z_print_ret, print the string at PC, print newline then return true.
724 void z_print_ret (void)
727 decode_text (EMBEDDED_STRING, 0);
736 * Print a string of ASCII characters.
740 void print_string (const char *s)
744 while ((c = *s++) != 0)
760 void z_print_unicode (void)
763 print_char ((zargs[0] <= 0xff) ? zargs[0] : '?');
765 }/* z_print_unicode */
770 * Scan a dictionary searching for the given word. The first argument
773 * 0x00 - find the first word which is >= the given one
774 * 0x05 - find the word which exactly matches the given one
775 * 0x1f - find the last word which is <= the given one
777 * The return value is 0 if the search fails.
781 static zword lookup_text (int padding, zword dct)
789 int resolution = (h_version <= V3) ? 2 : 3;
795 encode_text (padding);
797 LOW_BYTE (dct, sep_count) /* skip word separators */
798 dct += 1 + sep_count;
799 LOW_BYTE (dct, entry_len) /* get length of entries */
801 LOW_WORD (dct, entry_count) /* get number of entries */
804 if ((short) entry_count < 0) { /* bad luck, entries aren't sorted */
806 entry_count = - (short) entry_count;
809 } else sorted = TRUE; /* entries are sorted */
812 upper = entry_count - 1;
814 while (lower <= upper) {
816 if (sorted) /* binary search */
817 entry_number = (lower + upper) / 2;
818 else /* linear search */
819 entry_number = lower;
821 entry_addr = dct + entry_number * entry_len;
823 /* Compare word to dictionary entry */
827 for (i = 0; i < resolution; i++) {
828 LOW_WORD (addr, entry)
829 if (encoded[i] != entry)
834 return entry_addr; /* exact match found, return now */
838 if (sorted) /* binary search */
840 if (encoded[i] > entry)
841 lower = entry_number + 1;
843 upper = entry_number - 1;
845 else lower++; /* linear search */
849 /* No exact match has been found */
854 entry_number = (padding == 0x00) ? lower : upper;
856 if (entry_number == -1 || entry_number == entry_count)
859 return dct + entry_number * entry_len;
866 * Translate a single word to a token and append it to the token
867 * buffer. Every token consists of the address of the dictionary
868 * entry, the length of the word and the offset of the word from
869 * the start of the text buffer. Unknown words cause empty slots
870 * if the flag is set (such that the text can be scanned several
871 * times with different dictionaries); otherwise they are zero.
875 static void tokenise_text (zword text, zword length, zword from, zword parse, zword dct, bool flag)
878 zbyte token_max, token_count;
880 LOW_BYTE (parse, token_max)
882 LOW_BYTE (parse, token_count)
884 if (token_count < token_max) { /* sufficient space left for token? */
886 storeb (parse++, token_count + 1);
888 load_string ((zword) (text + from), length);
890 addr = lookup_text (0x05, dct);
892 if (addr != 0 || !flag) {
894 parse += 4 * token_count;
896 storew ((zword) (parse + 0), addr);
897 storeb ((zword) (parse + 2), length);
898 storeb ((zword) (parse + 3), from);
909 * Split an input line into words and translate the words to tokens.
913 void tokenise_line (zword text, zword token, zword dct, bool flag)
920 length = 0; /* makes compilers shut up */
922 /* Use standard dictionary if the given dictionary is zero */
927 /* Remove all tokens before inserting new ones */
929 storeb ((zword) (token + 1), 0);
931 /* Move the first pointer across the text buffer searching for the
932 beginning of a word. If this succeeds, store the position in a
933 second pointer. Move the first pointer searching for the end of
934 the word. When it is found, "tokenise" the word. Continue until
935 the end of the buffer is reached. */
940 if (h_version >= V5) {
942 LOW_BYTE (addr1, length)
951 /* Fetch next ZSCII character */
955 if (h_version >= V5 && addr1 == text + 2 + length)
960 /* Check for separator */
964 LOW_BYTE (sep_addr, sep_count)
969 LOW_BYTE (sep_addr, separator)
972 } while (c != separator && --sep_count != 0);
974 /* This could be the start or the end of a word */
976 if (sep_count == 0 && c != ' ' && c != 0) {
981 } else if (addr2 != 0) {
985 (zword) (addr1 - addr2),
986 (zword) (addr2 - text),
993 /* Translate separator (which is a word in its own right) */
1000 (zword) (addr1 - text),
1005 }/* tokenise_line */
1008 * z_tokenise, make a lexical analysis of a ZSCII string.
1010 * zargs[0] = address of string to analyze
1011 * zargs[1] = address of token buffer
1012 * zargs[2] = address of dictionary (optional)
1013 * zargs[3] = set when unknown words cause empty slots (optional)
1017 void z_tokenise (void)
1020 /* Supply default arguments */
1027 /* Call tokenise_line to do the real work */
1029 tokenise_line (zargs[0], zargs[1], zargs[2], zargs[3] != 0);
1036 * Scan the vocabulary to complete the last word on the input line
1037 * (similar to "tcsh" under Unix). The return value is
1039 * 2 ==> completion is impossible
1040 * 1 ==> completion is ambiguous
1041 * 0 ==> completion is successful
1043 * The function also returns a string in its second argument. In case
1044 * of 2, the string is empty; in case of 1, the string is the longest
1045 * extension of the last word on the input line that is common to all
1046 * possible completions (for instance, if the last word on the input
1047 * is "fo" and its only possible completions are "follow" and "folly"
1048 * then the string is "ll"); in case of 0, the string is an extension
1049 * to the last word that results in the only possible completion.
1053 int completion (const zchar *buffer, zchar *result)
1064 /* Copy last word to "decoded" string */
1068 while ((c = *buffer++) != 0)
1079 /* Search the dictionary for first and last possible extensions */
1081 minaddr = lookup_text (0x00, h_dictionary);
1082 maxaddr = lookup_text (0x1f, h_dictionary);
1084 if (minaddr == 0 || maxaddr == 0 || minaddr > maxaddr)
1087 /* Copy first extension to "result" string */
1089 decode_text (VOCABULARY, minaddr);
1093 for (i = len; (c = decoded[i]) != 0; i++)
1097 /* Merge second extension with "result" string */
1099 decode_text (VOCABULARY, maxaddr);
1101 for (i = len, ptr = result; (c = decoded[i]) != 0; i++, ptr++)
1102 if (*ptr != c) break;
1105 /* Search was ambiguous or successful */
1107 return (minaddr == maxaddr) ? 0 : 1;