interpreters/frotz/text.c

   1 /* text.c - Text manipulation functions
   2  *      Copyright (c) 1995-1997 Stefan Jokisch
   3  *
   4  * This file is part of Frotz.
   5  *
   6  * Frotz is free software; you can redistribute it and/or modify
   7  * it under the terms of the GNU General Public License as published by
   8  * the Free Software Foundation; either version 2 of the License, or
   9  * (at your option) any later version.
  10  *
  11  * Frotz is distributed in the hope that it will be useful,
  12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  14  * GNU General Public License for more details.
  15  *
  16  * You should have received a copy of the GNU General Public License
  17  * along with this program; if not, write to the Free Software
  18  * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
  19  */
  20
  21 #include "frotz.h"
  22
  23 enum string_type {
  24     LOW_STRING, ABBREVIATION, HIGH_STRING, EMBEDDED_STRING, VOCABULARY
  25 };
  26
  27 extern zword object_name (zword);
  28
  29 static zchar decoded[10];
  30 static zword encoded[3];
  31
  32 /*
  33  * According to Matteo De Luigi <matteo.de.luigi@libero.it>,
  34  * 0xab and 0xbb were in each other's proper positions.
  35  *   Sat Apr 21, 2001
  36  */
  37 static zchar zscii_to_latin1[] = {
  38     0xe4, 0xf6, 0xfc, 0xc4, 0xd6, 0xdc, 0xdf, 0xbb,
  39     0xab, 0xeb, 0xef, 0xff, 0xcb, 0xcf, 0xe1, 0xe9,
  40     0xed, 0xf3, 0xfa, 0xfd, 0xc1, 0xc9, 0xcd, 0xd3,
  41     0xda, 0xdd, 0xe0, 0xe8, 0xec, 0xf2, 0xf9, 0xc0,
  42     0xc8, 0xcc, 0xd2, 0xd9, 0xe2, 0xea, 0xee, 0xf4,
  43     0xfb, 0xc2, 0xca, 0xce, 0xd4, 0xdb, 0xe5, 0xc5,
  44     0xf8, 0xd8, 0xe3, 0xf1, 0xf5, 0xc3, 0xd1, 0xd5,
  45     0xe6, 0xc6, 0xe7, 0xc7, 0xfe, 0xf0, 0xde, 0xd0,
  46     0xa3, 0x00, 0x00, 0xa1, 0xbf
  47 };
  48
  49 /*
  50  * translate_from_zscii
  51  *
  52  * Map a ZSCII character onto the ISO Latin-1 alphabet.
  53  *
  54  */
  55
  56 zchar translate_from_zscii (zbyte c)
  57 {
  58
  59     if (c == 0xfc)
  60         return ZC_MENU_CLICK;
  61     if (c == 0xfd)
  62         return ZC_DOUBLE_CLICK;
  63     if (c == 0xfe)
  64         return ZC_SINGLE_CLICK;
  65
  66     if (c >= 0x9b && story_id != BEYOND_ZORK) {
  67
  68         if (hx_unicode_table != 0) {    /* game has its own Unicode table */
  69
  70             zbyte N;
  71
  72             LOW_BYTE (hx_unicode_table, N)
  73
  74             if (c - 0x9b < N) {
  75
  76                 zword addr = hx_unicode_table + 1 + 2 * (c - 0x9b);
  77                 zword unicode;
  78
  79                 LOW_WORD (addr, unicode)
  80
  81                 return (unicode < 0x100) ? (zchar) unicode : '?';
  82
  83             } else return '?';
  84
  85         } else                          /* game uses standard set */
  86
  87             if (c <= 0xdf) {
  88
  89                 if (c == 0xdc || c == 0xdd)     /* Oe and oe ligatures */
  90                     return '?';                 /* are not ISO-Latin 1 */
  91
  92                 return zscii_to_latin1[c - 0x9b];
  93
  94             } else return '?';
  95     }
  96
  97     return c;
  98
  99 }/* translate_from_zscii */
 100
 101 /*
 102  * translate_to_zscii
 103  *
 104  * Map an ISO Latin-1 character onto the ZSCII alphabet.
 105  *
 106  */
 107
 108 zbyte translate_to_zscii (zchar c)
 109 {
 110     int i;
 111
 112     if (c == ZC_SINGLE_CLICK)
 113         return 0xfe;
 114     if (c == ZC_DOUBLE_CLICK)
 115         return 0xfd;
 116     if (c == ZC_MENU_CLICK)
 117         return 0xfc;
 118
 119     if (c >= ZC_LATIN1_MIN) {
 120
 121         if (hx_unicode_table != 0) {    /* game has its own Unicode table */
 122
 123             zbyte N;
 124             int i;
 125
 126             LOW_BYTE (hx_unicode_table, N)
 127
 128             for (i = 0x9b; i < 0x9b + N; i++) {
 129
 130                 zword addr = hx_unicode_table + 1 + 2 * (i - 0x9b);
 131                 zword unicode;
 132
 133                 LOW_WORD (addr, unicode)
 134
 135                 if (c == unicode)
 136                     return (zbyte) i;
 137
 138             }
 139
 140             return '?';
 141
 142         } else {                        /* game uses standard set */
 143
 144             for (i = 0x9b; i <= 0xdf; i++)
 145                 if (c == zscii_to_latin1[i - 0x9b])
 146                     return (zbyte) i;
 147
 148             return '?';
 149
 150         }
 151     }
 152
 153     if (c == 0)         /* Safety thing from David Kinder */
 154         c = '?';        /* regarding his Unicode patches */
 155                         /* Sept 15, 2002 */
 156
 157     return c;
 158
 159 }/* translate_to_zscii */
 160
 161 /*
 162  * alphabet
 163  *
 164  * Return a character from one of the three character sets.
 165  *
 166  */
 167
 168 static zchar alphabet (int set, int index)
 169 {
 170
 171     if (h_alphabet != 0) {      /* game uses its own alphabet */
 172
 173         zbyte c;
 174
 175         zword addr = h_alphabet + 26 * set + index;
 176         LOW_BYTE (addr, c)
 177
 178         return translate_from_zscii (c);
 179
 180     } else                      /* game uses default alphabet */
 181
 182         if (set == 0)
 183             return 'a' + index;
 184         else if (set == 1)
 185             return 'A' + index;
 186         else if (h_version == V1)
 187             return " 0123456789.,!?_#'\"/\\<-:()"[index];
 188         else
 189             return " ^0123456789.,!?_#'\"/\\-:()"[index];
 190
 191 }/* alphabet */
 192
 193 /*
 194  * load_string
 195  *
 196  * Copy a ZSCII string from the memory to the global "decoded" string.
 197  *
 198  */
 199
 200 static void load_string (zword addr, zword length)
 201 {
 202     int resolution = (h_version <= V3) ? 2 : 3;
 203     int i = 0;
 204
 205     while (i < 3 * resolution)
 206
 207         if (i < length) {
 208
 209             zbyte c;
 210
 211             LOW_BYTE (addr, c)
 212             addr++;
 213
 214             decoded[i++] = translate_from_zscii (c);
 215
 216         } else decoded[i++] = 0;
 217
 218 }/* load_string */
 219
 220 /*
 221  * encode_text
 222  *
 223  * Encode the Unicode text in the global "decoded" string then write
 224  * the result to the global "encoded" array. (This is used to look up
 225  * words in the dictionary.) Up to V3 the vocabulary resolution is
 226  * two, since V4 it is three words. Because each word contains three
 227  * Z-characters, that makes six or nine Z-characters respectively.
 228  * Longer words are chopped to the proper size, shorter words are are
 229  * padded out with 5's. For word completion we pad with 0s and 31s,
 230  * the minimum and maximum Z-characters.
 231  *
 232  */
 233
 234 static void encode_text (int padding)
 235 {
 236     static zchar again[] = { 'a', 'g', 'a', 'i', 'n', 0 };
 237     static zchar examine[] = { 'e', 'x', 'a', 'm', 'i', 'n', 'e', 0 };
 238     static zchar wait[] = { 'w', 'a', 'i', 't', 0 };
 239
 240     zbyte zchars[12];
 241     const zchar *ptr = decoded;
 242     zchar c;
 243     int resolution = (h_version <= V3) ? 2 : 3;
 244     int i = 0;
 245
 246     /* Expand abbreviations that some old Infocom games lack */
 247
 248     if (f_setup.expand_abbreviations)
 249
 250         if (padding == 0x05 && decoded[1] == 0)
 251
 252             switch (decoded[0]) {
 253                 case 'g': ptr = again; break;
 254                 case 'x': ptr = examine; break;
 255                 case 'z': ptr = wait; break;
 256             }
 257
 258     /* Translate string to a sequence of Z-characters */
 259
 260     while (i < 3 * resolution)
 261
 262         if ((c = *ptr++) != 0) {
 263
 264             int index, set;
 265             zbyte c2;
 266
 267             /* Search character in the alphabet */
 268
 269             for (set = 0; set < 3; set++)
 270                 for (index = 0; index < 26; index++)
 271                     if (c == alphabet (set, index))
 272                         goto letter_found;
 273
 274             /* Character not found, store its ZSCII value */
 275
 276             c2 = translate_to_zscii (c);
 277
 278             zchars[i++] = 5;
 279             zchars[i++] = 6;
 280             zchars[i++] = c2 >> 5;
 281             zchars[i++] = c2 & 0x1f;
 282
 283             continue;
 284
 285         letter_found:
 286
 287             /* Character found, store its index */
 288
 289             if (set != 0)
 290                 zchars[i++] = ((h_version <= V2) ? 1 : 3) + set;
 291
 292             zchars[i++] = index + 6;
 293
 294         } else zchars[i++] = padding;
 295
 296     /* Three Z-characters make a 16bit word */
 297
 298     for (i = 0; i < resolution; i++)
 299
 300         encoded[i] =
 301             (zchars[3 * i + 0] << 10) |
 302             (zchars[3 * i + 1] << 5) |
 303             (zchars[3 * i + 2]);
 304
 305     encoded[resolution - 1] |= 0x8000;
 306
 307 }/* encode_text */
 308
 309 /*
 310  * z_check_unicode, test if a unicode character can be read and printed.
 311  *
 312  *      zargs[0] = Unicode
 313  *
 314  */
 315
 316 void z_check_unicode (void)
 317 {
 318     zword c = zargs[0];
 319
 320     if (c >= 0x20 && c <= 0x7e)
 321         store (3);
 322     else if (c == 0xa0)
 323         store (1);
 324     else if (c >= 0xa1 && c <= 0xff)
 325         store (3);
 326     else
 327         store (0);
 328
 329 }/* z_check_unicode */
 330
 331 /*
 332  * z_encode_text, encode a ZSCII string for use in a dictionary.
 333  *
 334  *      zargs[0] = address of text buffer
 335  *      zargs[1] = length of ASCII string
 336  *      zargs[2] = offset of ASCII string within the text buffer
 337  *      zargs[3] = address to store encoded text in
 338  *
 339  * This is a V5+ opcode and therefore the dictionary resolution must be
 340  * three 16bit words.
 341  *
 342  */
 343
 344 void z_encode_text (void)
 345 {
 346     int i;
 347
 348     load_string ((zword) (zargs[0] + zargs[2]), zargs[1]);
 349
 350     encode_text (0x05);
 351
 352     for (i = 0; i < 3; i++)
 353         storew ((zword) (zargs[3] + 2 * i), encoded[i]);
 354
 355 }/* z_encode_text */
 356
 357 /*
 358  * decode_text
 359  *
 360  * Convert encoded text to Unicode. The encoded text consists of 16bit
 361  * words. Every word holds 3 Z-characters (5 bits each) plus a spare
 362  * bit to mark the last word. The Z-characters translate to ZSCII by
 363  * looking at the current current character set. Some select another
 364  * character set, others refer to abbreviations.
 365  *
 366  * There are several different string types:
 367  *
 368  *    LOW_STRING - from the lower 64KB (byte address)
 369  *    ABBREVIATION - from the abbreviations table (word address)
 370  *    HIGH_STRING - from the end of the memory map (packed address)
 371  *    EMBEDDED_STRING - from the instruction stream (at PC)
 372  *    VOCABULARY - from the dictionary (byte address)
 373  *
 374  * The last type is only used for word completion.
 375  *
 376  */
 377
 378 #define outchar(c)      if (st==VOCABULARY) *ptr++=c; else print_char(c)
 379
 380 static void decode_text (enum string_type st, zword addr)
 381 {
 382     zchar *ptr;
 383     long byte_addr;
 384     zchar c2;
 385     zword code;
 386     zbyte c, prev_c = 0;
 387     int shift_state = 0;
 388     int shift_lock = 0;
 389     int status = 0;
 390
 391     ptr = NULL;         /* makes compilers shut up */
 392     byte_addr = 0;
 393
 394     /* Calculate the byte address if necessary */
 395
 396     if (st == ABBREVIATION)
 397
 398         byte_addr = (long) addr << 1;
 399
 400     else if (st == HIGH_STRING) {
 401
 402         if (h_version <= V3)
 403             byte_addr = (long) addr << 1;
 404         else if (h_version <= V5)
 405             byte_addr = (long) addr << 2;
 406         else if (h_version <= V7)
 407             byte_addr = ((long) addr << 2) + ((long) h_strings_offset << 3);
 408         else /* h_version == V8 */
 409             byte_addr = (long) addr << 3;
 410
 411         if (byte_addr >= story_size)
 412             runtime_error (ERR_ILL_PRINT_ADDR);
 413
 414     }
 415
 416     /* Loop until a 16bit word has the highest bit set */
 417
 418     if (st == VOCABULARY)
 419         ptr = decoded;
 420
 421     do {
 422
 423         int i;
 424
 425         /* Fetch the next 16bit word */
 426
 427         if (st == LOW_STRING || st == VOCABULARY) {
 428             LOW_WORD (addr, code)
 429             addr += 2;
 430         } else if (st == HIGH_STRING || st == ABBREVIATION) {
 431             HIGH_WORD (byte_addr, code)
 432             byte_addr += 2;
 433         } else
 434             CODE_WORD (code)
 435
 436         /* Read its three Z-characters */
 437
 438         for (i = 10; i >= 0; i -= 5) {
 439
 440             zword abbr_addr;
 441             zword ptr_addr;
 442
 443             c = (code >> i) & 0x1f;
 444
 445             switch (status) {
 446
 447             case 0:     /* normal operation */
 448
 449                 if (shift_state == 2 && c == 6)
 450                     status = 2;
 451
 452                 else if (h_version == V1 && c == 1)
 453                     new_line ();
 454
 455                 else if (h_version >= V2 && shift_state == 2 && c == 7)
 456                     new_line ();
 457
 458                 else if (c >= 6)
 459                     outchar (alphabet (shift_state, c - 6));
 460
 461                 else if (c == 0)
 462                     outchar (' ');
 463
 464                 else if (h_version >= V2 && c == 1)
 465                     status = 1;
 466
 467                 else if (h_version >= V3 && c <= 3)
 468                     status = 1;
 469
 470                 else {
 471
 472                     shift_state = (shift_lock + (c & 1) + 1) % 3;
 473
 474                     if (h_version <= V2 && c >= 4)
 475                         shift_lock = shift_state;
 476
 477                     break;
 478
 479                 }
 480
 481                 shift_state = shift_lock;
 482
 483                 break;
 484
 485             case 1:     /* abbreviation */
 486
 487                 ptr_addr = h_abbreviations + 64 * (prev_c - 1) + 2 * c;
 488
 489                 LOW_WORD (ptr_addr, abbr_addr)
 490                 decode_text (ABBREVIATION, abbr_addr);
 491
 492                 status = 0;
 493                 break;
 494
 495             case 2:     /* ZSCII character - first part */
 496
 497                 status = 3;
 498                 break;
 499
 500             case 3:     /* ZSCII character - second part */
 501
 502                 c2 = translate_from_zscii ((prev_c << 5) | c);
 503                 outchar (c2);
 504
 505                 status = 0;
 506                 break;
 507
 508             }
 509
 510             prev_c = c;
 511
 512         }
 513
 514     } while (!(code & 0x8000));
 515
 516     if (st == VOCABULARY)
 517         *ptr = 0;
 518
 519 }/* decode_text */
 520
 521 #undef outchar
 522
 523 /*
 524  * z_new_line, print a new line.
 525  *
 526  *      no zargs used
 527  *
 528  */
 529
 530 void z_new_line (void)
 531 {
 532
 533     new_line ();
 534
 535 }/* z_new_line */
 536
 537 /*
 538  * z_print, print a string embedded in the instruction stream.
 539  *
 540  *      no zargs used
 541  *
 542  */
 543
 544 void z_print (void)
 545 {
 546
 547     decode_text (EMBEDDED_STRING, 0);
 548
 549 }/* z_print */
 550
 551 /*
 552  * z_print_addr, print a string from the lower 64KB.
 553  *
 554  *      zargs[0] = address of string to print
 555  *
 556  */
 557
 558 void z_print_addr (void)
 559 {
 560
 561     decode_text (LOW_STRING, zargs[0]);
 562
 563 }/* z_print_addr */
 564
 565 /*
 566  * z_print_char print a single ZSCII character.
 567  *
 568  *      zargs[0] = ZSCII character to be printed
 569  *
 570  */
 571
 572 void z_print_char (void)
 573 {
 574
 575     print_char (translate_from_zscii (zargs[0]));
 576
 577 }/* z_print_char */
 578
 579 /*
 580  * z_print_form, print a formatted table.
 581  *
 582  *      zargs[0] = address of formatted table to be printed
 583  *
 584  */
 585
 586 void z_print_form (void)
 587 {
 588     zword count;
 589     zword addr = zargs[0];
 590
 591     bool first = TRUE;
 592
 593     for (;;) {
 594
 595         LOW_WORD (addr, count)
 596         addr += 2;
 597
 598         if (count == 0)
 599             break;
 600
 601         if (!first)
 602             new_line ();
 603
 604         while (count--) {
 605
 606             zbyte c;
 607
 608             LOW_BYTE (addr, c)
 609             addr++;
 610
 611             print_char (translate_from_zscii (c));
 612
 613         }
 614
 615         first = FALSE;
 616
 617     }
 618
 619 }/* z_print_form */
 620
 621 /*
 622  * print_num
 623  *
 624  * Print a signed 16bit number.
 625  *
 626  */
 627
 628 void print_num (zword value)
 629 {
 630     int i;
 631
 632     /* Print sign */
 633
 634     if ((short) value < 0) {
 635         print_char ('-');
 636         value = - (short) value;
 637     }
 638
 639     /* Print absolute value */
 640
 641     for (i = 10000; i != 0; i /= 10)
 642         if (value >= i || i == 1)
 643             print_char ('0' + (value / i) % 10);
 644
 645 }/* print_num */
 646
 647 /*
 648  * z_print_num, print a signed number.
 649  *
 650  *      zargs[0] = number to print
 651  *
 652  */
 653
 654 void z_print_num (void)
 655 {
 656
 657     print_num (zargs[0]);
 658
 659 }/* z_print_num */
 660
 661 /*
 662  * print_object
 663  *
 664  * Print an object description.
 665  *
 666  */
 667
 668 void print_object (zword object)
 669 {
 670     zword addr = object_name (object);
 671     zword code = 0x94a5;
 672     zbyte length;
 673
 674     LOW_BYTE (addr, length)
 675     addr++;
 676
 677     if (length != 0)
 678         LOW_WORD (addr, code)
 679
 680     if (code == 0x94a5) {       /* encoded text 0x94a5 == empty string */
 681
 682         print_string ("object#");       /* supply a generic name */
 683         print_num (object);             /* for anonymous objects */
 684
 685     } else decode_text (LOW_STRING, addr);
 686
 687 }/* print_object */
 688
 689 /*
 690  * z_print_obj, print an object description.
 691  *
 692  *      zargs[0] = number of object to be printed
 693  *
 694  */
 695
 696 void z_print_obj (void)
 697 {
 698
 699     print_object (zargs[0]);
 700
 701 }/* z_print_obj */
 702
 703 /*
 704  * z_print_paddr, print the string at the given packed address.
 705  *
 706  *      zargs[0] = packed address of string to be printed
 707  *
 708  */
 709
 710 void z_print_paddr (void)
 711 {
 712
 713     decode_text (HIGH_STRING, zargs[0]);
 714
 715 }/* z_print_paddr */
 716
 717 /*
 718  * z_print_ret, print the string at PC, print newline then return true.
 719  *
 720  *      no zargs used
 721  *
 722  */
 723
 724 void z_print_ret (void)
 725 {
 726
 727     decode_text (EMBEDDED_STRING, 0);
 728     new_line ();
 729     ret (1);
 730
 731 }/* z_print_ret */
 732
 733 /*
 734  * print_string
 735  *
 736  * Print a string of ASCII characters.
 737  *
 738  */
 739
 740 void print_string (const char *s)
 741 {
 742     char c;
 743
 744     while ((c = *s++) != 0)
 745
 746         if (c == '\n')
 747             new_line ();
 748         else
 749             print_char (c);
 750
 751 }/* print_string */
 752
 753 /*
 754  * z_print_unicode
 755  *
 756  *      zargs[0] = Unicode
 757  *
 758  */
 759
 760 void z_print_unicode (void)
 761 {
 762
 763     print_char ((zargs[0] <= 0xff) ? zargs[0] : '?');
 764
 765 }/* z_print_unicode */
 766
 767 /*
 768  * lookup_text
 769  *
 770  * Scan a dictionary searching for the given word. The first argument
 771  * can be
 772  *
 773  * 0x00 - find the first word which is >= the given one
 774  * 0x05 - find the word which exactly matches the given one
 775  * 0x1f - find the last word which is <= the given one
 776  *
 777  * The return value is 0 if the search fails.
 778  *
 779  */
 780
 781 static zword lookup_text (int padding, zword dct)
 782 {
 783     zword entry_addr;
 784     zword entry_count;
 785     zword entry;
 786     zword addr;
 787     zbyte entry_len;
 788     zbyte sep_count;
 789     int resolution = (h_version <= V3) ? 2 : 3;
 790     int entry_number;
 791     int lower, upper;
 792     int i;
 793     bool sorted;
 794
 795     encode_text (padding);
 796
 797     LOW_BYTE (dct, sep_count)           /* skip word separators */
 798     dct += 1 + sep_count;
 799     LOW_BYTE (dct, entry_len)           /* get length of entries */
 800     dct += 1;
 801     LOW_WORD (dct, entry_count)         /* get number of entries */
 802     dct += 2;
 803
 804     if ((short) entry_count < 0) {      /* bad luck, entries aren't sorted */
 805
 806         entry_count = - (short) entry_count;
 807         sorted = FALSE;
 808
 809     } else sorted = TRUE;               /* entries are sorted */
 810
 811     lower = 0;
 812     upper = entry_count - 1;
 813
 814     while (lower <= upper) {
 815
 816         if (sorted)                             /* binary search */
 817             entry_number = (lower + upper) / 2;
 818         else                                    /* linear search */
 819             entry_number = lower;
 820
 821         entry_addr = dct + entry_number * entry_len;
 822
 823         /* Compare word to dictionary entry */
 824
 825         addr = entry_addr;
 826
 827         for (i = 0; i < resolution; i++) {
 828             LOW_WORD (addr, entry)
 829             if (encoded[i] != entry)
 830                 goto continuing;
 831             addr += 2;
 832         }
 833
 834         return entry_addr;              /* exact match found, return now */
 835
 836     continuing:
 837
 838         if (sorted)                             /* binary search */
 839
 840             if (encoded[i] > entry)
 841                 lower = entry_number + 1;
 842             else
 843                 upper = entry_number - 1;
 844
 845         else lower++;                           /* linear search */
 846
 847     }
 848
 849     /* No exact match has been found */
 850
 851     if (padding == 0x05)
 852         return 0;
 853
 854     entry_number = (padding == 0x00) ? lower : upper;
 855
 856     if (entry_number == -1 || entry_number == entry_count)
 857         return 0;
 858
 859     return dct + entry_number * entry_len;
 860
 861 }/* lookup_text */
 862
 863 /*
 864  * tokenise_text
 865  *
 866  * Translate a single word to a token and append it to the token
 867  * buffer. Every token consists of the address of the dictionary
 868  * entry, the length of the word and the offset of the word from
 869  * the start of the text buffer. Unknown words cause empty slots
 870  * if the flag is set (such that the text can be scanned several
 871  * times with different dictionaries); otherwise they are zero.
 872  *
 873  */
 874
 875 static void tokenise_text (zword text, zword length, zword from, zword parse, zword dct, bool flag)
 876 {
 877     zword addr;
 878     zbyte token_max, token_count;
 879
 880     LOW_BYTE (parse, token_max)
 881     parse++;
 882     LOW_BYTE (parse, token_count)
 883
 884     if (token_count < token_max) {      /* sufficient space left for token? */
 885
 886         storeb (parse++, token_count + 1);
 887
 888         load_string ((zword) (text + from), length);
 889
 890         addr = lookup_text (0x05, dct);
 891
 892         if (addr != 0 || !flag) {
 893
 894             parse += 4 * token_count;
 895
 896             storew ((zword) (parse + 0), addr);
 897             storeb ((zword) (parse + 2), length);
 898             storeb ((zword) (parse + 3), from);
 899
 900         }
 901
 902     }
 903
 904 }/* tokenise_text */
 905
 906 /*
 907  * tokenise_line
 908  *
 909  * Split an input line into words and translate the words to tokens.
 910  *
 911  */
 912
 913 void tokenise_line (zword text, zword token, zword dct, bool flag)
 914 {
 915     zword addr1;
 916     zword addr2;
 917     zbyte length;
 918     zbyte c;
 919
 920     length = 0;         /* makes compilers shut up */
 921
 922     /* Use standard dictionary if the given dictionary is zero */
 923
 924     if (dct == 0)
 925         dct = h_dictionary;
 926
 927     /* Remove all tokens before inserting new ones */
 928
 929     storeb ((zword) (token + 1), 0);
 930
 931     /* Move the first pointer across the text buffer searching for the
 932        beginning of a word. If this succeeds, store the position in a
 933        second pointer. Move the first pointer searching for the end of
 934        the word. When it is found, "tokenise" the word. Continue until
 935        the end of the buffer is reached. */
 936
 937     addr1 = text;
 938     addr2 = 0;
 939
 940     if (h_version >= V5) {
 941         addr1++;
 942         LOW_BYTE (addr1, length)
 943     }
 944
 945     do {
 946
 947         zword sep_addr;
 948         zbyte sep_count;
 949         zbyte separator;
 950
 951         /* Fetch next ZSCII character */
 952
 953         addr1++;
 954
 955         if (h_version >= V5 && addr1 == text + 2 + length)
 956             c = 0;
 957         else
 958             LOW_BYTE (addr1, c)
 959
 960         /* Check for separator */
 961
 962         sep_addr = dct;
 963
 964         LOW_BYTE (sep_addr, sep_count)
 965         sep_addr++;
 966
 967         do {
 968
 969             LOW_BYTE (sep_addr, separator)
 970             sep_addr++;
 971
 972         } while (c != separator && --sep_count != 0);
 973
 974         /* This could be the start or the end of a word */
 975
 976         if (sep_count == 0 && c != ' ' && c != 0) {
 977
 978             if (addr2 == 0)
 979                 addr2 = addr1;
 980
 981         } else if (addr2 != 0) {
 982
 983             tokenise_text (
 984                 text,
 985                 (zword) (addr1 - addr2),
 986                 (zword) (addr2 - text),
 987                 token, dct, flag );
 988
 989             addr2 = 0;
 990
 991         }
 992
 993         /* Translate separator (which is a word in its own right) */
 994
 995         if (sep_count != 0)
 996
 997             tokenise_text (
 998                 text,
 999                 (zword) (1),
1000                 (zword) (addr1 - text),
1001                 token, dct, flag );
1002
1003     } while (c != 0);
1004
1005 }/* tokenise_line */
1006
1007 /*
1008  * z_tokenise, make a lexical analysis of a ZSCII string.
1009  *
1010  *      zargs[0] = address of string to analyze
1011  *      zargs[1] = address of token buffer
1012  *      zargs[2] = address of dictionary (optional)
1013  *      zargs[3] = set when unknown words cause empty slots (optional)
1014  *
1015  */
1016
1017 void z_tokenise (void)
1018 {
1019
1020     /* Supply default arguments */
1021
1022     if (zargc < 3)
1023         zargs[2] = 0;
1024     if (zargc < 4)
1025         zargs[3] = 0;
1026
1027     /* Call tokenise_line to do the real work */
1028
1029     tokenise_line (zargs[0], zargs[1], zargs[2], zargs[3] != 0);
1030
1031 }/* z_tokenise */
1032
1033 /*
1034  * completion
1035  *
1036  * Scan the vocabulary to complete the last word on the input line
1037  * (similar to "tcsh" under Unix). The return value is
1038  *
1039  *    2 ==> completion is impossible
1040  *    1 ==> completion is ambiguous
1041  *    0 ==> completion is successful
1042  *
1043  * The function also returns a string in its second argument. In case
1044  * of 2, the string is empty; in case of 1, the string is the longest
1045  * extension of the last word on the input line that is common to all
1046  * possible completions (for instance, if the last word on the input
1047  * is "fo" and its only possible completions are "follow" and "folly"
1048  * then the string is "ll"); in case of 0, the string is an extension
1049  * to the last word that results in the only possible completion.
1050  *
1051  */
1052
1053 int completion (const zchar *buffer, zchar *result)
1054 {
1055     zword minaddr;
1056     zword maxaddr;
1057     zchar *ptr;
1058     zchar c;
1059     int len;
1060     int i;
1061
1062     *result = 0;
1063
1064     /* Copy last word to "decoded" string */
1065
1066     len = 0;
1067
1068     while ((c = *buffer++) != 0)
1069
1070         if (c != ' ') {
1071
1072             if (len < 9)
1073                 decoded[len++] = c;
1074
1075         } else len = 0;
1076
1077     decoded[len] = 0;
1078
1079     /* Search the dictionary for first and last possible extensions */
1080
1081     minaddr = lookup_text (0x00, h_dictionary);
1082     maxaddr = lookup_text (0x1f, h_dictionary);
1083
1084     if (minaddr == 0 || maxaddr == 0 || minaddr > maxaddr)
1085         return 2;
1086
1087     /* Copy first extension to "result" string */
1088
1089     decode_text (VOCABULARY, minaddr);
1090
1091     ptr = result;
1092
1093     for (i = len; (c = decoded[i]) != 0; i++)
1094         *ptr++ = c;
1095     *ptr = 0;
1096
1097     /* Merge second extension with "result" string */
1098
1099     decode_text (VOCABULARY, maxaddr);
1100
1101     for (i = len, ptr = result; (c = decoded[i]) != 0; i++, ptr++)
1102         if (*ptr != c) break;
1103     *ptr = 0;
1104
1105     /* Search was ambiguous or successful */
1106
1107     return (minaddr == maxaddr) ? 0 : 1;
1108
1109 }/* completion */