2 * Copyright 2010-2012 Chris Spiegel.
4 * This file is part of Bocfel.
6 * Bocfel is free software: you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License, version
8 * 2 or 3, as published by the Free Software Foundation.
10 * Bocfel is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
15 * You should have received a copy of the GNU General Public License
16 * along with Bocfel. If not, see <http://www.gnu.org/licenses/>.
/* Default ZSCII to Unicode mapping for ZSCII values 155 and up.
 *
 * Entry N describes ZSCII value N + 155 (so entry 0 is ZSCII 155), and
 * holds the Unicode character which that ZSCII value maps to. Latin-1
 * and Unicode are equivalent for 0–255, so this one table serves both;
 * the caveat is that values greater than 255 are invalid in Latin-1,
 * and setup_tables() translates them to a question mark where
 * appropriate.
 */
#define UNICODE_TABLE_SIZE 97

/* Number of entries of unicode_table that are in use. */
static int unicode_entries = 69;

static uint16_t unicode_table[UNICODE_TABLE_SIZE] = {
  /* ZSCII 155–163: ä ö ü Ä Ö Ü ß » « */
  0x00e4, 0x00f6, 0x00fc, 0x00c4, 0x00d6, 0x00dc, 0x00df, 0x00bb, 0x00ab,

  /* ZSCII 164–168: ë ï ÿ Ë Ï */
  0x00eb, 0x00ef, 0x00ff, 0x00cb, 0x00cf,

  /* ZSCII 169–180: á é í ó ú ý Á É Í Ó Ú Ý */
  0x00e1, 0x00e9, 0x00ed, 0x00f3, 0x00fa, 0x00fd,
  0x00c1, 0x00c9, 0x00cd, 0x00d3, 0x00da, 0x00dd,

  /* ZSCII 181–190: à è ì ò ù À È Ì Ò Ù */
  0x00e0, 0x00e8, 0x00ec, 0x00f2, 0x00f9,
  0x00c0, 0x00c8, 0x00cc, 0x00d2, 0x00d9,

  /* ZSCII 191–200: â ê î ô û Â Ê Î Ô Û */
  0x00e2, 0x00ea, 0x00ee, 0x00f4, 0x00fb,
  0x00c2, 0x00ca, 0x00ce, 0x00d4, 0x00db,

  /* ZSCII 201–204: å Å ø Ø */
  0x00e5, 0x00c5, 0x00f8, 0x00d8,

  /* ZSCII 205–210: ã ñ õ Ã Ñ Õ */
  0x00e3, 0x00f1, 0x00f5, 0x00c3, 0x00d1, 0x00d5,

  /* ZSCII 211–214: æ Æ ç Ç */
  0x00e6, 0x00c6, 0x00e7, 0x00c7,

  /* ZSCII 215–218: þ ð Þ Ð */
  0x00fe, 0x00f0, 0x00de, 0x00d0,

  /* ZSCII 219–223: £ œ Œ ¡ ¿ */
  0x00a3, 0x0153, 0x0152, 0x00a1, 0x00bf,
};
51 /* If a non-default Unicode table is found, this function is called with
52 * its address; it updates the Unicode table above.
54 void parse_unicode_table(uint16_t utable)
56 if(utable >= memory_size) die("corrupted story: unicode table out of range");
58 unicode_entries = BYTE(utable++);
60 if(unicode_entries > UNICODE_TABLE_SIZE) die("corrupted story: too many entries in the unicode table");
61 if(utable + (2 * unicode_entries) >= memory_size) die("corrupted story: unicode table out of range");
63 for(int i = 0; i < unicode_entries; i++)
65 unicode_table[i] = WORD(utable + (2 * i));
/* ZSCII to Unicode lookup. This is only used for output, so ZSCII
 * values that are not meant for output come back as a question mark
 * (see setup_tables()).
 */
uint16_t zscii_to_unicode[UINT8_MAX + 1];

/* Unicode (or Latin-1) to ZSCII lookups. A character is translated
 * only if it is valid Unicode: that is, in the range 32–126, or 160
 * and above.
 *
 * The two tables differ in what an invalid character becomes:
 * unicode_to_zscii yields zero, unicode_to_zscii_q a question mark.
 */
uint8_t unicode_to_zscii  [UINT16_MAX + 1];
uint8_t unicode_to_zscii_q[UINT16_MAX + 1];

/* Convenience lookup from Unicode to Latin-1: values 0–255 pass
 * through unchanged, everything else yields a question mark.
 */
uint8_t unicode_to_latin1[UINT16_MAX + 1];

/* ZSCII to Unicode line-drawing/rune characters. */
uint16_t zscii_to_font3[UINT8_MAX + 1];

/* Reverse lookup for the alphabet table: the key is the character,
 * the value is its index in the alphabet table, or -1 if it is not in
 * the table.
 */
int atable_pos[UINT8_MAX + 1];
97 /* Not all fonts provide all characters, so there
98 * may well be a lot of question marks.
100 static void build_font3_table(void)
102 for(int i = 0; i < UINT8_MAX; i++) zscii_to_font3[i] = UNICODE_QUESTIONMARK;
104 zscii_to_font3[ 32] = UNICODE_SPACE;
105 zscii_to_font3[ 33] = 0x2190; /* ← */
106 zscii_to_font3[ 34] = 0x2192; /* → */
107 zscii_to_font3[ 35] = 0x2571; /* ╱ */
108 zscii_to_font3[ 36] = 0x2572; /* ╲ */
109 zscii_to_font3[ 37] = UNICODE_SPACE;
110 zscii_to_font3[ 38] = 0x2500; /* ─ */
111 zscii_to_font3[ 39] = 0x2500; /* ─ */
112 zscii_to_font3[ 40] = 0x2502; /* │ */
113 zscii_to_font3[ 41] = 0x2502; /* │ (this should be slightly offset, but whatever) */
114 zscii_to_font3[ 42] = 0x2534; /* ┴ */
115 zscii_to_font3[ 43] = 0x252c; /* ┬ */
116 zscii_to_font3[ 44] = 0x251c; /* ├ */
117 zscii_to_font3[ 45] = 0x2524; /* ┤ */
118 zscii_to_font3[ 46] = 0x2514; /* └ */
119 zscii_to_font3[ 47] = 0x250c; /* ┌ */
120 zscii_to_font3[ 48] = 0x2510; /* ┐ */
121 zscii_to_font3[ 49] = 0x2518; /* ┘ */
123 /* There are a few characters that have no box-drawing equivalents.
124 * These are the pieces that have connections sticking out of them,
125 * used to link rooms together. There are two options: have filled
126 * boxes with no connections which makes the rooms look nice but the
127 * connections look bad, or unfilled boxes with connections which
128 * results in bad looking rooms but attached connections. The end
129 * result is something like this:
131 * No connections: Connections:
138 * By default the former is done, but the latter can be chosen.
140 zscii_to_font3[ 50] = options.enable_alt_graphics ? 0x2571 : 0x2514; /* ╱ or └ */
141 zscii_to_font3[ 51] = options.enable_alt_graphics ? 0x2572 : 0x250c; /* ╲ or ┌ */
142 zscii_to_font3[ 52] = options.enable_alt_graphics ? 0x2571 : 0x2510; /* ╱ or ┐ */
143 zscii_to_font3[ 53] = options.enable_alt_graphics ? 0x2572 : 0x2518; /* ╲ or ┘ */
145 zscii_to_font3[ 54] = 0x2588; /* █ */
146 zscii_to_font3[ 56] = 0x2584; /* ▄ */
147 zscii_to_font3[ 55] = 0x2580; /* ▀ */
148 zscii_to_font3[ 57] = 0x258c; /* ▌ */
149 zscii_to_font3[ 58] = 0x2590; /* ▐ */
151 zscii_to_font3[ 59] = options.enable_alt_graphics ? 0x2534 : 0x2584; /* ┴ or ▄ */
152 zscii_to_font3[ 60] = options.enable_alt_graphics ? 0x252c : 0x2580; /* ┬ or ▀ */
153 zscii_to_font3[ 61] = options.enable_alt_graphics ? 0x251c : 0x258c; /* ├ or ▌ */
154 zscii_to_font3[ 62] = options.enable_alt_graphics ? 0x2524 : 0x2590; /* ┤ or ▐ */
156 zscii_to_font3[ 63] = 0x259d; /* ▝ */
157 zscii_to_font3[ 64] = 0x2597; /* ▗ */
158 zscii_to_font3[ 65] = 0x2596; /* ▖ */
159 zscii_to_font3[ 66] = 0x2598; /* ▘ */
161 zscii_to_font3[ 67] = options.enable_alt_graphics ? 0x2571 : 0x259d; /* ╱ or ▝ */
162 zscii_to_font3[ 68] = options.enable_alt_graphics ? 0x2572 : 0x2597; /* ╲ or ▗ */
163 zscii_to_font3[ 69] = options.enable_alt_graphics ? 0x2571 : 0x2596; /* ╱ or ▖ */
164 zscii_to_font3[ 70] = options.enable_alt_graphics ? 0x2572 : 0x2598; /* ╲ or ▘ */
166 zscii_to_font3[ 75] = 0x2594; /* ▔ */
167 zscii_to_font3[ 76] = 0x2581; /* ▁ */
168 zscii_to_font3[ 77] = 0x258f; /* ▏ */
169 zscii_to_font3[ 78] = 0x2595; /* ▕ */
171 zscii_to_font3[ 79] = UNICODE_SPACE;
172 zscii_to_font3[ 80] = 0x258f; /* ▏ */
173 zscii_to_font3[ 81] = 0x258e; /* ▎ */
174 zscii_to_font3[ 82] = 0x258d; /* ▍ */
175 zscii_to_font3[ 83] = 0x258c; /* ▌ */
176 zscii_to_font3[ 84] = 0x258b; /* ▋ */
177 zscii_to_font3[ 85] = 0x258a; /* ▊ */
178 zscii_to_font3[ 86] = 0x2589; /* ▉ */
179 zscii_to_font3[ 87] = 0x2588; /* █ */
180 zscii_to_font3[ 88] = 0x2595; /* ▕ */
181 zscii_to_font3[ 89] = 0x258f; /* ▏ */
183 zscii_to_font3[ 90] = 0x2573; /* ╳ */
184 zscii_to_font3[ 91] = 0x253c; /* ┼ */
185 zscii_to_font3[ 92] = 0x2191; /* ↑ */
186 zscii_to_font3[ 93] = 0x2193; /* ↓ */
187 zscii_to_font3[ 94] = 0x2195; /* ↕ */
188 zscii_to_font3[ 95] = 0x2b1c; /* ⬜ */
189 zscii_to_font3[ 96] = UNICODE_QUESTIONMARK;
190 zscii_to_font3[ 97] = 0x16aa; /* ᚪ */
191 zscii_to_font3[ 98] = 0x16d2; /* ᛒ */
192 zscii_to_font3[ 99] = 0x16c7; /* ᛇ */
193 zscii_to_font3[100] = 0x16de; /* ᛞ */
194 zscii_to_font3[101] = 0x16d6; /* ᛖ */
195 zscii_to_font3[102] = 0x16a0; /* ᚠ */
196 zscii_to_font3[103] = 0x16b7; /* ᚷ */
197 zscii_to_font3[104] = 0x16bb; /* ᚻ */
198 zscii_to_font3[105] = 0x16c1; /* ᛁ */
199 zscii_to_font3[106] = 0x16e8; /* ᛨ */
200 zscii_to_font3[107] = 0x16e6; /* ᛦ */
201 zscii_to_font3[108] = 0x16da; /* ᛚ */
202 zscii_to_font3[109] = 0x16d7; /* ᛗ */
203 zscii_to_font3[110] = 0x16be; /* ᚾ */
204 zscii_to_font3[111] = 0x16a9; /* ᚩ */
205 zscii_to_font3[112] = UNICODE_QUESTIONMARK; /* no good symbol */
206 zscii_to_font3[113] = 0x0068; /* Unicode 'h'; close to the rune. */
207 zscii_to_font3[114] = 0x16b1; /* ᚱ */
208 zscii_to_font3[115] = 0x16cb; /* ᛋ */
209 zscii_to_font3[116] = 0x16cf; /* ᛏ */
210 zscii_to_font3[117] = 0x16a2; /* ᚢ */
211 zscii_to_font3[118] = 0x16e0; /* ᛠ */
212 zscii_to_font3[119] = 0x16b9; /* ᚹ */
213 zscii_to_font3[120] = 0x16c9; /* ᛉ */
214 zscii_to_font3[121] = 0x16a5; /* ᚥ */
215 zscii_to_font3[122] = 0x16df; /* ᛟ */
217 /* These are reversed (see §16); a slightly ugly hack in screen.c is
218 * used to accomplish this.
220 zscii_to_font3[123] = 0x2191; /* ↑ */
221 zscii_to_font3[124] = 0x2193; /* ↓ */
222 zscii_to_font3[125] = 0x2195; /* ↕ */
223 zscii_to_font3[126] = UNICODE_QUESTIONMARK;
226 void setup_tables(void)
228 /*** ZSCII to Unicode table. ***/
230 for(int i = 0; i < UINT8_MAX + 1; i++) zscii_to_unicode[i] = UNICODE_QUESTIONMARK;
231 zscii_to_unicode[0] = 0;
232 zscii_to_unicode[ZSCII_NEWLINE] = UNICODE_LINEFEED;
234 if(zversion == 6) zscii_to_unicode[ 9] = UNICODE_SPACE; /* Tab. */
235 if(zversion == 6) zscii_to_unicode[11] = UNICODE_SPACE; /* Sentence space. */
237 for(int i = 32; i < 127; i++) zscii_to_unicode[i] = i;
238 for(int i = 0; i < unicode_entries; i++)
240 uint16_t c = unicode_table[i];
242 if(!valid_unicode(c)) c = UNICODE_QUESTIONMARK;
244 /* If Unicode is not available, then any values > 255 are invalid. */
245 else if(!have_unicode && c > 255) c = UNICODE_QUESTIONMARK;
247 zscii_to_unicode[i + 155] = c;
250 /*** Unicode to ZSCII tables. ***/
252 /* Default values. */
253 memset(unicode_to_zscii, 0, sizeof unicode_to_zscii);
254 memset(unicode_to_zscii_q, ZSCII_QUESTIONMARK, sizeof unicode_to_zscii_q);
256 /* First fill up the entries found in the Unicode table. */
257 for(int i = 0; i < unicode_entries; i++)
259 uint16_t c = unicode_table[i];
263 unicode_to_zscii [c] = i + 155;
264 unicode_to_zscii_q[c] = i + 155;
268 /* Now the values that are equivalent in ZSCII and Unicode. */
269 for(int i = 32; i < 127; i++)
271 unicode_to_zscii [i] = i;
272 unicode_to_zscii_q[i] = i;
275 /* Properly translate a newline. */
276 unicode_to_zscii_q[UNICODE_LINEFEED] = ZSCII_NEWLINE;
278 /*** Unicode to Latin1 table. ***/
280 memset(unicode_to_latin1, UNICODE_QUESTIONMARK, sizeof unicode_to_latin1);
281 for(int i = 0; i < 256; i++) unicode_to_latin1[i] = i;
283 /*** ZSCII to character graphics table. ***/
287 /*** Alphabet table. ***/
289 for(int i = 0; i < 256; i++) atable_pos[i] = -1;
291 /* 52 is A2 character 6, which is special and should not
292 * be matched, so skip over it.
294 for(int i = 0; i < 52 ; i++) atable_pos[atable[i]] = i;
295 for(int i = 53; i < 26 * 3; i++) atable_pos[atable[i]] = i;
298 /* This is adapted from Zip2000 (Copyright 2001 Kevin Bracey). */
299 uint16_t unicode_tolower(uint16_t c)
301 static const unsigned char basic_latin[0x100] =
303 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
304 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f,
305 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28, 0x29, 0x2a, 0x2b, 0x2c, 0x2d, 0x2e, 0x2f,
306 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f,
307 0x40, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f,
308 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79, 0x7a, 0x5b, 0x5c, 0x5d, 0x5e, 0x5f,
309 0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f,
310 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f,
311 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f,
312 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f,
313 0xa0, 0xa1, 0xa2, 0xa3, 0xa4, 0xa5, 0xa6, 0xa7, 0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf,
314 0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7, 0xb8, 0xb9, 0xba, 0xbb, 0xbc, 0xbd, 0xbe, 0xbf,
315 0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6, 0xe7, 0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef,
316 0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xd7, 0xf8, 0xf9, 0xfa, 0xfb, 0xfc, 0xfd, 0xfe, 0xdf,
317 0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6, 0xe7, 0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef,
318 0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8, 0xf9, 0xfa, 0xfb, 0xfc, 0xfd, 0xfe, 0xff
320 static const unsigned char latin_extended_a[0x80] =
322 0x01, 0x01, 0x03, 0x03, 0x05, 0x05, 0x07, 0x07, 0x09, 0x09, 0x0b, 0x0b, 0x0d, 0x0d, 0x0f, 0x0f,
323 0x11, 0x11, 0x13, 0x13, 0x15, 0x15, 0x17, 0x17, 0x19, 0x19, 0x1b, 0x1b, 0x1d, 0x1d, 0x1f, 0x1f,
324 0x21, 0x21, 0x23, 0x23, 0x25, 0x25, 0x27, 0x27, 0x29, 0x29, 0x2b, 0x2b, 0x2d, 0x2d, 0x2f, 0x2f,
325 0x00, 0x31, 0x33, 0x33, 0x35, 0x35, 0x37, 0x37, 0x38, 0x3a, 0x3a, 0x3c, 0x3c, 0x3e, 0x3e, 0x40,
326 0x40, 0x42, 0x42, 0x44, 0x44, 0x46, 0x46, 0x48, 0x48, 0x49, 0x4b, 0x4b, 0x4d, 0x4d, 0x4f, 0x4f,
327 0x51, 0x51, 0x53, 0x53, 0x55, 0x55, 0x57, 0x57, 0x59, 0x59, 0x5b, 0x5b, 0x5d, 0x5d, 0x5f, 0x5f,
328 0x61, 0x61, 0x63, 0x63, 0x65, 0x65, 0x67, 0x67, 0x69, 0x69, 0x6b, 0x6b, 0x6d, 0x6d, 0x6f, 0x6f,
329 0x71, 0x71, 0x73, 0x73, 0x75, 0x75, 0x77, 0x77, 0x00, 0x7a, 0x7a, 0x7c, 0x7c, 0x7e, 0x7e, 0x7f
331 static const unsigned char greek[0x50] =
333 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0xac, 0x87, 0xad, 0xae, 0xaf, 0x8b, 0xcc, 0x8d, 0xcd, 0xce,
334 0x90, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7, 0xb8, 0xb9, 0xba, 0xbb, 0xbc, 0xbd, 0xbe, 0xbf,
335 0xc0, 0xc1, 0xa2, 0xc3, 0xc4, 0xc5, 0xc6, 0xc7, 0xc8, 0xc9, 0xca, 0xcb, 0xac, 0xad, 0xae, 0xaf,
336 0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7, 0xb8, 0xb9, 0xba, 0xbb, 0xbc, 0xbd, 0xbe, 0xbf,
337 0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5, 0xc6, 0xc7, 0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf
339 static const unsigned char cyrillic[0x60] =
341 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58, 0x59, 0x5a, 0x5b, 0x5c, 0x5d, 0x5e, 0x5f,
342 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f,
343 0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49, 0x4a, 0x4b, 0x4c, 0x4d, 0x4e, 0x4f,
344 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f,
345 0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49, 0x4a, 0x4b, 0x4c, 0x4d, 0x4e, 0x4f,
346 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58, 0x59, 0x5a, 0x5b, 0x5c, 0x5d, 0x5e, 0x5f
349 if (c < 0x0100) c = basic_latin[c];
350 else if(c == 0x0130) c = 0x0069; /* capital i with dot -> lower case i */
351 else if(c == 0x0178) c = 0x00ff; /* capital y diaeresis -> lower case y diaeresis */
352 else if(c < 0x0180) c = latin_extended_a[c - 0x100] + 0x100;
353 else if(c >= 0x380 && c < 0x3d0) c = greek [c - 0x380] + 0x300;
354 else if(c >= 0x400 && c < 0x460) c = cyrillic [c - 0x400] + 0x400;