5 #include <glib/gstdio.h>
/* Smaller of x and y. Every use of an argument is parenthesized so that
expressions such as min(a & b, c) or min(len, buflen - mark) group as
intended. This is a function-like macro: the smaller argument is evaluated
twice, so do not pass expressions with side effects. */
#define min(x,y) ( ((x) > (y))? (y) : (x) )
11 **************** WRITING FUNCTIONS ********************************************
15 /* Internal function: change illegal (control) characters in a string to a
16 placeholder character. Must free returned string afterwards. */
/* Parameters: s is the input byte string and len its length in bytes.
The visible loop visits all len bytes and tests each one for being a control
code: a value below 32 other than 10 (line feed), or a value in the range
127..159. For len > 0 the result is allocated zero-filled by g_new0() with
exactly len bytes, so it has no room for a terminating NUL and must be used
together with len.
NOTE(review): this listing appears to omit short lines (braces, the
assignments inside the loop, the return statement); confirm the details
against the complete file. */
18 remove_latin1_control_characters(unsigned char *s, gsize len)
20 /* If len == 0, then return an empty string, not NULL */
24 gchar *retval = g_new0(gchar, len);
26 for(i = 0; i < len; i++)
27 if( (s[i] < 32 && s[i] != 10) || (s[i] >= 127 && s[i] <= 159) )
29 /* Our placeholder character is '?'; other options are possible,
30 like printing "0x7F" or something */
36 /* Internal function: convert a Latin-1 string to a UTF-8 string, replacing
37 Latin-1 control characters by a placeholder first. The UTF-8 string must be
38 freed afterwards. Returns NULL on error. */
/* Parameters: s is Latin-1 text and len its length in bytes.
The input is first passed through remove_latin1_control_characters(); that
sanitized copy, not s itself, is what g_convert() translates from
"ISO-8859-1" to "UTF-8". A conversion problem is reported through
error_dialog() together with the GError filled in by g_convert().
NOTE(review): the release of the intermediate canonical buffer and of the
GError is not visible in this listing; confirm both are freed. */
40 convert_latin1_to_utf8(gchar *s, gsize len)
44 gchar *canonical = remove_latin1_control_characters( (unsigned char *)s,
46 utf8 = g_convert(canonical, len, "UTF-8", "ISO-8859-1", NULL, NULL, &error);
51 error_dialog(NULL, error, "Error during latin1->utf8 conversion: ");
58 /* Internal function: write a UTF-8 string to a window's text buffer. */
/* Parameters: win is the destination window; s is the text to append. s is
inserted with a length of -1, so it must be NUL-terminated.
The text goes in at the end iterator of the GtkTextBuffer that belongs to
win->widget, which is cast to a GtkTextView. */
60 write_utf8_to_window(winid_t win, gchar *s)
62 GtkTextBuffer *buffer =
63 gtk_text_view_get_buffer( GTK_TEXT_VIEW(win->widget) );
66 gtk_text_buffer_get_end_iter(buffer, &iter);
67 gtk_text_buffer_insert(buffer, &iter, s, -1);
70 /* Internal function: write a Latin-1 buffer with length to a stream. */
/* Writes the len bytes in buf to str, dispatching on str->stream_type.
buf is handled as Latin-1 text: text buffer windows and text-mode files run
it through convert_latin1_to_utf8(), and Unicode memory streams widen each
byte through an (unsigned char) cast.
 - Window streams: str->write_count grows by len; text buffer windows also
   receive the converted text. Afterwards the same buffer is written
   recursively to the window's echo stream when one is set.
 - Memory streams: bytes are copied only while str->mark is below
   str->buflen, but str->write_count still grows by the full len.
 - File streams: raw bytes, or four bytes per character with the value in
   the last byte, or UTF-8 text printed with g_fprintf().
NOTE(review): this listing appears to omit short lines (braces, break
statements, the conditions that select the file branches, frees); confirm
the control flow against the complete file. */
72 write_buffer_to_stream(strid_t str, gchar *buf, glui32 len)
74 switch(str->stream_type)
76 case STREAM_TYPE_WINDOW:
77 /* Each window type has a different way of printing to it */
78 switch(str->window->window_type)
80 /* Printing to these windows' streams does nothing */
83 case wintype_Graphics:
84 str->write_count += len;
86 /* Text buffer window */
87 case wintype_TextBuffer:
89 gchar *utf8 = convert_latin1_to_utf8(buf, len);
92 write_utf8_to_window(str->window, utf8);
96 str->write_count += len;
99 g_warning("%s: Writing to this kind of window unsupported.",
103 /* Now write the same buffer to the window's echo stream */
104 if(str->window->echo_stream != NULL)
105 write_buffer_to_stream(str->window->echo_stream, buf, len);
109 case STREAM_TYPE_MEMORY:
110 if(str->unicode && str->ubuffer)
113 while(str->mark < str->buflen && foo < len)
114 str->ubuffer[str->mark++] = (unsigned char)buf[foo++];
116 if(!str->unicode && str->buffer)
/* NOTE(review): if str->mark can exceed str->buflen (for example after a
seek) and these fields are unsigned, the subtraction below wraps around
instead of going negative; confirm the field types and the seek checks. */
118 int copycount = min(len, str->buflen - str->mark);
119 memmove(str->buffer + str->mark, buf, copycount);
120 str->mark += copycount;
123 str->write_count += len;
126 case STREAM_TYPE_FILE:
131 /* Convert to four-byte big-endian */
132 gchar *writebuffer = g_new0(gchar, len * 4);
134 for(i = 0; i < len; i++)
135 writebuffer[i * 4 + 3] = buf[i];
/* NOTE(review): the results of the fwrite() calls below are not visibly
checked, so short writes would go unnoticed. */
136 fwrite(writebuffer, sizeof(gchar), len * 4,
139 else /* Regular file */
141 fwrite(buf, sizeof(gchar), len, str->file_pointer);
144 else /* Text mode is the same for Unicode and regular files */
146 gchar *utf8 = convert_latin1_to_utf8(buf, len);
/* NOTE(review): convert_latin1_to_utf8() is documented to return NULL on
error; no NULL check is visible before utf8 is printed with "%s". */
147 g_fprintf(str->file_pointer, "%s", utf8);
151 str->write_count += len;
154 g_warning("%s: Writing to this kind of stream unsupported.",
160 * glk_put_char_stream:
161 * @str: An output stream.
162 * @ch: A character in Latin-1 encoding.
164 * Prints one character @ch to the stream @str. It is illegal for @str to be
165 * %NULL, or an input-only stream.
/* Checks that str is non-NULL and not opened with filemode_Read, then
forwards the single byte ch to write_buffer_to_stream() as a buffer of
length 1. */
168 glk_put_char_stream(strid_t str, unsigned char ch)
170 g_return_if_fail(str != NULL);
171 g_return_if_fail(str->file_mode != filemode_Read);
173 write_buffer_to_stream(str, (gchar *)&ch, 1);
177 * glk_put_string_stream:
178 * @str: An output stream.
179 * @s: A null-terminated string in Latin-1 encoding.
181 * Prints @s to the stream @str. It is illegal for @str to be %NULL, or an
/* Checks that str is non-NULL and not opened with filemode_Read, then
forwards s to write_buffer_to_stream() with its strlen() as the length, so
the terminating NUL itself is not written.
NOTE(review): s is not checked against NULL before strlen() is called. */
185 glk_put_string_stream(strid_t str, char *s)
187 g_return_if_fail(str != NULL);
188 g_return_if_fail(str->file_mode != filemode_Read);
190 write_buffer_to_stream(str, (gchar *)s, strlen(s));
194 * glk_put_buffer_stream:
195 * @str: An output stream.
196 * @buf: An array of characters in Latin-1 encoding.
197 * @len: Length of @buf.
199 * Prints @buf to the stream @str. It is illegal for @str to be %NULL, or an
/* Checks that str is non-NULL and not opened with filemode_Read, then
forwards buf and len unchanged to write_buffer_to_stream().
NOTE(review): buf is not checked against NULL here. */
203 glk_put_buffer_stream(strid_t str, char *buf, glui32 len)
205 g_return_if_fail(str != NULL);
206 g_return_if_fail(str->file_mode != filemode_Read);
208 write_buffer_to_stream(str, (gchar *)buf, len);
213 **************** READING FUNCTIONS ********************************************
217 /* Internal function: Read one big-endian four-byte character from file fp and
218 return it as a Unicode code point, or -1 on EOF */
/* Reads exactly four bytes from fp with fread(); a short read (fewer than
four bytes) takes the early-exit branch. Otherwise the bytes are combined
most significant first, starting with readbuffer[0] << 24.
NOTE(review): the unsigned char operands are promoted to int before the
shift, so a first byte of 0x80 or above shifts into the sign bit; casting to
an unsigned 32-bit type first would avoid that. The lines combining bytes 2
and 3 are not visible in this listing. */
220 read_ucs4be_char_from_file(FILE *fp)
222 unsigned char readbuffer[4];
223 if(fread(readbuffer, sizeof(unsigned char), 4, fp) < 4)
226 readbuffer[0] << 24 |
227 readbuffer[1] << 16 |
232 /* Internal function: Read one UTF-8 character, which may be more than one byte,
233 from file fp and return it as a Unicode code point, or -1 on EOF */
/* Accumulates up to four bytes, one at a time, into readbuffer. After each
byte, g_utf8_get_char_validated() is asked to decode the bytes collected so
far; the loop continues only while it answers (gunichar)-2, i.e. the
sequence is still incomplete. Results of (gunichar)-1 or (gunichar)-2 after
the loop are handled by the final branch, which per its comment yields
U+FFFD.
NOTE(review): the byte-reading call and the return statements are not
visible in this listing; confirm the EOF handling in the complete file. */
235 read_utf8_char_from_file(FILE *fp)
237 gchar readbuffer[4] = {0, 0, 0, 0}; /* Max UTF-8 width */
239 gunichar charresult = (gunichar)-2;
240 for(foo = 0; foo < 4 && charresult == (gunichar)-2; foo++)
245 readbuffer[foo] = (gchar)ch;
246 charresult = g_utf8_get_char_validated(readbuffer, foo + 1);
247 /* charresult is -1 if invalid, -2 if incomplete, and the unicode code
250 /* Silently return unknown characters as 0xFFFD, Replacement Character */
251 if(charresult == (gunichar)-1 || charresult == (gunichar)-2)
256 /* Internal function: Tell whether this code point is a Unicode newline. The
257 file pointer and eight-bit flag are included in case the newline is a CR
258 (U+000D). If the next character is LF (U+000A) then it also belongs to the
/* Parameters: ch is the code point just read, fp the file it came from, and
utf8 selects which reader is used for look-ahead (UTF-8 when true, four-byte
big-endian otherwise).
The code points 0x0A, 0x85, 0x0C, 0x2028 and 0x2029 are tested first. The
remaining visible code reads one following character into ch2 and can step
the file position back by 1 byte (UTF-8) or 4 bytes (four-byte mode).
NOTE(review): rewinding by exactly 1 byte assumes the look-ahead character
occupied a single byte, and rewinding at all assumes something was actually
read (not end of file). The conditions guarding these lines are not visible
in this listing; confirm them in the complete file. */
261 is_unicode_newline(glsi32 ch, FILE *fp, gboolean utf8)
263 if(ch == 0x0A || ch == 0x85 || ch == 0x0C || ch == 0x2028 || ch == 0x2029)
266 glsi32 ch2 = utf8? read_utf8_char_from_file(fp) :
267 read_ucs4be_char_from_file(fp);
269 fseek(fp, utf8? -1 : -4, SEEK_CUR);
276 * glk_get_char_stream:
277 * @str: An input stream.
279 * Reads one character from the stream @str. (There is no notion of a ``current
280 * input stream.'') It is illegal for @str to be %NULL, or an output-only
283 * The result will be between 0 and 255. As with all basic text functions, Glk
284 * assumes the Latin-1 encoding. If the end of the stream has been reached, the
285 * result will be -1. Note that high-bit characters (128..255) are
286 * <emphasis>not</emphasis> returned as negative numbers.
288 * If the stream contains Unicode data --- for example, if it was created with
289 * glk_stream_open_file_uni() or glk_stream_open_memory_uni() --- then
290 * characters beyond 255 will be returned as 0x3F ("?").
292 * Returns: A character value between 0 and 255, or -1 on end of stream.
/* Returns -1 from the precondition checks when str is NULL or its file_mode
is neither filemode_Read nor filemode_ReadWrite. Otherwise one character is
taken according to str->stream_type:
 - Memory streams read from ubuffer or buffer at str->mark and advance the
   mark; a missing buffer or a mark at or past buflen takes the early-exit
   branch.
 - File streams use read_ucs4be_char_from_file(), fgetc() or
   read_utf8_char_from_file().
Values above 0xFF coming from the wide sources are replaced by 0x3F.
NOTE(review): this listing appears to omit short lines (braces, returns,
the conditions choosing between branches); confirm against the complete
file. */
295 glk_get_char_stream(strid_t str)
297 g_return_val_if_fail(str != NULL, -1);
298 g_return_val_if_fail(str->file_mode == filemode_Read
299 || str->file_mode == filemode_ReadWrite, -1);
301 switch(str->stream_type)
303 case STREAM_TYPE_MEMORY:
306 if(!str->ubuffer || str->mark >= str->buflen)
308 glui32 ch = str->ubuffer[str->mark++];
310 return (ch > 0xFF)? 0x3F : ch;
314 if(!str->buffer || str->mark >= str->buflen)
/* NOTE(review): plain char may be signed, in which case bytes 128..255
would become negative if ch is returned without a cast to unsigned char.
The return statement is not visible here; confirm. */
316 char ch = str->buffer[str->mark++];
322 case STREAM_TYPE_FILE:
327 glsi32 ch = read_ucs4be_char_from_file(str->file_pointer);
331 return (ch > 0xFF)? 0x3F : ch;
333 else /* Regular file */
335 int ch = fgetc(str->file_pointer);
343 else /* Text mode is the same for Unicode and regular files */
345 glsi32 ch = read_utf8_char_from_file(str->file_pointer);
350 return (ch > 0xFF)? 0x3F : ch;
353 g_warning("%s: Reading from this kind of stream unsupported.",
360 * glk_get_buffer_stream:
361 * @str: An input stream.
362 * @buf: A buffer with space for at least @len characters.
363 * @len: The number of characters to read.
365 * Reads @len characters from @str, unless the end of stream is reached first.
366 * No terminal null is placed in the buffer.
368 * Returns: The number of characters actually read.
/* Returns 0 from the precondition checks when str or buf is NULL or when
str->file_mode is neither filemode_Read nor filemode_ReadWrite. Otherwise up
to len characters are stored in buf according to str->stream_type:
 - Unicode memory streams copy one element at a time, writing '?' for values
   above 0xFF.
 - Byte memory streams copy min(len, buflen - mark) bytes with memmove().
 - Four-byte binary files read 4 * len bytes into a temporary buffer and
   fold each group of four bytes into one character, writing 0x3F for
   values above 255; read_count grows by count / 4.
 - Regular binary files fread() straight into buf.
 - Text files decode one character at a time with
   read_utf8_char_from_file().
NOTE(review): this listing appears to omit short lines (braces, returns,
frees, branch conditions); confirm against the complete file. */
371 glk_get_buffer_stream(strid_t str, char *buf, glui32 len)
373 g_return_val_if_fail(str != NULL, 0);
374 g_return_val_if_fail(str->file_mode == filemode_Read
375 || str->file_mode == filemode_ReadWrite, 0);
376 g_return_val_if_fail(buf != NULL, 0);
378 switch(str->stream_type)
380 case STREAM_TYPE_MEMORY:
385 while(copycount < len && str->ubuffer
386 && str->mark < str->buflen)
388 glui32 ch = str->ubuffer[str->mark++];
389 buf[copycount++] = (ch > 0xFF)? '?' : (char)ch;
/* NOTE(review): whether the memmove() below is covered by the
if(str->buffer) test depends on braces that are not visible here. Also, if
str->mark can exceed str->buflen and the fields are unsigned, the
subtraction wraps around; confirm both. */
394 if(str->buffer) /* if not, copycount stays 0 */
395 copycount = min(len, str->buflen - str->mark);
396 memmove(buf, str->buffer + str->mark, copycount);
397 str->mark += copycount;
400 str->read_count += copycount;
403 case STREAM_TYPE_FILE:
406 if(str->unicode) /* Binary file with 4-byte characters */
408 /* Read len characters of 4 bytes each */
409 unsigned char *readbuffer = g_new0(unsigned char, 4 * len);
410 size_t count = fread(readbuffer, sizeof(unsigned char),
411 4 * len, str->file_pointer);
412 /* If there was an incomplete character */
416 g_warning("%s: Incomplete character in binary Unicode "
420 str->read_count += count / 4;
422 for(foo = 0; foo < count; foo += 4)
424 glsi32 ch = readbuffer[foo] << 24
425 | readbuffer[foo + 1] << 16
426 | readbuffer[foo + 2] << 8
427 | readbuffer[foo + 3];
428 buf[foo / 4] = (ch > 255)? 0x3F : (char)ch;
433 else /* Regular binary file */
435 size_t count = fread(buf, sizeof(char), len,
437 str->read_count += count;
441 else /* Text mode is the same for Unicode and regular files */
443 /* Do it character-by-character */
445 for(foo = 0; foo < len; foo++)
447 glsi32 ch = read_utf8_char_from_file(str->file_pointer);
451 buf[foo] = (ch > 0xFF)? 0x3F : (gchar)ch;
456 g_warning("%s: Reading from this kind of stream unsupported.",
463 * glk_get_line_stream:
464 * @str: An input stream.
465 * @buf: A buffer with space for at least @len characters.
466 * @len: The number of characters to read, plus one.
468 * Reads characters from @str, until either @len - 1 characters have been read
469 * or a newline has been read. It then puts a terminal null ('\0') character on
470 * the end. It returns the number of characters actually read, including the
471 * newline (if there is one) but not including the terminal null.
473 * It is usually more efficient to read several characters at once with
474 * glk_get_buffer_stream() or glk_get_line_stream(), as opposed to calling
475 * glk_get_char_stream() several times.
477 * Returns: The number of characters actually read.
/* Returns 0 from the precondition checks when str or buf is NULL or when
str->file_mode is neither filemode_Read nor filemode_ReadWrite. Otherwise
characters are stored in buf, with loops bounded by len - 1, according to
str->stream_type:
 - Unicode memory streams copy one element at a time, store '\n' for the
   newline code points they recognize, skip a 0x0A that follows in the
   carriage-return branch, and write '?' for values above 0xFF.
 - Byte memory streams use memccpy() to copy up to and including the first
   '\n', then terminate buf with '\0'.
 - Four-byte binary files and text files decode one character at a time and
   ask is_unicode_newline() whether to stop.
 - Regular binary files use fgets() and add strlen(buf) to read_count.
NOTE(review): points to confirm against the complete file, since this
listing appears to omit short lines (braces, returns, breaks, branch
conditions):
 - len - 1 is computed from a glui32; if that type is unsigned, len == 0
   makes the bound wrap to a very large value.
 - the look-ahead str->ubuffer[str->mark] == 0x0A is not visibly guarded by
   a check that str->mark is still below str->buflen.
 - the result of fgets() is not visibly checked before strlen(buf), so buf
   may be read uninitialized at end of file. */
480 glk_get_line_stream(strid_t str, char *buf, glui32 len)
482 g_return_val_if_fail(str != NULL, 0);
483 g_return_val_if_fail(str->file_mode == filemode_Read
484 || str->file_mode == filemode_ReadWrite, 0);
485 g_return_val_if_fail(buf != NULL, 0);
487 switch(str->stream_type)
489 case STREAM_TYPE_MEMORY:
494 /* Do it character-by-character */
495 while(copycount < len - 1 && str->ubuffer
496 && str->mark < str->buflen)
498 glui32 ch = str->ubuffer[str->mark++];
499 /* Check for Unicode newline; slightly different than
501 if(ch == 0x0A || ch == 0x85 || ch == 0x0C || ch == 0x2028
504 buf[copycount++] = '\n';
509 if(str->ubuffer[str->mark] == 0x0A)
510 str->mark++; /* skip past next newline */
511 buf[copycount++] = '\n';
514 buf[copycount++] = (ch > 0xFF)? '?' : (char)ch;
516 buf[copycount] = '\0';
520 if(str->buffer) /* if not, copycount stays 0 */
521 copycount = min(len - 1, str->buflen - str->mark);
522 char *endptr = memccpy(buf, str->buffer + str->mark, '\n',
524 if(endptr) /* newline was found */
525 copycount = endptr - buf; /* Real copy count */
526 buf[copycount] = '\0';
527 str->mark += copycount;
530 str->read_count += copycount;
533 case STREAM_TYPE_FILE:
536 if(str->unicode) /* Binary file with 4-byte characters */
538 /* Do it character-by-character */
540 for(foo = 0; foo < len - 1; foo++)
543 read_ucs4be_char_from_file(str->file_pointer);
550 if(is_unicode_newline(ch, str->file_pointer, FALSE))
556 buf[foo] = (ch > 0xFF)? '?' : (char)ch;
561 else /* Regular binary file */
563 fgets(buf, len, str->file_pointer);
564 str->read_count += strlen(buf);
568 else /* Text mode is the same for Unicode and regular files */
570 /* Do it character-by-character */
572 for(foo = 0; foo < len - 1; foo++)
574 glsi32 ch = read_utf8_char_from_file(str->file_pointer);
581 if(is_unicode_newline(ch, str->file_pointer, TRUE))
587 buf[foo] = (ch > 0xFF)? 0x3F : (char)ch;
593 g_warning("%s: Reading from this kind of stream unsupported.",
601 **************** SEEKING FUNCTIONS ********************************************
606 * glk_stream_get_position:
607 * @str: A file or memory stream.
609 * Returns the position of the read/write mark in @str. For memory streams and
610 * binary file streams, this is exactly the number of characters read or written
611 * from the beginning of the stream (unless you have moved the mark with
612 * glk_stream_set_position().) For text file streams, matters are more
613 * ambiguous, since (for example) writing one byte to a text file may store more
614 * than one character in the platform's native encoding. You can only be sure
615 * that the position increases as you read or write to the file.
617 * Additional complication: for Latin-1 memory and file streams, a character is
618 * a byte. For Unicode memory and file streams (those created by
619 * glk_stream_open_file_uni() and glk_stream_open_memory_uni()), a character is
620 * a 32-bit word. So in a binary Unicode file, positions are multiples of four
623 * Returns: position of the read/write mark in @str.
/* Returns 0 from the precondition check when str is NULL. For file streams
the position is whatever ftell() reports for str->file_pointer, i.e. a byte
offset; other stream types reach the warning branch.
NOTE(review): the value returned for memory streams is not visible in this
listing, and a failing ftell() (which returns -1) is passed through
unchecked; confirm against the complete file. */
626 glk_stream_get_position(strid_t str)
628 g_return_val_if_fail(str != NULL, 0);
630 switch(str->stream_type)
632 case STREAM_TYPE_MEMORY:
634 case STREAM_TYPE_FILE:
635 return ftell(str->file_pointer);
637 g_warning("%s: Seeking not supported on this type of stream.",
644 * glk_stream_set_position:
645 * @str: A file or memory stream.
646 * @pos: The position to set the mark to, relative to @seekmode.
647 * @seekmode: One of #seekmode_Start, #seekmode_Current, or #seekmode_End.
649 * Sets the position of the read/write mark in @str. The position is controlled
650 * by @pos, and the meaning of @pos is controlled by @seekmode:
652 * <listitem>#seekmode_Start: @pos characters after the beginning of the file.
654 * <listitem>#seekmode_Current: @pos characters after the current position
655 * (moving backwards if @pos is negative.)</listitem>
656 * <listitem>#seekmode_End: @pos characters after the end of the file. (@pos
657 * should always be zero or negative, so that this will move backwards to a
658 * position within the file.)</listitem>
660 * It is illegal to specify a position before the beginning or after the end of
663 * In binary files, the mark position is exact --- it corresponds with the
664 * number of characters you have read or written. In text files, this mapping
665 * can vary, because of linefeed conventions or other character-set
666 * approximations. glk_stream_set_position() and glk_stream_get_position()
667 * measure positions in the platform's native encoding --- after character
668 * cookery. Therefore, in a text stream, it is safest to use
669 * glk_stream_set_position() only to move to the beginning or end of a file, or
670 * to a position determined by glk_stream_get_position().
672 * Again, in Latin-1 streams, characters are bytes. In Unicode streams,
673 * characters are 32-bit words, or four bytes each.
676 glk_stream_set_position(strid_t str, glsi32 pos, glui32 seekmode)
678 g_return_if_fail(str != NULL);
679 g_return_if_fail(!(seekmode == seekmode_Start && pos < 0));
680 g_return_if_fail(!(seekmode == seekmode_End || pos > 0));
682 switch(str->stream_type)
684 case STREAM_TYPE_MEMORY:
687 case seekmode_Start: str->mark = pos; break;
688 case seekmode_Current: str->mark += pos; break;
689 case seekmode_End: str->mark = str->buflen + pos; break;
691 g_assert_not_reached();
695 case STREAM_TYPE_FILE:
700 case seekmode_Start: whence = SEEK_SET; break;
701 case seekmode_Current: whence = SEEK_CUR; break;
702 case seekmode_End: whence = SEEK_END; break;
704 g_assert_not_reached();
707 fseek(str->file_pointer, pos, whence);
711 g_warning("%s: Seeking not supported on this type of stream.",