5 #include <glib/gstdio.h>
9 **************** WRITING FUNCTIONS ********************************************
13 /* Internal function: change illegal (control) characters in a string to a
14 placeholder character. Must free returned string afterwards. */
16 remove_latin1_control_characters(unsigned char *s, gsize len)
18 /* If len == 0, then return an empty string, not NULL */
22 gchar *retval = g_new0(gchar, len);
24 for(i = 0; i < len; i++)
25 if( (s[i] < 32 && s[i] != 10) || (s[i] >= 127 && s[i] <= 159) )
27 /* Our placeholder character is '?'; other options are possible,
28 like printing "0x7F" or something */
34 /* Internal function: convert a Latin-1 string to a UTF-8 string, replacing
35 Latin-1 control characters by a placeholder first. The UTF-8 string must be
36 freed afterwards. Returns NULL on error. */
38 convert_latin1_to_utf8(gchar *s, gsize len)
42 gchar *canonical = remove_latin1_control_characters( (unsigned char *)s,
44 utf8 = g_convert(canonical, len, "UTF-8", "ISO-8859-1", NULL, NULL, &error);
49 g_warning("Error during latin1->utf8 conversion: %s", error->message);
56 /* Internal function: write the NUL-terminated UTF-8 string s to a text grid window's text buffer, replacing the text that follows the "cursor_position" mark and moving that mark to where writing stopped. */
58 write_utf8_to_grid(winid_t win, gchar *s)
60 /* Number of characters (not bytes) to insert */
61 glong length = g_utf8_strlen(s, -1);
62 glong chars_left = length;
66 GtkTextBuffer *buffer = gtk_text_view_get_buffer( GTK_TEXT_VIEW(win->widget) );
67 GtkTextMark *cursor = gtk_text_buffer_get_mark(buffer, "cursor_position");
69 /* Get cursor position */
71 gtk_text_buffer_get_iter_at_mark(buffer, &start, cursor);
72 /* Spaces available on this line: window width minus the cursor's column */
73 gint available_space = win->width - gtk_text_iter_get_line_offset(&start);
/* While more characters remain than fit on the current line, replace the
rest of that line and continue at the start of the next one */
75 while(chars_left > available_space && !gtk_text_iter_is_end(&start))
77 GtkTextIter end = start;
78 gtk_text_iter_forward_to_line_end(&end);
79 gtk_text_buffer_delete(buffer, &start, &end);
/* NOTE(review): length and chars_left count UTF-8 characters, but the
pointer offset s + (length - chars_left) and the last argument of
gtk_text_buffer_insert() are measured in bytes. The two agree only while s is
pure ASCII; g_utf8_offset_to_pointer() would give byte-correct positions. */
80 gtk_text_buffer_insert(buffer, &start, s + (length - chars_left), available_space);
81 chars_left -= available_space;
82 gtk_text_iter_forward_line(&start);
/* Every line after the first offers the full window width */
83 available_space = win->width;
/* Unless the end of the buffer was reached, overwrite the next chars_left
characters with the remainder of the string */
85 if(!gtk_text_iter_is_end(&start))
87 GtkTextIter end = start;
88 gtk_text_iter_forward_chars(&end, chars_left);
89 gtk_text_buffer_delete(buffer, &start, &end);
/* NOTE(review): same character-versus-byte offset concern as in the loop */
90 gtk_text_buffer_insert(buffer, &start, s + (length - chars_left), -1);
/* Leave the cursor mark where writing stopped */
93 gtk_text_buffer_move_mark(buffer, cursor, &start);
98 /* Internal function: append the NUL-terminated UTF-8 string s at the end of a text buffer window's text buffer. */
100 write_utf8_to_window(winid_t win, gchar *s)
104 GtkTextBuffer *buffer = gtk_text_view_get_buffer( GTK_TEXT_VIEW(win->widget) );
/* Insert at the end iterator; the length -1 makes GTK read s up to its
terminating NUL */
107 gtk_text_buffer_get_end_iter(buffer, &iter);
108 gtk_text_buffer_insert(buffer, &iter, s, -1);
113 /* Internal function: write a Latin-1 buffer with length to a stream. The visible branches send the len bytes of buf to a window (converted to UTF-8), a memory buffer, or a file, and each adds len to the stream's write_count. */
115 write_buffer_to_stream(strid_t str, gchar *buf, glui32 len)
119 case STREAM_TYPE_WINDOW:
120 /* Each window type has a different way of printing to it */
121 switch(str->window->type)
123 /* Printing to these windows' streams does nothing */
126 case wintype_Graphics:
/* The characters still count as written even though nothing is shown */
127 str->write_count += len;
130 /* Text grid window */
131 case wintype_TextGrid:
133 gchar *utf8 = convert_latin1_to_utf8(buf, len);
136 /* FIXME: What to do if string contains \n? Split the input string at newlines and write each string separately? */
137 write_utf8_to_grid(str->window, utf8);
141 str->write_count += len;
144 /* Text buffer window */
145 case wintype_TextBuffer:
147 gchar *utf8 = convert_latin1_to_utf8(buf, len);
150 write_utf8_to_window(str->window, utf8);
154 str->write_count += len;
157 g_warning("%s: Writing to this kind of window unsupported.", __func__);
160 /* Now write the same buffer to the window's echo stream */
/* The echo stream receives the original Latin-1 bytes through a recursive
call, so it does its own conversion and counting */
161 if(str->window->echo_stream != NULL)
162 write_buffer_to_stream(str->window->echo_stream, buf, len);
166 case STREAM_TYPE_MEMORY:
/* Unicode memory buffer: widen each Latin-1 byte, going through unsigned
char so that bytes >= 0x80 are not sign-extended. Writing stops when the
buffer is full. */
167 if(str->unicode && str->ubuffer)
170 while(str->mark < str->buflen && foo < len)
171 str->ubuffer[str->mark++] = (unsigned char)buf[foo++];
/* Latin-1 memory buffer: copy as many bytes as still fit */
173 if(!str->unicode && str->buffer)
/* NOTE(review): copycount is a signed int while len is glui32; if buflen and
mark are unsigned too, a mark beyond buflen would wrap around in the
subtraction -- confirm the field types and that mark <= buflen always holds */
175 int copycount = MIN(len, str->buflen - str->mark);
176 memmove(str->buffer + str->mark, buf, copycount);
177 str->mark += copycount;
/* All len characters are counted, including any that did not fit */
180 str->write_count += len;
183 case STREAM_TYPE_FILE:
188 /* Convert to four-byte big-endian */
/* NOTE(review): no g_free(writebuffer) is visible in this view -- confirm
the buffer is released after the fwrite() below */
189 gchar *writebuffer = g_new0(gchar, len * 4);
/* The buffer is zero-filled, so setting only the last byte of each group of
four produces the big-endian form of a Latin-1 value */
191 for(i = 0; i < len; i++)
192 writebuffer[i * 4 + 3] = buf[i];
/* NOTE(review): the fwrite() results here and in the regular-file branch
are not checked, so short writes go unnoticed */
193 fwrite(writebuffer, sizeof(gchar), len * 4,
196 else /* Regular file */
198 fwrite(buf, sizeof(gchar), len, str->file_pointer);
201 else /* Text mode is the same for Unicode and regular files */
/* NOTE(review): convert_latin1_to_utf8() is documented to return NULL on
error and no NULL check is visible before the "%s" below; passing NULL for
"%s" is undefined behaviour. Also confirm utf8 is freed afterwards. */
203 gchar *utf8 = convert_latin1_to_utf8(buf, len);
204 g_fprintf(str->file_pointer, "%s", utf8);
208 str->write_count += len;
211 g_warning("%s: Writing to this kind of stream unsupported.", __func__);
216 * glk_put_char_stream:
217 * @str: An output stream.
218 * @ch: A character in Latin-1 encoding.
220 * Prints one character @ch to the stream @str. It is illegal for @str to be
221 * %NULL, or an input-only stream.
224 glk_put_char_stream(strid_t str, unsigned char ch)
226 g_return_if_fail(str != NULL);
227 g_return_if_fail(str->file_mode != filemode_Read);
229 write_buffer_to_stream(str, (gchar *)&ch, 1);
233 * glk_put_string_stream:
234 * @str: An output stream.
235 * @s: A null-terminated string in Latin-1 encoding.
237 * Prints @s to the stream @str. It is illegal for @str to be %NULL, or an
241 glk_put_string_stream(strid_t str, char *s)
243 g_return_if_fail(str != NULL);
244 g_return_if_fail(str->file_mode != filemode_Read);
246 write_buffer_to_stream(str, (gchar *)s, strlen(s));
250 * glk_put_buffer_stream:
251 * @str: An output stream.
252 * @buf: An array of characters in Latin-1 encoding.
253 * @len: Length of @buf.
255 * Prints @buf to the stream @str. It is illegal for @str to be %NULL, or an
259 glk_put_buffer_stream(strid_t str, char *buf, glui32 len)
261 g_return_if_fail(str != NULL);
262 g_return_if_fail(str->file_mode != filemode_Read);
264 write_buffer_to_stream(str, (gchar *)buf, len);
269 **************** READING FUNCTIONS ********************************************
273 /* Internal function: Read one big-endian four-byte character from file fp and
274 return it as a Unicode code point, or -1 on EOF */
276 read_ucs4be_char_from_file(FILE *fp)
278 unsigned char readbuffer[4];
279 if(fread(readbuffer, sizeof(unsigned char), 4, fp) < 4)
282 readbuffer[0] << 24 |
283 readbuffer[1] << 16 |
288 /* Internal function: Read one UTF-8 character, which may be more than one byte,
289 from file fp and return it as a Unicode code point, or -1 on EOF */
291 read_utf8_char_from_file(FILE *fp)
293 gchar readbuffer[4] = {0, 0, 0, 0}; /* Max UTF-8 width */
295 gunichar charresult = (gunichar)-2;
296 for(foo = 0; foo < 4 && charresult == (gunichar)-2; foo++)
301 readbuffer[foo] = (gchar)ch;
302 charresult = g_utf8_get_char_validated(readbuffer, foo + 1);
303 /* charresult is -1 if invalid, -2 if incomplete, and the unicode code
306 /* Silently return unknown characters as 0xFFFD, Replacement Character */
307 if(charresult == (gunichar)-1 || charresult == (gunichar)-2)
312 /* Internal function: Tell whether this code point is a Unicode newline. The
313 file pointer and eight-bit flag are included in case the newline is a CR
314 (U+000D). If the next character is LF (U+000A) then it also belongs to the
317 is_unicode_newline(glsi32 ch, FILE *fp, gboolean utf8)
319 if(ch == 0x0A || ch == 0x85 || ch == 0x0C || ch == 0x2028 || ch == 0x2029)
322 glsi32 ch2 = utf8? read_utf8_char_from_file(fp) :
323 read_ucs4be_char_from_file(fp);
325 fseek(fp, utf8? -1 : -4, SEEK_CUR);
332 * glk_get_char_stream:
333 * @str: An input stream.
335 * Reads one character from the stream @str. (There is no notion of a ``current
336 * input stream.'') It is illegal for @str to be %NULL, or an output-only
339 * The result will be between 0 and 255. As with all basic text functions, Glk
340 * assumes the Latin-1 encoding. If the end of the stream has been reached, the
341 * result will be -1. Note that high-bit characters (128..255) are
342 * <emphasis>not</emphasis> returned as negative numbers.
344 * If the stream contains Unicode data --- for example, if it was created with
345 * glk_stream_open_file_uni() or glk_stream_open_memory_uni() --- then
346 * characters beyond 255 will be returned as 0x3F ("?").
348 * Returns: A character value between 0 and 255, or -1 on end of stream.
351 glk_get_char_stream(strid_t str)
/* Reject a NULL stream and streams not opened for reading; -1 doubles as
the error value */
353 g_return_val_if_fail(str != NULL, -1);
354 g_return_val_if_fail(str->file_mode == filemode_Read || str->file_mode == filemode_ReadWrite, -1);
358 case STREAM_TYPE_MEMORY:
/* Unicode memory buffer: nothing to read when there is no buffer or the
mark has reached the end */
361 if(!str->ubuffer || str->mark >= str->buflen)
363 glui32 ch = str->ubuffer[str->mark++];
/* Code points beyond Latin-1 are reported as '?' (0x3F) */
365 return (ch > 0xFF)? 0x3F : ch;
/* Latin-1 memory buffer */
369 if(!str->buffer || str->mark >= str->buflen)
/* NOTE(review): plain char may be signed; if ch is returned as is, bytes
0x80..0xFF come back negative, contradicting the documented 0..255 range.
Confirm the return statement (not visible here) goes through unsigned char. */
371 char ch = str->buffer[str->mark++];
377 case STREAM_TYPE_FILE:
/* NOTE(review): ch is a signed glsi32 here and in the text-mode branch, so
a negative value passes the ch > 0xFF test unchanged -- confirm that the
hidden lines handle the -1 end-of-file result before this point */
382 glsi32 ch = read_ucs4be_char_from_file(str->file_pointer);
386 return (ch > 0xFF)? 0x3F : ch;
388 else /* Regular file */
390 int ch = fgetc(str->file_pointer);
398 else /* Text mode is the same for Unicode and regular files */
400 glsi32 ch = read_utf8_char_from_file(str->file_pointer);
405 return (ch > 0xFF)? 0x3F : ch;
408 g_warning("%s: Reading from this kind of stream unsupported.", __func__);
414 * glk_get_buffer_stream:
415 * @str: An input stream.
416 * @buf: A buffer with space for at least @len characters.
417 * @len: The number of characters to read.
419 * Reads @len characters from @str, unless the end of stream is reached first.
420 * No terminal null is placed in the buffer.
422 * Returns: The number of characters actually read.
425 glk_get_buffer_stream(strid_t str, char *buf, glui32 len)
/* Reject a NULL stream, a stream not opened for reading, and a NULL
destination; 0 characters are reported in those cases */
427 g_return_val_if_fail(str != NULL, 0);
428 g_return_val_if_fail(str->file_mode == filemode_Read || str->file_mode == filemode_ReadWrite, 0);
429 g_return_val_if_fail(buf != NULL, 0);
433 case STREAM_TYPE_MEMORY:
/* Unicode memory buffer: narrow each element to Latin-1, substituting '?'
for anything above 0xFF, until len characters are copied or the buffer ends */
438 while(copycount < len && str->ubuffer && str->mark < str->buflen)
440 glui32 ch = str->ubuffer[str->mark++];
441 buf[copycount++] = (ch > 0xFF)? '?' : (char)ch;
/* NOTE(review): no braces are visible around this if; if it guards only the
assignment, memmove() is still called with a source derived from a NULL
buffer (with count 0) -- confirm */
446 if(str->buffer) /* if not, copycount stays 0 */
447 copycount = MIN(len, str->buflen - str->mark);
448 memmove(buf, str->buffer + str->mark, copycount);
449 str->mark += copycount;
452 str->read_count += copycount;
455 case STREAM_TYPE_FILE:
458 if(str->unicode) /* Binary file with 4-byte characters */
460 /* Read len characters of 4 bytes each */
/* NOTE(review): no g_free(readbuffer) is visible in this view -- confirm
the temporary buffer is released before returning */
461 unsigned char *readbuffer = g_new0(unsigned char, 4 * len);
462 size_t count = fread(readbuffer, sizeof(unsigned char), 4 * len, str->file_pointer);
463 /* If there was an incomplete character */
467 g_warning("%s: Incomplete character in binary Unicode file.", __func__);
/* Only complete four-byte characters are counted */
470 str->read_count += count / 4;
472 for(foo = 0; foo < count; foo += 4)
/* NOTE(review): readbuffer[foo] is promoted to int before the shift, so a
byte >= 0x80 shifted left by 24 overflows a signed int; the resulting
negative ch also slips past the ch > 255 test. Assembling the value in an
unsigned type would avoid both. */
474 glsi32 ch = readbuffer[foo] << 24
475 | readbuffer[foo + 1] << 16
476 | readbuffer[foo + 2] << 8
477 | readbuffer[foo + 3];
478 buf[foo / 4] = (ch > 255)? 0x3F : (char)ch;
483 else /* Regular binary file */
/* Latin-1 binary data goes straight into the caller's buffer */
485 size_t count = fread(buf, sizeof(char), len, str->file_pointer);
486 str->read_count += count;
490 else /* Text mode is the same for Unicode and regular files */
492 /* Do it character-by-character */
494 for(foo = 0; foo < len; foo++)
496 glsi32 ch = read_utf8_char_from_file(str->file_pointer);
500 buf[foo] = (ch > 0xFF)? 0x3F : (gchar)ch;
505 g_warning("%s: Reading from this kind of stream unsupported.", __func__);
511 * glk_get_line_stream:
512 * @str: An input stream.
513 * @buf: A buffer with space for at least @len characters.
514 * @len: The number of characters to read, plus one.
516 * Reads characters from @str, until either @len - 1 characters have been read
517 * or a newline has been read. It then puts a terminal null ('\0') character on
518 * the end. It returns the number of characters actually read, including the
519 * newline (if there is one) but not including the terminal null.
521 * It is usually more efficient to read several characters at once with
522 * glk_get_buffer_stream() or glk_get_line_stream(), as opposed to calling
523 * glk_get_char_stream() several times.
525 * Returns: The number of characters actually read.
528 glk_get_line_stream(strid_t str, char *buf, glui32 len)
/* Reject a NULL stream, a stream not opened for reading, and a NULL
destination; 0 characters are reported in those cases */
530 g_return_val_if_fail(str != NULL, 0);
531 g_return_val_if_fail(str->file_mode == filemode_Read || str->file_mode == filemode_ReadWrite, 0);
532 g_return_val_if_fail(buf != NULL, 0);
536 case STREAM_TYPE_MEMORY:
/* NOTE(review): len is an unsigned glui32, so every len - 1 bound in this
function wraps to a huge value when len is 0; no guard is visible. */
/* NOTE(review): the CR/LF lookahead further down reads
str->ubuffer[str->mark] after mark has already been advanced, with no
visible check that mark < buflen; when the CR is the last element this reads
one past the end of the buffer. */
541 /* Do it character-by-character */
542 while(copycount < len - 1 && str->ubuffer && str->mark < str->buflen)
544 glui32 ch = str->ubuffer[str->mark++];
545 /* Check for Unicode newline; slightly different than
547 if(ch == 0x0A || ch == 0x85 || ch == 0x0C || ch == 0x2028 || ch == 0x2029)
549 buf[copycount++] = '\n';
554 if(str->ubuffer[str->mark] == 0x0A)
555 str->mark++; /* skip past next newline */
556 buf[copycount++] = '\n';
559 buf[copycount++] = (ch > 0xFF)? '?' : (char)ch;
561 buf[copycount] = '\0';
565 if(str->buffer) /* if not, copycount stays 0 */
566 copycount = MIN(len - 1, str->buflen - str->mark);
/* memccpy() copies up to and including the first '\n' and returns a pointer
just past it in buf, or NULL when no newline was among the bytes copied */
567 char *endptr = memccpy(buf, str->buffer + str->mark, '\n', copycount);
568 if(endptr) /* newline was found */
569 copycount = endptr - buf; /* Real copy count */
570 buf[copycount] = '\0';
571 str->mark += copycount;
574 str->read_count += copycount;
577 case STREAM_TYPE_FILE:
580 if(str->unicode) /* Binary file with 4-byte characters */
582 /* Do it character-by-character */
584 for(foo = 0; foo < len - 1; foo++)
586 glsi32 ch = read_ucs4be_char_from_file(str->file_pointer);
/* FALSE selects the four-byte reader for the CR/LF lookahead */
593 if(is_unicode_newline(ch, str->file_pointer, FALSE))
599 buf[foo] = (ch > 0xFF)? '?' : (char)ch;
604 else /* Regular binary file */
/* NOTE(review): the fgets() result is ignored; at end of file or on error
buf is not guaranteed to hold a string, so strlen(buf) may read stale or
uninitialized data. A NUL byte inside the data also makes strlen() undercount. */
606 fgets(buf, len, str->file_pointer);
607 str->read_count += strlen(buf);
611 else /* Text mode is the same for Unicode and regular files */
613 /* Do it character-by-character */
615 for(foo = 0; foo < len - 1; foo++)
617 glsi32 ch = read_utf8_char_from_file(str->file_pointer);
/* TRUE selects the UTF-8 reader for the CR/LF lookahead */
624 if(is_unicode_newline(ch, str->file_pointer, TRUE))
630 buf[foo] = (ch > 0xFF)? 0x3F : (char)ch;
636 g_warning("%s: Reading from this kind of stream unsupported.", __func__);
643 **************** SEEKING FUNCTIONS ********************************************
648 * glk_stream_get_position:
649 * @str: A file or memory stream.
651 * Returns the position of the read/write mark in @str. For memory streams and
652 * binary file streams, this is exactly the number of characters read or written
653 * from the beginning of the stream (unless you have moved the mark with
654 * glk_stream_set_position().) For text file streams, matters are more
655 * ambiguous, since (for example) writing one byte to a text file may store more
656 * than one character in the platform's native encoding. You can only be sure
657 * that the position increases as you read or write to the file.
659 * Additional complication: for Latin-1 memory and file streams, a character is
660 * a byte. For Unicode memory and file streams (those created by
661 * glk_stream_open_file_uni() and glk_stream_open_memory_uni()), a character is
662 * a 32-bit word. So in a binary Unicode file, positions are multiples of four
665 * Returns: position of the read/write mark in @str.
668 glk_stream_get_position(strid_t str)
670 g_return_val_if_fail(str != NULL, 0);
674 case STREAM_TYPE_MEMORY:
676 case STREAM_TYPE_FILE:
677 return ftell(str->file_pointer);
679 g_warning("%s: Seeking not supported on this type of stream.",
686 * glk_stream_set_position:
687 * @str: A file or memory stream.
688 * @pos: The position to set the mark to, relative to @seekmode.
689 * @seekmode: One of #seekmode_Start, #seekmode_Current, or #seekmode_End.
691 * Sets the position of the read/write mark in @str. The position is controlled
692 * by @pos, and the meaning of @pos is controlled by @seekmode:
694 * <listitem>#seekmode_Start: @pos characters after the beginning of the file.
696 * <listitem>#seekmode_Current: @pos characters after the current position
697 * (moving backwards if @pos is negative.)</listitem>
698 * <listitem>#seekmode_End: @pos characters after the end of the file. (@pos
699 * should always be zero or negative, so that this will move backwards to a
700 * position within the file.</listitem>
702 * It is illegal to specify a position before the beginning or after the end of
705 * In binary files, the mark position is exact --- it corresponds with the
706 * number of characters you have read or written. In text files, this mapping
707 * can vary, because of linefeed conventions or other character-set
708 * approximations. glk_stream_set_position() and glk_stream_get_position()
709 * measure positions in the platform's native encoding --- after character
710 * cookery. Therefore, in a text stream, it is safest to use
711 * glk_stream_set_position() only to move to the beginning or end of a file, or
712 * to a position determined by glk_stream_get_position().
714 * Again, in Latin-1 streams, characters are bytes. In Unicode streams,
715 * characters are 32-bit words, or four bytes each.
718 glk_stream_set_position(strid_t str, glsi32 pos, glui32 seekmode)
720 g_return_if_fail(str != NULL);
721 g_return_if_fail(!(seekmode == seekmode_Start && pos < 0));
722 g_return_if_fail(!(seekmode == seekmode_End || pos > 0));
726 case STREAM_TYPE_MEMORY:
729 case seekmode_Start: str->mark = pos; break;
730 case seekmode_Current: str->mark += pos; break;
731 case seekmode_End: str->mark = str->buflen + pos; break;
733 g_assert_not_reached();
737 case STREAM_TYPE_FILE:
742 case seekmode_Start: whence = SEEK_SET; break;
743 case seekmode_Current: whence = SEEK_CUR; break;
744 case seekmode_End: whence = SEEK_END; break;
746 g_assert_not_reached();
749 fseek(str->file_pointer, pos, whence);
753 g_warning("%s: Seeking not supported on this type of stream.", __func__);