Oops, undoing accidentally committed parts of [259]
[rodin/chimara.git] / libchimara / strio.c
1 #include "charset.h"
2 #include "magic.h"
3 #include "stream.h"
4 #include <errno.h>
5 #include <stdio.h>
6 #include <string.h>
7 #include <glib.h>
8 #include <glib/gstdio.h>
9
10 /*
11  *
12  **************** WRITING FUNCTIONS ********************************************
13  *
14  */
15
16 /* Internal function: write a UTF-8 string to a text buffer window's text buffer. */
17 static void
18 write_utf8_to_window_buffer(winid_t win, gchar *s)
19 {
20         if(win->input_request_type == INPUT_REQUEST_LINE || win->input_request_type == INPUT_REQUEST_LINE_UNICODE)
21         {
22                 ILLEGAL("Tried to print to a text buffer window with line input pending.");
23                 return;
24         }
25
26         // Write to the buffer  
27         g_string_append(win->buffer, s);
28 }
29         
30 /* Internal function: flush a window's text buffer to the screen. */
31 void
32 flush_window_buffer(winid_t win)
33 {
34         if(win->type != wintype_TextBuffer && win->type != wintype_TextGrid)
35                 return;
36
37         if(win->buffer->len == 0)
38                 return;
39
40         gdk_threads_enter();
41
42         GtkTextBuffer *buffer = gtk_text_view_get_buffer( GTK_TEXT_VIEW(win->widget) );
43
44         switch(win->type) {
45         case wintype_TextBuffer:
46         {
47                 GtkTextIter iter;
48                 gtk_text_buffer_get_end_iter(buffer, &iter);
49
50                 GtkTextTagTable *tags = gtk_text_buffer_get_tag_table(buffer);
51                 GtkTextTag *style_tag = gtk_text_tag_table_lookup(tags, win->window_stream->style);
52
53                 if(win->window_stream->hyperlink_mode) {
54                         GtkTextTag *link_style_tag = gtk_text_tag_table_lookup(tags, "hyperlink");
55                         GtkTextTag *link_tag = win->current_hyperlink->tag;
56                         gtk_text_buffer_insert_with_tags(buffer, &iter, win->buffer->str, -1, style_tag, link_style_tag, link_tag, NULL);
57                 } else {
58                         gtk_text_buffer_insert_with_tags(buffer, &iter, win->buffer->str, -1, style_tag, NULL);
59                 }
60
61                 ChimaraGlk *glk = CHIMARA_GLK(gtk_widget_get_ancestor(win->widget, CHIMARA_TYPE_GLK));
62                 g_assert(glk);
63                 g_signal_emit_by_name(glk, "text-buffer-output", win->rock, win->buffer->str);
64
65         }
66                 break;
67
68         case wintype_TextGrid:
69         {
70                 /* Number of characters to insert */
71                 glong length = win->buffer->len;
72                 glong chars_left = length;
73                 
74                 GtkTextMark *cursor = gtk_text_buffer_get_mark(buffer, "cursor_position");
75                 
76                 /* Get cursor position */
77                 GtkTextIter start;
78                 gtk_text_buffer_get_iter_at_mark(buffer, &start, cursor);
79                 /* Spaces available on this line */
80                 gint available_space = win->width - gtk_text_iter_get_line_offset(&start);
81                 
82                 while(chars_left > available_space && !gtk_text_iter_is_end(&start))
83                 {
84                         GtkTextIter end = start;
85                         gtk_text_iter_forward_to_line_end(&end);
86                         gtk_text_buffer_delete(buffer, &start, &end);
87
88                         GtkTextTagTable *tags = gtk_text_buffer_get_tag_table(buffer);
89                         GtkTextTag *style_tag = gtk_text_tag_table_lookup(tags, win->window_stream->style);
90
91                         if(win->window_stream->hyperlink_mode) {
92                                 GtkTextTag *link_style_tag = gtk_text_tag_table_lookup(tags, "hyperlink");
93                                 GtkTextTag *link_tag = win->current_hyperlink->tag;
94                                 gtk_text_buffer_insert_with_tags(buffer, &start, win->buffer->str + (length - chars_left), available_space, style_tag, link_style_tag, link_tag, NULL);
95                         } else {
96                                 gtk_text_buffer_insert_with_tags(buffer, &start, win->buffer->str + (length - chars_left), available_space, style_tag, NULL);
97                         }
98
99                         chars_left -= available_space;
100                         gtk_text_iter_forward_line(&start);
101                         available_space = win->width;
102                 }
103                 if(!gtk_text_iter_is_end(&start))
104                 {
105                         GtkTextIter end = start;
106                         gtk_text_iter_forward_chars(&end, chars_left);
107                         gtk_text_buffer_delete(buffer, &start, &end);
108
109                         GtkTextTagTable *tags = gtk_text_buffer_get_tag_table(buffer);
110                         GtkTextTag *style_tag = gtk_text_tag_table_lookup(tags, win->window_stream->style);
111
112                         if(win->window_stream->hyperlink_mode) {
113                                 GtkTextTag *link_style_tag = gtk_text_tag_table_lookup(tags, "hyperlink");
114                                 GtkTextTag *link_tag = win->current_hyperlink->tag;
115                                 gtk_text_buffer_insert_with_tags(buffer, &start, win->buffer->str + (length - chars_left), -1, style_tag, link_style_tag, link_tag, NULL);
116                         } else {
117                                 gtk_text_buffer_insert_with_tags(buffer, &start, win->buffer->str + (length - chars_left), -1, style_tag, NULL);
118                         }
119                 }
120                 
121                 gtk_text_buffer_move_mark(buffer, cursor, &start);
122         }
123                 break;
124         }
125
126         gdk_threads_leave();
127
128         g_string_truncate(win->buffer, 0);
129 }
130
131 /* Internal function: write a Latin-1 buffer with length to a stream. */
132 static void
133 write_buffer_to_stream(strid_t str, gchar *buf, glui32 len)
134 {
135         switch(str->type)
136         {
137                 case STREAM_TYPE_WINDOW:
138                         /* Each window type has a different way of printing to it */
139                         switch(str->window->type)
140                         {
141                                 /* Printing to these windows' streams does nothing */
142                                 case wintype_Blank:
143                                 case wintype_Pair:
144                                 case wintype_Graphics:
145                                         str->write_count += len;
146                                         break;
147                                         
148                             /* Text grid/buffer windows */
149                             case wintype_TextGrid:
150                                 case wintype_TextBuffer:
151                             {
152                                 gchar *utf8 = convert_latin1_to_utf8(buf, len);
153                                 if(utf8 != NULL) {
154                                                 write_utf8_to_window_buffer(str->window, utf8);
155                                                 g_free(utf8);
156                                         }
157                                 }       
158                                         str->write_count += len;
159                                         break;
160                                 default:
161                                         ILLEGAL_PARAM("Unknown window type: %u", str->window->type);
162                         }
163                         
164                         /* Now write the same buffer to the window's echo stream */
165                         if(str->window->echo_stream != NULL)
166                                 write_buffer_to_stream(str->window->echo_stream, buf, len);
167                         
168                         break;
169                         
170                 case STREAM_TYPE_MEMORY:
171                         if(str->unicode && str->ubuffer)
172                         {
173                                 int foo = 0;
174                                 while(str->mark < str->buflen && foo < len)
175                                         str->ubuffer[str->mark++] = (unsigned char)buf[foo++];
176                         }
177                         if(!str->unicode && str->buffer)
178                         {
179                                 int copycount = MIN(len, str->buflen - str->mark);
180                                 memmove(str->buffer + str->mark, buf, copycount);
181                                 str->mark += copycount;
182                         }
183
184                         str->write_count += len;
185                         break;
186                         
187                 case STREAM_TYPE_FILE:
188                         if(str->binary) 
189                         {
190                                 if(str->unicode) 
191                                 {
192                                         gchar *writebuffer = convert_latin1_to_ucs4be_string(buf, len);
193                                         fwrite(writebuffer, sizeof(gchar), len * 4, str->file_pointer);
194                                         g_free(writebuffer);
195                                 } 
196                                 else /* Regular file */
197                                 {
198                                         fwrite(buf, sizeof(gchar), len, str->file_pointer);
199                                 }
200                         }
201                         else /* Text mode is the same for Unicode and regular files */
202                         {
203                                 gchar *utf8 = convert_latin1_to_utf8(buf, len);
204                                 if(utf8 != NULL)
205                                 {
206                                         g_fprintf(str->file_pointer, "%s", utf8);
207                                         g_free(utf8);
208                                 }
209                         }
210                         
211                         str->write_count += len;
212                         break;
213                 default:
214                         ILLEGAL_PARAM("Unknown stream type: %u", str->type);
215         }
216 }
217
218 /* Internal function: write a Unicode buffer with length to a stream. */
219 static void
220 write_buffer_to_stream_uni(strid_t str, glui32 *buf, glui32 len)
221 {
222         switch(str->type)
223         {
224                 case STREAM_TYPE_WINDOW:
225                         /* Each window type has a different way of printing to it */
226                         switch(str->window->type)
227                         {
228                                 /* Printing to these windows' streams does nothing */
229                                 case wintype_Blank:
230                                 case wintype_Pair:
231                                 case wintype_Graphics:
232                                         str->write_count += len;
233                                         break;
234                                         
235                             /* Text grid/buffer windows */
236                             case wintype_TextGrid:
237                             case wintype_TextBuffer:
238                             {
239                                 gchar *utf8 = convert_ucs4_to_utf8(buf, len);
240                                 if(utf8 != NULL) {
241                                                 write_utf8_to_window_buffer(str->window, utf8);
242                                                 g_free(utf8);
243                                         }
244                                 }       
245                                         str->write_count += len;
246                                         break;
247                                 default:
248                                         ILLEGAL_PARAM("Unknown window type: %u", str->window->type);
249                         }
250                         
251                         /* Now write the same buffer to the window's echo stream */
252                         if(str->window->echo_stream != NULL)
253                                 write_buffer_to_stream_uni(str->window->echo_stream, buf, len);
254                         
255                         break;
256                         
257                 case STREAM_TYPE_MEMORY:
258                         if(str->unicode && str->ubuffer)
259                         {
260                                 int copycount = MIN(len, str->buflen - str->mark);
261                                 memmove(str->ubuffer + str->mark, buf, copycount * sizeof(glui32));
262                                 str->mark += copycount;
263                         }
264                         if(!str->unicode && str->buffer)
265                         {
266                                 gchar *latin1 = convert_ucs4_to_latin1_binary(buf, len);
267                                 int copycount = MIN(len, str->buflen - str->mark);
268                                 memmove(str->buffer + str->mark, latin1, copycount);
269                                 g_free(latin1);
270                                 str->mark += copycount;
271                         }
272
273                         str->write_count += len;
274                         break;
275                         
276                 case STREAM_TYPE_FILE:
277                         if(str->binary) 
278                         {
279                                 if(str->unicode) 
280                                 {
281                                         gchar *writebuffer = convert_ucs4_to_ucs4be_string(buf, len);
282                                         fwrite(writebuffer, sizeof(gchar), len * 4, str->file_pointer);
283                                         g_free(writebuffer);
284                                 } 
285                                 else /* Regular file */
286                                 {
287                                         gchar *latin1 = convert_ucs4_to_latin1_binary(buf, len);
288                                         fwrite(latin1, sizeof(gchar), len, str->file_pointer);
289                                         g_free(latin1);
290                                 }
291                         }
292                         else /* Text mode is the same for Unicode and regular files */
293                         {
294                                 gchar *utf8 = convert_ucs4_to_utf8(buf, len);
295                                 if(utf8 != NULL) 
296                                 {
297                                         g_fprintf(str->file_pointer, "%s", utf8);
298                                         g_free(utf8);
299                                 }
300                         }
301                         
302                         str->write_count += len;
303                         break;
304                 default:
305                         ILLEGAL_PARAM("Unknown stream type: %u", str->type);
306         }
307 }
308
309 /**
310  * glk_put_char_stream:
311  * @str: An output stream.
312  * @ch: A character in Latin-1 encoding.
313  *
314  * The same as glk_put_char(), except that you specify a stream @str to print 
315  * to, instead of using the current stream. It is illegal for @str to be %NULL,
316  * or an input-only stream.
317  */
318 void
319 glk_put_char_stream(strid_t str, unsigned char ch)
320 {
321         VALID_STREAM(str, return);
322         g_return_if_fail(str->file_mode != filemode_Read);
323         
324         write_buffer_to_stream(str, (gchar *)&ch, 1);
325 }
326
327 /**
328  * glk_put_char_stream_uni:
329  * @str: An output stream.
330  * @ch: A Unicode code point.
331  *
332  * The same as glk_put_char_uni(), except that you specify a stream @str to
333  * print to, instead of using the current stream. It is illegal for @str to be 
334  * %NULL, or an input-only stream.
335  */
336 void
337 glk_put_char_stream_uni(strid_t str, glui32 ch)
338 {
339         VALID_STREAM(str, return);
340         g_return_if_fail(str->file_mode != filemode_Read);
341         
342         write_buffer_to_stream_uni(str, &ch, 1);
343 }
344
345 /**
346  * glk_put_string_stream:
347  * @str: An output stream.
348  * @s: A null-terminated string in Latin-1 encoding.
349  *
350  * The same as glk_put_string(), except that you specify a stream @str to print 
351  * to, instead of using the current stream. It is illegal for @str to be %NULL,
352  * or an input-only stream.
353  */
354 void
355 glk_put_string_stream(strid_t str, char *s)
356 {
357         VALID_STREAM(str, return);
358         if(*s == 0)
359                 return;
360
361         g_return_if_fail(str->file_mode != filemode_Read);
362
363         write_buffer_to_stream(str, s, strlen(s));
364 }
365
366 /**
367  * glk_put_string_stream_uni:
368  * @str: An output stream.
369  * @s: A null-terminated array of Unicode code points.
370  *
371  * The same as glk_put_string_uni(), except that you specify a stream @str to
372  * print to, instead of using the current stream. It is illegal for @str to be 
373  * %NULL, or an input-only stream.
374  */
375 void
376 glk_put_string_stream_uni(strid_t str, glui32 *s)
377 {
378         VALID_STREAM(str, return);
379         if(*s == 0)
380                 return;
381
382         g_return_if_fail(str->file_mode != filemode_Read);
383         
384         /* An impromptu strlen() for glui32 arrays */
385         glong len = 0;
386         glui32 *ptr = s;
387         while(*ptr++)
388                 len++;
389         write_buffer_to_stream_uni(str, s, len);
390 }
391
392 /**
393  * glk_put_buffer_stream:
394  * @str: An output stream.
395  * @buf: An array of characters in Latin-1 encoding.
396  * @len: Length of @buf.
397  *
398  * The same as glk_put_buffer(), except that you specify a stream @str to print 
399  * to, instead of using the current stream. It is illegal for @str to be %NULL,
400  * or an input-only stream.
401  */
402 void
403 glk_put_buffer_stream(strid_t str, char *buf, glui32 len)
404 {
405         VALID_STREAM(str, return);
406         if(len == 0)
407                 return;
408
409         g_return_if_fail(str->file_mode != filemode_Read);
410         
411         write_buffer_to_stream(str, buf, len);
412 }
413
414 /**
415  * glk_put_buffer_stream_uni:
416  * @str: An output stream.
417  * @buf: An array of Unicode code points.
418  * @len: Length of @buf.
419  *
420  * The same as glk_put_buffer_uni(), except that you specify a stream @str to
421  * print to, instead of using the current stream. It is illegal for @str to be 
422  * %NULL, or an input-only stream.
423  */
424 void
425 glk_put_buffer_stream_uni(strid_t str, glui32 *buf, glui32 len)
426 {
427         VALID_STREAM(str, return);
428         if(len == 0)
429                 return;
430
431         g_return_if_fail(str->file_mode != filemode_Read);
432         
433         write_buffer_to_stream_uni(str, buf, len);
434 }
435
436 /*
437  *
438  **************** READING FUNCTIONS ********************************************
439  *
440  */
441
442 /* Internal function: Read one big-endian four-byte character from file fp and
443 return it as a Unicode code point, or -1 on EOF */
444 static glsi32
445 read_ucs4be_char_from_file(FILE *fp)
446 {
447         unsigned char readbuffer[4];
448         if(fread(readbuffer, sizeof(unsigned char), 4, fp) < 4)
449                 return -1; /* EOF */
450         return
451                 readbuffer[0] << 24 | 
452                 readbuffer[1] << 16 | 
453                 readbuffer[2] << 8  | 
454                 readbuffer[3];
455 }
456
457 /* Internal function: Read one UTF-8 character, which may be more than one byte,
458 from file fp and return it as a Unicode code point, or -1 on EOF */
459 static glsi32
460 read_utf8_char_from_file(FILE *fp)
461 {
462         gchar readbuffer[4] = {0, 0, 0, 0}; /* Max UTF-8 width */
463         int foo;
464         gunichar charresult = (gunichar)-2;
465         for(foo = 0; foo < 4 && charresult == (gunichar)-2; foo++) 
466         {
467                 int ch = fgetc(fp);
468                 if(ch == EOF)
469                         return -1;
470                 readbuffer[foo] = (gchar)ch;
471                 charresult = g_utf8_get_char_validated(readbuffer, foo + 1);
472                 /* charresult is -1 if invalid, -2 if incomplete, and the unicode code
473                 point otherwise */
474         }
475         /* Silently return unknown characters as 0xFFFD, Replacement Character */
476         if(charresult == (gunichar)-1 || charresult == (gunichar)-2) 
477                 return 0xFFFD;
478         return charresult;
479 }
480
481 /* Internal function: Tell whether this code point is a Unicode newline. The
482 file pointer and eight-bit flag are included in case the newline is a CR 
483 (U+000D). If the next character is LF (U+000A) then it also belongs to the
484 newline. */
485 static gboolean
486 is_unicode_newline(glsi32 ch, FILE *fp, gboolean utf8)
487 {
488         if(ch == 0x0A || ch == 0x85 || ch == 0x0C || ch == 0x2028 || ch == 0x2029)
489                 return TRUE;
490         if(ch == 0x0D) {
491                 glsi32 ch2 = utf8? read_utf8_char_from_file(fp) : 
492                         read_ucs4be_char_from_file(fp);
493                 if(ch2 != 0x0A)
494                         if(fseek(fp, utf8? -1 : -4, SEEK_CUR) == -1);
495                                 WARNING_S("Seek failed on stream", g_strerror(errno) );
496                 return TRUE;
497         }
498         return FALSE;
499 }
500
501 /* Internal function: Read one character from a stream. Returns a value which
502  can be returned unchanged by glk_get_char_stream_uni(), but 
503  glk_get_char_stream() must replace high values by the placeholder character. */
504 static glsi32
505 get_char_stream_common(strid_t str)
506 {
507         switch(str->type)
508         {
509                 case STREAM_TYPE_MEMORY:
510                         if(str->unicode)
511                         {
512                                 if(!str->ubuffer || str->mark >= str->buflen)
513                                         return -1;
514                                 glui32 ch = str->ubuffer[str->mark++];
515                                 str->read_count++;
516                                 return ch;
517                         }
518                         else
519                         {
520                                 if(!str->buffer || str->mark >= str->buflen)
521                                         return -1;
522                                 unsigned char ch = str->buffer[str->mark++];
523                                 str->read_count++;
524                                 return ch;
525                         }
526                         break;
527                         
528                 case STREAM_TYPE_FILE:
529                         if(str->binary) 
530                         {
531                                 if(str->unicode) 
532                                 {
533                                         glsi32 ch = read_ucs4be_char_from_file(str->file_pointer);
534                                         if(ch == -1)
535                                                 return -1;
536                                         str->read_count++;
537                                         return ch;
538                                 }
539                                 else /* Regular file */
540                                 {
541                                         int ch = fgetc(str->file_pointer);
542                                         if(ch == EOF)
543                                                 return -1;
544                                         
545                                         str->read_count++;
546                                         return ch;
547                                 }
548                         }
549                         else /* Text mode is the same for Unicode and regular files */
550                         {
551                                 glsi32 ch = read_utf8_char_from_file(str->file_pointer);
552                                 if(ch == -1)
553                                         return -1;
554                                         
555                                 str->read_count++;
556                                 return ch;
557                         }
558                 default:
559                         ILLEGAL_PARAM("Reading illegal on stream type: %u", str->type);
560                         return -1;
561         }
562 }
563
564 /**
565  * glk_get_char_stream:
566  * @str: An input stream.
567  *
568  * Reads one character from the stream @str. (There is no notion of a
569  * <quote>current input stream.</quote>) It is illegal for @str to be %NULL, or
570  * an output-only stream.
571  *
572  * The result will be between 0 and 255. As with all basic text functions, Glk
573  * assumes the Latin-1 encoding. See <link 
574  * linkend="chimara-Character-Encoding">Character Encoding</link>. If the end
575  * of the stream has been reached, the result will be -1. 
576  *
577  * <note><para>
578  *   Note that high-bit characters (128..255) are <emphasis>not</emphasis>
579  *   returned as negative numbers.
580  * </para></note>
581  *
582  * If the stream contains Unicode data &mdash; for example, if it was created
583  * with glk_stream_open_file_uni() or glk_stream_open_memory_uni() &mdash; then
584  * characters beyond 255 will be returned as 0x3F (<code>"?"</code>).
585  *
586  * It is usually more efficient to read several characters at once with
587  * glk_get_buffer_stream() or glk_get_line_stream(), as opposed to calling
588  * glk_get_char_stream() several times.
589  *
590  * Returns: A character value between 0 and 255, or -1 on end of stream.
591  */
592 glsi32
593 glk_get_char_stream(strid_t str)
594 {
595         VALID_STREAM(str, return -1);
596         g_return_val_if_fail(str->file_mode == filemode_Read || str->file_mode == filemode_ReadWrite, -1);
597         
598         glsi32 ch = get_char_stream_common(str);
599         return (ch > 0xFF)? PLACEHOLDER : ch;
600 }
601
602 /**
603  * glk_get_char_stream_uni:
604  * @str: An input stream.
605  *
606  * Reads one character from the stream @str. The result will be between 0 and 
607  * 0x7FFFFFFF. If the end of the stream has been reached, the result will be -1.
608  *
609  * Returns: A value between 0 and 0x7FFFFFFF, or -1 on end of stream.
610  */
611 glsi32
612 glk_get_char_stream_uni(strid_t str)
613 {
614         VALID_STREAM(str, return -1);
615         g_return_val_if_fail(str->file_mode == filemode_Read || str->file_mode == filemode_ReadWrite, -1);
616         
617         return get_char_stream_common(str);
618 }
619
620 /**
621  * glk_get_buffer_stream:
622  * @str: An input stream.
623  * @buf: A buffer with space for at least @len characters.
624  * @len: The number of characters to read.
625  *
626  * Reads @len characters from @str, unless the end of stream is reached first.
627  * No terminal null is placed in the buffer.
628  *
629  * Returns: The number of characters actually read.
630  */
631 glui32
632 glk_get_buffer_stream(strid_t str, char *buf, glui32 len)
633 {
634         VALID_STREAM(str, return 0);
635         g_return_val_if_fail(str->file_mode == filemode_Read || str->file_mode == filemode_ReadWrite, 0);
636         g_return_val_if_fail(buf != NULL, 0);
637         
638         switch(str->type)
639         {
640                 case STREAM_TYPE_MEMORY:
641                 {
642                         int copycount = 0;
643                         if(str->unicode)
644                         {
645                                 while(copycount < len && str->ubuffer && str->mark < str->buflen) 
646                                 {
647                                         glui32 ch = str->ubuffer[str->mark++];
648                                         buf[copycount++] = (ch > 0xFF)? '?' : (char)ch;
649                                 }
650                         }
651                         else
652                         {
653                                 if(str->buffer) /* if not, copycount stays 0 */
654                                         copycount = MIN(len, str->buflen - str->mark);
655                                 memmove(buf, str->buffer + str->mark, copycount);
656                                 str->mark += copycount;
657                         }
658
659                         str->read_count += copycount;           
660                         return copycount;
661                 }       
662                 case STREAM_TYPE_FILE:
663                         if(str->binary) 
664                         {
665                                 if(str->unicode) /* Binary file with 4-byte characters */
666                                 {
667                                         /* Read len characters of 4 bytes each */
668                                         unsigned char *readbuffer = g_new0(unsigned char, 4 * len);
669                                         size_t count = fread(readbuffer, sizeof(unsigned char), 4 * len, str->file_pointer);
670                                         /* If there was an incomplete character */
671                                         if(count % 4 != 0) 
672                                         {
673                                                 count -= count % 4;
674                                                 WARNING("Incomplete character in binary Unicode file");
675                                         }
676                                         
677                                         int foo;
678                                         for(foo = 0; foo < count; foo += 4)
679                                         {
680                                                 glsi32 ch = readbuffer[foo] << 24
681                                                         | readbuffer[foo + 1] << 16
682                                                         | readbuffer[foo + 2] << 8
683                                                         | readbuffer[foo + 3];
684                                                 buf[foo / 4] = (ch > 255)? 0x3F : (char)ch;
685                                         }
686                                         g_free(readbuffer);
687                                         str->read_count += count / 4;
688                                         return count / 4;
689                                 }
690                                 else /* Regular binary file */
691                                 {
692                                         size_t count = fread(buf, sizeof(char), len, str->file_pointer);
693                                         str->read_count += count;
694                                         return count;
695                                 }
696                         }
697                         else /* Text mode is the same for Unicode and regular files */
698                         {
699                                 /* Do it character-by-character */
700                                 int foo;
701                                 for(foo = 0; foo < len; foo++)
702                                 {
703                                         glsi32 ch = read_utf8_char_from_file(str->file_pointer);
704                                         if(ch == -1)
705                                                 break;
706                                         str->read_count++;
707                                         buf[foo] = (ch > 0xFF)? 0x3F : (gchar)ch;
708                                 }
709                                 return foo;
710                         }
711                 default:
712                         ILLEGAL_PARAM("Reading illegal on stream type: %u", str->type);
713                         return 0;
714         }
715 }
716
717 /**
718  * glk_get_buffer_stream_uni:
719  * @str: An input stream.
720  * @buf: A buffer with space for at least @len Unicode code points.
721  * @len: The number of characters to read.
722  *
723  * Reads @len Unicode characters from @str, unless the end of stream is reached 
724  * first. No terminal null is placed in the buffer.
725  *
726  * Returns: The number of Unicode characters actually read.
727  */
728 glui32
729 glk_get_buffer_stream_uni(strid_t str, glui32 *buf, glui32 len)
730 {
731         VALID_STREAM(str, return 0);
732         g_return_val_if_fail(str->file_mode == filemode_Read || str->file_mode == filemode_ReadWrite, 0);
733         g_return_val_if_fail(buf != NULL, 0);
734         
735         switch(str->type)
736         {
737                 case STREAM_TYPE_MEMORY:
738                 {
739                         int copycount = 0;
740                         if(str->unicode)
741                         {
742                                 if(str->ubuffer) /* if not, copycount stays 0 */
743                                         copycount = MIN(len, str->buflen - str->mark);
744                                 memmove(buf, str->ubuffer + str->mark, copycount * 4);
745                                 str->mark += copycount;
746                         }
747                         else
748                         {
749                                 while(copycount < len && str->buffer && str->mark < str->buflen)
750                                 {
751                                         unsigned char ch = str->buffer[str->mark++];
752                                         buf[copycount++] = ch;
753                                 }
754                         }
755
756                         str->read_count += copycount;           
757                         return copycount;
758                 }       
759                 case STREAM_TYPE_FILE:
760                         if(str->binary) 
761                         {
762                                 if(str->unicode) /* Binary file with 4-byte characters */
763                                 {
764                                         /* Read len characters of 4 bytes each */
765                                         unsigned char *readbuffer = g_new0(unsigned char, 4 * len);
766                                         size_t count = fread(readbuffer, sizeof(unsigned char), 4 * len, str->file_pointer);
767                                         /* If there was an incomplete character */
768                                         if(count % 4 != 0) 
769                                         {
770                                                 count -= count % 4;
771                                                 WARNING("Incomplete character in binary Unicode file");
772                                         }
773                                         
774                                         int foo;
775                                         for(foo = 0; foo < count; foo += 4)
776                                                 buf[foo / 4] = readbuffer[foo] << 24
777                                                         | readbuffer[foo + 1] << 16
778                                                         | readbuffer[foo + 2] << 8
779                                                         | readbuffer[foo + 3];
780                                         g_free(readbuffer);
781                                         str->read_count += count / 4;
782                                         return count / 4;
783                                 }
784                                 else /* Regular binary file */
785                                 {
786                                         unsigned char *readbuffer = g_new0(unsigned char, len);
787                                         size_t count = fread(readbuffer, sizeof(unsigned char), len, str->file_pointer);
788                                         int foo;
789                                         for(foo = 0; foo < count; foo++)
790                                                 buf[foo] = readbuffer[foo];
791                                         g_free(readbuffer);
792                                         str->read_count += count;
793                                         return count;
794                                 }
795                         }
796                         else /* Text mode is the same for Unicode and regular files */
797                         {
798                                 /* Do it character-by-character */
799                                 int foo;
800                                 for(foo = 0; foo < len; foo++)
801                                 {
802                                         glsi32 ch = read_utf8_char_from_file(str->file_pointer);
803                                         if(ch == -1)
804                                                 break;
805                                         str->read_count++;
806                                         buf[foo] = ch;
807                                 }
808                                 return foo;
809                         }
810                 default:
811                         ILLEGAL_PARAM("Reading illegal on stream type: %u", str->type);
812                         return 0;
813         }
814 }
815
816 /**
817  * glk_get_line_stream:
818  * @str: An input stream.
819  * @buf: A buffer with space for at least @len characters.
820  * @len: The number of characters to read, plus one.
821  *
822  * Reads characters from @str, until either 
823  * <inlineequation>
824  *   <alt>@len - 1</alt>
825  *   <mathphrase>@len - 1</mathphrase>
826  * </inlineequation>
827  * characters have been read or a newline has been read. It then puts a
828  * terminal null (<code>'\0'</code>) aracter on
829  * the end. It returns the number of characters actually read, including the
830  * newline (if there is one) but not including the terminal null.
831  *
832  * Returns: The number of characters actually read.
833  */
834 glui32
835 glk_get_line_stream(strid_t str, char *buf, glui32 len)
836 {
837         VALID_STREAM(str, return 0);
838         g_return_val_if_fail(str->file_mode == filemode_Read || str->file_mode == filemode_ReadWrite, 0);
839         g_return_val_if_fail(buf != NULL, 0);
840
841         switch(str->type)
842         {
843                 case STREAM_TYPE_MEMORY:
844                 {
845                         int copycount = 0;
846                         if(str->unicode)
847                         {
848                                 /* Do it character-by-character */
849                                 while(copycount < len - 1 && str->ubuffer && str->mark < str->buflen) 
850                                 {
851                                         glui32 ch = str->ubuffer[str->mark++];
852                                         /* Check for Unicode newline; slightly different than
853                                         in file streams */
854                                         if(ch == 0x0A || ch == 0x85 || ch == 0x0C || ch == 0x2028 || ch == 0x2029)
855                                         {
856                                                 buf[copycount++] = '\n';
857                                                 break;
858                                         }
859                                         if(ch == 0x0D)
860                                         {
861                                                 if(str->ubuffer[str->mark] == 0x0A)
862                                                         str->mark++; /* skip past next newline */
863                                                 buf[copycount++] = '\n';
864                                                 break;
865                                         }
866                                         buf[copycount++] = (ch > 0xFF)? '?' : (char)ch;
867                                 }
868                                 buf[copycount] = '\0';
869                         }
870                         else
871                         {
872                                 if(str->buffer) /* if not, copycount stays 0 */
873                                         copycount = MIN(len - 1, str->buflen - str->mark);
874                                 char *endptr = memccpy(buf, str->buffer + str->mark, '\n', copycount);
875                                 if(endptr) /* newline was found */
876                                         copycount = endptr - buf; /* Real copy count */
877                                 buf[copycount] = '\0';
878                                 str->mark += copycount;
879                         }
880                         
881                         str->read_count += copycount;
882                         return copycount;
883                 }       
884                 case STREAM_TYPE_FILE:
885                         if(str->binary) 
886                         {
887                                 if(str->unicode) /* Binary file with 4-byte characters */
888                                 {
889                                         /* Do it character-by-character */
890                                         int foo;
891                                         for(foo = 0; foo < len - 1; foo++)
892                                         {
893                                                 glsi32 ch = read_ucs4be_char_from_file(str->file_pointer);
894                                                 if(ch == -1) 
895                                                 {
896                                                         buf[foo] = '\0';
897                                                         return foo - 1;
898                                                 }
899                                                 str->read_count++;
900                                                 if(is_unicode_newline(ch, str->file_pointer, FALSE))
901                                                 {
902                                                         buf[foo] = '\n';
903                                                         buf[foo + 1] = '\0';
904                                                         return foo;
905                                                 }
906                                                 buf[foo] = (ch > 0xFF)? '?' : (char)ch;
907                                         }
908                                         buf[len] = '\0';
909                                         return foo;
910                                 }
911                                 else /* Regular binary file */
912                                 {
913                                         if( !fgets(buf, len, str->file_pointer) ) {
914                                                 *buf = 0;
915                                                 return 0;
916                                         }
917
918                                         int nread = strlen(buf);
919                                         str->read_count += nread;
920                                         return nread;
921                                 }
922                         }
923                         else /* Text mode is the same for Unicode and regular files */
924                         {
925                                 /* Do it character-by-character */
926                                 int foo;
927                                 for(foo = 0; foo < len - 1; foo++)
928                                 {
929                                         glsi32 ch = read_utf8_char_from_file(str->file_pointer);
930                                         if(ch == -1)
931                                         {
932                                                 buf[foo] = '\0';
933                                                 return foo - 1;
934                                         }
935                                         str->read_count++;
936                                         if(is_unicode_newline(ch, str->file_pointer, TRUE))
937                                         {
938                                                 buf[foo] = '\n';
939                                                 buf[foo + 1] = '\0';
940                                                 return foo;
941                                         }
942                                         buf[foo] = (ch > 0xFF)? 0x3F : (char)ch;
943                                 }
944                                 buf[len] = '\0';
945                                 return foo;
946                         }
947                 default:
948                         ILLEGAL_PARAM("Reading illegal on stream type: %u", str->type);
949                         return 0;
950         }
951 }
952
953 /**
954  * glk_get_line_stream_uni:
955  * @str: An input stream.
956  * @buf: A buffer with space for at least @len Unicode code points.
957  * @len: The number of characters to read, plus one.
958  *
959  * Reads Unicode characters from @str, until either 
960  * <inlineequation>
961  *   <alt>@len - 1</alt>
962  *   <mathphrase>@len - 1</mathphrase>
963  * </inlineequation> 
964  * Unicode characters have been read or a newline has been read. It then puts a
965  * terminal null (a zero value) on the end.
966  *
967  * Returns: The number of characters actually read, including the newline (if
968  * there is one) but not including the terminal null.
969  */
970 glui32
971 glk_get_line_stream_uni(strid_t str, glui32 *buf, glui32 len)
972 {
973         VALID_STREAM(str, return 0);
974         g_return_val_if_fail(str->file_mode == filemode_Read || str->file_mode == filemode_ReadWrite, 0);
975         g_return_val_if_fail(buf != NULL, 0);
976
977         switch(str->type)
978         {
979                 case STREAM_TYPE_MEMORY:
980                 {
981                         int copycount = 0;
982                         if(str->unicode)
983                         {
984                                 /* Do it character-by-character */
985                                 while(copycount < len - 1 && str->ubuffer && str->mark < str->buflen) 
986                                 {
987                                         glui32 ch = str->ubuffer[str->mark++];
988                                         /* Check for Unicode newline; slightly different than
989                                         in file streams */
990                                         if(ch == 0x0A || ch == 0x85 || ch == 0x0C || ch == 0x2028 || ch == 0x2029)
991                                         {
992                                                 buf[copycount++] = '\n';
993                                                 break;
994                                         }
995                                         if(ch == 0x0D)
996                                         {
997                                                 if(str->ubuffer[str->mark] == 0x0A)
998                                                         str->mark++; /* skip past next newline */
999                                                 buf[copycount++] = '\n';
1000                                                 break;
1001                                         }
1002                                         buf[copycount++] = ch;
1003                                 }
1004                                 buf[copycount] = '\0';
1005                         }
1006                         else
1007                         {
1008                                 /* No recourse to memccpy(), so do it character-by-character */
1009                                 while(copycount < len - 1 && str->buffer && str->mark < str->buflen)
1010                                 {
1011                                         gchar ch = str->buffer[str->mark++];
1012                                         /* Check for newline */
1013                                         if(ch == '\n') /* Also check for \r and \r\n? */
1014                                         {
1015                                                 buf[copycount++] = '\n';
1016                                                 break;
1017                                         }
1018                                         buf[copycount++] = (unsigned char)ch;
1019                                 }
1020                                 buf[copycount] = 0;
1021                         }
1022                         
1023                         str->read_count += copycount;
1024                         return copycount;
1025                 }       
1026                 case STREAM_TYPE_FILE:
1027                         if(str->binary) 
1028                         {
1029                                 if(str->unicode) /* Binary file with 4-byte characters */
1030                                 {
1031                                         /* Do it character-by-character */
1032                                         int foo;
1033                                         for(foo = 0; foo < len - 1; foo++)
1034                                         {
1035                                                 glsi32 ch = read_ucs4be_char_from_file(str->file_pointer);
1036                                                 if(ch == -1) 
1037                                                 {
1038                                                         buf[foo] = 0;
1039                                                         return foo - 1;
1040                                                 }
1041                                                 str->read_count++;
1042                                                 if(is_unicode_newline(ch, str->file_pointer, FALSE))
1043                                                 {
1044                                                         buf[foo] = ch; /* Preserve newline types??? */
1045                                                         buf[foo + 1] = 0;
1046                                                         return foo;
1047                                                 }
1048                                                 buf[foo] = ch;
1049                                         }
1050                                         buf[len] = 0;
1051                                         return foo;
1052                                 }
1053                                 else /* Regular binary file */
1054                                 {
1055                                         gchar *readbuffer = g_new0(gchar, len);
1056                                         if( !fgets(readbuffer, len, str->file_pointer) ) {
1057                                                 *buf = 0;
1058                                                 return 0;
1059                                         }
1060
1061                                         glui32 count = strlen(readbuffer);
1062                                         int foo;
1063                                         for(foo = 0; foo < count + 1; foo++) /* Copy terminator */
1064                                                 buf[foo] = (unsigned char)(readbuffer[foo]);
1065                                         str->read_count += count;
1066                                         return count;
1067                                 }
1068                         }
1069                         else /* Text mode is the same for Unicode and regular files */
1070                         {
1071                                 /* Do it character-by-character */
1072                                 int foo;
1073                                 for(foo = 0; foo < len - 1; foo++)
1074                                 {
1075                                         glsi32 ch = read_utf8_char_from_file(str->file_pointer);
1076                                         if(ch == -1)
1077                                         {
1078                                                 buf[foo] = 0;
1079                                                 return foo - 1;
1080                                         }
1081                                         str->read_count++;
1082                                         if(is_unicode_newline(ch, str->file_pointer, TRUE))
1083                                         {
1084                                                 buf[foo] = ch; /* Preserve newline types??? */
1085                                                 buf[foo + 1] = 0;
1086                                                 return foo;
1087                                         }
1088                                         buf[foo] = ch;
1089                                 }
1090                                 buf[len] = 0;
1091                                 return foo;
1092                         }
1093                 default:
1094                         ILLEGAL_PARAM("Reading illegal on stream type: %u", str->type);
1095                         return 0;
1096         }
1097 }
1098
1099 /*
1100  *
1101  **************** SEEKING FUNCTIONS ********************************************
1102  *
1103  */
1104
1105 /**
1106  * glk_stream_get_position:
1107  * @str: A file or memory stream.
1108  *
1109  * Returns the position of the read/write mark in @str. For memory streams and
1110  * binary file streams, this is exactly the number of characters read or written
1111  * from the beginning of the stream (unless you have moved the mark with
1112  * glk_stream_set_position().) For text file streams, matters are more 
1113  * ambiguous, since (for example) writing one byte to a text file may store more
1114  * than one character in the platform's native encoding. You can only be sure
1115  * that the position increases as you read or write to the file.
1116  *
1117  * Additional complication: for Latin-1 memory and file streams, a character is
1118  * a byte. For Unicode memory and file streams (those created by
1119  * glk_stream_open_file_uni() and glk_stream_open_memory_uni()), a character is
1120  * a 32-bit word. So in a binary Unicode file, positions are multiples of four
1121  * bytes.
1122  *
1123  * <note><para>
1124  *   If this bothers you, don't use binary Unicode files. I don't think they're
1125  *   good for much anyhow.
1126  * </para></note>
1127  *
1128  * Returns: position of the read/write mark in @str.
1129  */
1130 glui32
1131 glk_stream_get_position(strid_t str)
1132 {
1133         VALID_STREAM(str, return 0);
1134         
1135         switch(str->type)
1136         {
1137                 case STREAM_TYPE_MEMORY:
1138                         return str->mark;
1139                 case STREAM_TYPE_FILE:
1140                         return ftell(str->file_pointer);
1141                 default:
1142                         ILLEGAL_PARAM("Seeking illegal on stream type: %u", str->type);
1143                         return 0;
1144         }
1145 }
1146
1147 /**
1148  * glk_stream_set_position:
1149  * @str: A file or memory stream.
1150  * @pos: The position to set the mark to, relative to @seekmode.
1151  * @seekmode: One of %seekmode_Start, %seekmode_Current, or %seekmode_End.
1152  *
1153  * Sets the position of the read/write mark in @str. The position is controlled
1154  * by @pos, and the meaning of @pos is controlled by @seekmode. See the
1155  * <code>seekmode_</code> constants below.
1156  *
1157  * It is illegal to specify a position before the beginning or after the end of
1158  * the file.
1159  *
1160  * In binary files, the mark position is exact &mdash; it corresponds with the
1161  * number of characters you have read or written. In text files, this mapping 
1162  * can vary, because of linefeed conventions or other character-set 
1163  * approximations. See <link linkend="chimara-Streams">Streams</link>.
1164  * glk_stream_set_position() and glk_stream_get_position() measure positions in
1165  * the platform's native encoding &mdash; after character cookery. Therefore,
1166  * in a text stream, it is safest to use glk_stream_set_position() only to move
1167  * to the beginning or end of a file, or to a position determined by
1168  * glk_stream_get_position().
1169  *
1170  * Again, in Latin-1 streams, characters are bytes. In Unicode streams,
1171  * characters are 32-bit words, or four bytes each.
1172  */
1173 void
1174 glk_stream_set_position(strid_t str, glsi32 pos, glui32 seekmode)
1175 {
1176         VALID_STREAM(str, return);
1177         g_return_if_fail(!(seekmode == seekmode_Start && pos < 0));
1178         g_return_if_fail(!(seekmode == seekmode_End && pos > 0));
1179         
1180         switch(str->type)
1181         {
1182                 case STREAM_TYPE_MEMORY:
1183                         switch(seekmode)
1184                         {
1185                                 case seekmode_Start:   str->mark = pos;  break;
1186                                 case seekmode_Current: str->mark += pos; break;
1187                                 case seekmode_End:     str->mark = str->buflen + pos; break;
1188                                 default:
1189                                         g_return_if_reached();
1190                                         return;
1191                         }
1192                         break;
1193                 case STREAM_TYPE_FILE:
1194                 {
1195                         int whence;
1196                         switch(seekmode)
1197                         {
1198                                 case seekmode_Start:   whence = SEEK_SET; break;
1199                                 case seekmode_Current: whence = SEEK_CUR; break;
1200                                 case seekmode_End:     whence = SEEK_END; break;
1201                                 default:
1202                                         g_return_if_reached();
1203                                         return;
1204                         }
1205                         if(fseek(str->file_pointer, pos, whence) == -1)
1206                                 WARNING("Seek failed on file stream");
1207                         break;
1208                 }
1209                 default:
1210                         ILLEGAL_PARAM("Seeking illegal on stream type: %u", str->type);
1211                         return;
1212         }
1213 }
1214