66fb989c40622473e55d33821318e4f25353abda
[rodin/chimara.git] / libchimara / strio.c
1 #include "charset.h"
2 #include "magic.h"
3 #include "stream.h"
4 #include <errno.h>
5 #include <stdio.h>
6 #include <string.h>
7 #include <glib.h>
8 #include <glib/gstdio.h>
9
10 /*
11  *
12  **************** WRITING FUNCTIONS ********************************************
13  *
14  */
15
16 /* Internal function: write a UTF-8 string to a text buffer window's text buffer. */
17 static void
18 write_utf8_to_window_buffer(winid_t win, gchar *s)
19 {
20         if(win->input_request_type == INPUT_REQUEST_LINE || win->input_request_type == INPUT_REQUEST_LINE_UNICODE)
21         {
22                 ILLEGAL("Tried to print to a text buffer window with line input pending.");
23                 return;
24         }
25
26         // Write to the buffer  
27         g_string_append(win->buffer, s);
28 }
29         
30 /* Internal function: flush a window's text buffer to the screen. */
31 void
32 flush_window_buffer(winid_t win)
33 {
34         if(win->type != wintype_TextBuffer && win->type != wintype_TextGrid)
35                 return;
36
37         if(win->buffer->len == 0)
38                 return;
39
40         gdk_threads_enter();
41
42         GtkTextBuffer *buffer = gtk_text_view_get_buffer( GTK_TEXT_VIEW(win->widget) );
43
44         switch(win->type) {
45         case wintype_TextBuffer:
46         {
47                 GtkTextIter iter;
48                 gtk_text_buffer_get_end_iter(buffer, &iter);
49
50                 GtkTextTagTable *tags = gtk_text_buffer_get_tag_table(buffer);
51                 GtkTextTag *style_tag = gtk_text_tag_table_lookup(tags, win->window_stream->style);
52
53                 if(win->window_stream->hyperlink_mode) {
54                         GtkTextTag *link_style_tag = gtk_text_tag_table_lookup(tags, "hyperlink");
55                         GtkTextTag *link_tag = win->current_hyperlink->tag;
56                         gtk_text_buffer_insert_with_tags(buffer, &iter, win->buffer->str, -1, style_tag, link_style_tag, link_tag, NULL);
57                 } else {
58                         gtk_text_buffer_insert_with_tags(buffer, &iter, win->buffer->str, -1, style_tag, NULL);
59                 }
60
61                 ChimaraGlk *glk = CHIMARA_GLK(gtk_widget_get_ancestor(win->widget, CHIMARA_TYPE_GLK));
62                 g_assert(glk);
63                 g_signal_emit_by_name(glk, "text-buffer-output", win->rock, win->buffer->str);
64         }
65                 break;
66
67         case wintype_TextGrid:
68         {
69                 /* Number of characters to insert */
70                 glong length = win->buffer->len;
71                 glong chars_left = length;
72                 
73                 GtkTextMark *cursor = gtk_text_buffer_get_mark(buffer, "cursor_position");
74                 
75                 /* Get cursor position */
76                 GtkTextIter start;
77                 gtk_text_buffer_get_iter_at_mark(buffer, &start, cursor);
78                 /* Spaces available on this line */
79                 gint available_space = win->width - gtk_text_iter_get_line_offset(&start);
80                 
81                 while(chars_left > available_space && !gtk_text_iter_is_end(&start))
82                 {
83                         GtkTextIter end = start;
84                         gtk_text_iter_forward_to_line_end(&end);
85                         gtk_text_buffer_delete(buffer, &start, &end);
86
87                         GtkTextTagTable *tags = gtk_text_buffer_get_tag_table(buffer);
88                         GtkTextTag *style_tag = gtk_text_tag_table_lookup(tags, win->window_stream->style);
89
90                         if(win->window_stream->hyperlink_mode) {
91                                 GtkTextTag *link_style_tag = gtk_text_tag_table_lookup(tags, "hyperlink");
92                                 GtkTextTag *link_tag = win->current_hyperlink->tag;
93                                 gtk_text_buffer_insert_with_tags(buffer, &start, win->buffer->str + (length - chars_left), available_space, style_tag, link_style_tag, link_tag, NULL);
94                         } else {
95                                 gtk_text_buffer_insert_with_tags(buffer, &start, win->buffer->str + (length - chars_left), available_space, style_tag, NULL);
96                         }
97
98                         chars_left -= available_space;
99                         gtk_text_iter_forward_line(&start);
100                         available_space = win->width;
101                 }
102                 if(!gtk_text_iter_is_end(&start))
103                 {
104                         GtkTextIter end = start;
105                         gtk_text_iter_forward_chars(&end, chars_left);
106                         gtk_text_buffer_delete(buffer, &start, &end);
107
108                         GtkTextTagTable *tags = gtk_text_buffer_get_tag_table(buffer);
109                         GtkTextTag *style_tag = gtk_text_tag_table_lookup(tags, win->window_stream->style);
110
111                         if(win->window_stream->hyperlink_mode) {
112                                 GtkTextTag *link_style_tag = gtk_text_tag_table_lookup(tags, "hyperlink");
113                                 GtkTextTag *link_tag = win->current_hyperlink->tag;
114                                 gtk_text_buffer_insert_with_tags(buffer, &start, win->buffer->str + (length - chars_left), -1, style_tag, link_style_tag, link_tag, NULL);
115                         } else {
116                                 gtk_text_buffer_insert_with_tags(buffer, &start, win->buffer->str + (length - chars_left), -1, style_tag, NULL);
117                         }
118                 }
119                 
120                 gtk_text_buffer_move_mark(buffer, cursor, &start);
121         }
122                 break;
123         }
124
125         gdk_threads_leave();
126
127         g_string_truncate(win->buffer, 0);
128 }
129
130 /* Internal function: write a Latin-1 buffer with length to a stream. */
131 static void
132 write_buffer_to_stream(strid_t str, gchar *buf, glui32 len)
133 {
134         switch(str->type)
135         {
136                 case STREAM_TYPE_WINDOW:
137                         /* Each window type has a different way of printing to it */
138                         switch(str->window->type)
139                         {
140                                 /* Printing to these windows' streams does nothing */
141                                 case wintype_Blank:
142                                 case wintype_Pair:
143                                 case wintype_Graphics:
144                                         str->write_count += len;
145                                         break;
146                                         
147                             /* Text grid/buffer windows */
148                             case wintype_TextGrid:
149                                 case wintype_TextBuffer:
150                             {
151                                 gchar *utf8 = convert_latin1_to_utf8(buf, len);
152                                 if(utf8 != NULL) {
153                                                 write_utf8_to_window_buffer(str->window, utf8);
154                                                 g_free(utf8);
155                                         }
156                                 }       
157                                         str->write_count += len;
158                                         break;
159                                 default:
160                                         ILLEGAL_PARAM("Unknown window type: %u", str->window->type);
161                         }
162                         
163                         /* Now write the same buffer to the window's echo stream */
164                         if(str->window->echo_stream != NULL)
165                                 write_buffer_to_stream(str->window->echo_stream, buf, len);
166                         
167                         break;
168                         
169                 case STREAM_TYPE_MEMORY:
170                         if(str->unicode && str->ubuffer)
171                         {
172                                 int foo = 0;
173                                 while(str->mark < str->buflen && foo < len)
174                                         str->ubuffer[str->mark++] = (unsigned char)buf[foo++];
175                         }
176                         if(!str->unicode && str->buffer)
177                         {
178                                 int copycount = MIN(len, str->buflen - str->mark);
179                                 memmove(str->buffer + str->mark, buf, copycount);
180                                 str->mark += copycount;
181                         }
182
183                         str->write_count += len;
184                         break;
185                         
186                 case STREAM_TYPE_FILE:
187                         if(str->binary) 
188                         {
189                                 if(str->unicode) 
190                                 {
191                                         gchar *writebuffer = convert_latin1_to_ucs4be_string(buf, len);
192                                         fwrite(writebuffer, sizeof(gchar), len * 4, str->file_pointer);
193                                         g_free(writebuffer);
194                                 } 
195                                 else /* Regular file */
196                                 {
197                                         fwrite(buf, sizeof(gchar), len, str->file_pointer);
198                                 }
199                         }
200                         else /* Text mode is the same for Unicode and regular files */
201                         {
202                                 gchar *utf8 = convert_latin1_to_utf8(buf, len);
203                                 if(utf8 != NULL)
204                                 {
205                                         g_fprintf(str->file_pointer, "%s", utf8);
206                                         g_free(utf8);
207                                 }
208                         }
209                         
210                         str->write_count += len;
211                         break;
212                 default:
213                         ILLEGAL_PARAM("Unknown stream type: %u", str->type);
214         }
215 }
216
217 /* Internal function: write a Unicode buffer with length to a stream. */
218 static void
219 write_buffer_to_stream_uni(strid_t str, glui32 *buf, glui32 len)
220 {
221         switch(str->type)
222         {
223                 case STREAM_TYPE_WINDOW:
224                         /* Each window type has a different way of printing to it */
225                         switch(str->window->type)
226                         {
227                                 /* Printing to these windows' streams does nothing */
228                                 case wintype_Blank:
229                                 case wintype_Pair:
230                                 case wintype_Graphics:
231                                         str->write_count += len;
232                                         break;
233                                         
234                             /* Text grid/buffer windows */
235                             case wintype_TextGrid:
236                             case wintype_TextBuffer:
237                             {
238                                 gchar *utf8 = convert_ucs4_to_utf8(buf, len);
239                                 if(utf8 != NULL) {
240                                                 write_utf8_to_window_buffer(str->window, utf8);
241                                                 g_free(utf8);
242                                         }
243                                 }       
244                                         str->write_count += len;
245                                         break;
246                                 default:
247                                         ILLEGAL_PARAM("Unknown window type: %u", str->window->type);
248                         }
249                         
250                         /* Now write the same buffer to the window's echo stream */
251                         if(str->window->echo_stream != NULL)
252                                 write_buffer_to_stream_uni(str->window->echo_stream, buf, len);
253                         
254                         break;
255                         
256                 case STREAM_TYPE_MEMORY:
257                         if(str->unicode && str->ubuffer)
258                         {
259                                 int copycount = MIN(len, str->buflen - str->mark);
260                                 memmove(str->ubuffer + str->mark, buf, copycount * sizeof(glui32));
261                                 str->mark += copycount;
262                         }
263                         if(!str->unicode && str->buffer)
264                         {
265                                 gchar *latin1 = convert_ucs4_to_latin1_binary(buf, len);
266                                 int copycount = MIN(len, str->buflen - str->mark);
267                                 memmove(str->buffer + str->mark, latin1, copycount);
268                                 g_free(latin1);
269                                 str->mark += copycount;
270                         }
271
272                         str->write_count += len;
273                         break;
274                         
275                 case STREAM_TYPE_FILE:
276                         if(str->binary) 
277                         {
278                                 if(str->unicode) 
279                                 {
280                                         gchar *writebuffer = convert_ucs4_to_ucs4be_string(buf, len);
281                                         fwrite(writebuffer, sizeof(gchar), len * 4, str->file_pointer);
282                                         g_free(writebuffer);
283                                 } 
284                                 else /* Regular file */
285                                 {
286                                         gchar *latin1 = convert_ucs4_to_latin1_binary(buf, len);
287                                         fwrite(latin1, sizeof(gchar), len, str->file_pointer);
288                                         g_free(latin1);
289                                 }
290                         }
291                         else /* Text mode is the same for Unicode and regular files */
292                         {
293                                 gchar *utf8 = convert_ucs4_to_utf8(buf, len);
294                                 if(utf8 != NULL) 
295                                 {
296                                         g_fprintf(str->file_pointer, "%s", utf8);
297                                         g_free(utf8);
298                                 }
299                         }
300                         
301                         str->write_count += len;
302                         break;
303                 default:
304                         ILLEGAL_PARAM("Unknown stream type: %u", str->type);
305         }
306 }
307
308 /**
309  * glk_put_char_stream:
310  * @str: An output stream.
311  * @ch: A character in Latin-1 encoding.
312  *
313  * The same as glk_put_char(), except that you specify a stream @str to print 
314  * to, instead of using the current stream. It is illegal for @str to be %NULL,
315  * or an input-only stream.
316  */
317 void
318 glk_put_char_stream(strid_t str, unsigned char ch)
319 {
320         VALID_STREAM(str, return);
321         g_return_if_fail(str->file_mode != filemode_Read);
322         
323         write_buffer_to_stream(str, (gchar *)&ch, 1);
324 }
325
326 /**
327  * glk_put_char_stream_uni:
328  * @str: An output stream.
329  * @ch: A Unicode code point.
330  *
331  * The same as glk_put_char_uni(), except that you specify a stream @str to
332  * print to, instead of using the current stream. It is illegal for @str to be 
333  * %NULL, or an input-only stream.
334  */
335 void
336 glk_put_char_stream_uni(strid_t str, glui32 ch)
337 {
338         VALID_STREAM(str, return);
339         g_return_if_fail(str->file_mode != filemode_Read);
340         
341         write_buffer_to_stream_uni(str, &ch, 1);
342 }
343
344 /**
345  * glk_put_string_stream:
346  * @str: An output stream.
347  * @s: A null-terminated string in Latin-1 encoding.
348  *
349  * The same as glk_put_string(), except that you specify a stream @str to print 
350  * to, instead of using the current stream. It is illegal for @str to be %NULL,
351  * or an input-only stream.
352  */
353 void
354 glk_put_string_stream(strid_t str, char *s)
355 {
356         VALID_STREAM(str, return);
357         if(*s == 0)
358                 return;
359
360         g_return_if_fail(str->file_mode != filemode_Read);
361
362         write_buffer_to_stream(str, s, strlen(s));
363 }
364
365 /**
366  * glk_put_string_stream_uni:
367  * @str: An output stream.
368  * @s: A null-terminated array of Unicode code points.
369  *
370  * The same as glk_put_string_uni(), except that you specify a stream @str to
371  * print to, instead of using the current stream. It is illegal for @str to be 
372  * %NULL, or an input-only stream.
373  */
374 void
375 glk_put_string_stream_uni(strid_t str, glui32 *s)
376 {
377         VALID_STREAM(str, return);
378         if(*s == 0)
379                 return;
380
381         g_return_if_fail(str->file_mode != filemode_Read);
382         
383         /* An impromptu strlen() for glui32 arrays */
384         glong len = 0;
385         glui32 *ptr = s;
386         while(*ptr++)
387                 len++;
388         write_buffer_to_stream_uni(str, s, len);
389 }
390
391 /**
392  * glk_put_buffer_stream:
393  * @str: An output stream.
394  * @buf: An array of characters in Latin-1 encoding.
395  * @len: Length of @buf.
396  *
397  * The same as glk_put_buffer(), except that you specify a stream @str to print 
398  * to, instead of using the current stream. It is illegal for @str to be %NULL,
399  * or an input-only stream.
400  */
401 void
402 glk_put_buffer_stream(strid_t str, char *buf, glui32 len)
403 {
404         VALID_STREAM(str, return);
405         if(len == 0)
406                 return;
407
408         g_return_if_fail(str->file_mode != filemode_Read);
409         
410         write_buffer_to_stream(str, buf, len);
411 }
412
413 /**
414  * glk_put_buffer_stream_uni:
415  * @str: An output stream.
416  * @buf: An array of Unicode code points.
417  * @len: Length of @buf.
418  *
419  * The same as glk_put_buffer_uni(), except that you specify a stream @str to
420  * print to, instead of using the current stream. It is illegal for @str to be 
421  * %NULL, or an input-only stream.
422  */
423 void
424 glk_put_buffer_stream_uni(strid_t str, glui32 *buf, glui32 len)
425 {
426         VALID_STREAM(str, return);
427         if(len == 0)
428                 return;
429
430         g_return_if_fail(str->file_mode != filemode_Read);
431         
432         write_buffer_to_stream_uni(str, buf, len);
433 }
434
435 /*
436  *
437  **************** READING FUNCTIONS ********************************************
438  *
439  */
440
441 /* Internal function: Read one big-endian four-byte character from file fp and
442 return it as a Unicode code point, or -1 on EOF */
443 static glsi32
444 read_ucs4be_char_from_file(FILE *fp)
445 {
446         unsigned char readbuffer[4];
447         if(fread(readbuffer, sizeof(unsigned char), 4, fp) < 4)
448                 return -1; /* EOF */
449         return
450                 readbuffer[0] << 24 | 
451                 readbuffer[1] << 16 | 
452                 readbuffer[2] << 8  | 
453                 readbuffer[3];
454 }
455
456 /* Internal function: Read one UTF-8 character, which may be more than one byte,
457 from file fp and return it as a Unicode code point, or -1 on EOF */
458 static glsi32
459 read_utf8_char_from_file(FILE *fp)
460 {
461         gchar readbuffer[4] = {0, 0, 0, 0}; /* Max UTF-8 width */
462         int foo;
463         gunichar charresult = (gunichar)-2;
464         for(foo = 0; foo < 4 && charresult == (gunichar)-2; foo++) 
465         {
466                 int ch = fgetc(fp);
467                 if(ch == EOF)
468                         return -1;
469                 readbuffer[foo] = (gchar)ch;
470                 charresult = g_utf8_get_char_validated(readbuffer, foo + 1);
471                 /* charresult is -1 if invalid, -2 if incomplete, and the unicode code
472                 point otherwise */
473         }
474         /* Silently return unknown characters as 0xFFFD, Replacement Character */
475         if(charresult == (gunichar)-1 || charresult == (gunichar)-2) 
476                 return 0xFFFD;
477         return charresult;
478 }
479
480 /* Internal function: Tell whether this code point is a Unicode newline. The
481 file pointer and eight-bit flag are included in case the newline is a CR 
482 (U+000D). If the next character is LF (U+000A) then it also belongs to the
483 newline. */
484 static gboolean
485 is_unicode_newline(glsi32 ch, FILE *fp, gboolean utf8)
486 {
487         if(ch == 0x0A || ch == 0x85 || ch == 0x0C || ch == 0x2028 || ch == 0x2029)
488                 return TRUE;
489         if(ch == 0x0D) {
490                 glsi32 ch2 = utf8? read_utf8_char_from_file(fp) : 
491                         read_ucs4be_char_from_file(fp);
492                 if(ch2 != 0x0A)
493                         if(fseek(fp, utf8? -1 : -4, SEEK_CUR) == -1);
494                                 WARNING_S("Seek failed on stream", g_strerror(errno) );
495                 return TRUE;
496         }
497         return FALSE;
498 }
499
500 /* Internal function: Read one character from a stream. Returns a value which
501  can be returned unchanged by glk_get_char_stream_uni(), but 
502  glk_get_char_stream() must replace high values by the placeholder character. */
503 static glsi32
504 get_char_stream_common(strid_t str)
505 {
506         switch(str->type)
507         {
508                 case STREAM_TYPE_MEMORY:
509                         if(str->unicode)
510                         {
511                                 if(!str->ubuffer || str->mark >= str->buflen)
512                                         return -1;
513                                 glui32 ch = str->ubuffer[str->mark++];
514                                 str->read_count++;
515                                 return ch;
516                         }
517                         else
518                         {
519                                 if(!str->buffer || str->mark >= str->buflen)
520                                         return -1;
521                                 unsigned char ch = str->buffer[str->mark++];
522                                 str->read_count++;
523                                 return ch;
524                         }
525                         break;
526                         
527                 case STREAM_TYPE_FILE:
528                         if(str->binary) 
529                         {
530                                 if(str->unicode) 
531                                 {
532                                         glsi32 ch = read_ucs4be_char_from_file(str->file_pointer);
533                                         if(ch == -1)
534                                                 return -1;
535                                         str->read_count++;
536                                         return ch;
537                                 }
538                                 else /* Regular file */
539                                 {
540                                         int ch = fgetc(str->file_pointer);
541                                         if(ch == EOF)
542                                                 return -1;
543                                         
544                                         str->read_count++;
545                                         return ch;
546                                 }
547                         }
548                         else /* Text mode is the same for Unicode and regular files */
549                         {
550                                 glsi32 ch = read_utf8_char_from_file(str->file_pointer);
551                                 if(ch == -1)
552                                         return -1;
553                                         
554                                 str->read_count++;
555                                 return ch;
556                         }
557                 default:
558                         ILLEGAL_PARAM("Reading illegal on stream type: %u", str->type);
559                         return -1;
560         }
561 }
562
563 /**
564  * glk_get_char_stream:
565  * @str: An input stream.
566  *
567  * Reads one character from the stream @str. (There is no notion of a
568  * <quote>current input stream.</quote>) It is illegal for @str to be %NULL, or
569  * an output-only stream.
570  *
571  * The result will be between 0 and 255. As with all basic text functions, Glk
572  * assumes the Latin-1 encoding. See <link 
573  * linkend="chimara-Character-Encoding">Character Encoding</link>. If the end
574  * of the stream has been reached, the result will be -1. 
575  *
576  * <note><para>
577  *   Note that high-bit characters (128..255) are <emphasis>not</emphasis>
578  *   returned as negative numbers.
579  * </para></note>
580  *
581  * If the stream contains Unicode data &mdash; for example, if it was created
582  * with glk_stream_open_file_uni() or glk_stream_open_memory_uni() &mdash; then
583  * characters beyond 255 will be returned as 0x3F (<code>"?"</code>).
584  *
585  * It is usually more efficient to read several characters at once with
586  * glk_get_buffer_stream() or glk_get_line_stream(), as opposed to calling
587  * glk_get_char_stream() several times.
588  *
589  * Returns: A character value between 0 and 255, or -1 on end of stream.
590  */
591 glsi32
592 glk_get_char_stream(strid_t str)
593 {
594         VALID_STREAM(str, return -1);
595         g_return_val_if_fail(str->file_mode == filemode_Read || str->file_mode == filemode_ReadWrite, -1);
596         
597         glsi32 ch = get_char_stream_common(str);
598         return (ch > 0xFF)? PLACEHOLDER : ch;
599 }
600
601 /**
602  * glk_get_char_stream_uni:
603  * @str: An input stream.
604  *
605  * Reads one character from the stream @str. The result will be between 0 and 
606  * 0x7FFFFFFF. If the end of the stream has been reached, the result will be -1.
607  *
608  * Returns: A value between 0 and 0x7FFFFFFF, or -1 on end of stream.
609  */
610 glsi32
611 glk_get_char_stream_uni(strid_t str)
612 {
613         VALID_STREAM(str, return -1);
614         g_return_val_if_fail(str->file_mode == filemode_Read || str->file_mode == filemode_ReadWrite, -1);
615         
616         return get_char_stream_common(str);
617 }
618
619 /**
620  * glk_get_buffer_stream:
621  * @str: An input stream.
622  * @buf: A buffer with space for at least @len characters.
623  * @len: The number of characters to read.
624  *
625  * Reads @len characters from @str, unless the end of stream is reached first.
626  * No terminal null is placed in the buffer.
627  *
628  * Returns: The number of characters actually read.
629  */
630 glui32
631 glk_get_buffer_stream(strid_t str, char *buf, glui32 len)
632 {
633         VALID_STREAM(str, return 0);
634         g_return_val_if_fail(str->file_mode == filemode_Read || str->file_mode == filemode_ReadWrite, 0);
635         g_return_val_if_fail(buf != NULL, 0);
636         
637         switch(str->type)
638         {
639                 case STREAM_TYPE_MEMORY:
640                 {
641                         int copycount = 0;
642                         if(str->unicode)
643                         {
644                                 while(copycount < len && str->ubuffer && str->mark < str->buflen) 
645                                 {
646                                         glui32 ch = str->ubuffer[str->mark++];
647                                         buf[copycount++] = (ch > 0xFF)? '?' : (char)ch;
648                                 }
649                         }
650                         else
651                         {
652                                 if(str->buffer) /* if not, copycount stays 0 */
653                                         copycount = MIN(len, str->buflen - str->mark);
654                                 memmove(buf, str->buffer + str->mark, copycount);
655                                 str->mark += copycount;
656                         }
657
658                         str->read_count += copycount;           
659                         return copycount;
660                 }       
661                 case STREAM_TYPE_FILE:
662                         if(str->binary) 
663                         {
664                                 if(str->unicode) /* Binary file with 4-byte characters */
665                                 {
666                                         /* Read len characters of 4 bytes each */
667                                         unsigned char *readbuffer = g_new0(unsigned char, 4 * len);
668                                         size_t count = fread(readbuffer, sizeof(unsigned char), 4 * len, str->file_pointer);
669                                         /* If there was an incomplete character */
670                                         if(count % 4 != 0) 
671                                         {
672                                                 count -= count % 4;
673                                                 WARNING("Incomplete character in binary Unicode file");
674                                         }
675                                         
676                                         int foo;
677                                         for(foo = 0; foo < count; foo += 4)
678                                         {
679                                                 glsi32 ch = readbuffer[foo] << 24
680                                                         | readbuffer[foo + 1] << 16
681                                                         | readbuffer[foo + 2] << 8
682                                                         | readbuffer[foo + 3];
683                                                 buf[foo / 4] = (ch > 255)? 0x3F : (char)ch;
684                                         }
685                                         g_free(readbuffer);
686                                         str->read_count += count / 4;
687                                         return count / 4;
688                                 }
689                                 else /* Regular binary file */
690                                 {
691                                         size_t count = fread(buf, sizeof(char), len, str->file_pointer);
692                                         str->read_count += count;
693                                         return count;
694                                 }
695                         }
696                         else /* Text mode is the same for Unicode and regular files */
697                         {
698                                 /* Do it character-by-character */
699                                 int foo;
700                                 for(foo = 0; foo < len; foo++)
701                                 {
702                                         glsi32 ch = read_utf8_char_from_file(str->file_pointer);
703                                         if(ch == -1)
704                                                 break;
705                                         str->read_count++;
706                                         buf[foo] = (ch > 0xFF)? 0x3F : (gchar)ch;
707                                 }
708                                 return foo;
709                         }
710                 default:
711                         ILLEGAL_PARAM("Reading illegal on stream type: %u", str->type);
712                         return 0;
713         }
714 }
715
716 /**
717  * glk_get_buffer_stream_uni:
718  * @str: An input stream.
719  * @buf: A buffer with space for at least @len Unicode code points.
720  * @len: The number of characters to read.
721  *
722  * Reads @len Unicode characters from @str, unless the end of stream is reached 
723  * first. No terminal null is placed in the buffer.
724  *
725  * Returns: The number of Unicode characters actually read.
726  */
727 glui32
728 glk_get_buffer_stream_uni(strid_t str, glui32 *buf, glui32 len)
729 {
730         VALID_STREAM(str, return 0);
731         g_return_val_if_fail(str->file_mode == filemode_Read || str->file_mode == filemode_ReadWrite, 0);
732         g_return_val_if_fail(buf != NULL, 0);
733         
734         switch(str->type)
735         {
736                 case STREAM_TYPE_MEMORY:
737                 {
738                         int copycount = 0;
739                         if(str->unicode)
740                         {
741                                 if(str->ubuffer) /* if not, copycount stays 0 */
742                                         copycount = MIN(len, str->buflen - str->mark);
743                                 memmove(buf, str->ubuffer + str->mark, copycount * 4);
744                                 str->mark += copycount;
745                         }
746                         else
747                         {
748                                 while(copycount < len && str->buffer && str->mark < str->buflen)
749                                 {
750                                         unsigned char ch = str->buffer[str->mark++];
751                                         buf[copycount++] = ch;
752                                 }
753                         }
754
755                         str->read_count += copycount;           
756                         return copycount;
757                 }       
758                 case STREAM_TYPE_FILE:
759                         if(str->binary) 
760                         {
761                                 if(str->unicode) /* Binary file with 4-byte characters */
762                                 {
763                                         /* Read len characters of 4 bytes each */
764                                         unsigned char *readbuffer = g_new0(unsigned char, 4 * len);
765                                         size_t count = fread(readbuffer, sizeof(unsigned char), 4 * len, str->file_pointer);
766                                         /* If there was an incomplete character */
767                                         if(count % 4 != 0) 
768                                         {
769                                                 count -= count % 4;
770                                                 WARNING("Incomplete character in binary Unicode file");
771                                         }
772                                         
773                                         int foo;
774                                         for(foo = 0; foo < count; foo += 4)
775                                                 buf[foo / 4] = readbuffer[foo] << 24
776                                                         | readbuffer[foo + 1] << 16
777                                                         | readbuffer[foo + 2] << 8
778                                                         | readbuffer[foo + 3];
779                                         g_free(readbuffer);
780                                         str->read_count += count / 4;
781                                         return count / 4;
782                                 }
783                                 else /* Regular binary file */
784                                 {
785                                         unsigned char *readbuffer = g_new0(unsigned char, len);
786                                         size_t count = fread(readbuffer, sizeof(unsigned char), len, str->file_pointer);
787                                         int foo;
788                                         for(foo = 0; foo < count; foo++)
789                                                 buf[foo] = readbuffer[foo];
790                                         g_free(readbuffer);
791                                         str->read_count += count;
792                                         return count;
793                                 }
794                         }
795                         else /* Text mode is the same for Unicode and regular files */
796                         {
797                                 /* Do it character-by-character */
798                                 int foo;
799                                 for(foo = 0; foo < len; foo++)
800                                 {
801                                         glsi32 ch = read_utf8_char_from_file(str->file_pointer);
802                                         if(ch == -1)
803                                                 break;
804                                         str->read_count++;
805                                         buf[foo] = ch;
806                                 }
807                                 return foo;
808                         }
809                 default:
810                         ILLEGAL_PARAM("Reading illegal on stream type: %u", str->type);
811                         return 0;
812         }
813 }
814
815 /**
816  * glk_get_line_stream:
817  * @str: An input stream.
818  * @buf: A buffer with space for at least @len characters.
819  * @len: The number of characters to read, plus one.
820  *
821  * Reads characters from @str, until either 
822  * <inlineequation>
823  *   <alt>@len - 1</alt>
824  *   <mathphrase>@len - 1</mathphrase>
825  * </inlineequation>
826  * characters have been read or a newline has been read. It then puts a
827  * terminal null (<code>'\0'</code>) aracter on
828  * the end. It returns the number of characters actually read, including the
829  * newline (if there is one) but not including the terminal null.
830  *
831  * Returns: The number of characters actually read.
832  */
833 glui32
834 glk_get_line_stream(strid_t str, char *buf, glui32 len)
835 {
836         VALID_STREAM(str, return 0);
837         g_return_val_if_fail(str->file_mode == filemode_Read || str->file_mode == filemode_ReadWrite, 0);
838         g_return_val_if_fail(buf != NULL, 0);
839
840         switch(str->type)
841         {
842                 case STREAM_TYPE_MEMORY:
843                 {
844                         int copycount = 0;
845                         if(str->unicode)
846                         {
847                                 /* Do it character-by-character */
848                                 while(copycount < len - 1 && str->ubuffer && str->mark < str->buflen) 
849                                 {
850                                         glui32 ch = str->ubuffer[str->mark++];
851                                         /* Check for Unicode newline; slightly different than
852                                         in file streams */
853                                         if(ch == 0x0A || ch == 0x85 || ch == 0x0C || ch == 0x2028 || ch == 0x2029)
854                                         {
855                                                 buf[copycount++] = '\n';
856                                                 break;
857                                         }
858                                         if(ch == 0x0D)
859                                         {
860                                                 if(str->ubuffer[str->mark] == 0x0A)
861                                                         str->mark++; /* skip past next newline */
862                                                 buf[copycount++] = '\n';
863                                                 break;
864                                         }
865                                         buf[copycount++] = (ch > 0xFF)? '?' : (char)ch;
866                                 }
867                                 buf[copycount] = '\0';
868                         }
869                         else
870                         {
871                                 if(str->buffer) /* if not, copycount stays 0 */
872                                         copycount = MIN(len - 1, str->buflen - str->mark);
873                                 char *endptr = memccpy(buf, str->buffer + str->mark, '\n', copycount);
874                                 if(endptr) /* newline was found */
875                                         copycount = endptr - buf; /* Real copy count */
876                                 buf[copycount] = '\0';
877                                 str->mark += copycount;
878                         }
879                         
880                         str->read_count += copycount;
881                         return copycount;
882                 }       
883                 case STREAM_TYPE_FILE:
884                         if(str->binary) 
885                         {
886                                 if(str->unicode) /* Binary file with 4-byte characters */
887                                 {
888                                         /* Do it character-by-character */
889                                         int foo;
890                                         for(foo = 0; foo < len - 1; foo++)
891                                         {
892                                                 glsi32 ch = read_ucs4be_char_from_file(str->file_pointer);
893                                                 if(ch == -1) 
894                                                 {
895                                                         buf[foo] = '\0';
896                                                         return foo - 1;
897                                                 }
898                                                 str->read_count++;
899                                                 if(is_unicode_newline(ch, str->file_pointer, FALSE))
900                                                 {
901                                                         buf[foo] = '\n';
902                                                         buf[foo + 1] = '\0';
903                                                         return foo;
904                                                 }
905                                                 buf[foo] = (ch > 0xFF)? '?' : (char)ch;
906                                         }
907                                         buf[len] = '\0';
908                                         return foo;
909                                 }
910                                 else /* Regular binary file */
911                                 {
912                                         if( !fgets(buf, len, str->file_pointer) ) {
913                                                 *buf = 0;
914                                                 return 0;
915                                         }
916
917                                         int nread = strlen(buf);
918                                         str->read_count += nread;
919                                         return nread;
920                                 }
921                         }
922                         else /* Text mode is the same for Unicode and regular files */
923                         {
924                                 /* Do it character-by-character */
925                                 int foo;
926                                 for(foo = 0; foo < len - 1; foo++)
927                                 {
928                                         glsi32 ch = read_utf8_char_from_file(str->file_pointer);
929                                         if(ch == -1)
930                                         {
931                                                 buf[foo] = '\0';
932                                                 return foo - 1;
933                                         }
934                                         str->read_count++;
935                                         if(is_unicode_newline(ch, str->file_pointer, TRUE))
936                                         {
937                                                 buf[foo] = '\n';
938                                                 buf[foo + 1] = '\0';
939                                                 return foo;
940                                         }
941                                         buf[foo] = (ch > 0xFF)? 0x3F : (char)ch;
942                                 }
943                                 buf[len] = '\0';
944                                 return foo;
945                         }
946                 default:
947                         ILLEGAL_PARAM("Reading illegal on stream type: %u", str->type);
948                         return 0;
949         }
950 }
951
952 /**
953  * glk_get_line_stream_uni:
954  * @str: An input stream.
955  * @buf: A buffer with space for at least @len Unicode code points.
956  * @len: The number of characters to read, plus one.
957  *
958  * Reads Unicode characters from @str, until either 
959  * <inlineequation>
960  *   <alt>@len - 1</alt>
961  *   <mathphrase>@len - 1</mathphrase>
962  * </inlineequation> 
963  * Unicode characters have been read or a newline has been read. It then puts a
964  * terminal null (a zero value) on the end.
965  *
966  * Returns: The number of characters actually read, including the newline (if
967  * there is one) but not including the terminal null.
968  */
969 glui32
970 glk_get_line_stream_uni(strid_t str, glui32 *buf, glui32 len)
971 {
972         VALID_STREAM(str, return 0);
973         g_return_val_if_fail(str->file_mode == filemode_Read || str->file_mode == filemode_ReadWrite, 0);
974         g_return_val_if_fail(buf != NULL, 0);
975
976         switch(str->type)
977         {
978                 case STREAM_TYPE_MEMORY:
979                 {
980                         int copycount = 0;
981                         if(str->unicode)
982                         {
983                                 /* Do it character-by-character */
984                                 while(copycount < len - 1 && str->ubuffer && str->mark < str->buflen) 
985                                 {
986                                         glui32 ch = str->ubuffer[str->mark++];
987                                         /* Check for Unicode newline; slightly different than
988                                         in file streams */
989                                         if(ch == 0x0A || ch == 0x85 || ch == 0x0C || ch == 0x2028 || ch == 0x2029)
990                                         {
991                                                 buf[copycount++] = '\n';
992                                                 break;
993                                         }
994                                         if(ch == 0x0D)
995                                         {
996                                                 if(str->ubuffer[str->mark] == 0x0A)
997                                                         str->mark++; /* skip past next newline */
998                                                 buf[copycount++] = '\n';
999                                                 break;
1000                                         }
1001                                         buf[copycount++] = ch;
1002                                 }
1003                                 buf[copycount] = '\0';
1004                         }
1005                         else
1006                         {
1007                                 /* No recourse to memccpy(), so do it character-by-character */
1008                                 while(copycount < len - 1 && str->buffer && str->mark < str->buflen)
1009                                 {
1010                                         gchar ch = str->buffer[str->mark++];
1011                                         /* Check for newline */
1012                                         if(ch == '\n') /* Also check for \r and \r\n? */
1013                                         {
1014                                                 buf[copycount++] = '\n';
1015                                                 break;
1016                                         }
1017                                         buf[copycount++] = (unsigned char)ch;
1018                                 }
1019                                 buf[copycount] = 0;
1020                         }
1021                         
1022                         str->read_count += copycount;
1023                         return copycount;
1024                 }       
1025                 case STREAM_TYPE_FILE:
1026                         if(str->binary) 
1027                         {
1028                                 if(str->unicode) /* Binary file with 4-byte characters */
1029                                 {
1030                                         /* Do it character-by-character */
1031                                         int foo;
1032                                         for(foo = 0; foo < len - 1; foo++)
1033                                         {
1034                                                 glsi32 ch = read_ucs4be_char_from_file(str->file_pointer);
1035                                                 if(ch == -1) 
1036                                                 {
1037                                                         buf[foo] = 0;
1038                                                         return foo - 1;
1039                                                 }
1040                                                 str->read_count++;
1041                                                 if(is_unicode_newline(ch, str->file_pointer, FALSE))
1042                                                 {
1043                                                         buf[foo] = ch; /* Preserve newline types??? */
1044                                                         buf[foo + 1] = 0;
1045                                                         return foo;
1046                                                 }
1047                                                 buf[foo] = ch;
1048                                         }
1049                                         buf[len] = 0;
1050                                         return foo;
1051                                 }
1052                                 else /* Regular binary file */
1053                                 {
1054                                         gchar *readbuffer = g_new0(gchar, len);
1055                                         if( !fgets(readbuffer, len, str->file_pointer) ) {
1056                                                 *buf = 0;
1057                                                 return 0;
1058                                         }
1059
1060                                         glui32 count = strlen(readbuffer);
1061                                         int foo;
1062                                         for(foo = 0; foo < count + 1; foo++) /* Copy terminator */
1063                                                 buf[foo] = (unsigned char)(readbuffer[foo]);
1064                                         str->read_count += count;
1065                                         return count;
1066                                 }
1067                         }
1068                         else /* Text mode is the same for Unicode and regular files */
1069                         {
1070                                 /* Do it character-by-character */
1071                                 int foo;
1072                                 for(foo = 0; foo < len - 1; foo++)
1073                                 {
1074                                         glsi32 ch = read_utf8_char_from_file(str->file_pointer);
1075                                         if(ch == -1)
1076                                         {
1077                                                 buf[foo] = 0;
1078                                                 return foo - 1;
1079                                         }
1080                                         str->read_count++;
1081                                         if(is_unicode_newline(ch, str->file_pointer, TRUE))
1082                                         {
1083                                                 buf[foo] = ch; /* Preserve newline types??? */
1084                                                 buf[foo + 1] = 0;
1085                                                 return foo;
1086                                         }
1087                                         buf[foo] = ch;
1088                                 }
1089                                 buf[len] = 0;
1090                                 return foo;
1091                         }
1092                 default:
1093                         ILLEGAL_PARAM("Reading illegal on stream type: %u", str->type);
1094                         return 0;
1095         }
1096 }
1097
1098 /*
1099  *
1100  **************** SEEKING FUNCTIONS ********************************************
1101  *
1102  */
1103
1104 /**
1105  * glk_stream_get_position:
1106  * @str: A file or memory stream.
1107  *
1108  * Returns the position of the read/write mark in @str. For memory streams and
1109  * binary file streams, this is exactly the number of characters read or written
1110  * from the beginning of the stream (unless you have moved the mark with
1111  * glk_stream_set_position().) For text file streams, matters are more 
1112  * ambiguous, since (for example) writing one byte to a text file may store more
1113  * than one character in the platform's native encoding. You can only be sure
1114  * that the position increases as you read or write to the file.
1115  *
1116  * Additional complication: for Latin-1 memory and file streams, a character is
1117  * a byte. For Unicode memory and file streams (those created by
1118  * glk_stream_open_file_uni() and glk_stream_open_memory_uni()), a character is
1119  * a 32-bit word. So in a binary Unicode file, positions are multiples of four
1120  * bytes.
1121  *
1122  * <note><para>
1123  *   If this bothers you, don't use binary Unicode files. I don't think they're
1124  *   good for much anyhow.
1125  * </para></note>
1126  *
1127  * Returns: position of the read/write mark in @str.
1128  */
1129 glui32
1130 glk_stream_get_position(strid_t str)
1131 {
1132         VALID_STREAM(str, return 0);
1133         
1134         switch(str->type)
1135         {
1136                 case STREAM_TYPE_MEMORY:
1137                         return str->mark;
1138                 case STREAM_TYPE_FILE:
1139                         return ftell(str->file_pointer);
1140                 default:
1141                         ILLEGAL_PARAM("Seeking illegal on stream type: %u", str->type);
1142                         return 0;
1143         }
1144 }
1145
1146 /**
1147  * glk_stream_set_position:
1148  * @str: A file or memory stream.
1149  * @pos: The position to set the mark to, relative to @seekmode.
1150  * @seekmode: One of %seekmode_Start, %seekmode_Current, or %seekmode_End.
1151  *
1152  * Sets the position of the read/write mark in @str. The position is controlled
1153  * by @pos, and the meaning of @pos is controlled by @seekmode. See the
1154  * <code>seekmode_</code> constants below.
1155  *
1156  * It is illegal to specify a position before the beginning or after the end of
1157  * the file.
1158  *
1159  * In binary files, the mark position is exact &mdash; it corresponds with the
1160  * number of characters you have read or written. In text files, this mapping 
1161  * can vary, because of linefeed conventions or other character-set 
1162  * approximations. See <link linkend="chimara-Streams">Streams</link>.
1163  * glk_stream_set_position() and glk_stream_get_position() measure positions in
1164  * the platform's native encoding &mdash; after character cookery. Therefore,
1165  * in a text stream, it is safest to use glk_stream_set_position() only to move
1166  * to the beginning or end of a file, or to a position determined by
1167  * glk_stream_get_position().
1168  *
1169  * Again, in Latin-1 streams, characters are bytes. In Unicode streams,
1170  * characters are 32-bit words, or four bytes each.
1171  */
1172 void
1173 glk_stream_set_position(strid_t str, glsi32 pos, glui32 seekmode)
1174 {
1175         VALID_STREAM(str, return);
1176         g_return_if_fail(!(seekmode == seekmode_Start && pos < 0));
1177         g_return_if_fail(!(seekmode == seekmode_End && pos > 0));
1178         
1179         switch(str->type)
1180         {
1181                 case STREAM_TYPE_MEMORY:
1182                         switch(seekmode)
1183                         {
1184                                 case seekmode_Start:   str->mark = pos;  break;
1185                                 case seekmode_Current: str->mark += pos; break;
1186                                 case seekmode_End:     str->mark = str->buflen + pos; break;
1187                                 default:
1188                                         g_return_if_reached();
1189                                         return;
1190                         }
1191                         break;
1192                 case STREAM_TYPE_FILE:
1193                 {
1194                         int whence;
1195                         switch(seekmode)
1196                         {
1197                                 case seekmode_Start:   whence = SEEK_SET; break;
1198                                 case seekmode_Current: whence = SEEK_CUR; break;
1199                                 case seekmode_End:     whence = SEEK_END; break;
1200                                 default:
1201                                         g_return_if_reached();
1202                                         return;
1203                         }
1204                         if(fseek(str->file_pointer, pos, whence) == -1)
1205                                 WARNING("Seek failed on file stream");
1206                         break;
1207                 }
1208                 default:
1209                         ILLEGAL_PARAM("Seeking illegal on stream type: %u", str->type);
1210                         return;
1211         }
1212 }
1213