ea9d7b5d5128a33d5a68a3ab93c367a1d294ae39
[rodin/chimara.git] / libchimara / strio.c
1 #include "charset.h"
2 #include "magic.h"
3 #include "stream.h"
4 #include <errno.h>
5 #include <stdio.h>
6 #include <string.h>
7 #include <pager.h>
8 #include <glib.h>
9 #include <glib/gstdio.h>
10
11 /*
12  *
13  **************** WRITING FUNCTIONS ********************************************
14  *
15  */
16
17 /* Internal function: write a UTF-8 string to a text buffer window's text buffer. */
18 static void
19 write_utf8_to_window_buffer(winid_t win, gchar *s)
20 {
21         if(win->input_request_type == INPUT_REQUEST_LINE || win->input_request_type == INPUT_REQUEST_LINE_UNICODE)
22         {
23                 ILLEGAL("Tried to print to a text buffer window with line input pending.");
24                 return;
25         }
26
27         // Write to the buffer  
28         g_string_append(win->buffer, s);
29 }
30         
31 /* Internal function: flush a window's text buffer to the screen. */
32 void
33 flush_window_buffer(winid_t win)
34 {
35         if(win->type != wintype_TextBuffer && win->type != wintype_TextGrid)
36                 return;
37
38         if(win->buffer->len == 0)
39                 return;
40
41         gdk_threads_enter();
42
43         GtkTextBuffer *buffer = gtk_text_view_get_buffer( GTK_TEXT_VIEW(win->widget) );
44
45         switch(win->type) {
46         case wintype_TextBuffer:
47         {
48                 GtkTextIter iter;
49                 gtk_text_buffer_get_end_iter(buffer, &iter);
50
51                 GtkTextTagTable *tags = gtk_text_buffer_get_tag_table(buffer);
52                 GtkTextTag *style_tag = gtk_text_tag_table_lookup(tags, win->window_stream->style);
53
54                 if(win->window_stream->hyperlink_mode) {
55                         GtkTextTag *link_style_tag = gtk_text_tag_table_lookup(tags, "hyperlink");
56                         GtkTextTag *link_tag = win->current_hyperlink->tag;
57                         gtk_text_buffer_insert_with_tags(buffer, &iter, win->buffer->str, -1, style_tag, link_style_tag, link_tag, NULL);
58                 } else {
59                         gtk_text_buffer_insert_with_tags(buffer, &iter, win->buffer->str, -1, style_tag, NULL);
60                 }
61
62                 ChimaraGlk *glk = CHIMARA_GLK(gtk_widget_get_ancestor(win->widget, CHIMARA_TYPE_GLK));
63                 g_assert(glk);
64                 g_signal_emit_by_name(glk, "text-buffer-output", win->rock, win->buffer->str);
65
66                 /* Schedule a check for the pager */
67                 g_idle_add(pager_check, win);
68
69         }
70                 break;
71
72         case wintype_TextGrid:
73         {
74                 /* Number of characters to insert */
75                 glong length = win->buffer->len;
76                 glong chars_left = length;
77                 
78                 GtkTextMark *cursor = gtk_text_buffer_get_mark(buffer, "cursor_position");
79                 
80                 /* Get cursor position */
81                 GtkTextIter start;
82                 gtk_text_buffer_get_iter_at_mark(buffer, &start, cursor);
83                 /* Spaces available on this line */
84                 gint available_space = win->width - gtk_text_iter_get_line_offset(&start);
85                 
86                 while(chars_left > available_space && !gtk_text_iter_is_end(&start))
87                 {
88                         GtkTextIter end = start;
89                         gtk_text_iter_forward_to_line_end(&end);
90                         gtk_text_buffer_delete(buffer, &start, &end);
91
92                         GtkTextTagTable *tags = gtk_text_buffer_get_tag_table(buffer);
93                         GtkTextTag *style_tag = gtk_text_tag_table_lookup(tags, win->window_stream->style);
94
95                         if(win->window_stream->hyperlink_mode) {
96                                 GtkTextTag *link_style_tag = gtk_text_tag_table_lookup(tags, "hyperlink");
97                                 GtkTextTag *link_tag = win->current_hyperlink->tag;
98                                 gtk_text_buffer_insert_with_tags(buffer, &start, win->buffer->str + (length - chars_left), available_space, style_tag, link_style_tag, link_tag, NULL);
99                         } else {
100                                 gtk_text_buffer_insert_with_tags(buffer, &start, win->buffer->str + (length - chars_left), available_space, style_tag, NULL);
101                         }
102
103                         chars_left -= available_space;
104                         gtk_text_iter_forward_line(&start);
105                         available_space = win->width;
106                 }
107                 if(!gtk_text_iter_is_end(&start))
108                 {
109                         GtkTextIter end = start;
110                         gtk_text_iter_forward_chars(&end, chars_left);
111                         gtk_text_buffer_delete(buffer, &start, &end);
112
113                         GtkTextTagTable *tags = gtk_text_buffer_get_tag_table(buffer);
114                         GtkTextTag *style_tag = gtk_text_tag_table_lookup(tags, win->window_stream->style);
115
116                         if(win->window_stream->hyperlink_mode) {
117                                 GtkTextTag *link_style_tag = gtk_text_tag_table_lookup(tags, "hyperlink");
118                                 GtkTextTag *link_tag = win->current_hyperlink->tag;
119                                 gtk_text_buffer_insert_with_tags(buffer, &start, win->buffer->str + (length - chars_left), -1, style_tag, link_style_tag, link_tag, NULL);
120                         } else {
121                                 gtk_text_buffer_insert_with_tags(buffer, &start, win->buffer->str + (length - chars_left), -1, style_tag, NULL);
122                         }
123                 }
124                 
125                 gtk_text_buffer_move_mark(buffer, cursor, &start);
126         }
127                 break;
128         }
129
130         gdk_threads_leave();
131
132         g_string_truncate(win->buffer, 0);
133 }
134
135 /* Internal function: write a Latin-1 buffer with length to a stream. */
136 static void
137 write_buffer_to_stream(strid_t str, gchar *buf, glui32 len)
138 {
139         switch(str->type)
140         {
141                 case STREAM_TYPE_WINDOW:
142                         /* Each window type has a different way of printing to it */
143                         switch(str->window->type)
144                         {
145                                 /* Printing to these windows' streams does nothing */
146                                 case wintype_Blank:
147                                 case wintype_Pair:
148                                 case wintype_Graphics:
149                                         str->write_count += len;
150                                         break;
151                                         
152                             /* Text grid/buffer windows */
153                             case wintype_TextGrid:
154                                 case wintype_TextBuffer:
155                             {
156                                 gchar *utf8 = convert_latin1_to_utf8(buf, len);
157                                 if(utf8 != NULL) {
158                                                 write_utf8_to_window_buffer(str->window, utf8);
159                                                 g_free(utf8);
160                                         }
161                                 }       
162                                         str->write_count += len;
163                                         break;
164                                 default:
165                                         ILLEGAL_PARAM("Unknown window type: %u", str->window->type);
166                         }
167                         
168                         /* Now write the same buffer to the window's echo stream */
169                         if(str->window->echo_stream != NULL)
170                                 write_buffer_to_stream(str->window->echo_stream, buf, len);
171                         
172                         break;
173                         
174                 case STREAM_TYPE_MEMORY:
175                         if(str->unicode && str->ubuffer)
176                         {
177                                 int foo = 0;
178                                 while(str->mark < str->buflen && foo < len)
179                                         str->ubuffer[str->mark++] = (unsigned char)buf[foo++];
180                         }
181                         if(!str->unicode && str->buffer)
182                         {
183                                 int copycount = MIN(len, str->buflen - str->mark);
184                                 memmove(str->buffer + str->mark, buf, copycount);
185                                 str->mark += copycount;
186                         }
187
188                         str->write_count += len;
189                         break;
190                         
191                 case STREAM_TYPE_FILE:
192                         if(str->binary) 
193                         {
194                                 if(str->unicode) 
195                                 {
196                                         gchar *writebuffer = convert_latin1_to_ucs4be_string(buf, len);
197                                         fwrite(writebuffer, sizeof(gchar), len * 4, str->file_pointer);
198                                         g_free(writebuffer);
199                                 } 
200                                 else /* Regular file */
201                                 {
202                                         fwrite(buf, sizeof(gchar), len, str->file_pointer);
203                                 }
204                         }
205                         else /* Text mode is the same for Unicode and regular files */
206                         {
207                                 gchar *utf8 = convert_latin1_to_utf8(buf, len);
208                                 if(utf8 != NULL)
209                                 {
210                                         g_fprintf(str->file_pointer, "%s", utf8);
211                                         g_free(utf8);
212                                 }
213                         }
214                         
215                         str->write_count += len;
216                         break;
217                 default:
218                         ILLEGAL_PARAM("Unknown stream type: %u", str->type);
219         }
220 }
221
222 /* Internal function: write a Unicode buffer with length to a stream. */
223 static void
224 write_buffer_to_stream_uni(strid_t str, glui32 *buf, glui32 len)
225 {
226         switch(str->type)
227         {
228                 case STREAM_TYPE_WINDOW:
229                         /* Each window type has a different way of printing to it */
230                         switch(str->window->type)
231                         {
232                                 /* Printing to these windows' streams does nothing */
233                                 case wintype_Blank:
234                                 case wintype_Pair:
235                                 case wintype_Graphics:
236                                         str->write_count += len;
237                                         break;
238                                         
239                             /* Text grid/buffer windows */
240                             case wintype_TextGrid:
241                             case wintype_TextBuffer:
242                             {
243                                 gchar *utf8 = convert_ucs4_to_utf8(buf, len);
244                                 if(utf8 != NULL) {
245                                                 write_utf8_to_window_buffer(str->window, utf8);
246                                                 g_free(utf8);
247                                         }
248                                 }       
249                                         str->write_count += len;
250                                         break;
251                                 default:
252                                         ILLEGAL_PARAM("Unknown window type: %u", str->window->type);
253                         }
254                         
255                         /* Now write the same buffer to the window's echo stream */
256                         if(str->window->echo_stream != NULL)
257                                 write_buffer_to_stream_uni(str->window->echo_stream, buf, len);
258                         
259                         break;
260                         
261                 case STREAM_TYPE_MEMORY:
262                         if(str->unicode && str->ubuffer)
263                         {
264                                 int copycount = MIN(len, str->buflen - str->mark);
265                                 memmove(str->ubuffer + str->mark, buf, copycount * sizeof(glui32));
266                                 str->mark += copycount;
267                         }
268                         if(!str->unicode && str->buffer)
269                         {
270                                 gchar *latin1 = convert_ucs4_to_latin1_binary(buf, len);
271                                 int copycount = MIN(len, str->buflen - str->mark);
272                                 memmove(str->buffer + str->mark, latin1, copycount);
273                                 g_free(latin1);
274                                 str->mark += copycount;
275                         }
276
277                         str->write_count += len;
278                         break;
279                         
280                 case STREAM_TYPE_FILE:
281                         if(str->binary) 
282                         {
283                                 if(str->unicode) 
284                                 {
285                                         gchar *writebuffer = convert_ucs4_to_ucs4be_string(buf, len);
286                                         fwrite(writebuffer, sizeof(gchar), len * 4, str->file_pointer);
287                                         g_free(writebuffer);
288                                 } 
289                                 else /* Regular file */
290                                 {
291                                         gchar *latin1 = convert_ucs4_to_latin1_binary(buf, len);
292                                         fwrite(latin1, sizeof(gchar), len, str->file_pointer);
293                                         g_free(latin1);
294                                 }
295                         }
296                         else /* Text mode is the same for Unicode and regular files */
297                         {
298                                 gchar *utf8 = convert_ucs4_to_utf8(buf, len);
299                                 if(utf8 != NULL) 
300                                 {
301                                         g_fprintf(str->file_pointer, "%s", utf8);
302                                         g_free(utf8);
303                                 }
304                         }
305                         
306                         str->write_count += len;
307                         break;
308                 default:
309                         ILLEGAL_PARAM("Unknown stream type: %u", str->type);
310         }
311 }
312
313 /**
314  * glk_put_char_stream:
315  * @str: An output stream.
316  * @ch: A character in Latin-1 encoding.
317  *
318  * The same as glk_put_char(), except that you specify a stream @str to print 
319  * to, instead of using the current stream. It is illegal for @str to be %NULL,
320  * or an input-only stream.
321  */
322 void
323 glk_put_char_stream(strid_t str, unsigned char ch)
324 {
325         VALID_STREAM(str, return);
326         g_return_if_fail(str->file_mode != filemode_Read);
327         
328         write_buffer_to_stream(str, (gchar *)&ch, 1);
329 }
330
331 /**
332  * glk_put_char_stream_uni:
333  * @str: An output stream.
334  * @ch: A Unicode code point.
335  *
336  * The same as glk_put_char_uni(), except that you specify a stream @str to
337  * print to, instead of using the current stream. It is illegal for @str to be 
338  * %NULL, or an input-only stream.
339  */
340 void
341 glk_put_char_stream_uni(strid_t str, glui32 ch)
342 {
343         VALID_STREAM(str, return);
344         g_return_if_fail(str->file_mode != filemode_Read);
345         
346         write_buffer_to_stream_uni(str, &ch, 1);
347 }
348
349 /**
350  * glk_put_string_stream:
351  * @str: An output stream.
352  * @s: A null-terminated string in Latin-1 encoding.
353  *
354  * The same as glk_put_string(), except that you specify a stream @str to print 
355  * to, instead of using the current stream. It is illegal for @str to be %NULL,
356  * or an input-only stream.
357  */
358 void
359 glk_put_string_stream(strid_t str, char *s)
360 {
361         VALID_STREAM(str, return);
362         if(*s == 0)
363                 return;
364
365         g_return_if_fail(str->file_mode != filemode_Read);
366
367         write_buffer_to_stream(str, s, strlen(s));
368 }
369
370 /**
371  * glk_put_string_stream_uni:
372  * @str: An output stream.
373  * @s: A null-terminated array of Unicode code points.
374  *
375  * The same as glk_put_string_uni(), except that you specify a stream @str to
376  * print to, instead of using the current stream. It is illegal for @str to be 
377  * %NULL, or an input-only stream.
378  */
379 void
380 glk_put_string_stream_uni(strid_t str, glui32 *s)
381 {
382         VALID_STREAM(str, return);
383         if(*s == 0)
384                 return;
385
386         g_return_if_fail(str->file_mode != filemode_Read);
387         
388         /* An impromptu strlen() for glui32 arrays */
389         glong len = 0;
390         glui32 *ptr = s;
391         while(*ptr++)
392                 len++;
393         write_buffer_to_stream_uni(str, s, len);
394 }
395
396 /**
397  * glk_put_buffer_stream:
398  * @str: An output stream.
399  * @buf: An array of characters in Latin-1 encoding.
400  * @len: Length of @buf.
401  *
402  * The same as glk_put_buffer(), except that you specify a stream @str to print 
403  * to, instead of using the current stream. It is illegal for @str to be %NULL,
404  * or an input-only stream.
405  */
406 void
407 glk_put_buffer_stream(strid_t str, char *buf, glui32 len)
408 {
409         VALID_STREAM(str, return);
410         if(len == 0)
411                 return;
412
413         g_return_if_fail(str->file_mode != filemode_Read);
414         
415         write_buffer_to_stream(str, buf, len);
416 }
417
418 /**
419  * glk_put_buffer_stream_uni:
420  * @str: An output stream.
421  * @buf: An array of Unicode code points.
422  * @len: Length of @buf.
423  *
424  * The same as glk_put_buffer_uni(), except that you specify a stream @str to
425  * print to, instead of using the current stream. It is illegal for @str to be 
426  * %NULL, or an input-only stream.
427  */
428 void
429 glk_put_buffer_stream_uni(strid_t str, glui32 *buf, glui32 len)
430 {
431         VALID_STREAM(str, return);
432         if(len == 0)
433                 return;
434
435         g_return_if_fail(str->file_mode != filemode_Read);
436         
437         write_buffer_to_stream_uni(str, buf, len);
438 }
439
440 /*
441  *
442  **************** READING FUNCTIONS ********************************************
443  *
444  */
445
446 /* Internal function: Read one big-endian four-byte character from file fp and
447 return it as a Unicode code point, or -1 on EOF */
448 static glsi32
449 read_ucs4be_char_from_file(FILE *fp)
450 {
451         unsigned char readbuffer[4];
452         if(fread(readbuffer, sizeof(unsigned char), 4, fp) < 4)
453                 return -1; /* EOF */
454         return
455                 readbuffer[0] << 24 | 
456                 readbuffer[1] << 16 | 
457                 readbuffer[2] << 8  | 
458                 readbuffer[3];
459 }
460
461 /* Internal function: Read one UTF-8 character, which may be more than one byte,
462 from file fp and return it as a Unicode code point, or -1 on EOF */
463 static glsi32
464 read_utf8_char_from_file(FILE *fp)
465 {
466         gchar readbuffer[4] = {0, 0, 0, 0}; /* Max UTF-8 width */
467         int foo;
468         gunichar charresult = (gunichar)-2;
469         for(foo = 0; foo < 4 && charresult == (gunichar)-2; foo++) 
470         {
471                 int ch = fgetc(fp);
472                 if(ch == EOF)
473                         return -1;
474                 readbuffer[foo] = (gchar)ch;
475                 charresult = g_utf8_get_char_validated(readbuffer, foo + 1);
476                 /* charresult is -1 if invalid, -2 if incomplete, and the unicode code
477                 point otherwise */
478         }
479         /* Silently return unknown characters as 0xFFFD, Replacement Character */
480         if(charresult == (gunichar)-1 || charresult == (gunichar)-2) 
481                 return 0xFFFD;
482         return charresult;
483 }
484
485 /* Internal function: Tell whether this code point is a Unicode newline. The
486 file pointer and eight-bit flag are included in case the newline is a CR 
487 (U+000D). If the next character is LF (U+000A) then it also belongs to the
488 newline. */
489 static gboolean
490 is_unicode_newline(glsi32 ch, FILE *fp, gboolean utf8)
491 {
492         if(ch == 0x0A || ch == 0x85 || ch == 0x0C || ch == 0x2028 || ch == 0x2029)
493                 return TRUE;
494         if(ch == 0x0D) {
495                 glsi32 ch2 = utf8? read_utf8_char_from_file(fp) : 
496                         read_ucs4be_char_from_file(fp);
497                 if(ch2 != 0x0A)
498                         if(fseek(fp, utf8? -1 : -4, SEEK_CUR) == -1);
499                                 WARNING_S("Seek failed on stream", g_strerror(errno) );
500                 return TRUE;
501         }
502         return FALSE;
503 }
504
505 /* Internal function: Read one character from a stream. Returns a value which
506  can be returned unchanged by glk_get_char_stream_uni(), but 
507  glk_get_char_stream() must replace high values by the placeholder character. */
508 static glsi32
509 get_char_stream_common(strid_t str)
510 {
511         switch(str->type)
512         {
513                 case STREAM_TYPE_MEMORY:
514                         if(str->unicode)
515                         {
516                                 if(!str->ubuffer || str->mark >= str->buflen)
517                                         return -1;
518                                 glui32 ch = str->ubuffer[str->mark++];
519                                 str->read_count++;
520                                 return ch;
521                         }
522                         else
523                         {
524                                 if(!str->buffer || str->mark >= str->buflen)
525                                         return -1;
526                                 unsigned char ch = str->buffer[str->mark++];
527                                 str->read_count++;
528                                 return ch;
529                         }
530                         break;
531                         
532                 case STREAM_TYPE_FILE:
533                         if(str->binary) 
534                         {
535                                 if(str->unicode) 
536                                 {
537                                         glsi32 ch = read_ucs4be_char_from_file(str->file_pointer);
538                                         if(ch == -1)
539                                                 return -1;
540                                         str->read_count++;
541                                         return ch;
542                                 }
543                                 else /* Regular file */
544                                 {
545                                         int ch = fgetc(str->file_pointer);
546                                         if(ch == EOF)
547                                                 return -1;
548                                         
549                                         str->read_count++;
550                                         return ch;
551                                 }
552                         }
553                         else /* Text mode is the same for Unicode and regular files */
554                         {
555                                 glsi32 ch = read_utf8_char_from_file(str->file_pointer);
556                                 if(ch == -1)
557                                         return -1;
558                                         
559                                 str->read_count++;
560                                 return ch;
561                         }
562                 default:
563                         ILLEGAL_PARAM("Reading illegal on stream type: %u", str->type);
564                         return -1;
565         }
566 }
567
568 /**
569  * glk_get_char_stream:
570  * @str: An input stream.
571  *
572  * Reads one character from the stream @str. (There is no notion of a
573  * <quote>current input stream.</quote>) It is illegal for @str to be %NULL, or
574  * an output-only stream.
575  *
576  * The result will be between 0 and 255. As with all basic text functions, Glk
577  * assumes the Latin-1 encoding. See <link 
578  * linkend="chimara-Character-Encoding">Character Encoding</link>. If the end
579  * of the stream has been reached, the result will be -1. 
580  *
581  * <note><para>
582  *   Note that high-bit characters (128..255) are <emphasis>not</emphasis>
583  *   returned as negative numbers.
584  * </para></note>
585  *
586  * If the stream contains Unicode data &mdash; for example, if it was created
587  * with glk_stream_open_file_uni() or glk_stream_open_memory_uni() &mdash; then
588  * characters beyond 255 will be returned as 0x3F (<code>"?"</code>).
589  *
590  * It is usually more efficient to read several characters at once with
591  * glk_get_buffer_stream() or glk_get_line_stream(), as opposed to calling
592  * glk_get_char_stream() several times.
593  *
594  * Returns: A character value between 0 and 255, or -1 on end of stream.
595  */
596 glsi32
597 glk_get_char_stream(strid_t str)
598 {
599         VALID_STREAM(str, return -1);
600         g_return_val_if_fail(str->file_mode == filemode_Read || str->file_mode == filemode_ReadWrite, -1);
601         
602         glsi32 ch = get_char_stream_common(str);
603         return (ch > 0xFF)? PLACEHOLDER : ch;
604 }
605
606 /**
607  * glk_get_char_stream_uni:
608  * @str: An input stream.
609  *
610  * Reads one character from the stream @str. The result will be between 0 and 
611  * 0x7FFFFFFF. If the end of the stream has been reached, the result will be -1.
612  *
613  * Returns: A value between 0 and 0x7FFFFFFF, or -1 on end of stream.
614  */
615 glsi32
616 glk_get_char_stream_uni(strid_t str)
617 {
618         VALID_STREAM(str, return -1);
619         g_return_val_if_fail(str->file_mode == filemode_Read || str->file_mode == filemode_ReadWrite, -1);
620         
621         return get_char_stream_common(str);
622 }
623
624 /**
625  * glk_get_buffer_stream:
626  * @str: An input stream.
627  * @buf: A buffer with space for at least @len characters.
628  * @len: The number of characters to read.
629  *
630  * Reads @len characters from @str, unless the end of stream is reached first.
631  * No terminal null is placed in the buffer.
632  *
633  * Returns: The number of characters actually read.
634  */
635 glui32
636 glk_get_buffer_stream(strid_t str, char *buf, glui32 len)
637 {
638         VALID_STREAM(str, return 0);
639         g_return_val_if_fail(str->file_mode == filemode_Read || str->file_mode == filemode_ReadWrite, 0);
640         g_return_val_if_fail(buf != NULL, 0);
641         
642         switch(str->type)
643         {
644                 case STREAM_TYPE_MEMORY:
645                 {
646                         int copycount = 0;
647                         if(str->unicode)
648                         {
649                                 while(copycount < len && str->ubuffer && str->mark < str->buflen) 
650                                 {
651                                         glui32 ch = str->ubuffer[str->mark++];
652                                         buf[copycount++] = (ch > 0xFF)? '?' : (char)ch;
653                                 }
654                         }
655                         else
656                         {
657                                 if(str->buffer) /* if not, copycount stays 0 */
658                                         copycount = MIN(len, str->buflen - str->mark);
659                                 memmove(buf, str->buffer + str->mark, copycount);
660                                 str->mark += copycount;
661                         }
662
663                         str->read_count += copycount;           
664                         return copycount;
665                 }       
666                 case STREAM_TYPE_FILE:
667                         if(str->binary) 
668                         {
669                                 if(str->unicode) /* Binary file with 4-byte characters */
670                                 {
671                                         /* Read len characters of 4 bytes each */
672                                         unsigned char *readbuffer = g_new0(unsigned char, 4 * len);
673                                         size_t count = fread(readbuffer, sizeof(unsigned char), 4 * len, str->file_pointer);
674                                         /* If there was an incomplete character */
675                                         if(count % 4 != 0) 
676                                         {
677                                                 count -= count % 4;
678                                                 WARNING("Incomplete character in binary Unicode file");
679                                         }
680                                         
681                                         int foo;
682                                         for(foo = 0; foo < count; foo += 4)
683                                         {
684                                                 glsi32 ch = readbuffer[foo] << 24
685                                                         | readbuffer[foo + 1] << 16
686                                                         | readbuffer[foo + 2] << 8
687                                                         | readbuffer[foo + 3];
688                                                 buf[foo / 4] = (ch > 255)? 0x3F : (char)ch;
689                                         }
690                                         g_free(readbuffer);
691                                         str->read_count += count / 4;
692                                         return count / 4;
693                                 }
694                                 else /* Regular binary file */
695                                 {
696                                         size_t count = fread(buf, sizeof(char), len, str->file_pointer);
697                                         str->read_count += count;
698                                         return count;
699                                 }
700                         }
701                         else /* Text mode is the same for Unicode and regular files */
702                         {
703                                 /* Do it character-by-character */
704                                 int foo;
705                                 for(foo = 0; foo < len; foo++)
706                                 {
707                                         glsi32 ch = read_utf8_char_from_file(str->file_pointer);
708                                         if(ch == -1)
709                                                 break;
710                                         str->read_count++;
711                                         buf[foo] = (ch > 0xFF)? 0x3F : (gchar)ch;
712                                 }
713                                 return foo;
714                         }
715                 default:
716                         ILLEGAL_PARAM("Reading illegal on stream type: %u", str->type);
717                         return 0;
718         }
719 }
720
721 /**
722  * glk_get_buffer_stream_uni:
723  * @str: An input stream.
724  * @buf: A buffer with space for at least @len Unicode code points.
725  * @len: The number of characters to read.
726  *
727  * Reads @len Unicode characters from @str, unless the end of stream is reached 
728  * first. No terminal null is placed in the buffer.
729  *
730  * Returns: The number of Unicode characters actually read.
731  */
732 glui32
733 glk_get_buffer_stream_uni(strid_t str, glui32 *buf, glui32 len)
734 {
735         VALID_STREAM(str, return 0);
736         g_return_val_if_fail(str->file_mode == filemode_Read || str->file_mode == filemode_ReadWrite, 0);
737         g_return_val_if_fail(buf != NULL, 0);
738         
739         switch(str->type)
740         {
741                 case STREAM_TYPE_MEMORY:
742                 {
743                         int copycount = 0;
744                         if(str->unicode)
745                         {
746                                 if(str->ubuffer) /* if not, copycount stays 0 */
747                                         copycount = MIN(len, str->buflen - str->mark);
748                                 memmove(buf, str->ubuffer + str->mark, copycount * 4);
749                                 str->mark += copycount;
750                         }
751                         else
752                         {
753                                 while(copycount < len && str->buffer && str->mark < str->buflen)
754                                 {
755                                         unsigned char ch = str->buffer[str->mark++];
756                                         buf[copycount++] = ch;
757                                 }
758                         }
759
760                         str->read_count += copycount;           
761                         return copycount;
762                 }       
763                 case STREAM_TYPE_FILE:
764                         if(str->binary) 
765                         {
766                                 if(str->unicode) /* Binary file with 4-byte characters */
767                                 {
768                                         /* Read len characters of 4 bytes each */
769                                         unsigned char *readbuffer = g_new0(unsigned char, 4 * len);
770                                         size_t count = fread(readbuffer, sizeof(unsigned char), 4 * len, str->file_pointer);
771                                         /* If there was an incomplete character */
772                                         if(count % 4 != 0) 
773                                         {
774                                                 count -= count % 4;
775                                                 WARNING("Incomplete character in binary Unicode file");
776                                         }
777                                         
778                                         int foo;
779                                         for(foo = 0; foo < count; foo += 4)
780                                                 buf[foo / 4] = readbuffer[foo] << 24
781                                                         | readbuffer[foo + 1] << 16
782                                                         | readbuffer[foo + 2] << 8
783                                                         | readbuffer[foo + 3];
784                                         g_free(readbuffer);
785                                         str->read_count += count / 4;
786                                         return count / 4;
787                                 }
788                                 else /* Regular binary file */
789                                 {
790                                         unsigned char *readbuffer = g_new0(unsigned char, len);
791                                         size_t count = fread(readbuffer, sizeof(unsigned char), len, str->file_pointer);
792                                         int foo;
793                                         for(foo = 0; foo < count; foo++)
794                                                 buf[foo] = readbuffer[foo];
795                                         g_free(readbuffer);
796                                         str->read_count += count;
797                                         return count;
798                                 }
799                         }
800                         else /* Text mode is the same for Unicode and regular files */
801                         {
802                                 /* Do it character-by-character */
803                                 int foo;
804                                 for(foo = 0; foo < len; foo++)
805                                 {
806                                         glsi32 ch = read_utf8_char_from_file(str->file_pointer);
807                                         if(ch == -1)
808                                                 break;
809                                         str->read_count++;
810                                         buf[foo] = ch;
811                                 }
812                                 return foo;
813                         }
814                 default:
815                         ILLEGAL_PARAM("Reading illegal on stream type: %u", str->type);
816                         return 0;
817         }
818 }
819
820 /**
821  * glk_get_line_stream:
822  * @str: An input stream.
823  * @buf: A buffer with space for at least @len characters.
824  * @len: The number of characters to read, plus one.
825  *
826  * Reads characters from @str, until either 
827  * <inlineequation>
828  *   <alt>@len - 1</alt>
829  *   <mathphrase>@len - 1</mathphrase>
830  * </inlineequation>
831  * characters have been read or a newline has been read. It then puts a
832  * terminal null (<code>'\0'</code>) aracter on
833  * the end. It returns the number of characters actually read, including the
834  * newline (if there is one) but not including the terminal null.
835  *
836  * Returns: The number of characters actually read.
837  */
838 glui32
839 glk_get_line_stream(strid_t str, char *buf, glui32 len)
840 {
841         VALID_STREAM(str, return 0);
842         g_return_val_if_fail(str->file_mode == filemode_Read || str->file_mode == filemode_ReadWrite, 0);
843         g_return_val_if_fail(buf != NULL, 0);
844
845         switch(str->type)
846         {
847                 case STREAM_TYPE_MEMORY:
848                 {
849                         int copycount = 0;
850                         if(str->unicode)
851                         {
852                                 /* Do it character-by-character */
853                                 while(copycount < len - 1 && str->ubuffer && str->mark < str->buflen) 
854                                 {
855                                         glui32 ch = str->ubuffer[str->mark++];
856                                         /* Check for Unicode newline; slightly different than
857                                         in file streams */
858                                         if(ch == 0x0A || ch == 0x85 || ch == 0x0C || ch == 0x2028 || ch == 0x2029)
859                                         {
860                                                 buf[copycount++] = '\n';
861                                                 break;
862                                         }
863                                         if(ch == 0x0D)
864                                         {
865                                                 if(str->ubuffer[str->mark] == 0x0A)
866                                                         str->mark++; /* skip past next newline */
867                                                 buf[copycount++] = '\n';
868                                                 break;
869                                         }
870                                         buf[copycount++] = (ch > 0xFF)? '?' : (char)ch;
871                                 }
872                                 buf[copycount] = '\0';
873                         }
874                         else
875                         {
876                                 if(str->buffer) /* if not, copycount stays 0 */
877                                         copycount = MIN(len - 1, str->buflen - str->mark);
878                                 char *endptr = memccpy(buf, str->buffer + str->mark, '\n', copycount);
879                                 if(endptr) /* newline was found */
880                                         copycount = endptr - buf; /* Real copy count */
881                                 buf[copycount] = '\0';
882                                 str->mark += copycount;
883                         }
884                         
885                         str->read_count += copycount;
886                         return copycount;
887                 }       
888                 case STREAM_TYPE_FILE:
889                         if(str->binary) 
890                         {
891                                 if(str->unicode) /* Binary file with 4-byte characters */
892                                 {
893                                         /* Do it character-by-character */
894                                         int foo;
895                                         for(foo = 0; foo < len - 1; foo++)
896                                         {
897                                                 glsi32 ch = read_ucs4be_char_from_file(str->file_pointer);
898                                                 if(ch == -1) 
899                                                 {
900                                                         buf[foo] = '\0';
901                                                         return foo - 1;
902                                                 }
903                                                 str->read_count++;
904                                                 if(is_unicode_newline(ch, str->file_pointer, FALSE))
905                                                 {
906                                                         buf[foo] = '\n';
907                                                         buf[foo + 1] = '\0';
908                                                         return foo;
909                                                 }
910                                                 buf[foo] = (ch > 0xFF)? '?' : (char)ch;
911                                         }
912                                         buf[len] = '\0';
913                                         return foo;
914                                 }
915                                 else /* Regular binary file */
916                                 {
917                                         if( !fgets(buf, len, str->file_pointer) ) {
918                                                 *buf = 0;
919                                                 return 0;
920                                         }
921
922                                         int nread = strlen(buf);
923                                         str->read_count += nread;
924                                         return nread;
925                                 }
926                         }
927                         else /* Text mode is the same for Unicode and regular files */
928                         {
929                                 /* Do it character-by-character */
930                                 int foo;
931                                 for(foo = 0; foo < len - 1; foo++)
932                                 {
933                                         glsi32 ch = read_utf8_char_from_file(str->file_pointer);
934                                         if(ch == -1)
935                                         {
936                                                 buf[foo] = '\0';
937                                                 return foo - 1;
938                                         }
939                                         str->read_count++;
940                                         if(is_unicode_newline(ch, str->file_pointer, TRUE))
941                                         {
942                                                 buf[foo] = '\n';
943                                                 buf[foo + 1] = '\0';
944                                                 return foo;
945                                         }
946                                         buf[foo] = (ch > 0xFF)? 0x3F : (char)ch;
947                                 }
948                                 buf[len] = '\0';
949                                 return foo;
950                         }
951                 default:
952                         ILLEGAL_PARAM("Reading illegal on stream type: %u", str->type);
953                         return 0;
954         }
955 }
956
957 /**
958  * glk_get_line_stream_uni:
959  * @str: An input stream.
960  * @buf: A buffer with space for at least @len Unicode code points.
961  * @len: The number of characters to read, plus one.
962  *
963  * Reads Unicode characters from @str, until either 
964  * <inlineequation>
965  *   <alt>@len - 1</alt>
966  *   <mathphrase>@len - 1</mathphrase>
967  * </inlineequation> 
968  * Unicode characters have been read or a newline has been read. It then puts a
969  * terminal null (a zero value) on the end.
970  *
971  * Returns: The number of characters actually read, including the newline (if
972  * there is one) but not including the terminal null.
973  */
974 glui32
975 glk_get_line_stream_uni(strid_t str, glui32 *buf, glui32 len)
976 {
977         VALID_STREAM(str, return 0);
978         g_return_val_if_fail(str->file_mode == filemode_Read || str->file_mode == filemode_ReadWrite, 0);
979         g_return_val_if_fail(buf != NULL, 0);
980
981         switch(str->type)
982         {
983                 case STREAM_TYPE_MEMORY:
984                 {
985                         int copycount = 0;
986                         if(str->unicode)
987                         {
988                                 /* Do it character-by-character */
989                                 while(copycount < len - 1 && str->ubuffer && str->mark < str->buflen) 
990                                 {
991                                         glui32 ch = str->ubuffer[str->mark++];
992                                         /* Check for Unicode newline; slightly different than
993                                         in file streams */
994                                         if(ch == 0x0A || ch == 0x85 || ch == 0x0C || ch == 0x2028 || ch == 0x2029)
995                                         {
996                                                 buf[copycount++] = '\n';
997                                                 break;
998                                         }
999                                         if(ch == 0x0D)
1000                                         {
1001                                                 if(str->ubuffer[str->mark] == 0x0A)
1002                                                         str->mark++; /* skip past next newline */
1003                                                 buf[copycount++] = '\n';
1004                                                 break;
1005                                         }
1006                                         buf[copycount++] = ch;
1007                                 }
1008                                 buf[copycount] = '\0';
1009                         }
1010                         else
1011                         {
1012                                 /* No recourse to memccpy(), so do it character-by-character */
1013                                 while(copycount < len - 1 && str->buffer && str->mark < str->buflen)
1014                                 {
1015                                         gchar ch = str->buffer[str->mark++];
1016                                         /* Check for newline */
1017                                         if(ch == '\n') /* Also check for \r and \r\n? */
1018                                         {
1019                                                 buf[copycount++] = '\n';
1020                                                 break;
1021                                         }
1022                                         buf[copycount++] = (unsigned char)ch;
1023                                 }
1024                                 buf[copycount] = 0;
1025                         }
1026                         
1027                         str->read_count += copycount;
1028                         return copycount;
1029                 }       
1030                 case STREAM_TYPE_FILE:
1031                         if(str->binary) 
1032                         {
1033                                 if(str->unicode) /* Binary file with 4-byte characters */
1034                                 {
1035                                         /* Do it character-by-character */
1036                                         int foo;
1037                                         for(foo = 0; foo < len - 1; foo++)
1038                                         {
1039                                                 glsi32 ch = read_ucs4be_char_from_file(str->file_pointer);
1040                                                 if(ch == -1) 
1041                                                 {
1042                                                         buf[foo] = 0;
1043                                                         return foo - 1;
1044                                                 }
1045                                                 str->read_count++;
1046                                                 if(is_unicode_newline(ch, str->file_pointer, FALSE))
1047                                                 {
1048                                                         buf[foo] = ch; /* Preserve newline types??? */
1049                                                         buf[foo + 1] = 0;
1050                                                         return foo;
1051                                                 }
1052                                                 buf[foo] = ch;
1053                                         }
1054                                         buf[len] = 0;
1055                                         return foo;
1056                                 }
1057                                 else /* Regular binary file */
1058                                 {
1059                                         gchar *readbuffer = g_new0(gchar, len);
1060                                         if( !fgets(readbuffer, len, str->file_pointer) ) {
1061                                                 *buf = 0;
1062                                                 return 0;
1063                                         }
1064
1065                                         glui32 count = strlen(readbuffer);
1066                                         int foo;
1067                                         for(foo = 0; foo < count + 1; foo++) /* Copy terminator */
1068                                                 buf[foo] = (unsigned char)(readbuffer[foo]);
1069                                         str->read_count += count;
1070                                         return count;
1071                                 }
1072                         }
1073                         else /* Text mode is the same for Unicode and regular files */
1074                         {
1075                                 /* Do it character-by-character */
1076                                 int foo;
1077                                 for(foo = 0; foo < len - 1; foo++)
1078                                 {
1079                                         glsi32 ch = read_utf8_char_from_file(str->file_pointer);
1080                                         if(ch == -1)
1081                                         {
1082                                                 buf[foo] = 0;
1083                                                 return foo - 1;
1084                                         }
1085                                         str->read_count++;
1086                                         if(is_unicode_newline(ch, str->file_pointer, TRUE))
1087                                         {
1088                                                 buf[foo] = ch; /* Preserve newline types??? */
1089                                                 buf[foo + 1] = 0;
1090                                                 return foo;
1091                                         }
1092                                         buf[foo] = ch;
1093                                 }
1094                                 buf[len] = 0;
1095                                 return foo;
1096                         }
1097                 default:
1098                         ILLEGAL_PARAM("Reading illegal on stream type: %u", str->type);
1099                         return 0;
1100         }
1101 }
1102
1103 /*
1104  *
1105  **************** SEEKING FUNCTIONS ********************************************
1106  *
1107  */
1108
1109 /**
1110  * glk_stream_get_position:
1111  * @str: A file or memory stream.
1112  *
1113  * Returns the position of the read/write mark in @str. For memory streams and
1114  * binary file streams, this is exactly the number of characters read or written
1115  * from the beginning of the stream (unless you have moved the mark with
1116  * glk_stream_set_position().) For text file streams, matters are more 
1117  * ambiguous, since (for example) writing one byte to a text file may store more
1118  * than one character in the platform's native encoding. You can only be sure
1119  * that the position increases as you read or write to the file.
1120  *
1121  * Additional complication: for Latin-1 memory and file streams, a character is
1122  * a byte. For Unicode memory and file streams (those created by
1123  * glk_stream_open_file_uni() and glk_stream_open_memory_uni()), a character is
1124  * a 32-bit word. So in a binary Unicode file, positions are multiples of four
1125  * bytes.
1126  *
1127  * <note><para>
1128  *   If this bothers you, don't use binary Unicode files. I don't think they're
1129  *   good for much anyhow.
1130  * </para></note>
1131  *
1132  * Returns: position of the read/write mark in @str.
1133  */
1134 glui32
1135 glk_stream_get_position(strid_t str)
1136 {
1137         VALID_STREAM(str, return 0);
1138         
1139         switch(str->type)
1140         {
1141                 case STREAM_TYPE_MEMORY:
1142                         return str->mark;
1143                 case STREAM_TYPE_FILE:
1144                         return ftell(str->file_pointer);
1145                 default:
1146                         ILLEGAL_PARAM("Seeking illegal on stream type: %u", str->type);
1147                         return 0;
1148         }
1149 }
1150
1151 /**
1152  * glk_stream_set_position:
1153  * @str: A file or memory stream.
1154  * @pos: The position to set the mark to, relative to @seekmode.
1155  * @seekmode: One of %seekmode_Start, %seekmode_Current, or %seekmode_End.
1156  *
1157  * Sets the position of the read/write mark in @str. The position is controlled
1158  * by @pos, and the meaning of @pos is controlled by @seekmode. See the
1159  * <code>seekmode_</code> constants below.
1160  *
1161  * It is illegal to specify a position before the beginning or after the end of
1162  * the file.
1163  *
1164  * In binary files, the mark position is exact &mdash; it corresponds with the
1165  * number of characters you have read or written. In text files, this mapping 
1166  * can vary, because of linefeed conventions or other character-set 
1167  * approximations. See <link linkend="chimara-Streams">Streams</link>.
1168  * glk_stream_set_position() and glk_stream_get_position() measure positions in
1169  * the platform's native encoding &mdash; after character cookery. Therefore,
1170  * in a text stream, it is safest to use glk_stream_set_position() only to move
1171  * to the beginning or end of a file, or to a position determined by
1172  * glk_stream_get_position().
1173  *
1174  * Again, in Latin-1 streams, characters are bytes. In Unicode streams,
1175  * characters are 32-bit words, or four bytes each.
1176  */
1177 void
1178 glk_stream_set_position(strid_t str, glsi32 pos, glui32 seekmode)
1179 {
1180         VALID_STREAM(str, return);
1181         g_return_if_fail(!(seekmode == seekmode_Start && pos < 0));
1182         g_return_if_fail(!(seekmode == seekmode_End && pos > 0));
1183         
1184         switch(str->type)
1185         {
1186                 case STREAM_TYPE_MEMORY:
1187                         switch(seekmode)
1188                         {
1189                                 case seekmode_Start:   str->mark = pos;  break;
1190                                 case seekmode_Current: str->mark += pos; break;
1191                                 case seekmode_End:     str->mark = str->buflen + pos; break;
1192                                 default:
1193                                         g_return_if_reached();
1194                                         return;
1195                         }
1196                         break;
1197                 case STREAM_TYPE_FILE:
1198                 {
1199                         int whence;
1200                         switch(seekmode)
1201                         {
1202                                 case seekmode_Start:   whence = SEEK_SET; break;
1203                                 case seekmode_Current: whence = SEEK_CUR; break;
1204                                 case seekmode_End:     whence = SEEK_END; break;
1205                                 default:
1206                                         g_return_if_reached();
1207                                         return;
1208                         }
1209                         if(fseek(str->file_pointer, pos, whence) == -1)
1210                                 WARNING("Seek failed on file stream");
1211                         break;
1212                 }
1213                 default:
1214                         ILLEGAL_PARAM("Seeking illegal on stream type: %u", str->type);
1215                         return;
1216         }
1217 }
1218