Backend support for using dynamic styles. See also Ticket #49.
[rodin/chimara.git] / libchimara / strio.c
1 #include "charset.h"
2 #include "magic.h"
3 #include "stream.h"
4 #include <errno.h>
5 #include <stdio.h>
6 #include <string.h>
7 #include <glib.h>
8 #include <glib/gstdio.h>
9
10 /*
11  *
12  **************** WRITING FUNCTIONS ********************************************
13  *
14  */
15
16 /* Internal function: write a UTF-8 string to a text buffer window's text buffer. */
17 static void
18 write_utf8_to_window_buffer(winid_t win, gchar *s)
19 {
20         if(win->input_request_type == INPUT_REQUEST_LINE || win->input_request_type == INPUT_REQUEST_LINE_UNICODE)
21         {
22                 ILLEGAL("Tried to print to a text buffer window with line input pending.");
23                 return;
24         }
25
26         // Write to the buffer  
27         g_string_append(win->buffer, s);
28 }
29         
30 /* Internal function: flush a window's text buffer to the screen. */
31 void
32 flush_window_buffer(winid_t win)
33 {
34         if(win->type != wintype_TextBuffer && win->type != wintype_TextGrid)
35                 return;
36
37         if(win->buffer->len == 0)
38                 return;
39
40         gdk_threads_enter();
41
42         GtkTextBuffer *buffer = gtk_text_view_get_buffer( GTK_TEXT_VIEW(win->widget) );
43
44         switch(win->type) {
45         case wintype_TextBuffer:
46         {
47                 GtkTextIter iter;
48                 gtk_text_buffer_get_end_iter(buffer, &iter);
49
50                 GtkTextTagTable *tags = gtk_text_buffer_get_tag_table(buffer);
51                 GtkTextTag *style_tag = gtk_text_tag_table_lookup(tags, win->window_stream->style);
52                 GtkTextTag *glk_style_tag = gtk_text_tag_table_lookup(tags, win->window_stream->glk_style);
53
54                 if(win->window_stream->hyperlink_mode) {
55                         GtkTextTag *link_style_tag = gtk_text_tag_table_lookup(tags, "hyperlink");
56                         GtkTextTag *link_tag = win->current_hyperlink->tag;
57                         gtk_text_buffer_insert_with_tags(buffer, &iter, win->buffer->str, -1, style_tag, glk_style_tag, link_style_tag, link_tag, NULL);
58                 } else {
59                         gtk_text_buffer_insert_with_tags(buffer, &iter, win->buffer->str, -1, style_tag, glk_style_tag, NULL);
60                 }
61
62                 ChimaraGlk *glk = CHIMARA_GLK(gtk_widget_get_ancestor(win->widget, CHIMARA_TYPE_GLK));
63                 g_assert(glk);
64                 g_signal_emit_by_name(glk, "text-buffer-output", win->rock, win->buffer->str);
65         }
66                 break;
67
68         case wintype_TextGrid:
69         {
70                 /* Number of characters to insert */
71                 glong length = win->buffer->len;
72                 glong chars_left = length;
73                 
74                 GtkTextMark *cursor = gtk_text_buffer_get_mark(buffer, "cursor_position");
75                 
76                 /* Get cursor position */
77                 GtkTextIter start;
78                 gtk_text_buffer_get_iter_at_mark(buffer, &start, cursor);
79                 /* Spaces available on this line */
80                 gint available_space = win->width - gtk_text_iter_get_line_offset(&start);
81                 
82                 while(chars_left > available_space && !gtk_text_iter_is_end(&start))
83                 {
84                         GtkTextIter end = start;
85                         gtk_text_iter_forward_to_line_end(&end);
86                         gtk_text_buffer_delete(buffer, &start, &end);
87
88                         GtkTextTagTable *tags = gtk_text_buffer_get_tag_table(buffer);
89                         GtkTextTag *style_tag = gtk_text_tag_table_lookup(tags, win->window_stream->style);
90                         GtkTextTag *glk_style_tag = gtk_text_tag_table_lookup(tags, win->window_stream->glk_style);
91
92                         if(win->window_stream->hyperlink_mode) {
93                                 GtkTextTag *link_style_tag = gtk_text_tag_table_lookup(tags, "hyperlink");
94                                 GtkTextTag *link_tag = win->current_hyperlink->tag;
95                                 gtk_text_buffer_insert_with_tags(buffer, &start, win->buffer->str + (length - chars_left), available_space, style_tag, glk_style_tag, link_style_tag, link_tag, NULL);
96                         } else {
97                                 gtk_text_buffer_insert_with_tags(buffer, &start, win->buffer->str + (length - chars_left), available_space, style_tag, glk_style_tag, NULL);
98                         }
99
100                         chars_left -= available_space;
101                         gtk_text_iter_forward_line(&start);
102                         available_space = win->width;
103                 }
104                 if(!gtk_text_iter_is_end(&start))
105                 {
106                         GtkTextIter end = start;
107                         gtk_text_iter_forward_chars(&end, chars_left);
108                         gtk_text_buffer_delete(buffer, &start, &end);
109
110                         GtkTextTagTable *tags = gtk_text_buffer_get_tag_table(buffer);
111                         GtkTextTag *style_tag = gtk_text_tag_table_lookup(tags, win->window_stream->style);
112                         GtkTextTag *glk_style_tag = gtk_text_tag_table_lookup(tags, win->window_stream->glk_style);
113
114                         if(win->window_stream->hyperlink_mode) {
115                                 GtkTextTag *link_style_tag = gtk_text_tag_table_lookup(tags, "hyperlink");
116                                 GtkTextTag *link_tag = win->current_hyperlink->tag;
117                                 gtk_text_buffer_insert_with_tags(buffer, &start, win->buffer->str + (length - chars_left), -1, style_tag, glk_style_tag, link_style_tag, link_tag, NULL);
118                         } else {
119                                 gtk_text_buffer_insert_with_tags(buffer, &start, win->buffer->str + (length - chars_left), -1, style_tag, glk_style_tag, NULL);
120                         }
121                 }
122                 
123                 gtk_text_buffer_move_mark(buffer, cursor, &start);
124         }
125                 break;
126         }
127
128         gdk_threads_leave();
129
130         g_string_truncate(win->buffer, 0);
131 }
132
133 /* Internal function: write a Latin-1 buffer with length to a stream. */
134 static void
135 write_buffer_to_stream(strid_t str, gchar *buf, glui32 len)
136 {
137         switch(str->type)
138         {
139                 case STREAM_TYPE_WINDOW:
140                         /* Each window type has a different way of printing to it */
141                         switch(str->window->type)
142                         {
143                                 /* Printing to these windows' streams does nothing */
144                                 case wintype_Blank:
145                                 case wintype_Pair:
146                                 case wintype_Graphics:
147                                         str->write_count += len;
148                                         break;
149                                         
150                             /* Text grid/buffer windows */
151                             case wintype_TextGrid:
152                                 case wintype_TextBuffer:
153                             {
154                                 gchar *utf8 = convert_latin1_to_utf8(buf, len);
155                                 if(utf8 != NULL) {
156                                                 write_utf8_to_window_buffer(str->window, utf8);
157                                                 g_free(utf8);
158                                         }
159                                 }       
160                                         str->write_count += len;
161                                         break;
162                                 default:
163                                         ILLEGAL_PARAM("Unknown window type: %u", str->window->type);
164                         }
165                         
166                         /* Now write the same buffer to the window's echo stream */
167                         if(str->window->echo_stream != NULL)
168                                 write_buffer_to_stream(str->window->echo_stream, buf, len);
169                         
170                         break;
171                         
172                 case STREAM_TYPE_MEMORY:
173                         if(str->unicode && str->ubuffer)
174                         {
175                                 int foo = 0;
176                                 while(str->mark < str->buflen && foo < len)
177                                         str->ubuffer[str->mark++] = (unsigned char)buf[foo++];
178                         }
179                         if(!str->unicode && str->buffer)
180                         {
181                                 int copycount = MIN(len, str->buflen - str->mark);
182                                 memmove(str->buffer + str->mark, buf, copycount);
183                                 str->mark += copycount;
184                         }
185
186                         str->write_count += len;
187                         break;
188                         
189                 case STREAM_TYPE_FILE:
190                         if(str->binary) 
191                         {
192                                 if(str->unicode) 
193                                 {
194                                         gchar *writebuffer = convert_latin1_to_ucs4be_string(buf, len);
195                                         fwrite(writebuffer, sizeof(gchar), len * 4, str->file_pointer);
196                                         g_free(writebuffer);
197                                 } 
198                                 else /* Regular file */
199                                 {
200                                         fwrite(buf, sizeof(gchar), len, str->file_pointer);
201                                 }
202                         }
203                         else /* Text mode is the same for Unicode and regular files */
204                         {
205                                 gchar *utf8 = convert_latin1_to_utf8(buf, len);
206                                 if(utf8 != NULL)
207                                 {
208                                         g_fprintf(str->file_pointer, "%s", utf8);
209                                         g_free(utf8);
210                                 }
211                         }
212                         
213                         str->write_count += len;
214                         break;
215                 default:
216                         ILLEGAL_PARAM("Unknown stream type: %u", str->type);
217         }
218 }
219
220 /* Internal function: write a Unicode buffer with length to a stream. */
221 static void
222 write_buffer_to_stream_uni(strid_t str, glui32 *buf, glui32 len)
223 {
224         switch(str->type)
225         {
226                 case STREAM_TYPE_WINDOW:
227                         /* Each window type has a different way of printing to it */
228                         switch(str->window->type)
229                         {
230                                 /* Printing to these windows' streams does nothing */
231                                 case wintype_Blank:
232                                 case wintype_Pair:
233                                 case wintype_Graphics:
234                                         str->write_count += len;
235                                         break;
236                                         
237                             /* Text grid/buffer windows */
238                             case wintype_TextGrid:
239                             case wintype_TextBuffer:
240                             {
241                                 gchar *utf8 = convert_ucs4_to_utf8(buf, len);
242                                 if(utf8 != NULL) {
243                                                 write_utf8_to_window_buffer(str->window, utf8);
244                                                 g_free(utf8);
245                                         }
246                                 }       
247                                         str->write_count += len;
248                                         break;
249                                 default:
250                                         ILLEGAL_PARAM("Unknown window type: %u", str->window->type);
251                         }
252                         
253                         /* Now write the same buffer to the window's echo stream */
254                         if(str->window->echo_stream != NULL)
255                                 write_buffer_to_stream_uni(str->window->echo_stream, buf, len);
256                         
257                         break;
258                         
259                 case STREAM_TYPE_MEMORY:
260                         if(str->unicode && str->ubuffer)
261                         {
262                                 int copycount = MIN(len, str->buflen - str->mark);
263                                 memmove(str->ubuffer + str->mark, buf, copycount * sizeof(glui32));
264                                 str->mark += copycount;
265                         }
266                         if(!str->unicode && str->buffer)
267                         {
268                                 gchar *latin1 = convert_ucs4_to_latin1_binary(buf, len);
269                                 int copycount = MIN(len, str->buflen - str->mark);
270                                 memmove(str->buffer + str->mark, latin1, copycount);
271                                 g_free(latin1);
272                                 str->mark += copycount;
273                         }
274
275                         str->write_count += len;
276                         break;
277                         
278                 case STREAM_TYPE_FILE:
279                         if(str->binary) 
280                         {
281                                 if(str->unicode) 
282                                 {
283                                         gchar *writebuffer = convert_ucs4_to_ucs4be_string(buf, len);
284                                         fwrite(writebuffer, sizeof(gchar), len * 4, str->file_pointer);
285                                         g_free(writebuffer);
286                                 } 
287                                 else /* Regular file */
288                                 {
289                                         gchar *latin1 = convert_ucs4_to_latin1_binary(buf, len);
290                                         fwrite(latin1, sizeof(gchar), len, str->file_pointer);
291                                         g_free(latin1);
292                                 }
293                         }
294                         else /* Text mode is the same for Unicode and regular files */
295                         {
296                                 gchar *utf8 = convert_ucs4_to_utf8(buf, len);
297                                 if(utf8 != NULL) 
298                                 {
299                                         g_fprintf(str->file_pointer, "%s", utf8);
300                                         g_free(utf8);
301                                 }
302                         }
303                         
304                         str->write_count += len;
305                         break;
306                 default:
307                         ILLEGAL_PARAM("Unknown stream type: %u", str->type);
308         }
309 }
310
311 /**
312  * glk_put_char_stream:
313  * @str: An output stream.
314  * @ch: A character in Latin-1 encoding.
315  *
316  * The same as glk_put_char(), except that you specify a stream @str to print 
317  * to, instead of using the current stream. It is illegal for @str to be %NULL,
318  * or an input-only stream.
319  */
320 void
321 glk_put_char_stream(strid_t str, unsigned char ch)
322 {
323         VALID_STREAM(str, return);
324         g_return_if_fail(str->file_mode != filemode_Read);
325         
326         write_buffer_to_stream(str, (gchar *)&ch, 1);
327 }
328
329 /**
330  * glk_put_char_stream_uni:
331  * @str: An output stream.
332  * @ch: A Unicode code point.
333  *
334  * The same as glk_put_char_uni(), except that you specify a stream @str to
335  * print to, instead of using the current stream. It is illegal for @str to be 
336  * %NULL, or an input-only stream.
337  */
338 void
339 glk_put_char_stream_uni(strid_t str, glui32 ch)
340 {
341         VALID_STREAM(str, return);
342         g_return_if_fail(str->file_mode != filemode_Read);
343         
344         write_buffer_to_stream_uni(str, &ch, 1);
345 }
346
347 /**
348  * glk_put_string_stream:
349  * @str: An output stream.
350  * @s: A null-terminated string in Latin-1 encoding.
351  *
352  * The same as glk_put_string(), except that you specify a stream @str to print 
353  * to, instead of using the current stream. It is illegal for @str to be %NULL,
354  * or an input-only stream.
355  */
356 void
357 glk_put_string_stream(strid_t str, char *s)
358 {
359         VALID_STREAM(str, return);
360         if(*s == 0)
361                 return;
362
363         g_return_if_fail(str->file_mode != filemode_Read);
364
365         write_buffer_to_stream(str, s, strlen(s));
366 }
367
368 /**
369  * glk_put_string_stream_uni:
370  * @str: An output stream.
371  * @s: A null-terminated array of Unicode code points.
372  *
373  * The same as glk_put_string_uni(), except that you specify a stream @str to
374  * print to, instead of using the current stream. It is illegal for @str to be 
375  * %NULL, or an input-only stream.
376  */
377 void
378 glk_put_string_stream_uni(strid_t str, glui32 *s)
379 {
380         VALID_STREAM(str, return);
381         if(*s == 0)
382                 return;
383
384         g_return_if_fail(str->file_mode != filemode_Read);
385         
386         /* An impromptu strlen() for glui32 arrays */
387         glong len = 0;
388         glui32 *ptr = s;
389         while(*ptr++)
390                 len++;
391         write_buffer_to_stream_uni(str, s, len);
392 }
393
394 /**
395  * glk_put_buffer_stream:
396  * @str: An output stream.
397  * @buf: An array of characters in Latin-1 encoding.
398  * @len: Length of @buf.
399  *
400  * The same as glk_put_buffer(), except that you specify a stream @str to print 
401  * to, instead of using the current stream. It is illegal for @str to be %NULL,
402  * or an input-only stream.
403  */
404 void
405 glk_put_buffer_stream(strid_t str, char *buf, glui32 len)
406 {
407         VALID_STREAM(str, return);
408         if(len == 0)
409                 return;
410
411         g_return_if_fail(str->file_mode != filemode_Read);
412         
413         write_buffer_to_stream(str, buf, len);
414 }
415
416 /**
417  * glk_put_buffer_stream_uni:
418  * @str: An output stream.
419  * @buf: An array of Unicode code points.
420  * @len: Length of @buf.
421  *
422  * The same as glk_put_buffer_uni(), except that you specify a stream @str to
423  * print to, instead of using the current stream. It is illegal for @str to be 
424  * %NULL, or an input-only stream.
425  */
426 void
427 glk_put_buffer_stream_uni(strid_t str, glui32 *buf, glui32 len)
428 {
429         VALID_STREAM(str, return);
430         if(len == 0)
431                 return;
432
433         g_return_if_fail(str->file_mode != filemode_Read);
434         
435         write_buffer_to_stream_uni(str, buf, len);
436 }
437
438 /*
439  *
440  **************** READING FUNCTIONS ********************************************
441  *
442  */
443
444 /* Internal function: Read one big-endian four-byte character from file fp and
445 return it as a Unicode code point, or -1 on EOF */
446 static glsi32
447 read_ucs4be_char_from_file(FILE *fp)
448 {
449         unsigned char readbuffer[4];
450         if(fread(readbuffer, sizeof(unsigned char), 4, fp) < 4)
451                 return -1; /* EOF */
452         return
453                 readbuffer[0] << 24 | 
454                 readbuffer[1] << 16 | 
455                 readbuffer[2] << 8  | 
456                 readbuffer[3];
457 }
458
459 /* Internal function: Read one UTF-8 character, which may be more than one byte,
460 from file fp and return it as a Unicode code point, or -1 on EOF */
461 static glsi32
462 read_utf8_char_from_file(FILE *fp)
463 {
464         gchar readbuffer[4] = {0, 0, 0, 0}; /* Max UTF-8 width */
465         int foo;
466         gunichar charresult = (gunichar)-2;
467         for(foo = 0; foo < 4 && charresult == (gunichar)-2; foo++) 
468         {
469                 int ch = fgetc(fp);
470                 if(ch == EOF)
471                         return -1;
472                 readbuffer[foo] = (gchar)ch;
473                 charresult = g_utf8_get_char_validated(readbuffer, foo + 1);
474                 /* charresult is -1 if invalid, -2 if incomplete, and the unicode code
475                 point otherwise */
476         }
477         /* Silently return unknown characters as 0xFFFD, Replacement Character */
478         if(charresult == (gunichar)-1 || charresult == (gunichar)-2) 
479                 return 0xFFFD;
480         return charresult;
481 }
482
483 /* Internal function: Tell whether this code point is a Unicode newline. The
484 file pointer and eight-bit flag are included in case the newline is a CR 
485 (U+000D). If the next character is LF (U+000A) then it also belongs to the
486 newline. */
487 static gboolean
488 is_unicode_newline(glsi32 ch, FILE *fp, gboolean utf8)
489 {
490         if(ch == 0x0A || ch == 0x85 || ch == 0x0C || ch == 0x2028 || ch == 0x2029)
491                 return TRUE;
492         if(ch == 0x0D) {
493                 glsi32 ch2 = utf8? read_utf8_char_from_file(fp) : 
494                         read_ucs4be_char_from_file(fp);
495                 if(ch2 != 0x0A)
496                         if(fseek(fp, utf8? -1 : -4, SEEK_CUR) == -1);
497                                 WARNING_S("Seek failed on stream", g_strerror(errno) );
498                 return TRUE;
499         }
500         return FALSE;
501 }
502
503 /* Internal function: Read one character from a stream. Returns a value which
504  can be returned unchanged by glk_get_char_stream_uni(), but 
505  glk_get_char_stream() must replace high values by the placeholder character. */
506 static glsi32
507 get_char_stream_common(strid_t str)
508 {
509         switch(str->type)
510         {
511                 case STREAM_TYPE_MEMORY:
512                         if(str->unicode)
513                         {
514                                 if(!str->ubuffer || str->mark >= str->buflen)
515                                         return -1;
516                                 glui32 ch = str->ubuffer[str->mark++];
517                                 str->read_count++;
518                                 return ch;
519                         }
520                         else
521                         {
522                                 if(!str->buffer || str->mark >= str->buflen)
523                                         return -1;
524                                 unsigned char ch = str->buffer[str->mark++];
525                                 str->read_count++;
526                                 return ch;
527                         }
528                         break;
529                         
530                 case STREAM_TYPE_FILE:
531                         if(str->binary) 
532                         {
533                                 if(str->unicode) 
534                                 {
535                                         glsi32 ch = read_ucs4be_char_from_file(str->file_pointer);
536                                         if(ch == -1)
537                                                 return -1;
538                                         str->read_count++;
539                                         return ch;
540                                 }
541                                 else /* Regular file */
542                                 {
543                                         int ch = fgetc(str->file_pointer);
544                                         if(ch == EOF)
545                                                 return -1;
546                                         
547                                         str->read_count++;
548                                         return ch;
549                                 }
550                         }
551                         else /* Text mode is the same for Unicode and regular files */
552                         {
553                                 glsi32 ch = read_utf8_char_from_file(str->file_pointer);
554                                 if(ch == -1)
555                                         return -1;
556                                         
557                                 str->read_count++;
558                                 return ch;
559                         }
560                 default:
561                         ILLEGAL_PARAM("Reading illegal on stream type: %u", str->type);
562                         return -1;
563         }
564 }
565
566 /**
567  * glk_get_char_stream:
568  * @str: An input stream.
569  *
570  * Reads one character from the stream @str. (There is no notion of a
571  * <quote>current input stream.</quote>) It is illegal for @str to be %NULL, or
572  * an output-only stream.
573  *
574  * The result will be between 0 and 255. As with all basic text functions, Glk
575  * assumes the Latin-1 encoding. See <link 
576  * linkend="chimara-Character-Encoding">Character Encoding</link>. If the end
577  * of the stream has been reached, the result will be -1. 
578  *
579  * <note><para>
580  *   Note that high-bit characters (128..255) are <emphasis>not</emphasis>
581  *   returned as negative numbers.
582  * </para></note>
583  *
584  * If the stream contains Unicode data &mdash; for example, if it was created
585  * with glk_stream_open_file_uni() or glk_stream_open_memory_uni() &mdash; then
586  * characters beyond 255 will be returned as 0x3F (<code>"?"</code>).
587  *
588  * It is usually more efficient to read several characters at once with
589  * glk_get_buffer_stream() or glk_get_line_stream(), as opposed to calling
590  * glk_get_char_stream() several times.
591  *
592  * Returns: A character value between 0 and 255, or -1 on end of stream.
593  */
594 glsi32
595 glk_get_char_stream(strid_t str)
596 {
597         VALID_STREAM(str, return -1);
598         g_return_val_if_fail(str->file_mode == filemode_Read || str->file_mode == filemode_ReadWrite, -1);
599         
600         glsi32 ch = get_char_stream_common(str);
601         return (ch > 0xFF)? PLACEHOLDER : ch;
602 }
603
604 /**
605  * glk_get_char_stream_uni:
606  * @str: An input stream.
607  *
608  * Reads one character from the stream @str. The result will be between 0 and 
609  * 0x7FFFFFFF. If the end of the stream has been reached, the result will be -1.
610  *
611  * Returns: A value between 0 and 0x7FFFFFFF, or -1 on end of stream.
612  */
613 glsi32
614 glk_get_char_stream_uni(strid_t str)
615 {
616         VALID_STREAM(str, return -1);
617         g_return_val_if_fail(str->file_mode == filemode_Read || str->file_mode == filemode_ReadWrite, -1);
618         
619         return get_char_stream_common(str);
620 }
621
622 /**
623  * glk_get_buffer_stream:
624  * @str: An input stream.
625  * @buf: A buffer with space for at least @len characters.
626  * @len: The number of characters to read.
627  *
628  * Reads @len characters from @str, unless the end of stream is reached first.
629  * No terminal null is placed in the buffer.
630  *
631  * Returns: The number of characters actually read.
632  */
633 glui32
634 glk_get_buffer_stream(strid_t str, char *buf, glui32 len)
635 {
636         VALID_STREAM(str, return 0);
637         g_return_val_if_fail(str->file_mode == filemode_Read || str->file_mode == filemode_ReadWrite, 0);
638         g_return_val_if_fail(buf != NULL, 0);
639         
640         switch(str->type)
641         {
642                 case STREAM_TYPE_MEMORY:
643                 {
644                         int copycount = 0;
645                         if(str->unicode)
646                         {
647                                 while(copycount < len && str->ubuffer && str->mark < str->buflen) 
648                                 {
649                                         glui32 ch = str->ubuffer[str->mark++];
650                                         buf[copycount++] = (ch > 0xFF)? '?' : (char)ch;
651                                 }
652                         }
653                         else
654                         {
655                                 if(str->buffer) /* if not, copycount stays 0 */
656                                         copycount = MIN(len, str->buflen - str->mark);
657                                 memmove(buf, str->buffer + str->mark, copycount);
658                                 str->mark += copycount;
659                         }
660
661                         str->read_count += copycount;           
662                         return copycount;
663                 }       
664                 case STREAM_TYPE_FILE:
665                         if(str->binary) 
666                         {
667                                 if(str->unicode) /* Binary file with 4-byte characters */
668                                 {
669                                         /* Read len characters of 4 bytes each */
670                                         unsigned char *readbuffer = g_new0(unsigned char, 4 * len);
671                                         size_t count = fread(readbuffer, sizeof(unsigned char), 4 * len, str->file_pointer);
672                                         /* If there was an incomplete character */
673                                         if(count % 4 != 0) 
674                                         {
675                                                 count -= count % 4;
676                                                 WARNING("Incomplete character in binary Unicode file");
677                                         }
678                                         
679                                         int foo;
680                                         for(foo = 0; foo < count; foo += 4)
681                                         {
682                                                 glsi32 ch = readbuffer[foo] << 24
683                                                         | readbuffer[foo + 1] << 16
684                                                         | readbuffer[foo + 2] << 8
685                                                         | readbuffer[foo + 3];
686                                                 buf[foo / 4] = (ch > 255)? 0x3F : (char)ch;
687                                         }
688                                         g_free(readbuffer);
689                                         str->read_count += count / 4;
690                                         return count / 4;
691                                 }
692                                 else /* Regular binary file */
693                                 {
694                                         size_t count = fread(buf, sizeof(char), len, str->file_pointer);
695                                         str->read_count += count;
696                                         return count;
697                                 }
698                         }
699                         else /* Text mode is the same for Unicode and regular files */
700                         {
701                                 /* Do it character-by-character */
702                                 int foo;
703                                 for(foo = 0; foo < len; foo++)
704                                 {
705                                         glsi32 ch = read_utf8_char_from_file(str->file_pointer);
706                                         if(ch == -1)
707                                                 break;
708                                         str->read_count++;
709                                         buf[foo] = (ch > 0xFF)? 0x3F : (gchar)ch;
710                                 }
711                                 return foo;
712                         }
713                 default:
714                         ILLEGAL_PARAM("Reading illegal on stream type: %u", str->type);
715                         return 0;
716         }
717 }
718
719 /**
720  * glk_get_buffer_stream_uni:
721  * @str: An input stream.
722  * @buf: A buffer with space for at least @len Unicode code points.
723  * @len: The number of characters to read.
724  *
725  * Reads @len Unicode characters from @str, unless the end of stream is reached 
726  * first. No terminal null is placed in the buffer.
727  *
728  * Returns: The number of Unicode characters actually read.
729  */
730 glui32
731 glk_get_buffer_stream_uni(strid_t str, glui32 *buf, glui32 len)
732 {
733         VALID_STREAM(str, return 0);
734         g_return_val_if_fail(str->file_mode == filemode_Read || str->file_mode == filemode_ReadWrite, 0);
735         g_return_val_if_fail(buf != NULL, 0);
736         
737         switch(str->type)
738         {
739                 case STREAM_TYPE_MEMORY:
740                 {
741                         int copycount = 0;
742                         if(str->unicode)
743                         {
744                                 if(str->ubuffer) /* if not, copycount stays 0 */
745                                         copycount = MIN(len, str->buflen - str->mark);
746                                 memmove(buf, str->ubuffer + str->mark, copycount * 4);
747                                 str->mark += copycount;
748                         }
749                         else
750                         {
751                                 while(copycount < len && str->buffer && str->mark < str->buflen)
752                                 {
753                                         unsigned char ch = str->buffer[str->mark++];
754                                         buf[copycount++] = ch;
755                                 }
756                         }
757
758                         str->read_count += copycount;           
759                         return copycount;
760                 }       
761                 case STREAM_TYPE_FILE:
762                         if(str->binary) 
763                         {
764                                 if(str->unicode) /* Binary file with 4-byte characters */
765                                 {
766                                         /* Read len characters of 4 bytes each */
767                                         unsigned char *readbuffer = g_new0(unsigned char, 4 * len);
768                                         size_t count = fread(readbuffer, sizeof(unsigned char), 4 * len, str->file_pointer);
769                                         /* If there was an incomplete character */
770                                         if(count % 4 != 0) 
771                                         {
772                                                 count -= count % 4;
773                                                 WARNING("Incomplete character in binary Unicode file");
774                                         }
775                                         
776                                         int foo;
777                                         for(foo = 0; foo < count; foo += 4)
778                                                 buf[foo / 4] = readbuffer[foo] << 24
779                                                         | readbuffer[foo + 1] << 16
780                                                         | readbuffer[foo + 2] << 8
781                                                         | readbuffer[foo + 3];
782                                         g_free(readbuffer);
783                                         str->read_count += count / 4;
784                                         return count / 4;
785                                 }
786                                 else /* Regular binary file */
787                                 {
788                                         unsigned char *readbuffer = g_new0(unsigned char, len);
789                                         size_t count = fread(readbuffer, sizeof(unsigned char), len, str->file_pointer);
790                                         int foo;
791                                         for(foo = 0; foo < count; foo++)
792                                                 buf[foo] = readbuffer[foo];
793                                         g_free(readbuffer);
794                                         str->read_count += count;
795                                         return count;
796                                 }
797                         }
798                         else /* Text mode is the same for Unicode and regular files */
799                         {
800                                 /* Do it character-by-character */
801                                 int foo;
802                                 for(foo = 0; foo < len; foo++)
803                                 {
804                                         glsi32 ch = read_utf8_char_from_file(str->file_pointer);
805                                         if(ch == -1)
806                                                 break;
807                                         str->read_count++;
808                                         buf[foo] = ch;
809                                 }
810                                 return foo;
811                         }
812                 default:
813                         ILLEGAL_PARAM("Reading illegal on stream type: %u", str->type);
814                         return 0;
815         }
816 }
817
818 /**
819  * glk_get_line_stream:
820  * @str: An input stream.
821  * @buf: A buffer with space for at least @len characters.
822  * @len: The number of characters to read, plus one.
823  *
824  * Reads characters from @str, until either 
825  * <inlineequation>
826  *   <alt>@len - 1</alt>
827  *   <mathphrase>@len - 1</mathphrase>
828  * </inlineequation>
829  * characters have been read or a newline has been read. It then puts a
830  * terminal null (<code>'\0'</code>) aracter on
831  * the end. It returns the number of characters actually read, including the
832  * newline (if there is one) but not including the terminal null.
833  *
834  * Returns: The number of characters actually read.
835  */
836 glui32
837 glk_get_line_stream(strid_t str, char *buf, glui32 len)
838 {
839         VALID_STREAM(str, return 0);
840         g_return_val_if_fail(str->file_mode == filemode_Read || str->file_mode == filemode_ReadWrite, 0);
841         g_return_val_if_fail(buf != NULL, 0);
842
843         switch(str->type)
844         {
845                 case STREAM_TYPE_MEMORY:
846                 {
847                         int copycount = 0;
848                         if(str->unicode)
849                         {
850                                 /* Do it character-by-character */
851                                 while(copycount < len - 1 && str->ubuffer && str->mark < str->buflen) 
852                                 {
853                                         glui32 ch = str->ubuffer[str->mark++];
854                                         /* Check for Unicode newline; slightly different than
855                                         in file streams */
856                                         if(ch == 0x0A || ch == 0x85 || ch == 0x0C || ch == 0x2028 || ch == 0x2029)
857                                         {
858                                                 buf[copycount++] = '\n';
859                                                 break;
860                                         }
861                                         if(ch == 0x0D)
862                                         {
863                                                 if(str->ubuffer[str->mark] == 0x0A)
864                                                         str->mark++; /* skip past next newline */
865                                                 buf[copycount++] = '\n';
866                                                 break;
867                                         }
868                                         buf[copycount++] = (ch > 0xFF)? '?' : (char)ch;
869                                 }
870                                 buf[copycount] = '\0';
871                         }
872                         else
873                         {
874                                 if(str->buffer) /* if not, copycount stays 0 */
875                                         copycount = MIN(len - 1, str->buflen - str->mark);
876                                 char *endptr = memccpy(buf, str->buffer + str->mark, '\n', copycount);
877                                 if(endptr) /* newline was found */
878                                         copycount = endptr - buf; /* Real copy count */
879                                 buf[copycount] = '\0';
880                                 str->mark += copycount;
881                         }
882                         
883                         str->read_count += copycount;
884                         return copycount;
885                 }       
886                 case STREAM_TYPE_FILE:
887                         if(str->binary) 
888                         {
889                                 if(str->unicode) /* Binary file with 4-byte characters */
890                                 {
891                                         /* Do it character-by-character */
892                                         int foo;
893                                         for(foo = 0; foo < len - 1; foo++)
894                                         {
895                                                 glsi32 ch = read_ucs4be_char_from_file(str->file_pointer);
896                                                 if(ch == -1) 
897                                                 {
898                                                         buf[foo] = '\0';
899                                                         return foo - 1;
900                                                 }
901                                                 str->read_count++;
902                                                 if(is_unicode_newline(ch, str->file_pointer, FALSE))
903                                                 {
904                                                         buf[foo] = '\n';
905                                                         buf[foo + 1] = '\0';
906                                                         return foo;
907                                                 }
908                                                 buf[foo] = (ch > 0xFF)? '?' : (char)ch;
909                                         }
910                                         buf[len] = '\0';
911                                         return foo;
912                                 }
913                                 else /* Regular binary file */
914                                 {
915                                         if( !fgets(buf, len, str->file_pointer) ) {
916                                                 *buf = 0;
917                                                 return 0;
918                                         }
919
920                                         int nread = strlen(buf);
921                                         str->read_count += nread;
922                                         return nread;
923                                 }
924                         }
925                         else /* Text mode is the same for Unicode and regular files */
926                         {
927                                 /* Do it character-by-character */
928                                 int foo;
929                                 for(foo = 0; foo < len - 1; foo++)
930                                 {
931                                         glsi32 ch = read_utf8_char_from_file(str->file_pointer);
932                                         if(ch == -1)
933                                         {
934                                                 buf[foo] = '\0';
935                                                 return foo - 1;
936                                         }
937                                         str->read_count++;
938                                         if(is_unicode_newline(ch, str->file_pointer, TRUE))
939                                         {
940                                                 buf[foo] = '\n';
941                                                 buf[foo + 1] = '\0';
942                                                 return foo;
943                                         }
944                                         buf[foo] = (ch > 0xFF)? 0x3F : (char)ch;
945                                 }
946                                 buf[len] = '\0';
947                                 return foo;
948                         }
949                 default:
950                         ILLEGAL_PARAM("Reading illegal on stream type: %u", str->type);
951                         return 0;
952         }
953 }
954
955 /**
956  * glk_get_line_stream_uni:
957  * @str: An input stream.
958  * @buf: A buffer with space for at least @len Unicode code points.
959  * @len: The number of characters to read, plus one.
960  *
961  * Reads Unicode characters from @str, until either 
962  * <inlineequation>
963  *   <alt>@len - 1</alt>
964  *   <mathphrase>@len - 1</mathphrase>
965  * </inlineequation> 
966  * Unicode characters have been read or a newline has been read. It then puts a
967  * terminal null (a zero value) on the end.
968  *
969  * Returns: The number of characters actually read, including the newline (if
970  * there is one) but not including the terminal null.
971  */
972 glui32
973 glk_get_line_stream_uni(strid_t str, glui32 *buf, glui32 len)
974 {
975         VALID_STREAM(str, return 0);
976         g_return_val_if_fail(str->file_mode == filemode_Read || str->file_mode == filemode_ReadWrite, 0);
977         g_return_val_if_fail(buf != NULL, 0);
978
979         switch(str->type)
980         {
981                 case STREAM_TYPE_MEMORY:
982                 {
983                         int copycount = 0;
984                         if(str->unicode)
985                         {
986                                 /* Do it character-by-character */
987                                 while(copycount < len - 1 && str->ubuffer && str->mark < str->buflen) 
988                                 {
989                                         glui32 ch = str->ubuffer[str->mark++];
990                                         /* Check for Unicode newline; slightly different than
991                                         in file streams */
992                                         if(ch == 0x0A || ch == 0x85 || ch == 0x0C || ch == 0x2028 || ch == 0x2029)
993                                         {
994                                                 buf[copycount++] = '\n';
995                                                 break;
996                                         }
997                                         if(ch == 0x0D)
998                                         {
999                                                 if(str->ubuffer[str->mark] == 0x0A)
1000                                                         str->mark++; /* skip past next newline */
1001                                                 buf[copycount++] = '\n';
1002                                                 break;
1003                                         }
1004                                         buf[copycount++] = ch;
1005                                 }
1006                                 buf[copycount] = '\0';
1007                         }
1008                         else
1009                         {
1010                                 /* No recourse to memccpy(), so do it character-by-character */
1011                                 while(copycount < len - 1 && str->buffer && str->mark < str->buflen)
1012                                 {
1013                                         gchar ch = str->buffer[str->mark++];
1014                                         /* Check for newline */
1015                                         if(ch == '\n') /* Also check for \r and \r\n? */
1016                                         {
1017                                                 buf[copycount++] = '\n';
1018                                                 break;
1019                                         }
1020                                         buf[copycount++] = (unsigned char)ch;
1021                                 }
1022                                 buf[copycount] = 0;
1023                         }
1024                         
1025                         str->read_count += copycount;
1026                         return copycount;
1027                 }       
1028                 case STREAM_TYPE_FILE:
1029                         if(str->binary) 
1030                         {
1031                                 if(str->unicode) /* Binary file with 4-byte characters */
1032                                 {
1033                                         /* Do it character-by-character */
1034                                         int foo;
1035                                         for(foo = 0; foo < len - 1; foo++)
1036                                         {
1037                                                 glsi32 ch = read_ucs4be_char_from_file(str->file_pointer);
1038                                                 if(ch == -1) 
1039                                                 {
1040                                                         buf[foo] = 0;
1041                                                         return foo - 1;
1042                                                 }
1043                                                 str->read_count++;
1044                                                 if(is_unicode_newline(ch, str->file_pointer, FALSE))
1045                                                 {
1046                                                         buf[foo] = ch; /* Preserve newline types??? */
1047                                                         buf[foo + 1] = 0;
1048                                                         return foo;
1049                                                 }
1050                                                 buf[foo] = ch;
1051                                         }
1052                                         buf[len] = 0;
1053                                         return foo;
1054                                 }
1055                                 else /* Regular binary file */
1056                                 {
1057                                         gchar *readbuffer = g_new0(gchar, len);
1058                                         if( !fgets(readbuffer, len, str->file_pointer) ) {
1059                                                 *buf = 0;
1060                                                 return 0;
1061                                         }
1062
1063                                         glui32 count = strlen(readbuffer);
1064                                         int foo;
1065                                         for(foo = 0; foo < count + 1; foo++) /* Copy terminator */
1066                                                 buf[foo] = (unsigned char)(readbuffer[foo]);
1067                                         str->read_count += count;
1068                                         return count;
1069                                 }
1070                         }
1071                         else /* Text mode is the same for Unicode and regular files */
1072                         {
1073                                 /* Do it character-by-character */
1074                                 int foo;
1075                                 for(foo = 0; foo < len - 1; foo++)
1076                                 {
1077                                         glsi32 ch = read_utf8_char_from_file(str->file_pointer);
1078                                         if(ch == -1)
1079                                         {
1080                                                 buf[foo] = 0;
1081                                                 return foo - 1;
1082                                         }
1083                                         str->read_count++;
1084                                         if(is_unicode_newline(ch, str->file_pointer, TRUE))
1085                                         {
1086                                                 buf[foo] = ch; /* Preserve newline types??? */
1087                                                 buf[foo + 1] = 0;
1088                                                 return foo;
1089                                         }
1090                                         buf[foo] = ch;
1091                                 }
1092                                 buf[len] = 0;
1093                                 return foo;
1094                         }
1095                 default:
1096                         ILLEGAL_PARAM("Reading illegal on stream type: %u", str->type);
1097                         return 0;
1098         }
1099 }
1100
1101 /*
1102  *
1103  **************** SEEKING FUNCTIONS ********************************************
1104  *
1105  */
1106
1107 /**
1108  * glk_stream_get_position:
1109  * @str: A file or memory stream.
1110  *
1111  * Returns the position of the read/write mark in @str. For memory streams and
1112  * binary file streams, this is exactly the number of characters read or written
1113  * from the beginning of the stream (unless you have moved the mark with
1114  * glk_stream_set_position().) For text file streams, matters are more 
1115  * ambiguous, since (for example) writing one byte to a text file may store more
1116  * than one character in the platform's native encoding. You can only be sure
1117  * that the position increases as you read or write to the file.
1118  *
1119  * Additional complication: for Latin-1 memory and file streams, a character is
1120  * a byte. For Unicode memory and file streams (those created by
1121  * glk_stream_open_file_uni() and glk_stream_open_memory_uni()), a character is
1122  * a 32-bit word. So in a binary Unicode file, positions are multiples of four
1123  * bytes.
1124  *
1125  * <note><para>
1126  *   If this bothers you, don't use binary Unicode files. I don't think they're
1127  *   good for much anyhow.
1128  * </para></note>
1129  *
1130  * Returns: position of the read/write mark in @str.
1131  */
1132 glui32
1133 glk_stream_get_position(strid_t str)
1134 {
1135         VALID_STREAM(str, return 0);
1136         
1137         switch(str->type)
1138         {
1139                 case STREAM_TYPE_MEMORY:
1140                         return str->mark;
1141                 case STREAM_TYPE_FILE:
1142                         return ftell(str->file_pointer);
1143                 default:
1144                         ILLEGAL_PARAM("Seeking illegal on stream type: %u", str->type);
1145                         return 0;
1146         }
1147 }
1148
1149 /**
1150  * glk_stream_set_position:
1151  * @str: A file or memory stream.
1152  * @pos: The position to set the mark to, relative to @seekmode.
1153  * @seekmode: One of %seekmode_Start, %seekmode_Current, or %seekmode_End.
1154  *
1155  * Sets the position of the read/write mark in @str. The position is controlled
1156  * by @pos, and the meaning of @pos is controlled by @seekmode. See the
1157  * <code>seekmode_</code> constants below.
1158  *
1159  * It is illegal to specify a position before the beginning or after the end of
1160  * the file.
1161  *
1162  * In binary files, the mark position is exact &mdash; it corresponds with the
1163  * number of characters you have read or written. In text files, this mapping 
1164  * can vary, because of linefeed conventions or other character-set 
1165  * approximations. See <link linkend="chimara-Streams">Streams</link>.
1166  * glk_stream_set_position() and glk_stream_get_position() measure positions in
1167  * the platform's native encoding &mdash; after character cookery. Therefore,
1168  * in a text stream, it is safest to use glk_stream_set_position() only to move
1169  * to the beginning or end of a file, or to a position determined by
1170  * glk_stream_get_position().
1171  *
1172  * Again, in Latin-1 streams, characters are bytes. In Unicode streams,
1173  * characters are 32-bit words, or four bytes each.
1174  */
1175 void
1176 glk_stream_set_position(strid_t str, glsi32 pos, glui32 seekmode)
1177 {
1178         VALID_STREAM(str, return);
1179         g_return_if_fail(!(seekmode == seekmode_Start && pos < 0));
1180         g_return_if_fail(!(seekmode == seekmode_End && pos > 0));
1181         
1182         switch(str->type)
1183         {
1184                 case STREAM_TYPE_MEMORY:
1185                         switch(seekmode)
1186                         {
1187                                 case seekmode_Start:   str->mark = pos;  break;
1188                                 case seekmode_Current: str->mark += pos; break;
1189                                 case seekmode_End:     str->mark = str->buflen + pos; break;
1190                                 default:
1191                                         g_return_if_reached();
1192                                         return;
1193                         }
1194                         break;
1195                 case STREAM_TYPE_FILE:
1196                 {
1197                         int whence;
1198                         switch(seekmode)
1199                         {
1200                                 case seekmode_Start:   whence = SEEK_SET; break;
1201                                 case seekmode_Current: whence = SEEK_CUR; break;
1202                                 case seekmode_End:     whence = SEEK_END; break;
1203                                 default:
1204                                         g_return_if_reached();
1205                                         return;
1206                         }
1207                         if(fseek(str->file_pointer, pos, whence) == -1)
1208                                 WARNING("Seek failed on file stream");
1209                         break;
1210                 }
1211                 default:
1212                         ILLEGAL_PARAM("Seeking illegal on stream type: %u", str->type);
1213                         return;
1214         }
1215 }
1216