Defined a base style
[rodin/chimara.git] / libchimara / strio.c
1 #include "charset.h"
2 #include "magic.h"
3 #include "stream.h"
4 #include <errno.h>
5 #include <stdio.h>
6 #include <string.h>
7 #include <glib.h>
8 #include <glib/gstdio.h>
9
10 /*
11  *
12  **************** WRITING FUNCTIONS ********************************************
13  *
14  */
15
16 /* Internal function: write a UTF-8 string to a text buffer window's text buffer. */
17 static void
18 write_utf8_to_window_buffer(winid_t win, gchar *s)
19 {
20         if(win->input_request_type == INPUT_REQUEST_LINE || win->input_request_type == INPUT_REQUEST_LINE_UNICODE)
21         {
22                 ILLEGAL("Tried to print to a text buffer window with line input pending.");
23                 return;
24         }
25
26         // Write to the buffer  
27         g_string_append(win->buffer, s);
28 }
29         
30 /* Internal function: flush a window's text buffer to the screen. */
31 void
32 flush_window_buffer(winid_t win)
33 {
34         if(win->type != wintype_TextBuffer && win->type != wintype_TextGrid)
35                 return;
36
37         if(win->buffer->len == 0)
38                 return;
39
40         gdk_threads_enter();
41
42         GtkTextBuffer *buffer = gtk_text_view_get_buffer( GTK_TEXT_VIEW(win->widget) );
43
44         switch(win->type) {
45         case wintype_TextBuffer:
46         {
47                 GtkTextIter iter;
48                 gtk_text_buffer_get_end_iter(buffer, &iter);
49
50                 GtkTextTagTable *tags = gtk_text_buffer_get_tag_table(buffer);
51                 GtkTextTag *default_tag = gtk_text_tag_table_lookup(tags, "default");
52                 GtkTextTag *style_tag = gtk_text_tag_table_lookup(tags, win->window_stream->style);
53                 GtkTextTag *glk_style_tag = gtk_text_tag_table_lookup(tags, win->window_stream->glk_style);
54
55                 if(win->window_stream->hyperlink_mode) {
56                         GtkTextTag *link_style_tag = gtk_text_tag_table_lookup(tags, "hyperlink");
57                         GtkTextTag *link_tag = win->current_hyperlink->tag;
58                         gtk_text_buffer_insert_with_tags(buffer, &iter, win->buffer->str, -1, default_tag, style_tag, glk_style_tag, link_style_tag, link_tag, NULL);
59                 } else {
60                         gtk_text_buffer_insert_with_tags(buffer, &iter, win->buffer->str, -1, default_tag, style_tag, glk_style_tag, NULL);
61                 }
62
63                 ChimaraGlk *glk = CHIMARA_GLK(gtk_widget_get_ancestor(win->widget, CHIMARA_TYPE_GLK));
64                 g_assert(glk);
65                 g_signal_emit_by_name(glk, "text-buffer-output", win->rock, win->buffer->str);
66         }
67                 break;
68
69         case wintype_TextGrid:
70         {
71                 /* Number of characters to insert */
72                 glong length = win->buffer->len;
73                 glong chars_left = length;
74                 
75                 GtkTextMark *cursor = gtk_text_buffer_get_mark(buffer, "cursor_position");
76                 
77                 /* Get cursor position */
78                 GtkTextIter start;
79                 gtk_text_buffer_get_iter_at_mark(buffer, &start, cursor);
80                 /* Spaces available on this line */
81                 gint available_space = win->width - gtk_text_iter_get_line_offset(&start);
82                 
83                 while(chars_left > available_space && !gtk_text_iter_is_end(&start))
84                 {
85                         GtkTextIter end = start;
86                         gtk_text_iter_forward_to_line_end(&end);
87                         gtk_text_buffer_delete(buffer, &start, &end);
88
89                         GtkTextTagTable *tags = gtk_text_buffer_get_tag_table(buffer);
90                         GtkTextTag *default_tag = gtk_text_tag_table_lookup(tags, "default");
91                         GtkTextTag *style_tag = gtk_text_tag_table_lookup(tags, win->window_stream->style);
92                         GtkTextTag *glk_style_tag = gtk_text_tag_table_lookup(tags, win->window_stream->glk_style);
93
94                         if(win->window_stream->hyperlink_mode) {
95                                 GtkTextTag *link_style_tag = gtk_text_tag_table_lookup(tags, "hyperlink");
96                                 GtkTextTag *link_tag = win->current_hyperlink->tag;
97                                 gtk_text_buffer_insert_with_tags(buffer, &start, win->buffer->str + (length - chars_left), available_space, default_tag, style_tag, glk_style_tag, link_style_tag, link_tag, NULL);
98                         } else {
99                                 gtk_text_buffer_insert_with_tags(buffer, &start, win->buffer->str + (length - chars_left), available_space, default_tag, style_tag, glk_style_tag, NULL);
100                         }
101
102                         chars_left -= available_space;
103                         gtk_text_iter_forward_line(&start);
104                         available_space = win->width;
105                 }
106                 if(!gtk_text_iter_is_end(&start))
107                 {
108                         GtkTextIter end = start;
109                         gtk_text_iter_forward_chars(&end, chars_left);
110                         gtk_text_buffer_delete(buffer, &start, &end);
111
112                         GtkTextTagTable *tags = gtk_text_buffer_get_tag_table(buffer);
113                         GtkTextTag *default_tag = gtk_text_tag_table_lookup(tags, "default");
114                         GtkTextTag *style_tag = gtk_text_tag_table_lookup(tags, win->window_stream->style);
115                         GtkTextTag *glk_style_tag = gtk_text_tag_table_lookup(tags, win->window_stream->glk_style);
116
117                         if(win->window_stream->hyperlink_mode) {
118                                 GtkTextTag *link_style_tag = gtk_text_tag_table_lookup(tags, "hyperlink");
119                                 GtkTextTag *link_tag = win->current_hyperlink->tag;
120                                 gtk_text_buffer_insert_with_tags(buffer, &start, win->buffer->str + (length - chars_left), -1, default_tag, style_tag, glk_style_tag, link_style_tag, link_tag, NULL);
121                         } else {
122                                 gtk_text_buffer_insert_with_tags(buffer, &start, win->buffer->str + (length - chars_left), -1, default_tag, style_tag, glk_style_tag, NULL);
123                         }
124                 }
125                 
126                 gtk_text_buffer_move_mark(buffer, cursor, &start);
127         }
128                 break;
129         }
130
131         gdk_threads_leave();
132
133         g_string_truncate(win->buffer, 0);
134 }
135
136 /* Internal function: write a Latin-1 buffer with length to a stream. */
137 static void
138 write_buffer_to_stream(strid_t str, gchar *buf, glui32 len)
139 {
140         switch(str->type)
141         {
142                 case STREAM_TYPE_WINDOW:
143                         /* Each window type has a different way of printing to it */
144                         switch(str->window->type)
145                         {
146                                 /* Printing to these windows' streams does nothing */
147                                 case wintype_Blank:
148                                 case wintype_Pair:
149                                 case wintype_Graphics:
150                                         str->write_count += len;
151                                         break;
152                                         
153                             /* Text grid/buffer windows */
154                             case wintype_TextGrid:
155                                 case wintype_TextBuffer:
156                             {
157                                 gchar *utf8 = convert_latin1_to_utf8(buf, len);
158                                 if(utf8 != NULL) {
159                                                 write_utf8_to_window_buffer(str->window, utf8);
160                                                 g_free(utf8);
161                                         }
162                                 }       
163                                         str->write_count += len;
164                                         break;
165                                 default:
166                                         ILLEGAL_PARAM("Unknown window type: %u", str->window->type);
167                         }
168                         
169                         /* Now write the same buffer to the window's echo stream */
170                         if(str->window->echo_stream != NULL)
171                                 write_buffer_to_stream(str->window->echo_stream, buf, len);
172                         
173                         break;
174                         
175                 case STREAM_TYPE_MEMORY:
176                         if(str->unicode && str->ubuffer)
177                         {
178                                 int foo = 0;
179                                 while(str->mark < str->buflen && foo < len)
180                                         str->ubuffer[str->mark++] = (unsigned char)buf[foo++];
181                         }
182                         if(!str->unicode && str->buffer)
183                         {
184                                 int copycount = MIN(len, str->buflen - str->mark);
185                                 memmove(str->buffer + str->mark, buf, copycount);
186                                 str->mark += copycount;
187                         }
188
189                         str->write_count += len;
190                         break;
191                         
192                 case STREAM_TYPE_FILE:
193                         if(str->binary) 
194                         {
195                                 if(str->unicode) 
196                                 {
197                                         gchar *writebuffer = convert_latin1_to_ucs4be_string(buf, len);
198                                         fwrite(writebuffer, sizeof(gchar), len * 4, str->file_pointer);
199                                         g_free(writebuffer);
200                                 } 
201                                 else /* Regular file */
202                                 {
203                                         fwrite(buf, sizeof(gchar), len, str->file_pointer);
204                                 }
205                         }
206                         else /* Text mode is the same for Unicode and regular files */
207                         {
208                                 gchar *utf8 = convert_latin1_to_utf8(buf, len);
209                                 if(utf8 != NULL)
210                                 {
211                                         g_fprintf(str->file_pointer, "%s", utf8);
212                                         g_free(utf8);
213                                 }
214                         }
215                         
216                         str->write_count += len;
217                         break;
218                 default:
219                         ILLEGAL_PARAM("Unknown stream type: %u", str->type);
220         }
221 }
222
223 /* Internal function: write a Unicode buffer with length to a stream. */
224 static void
225 write_buffer_to_stream_uni(strid_t str, glui32 *buf, glui32 len)
226 {
227         switch(str->type)
228         {
229                 case STREAM_TYPE_WINDOW:
230                         /* Each window type has a different way of printing to it */
231                         switch(str->window->type)
232                         {
233                                 /* Printing to these windows' streams does nothing */
234                                 case wintype_Blank:
235                                 case wintype_Pair:
236                                 case wintype_Graphics:
237                                         str->write_count += len;
238                                         break;
239                                         
240                             /* Text grid/buffer windows */
241                             case wintype_TextGrid:
242                             case wintype_TextBuffer:
243                             {
244                                 gchar *utf8 = convert_ucs4_to_utf8(buf, len);
245                                 if(utf8 != NULL) {
246                                                 write_utf8_to_window_buffer(str->window, utf8);
247                                                 g_free(utf8);
248                                         }
249                                 }       
250                                         str->write_count += len;
251                                         break;
252                                 default:
253                                         ILLEGAL_PARAM("Unknown window type: %u", str->window->type);
254                         }
255                         
256                         /* Now write the same buffer to the window's echo stream */
257                         if(str->window->echo_stream != NULL)
258                                 write_buffer_to_stream_uni(str->window->echo_stream, buf, len);
259                         
260                         break;
261                         
262                 case STREAM_TYPE_MEMORY:
263                         if(str->unicode && str->ubuffer)
264                         {
265                                 int copycount = MIN(len, str->buflen - str->mark);
266                                 memmove(str->ubuffer + str->mark, buf, copycount * sizeof(glui32));
267                                 str->mark += copycount;
268                         }
269                         if(!str->unicode && str->buffer)
270                         {
271                                 gchar *latin1 = convert_ucs4_to_latin1_binary(buf, len);
272                                 int copycount = MIN(len, str->buflen - str->mark);
273                                 memmove(str->buffer + str->mark, latin1, copycount);
274                                 g_free(latin1);
275                                 str->mark += copycount;
276                         }
277
278                         str->write_count += len;
279                         break;
280                         
281                 case STREAM_TYPE_FILE:
282                         if(str->binary) 
283                         {
284                                 if(str->unicode) 
285                                 {
286                                         gchar *writebuffer = convert_ucs4_to_ucs4be_string(buf, len);
287                                         fwrite(writebuffer, sizeof(gchar), len * 4, str->file_pointer);
288                                         g_free(writebuffer);
289                                 } 
290                                 else /* Regular file */
291                                 {
292                                         gchar *latin1 = convert_ucs4_to_latin1_binary(buf, len);
293                                         fwrite(latin1, sizeof(gchar), len, str->file_pointer);
294                                         g_free(latin1);
295                                 }
296                         }
297                         else /* Text mode is the same for Unicode and regular files */
298                         {
299                                 gchar *utf8 = convert_ucs4_to_utf8(buf, len);
300                                 if(utf8 != NULL) 
301                                 {
302                                         g_fprintf(str->file_pointer, "%s", utf8);
303                                         g_free(utf8);
304                                 }
305                         }
306                         
307                         str->write_count += len;
308                         break;
309                 default:
310                         ILLEGAL_PARAM("Unknown stream type: %u", str->type);
311         }
312 }
313
314 /**
315  * glk_put_char_stream:
316  * @str: An output stream.
317  * @ch: A character in Latin-1 encoding.
318  *
319  * The same as glk_put_char(), except that you specify a stream @str to print 
320  * to, instead of using the current stream. It is illegal for @str to be %NULL,
321  * or an input-only stream.
322  */
323 void
324 glk_put_char_stream(strid_t str, unsigned char ch)
325 {
326         VALID_STREAM(str, return);
327         g_return_if_fail(str->file_mode != filemode_Read);
328         
329         write_buffer_to_stream(str, (gchar *)&ch, 1);
330 }
331
332 /**
333  * glk_put_char_stream_uni:
334  * @str: An output stream.
335  * @ch: A Unicode code point.
336  *
337  * The same as glk_put_char_uni(), except that you specify a stream @str to
338  * print to, instead of using the current stream. It is illegal for @str to be 
339  * %NULL, or an input-only stream.
340  */
341 void
342 glk_put_char_stream_uni(strid_t str, glui32 ch)
343 {
344         VALID_STREAM(str, return);
345         g_return_if_fail(str->file_mode != filemode_Read);
346         
347         write_buffer_to_stream_uni(str, &ch, 1);
348 }
349
350 /**
351  * glk_put_string_stream:
352  * @str: An output stream.
353  * @s: A null-terminated string in Latin-1 encoding.
354  *
355  * The same as glk_put_string(), except that you specify a stream @str to print 
356  * to, instead of using the current stream. It is illegal for @str to be %NULL,
357  * or an input-only stream.
358  */
359 void
360 glk_put_string_stream(strid_t str, char *s)
361 {
362         VALID_STREAM(str, return);
363         if(*s == 0)
364                 return;
365
366         g_return_if_fail(str->file_mode != filemode_Read);
367
368         write_buffer_to_stream(str, s, strlen(s));
369 }
370
371 /**
372  * glk_put_string_stream_uni:
373  * @str: An output stream.
374  * @s: A null-terminated array of Unicode code points.
375  *
376  * The same as glk_put_string_uni(), except that you specify a stream @str to
377  * print to, instead of using the current stream. It is illegal for @str to be 
378  * %NULL, or an input-only stream.
379  */
380 void
381 glk_put_string_stream_uni(strid_t str, glui32 *s)
382 {
383         VALID_STREAM(str, return);
384         if(*s == 0)
385                 return;
386
387         g_return_if_fail(str->file_mode != filemode_Read);
388         
389         /* An impromptu strlen() for glui32 arrays */
390         glong len = 0;
391         glui32 *ptr = s;
392         while(*ptr++)
393                 len++;
394         write_buffer_to_stream_uni(str, s, len);
395 }
396
397 /**
398  * glk_put_buffer_stream:
399  * @str: An output stream.
400  * @buf: An array of characters in Latin-1 encoding.
401  * @len: Length of @buf.
402  *
403  * The same as glk_put_buffer(), except that you specify a stream @str to print 
404  * to, instead of using the current stream. It is illegal for @str to be %NULL,
405  * or an input-only stream.
406  */
407 void
408 glk_put_buffer_stream(strid_t str, char *buf, glui32 len)
409 {
410         VALID_STREAM(str, return);
411         if(len == 0)
412                 return;
413
414         g_return_if_fail(str->file_mode != filemode_Read);
415         
416         write_buffer_to_stream(str, buf, len);
417 }
418
419 /**
420  * glk_put_buffer_stream_uni:
421  * @str: An output stream.
422  * @buf: An array of Unicode code points.
423  * @len: Length of @buf.
424  *
425  * The same as glk_put_buffer_uni(), except that you specify a stream @str to
426  * print to, instead of using the current stream. It is illegal for @str to be 
427  * %NULL, or an input-only stream.
428  */
429 void
430 glk_put_buffer_stream_uni(strid_t str, glui32 *buf, glui32 len)
431 {
432         VALID_STREAM(str, return);
433         if(len == 0)
434                 return;
435
436         g_return_if_fail(str->file_mode != filemode_Read);
437         
438         write_buffer_to_stream_uni(str, buf, len);
439 }
440
441 /*
442  *
443  **************** READING FUNCTIONS ********************************************
444  *
445  */
446
447 /* Internal function: Read one big-endian four-byte character from file fp and
448 return it as a Unicode code point, or -1 on EOF */
449 static glsi32
450 read_ucs4be_char_from_file(FILE *fp)
451 {
452         unsigned char readbuffer[4];
453         if(fread(readbuffer, sizeof(unsigned char), 4, fp) < 4)
454                 return -1; /* EOF */
455         return
456                 readbuffer[0] << 24 | 
457                 readbuffer[1] << 16 | 
458                 readbuffer[2] << 8  | 
459                 readbuffer[3];
460 }
461
462 /* Internal function: Read one UTF-8 character, which may be more than one byte,
463 from file fp and return it as a Unicode code point, or -1 on EOF */
464 static glsi32
465 read_utf8_char_from_file(FILE *fp)
466 {
467         gchar readbuffer[4] = {0, 0, 0, 0}; /* Max UTF-8 width */
468         int foo;
469         gunichar charresult = (gunichar)-2;
470         for(foo = 0; foo < 4 && charresult == (gunichar)-2; foo++) 
471         {
472                 int ch = fgetc(fp);
473                 if(ch == EOF)
474                         return -1;
475                 readbuffer[foo] = (gchar)ch;
476                 charresult = g_utf8_get_char_validated(readbuffer, foo + 1);
477                 /* charresult is -1 if invalid, -2 if incomplete, and the unicode code
478                 point otherwise */
479         }
480         /* Silently return unknown characters as 0xFFFD, Replacement Character */
481         if(charresult == (gunichar)-1 || charresult == (gunichar)-2) 
482                 return 0xFFFD;
483         return charresult;
484 }
485
486 /* Internal function: Tell whether this code point is a Unicode newline. The
487 file pointer and eight-bit flag are included in case the newline is a CR 
488 (U+000D). If the next character is LF (U+000A) then it also belongs to the
489 newline. */
490 static gboolean
491 is_unicode_newline(glsi32 ch, FILE *fp, gboolean utf8)
492 {
493         if(ch == 0x0A || ch == 0x85 || ch == 0x0C || ch == 0x2028 || ch == 0x2029)
494                 return TRUE;
495         if(ch == 0x0D) {
496                 glsi32 ch2 = utf8? read_utf8_char_from_file(fp) : 
497                         read_ucs4be_char_from_file(fp);
498                 if(ch2 != 0x0A)
499                         if(fseek(fp, utf8? -1 : -4, SEEK_CUR) == -1);
500                                 WARNING_S("Seek failed on stream", g_strerror(errno) );
501                 return TRUE;
502         }
503         return FALSE;
504 }
505
506 /* Internal function: Read one character from a stream. Returns a value which
507  can be returned unchanged by glk_get_char_stream_uni(), but 
508  glk_get_char_stream() must replace high values by the placeholder character. */
509 static glsi32
510 get_char_stream_common(strid_t str)
511 {
512         switch(str->type)
513         {
514                 case STREAM_TYPE_MEMORY:
515                         if(str->unicode)
516                         {
517                                 if(!str->ubuffer || str->mark >= str->buflen)
518                                         return -1;
519                                 glui32 ch = str->ubuffer[str->mark++];
520                                 str->read_count++;
521                                 return ch;
522                         }
523                         else
524                         {
525                                 if(!str->buffer || str->mark >= str->buflen)
526                                         return -1;
527                                 unsigned char ch = str->buffer[str->mark++];
528                                 str->read_count++;
529                                 return ch;
530                         }
531                         break;
532                         
533                 case STREAM_TYPE_FILE:
534                         if(str->binary) 
535                         {
536                                 if(str->unicode) 
537                                 {
538                                         glsi32 ch = read_ucs4be_char_from_file(str->file_pointer);
539                                         if(ch == -1)
540                                                 return -1;
541                                         str->read_count++;
542                                         return ch;
543                                 }
544                                 else /* Regular file */
545                                 {
546                                         int ch = fgetc(str->file_pointer);
547                                         if(ch == EOF)
548                                                 return -1;
549                                         
550                                         str->read_count++;
551                                         return ch;
552                                 }
553                         }
554                         else /* Text mode is the same for Unicode and regular files */
555                         {
556                                 glsi32 ch = read_utf8_char_from_file(str->file_pointer);
557                                 if(ch == -1)
558                                         return -1;
559                                         
560                                 str->read_count++;
561                                 return ch;
562                         }
563                 default:
564                         ILLEGAL_PARAM("Reading illegal on stream type: %u", str->type);
565                         return -1;
566         }
567 }
568
569 /**
570  * glk_get_char_stream:
571  * @str: An input stream.
572  *
573  * Reads one character from the stream @str. (There is no notion of a
574  * <quote>current input stream.</quote>) It is illegal for @str to be %NULL, or
575  * an output-only stream.
576  *
577  * The result will be between 0 and 255. As with all basic text functions, Glk
578  * assumes the Latin-1 encoding. See <link 
579  * linkend="chimara-Character-Encoding">Character Encoding</link>. If the end
580  * of the stream has been reached, the result will be -1. 
581  *
582  * <note><para>
583  *   Note that high-bit characters (128..255) are <emphasis>not</emphasis>
584  *   returned as negative numbers.
585  * </para></note>
586  *
587  * If the stream contains Unicode data &mdash; for example, if it was created
588  * with glk_stream_open_file_uni() or glk_stream_open_memory_uni() &mdash; then
589  * characters beyond 255 will be returned as 0x3F (<code>"?"</code>).
590  *
591  * It is usually more efficient to read several characters at once with
592  * glk_get_buffer_stream() or glk_get_line_stream(), as opposed to calling
593  * glk_get_char_stream() several times.
594  *
595  * Returns: A character value between 0 and 255, or -1 on end of stream.
596  */
597 glsi32
598 glk_get_char_stream(strid_t str)
599 {
600         VALID_STREAM(str, return -1);
601         g_return_val_if_fail(str->file_mode == filemode_Read || str->file_mode == filemode_ReadWrite, -1);
602         
603         glsi32 ch = get_char_stream_common(str);
604         return (ch > 0xFF)? PLACEHOLDER : ch;
605 }
606
607 /**
608  * glk_get_char_stream_uni:
609  * @str: An input stream.
610  *
611  * Reads one character from the stream @str. The result will be between 0 and 
612  * 0x7FFFFFFF. If the end of the stream has been reached, the result will be -1.
613  *
614  * Returns: A value between 0 and 0x7FFFFFFF, or -1 on end of stream.
615  */
616 glsi32
617 glk_get_char_stream_uni(strid_t str)
618 {
619         VALID_STREAM(str, return -1);
620         g_return_val_if_fail(str->file_mode == filemode_Read || str->file_mode == filemode_ReadWrite, -1);
621         
622         return get_char_stream_common(str);
623 }
624
625 /**
626  * glk_get_buffer_stream:
627  * @str: An input stream.
628  * @buf: A buffer with space for at least @len characters.
629  * @len: The number of characters to read.
630  *
631  * Reads @len characters from @str, unless the end of stream is reached first.
632  * No terminal null is placed in the buffer.
633  *
634  * Returns: The number of characters actually read.
635  */
636 glui32
637 glk_get_buffer_stream(strid_t str, char *buf, glui32 len)
638 {
639         VALID_STREAM(str, return 0);
640         g_return_val_if_fail(str->file_mode == filemode_Read || str->file_mode == filemode_ReadWrite, 0);
641         g_return_val_if_fail(buf != NULL, 0);
642         
643         switch(str->type)
644         {
645                 case STREAM_TYPE_MEMORY:
646                 {
647                         int copycount = 0;
648                         if(str->unicode)
649                         {
650                                 while(copycount < len && str->ubuffer && str->mark < str->buflen) 
651                                 {
652                                         glui32 ch = str->ubuffer[str->mark++];
653                                         buf[copycount++] = (ch > 0xFF)? '?' : (char)ch;
654                                 }
655                         }
656                         else
657                         {
658                                 if(str->buffer) /* if not, copycount stays 0 */
659                                         copycount = MIN(len, str->buflen - str->mark);
660                                 memmove(buf, str->buffer + str->mark, copycount);
661                                 str->mark += copycount;
662                         }
663
664                         str->read_count += copycount;           
665                         return copycount;
666                 }       
667                 case STREAM_TYPE_FILE:
668                         if(str->binary) 
669                         {
670                                 if(str->unicode) /* Binary file with 4-byte characters */
671                                 {
672                                         /* Read len characters of 4 bytes each */
673                                         unsigned char *readbuffer = g_new0(unsigned char, 4 * len);
674                                         size_t count = fread(readbuffer, sizeof(unsigned char), 4 * len, str->file_pointer);
675                                         /* If there was an incomplete character */
676                                         if(count % 4 != 0) 
677                                         {
678                                                 count -= count % 4;
679                                                 WARNING("Incomplete character in binary Unicode file");
680                                         }
681                                         
682                                         int foo;
683                                         for(foo = 0; foo < count; foo += 4)
684                                         {
685                                                 glsi32 ch = readbuffer[foo] << 24
686                                                         | readbuffer[foo + 1] << 16
687                                                         | readbuffer[foo + 2] << 8
688                                                         | readbuffer[foo + 3];
689                                                 buf[foo / 4] = (ch > 255)? 0x3F : (char)ch;
690                                         }
691                                         g_free(readbuffer);
692                                         str->read_count += count / 4;
693                                         return count / 4;
694                                 }
695                                 else /* Regular binary file */
696                                 {
697                                         size_t count = fread(buf, sizeof(char), len, str->file_pointer);
698                                         str->read_count += count;
699                                         return count;
700                                 }
701                         }
702                         else /* Text mode is the same for Unicode and regular files */
703                         {
704                                 /* Do it character-by-character */
705                                 int foo;
706                                 for(foo = 0; foo < len; foo++)
707                                 {
708                                         glsi32 ch = read_utf8_char_from_file(str->file_pointer);
709                                         if(ch == -1)
710                                                 break;
711                                         str->read_count++;
712                                         buf[foo] = (ch > 0xFF)? 0x3F : (gchar)ch;
713                                 }
714                                 return foo;
715                         }
716                 default:
717                         ILLEGAL_PARAM("Reading illegal on stream type: %u", str->type);
718                         return 0;
719         }
720 }
721
722 /**
723  * glk_get_buffer_stream_uni:
724  * @str: An input stream.
725  * @buf: A buffer with space for at least @len Unicode code points.
726  * @len: The number of characters to read.
727  *
728  * Reads @len Unicode characters from @str, unless the end of stream is reached 
729  * first. No terminal null is placed in the buffer.
730  *
731  * Returns: The number of Unicode characters actually read.
732  */
733 glui32
734 glk_get_buffer_stream_uni(strid_t str, glui32 *buf, glui32 len)
735 {
736         VALID_STREAM(str, return 0);
737         g_return_val_if_fail(str->file_mode == filemode_Read || str->file_mode == filemode_ReadWrite, 0);
738         g_return_val_if_fail(buf != NULL, 0);
739         
740         switch(str->type)
741         {
742                 case STREAM_TYPE_MEMORY:
743                 {
744                         int copycount = 0;
745                         if(str->unicode)
746                         {
747                                 if(str->ubuffer) /* if not, copycount stays 0 */
748                                         copycount = MIN(len, str->buflen - str->mark);
749                                 memmove(buf, str->ubuffer + str->mark, copycount * 4);
750                                 str->mark += copycount;
751                         }
752                         else
753                         {
754                                 while(copycount < len && str->buffer && str->mark < str->buflen)
755                                 {
756                                         unsigned char ch = str->buffer[str->mark++];
757                                         buf[copycount++] = ch;
758                                 }
759                         }
760
761                         str->read_count += copycount;           
762                         return copycount;
763                 }       
764                 case STREAM_TYPE_FILE:
765                         if(str->binary) 
766                         {
767                                 if(str->unicode) /* Binary file with 4-byte characters */
768                                 {
769                                         /* Read len characters of 4 bytes each */
770                                         unsigned char *readbuffer = g_new0(unsigned char, 4 * len);
771                                         size_t count = fread(readbuffer, sizeof(unsigned char), 4 * len, str->file_pointer);
772                                         /* If there was an incomplete character */
773                                         if(count % 4 != 0) 
774                                         {
775                                                 count -= count % 4;
776                                                 WARNING("Incomplete character in binary Unicode file");
777                                         }
778                                         
779                                         int foo;
780                                         for(foo = 0; foo < count; foo += 4)
781                                                 buf[foo / 4] = readbuffer[foo] << 24
782                                                         | readbuffer[foo + 1] << 16
783                                                         | readbuffer[foo + 2] << 8
784                                                         | readbuffer[foo + 3];
785                                         g_free(readbuffer);
786                                         str->read_count += count / 4;
787                                         return count / 4;
788                                 }
789                                 else /* Regular binary file */
790                                 {
791                                         unsigned char *readbuffer = g_new0(unsigned char, len);
792                                         size_t count = fread(readbuffer, sizeof(unsigned char), len, str->file_pointer);
793                                         int foo;
794                                         for(foo = 0; foo < count; foo++)
795                                                 buf[foo] = readbuffer[foo];
796                                         g_free(readbuffer);
797                                         str->read_count += count;
798                                         return count;
799                                 }
800                         }
801                         else /* Text mode is the same for Unicode and regular files */
802                         {
803                                 /* Do it character-by-character */
804                                 int foo;
805                                 for(foo = 0; foo < len; foo++)
806                                 {
807                                         glsi32 ch = read_utf8_char_from_file(str->file_pointer);
808                                         if(ch == -1)
809                                                 break;
810                                         str->read_count++;
811                                         buf[foo] = ch;
812                                 }
813                                 return foo;
814                         }
815                 default:
816                         ILLEGAL_PARAM("Reading illegal on stream type: %u", str->type);
817                         return 0;
818         }
819 }
820
821 /**
822  * glk_get_line_stream:
823  * @str: An input stream.
824  * @buf: A buffer with space for at least @len characters.
825  * @len: The number of characters to read, plus one.
826  *
827  * Reads characters from @str, until either 
828  * <inlineequation>
829  *   <alt>@len - 1</alt>
830  *   <mathphrase>@len - 1</mathphrase>
831  * </inlineequation>
832  * characters have been read or a newline has been read. It then puts a
833  * terminal null (<code>'\0'</code>) aracter on
834  * the end. It returns the number of characters actually read, including the
835  * newline (if there is one) but not including the terminal null.
836  *
837  * Returns: The number of characters actually read.
838  */
839 glui32
840 glk_get_line_stream(strid_t str, char *buf, glui32 len)
841 {
842         VALID_STREAM(str, return 0);
843         g_return_val_if_fail(str->file_mode == filemode_Read || str->file_mode == filemode_ReadWrite, 0);
844         g_return_val_if_fail(buf != NULL, 0);
845
846         switch(str->type)
847         {
848                 case STREAM_TYPE_MEMORY:
849                 {
850                         int copycount = 0;
851                         if(str->unicode)
852                         {
853                                 /* Do it character-by-character */
854                                 while(copycount < len - 1 && str->ubuffer && str->mark < str->buflen) 
855                                 {
856                                         glui32 ch = str->ubuffer[str->mark++];
857                                         /* Check for Unicode newline; slightly different than
858                                         in file streams */
859                                         if(ch == 0x0A || ch == 0x85 || ch == 0x0C || ch == 0x2028 || ch == 0x2029)
860                                         {
861                                                 buf[copycount++] = '\n';
862                                                 break;
863                                         }
864                                         if(ch == 0x0D)
865                                         {
866                                                 if(str->ubuffer[str->mark] == 0x0A)
867                                                         str->mark++; /* skip past next newline */
868                                                 buf[copycount++] = '\n';
869                                                 break;
870                                         }
871                                         buf[copycount++] = (ch > 0xFF)? '?' : (char)ch;
872                                 }
873                                 buf[copycount] = '\0';
874                         }
875                         else
876                         {
877                                 if(str->buffer) /* if not, copycount stays 0 */
878                                         copycount = MIN(len - 1, str->buflen - str->mark);
879                                 char *endptr = memccpy(buf, str->buffer + str->mark, '\n', copycount);
880                                 if(endptr) /* newline was found */
881                                         copycount = endptr - buf; /* Real copy count */
882                                 buf[copycount] = '\0';
883                                 str->mark += copycount;
884                         }
885                         
886                         str->read_count += copycount;
887                         return copycount;
888                 }       
889                 case STREAM_TYPE_FILE:
890                         if(str->binary) 
891                         {
892                                 if(str->unicode) /* Binary file with 4-byte characters */
893                                 {
894                                         /* Do it character-by-character */
895                                         int foo;
896                                         for(foo = 0; foo < len - 1; foo++)
897                                         {
898                                                 glsi32 ch = read_ucs4be_char_from_file(str->file_pointer);
899                                                 if(ch == -1) 
900                                                 {
901                                                         buf[foo] = '\0';
902                                                         return foo - 1;
903                                                 }
904                                                 str->read_count++;
905                                                 if(is_unicode_newline(ch, str->file_pointer, FALSE))
906                                                 {
907                                                         buf[foo] = '\n';
908                                                         buf[foo + 1] = '\0';
909                                                         return foo;
910                                                 }
911                                                 buf[foo] = (ch > 0xFF)? '?' : (char)ch;
912                                         }
913                                         buf[len] = '\0';
914                                         return foo;
915                                 }
916                                 else /* Regular binary file */
917                                 {
918                                         if( !fgets(buf, len, str->file_pointer) ) {
919                                                 *buf = 0;
920                                                 return 0;
921                                         }
922
923                                         int nread = strlen(buf);
924                                         str->read_count += nread;
925                                         return nread;
926                                 }
927                         }
928                         else /* Text mode is the same for Unicode and regular files */
929                         {
930                                 /* Do it character-by-character */
931                                 int foo;
932                                 for(foo = 0; foo < len - 1; foo++)
933                                 {
934                                         glsi32 ch = read_utf8_char_from_file(str->file_pointer);
935                                         if(ch == -1)
936                                         {
937                                                 buf[foo] = '\0';
938                                                 return foo - 1;
939                                         }
940                                         str->read_count++;
941                                         if(is_unicode_newline(ch, str->file_pointer, TRUE))
942                                         {
943                                                 buf[foo] = '\n';
944                                                 buf[foo + 1] = '\0';
945                                                 return foo;
946                                         }
947                                         buf[foo] = (ch > 0xFF)? 0x3F : (char)ch;
948                                 }
949                                 buf[len] = '\0';
950                                 return foo;
951                         }
952                 default:
953                         ILLEGAL_PARAM("Reading illegal on stream type: %u", str->type);
954                         return 0;
955         }
956 }
957
958 /**
959  * glk_get_line_stream_uni:
960  * @str: An input stream.
961  * @buf: A buffer with space for at least @len Unicode code points.
962  * @len: The number of characters to read, plus one.
963  *
964  * Reads Unicode characters from @str, until either 
965  * <inlineequation>
966  *   <alt>@len - 1</alt>
967  *   <mathphrase>@len - 1</mathphrase>
968  * </inlineequation> 
969  * Unicode characters have been read or a newline has been read. It then puts a
970  * terminal null (a zero value) on the end.
971  *
972  * Returns: The number of characters actually read, including the newline (if
973  * there is one) but not including the terminal null.
974  */
975 glui32
976 glk_get_line_stream_uni(strid_t str, glui32 *buf, glui32 len)
977 {
978         VALID_STREAM(str, return 0);
979         g_return_val_if_fail(str->file_mode == filemode_Read || str->file_mode == filemode_ReadWrite, 0);
980         g_return_val_if_fail(buf != NULL, 0);
981
982         switch(str->type)
983         {
984                 case STREAM_TYPE_MEMORY:
985                 {
986                         int copycount = 0;
987                         if(str->unicode)
988                         {
989                                 /* Do it character-by-character */
990                                 while(copycount < len - 1 && str->ubuffer && str->mark < str->buflen) 
991                                 {
992                                         glui32 ch = str->ubuffer[str->mark++];
993                                         /* Check for Unicode newline; slightly different than
994                                         in file streams */
995                                         if(ch == 0x0A || ch == 0x85 || ch == 0x0C || ch == 0x2028 || ch == 0x2029)
996                                         {
997                                                 buf[copycount++] = '\n';
998                                                 break;
999                                         }
1000                                         if(ch == 0x0D)
1001                                         {
1002                                                 if(str->ubuffer[str->mark] == 0x0A)
1003                                                         str->mark++; /* skip past next newline */
1004                                                 buf[copycount++] = '\n';
1005                                                 break;
1006                                         }
1007                                         buf[copycount++] = ch;
1008                                 }
1009                                 buf[copycount] = '\0';
1010                         }
1011                         else
1012                         {
1013                                 /* No recourse to memccpy(), so do it character-by-character */
1014                                 while(copycount < len - 1 && str->buffer && str->mark < str->buflen)
1015                                 {
1016                                         gchar ch = str->buffer[str->mark++];
1017                                         /* Check for newline */
1018                                         if(ch == '\n') /* Also check for \r and \r\n? */
1019                                         {
1020                                                 buf[copycount++] = '\n';
1021                                                 break;
1022                                         }
1023                                         buf[copycount++] = (unsigned char)ch;
1024                                 }
1025                                 buf[copycount] = 0;
1026                         }
1027                         
1028                         str->read_count += copycount;
1029                         return copycount;
1030                 }       
1031                 case STREAM_TYPE_FILE:
1032                         if(str->binary) 
1033                         {
1034                                 if(str->unicode) /* Binary file with 4-byte characters */
1035                                 {
1036                                         /* Do it character-by-character */
1037                                         int foo;
1038                                         for(foo = 0; foo < len - 1; foo++)
1039                                         {
1040                                                 glsi32 ch = read_ucs4be_char_from_file(str->file_pointer);
1041                                                 if(ch == -1) 
1042                                                 {
1043                                                         buf[foo] = 0;
1044                                                         return foo - 1;
1045                                                 }
1046                                                 str->read_count++;
1047                                                 if(is_unicode_newline(ch, str->file_pointer, FALSE))
1048                                                 {
1049                                                         buf[foo] = ch; /* Preserve newline types??? */
1050                                                         buf[foo + 1] = 0;
1051                                                         return foo;
1052                                                 }
1053                                                 buf[foo] = ch;
1054                                         }
1055                                         buf[len] = 0;
1056                                         return foo;
1057                                 }
1058                                 else /* Regular binary file */
1059                                 {
1060                                         gchar *readbuffer = g_new0(gchar, len);
1061                                         if( !fgets(readbuffer, len, str->file_pointer) ) {
1062                                                 *buf = 0;
1063                                                 return 0;
1064                                         }
1065
1066                                         glui32 count = strlen(readbuffer);
1067                                         int foo;
1068                                         for(foo = 0; foo < count + 1; foo++) /* Copy terminator */
1069                                                 buf[foo] = (unsigned char)(readbuffer[foo]);
1070                                         str->read_count += count;
1071                                         return count;
1072                                 }
1073                         }
1074                         else /* Text mode is the same for Unicode and regular files */
1075                         {
1076                                 /* Do it character-by-character */
1077                                 int foo;
1078                                 for(foo = 0; foo < len - 1; foo++)
1079                                 {
1080                                         glsi32 ch = read_utf8_char_from_file(str->file_pointer);
1081                                         if(ch == -1)
1082                                         {
1083                                                 buf[foo] = 0;
1084                                                 return foo - 1;
1085                                         }
1086                                         str->read_count++;
1087                                         if(is_unicode_newline(ch, str->file_pointer, TRUE))
1088                                         {
1089                                                 buf[foo] = ch; /* Preserve newline types??? */
1090                                                 buf[foo + 1] = 0;
1091                                                 return foo;
1092                                         }
1093                                         buf[foo] = ch;
1094                                 }
1095                                 buf[len] = 0;
1096                                 return foo;
1097                         }
1098                 default:
1099                         ILLEGAL_PARAM("Reading illegal on stream type: %u", str->type);
1100                         return 0;
1101         }
1102 }
1103
1104 /*
1105  *
1106  **************** SEEKING FUNCTIONS ********************************************
1107  *
1108  */
1109
1110 /**
1111  * glk_stream_get_position:
1112  * @str: A file or memory stream.
1113  *
1114  * Returns the position of the read/write mark in @str. For memory streams and
1115  * binary file streams, this is exactly the number of characters read or written
1116  * from the beginning of the stream (unless you have moved the mark with
1117  * glk_stream_set_position().) For text file streams, matters are more 
1118  * ambiguous, since (for example) writing one byte to a text file may store more
1119  * than one character in the platform's native encoding. You can only be sure
1120  * that the position increases as you read or write to the file.
1121  *
1122  * Additional complication: for Latin-1 memory and file streams, a character is
1123  * a byte. For Unicode memory and file streams (those created by
1124  * glk_stream_open_file_uni() and glk_stream_open_memory_uni()), a character is
1125  * a 32-bit word. So in a binary Unicode file, positions are multiples of four
1126  * bytes.
1127  *
1128  * <note><para>
1129  *   If this bothers you, don't use binary Unicode files. I don't think they're
1130  *   good for much anyhow.
1131  * </para></note>
1132  *
1133  * Returns: position of the read/write mark in @str.
1134  */
1135 glui32
1136 glk_stream_get_position(strid_t str)
1137 {
1138         VALID_STREAM(str, return 0);
1139         
1140         switch(str->type)
1141         {
1142                 case STREAM_TYPE_MEMORY:
1143                         return str->mark;
1144                 case STREAM_TYPE_FILE:
1145                         return ftell(str->file_pointer);
1146                 default:
1147                         ILLEGAL_PARAM("Seeking illegal on stream type: %u", str->type);
1148                         return 0;
1149         }
1150 }
1151
1152 /**
1153  * glk_stream_set_position:
1154  * @str: A file or memory stream.
1155  * @pos: The position to set the mark to, relative to @seekmode.
1156  * @seekmode: One of %seekmode_Start, %seekmode_Current, or %seekmode_End.
1157  *
1158  * Sets the position of the read/write mark in @str. The position is controlled
1159  * by @pos, and the meaning of @pos is controlled by @seekmode. See the
1160  * <code>seekmode_</code> constants below.
1161  *
1162  * It is illegal to specify a position before the beginning or after the end of
1163  * the file.
1164  *
1165  * In binary files, the mark position is exact &mdash; it corresponds with the
1166  * number of characters you have read or written. In text files, this mapping 
1167  * can vary, because of linefeed conventions or other character-set 
1168  * approximations. See <link linkend="chimara-Streams">Streams</link>.
1169  * glk_stream_set_position() and glk_stream_get_position() measure positions in
1170  * the platform's native encoding &mdash; after character cookery. Therefore,
1171  * in a text stream, it is safest to use glk_stream_set_position() only to move
1172  * to the beginning or end of a file, or to a position determined by
1173  * glk_stream_get_position().
1174  *
1175  * Again, in Latin-1 streams, characters are bytes. In Unicode streams,
1176  * characters are 32-bit words, or four bytes each.
1177  */
1178 void
1179 glk_stream_set_position(strid_t str, glsi32 pos, glui32 seekmode)
1180 {
1181         VALID_STREAM(str, return);
1182         g_return_if_fail(!(seekmode == seekmode_Start && pos < 0));
1183         g_return_if_fail(!(seekmode == seekmode_End && pos > 0));
1184         
1185         switch(str->type)
1186         {
1187                 case STREAM_TYPE_MEMORY:
1188                         switch(seekmode)
1189                         {
1190                                 case seekmode_Start:   str->mark = pos;  break;
1191                                 case seekmode_Current: str->mark += pos; break;
1192                                 case seekmode_End:     str->mark = str->buflen + pos; break;
1193                                 default:
1194                                         g_return_if_reached();
1195                                         return;
1196                         }
1197                         break;
1198                 case STREAM_TYPE_FILE:
1199                 {
1200                         int whence;
1201                         switch(seekmode)
1202                         {
1203                                 case seekmode_Start:   whence = SEEK_SET; break;
1204                                 case seekmode_Current: whence = SEEK_CUR; break;
1205                                 case seekmode_End:     whence = SEEK_END; break;
1206                                 default:
1207                                         g_return_if_reached();
1208                                         return;
1209                         }
1210                         if(fseek(str->file_pointer, pos, whence) == -1)
1211                                 WARNING("Seek failed on file stream");
1212                         break;
1213                 }
1214                 default:
1215                         ILLEGAL_PARAM("Seeking illegal on stream type: %u", str->type);
1216                         return;
1217         }
1218 }
1219