Got Gtk-Doc working. Now all the fancy /** comments before the functions
[rodin/chimara.git] / src / strio.c
1 #include "charset.h"
2 #include "stream.h"
3 #include <stdio.h>
4 #include <string.h>
5 #include <glib.h>
6 #include <glib/gstdio.h>
7
8 /*
9  *
10  **************** WRITING FUNCTIONS ********************************************
11  *
12  */
13
14 /* Internal function: write a UTF-8 string to a text grid window's text buffer. */
15 static void
16 write_utf8_to_grid(winid_t win, gchar *s)
17 {
18     /* Number of characters to insert */
19     glong length = g_utf8_strlen(s, -1);
20     glong chars_left = length;
21     
22     gdk_threads_enter();
23     
24     GtkTextBuffer *buffer = gtk_text_view_get_buffer( GTK_TEXT_VIEW(win->widget) );
25     GtkTextMark *cursor = gtk_text_buffer_get_mark(buffer, "cursor_position");
26     
27     /* Get cursor position */
28     GtkTextIter start;
29     gtk_text_buffer_get_iter_at_mark(buffer, &start, cursor);
30     /* Spaces available on this line */
31     gint available_space = win->width - gtk_text_iter_get_line_offset(&start);
32     
33     while(chars_left > available_space && !gtk_text_iter_is_end(&start))
34     {
35         GtkTextIter end = start;
36         gtk_text_iter_forward_to_line_end(&end);
37         gtk_text_buffer_delete(buffer, &start, &end);
38         gtk_text_buffer_insert(buffer, &start, s + (length - chars_left), available_space);
39         chars_left -= available_space;
40         gtk_text_iter_forward_line(&start);
41         available_space = win->width;
42     }
43     if(!gtk_text_iter_is_end(&start))
44     {
45         GtkTextIter end = start;
46         gtk_text_iter_forward_chars(&end, chars_left);
47         gtk_text_buffer_delete(buffer, &start, &end);
48         gtk_text_buffer_insert(buffer, &start, s + (length - chars_left), -1);
49     }
50     
51     gtk_text_buffer_move_mark(buffer, cursor, &start);
52     
53     gdk_threads_leave();
54 }
55
56 /* Internal function: write a UTF-8 string to a text buffer window's text buffer. */
57 static void
58 write_utf8_to_window(winid_t win, gchar *s)
59 {
60         gdk_threads_enter();
61
62         GtkTextBuffer *buffer = gtk_text_view_get_buffer( GTK_TEXT_VIEW(win->widget) );
63
64         GtkTextIter iter;
65         gtk_text_buffer_get_end_iter(buffer, &iter);
66         gtk_text_buffer_insert(buffer, &iter, s, -1);
67
68         gdk_threads_leave();
69 }
70
71 /* Internal function: write a Latin-1 buffer with length to a stream. */
72 static void
73 write_buffer_to_stream(strid_t str, gchar *buf, glui32 len)
74 {
75         switch(str->type)
76         {
77                 case STREAM_TYPE_WINDOW:
78                         /* Each window type has a different way of printing to it */
79                         switch(str->window->type)
80                         {
81                                 /* Printing to these windows' streams does nothing */
82                                 case wintype_Blank:
83                                 case wintype_Pair:
84                                 case wintype_Graphics:
85                                         str->write_count += len;
86                                         break;
87                                         
88                             /* Text grid window */
89                             case wintype_TextGrid:
90                             {
91                                 gchar *utf8 = convert_latin1_to_utf8(buf, len);
92                                 if(utf8 != NULL)
93                                 {
94                                     /* FIXME: What to do if string contains \n? Split the input string at newlines and write each string separately? */
95                                     write_utf8_to_grid(str->window, utf8);
96                                     g_free(utf8);
97                                 }
98                             }
99                                 str->write_count += len;
100                                 break;
101                                         
102                                 /* Text buffer window */        
103                                 case wintype_TextBuffer:
104                                 {
105                                         gchar *utf8 = convert_latin1_to_utf8(buf, len);
106                                         if(utf8 != NULL)
107                                         {
108                                                 write_utf8_to_window(str->window, utf8);
109                                                 g_free(utf8);
110                                         }
111                                 }       
112                                         str->write_count += len;
113                                         break;
114                                 default:
115                                         g_warning("%s: Writing to this kind of window unsupported.", __func__);
116                         }
117                         
118                         /* Now write the same buffer to the window's echo stream */
119                         if(str->window->echo_stream != NULL)
120                                 write_buffer_to_stream(str->window->echo_stream, buf, len);
121                         
122                         break;
123                         
124                 case STREAM_TYPE_MEMORY:
125                         if(str->unicode && str->ubuffer)
126                         {
127                                 int foo = 0;
128                                 while(str->mark < str->buflen && foo < len)
129                                         str->ubuffer[str->mark++] = (unsigned char)buf[foo++];
130                         }
131                         if(!str->unicode && str->buffer)
132                         {
133                                 int copycount = MIN(len, str->buflen - str->mark);
134                                 memmove(str->buffer + str->mark, buf, copycount);
135                                 str->mark += copycount;
136                         }
137
138                         str->write_count += len;
139                         break;
140                         
141                 case STREAM_TYPE_FILE:
142                         if(str->binary) 
143                         {
144                                 if(str->unicode) 
145                                 {
146                                         gchar *writebuffer = convert_latin1_to_ucs4be_string(buf, len);
147                                         fwrite(writebuffer, sizeof(gchar), len * 4, str->file_pointer);
148                                         g_free(writebuffer);
149                                 } 
150                                 else /* Regular file */
151                                 {
152                                         fwrite(buf, sizeof(gchar), len, str->file_pointer);
153                                 }
154                         }
155                         else /* Text mode is the same for Unicode and regular files */
156                         {
157                                 gchar *utf8 = convert_latin1_to_utf8(buf, len);
158                                 if(utf8 != NULL)
159                                 {
160                                         g_fprintf(str->file_pointer, "%s", utf8);
161                                         g_free(utf8);
162                                 }
163                         }
164                         
165                         str->write_count += len;
166                         break;
167                 default:
168                         g_warning("%s: Writing to this kind of stream unsupported.", __func__);
169         }
170 }
171
172 /* Internal function: write a Unicode buffer with length to a stream. */
173 static void
174 write_buffer_to_stream_uni(strid_t str, glui32 *buf, glui32 len)
175 {
176         switch(str->type)
177         {
178                 case STREAM_TYPE_WINDOW:
179                         /* Each window type has a different way of printing to it */
180                         switch(str->window->type)
181                         {
182                                 /* Printing to these windows' streams does nothing */
183                                 case wintype_Blank:
184                                 case wintype_Pair:
185                                 case wintype_Graphics:
186                                         str->write_count += len;
187                                         break;
188                                         
189                             /* Text grid window */
190                             case wintype_TextGrid:
191                             {
192                                 gchar *utf8 = convert_ucs4_to_utf8(buf, len);
193                                 if(utf8 != NULL)
194                                 {
195                                     /* FIXME: What to do if string contains \n? Split the input string at newlines and write each string separately? */
196                                     write_utf8_to_grid(str->window, utf8);
197                                     g_free(utf8);
198                                 }
199                             }
200                                 str->write_count += len;
201                                 break;
202                                         
203                                 /* Text buffer window */        
204                                 case wintype_TextBuffer:
205                                 {
206                                         gchar *utf8 = convert_ucs4_to_utf8(buf, len);
207                                         if(utf8 != NULL)
208                                         {
209                                                 write_utf8_to_window(str->window, utf8);
210                                                 g_free(utf8);
211                                         }
212                                 }       
213                                         str->write_count += len;
214                                         break;
215                                 default:
216                                         g_warning("%s: Writing to this kind of window unsupported.", __func__);
217                         }
218                         
219                         /* Now write the same buffer to the window's echo stream */
220                         if(str->window->echo_stream != NULL)
221                                 write_buffer_to_stream_uni(str->window->echo_stream, buf, len);
222                         
223                         break;
224                         
225                 case STREAM_TYPE_MEMORY:
226                         if(str->unicode && str->ubuffer)
227                         {
228                                 int copycount = MIN(len, str->buflen - str->mark);
229                                 memmove(str->ubuffer + str->mark, buf, copycount * sizeof(glui32));
230                                 str->mark += copycount;
231                         }
232                         if(!str->unicode && str->buffer)
233                         {
234                                 gchar *latin1 = convert_ucs4_to_latin1_binary(buf, len);
235                                 int copycount = MIN(len, str->buflen - str->mark);
236                                 memmove(str->buffer + str->mark, latin1, copycount);
237                                 g_free(latin1);
238                                 str->mark += copycount;
239                         }
240
241                         str->write_count += len;
242                         break;
243                         
244                 case STREAM_TYPE_FILE:
245                         if(str->binary) 
246                         {
247                                 if(str->unicode) 
248                                 {
249                                         gchar *writebuffer = convert_ucs4_to_ucs4be_string(buf, len);
250                                         fwrite(writebuffer, sizeof(gchar), len * 4, str->file_pointer);
251                                         g_free(writebuffer);
252                                 } 
253                                 else /* Regular file */
254                                 {
255                                         gchar *latin1 = convert_ucs4_to_latin1_binary(buf, len);
256                                         fwrite(latin1, sizeof(gchar), len, str->file_pointer);
257                                         g_free(latin1);
258                                 }
259                         }
260                         else /* Text mode is the same for Unicode and regular files */
261                         {
262                                 gchar *utf8 = convert_ucs4_to_utf8(buf, len);
263                                 if(utf8 != NULL) 
264                                 {
265                                         g_fprintf(str->file_pointer, "%s", utf8);
266                                         g_free(utf8);
267                                 }
268                         }
269                         
270                         str->write_count += len;
271                         break;
272                 default:
273                         g_warning("%s: Writing to this kind of stream unsupported.", __func__);
274         }
275 }
276
277 /**
278  * glk_put_char_stream:
279  * @str: An output stream.
280  * @ch: A character in Latin-1 encoding.
281  *
282  * The same as glk_put_char(), except that you specify a stream @str to print 
283  * to, instead of using the current stream. It is illegal for @str to be %NULL,
284  * or an input-only stream.
285  */
286 void
287 glk_put_char_stream(strid_t str, unsigned char ch)
288 {
289         g_return_if_fail(str != NULL);
290         g_return_if_fail(str->file_mode != filemode_Read);
291         
292         write_buffer_to_stream(str, (gchar *)&ch, 1);
293 }
294
295 /**
296  * glk_put_char_stream_uni:
297  * @str: An output stream.
298  * @ch: A Unicode code point.
299  *
300  * The same as glk_put_char_uni(), except that you specify a stream @str to
301  * print to, instead of using the current stream. It is illegal for @str to be 
302  * %NULL, or an input-only stream.
303  */
304 void
305 glk_put_char_stream_uni(strid_t str, glui32 ch)
306 {
307         g_return_if_fail(str != NULL);
308         g_return_if_fail(str->file_mode != filemode_Read);
309         
310         write_buffer_to_stream_uni(str, &ch, 1);
311 }
312
313 /**
314  * glk_put_string_stream:
315  * @str: An output stream.
316  * @s: A null-terminated string in Latin-1 encoding.
317  *
318  * The same as glk_put_string(), except that you specify a stream @str to print 
319  * to, instead of using the current stream. It is illegal for @str to be %NULL,
320  * or an input-only stream.
321  */
322 void
323 glk_put_string_stream(strid_t str, char *s)
324 {
325         g_return_if_fail(str != NULL);
326         g_return_if_fail(str->file_mode != filemode_Read);
327
328         write_buffer_to_stream(str, s, strlen(s));
329 }
330
331 /**
332  * glk_put_string_stream_uni:
333  * @str: An output stream.
334  * @s: A null-terminated array of Unicode code points.
335  *
336  * The same as glk_put_string_uni(), except that you specify a stream @str to
337  * print to, instead of using the current stream. It is illegal for @str to be 
338  * %NULL, or an input-only stream.
339  */
340 void
341 glk_put_string_stream_uni(strid_t str, glui32 *s)
342 {
343         g_return_if_fail(str != NULL);
344         g_return_if_fail(str->file_mode != filemode_Read);
345         
346         /* An impromptu strlen() for glui32 arrays */
347         glong len = 0;
348         glui32 *ptr = s;
349         while(*ptr++)
350                 len++;
351         write_buffer_to_stream_uni(str, s, len);
352 }
353
354 /**
355  * glk_put_buffer_stream:
356  * @str: An output stream.
357  * @buf: An array of characters in Latin-1 encoding.
358  * @len: Length of @buf.
359  *
360  * The same as glk_put_buffer(), except that you specify a stream @str to print 
361  * to, instead of using the current stream. It is illegal for @str to be %NULL,
362  * or an input-only stream.
363  */
364 void
365 glk_put_buffer_stream(strid_t str, char *buf, glui32 len)
366 {
367         g_return_if_fail(str != NULL);
368         g_return_if_fail(str->file_mode != filemode_Read);
369         
370         write_buffer_to_stream(str, buf, len);
371 }
372
373 /**
374  * glk_put_buffer_stream_uni:
375  * @str: An output stream.
376  * @buf: An array of Unicode code points.
377  * @len: Length of @buf.
378  *
379  * The same as glk_put_buffer_uni(), except that you specify a stream @str to
380  * print to, instead of using the current stream. It is illegal for @str to be 
381  * %NULL, or an input-only stream.
382  */
383 void
384 glk_put_buffer_stream_uni(strid_t str, glui32 *buf, glui32 len)
385 {
386         g_return_if_fail(str != NULL);
387         g_return_if_fail(str->file_mode != filemode_Read);
388         
389         write_buffer_to_stream_uni(str, buf, len);
390 }
391
392 /*
393  *
394  **************** READING FUNCTIONS ********************************************
395  *
396  */
397
398 /* Internal function: Read one big-endian four-byte character from file fp and
399 return it as a Unicode code point, or -1 on EOF */
400 static glsi32
401 read_ucs4be_char_from_file(FILE *fp)
402 {
403         unsigned char readbuffer[4];
404         if(fread(readbuffer, sizeof(unsigned char), 4, fp) < 4)
405                 return -1; /* EOF */
406         return
407                 readbuffer[0] << 24 | 
408                 readbuffer[1] << 16 | 
409                 readbuffer[2] << 8  | 
410                 readbuffer[3];
411 }
412
413 /* Internal function: Read one UTF-8 character, which may be more than one byte,
414 from file fp and return it as a Unicode code point, or -1 on EOF */
415 static glsi32
416 read_utf8_char_from_file(FILE *fp)
417 {
418         gchar readbuffer[4] = {0, 0, 0, 0}; /* Max UTF-8 width */
419         int foo;
420         gunichar charresult = (gunichar)-2;
421         for(foo = 0; foo < 4 && charresult == (gunichar)-2; foo++) 
422         {
423                 int ch = fgetc(fp);
424                 if(ch == EOF)
425                         return -1;
426                 readbuffer[foo] = (gchar)ch;
427                 charresult = g_utf8_get_char_validated(readbuffer, foo + 1);
428                 /* charresult is -1 if invalid, -2 if incomplete, and the unicode code
429                 point otherwise */
430         }
431         /* Silently return unknown characters as 0xFFFD, Replacement Character */
432         if(charresult == (gunichar)-1 || charresult == (gunichar)-2) 
433                 return 0xFFFD;
434         return charresult;
435 }
436
437 /* Internal function: Tell whether this code point is a Unicode newline. The
438 file pointer and eight-bit flag are included in case the newline is a CR 
439 (U+000D). If the next character is LF (U+000A) then it also belongs to the
440 newline. */
441 static gboolean
442 is_unicode_newline(glsi32 ch, FILE *fp, gboolean utf8)
443 {
444         if(ch == 0x0A || ch == 0x85 || ch == 0x0C || ch == 0x2028 || ch == 0x2029)
445                 return TRUE;
446         if(ch == 0x0D) {
447                 glsi32 ch2 = utf8? read_utf8_char_from_file(fp) : 
448                         read_ucs4be_char_from_file(fp);
449                 if(ch2 != 0x0A)
450                         fseek(fp, utf8? -1 : -4, SEEK_CUR);
451                 return TRUE;
452         }
453         return FALSE;
454 }
455
456 /* Internal function: Read one character from a stream. Returns a value which
457  can be returned unchanged by glk_get_char_stream_uni(), but 
458  glk_get_char_stream() must replace high values by the placeholder character. */
459 glsi32
460 get_char_stream_common(strid_t str)
461 {
462         switch(str->type)
463         {
464                 case STREAM_TYPE_MEMORY:
465                         if(str->unicode)
466                         {
467                                 if(!str->ubuffer || str->mark >= str->buflen)
468                                         return -1;
469                                 glui32 ch = str->ubuffer[str->mark++];
470                                 str->read_count++;
471                                 return ch;
472                         }
473                         else
474                         {
475                                 if(!str->buffer || str->mark >= str->buflen)
476                                         return -1;
477                                 unsigned char ch = str->buffer[str->mark++];
478                                 str->read_count++;
479                                 return ch;
480                         }
481                         break;
482                         
483                 case STREAM_TYPE_FILE:
484                         if(str->binary) 
485                         {
486                                 if(str->unicode) 
487                                 {
488                                         glsi32 ch = read_ucs4be_char_from_file(str->file_pointer);
489                                         if(ch == -1)
490                                                 return -1;
491                                         str->read_count++;
492                                         return ch;
493                                 }
494                                 else /* Regular file */
495                                 {
496                                         int ch = fgetc(str->file_pointer);
497                                         if(ch == EOF)
498                                                 return -1;
499                                         
500                                         str->read_count++;
501                                         return ch;
502                                 }
503                         }
504                         else /* Text mode is the same for Unicode and regular files */
505                         {
506                                 glsi32 ch = read_utf8_char_from_file(str->file_pointer);
507                                 if(ch == -1)
508                                         return -1;
509                                         
510                                 str->read_count++;
511                                 return ch;
512                         }
513                 default:
514                         g_warning("%s: Reading from this kind of stream unsupported.", __func__);
515                         return -1;
516         }
517 }
518
519 /**
520  * glk_get_char_stream:
521  * @str: An input stream.
522  *
523  * Reads one character from the stream @str. (There is no notion of a
524  * <quote>current input stream.</quote>) It is illegal for @str to be %NULL, or
525  * an output-only stream.
526  *
527  * The result will be between 0 and 255. As with all basic text functions, Glk
528  * assumes the Latin-1 encoding. See <link 
529  * linkend="chimara-Character-Encoding">Character Encoding</link>. If the end
530  * of the stream has been reached, the result will be -1. 
531  *
532  * <note><para>
533  *   Note that high-bit characters (128..255) are <emphasis>not</emphasis>
534  *   returned as negative numbers.
535  * </para></note>
536  *
537  * If the stream contains Unicode data &mdash; for example, if it was created
538  * with glk_stream_open_file_uni() or glk_stream_open_memory_uni() &mdash; then
539  * characters beyond 255 will be returned as 0x3F (<code>"?"</code>).
540  *
541  * It is usually more efficient to read several characters at once with
542  * glk_get_buffer_stream() or glk_get_line_stream(), as opposed to calling
543  * glk_get_char_stream() several times.
544  *
545  * Returns: A character value between 0 and 255, or -1 on end of stream.
546  */
547 glsi32
548 glk_get_char_stream(strid_t str)
549 {
550         g_return_val_if_fail(str != NULL, -1);
551         g_return_val_if_fail(str->file_mode == filemode_Read || str->file_mode == filemode_ReadWrite, -1);
552         
553         glsi32 ch = get_char_stream_common(str);
554         return (ch > 0xFF)? PLACEHOLDER : ch;
555 }
556
557 /**
558  * glk_get_char_stream_uni:
559  * @str: An input stream.
560  *
561  * Reads one character from the stream @str. The result will be between 0 and 
562  * 0x7FFFFFFF. If the end of the stream has been reached, the result will be -1.
563  *
564  * Returns: A value between 0 and 0x7FFFFFFF, or -1 on end of stream.
565  */
566 glsi32
567 glk_get_char_stream_uni(strid_t str)
568 {
569         g_return_val_if_fail(str != NULL, -1);
570         g_return_val_if_fail(str->file_mode == filemode_Read || str->file_mode == filemode_ReadWrite, -1);
571         
572         return get_char_stream_common(str);
573 }
574
575 /**
576  * glk_get_buffer_stream:
577  * @str: An input stream.
578  * @buf: A buffer with space for at least @len characters.
579  * @len: The number of characters to read.
580  *
581  * Reads @len characters from @str, unless the end of stream is reached first.
582  * No terminal null is placed in the buffer.
583  *
584  * Returns: The number of characters actually read.
585  */
586 glui32
587 glk_get_buffer_stream(strid_t str, char *buf, glui32 len)
588 {
589         g_return_val_if_fail(str != NULL, 0);
590         g_return_val_if_fail(str->file_mode == filemode_Read || str->file_mode == filemode_ReadWrite, 0);
591         g_return_val_if_fail(buf != NULL, 0);
592         
593         switch(str->type)
594         {
595                 case STREAM_TYPE_MEMORY:
596                 {
597                         int copycount = 0;
598                         if(str->unicode)
599                         {
600                                 while(copycount < len && str->ubuffer && str->mark < str->buflen) 
601                                 {
602                                         glui32 ch = str->ubuffer[str->mark++];
603                                         buf[copycount++] = (ch > 0xFF)? '?' : (char)ch;
604                                 }
605                         }
606                         else
607                         {
608                                 if(str->buffer) /* if not, copycount stays 0 */
609                                         copycount = MIN(len, str->buflen - str->mark);
610                                 memmove(buf, str->buffer + str->mark, copycount);
611                                 str->mark += copycount;
612                         }
613
614                         str->read_count += copycount;           
615                         return copycount;
616                 }       
617                 case STREAM_TYPE_FILE:
618                         if(str->binary) 
619                         {
620                                 if(str->unicode) /* Binary file with 4-byte characters */
621                                 {
622                                         /* Read len characters of 4 bytes each */
623                                         unsigned char *readbuffer = g_new0(unsigned char, 4 * len);
624                                         size_t count = fread(readbuffer, sizeof(unsigned char), 4 * len, str->file_pointer);
625                                         /* If there was an incomplete character */
626                                         if(count % 4 != 0) 
627                                         {
628                                                 count -= count % 4;
629                                                 g_warning("%s: Incomplete character in binary Unicode file.", __func__);
630                                         }
631                                         
632                                         int foo;
633                                         for(foo = 0; foo < count; foo += 4)
634                                         {
635                                                 glsi32 ch = readbuffer[foo] << 24
636                                                         | readbuffer[foo + 1] << 16
637                                                         | readbuffer[foo + 2] << 8
638                                                         | readbuffer[foo + 3];
639                                                 buf[foo / 4] = (ch > 255)? 0x3F : (char)ch;
640                                         }
641                                         g_free(readbuffer);
642                                         str->read_count += count / 4;
643                                         return count / 4;
644                                 }
645                                 else /* Regular binary file */
646                                 {
647                                         size_t count = fread(buf, sizeof(char), len, str->file_pointer);
648                                         str->read_count += count;
649                                         return count;
650                                 }
651                         }
652                         else /* Text mode is the same for Unicode and regular files */
653                         {
654                                 /* Do it character-by-character */
655                                 int foo;
656                                 for(foo = 0; foo < len; foo++)
657                                 {
658                                         glsi32 ch = read_utf8_char_from_file(str->file_pointer);
659                                         if(ch == -1)
660                                                 break;
661                                         str->read_count++;
662                                         buf[foo] = (ch > 0xFF)? 0x3F : (gchar)ch;
663                                 }
664                                 return foo;
665                         }
666                 default:
667                         g_warning("%s: Reading from this kind of stream unsupported.", __func__);
668                         return 0;
669         }
670 }
671
672 /**
673  * glk_get_buffer_stream_uni:
674  * @str: An input stream.
675  * @buf: A buffer with space for at least @len Unicode code points.
676  * @len: The number of characters to read.
677  *
678  * Reads @len Unicode characters from @str, unless the end of stream is reached 
679  * first. No terminal null is placed in the buffer.
680  *
681  * Returns: The number of Unicode characters actually read.
682  */
683 glui32
684 glk_get_buffer_stream_uni(strid_t str, glui32 *buf, glui32 len)
685 {
686         g_return_val_if_fail(str != NULL, 0);
687         g_return_val_if_fail(str->file_mode == filemode_Read || str->file_mode == filemode_ReadWrite, 0);
688         g_return_val_if_fail(buf != NULL, 0);
689         
690         switch(str->type)
691         {
692                 case STREAM_TYPE_MEMORY:
693                 {
694                         int copycount = 0;
695                         if(str->unicode)
696                         {
697                                 if(str->ubuffer) /* if not, copycount stays 0 */
698                                         copycount = MIN(len, str->buflen - str->mark);
699                                 memmove(buf, str->ubuffer + str->mark, copycount * 4);
700                                 str->mark += copycount;
701                         }
702                         else
703                         {
704                                 while(copycount < len && str->buffer && str->mark < str->buflen)
705                                 {
706                                         unsigned char ch = str->buffer[str->mark++];
707                                         buf[copycount++] = ch;
708                                 }
709                         }
710
711                         str->read_count += copycount;           
712                         return copycount;
713                 }       
714                 case STREAM_TYPE_FILE:
715                         if(str->binary) 
716                         {
717                                 if(str->unicode) /* Binary file with 4-byte characters */
718                                 {
719                                         /* Read len characters of 4 bytes each */
720                                         unsigned char *readbuffer = g_new0(unsigned char, 4 * len);
721                                         size_t count = fread(readbuffer, sizeof(unsigned char), 4 * len, str->file_pointer);
722                                         /* If there was an incomplete character */
723                                         if(count % 4 != 0) 
724                                         {
725                                                 count -= count % 4;
726                                                 g_warning("%s: Incomplete character in binary Unicode file.", __func__);
727                                         }
728                                         
729                                         int foo;
730                                         for(foo = 0; foo < count; foo += 4)
731                                                 buf[foo / 4] = readbuffer[foo] << 24
732                                                         | readbuffer[foo + 1] << 16
733                                                         | readbuffer[foo + 2] << 8
734                                                         | readbuffer[foo + 3];
735                                         g_free(readbuffer);
736                                         str->read_count += count / 4;
737                                         return count / 4;
738                                 }
739                                 else /* Regular binary file */
740                                 {
741                                         unsigned char *readbuffer = g_new0(unsigned char, len);
742                                         size_t count = fread(readbuffer, sizeof(unsigned char), len, str->file_pointer);
743                                         int foo;
744                                         for(foo = 0; foo < count; foo++)
745                                                 buf[foo] = readbuffer[foo];
746                                         g_free(readbuffer);
747                                         str->read_count += count;
748                                         return count;
749                                 }
750                         }
751                         else /* Text mode is the same for Unicode and regular files */
752                         {
753                                 /* Do it character-by-character */
754                                 int foo;
755                                 for(foo = 0; foo < len; foo++)
756                                 {
757                                         glsi32 ch = read_utf8_char_from_file(str->file_pointer);
758                                         if(ch == -1)
759                                                 break;
760                                         str->read_count++;
761                                         buf[foo] = ch;
762                                 }
763                                 return foo;
764                         }
765                 default:
766                         g_warning("%s: Reading from this kind of stream unsupported.", __func__);
767                         return 0;
768         }
769 }
770
771 /**
772  * glk_get_line_stream:
773  * @str: An input stream.
774  * @buf: A buffer with space for at least @len characters.
775  * @len: The number of characters to read, plus one.
776  *
777  * Reads characters from @str, until either 
778  * <inlineequation>
779  *   <alt>@len - 1</alt>
780  *   <mathphrase>@len - 1</mathphrase>
781  * </inlineequation>
782  * characters have been read or a newline has been read. It then puts a
783  * terminal null (<code>'\0'</code>) aracter on
784  * the end. It returns the number of characters actually read, including the
785  * newline (if there is one) but not including the terminal null.
786  *
787  * Returns: The number of characters actually read.
788  */
789 glui32
790 glk_get_line_stream(strid_t str, char *buf, glui32 len)
791 {
792         g_return_val_if_fail(str != NULL, 0);
793         g_return_val_if_fail(str->file_mode == filemode_Read || str->file_mode == filemode_ReadWrite, 0);
794         g_return_val_if_fail(buf != NULL, 0);
795
796         switch(str->type)
797         {
798                 case STREAM_TYPE_MEMORY:
799                 {
800                         int copycount = 0;
801                         if(str->unicode)
802                         {
803                                 /* Do it character-by-character */
804                                 while(copycount < len - 1 && str->ubuffer && str->mark < str->buflen) 
805                                 {
806                                         glui32 ch = str->ubuffer[str->mark++];
807                                         /* Check for Unicode newline; slightly different than
808                                         in file streams */
809                                         if(ch == 0x0A || ch == 0x85 || ch == 0x0C || ch == 0x2028 || ch == 0x2029)
810                                         {
811                                                 buf[copycount++] = '\n';
812                                                 break;
813                                         }
814                                         if(ch == 0x0D)
815                                         {
816                                                 if(str->ubuffer[str->mark] == 0x0A)
817                                                         str->mark++; /* skip past next newline */
818                                                 buf[copycount++] = '\n';
819                                                 break;
820                                         }
821                                         buf[copycount++] = (ch > 0xFF)? '?' : (char)ch;
822                                 }
823                                 buf[copycount] = '\0';
824                         }
825                         else
826                         {
827                                 if(str->buffer) /* if not, copycount stays 0 */
828                                         copycount = MIN(len - 1, str->buflen - str->mark);
829                                 char *endptr = memccpy(buf, str->buffer + str->mark, '\n', copycount);
830                                 if(endptr) /* newline was found */
831                                         copycount = endptr - buf; /* Real copy count */
832                                 buf[copycount] = '\0';
833                                 str->mark += copycount;
834                         }
835                         
836                         str->read_count += copycount;
837                         return copycount;
838                 }       
839                 case STREAM_TYPE_FILE:
840                         if(str->binary) 
841                         {
842                                 if(str->unicode) /* Binary file with 4-byte characters */
843                                 {
844                                         /* Do it character-by-character */
845                                         int foo;
846                                         for(foo = 0; foo < len - 1; foo++)
847                                         {
848                                                 glsi32 ch = read_ucs4be_char_from_file(str->file_pointer);
849                                                 if(ch == -1) 
850                                                 {
851                                                         buf[foo] = '\0';
852                                                         return foo - 1;
853                                                 }
854                                                 str->read_count++;
855                                                 if(is_unicode_newline(ch, str->file_pointer, FALSE))
856                                                 {
857                                                         buf[foo] = '\n';
858                                                         buf[foo + 1] = '\0';
859                                                         return foo;
860                                                 }
861                                                 buf[foo] = (ch > 0xFF)? '?' : (char)ch;
862                                         }
863                                         buf[len] = '\0';
864                                         return foo;
865                                 }
866                                 else /* Regular binary file */
867                                 {
868                                         fgets(buf, len, str->file_pointer);
869                                         str->read_count += strlen(buf);
870                                         return strlen(buf);
871                                 }
872                         }
873                         else /* Text mode is the same for Unicode and regular files */
874                         {
875                                 /* Do it character-by-character */
876                                 int foo;
877                                 for(foo = 0; foo < len - 1; foo++)
878                                 {
879                                         glsi32 ch = read_utf8_char_from_file(str->file_pointer);
880                                         if(ch == -1)
881                                         {
882                                                 buf[foo] = '\0';
883                                                 return foo - 1;
884                                         }
885                                         str->read_count++;
886                                         if(is_unicode_newline(ch, str->file_pointer, TRUE))
887                                         {
888                                                 buf[foo] = '\n';
889                                                 buf[foo + 1] = '\0';
890                                                 return foo;
891                                         }
892                                         buf[foo] = (ch > 0xFF)? 0x3F : (char)ch;
893                                 }
894                                 buf[len] = '\0';
895                                 return foo;
896                         }
897                 default:
898                         g_warning("%s: Reading from this kind of stream unsupported.", __func__);
899                         return 0;
900         }
901 }
902
903 /**
904  * glk_get_line_stream_uni:
905  * @str: An input stream.
906  * @buf: A buffer with space for at least @len Unicode code points.
907  * @len: The number of characters to read, plus one.
908  *
909  * Reads Unicode characters from @str, until either 
910  * <inlineequation>
911  *   <alt>@len - 1</alt>
912  *   <mathphrase>@len - 1</mathphrase>
913  * </inlineequation> 
914  * Unicode characters have been read or a newline has been read. It then puts a
915  * terminal null (a zero value) on the end.
916  *
917  * Returns: The number of characters actually read, including the newline (if
918  * there is one) but not including the terminal null.
919  */
920 glui32
921 glk_get_line_stream_uni(strid_t str, glui32 *buf, glui32 len)
922 {
923         g_return_val_if_fail(str != NULL, 0);
924         g_return_val_if_fail(str->file_mode == filemode_Read || str->file_mode == filemode_ReadWrite, 0);
925         g_return_val_if_fail(buf != NULL, 0);
926
927         switch(str->type)
928         {
929                 case STREAM_TYPE_MEMORY:
930                 {
931                         int copycount = 0;
932                         if(str->unicode)
933                         {
934                                 /* Do it character-by-character */
935                                 while(copycount < len - 1 && str->ubuffer && str->mark < str->buflen) 
936                                 {
937                                         glui32 ch = str->ubuffer[str->mark++];
938                                         /* Check for Unicode newline; slightly different than
939                                         in file streams */
940                                         if(ch == 0x0A || ch == 0x85 || ch == 0x0C || ch == 0x2028 || ch == 0x2029)
941                                         {
942                                                 buf[copycount++] = '\n';
943                                                 break;
944                                         }
945                                         if(ch == 0x0D)
946                                         {
947                                                 if(str->ubuffer[str->mark] == 0x0A)
948                                                         str->mark++; /* skip past next newline */
949                                                 buf[copycount++] = '\n';
950                                                 break;
951                                         }
952                                         buf[copycount++] = ch;
953                                 }
954                                 buf[copycount] = '\0';
955                         }
956                         else
957                         {
958                                 /* No recourse to memccpy(), so do it character-by-character */
959                                 while(copycount < len - 1 && str->buffer && str->mark < str->buflen)
960                                 {
961                                         gchar ch = str->buffer[str->mark++];
962                                         /* Check for newline */
963                                         if(ch == '\n') /* Also check for \r and \r\n? */
964                                         {
965                                                 buf[copycount++] = '\n';
966                                                 break;
967                                         }
968                                         buf[copycount++] = (unsigned char)ch;
969                                 }
970                                 buf[copycount] = 0;
971                         }
972                         
973                         str->read_count += copycount;
974                         return copycount;
975                 }       
976                 case STREAM_TYPE_FILE:
977                         if(str->binary) 
978                         {
979                                 if(str->unicode) /* Binary file with 4-byte characters */
980                                 {
981                                         /* Do it character-by-character */
982                                         int foo;
983                                         for(foo = 0; foo < len - 1; foo++)
984                                         {
985                                                 glsi32 ch = read_ucs4be_char_from_file(str->file_pointer);
986                                                 if(ch == -1) 
987                                                 {
988                                                         buf[foo] = 0;
989                                                         return foo - 1;
990                                                 }
991                                                 str->read_count++;
992                                                 if(is_unicode_newline(ch, str->file_pointer, FALSE))
993                                                 {
994                                                         buf[foo] = ch; /* Preserve newline types??? */
995                                                         buf[foo + 1] = 0;
996                                                         return foo;
997                                                 }
998                                                 buf[foo] = ch;
999                                         }
1000                                         buf[len] = 0;
1001                                         return foo;
1002                                 }
1003                                 else /* Regular binary file */
1004                                 {
1005                                         gchar *readbuffer = g_new0(gchar, len);
1006                                         fgets(readbuffer, len, str->file_pointer);
1007                                         glui32 count = strlen(readbuffer) + 1; /* Copy terminator */
1008                                         int foo;
1009                                         for(foo = 0; foo < count; foo++)
1010                                                 buf[foo] = (unsigned char)(readbuffer[foo]);
1011                                         str->read_count += count;
1012                                         return count;
1013                                 }
1014                         }
1015                         else /* Text mode is the same for Unicode and regular files */
1016                         {
1017                                 /* Do it character-by-character */
1018                                 int foo;
1019                                 for(foo = 0; foo < len - 1; foo++)
1020                                 {
1021                                         glsi32 ch = read_utf8_char_from_file(str->file_pointer);
1022                                         if(ch == -1)
1023                                         {
1024                                                 buf[foo] = 0;
1025                                                 return foo - 1;
1026                                         }
1027                                         str->read_count++;
1028                                         if(is_unicode_newline(ch, str->file_pointer, TRUE))
1029                                         {
1030                                                 buf[foo] = ch; /* Preserve newline types??? */
1031                                                 buf[foo + 1] = 0;
1032                                                 return foo;
1033                                         }
1034                                         buf[foo] = ch;
1035                                 }
1036                                 buf[len] = 0;
1037                                 return foo;
1038                         }
1039                 default:
1040                         g_warning("%s: Reading from this kind of stream unsupported.", __func__);
1041                         return 0;
1042         }
1043 }
1044
1045 /*
1046  *
1047  **************** SEEKING FUNCTIONS ********************************************
1048  *
1049  */
1050
1051 /**
1052  * glk_stream_get_position:
1053  * @str: A file or memory stream.
1054  *
1055  * Returns the position of the read/write mark in @str. For memory streams and
1056  * binary file streams, this is exactly the number of characters read or written
1057  * from the beginning of the stream (unless you have moved the mark with
1058  * glk_stream_set_position().) For text file streams, matters are more 
1059  * ambiguous, since (for example) writing one byte to a text file may store more
1060  * than one character in the platform's native encoding. You can only be sure
1061  * that the position increases as you read or write to the file.
1062  *
1063  * Additional complication: for Latin-1 memory and file streams, a character is
1064  * a byte. For Unicode memory and file streams (those created by
1065  * glk_stream_open_file_uni() and glk_stream_open_memory_uni()), a character is
1066  * a 32-bit word. So in a binary Unicode file, positions are multiples of four
1067  * bytes.
1068  *
1069  * <note><para>
1070  *   If this bothers you, don't use binary Unicode files. I don't think they're
1071  *   good for much anyhow.
1072  * </para></note>
1073  *
1074  * Returns: position of the read/write mark in @str.
1075  */
1076 glui32
1077 glk_stream_get_position(strid_t str)
1078 {
1079         g_return_val_if_fail(str != NULL, 0);
1080         
1081         switch(str->type)
1082         {
1083                 case STREAM_TYPE_MEMORY:
1084                         return str->mark;
1085                 case STREAM_TYPE_FILE:
1086                         return ftell(str->file_pointer);
1087                 default:
1088                         g_warning("%s: Seeking not supported on this type of stream.",
1089                                 __func__);
1090                         return 0;
1091         }
1092 }
1093
1094 /**
1095  * glk_stream_set_position:
1096  * @str: A file or memory stream.
1097  * @pos: The position to set the mark to, relative to @seekmode.
1098  * @seekmode: One of #seekmode_Start, #seekmode_Current, or #seekmode_End.
1099  *
1100  * Sets the position of the read/write mark in @str. The position is controlled
1101  * by @pos, and the meaning of @pos is controlled by @seekmode. See the
1102  * <code>seekmode_</code> constants below.
1103  *
1104  * It is illegal to specify a position before the beginning or after the end of
1105  * the file.
1106  *
1107  * In binary files, the mark position is exact &mdash; it corresponds with the
1108  * number of characters you have read or written. In text files, this mapping 
1109  * can vary, because of linefeed conventions or other character-set 
1110  * approximations. See <link linkend="chimara-Streams">Streams</link>.
1111  * glk_stream_set_position() and glk_stream_get_position() measure positions in
1112  * the platform's native encoding &mdash; after character cookery. Therefore,
1113  * in a text stream, it is safest to use glk_stream_set_position() only to move
1114  * to the beginning or end of a file, or to a position determined by
1115  * glk_stream_get_position().
1116  *
1117  * Again, in Latin-1 streams, characters are bytes. In Unicode streams,
1118  * characters are 32-bit words, or four bytes each.
1119  */
1120 void
1121 glk_stream_set_position(strid_t str, glsi32 pos, glui32 seekmode)
1122 {
1123         g_return_if_fail(str != NULL);
1124         g_return_if_fail(!(seekmode == seekmode_Start && pos < 0));
1125         g_return_if_fail(!(seekmode == seekmode_End || pos > 0));
1126         
1127         switch(str->type)
1128         {
1129                 case STREAM_TYPE_MEMORY:
1130                         switch(seekmode)
1131                         {
1132                                 case seekmode_Start:   str->mark = pos;  break;
1133                                 case seekmode_Current: str->mark += pos; break;
1134                                 case seekmode_End:     str->mark = str->buflen + pos; break;
1135                                 default:
1136                                         g_assert_not_reached();
1137                                         return;
1138                         }
1139                         break;
1140                 case STREAM_TYPE_FILE:
1141                 {
1142                         int whence;
1143                         switch(seekmode)
1144                         {
1145                                 case seekmode_Start:   whence = SEEK_SET; break;
1146                                 case seekmode_Current: whence = SEEK_CUR; break;
1147                                 case seekmode_End:     whence = SEEK_END; break;
1148                                 default:
1149                                         g_assert_not_reached();
1150                                         return;
1151                         }
1152                         fseek(str->file_pointer, pos, whence);
1153                         break;
1154                 }
1155                 default:
1156                         g_warning("%s: Seeking not supported on this type of stream.", __func__);
1157                         return;
1158         }
1159 }
1160