- Added pkg-config files (.pc.in), so after the library is installed you can
[rodin/chimara.git] / src / strio.c
1 #include "charset.h"
2 #include "magic.h"
3 #include "stream.h"
4 #include <errno.h>
5 #include <stdio.h>
6 #include <string.h>
7 #include <glib.h>
8 #include <glib/gstdio.h>
9
10 /*
11  *
12  **************** WRITING FUNCTIONS ********************************************
13  *
14  */
15
16 /* Internal function: write a UTF-8 string to a text grid window's text buffer. */
17 static void
18 write_utf8_to_grid(winid_t win, gchar *s)
19 {
20     /* Number of characters to insert */
21     glong length = g_utf8_strlen(s, -1);
22     glong chars_left = length;
23     
24     gdk_threads_enter();
25     
26     GtkTextBuffer *buffer = gtk_text_view_get_buffer( GTK_TEXT_VIEW(win->widget) );
27     GtkTextMark *cursor = gtk_text_buffer_get_mark(buffer, "cursor_position");
28     
29     /* Get cursor position */
30     GtkTextIter start;
31     gtk_text_buffer_get_iter_at_mark(buffer, &start, cursor);
32     /* Spaces available on this line */
33     gint available_space = win->width - gtk_text_iter_get_line_offset(&start);
34     
35     while(chars_left > available_space && !gtk_text_iter_is_end(&start))
36     {
37         GtkTextIter end = start;
38         gtk_text_iter_forward_to_line_end(&end);
39         gtk_text_buffer_delete(buffer, &start, &end);
40         gtk_text_buffer_insert(buffer, &start, s + (length - chars_left), available_space);
41         chars_left -= available_space;
42         gtk_text_iter_forward_line(&start);
43         available_space = win->width;
44     }
45     if(!gtk_text_iter_is_end(&start))
46     {
47         GtkTextIter end = start;
48         gtk_text_iter_forward_chars(&end, chars_left);
49         gtk_text_buffer_delete(buffer, &start, &end);
50         gtk_text_buffer_insert(buffer, &start, s + (length - chars_left), -1);
51     }
52     
53     gtk_text_buffer_move_mark(buffer, cursor, &start);
54     
55     gdk_threads_leave();
56 }
57
58 /* Internal function: write a UTF-8 string to a text buffer window's text buffer. */
59 static void
60 write_utf8_to_window(winid_t win, gchar *s)
61 {
62         gdk_threads_enter();
63
64         GtkTextBuffer *buffer = gtk_text_view_get_buffer( GTK_TEXT_VIEW(win->widget) );
65
66         GtkTextIter iter;
67         gtk_text_buffer_get_end_iter(buffer, &iter);
68         gtk_text_buffer_insert(buffer, &iter, s, -1);
69
70         gdk_threads_leave();
71 }
72
73 /* Internal function: write a Latin-1 buffer with length to a stream. */
74 static void
75 write_buffer_to_stream(strid_t str, gchar *buf, glui32 len)
76 {
77         switch(str->type)
78         {
79                 case STREAM_TYPE_WINDOW:
80                         /* Each window type has a different way of printing to it */
81                         switch(str->window->type)
82                         {
83                                 /* Printing to these windows' streams does nothing */
84                                 case wintype_Blank:
85                                 case wintype_Pair:
86                                 case wintype_Graphics:
87                                         str->write_count += len;
88                                         break;
89                                         
90                             /* Text grid window */
91                             case wintype_TextGrid:
92                             {
93                                 gchar *utf8 = convert_latin1_to_utf8(buf, len);
94                                 if(utf8 != NULL)
95                                 {
96                                     /* FIXME: What to do if string contains \n? Split the input string at newlines and write each string separately? */
97                                     write_utf8_to_grid(str->window, utf8);
98                                     g_free(utf8);
99                                 }
100                             }
101                                 str->write_count += len;
102                                 break;
103                                         
104                                 /* Text buffer window */        
105                                 case wintype_TextBuffer:
106                                 {
107                                         gchar *utf8 = convert_latin1_to_utf8(buf, len);
108                                         if(utf8 != NULL)
109                                         {
110                                                 write_utf8_to_window(str->window, utf8);
111                                                 g_free(utf8);
112                                         }
113                                 }       
114                                         str->write_count += len;
115                                         break;
116                                 default:
117                                         ILLEGAL_PARAM("Unknown window type: %u", str->window->type);
118                         }
119                         
120                         /* Now write the same buffer to the window's echo stream */
121                         if(str->window->echo_stream != NULL)
122                                 write_buffer_to_stream(str->window->echo_stream, buf, len);
123                         
124                         break;
125                         
126                 case STREAM_TYPE_MEMORY:
127                         if(str->unicode && str->ubuffer)
128                         {
129                                 int foo = 0;
130                                 while(str->mark < str->buflen && foo < len)
131                                         str->ubuffer[str->mark++] = (unsigned char)buf[foo++];
132                         }
133                         if(!str->unicode && str->buffer)
134                         {
135                                 int copycount = MIN(len, str->buflen - str->mark);
136                                 memmove(str->buffer + str->mark, buf, copycount);
137                                 str->mark += copycount;
138                         }
139
140                         str->write_count += len;
141                         break;
142                         
143                 case STREAM_TYPE_FILE:
144                         if(str->binary) 
145                         {
146                                 if(str->unicode) 
147                                 {
148                                         gchar *writebuffer = convert_latin1_to_ucs4be_string(buf, len);
149                                         fwrite(writebuffer, sizeof(gchar), len * 4, str->file_pointer);
150                                         g_free(writebuffer);
151                                 } 
152                                 else /* Regular file */
153                                 {
154                                         fwrite(buf, sizeof(gchar), len, str->file_pointer);
155                                 }
156                         }
157                         else /* Text mode is the same for Unicode and regular files */
158                         {
159                                 gchar *utf8 = convert_latin1_to_utf8(buf, len);
160                                 if(utf8 != NULL)
161                                 {
162                                         g_fprintf(str->file_pointer, "%s", utf8);
163                                         g_free(utf8);
164                                 }
165                         }
166                         
167                         str->write_count += len;
168                         break;
169                 default:
170                         ILLEGAL_PARAM("Unknown stream type: %u", str->type);
171         }
172 }
173
174 /* Internal function: write a Unicode buffer with length to a stream. */
175 static void
176 write_buffer_to_stream_uni(strid_t str, glui32 *buf, glui32 len)
177 {
178         switch(str->type)
179         {
180                 case STREAM_TYPE_WINDOW:
181                         /* Each window type has a different way of printing to it */
182                         switch(str->window->type)
183                         {
184                                 /* Printing to these windows' streams does nothing */
185                                 case wintype_Blank:
186                                 case wintype_Pair:
187                                 case wintype_Graphics:
188                                         str->write_count += len;
189                                         break;
190                                         
191                             /* Text grid window */
192                             case wintype_TextGrid:
193                             {
194                                 gchar *utf8 = convert_ucs4_to_utf8(buf, len);
195                                 if(utf8 != NULL)
196                                 {
197                                     /* FIXME: What to do if string contains \n? Split the input string at newlines and write each string separately? */
198                                     write_utf8_to_grid(str->window, utf8);
199                                     g_free(utf8);
200                                 }
201                             }
202                                 str->write_count += len;
203                                 break;
204                                         
205                                 /* Text buffer window */        
206                                 case wintype_TextBuffer:
207                                 {
208                                         gchar *utf8 = convert_ucs4_to_utf8(buf, len);
209                                         if(utf8 != NULL)
210                                         {
211                                                 write_utf8_to_window(str->window, utf8);
212                                                 g_free(utf8);
213                                         }
214                                 }       
215                                         str->write_count += len;
216                                         break;
217                                 default:
218                                         ILLEGAL_PARAM("Unknown window type: %u", str->window->type);
219                         }
220                         
221                         /* Now write the same buffer to the window's echo stream */
222                         if(str->window->echo_stream != NULL)
223                                 write_buffer_to_stream_uni(str->window->echo_stream, buf, len);
224                         
225                         break;
226                         
227                 case STREAM_TYPE_MEMORY:
228                         if(str->unicode && str->ubuffer)
229                         {
230                                 int copycount = MIN(len, str->buflen - str->mark);
231                                 memmove(str->ubuffer + str->mark, buf, copycount * sizeof(glui32));
232                                 str->mark += copycount;
233                         }
234                         if(!str->unicode && str->buffer)
235                         {
236                                 gchar *latin1 = convert_ucs4_to_latin1_binary(buf, len);
237                                 int copycount = MIN(len, str->buflen - str->mark);
238                                 memmove(str->buffer + str->mark, latin1, copycount);
239                                 g_free(latin1);
240                                 str->mark += copycount;
241                         }
242
243                         str->write_count += len;
244                         break;
245                         
246                 case STREAM_TYPE_FILE:
247                         if(str->binary) 
248                         {
249                                 if(str->unicode) 
250                                 {
251                                         gchar *writebuffer = convert_ucs4_to_ucs4be_string(buf, len);
252                                         fwrite(writebuffer, sizeof(gchar), len * 4, str->file_pointer);
253                                         g_free(writebuffer);
254                                 } 
255                                 else /* Regular file */
256                                 {
257                                         gchar *latin1 = convert_ucs4_to_latin1_binary(buf, len);
258                                         fwrite(latin1, sizeof(gchar), len, str->file_pointer);
259                                         g_free(latin1);
260                                 }
261                         }
262                         else /* Text mode is the same for Unicode and regular files */
263                         {
264                                 gchar *utf8 = convert_ucs4_to_utf8(buf, len);
265                                 if(utf8 != NULL) 
266                                 {
267                                         g_fprintf(str->file_pointer, "%s", utf8);
268                                         g_free(utf8);
269                                 }
270                         }
271                         
272                         str->write_count += len;
273                         break;
274                 default:
275                         ILLEGAL_PARAM("Unknown stream type: %u", str->type);
276         }
277 }
278
279 /**
280  * glk_put_char_stream:
281  * @str: An output stream.
282  * @ch: A character in Latin-1 encoding.
283  *
284  * The same as glk_put_char(), except that you specify a stream @str to print 
285  * to, instead of using the current stream. It is illegal for @str to be %NULL,
286  * or an input-only stream.
287  */
288 void
289 glk_put_char_stream(strid_t str, unsigned char ch)
290 {
291         VALID_STREAM(str, return);
292         g_return_if_fail(str->file_mode != filemode_Read);
293         
294         write_buffer_to_stream(str, (gchar *)&ch, 1);
295 }
296
297 /**
298  * glk_put_char_stream_uni:
299  * @str: An output stream.
300  * @ch: A Unicode code point.
301  *
302  * The same as glk_put_char_uni(), except that you specify a stream @str to
303  * print to, instead of using the current stream. It is illegal for @str to be 
304  * %NULL, or an input-only stream.
305  */
306 void
307 glk_put_char_stream_uni(strid_t str, glui32 ch)
308 {
309         VALID_STREAM(str, return);
310         g_return_if_fail(str->file_mode != filemode_Read);
311         
312         write_buffer_to_stream_uni(str, &ch, 1);
313 }
314
315 /**
316  * glk_put_string_stream:
317  * @str: An output stream.
318  * @s: A null-terminated string in Latin-1 encoding.
319  *
320  * The same as glk_put_string(), except that you specify a stream @str to print 
321  * to, instead of using the current stream. It is illegal for @str to be %NULL,
322  * or an input-only stream.
323  */
324 void
325 glk_put_string_stream(strid_t str, char *s)
326 {
327         VALID_STREAM(str, return);
328         g_return_if_fail(str->file_mode != filemode_Read);
329
330         write_buffer_to_stream(str, s, strlen(s));
331 }
332
333 /**
334  * glk_put_string_stream_uni:
335  * @str: An output stream.
336  * @s: A null-terminated array of Unicode code points.
337  *
338  * The same as glk_put_string_uni(), except that you specify a stream @str to
339  * print to, instead of using the current stream. It is illegal for @str to be 
340  * %NULL, or an input-only stream.
341  */
342 void
343 glk_put_string_stream_uni(strid_t str, glui32 *s)
344 {
345         VALID_STREAM(str, return);
346         g_return_if_fail(str->file_mode != filemode_Read);
347         
348         /* An impromptu strlen() for glui32 arrays */
349         glong len = 0;
350         glui32 *ptr = s;
351         while(*ptr++)
352                 len++;
353         write_buffer_to_stream_uni(str, s, len);
354 }
355
356 /**
357  * glk_put_buffer_stream:
358  * @str: An output stream.
359  * @buf: An array of characters in Latin-1 encoding.
360  * @len: Length of @buf.
361  *
362  * The same as glk_put_buffer(), except that you specify a stream @str to print 
363  * to, instead of using the current stream. It is illegal for @str to be %NULL,
364  * or an input-only stream.
365  */
366 void
367 glk_put_buffer_stream(strid_t str, char *buf, glui32 len)
368 {
369         VALID_STREAM(str, return);
370         g_return_if_fail(str->file_mode != filemode_Read);
371         
372         write_buffer_to_stream(str, buf, len);
373 }
374
375 /**
376  * glk_put_buffer_stream_uni:
377  * @str: An output stream.
378  * @buf: An array of Unicode code points.
379  * @len: Length of @buf.
380  *
381  * The same as glk_put_buffer_uni(), except that you specify a stream @str to
382  * print to, instead of using the current stream. It is illegal for @str to be 
383  * %NULL, or an input-only stream.
384  */
385 void
386 glk_put_buffer_stream_uni(strid_t str, glui32 *buf, glui32 len)
387 {
388         VALID_STREAM(str, return);
389         g_return_if_fail(str->file_mode != filemode_Read);
390         
391         write_buffer_to_stream_uni(str, buf, len);
392 }
393
394 /*
395  *
396  **************** READING FUNCTIONS ********************************************
397  *
398  */
399
400 /* Internal function: Read one big-endian four-byte character from file fp and
401 return it as a Unicode code point, or -1 on EOF */
402 static glsi32
403 read_ucs4be_char_from_file(FILE *fp)
404 {
405         unsigned char readbuffer[4];
406         if(fread(readbuffer, sizeof(unsigned char), 4, fp) < 4)
407                 return -1; /* EOF */
408         return
409                 readbuffer[0] << 24 | 
410                 readbuffer[1] << 16 | 
411                 readbuffer[2] << 8  | 
412                 readbuffer[3];
413 }
414
415 /* Internal function: Read one UTF-8 character, which may be more than one byte,
416 from file fp and return it as a Unicode code point, or -1 on EOF */
417 static glsi32
418 read_utf8_char_from_file(FILE *fp)
419 {
420         gchar readbuffer[4] = {0, 0, 0, 0}; /* Max UTF-8 width */
421         int foo;
422         gunichar charresult = (gunichar)-2;
423         for(foo = 0; foo < 4 && charresult == (gunichar)-2; foo++) 
424         {
425                 int ch = fgetc(fp);
426                 if(ch == EOF)
427                         return -1;
428                 readbuffer[foo] = (gchar)ch;
429                 charresult = g_utf8_get_char_validated(readbuffer, foo + 1);
430                 /* charresult is -1 if invalid, -2 if incomplete, and the unicode code
431                 point otherwise */
432         }
433         /* Silently return unknown characters as 0xFFFD, Replacement Character */
434         if(charresult == (gunichar)-1 || charresult == (gunichar)-2) 
435                 return 0xFFFD;
436         return charresult;
437 }
438
439 /* Internal function: Tell whether this code point is a Unicode newline. The
440 file pointer and eight-bit flag are included in case the newline is a CR 
441 (U+000D). If the next character is LF (U+000A) then it also belongs to the
442 newline. */
443 static gboolean
444 is_unicode_newline(glsi32 ch, FILE *fp, gboolean utf8)
445 {
446         if(ch == 0x0A || ch == 0x85 || ch == 0x0C || ch == 0x2028 || ch == 0x2029)
447                 return TRUE;
448         if(ch == 0x0D) {
449                 glsi32 ch2 = utf8? read_utf8_char_from_file(fp) : 
450                         read_ucs4be_char_from_file(fp);
451                 if(ch2 != 0x0A)
452                         if(fseek(fp, utf8? -1 : -4, SEEK_CUR) == -1);
453                                 WARNING_S("Seek failed on stream", g_strerror(errno) );
454                 return TRUE;
455         }
456         return FALSE;
457 }
458
459 /* Internal function: Read one character from a stream. Returns a value which
460  can be returned unchanged by glk_get_char_stream_uni(), but 
461  glk_get_char_stream() must replace high values by the placeholder character. */
462 static glsi32
463 get_char_stream_common(strid_t str)
464 {
465         switch(str->type)
466         {
467                 case STREAM_TYPE_MEMORY:
468                         if(str->unicode)
469                         {
470                                 if(!str->ubuffer || str->mark >= str->buflen)
471                                         return -1;
472                                 glui32 ch = str->ubuffer[str->mark++];
473                                 str->read_count++;
474                                 return ch;
475                         }
476                         else
477                         {
478                                 if(!str->buffer || str->mark >= str->buflen)
479                                         return -1;
480                                 unsigned char ch = str->buffer[str->mark++];
481                                 str->read_count++;
482                                 return ch;
483                         }
484                         break;
485                         
486                 case STREAM_TYPE_FILE:
487                         if(str->binary) 
488                         {
489                                 if(str->unicode) 
490                                 {
491                                         glsi32 ch = read_ucs4be_char_from_file(str->file_pointer);
492                                         if(ch == -1)
493                                                 return -1;
494                                         str->read_count++;
495                                         return ch;
496                                 }
497                                 else /* Regular file */
498                                 {
499                                         int ch = fgetc(str->file_pointer);
500                                         if(ch == EOF)
501                                                 return -1;
502                                         
503                                         str->read_count++;
504                                         return ch;
505                                 }
506                         }
507                         else /* Text mode is the same for Unicode and regular files */
508                         {
509                                 glsi32 ch = read_utf8_char_from_file(str->file_pointer);
510                                 if(ch == -1)
511                                         return -1;
512                                         
513                                 str->read_count++;
514                                 return ch;
515                         }
516                 default:
517                         ILLEGAL_PARAM("Reading illegal on stream type: %u", str->type);
518                         return -1;
519         }
520 }
521
522 /**
523  * glk_get_char_stream:
524  * @str: An input stream.
525  *
526  * Reads one character from the stream @str. (There is no notion of a
527  * <quote>current input stream.</quote>) It is illegal for @str to be %NULL, or
528  * an output-only stream.
529  *
530  * The result will be between 0 and 255. As with all basic text functions, Glk
531  * assumes the Latin-1 encoding. See <link 
532  * linkend="chimara-Character-Encoding">Character Encoding</link>. If the end
533  * of the stream has been reached, the result will be -1. 
534  *
535  * <note><para>
536  *   Note that high-bit characters (128..255) are <emphasis>not</emphasis>
537  *   returned as negative numbers.
538  * </para></note>
539  *
540  * If the stream contains Unicode data &mdash; for example, if it was created
541  * with glk_stream_open_file_uni() or glk_stream_open_memory_uni() &mdash; then
542  * characters beyond 255 will be returned as 0x3F (<code>"?"</code>).
543  *
544  * It is usually more efficient to read several characters at once with
545  * glk_get_buffer_stream() or glk_get_line_stream(), as opposed to calling
546  * glk_get_char_stream() several times.
547  *
548  * Returns: A character value between 0 and 255, or -1 on end of stream.
549  */
550 glsi32
551 glk_get_char_stream(strid_t str)
552 {
553         VALID_STREAM(str, return -1);
554         g_return_val_if_fail(str->file_mode == filemode_Read || str->file_mode == filemode_ReadWrite, -1);
555         
556         glsi32 ch = get_char_stream_common(str);
557         return (ch > 0xFF)? PLACEHOLDER : ch;
558 }
559
560 /**
561  * glk_get_char_stream_uni:
562  * @str: An input stream.
563  *
564  * Reads one character from the stream @str. The result will be between 0 and 
565  * 0x7FFFFFFF. If the end of the stream has been reached, the result will be -1.
566  *
567  * Returns: A value between 0 and 0x7FFFFFFF, or -1 on end of stream.
568  */
569 glsi32
570 glk_get_char_stream_uni(strid_t str)
571 {
572         VALID_STREAM(str, return -1);
573         g_return_val_if_fail(str->file_mode == filemode_Read || str->file_mode == filemode_ReadWrite, -1);
574         
575         return get_char_stream_common(str);
576 }
577
578 /**
579  * glk_get_buffer_stream:
580  * @str: An input stream.
581  * @buf: A buffer with space for at least @len characters.
582  * @len: The number of characters to read.
583  *
584  * Reads @len characters from @str, unless the end of stream is reached first.
585  * No terminal null is placed in the buffer.
586  *
587  * Returns: The number of characters actually read.
588  */
589 glui32
590 glk_get_buffer_stream(strid_t str, char *buf, glui32 len)
591 {
592         VALID_STREAM(str, return 0);
593         g_return_val_if_fail(str->file_mode == filemode_Read || str->file_mode == filemode_ReadWrite, 0);
594         g_return_val_if_fail(buf != NULL, 0);
595         
596         switch(str->type)
597         {
598                 case STREAM_TYPE_MEMORY:
599                 {
600                         int copycount = 0;
601                         if(str->unicode)
602                         {
603                                 while(copycount < len && str->ubuffer && str->mark < str->buflen) 
604                                 {
605                                         glui32 ch = str->ubuffer[str->mark++];
606                                         buf[copycount++] = (ch > 0xFF)? '?' : (char)ch;
607                                 }
608                         }
609                         else
610                         {
611                                 if(str->buffer) /* if not, copycount stays 0 */
612                                         copycount = MIN(len, str->buflen - str->mark);
613                                 memmove(buf, str->buffer + str->mark, copycount);
614                                 str->mark += copycount;
615                         }
616
617                         str->read_count += copycount;           
618                         return copycount;
619                 }       
620                 case STREAM_TYPE_FILE:
621                         if(str->binary) 
622                         {
623                                 if(str->unicode) /* Binary file with 4-byte characters */
624                                 {
625                                         /* Read len characters of 4 bytes each */
626                                         unsigned char *readbuffer = g_new0(unsigned char, 4 * len);
627                                         size_t count = fread(readbuffer, sizeof(unsigned char), 4 * len, str->file_pointer);
628                                         /* If there was an incomplete character */
629                                         if(count % 4 != 0) 
630                                         {
631                                                 count -= count % 4;
632                                                 WARNING("Incomplete character in binary Unicode file");
633                                         }
634                                         
635                                         int foo;
636                                         for(foo = 0; foo < count; foo += 4)
637                                         {
638                                                 glsi32 ch = readbuffer[foo] << 24
639                                                         | readbuffer[foo + 1] << 16
640                                                         | readbuffer[foo + 2] << 8
641                                                         | readbuffer[foo + 3];
642                                                 buf[foo / 4] = (ch > 255)? 0x3F : (char)ch;
643                                         }
644                                         g_free(readbuffer);
645                                         str->read_count += count / 4;
646                                         return count / 4;
647                                 }
648                                 else /* Regular binary file */
649                                 {
650                                         size_t count = fread(buf, sizeof(char), len, str->file_pointer);
651                                         str->read_count += count;
652                                         return count;
653                                 }
654                         }
655                         else /* Text mode is the same for Unicode and regular files */
656                         {
657                                 /* Do it character-by-character */
658                                 int foo;
659                                 for(foo = 0; foo < len; foo++)
660                                 {
661                                         glsi32 ch = read_utf8_char_from_file(str->file_pointer);
662                                         if(ch == -1)
663                                                 break;
664                                         str->read_count++;
665                                         buf[foo] = (ch > 0xFF)? 0x3F : (gchar)ch;
666                                 }
667                                 return foo;
668                         }
669                 default:
670                         ILLEGAL_PARAM("Reading illegal on stream type: %u", str->type);
671                         return 0;
672         }
673 }
674
675 /**
676  * glk_get_buffer_stream_uni:
677  * @str: An input stream.
678  * @buf: A buffer with space for at least @len Unicode code points.
679  * @len: The number of characters to read.
680  *
681  * Reads @len Unicode characters from @str, unless the end of stream is reached 
682  * first. No terminal null is placed in the buffer.
683  *
684  * Returns: The number of Unicode characters actually read.
685  */
686 glui32
687 glk_get_buffer_stream_uni(strid_t str, glui32 *buf, glui32 len)
688 {
689         VALID_STREAM(str, return 0);
690         g_return_val_if_fail(str->file_mode == filemode_Read || str->file_mode == filemode_ReadWrite, 0);
691         g_return_val_if_fail(buf != NULL, 0);
692         
693         switch(str->type)
694         {
695                 case STREAM_TYPE_MEMORY:
696                 {
697                         int copycount = 0;
698                         if(str->unicode)
699                         {
700                                 if(str->ubuffer) /* if not, copycount stays 0 */
701                                         copycount = MIN(len, str->buflen - str->mark);
702                                 memmove(buf, str->ubuffer + str->mark, copycount * 4);
703                                 str->mark += copycount;
704                         }
705                         else
706                         {
707                                 while(copycount < len && str->buffer && str->mark < str->buflen)
708                                 {
709                                         unsigned char ch = str->buffer[str->mark++];
710                                         buf[copycount++] = ch;
711                                 }
712                         }
713
714                         str->read_count += copycount;           
715                         return copycount;
716                 }       
717                 case STREAM_TYPE_FILE:
718                         if(str->binary) 
719                         {
720                                 if(str->unicode) /* Binary file with 4-byte characters */
721                                 {
722                                         /* Read len characters of 4 bytes each */
723                                         unsigned char *readbuffer = g_new0(unsigned char, 4 * len);
724                                         size_t count = fread(readbuffer, sizeof(unsigned char), 4 * len, str->file_pointer);
725                                         /* If there was an incomplete character */
726                                         if(count % 4 != 0) 
727                                         {
728                                                 count -= count % 4;
729                                                 WARNING("Incomplete character in binary Unicode file");
730                                         }
731                                         
732                                         int foo;
733                                         for(foo = 0; foo < count; foo += 4)
734                                                 buf[foo / 4] = readbuffer[foo] << 24
735                                                         | readbuffer[foo + 1] << 16
736                                                         | readbuffer[foo + 2] << 8
737                                                         | readbuffer[foo + 3];
738                                         g_free(readbuffer);
739                                         str->read_count += count / 4;
740                                         return count / 4;
741                                 }
742                                 else /* Regular binary file */
743                                 {
744                                         unsigned char *readbuffer = g_new0(unsigned char, len);
745                                         size_t count = fread(readbuffer, sizeof(unsigned char), len, str->file_pointer);
746                                         int foo;
747                                         for(foo = 0; foo < count; foo++)
748                                                 buf[foo] = readbuffer[foo];
749                                         g_free(readbuffer);
750                                         str->read_count += count;
751                                         return count;
752                                 }
753                         }
754                         else /* Text mode is the same for Unicode and regular files */
755                         {
756                                 /* Do it character-by-character */
757                                 int foo;
758                                 for(foo = 0; foo < len; foo++)
759                                 {
760                                         glsi32 ch = read_utf8_char_from_file(str->file_pointer);
761                                         if(ch == -1)
762                                                 break;
763                                         str->read_count++;
764                                         buf[foo] = ch;
765                                 }
766                                 return foo;
767                         }
768                 default:
769                         ILLEGAL_PARAM("Reading illegal on stream type: %u", str->type);
770                         return 0;
771         }
772 }
773
774 /**
775  * glk_get_line_stream:
776  * @str: An input stream.
777  * @buf: A buffer with space for at least @len characters.
778  * @len: The number of characters to read, plus one.
779  *
780  * Reads characters from @str, until either 
781  * <inlineequation>
782  *   <alt>@len - 1</alt>
783  *   <mathphrase>@len - 1</mathphrase>
784  * </inlineequation>
785  * characters have been read or a newline has been read. It then puts a
786  * terminal null (<code>'\0'</code>) aracter on
787  * the end. It returns the number of characters actually read, including the
788  * newline (if there is one) but not including the terminal null.
789  *
790  * Returns: The number of characters actually read.
791  */
792 glui32
793 glk_get_line_stream(strid_t str, char *buf, glui32 len)
794 {
795         VALID_STREAM(str, return 0);
796         g_return_val_if_fail(str->file_mode == filemode_Read || str->file_mode == filemode_ReadWrite, 0);
797         g_return_val_if_fail(buf != NULL, 0);
798
799         switch(str->type)
800         {
801                 case STREAM_TYPE_MEMORY:
802                 {
803                         int copycount = 0;
804                         if(str->unicode)
805                         {
806                                 /* Do it character-by-character */
807                                 while(copycount < len - 1 && str->ubuffer && str->mark < str->buflen) 
808                                 {
809                                         glui32 ch = str->ubuffer[str->mark++];
810                                         /* Check for Unicode newline; slightly different than
811                                         in file streams */
812                                         if(ch == 0x0A || ch == 0x85 || ch == 0x0C || ch == 0x2028 || ch == 0x2029)
813                                         {
814                                                 buf[copycount++] = '\n';
815                                                 break;
816                                         }
817                                         if(ch == 0x0D)
818                                         {
819                                                 if(str->ubuffer[str->mark] == 0x0A)
820                                                         str->mark++; /* skip past next newline */
821                                                 buf[copycount++] = '\n';
822                                                 break;
823                                         }
824                                         buf[copycount++] = (ch > 0xFF)? '?' : (char)ch;
825                                 }
826                                 buf[copycount] = '\0';
827                         }
828                         else
829                         {
830                                 if(str->buffer) /* if not, copycount stays 0 */
831                                         copycount = MIN(len - 1, str->buflen - str->mark);
832                                 char *endptr = memccpy(buf, str->buffer + str->mark, '\n', copycount);
833                                 if(endptr) /* newline was found */
834                                         copycount = endptr - buf; /* Real copy count */
835                                 buf[copycount] = '\0';
836                                 str->mark += copycount;
837                         }
838                         
839                         str->read_count += copycount;
840                         return copycount;
841                 }       
842                 case STREAM_TYPE_FILE:
843                         if(str->binary) 
844                         {
845                                 if(str->unicode) /* Binary file with 4-byte characters */
846                                 {
847                                         /* Do it character-by-character */
848                                         int foo;
849                                         for(foo = 0; foo < len - 1; foo++)
850                                         {
851                                                 glsi32 ch = read_ucs4be_char_from_file(str->file_pointer);
852                                                 if(ch == -1) 
853                                                 {
854                                                         buf[foo] = '\0';
855                                                         return foo - 1;
856                                                 }
857                                                 str->read_count++;
858                                                 if(is_unicode_newline(ch, str->file_pointer, FALSE))
859                                                 {
860                                                         buf[foo] = '\n';
861                                                         buf[foo + 1] = '\0';
862                                                         return foo;
863                                                 }
864                                                 buf[foo] = (ch > 0xFF)? '?' : (char)ch;
865                                         }
866                                         buf[len] = '\0';
867                                         return foo;
868                                 }
869                                 else /* Regular binary file */
870                                 {
871                                         fgets(buf, len, str->file_pointer);
872                                         str->read_count += strlen(buf);
873                                         return strlen(buf);
874                                 }
875                         }
876                         else /* Text mode is the same for Unicode and regular files */
877                         {
878                                 /* Do it character-by-character */
879                                 int foo;
880                                 for(foo = 0; foo < len - 1; foo++)
881                                 {
882                                         glsi32 ch = read_utf8_char_from_file(str->file_pointer);
883                                         if(ch == -1)
884                                         {
885                                                 buf[foo] = '\0';
886                                                 return foo - 1;
887                                         }
888                                         str->read_count++;
889                                         if(is_unicode_newline(ch, str->file_pointer, TRUE))
890                                         {
891                                                 buf[foo] = '\n';
892                                                 buf[foo + 1] = '\0';
893                                                 return foo;
894                                         }
895                                         buf[foo] = (ch > 0xFF)? 0x3F : (char)ch;
896                                 }
897                                 buf[len] = '\0';
898                                 return foo;
899                         }
900                 default:
901                         ILLEGAL_PARAM("Reading illegal on stream type: %u", str->type);
902                         return 0;
903         }
904 }
905
906 /**
907  * glk_get_line_stream_uni:
908  * @str: An input stream.
909  * @buf: A buffer with space for at least @len Unicode code points.
910  * @len: The number of characters to read, plus one.
911  *
912  * Reads Unicode characters from @str, until either 
913  * <inlineequation>
914  *   <alt>@len - 1</alt>
915  *   <mathphrase>@len - 1</mathphrase>
916  * </inlineequation> 
917  * Unicode characters have been read or a newline has been read. It then puts a
918  * terminal null (a zero value) on the end.
919  *
920  * Returns: The number of characters actually read, including the newline (if
921  * there is one) but not including the terminal null.
922  */
923 glui32
924 glk_get_line_stream_uni(strid_t str, glui32 *buf, glui32 len)
925 {
926         VALID_STREAM(str, return 0);
927         g_return_val_if_fail(str->file_mode == filemode_Read || str->file_mode == filemode_ReadWrite, 0);
928         g_return_val_if_fail(buf != NULL, 0);
929
930         switch(str->type)
931         {
932                 case STREAM_TYPE_MEMORY:
933                 {
934                         int copycount = 0;
935                         if(str->unicode)
936                         {
937                                 /* Do it character-by-character */
938                                 while(copycount < len - 1 && str->ubuffer && str->mark < str->buflen) 
939                                 {
940                                         glui32 ch = str->ubuffer[str->mark++];
941                                         /* Check for Unicode newline; slightly different than
942                                         in file streams */
943                                         if(ch == 0x0A || ch == 0x85 || ch == 0x0C || ch == 0x2028 || ch == 0x2029)
944                                         {
945                                                 buf[copycount++] = '\n';
946                                                 break;
947                                         }
948                                         if(ch == 0x0D)
949                                         {
950                                                 if(str->ubuffer[str->mark] == 0x0A)
951                                                         str->mark++; /* skip past next newline */
952                                                 buf[copycount++] = '\n';
953                                                 break;
954                                         }
955                                         buf[copycount++] = ch;
956                                 }
957                                 buf[copycount] = '\0';
958                         }
959                         else
960                         {
961                                 /* No recourse to memccpy(), so do it character-by-character */
962                                 while(copycount < len - 1 && str->buffer && str->mark < str->buflen)
963                                 {
964                                         gchar ch = str->buffer[str->mark++];
965                                         /* Check for newline */
966                                         if(ch == '\n') /* Also check for \r and \r\n? */
967                                         {
968                                                 buf[copycount++] = '\n';
969                                                 break;
970                                         }
971                                         buf[copycount++] = (unsigned char)ch;
972                                 }
973                                 buf[copycount] = 0;
974                         }
975                         
976                         str->read_count += copycount;
977                         return copycount;
978                 }       
979                 case STREAM_TYPE_FILE:
980                         if(str->binary) 
981                         {
982                                 if(str->unicode) /* Binary file with 4-byte characters */
983                                 {
984                                         /* Do it character-by-character */
985                                         int foo;
986                                         for(foo = 0; foo < len - 1; foo++)
987                                         {
988                                                 glsi32 ch = read_ucs4be_char_from_file(str->file_pointer);
989                                                 if(ch == -1) 
990                                                 {
991                                                         buf[foo] = 0;
992                                                         return foo - 1;
993                                                 }
994                                                 str->read_count++;
995                                                 if(is_unicode_newline(ch, str->file_pointer, FALSE))
996                                                 {
997                                                         buf[foo] = ch; /* Preserve newline types??? */
998                                                         buf[foo + 1] = 0;
999                                                         return foo;
1000                                                 }
1001                                                 buf[foo] = ch;
1002                                         }
1003                                         buf[len] = 0;
1004                                         return foo;
1005                                 }
1006                                 else /* Regular binary file */
1007                                 {
1008                                         gchar *readbuffer = g_new0(gchar, len);
1009                                         fgets(readbuffer, len, str->file_pointer);
1010                                         glui32 count = strlen(readbuffer) + 1; /* Copy terminator */
1011                                         int foo;
1012                                         for(foo = 0; foo < count; foo++)
1013                                                 buf[foo] = (unsigned char)(readbuffer[foo]);
1014                                         str->read_count += count;
1015                                         return count;
1016                                 }
1017                         }
1018                         else /* Text mode is the same for Unicode and regular files */
1019                         {
1020                                 /* Do it character-by-character */
1021                                 int foo;
1022                                 for(foo = 0; foo < len - 1; foo++)
1023                                 {
1024                                         glsi32 ch = read_utf8_char_from_file(str->file_pointer);
1025                                         if(ch == -1)
1026                                         {
1027                                                 buf[foo] = 0;
1028                                                 return foo - 1;
1029                                         }
1030                                         str->read_count++;
1031                                         if(is_unicode_newline(ch, str->file_pointer, TRUE))
1032                                         {
1033                                                 buf[foo] = ch; /* Preserve newline types??? */
1034                                                 buf[foo + 1] = 0;
1035                                                 return foo;
1036                                         }
1037                                         buf[foo] = ch;
1038                                 }
1039                                 buf[len] = 0;
1040                                 return foo;
1041                         }
1042                 default:
1043                         ILLEGAL_PARAM("Reading illegal on stream type: %u", str->type);
1044                         return 0;
1045         }
1046 }
1047
1048 /*
1049  *
1050  **************** SEEKING FUNCTIONS ********************************************
1051  *
1052  */
1053
1054 /**
1055  * glk_stream_get_position:
1056  * @str: A file or memory stream.
1057  *
1058  * Returns the position of the read/write mark in @str. For memory streams and
1059  * binary file streams, this is exactly the number of characters read or written
1060  * from the beginning of the stream (unless you have moved the mark with
1061  * glk_stream_set_position().) For text file streams, matters are more 
1062  * ambiguous, since (for example) writing one byte to a text file may store more
1063  * than one character in the platform's native encoding. You can only be sure
1064  * that the position increases as you read or write to the file.
1065  *
1066  * Additional complication: for Latin-1 memory and file streams, a character is
1067  * a byte. For Unicode memory and file streams (those created by
1068  * glk_stream_open_file_uni() and glk_stream_open_memory_uni()), a character is
1069  * a 32-bit word. So in a binary Unicode file, positions are multiples of four
1070  * bytes.
1071  *
1072  * <note><para>
1073  *   If this bothers you, don't use binary Unicode files. I don't think they're
1074  *   good for much anyhow.
1075  * </para></note>
1076  *
1077  * Returns: position of the read/write mark in @str.
1078  */
1079 glui32
1080 glk_stream_get_position(strid_t str)
1081 {
1082         VALID_STREAM(str, return 0);
1083         
1084         switch(str->type)
1085         {
1086                 case STREAM_TYPE_MEMORY:
1087                         return str->mark;
1088                 case STREAM_TYPE_FILE:
1089                         return ftell(str->file_pointer);
1090                 default:
1091                         ILLEGAL_PARAM("Seeking illegal on stream type: %u", str->type);
1092                         return 0;
1093         }
1094 }
1095
1096 /**
1097  * glk_stream_set_position:
1098  * @str: A file or memory stream.
1099  * @pos: The position to set the mark to, relative to @seekmode.
1100  * @seekmode: One of #seekmode_Start, #seekmode_Current, or #seekmode_End.
1101  *
1102  * Sets the position of the read/write mark in @str. The position is controlled
1103  * by @pos, and the meaning of @pos is controlled by @seekmode. See the
1104  * <code>seekmode_</code> constants below.
1105  *
1106  * It is illegal to specify a position before the beginning or after the end of
1107  * the file.
1108  *
1109  * In binary files, the mark position is exact &mdash; it corresponds with the
1110  * number of characters you have read or written. In text files, this mapping 
1111  * can vary, because of linefeed conventions or other character-set 
1112  * approximations. See <link linkend="chimara-Streams">Streams</link>.
1113  * glk_stream_set_position() and glk_stream_get_position() measure positions in
1114  * the platform's native encoding &mdash; after character cookery. Therefore,
1115  * in a text stream, it is safest to use glk_stream_set_position() only to move
1116  * to the beginning or end of a file, or to a position determined by
1117  * glk_stream_get_position().
1118  *
1119  * Again, in Latin-1 streams, characters are bytes. In Unicode streams,
1120  * characters are 32-bit words, or four bytes each.
1121  */
1122 void
1123 glk_stream_set_position(strid_t str, glsi32 pos, glui32 seekmode)
1124 {
1125         VALID_STREAM(str, return);
1126         g_return_if_fail(!(seekmode == seekmode_Start && pos < 0));
1127         g_return_if_fail(!(seekmode == seekmode_End || pos > 0));
1128         
1129         switch(str->type)
1130         {
1131                 case STREAM_TYPE_MEMORY:
1132                         switch(seekmode)
1133                         {
1134                                 case seekmode_Start:   str->mark = pos;  break;
1135                                 case seekmode_Current: str->mark += pos; break;
1136                                 case seekmode_End:     str->mark = str->buflen + pos; break;
1137                                 default:
1138                                         g_return_if_reached();
1139                                         return;
1140                         }
1141                         break;
1142                 case STREAM_TYPE_FILE:
1143                 {
1144                         int whence;
1145                         switch(seekmode)
1146                         {
1147                                 case seekmode_Start:   whence = SEEK_SET; break;
1148                                 case seekmode_Current: whence = SEEK_CUR; break;
1149                                 case seekmode_End:     whence = SEEK_END; break;
1150                                 default:
1151                                         g_return_if_reached();
1152                                         return;
1153                         }
1154                         if(fseek(str->file_pointer, pos, whence) == -1)
1155                                 WARNING("Seek failed on file stream");
1156                         break;
1157                 }
1158                 default:
1159                         ILLEGAL_PARAM("Seeking illegal on stream type: %u", str->type);
1160                         return;
1161         }
1162 }
1163