Added a skeleton implementation of the ChimaraIF widget, which is a subclass of Chima...
[rodin/chimara.git] / libchimara / strio.c
1 #include "charset.h"
2 #include "magic.h"
3 #include "stream.h"
4 #include <errno.h>
5 #include <stdio.h>
6 #include <string.h>
7 #include <glib.h>
8 #include <glib/gstdio.h>
9
10 /*
11  *
12  **************** WRITING FUNCTIONS ********************************************
13  *
14  */
15
16 /* Internal function: write a UTF-8 string to a text grid window's text buffer. */
17 static void
18 write_utf8_to_grid(winid_t win, gchar *s)
19 {
20         if(win->input_request_type == INPUT_REQUEST_LINE || win->input_request_type == INPUT_REQUEST_LINE_UNICODE)
21         {
22                 ILLEGAL("Tried to print to a text grid window with line input pending.");
23                 return;
24         }
25         
26     /* Number of characters to insert */
27     glong length = g_utf8_strlen(s, -1);
28     glong chars_left = length;
29     
30     gdk_threads_enter();
31     
32     GtkTextBuffer *buffer = gtk_text_view_get_buffer( GTK_TEXT_VIEW(win->widget) );
33     GtkTextMark *cursor = gtk_text_buffer_get_mark(buffer, "cursor_position");
34     
35     /* Get cursor position */
36     GtkTextIter start;
37     gtk_text_buffer_get_iter_at_mark(buffer, &start, cursor);
38     /* Spaces available on this line */
39     gint available_space = win->width - gtk_text_iter_get_line_offset(&start);
40     
41     while(chars_left > available_space && !gtk_text_iter_is_end(&start))
42     {
43         GtkTextIter end = start;
44         gtk_text_iter_forward_to_line_end(&end);
45         gtk_text_buffer_delete(buffer, &start, &end);
46         gtk_text_buffer_insert(buffer, &start, s + (length - chars_left), available_space);
47         chars_left -= available_space;
48         gtk_text_iter_forward_line(&start);
49         available_space = win->width;
50     }
51     if(!gtk_text_iter_is_end(&start))
52     {
53         GtkTextIter end = start;
54         gtk_text_iter_forward_chars(&end, chars_left);
55         gtk_text_buffer_delete(buffer, &start, &end);
56         gtk_text_buffer_insert(buffer, &start, s + (length - chars_left), -1);
57     }
58     
59     gtk_text_buffer_move_mark(buffer, cursor, &start);
60     
61     gdk_threads_leave();
62 }
63
64 /* Internal function: write a UTF-8 string to a text buffer window's text buffer. */
65 static void
66 write_utf8_to_window(winid_t win, gchar *s)
67 {
68         if(win->input_request_type == INPUT_REQUEST_LINE || win->input_request_type == INPUT_REQUEST_LINE_UNICODE)
69         {
70                 ILLEGAL("Tried to print to a text buffer window with line input pending.");
71                 return;
72         }
73         
74         gdk_threads_enter();
75
76         GtkTextBuffer *buffer = gtk_text_view_get_buffer( GTK_TEXT_VIEW(win->widget) );
77
78         GtkTextIter iter;
79         gtk_text_buffer_get_end_iter(buffer, &iter);
80         gtk_text_buffer_insert_with_tags_by_name(buffer, &iter, s, -1, win->window_stream->style, NULL);
81
82         gdk_threads_leave();
83         
84         ChimaraGlk *glk = CHIMARA_GLK(gtk_widget_get_ancestor(win->widget, CHIMARA_TYPE_GLK));
85         g_assert(glk);
86         g_signal_emit_by_name(glk, "text-buffer-output", win->rock, s);
87 }
88
89 /* Internal function: write a Latin-1 buffer with length to a stream. */
90 static void
91 write_buffer_to_stream(strid_t str, gchar *buf, glui32 len)
92 {
93         switch(str->type)
94         {
95                 case STREAM_TYPE_WINDOW:
96                         /* Each window type has a different way of printing to it */
97                         switch(str->window->type)
98                         {
99                                 /* Printing to these windows' streams does nothing */
100                                 case wintype_Blank:
101                                 case wintype_Pair:
102                                 case wintype_Graphics:
103                                         str->write_count += len;
104                                         break;
105                                         
106                             /* Text grid window */
107                             case wintype_TextGrid:
108                             {
109                                 gchar *utf8 = convert_latin1_to_utf8(buf, len);
110                                 if(utf8 != NULL)
111                                 {
112                                     /* FIXME: What to do if string contains \n? Split the input string at newlines and write each string separately? */
113                                     write_utf8_to_grid(str->window, utf8);
114                                     g_free(utf8);
115                                 }
116                             }
117                                 str->write_count += len;
118                                 break;
119                                         
120                                 /* Text buffer window */        
121                                 case wintype_TextBuffer:
122                                 {
123                                         gchar *utf8 = convert_latin1_to_utf8(buf, len);
124                                         if(utf8 != NULL)
125                                         {
126                                                 write_utf8_to_window(str->window, utf8);
127                                                 g_free(utf8);
128                                         }
129                                 }       
130                                         str->write_count += len;
131                                         break;
132                                 default:
133                                         ILLEGAL_PARAM("Unknown window type: %u", str->window->type);
134                         }
135                         
136                         /* Now write the same buffer to the window's echo stream */
137                         if(str->window->echo_stream != NULL)
138                                 write_buffer_to_stream(str->window->echo_stream, buf, len);
139                         
140                         break;
141                         
142                 case STREAM_TYPE_MEMORY:
143                         if(str->unicode && str->ubuffer)
144                         {
145                                 int foo = 0;
146                                 while(str->mark < str->buflen && foo < len)
147                                         str->ubuffer[str->mark++] = (unsigned char)buf[foo++];
148                         }
149                         if(!str->unicode && str->buffer)
150                         {
151                                 int copycount = MIN(len, str->buflen - str->mark);
152                                 memmove(str->buffer + str->mark, buf, copycount);
153                                 str->mark += copycount;
154                         }
155
156                         str->write_count += len;
157                         break;
158                         
159                 case STREAM_TYPE_FILE:
160                         if(str->binary) 
161                         {
162                                 if(str->unicode) 
163                                 {
164                                         gchar *writebuffer = convert_latin1_to_ucs4be_string(buf, len);
165                                         fwrite(writebuffer, sizeof(gchar), len * 4, str->file_pointer);
166                                         g_free(writebuffer);
167                                 } 
168                                 else /* Regular file */
169                                 {
170                                         fwrite(buf, sizeof(gchar), len, str->file_pointer);
171                                 }
172                         }
173                         else /* Text mode is the same for Unicode and regular files */
174                         {
175                                 gchar *utf8 = convert_latin1_to_utf8(buf, len);
176                                 if(utf8 != NULL)
177                                 {
178                                         g_fprintf(str->file_pointer, "%s", utf8);
179                                         g_free(utf8);
180                                 }
181                         }
182                         
183                         str->write_count += len;
184                         break;
185                 default:
186                         ILLEGAL_PARAM("Unknown stream type: %u", str->type);
187         }
188 }
189
190 /* Internal function: write a Unicode buffer with length to a stream. */
191 static void
192 write_buffer_to_stream_uni(strid_t str, glui32 *buf, glui32 len)
193 {
194         switch(str->type)
195         {
196                 case STREAM_TYPE_WINDOW:
197                         /* Each window type has a different way of printing to it */
198                         switch(str->window->type)
199                         {
200                                 /* Printing to these windows' streams does nothing */
201                                 case wintype_Blank:
202                                 case wintype_Pair:
203                                 case wintype_Graphics:
204                                         str->write_count += len;
205                                         break;
206                                         
207                             /* Text grid window */
208                             case wintype_TextGrid:
209                             {
210                                 gchar *utf8 = convert_ucs4_to_utf8(buf, len);
211                                 if(utf8 != NULL)
212                                 {
213                                     /* FIXME: What to do if string contains \n? Split the input string at newlines and write each string separately? */
214                                     write_utf8_to_grid(str->window, utf8);
215                                     g_free(utf8);
216                                 }
217                             }
218                                 str->write_count += len;
219                                 break;
220                                         
221                                 /* Text buffer window */        
222                                 case wintype_TextBuffer:
223                                 {
224                                         gchar *utf8 = convert_ucs4_to_utf8(buf, len);
225                                         if(utf8 != NULL)
226                                         {
227                                                 write_utf8_to_window(str->window, utf8);
228                                                 g_free(utf8);
229                                         }
230                                 }       
231                                         str->write_count += len;
232                                         break;
233                                 default:
234                                         ILLEGAL_PARAM("Unknown window type: %u", str->window->type);
235                         }
236                         
237                         /* Now write the same buffer to the window's echo stream */
238                         if(str->window->echo_stream != NULL)
239                                 write_buffer_to_stream_uni(str->window->echo_stream, buf, len);
240                         
241                         break;
242                         
243                 case STREAM_TYPE_MEMORY:
244                         if(str->unicode && str->ubuffer)
245                         {
246                                 int copycount = MIN(len, str->buflen - str->mark);
247                                 memmove(str->ubuffer + str->mark, buf, copycount * sizeof(glui32));
248                                 str->mark += copycount;
249                         }
250                         if(!str->unicode && str->buffer)
251                         {
252                                 gchar *latin1 = convert_ucs4_to_latin1_binary(buf, len);
253                                 int copycount = MIN(len, str->buflen - str->mark);
254                                 memmove(str->buffer + str->mark, latin1, copycount);
255                                 g_free(latin1);
256                                 str->mark += copycount;
257                         }
258
259                         str->write_count += len;
260                         break;
261                         
262                 case STREAM_TYPE_FILE:
263                         if(str->binary) 
264                         {
265                                 if(str->unicode) 
266                                 {
267                                         gchar *writebuffer = convert_ucs4_to_ucs4be_string(buf, len);
268                                         fwrite(writebuffer, sizeof(gchar), len * 4, str->file_pointer);
269                                         g_free(writebuffer);
270                                 } 
271                                 else /* Regular file */
272                                 {
273                                         gchar *latin1 = convert_ucs4_to_latin1_binary(buf, len);
274                                         fwrite(latin1, sizeof(gchar), len, str->file_pointer);
275                                         g_free(latin1);
276                                 }
277                         }
278                         else /* Text mode is the same for Unicode and regular files */
279                         {
280                                 gchar *utf8 = convert_ucs4_to_utf8(buf, len);
281                                 if(utf8 != NULL) 
282                                 {
283                                         g_fprintf(str->file_pointer, "%s", utf8);
284                                         g_free(utf8);
285                                 }
286                         }
287                         
288                         str->write_count += len;
289                         break;
290                 default:
291                         ILLEGAL_PARAM("Unknown stream type: %u", str->type);
292         }
293 }
294
295 /**
296  * glk_put_char_stream:
297  * @str: An output stream.
298  * @ch: A character in Latin-1 encoding.
299  *
300  * The same as glk_put_char(), except that you specify a stream @str to print 
301  * to, instead of using the current stream. It is illegal for @str to be %NULL,
302  * or an input-only stream.
303  */
304 void
305 glk_put_char_stream(strid_t str, unsigned char ch)
306 {
307         VALID_STREAM(str, return);
308         g_return_if_fail(str->file_mode != filemode_Read);
309         
310         write_buffer_to_stream(str, (gchar *)&ch, 1);
311 }
312
313 /**
314  * glk_put_char_stream_uni:
315  * @str: An output stream.
316  * @ch: A Unicode code point.
317  *
318  * The same as glk_put_char_uni(), except that you specify a stream @str to
319  * print to, instead of using the current stream. It is illegal for @str to be 
320  * %NULL, or an input-only stream.
321  */
322 void
323 glk_put_char_stream_uni(strid_t str, glui32 ch)
324 {
325         VALID_STREAM(str, return);
326         g_return_if_fail(str->file_mode != filemode_Read);
327         
328         write_buffer_to_stream_uni(str, &ch, 1);
329 }
330
331 /**
332  * glk_put_string_stream:
333  * @str: An output stream.
334  * @s: A null-terminated string in Latin-1 encoding.
335  *
336  * The same as glk_put_string(), except that you specify a stream @str to print 
337  * to, instead of using the current stream. It is illegal for @str to be %NULL,
338  * or an input-only stream.
339  */
340 void
341 glk_put_string_stream(strid_t str, char *s)
342 {
343         VALID_STREAM(str, return);
344         g_return_if_fail(str->file_mode != filemode_Read);
345
346         write_buffer_to_stream(str, s, strlen(s));
347 }
348
349 /**
350  * glk_put_string_stream_uni:
351  * @str: An output stream.
352  * @s: A null-terminated array of Unicode code points.
353  *
354  * The same as glk_put_string_uni(), except that you specify a stream @str to
355  * print to, instead of using the current stream. It is illegal for @str to be 
356  * %NULL, or an input-only stream.
357  */
358 void
359 glk_put_string_stream_uni(strid_t str, glui32 *s)
360 {
361         VALID_STREAM(str, return);
362         g_return_if_fail(str->file_mode != filemode_Read);
363         
364         /* An impromptu strlen() for glui32 arrays */
365         glong len = 0;
366         glui32 *ptr = s;
367         while(*ptr++)
368                 len++;
369         write_buffer_to_stream_uni(str, s, len);
370 }
371
372 /**
373  * glk_put_buffer_stream:
374  * @str: An output stream.
375  * @buf: An array of characters in Latin-1 encoding.
376  * @len: Length of @buf.
377  *
378  * The same as glk_put_buffer(), except that you specify a stream @str to print 
379  * to, instead of using the current stream. It is illegal for @str to be %NULL,
380  * or an input-only stream.
381  */
382 void
383 glk_put_buffer_stream(strid_t str, char *buf, glui32 len)
384 {
385         VALID_STREAM(str, return);
386         g_return_if_fail(str->file_mode != filemode_Read);
387         
388         write_buffer_to_stream(str, buf, len);
389 }
390
391 /**
392  * glk_put_buffer_stream_uni:
393  * @str: An output stream.
394  * @buf: An array of Unicode code points.
395  * @len: Length of @buf.
396  *
397  * The same as glk_put_buffer_uni(), except that you specify a stream @str to
398  * print to, instead of using the current stream. It is illegal for @str to be 
399  * %NULL, or an input-only stream.
400  */
401 void
402 glk_put_buffer_stream_uni(strid_t str, glui32 *buf, glui32 len)
403 {
404         VALID_STREAM(str, return);
405         g_return_if_fail(str->file_mode != filemode_Read);
406         
407         write_buffer_to_stream_uni(str, buf, len);
408 }
409
410 /*
411  *
412  **************** READING FUNCTIONS ********************************************
413  *
414  */
415
416 /* Internal function: Read one big-endian four-byte character from file fp and
417 return it as a Unicode code point, or -1 on EOF */
418 static glsi32
419 read_ucs4be_char_from_file(FILE *fp)
420 {
421         unsigned char readbuffer[4];
422         if(fread(readbuffer, sizeof(unsigned char), 4, fp) < 4)
423                 return -1; /* EOF */
424         return
425                 readbuffer[0] << 24 | 
426                 readbuffer[1] << 16 | 
427                 readbuffer[2] << 8  | 
428                 readbuffer[3];
429 }
430
431 /* Internal function: Read one UTF-8 character, which may be more than one byte,
432 from file fp and return it as a Unicode code point, or -1 on EOF */
433 static glsi32
434 read_utf8_char_from_file(FILE *fp)
435 {
436         gchar readbuffer[4] = {0, 0, 0, 0}; /* Max UTF-8 width */
437         int foo;
438         gunichar charresult = (gunichar)-2;
439         for(foo = 0; foo < 4 && charresult == (gunichar)-2; foo++) 
440         {
441                 int ch = fgetc(fp);
442                 if(ch == EOF)
443                         return -1;
444                 readbuffer[foo] = (gchar)ch;
445                 charresult = g_utf8_get_char_validated(readbuffer, foo + 1);
446                 /* charresult is -1 if invalid, -2 if incomplete, and the unicode code
447                 point otherwise */
448         }
449         /* Silently return unknown characters as 0xFFFD, Replacement Character */
450         if(charresult == (gunichar)-1 || charresult == (gunichar)-2) 
451                 return 0xFFFD;
452         return charresult;
453 }
454
455 /* Internal function: Tell whether this code point is a Unicode newline. The
456 file pointer and eight-bit flag are included in case the newline is a CR 
457 (U+000D). If the next character is LF (U+000A) then it also belongs to the
458 newline. */
459 static gboolean
460 is_unicode_newline(glsi32 ch, FILE *fp, gboolean utf8)
461 {
462         if(ch == 0x0A || ch == 0x85 || ch == 0x0C || ch == 0x2028 || ch == 0x2029)
463                 return TRUE;
464         if(ch == 0x0D) {
465                 glsi32 ch2 = utf8? read_utf8_char_from_file(fp) : 
466                         read_ucs4be_char_from_file(fp);
467                 if(ch2 != 0x0A)
468                         if(fseek(fp, utf8? -1 : -4, SEEK_CUR) == -1);
469                                 WARNING_S("Seek failed on stream", g_strerror(errno) );
470                 return TRUE;
471         }
472         return FALSE;
473 }
474
475 /* Internal function: Read one character from a stream. Returns a value which
476  can be returned unchanged by glk_get_char_stream_uni(), but 
477  glk_get_char_stream() must replace high values by the placeholder character. */
478 static glsi32
479 get_char_stream_common(strid_t str)
480 {
481         switch(str->type)
482         {
483                 case STREAM_TYPE_MEMORY:
484                         if(str->unicode)
485                         {
486                                 if(!str->ubuffer || str->mark >= str->buflen)
487                                         return -1;
488                                 glui32 ch = str->ubuffer[str->mark++];
489                                 str->read_count++;
490                                 return ch;
491                         }
492                         else
493                         {
494                                 if(!str->buffer || str->mark >= str->buflen)
495                                         return -1;
496                                 unsigned char ch = str->buffer[str->mark++];
497                                 str->read_count++;
498                                 return ch;
499                         }
500                         break;
501                         
502                 case STREAM_TYPE_FILE:
503                         if(str->binary) 
504                         {
505                                 if(str->unicode) 
506                                 {
507                                         glsi32 ch = read_ucs4be_char_from_file(str->file_pointer);
508                                         if(ch == -1)
509                                                 return -1;
510                                         str->read_count++;
511                                         return ch;
512                                 }
513                                 else /* Regular file */
514                                 {
515                                         int ch = fgetc(str->file_pointer);
516                                         if(ch == EOF)
517                                                 return -1;
518                                         
519                                         str->read_count++;
520                                         return ch;
521                                 }
522                         }
523                         else /* Text mode is the same for Unicode and regular files */
524                         {
525                                 glsi32 ch = read_utf8_char_from_file(str->file_pointer);
526                                 if(ch == -1)
527                                         return -1;
528                                         
529                                 str->read_count++;
530                                 return ch;
531                         }
532                 default:
533                         ILLEGAL_PARAM("Reading illegal on stream type: %u", str->type);
534                         return -1;
535         }
536 }
537
538 /**
539  * glk_get_char_stream:
540  * @str: An input stream.
541  *
542  * Reads one character from the stream @str. (There is no notion of a
543  * <quote>current input stream.</quote>) It is illegal for @str to be %NULL, or
544  * an output-only stream.
545  *
546  * The result will be between 0 and 255. As with all basic text functions, Glk
547  * assumes the Latin-1 encoding. See <link 
548  * linkend="chimara-Character-Encoding">Character Encoding</link>. If the end
549  * of the stream has been reached, the result will be -1. 
550  *
551  * <note><para>
552  *   Note that high-bit characters (128..255) are <emphasis>not</emphasis>
553  *   returned as negative numbers.
554  * </para></note>
555  *
556  * If the stream contains Unicode data &mdash; for example, if it was created
557  * with glk_stream_open_file_uni() or glk_stream_open_memory_uni() &mdash; then
558  * characters beyond 255 will be returned as 0x3F (<code>"?"</code>).
559  *
560  * It is usually more efficient to read several characters at once with
561  * glk_get_buffer_stream() or glk_get_line_stream(), as opposed to calling
562  * glk_get_char_stream() several times.
563  *
564  * Returns: A character value between 0 and 255, or -1 on end of stream.
565  */
566 glsi32
567 glk_get_char_stream(strid_t str)
568 {
569         VALID_STREAM(str, return -1);
570         g_return_val_if_fail(str->file_mode == filemode_Read || str->file_mode == filemode_ReadWrite, -1);
571         
572         glsi32 ch = get_char_stream_common(str);
573         return (ch > 0xFF)? PLACEHOLDER : ch;
574 }
575
576 /**
577  * glk_get_char_stream_uni:
578  * @str: An input stream.
579  *
580  * Reads one character from the stream @str. The result will be between 0 and 
581  * 0x7FFFFFFF. If the end of the stream has been reached, the result will be -1.
582  *
583  * Returns: A value between 0 and 0x7FFFFFFF, or -1 on end of stream.
584  */
585 glsi32
586 glk_get_char_stream_uni(strid_t str)
587 {
588         VALID_STREAM(str, return -1);
589         g_return_val_if_fail(str->file_mode == filemode_Read || str->file_mode == filemode_ReadWrite, -1);
590         
591         return get_char_stream_common(str);
592 }
593
594 /**
595  * glk_get_buffer_stream:
596  * @str: An input stream.
597  * @buf: A buffer with space for at least @len characters.
598  * @len: The number of characters to read.
599  *
600  * Reads @len characters from @str, unless the end of stream is reached first.
601  * No terminal null is placed in the buffer.
602  *
603  * Returns: The number of characters actually read.
604  */
605 glui32
606 glk_get_buffer_stream(strid_t str, char *buf, glui32 len)
607 {
608         VALID_STREAM(str, return 0);
609         g_return_val_if_fail(str->file_mode == filemode_Read || str->file_mode == filemode_ReadWrite, 0);
610         g_return_val_if_fail(buf != NULL, 0);
611         
612         switch(str->type)
613         {
614                 case STREAM_TYPE_MEMORY:
615                 {
616                         int copycount = 0;
617                         if(str->unicode)
618                         {
619                                 while(copycount < len && str->ubuffer && str->mark < str->buflen) 
620                                 {
621                                         glui32 ch = str->ubuffer[str->mark++];
622                                         buf[copycount++] = (ch > 0xFF)? '?' : (char)ch;
623                                 }
624                         }
625                         else
626                         {
627                                 if(str->buffer) /* if not, copycount stays 0 */
628                                         copycount = MIN(len, str->buflen - str->mark);
629                                 memmove(buf, str->buffer + str->mark, copycount);
630                                 str->mark += copycount;
631                         }
632
633                         str->read_count += copycount;           
634                         return copycount;
635                 }       
636                 case STREAM_TYPE_FILE:
637                         if(str->binary) 
638                         {
639                                 if(str->unicode) /* Binary file with 4-byte characters */
640                                 {
641                                         /* Read len characters of 4 bytes each */
642                                         unsigned char *readbuffer = g_new0(unsigned char, 4 * len);
643                                         size_t count = fread(readbuffer, sizeof(unsigned char), 4 * len, str->file_pointer);
644                                         /* If there was an incomplete character */
645                                         if(count % 4 != 0) 
646                                         {
647                                                 count -= count % 4;
648                                                 WARNING("Incomplete character in binary Unicode file");
649                                         }
650                                         
651                                         int foo;
652                                         for(foo = 0; foo < count; foo += 4)
653                                         {
654                                                 glsi32 ch = readbuffer[foo] << 24
655                                                         | readbuffer[foo + 1] << 16
656                                                         | readbuffer[foo + 2] << 8
657                                                         | readbuffer[foo + 3];
658                                                 buf[foo / 4] = (ch > 255)? 0x3F : (char)ch;
659                                         }
660                                         g_free(readbuffer);
661                                         str->read_count += count / 4;
662                                         return count / 4;
663                                 }
664                                 else /* Regular binary file */
665                                 {
666                                         size_t count = fread(buf, sizeof(char), len, str->file_pointer);
667                                         str->read_count += count;
668                                         return count;
669                                 }
670                         }
671                         else /* Text mode is the same for Unicode and regular files */
672                         {
673                                 /* Do it character-by-character */
674                                 int foo;
675                                 for(foo = 0; foo < len; foo++)
676                                 {
677                                         glsi32 ch = read_utf8_char_from_file(str->file_pointer);
678                                         if(ch == -1)
679                                                 break;
680                                         str->read_count++;
681                                         buf[foo] = (ch > 0xFF)? 0x3F : (gchar)ch;
682                                 }
683                                 return foo;
684                         }
685                 default:
686                         ILLEGAL_PARAM("Reading illegal on stream type: %u", str->type);
687                         return 0;
688         }
689 }
690
691 /**
692  * glk_get_buffer_stream_uni:
693  * @str: An input stream.
694  * @buf: A buffer with space for at least @len Unicode code points.
695  * @len: The number of characters to read.
696  *
697  * Reads @len Unicode characters from @str, unless the end of stream is reached 
698  * first. No terminal null is placed in the buffer.
699  *
700  * Returns: The number of Unicode characters actually read.
701  */
702 glui32
703 glk_get_buffer_stream_uni(strid_t str, glui32 *buf, glui32 len)
704 {
705         VALID_STREAM(str, return 0);
706         g_return_val_if_fail(str->file_mode == filemode_Read || str->file_mode == filemode_ReadWrite, 0);
707         g_return_val_if_fail(buf != NULL, 0);
708         
709         switch(str->type)
710         {
711                 case STREAM_TYPE_MEMORY:
712                 {
713                         int copycount = 0;
714                         if(str->unicode)
715                         {
716                                 if(str->ubuffer) /* if not, copycount stays 0 */
717                                         copycount = MIN(len, str->buflen - str->mark);
718                                 memmove(buf, str->ubuffer + str->mark, copycount * 4);
719                                 str->mark += copycount;
720                         }
721                         else
722                         {
723                                 while(copycount < len && str->buffer && str->mark < str->buflen)
724                                 {
725                                         unsigned char ch = str->buffer[str->mark++];
726                                         buf[copycount++] = ch;
727                                 }
728                         }
729
730                         str->read_count += copycount;           
731                         return copycount;
732                 }       
733                 case STREAM_TYPE_FILE:
734                         if(str->binary) 
735                         {
736                                 if(str->unicode) /* Binary file with 4-byte characters */
737                                 {
738                                         /* Read len characters of 4 bytes each */
739                                         unsigned char *readbuffer = g_new0(unsigned char, 4 * len);
740                                         size_t count = fread(readbuffer, sizeof(unsigned char), 4 * len, str->file_pointer);
741                                         /* If there was an incomplete character */
742                                         if(count % 4 != 0) 
743                                         {
744                                                 count -= count % 4;
745                                                 WARNING("Incomplete character in binary Unicode file");
746                                         }
747                                         
748                                         int foo;
749                                         for(foo = 0; foo < count; foo += 4)
750                                                 buf[foo / 4] = readbuffer[foo] << 24
751                                                         | readbuffer[foo + 1] << 16
752                                                         | readbuffer[foo + 2] << 8
753                                                         | readbuffer[foo + 3];
754                                         g_free(readbuffer);
755                                         str->read_count += count / 4;
756                                         return count / 4;
757                                 }
758                                 else /* Regular binary file */
759                                 {
760                                         unsigned char *readbuffer = g_new0(unsigned char, len);
761                                         size_t count = fread(readbuffer, sizeof(unsigned char), len, str->file_pointer);
762                                         int foo;
763                                         for(foo = 0; foo < count; foo++)
764                                                 buf[foo] = readbuffer[foo];
765                                         g_free(readbuffer);
766                                         str->read_count += count;
767                                         return count;
768                                 }
769                         }
770                         else /* Text mode is the same for Unicode and regular files */
771                         {
772                                 /* Do it character-by-character */
773                                 int foo;
774                                 for(foo = 0; foo < len; foo++)
775                                 {
776                                         glsi32 ch = read_utf8_char_from_file(str->file_pointer);
777                                         if(ch == -1)
778                                                 break;
779                                         str->read_count++;
780                                         buf[foo] = ch;
781                                 }
782                                 return foo;
783                         }
784                 default:
785                         ILLEGAL_PARAM("Reading illegal on stream type: %u", str->type);
786                         return 0;
787         }
788 }
789
790 /**
791  * glk_get_line_stream:
792  * @str: An input stream.
793  * @buf: A buffer with space for at least @len characters.
794  * @len: The number of characters to read, plus one.
795  *
796  * Reads characters from @str, until either 
797  * <inlineequation>
798  *   <alt>@len - 1</alt>
799  *   <mathphrase>@len - 1</mathphrase>
800  * </inlineequation>
801  * characters have been read or a newline has been read. It then puts a
802  * terminal null (<code>'\0'</code>) aracter on
803  * the end. It returns the number of characters actually read, including the
804  * newline (if there is one) but not including the terminal null.
805  *
806  * Returns: The number of characters actually read.
807  */
808 glui32
809 glk_get_line_stream(strid_t str, char *buf, glui32 len)
810 {
811         VALID_STREAM(str, return 0);
812         g_return_val_if_fail(str->file_mode == filemode_Read || str->file_mode == filemode_ReadWrite, 0);
813         g_return_val_if_fail(buf != NULL, 0);
814
815         switch(str->type)
816         {
817                 case STREAM_TYPE_MEMORY:
818                 {
819                         int copycount = 0;
820                         if(str->unicode)
821                         {
822                                 /* Do it character-by-character */
823                                 while(copycount < len - 1 && str->ubuffer && str->mark < str->buflen) 
824                                 {
825                                         glui32 ch = str->ubuffer[str->mark++];
826                                         /* Check for Unicode newline; slightly different than
827                                         in file streams */
828                                         if(ch == 0x0A || ch == 0x85 || ch == 0x0C || ch == 0x2028 || ch == 0x2029)
829                                         {
830                                                 buf[copycount++] = '\n';
831                                                 break;
832                                         }
833                                         if(ch == 0x0D)
834                                         {
835                                                 if(str->ubuffer[str->mark] == 0x0A)
836                                                         str->mark++; /* skip past next newline */
837                                                 buf[copycount++] = '\n';
838                                                 break;
839                                         }
840                                         buf[copycount++] = (ch > 0xFF)? '?' : (char)ch;
841                                 }
842                                 buf[copycount] = '\0';
843                         }
844                         else
845                         {
846                                 if(str->buffer) /* if not, copycount stays 0 */
847                                         copycount = MIN(len - 1, str->buflen - str->mark);
848                                 char *endptr = memccpy(buf, str->buffer + str->mark, '\n', copycount);
849                                 if(endptr) /* newline was found */
850                                         copycount = endptr - buf; /* Real copy count */
851                                 buf[copycount] = '\0';
852                                 str->mark += copycount;
853                         }
854                         
855                         str->read_count += copycount;
856                         return copycount;
857                 }       
858                 case STREAM_TYPE_FILE:
859                         if(str->binary) 
860                         {
861                                 if(str->unicode) /* Binary file with 4-byte characters */
862                                 {
863                                         /* Do it character-by-character */
864                                         int foo;
865                                         for(foo = 0; foo < len - 1; foo++)
866                                         {
867                                                 glsi32 ch = read_ucs4be_char_from_file(str->file_pointer);
868                                                 if(ch == -1) 
869                                                 {
870                                                         buf[foo] = '\0';
871                                                         return foo - 1;
872                                                 }
873                                                 str->read_count++;
874                                                 if(is_unicode_newline(ch, str->file_pointer, FALSE))
875                                                 {
876                                                         buf[foo] = '\n';
877                                                         buf[foo + 1] = '\0';
878                                                         return foo;
879                                                 }
880                                                 buf[foo] = (ch > 0xFF)? '?' : (char)ch;
881                                         }
882                                         buf[len] = '\0';
883                                         return foo;
884                                 }
885                                 else /* Regular binary file */
886                                 {
887                                         fgets(buf, len, str->file_pointer);
888                                         str->read_count += strlen(buf);
889                                         return strlen(buf);
890                                 }
891                         }
892                         else /* Text mode is the same for Unicode and regular files */
893                         {
894                                 /* Do it character-by-character */
895                                 int foo;
896                                 for(foo = 0; foo < len - 1; foo++)
897                                 {
898                                         glsi32 ch = read_utf8_char_from_file(str->file_pointer);
899                                         if(ch == -1)
900                                         {
901                                                 buf[foo] = '\0';
902                                                 return foo - 1;
903                                         }
904                                         str->read_count++;
905                                         if(is_unicode_newline(ch, str->file_pointer, TRUE))
906                                         {
907                                                 buf[foo] = '\n';
908                                                 buf[foo + 1] = '\0';
909                                                 return foo;
910                                         }
911                                         buf[foo] = (ch > 0xFF)? 0x3F : (char)ch;
912                                 }
913                                 buf[len] = '\0';
914                                 return foo;
915                         }
916                 default:
917                         ILLEGAL_PARAM("Reading illegal on stream type: %u", str->type);
918                         return 0;
919         }
920 }
921
922 /**
923  * glk_get_line_stream_uni:
924  * @str: An input stream.
925  * @buf: A buffer with space for at least @len Unicode code points.
926  * @len: The number of characters to read, plus one.
927  *
928  * Reads Unicode characters from @str, until either 
929  * <inlineequation>
930  *   <alt>@len - 1</alt>
931  *   <mathphrase>@len - 1</mathphrase>
932  * </inlineequation> 
933  * Unicode characters have been read or a newline has been read. It then puts a
934  * terminal null (a zero value) on the end.
935  *
936  * Returns: The number of characters actually read, including the newline (if
937  * there is one) but not including the terminal null.
938  */
939 glui32
940 glk_get_line_stream_uni(strid_t str, glui32 *buf, glui32 len)
941 {
942         VALID_STREAM(str, return 0);
943         g_return_val_if_fail(str->file_mode == filemode_Read || str->file_mode == filemode_ReadWrite, 0);
944         g_return_val_if_fail(buf != NULL, 0);
945
946         switch(str->type)
947         {
948                 case STREAM_TYPE_MEMORY:
949                 {
950                         int copycount = 0;
951                         if(str->unicode)
952                         {
953                                 /* Do it character-by-character */
954                                 while(copycount < len - 1 && str->ubuffer && str->mark < str->buflen) 
955                                 {
956                                         glui32 ch = str->ubuffer[str->mark++];
957                                         /* Check for Unicode newline; slightly different than
958                                         in file streams */
959                                         if(ch == 0x0A || ch == 0x85 || ch == 0x0C || ch == 0x2028 || ch == 0x2029)
960                                         {
961                                                 buf[copycount++] = '\n';
962                                                 break;
963                                         }
964                                         if(ch == 0x0D)
965                                         {
966                                                 if(str->ubuffer[str->mark] == 0x0A)
967                                                         str->mark++; /* skip past next newline */
968                                                 buf[copycount++] = '\n';
969                                                 break;
970                                         }
971                                         buf[copycount++] = ch;
972                                 }
973                                 buf[copycount] = '\0';
974                         }
975                         else
976                         {
977                                 /* No recourse to memccpy(), so do it character-by-character */
978                                 while(copycount < len - 1 && str->buffer && str->mark < str->buflen)
979                                 {
980                                         gchar ch = str->buffer[str->mark++];
981                                         /* Check for newline */
982                                         if(ch == '\n') /* Also check for \r and \r\n? */
983                                         {
984                                                 buf[copycount++] = '\n';
985                                                 break;
986                                         }
987                                         buf[copycount++] = (unsigned char)ch;
988                                 }
989                                 buf[copycount] = 0;
990                         }
991                         
992                         str->read_count += copycount;
993                         return copycount;
994                 }       
995                 case STREAM_TYPE_FILE:
996                         if(str->binary) 
997                         {
998                                 if(str->unicode) /* Binary file with 4-byte characters */
999                                 {
1000                                         /* Do it character-by-character */
1001                                         int foo;
1002                                         for(foo = 0; foo < len - 1; foo++)
1003                                         {
1004                                                 glsi32 ch = read_ucs4be_char_from_file(str->file_pointer);
1005                                                 if(ch == -1) 
1006                                                 {
1007                                                         buf[foo] = 0;
1008                                                         return foo - 1;
1009                                                 }
1010                                                 str->read_count++;
1011                                                 if(is_unicode_newline(ch, str->file_pointer, FALSE))
1012                                                 {
1013                                                         buf[foo] = ch; /* Preserve newline types??? */
1014                                                         buf[foo + 1] = 0;
1015                                                         return foo;
1016                                                 }
1017                                                 buf[foo] = ch;
1018                                         }
1019                                         buf[len] = 0;
1020                                         return foo;
1021                                 }
1022                                 else /* Regular binary file */
1023                                 {
1024                                         gchar *readbuffer = g_new0(gchar, len);
1025                                         fgets(readbuffer, len, str->file_pointer);
1026                                         glui32 count = strlen(readbuffer) + 1; /* Copy terminator */
1027                                         int foo;
1028                                         for(foo = 0; foo < count; foo++)
1029                                                 buf[foo] = (unsigned char)(readbuffer[foo]);
1030                                         str->read_count += count;
1031                                         return count;
1032                                 }
1033                         }
1034                         else /* Text mode is the same for Unicode and regular files */
1035                         {
1036                                 /* Do it character-by-character */
1037                                 int foo;
1038                                 for(foo = 0; foo < len - 1; foo++)
1039                                 {
1040                                         glsi32 ch = read_utf8_char_from_file(str->file_pointer);
1041                                         if(ch == -1)
1042                                         {
1043                                                 buf[foo] = 0;
1044                                                 return foo - 1;
1045                                         }
1046                                         str->read_count++;
1047                                         if(is_unicode_newline(ch, str->file_pointer, TRUE))
1048                                         {
1049                                                 buf[foo] = ch; /* Preserve newline types??? */
1050                                                 buf[foo + 1] = 0;
1051                                                 return foo;
1052                                         }
1053                                         buf[foo] = ch;
1054                                 }
1055                                 buf[len] = 0;
1056                                 return foo;
1057                         }
1058                 default:
1059                         ILLEGAL_PARAM("Reading illegal on stream type: %u", str->type);
1060                         return 0;
1061         }
1062 }
1063
1064 /*
1065  *
1066  **************** SEEKING FUNCTIONS ********************************************
1067  *
1068  */
1069
1070 /**
1071  * glk_stream_get_position:
1072  * @str: A file or memory stream.
1073  *
1074  * Returns the position of the read/write mark in @str. For memory streams and
1075  * binary file streams, this is exactly the number of characters read or written
1076  * from the beginning of the stream (unless you have moved the mark with
1077  * glk_stream_set_position().) For text file streams, matters are more 
1078  * ambiguous, since (for example) writing one byte to a text file may store more
1079  * than one character in the platform's native encoding. You can only be sure
1080  * that the position increases as you read or write to the file.
1081  *
1082  * Additional complication: for Latin-1 memory and file streams, a character is
1083  * a byte. For Unicode memory and file streams (those created by
1084  * glk_stream_open_file_uni() and glk_stream_open_memory_uni()), a character is
1085  * a 32-bit word. So in a binary Unicode file, positions are multiples of four
1086  * bytes.
1087  *
1088  * <note><para>
1089  *   If this bothers you, don't use binary Unicode files. I don't think they're
1090  *   good for much anyhow.
1091  * </para></note>
1092  *
1093  * Returns: position of the read/write mark in @str.
1094  */
1095 glui32
1096 glk_stream_get_position(strid_t str)
1097 {
1098         VALID_STREAM(str, return 0);
1099         
1100         switch(str->type)
1101         {
1102                 case STREAM_TYPE_MEMORY:
1103                         return str->mark;
1104                 case STREAM_TYPE_FILE:
1105                         return ftell(str->file_pointer);
1106                 default:
1107                         ILLEGAL_PARAM("Seeking illegal on stream type: %u", str->type);
1108                         return 0;
1109         }
1110 }
1111
1112 /**
1113  * glk_stream_set_position:
1114  * @str: A file or memory stream.
1115  * @pos: The position to set the mark to, relative to @seekmode.
1116  * @seekmode: One of %seekmode_Start, %seekmode_Current, or %seekmode_End.
1117  *
1118  * Sets the position of the read/write mark in @str. The position is controlled
1119  * by @pos, and the meaning of @pos is controlled by @seekmode. See the
1120  * <code>seekmode_</code> constants below.
1121  *
1122  * It is illegal to specify a position before the beginning or after the end of
1123  * the file.
1124  *
1125  * In binary files, the mark position is exact &mdash; it corresponds with the
1126  * number of characters you have read or written. In text files, this mapping 
1127  * can vary, because of linefeed conventions or other character-set 
1128  * approximations. See <link linkend="chimara-Streams">Streams</link>.
1129  * glk_stream_set_position() and glk_stream_get_position() measure positions in
1130  * the platform's native encoding &mdash; after character cookery. Therefore,
1131  * in a text stream, it is safest to use glk_stream_set_position() only to move
1132  * to the beginning or end of a file, or to a position determined by
1133  * glk_stream_get_position().
1134  *
1135  * Again, in Latin-1 streams, characters are bytes. In Unicode streams,
1136  * characters are 32-bit words, or four bytes each.
1137  */
1138 void
1139 glk_stream_set_position(strid_t str, glsi32 pos, glui32 seekmode)
1140 {
1141         VALID_STREAM(str, return);
1142         g_return_if_fail(!(seekmode == seekmode_Start && pos < 0));
1143         g_return_if_fail(!(seekmode == seekmode_End && pos > 0));
1144         
1145         switch(str->type)
1146         {
1147                 case STREAM_TYPE_MEMORY:
1148                         switch(seekmode)
1149                         {
1150                                 case seekmode_Start:   str->mark = pos;  break;
1151                                 case seekmode_Current: str->mark += pos; break;
1152                                 case seekmode_End:     str->mark = str->buflen + pos; break;
1153                                 default:
1154                                         g_return_if_reached();
1155                                         return;
1156                         }
1157                         break;
1158                 case STREAM_TYPE_FILE:
1159                 {
1160                         int whence;
1161                         switch(seekmode)
1162                         {
1163                                 case seekmode_Start:   whence = SEEK_SET; break;
1164                                 case seekmode_Current: whence = SEEK_CUR; break;
1165                                 case seekmode_End:     whence = SEEK_END; break;
1166                                 default:
1167                                         g_return_if_reached();
1168                                         return;
1169                         }
1170                         if(fseek(str->file_pointer, pos, whence) == -1)
1171                                 WARNING("Seek failed on file stream");
1172                         break;
1173                 }
1174                 default:
1175                         ILLEGAL_PARAM("Seeking illegal on stream type: %u", str->type);
1176                         return;
1177         }
1178 }
1179