Gestalt selector claimed we couldn't print unicode characters, but we can
[rodin/chimara.git] / libchimara / strio.c
1 #include "charset.h"
2 #include "magic.h"
3 #include "stream.h"
4 #include <errno.h>
5 #include <stdio.h>
6 #include <string.h>
7 #include <glib.h>
8 #include <glib/gstdio.h>
9
10 /*
11  *
12  **************** WRITING FUNCTIONS ********************************************
13  *
14  */
15
16 /* Internal function: write a UTF-8 string to a text grid window's text buffer. */
17 static void
18 write_utf8_to_grid(winid_t win, gchar *s)
19 {
20         if(win->input_request_type == INPUT_REQUEST_LINE || win->input_request_type == INPUT_REQUEST_LINE_UNICODE)
21         {
22                 ILLEGAL("Tried to print to a text grid window with line input pending.");
23                 return;
24         }
25         
26     /* Number of characters to insert */
27     glong length = g_utf8_strlen(s, -1);
28     glong chars_left = length;
29     
30     gdk_threads_enter();
31     
32     GtkTextBuffer *buffer = gtk_text_view_get_buffer( GTK_TEXT_VIEW(win->widget) );
33     GtkTextMark *cursor = gtk_text_buffer_get_mark(buffer, "cursor_position");
34     
35     /* Get cursor position */
36     GtkTextIter start;
37     gtk_text_buffer_get_iter_at_mark(buffer, &start, cursor);
38     /* Spaces available on this line */
39     gint available_space = win->width - gtk_text_iter_get_line_offset(&start);
40     
41     while(chars_left > available_space && !gtk_text_iter_is_end(&start))
42     {
43         GtkTextIter end = start;
44         gtk_text_iter_forward_to_line_end(&end);
45         gtk_text_buffer_delete(buffer, &start, &end);
46         gtk_text_buffer_insert(buffer, &start, s + (length - chars_left), available_space);
47         chars_left -= available_space;
48         gtk_text_iter_forward_line(&start);
49         available_space = win->width;
50     }
51     if(!gtk_text_iter_is_end(&start))
52     {
53         GtkTextIter end = start;
54         gtk_text_iter_forward_chars(&end, chars_left);
55         gtk_text_buffer_delete(buffer, &start, &end);
56         gtk_text_buffer_insert(buffer, &start, s + (length - chars_left), -1);
57     }
58     
59     gtk_text_buffer_move_mark(buffer, cursor, &start);
60     
61     gdk_threads_leave();
62 }
63
64 /* Internal function: write a UTF-8 string to a text buffer window's text buffer. */
65 static void
66 write_utf8_to_window(winid_t win, gchar *s)
67 {
68         if(win->input_request_type == INPUT_REQUEST_LINE || win->input_request_type == INPUT_REQUEST_LINE_UNICODE)
69         {
70                 ILLEGAL("Tried to print to a text buffer window with line input pending.");
71                 return;
72         }
73         
74         gdk_threads_enter();
75
76         GtkTextBuffer *buffer = gtk_text_view_get_buffer( GTK_TEXT_VIEW(win->widget) );
77
78         GtkTextIter iter;
79         gtk_text_buffer_get_end_iter(buffer, &iter);
80         gtk_text_buffer_insert_with_tags_by_name(buffer, &iter, s, -1, win->window_stream->style, NULL);
81
82         gdk_threads_leave();
83 }
84
85 /* Internal function: write a Latin-1 buffer with length to a stream. */
86 static void
87 write_buffer_to_stream(strid_t str, gchar *buf, glui32 len)
88 {
89         switch(str->type)
90         {
91                 case STREAM_TYPE_WINDOW:
92                         /* Each window type has a different way of printing to it */
93                         switch(str->window->type)
94                         {
95                                 /* Printing to these windows' streams does nothing */
96                                 case wintype_Blank:
97                                 case wintype_Pair:
98                                 case wintype_Graphics:
99                                         str->write_count += len;
100                                         break;
101                                         
102                             /* Text grid window */
103                             case wintype_TextGrid:
104                             {
105                                 gchar *utf8 = convert_latin1_to_utf8(buf, len);
106                                 if(utf8 != NULL)
107                                 {
108                                     /* FIXME: What to do if string contains \n? Split the input string at newlines and write each string separately? */
109                                     write_utf8_to_grid(str->window, utf8);
110                                     g_free(utf8);
111                                 }
112                             }
113                                 str->write_count += len;
114                                 break;
115                                         
116                                 /* Text buffer window */        
117                                 case wintype_TextBuffer:
118                                 {
119                                         gchar *utf8 = convert_latin1_to_utf8(buf, len);
120                                         if(utf8 != NULL)
121                                         {
122                                                 write_utf8_to_window(str->window, utf8);
123                                                 g_free(utf8);
124                                         }
125                                 }       
126                                         str->write_count += len;
127                                         break;
128                                 default:
129                                         ILLEGAL_PARAM("Unknown window type: %u", str->window->type);
130                         }
131                         
132                         /* Now write the same buffer to the window's echo stream */
133                         if(str->window->echo_stream != NULL)
134                                 write_buffer_to_stream(str->window->echo_stream, buf, len);
135                         
136                         break;
137                         
138                 case STREAM_TYPE_MEMORY:
139                         if(str->unicode && str->ubuffer)
140                         {
141                                 int foo = 0;
142                                 while(str->mark < str->buflen && foo < len)
143                                         str->ubuffer[str->mark++] = (unsigned char)buf[foo++];
144                         }
145                         if(!str->unicode && str->buffer)
146                         {
147                                 int copycount = MIN(len, str->buflen - str->mark);
148                                 memmove(str->buffer + str->mark, buf, copycount);
149                                 str->mark += copycount;
150                         }
151
152                         str->write_count += len;
153                         break;
154                         
155                 case STREAM_TYPE_FILE:
156                         if(str->binary) 
157                         {
158                                 if(str->unicode) 
159                                 {
160                                         gchar *writebuffer = convert_latin1_to_ucs4be_string(buf, len);
161                                         fwrite(writebuffer, sizeof(gchar), len * 4, str->file_pointer);
162                                         g_free(writebuffer);
163                                 } 
164                                 else /* Regular file */
165                                 {
166                                         fwrite(buf, sizeof(gchar), len, str->file_pointer);
167                                 }
168                         }
169                         else /* Text mode is the same for Unicode and regular files */
170                         {
171                                 gchar *utf8 = convert_latin1_to_utf8(buf, len);
172                                 if(utf8 != NULL)
173                                 {
174                                         g_fprintf(str->file_pointer, "%s", utf8);
175                                         g_free(utf8);
176                                 }
177                         }
178                         
179                         str->write_count += len;
180                         break;
181                 default:
182                         ILLEGAL_PARAM("Unknown stream type: %u", str->type);
183         }
184 }
185
186 /* Internal function: write a Unicode buffer with length to a stream. */
187 static void
188 write_buffer_to_stream_uni(strid_t str, glui32 *buf, glui32 len)
189 {
190         switch(str->type)
191         {
192                 case STREAM_TYPE_WINDOW:
193                         /* Each window type has a different way of printing to it */
194                         switch(str->window->type)
195                         {
196                                 /* Printing to these windows' streams does nothing */
197                                 case wintype_Blank:
198                                 case wintype_Pair:
199                                 case wintype_Graphics:
200                                         str->write_count += len;
201                                         break;
202                                         
203                             /* Text grid window */
204                             case wintype_TextGrid:
205                             {
206                                 gchar *utf8 = convert_ucs4_to_utf8(buf, len);
207                                 if(utf8 != NULL)
208                                 {
209                                     /* FIXME: What to do if string contains \n? Split the input string at newlines and write each string separately? */
210                                     write_utf8_to_grid(str->window, utf8);
211                                     g_free(utf8);
212                                 }
213                             }
214                                 str->write_count += len;
215                                 break;
216                                         
217                                 /* Text buffer window */        
218                                 case wintype_TextBuffer:
219                                 {
220                                         gchar *utf8 = convert_ucs4_to_utf8(buf, len);
221                                         if(utf8 != NULL)
222                                         {
223                                                 write_utf8_to_window(str->window, utf8);
224                                                 g_free(utf8);
225                                         }
226                                 }       
227                                         str->write_count += len;
228                                         break;
229                                 default:
230                                         ILLEGAL_PARAM("Unknown window type: %u", str->window->type);
231                         }
232                         
233                         /* Now write the same buffer to the window's echo stream */
234                         if(str->window->echo_stream != NULL)
235                                 write_buffer_to_stream_uni(str->window->echo_stream, buf, len);
236                         
237                         break;
238                         
239                 case STREAM_TYPE_MEMORY:
240                         if(str->unicode && str->ubuffer)
241                         {
242                                 int copycount = MIN(len, str->buflen - str->mark);
243                                 memmove(str->ubuffer + str->mark, buf, copycount * sizeof(glui32));
244                                 str->mark += copycount;
245                         }
246                         if(!str->unicode && str->buffer)
247                         {
248                                 gchar *latin1 = convert_ucs4_to_latin1_binary(buf, len);
249                                 int copycount = MIN(len, str->buflen - str->mark);
250                                 memmove(str->buffer + str->mark, latin1, copycount);
251                                 g_free(latin1);
252                                 str->mark += copycount;
253                         }
254
255                         str->write_count += len;
256                         break;
257                         
258                 case STREAM_TYPE_FILE:
259                         if(str->binary) 
260                         {
261                                 if(str->unicode) 
262                                 {
263                                         gchar *writebuffer = convert_ucs4_to_ucs4be_string(buf, len);
264                                         fwrite(writebuffer, sizeof(gchar), len * 4, str->file_pointer);
265                                         g_free(writebuffer);
266                                 } 
267                                 else /* Regular file */
268                                 {
269                                         gchar *latin1 = convert_ucs4_to_latin1_binary(buf, len);
270                                         fwrite(latin1, sizeof(gchar), len, str->file_pointer);
271                                         g_free(latin1);
272                                 }
273                         }
274                         else /* Text mode is the same for Unicode and regular files */
275                         {
276                                 gchar *utf8 = convert_ucs4_to_utf8(buf, len);
277                                 if(utf8 != NULL) 
278                                 {
279                                         g_fprintf(str->file_pointer, "%s", utf8);
280                                         g_free(utf8);
281                                 }
282                         }
283                         
284                         str->write_count += len;
285                         break;
286                 default:
287                         ILLEGAL_PARAM("Unknown stream type: %u", str->type);
288         }
289 }
290
291 /**
292  * glk_put_char_stream:
293  * @str: An output stream.
294  * @ch: A character in Latin-1 encoding.
295  *
296  * The same as glk_put_char(), except that you specify a stream @str to print 
297  * to, instead of using the current stream. It is illegal for @str to be %NULL,
298  * or an input-only stream.
299  */
300 void
301 glk_put_char_stream(strid_t str, unsigned char ch)
302 {
303         VALID_STREAM(str, return);
304         g_return_if_fail(str->file_mode != filemode_Read);
305         
306         write_buffer_to_stream(str, (gchar *)&ch, 1);
307 }
308
309 /**
310  * glk_put_char_stream_uni:
311  * @str: An output stream.
312  * @ch: A Unicode code point.
313  *
314  * The same as glk_put_char_uni(), except that you specify a stream @str to
315  * print to, instead of using the current stream. It is illegal for @str to be 
316  * %NULL, or an input-only stream.
317  */
318 void
319 glk_put_char_stream_uni(strid_t str, glui32 ch)
320 {
321         VALID_STREAM(str, return);
322         g_return_if_fail(str->file_mode != filemode_Read);
323         
324         write_buffer_to_stream_uni(str, &ch, 1);
325 }
326
327 /**
328  * glk_put_string_stream:
329  * @str: An output stream.
330  * @s: A null-terminated string in Latin-1 encoding.
331  *
332  * The same as glk_put_string(), except that you specify a stream @str to print 
333  * to, instead of using the current stream. It is illegal for @str to be %NULL,
334  * or an input-only stream.
335  */
336 void
337 glk_put_string_stream(strid_t str, char *s)
338 {
339         VALID_STREAM(str, return);
340         g_return_if_fail(str->file_mode != filemode_Read);
341
342         write_buffer_to_stream(str, s, strlen(s));
343 }
344
345 /**
346  * glk_put_string_stream_uni:
347  * @str: An output stream.
348  * @s: A null-terminated array of Unicode code points.
349  *
350  * The same as glk_put_string_uni(), except that you specify a stream @str to
351  * print to, instead of using the current stream. It is illegal for @str to be 
352  * %NULL, or an input-only stream.
353  */
354 void
355 glk_put_string_stream_uni(strid_t str, glui32 *s)
356 {
357         VALID_STREAM(str, return);
358         g_return_if_fail(str->file_mode != filemode_Read);
359         
360         /* An impromptu strlen() for glui32 arrays */
361         glong len = 0;
362         glui32 *ptr = s;
363         while(*ptr++)
364                 len++;
365         write_buffer_to_stream_uni(str, s, len);
366 }
367
368 /**
369  * glk_put_buffer_stream:
370  * @str: An output stream.
371  * @buf: An array of characters in Latin-1 encoding.
372  * @len: Length of @buf.
373  *
374  * The same as glk_put_buffer(), except that you specify a stream @str to print 
375  * to, instead of using the current stream. It is illegal for @str to be %NULL,
376  * or an input-only stream.
377  */
378 void
379 glk_put_buffer_stream(strid_t str, char *buf, glui32 len)
380 {
381         VALID_STREAM(str, return);
382         g_return_if_fail(str->file_mode != filemode_Read);
383         
384         write_buffer_to_stream(str, buf, len);
385 }
386
387 /**
388  * glk_put_buffer_stream_uni:
389  * @str: An output stream.
390  * @buf: An array of Unicode code points.
391  * @len: Length of @buf.
392  *
393  * The same as glk_put_buffer_uni(), except that you specify a stream @str to
394  * print to, instead of using the current stream. It is illegal for @str to be 
395  * %NULL, or an input-only stream.
396  */
397 void
398 glk_put_buffer_stream_uni(strid_t str, glui32 *buf, glui32 len)
399 {
400         VALID_STREAM(str, return);
401         g_return_if_fail(str->file_mode != filemode_Read);
402         
403         write_buffer_to_stream_uni(str, buf, len);
404 }
405
406 /*
407  *
408  **************** READING FUNCTIONS ********************************************
409  *
410  */
411
/* Internal function: Read one big-endian four-byte character from file fp and
return it as a Unicode code point, or -1 on EOF (fewer than four bytes left).
A stored value above 0x7FFFFFFF cannot be represented as a non-negative glsi32
and would be mistaken for an error or EOF, so it is returned as 0xFFFD,
Replacement Character. */
static glsi32
read_ucs4be_char_from_file(FILE *fp)
{
	unsigned char readbuffer[4];
	if(fread(readbuffer, sizeof(unsigned char), 4, fp) < 4)
		return -1; /* EOF */

	/* Assemble in an unsigned type: shifting a promoted int left by 24 is
	undefined when the top byte is 0x80 or more */
	unsigned long value =
		(unsigned long)readbuffer[0] << 24 |
		(unsigned long)readbuffer[1] << 16 |
		(unsigned long)readbuffer[2] << 8  |
		(unsigned long)readbuffer[3];

	if(value > 0x7FFFFFFFUL)
		return 0xFFFD;
	return (glsi32)value;
}
426
427 /* Internal function: Read one UTF-8 character, which may be more than one byte,
428 from file fp and return it as a Unicode code point, or -1 on EOF */
429 static glsi32
430 read_utf8_char_from_file(FILE *fp)
431 {
432         gchar readbuffer[4] = {0, 0, 0, 0}; /* Max UTF-8 width */
433         int foo;
434         gunichar charresult = (gunichar)-2;
435         for(foo = 0; foo < 4 && charresult == (gunichar)-2; foo++) 
436         {
437                 int ch = fgetc(fp);
438                 if(ch == EOF)
439                         return -1;
440                 readbuffer[foo] = (gchar)ch;
441                 charresult = g_utf8_get_char_validated(readbuffer, foo + 1);
442                 /* charresult is -1 if invalid, -2 if incomplete, and the unicode code
443                 point otherwise */
444         }
445         /* Silently return unknown characters as 0xFFFD, Replacement Character */
446         if(charresult == (gunichar)-1 || charresult == (gunichar)-2) 
447                 return 0xFFFD;
448         return charresult;
449 }
450
451 /* Internal function: Tell whether this code point is a Unicode newline. The
452 file pointer and eight-bit flag are included in case the newline is a CR 
453 (U+000D). If the next character is LF (U+000A) then it also belongs to the
454 newline. */
455 static gboolean
456 is_unicode_newline(glsi32 ch, FILE *fp, gboolean utf8)
457 {
458         if(ch == 0x0A || ch == 0x85 || ch == 0x0C || ch == 0x2028 || ch == 0x2029)
459                 return TRUE;
460         if(ch == 0x0D) {
461                 glsi32 ch2 = utf8? read_utf8_char_from_file(fp) : 
462                         read_ucs4be_char_from_file(fp);
463                 if(ch2 != 0x0A)
464                         if(fseek(fp, utf8? -1 : -4, SEEK_CUR) == -1);
465                                 WARNING_S("Seek failed on stream", g_strerror(errno) );
466                 return TRUE;
467         }
468         return FALSE;
469 }
470
471 /* Internal function: Read one character from a stream. Returns a value which
472  can be returned unchanged by glk_get_char_stream_uni(), but 
473  glk_get_char_stream() must replace high values by the placeholder character. */
474 static glsi32
475 get_char_stream_common(strid_t str)
476 {
477         switch(str->type)
478         {
479                 case STREAM_TYPE_MEMORY:
480                         if(str->unicode)
481                         {
482                                 if(!str->ubuffer || str->mark >= str->buflen)
483                                         return -1;
484                                 glui32 ch = str->ubuffer[str->mark++];
485                                 str->read_count++;
486                                 return ch;
487                         }
488                         else
489                         {
490                                 if(!str->buffer || str->mark >= str->buflen)
491                                         return -1;
492                                 unsigned char ch = str->buffer[str->mark++];
493                                 str->read_count++;
494                                 return ch;
495                         }
496                         break;
497                         
498                 case STREAM_TYPE_FILE:
499                         if(str->binary) 
500                         {
501                                 if(str->unicode) 
502                                 {
503                                         glsi32 ch = read_ucs4be_char_from_file(str->file_pointer);
504                                         if(ch == -1)
505                                                 return -1;
506                                         str->read_count++;
507                                         return ch;
508                                 }
509                                 else /* Regular file */
510                                 {
511                                         int ch = fgetc(str->file_pointer);
512                                         if(ch == EOF)
513                                                 return -1;
514                                         
515                                         str->read_count++;
516                                         return ch;
517                                 }
518                         }
519                         else /* Text mode is the same for Unicode and regular files */
520                         {
521                                 glsi32 ch = read_utf8_char_from_file(str->file_pointer);
522                                 if(ch == -1)
523                                         return -1;
524                                         
525                                 str->read_count++;
526                                 return ch;
527                         }
528                 default:
529                         ILLEGAL_PARAM("Reading illegal on stream type: %u", str->type);
530                         return -1;
531         }
532 }
533
534 /**
535  * glk_get_char_stream:
536  * @str: An input stream.
537  *
538  * Reads one character from the stream @str. (There is no notion of a
539  * <quote>current input stream.</quote>) It is illegal for @str to be %NULL, or
540  * an output-only stream.
541  *
542  * The result will be between 0 and 255. As with all basic text functions, Glk
543  * assumes the Latin-1 encoding. See <link 
544  * linkend="chimara-Character-Encoding">Character Encoding</link>. If the end
545  * of the stream has been reached, the result will be -1. 
546  *
547  * <note><para>
548  *   Note that high-bit characters (128..255) are <emphasis>not</emphasis>
549  *   returned as negative numbers.
550  * </para></note>
551  *
552  * If the stream contains Unicode data &mdash; for example, if it was created
553  * with glk_stream_open_file_uni() or glk_stream_open_memory_uni() &mdash; then
554  * characters beyond 255 will be returned as 0x3F (<code>"?"</code>).
555  *
556  * It is usually more efficient to read several characters at once with
557  * glk_get_buffer_stream() or glk_get_line_stream(), as opposed to calling
558  * glk_get_char_stream() several times.
559  *
560  * Returns: A character value between 0 and 255, or -1 on end of stream.
561  */
562 glsi32
563 glk_get_char_stream(strid_t str)
564 {
565         VALID_STREAM(str, return -1);
566         g_return_val_if_fail(str->file_mode == filemode_Read || str->file_mode == filemode_ReadWrite, -1);
567         
568         glsi32 ch = get_char_stream_common(str);
569         return (ch > 0xFF)? PLACEHOLDER : ch;
570 }
571
572 /**
573  * glk_get_char_stream_uni:
574  * @str: An input stream.
575  *
576  * Reads one character from the stream @str. The result will be between 0 and 
577  * 0x7FFFFFFF. If the end of the stream has been reached, the result will be -1.
578  *
579  * Returns: A value between 0 and 0x7FFFFFFF, or -1 on end of stream.
580  */
581 glsi32
582 glk_get_char_stream_uni(strid_t str)
583 {
584         VALID_STREAM(str, return -1);
585         g_return_val_if_fail(str->file_mode == filemode_Read || str->file_mode == filemode_ReadWrite, -1);
586         
587         return get_char_stream_common(str);
588 }
589
590 /**
591  * glk_get_buffer_stream:
592  * @str: An input stream.
593  * @buf: A buffer with space for at least @len characters.
594  * @len: The number of characters to read.
595  *
596  * Reads @len characters from @str, unless the end of stream is reached first.
597  * No terminal null is placed in the buffer.
598  *
599  * Returns: The number of characters actually read.
600  */
601 glui32
602 glk_get_buffer_stream(strid_t str, char *buf, glui32 len)
603 {
604         VALID_STREAM(str, return 0);
605         g_return_val_if_fail(str->file_mode == filemode_Read || str->file_mode == filemode_ReadWrite, 0);
606         g_return_val_if_fail(buf != NULL, 0);
607         
608         switch(str->type)
609         {
610                 case STREAM_TYPE_MEMORY:
611                 {
612                         int copycount = 0;
613                         if(str->unicode)
614                         {
615                                 while(copycount < len && str->ubuffer && str->mark < str->buflen) 
616                                 {
617                                         glui32 ch = str->ubuffer[str->mark++];
618                                         buf[copycount++] = (ch > 0xFF)? '?' : (char)ch;
619                                 }
620                         }
621                         else
622                         {
623                                 if(str->buffer) /* if not, copycount stays 0 */
624                                         copycount = MIN(len, str->buflen - str->mark);
625                                 memmove(buf, str->buffer + str->mark, copycount);
626                                 str->mark += copycount;
627                         }
628
629                         str->read_count += copycount;           
630                         return copycount;
631                 }       
632                 case STREAM_TYPE_FILE:
633                         if(str->binary) 
634                         {
635                                 if(str->unicode) /* Binary file with 4-byte characters */
636                                 {
637                                         /* Read len characters of 4 bytes each */
638                                         unsigned char *readbuffer = g_new0(unsigned char, 4 * len);
639                                         size_t count = fread(readbuffer, sizeof(unsigned char), 4 * len, str->file_pointer);
640                                         /* If there was an incomplete character */
641                                         if(count % 4 != 0) 
642                                         {
643                                                 count -= count % 4;
644                                                 WARNING("Incomplete character in binary Unicode file");
645                                         }
646                                         
647                                         int foo;
648                                         for(foo = 0; foo < count; foo += 4)
649                                         {
650                                                 glsi32 ch = readbuffer[foo] << 24
651                                                         | readbuffer[foo + 1] << 16
652                                                         | readbuffer[foo + 2] << 8
653                                                         | readbuffer[foo + 3];
654                                                 buf[foo / 4] = (ch > 255)? 0x3F : (char)ch;
655                                         }
656                                         g_free(readbuffer);
657                                         str->read_count += count / 4;
658                                         return count / 4;
659                                 }
660                                 else /* Regular binary file */
661                                 {
662                                         size_t count = fread(buf, sizeof(char), len, str->file_pointer);
663                                         str->read_count += count;
664                                         return count;
665                                 }
666                         }
667                         else /* Text mode is the same for Unicode and regular files */
668                         {
669                                 /* Do it character-by-character */
670                                 int foo;
671                                 for(foo = 0; foo < len; foo++)
672                                 {
673                                         glsi32 ch = read_utf8_char_from_file(str->file_pointer);
674                                         if(ch == -1)
675                                                 break;
676                                         str->read_count++;
677                                         buf[foo] = (ch > 0xFF)? 0x3F : (gchar)ch;
678                                 }
679                                 return foo;
680                         }
681                 default:
682                         ILLEGAL_PARAM("Reading illegal on stream type: %u", str->type);
683                         return 0;
684         }
685 }
686
687 /**
688  * glk_get_buffer_stream_uni:
689  * @str: An input stream.
690  * @buf: A buffer with space for at least @len Unicode code points.
691  * @len: The number of characters to read.
692  *
693  * Reads @len Unicode characters from @str, unless the end of stream is reached 
694  * first. No terminal null is placed in the buffer.
695  *
696  * Returns: The number of Unicode characters actually read.
697  */
698 glui32
699 glk_get_buffer_stream_uni(strid_t str, glui32 *buf, glui32 len)
700 {
701         VALID_STREAM(str, return 0);
702         g_return_val_if_fail(str->file_mode == filemode_Read || str->file_mode == filemode_ReadWrite, 0);
703         g_return_val_if_fail(buf != NULL, 0);
704         
705         switch(str->type)
706         {
707                 case STREAM_TYPE_MEMORY:
708                 {
709                         int copycount = 0;
710                         if(str->unicode)
711                         {
712                                 if(str->ubuffer) /* if not, copycount stays 0 */
713                                         copycount = MIN(len, str->buflen - str->mark);
714                                 memmove(buf, str->ubuffer + str->mark, copycount * 4);
715                                 str->mark += copycount;
716                         }
717                         else
718                         {
719                                 while(copycount < len && str->buffer && str->mark < str->buflen)
720                                 {
721                                         unsigned char ch = str->buffer[str->mark++];
722                                         buf[copycount++] = ch;
723                                 }
724                         }
725
726                         str->read_count += copycount;           
727                         return copycount;
728                 }       
729                 case STREAM_TYPE_FILE:
730                         if(str->binary) 
731                         {
732                                 if(str->unicode) /* Binary file with 4-byte characters */
733                                 {
734                                         /* Read len characters of 4 bytes each */
735                                         unsigned char *readbuffer = g_new0(unsigned char, 4 * len);
736                                         size_t count = fread(readbuffer, sizeof(unsigned char), 4 * len, str->file_pointer);
737                                         /* If there was an incomplete character */
738                                         if(count % 4 != 0) 
739                                         {
740                                                 count -= count % 4;
741                                                 WARNING("Incomplete character in binary Unicode file");
742                                         }
743                                         
744                                         int foo;
745                                         for(foo = 0; foo < count; foo += 4)
746                                                 buf[foo / 4] = readbuffer[foo] << 24
747                                                         | readbuffer[foo + 1] << 16
748                                                         | readbuffer[foo + 2] << 8
749                                                         | readbuffer[foo + 3];
750                                         g_free(readbuffer);
751                                         str->read_count += count / 4;
752                                         return count / 4;
753                                 }
754                                 else /* Regular binary file */
755                                 {
756                                         unsigned char *readbuffer = g_new0(unsigned char, len);
757                                         size_t count = fread(readbuffer, sizeof(unsigned char), len, str->file_pointer);
758                                         int foo;
759                                         for(foo = 0; foo < count; foo++)
760                                                 buf[foo] = readbuffer[foo];
761                                         g_free(readbuffer);
762                                         str->read_count += count;
763                                         return count;
764                                 }
765                         }
766                         else /* Text mode is the same for Unicode and regular files */
767                         {
768                                 /* Do it character-by-character */
769                                 int foo;
770                                 for(foo = 0; foo < len; foo++)
771                                 {
772                                         glsi32 ch = read_utf8_char_from_file(str->file_pointer);
773                                         if(ch == -1)
774                                                 break;
775                                         str->read_count++;
776                                         buf[foo] = ch;
777                                 }
778                                 return foo;
779                         }
780                 default:
781                         ILLEGAL_PARAM("Reading illegal on stream type: %u", str->type);
782                         return 0;
783         }
784 }
785
786 /**
787  * glk_get_line_stream:
788  * @str: An input stream.
789  * @buf: A buffer with space for at least @len characters.
790  * @len: The number of characters to read, plus one.
791  *
792  * Reads characters from @str, until either 
793  * <inlineequation>
794  *   <alt>@len - 1</alt>
795  *   <mathphrase>@len - 1</mathphrase>
796  * </inlineequation>
797  * characters have been read or a newline has been read. It then puts a
798  * terminal null (<code>'\0'</code>) aracter on
799  * the end. It returns the number of characters actually read, including the
800  * newline (if there is one) but not including the terminal null.
801  *
802  * Returns: The number of characters actually read.
803  */
804 glui32
805 glk_get_line_stream(strid_t str, char *buf, glui32 len)
806 {
807         VALID_STREAM(str, return 0);
808         g_return_val_if_fail(str->file_mode == filemode_Read || str->file_mode == filemode_ReadWrite, 0);
809         g_return_val_if_fail(buf != NULL, 0);
810
811         switch(str->type)
812         {
813                 case STREAM_TYPE_MEMORY:
814                 {
815                         int copycount = 0;
816                         if(str->unicode)
817                         {
818                                 /* Do it character-by-character */
819                                 while(copycount < len - 1 && str->ubuffer && str->mark < str->buflen) 
820                                 {
821                                         glui32 ch = str->ubuffer[str->mark++];
822                                         /* Check for Unicode newline; slightly different than
823                                         in file streams */
824                                         if(ch == 0x0A || ch == 0x85 || ch == 0x0C || ch == 0x2028 || ch == 0x2029)
825                                         {
826                                                 buf[copycount++] = '\n';
827                                                 break;
828                                         }
829                                         if(ch == 0x0D)
830                                         {
831                                                 if(str->ubuffer[str->mark] == 0x0A)
832                                                         str->mark++; /* skip past next newline */
833                                                 buf[copycount++] = '\n';
834                                                 break;
835                                         }
836                                         buf[copycount++] = (ch > 0xFF)? '?' : (char)ch;
837                                 }
838                                 buf[copycount] = '\0';
839                         }
840                         else
841                         {
842                                 if(str->buffer) /* if not, copycount stays 0 */
843                                         copycount = MIN(len - 1, str->buflen - str->mark);
844                                 char *endptr = memccpy(buf, str->buffer + str->mark, '\n', copycount);
845                                 if(endptr) /* newline was found */
846                                         copycount = endptr - buf; /* Real copy count */
847                                 buf[copycount] = '\0';
848                                 str->mark += copycount;
849                         }
850                         
851                         str->read_count += copycount;
852                         return copycount;
853                 }       
854                 case STREAM_TYPE_FILE:
855                         if(str->binary) 
856                         {
857                                 if(str->unicode) /* Binary file with 4-byte characters */
858                                 {
859                                         /* Do it character-by-character */
860                                         int foo;
861                                         for(foo = 0; foo < len - 1; foo++)
862                                         {
863                                                 glsi32 ch = read_ucs4be_char_from_file(str->file_pointer);
864                                                 if(ch == -1) 
865                                                 {
866                                                         buf[foo] = '\0';
867                                                         return foo - 1;
868                                                 }
869                                                 str->read_count++;
870                                                 if(is_unicode_newline(ch, str->file_pointer, FALSE))
871                                                 {
872                                                         buf[foo] = '\n';
873                                                         buf[foo + 1] = '\0';
874                                                         return foo;
875                                                 }
876                                                 buf[foo] = (ch > 0xFF)? '?' : (char)ch;
877                                         }
878                                         buf[len] = '\0';
879                                         return foo;
880                                 }
881                                 else /* Regular binary file */
882                                 {
883                                         fgets(buf, len, str->file_pointer);
884                                         str->read_count += strlen(buf);
885                                         return strlen(buf);
886                                 }
887                         }
888                         else /* Text mode is the same for Unicode and regular files */
889                         {
890                                 /* Do it character-by-character */
891                                 int foo;
892                                 for(foo = 0; foo < len - 1; foo++)
893                                 {
894                                         glsi32 ch = read_utf8_char_from_file(str->file_pointer);
895                                         if(ch == -1)
896                                         {
897                                                 buf[foo] = '\0';
898                                                 return foo - 1;
899                                         }
900                                         str->read_count++;
901                                         if(is_unicode_newline(ch, str->file_pointer, TRUE))
902                                         {
903                                                 buf[foo] = '\n';
904                                                 buf[foo + 1] = '\0';
905                                                 return foo;
906                                         }
907                                         buf[foo] = (ch > 0xFF)? 0x3F : (char)ch;
908                                 }
909                                 buf[len] = '\0';
910                                 return foo;
911                         }
912                 default:
913                         ILLEGAL_PARAM("Reading illegal on stream type: %u", str->type);
914                         return 0;
915         }
916 }
917
918 /**
919  * glk_get_line_stream_uni:
920  * @str: An input stream.
921  * @buf: A buffer with space for at least @len Unicode code points.
922  * @len: The number of characters to read, plus one.
923  *
924  * Reads Unicode characters from @str, until either 
925  * <inlineequation>
926  *   <alt>@len - 1</alt>
927  *   <mathphrase>@len - 1</mathphrase>
928  * </inlineequation> 
929  * Unicode characters have been read or a newline has been read. It then puts a
930  * terminal null (a zero value) on the end.
931  *
932  * Returns: The number of characters actually read, including the newline (if
933  * there is one) but not including the terminal null.
934  */
935 glui32
936 glk_get_line_stream_uni(strid_t str, glui32 *buf, glui32 len)
937 {
938         VALID_STREAM(str, return 0);
939         g_return_val_if_fail(str->file_mode == filemode_Read || str->file_mode == filemode_ReadWrite, 0);
940         g_return_val_if_fail(buf != NULL, 0);
941
942         switch(str->type)
943         {
944                 case STREAM_TYPE_MEMORY:
945                 {
946                         int copycount = 0;
947                         if(str->unicode)
948                         {
949                                 /* Do it character-by-character */
950                                 while(copycount < len - 1 && str->ubuffer && str->mark < str->buflen) 
951                                 {
952                                         glui32 ch = str->ubuffer[str->mark++];
953                                         /* Check for Unicode newline; slightly different than
954                                         in file streams */
955                                         if(ch == 0x0A || ch == 0x85 || ch == 0x0C || ch == 0x2028 || ch == 0x2029)
956                                         {
957                                                 buf[copycount++] = '\n';
958                                                 break;
959                                         }
960                                         if(ch == 0x0D)
961                                         {
962                                                 if(str->ubuffer[str->mark] == 0x0A)
963                                                         str->mark++; /* skip past next newline */
964                                                 buf[copycount++] = '\n';
965                                                 break;
966                                         }
967                                         buf[copycount++] = ch;
968                                 }
969                                 buf[copycount] = '\0';
970                         }
971                         else
972                         {
973                                 /* No recourse to memccpy(), so do it character-by-character */
974                                 while(copycount < len - 1 && str->buffer && str->mark < str->buflen)
975                                 {
976                                         gchar ch = str->buffer[str->mark++];
977                                         /* Check for newline */
978                                         if(ch == '\n') /* Also check for \r and \r\n? */
979                                         {
980                                                 buf[copycount++] = '\n';
981                                                 break;
982                                         }
983                                         buf[copycount++] = (unsigned char)ch;
984                                 }
985                                 buf[copycount] = 0;
986                         }
987                         
988                         str->read_count += copycount;
989                         return copycount;
990                 }       
991                 case STREAM_TYPE_FILE:
992                         if(str->binary) 
993                         {
994                                 if(str->unicode) /* Binary file with 4-byte characters */
995                                 {
996                                         /* Do it character-by-character */
997                                         int foo;
998                                         for(foo = 0; foo < len - 1; foo++)
999                                         {
1000                                                 glsi32 ch = read_ucs4be_char_from_file(str->file_pointer);
1001                                                 if(ch == -1) 
1002                                                 {
1003                                                         buf[foo] = 0;
1004                                                         return foo - 1;
1005                                                 }
1006                                                 str->read_count++;
1007                                                 if(is_unicode_newline(ch, str->file_pointer, FALSE))
1008                                                 {
1009                                                         buf[foo] = ch; /* Preserve newline types??? */
1010                                                         buf[foo + 1] = 0;
1011                                                         return foo;
1012                                                 }
1013                                                 buf[foo] = ch;
1014                                         }
1015                                         buf[len] = 0;
1016                                         return foo;
1017                                 }
1018                                 else /* Regular binary file */
1019                                 {
1020                                         gchar *readbuffer = g_new0(gchar, len);
1021                                         fgets(readbuffer, len, str->file_pointer);
1022                                         glui32 count = strlen(readbuffer) + 1; /* Copy terminator */
1023                                         int foo;
1024                                         for(foo = 0; foo < count; foo++)
1025                                                 buf[foo] = (unsigned char)(readbuffer[foo]);
1026                                         str->read_count += count;
1027                                         return count;
1028                                 }
1029                         }
1030                         else /* Text mode is the same for Unicode and regular files */
1031                         {
1032                                 /* Do it character-by-character */
1033                                 int foo;
1034                                 for(foo = 0; foo < len - 1; foo++)
1035                                 {
1036                                         glsi32 ch = read_utf8_char_from_file(str->file_pointer);
1037                                         if(ch == -1)
1038                                         {
1039                                                 buf[foo] = 0;
1040                                                 return foo - 1;
1041                                         }
1042                                         str->read_count++;
1043                                         if(is_unicode_newline(ch, str->file_pointer, TRUE))
1044                                         {
1045                                                 buf[foo] = ch; /* Preserve newline types??? */
1046                                                 buf[foo + 1] = 0;
1047                                                 return foo;
1048                                         }
1049                                         buf[foo] = ch;
1050                                 }
1051                                 buf[len] = 0;
1052                                 return foo;
1053                         }
1054                 default:
1055                         ILLEGAL_PARAM("Reading illegal on stream type: %u", str->type);
1056                         return 0;
1057         }
1058 }
1059
1060 /*
1061  *
1062  **************** SEEKING FUNCTIONS ********************************************
1063  *
1064  */
1065
1066 /**
1067  * glk_stream_get_position:
1068  * @str: A file or memory stream.
1069  *
1070  * Returns the position of the read/write mark in @str. For memory streams and
1071  * binary file streams, this is exactly the number of characters read or written
1072  * from the beginning of the stream (unless you have moved the mark with
1073  * glk_stream_set_position().) For text file streams, matters are more 
1074  * ambiguous, since (for example) writing one byte to a text file may store more
1075  * than one character in the platform's native encoding. You can only be sure
1076  * that the position increases as you read or write to the file.
1077  *
1078  * Additional complication: for Latin-1 memory and file streams, a character is
1079  * a byte. For Unicode memory and file streams (those created by
1080  * glk_stream_open_file_uni() and glk_stream_open_memory_uni()), a character is
1081  * a 32-bit word. So in a binary Unicode file, positions are multiples of four
1082  * bytes.
1083  *
1084  * <note><para>
1085  *   If this bothers you, don't use binary Unicode files. I don't think they're
1086  *   good for much anyhow.
1087  * </para></note>
1088  *
1089  * Returns: position of the read/write mark in @str.
1090  */
1091 glui32
1092 glk_stream_get_position(strid_t str)
1093 {
1094         VALID_STREAM(str, return 0);
1095         
1096         switch(str->type)
1097         {
1098                 case STREAM_TYPE_MEMORY:
1099                         return str->mark;
1100                 case STREAM_TYPE_FILE:
1101                         return ftell(str->file_pointer);
1102                 default:
1103                         ILLEGAL_PARAM("Seeking illegal on stream type: %u", str->type);
1104                         return 0;
1105         }
1106 }
1107
1108 /**
1109  * glk_stream_set_position:
1110  * @str: A file or memory stream.
1111  * @pos: The position to set the mark to, relative to @seekmode.
1112  * @seekmode: One of %seekmode_Start, %seekmode_Current, or %seekmode_End.
1113  *
1114  * Sets the position of the read/write mark in @str. The position is controlled
1115  * by @pos, and the meaning of @pos is controlled by @seekmode. See the
1116  * <code>seekmode_</code> constants below.
1117  *
1118  * It is illegal to specify a position before the beginning or after the end of
1119  * the file.
1120  *
1121  * In binary files, the mark position is exact &mdash; it corresponds with the
1122  * number of characters you have read or written. In text files, this mapping 
1123  * can vary, because of linefeed conventions or other character-set 
1124  * approximations. See <link linkend="chimara-Streams">Streams</link>.
1125  * glk_stream_set_position() and glk_stream_get_position() measure positions in
1126  * the platform's native encoding &mdash; after character cookery. Therefore,
1127  * in a text stream, it is safest to use glk_stream_set_position() only to move
1128  * to the beginning or end of a file, or to a position determined by
1129  * glk_stream_get_position().
1130  *
1131  * Again, in Latin-1 streams, characters are bytes. In Unicode streams,
1132  * characters are 32-bit words, or four bytes each.
1133  */
1134 void
1135 glk_stream_set_position(strid_t str, glsi32 pos, glui32 seekmode)
1136 {
1137         VALID_STREAM(str, return);
1138         g_return_if_fail(!(seekmode == seekmode_Start && pos < 0));
1139         g_return_if_fail(!(seekmode == seekmode_End && pos > 0));
1140         
1141         switch(str->type)
1142         {
1143                 case STREAM_TYPE_MEMORY:
1144                         switch(seekmode)
1145                         {
1146                                 case seekmode_Start:   str->mark = pos;  break;
1147                                 case seekmode_Current: str->mark += pos; break;
1148                                 case seekmode_End:     str->mark = str->buflen + pos; break;
1149                                 default:
1150                                         g_return_if_reached();
1151                                         return;
1152                         }
1153                         break;
1154                 case STREAM_TYPE_FILE:
1155                 {
1156                         int whence;
1157                         switch(seekmode)
1158                         {
1159                                 case seekmode_Start:   whence = SEEK_SET; break;
1160                                 case seekmode_Current: whence = SEEK_CUR; break;
1161                                 case seekmode_End:     whence = SEEK_END; break;
1162                                 default:
1163                                         g_return_if_reached();
1164                                         return;
1165                         }
1166                         if(fseek(str->file_pointer, pos, whence) == -1)
1167                                 WARNING("Seek failed on file stream");
1168                         break;
1169                 }
1170                 default:
1171                         ILLEGAL_PARAM("Seeking illegal on stream type: %u", str->type);
1172                         return;
1173         }
1174 }
1175