File en memory streams, en lezen van input streams. Nog niet getest. Alle
[projects/chimara/chimara.git] / src / strio.c
1 #include "stream.h"
2 #include <stdio.h>
3 #include <string.h>
4 #include <glib.h>
5 #include <glib/gstdio.h>
6
/* Smaller of two values. Each argument and the whole expansion are
parenthesized so that operators inside an argument cannot rebind against the
comparison. Both arguments are still evaluated twice, so do not pass
expressions with side effects. */
#define min(x,y) ( ((x) > (y))? (y) : (x) )
8
9 /*
10  *
11  **************** WRITING FUNCTIONS ********************************************
12  *
13  */
14
15 /* Internal function: change illegal (control) characters in a string to a
16 placeholder character. Must free returned string afterwards. */
17 static gchar *
18 remove_latin1_control_characters(unsigned char *s, gssize len)
19 {
20         gchar *retval = g_new0(gchar, len);
21         int i;
22         for(i = 0; i < len; i++)
23                 if( (s[i] < 32 && s[i] != 10) || (s[i] >= 127 && s[i] <= 159) )
24                         retval[i] = '?';
25                         /* Our placeholder character is '?'; other options are possible,
26                         like printing "0x7F" or something */
27                 else
28                         retval[i] = s[i];
29         return retval;
30 }
31
32 /* Internal function: convert a Latin-1 string to a UTF-8 string, replacing
33 Latin-1 control characters by a placeholder first. The UTF-8 string must be
34 freed afterwards. Returns NULL on error. */
35 static gchar *
36 convert_latin1_to_utf8(gchar *s, gssize len)
37 {
38         GError *error = NULL;
39         gchar *utf8;
40         gchar *canonical = remove_latin1_control_characters( (unsigned char *)s,
41                 len);
42         utf8 = g_convert(canonical, len, "UTF-8", "ISO-8859-1", NULL, NULL, &error);
43         g_free(canonical);
44         
45         if(utf8 == NULL)
46         {
47                 error_dialog(NULL, error, "Error during latin1->utf8 conversion: ");
48                 return NULL;
49         }
50         
51         return utf8;
52 }
53
54 /* Internal function: write a UTF-8 string to a window's text buffer. */
55 static void
56 write_utf8_to_window(winid_t win, gchar *s)
57 {
58         GtkTextBuffer *buffer = 
59                 gtk_text_view_get_buffer( GTK_TEXT_VIEW(win->widget) );
60
61         GtkTextIter iter;
62         gtk_text_buffer_get_end_iter(buffer, &iter);
63         gtk_text_buffer_insert(buffer, &iter, s, -1);
64 }
65
66 /* Internal function: write a UTF-8 buffer with length to a stream. */
67 static void
68 write_buffer_to_stream(strid_t str, gchar *buf, glui32 len)
69 {
70         switch(str->stream_type)
71         {
72                 case STREAM_TYPE_WINDOW:
73                         /* Each window type has a different way of printing to it */
74                         switch(str->window->window_type)
75                         {
76                                 /* Printing to these windows' streams does nothing */
77                                 case wintype_Blank:
78                                 case wintype_Pair:
79                                 case wintype_Graphics:
80                                         str->write_count += len;
81                                         break;
82                                 /* Text buffer window */        
83                                 case wintype_TextBuffer:
84                                 {
85                                         gchar *utf8 = convert_latin1_to_utf8(buf, len);
86                                         write_utf8_to_window(str->window, utf8);
87                                         g_free(utf8);
88                                 }       
89                                         str->write_count += len;
90                                         break;
91                                 default:
92                                         g_warning("%s: Writing to this kind of window unsupported.",
93                                                 __func__);
94                         }
95                         
96                         /* Now write the same buffer to the window's echo stream */
97                         if(str->window->echo_stream != NULL)
98                                 write_buffer_to_stream(str->window->echo_stream, buf, len);
99                         
100                         break;
101                         
102                 case STREAM_TYPE_MEMORY:
103                         if(str->unicode && str->ubuffer)
104                         {
105                                 int foo = 0;
106                                 while(str->mark < str->buflen && foo < len)
107                                         str->ubuffer[str->mark++] = (glui32)buf[foo++];
108                         }
109                         if(!str->unicode && str->buffer)
110                         {
111                                 memmove(str->buffer + str->mark, buf, 
112                                         min(len, str->buflen - str->mark));
113                         }
114
115                         str->write_count += len;
116                         break;
117                         
118                 case STREAM_TYPE_FILE:
119                         if(str->binary) 
120                         {
121                                 if(str->unicode) 
122                                 {
123                                         /* Convert to four-byte big-endian */
124                                         gchar *writebuffer = g_new0(gchar, len * 4);
125                                         int i;
126                                         for(i = 0; i < len; i++)
127                                                 writebuffer[i * 4 + 3] = buf[i];
128                                         fwrite(writebuffer, sizeof(gchar), len * 4, 
129                                                 str->file_pointer);
130                                 } 
131                                 else /* Regular file */
132                                 {
133                                         fwrite(buf, sizeof(gchar), len, str->file_pointer);
134                                 }
135                         }
136                         else /* Text mode is the same for Unicode and regular files */
137                         {
138                                 gchar *utf8 = convert_latin1_to_utf8(buf, len);
139                                 g_fprintf(str->file_pointer, "%s", utf8);
140                                 g_free(utf8);
141                         }
142                         
143                         str->write_count += len;
144                         break;
145                 default:
146                         g_warning("%s: Writing to this kind of stream unsupported.",
147                                 __func__);
148         }
149 }
150
151 /**
152  * glk_put_char_stream:
153  * @str: An output stream.
154  * @ch: A character in Latin-1 encoding.
155  *
156  * Prints one character @ch to the stream @str. It is illegal for @str to be
157  * %NULL, or an input-only stream.
158  */
159 void
160 glk_put_char_stream(strid_t str, unsigned char ch)
161 {
162         g_return_if_fail(str != NULL);
163         g_return_if_fail(str->file_mode != filemode_Read);
164         
165         write_buffer_to_stream(str, (gchar *)&ch, 1);
166 }
167
168 /**
169  * glk_put_string_stream:
170  * @str: An output stream.
171  * @s: A null-terminated string in Latin-1 encoding.
172  *
173  * Prints @s to the stream @str. It is illegal for @str to be %NULL, or an
174  * input-only stream.
175  */
176 void
177 glk_put_string_stream(strid_t str, char *s)
178 {
179         g_return_if_fail(str != NULL);
180         g_return_if_fail(str->file_mode != filemode_Read);
181
182         write_buffer_to_stream(str, (gchar *)s, strlen(s));
183 }
184
185 /**
186  * glk_put_buffer_stream:
187  * @str: An output stream.
188  * @buf: An array of characters in Latin-1 encoding.
189  * @len: Length of @buf.
190  *
191  * Prints @buf to the stream @str. It is illegal for @str to be %NULL, or an
192  * input-only stream.
193  */
194 void
195 glk_put_buffer_stream(strid_t str, char *buf, glui32 len)
196 {
197         g_return_if_fail(str != NULL);
198         g_return_if_fail(str->file_mode != filemode_Read);
199         
200         write_buffer_to_stream(str, (gchar *)buf, len);
201 }
202
203 /*
204  *
205  **************** READING FUNCTIONS ********************************************
206  *
207  */
208
209 /* Internal function: Read one big-endian four-byte character from file fp and
210 return it as a Unicode code point, or -1 on EOF */
211 static glsi32
212 read_ucs4be_char_from_file(FILE *fp)
213 {
214         unsigned char readbuffer[4];
215         if(fread(readbuffer, sizeof(unsigned char), 4, fp) < 4)
216                 return -1; /* EOF */
217         return
218                 readbuffer[0] << 24 | 
219                 readbuffer[1] << 16 | 
220                 readbuffer[2] << 8  | 
221                 readbuffer[3];
222 }
223
224 /* Internal function: Read one UTF-8 character, which may be more than one byte,
225 from file fp and return it as a Unicode code point, or -1 on EOF */
226 static glsi32
227 read_utf8_char_from_file(FILE *fp)
228 {
229         gchar readbuffer[4] = {0, 0, 0, 0}; /* Max UTF-8 width */
230         int foo;
231         gunichar charresult = (gunichar)-2;
232         for(foo = 0; foo < 4 && charresult == (gunichar)-2; foo++) 
233         {
234                 int ch = fgetc(fp);
235                 if(ch == EOF)
236                         return -1;
237                 readbuffer[foo] = (gchar)ch;
238                 charresult = g_utf8_get_char_validated(readbuffer, foo);
239                 /* charresult is -1 if invalid, -2 if incomplete, and the unicode code
240                 point otherwise */
241         }
242         /* Silently return unknown characters as 0xFFFD, Replacement Character */
243         if(charresult == (gunichar)-1 || charresult == (gunichar)-2) 
244                 return 0xFFFD;
245         return charresult;
246 }
247
248 /* Internal function: Tell whether this code point is a Unicode newline. The
249 file pointer and eight-bit flag are included in case the newline is a CR 
250 (U+000D). If the next character is LF (U+000A) then it also belongs to the
251 newline. */
252 static gboolean
253 is_unicode_newline(glsi32 ch, FILE *fp, gboolean utf8)
254 {
255         if(ch == 0x0A || ch == 0x85 || ch == 0x0C || ch == 0x2028 || ch == 0x2029)
256                 return TRUE;
257         if(ch == 0x0D) {
258                 glsi32 ch2 = utf8? read_utf8_char_from_file(fp) : 
259                         read_ucs4be_char_from_file(fp);
260                 if(ch2 != 0x0A)
261                         fseek(fp, utf8? -1 : -4, SEEK_CUR);
262                 return TRUE;
263         }
264         return FALSE;
265 }
266
267 /**
268  * glk_get_char_stream:
269  * @str: An input stream.
270  *
271  * Reads one character from the stream @str. (There is no notion of a ``current
272  * input stream.'') It is illegal for @str to be %NULL, or an output-only
273  * stream.
274  *
275  * The result will be between 0 and 255. As with all basic text functions, Glk
276  * assumes the Latin-1 encoding. If the end of the stream has been reached, the
277  * result will be -1. Note that high-bit characters (128..255) are
278  * <emphasis>not</emphasis> returned as negative numbers.
279  *
280  * If the stream contains Unicode data --- for example, if it was created with
281  * glk_stream_open_file_uni() or glk_stream_open_memory_uni() --- then
282  * characters beyond 255 will be returned as 0x3F ("?").
283  *
284  * Returns: A character value between 0 and 255, or -1 on end of stream.
285  */
286 glsi32
287 glk_get_char_stream(strid_t str)
288 {
289         g_return_val_if_fail(str != NULL, -1);
290         g_return_val_if_fail(str->file_mode == filemode_Read
291                 || str->file_mode == filemode_ReadWrite, -1);
292         
293         switch(str->stream_type)
294         {
295                 case STREAM_TYPE_MEMORY:
296                         if(str->unicode)
297                         {
298                                 if(!str->ubuffer || str->mark >= str->buflen)
299                                         return -1;
300                                 glui32 ch = str->ubuffer[str->mark++];
301                                 str->read_count++;
302                                 return (ch > 0xFF)? 0x3F : ch;
303                         }
304                         else
305                         {
306                                 if(!str->buffer || str->mark >= str->buflen)
307                                         return -1;
308                                 char ch = str->buffer[str->mark++];
309                                 str->read_count++;
310                                 return ch;
311                         }
312                         break;
313                         
314                 case STREAM_TYPE_FILE:
315                         if(str->binary) 
316                         {
317                                 if(str->unicode) 
318                                 {
319                                         glsi32 ch = read_ucs4be_char_from_file(str->file_pointer);
320                                         if(ch == -1)
321                                                 return -1;
322                                         str->read_count++;
323                                         return (ch > 0xFF)? 0x3F : ch;
324                                 }
325                                 else /* Regular file */
326                                 {
327                                         int ch = fgetc(str->file_pointer);
328                                         if(ch == EOF)
329                                                 return -1;
330                                         
331                                         str->read_count++;
332                                         return ch;
333                                 }
334                         }
335                         else /* Text mode is the same for Unicode and regular files */
336                         {
337                                 glsi32 ch = read_utf8_char_from_file(str->file_pointer);
338                                 if(ch == -1)
339                                         return -1;
340                                         
341                                 str->read_count++;
342                                 return (ch > 0xFF)? 0x3F : ch;
343                         }
344                 default:
345                         g_warning("%s: Reading from this kind of stream unsupported.",
346                                 __func__);
347                         return -1;
348         }
349 }
350
351 /**
352  * glk_get_buffer_stream:
353  * @str: An input stream.
354  * @buf: A buffer with space for at least @len characters.
355  * @len: The number of characters to read.
356  *
357  * Reads @len characters from @str, unless the end of stream is reached first.
358  * No terminal null is placed in the buffer.
359  *
360  * Returns: The number of characters actually read.
361  */
362 glui32
363 glk_get_buffer_stream(strid_t str, char *buf, glui32 len)
364 {
365         g_return_val_if_fail(str != NULL, 0);
366         g_return_val_if_fail(str->file_mode == filemode_Read
367                 || str->file_mode == filemode_ReadWrite, 0);
368         g_return_val_if_fail(buf != NULL, 0);
369         
370         switch(str->stream_type)
371         {
372                 case STREAM_TYPE_MEMORY:
373                 {
374                         int copycount = 0;
375                         if(str->unicode)
376                         {
377                                 while(copycount < len && str->ubuffer 
378                                         && str->mark < str->buflen) 
379                                 {
380                                         glui32 ch = str->ubuffer[str->mark++];
381                                         buf[copycount++] = (ch > 0xFF)? '?' : (char)ch;
382                                 }
383                         }
384                         else
385                         {
386                                 if(str->buffer) /* if not, copycount stays 0 */
387                                         copycount = min(len, str->buflen - str->mark);
388                                 memmove(buf, str->buffer + str->mark, copycount);
389                         }
390
391                         str->read_count += copycount;           
392                         return copycount;
393                 }       
394                 case STREAM_TYPE_FILE:
395                         if(str->binary) 
396                         {
397                                 if(str->unicode) /* Binary file with 4-byte characters */
398                                 {
399                                         /* Read len characters of 4 bytes each */
400                                         unsigned char *readbuffer = g_new0(unsigned char, 4 * len);
401                                         size_t count = fread(readbuffer, sizeof(unsigned char), 
402                                                 4 * len, str->file_pointer);
403                                         /* If there was an incomplete character */
404                                         if(count % 4 != 0) 
405                                         {
406                                                 count -= count % 4;
407                                                 g_warning("%s: Incomplete character in binary Unicode "
408                                                         "file.", __func__);
409                                         }
410                                         
411                                         str->read_count += count / 4;
412                                         int foo;
413                                         for(foo = 0; foo < count; foo += 4)
414                                         {
415                                                 glsi32 ch = readbuffer[foo] << 24
416                                                         | readbuffer[foo + 1] << 16
417                                                         | readbuffer[foo + 2] << 8
418                                                         | readbuffer[foo + 3];
419                                                 buf[foo / 4] = (ch > 255)? 0x3F : (char)ch;
420                                         }
421                                         g_free(readbuffer);
422                                         return count / 4;
423                                 }
424                                 else /* Regular binary file */
425                                 {
426                                         size_t count = fread(buf, sizeof(char), len, 
427                                                 str->file_pointer);
428                                         str->read_count += count;
429                                         return count;
430                                 }
431                         }
432                         else /* Text mode is the same for Unicode and regular files */
433                         {
434                                 /* Do it character-by-character */
435                                 int foo;
436                                 for(foo = 0; foo < len; foo++)
437                                 {
438                                         glsi32 ch = read_utf8_char_from_file(str->file_pointer);
439                                         if(ch == -1)
440                                                 break;
441                                         str->read_count++;
442                                         buf[foo] = (ch > 0xFF)? 0x3F : (gchar)ch;
443                                 }
444                                 return foo;
445                         }
446                 default:
447                         g_warning("%s: Reading from this kind of stream unsupported.",
448                                 __func__);
449                         return 0;
450         }
451 }
452
453 /**
454  * glk_get_line_stream:
455  * @str: An input stream.
456  * @buf: A buffer with space for at least @len characters.
457  * @len: The number of characters to read, plus one.
458  *
459  * Reads characters from @str, until either @len - 1 characters have been read
460  * or a newline has been read. It then puts a terminal null ('\0') aracter on
461  * the end. It returns the number of characters actually read, including the
462  * newline (if there is one) but not including the terminal null.
463  *
464  * It is usually more efficient to read several characters at once with
465  * glk_get_buffer_stream() or glk_get_line_stream(), as opposed to calling
466  * glk_get_char_stream() several times.
467  *
468  * Returns: The number of characters actually read.
469  */
470 glui32
471 glk_get_line_stream(strid_t str, char *buf, glui32 len)
472 {
473         g_return_val_if_fail(str != NULL, 0);
474         g_return_val_if_fail(str->file_mode == filemode_Read
475                 || str->file_mode == filemode_ReadWrite, 0);
476         g_return_val_if_fail(buf != NULL, 0);
477
478         switch(str->stream_type)
479         {
480                 case STREAM_TYPE_MEMORY:
481                 {
482                         int copycount = 0;
483                         if(str->unicode)
484                         {
485                                 /* Do it character-by-character */
486                                 while(copycount < len - 1 && str->ubuffer 
487                                         && str->mark < str->buflen) 
488                                 {
489                                         glui32 ch = str->ubuffer[str->mark++];
490                                         /* Check for Unicode newline; slightly different than
491                                         in file streams */
492                                         if(ch == 0x0A || ch == 0x85 || ch == 0x0C || ch == 0x2028 
493                                                 || ch == 0x2029)
494                                         {
495                                                 buf[copycount++] = '\n';
496                                                 break;
497                                         }
498                                         if(ch == 0x0D)
499                                         {
500                                                 if(str->ubuffer[str->mark] == 0x0A)
501                                                         str->mark++; /* skip past next newline */
502                                                 buf[copycount++] = '\n';
503                                                 break;
504                                         }
505                                         buf[copycount++] = (ch > 0xFF)? '?' : (char)ch;
506                                 }
507                                 buf[copycount] = '\0';
508                         }
509                         else
510                         {
511                                 if(str->buffer) /* if not, copycount stays 0 */
512                                         copycount = min(len, str->buflen - str->mark);
513                                 memccpy(buf, str->buffer + str->mark, '\n', copycount);
514                         }
515                         
516                         str->read_count += copycount;
517                         return copycount;
518                 }       
519                 case STREAM_TYPE_FILE:
520                         if(str->binary) 
521                         {
522                                 if(str->unicode) /* Binary file with 4-byte characters */
523                                 {
524                                         /* Do it character-by-character */
525                                         int foo;
526                                         for(foo = 0; foo < len - 1; foo++)
527                                         {
528                                                 glsi32 ch = 
529                                                         read_ucs4be_char_from_file(str->file_pointer);
530                                                 if(ch == -1) 
531                                                 {
532                                                         buf[foo] = '\0';
533                                                         return foo - 1;
534                                                 }
535                                                 str->read_count++;
536                                                 if(is_unicode_newline(ch, str->file_pointer, FALSE))
537                                                 {
538                                                         buf[foo] = '\n';
539                                                         buf[foo + 1] = '\0';
540                                                         return foo;
541                                                 }
542                                                 buf[foo] = (ch > 0xFF)? '?' : (char)ch;
543                                         }
544                                         buf[len] = '\0';
545                                         return foo;
546                                 }
547                                 else /* Regular binary file */
548                                 {
549                                         fgets(buf, len, str->file_pointer);
550                                         str->read_count += strlen(buf);
551                                         return strlen(buf);
552                                 }
553                         }
554                         else /* Text mode is the same for Unicode and regular files */
555                         {
556                                 /* Do it character-by-character */
557                                 int foo;
558                                 for(foo = 0; foo < len - 1; foo++)
559                                 {
560                                         glsi32 ch = read_utf8_char_from_file(str->file_pointer);
561                                         if(ch == -1)
562                                         {
563                                                 buf[foo] = '\0';
564                                                 return foo - 1;
565                                         }
566                                         str->read_count++;
567                                         if(is_unicode_newline(ch, str->file_pointer, TRUE))
568                                         {
569                                                 buf[foo] = '\n';
570                                                 buf[foo + 1] = '\0';
571                                                 return foo;
572                                         }
573                                         buf[foo] = (ch > 0xFF)? 0x3F : (char)ch;
574                                 }
575                                 buf[len] = '\0';
576                                 return foo;
577                         }
578                 default:
579                         g_warning("%s: Reading from this kind of stream unsupported.",
580                                 __func__);
581                         return 0;
582         }
583 }
584
585 /*
586  *
587  **************** SEEKING FUNCTIONS ********************************************
588  *
589  */
590
591 /**
592  * glk_stream_get_position:
593  * @str: A file or memory stream.
594  *
595  * Returns the position of the read/write mark in @str. For memory streams and
596  * binary file streams, this is exactly the number of characters read or written
597  * from the beginning of the stream (unless you have moved the mark with
598  * glk_stream_set_position().) For text file streams, matters are more 
599  * ambiguous, since (for example) writing one byte to a text file may store more
600  * than one character in the platform's native encoding. You can only be sure
601  * that the position increases as you read or write to the file.
602  *
603  * Additional complication: for Latin-1 memory and file streams, a character is
604  * a byte. For Unicode memory and file streams (those created by
605  * glk_stream_open_file_uni() and glk_stream_open_memory_uni()), a character is
606  * a 32-bit word. So in a binary Unicode file, positions are multiples of four
607  * bytes.
608  *
609  * Returns: position of the read/write mark in @str.
610  */
611 glui32
612 glk_stream_get_position(strid_t str)
613 {
614         g_return_val_if_fail(str != NULL, 0);
615         
616         switch(str->stream_type)
617         {
618                 case STREAM_TYPE_MEMORY:
619                         return str->mark;
620                 case STREAM_TYPE_FILE:
621                         return ftell(str->file_pointer);
622                 default:
623                         g_warning("%s: Seeking not supported on this type of stream.",
624                                 __func__);
625                         return 0;
626         }
627 }
628
629 /**
630  * glk_stream_set_position:
631  * @str: A file or memory stream.
632  * @pos: The position to set the mark to, relative to @seekmode.
633  * @seekmode: One of #seekmode_Start, #seekmode_Current, or #seekmode_End.
634  *
635  * Sets the position of the read/write mark in @str. The position is controlled
636  * by @pos, and the meaning of @pos is controlled by @seekmode:
637  * <itemizedlist>
638  *  <listitem>#seekmode_Start: @pos characters after the beginning of the file.
639  *  </listitem>
640  *  <listitem>#seekmode_Current: @pos characters after the current position
641  *  (moving backwards if @pos is negative.)</listitem>
642  *  <listitem>#seekmode_End: @pos characters after the end of the file. (@pos
643  *  should always be zero or negative, so that this will move backwards to a
644  *  position within the file.</listitem>
645  * </itemizedlist>
646  * It is illegal to specify a position before the beginning or after the end of
647  * the file.
648  *
649  * In binary files, the mark position is exact --- it corresponds with the
650  * number of characters you have read or written. In text files, this mapping 
651  * can vary, because of linefeed conventions or other character-set 
652  * approximations. glk_stream_set_position() and glk_stream_get_position()
653  * measure positions in the platform's native encoding --- after character
654  * cookery. Therefore, in a text stream, it is safest to use
655  * glk_stream_set_position() only to move to the beginning or end of a file, or
656  * to a position determined by glk_stream_get_position().
657  *
658  * Again, in Latin-1 streams, characters are bytes. In Unicode streams,
659  * characters are 32-bit words, or four bytes each.
660  */
661 void
662 glk_stream_set_position(strid_t str, glsi32 pos, glui32 seekmode)
663 {
664         g_return_if_fail(str != NULL);
665         g_return_if_fail(!(seekmode == seekmode_Start && pos < 0));
666         g_return_if_fail(!(seekmode == seekmode_End || pos > 0));
667         
668         switch(str->stream_type)
669         {
670                 case STREAM_TYPE_MEMORY:
671                         switch(seekmode)
672                         {
673                                 case seekmode_Start:   str->mark = pos;  break;
674                                 case seekmode_Current: str->mark += pos; break;
675                                 case seekmode_End:     str->mark = str->buflen + pos; break;
676                                 default:
677                                         g_assert_not_reached();
678                                         return;
679                         }
680                         break;
681                 case STREAM_TYPE_FILE:
682                 {
683                         int whence;
684                         switch(seekmode)
685                         {
686                                 case seekmode_Start:   whence = SEEK_SET; break;
687                                 case seekmode_Current: whence = SEEK_CUR; break;
688                                 case seekmode_End:     whence = SEEK_END; break;
689                                 default:
690                                         g_assert_not_reached();
691                                         return;
692                         }
693                         fseek(str->file_pointer, pos, whence);
694                         break;
695                 }
696                 default:
697                         g_warning("%s: Seeking not supported on this type of stream.",
698                                 __func__);
699                         return;
700         }
701 }
702