X-Git-Url: https://git.stderr.nl/gitweb?a=blobdiff_plain;f=libchimara%2Fstrio.c;h=d8dda82a00627d273e705ffd8cf3e0ec2ed0fbf0;hb=7cbc2c68d146433b1d438d60406ca05e14a523ce;hp=24899c5e34c2516254897c44157f7a851949d478;hpb=ed5dac111cfdb3805d259fd0a1997c36c8807722;p=projects%2Fchimara%2Fchimara.git diff --git a/libchimara/strio.c b/libchimara/strio.c index 24899c5..d8dda82 100644 --- a/libchimara/strio.c +++ b/libchimara/strio.c @@ -1,3 +1,4 @@ +#include #include "charset.h" #include "magic.h" #include "stream.h" @@ -6,6 +7,24 @@ #include #include #include +#include + +/* Internal function: ensure that an fseek() is called on a file pointer in + between reading and writing operations, and vice versa. This will only come up + for ReadWrite or WriteAppend files. */ +static void +ensure_file_operation(strid_t str, glui32 op) +{ + if(str->lastop != 0 && str->lastop != op) + { + long pos = ftell(str->file_pointer); + if(pos == -1) + WARNING_S("ftell() failed", g_strerror(errno)); + if(fseek(str->file_pointer, pos, SEEK_SET) != 0) + WARNING_S("fseek() failed", g_strerror(errno)); + } + str->lastop = op; /* Not 0, because we are about to do the operation anyway */ +} /* * @@ -293,6 +312,10 @@ write_buffer_to_stream(strid_t str, gchar *buf, glui32 len) str->mark += copycount; } + /* Move the EOF marker if we wrote past it */ + if(str->mark > str->endmark) + str->endmark = str->mark; + str->write_count += len; break; @@ -302,11 +325,13 @@ write_buffer_to_stream(strid_t str, gchar *buf, glui32 len) if(str->unicode) { gchar *writebuffer = convert_latin1_to_ucs4be_string(buf, len); + ensure_file_operation(str, filemode_Write); fwrite(writebuffer, sizeof(gchar), len * 4, str->file_pointer); g_free(writebuffer); } else /* Regular file */ { + ensure_file_operation(str, filemode_Write); fwrite(buf, sizeof(gchar), len, str->file_pointer); } } @@ -315,6 +340,7 @@ write_buffer_to_stream(strid_t str, gchar *buf, glui32 len) gchar *utf8 = convert_latin1_to_utf8(buf, len); if(utf8 != NULL) { + ensure_file_operation(str, filemode_Write); g_fprintf(str->file_pointer, "%s", utf8); g_free(utf8); } @@ -322,6 +348,9 @@ write_buffer_to_stream(strid_t str, gchar *buf, glui32 len) str->write_count += len; break; + case STREAM_TYPE_RESOURCE: + ILLEGAL(_("Writing to a resource stream is illegal.")); + break; default: ILLEGAL_PARAM("Unknown stream type: %u", str->type); } @@ -382,6 +411,10 @@ write_buffer_to_stream_uni(strid_t str, glui32 *buf, glui32 len) str->mark += copycount; } + /* Move the EOF marker if we wrote past it */ + if(str->mark > str->endmark) + str->endmark = str->mark; + str->write_count += len; break; @@ -391,12 +424,14 @@ write_buffer_to_stream_uni(strid_t str, glui32 *buf, glui32 len) if(str->unicode) { gchar *writebuffer = convert_ucs4_to_ucs4be_string(buf, len); + ensure_file_operation(str, filemode_Write); fwrite(writebuffer, sizeof(gchar), len * 4, str->file_pointer); g_free(writebuffer); } else /* Regular file */ { gchar *latin1 = convert_ucs4_to_latin1_binary(buf, len); + ensure_file_operation(str, filemode_Write); fwrite(latin1, sizeof(gchar), len, str->file_pointer); g_free(latin1); } @@ -406,6 +441,7 @@ write_buffer_to_stream_uni(strid_t str, glui32 *buf, glui32 len) gchar *utf8 = convert_ucs4_to_utf8(buf, len); if(utf8 != NULL) { + ensure_file_operation(str, filemode_Write); g_fprintf(str->file_pointer, "%s", utf8); g_free(utf8); } @@ -413,6 +449,9 @@ write_buffer_to_stream_uni(strid_t str, glui32 *buf, glui32 len) str->write_count += len; break; + case STREAM_TYPE_RESOURCE: + ILLEGAL(_("Writing to a resource stream is illegal.")); + break; default: ILLEGAL_PARAM("Unknown stream type: %u", str->type); } @@ -554,10 +593,11 @@ glk_put_buffer_stream_uni(strid_t str, glui32 *buf, glui32 len) /* Internal function: Read one big-endian four-byte character from file fp and return it as a Unicode code point, or -1 on EOF */ static glsi32 -read_ucs4be_char_from_file(FILE *fp) +read_ucs4be_char_from_file(strid_t str) { unsigned char readbuffer[4]; - if(fread(readbuffer, sizeof(unsigned char), 4, fp) < 4) + ensure_file_operation(str, filemode_Read); + if(fread(readbuffer, sizeof(unsigned char), 4, str->file_pointer) < 4) return -1; /* EOF */ return readbuffer[0] << 24 | @@ -569,14 +609,15 @@ read_ucs4be_char_from_file(FILE *fp) /* Internal function: Read one UTF-8 character, which may be more than one byte, from file fp and return it as a Unicode code point, or -1 on EOF */ static glsi32 -read_utf8_char_from_file(FILE *fp) +read_utf8_char_from_file(strid_t str) { gchar readbuffer[4] = {0, 0, 0, 0}; /* Max UTF-8 width */ int foo; gunichar charresult = (gunichar)-2; + ensure_file_operation(str, filemode_Read); for(foo = 0; foo < 4 && charresult == (gunichar)-2; foo++) { - int ch = fgetc(fp); + int ch = fgetc(str->file_pointer); if(ch == EOF) return -1; readbuffer[foo] = (gchar)ch; @@ -590,21 +631,75 @@ read_utf8_char_from_file(FILE *fp) return charresult; } +/* Internal function: Read one UTF-8 character, which may be more than one byte, +from a memory stream @str, and return it as a Unicode code point. */ +static glsi32 +read_utf8_char_from_buffer(strid_t str) +{ + size_t foo; + gunichar charresult = (gunichar)-2; + char *buffer = str->buffer + str->mark; + size_t maxlen = str->buflen - str->mark; + + if(maxlen == 0) + return -1; + + for(foo = 1; foo <= maxlen; foo++) + { + charresult = g_utf8_get_char_validated(buffer, foo); + /* charresult is -1 if invalid, -2 if incomplete, and the + Unicode code point otherwise */ + if(charresult != (gunichar)-2) + break; + } + str->mark += foo; + str->read_count++; + + /* Return -1 on EOS */ + if(charresult == (gunichar)-2) + return -1; + /* Silently return unknown characters as 0xFFFD, Replacement Character */ + if(charresult == (gunichar)-1) + return 0xFFFD; + return charresult; +} + +/* Internal function: Read one big-endian four-byte character from memory and +return it as a Unicode code point, or -1 on EOF */ +static glsi32 +read_ucs4be_char_from_buffer(strid_t str) +{ + glui32 ch = str->buffer[str->mark++]; + if(str->mark >= str->buflen) + return -1; + ch = (ch << 8) | (str->buffer[str->mark++] & 0xFF); + if(str->mark >= str->buflen) + return -1; + ch = (ch << 8) | (str->buffer[str->mark++] & 0xFF); + if(str->mark >= str->buflen) + return -1; + ch = (ch << 8) | (str->buffer[str->mark++] & 0xFF); + str->read_count++; + return ch; +} + /* Internal function: Tell whether this code point is a Unicode newline. The file pointer and eight-bit flag are included in case the newline is a CR (U+000D). If the next character is LF (U+000A) then it also belongs to the newline. */ static gboolean -is_unicode_newline(glsi32 ch, FILE *fp, gboolean utf8) +is_unicode_newline(glsi32 ch, strid_t str, gboolean utf8) { if(ch == 0x0A || ch == 0x85 || ch == 0x0C || ch == 0x2028 || ch == 0x2029) return TRUE; if(ch == 0x0D) { - glsi32 ch2 = utf8? read_utf8_char_from_file(fp) : - read_ucs4be_char_from_file(fp); - if(ch2 != 0x0A) - if(fseek(fp, utf8? -1 : -4, SEEK_CUR) == -1); + glsi32 ch2 = utf8? read_utf8_char_from_file(str) : + read_ucs4be_char_from_file(str); + if(ch2 != 0x0A) { + if(fseek(str->file_pointer, utf8? -1 : -4, SEEK_CUR) == -1) WARNING_S("Seek failed on stream", g_strerror(errno) ); + str->lastop = 0; /* can read or write after a seek */ + } return TRUE; } return FALSE; @@ -618,6 +713,18 @@ get_char_stream_common(strid_t str) { switch(str->type) { + case STREAM_TYPE_RESOURCE: + if(str->unicode) + { + if(!str->buffer || str->mark >= str->buflen) + return -1; + if(str->binary) + /* Cheap big-endian stream */ + return read_ucs4be_char_from_buffer(str); + /* slightly less cheap UTF8 stream */ + return read_utf8_char_from_buffer(str); + } + /* for text streams, fall through to memory case */ case STREAM_TYPE_MEMORY: if(str->unicode) { @@ -642,7 +749,7 @@ get_char_stream_common(strid_t str) { if(str->unicode) { - glsi32 ch = read_ucs4be_char_from_file(str->file_pointer); + glsi32 ch = read_ucs4be_char_from_file(str); if(ch == -1) return -1; str->read_count++; @@ -650,6 +757,7 @@ get_char_stream_common(strid_t str) } else /* Regular file */ { + ensure_file_operation(str, filemode_Read); int ch = fgetc(str->file_pointer); if(ch == EOF) return -1; @@ -660,7 +768,7 @@ get_char_stream_common(strid_t str) } else /* Text mode is the same for Unicode and regular files */ { - glsi32 ch = read_utf8_char_from_file(str->file_pointer); + glsi32 ch = read_utf8_char_from_file(str); if(ch == -1) return -1; @@ -749,6 +857,24 @@ glk_get_buffer_stream(strid_t str, char *buf, glui32 len) switch(str->type) { + case STREAM_TYPE_RESOURCE: + { + int copycount = 0; + if(str->unicode) + { + while(copycount < len && str->buffer && str->mark < str->buflen) + { + glui32 ch; + if(str->binary) + ch = read_ucs4be_char_from_buffer(str); + else + ch = read_utf8_char_from_buffer(str); + buf[copycount++] = (ch > 0xFF)? '?' : (char)ch; + } + return copycount; + } + /* for text streams, fall through to memory case */ + } case STREAM_TYPE_MEMORY: { int copycount = 0; @@ -778,6 +904,7 @@ glk_get_buffer_stream(strid_t str, char *buf, glui32 len) { /* Read len characters of 4 bytes each */ unsigned char *readbuffer = g_new0(unsigned char, 4 * len); + ensure_file_operation(str, filemode_Read); size_t count = fread(readbuffer, sizeof(unsigned char), 4 * len, str->file_pointer); /* If there was an incomplete character */ if(count % 4 != 0) @@ -801,6 +928,7 @@ glk_get_buffer_stream(strid_t str, char *buf, glui32 len) } else /* Regular binary file */ { + ensure_file_operation(str, filemode_Read); size_t count = fread(buf, sizeof(char), len, str->file_pointer); str->read_count += count; return count; @@ -812,7 +940,7 @@ glk_get_buffer_stream(strid_t str, char *buf, glui32 len) int foo; for(foo = 0; foo < len; foo++) { - glsi32 ch = read_utf8_char_from_file(str->file_pointer); + glsi32 ch = read_utf8_char_from_file(str); if(ch == -1) break; str->read_count++; @@ -846,6 +974,24 @@ glk_get_buffer_stream_uni(strid_t str, glui32 *buf, glui32 len) switch(str->type) { + case STREAM_TYPE_RESOURCE: + { + int copycount = 0; + if(str->unicode) + { + while(copycount < len && str->buffer && str->mark < str->buflen) + { + glui32 ch; + if(str->binary) + ch = read_ucs4be_char_from_buffer(str); + else + ch = read_utf8_char_from_buffer(str); + buf[copycount++] = ch; + } + return copycount; + } + /* for text streams, fall through to memory case */ + } case STREAM_TYPE_MEMORY: { int copycount = 0; @@ -875,6 +1021,7 @@ glk_get_buffer_stream_uni(strid_t str, glui32 *buf, glui32 len) { /* Read len characters of 4 bytes each */ unsigned char *readbuffer = g_new0(unsigned char, 4 * len); + ensure_file_operation(str, filemode_Read); size_t count = fread(readbuffer, sizeof(unsigned char), 4 * len, str->file_pointer); /* If there was an incomplete character */ if(count % 4 != 0) @@ -896,6 +1043,7 @@ glk_get_buffer_stream_uni(strid_t str, glui32 *buf, glui32 len) else /* Regular binary file */ { unsigned char *readbuffer = g_new0(unsigned char, len); + ensure_file_operation(str, filemode_Read); size_t count = fread(readbuffer, sizeof(unsigned char), len, str->file_pointer); int foo; for(foo = 0; foo < count; foo++) @@ -911,7 +1059,7 @@ glk_get_buffer_stream_uni(strid_t str, glui32 *buf, glui32 len) int foo; for(foo = 0; foo < len; foo++) { - glsi32 ch = read_utf8_char_from_file(str->file_pointer); + glsi32 ch = read_utf8_char_from_file(str); if(ch == -1) break; str->read_count++; @@ -952,6 +1100,40 @@ glk_get_line_stream(strid_t str, char *buf, glui32 len) switch(str->type) { + case STREAM_TYPE_RESOURCE: + { + int copycount = 0; + if(str->unicode) + { + /* Do it character-by-character */ + while(copycount < len - 1 && str->buffer && str->mark < str->buflen) + { + glsi32 ch; + if(str->binary) + ch = read_ucs4be_char_from_buffer(str); + else + ch = read_utf8_char_from_buffer(str); + /* Check for Unicode newline; slightly different than + in file streams */ + if(ch == 0x0A || ch == 0x85 || ch == 0x0C || ch == 0x2028 || ch == 0x2029) + { + buf[copycount++] = '\n'; + break; + } + if(ch == 0x0D) + { + if(str->buffer[str->mark] == 0x0A) + str->mark++; /* skip past next newline */ + buf[copycount++] = '\n'; + break; + } + buf[copycount++] = (ch > 0xFF)? '?' : (char)ch; + } + buf[copycount] = '\0'; + return copycount; + } + /* for text streams, fall through to the memory case */ + } case STREAM_TYPE_MEMORY: { int copycount = 0; @@ -1002,14 +1184,14 @@ glk_get_line_stream(strid_t str, char *buf, glui32 len) int copycount; for(copycount = 0; copycount < len - 1; copycount++) { - glsi32 ch = read_ucs4be_char_from_file(str->file_pointer); + glsi32 ch = read_ucs4be_char_from_file(str); if(ch == -1) { buf[copycount] = '\0'; return copycount; } str->read_count++; - if(is_unicode_newline(ch, str->file_pointer, FALSE)) + if(is_unicode_newline(ch, str, FALSE)) { buf[copycount++] = '\n'; buf[copycount] = '\0'; @@ -1022,6 +1204,7 @@ glk_get_line_stream(strid_t str, char *buf, glui32 len) } else /* Regular binary file */ { + ensure_file_operation(str, filemode_Read); if( !fgets(buf, len, str->file_pointer) ) { *buf = 0; return 0; @@ -1038,14 +1221,14 @@ glk_get_line_stream(strid_t str, char *buf, glui32 len) int foo; for(foo = 0; foo < len - 1; foo++) { - glsi32 ch = read_utf8_char_from_file(str->file_pointer); + glsi32 ch = read_utf8_char_from_file(str); if(ch == -1) { buf[foo] = '\0'; return foo - 1; } str->read_count++; - if(is_unicode_newline(ch, str->file_pointer, TRUE)) + if(is_unicode_newline(ch, str, TRUE)) { buf[foo] = '\n'; buf[foo + 1] = '\0'; @@ -1088,6 +1271,40 @@ glk_get_line_stream_uni(strid_t str, glui32 *buf, glui32 len) switch(str->type) { + case STREAM_TYPE_RESOURCE: + { + int copycount = 0; + if(str->unicode) + { + /* Do it character-by-character */ + while(copycount < len - 1 && str->buffer && str->mark < str->buflen) + { + glsi32 ch; + if(str->binary) + ch = read_ucs4be_char_from_buffer(str); + else + ch = read_utf8_char_from_buffer(str); + /* Check for Unicode newline; slightly different than + in file streams */ + if(ch == 0x0A || ch == 0x85 || ch == 0x0C || ch == 0x2028 || ch == 0x2029) + { + buf[copycount++] = '\n'; + break; + } + if(ch == 0x0D) + { + if(str->ubuffer[str->mark] == 0x0A) + str->mark++; /* skip past next newline */ + buf[copycount++] = '\n'; + break; + } + buf[copycount++] = ch; + } + buf[copycount] = '\0'; + return copycount; + } + /* for text streams, fall through to the memory case */ + } case STREAM_TYPE_MEMORY: { int copycount = 0; @@ -1144,14 +1361,14 @@ glk_get_line_stream_uni(strid_t str, glui32 *buf, glui32 len) int copycount; for(copycount = 0; copycount < len - 1; copycount++) { - glsi32 ch = read_ucs4be_char_from_file(str->file_pointer); + glsi32 ch = read_ucs4be_char_from_file(str); if(ch == -1) { buf[copycount] = 0; return copycount; } str->read_count++; - if(is_unicode_newline(ch, str->file_pointer, FALSE)) + if(is_unicode_newline(ch, str, FALSE)) { buf[copycount++] = ch; /* Preserve newline types??? */ buf[copycount] = 0; @@ -1165,6 +1382,7 @@ glk_get_line_stream_uni(strid_t str, glui32 *buf, glui32 len) else /* Regular binary file */ { gchar *readbuffer = g_new0(gchar, len); + ensure_file_operation(str, filemode_Read); if( !fgets(readbuffer, len, str->file_pointer) ) { *buf = 0; return 0; @@ -1184,14 +1402,14 @@ glk_get_line_stream_uni(strid_t str, glui32 *buf, glui32 len) int foo; for(foo = 0; foo < len - 1; foo++) { - glsi32 ch = read_utf8_char_from_file(str->file_pointer); + glsi32 ch = read_utf8_char_from_file(str); if(ch == -1) { buf[foo] = 0; return foo - 1; } str->read_count++; - if(is_unicode_newline(ch, str->file_pointer, TRUE)) + if(is_unicode_newline(ch, str, TRUE)) { buf[foo] = ch; /* Preserve newline types??? */ buf[foo + 1] = 0; @@ -1255,6 +1473,7 @@ glk_stream_get_position(strid_t str) switch(str->type) { case STREAM_TYPE_MEMORY: + case STREAM_TYPE_RESOURCE: return str->mark; case STREAM_TYPE_FILE: return ftell(str->file_pointer); @@ -1304,12 +1523,13 @@ glk_stream_set_position(strid_t str, glsi32 pos, glui32 seekmode) switch(str->type) { + case STREAM_TYPE_RESOURCE: case STREAM_TYPE_MEMORY: switch(seekmode) { case seekmode_Start: str->mark = pos; break; case seekmode_Current: str->mark += pos; break; - case seekmode_End: str->mark = str->buflen + pos; break; + case seekmode_End: str->mark = str->endmark + pos; break; default: g_return_if_reached(); return; @@ -1329,6 +1549,7 @@ glk_stream_set_position(strid_t str, glsi32 pos, glui32 seekmode) } if(fseek(str->file_pointer, pos, whence) == -1) WARNING("Seek failed on file stream"); + str->lastop = 0; /* Either reading or writing is legal after fseek() */ break; } case STREAM_TYPE_WINDOW: