From 8bf874cd9d56a5850bc474df37642170a4c20b28 Mon Sep 17 00:00:00 2001 From: Philip Chimento Date: Tue, 3 Mar 2009 23:21:53 +0000 Subject: [PATCH] Wrote all remaining Unicode input and output functions. Updated gestalt to support Unicode. Split off functions to convert from one character set to another and put them in charset.c. glk_put_char_uni(), glk_put_string_uni(), glk_put_buffer_uni(), glk_put_char_stream_uni(), glk_put_string_stream_uni(), glk_put_buffer_stream_uni(), glk_get_char_stream_uni(), glk_get_buffer_stream_uni(), glk_get_line_stream_uni() git-svn-id: http://lassie.dyndns-server.com/svn/gargoyle-gtk@29 ddfedd41-794f-dd11-ae45-00112f111e67 --- src/Makefile.am | 4 +- src/charset.c | 128 +++++++++++ src/charset.h | 18 ++ src/gestalt.c | 38 +++- src/input.c | 20 +- src/main.c | 2 +- src/model.c | 75 +++---- src/stream.c | 44 ++++ src/strio.c | 552 ++++++++++++++++++++++++++++++++++++++++-------- 9 files changed, 732 insertions(+), 149 deletions(-) create mode 100644 src/charset.c create mode 100644 src/charset.h diff --git a/src/Makefile.am b/src/Makefile.am index 4a90df4..4a3192b 100755 --- a/src/Makefile.am +++ b/src/Makefile.am @@ -24,6 +24,7 @@ lib_LTLIBRARIES = libchimara.la libchimara_la_SOURCES = \ abort.c abort.h \ case.c \ + charset.c charset.h \ chimara-glk.c chimara-glk.h chimara-glk-private.h \ event.c event.h \ fileref.c fileref.h \ @@ -33,7 +34,8 @@ libchimara_la_SOURCES = \ stream.c stream.h \ strio.c \ style.c \ - window.c window.h + window.c window.h + libchimara_la_LIBADD = $(CHIMARA_LIBS) libchimara_la_LDFLAGS = -no-undefined -export-symbols-regex "^chimara_glk_.*|^glk_.*" libchimara_includedir = $(includedir)/chimara/chimara diff --git a/src/charset.c b/src/charset.c new file mode 100644 index 0000000..c060d8c --- /dev/null +++ b/src/charset.c @@ -0,0 +1,128 @@ +#include "charset.h" +#include + +/* Internal function: change illegal (control) characters in a string to a +placeholder character. Must free returned string afterwards. 
*/ +static gchar * +remove_latin1_control_characters(const unsigned char *s, const gsize len) +{ + /* If len == 0, then return an empty string, not NULL */ + if(len == 0) + return g_strdup(""); + + gchar *retval = g_new0(gchar, len); + int i; + for(i = 0; i < len; i++) + if( (s[i] < 32 && s[i] != 10) || (s[i] >= 127 && s[i] <= 159) ) + retval[i] = PLACEHOLDER; + else + retval[i] = s[i]; + return retval; +} + +/* Internal function: convert a Latin-1 string to a UTF-8 string, replacing +Latin-1 control characters by a placeholder first. The UTF-8 string must be +freed afterwards. Returns NULL on error. */ +gchar * +convert_latin1_to_utf8(const gchar *s, const gsize len) +{ + GError *error = NULL; + gchar *canonical = remove_latin1_control_characters( (unsigned char *)s, + len); + gchar *retval = g_convert(canonical, len, "UTF-8", "ISO-8859-1", NULL, NULL, &error); + g_free(canonical); + + if(retval == NULL) + g_warning("Error during latin1->utf8 conversion: %s", error->message); + + return retval; +} + +/* Internal function: convert a Latin-1 string to a four-byte-per-character +big-endian string of gchars. The string must be freed afterwards. */ +gchar * +convert_latin1_to_ucs4be_string(const gchar *s, const gsize len) +{ + /* "UCS-4BE" is also a conversion type in g_convert()... but this may be more efficient */ + gchar *retval = g_new0(gchar, len * 4); + int i; + for(i = 0; i < len; i++) + retval[i * 4 + 3] = s[i]; + return retval; +} + +/* Internal function: convert a null-terminated UTF-8 string to a +null-terminated Latin-1 string, replacing characters that cannot be represented +in Latin-1 by a placeholder. If bytes_written is not NULL it will be filled with +the number of bytes returned, not counting the NULL terminator. The returned +string must be freed afterwards. Returns NULL on error. 
*/ +gchar * +convert_utf8_to_latin1(const gchar *s, gsize *bytes_written) +{ + GError *error = NULL; + gchar *retval = g_convert_with_fallback(s, -1, "ISO-8859-1", "UTF-8", PLACEHOLDER_STRING, NULL, bytes_written, &error); + + if(retval == NULL) + g_warning("Error during utf8->latin1 conversion: %s", error->message); + + return retval; +} + +/* Internal function: convert a null-terminated UTF-8 string to a +null-terminated UCS4 string. If items_written is not NULL it will be filled with +the number of code points returned, not counting the terminator. The returned +string must be freed afterwards. Returns NULL on error. */ +gunichar * +convert_utf8_to_ucs4(const gchar *s, glong *items_written) +{ + gunichar *retval = g_utf8_to_ucs4_fast(s, -1, items_written); + + if(retval == NULL) + g_warning("Error during utf8->unicode conversion"); + + return retval; +} + +/* Internal function: Convert a Unicode buffer to a null-terminated UTF-8 +string. The returned string must be freed afterwards. Returns NULL on error. */ +gchar * +convert_ucs4_to_utf8(const gunichar *buf, const glong len) +{ + GError *error = NULL; + gchar *retval = g_ucs4_to_utf8(buf, len, NULL, NULL, &error); + + if(retval == NULL) + g_warning("Error during unicode->utf8 conversion: %s", error->message); + + return retval; +} + +/* Internal function: Convert a Unicode buffer to a Latin-1 string. Do not do +any character processing, just return values > 255 as the placeholder character. +The returned string must be freed afterwards.*/ +gchar * +convert_ucs4_to_latin1_binary(const gunichar *buf, const glong len) +{ + gchar *retval = g_new0(gchar, len); + int foo; + for(foo = 0; foo < len; foo++) + retval[foo] = (buf[foo] > 255)? PLACEHOLDER : buf[foo]; + return retval; +} + +/* Internal function: convert a Unicode buffer to a four-byte-per-character +big-endian string of gchars. The string must be freed afterwards. 
*/ +gchar * +convert_ucs4_to_ucs4be_string(const gunichar *buf, const glong len) +{ + gchar *retval = g_new0(gchar, len * 4); + int i; + for(i = 0; i < len; i++) + { + retval[i * 4] = buf[i] >> 24 ; + retval[i * 4 + 1] = buf[i] >> 16 & 0xFF; + retval[i * 4 + 2] = buf[i] >> 8 & 0xFF; + retval[i * 4 + 3] = buf[i] & 0xFF; + } + return retval; +} diff --git a/src/charset.h b/src/charset.h new file mode 100644 index 0000000..0a18295 --- /dev/null +++ b/src/charset.h @@ -0,0 +1,18 @@ +#ifndef CHARSET_H +#define CHARSET_H + +#include + +#define PLACEHOLDER '?' +#define PLACEHOLDER_STRING "?" +/* Our placeholder character is '?'; other options are possible, like printing "0x7F" or something */ + +gchar *convert_latin1_to_utf8(const gchar *s, const gsize len); +gchar *convert_latin1_to_ucs4be_string(const gchar *s, const gsize len); +gchar *convert_utf8_to_latin1(const gchar *s, gsize *bytes_written); +gunichar *convert_utf8_to_ucs4(const gchar *s, glong *items_written); +gchar *convert_ucs4_to_utf8(const gunichar *buf, const glong len); +gchar *convert_ucs4_to_latin1_binary(const gunichar *buf, const glong len); +gchar *convert_ucs4_to_ucs4be_string(const gunichar *buf, const glong len); + +#endif /* CHARSET_H */ diff --git a/src/gestalt.c b/src/gestalt.c index 4a586ce..0c3d27b 100644 --- a/src/gestalt.c +++ b/src/gestalt.c @@ -71,17 +71,6 @@ glk_gestalt_ext(glui32 sel, glui32 val, glui32 *arr, glui32 arrlen) case gestalt_Version: return (MAJOR_VERSION << 16) + (MINOR_VERSION << 8) + SUB_VERSION; - /* Which characters can we print? 
*/ - case gestalt_CharOutput: - /* All characters are printed as one character, in any case */ - if(arr && arrlen > 0) - *arr = 1; - /* Cannot print control chars except \n, or chars > 255 */ - if( (val < 32 && val != 10) || (val >= 127 && val <= 159) || (val > 255) ) - return gestalt_CharOutput_CannotPrint; - /* Can print all other Latin-1 characters */ - return gestalt_CharOutput_ExactPrint; - /* Which characters can the player type in line input? */ case gestalt_LineInput: /* Does not accept control chars */ @@ -95,7 +84,34 @@ glk_gestalt_ext(glui32 sel, glui32 val, glui32 *arr, glui32 arrlen) if( val < 32 || (val >= 127 && val <= 159) || val == keycode_Unknown ) return 0; return 1; + + /* Which characters can we print? */ + case gestalt_CharOutput: + /* All characters are printed as one character, in any case */ + if(arr && arrlen > 0) + *arr = 1; + /* Cannot print control chars except \n, or chars > 255 */ + if( (val < 32 && val != 10) || (val >= 127 && val <= 159) || (val > 255) ) + return gestalt_CharOutput_CannotPrint; + /* Can print all other Latin-1 characters */ + return gestalt_CharOutput_ExactPrint; + + /* Unicode capabilities present */ + case gestalt_Unicode: + return 1; + /* Unsupported capabilities */ + case gestalt_MouseInput: + case gestalt_Timer: + case gestalt_Graphics: + case gestalt_DrawImage: + case gestalt_Sound: + case gestalt_SoundVolume: + case gestalt_SoundNotify: + case gestalt_Hyperlinks: + case gestalt_HyperlinkInput: + case gestalt_SoundMusic: + case gestalt_GraphicsTransparency: /* Selector not supported */ default: return 0; diff --git a/src/input.c b/src/input.c index 7eebfa5..8956c2c 100644 --- a/src/input.c +++ b/src/input.c @@ -1,3 +1,4 @@ +#include "charset.h" #include "input.h" /** glk_request_char_event: @@ -203,14 +204,9 @@ glk_request_line_event_uni(winid_t win, glui32 *buf, glui32 maxlen, glui32 initl gchar *utf8; if(initlen > 0) { - GError *error = NULL; - utf8 = g_ucs4_to_utf8(buf, initlen, NULL, NULL, &error); - + 
utf8 = convert_ucs4_to_utf8(buf, initlen); if(utf8 == NULL) - { - g_warning("Error during unicode->utf8 conversion: %s", error->message); return; - } } else utf8 = g_strdup(""); @@ -325,14 +321,11 @@ end_line_input_request(winid_t win, const gchar *inserted_text) /* Convert the string from UTF-8 to Latin-1 or Unicode */ if(win->input_request_type == INPUT_REQUEST_LINE) { - GError *error = NULL; - gchar *latin1; gsize bytes_written; - latin1 = g_convert_with_fallback(inserted_text, -1, "ISO-8859-1", "UTF-8", "?", NULL, &bytes_written, &error); + gchar *latin1 = convert_utf8_to_latin1(inserted_text, &bytes_written); if(latin1 == NULL) { - g_warning("Error during utf8->latin1 conversion: %s", error->message); event_throw(evtype_LineInput, win, 0, 0); return; } @@ -349,21 +342,18 @@ end_line_input_request(winid_t win, const gchar *inserted_text) } else if(win->input_request_type == INPUT_REQUEST_LINE_UNICODE) { - gunichar *unicode; glong items_written; - unicode = g_utf8_to_ucs4_fast(inserted_text, -1, &items_written); + gunichar *unicode = convert_utf8_to_ucs4(inserted_text, &items_written); if(unicode == NULL) { - g_warning("Error during utf8->unicode conversion"); event_throw(evtype_LineInput, win, 0, 0); return; } /* Place input in the echo stream */ - /* TODO: glk_put_string_stream_uni not implemented yet if(win->echo_stream != NULL) - glk_put_string_stream_uni(window->echo_stream, unicode);*/ + glk_put_string_stream_uni(win->echo_stream, unicode); /* Copy the string (but not the NULL at the end) */ int copycount = MIN(win->line_input_buffer_max_len, items_written); diff --git a/src/main.c b/src/main.c index 324e49e..6398348 100644 --- a/src/main.c +++ b/src/main.c @@ -115,7 +115,7 @@ main(int argc, char *argv[]) g_object_unref( G_OBJECT(builder) ); - if( !chimara_glk_run(CHIMARA_GLK(glk), ".libs/first.so", &error) ) { + if( !chimara_glk_run(CHIMARA_GLK(glk), ".libs/model.so", &error) ) { error_dialog(GTK_WINDOW(window), error, "Error starting Glk library: "); 
return 1; } diff --git a/src/model.c b/src/model.c index 3823df8..4cf2609 100644 --- a/src/model.c +++ b/src/model.c @@ -17,59 +17,60 @@ void glk_main(void) nothing we can do without it, so exit. */ return; } - -/* char buffer[256]; + glui32 buffer[1024]; int i; - for(i = 0; i < 256; i++) - buffer[i] = (char)glk_char_to_upper(i); + for(i = 0; i < 512; i++) { + buffer[i * 2] = i + 33; + buffer[i * 2 + 1] = 32; + } - frefid_t f = glk_fileref_create_temp(fileusage_BinaryMode, 0); +/* frefid_t f = glk_fileref_create_temp(fileusage_BinaryMode, 0); if(f) { - - strid_t s = glk_stream_open_file(f, - filemode_ReadWrite, 0); - glk_stream_set_current(s); - - glk_put_char('X'); - glk_put_string("Philip en Marijn zijn vet goed.\n"); - glk_put_buffer(buffer, 256); - - glk_stream_set_position(s, 0, seekmode_Start); - glk_set_window(mainwin); - glk_put_char( glk_get_char_stream(s) ); - glk_put_char('\n'); - g_printerr("Line read: %d\n", glk_get_line_stream(s, buffer, 256)); - glk_put_string(buffer); - int count = glk_get_buffer_stream(s, buffer, 256); - g_printerr("Buffer read: %d\n", count); - glk_put_buffer(buffer, count); - - stream_result_t result; - glk_stream_close(s, &result); - - g_printerr("Read count: %d\nWrite count: %d\n", result.readcount, - result.writecount); - glk_fileref_destroy(f); - } - */ + strid_t s = glk_stream_open_file(f, filemode_ReadWrite, 0);*/ + glui32 membuf[512]; + strid_t s = glk_stream_open_memory_uni(membuf, 512, filemode_ReadWrite, 0); + glk_stream_set_current(s); + + glk_put_char_uni('X'); + glk_put_string("Philip en Marijn zijn vet goed.\n"); + glk_put_buffer_uni(buffer, 1024); - glk_set_window(mainwin); + glk_stream_set_position(s, 0, seekmode_Start); + glk_set_window(mainwin); + glk_put_char_uni( glk_get_char_stream_uni(s) ); + glk_put_char('\n'); + g_printerr( "Line read: %d\n", glk_get_line_stream_uni(s, buffer, 1024) ); + g_printerr("string[5] = %X\n", buffer[5]); + glk_put_string_uni(buffer); + int count = glk_get_buffer_stream_uni(s, 
buffer, 1024); + g_printerr("Buffer read: %d\n", count); + glk_put_string("\n---SOME CHARACTERS---\n"); + glk_put_buffer_uni(buffer, count); + glk_put_string("\n---THE SAME CHARACTERS IN UPPERCASE---\n"); + int newcount = glk_buffer_to_upper_case_uni(buffer, 1024, 1024); + glk_put_buffer_uni(buffer, newcount); + + stream_result_t result; + glk_stream_close(s, &result); + + g_printerr("Read count: %d\nWrite count: %d\n", result.readcount, result.writecount); +/* glk_fileref_destroy(f); + }*/ glk_set_interrupt_handler(&sayit); - gchar buffer[256]; event_t ev; while(1) { - glk_put_string("prompt> "); - glk_request_line_event(mainwin, buffer, 256, 0); + glk_put_string("\nprompt> "); + glk_request_line_event_uni(mainwin, buffer, 1024, 0); glk_select(&ev); switch(ev.type) { default: printf("Received event:\n"); printf("Type: %d\n", ev.type); - printf("Win: %d\n", glk_window_get_rock(ev.win)); + printf("Win: %d\n", glk_window_get_rock(ev.win) ); printf("Var1: %d\n", ev.val1); printf("Var2: %d\n", ev.val2); } diff --git a/src/stream.c b/src/stream.c index e51d1f1..7e61dc9 100644 --- a/src/stream.c +++ b/src/stream.c @@ -118,6 +118,20 @@ glk_put_char(unsigned char ch) glk_put_char_stream(glk_data->current_stream, ch); } +/** + * glk_put_char_uni: + * @ch: A Unicode code point. + * + * Prints one character to the current stream. The character is assumed to be a + * Unicode code point. + */ +void +glk_put_char_uni(glui32 ch) +{ + g_return_if_fail(glk_data->current_stream != NULL); + glk_put_char_stream_uni(glk_data->current_stream, ch); +} + /** * glk_put_string: * @s: A null-terminated string in Latin-1 encoding. @@ -137,6 +151,21 @@ glk_put_string(char *s) glk_put_string_stream(glk_data->current_stream, s); } +/** + * glk_put_string_uni: + * @s: A zero-terminated string of Unicode code points. + * + * Prints a string of Unicode characters to the current stream. It is equivalent + * to a series of glk_put_char_uni() calls. A string ends on a #glui32 whose + * value is 0. 
+ */ +void +glk_put_string_uni(glui32 *s) +{ + g_return_if_fail(glk_data->current_stream != NULL); + glk_put_string_stream_uni(glk_data->current_stream, s); +} + /** * glk_put_buffer: * @buf: An array of characters in Latin-1 encoding. @@ -157,6 +186,21 @@ glk_put_buffer(char *buf, glui32 len) glk_put_buffer_stream(glk_data->current_stream, buf, len); } +/** + * glk_put_buffer_uni: + * @buf: An array of Unicode code points. + * @len: Length of @buf. + * + * Prints a block of Unicode characters to the current stream. It is equivalent + * to a series of glk_put_char_uni() calls. + */ +void +glk_put_buffer_uni(glui32 *buf, glui32 len) +{ + g_return_if_fail(glk_data->current_stream != NULL); + glk_put_buffer_stream_uni(glk_data->current_stream, buf, len); +} + /** * glk_stream_open_memory: * @buf: An allocated buffer, or %NULL. diff --git a/src/strio.c b/src/strio.c index 974a109..98140ad 100644 --- a/src/strio.c +++ b/src/strio.c @@ -1,3 +1,4 @@ +#include "charset.h" #include "stream.h" #include #include @@ -10,49 +11,6 @@ * */ -/* Internal function: change illegal (control) characters in a string to a -placeholder character. Must free returned string afterwards. */ -static gchar * -remove_latin1_control_characters(unsigned char *s, gsize len) -{ - /* If len == 0, then return an empty string, not NULL */ - if(len == 0) - return g_strdup(""); - - gchar *retval = g_new0(gchar, len); - int i; - for(i = 0; i < len; i++) - if( (s[i] < 32 && s[i] != 10) || (s[i] >= 127 && s[i] <= 159) ) - retval[i] = '?'; - /* Our placeholder character is '?'; other options are possible, - like printing "0x7F" or something */ - else - retval[i] = s[i]; - return retval; -} - -/* Internal function: convert a Latin-1 string to a UTF-8 string, replacing -Latin-1 control characters by a placeholder first. The UTF-8 string must be -freed afterwards. Returns NULL on error. 
*/ -static gchar * -convert_latin1_to_utf8(gchar *s, gsize len) -{ - GError *error = NULL; - gchar *utf8; - gchar *canonical = remove_latin1_control_characters( (unsigned char *)s, - len); - utf8 = g_convert(canonical, len, "UTF-8", "ISO-8859-1", NULL, NULL, &error); - g_free(canonical); - - if(utf8 == NULL) - { - g_warning("Error during latin1->utf8 conversion: %s", error->message); - return NULL; - } - - return utf8; -} - /* Internal function: write a UTF-8 string to a text grid window's text buffer. */ static void write_utf8_to_grid(winid_t win, gchar *s) @@ -131,7 +89,7 @@ write_buffer_to_stream(strid_t str, gchar *buf, glui32 len) case wintype_TextGrid: { gchar *utf8 = convert_latin1_to_utf8(buf, len); - if(utf8) + if(utf8 != NULL) { /* FIXME: What to do if string contains \n? Split the input string at newlines and write each string separately? */ write_utf8_to_grid(str->window, utf8); @@ -145,7 +103,7 @@ write_buffer_to_stream(strid_t str, gchar *buf, glui32 len) case wintype_TextBuffer: { gchar *utf8 = convert_latin1_to_utf8(buf, len); - if(utf8) + if(utf8 != NULL) { write_utf8_to_window(str->window, utf8); g_free(utf8); @@ -185,13 +143,9 @@ write_buffer_to_stream(strid_t str, gchar *buf, glui32 len) { if(str->unicode) { - /* Convert to four-byte big-endian */ - gchar *writebuffer = g_new0(gchar, len * 4); - int i; - for(i = 0; i < len; i++) - writebuffer[i * 4 + 3] = buf[i]; - fwrite(writebuffer, sizeof(gchar), len * 4, - str->file_pointer); + gchar *writebuffer = convert_latin1_to_ucs4be_string(buf, len); + fwrite(writebuffer, sizeof(gchar), len * 4, str->file_pointer); + g_free(writebuffer); } else /* Regular file */ { @@ -201,8 +155,116 @@ write_buffer_to_stream(strid_t str, gchar *buf, glui32 len) else /* Text mode is the same for Unicode and regular files */ { gchar *utf8 = convert_latin1_to_utf8(buf, len); - g_fprintf(str->file_pointer, "%s", utf8); - g_free(utf8); + if(utf8 != NULL) + { + g_fprintf(str->file_pointer, "%s", utf8); + g_free(utf8); + } 
+ } + + str->write_count += len; + break; + default: + g_warning("%s: Writing to this kind of stream unsupported.", __func__); + } +} + +/* Internal function: write a Unicode buffer with length to a stream. */ +static void +write_buffer_to_stream_uni(strid_t str, glui32 *buf, glui32 len) +{ + switch(str->type) + { + case STREAM_TYPE_WINDOW: + /* Each window type has a different way of printing to it */ + switch(str->window->type) + { + /* Printing to these windows' streams does nothing */ + case wintype_Blank: + case wintype_Pair: + case wintype_Graphics: + str->write_count += len; + break; + + /* Text grid window */ + case wintype_TextGrid: + { + gchar *utf8 = convert_ucs4_to_utf8(buf, len); + if(utf8 != NULL) + { + /* FIXME: What to do if string contains \n? Split the input string at newlines and write each string separately? */ + write_utf8_to_grid(str->window, utf8); + g_free(utf8); + } + } + str->write_count += len; + break; + + /* Text buffer window */ + case wintype_TextBuffer: + { + gchar *utf8 = convert_ucs4_to_utf8(buf, len); + if(utf8 != NULL) + { + write_utf8_to_window(str->window, utf8); + g_free(utf8); + } + } + str->write_count += len; + break; + default: + g_warning("%s: Writing to this kind of window unsupported.", __func__); + } + + /* Now write the same buffer to the window's echo stream */ + if(str->window->echo_stream != NULL) + write_buffer_to_stream_uni(str->window->echo_stream, buf, len); + + break; + + case STREAM_TYPE_MEMORY: + if(str->unicode && str->ubuffer) + { + int copycount = MIN(len, str->buflen - str->mark); + memmove(str->ubuffer + str->mark, buf, copycount * sizeof(glui32)); + str->mark += copycount; + } + if(!str->unicode && str->buffer) + { + gchar *latin1 = convert_ucs4_to_latin1_binary(buf, len); + int copycount = MIN(len, str->buflen - str->mark); + memmove(str->buffer + str->mark, latin1, copycount); + g_free(latin1); + str->mark += copycount; + } + + str->write_count += len; + break; + + case STREAM_TYPE_FILE: + 
if(str->binary) + { + if(str->unicode) + { + gchar *writebuffer = convert_ucs4_to_ucs4be_string(buf, len); + fwrite(writebuffer, sizeof(gchar), len * 4, str->file_pointer); + g_free(writebuffer); + } + else /* Regular file */ + { + gchar *latin1 = convert_ucs4_to_latin1_binary(buf, len); + fwrite(latin1, sizeof(gchar), len, str->file_pointer); + g_free(latin1); + } + } + else /* Text mode is the same for Unicode and regular files */ + { + gchar *utf8 = convert_ucs4_to_utf8(buf, len); + if(utf8 != NULL) + { + g_fprintf(str->file_pointer, "%s", utf8); + g_free(utf8); + } } str->write_count += len; @@ -229,6 +291,23 @@ glk_put_char_stream(strid_t str, unsigned char ch) write_buffer_to_stream(str, (gchar *)&ch, 1); } +/** + * glk_put_char_stream_uni: + * @str: An output stream. + * @ch: A Unicode code point. + * + * Prints one character @ch to the stream @str. It is illegal for @str to be + * %NULL, or an input-only stream. + */ +void +glk_put_char_stream_uni(strid_t str, glui32 ch) +{ + g_return_if_fail(str != NULL); + g_return_if_fail(str->file_mode != filemode_Read); + + write_buffer_to_stream_uni(str, &ch, 1); +} + /** * glk_put_string_stream: * @str: An output stream. @@ -243,7 +322,29 @@ glk_put_string_stream(strid_t str, char *s) g_return_if_fail(str != NULL); g_return_if_fail(str->file_mode != filemode_Read); - write_buffer_to_stream(str, (gchar *)s, strlen(s)); + write_buffer_to_stream(str, s, strlen(s)); +} + +/** + * glk_put_string_stream_uni: + * @str: An output stream. + * @s: A null-terminated array of Unicode code points. + * + * Prints @s to the stream @str. It is illegal for @str to be %NULL, or an + * input-only stream. 
+ */ +void +glk_put_string_stream_uni(strid_t str, glui32 *s) +{ + g_return_if_fail(str != NULL); + g_return_if_fail(str->file_mode != filemode_Read); + + /* An impromptu strlen() for glui32 arrays */ + glong len = 0; + glui32 *ptr = s; + while(*ptr++) + len++; + write_buffer_to_stream_uni(str, s, len); } /** @@ -261,7 +362,25 @@ glk_put_buffer_stream(strid_t str, char *buf, glui32 len) g_return_if_fail(str != NULL); g_return_if_fail(str->file_mode != filemode_Read); - write_buffer_to_stream(str, (gchar *)buf, len); + write_buffer_to_stream(str, buf, len); +} + +/** + * glk_put_buffer_stream_uni: + * @str: An output stream. + * @buf: An array of Unicode code points. + * @len: Length of @buf. + * + * Prints @buf to the stream @str. It is illegal for @str to be %NULL, or an + * input-only stream. + */ +void +glk_put_buffer_stream_uni(strid_t str, glui32 *buf, glui32 len) +{ + g_return_if_fail(str != NULL); + g_return_if_fail(str->file_mode != filemode_Read); + + write_buffer_to_stream_uni(str, buf, len); } /* @@ -328,31 +447,12 @@ is_unicode_newline(glsi32 ch, FILE *fp, gboolean utf8) return FALSE; } -/** - * glk_get_char_stream: - * @str: An input stream. - * - * Reads one character from the stream @str. (There is no notion of a ``current - * input stream.'') It is illegal for @str to be %NULL, or an output-only - * stream. - * - * The result will be between 0 and 255. As with all basic text functions, Glk - * assumes the Latin-1 encoding. If the end of the stream has been reached, the - * result will be -1. Note that high-bit characters (128..255) are - * not returned as negative numbers. - * - * If the stream contains Unicode data --- for example, if it was created with - * glk_stream_open_file_uni() or glk_stream_open_memory_uni() --- then - * characters beyond 255 will be returned as 0x3F ("?"). - * - * Returns: A character value between 0 and 255, or -1 on end of stream. - */ +/* Internal function: Read one character from a stream. 
Returns a value which + can be returned unchanged by glk_get_char_stream_uni(), but + glk_get_char_stream() must replace high values by the placeholder character. */ glsi32 -glk_get_char_stream(strid_t str) +get_char_stream_common(strid_t str) { - g_return_val_if_fail(str != NULL, -1); - g_return_val_if_fail(str->file_mode == filemode_Read || str->file_mode == filemode_ReadWrite, -1); - switch(str->type) { case STREAM_TYPE_MEMORY: @@ -362,13 +462,13 @@ glk_get_char_stream(strid_t str) return -1; glui32 ch = str->ubuffer[str->mark++]; str->read_count++; - return (ch > 0xFF)? 0x3F : ch; + return ch; } else { if(!str->buffer || str->mark >= str->buflen) return -1; - char ch = str->buffer[str->mark++]; + unsigned char ch = str->buffer[str->mark++]; str->read_count++; return ch; } @@ -383,7 +483,7 @@ glk_get_char_stream(strid_t str) if(ch == -1) return -1; str->read_count++; - return (ch > 0xFF)? 0x3F : ch; + return ch; } else /* Regular file */ { @@ -402,7 +502,7 @@ glk_get_char_stream(strid_t str) return -1; str->read_count++; - return (ch > 0xFF)? 0x3F : ch; + return ch; } default: g_warning("%s: Reading from this kind of stream unsupported.", __func__); @@ -410,6 +510,53 @@ glk_get_char_stream(strid_t str) } } +/** + * glk_get_char_stream: + * @str: An input stream. + * + * Reads one character from the stream @str. (There is no notion of a ``current + * input stream.'') It is illegal for @str to be %NULL, or an output-only + * stream. + * + * The result will be between 0 and 255. As with all basic text functions, Glk + * assumes the Latin-1 encoding. If the end of the stream has been reached, the + * result will be -1. Note that high-bit characters (128..255) are + * not returned as negative numbers. + * + * If the stream contains Unicode data --- for example, if it was created with + * glk_stream_open_file_uni() or glk_stream_open_memory_uni() --- then + * characters beyond 255 will be returned as 0x3F ("?"). 
+ * + * Returns: A character value between 0 and 255, or -1 on end of stream. + */ +glsi32 +glk_get_char_stream(strid_t str) +{ + g_return_val_if_fail(str != NULL, -1); + g_return_val_if_fail(str->file_mode == filemode_Read || str->file_mode == filemode_ReadWrite, -1); + + glsi32 ch = get_char_stream_common(str); + return (ch > 0xFF)? PLACEHOLDER : ch; +} + +/** + * glk_get_char_stream_uni: + * @str: An input stream. + * + * Reads one character from the stream @str. The result will be between 0 and + * 0x7FFFFFFF. If the end of the stream has been reached, the result will be -1. + * + * Returns: A character value between 0 and 0x7FFFFFFF, or -1 on end of stream. + */ +glsi32 +glk_get_char_stream_uni(strid_t str) +{ + g_return_val_if_fail(str != NULL, -1); + g_return_val_if_fail(str->file_mode == filemode_Read || str->file_mode == filemode_ReadWrite, -1); + + return get_char_stream_common(str); +} + /** * glk_get_buffer_stream: * @str: An input stream. @@ -467,7 +614,6 @@ glk_get_buffer_stream(strid_t str, char *buf, glui32 len) g_warning("%s: Incomplete character in binary Unicode file.", __func__); } - str->read_count += count / 4; int foo; for(foo = 0; foo < count; foo += 4) { @@ -478,6 +624,7 @@ glk_get_buffer_stream(strid_t str, char *buf, glui32 len) buf[foo / 4] = (ch > 255)? 0x3F : (char)ch; } g_free(readbuffer); + str->read_count += count / 4; return count / 4; } else /* Regular binary file */ @@ -507,6 +654,105 @@ glk_get_buffer_stream(strid_t str, char *buf, glui32 len) } } +/** + * glk_get_buffer_stream_uni: + * @str: An input stream. + * @buf: A buffer with space for at least @len Unicode code points. + * @len: The number of characters to read. + * + * Reads @len Unicode characters from @str, unless the end of stream is reached + * first. No terminal null is placed in the buffer. + * + * Returns: The number of Unicode characters actually read. 
+ */ +glui32 +glk_get_buffer_stream_uni(strid_t str, glui32 *buf, glui32 len) +{ + g_return_val_if_fail(str != NULL, 0); + g_return_val_if_fail(str->file_mode == filemode_Read || str->file_mode == filemode_ReadWrite, 0); + g_return_val_if_fail(buf != NULL, 0); + + switch(str->type) + { + case STREAM_TYPE_MEMORY: + { + int copycount = 0; + if(str->unicode) + { + if(str->ubuffer) /* if not, copycount stays 0 */ + copycount = MIN(len, str->buflen - str->mark); + memmove(buf, str->ubuffer + str->mark, copycount * 4); + str->mark += copycount; + } + else + { + while(copycount < len && str->buffer && str->mark < str->buflen) + { + unsigned char ch = str->buffer[str->mark++]; + buf[copycount++] = ch; + } + } + + str->read_count += copycount; + return copycount; + } + case STREAM_TYPE_FILE: + if(str->binary) + { + if(str->unicode) /* Binary file with 4-byte characters */ + { + /* Read len characters of 4 bytes each */ + unsigned char *readbuffer = g_new0(unsigned char, 4 * len); + size_t count = fread(readbuffer, sizeof(unsigned char), 4 * len, str->file_pointer); + /* If there was an incomplete character */ + if(count % 4 != 0) + { + count -= count % 4; + g_warning("%s: Incomplete character in binary Unicode file.", __func__); + } + + int foo; + for(foo = 0; foo < count; foo += 4) + buf[foo / 4] = readbuffer[foo] << 24 + | readbuffer[foo + 1] << 16 + | readbuffer[foo + 2] << 8 + | readbuffer[foo + 3]; + g_free(readbuffer); + str->read_count += count / 4; + return count / 4; + } + else /* Regular binary file */ + { + unsigned char *readbuffer = g_new0(unsigned char, len); + size_t count = fread(readbuffer, sizeof(unsigned char), len, str->file_pointer); + int foo; + for(foo = 0; foo < count; foo++) + buf[foo] = readbuffer[foo]; + g_free(readbuffer); + str->read_count += count; + return count; + } + } + else /* Text mode is the same for Unicode and regular files */ + { + /* Do it character-by-character */ + int foo; + for(foo = 0; foo < len; foo++) + { + glsi32 ch = 
read_utf8_char_from_file(str->file_pointer); + if(ch == -1) + break; + str->read_count++; + buf[foo] = ch; + } + return foo; + } + default: + g_warning("%s: Reading from this kind of stream unsupported.", __func__); + return 0; + } +} + /** * glk_get_line_stream: * @str: An input stream. @@ -638,6 +884,144 @@ glk_get_line_stream(strid_t str, char *buf, glui32 len) } } +/** + * glk_get_line_stream_uni: + * @str: An input stream. + * @buf: A buffer with space for at least @len Unicode code points. + * @len: The number of characters to read, plus one. + * + * Reads Unicode characters from @str, until either @len - 1 Unicode characters + * have been read or a newline has been read. It then puts a terminal null (a + * zero value) on the end. + * + * Returns: The number of characters actually read, including the newline (if + * there is one) but not including the terminal null. + */ +glui32 +glk_get_line_stream_uni(strid_t str, glui32 *buf, glui32 len) +{ + g_return_val_if_fail(str != NULL, 0); + g_return_val_if_fail(str->file_mode == filemode_Read || str->file_mode == filemode_ReadWrite, 0); + g_return_val_if_fail(buf != NULL, 0); + + switch(str->type) + { + case STREAM_TYPE_MEMORY: + { + int copycount = 0; + if(str->unicode) + { + /* Do it character-by-character */ + while(copycount < len - 1 && str->ubuffer && str->mark < str->buflen) + { + glui32 ch = str->ubuffer[str->mark++]; + /* Check for Unicode newline; slightly different than + in file streams */ + if(ch == 0x0A || ch == 0x85 || ch == 0x0C || ch == 0x2028 || ch == 0x2029) + { + buf[copycount++] = '\n'; + break; + } + if(ch == 0x0D) + { + if(str->ubuffer[str->mark] == 0x0A) + str->mark++; /* skip past next newline */ + buf[copycount++] = '\n'; + break; + } + buf[copycount++] = ch; + } + buf[copycount] = '\0'; + } + else + { + /* No recourse to memccpy(), so do it character-by-character */ + while(copycount < len - 1 && str->buffer && str->mark < str->buflen) + { + gchar ch = str->buffer[str->mark++]; + /* Check 
for newline */ + if(ch == '\n') /* Also check for \r and \r\n? */ + { + buf[copycount++] = '\n'; + break; + } + buf[copycount++] = (unsigned char)ch; + } + buf[copycount] = 0; + } + + str->read_count += copycount; + return copycount; + } + case STREAM_TYPE_FILE: + if(str->binary) + { + if(str->unicode) /* Binary file with 4-byte characters */ + { + /* Do it character-by-character */ + int foo; + for(foo = 0; foo < len - 1; foo++) + { + glsi32 ch = read_ucs4be_char_from_file(str->file_pointer); + if(ch == -1) + { + buf[foo] = 0; + return foo - 1; + } + str->read_count++; + if(is_unicode_newline(ch, str->file_pointer, FALSE)) + { + buf[foo] = ch; /* Preserve newline types??? */ + buf[foo + 1] = 0; + return foo; + } + buf[foo] = ch; + } + buf[len] = 0; + return foo; + } + else /* Regular binary file */ + { + gchar *readbuffer = g_new0(gchar, len); + fgets(readbuffer, len, str->file_pointer); + glui32 count = strlen(readbuffer) + 1; /* Copy terminator */ + int foo; + for(foo = 0; foo < count; foo++) + buf[foo] = (unsigned char)(readbuffer[foo]); + str->read_count += count; + return count; + } + } + else /* Text mode is the same for Unicode and regular files */ + { + /* Do it character-by-character */ + int foo; + for(foo = 0; foo < len - 1; foo++) + { + glsi32 ch = read_utf8_char_from_file(str->file_pointer); + if(ch == -1) + { + buf[foo] = 0; + return foo - 1; + } + str->read_count++; + if(is_unicode_newline(ch, str->file_pointer, TRUE)) + { + buf[foo] = ch; /* Preserve newline types??? */ + buf[foo + 1] = 0; + return foo; + } + buf[foo] = ch; + } + buf[len] = 0; + return foo; + } + default: + g_warning("%s: Reading from this kind of stream unsupported.", __func__); + return 0; + } +} + /* * **************** SEEKING FUNCTIONS ******************************************** -- 2.30.2