From: fliep Date: Tue, 3 Mar 2009 23:21:53 +0000 (+0000) Subject: Wrote all remaining Unicode input and output functions. X-Git-Tag: v0.9~436 X-Git-Url: https://git.stderr.nl/gitweb?a=commitdiff_plain;h=13c37314da325b696a8aca3ffce36b144a5559d1;p=projects%2Fchimara%2Fchimara.git Wrote all remaining Unicode input and output functions. Updated gestalt to support Unicode. Split off functions to convert from one character set to another and put them in charset.c. glk_put_char_uni(), glk_put_string_uni(), glk_put_buffer_uni(), glk_put_char_stream_uni(), glk_put_string_stream_uni(), glk_put_buffer_stream_uni(), glk_get_char_stream_uni(), glk_get_buffer_stream_uni(), glk_get_line_stream_uni() --- diff --git a/src/Makefile.am b/src/Makefile.am index 4a90df4..4a3192b 100755 --- a/src/Makefile.am +++ b/src/Makefile.am @@ -24,6 +24,7 @@ lib_LTLIBRARIES = libchimara.la libchimara_la_SOURCES = \ abort.c abort.h \ case.c \ + charset.c charset.h \ chimara-glk.c chimara-glk.h chimara-glk-private.h \ event.c event.h \ fileref.c fileref.h \ @@ -33,7 +34,8 @@ libchimara_la_SOURCES = \ stream.c stream.h \ strio.c \ style.c \ - window.c window.h + window.c window.h + libchimara_la_LIBADD = $(CHIMARA_LIBS) libchimara_la_LDFLAGS = -no-undefined -export-symbols-regex "^chimara_glk_.*|^glk_.*" libchimara_includedir = $(includedir)/chimara/chimara diff --git a/src/charset.c b/src/charset.c new file mode 100644 index 0000000..c060d8c --- /dev/null +++ b/src/charset.c @@ -0,0 +1,128 @@ +#include "charset.h" +#include + +/* Internal function: change illegal (control) characters in a string to a +placeholder character. Must free returned string afterwards. */ +static gchar * +remove_latin1_control_characters(const unsigned char *s, const gsize len) +{ + /* If len == 0, then return an empty string, not NULL */ + if(len == 0) + return g_strdup(""); + + gchar *retval = g_new0(gchar, len); + int i; + for(i = 0; i < len; i++) + if( (s[i] < 32 && s[i] != 10) || (s[i] >= 127 && s[i] <= 159) ) + retval[i] = PLACEHOLDER; + else + retval[i] = s[i]; + return retval; +} + +/* Internal function: convert a Latin-1 string to a UTF-8 string, replacing +Latin-1 control characters by a placeholder first. The UTF-8 string must be +freed afterwards. Returns NULL on error. */ +gchar * +convert_latin1_to_utf8(const gchar *s, const gsize len) +{ + GError *error = NULL; + gchar *canonical = remove_latin1_control_characters( (unsigned char *)s, + len); + gchar *retval = g_convert(canonical, len, "UTF-8", "ISO-8859-1", NULL, NULL, &error); + g_free(canonical); + + if(retval == NULL) + g_warning("Error during latin1->utf8 conversion: %s", error->message); + + return retval; +} + +/* Internal function: convert a Latin-1 string to a four-byte-per-character +big-endian string of gchars. The string must be freed afterwards. */ +gchar * +convert_latin1_to_ucs4be_string(const gchar *s, const gsize len) +{ + /* "UCS-4BE" is also a conversion type in g_convert()... but this may be more efficient */ + gchar *retval = g_new0(gchar, len * 4); + int i; + for(i = 0; i < len; i++) + retval[i * 4 + 3] = s[i]; + return retval; +} + +/* Internal function: convert a null-terminated UTF-8 string to a +null-terminated Latin-1 string, replacing characters that cannot be represented +in Latin-1 by a placeholder. If bytes_written is not NULL it will be filled with +the number of bytes returned, not counting the NULL terminator. The returned +string must be freed afterwards. Returns NULL on error. */ +gchar * +convert_utf8_to_latin1(const gchar *s, gsize *bytes_written) +{ + GError *error = NULL; + gchar *retval = g_convert_with_fallback(s, -1, "ISO-8859-1", "UTF-8", PLACEHOLDER_STRING, NULL, bytes_written, &error); + + if(retval == NULL) + g_warning("Error during utf8->latin1 conversion: %s", error->message); + + return retval; +} + +/* Internal function: convert a null-terminated UTF-8 string to a +null-terminated UCS4 string. If items_written is not NULL it will be filled with +the number of code points returned, not counting the terminator. The returned +string must be freed afterwards. Returns NULL on error. */ +gunichar * +convert_utf8_to_ucs4(const gchar *s, glong *items_written) +{ + gunichar *retval = g_utf8_to_ucs4_fast(s, -1, items_written); + + if(retval == NULL) + g_warning("Error during utf8->unicode conversion"); + + return retval; +} + +/* Internal function: Convert a Unicode buffer to a null-terminated UTF-8 +string. The returned string must be freed afterwards. Returns NULL on error. */ +gchar * +convert_ucs4_to_utf8(const gunichar *buf, const glong len) +{ + GError *error = NULL; + gchar *retval = g_ucs4_to_utf8(buf, len, NULL, NULL, &error); + + if(retval == NULL) + g_warning("Error during unicode->utf8 conversion: %s", error->message); + + return retval; +} + +/* Internal function: Convert a Unicode buffer to a Latin-1 string. Do not do +any character processing, just return values > 255 as the placeholder character. +The returned string must be freed afterwards.*/ +gchar * +convert_ucs4_to_latin1_binary(const gunichar *buf, const glong len) +{ + gchar *retval = g_new0(gchar, len); + int foo; + for(foo = 0; foo < len; foo++) + retval[foo] = (buf[foo] > 255)? PLACEHOLDER : buf[foo]; + return retval; +} + +/* Internal function: convert a Unicode buffer to a four-byte-per-character +big-endian string of gchars. The string must be freed afterwards. */ +gchar * +convert_ucs4_to_ucs4be_string(const gunichar *buf, const glong len) +{ + gchar *retval = g_new0(gchar, len * 4); + int i; + for(i = 0; i < len; i++) + { + retval[i * 4] = buf[i] >> 24 ; + retval[i * 4 + 1] = buf[i] >> 16 & 0xFF; + retval[i * 4 + 2] = buf[i] >> 8 & 0xFF; + retval[i * 4 + 3] = buf[i] & 0xFF; + } + return retval; +} diff --git a/src/charset.h b/src/charset.h new file mode 100644 index 0000000..0a18295 --- /dev/null +++ b/src/charset.h @@ -0,0 +1,18 @@ +#ifndef CHARSET_H +#define CHARSET_H + +#include + +#define PLACEHOLDER '?' +#define PLACEHOLDER_STRING "?" +/* Our placeholder character is '?'; other options are possible, like printing "0x7F" or something */ + +gchar *convert_latin1_to_utf8(const gchar *s, const gsize len); +gchar *convert_latin1_to_ucs4be_string(const gchar *s, const gsize len); +gchar *convert_utf8_to_latin1(const gchar *s, gsize *bytes_written); +gunichar *convert_utf8_to_ucs4(const gchar *s, glong *items_written); +gchar *convert_ucs4_to_utf8(const gunichar *buf, const glong len); +gchar *convert_ucs4_to_latin1_binary(const gunichar *buf, const glong len); +gchar *convert_ucs4_to_ucs4be_string(const gunichar *buf, const glong len); + +#endif /* CHARSET_H */ diff --git a/src/gestalt.c b/src/gestalt.c index 4a586ce..0c3d27b 100644 --- a/src/gestalt.c +++ b/src/gestalt.c @@ -71,17 +71,6 @@ glk_gestalt_ext(glui32 sel, glui32 val, glui32 *arr, glui32 arrlen) case gestalt_Version: return (MAJOR_VERSION << 16) + (MINOR_VERSION << 8) + SUB_VERSION; - /* Which characters can we print? */ - case gestalt_CharOutput: - /* All characters are printed as one character, in any case */ - if(arr && arrlen > 0) - *arr = 1; - /* Cannot print control chars except \n, or chars > 255 */ - if( (val < 32 && val != 10) || (val >= 127 && val <= 159) || (val > 255) ) - return gestalt_CharOutput_CannotPrint; - /* Can print all other Latin-1 characters */ - return gestalt_CharOutput_ExactPrint; - /* Which characters can the player type in line input? */ case gestalt_LineInput: /* Does not accept control chars */ @@ -95,7 +84,34 @@ glk_gestalt_ext(glui32 sel, glui32 val, glui32 *arr, glui32 arrlen) if( val < 32 || (val >= 127 && val <= 159) || val == keycode_Unknown ) return 0; return 1; + + /* Which characters can we print? */ + case gestalt_CharOutput: + /* All characters are printed as one character, in any case */ + if(arr && arrlen > 0) + *arr = 1; + /* Cannot print control chars except \n, or chars > 255 */ + if( (val < 32 && val != 10) || (val >= 127 && val <= 159) || (val > 255) ) + return gestalt_CharOutput_CannotPrint; + /* Can print all other Latin-1 characters */ + return gestalt_CharOutput_ExactPrint; + + /* Unicode capabilities present */ + case gestalt_Unicode: + return 1; + /* Unsupported capabilities */ + case gestalt_MouseInput: + case gestalt_Timer: + case gestalt_Graphics: + case gestalt_DrawImage: + case gestalt_Sound: + case gestalt_SoundVolume: + case gestalt_SoundNotify: + case gestalt_Hyperlinks: + case gestalt_HyperlinkInput: + case gestalt_SoundMusic: + case gestalt_GraphicsTransparency: /* Selector not supported */ default: return 0; diff --git a/src/input.c b/src/input.c index 7eebfa5..8956c2c 100644 --- a/src/input.c +++ b/src/input.c @@ -1,3 +1,4 @@ +#include "charset.h" #include "input.h" /** glk_request_char_event: @@ -203,14 +204,9 @@ glk_request_line_event_uni(winid_t win, glui32 *buf, glui32 maxlen, glui32 initl gchar *utf8; if(initlen > 0) { - GError *error = NULL; - utf8 = g_ucs4_to_utf8(buf, initlen, NULL, NULL, &error); - + utf8 = convert_ucs4_to_utf8(buf, initlen); if(utf8 == NULL) - { - g_warning("Error during unicode->utf8 conversion: %s", error->message); return; - } } else utf8 = g_strdup(""); @@ -325,14 +321,11 @@ end_line_input_request(winid_t win, const gchar *inserted_text) /* Convert the string from UTF-8 to Latin-1 or Unicode */ if(win->input_request_type == INPUT_REQUEST_LINE) { - GError *error = NULL; - gchar *latin1; gsize bytes_written; - latin1 = g_convert_with_fallback(inserted_text, -1, "ISO-8859-1", "UTF-8", "?", NULL, &bytes_written, &error); + gchar *latin1 = convert_utf8_to_latin1(inserted_text, &bytes_written); if(latin1 == NULL) { - g_warning("Error during utf8->latin1 conversion: %s", error->message); event_throw(evtype_LineInput, win, 0, 0); return; } @@ -349,21 +342,18 @@ end_line_input_request(winid_t win, const gchar *inserted_text) } else if(win->input_request_type == INPUT_REQUEST_LINE_UNICODE) { - gunichar *unicode; glong items_written; - unicode = g_utf8_to_ucs4_fast(inserted_text, -1, &items_written); + gunichar *unicode = convert_utf8_to_ucs4(inserted_text, &items_written); if(unicode == NULL) { - g_warning("Error during utf8->unicode conversion"); event_throw(evtype_LineInput, win, 0, 0); return; } /* Place input in the echo stream */ - /* TODO: glk_put_string_stream_uni not implemented yet if(win->echo_stream != NULL) - glk_put_string_stream_uni(window->echo_stream, unicode);*/ + glk_put_string_stream_uni(win->echo_stream, unicode); /* Copy the string (but not the NULL at the end) */ int copycount = MIN(win->line_input_buffer_max_len, items_written); diff --git a/src/main.c b/src/main.c index 324e49e..6398348 100644 --- a/src/main.c +++ b/src/main.c @@ -115,7 +115,7 @@ main(int argc, char *argv[]) g_object_unref( G_OBJECT(builder) ); - if( !chimara_glk_run(CHIMARA_GLK(glk), ".libs/first.so", &error) ) { + if( !chimara_glk_run(CHIMARA_GLK(glk), ".libs/model.so", &error) ) { error_dialog(GTK_WINDOW(window), error, "Error starting Glk library: "); return 1; } diff --git a/src/model.c b/src/model.c index 3823df8..4cf2609 100644 --- a/src/model.c +++ b/src/model.c @@ -17,59 +17,60 @@ void glk_main(void) nothing we can do without it, so exit. */ return; } - -/* char buffer[256]; + glui32 buffer[1024]; int i; - for(i = 0; i < 256; i++) - buffer[i] = (char)glk_char_to_upper(i); + for(i = 0; i < 512; i++) { + buffer[i * 2] = i + 33; + buffer[i * 2 + 1] = 32; + } - frefid_t f = glk_fileref_create_temp(fileusage_BinaryMode, 0); +/* frefid_t f = glk_fileref_create_temp(fileusage_BinaryMode, 0); if(f) { - - strid_t s = glk_stream_open_file(f, - filemode_ReadWrite, 0); - glk_stream_set_current(s); - - glk_put_char('X'); - glk_put_string("Philip en Marijn zijn vet goed.\n"); - glk_put_buffer(buffer, 256); - - glk_stream_set_position(s, 0, seekmode_Start); - glk_set_window(mainwin); - glk_put_char( glk_get_char_stream(s) ); - glk_put_char('\n'); - g_printerr("Line read: %d\n", glk_get_line_stream(s, buffer, 256)); - glk_put_string(buffer); - int count = glk_get_buffer_stream(s, buffer, 256); - g_printerr("Buffer read: %d\n", count); - glk_put_buffer(buffer, count); - - stream_result_t result; - glk_stream_close(s, &result); - - g_printerr("Read count: %d\nWrite count: %d\n", result.readcount, - result.writecount); - glk_fileref_destroy(f); - } - */ + strid_t s = glk_stream_open_file(f, filemode_ReadWrite, 0);*/ + glui32 membuf[512]; + strid_t s = glk_stream_open_memory_uni(membuf, 512, filemode_ReadWrite, 0); + glk_stream_set_current(s); + + glk_put_char_uni('X'); + glk_put_string("Philip en Marijn zijn vet goed.\n"); + glk_put_buffer_uni(buffer, 1024); - glk_set_window(mainwin); + glk_stream_set_position(s, 0, seekmode_Start); + glk_set_window(mainwin); + glk_put_char_uni( glk_get_char_stream_uni(s) ); + glk_put_char('\n'); + g_printerr( "Line read: %d\n", glk_get_line_stream_uni(s, buffer, 1024) ); + g_printerr("string[5] = %X\n", buffer[5]); + glk_put_string_uni(buffer); + int count = glk_get_buffer_stream_uni(s, buffer, 1024); + g_printerr("Buffer read: %d\n", count); + glk_put_string("\n---SOME CHARACTERS---\n"); + glk_put_buffer_uni(buffer, count); + glk_put_string("\n---THE SAME CHARACTERS IN UPPERCASE---\n"); + int newcount = glk_buffer_to_upper_case_uni(buffer, 1024, 1024); + glk_put_buffer_uni(buffer, newcount); + + stream_result_t result; + glk_stream_close(s, &result); + + g_printerr("Read count: %d\nWrite count: %d\n", result.readcount, result.writecount); +/* glk_fileref_destroy(f); + }*/ glk_set_interrupt_handler(&sayit); - gchar buffer[256]; event_t ev; while(1) { - glk_put_string("prompt> "); - glk_request_line_event(mainwin, buffer, 256, 0); + glk_put_string("\nprompt> "); + glk_request_line_event_uni(mainwin, buffer, 1024, 0); glk_select(&ev); switch(ev.type) { default: printf("Received event:\n"); printf("Type: %d\n", ev.type); - printf("Win: %d\n", glk_window_get_rock(ev.win)); + printf("Win: %d\n", glk_window_get_rock(ev.win) ); printf("Var1: %d\n", ev.val1); printf("Var2: %d\n", ev.val2); } diff --git a/src/stream.c b/src/stream.c index e51d1f1..7e61dc9 100644 --- a/src/stream.c +++ b/src/stream.c @@ -118,6 +118,20 @@ glk_put_char(unsigned char ch) glk_put_char_stream(glk_data->current_stream, ch); } +/** + * glk_put_char_uni: + * @ch: A Unicode code point. + * + * Prints one character to the current stream. The character is assumed to be a + * Unicode code point. + */ +void +glk_put_char_uni(glui32 ch) +{ + g_return_if_fail(glk_data->current_stream != NULL); + glk_put_char_stream_uni(glk_data->current_stream, ch); +} + /** * glk_put_string: * @s: A null-terminated string in Latin-1 encoding. @@ -137,6 +151,21 @@ glk_put_string(char *s) glk_put_string_stream(glk_data->current_stream, s); } +/** + * glk_put_string_uni: + * @s: A zero-terminated string of Unicode code points. + * + * Prints a string of Unicode characters to the current stream. It is equivalent + * to a series of glk_put_char_uni() calls. A string ends on a #glui32 whose + * value is 0. + */ +void +glk_put_string_uni(glui32 *s) +{ + g_return_if_fail(glk_data->current_stream != NULL); + glk_put_string_stream_uni(glk_data->current_stream, s); +} + /** * glk_put_buffer: * @buf: An array of characters in Latin-1 encoding. @@ -157,6 +186,21 @@ glk_put_buffer(char *buf, glui32 len) glk_put_buffer_stream(glk_data->current_stream, buf, len); } +/** + * glk_put_buffer_uni: + * @buf: An array of Unicode code points. + * @len: Length of @buf. + * + * Prints a block of Unicode characters to the current stream. It is equivalent + * to a series of glk_put_char_uni() calls. + */ +void +glk_put_buffer_uni(glui32 *buf, glui32 len) +{ + g_return_if_fail(glk_data->current_stream != NULL); + glk_put_buffer_stream_uni(glk_data->current_stream, buf, len); +} + /** * glk_stream_open_memory: * @buf: An allocated buffer, or %NULL. diff --git a/src/strio.c b/src/strio.c index 974a109..98140ad 100644 --- a/src/strio.c +++ b/src/strio.c @@ -1,3 +1,4 @@ +#include "charset.h" #include "stream.h" #include #include @@ -10,49 +11,6 @@ * */ -/* Internal function: change illegal (control) characters in a string to a -placeholder character. Must free returned string afterwards. */ -static gchar * -remove_latin1_control_characters(unsigned char *s, gsize len) -{ - /* If len == 0, then return an empty string, not NULL */ - if(len == 0) - return g_strdup(""); - - gchar *retval = g_new0(gchar, len); - int i; - for(i = 0; i < len; i++) - if( (s[i] < 32 && s[i] != 10) || (s[i] >= 127 && s[i] <= 159) ) - retval[i] = '?'; - /* Our placeholder character is '?'; other options are possible, - like printing "0x7F" or something */ - else - retval[i] = s[i]; - return retval; -} - -/* Internal function: convert a Latin-1 string to a UTF-8 string, replacing -Latin-1 control characters by a placeholder first. The UTF-8 string must be -freed afterwards. Returns NULL on error. */ -static gchar * -convert_latin1_to_utf8(gchar *s, gsize len) -{ - GError *error = NULL; - gchar *utf8; - gchar *canonical = remove_latin1_control_characters( (unsigned char *)s, - len); - utf8 = g_convert(canonical, len, "UTF-8", "ISO-8859-1", NULL, NULL, &error); - g_free(canonical); - - if(utf8 == NULL) - { - g_warning("Error during latin1->utf8 conversion: %s", error->message); - return NULL; - } - - return utf8; -} - /* Internal function: write a UTF-8 string to a text grid window's text buffer. */ static void write_utf8_to_grid(winid_t win, gchar *s) @@ -131,7 +89,7 @@ write_buffer_to_stream(strid_t str, gchar *buf, glui32 len) case wintype_TextGrid: { gchar *utf8 = convert_latin1_to_utf8(buf, len); - if(utf8) + if(utf8 != NULL) { /* FIXME: What to do if string contains \n? Split the input string at newlines and write each string separately? */ write_utf8_to_grid(str->window, utf8); @@ -145,7 +103,7 @@ write_buffer_to_stream(strid_t str, gchar *buf, glui32 len) case wintype_TextBuffer: { gchar *utf8 = convert_latin1_to_utf8(buf, len); - if(utf8) + if(utf8 != NULL) { write_utf8_to_window(str->window, utf8); g_free(utf8); @@ -185,13 +143,9 @@ write_buffer_to_stream(strid_t str, gchar *buf, glui32 len) { if(str->unicode) { - /* Convert to four-byte big-endian */ - gchar *writebuffer = g_new0(gchar, len * 4); - int i; - for(i = 0; i < len; i++) - writebuffer[i * 4 + 3] = buf[i]; - fwrite(writebuffer, sizeof(gchar), len * 4, - str->file_pointer); + gchar *writebuffer = convert_latin1_to_ucs4be_string(buf, len); + fwrite(writebuffer, sizeof(gchar), len * 4, str->file_pointer); + g_free(writebuffer); } else /* Regular file */ { @@ -201,8 +155,116 @@ write_buffer_to_stream(strid_t str, gchar *buf, glui32 len) else /* Text mode is the same for Unicode and regular files */ { gchar *utf8 = convert_latin1_to_utf8(buf, len); - g_fprintf(str->file_pointer, "%s", utf8); - g_free(utf8); + if(utf8 != NULL) + { + g_fprintf(str->file_pointer, "%s", utf8); + g_free(utf8); + } + } + + str->write_count += len; + break; + default: + g_warning("%s: Writing to this kind of stream unsupported.", __func__); + } +} + +/* Internal function: write a Unicode buffer with length to a stream. */ +static void +write_buffer_to_stream_uni(strid_t str, glui32 *buf, glui32 len) +{ + switch(str->type) + { + case STREAM_TYPE_WINDOW: + /* Each window type has a different way of printing to it */ + switch(str->window->type) + { + /* Printing to these windows' streams does nothing */ + case wintype_Blank: + case wintype_Pair: + case wintype_Graphics: + str->write_count += len; + break; + + /* Text grid window */ + case wintype_TextGrid: + { + gchar *utf8 = convert_ucs4_to_utf8(buf, len); + if(utf8 != NULL) + { + /* FIXME: What to do if string contains \n? Split the input string at newlines and write each string separately? */ + write_utf8_to_grid(str->window, utf8); + g_free(utf8); + } + } + str->write_count += len; + break; + + /* Text buffer window */ + case wintype_TextBuffer: + { + gchar *utf8 = convert_ucs4_to_utf8(buf, len); + if(utf8 != NULL) + { + write_utf8_to_window(str->window, utf8); + g_free(utf8); + } + } + str->write_count += len; + break; + default: + g_warning("%s: Writing to this kind of window unsupported.", __func__); + } + + /* Now write the same buffer to the window's echo stream */ + if(str->window->echo_stream != NULL) + write_buffer_to_stream_uni(str->window->echo_stream, buf, len); + + break; + + case STREAM_TYPE_MEMORY: + if(str->unicode && str->ubuffer) + { + int copycount = MIN(len, str->buflen - str->mark); + memmove(str->ubuffer + str->mark, buf, copycount * sizeof(glui32)); + str->mark += copycount; + } + if(!str->unicode && str->buffer) + { + gchar *latin1 = convert_ucs4_to_latin1_binary(buf, len); + int copycount = MIN(len, str->buflen - str->mark); + memmove(str->buffer + str->mark, latin1, copycount); + g_free(latin1); + str->mark += copycount; + } + + str->write_count += len; + break; + + case STREAM_TYPE_FILE: + if(str->binary) + { + if(str->unicode) + { + gchar *writebuffer = convert_ucs4_to_ucs4be_string(buf, len); + fwrite(writebuffer, sizeof(gchar), len * 4, str->file_pointer); + g_free(writebuffer); + } + else /* Regular file */ + { + gchar *latin1 = convert_ucs4_to_latin1_binary(buf, len); + fwrite(latin1, sizeof(gchar), len, str->file_pointer); + g_free(latin1); + } + } + else /* Text mode is the same for Unicode and regular files */ + { + gchar *utf8 = convert_ucs4_to_utf8(buf, len); + if(utf8 != NULL) + { + g_fprintf(str->file_pointer, "%s", utf8); + g_free(utf8); + } } str->write_count += len; @@ -229,6 +291,23 @@ glk_put_char_stream(strid_t str, unsigned char ch) write_buffer_to_stream(str, (gchar *)&ch, 1); } +/** + * glk_put_char_stream_uni: + * @str: An output stream. + * @ch: A Unicode code point. + * + * Prints one character @ch to the stream @str. It is illegal for @str to be + * %NULL, or an input-only stream. + */ +void +glk_put_char_stream_uni(strid_t str, glui32 ch) +{ + g_return_if_fail(str != NULL); + g_return_if_fail(str->file_mode != filemode_Read); + + write_buffer_to_stream_uni(str, &ch, 1); +} + /** * glk_put_string_stream: * @str: An output stream. @@ -243,7 +322,29 @@ glk_put_string_stream(strid_t str, char *s) g_return_if_fail(str != NULL); g_return_if_fail(str->file_mode != filemode_Read); - write_buffer_to_stream(str, (gchar *)s, strlen(s)); + write_buffer_to_stream(str, s, strlen(s)); +} + +/** + * glk_put_string_stream_uni: + * @str: An output stream. + * @s: A null-terminated array of Unicode code points. + * + * Prints @s to the stream @str. It is illegal for @str to be %NULL, or an + * input-only stream. + */ +void +glk_put_string_stream_uni(strid_t str, glui32 *s) +{ + g_return_if_fail(str != NULL); + g_return_if_fail(str->file_mode != filemode_Read); + + /* An impromptu strlen() for glui32 arrays */ + glong len = 0; + glui32 *ptr = s; + while(*ptr++) + len++; + write_buffer_to_stream_uni(str, s, len); } /** @@ -261,7 +362,25 @@ glk_put_buffer_stream(strid_t str, char *buf, glui32 len) g_return_if_fail(str != NULL); g_return_if_fail(str->file_mode != filemode_Read); - write_buffer_to_stream(str, (gchar *)buf, len); + write_buffer_to_stream(str, buf, len); +} + +/** + * glk_put_buffer_stream_uni: + * @str: An output stream. + * @buf: An array of Unicode code points. + * @len: Length of @buf. + * + * Prints @buf to the stream @str. It is illegal for @str to be %NULL, or an + * input-only stream. + */ +void +glk_put_buffer_stream_uni(strid_t str, glui32 *buf, glui32 len) +{ + g_return_if_fail(str != NULL); + g_return_if_fail(str->file_mode != filemode_Read); + + write_buffer_to_stream_uni(str, buf, len); } /* @@ -328,31 +447,12 @@ is_unicode_newline(glsi32 ch, FILE *fp, gboolean utf8) return FALSE; } -/** - * glk_get_char_stream: - * @str: An input stream. - * - * Reads one character from the stream @str. (There is no notion of a ``current - * input stream.'') It is illegal for @str to be %NULL, or an output-only - * stream. - * - * The result will be between 0 and 255. As with all basic text functions, Glk - * assumes the Latin-1 encoding. If the end of the stream has been reached, the - * result will be -1. Note that high-bit characters (128..255) are - * not returned as negative numbers. - * - * If the stream contains Unicode data --- for example, if it was created with - * glk_stream_open_file_uni() or glk_stream_open_memory_uni() --- then - * characters beyond 255 will be returned as 0x3F ("?"). - * - * Returns: A character value between 0 and 255, or -1 on end of stream. - */ +/* Internal function: Read one character from a stream. Returns a value which + can be returned unchanged by glk_get_char_stream_uni(), but + glk_get_char_stream() must replace high values by the placeholder character. */ glsi32 -glk_get_char_stream(strid_t str) +get_char_stream_common(strid_t str) { - g_return_val_if_fail(str != NULL, -1); - g_return_val_if_fail(str->file_mode == filemode_Read || str->file_mode == filemode_ReadWrite, -1); - switch(str->type) { case STREAM_TYPE_MEMORY: @@ -362,13 +462,13 @@ glk_get_char_stream(strid_t str) return -1; glui32 ch = str->ubuffer[str->mark++]; str->read_count++; - return (ch > 0xFF)? 0x3F : ch; + return ch; } else { if(!str->buffer || str->mark >= str->buflen) return -1; - char ch = str->buffer[str->mark++]; + unsigned char ch = str->buffer[str->mark++]; str->read_count++; return ch; } @@ -383,7 +483,7 @@ glk_get_char_stream(strid_t str) if(ch == -1) return -1; str->read_count++; - return (ch > 0xFF)? 0x3F : ch; + return ch; } else /* Regular file */ { @@ -402,7 +502,7 @@ glk_get_char_stream(strid_t str) return -1; str->read_count++; - return (ch > 0xFF)? 0x3F : ch; + return ch; } default: g_warning("%s: Reading from this kind of stream unsupported.", __func__); @@ -410,6 +510,53 @@ glk_get_char_stream(strid_t str) } } +/** + * glk_get_char_stream: + * @str: An input stream. + * + * Reads one character from the stream @str. (There is no notion of a ``current + * input stream.'') It is illegal for @str to be %NULL, or an output-only + * stream. + * + * The result will be between 0 and 255. As with all basic text functions, Glk + * assumes the Latin-1 encoding. If the end of the stream has been reached, the + * result will be -1. Note that high-bit characters (128..255) are + * not returned as negative numbers. + * + * If the stream contains Unicode data --- for example, if it was created with + * glk_stream_open_file_uni() or glk_stream_open_memory_uni() --- then + * characters beyond 255 will be returned as 0x3F ("?"). + * + * Returns: A character value between 0 and 255, or -1 on end of stream. + */ +glsi32 +glk_get_char_stream(strid_t str) +{ + g_return_val_if_fail(str != NULL, -1); + g_return_val_if_fail(str->file_mode == filemode_Read || str->file_mode == filemode_ReadWrite, -1); + + glsi32 ch = get_char_stream_common(str); + return (ch > 0xFF)? PLACEHOLDER : ch; +} + +/** + * glk_get_char_stream_uni: + * @str: An input stream. + * + * Reads one character from the stream @str. The result will be between 0 and + * 0x7FFFFFFF. If the end of the stream has been reached, the result will be -1. + * + * Returns: A character value between 0 and 255, or -1 on end of stream. + */ +glsi32 +glk_get_char_stream_uni(strid_t str) +{ + g_return_val_if_fail(str != NULL, -1); + g_return_val_if_fail(str->file_mode == filemode_Read || str->file_mode == filemode_ReadWrite, -1); + + return get_char_stream_common(str); +} + /** * glk_get_buffer_stream: * @str: An input stream. @@ -467,7 +614,6 @@ glk_get_buffer_stream(strid_t str, char *buf, glui32 len) g_warning("%s: Incomplete character in binary Unicode file.", __func__); } - str->read_count += count / 4; int foo; for(foo = 0; foo < count; foo += 4) { @@ -478,6 +624,7 @@ glk_get_buffer_stream(strid_t str, char *buf, glui32 len) buf[foo / 4] = (ch > 255)? 0x3F : (char)ch; } g_free(readbuffer); + str->read_count += count / 4; return count / 4; } else /* Regular binary file */ @@ -507,6 +654,105 @@ glk_get_buffer_stream(strid_t str, char *buf, glui32 len) } } +/** + * glk_get_buffer_stream_uni: + * @str: An input stream. + * @buf: A buffer with space for at least @len Unicode code points. + * @len: The number of characters to read. + * + * Reads @len Unicode characters from @str, unless the end of stream is reached + * first. No terminal null is placed in the buffer. + * + * Returns: The number of Unicode characters actually read. + */ +glui32 +glk_get_buffer_stream_uni(strid_t str, glui32 *buf, glui32 len) +{ + g_return_val_if_fail(str != NULL, 0); + g_return_val_if_fail(str->file_mode == filemode_Read || str->file_mode == filemode_ReadWrite, 0); + g_return_val_if_fail(buf != NULL, 0); + + switch(str->type) + { + case STREAM_TYPE_MEMORY: + { + int copycount = 0; + if(str->unicode) + { + if(str->ubuffer) /* if not, copycount stays 0 */ + copycount = MIN(len, str->buflen - str->mark); + memmove(buf, str->ubuffer + str->mark, copycount * 4); + str->mark += copycount; + } + else + { + while(copycount < len && str->buffer && str->mark < str->buflen) + { + unsigned char ch = str->buffer[str->mark++]; + buf[copycount++] = ch; + } + } + + str->read_count += copycount; + return copycount; + } + case STREAM_TYPE_FILE: + if(str->binary) + { + if(str->unicode) /* Binary file with 4-byte characters */ + { + /* Read len characters of 4 bytes each */ + unsigned char *readbuffer = g_new0(unsigned char, 4 * len); + size_t count = fread(readbuffer, sizeof(unsigned char), 4 * len, str->file_pointer); + /* If there was an incomplete character */ + if(count % 4 != 0) + { + count -= count % 4; + g_warning("%s: Incomplete character in binary Unicode file.", __func__); + } + + int foo; + for(foo = 0; foo < count; foo += 4) + buf[foo / 4] = readbuffer[foo] << 24 + | readbuffer[foo + 1] << 16 + | readbuffer[foo + 2] << 8 + | readbuffer[foo + 3]; + g_free(readbuffer); + str->read_count += count / 4; + return count / 4; + } + else /* Regular binary file */ + { + unsigned char *readbuffer = g_new0(unsigned char, len); + size_t count = fread(readbuffer, sizeof(unsigned char), len, str->file_pointer); + int foo; + for(foo = 0; foo < count; foo++) + buf[foo] = readbuffer[foo]; + g_free(readbuffer); + str->read_count += count; + return count; + } + } + else /* Text mode is the same for Unicode and regular files */ + { + /* Do it character-by-character */ + int foo; + for(foo = 0; foo < len; foo++) + { + glsi32 ch = read_utf8_char_from_file(str->file_pointer); + if(ch == -1) + break; + str->read_count++; + buf[foo] = ch; + } + return foo; + } + default: + g_warning("%s: Reading from this kind of stream unsupported.", __func__); + return 0; + } +} + /** * glk_get_line_stream: * @str: An input stream. @@ -638,6 +884,144 @@ glk_get_line_stream(strid_t str, char *buf, glui32 len) } } +/** + * glk_get_line_stream_uni: + * @str: An input stream. + * @buf: A buffer with space for at least @len Unicode code points. + * @len: The number of characters to read, plus one. + * + * Reads Unicode characters from @str, until either @len - 1 Unicode characters + * have been read or a newline has been read. It then puts a terminal null (a + * zero value) on the end. + * + * Returns: The number of characters actually read, including the newline (if + * there is one) but not including the terminal null. + */ +glui32 +glk_get_line_stream_uni(strid_t str, glui32 *buf, glui32 len) +{ + g_return_val_if_fail(str != NULL, 0); + g_return_val_if_fail(str->file_mode == filemode_Read || str->file_mode == filemode_ReadWrite, 0); + g_return_val_if_fail(buf != NULL, 0); + + switch(str->type) + { + case STREAM_TYPE_MEMORY: + { + int copycount = 0; + if(str->unicode) + { + /* Do it character-by-character */ + while(copycount < len - 1 && str->ubuffer && str->mark < str->buflen) + { + glui32 ch = str->ubuffer[str->mark++]; + /* Check for Unicode newline; slightly different than + in file streams */ + if(ch == 0x0A || ch == 0x85 || ch == 0x0C || ch == 0x2028 || ch == 0x2029) + { + buf[copycount++] = '\n'; + break; + } + if(ch == 0x0D) + { + if(str->ubuffer[str->mark] == 0x0A) + str->mark++; /* skip past next newline */ + buf[copycount++] = '\n'; + break; + } + buf[copycount++] = ch; + } + buf[copycount] = '\0'; + } + else + { + /* No recourse to memccpy(), so do it character-by-character */ + while(copycount < len - 1 && str->buffer && str->mark < str->buflen) + { + gchar ch = str->buffer[str->mark++]; + /* Check for newline */ + if(ch == '\n') /* Also check for \r and \r\n? */ + { + buf[copycount++] = '\n'; + break; + } + buf[copycount++] = (unsigned char)ch; + } + buf[copycount] = 0; + } + + str->read_count += copycount; + return copycount; + } + case STREAM_TYPE_FILE: + if(str->binary) + { + if(str->unicode) /* Binary file with 4-byte characters */ + { + /* Do it character-by-character */ + int foo; + for(foo = 0; foo < len - 1; foo++) + { + glsi32 ch = read_ucs4be_char_from_file(str->file_pointer); + if(ch == -1) + { + buf[foo] = 0; + return foo - 1; + } + str->read_count++; + if(is_unicode_newline(ch, str->file_pointer, FALSE)) + { + buf[foo] = ch; /* Preserve newline types??? */ + buf[foo + 1] = 0; + return foo; + } + buf[foo] = ch; + } + buf[len] = 0; + return foo; + } + else /* Regular binary file */ + { + gchar *readbuffer = g_new0(gchar, len); + fgets(readbuffer, len, str->file_pointer); + glui32 count = strlen(readbuffer) + 1; /* Copy terminator */ + int foo; + for(foo = 0; foo < count; foo++) + buf[foo] = (unsigned char)(readbuffer[foo]); + str->read_count += count; + return count; + } + } + else /* Text mode is the same for Unicode and regular files */ + { + /* Do it character-by-character */ + int foo; + for(foo = 0; foo < len - 1; foo++) + { + glsi32 ch = read_utf8_char_from_file(str->file_pointer); + if(ch == -1) + { + buf[foo] = 0; + return foo - 1; + } + str->read_count++; + if(is_unicode_newline(ch, str->file_pointer, TRUE)) + { + buf[foo] = ch; /* Preserve newline types??? */ + buf[foo + 1] = 0; + return foo; + } + buf[foo] = ch; + } + buf[len] = 0; + return foo; + } + default: + g_warning("%s: Reading from this kind of stream unsupported.", __func__); + return 0; + } +} + /* * **************** SEEKING FUNCTIONS ********************************************