From 8bf874cd9d56a5850bc474df37642170a4c20b28 Mon Sep 17 00:00:00 2001
From: Philip Chimento <philip.chimento@gmail.com>
Date: Tue, 3 Mar 2009 23:21:53 +0000
Subject: [PATCH] Wrote all remaining Unicode input and output functions.
 Updated gestalt to support Unicode. Split off functions to convert from one
 character set to another and put them in charset.c.

glk_put_char_uni(), glk_put_string_uni(), glk_put_buffer_uni(),
glk_put_char_stream_uni(), glk_put_string_stream_uni(),
glk_put_buffer_stream_uni(), glk_get_char_stream_uni(),
glk_get_buffer_stream_uni(), glk_get_line_stream_uni()

git-svn-id: http://lassie.dyndns-server.com/svn/gargoyle-gtk@29 ddfedd41-794f-dd11-ae45-00112f111e67
---
 src/Makefile.am |   4 +-
 src/charset.c   | 128 +++++++++++
 src/charset.h   |  18 ++
 src/gestalt.c   |  38 +++-
 src/input.c     |  20 +-
 src/main.c      |   2 +-
 src/model.c     |  75 +++----
 src/stream.c    |  44 ++++
 src/strio.c     | 552 ++++++++++++++++++++++++++++++++++++++++--------
 9 files changed, 732 insertions(+), 149 deletions(-)
 create mode 100644 src/charset.c
 create mode 100644 src/charset.h

diff --git a/src/Makefile.am b/src/Makefile.am
index 4a90df4..4a3192b 100755
--- a/src/Makefile.am
+++ b/src/Makefile.am
@@ -24,6 +24,7 @@ lib_LTLIBRARIES = libchimara.la
 libchimara_la_SOURCES = \
 	abort.c abort.h \
 	case.c \
+	charset.c charset.h \
 	chimara-glk.c chimara-glk.h chimara-glk-private.h \
 	event.c event.h \
 	fileref.c fileref.h \
@@ -33,7 +34,8 @@ libchimara_la_SOURCES = \
 	stream.c stream.h \
 	strio.c \
 	style.c \
-	window.c window.h
+	window.c window.h 
+
 libchimara_la_LIBADD = $(CHIMARA_LIBS)
 libchimara_la_LDFLAGS = -no-undefined -export-symbols-regex "^chimara_glk_.*|^glk_.*"
 libchimara_includedir = $(includedir)/chimara/chimara
diff --git a/src/charset.c b/src/charset.c
new file mode 100644
index 0000000..c060d8c
--- /dev/null
+++ b/src/charset.c
@@ -0,0 +1,128 @@
+#include "charset.h"
+#include <glib.h>
+
+/* Internal function: copy the len bytes of Latin-1 text at s, replacing every
+control character except newline (10) with PLACEHOLDER. The copy is exactly len
+bytes and is NOT null-terminated unless len == 0. Free it with g_free(). */
+static gchar *
+remove_latin1_control_characters(const unsigned char *s, const gsize len)
+{
+	/* If len == 0, then return an empty string, not NULL */
+	if(len == 0)
+		return g_strdup("");
+
+	gchar *retval = g_new0(gchar, len);
+	gsize i; /* same type as len: an int index would overflow for huge buffers */
+	for(i = 0; i < len; i++) {
+		const gboolean is_control = (s[i] < 32 && s[i] != 10) || (s[i] >= 127 && s[i] <= 159);
+		retval[i] = is_control? PLACEHOLDER : s[i];
+	}
+	return retval;
+}
+
+/* Internal function: convert a Latin-1 string of len bytes to a UTF-8 string,
+replacing Latin-1 control characters by a placeholder first. The UTF-8 string
+must be freed with g_free() afterwards. Returns NULL on error. */
+gchar *
+convert_latin1_to_utf8(const gchar *s, const gsize len)
+{
+	GError *error = NULL;
+	gchar *canonical = remove_latin1_control_characters( (const unsigned char *)s, len);
+	gchar *retval = g_convert(canonical, len, "UTF-8", "ISO-8859-1", NULL, NULL, &error);
+	g_free(canonical);
+	
+	if(retval == NULL) {
+		g_warning("Error during latin1->utf8 conversion: %s", error->message);
+		g_error_free(error); /* the GError is owned by us; don't leak it */
+	}
+	return retval;
+}
+
+/* Internal function: convert len Latin-1 bytes to a four-byte-per-character
+big-endian string (len * 4 gchars, unterminated). Free it with g_free(). */
+gchar *
+convert_latin1_to_ucs4be_string(const gchar *s, const gsize len)
+{
+	/* Zero-filled, so only the low byte of each unit is stored; g_convert()'s "UCS-4BE" would also work */
+	gchar *retval = g_new0(gchar, len * 4);
+	gsize i; /* gsize, not int: i * 4 + 3 must not overflow before len is reached */
+	for(i = 0; i < len; i++)
+		retval[i * 4 + 3] = s[i];
+	return retval;
+}
+
+/* Internal function: convert a null-terminated UTF-8 string to a
+null-terminated Latin-1 string, replacing characters that cannot be represented
+in Latin-1 by a placeholder. If bytes_written is not NULL it will be filled with
+the number of bytes returned, not counting the null terminator. The returned
+string must be freed with g_free() afterwards. Returns NULL on error. */
+gchar *
+convert_utf8_to_latin1(const gchar *s, gsize *bytes_written)
+{
+	GError *error = NULL;
+	gchar *retval = g_convert_with_fallback(s, -1, "ISO-8859-1", "UTF-8", PLACEHOLDER_STRING, NULL, bytes_written, &error);
+	if(retval == NULL) {
+		g_warning("Error during utf8->latin1 conversion: %s", error->message);
+		g_error_free(error); /* release the GError instead of leaking it */
+	}
+	return retval;
+}
+
+/* Internal function: turn a null-terminated UTF-8 string into a newly
+allocated, null-terminated array of UCS4 code points. When items_written is not
+NULL it receives the number of code points, excluding the terminator. The
+caller frees the array. A NULL result is reported with a warning and returned. */
+gunichar *
+convert_utf8_to_ucs4(const gchar *s, glong *items_written)
+{
+	gunichar *code_points = g_utf8_to_ucs4_fast(s, -1, items_written);
+	if(code_points != NULL)
+		return code_points;
+
+	g_warning("Error during utf8->unicode conversion");
+	return NULL;
+}
+
+/* Internal function: Convert a Unicode buffer of len code points to a
+null-terminated UTF-8 string. Free the result with g_free(). Returns NULL on error. */
+gchar *
+convert_ucs4_to_utf8(const gunichar *buf, const glong len)
+{
+	GError *error = NULL;
+	gchar *retval = g_ucs4_to_utf8(buf, len, NULL, NULL, &error);
+	if(retval == NULL) {
+		g_warning("Error during unicode->utf8 conversion: %s", error->message);
+		g_error_free(error); /* release the GError instead of leaking it */
+	}
+	return retval;
+}
+
+/* Internal function: narrow a buffer of len Unicode code points to Latin-1
+bytes without any other processing; code points above 255 become PLACEHOLDER.
+The len-byte result is not terminated and must be freed afterwards. */
+gchar *
+convert_ucs4_to_latin1_binary(const gunichar *buf, const glong len)
+{
+	gchar *latin1 = g_new0(gchar, len);
+	int pos;
+	for(pos = 0; pos < len; pos++)
+		latin1[pos] = (buf[pos] <= 255)? buf[pos] : PLACEHOLDER;
+	return latin1;
+}
+
+/* Internal function: serialize len Unicode code points as big-endian
+four-byte units. The len * 4 byte result must be freed afterwards. */
+gchar *
+convert_ucs4_to_ucs4be_string(const gunichar *buf, const glong len)
+{
+	gchar *bytes = g_new0(gchar, len * 4);
+	int n;
+	for(n = 0; n < len; n++)
+	{
+		/* Emit the most significant byte of each code point first */
+		int shift, offset = n * 4;
+		for(shift = 24; shift >= 0; shift -= 8)
+			bytes[offset++] = buf[n] >> shift & 0xFF;
+	}
+	return bytes;
+}
diff --git a/src/charset.h b/src/charset.h
new file mode 100644
index 0000000..0a18295
--- /dev/null
+++ b/src/charset.h
@@ -0,0 +1,18 @@
+#ifndef CHARSET_H
+#define CHARSET_H
+
+#include <glib.h>
+
+#define PLACEHOLDER '?'
+#define PLACEHOLDER_STRING "?"
+/* Our placeholder character is '?' (as a character and as a string literal); other options are possible, like printing "0x7F" or something */
+/* Character set conversions, implemented in charset.c; every returned buffer must be freed by the caller */
+gchar *convert_latin1_to_utf8(const gchar *s, const gsize len); /* NULL on error */
+gchar *convert_latin1_to_ucs4be_string(const gchar *s, const gsize len); /* len * 4 bytes */
+gchar *convert_utf8_to_latin1(const gchar *s, gsize *bytes_written); /* NULL on error */
+gunichar *convert_utf8_to_ucs4(const gchar *s, glong *items_written); /* NULL on error */
+gchar *convert_ucs4_to_utf8(const gunichar *buf, const glong len); /* NULL on error */
+gchar *convert_ucs4_to_latin1_binary(const gunichar *buf, const glong len); /* len bytes */
+gchar *convert_ucs4_to_ucs4be_string(const gunichar *buf, const glong len); /* len * 4 bytes */
+
+#endif /* CHARSET_H */
diff --git a/src/gestalt.c b/src/gestalt.c
index 4a586ce..0c3d27b 100644
--- a/src/gestalt.c
+++ b/src/gestalt.c
@@ -71,17 +71,6 @@ glk_gestalt_ext(glui32 sel, glui32 val, glui32 *arr, glui32 arrlen)
 		case gestalt_Version:
 			return (MAJOR_VERSION << 16) + (MINOR_VERSION << 8) + SUB_VERSION;
 		
-		/* Which characters can we print? */	
-		case gestalt_CharOutput:
-			/* All characters are printed as one character, in any case */
-			if(arr && arrlen > 0)
-				*arr = 1;
-			/* Cannot print control chars except \n, or chars > 255 */
-			if( (val < 32 && val != 10) || (val >= 127 && val <= 159) || (val > 255) )
-				return gestalt_CharOutput_CannotPrint;
-			/* Can print all other Latin-1 characters */
-			return gestalt_CharOutput_ExactPrint;
-		
 		/* Which characters can the player type in line input? */
 		case gestalt_LineInput:
 			/* Does not accept control chars */
@@ -95,7 +84,34 @@ glk_gestalt_ext(glui32 sel, glui32 val, glui32 *arr, glui32 arrlen)
 			if( val < 32 || (val >= 127 && val <= 159) || val == keycode_Unknown )
 				return 0;
 			return 1;
+		
+		/* Which characters can we print? */	
+		case gestalt_CharOutput:
+			/* All characters are printed as one character, in any case */
+			if(arr && arrlen > 0)
+				*arr = 1;
+			/* Cannot print control chars except \n, or chars > 255 */
+			if( (val < 32 && val != 10) || (val >= 127 && val <= 159) || (val > 255) )
+				return gestalt_CharOutput_CannotPrint;
+			/* Can print all other Latin-1 characters */
+			return gestalt_CharOutput_ExactPrint;
+		
+		/* Unicode capabilities present */
+		case gestalt_Unicode:
+			return 1;
 			
+		/* Unsupported capabilities */
+		case gestalt_MouseInput:
+		case gestalt_Timer:
+		case gestalt_Graphics:
+		case gestalt_DrawImage:
+		case gestalt_Sound:
+		case gestalt_SoundVolume:
+		case gestalt_SoundNotify:
+		case gestalt_Hyperlinks:
+		case gestalt_HyperlinkInput:
+		case gestalt_SoundMusic:
+		case gestalt_GraphicsTransparency:
 		/* Selector not supported */	
 		default:
 			return 0;
diff --git a/src/input.c b/src/input.c
index 7eebfa5..8956c2c 100644
--- a/src/input.c
+++ b/src/input.c
@@ -1,3 +1,4 @@
+#include "charset.h"
 #include "input.h"
 
 /** glk_request_char_event:
@@ -203,14 +204,9 @@ glk_request_line_event_uni(winid_t win, glui32 *buf, glui32 maxlen, glui32 initl
 
 	gchar *utf8;
 	if(initlen > 0) {
-		GError *error = NULL;
-		utf8 = g_ucs4_to_utf8(buf, initlen, NULL, NULL, &error);
-			
+		utf8 = convert_ucs4_to_utf8(buf, initlen);
 		if(utf8 == NULL)
-		{
-			g_warning("Error during unicode->utf8 conversion: %s", error->message);
 			return;
-		}
 	}
 	else
 		utf8 = g_strdup("");
@@ -325,14 +321,11 @@ end_line_input_request(winid_t win, const gchar *inserted_text)
     /* Convert the string from UTF-8 to Latin-1 or Unicode */
     if(win->input_request_type == INPUT_REQUEST_LINE) 
     {
-        GError *error = NULL;
-        gchar *latin1;
         gsize bytes_written;
-        latin1 = g_convert_with_fallback(inserted_text, -1, "ISO-8859-1", "UTF-8", "?", NULL, &bytes_written, &error);
+        gchar *latin1 = convert_utf8_to_latin1(inserted_text, &bytes_written);
         
         if(latin1 == NULL)
         {
-            g_warning("Error during utf8->latin1 conversion: %s", error->message);
             event_throw(evtype_LineInput, win, 0, 0);
             return;
         }
@@ -349,21 +342,18 @@ end_line_input_request(winid_t win, const gchar *inserted_text)
     }
     else if(win->input_request_type == INPUT_REQUEST_LINE_UNICODE) 
     {
-        gunichar *unicode;
         glong items_written;
-        unicode = g_utf8_to_ucs4_fast(inserted_text, -1, &items_written);
+        gunichar *unicode = convert_utf8_to_ucs4(inserted_text, &items_written);
         
         if(unicode == NULL)
         {
-            g_warning("Error during utf8->unicode conversion");
             event_throw(evtype_LineInput, win, 0, 0);
             return;
         }
 
         /* Place input in the echo stream */
-        /* TODO: glk_put_string_stream_uni not implemented yet
         if(win->echo_stream != NULL) 
-            glk_put_string_stream_uni(window->echo_stream, unicode);*/
+            glk_put_string_stream_uni(win->echo_stream, unicode);
 
         /* Copy the string (but not the NULL at the end) */
         int copycount = MIN(win->line_input_buffer_max_len, items_written);
diff --git a/src/main.c b/src/main.c
index 324e49e..6398348 100644
--- a/src/main.c
+++ b/src/main.c
@@ -115,7 +115,7 @@ main(int argc, char *argv[])
 
 	g_object_unref( G_OBJECT(builder) );
 
-    if( !chimara_glk_run(CHIMARA_GLK(glk), ".libs/first.so", &error) ) {
+    if( !chimara_glk_run(CHIMARA_GLK(glk), ".libs/model.so", &error) ) {
         error_dialog(GTK_WINDOW(window), error, "Error starting Glk library: ");
         return 1;
     }
diff --git a/src/model.c b/src/model.c
index 3823df8..4cf2609 100644
--- a/src/model.c
+++ b/src/model.c
@@ -17,59 +17,60 @@ void glk_main(void)
             nothing we can do without it, so exit. */
         return; 
     }
-    
 	
-/*    char buffer[256];
+    glui32 buffer[1024];
     int i;
-    for(i = 0; i < 256; i++)
-    	buffer[i] = (char)glk_char_to_upper(i);
+    for(i = 0; i < 512; i++) {
+    	buffer[i * 2] = i + 33;
+		buffer[i * 2 + 1] = 32;
+	}
     
-    frefid_t f = glk_fileref_create_temp(fileusage_BinaryMode, 0);
+/*    frefid_t f = glk_fileref_create_temp(fileusage_BinaryMode, 0);
     if(f) 
     {
-    
-	strid_t s = glk_stream_open_file(f, 
-		filemode_ReadWrite, 0);
-	glk_stream_set_current(s);
-	
-	glk_put_char('X');
-	glk_put_string("Philip en Marijn zijn vet goed.\n");
-	glk_put_buffer(buffer, 256);
-
-	glk_stream_set_position(s, 0, seekmode_Start);
-	glk_set_window(mainwin);
-	glk_put_char( glk_get_char_stream(s) );
-	glk_put_char('\n');
-	g_printerr("Line read: %d\n", glk_get_line_stream(s, buffer, 256));
-	glk_put_string(buffer);
-	int count = glk_get_buffer_stream(s, buffer, 256);
-	g_printerr("Buffer read: %d\n", count);
-	glk_put_buffer(buffer, count);		
-	
-	stream_result_t result;
-	glk_stream_close(s, &result);
-	
-	g_printerr("Read count: %d\nWrite count: %d\n", result.readcount,
-		result.writecount);
-		glk_fileref_destroy(f);
-	}
-	*/
+		strid_t s = glk_stream_open_file(f, filemode_ReadWrite, 0);*/
+		glui32 membuf[512];
+		strid_t s = glk_stream_open_memory_uni(membuf, 512, filemode_ReadWrite, 0);
+		glk_stream_set_current(s);
+		
+		glk_put_char_uni('X');
+		glk_put_string("Philip en Marijn zijn vet goed.\n");
+		glk_put_buffer_uni(buffer, 1024);
 
-	glk_set_window(mainwin);
+		glk_stream_set_position(s, 0, seekmode_Start);
+		glk_set_window(mainwin);
+		glk_put_char_uni( glk_get_char_stream_uni(s) );
+		glk_put_char('\n');
+		g_printerr( "Line read: %d\n", glk_get_line_stream_uni(s, buffer, 1024) );
+		g_printerr("string[5] = %X\n", buffer[5]);
+		glk_put_string_uni(buffer);
+		int count = glk_get_buffer_stream_uni(s, buffer, 1024);
+		g_printerr("Buffer read: %d\n", count);
+		glk_put_string("\n---SOME CHARACTERS---\n");
+		glk_put_buffer_uni(buffer, count);
+		glk_put_string("\n---THE SAME CHARACTERS IN UPPERCASE---\n");
+		int newcount = glk_buffer_to_upper_case_uni(buffer, 1024, 1024);
+		glk_put_buffer_uni(buffer, newcount);
+		
+		stream_result_t result;
+		glk_stream_close(s, &result);
+		
+		g_printerr("Read count: %d\nWrite count: %d\n", result.readcount, result.writecount);
+/*		glk_fileref_destroy(f);
+	}*/
 
 	glk_set_interrupt_handler(&sayit);
 
-	gchar buffer[256];
 	event_t ev;
 	while(1) {
-		glk_put_string("prompt> ");
-		glk_request_line_event(mainwin, buffer, 256, 0);
+		glk_put_string("\nprompt> ");
+		glk_request_line_event_uni(mainwin, buffer, 1024, 0);
 		glk_select(&ev);
 		switch(ev.type) {
 			default:
 				printf("Received event:\n");
 				printf("Type: %d\n", ev.type);
-				printf("Win: %d\n", glk_window_get_rock(ev.win));
+				printf("Win: %d\n", glk_window_get_rock(ev.win) );
 				printf("Var1: %d\n", ev.val1);
 				printf("Var2: %d\n", ev.val2);
 		}
diff --git a/src/stream.c b/src/stream.c
index e51d1f1..7e61dc9 100644
--- a/src/stream.c
+++ b/src/stream.c
@@ -118,6 +118,20 @@ glk_put_char(unsigned char ch)
 	glk_put_char_stream(glk_data->current_stream, ch);
 }
 
+/**
+ * glk_put_char_uni:
+ * @ch: A Unicode code point.
+ *
+ * Prints one character, given as a Unicode code point, to the current stream by
+ * calling glk_put_char_stream_uni(). Returns early if no stream is current.
+ */
+void
+glk_put_char_uni(glui32 ch)
+{
+	g_return_if_fail(glk_data->current_stream != NULL);
+	glk_put_char_stream_uni(glk_data->current_stream, ch);
+}
+
 /**
  * glk_put_string:
  * @s: A null-terminated string in Latin-1 encoding.
@@ -137,6 +151,21 @@ glk_put_string(char *s)
 	glk_put_string_stream(glk_data->current_stream, s);
 }
 
+/**
+ * glk_put_string_uni:
+ * @s: A zero-terminated string of Unicode code points.
+ *
+ * Prints a string of Unicode characters to the current stream. It is equivalent
+ * to a series of glk_put_char_uni() calls. A string ends on a #glui32 whose
+ * value is 0. Returns early, printing nothing, if no stream is current.
+ */
+void
+glk_put_string_uni(glui32 *s)
+{
+	g_return_if_fail(glk_data->current_stream != NULL);
+	glk_put_string_stream_uni(glk_data->current_stream, s);
+}
+
 /**
  * glk_put_buffer:
  * @buf: An array of characters in Latin-1 encoding.
@@ -157,6 +186,21 @@ glk_put_buffer(char *buf, glui32 len)
 	glk_put_buffer_stream(glk_data->current_stream, buf, len);
 }
 
+/**
+ * glk_put_buffer_uni:
+ * @buf: An array of Unicode code points.
+ * @len: Number of code points in @buf to print.
+ *
+ * Prints a block of Unicode characters to the current stream. It is equivalent
+ * to a series of glk_put_char_uni() calls. Returns early if no stream is current.
+ */
+void
+glk_put_buffer_uni(glui32 *buf, glui32 len)
+{
+	g_return_if_fail(glk_data->current_stream != NULL);
+	glk_put_buffer_stream_uni(glk_data->current_stream, buf, len);
+}
+
 /**
  * glk_stream_open_memory:
  * @buf: An allocated buffer, or %NULL.
diff --git a/src/strio.c b/src/strio.c
index 974a109..98140ad 100644
--- a/src/strio.c
+++ b/src/strio.c
@@ -1,3 +1,4 @@
+#include "charset.h"
 #include "stream.h"
 #include <stdio.h>
 #include <string.h>
@@ -10,49 +11,6 @@
  *
  */
 
-/* Internal function: change illegal (control) characters in a string to a
-placeholder character. Must free returned string afterwards. */
-static gchar *
-remove_latin1_control_characters(unsigned char *s, gsize len)
-{
-	/* If len == 0, then return an empty string, not NULL */
-	if(len == 0)
-		return g_strdup("");
-			
-	gchar *retval = g_new0(gchar, len);
-	int i;
-	for(i = 0; i < len; i++)
-		if( (s[i] < 32 && s[i] != 10) || (s[i] >= 127 && s[i] <= 159) )
-			retval[i] = '?';
-			/* Our placeholder character is '?'; other options are possible,
-			like printing "0x7F" or something */
-		else
-			retval[i] = s[i];
-	return retval;
-}
-
-/* Internal function: convert a Latin-1 string to a UTF-8 string, replacing
-Latin-1 control characters by a placeholder first. The UTF-8 string must be
-freed afterwards. Returns NULL on error. */
-static gchar *
-convert_latin1_to_utf8(gchar *s, gsize len)
-{
-	GError *error = NULL;
-	gchar *utf8;
-	gchar *canonical = remove_latin1_control_characters( (unsigned char *)s,
-		len);
-	utf8 = g_convert(canonical, len, "UTF-8", "ISO-8859-1", NULL, NULL, &error);
-	g_free(canonical);
-	
-	if(utf8 == NULL)
-	{
-		g_warning("Error during latin1->utf8 conversion: %s", error->message);
-		return NULL;
-	}
-	
-	return utf8;
-}
-
 /* Internal function: write a UTF-8 string to a text grid window's text buffer. */
 static void
 write_utf8_to_grid(winid_t win, gchar *s)
@@ -131,7 +89,7 @@ write_buffer_to_stream(strid_t str, gchar *buf, glui32 len)
 			    case wintype_TextGrid:
 			    {
 			        gchar *utf8 = convert_latin1_to_utf8(buf, len);
-			        if(utf8)
+			        if(utf8 != NULL)
 			        {
 			            /* FIXME: What to do if string contains \n? Split the input string at newlines and write each string separately? */
 			            write_utf8_to_grid(str->window, utf8);
@@ -145,7 +103,7 @@ write_buffer_to_stream(strid_t str, gchar *buf, glui32 len)
 				case wintype_TextBuffer:
 				{
 					gchar *utf8 = convert_latin1_to_utf8(buf, len);
-					if(utf8)
+					if(utf8 != NULL)
 					{
 						write_utf8_to_window(str->window, utf8);
 						g_free(utf8);
@@ -185,13 +143,9 @@ write_buffer_to_stream(strid_t str, gchar *buf, glui32 len)
 			{
 				if(str->unicode) 
 				{
-					/* Convert to four-byte big-endian */
-					gchar *writebuffer = g_new0(gchar, len * 4);
-					int i;
-					for(i = 0; i < len; i++)
-						writebuffer[i * 4 + 3] = buf[i];
-					fwrite(writebuffer, sizeof(gchar), len * 4, 
-						str->file_pointer);
+					gchar *writebuffer = convert_latin1_to_ucs4be_string(buf, len);
+					fwrite(writebuffer, sizeof(gchar), len * 4, str->file_pointer);
+					g_free(writebuffer);
 				} 
 				else /* Regular file */
 				{
@@ -201,8 +155,116 @@ write_buffer_to_stream(strid_t str, gchar *buf, glui32 len)
 			else /* Text mode is the same for Unicode and regular files */
 			{
 				gchar *utf8 = convert_latin1_to_utf8(buf, len);
-				g_fprintf(str->file_pointer, "%s", utf8);
-				g_free(utf8);
+				if(utf8 != NULL)
+				{
+					g_fprintf(str->file_pointer, "%s", utf8);
+					g_free(utf8);
+				}
+			}
+			
+			str->write_count += len;
+			break;
+		default:
+			g_warning("%s: Writing to this kind of stream unsupported.", __func__);
+	}
+}
+
+/* Internal function: write a Unicode buffer with length to a stream. */
+static void
+write_buffer_to_stream_uni(strid_t str, glui32 *buf, glui32 len)
+{
+	switch(str->type)
+	{
+		case STREAM_TYPE_WINDOW:
+			/* Each window type has a different way of printing to it */
+			switch(str->window->type)
+			{
+				/* Printing to these windows' streams does nothing */
+				case wintype_Blank:
+				case wintype_Pair:
+				case wintype_Graphics:
+					str->write_count += len;
+					break;
+					
+			    /* Text grid window */
+			    case wintype_TextGrid:
+			    {
+			        gchar *utf8 = convert_ucs4_to_utf8(buf, len);
+			        if(utf8 != NULL)
+			        {
+			            /* FIXME: What to do if string contains \n? Split the input string at newlines and write each string separately? */
+			            write_utf8_to_grid(str->window, utf8);
+			            g_free(utf8);
+			        }
+			    }
+			        str->write_count += len;
+			        break;
+					
+				/* Text buffer window */	
+				case wintype_TextBuffer:
+				{
+					gchar *utf8 = convert_ucs4_to_utf8(buf, len);
+					if(utf8 != NULL)
+					{
+						write_utf8_to_window(str->window, utf8);
+						g_free(utf8);
+					}
+				}	
+					str->write_count += len;
+					break;
+				default:
+					g_warning("%s: Writing to this kind of window unsupported.", __func__);
+			}
+			
+			/* Now write the same buffer to the window's echo stream */
+			if(str->window->echo_stream != NULL)
+				write_buffer_to_stream_uni(str->window->echo_stream, buf, len);
+			
+			break;
+			
+		case STREAM_TYPE_MEMORY:
+			if(str->unicode && str->ubuffer)
+			{
+				int copycount = MIN(len, str->buflen - str->mark);
+				memmove(str->ubuffer + str->mark, buf, copycount * sizeof(glui32));
+				str->mark += copycount;
+			}
+			if(!str->unicode && str->buffer)
+			{
+				gchar *latin1 = convert_ucs4_to_latin1_binary(buf, len);
+				int copycount = MIN(len, str->buflen - str->mark);
+				memmove(str->buffer + str->mark, latin1, copycount);
+				g_free(latin1);
+				str->mark += copycount;
+			}
+
+			str->write_count += len;
+			break;
+			
+		case STREAM_TYPE_FILE:
+			if(str->binary) 
+			{
+				if(str->unicode) 
+				{
+					gchar *writebuffer = convert_ucs4_to_ucs4be_string(buf, len);
+					fwrite(writebuffer, sizeof(gchar), len * 4, str->file_pointer);
+					g_free(writebuffer);
+				} 
+				else /* Regular file */
+				{
+					gchar *latin1 = convert_ucs4_to_latin1_binary(buf, len);
+					fwrite(latin1, sizeof(gchar), len, str->file_pointer);
+					g_free(latin1);
+				}
+			}
+			else /* Text mode is the same for Unicode and regular files */
+			{
+				gchar *utf8 = convert_ucs4_to_utf8(buf, len);
+				if(utf8 != NULL) 
+				{
+					g_fprintf(str->file_pointer, "%s", utf8);
+					g_free(utf8);
+				}
 			}
 			
 			str->write_count += len;
@@ -229,6 +291,23 @@ glk_put_char_stream(strid_t str, unsigned char ch)
 	write_buffer_to_stream(str, (gchar *)&ch, 1);
 }
 
+/**
+ * glk_put_char_stream_uni:
+ * @str: An output stream.
+ * @ch: A Unicode code point.
+ *
+ * Prints one character @ch to the stream @str. It is illegal for @str to be
+ * %NULL, or an input-only stream; in either case nothing is printed.
+ */
+void
+glk_put_char_stream_uni(strid_t str, glui32 ch)
+{
+	g_return_if_fail(str != NULL);
+	g_return_if_fail(str->file_mode != filemode_Read);
+	
+	write_buffer_to_stream_uni(str, &ch, 1); /* treat the single code point as a buffer of length 1 */
+}
+
 /**
  * glk_put_string_stream:
  * @str: An output stream.
@@ -243,7 +322,29 @@ glk_put_string_stream(strid_t str, char *s)
 	g_return_if_fail(str != NULL);
 	g_return_if_fail(str->file_mode != filemode_Read);
 
-	write_buffer_to_stream(str, (gchar *)s, strlen(s));
+	write_buffer_to_stream(str, s, strlen(s));
+}
+
+/**
+ * glk_put_string_stream_uni:
+ * @str: An output stream.
+ * @s: A zero-terminated array of Unicode code points.
+ *
+ * Prints the code points of @s that precede its terminating zero to the stream
+ * @str. It is illegal for @str to be %NULL, or an input-only stream.
+ */
+void
+glk_put_string_stream_uni(strid_t str, glui32 *s)
+{
+	g_return_if_fail(str != NULL);
+	g_return_if_fail(str->file_mode != filemode_Read);
+	
+	/* Measure the string: glui32 arrays have no strlen() of their own */
+	glong len;
+	for(len = 0; s[len] != 0; len++)
+		;
+	/* The terminator itself is not written */
+	write_buffer_to_stream_uni(str, s, len);
 }
 
 /**
@@ -261,7 +362,25 @@ glk_put_buffer_stream(strid_t str, char *buf, glui32 len)
 	g_return_if_fail(str != NULL);
 	g_return_if_fail(str->file_mode != filemode_Read);
 	
-	write_buffer_to_stream(str, (gchar *)buf, len);
+	write_buffer_to_stream(str, buf, len);
+}
+
+/**
+ * glk_put_buffer_stream_uni:
+ * @str: An output stream.
+ * @buf: An array of Unicode code points.
+ * @len: Number of code points in @buf to print.
+ *
+ * Prints @buf to the stream @str. It is illegal for @str to be %NULL, or an
+ * input-only stream; in either case nothing is printed.
+ */
+void
+glk_put_buffer_stream_uni(strid_t str, glui32 *buf, glui32 len)
+{
+	g_return_if_fail(str != NULL);
+	g_return_if_fail(str->file_mode != filemode_Read);
+	
+	write_buffer_to_stream_uni(str, buf, len);
 }
 
 /*
@@ -328,31 +447,12 @@ is_unicode_newline(glsi32 ch, FILE *fp, gboolean utf8)
 	return FALSE;
 }
 
-/**
- * glk_get_char_stream:
- * @str: An input stream.
- *
- * Reads one character from the stream @str. (There is no notion of a ``current
- * input stream.'') It is illegal for @str to be %NULL, or an output-only
- * stream.
- *
- * The result will be between 0 and 255. As with all basic text functions, Glk
- * assumes the Latin-1 encoding. If the end of the stream has been reached, the
- * result will be -1. Note that high-bit characters (128..255) are
- * <emphasis>not</emphasis> returned as negative numbers.
- *
- * If the stream contains Unicode data --- for example, if it was created with
- * glk_stream_open_file_uni() or glk_stream_open_memory_uni() --- then
- * characters beyond 255 will be returned as 0x3F ("?").
- *
- * Returns: A character value between 0 and 255, or -1 on end of stream.
- */
+/* Internal function: Read one character from a stream. Returns a value which
+ can be returned unchanged by glk_get_char_stream_uni(), but 
+ glk_get_char_stream() must replace high values by the placeholder character. */
 glsi32
-glk_get_char_stream(strid_t str)
+get_char_stream_common(strid_t str)
 {
-	g_return_val_if_fail(str != NULL, -1);
-	g_return_val_if_fail(str->file_mode == filemode_Read || str->file_mode == filemode_ReadWrite, -1);
-	
 	switch(str->type)
 	{
 		case STREAM_TYPE_MEMORY:
@@ -362,13 +462,13 @@ glk_get_char_stream(strid_t str)
 					return -1;
 				glui32 ch = str->ubuffer[str->mark++];
 				str->read_count++;
-				return (ch > 0xFF)? 0x3F : ch;
+				return ch;
 			}
 			else
 			{
 				if(!str->buffer || str->mark >= str->buflen)
 					return -1;
-				char ch = str->buffer[str->mark++];
+				unsigned char ch = str->buffer[str->mark++];
 				str->read_count++;
 				return ch;
 			}
@@ -383,7 +483,7 @@ glk_get_char_stream(strid_t str)
 					if(ch == -1)
 						return -1;
 					str->read_count++;
-					return (ch > 0xFF)? 0x3F : ch;
+					return ch;
 				}
 				else /* Regular file */
 				{
@@ -402,7 +502,7 @@ glk_get_char_stream(strid_t str)
 					return -1;
 					
 				str->read_count++;
-				return (ch > 0xFF)? 0x3F : ch;
+				return ch;
 			}
 		default:
 			g_warning("%s: Reading from this kind of stream unsupported.", __func__);
@@ -410,6 +510,53 @@ glk_get_char_stream(strid_t str)
 	}
 }
 
+/**
+ * glk_get_char_stream:
+ * @str: An input stream.
+ *
+ * Reads one character from the stream @str. (There is no notion of a ``current
+ * input stream.'') It is illegal for @str to be %NULL, or an output-only
+ * stream.
+ *
+ * Glk assumes the Latin-1 encoding here, as in all basic text functions, so
+ * the result is between 0 and 255; high-bit characters (128..255) are
+ * <emphasis>not</emphasis> returned as negative numbers. Characters beyond 255
+ * in a stream containing Unicode data --- one created with, for example,
+ * glk_stream_open_file_uni() or glk_stream_open_memory_uni() --- are returned
+ * as 0x3F ("?"). At the end of the stream the result is -1.
+ *
+ * Returns: A character value between 0 and 255, or -1 on end of stream.
+ */
+glsi32
+glk_get_char_stream(strid_t str)
+{
+	g_return_val_if_fail(str != NULL, -1);
+	g_return_val_if_fail(str->file_mode == filemode_Read || str->file_mode == filemode_ReadWrite, -1);
+	
+	const glsi32 raw = get_char_stream_common(str);
+	if(raw <= 0xFF)
+		return raw; /* -1 and Latin-1 values pass through unchanged */
+	return PLACEHOLDER;
+}
+
+/**
+ * glk_get_char_stream_uni:
+ * @str: An input stream.
+ *
+ * Reads one character from the stream @str. The result will be between 0 and 
+ * 0x7FFFFFFF. If the end of the stream has been reached, the result will be -1.
+ *
+ * Returns: A Unicode code point between 0 and 0x7FFFFFFF, or -1 on end of stream.
+ */
+glsi32
+glk_get_char_stream_uni(strid_t str)
+{
+	g_return_val_if_fail(str != NULL, -1);
+	g_return_val_if_fail(str->file_mode == filemode_Read || str->file_mode == filemode_ReadWrite, -1);
+	
+	return get_char_stream_common(str); /* unlike glk_get_char_stream(), values above 0xFF are kept */
+}
+
 /**
  * glk_get_buffer_stream:
  * @str: An input stream.
@@ -467,7 +614,6 @@ glk_get_buffer_stream(strid_t str, char *buf, glui32 len)
 						g_warning("%s: Incomplete character in binary Unicode file.", __func__);
 					}
 					
-					str->read_count += count / 4;
 					int foo;
 					for(foo = 0; foo < count; foo += 4)
 					{
@@ -478,6 +624,7 @@ glk_get_buffer_stream(strid_t str, char *buf, glui32 len)
 						buf[foo / 4] = (ch > 255)? 0x3F : (char)ch;
 					}
 					g_free(readbuffer);
+					str->read_count += count / 4;
 					return count / 4;
 				}
 				else /* Regular binary file */
@@ -507,6 +654,105 @@ glk_get_buffer_stream(strid_t str, char *buf, glui32 len)
 	}
 }
 
+/**
+ * glk_get_buffer_stream_uni:
+ * @str: An input stream.
+ * @buf: A buffer with space for at least @len Unicode code points.
+ * @len: The number of characters to read.
+ *
+ * Reads up to @len Unicode characters from @str, stopping early if the end of
+ * the stream is reached first. No terminal null is placed in the buffer.
+ *
+ * Returns: The number of Unicode characters actually read.
+ */
+glui32
+glk_get_buffer_stream_uni(strid_t str, glui32 *buf, glui32 len)
+{
+	g_return_val_if_fail(str != NULL, 0);
+	g_return_val_if_fail(str->file_mode == filemode_Read || str->file_mode == filemode_ReadWrite, 0);
+	g_return_val_if_fail(buf != NULL, 0);
+	
+	switch(str->type)
+	{
+		case STREAM_TYPE_MEMORY:
+		{
+			int copycount = 0;
+			if(str->unicode)
+			{
+				if(str->ubuffer) /* if not, copycount stays 0 and memmove() never sees a NULL-based pointer */
+				{
+					copycount = MIN(len, str->buflen - str->mark);
+					memmove(buf, str->ubuffer + str->mark, copycount * sizeof(glui32));
+					str->mark += copycount;
+				}
+			}
+			else
+			{
+				while(copycount < len && str->buffer && str->mark < str->buflen)
+				{
+					unsigned char ch = str->buffer[str->mark++];
+					buf[copycount++] = ch;
+				}
+			}
+			str->read_count += copycount;
+			return copycount;
+		}
+		case STREAM_TYPE_FILE:
+			if(str->binary) 
+			{
+				if(str->unicode) /* Binary file with 4-byte characters */
+				{
+					/* Read len characters of 4 bytes each */
+					unsigned char *readbuffer = g_new0(unsigned char, 4 * len);
+					size_t count = fread(readbuffer, sizeof(unsigned char), 4 * len, str->file_pointer);
+					/* If there was an incomplete character */
+					if(count % 4 != 0) 
+					{
+						count -= count % 4;
+						g_warning("%s: Incomplete character in binary Unicode file.", __func__);
+					}
+					size_t foo;
+					for(foo = 0; foo < count; foo += 4) /* cast before shifting: a promoted int overflows for bytes >= 0x80 */
+						buf[foo / 4] = (glui32)readbuffer[foo] << 24
+							| (glui32)readbuffer[foo + 1] << 16
+							| (glui32)readbuffer[foo + 2] << 8
+							| (glui32)readbuffer[foo + 3];
+					g_free(readbuffer);
+					str->read_count += count / 4;
+					return count / 4;
+				}
+				else /* Regular binary file */
+				{
+					unsigned char *readbuffer = g_new0(unsigned char, len);
+					size_t count = fread(readbuffer, sizeof(unsigned char), len, str->file_pointer);
+					size_t foo;
+					for(foo = 0; foo < count; foo++)
+						buf[foo] = readbuffer[foo];
+					g_free(readbuffer);
+					str->read_count += count;
+					return count;
+				}
+			}
+			else /* Text mode is the same for Unicode and regular files */
+			{
+				/* Do it character-by-character */
+				glui32 foo;
+				for(foo = 0; foo < len; foo++)
+				{
+					glsi32 ch = read_utf8_char_from_file(str->file_pointer);
+					if(ch == -1)
+						break;
+					str->read_count++;
+					buf[foo] = ch;
+				}
+				return foo;
+			}
+		default:
+			g_warning("%s: Reading from this kind of stream unsupported.", __func__);
+			return 0;
+	}
+}
+
 /**
  * glk_get_line_stream:
  * @str: An input stream.
@@ -638,6 +884,144 @@ glk_get_line_stream(strid_t str, char *buf, glui32 len)
 	}
 }
 
+/* Internal function: read one line from a file stream character by character.
+ * If @utf8 is TRUE the file is decoded with read_utf8_char_from_file() (text
+ * mode), otherwise with read_ucs4be_char_from_file() (binary file with 4-byte
+ * characters). Stores at most @len - 1 characters plus a terminal zero in @buf;
+ * @len must be at least 1. Returns the number of characters stored, including
+ * the newline (if there is one) but not including the terminal zero. */
+static glui32
+get_line_file_uni(strid_t str, glui32 *buf, glui32 len, gboolean utf8)
+{
+	glui32 count = 0;
+	while(count < len - 1)
+	{
+		glsi32 ch;
+		if(utf8)
+			ch = read_utf8_char_from_file(str->file_pointer);
+		else
+			ch = read_ucs4be_char_from_file(str->file_pointer);
+		if(ch == -1) /* The reader has no further character to give */
+			break;
+		str->read_count++;
+		/* Store the character before testing it, so that the newline itself
+		ends up in the buffer. Preserve newline types??? */
+		buf[count++] = ch;
+		if(is_unicode_newline(ch, str->file_pointer, utf8))
+			break;
+	}
+	buf[count] = 0;
+	return count;
+}
+
+/**
+ * glk_get_line_stream_uni:
+ * @str: An input stream.
+ * @buf: A buffer with space for at least @len Unicode code points.
+ * @len: The number of characters to read, plus one.
+ *
+ * Reads Unicode characters from @str, until either @len - 1 Unicode characters
+ * have been read or a newline has been read. It then puts a terminal null (a
+ * zero value) on the end. If @len is zero, nothing is read and @buf is left
+ * untouched, since there is no room even for the terminal null.
+ *
+ * Returns: The number of characters actually read, including the newline (if
+ * there is one) but not including the terminal null.
+ */
+glui32
+glk_get_line_stream_uni(strid_t str, glui32 *buf, glui32 len)
+{
+	g_return_val_if_fail(str != NULL, 0);
+	g_return_val_if_fail(str->file_mode == filemode_Read || str->file_mode == filemode_ReadWrite, 0);
+	g_return_val_if_fail(buf != NULL, 0);
+
+	/* len is unsigned, so "len - 1" below would wrap around if len were 0 */
+	if(len == 0)
+		return 0;
+
+	switch(str->type)
+	{
+		case STREAM_TYPE_MEMORY:
+		{
+			glui32 copycount = 0;
+			if(str->unicode)
+			{
+				/* Do it character-by-character */
+				while(copycount < len - 1 && str->ubuffer && str->mark < str->buflen)
+				{
+					glui32 ch = str->ubuffer[str->mark++];
+					/* Check for Unicode newline; slightly different than
+					in file streams */
+					if(ch == 0x0A || ch == 0x85 || ch == 0x0C || ch == 0x2028 || ch == 0x2029)
+					{
+						buf[copycount++] = '\n';
+						break;
+					}
+					if(ch == 0x0D)
+					{
+						/* Treat CR LF as one newline, but only peek at the
+						next character if the CR was not the last one */
+						if(str->mark < str->buflen && str->ubuffer[str->mark] == 0x0A)
+							str->mark++;
+						buf[copycount++] = '\n';
+						break;
+					}
+					buf[copycount++] = ch;
+				}
+			}
+			else
+			{
+				/* No recourse to memccpy(), so do it character-by-character */
+				while(copycount < len - 1 && str->buffer && str->mark < str->buflen)
+				{
+					gchar ch = str->buffer[str->mark++];
+					/* Check for newline */
+					if(ch == '\n') /* Also check for \r and \r\n? */
+					{
+						buf[copycount++] = '\n';
+						break;
+					}
+					/* Go through unsigned char so a signed gchar is not sign-extended */
+					buf[copycount++] = (unsigned char)ch;
+				}
+			}
+
+			buf[copycount] = 0;
+			str->read_count += copycount;
+			return copycount;
+		}
+		case STREAM_TYPE_FILE:
+			if(str->binary)
+			{
+				if(str->unicode) /* Binary file with 4-byte characters */
+					return get_line_file_uni(str, buf, len, FALSE);
+				else /* Regular binary file */
+				{
+					/* fgets() stores at most len - 1 bytes plus a terminator.
+					NOTE: it reports no length, so a zero byte in the file
+					cuts the returned line short. */
+					gchar *readbuffer = g_new0(gchar, len);
+					glui32 count = 0;
+					/* On failure the buffer contents cannot be relied upon */
+					if(fgets(readbuffer, len, str->file_pointer) != NULL)
+						count = strlen(readbuffer);
+					glui32 i;
+					for(i = 0; i < count; i++)
+						buf[i] = (unsigned char)(readbuffer[i]);
+					buf[count] = 0;
+					g_free(readbuffer);
+					str->read_count += count;
+					return count;
+				}
+			}
+			else /* Text mode is the same for Unicode and regular files */
+				return get_line_file_uni(str, buf, len, TRUE);
+		default:
+			g_warning("%s: Reading from this kind of stream unsupported.", __func__);
+			return 0;
+	}
+}
+
 /*
  *
  **************** SEEKING FUNCTIONS ********************************************
-- 
2.30.2