Wrote all remaining Unicode input and output functions.

author Philip Chimento <philip.chimento@gmail.com>

Tue, 3 Mar 2009 23:21:53 +0000 (23:21 +0000)

committer Philip Chimento <philip.chimento@gmail.com>

Tue, 3 Mar 2009 23:21:53 +0000 (23:21 +0000)
author Philip Chimento <philip.chimento@gmail.com>
Tue, 3 Mar 2009 23:21:53 +0000 (23:21 +0000)
committer Philip Chimento <philip.chimento@gmail.com>
Tue, 3 Mar 2009 23:21:53 +0000 (23:21 +0000)
diff --git a/src/Makefile.am b/src/Makefile.am

index 4a90df4a5c08dba8b8e1e8aef71fc0194c25b2f8..4a3192b7b0cff4578b9e40c6fc7561b5e937b2ff 100755 (executable)
--- a/src/Makefile.am
+++ b/src/Makefile.am
@@ -24,6 +24,7 @@ lib_LTLIBRARIES = libchimara.la
  libchimara_la_SOURCES = \
         abort.c abort.h \
         case.c \
+       charset.c charset.h \
         chimara-glk.c chimara-glk.h chimara-glk-private.h \
         event.c event.h \
         fileref.c fileref.h \
@@ -33,7 +34,8 @@ libchimara_la_SOURCES = \
         stream.c stream.h \
         strio.c \
         style.c \
-       window.c window.h
+       window.c window.h 
+
  libchimara_la_LIBADD = $(CHIMARA_LIBS)
  libchimara_la_LDFLAGS = -no-undefined -export-symbols-regex "^chimara_glk_.*|^glk_.*"
  libchimara_includedir = $(includedir)/chimara/chimara
diff --git a/src/charset.c b/src/charset.c

new file mode 100644 (file)

index 0000000..c060d8c
--- /dev/null
+++ b/src/charset.c
@@ -0,0 +1,128 @@
+#include "charset.h"
+#include <glib.h>
+
+/* Internal function: copy len bytes of Latin-1 text, replacing every control
+character (0-31 except newline 10, and 127-159) by PLACEHOLDER. The result is
+len bytes and NOT null-terminated unless len == 0. Free it with g_free(). */
+static gchar *
+remove_latin1_control_characters(const unsigned char *s, const gsize len)
+{
+       /* If len == 0, then return an empty string, not NULL */
+       if(len == 0)
+               return g_strdup("");
+       gchar *retval = g_new0(gchar, len);
+       gsize i; /* same type as len: no signed/unsigned comparison, no overflow */
+       for(i = 0; i < len; i++)
+       {
+               gboolean control = (s[i] < 32 && s[i] != 10) || (s[i] >= 127 && s[i] <= 159);
+               retval[i] = control? PLACEHOLDER : s[i];
+       }
+       return retval;
+}
+
+/* Internal function: convert len bytes of Latin-1 text to a null-terminated
+UTF-8 string, replacing Latin-1 control characters by a placeholder first. The
+UTF-8 string must be freed with g_free(). Logs a warning and returns NULL on error. */
+gchar *
+convert_latin1_to_utf8(const gchar *s, const gsize len)
+{
+       GError *error = NULL;
+       gchar *canonical = remove_latin1_control_characters( (const unsigned char *)s, len);
+       gchar *retval = g_convert(canonical, len, "UTF-8", "ISO-8859-1", NULL, NULL, &error);
+       g_free(canonical);
+
+       if(retval == NULL) {
+               g_warning("Error during latin1->utf8 conversion: %s", error->message);
+               g_error_free(error); /* we own the GError once it has been set */
+       }
+       return retval;
+}
+
+/* Internal function: convert len Latin-1 bytes to a four-byte-per-character
+big-endian string of gchars (len * 4 bytes, no terminator). Free with g_free(). */
+gchar *
+convert_latin1_to_ucs4be_string(const gchar *s, const gsize len)
+{
+       /* g_new0() zeroes the buffer, so only the low byte of each code point is stored. "UCS-4BE" is also a conversion type in g_convert()... but this may be more efficient */
+       gchar *retval = g_new0(gchar, len * 4);
+       gsize i; /* same type as len, so i * 4 + 3 cannot overflow before len * 4 does */
+       for(i = 0; i < len; i++)
+               retval[i * 4 + 3] = s[i];
+       return retval;
+}
+
+/* Internal function: convert a null-terminated UTF-8 string to a
+null-terminated Latin-1 string, replacing characters that cannot be represented
+in Latin-1 by PLACEHOLDER_STRING. If bytes_written is not NULL it will be filled
+with the number of bytes returned, not counting the NULL terminator. The returned
+string must be freed with g_free(). Logs a warning and returns NULL on error. */
+gchar *
+convert_utf8_to_latin1(const gchar *s, gsize *bytes_written)
+{
+       GError *error = NULL;
+       gchar *retval = g_convert_with_fallback(s, -1, "ISO-8859-1", "UTF-8", PLACEHOLDER_STRING, NULL, bytes_written, &error);
+       if(retval == NULL) {
+               g_warning("Error during utf8->latin1 conversion: %s", error->message);
+               g_error_free(error); /* we own the GError once it has been set */
+       }
+       return retval;
+}
+
+/* Internal function: turn a null-terminated UTF-8 string into a newly allocated
+null-terminated UCS4 string; a non-NULL items_written receives the code point
+count, terminator excluded. Free the result afterwards. Returns NULL on error.
+NOTE(review): g_utf8_to_ucs4_fast() does not validate, so s must be valid UTF-8. */
+gunichar *
+convert_utf8_to_ucs4(const gchar *s, glong *items_written)
+{
+       gunichar *ucs4 = g_utf8_to_ucs4_fast(s, -1, items_written);
+       if(ucs4 != NULL)
+               return ucs4;
+
+       g_warning("Error during utf8->unicode conversion");
+       return NULL;
+}
+
+/* Internal function: Convert a Unicode buffer of len code points to a null-terminated
+UTF-8 string. Free the result with g_free(). Logs a warning and returns NULL on error. */
+gchar *
+convert_ucs4_to_utf8(const gunichar *buf, const glong len)
+{
+       GError *error = NULL;
+       gchar *retval = g_ucs4_to_utf8(buf, len, NULL, NULL, &error);
+       if(retval == NULL) {
+               g_warning("Error during unicode->utf8 conversion: %s", error->message);
+               g_error_free(error); /* we own the GError once it has been set */
+       }
+       return retval;
+}
+
+/* Internal function: Convert a Unicode buffer to a Latin-1 string. Do not do
+any character processing, just return values > 255 as the placeholder character.
+The result is len bytes, not null-terminated; free it with g_free() afterwards. */
+gchar *
+convert_ucs4_to_latin1_binary(const gunichar *buf, const glong len)
+{
+       gchar *retval = g_new0(gchar, len);
+       glong i; /* same type as len, so the index cannot overflow before len */
+       for(i = 0; i < len; i++)
+               retval[i] = (buf[i] > 255)? PLACEHOLDER : buf[i];
+       return retval;
+}
+
+/* Internal function: convert a Unicode buffer of len code points to a four-byte-
+per-character big-endian string of gchars (len * 4 bytes, no terminator). Free with g_free(). */
+gchar *
+convert_ucs4_to_ucs4be_string(const gunichar *buf, const glong len)
+{
+       gchar *retval = g_new0(gchar, len * 4);
+       glong i; /* same type as len, so i * 4 + 3 cannot overflow before len * 4 does */
+       for(i = 0; i < len; i++)
+       {
+               retval[i * 4]     = (buf[i] >> 24) & 0xFF; /* most significant byte first */
+               retval[i * 4 + 1] = (buf[i] >> 16) & 0xFF;
+               retval[i * 4 + 2] = (buf[i] >> 8)  & 0xFF;
+               retval[i * 4 + 3] =  buf[i]        & 0xFF;
+       }
+       return retval;
+}
diff --git a/src/charset.h b/src/charset.h

new file mode 100644 (file)

index 0000000..0a18295
--- /dev/null
+++ b/src/charset.h
@@ -0,0 +1,18 @@
+#ifndef CHARSET_H
+#define CHARSET_H
+
+#include <glib.h>
+
+#define PLACEHOLDER '?'
+#define PLACEHOLDER_STRING "?"
+/* Substitute for characters that cannot be printed or represented; keep the char and string forms in sync. Our placeholder character is '?'; other options are possible, like printing "0x7F" or something */
+
+gchar *convert_latin1_to_utf8(const gchar *s, const gsize len);
+gchar *convert_latin1_to_ucs4be_string(const gchar *s, const gsize len);
+gchar *convert_utf8_to_latin1(const gchar *s, gsize *bytes_written);
+gunichar *convert_utf8_to_ucs4(const gchar *s, glong *items_written);
+gchar *convert_ucs4_to_utf8(const gunichar *buf, const glong len);
+gchar *convert_ucs4_to_latin1_binary(const gunichar *buf, const glong len);
+gchar *convert_ucs4_to_ucs4be_string(const gunichar *buf, const glong len);
+
+#endif /* CHARSET_H */
diff --git a/src/gestalt.c b/src/gestalt.c

index 4a586ce1d192cce9b2a470e0b20c2b7b753dfaa1..0c3d27b855d8a917f545c218b550c0239008acd1 100644 (file)
--- a/src/gestalt.c
+++ b/src/gestalt.c
@@ -71,17 +71,6 @@ glk_gestalt_ext(glui32 sel, glui32 val, glui32 *arr, glui32 arrlen)
                 case gestalt_Version:
                         return (MAJOR_VERSION << 16) + (MINOR_VERSION << 8) + SUB_VERSION;
                 
-               /* Which characters can we print? */    
-               case gestalt_CharOutput:
-                       /* All characters are printed as one character, in any case */
-                       if(arr && arrlen > 0)
-                               *arr = 1;
-                       /* Cannot print control chars except \n, or chars > 255 */
-                       if( (val < 32 && val != 10) || (val >= 127 && val <= 159) || (val > 255) )
-                               return gestalt_CharOutput_CannotPrint;
-                       /* Can print all other Latin-1 characters */
-                       return gestalt_CharOutput_ExactPrint;
-               
                 /* Which characters can the player type in line input? */
                 case gestalt_LineInput:
                         /* Does not accept control chars */
@@ -95,7 +84,34 @@ glk_gestalt_ext(glui32 sel, glui32 val, glui32 *arr, glui32 arrlen)
                         if( val < 32 || (val >= 127 && val <= 159) || val == keycode_Unknown )
                                 return 0;
                         return 1;
+               
+               /* Which characters can we print? */    
+               case gestalt_CharOutput:
+                       /* All characters are printed as one character, in any case */
+                       if(arr && arrlen > 0)
+                               *arr = 1;
+                       /* Cannot print control chars except \n, or chars > 255 */
+                       if( (val < 32 && val != 10) || (val >= 127 && val <= 159) || (val > 255) )
+                               return gestalt_CharOutput_CannotPrint;
+                       /* Can print all other Latin-1 characters */
+                       return gestalt_CharOutput_ExactPrint;
+               
+               /* Unicode capabilities present */
+               case gestalt_Unicode:
+                       return 1;
                         
+               /* Unsupported capabilities */
+               case gestalt_MouseInput:
+               case gestalt_Timer:
+               case gestalt_Graphics:
+               case gestalt_DrawImage:
+               case gestalt_Sound:
+               case gestalt_SoundVolume:
+               case gestalt_SoundNotify:
+               case gestalt_Hyperlinks:
+               case gestalt_HyperlinkInput:
+               case gestalt_SoundMusic:
+               case gestalt_GraphicsTransparency:
                 /* Selector not supported */    
                 default:
                         return 0;
diff --git a/src/input.c b/src/input.c

index 7eebfa596ac2e85324cb8c2917ec80d15dded6e5..8956c2c53438dad52dd3446d88acd3012c54a1f3 100644 (file)
--- a/src/input.c
+++ b/src/input.c
@@ -1,3 +1,4 @@
+#include "charset.h"
  #include "input.h"
  
  /** glk_request_char_event:
@@ -203,14 +204,9 @@ glk_request_line_event_uni(winid_t win, glui32 *buf, glui32 maxlen, glui32 initl
  
         gchar *utf8;
         if(initlen > 0) {
-               GError *error = NULL;
-               utf8 = g_ucs4_to_utf8(buf, initlen, NULL, NULL, &error);
-                       
+               utf8 = convert_ucs4_to_utf8(buf, initlen);
                 if(utf8 == NULL)
-               {
-                       g_warning("Error during unicode->utf8 conversion: %s", error->message);
                         return;
-               }
         }
         else
                 utf8 = g_strdup("");
@@ -325,14 +321,11 @@ end_line_input_request(winid_t win, const gchar *inserted_text)
      /* Convert the string from UTF-8 to Latin-1 or Unicode */
      if(win->input_request_type == INPUT_REQUEST_LINE) 
      {
-        GError *error = NULL;
-        gchar *latin1;
          gsize bytes_written;
-        latin1 = g_convert_with_fallback(inserted_text, -1, "ISO-8859-1", "UTF-8", "?", NULL, &bytes_written, &error);
+        gchar *latin1 = convert_utf8_to_latin1(inserted_text, &bytes_written);
          
          if(latin1 == NULL)
          {
-            g_warning("Error during utf8->latin1 conversion: %s", error->message);
              event_throw(evtype_LineInput, win, 0, 0);
              return;
          }
@@ -349,21 +342,18 @@ end_line_input_request(winid_t win, const gchar *inserted_text)
      }
      else if(win->input_request_type == INPUT_REQUEST_LINE_UNICODE) 
      {
-        gunichar *unicode;
          glong items_written;
-        unicode = g_utf8_to_ucs4_fast(inserted_text, -1, &items_written);
+        gunichar *unicode = convert_utf8_to_ucs4(inserted_text, &items_written);
          
          if(unicode == NULL)
          {
-            g_warning("Error during utf8->unicode conversion");
              event_throw(evtype_LineInput, win, 0, 0);
              return;
          }
  
          /* Place input in the echo stream */
-        /* TODO: glk_put_string_stream_uni not implemented yet
          if(win->echo_stream != NULL) 
-            glk_put_string_stream_uni(window->echo_stream, unicode);*/
+            glk_put_string_stream_uni(win->echo_stream, unicode);
  
          /* Copy the string (but not the NULL at the end) */
          int copycount = MIN(win->line_input_buffer_max_len, items_written);
diff --git a/src/main.c b/src/main.c

index 324e49e1a2201bf692e15ca617402c096a250843..63983489e19b2a4d3fa469378b33cddf603fd8d4 100644 (file)
--- a/src/main.c
+++ b/src/main.c
@@ -115,7 +115,7 @@ main(int argc, char *argv[])
  
         g_object_unref( G_OBJECT(builder) );
  
-    if( !chimara_glk_run(CHIMARA_GLK(glk), ".libs/first.so", &error) ) {
+    if( !chimara_glk_run(CHIMARA_GLK(glk), ".libs/model.so", &error) ) {
          error_dialog(GTK_WINDOW(window), error, "Error starting Glk library: ");
          return 1;
      }
diff --git a/src/model.c b/src/model.c

index 3823df85c9e0d10204e770c6cf44176622aad292..4cf2609c7059ac899f722c361512493efcf9a879 100644 (file)
--- a/src/model.c
+++ b/src/model.c
@@ -17,59 +17,60 @@ void glk_main(void)
              nothing we can do without it, so exit. */
          return; 
      }
-    
         
-/*    char buffer[256];
+    glui32 buffer[1024];
      int i;
-    for(i = 0; i < 256; i++)
-       buffer[i] = (char)glk_char_to_upper(i);
+    for(i = 0; i < 512; i++) {
+       buffer[i * 2] = i + 33;
+               buffer[i * 2 + 1] = 32;
+       }
      
-    frefid_t f = glk_fileref_create_temp(fileusage_BinaryMode, 0);
+/*    frefid_t f = glk_fileref_create_temp(fileusage_BinaryMode, 0);
      if(f) 
      {
-    
-       strid_t s = glk_stream_open_file(f, 
-               filemode_ReadWrite, 0);
-       glk_stream_set_current(s);
-       
-       glk_put_char('X');
-       glk_put_string("Philip en Marijn zijn vet goed.\n");
-       glk_put_buffer(buffer, 256);
-
-       glk_stream_set_position(s, 0, seekmode_Start);
-       glk_set_window(mainwin);
-       glk_put_char( glk_get_char_stream(s) );
-       glk_put_char('\n');
-       g_printerr("Line read: %d\n", glk_get_line_stream(s, buffer, 256));
-       glk_put_string(buffer);
-       int count = glk_get_buffer_stream(s, buffer, 256);
-       g_printerr("Buffer read: %d\n", count);
-       glk_put_buffer(buffer, count);          
-       
-       stream_result_t result;
-       glk_stream_close(s, &result);
-       
-       g_printerr("Read count: %d\nWrite count: %d\n", result.readcount,
-               result.writecount);
-               glk_fileref_destroy(f);
-       }
-       */
+               strid_t s = glk_stream_open_file(f, filemode_ReadWrite, 0);*/
+               glui32 membuf[512];
+               strid_t s = glk_stream_open_memory_uni(membuf, 512, filemode_ReadWrite, 0);
+               glk_stream_set_current(s);
+               
+               glk_put_char_uni('X');
+               glk_put_string("Philip en Marijn zijn vet goed.\n");
+               glk_put_buffer_uni(buffer, 1024);
  
-       glk_set_window(mainwin);
+               glk_stream_set_position(s, 0, seekmode_Start);
+               glk_set_window(mainwin);
+               glk_put_char_uni( glk_get_char_stream_uni(s) );
+               glk_put_char('\n');
+               g_printerr( "Line read: %d\n", glk_get_line_stream_uni(s, buffer, 1024) );
+               g_printerr("string[5] = %X\n", buffer[5]);
+               glk_put_string_uni(buffer);
+               int count = glk_get_buffer_stream_uni(s, buffer, 1024);
+               g_printerr("Buffer read: %d\n", count);
+               glk_put_string("\n---SOME CHARACTERS---\n");
+               glk_put_buffer_uni(buffer, count);
+               glk_put_string("\n---THE SAME CHARACTERS IN UPPERCASE---\n");
+               int newcount = glk_buffer_to_upper_case_uni(buffer, 1024, 1024);
+               glk_put_buffer_uni(buffer, newcount);
+               
+               stream_result_t result;
+               glk_stream_close(s, &result);
+               
+               g_printerr("Read count: %d\nWrite count: %d\n", result.readcount, result.writecount);
+/*             glk_fileref_destroy(f);
+       }*/
  
         glk_set_interrupt_handler(&sayit);
  
-       gchar buffer[256];
         event_t ev;
         while(1) {
-               glk_put_string("prompt> ");
-               glk_request_line_event(mainwin, buffer, 256, 0);
+               glk_put_string("\nprompt> ");
+               glk_request_line_event_uni(mainwin, buffer, 1024, 0);
                 glk_select(&ev);
                 switch(ev.type) {
                         default:
                                 printf("Received event:\n");
                                 printf("Type: %d\n", ev.type);
-                               printf("Win: %d\n", glk_window_get_rock(ev.win));
+                               printf("Win: %d\n", glk_window_get_rock(ev.win) );
                                 printf("Var1: %d\n", ev.val1);
                                 printf("Var2: %d\n", ev.val2);
                 }
diff --git a/src/stream.c b/src/stream.c

index e51d1f1a9077c43cd86ad2def9854c90cb924e58..7e61dc93b9a4b5183b8e99cc3b9c82571aea6546 100644 (file)
--- a/src/stream.c
+++ b/src/stream.c
@@ -118,6 +118,20 @@ glk_put_char(unsigned char ch)
         glk_put_char_stream(glk_data->current_stream, ch);
  }
  
+/**
+ * glk_put_char_uni:
+ * @ch: A Unicode code point.
+ *
+ * Prints one character to the current stream. The character is assumed to be a
+ * Unicode code point. Returns without printing if no current stream is set.
+ */
+void
+glk_put_char_uni(glui32 ch)
+{
+       g_return_if_fail(glk_data->current_stream != NULL);
+       glk_put_char_stream_uni(glk_data->current_stream, ch);
+}
+
  /**
   * glk_put_string:
   * @s: A null-terminated string in Latin-1 encoding.
@@ -137,6 +151,21 @@ glk_put_string(char *s)
         glk_put_string_stream(glk_data->current_stream, s);
  }
  
+/**
+ * glk_put_string_uni:
+ * @s: A zero-terminated string of Unicode code points.
+ *
+ * Prints a string of Unicode characters to the current stream. It is equivalent
+ * to a series of glk_put_char_uni() calls. A string ends on a #glui32 whose
+ * value is 0. Returns without printing if no current stream is set.
+ */
+void
+glk_put_string_uni(glui32 *s)
+{
+       g_return_if_fail(glk_data->current_stream != NULL);
+       glk_put_string_stream_uni(glk_data->current_stream, s);
+}
+
  /**
   * glk_put_buffer:
   * @buf: An array of characters in Latin-1 encoding.
@@ -157,6 +186,21 @@ glk_put_buffer(char *buf, glui32 len)
         glk_put_buffer_stream(glk_data->current_stream, buf, len);
  }
  
+/**
+ * glk_put_buffer_uni:
+ * @buf: An array of Unicode code points.
+ * @len: Length of @buf, in code points.
+ *
+ * Prints a block of Unicode characters to the current stream, like a series of
+ * glk_put_char_uni() calls. Returns early if no current stream is set.
+ */
+void
+glk_put_buffer_uni(glui32 *buf, glui32 len)
+{
+       g_return_if_fail(glk_data->current_stream != NULL);
+       glk_put_buffer_stream_uni(glk_data->current_stream, buf, len);
+}
+
  /**
   * glk_stream_open_memory:
   * @buf: An allocated buffer, or %NULL.
diff --git a/src/strio.c b/src/strio.c

index 974a1099b79f0f1005a4e75f95245e2c08e69f55..98140ada9fd1891656722a00d04719440b2332eb 100644 (file)
--- a/src/strio.c
+++ b/src/strio.c
@@ -1,3 +1,4 @@
+#include "charset.h"
  #include "stream.h"
  #include <stdio.h>
  #include <string.h>
@@ -10,49 +11,6 @@
   *
   */
  
-/* Internal function: change illegal (control) characters in a string to a
-placeholder character. Must free returned string afterwards. */
-static gchar *
-remove_latin1_control_characters(unsigned char *s, gsize len)
-{
-       /* If len == 0, then return an empty string, not NULL */
-       if(len == 0)
-               return g_strdup("");
-                       
-       gchar *retval = g_new0(gchar, len);
-       int i;
-       for(i = 0; i < len; i++)
-               if( (s[i] < 32 && s[i] != 10) || (s[i] >= 127 && s[i] <= 159) )
-                       retval[i] = '?';
-                       /* Our placeholder character is '?'; other options are possible,
-                       like printing "0x7F" or something */
-               else
-                       retval[i] = s[i];
-       return retval;
-}
-
-/* Internal function: convert a Latin-1 string to a UTF-8 string, replacing
-Latin-1 control characters by a placeholder first. The UTF-8 string must be
-freed afterwards. Returns NULL on error. */
-static gchar *
-convert_latin1_to_utf8(gchar *s, gsize len)
-{
-       GError *error = NULL;
-       gchar *utf8;
-       gchar *canonical = remove_latin1_control_characters( (unsigned char *)s,
-               len);
-       utf8 = g_convert(canonical, len, "UTF-8", "ISO-8859-1", NULL, NULL, &error);
-       g_free(canonical);
-       
-       if(utf8 == NULL)
-       {
-               g_warning("Error during latin1->utf8 conversion: %s", error->message);
-               return NULL;
-       }
-       
-       return utf8;
-}
-
  /* Internal function: write a UTF-8 string to a text grid window's text buffer. */
  static void
  write_utf8_to_grid(winid_t win, gchar *s)
@@ -131,7 +89,7 @@ write_buffer_to_stream(strid_t str, gchar *buf, glui32 len)
                             case wintype_TextGrid:
                             {
                                 gchar *utf8 = convert_latin1_to_utf8(buf, len);
-                               if(utf8)
+                               if(utf8 != NULL)
                                 {
                                     /* FIXME: What to do if string contains \n? Split the input string at newlines and write each string separately? */
                                     write_utf8_to_grid(str->window, utf8);
@@ -145,7 +103,7 @@ write_buffer_to_stream(strid_t str, gchar *buf, glui32 len)
                                 case wintype_TextBuffer:
                                 {
                                         gchar *utf8 = convert_latin1_to_utf8(buf, len);
-                                       if(utf8)
+                                       if(utf8 != NULL)
                                         {
                                                 write_utf8_to_window(str->window, utf8);
                                                 g_free(utf8);
@@ -185,13 +143,9 @@ write_buffer_to_stream(strid_t str, gchar *buf, glui32 len)
                         {
                                 if(str->unicode) 
                                 {
-                                       /* Convert to four-byte big-endian */
-                                       gchar *writebuffer = g_new0(gchar, len * 4);
-                                       int i;
-                                       for(i = 0; i < len; i++)
-                                               writebuffer[i * 4 + 3] = buf[i];
-                                       fwrite(writebuffer, sizeof(gchar), len * 4, 
-                                               str->file_pointer);
+                                       gchar *writebuffer = convert_latin1_to_ucs4be_string(buf, len);
+                                       fwrite(writebuffer, sizeof(gchar), len * 4, str->file_pointer);
+                                       g_free(writebuffer);
                                 } 
                                 else /* Regular file */
                                 {
@@ -201,8 +155,116 @@ write_buffer_to_stream(strid_t str, gchar *buf, glui32 len)
                         else /* Text mode is the same for Unicode and regular files */
                         {
                                 gchar *utf8 = convert_latin1_to_utf8(buf, len);
-                               g_fprintf(str->file_pointer, "%s", utf8);
-                               g_free(utf8);
+                               if(utf8 != NULL)
+                               {
+                                       g_fprintf(str->file_pointer, "%s", utf8);
+                                       g_free(utf8);
+                               }
+                       }
+                       
+                       str->write_count += len;
+                       break;
+               default:
+                       g_warning("%s: Writing to this kind of stream unsupported.", __func__);
+       }
+}
+
+/* Internal function: write a Unicode buffer with length to a stream. */
+static void
+write_buffer_to_stream_uni(strid_t str, glui32 *buf, glui32 len)
+{
+       switch(str->type)
+       {
+               case STREAM_TYPE_WINDOW:
+                       /* Each window type has a different way of printing to it */
+                       switch(str->window->type)
+                       {
+                               /* Printing to these windows' streams does nothing */
+                               case wintype_Blank:
+                               case wintype_Pair:
+                               case wintype_Graphics:
+                                       str->write_count += len;
+                                       break;
+                                       
+                           /* Text grid window */
+                           case wintype_TextGrid:
+                           {
+                               gchar *utf8 = convert_ucs4_to_utf8(buf, len);
+                               if(utf8 != NULL)
+                               {
+                                   /* FIXME: What to do if string contains \n? Split the input string at newlines and write each string separately? */
+                                   write_utf8_to_grid(str->window, utf8);
+                                   g_free(utf8);
+                               }
+                           }
+                               str->write_count += len;
+                               break;
+                                       
+                               /* Text buffer window */        
+                               case wintype_TextBuffer:
+                               {
+                                       gchar *utf8 = convert_ucs4_to_utf8(buf, len);
+                                       if(utf8 != NULL)
+                                       {
+                                               write_utf8_to_window(str->window, utf8);
+                                               g_free(utf8);
+                                       }
+                               }       
+                                       str->write_count += len;
+                                       break;
+                               default:
+                                       g_warning("%s: Writing to this kind of window unsupported.", __func__);
+                       }
+                       
+                       /* Now write the same buffer to the window's echo stream */
+                       if(str->window->echo_stream != NULL)
+                               write_buffer_to_stream_uni(str->window->echo_stream, buf, len);
+                       
+                       break;
+                       
+               case STREAM_TYPE_MEMORY:
+                       if(str->unicode && str->ubuffer)
+                       {
+                               int copycount = MIN(len, str->buflen - str->mark);
+                               memmove(str->ubuffer + str->mark, buf, copycount * sizeof(glui32));
+                               str->mark += copycount;
+                       }
+                       if(!str->unicode && str->buffer)
+                       {
+                               gchar *latin1 = convert_ucs4_to_latin1_binary(buf, len);
+                               int copycount = MIN(len, str->buflen - str->mark);
+                               memmove(str->buffer + str->mark, latin1, copycount);
+                               g_free(latin1);
+                               str->mark += copycount;
+                       }
+
+                       str->write_count += len;
+                       break;
+                       
+               case STREAM_TYPE_FILE:
+                       if(str->binary) 
+                       {
+                               if(str->unicode) 
+                               {
+                                       gchar *writebuffer = convert_ucs4_to_ucs4be_string(buf, len);
+                                       fwrite(writebuffer, sizeof(gchar), len * 4, str->file_pointer);
+                                       g_free(writebuffer);
+                               } 
+                               else /* Regular file */
+                               {
+                                       gchar *latin1 = convert_ucs4_to_latin1_binary(buf, len);
+                                       fwrite(latin1, sizeof(gchar), len, str->file_pointer);
+                                       g_free(latin1);
+                               }
+                       }
+                       else /* Text mode is the same for Unicode and regular files */
+                       {
+                               gchar *utf8 = convert_ucs4_to_utf8(buf, len);
+                               if(utf8 != NULL) 
+                               {
+                                       g_fprintf(str->file_pointer, "%s", utf8);
+                                       g_free(utf8);
+                               }
                         }
                         
                         str->write_count += len;
@@ -229,6 +291,23 @@ glk_put_char_stream(strid_t str, unsigned char ch)
         write_buffer_to_stream(str, (gchar *)&ch, 1);
  }
  
+/**
+ * glk_put_char_stream_uni:
+ * @str: An output stream.
+ * @ch: A Unicode code point.
+ *
+ * Prints one character @ch to the stream @str. It is illegal for @str to be
+ * %NULL, or an input-only stream; in either case the call returns early.
+ */
+void
+glk_put_char_stream_uni(strid_t str, glui32 ch)
+{
+       g_return_if_fail(str != NULL);
+       g_return_if_fail(str->file_mode != filemode_Read);
+       /* Write the single code point as a buffer of length 1 */
+       write_buffer_to_stream_uni(str, &ch, 1);
+}
+
  /**
   * glk_put_string_stream:
   * @str: An output stream.
@@ -243,7 +322,29 @@ glk_put_string_stream(strid_t str, char *s)
         g_return_if_fail(str != NULL);
         g_return_if_fail(str->file_mode != filemode_Read);
  
-       write_buffer_to_stream(str, (gchar *)s, strlen(s));
+       write_buffer_to_stream(str, s, strlen(s));
+}
+
+/**
+ * glk_put_string_stream_uni:
+ * @str: An output stream.
+ * @s: A null-terminated array of Unicode code points.
+ *
+ * Prints @s (without its 0 terminator) to the stream @str. It is illegal for @s
+ * or @str to be %NULL, or for @str to be an input-only stream.
+ */
+void
+glk_put_string_stream_uni(strid_t str, glui32 *s)
+{
+       g_return_if_fail(str != NULL);
+       g_return_if_fail(str->file_mode != filemode_Read);
+       g_return_if_fail(s != NULL);
+
+       /* An impromptu strlen() for glui32 arrays; len has the type the writer expects */
+       glui32 len = 0;
+       while(s[len] != 0)
+               len++;
+       write_buffer_to_stream_uni(str, s, len);
  }
  
  /**
@@ -261,7 +362,25 @@ glk_put_buffer_stream(strid_t str, char *buf, glui32 len)
         g_return_if_fail(str != NULL);
         g_return_if_fail(str->file_mode != filemode_Read);
         
-       write_buffer_to_stream(str, (gchar *)buf, len);
+       write_buffer_to_stream(str, buf, len);
+}
+
+/**
+ * glk_put_buffer_stream_uni:
+ * @str: An output stream.
+ * @buf: An array of Unicode code points.
+ * @len: Length of @buf, in code points.
+ *
+ * Prints @buf to the stream @str. It is illegal for @str to be %NULL, or an
+ * input-only stream; in either case the call returns early.
+ */
+void
+glk_put_buffer_stream_uni(strid_t str, glui32 *buf, glui32 len)
+{
+       g_return_if_fail(str != NULL);
+       g_return_if_fail(str->file_mode != filemode_Read);
+       /* Hand the buffer to the shared Unicode writer */
+       write_buffer_to_stream_uni(str, buf, len);
  }
  
  /*
@@ -328,31 +447,12 @@ is_unicode_newline(glsi32 ch, FILE *fp, gboolean utf8)
         return FALSE;
  }
  
-/**
- * glk_get_char_stream:
- * @str: An input stream.
- *
- * Reads one character from the stream @str. (There is no notion of a ``current
- * input stream.'') It is illegal for @str to be %NULL, or an output-only
- * stream.
- *
- * The result will be between 0 and 255. As with all basic text functions, Glk
- * assumes the Latin-1 encoding. If the end of the stream has been reached, the
- * result will be -1. Note that high-bit characters (128..255) are
- * <emphasis>not</emphasis> returned as negative numbers.
- *
- * If the stream contains Unicode data --- for example, if it was created with
- * glk_stream_open_file_uni() or glk_stream_open_memory_uni() --- then
- * characters beyond 255 will be returned as 0x3F ("?").
- *
- * Returns: A character value between 0 and 255, or -1 on end of stream.
- */
+/* Internal function: Read one character from a stream. Returns a value which
+ can be returned unchanged by glk_get_char_stream_uni(), but 
+ glk_get_char_stream() must replace high values by the placeholder character. */
  glsi32
-glk_get_char_stream(strid_t str)
+get_char_stream_common(strid_t str)
  {
-       g_return_val_if_fail(str != NULL, -1);
-       g_return_val_if_fail(str->file_mode == filemode_Read || str->file_mode == filemode_ReadWrite, -1);
-       
         switch(str->type)
         {
                 case STREAM_TYPE_MEMORY:
@@ -362,13 +462,13 @@ glk_get_char_stream(strid_t str)
                                         return -1;
                                 glui32 ch = str->ubuffer[str->mark++];
                                 str->read_count++;
-                               return (ch > 0xFF)? 0x3F : ch;
+                               return ch;
                         }
                         else
                         {
                                 if(!str->buffer || str->mark >= str->buflen)
                                         return -1;
-                               char ch = str->buffer[str->mark++];
+                               unsigned char ch = str->buffer[str->mark++];
                                 str->read_count++;
                                 return ch;
                         }
@@ -383,7 +483,7 @@ glk_get_char_stream(strid_t str)
                                         if(ch == -1)
                                                 return -1;
                                         str->read_count++;
-                                       return (ch > 0xFF)? 0x3F : ch;
+                                       return ch;
                                 }
                                 else /* Regular file */
                                 {
@@ -402,7 +502,7 @@ glk_get_char_stream(strid_t str)
                                         return -1;
                                         
                                 str->read_count++;
-                               return (ch > 0xFF)? 0x3F : ch;
+                               return ch;
                         }
                 default:
                         g_warning("%s: Reading from this kind of stream unsupported.", __func__);
@@ -410,6 +510,53 @@ glk_get_char_stream(strid_t str)
         }
  }
  
+/**
+ * glk_get_char_stream:
+ * @str: An input stream.
+ *
+ * Reads a single character from the stream @str. (There is no notion of a
+ * ``current input stream.'') It is illegal for @str to be %NULL, or an
+ * output-only stream.
+ *
+ * Like all basic text functions, this one assumes the Latin-1 encoding, so the
+ * result lies between 0 and 255; high-bit characters (128..255) are
+ * <emphasis>not</emphasis> returned as negative numbers. When the end of the
+ * stream has been reached, the result is -1.
+ *
+ * If the stream contains Unicode data --- for example, if it was created with
+ * glk_stream_open_file_uni() or glk_stream_open_memory_uni() --- then
+ * characters beyond 255 are replaced by the placeholder character.
+ *
+ * Returns: A character value between 0 and 255, or -1 on end of stream.
+ */
+glsi32
+glk_get_char_stream(strid_t str)
+{
+       g_return_val_if_fail(str != NULL, -1);
+       g_return_val_if_fail(str->file_mode == filemode_Read || str->file_mode == filemode_ReadWrite, -1);
+       
+       /* Anything that does not fit in Latin-1 is swapped for the placeholder;
+       -1 and values up to 0xFF pass through untouched */
+       glsi32 result = get_char_stream_common(str);
+       if(result > 0xFF)
+               return PLACEHOLDER;
+       return result;
+}
+
+/**
+ * glk_get_char_stream_uni:
+ * @str: An input stream.
+ *
+ * Reads one character from the stream @str. The result will be between 0 and
+ * 0x7FFFFFFF. If the end of the stream has been reached, the result will be -1.
+ * It is illegal for @str to be %NULL, or an output-only stream.
+ *
+ * Returns: A character value between 0 and 0x7FFFFFFF, or -1 on end of stream.
+ */
+glsi32
+glk_get_char_stream_uni(strid_t str)
+{
+       /* Reject a NULL stream or one that cannot be read from */
+       g_return_val_if_fail(str != NULL, -1);
+       g_return_val_if_fail(str->file_mode == filemode_Read || str->file_mode == filemode_ReadWrite, -1);
+       
+       /* Unlike glk_get_char_stream(), pass the value through unchanged */
+       return get_char_stream_common(str);
+}
+
  /**
   * glk_get_buffer_stream:
   * @str: An input stream.
@@ -467,7 +614,6 @@ glk_get_buffer_stream(strid_t str, char *buf, glui32 len)
                                                 g_warning("%s: Incomplete character in binary Unicode file.", __func__);
                                         }
                                         
-                                       str->read_count += count / 4;
                                         int foo;
                                         for(foo = 0; foo < count; foo += 4)
                                         {
@@ -478,6 +624,7 @@ glk_get_buffer_stream(strid_t str, char *buf, glui32 len)
                                                 buf[foo / 4] = (ch > 255)? 0x3F : (char)ch;
                                         }
                                         g_free(readbuffer);
+                                       str->read_count += count / 4;
                                         return count / 4;
                                 }
                                 else /* Regular binary file */
@@ -507,6 +654,105 @@ glk_get_buffer_stream(strid_t str, char *buf, glui32 len)
         }
  }
  
+/**
+ * glk_get_buffer_stream_uni:
+ * @str: An input stream.
+ * @buf: A buffer with space for at least @len Unicode code points.
+ * @len: The number of characters to read.
+ *
+ * Reads @len Unicode characters from @str, unless the end of stream is reached
+ * first. No terminal null is placed in the buffer.
+ *
+ * Bytes from non-Unicode memory streams and non-Unicode binary files are
+ * widened to one code point each. Unicode binary files are read as 4-byte
+ * big-endian values, and text files are read with read_utf8_char_from_file().
+ *
+ * Returns: The number of Unicode characters actually read.
+ */
+glui32
+glk_get_buffer_stream_uni(strid_t str, glui32 *buf, glui32 len)
+{
+       g_return_val_if_fail(str != NULL, 0);
+       g_return_val_if_fail(str->file_mode == filemode_Read || str->file_mode == filemode_ReadWrite, 0);
+       g_return_val_if_fail(buf != NULL, 0);
+       
+       /* Nothing requested; avoids zero-sized allocations and reads below */
+       if(len == 0)
+               return 0;
+       
+       switch(str->type)
+       {
+               case STREAM_TYPE_MEMORY:
+               {
+                       /* Unsigned, so that it compares cleanly against len */
+                       glui32 copycount = 0;
+                       if(str->unicode)
+                       {
+                               /* A stream without a buffer yields nothing; do not hand a
+                               NULL-based pointer to memmove() */
+                               if(str->ubuffer)
+                               {
+                                       copycount = MIN(len, str->buflen - str->mark);
+                                       memmove(buf, str->ubuffer + str->mark, copycount * sizeof(glui32));
+                                       str->mark += copycount;
+                               }
+                       }
+                       else
+                       {
+                               /* Widen each byte; going through unsigned char keeps
+                               high-bit characters positive */
+                               while(copycount < len && str->buffer && str->mark < str->buflen)
+                                       buf[copycount++] = (unsigned char)str->buffer[str->mark++];
+                       }
+
+                       str->read_count += copycount;
+                       return copycount;
+               }
+               case STREAM_TYPE_FILE:
+                       if(str->binary)
+                       {
+                               if(str->unicode) /* Binary file with 4-byte characters */
+                               {
+                                       /* Read len characters of 4 bytes each */
+                                       gsize bytes_wanted = (gsize)len * 4;
+                                       unsigned char *readbuffer = g_new0(unsigned char, bytes_wanted);
+                                       size_t count = fread(readbuffer, sizeof(unsigned char), bytes_wanted, str->file_pointer);
+                                       /* If there was an incomplete character */
+                                       if(count % 4 != 0)
+                                       {
+                                               count -= count % 4;
+                                               g_warning("%s: Incomplete character in binary Unicode file.", __func__);
+                                       }
+                                       
+                                       /* Assemble big-endian values. Each byte is widened to
+                                       glui32 before shifting, because shifting a promoted int
+                                       into its sign bit is undefined behavior */
+                                       size_t foo;
+                                       for(foo = 0; foo < count; foo += 4)
+                                               buf[foo / 4] = (glui32)readbuffer[foo] << 24
+                                                       | (glui32)readbuffer[foo + 1] << 16
+                                                       | (glui32)readbuffer[foo + 2] << 8
+                                                       | (glui32)readbuffer[foo + 3];
+                                       g_free(readbuffer);
+                                       str->read_count += count / 4;
+                                       return count / 4;
+                               }
+                               else /* Regular binary file */
+                               {
+                                       unsigned char *readbuffer = g_new0(unsigned char, len);
+                                       size_t count = fread(readbuffer, sizeof(unsigned char), len, str->file_pointer);
+                                       size_t foo;
+                                       for(foo = 0; foo < count; foo++)
+                                               buf[foo] = readbuffer[foo];
+                                       g_free(readbuffer);
+                                       str->read_count += count;
+                                       return count;
+                               }
+                       }
+                       else /* Text mode is the same for Unicode and regular files */
+                       {
+                               /* Do it character-by-character */
+                               glui32 foo;
+                               for(foo = 0; foo < len; foo++)
+                               {
+                                       glsi32 ch = read_utf8_char_from_file(str->file_pointer);
+                                       if(ch == -1)
+                                               break;
+                                       str->read_count++;
+                                       buf[foo] = ch;
+                               }
+                               return foo;
+                       }
+               default:
+                       g_warning("%s: Reading from this kind of stream unsupported.", __func__);
+                       return 0;
+       }
+}
+
  /**
   * glk_get_line_stream:
   * @str: An input stream.
@@ -638,6 +884,144 @@ glk_get_line_stream(strid_t str, char *buf, glui32 len)
         }
  }
  
+/* Internal function: read one line of at most len - 1 characters from the file
+ behind str into buf and put a terminal null after it. Characters are read with
+ read_utf8_char_from_file() if utf8 is TRUE, and with
+ read_ucs4be_char_from_file() otherwise. len must be at least 1. Returns the
+ number of characters stored, including the newline (if there is one) but not
+ including the terminal null. */
+static glui32
+get_line_from_file_uni(strid_t str, glui32 *buf, glui32 len, gboolean utf8)
+{
+       glui32 count = 0;
+       while(count < len - 1)
+       {
+               glsi32 ch = utf8? read_utf8_char_from_file(str->file_pointer)
+                       : read_ucs4be_char_from_file(str->file_pointer);
+               if(ch == -1) /* The reader has no further character for us */
+                       break;
+               str->read_count++;
+               buf[count++] = ch; /* Preserve newline types??? */
+               if(is_unicode_newline(ch, str->file_pointer, utf8))
+                       break;
+       }
+       /* count never exceeds len - 1, so the terminator always fits */
+       buf[count] = 0;
+       return count;
+}
+
+/**
+ * glk_get_line_stream_uni:
+ * @str: An input stream.
+ * @buf: A buffer with space for at least @len Unicode code points.
+ * @len: The number of characters to read, plus one.
+ *
+ * Reads Unicode characters from @str, until either @len - 1 Unicode characters
+ * have been read or a newline has been read. It then puts a terminal null (a
+ * zero value) on the end. If @len is 0, nothing is read and nothing is written
+ * to @buf.
+ *
+ * Returns: The number of characters actually read, including the newline (if
+ * there is one) but not including the terminal null.
+ */
+glui32
+glk_get_line_stream_uni(strid_t str, glui32 *buf, glui32 len)
+{
+       g_return_val_if_fail(str != NULL, 0);
+       g_return_val_if_fail(str->file_mode == filemode_Read || str->file_mode == filemode_ReadWrite, 0);
+       g_return_val_if_fail(buf != NULL, 0);
+
+       /* With no room for even the terminal null there is nothing to do; this
+       also keeps "len - 1" below from wrapping around */
+       if(len == 0)
+               return 0;
+
+       switch(str->type)
+       {
+               case STREAM_TYPE_MEMORY:
+               {
+                       glui32 copycount = 0;
+                       if(str->unicode)
+                       {
+                               /* Do it character-by-character */
+                               while(copycount < len - 1 && str->ubuffer && str->mark < str->buflen)
+                               {
+                                       glui32 ch = str->ubuffer[str->mark++];
+                                       /* Check for Unicode newline; slightly different than
+                                       in file streams */
+                                       if(ch == 0x0A || ch == 0x85 || ch == 0x0C || ch == 0x2028 || ch == 0x2029)
+                                       {
+                                               buf[copycount++] = '\n';
+                                               break;
+                                       }
+                                       if(ch == 0x0D)
+                                       {
+                                               /* Skip the LF of a CR LF pair, but only peek at
+                                               the next character if the buffer has one */
+                                               if(str->mark < str->buflen && str->ubuffer[str->mark] == 0x0A)
+                                                       str->mark++;
+                                               buf[copycount++] = '\n';
+                                               break;
+                                       }
+                                       buf[copycount++] = ch;
+                               }
+                       }
+                       else
+                       {
+                               /* No recourse to memccpy(), so do it character-by-character */
+                               while(copycount < len - 1 && str->buffer && str->mark < str->buflen)
+                               {
+                                       /* unsigned char keeps high-bit characters positive */
+                                       unsigned char ch = str->buffer[str->mark++];
+                                       buf[copycount++] = ch;
+                                       if(ch == '\n') /* Also check for \r and \r\n? */
+                                               break;
+                               }
+                       }
+                       buf[copycount] = 0;
+
+                       str->read_count += copycount;
+                       return copycount;
+               }
+               case STREAM_TYPE_FILE:
+               {
+                       /* Text mode is the same for Unicode and regular files */
+                       if(!str->binary)
+                               return get_line_from_file_uni(str, buf, len, TRUE);
+                       /* Binary file with 4-byte characters */
+                       if(str->unicode)
+                               return get_line_from_file_uni(str, buf, len, FALSE);
+
+                       /* Regular binary file: one byte per character. Reading with
+                       fgetc() needs no scratch buffer and gives the exact count even
+                       if the data contains zero bytes */
+                       glui32 count = 0;
+                       while(count < len - 1)
+                       {
+                               int ch = fgetc(str->file_pointer);
+                               if(ch == EOF)
+                                       break;
+                               buf[count++] = (unsigned char)ch;
+                               if(ch == '\n')
+                                       break;
+                       }
+                       buf[count] = 0;
+                       str->read_count += count;
+                       return count;
+               }
+               default:
+                       g_warning("%s: Reading from this kind of stream unsupported.", __func__);
+                       return 0;
+       }
+}
+
  /*
   *
   **************** SEEKING FUNCTIONS ********************************************
author	Philip Chimento <philip.chimento@gmail.com>
	Tue, 3 Mar 2009 23:21:53 +0000 (23:21 +0000)
committer	Philip Chimento <philip.chimento@gmail.com>
	Tue, 3 Mar 2009 23:21:53 +0000 (23:21 +0000)
src/Makefile.am		patch \| blob \| history
src/charset.c	[new file with mode: 0644]	patch \| blob
src/charset.h	[new file with mode: 0644]	patch \| blob
src/gestalt.c		patch \| blob \| history
src/input.c		patch \| blob \| history
src/main.c		patch \| blob \| history
src/model.c		patch \| blob \| history
src/stream.c		patch \| blob \| history
src/strio.c		patch \| blob \| history