X-Git-Url: https://git.stderr.nl/gitweb?a=blobdiff_plain;f=libchimara%2Fcase.c;h=3176a9a1adf2d60dd8aa7a204175db92772d1bce;hb=HEAD;hp=1b6eed72ba68fddcc6d7c2aa279b048827e34bad;hpb=f19a194bd066ea4320c1c6b6b8c5a375f7787af5;p=projects%2Fchimara%2Fchimara.git

diff --git a/libchimara/case.c b/libchimara/case.c
index 1b6eed7..3176a9a 100644
--- a/libchimara/case.c
+++ b/libchimara/case.c
@@ -1,5 +1,7 @@
-#include <gtk/gtk.h>
+#include <string.h>
+#include <glib.h>
 #include "glk.h"
+#include "charset.h"
 
 /**
  * glk_char_to_lower:
@@ -93,15 +95,25 @@ glk_buffer_to_lower_case_uni(glui32 *buf, glui32 len, glui32 numchars)
 {
 	g_return_val_if_fail(buf != NULL && (len > 0 || numchars > 0), 0);
 	g_return_val_if_fail(numchars <= len, 0);
+
+	long outchars;
+
+	/* Lowercase the string */
+	char *utf8 = convert_ucs4_to_utf8(buf, numchars);
+	if(!utf8)
+		return numchars;
+	char *lowered = g_utf8_strdown(utf8, -1);
+	g_free(utf8);
+	gunichar *outbuf = convert_utf8_to_ucs4(lowered, &outchars);
+	g_free(lowered);
+	if(!outbuf)
+		return numchars;
 	
-	/* GLib has a function that converts _one_ UCS-4 character to _one_
-	lowercase UCS-4 character; so apparently we don't have to worry about the
-	string length changing... */
-	glui32 *ptr;
-	for(ptr = buf; ptr < buf + numchars; ptr++)
-		*ptr = g_unichar_tolower(*ptr);
+	/* Copy the output buffer to the original buffer */
+	memcpy(buf, outbuf, MIN(outchars, len) * 4);
+	g_free(outbuf);
 	
-	return numchars;
+	return outchars;
 }
 
 /**
@@ -121,14 +133,24 @@ glk_buffer_to_upper_case_uni(glui32 *buf, glui32 len, glui32 numchars)
 	g_return_val_if_fail(buf != NULL && (len > 0 || numchars > 0), 0);
 	g_return_val_if_fail(numchars <= len, 0);
 	
-	/* GLib has a function that converts _one_ UCS-4 character to _one_
-	uppercase UCS-4 character; so apparently we don't have to worry about the
-	string length changing... */
-	glui32 *ptr;
-	for(ptr = buf; ptr < buf + numchars; ptr++)
-		*ptr = g_unichar_toupper(*ptr);
+	long outchars;
 	
-	return numchars;
+	/* Uppercase the string */
+	char *utf8 = convert_ucs4_to_utf8(buf, numchars);
+	if(!utf8)
+		return numchars;
+	char *uppered = g_utf8_strup(utf8, -1);
+	g_free(utf8);
+	gunichar *outbuf = convert_utf8_to_ucs4(uppered, &outchars);
+	g_free(uppered);
+	if(!outbuf)
+		return numchars;
+
+	/* Copy the output buffer to the original buffer */
+	memcpy(buf, outbuf, MIN(outchars, len) * 4);
+	g_free(outbuf);
+
+	return outchars;
 }
 
 /**
@@ -160,9 +182,14 @@ glk_buffer_to_title_case_uni(glui32 *buf, glui32 len, glui32 numchars, glui32 lo
 	g_return_val_if_fail(buf != NULL && (len > 0 || numchars > 0), 0);
 	g_return_val_if_fail(numchars <= len, 0);
 	
-	/* GLib has a function that converts _one_ UCS-4 character to _one_
-	titlecase UCS-4 character; so apparently we don't have to worry about the
-	string length changing... */
+	/* FIXME: This is wrong. g_unichar_totitle() returns the titlecase of one
+	 Unicode code point, but the result is only correct if the titlecase form
+	 is also a single code point.
+	 For example, the one-character 'ffi' ligature should be title-cased to the
+	 three-character string 'Ffi'. This code leaves it as the 'ffi' ligature,
+	 which is incorrect.
+	 However, nothing much can be done about it unless GLib gets a
+	 g_utf8_strtitle() function, or we roll our own. */
 	*buf = g_unichar_totitle(*buf);
 	/* Call lowercase on the rest of the string */
 	if(lowerrest)
@@ -193,8 +220,24 @@ glk_buffer_canon_decompose_uni(glui32 *buf, glui32 len, glui32 numchars)
 	g_return_val_if_fail(buf != NULL && (len > 0 || numchars > 0), 0);
 	g_return_val_if_fail(numchars <= len, 0);
 
-	/* TODO: Implement this */
-	return numchars;
+	long outchars;
+
+	/* Decompose the string (normalization form NFD) */
+	char *utf8 = convert_ucs4_to_utf8(buf, numchars);
+	if(!utf8)
+		return numchars;
+	char *decomposed = g_utf8_normalize(utf8, -1, G_NORMALIZE_NFD);
+	g_free(utf8);
+	gunichar *outbuf = convert_utf8_to_ucs4(decomposed, &outchars);
+	g_free(decomposed);
+	if(!outbuf)
+		return numchars;
+
+	/* Copy the output buffer to the original buffer */
+	memcpy(buf, outbuf, MIN(outchars, len) * 4);
+	g_free(outbuf);
+
+	return outchars;
 }
 
 /**
@@ -255,6 +298,22 @@ glk_buffer_canon_normalize_uni(glui32 *buf, glui32 len, glui32 numchars)
 	g_return_val_if_fail(buf != NULL && (len > 0 || numchars > 0), 0);
 	g_return_val_if_fail(numchars <= len, 0);
 
-	/* TODO: Implement this */
-	return numchars;
+	long outchars;
+
+	/* Normalize the string */
+	char *utf8 = convert_ucs4_to_utf8(buf, numchars);
+	if(!utf8)
+		return numchars;
+	char *decomposed = g_utf8_normalize(utf8, -1, G_NORMALIZE_NFC);
+	g_free(utf8);
+	gunichar *outbuf = convert_utf8_to_ucs4(decomposed, &outchars);
+	g_free(decomposed);
+	if(!outbuf)
+		return numchars;
+
+	/* Copy the output buffer to the original buffer */
+	memcpy(buf, outbuf, MIN(outchars, len) * 4);
+	g_free(outbuf);
+
+	return outchars;
 }