From 10ad9d86414a4ade89743c1b4b706be914ba00e6 Mon Sep 17 00:00:00 2001
From: "P. F. Chimento" <philip.chimento@gmail.com>
Date: Sun, 8 May 2011 00:18:58 +0200
Subject: [PATCH] Fix (most) UniCaseTest failures (fix #27)

Upper- or lowercasing a Unicode string cannot be done by mapping each
character to exactly one other character, contrary to what the GLib
functions g_unichar_tolower() and g_unichar_toupper() would seem to
suggest. The casing operations now operate on the whole string.

Unfortunately, this introduces a new bug, #28, since there is no
title-casing function in GLib that allows for the title-case of a
character to be longer than one character.
---
 libchimara/case.c | 59 +++++++++++++++++++++++++++++++++--------------
 1 file changed, 42 insertions(+), 17 deletions(-)

diff --git a/libchimara/case.c b/libchimara/case.c
index 5b1a5d7..3176a9a 100644
--- a/libchimara/case.c
+++ b/libchimara/case.c
@@ -95,15 +95,25 @@ glk_buffer_to_lower_case_uni(glui32 *buf, glui32 len, glui32 numchars)
 {
 	g_return_val_if_fail(buf != NULL && (len > 0 || numchars > 0), 0);
 	g_return_val_if_fail(numchars <= len, 0);
+
+	long outchars;
+
+	/* Lowercase the string */
+	char *utf8 = convert_ucs4_to_utf8(buf, numchars);
+	if(!utf8)
+		return numchars;
+	char *lowered = g_utf8_strdown(utf8, -1);
+	g_free(utf8);
+	gunichar *outbuf = convert_utf8_to_ucs4(lowered, &outchars);
+	g_free(lowered);
+	if(!outbuf)
+		return numchars;
 	
-	/* GLib has a function that converts _one_ UCS-4 character to _one_
-	lowercase UCS-4 character; so apparently we don't have to worry about the
-	string length changing... */
-	glui32 *ptr;
-	for(ptr = buf; ptr < buf + numchars; ptr++)
-		*ptr = g_unichar_tolower(*ptr);
+	/* Copy the output buffer to the original buffer */
+	memcpy(buf, outbuf, MIN(outchars, len) * 4);
+	g_free(outbuf);
 	
-	return numchars;
+	return outchars;
 }
 
 /**
@@ -123,14 +133,24 @@ glk_buffer_to_upper_case_uni(glui32 *buf, glui32 len, glui32 numchars)
 	g_return_val_if_fail(buf != NULL && (len > 0 || numchars > 0), 0);
 	g_return_val_if_fail(numchars <= len, 0);
 	
-	/* GLib has a function that converts _one_ UCS-4 character to _one_
-	uppercase UCS-4 character; so apparently we don't have to worry about the
-	string length changing... */
-	glui32 *ptr;
-	for(ptr = buf; ptr < buf + numchars; ptr++)
-		*ptr = g_unichar_toupper(*ptr);
+	long outchars;
 	
-	return numchars;
+	/* Uppercase the string */
+	char *utf8 = convert_ucs4_to_utf8(buf, numchars);
+	if(!utf8)
+		return numchars;
+	char *uppered = g_utf8_strup(utf8, -1);
+	g_free(utf8);
+	gunichar *outbuf = convert_utf8_to_ucs4(uppered, &outchars);
+	g_free(uppered);
+	if(!outbuf)
+		return numchars;
+
+	/* Copy the output buffer to the original buffer */
+	memcpy(buf, outbuf, MIN(outchars, len) * 4);
+	g_free(outbuf);
+
+	return outchars;
 }
 
 /**
@@ -162,9 +182,14 @@ glk_buffer_to_title_case_uni(glui32 *buf, glui32 len, glui32 numchars, glui32 lo
 	g_return_val_if_fail(buf != NULL && (len > 0 || numchars > 0), 0);
 	g_return_val_if_fail(numchars <= len, 0);
 	
-	/* GLib has a function that converts _one_ UCS-4 character to _one_
-	titlecase UCS-4 character; so apparently we don't have to worry about the
-	string length changing... */
+	/* FIXME: This is wrong. g_unichar_totitle() returns the titlecase of one
+	 Unicode code point, but that is only the correct result if the titlecase
+	 mapping is also a single code point.
+	 For example, the one-character 'ffi' ligature should be title-cased to the
+	 three-character string 'Ffi'. This code leaves it as the 'ffi' ligature,
+	 which is incorrect.
+	 However, nothing much can be done about it unless GLib gets a
+	 g_utf8_strtitle() function, or we roll our own. */
 	*buf = g_unichar_totitle(*buf);
 	/* Call lowercase on the rest of the string */
 	if(lowerrest)
-- 
2.30.2