X-Git-Url: https://git.stderr.nl/gitweb?a=blobdiff_plain;f=libchimara%2Fcase.c;h=3176a9a1adf2d60dd8aa7a204175db92772d1bce;hb=HEAD;hp=1b6eed72ba68fddcc6d7c2aa279b048827e34bad;hpb=f19a194bd066ea4320c1c6b6b8c5a375f7787af5;p=projects%2Fchimara%2Fchimara.git diff --git a/libchimara/case.c b/libchimara/case.c index 1b6eed7..3176a9a 100644 --- a/libchimara/case.c +++ b/libchimara/case.c @@ -1,5 +1,7 @@ -#include +#include +#include #include "glk.h" +#include "charset.h" /** * glk_char_to_lower: @@ -93,15 +95,25 @@ glk_buffer_to_lower_case_uni(glui32 *buf, glui32 len, glui32 numchars) { g_return_val_if_fail(buf != NULL && (len > 0 || numchars > 0), 0); g_return_val_if_fail(numchars <= len, 0); + + long outchars; + + /* Lowercase the string */ + char *utf8 = convert_ucs4_to_utf8(buf, numchars); + if(!utf8) + return numchars; + char *lowered = g_utf8_strdown(utf8, -1); + g_free(utf8); + gunichar *outbuf = convert_utf8_to_ucs4(lowered, &outchars); + g_free(lowered); + if(!outbuf) + return numchars; - /* GLib has a function that converts _one_ UCS-4 character to _one_ - lowercase UCS-4 character; so apparently we don't have to worry about the - string length changing... */ - glui32 *ptr; - for(ptr = buf; ptr < buf + numchars; ptr++) - *ptr = g_unichar_tolower(*ptr); + /* Copy the output buffer to the original buffer */ + memcpy(buf, outbuf, MIN(outchars, len) * 4); + g_free(outbuf); - return numchars; + return outchars; } /** @@ -121,14 +133,24 @@ glk_buffer_to_upper_case_uni(glui32 *buf, glui32 len, glui32 numchars) g_return_val_if_fail(buf != NULL && (len > 0 || numchars > 0), 0); g_return_val_if_fail(numchars <= len, 0); - /* GLib has a function that converts _one_ UCS-4 character to _one_ - uppercase UCS-4 character; so apparently we don't have to worry about the - string length changing... */ - glui32 *ptr; - for(ptr = buf; ptr < buf + numchars; ptr++) - *ptr = g_unichar_toupper(*ptr); + long outchars; - return numchars; + /* Uppercase the string */ + char *utf8 = convert_ucs4_to_utf8(buf, numchars); + if(!utf8) + return numchars; + char *uppered = g_utf8_strup(utf8, -1); + g_free(utf8); + gunichar *outbuf = convert_utf8_to_ucs4(uppered, &outchars); + g_free(uppered); + if(!outbuf) + return numchars; + + /* Copy the output buffer to the original buffer */ + memcpy(buf, outbuf, MIN(outchars, len) * 4); + g_free(outbuf); + + return outchars; } /** @@ -160,9 +182,14 @@ glk_buffer_to_title_case_uni(glui32 *buf, glui32 len, glui32 numchars, glui32 lo g_return_val_if_fail(buf != NULL && (len > 0 || numchars > 0), 0); g_return_val_if_fail(numchars <= len, 0); - /* GLib has a function that converts _one_ UCS-4 character to _one_ - titlecase UCS-4 character; so apparently we don't have to worry about the - string length changing... */ + /* FIXME: This is wrong. g_unichar_totitle() which returns the titlecase of + one Unicode code point, but that only returns the correct result if the + titlecase character is also one code point. + For example, the one-character 'ffi' ligature should be title-cased to the + three-character string 'Ffi'. This code leaves it as the 'ffi' ligature, + which is incorrect. + However, nothing much can be done about it unless GLib gets a + g_utf8_strtitle() function, or we roll our own. */ *buf = g_unichar_totitle(*buf); /* Call lowercase on the rest of the string */ if(lowerrest) @@ -193,8 +220,24 @@ glk_buffer_canon_decompose_uni(glui32 *buf, glui32 len, glui32 numchars) g_return_val_if_fail(buf != NULL && (len > 0 || numchars > 0), 0); g_return_val_if_fail(numchars <= len, 0); - /* TODO: Implement this */ - return numchars; + long outchars; + + /* Normalize the string */ + char *utf8 = convert_ucs4_to_utf8(buf, numchars); + if(!utf8) + return numchars; + char *decomposed = g_utf8_normalize(utf8, -1, G_NORMALIZE_NFD); + g_free(utf8); + gunichar *outbuf = convert_utf8_to_ucs4(decomposed, &outchars); + g_free(decomposed); + if(!outbuf) + return numchars; + + /* Copy the output buffer to the original buffer */ + memcpy(buf, outbuf, MIN(outchars, len) * 4); + g_free(outbuf); + + return outchars; } /** @@ -255,6 +298,22 @@ glk_buffer_canon_normalize_uni(glui32 *buf, glui32 len, glui32 numchars) g_return_val_if_fail(buf != NULL && (len > 0 || numchars > 0), 0); g_return_val_if_fail(numchars <= len, 0); - /* TODO: Implement this */ - return numchars; + long outchars; + + /* Normalize the string */ + char *utf8 = convert_ucs4_to_utf8(buf, numchars); + if(!utf8) + return numchars; + char *decomposed = g_utf8_normalize(utf8, -1, G_NORMALIZE_NFC); + g_free(utf8); + gunichar *outbuf = convert_utf8_to_ucs4(decomposed, &outchars); + g_free(decomposed); + if(!outbuf) + return numchars; + + /* Copy the output buffer to the original buffer */ + memcpy(buf, outbuf, MIN(outchars, len) * 4); + g_free(outbuf); + + return outchars; }