X-Git-Url: https://git.stderr.nl/gitweb?a=blobdiff_plain;f=src%2Fcase.c;h=47086e15ba9e92d19a386ef575f74ef26dd97bd9;hb=0da322b6699ac19cef78a8d6fd146b3720894a8a;hp=ecb7862bd7291e3866edee3aea5eff982c26d7ae;hpb=ff04f3593c5689d7bd09081f7c05fd7b3277a869;p=rodin%2Fchimara.git
diff --git a/src/case.c b/src/case.c
index ecb7862..47086e1 100644
--- a/src/case.c
+++ b/src/case.c
@@ -5,8 +5,21 @@
* glk_char_to_lower:
* @ch: A Latin-1 character.
*
- * If @ch is an uppercase character in the Latin-1 character set, converts it
- * to lowercase. Otherwise, leaves it unchanged.
+ * You can convert Latin-1 characters between upper and lower case with two Glk
+ * utility functions, glk_char_to_lower() and glk_char_to_upper(). These have a
+ * few advantages over the standard ANSI tolower() and
+ * toupper() macros. They work for the entire Latin-1
+ * character set, including accented letters; they behave consistently on all
+ * platforms, since they're part of the Glk library; and they are safe for all
+ * characters. That is, if you call glk_char_to_lower() on a lower-case
+ * character, or a character which is not a letter, you'll get the argument
+ * back unchanged.
+ *
+ * The case-sensitive characters in Latin-1 are the ranges 0x41..0x5A,
+ * 0xC0..0xD6, 0xD8..0xDE (upper case) and the ranges 0x61..0x7A, 0xE0..0xF6,
+ * 0xF8..0xFE (lower case). These are arranged in parallel; so
+ * glk_char_to_lower() will add 0x20 to values in the upper-case ranges, and
+ * glk_char_to_upper() will subtract 0x20 from values in the lower-case ranges.
*
* Returns: A lowercase or non-letter Latin-1 character.
*/
@@ -23,7 +36,7 @@ glk_char_to_lower(unsigned char ch)
* @ch: A Latin-1 character.
*
* If @ch is a lowercase character in the Latin-1 character set, converts it to
- * uppercase. Otherwise, leaves it unchanged.
+ * uppercase. Otherwise, leaves it unchanged. See glk_char_to_lower().
*
* Returns: An uppercase or non-letter Latin-1 character.
*/
@@ -35,26 +48,45 @@ glk_char_to_upper(unsigned char ch)
return ch;
}
-#ifdef GLK_MODULE_UNICODE
-
/**
* glk_buffer_to_lower_case_uni:
* @buf: A character array in UCS-4.
* @len: Available length of @buf.
* @numchars: Number of characters in @buf.
*
- * Converts the first @numchars characters of @buf to their lowercase
- * equivalents, if there is such a thing. These functions provide two length
- * arguments because a string of Unicode characters may expand when its case
- * changes. The @len argument is the available length of the buffer; @numchars
- * is the number of characters in the buffer initially. (So @numchars must be
- * less than or equal to @len. The contents of the buffer after @numchars do
- * not affect the operation.)
+ * Unicode character conversion is trickier, and must be applied to character
+ * arrays, not single characters. These functions
+ * (glk_buffer_to_lower_case_uni(), glk_buffer_to_upper_case_uni(), and
+ * glk_buffer_to_title_case_uni()) provide two length arguments because a
+ * string of Unicode characters may expand when its case changes. The @len
+ * argument is the available length of the buffer; @numchars is the number of
+ * characters in the buffer initially. (So @numchars must be less than or equal
+ * to @len. The contents of the buffer after @numchars do not affect the
+ * operation.)
+ *
+ * The functions return the number of characters after conversion. If this is
+ * greater than @len, the characters in the array will be safely truncated at
+ * @len, but the true count will be returned. (The contents of the buffer after
+ * the returned count are undefined.)
+ *
+ * The lower_case and upper_case functions do what
+ * you'd expect: they convert every character in the buffer (the first @numchars
+ * of them) to its upper or lower-case equivalent, if there is such a thing.
+ *
+ * See the Unicode spec (chapter 3.13, chapter 4.2, etc) for the exact
+ * definitions of upper, lower, and title-case mapping.
+ *
+ *
+ * Unicode has some strange case cases. For example, a combined character
+ * that looks like "ss" might properly be upper-cased into
+ * two characters "S". Title-casing is even
+ * stranger; "ss" (at the beginning of a word) might be
+ * title-cased into a different combined character that looks like
+ * "Ss". The glk_buffer_to_title_case_uni() function is actually
+ * title-casing the first character of the buffer. If it makes a difference.
+ *
*
- * Returns: The number of characters after conversion. If this is greater than
- * @len, the characters in the array will be safely truncated at len, but the
- * true count will be returned. (The contents of the buffer after the returned
- * count are undefined.)
+ * Returns: The number of characters after conversion.
*/
glui32
glk_buffer_to_lower_case_uni(glui32 *buf, glui32 len, glui32 numchars)
@@ -104,30 +136,37 @@ glk_buffer_to_upper_case_uni(glui32 *buf, glui32 len, glui32 numchars)
* @buf: A character array in UCS-4.
* @len: Available length of @buf.
* @numchars: Number of characters in @buf.
- * @lowerrest: #TRUE if the rest of @buf should be lowercased, #FALSE
+ * @lowerrest: %TRUE if the rest of @buf should be lowercased, %FALSE
* otherwise.
*
- * Converts the first character of @buf to uppercase, if there is such a thing.
- * See glk_buffer_to_lower_case_uni(). If @lowerrest is #TRUE, then the
- * remainder of @buf is lowercased.
+ * See glk_buffer_to_lower_case_uni(). The title_case function has
+ * an additional (boolean) flag. Its basic function is to change the first
+ * character of the buffer to upper-case, and leave the rest of the buffer
+ * unchanged. If @lowerrest is true, it changes all the non-first characters to
+ * lower-case (instead of leaving them alone.)
*
+ *
+ * Earlier drafts of this spec had a separate function which title-cased the
+ * first character of every word in the buffer. I took
+ * this out after reading Unicode Standard Annex #29, which explains how
+ * to divide a string into words. If you want it, feel free to implement it.
+ *
+ *
* Returns: The number of characters after conversion.
*/
glui32
-glk_buffer_to_title_case_uni(glui32 *buf, glui32 len, glui32 numchars,
- glui32 lowerrest)
+glk_buffer_to_title_case_uni(glui32 *buf, glui32 len, glui32 numchars, glui32 lowerrest)
{
g_return_val_if_fail(buf != NULL && (len > 0 || numchars > 0), 0);
g_return_val_if_fail(numchars <= len, 0);
/* GLib has a function that converts _one_ UCS-4 character to _one_
- uppercase UCS-4 character; so apparently we don't have to worry about the
+ titlecase UCS-4 character; so apparently we don't have to worry about the
string length changing... */
*buf = g_unichar_totitle(*buf);
/* Call lowercase on the rest of the string */
if(lowerrest)
- return glk_buffer_to_lower_case_uni(buf + 1, len - 1, numchars - 1) +1;
+ return glk_buffer_to_lower_case_uni(buf + 1, len - 1, numchars - 1) + 1;
return numchars;
}
-#endif /* GLK_MODULE_UNICODE */