X-Git-Url: https://git.stderr.nl/gitweb?a=blobdiff_plain;f=src%2Fcase.c;h=47086e15ba9e92d19a386ef575f74ef26dd97bd9;hb=062bbb9e7322c8e3f5162f86fc2f93bfc587b830;hp=9e73de10240413aa315dae16a94e27c8887ab89c;hpb=db0f0fc8ee25926da8d34b826a3fc80d6186adbb;p=rodin%2Fchimara.git diff --git a/src/case.c b/src/case.c index 9e73de1..47086e1 100644 --- a/src/case.c +++ b/src/case.c @@ -5,8 +5,21 @@ * glk_char_to_lower: * @ch: A Latin-1 character. * - * If @ch is an uppercase character in the Latin-1 character set, converts it - * to lowercase. Otherwise, leaves it unchanged. + * You can convert Latin-1 characters between upper and lower case with two Glk + * utility functions, glk_char_to_lower() and glk_char_to_upper(). These have a + * few advantages over the standard ANSI tolower() and + * toupper() macros. They work for the entire Latin-1 + * character set, including accented letters; they behave consistently on all + * platforms, since they're part of the Glk library; and they are safe for all + * characters. That is, if you call glk_char_to_lower() on a lower-case + * character, or a character which is not a letter, you'll get the argument + * back unchanged. + * + * The case-sensitive characters in Latin-1 are the ranges 0x41..0x5A, + * 0xC0..0xD6, 0xD8..0xDE (upper case) and the ranges 0x61..0x7A, 0xE0..0xF6, + * 0xF8..0xFE (lower case). These are arranged in parallel; so + * glk_char_to_lower() will add 0x20 to values in the upper-case ranges, and + * glk_char_to_upper() will subtract 0x20 from values in the lower-case ranges. * * Returns: A lowercase or non-letter Latin-1 character. */ @@ -23,7 +36,7 @@ glk_char_to_lower(unsigned char ch) * @ch: A Latin-1 character. * * If @ch is a lowercase character in the Latin-1 character set, converts it to - * uppercase. Otherwise, leaves it unchanged. + * uppercase. Otherwise, leaves it unchanged. See glk_char_to_lower(). * * Returns: An uppercase or non-letter Latin-1 character. 
*/ @@ -41,18 +54,39 @@ glk_char_to_upper(unsigned char ch) * @len: Available length of @buf. * @numchars: Number of characters in @buf. * - * Converts the first @numchars characters of @buf to their lowercase - * equivalents, if there is such a thing. These functions provide two length - * arguments because a string of Unicode characters may expand when its case - * changes. The @len argument is the available length of the buffer; @numchars - * is the number of characters in the buffer initially. (So @numchars must be - * less than or equal to @len. The contents of the buffer after @numchars do - * not affect the operation.) + * Unicode character conversion is trickier, and must be applied to character + * arrays, not single characters. These functions + * (glk_buffer_to_lower_case_uni(), glk_buffer_to_upper_case_uni(), and + * glk_buffer_to_title_case_uni()) provide two length arguments because a + * string of Unicode characters may expand when its case changes. The @len + * argument is the available length of the buffer; @numchars is the number of + * characters in the buffer initially. (So @numchars must be less than or equal + * to @len. The contents of the buffer after @numchars do not affect the + * operation.) * - * Returns: The number of characters after conversion. If this is greater than - * @len, the characters in the array will be safely truncated at len, but the - * true count will be returned. (The contents of the buffer after the returned - * count are undefined.) + * The functions return the number of characters after conversion. If this is + * greater than @len, the characters in the array will be safely truncated at + * @len, but the true count will be returned. (The contents of the buffer after + * the returned count are undefined.) + * + * The lower_case and upper_case functions do what + * you'd expect: they convert every character in the buffer (the first @numchars + * of them) to its upper or lower-case equivalent, if there is such a thing. 
+ * + * See the Unicode spec (chapter 3.13, chapter 4.2, etc.) for the exact + * definitions of upper, lower, and title-case mapping. + * + * + * Unicode has some strange case cases. For example, a combined character + * that looks like "ss" might properly be upper-cased into + * two characters "S". Title-casing is even + * stranger; "ss" (at the beginning of a word) might be + * title-cased into a different combined character that looks like + * "Ss". The glk_buffer_to_title_case_uni() function is actually + * title-casing the first character of the buffer. If it makes a difference. + * + * + * Returns: The number of characters after conversion. */ glui32 glk_buffer_to_lower_case_uni(glui32 *buf, glui32 len, glui32 numchars) @@ -105,10 +139,19 @@ glk_buffer_to_upper_case_uni(glui32 *buf, glui32 len, glui32 numchars) * @lowerrest: %TRUE if the rest of @buf should be lowercased, %FALSE * otherwise. * - * Converts the first character of @buf to uppercase, if there is such a thing. - * See glk_buffer_to_lower_case_uni(). If @lowerrest is %TRUE, then the - * remainder of @buf is lowercased. + * See glk_buffer_to_lower_case_uni(). The title_case function has + * an additional (boolean) flag. Its basic function is to change the first + * character of the buffer to upper-case, and leave the rest of the buffer + * unchanged. If @lowerrest is true, it changes all the non-first characters to + * lower-case (instead of leaving them alone). * + * + * Earlier drafts of this spec had a separate function which title-cased the + * first character of every word in the buffer. I took + * this out after reading Unicode Standard Annex #29, which explains how + * to divide a string into words. If you want it, feel free to implement it. + * + * * Returns: The number of characters after conversion. */ glui32