6 * @ch: A Latin-1 character.
8 * You can convert Latin-1 characters between upper and lower case with two Glk
9 * utility functions, glk_char_to_lower() and glk_char_to_upper(). These have a
10 * few advantages over the standard ANSI <function>tolower()</function> and
11 * <function>toupper()</function> macros. They work for the entire Latin-1
12 * character set, including accented letters; they behave consistently on all
13 * platforms, since they're part of the Glk library; and they are safe for all
14 * characters. That is, if you call glk_char_to_lower() on a lower-case
15 * character, or a character which is not a letter, you'll get the argument
18 * The case-sensitive characters in Latin-1 are the ranges 0x41..0x5A,
19 * 0xC0..0xD6, 0xD8..0xDE (upper case) and the ranges 0x61..0x7A, 0xE0..0xF6,
20 * 0xF8..0xFE (lower case). These are arranged in parallel; so
21 * glk_char_to_lower() will add 0x20 to values in the upper-case ranges, and
22 * glk_char_to_upper() will subtract 0x20 from values in the lower-case ranges.
24 * Returns: A lowercase or non-letter Latin-1 character.
27 glk_char_to_lower(unsigned char ch)
29 if( (ch >= 0x41 && ch <= 0x5A) || (ch >= 0xC0 && ch <= 0xD6) || (ch >= 0xD8 && ch <= 0xDE) )
36 * @ch: A Latin-1 character.
38 * If @ch is a lowercase character in the Latin-1 character set, converts it to
39 * uppercase. Otherwise, leaves it unchanged. See glk_char_to_lower().
41 * Returns: An uppercase or non-letter Latin-1 character.
44 glk_char_to_upper(unsigned char ch)
46 if( (ch >= 0x61 && ch <= 0x7A) || (ch >= 0xE0 && ch <= 0xF6) || (ch >= 0xF8 && ch <= 0xFE) )
52 * glk_buffer_to_lower_case_uni:
53 * @buf: A character array in UCS-4.
54 * @len: Available length of @buf.
55 * @numchars: Number of characters in @buf.
57 * Unicode character conversion is trickier, and must be applied to character
58 * arrays, not single characters. These functions
59 * (glk_buffer_to_lower_case_uni(), glk_buffer_to_upper_case_uni(), and
60 * glk_buffer_to_title_case_uni()) provide two length arguments because a
61 * string of Unicode characters may expand when its case changes. The @len
62 * argument is the available length of the buffer; @numchars is the number of
63 * characters in the buffer initially. (So @numchars must be less than or equal
64 * to @len. The contents of the buffer after @numchars do not affect the
67 * The functions return the number of characters after conversion. If this is
68 * greater than @len, the characters in the array will be safely truncated at
69 * @len, but the true count will be returned. (The contents of the buffer after
70 * the returned count are undefined.)
72 * The <code>lower_case</code> and <code>upper_case</code> functions do what
73 * you'd expect: they convert every character in the buffer (the first @numchars
74 * of them) to its lower or upper-case equivalent, if there is such a thing.
76 * See the Unicode spec (chapter 3.13, chapter 4.2, etc) for the exact
77 * definitions of upper, lower, and title-case mapping.
80 * Unicode has some strange case cases. For example, a combined character
81 * that looks like <quote>ss</quote> might properly be upper-cased into
82 * <emphasis>two</emphasis> <quote>S</quote> characters. Title-casing is even
83 * stranger; <quote>ss</quote> (at the beginning of a word) might be
84 * title-cased into a different combined character that looks like
85 * <quote>Ss</quote>. Strictly, glk_buffer_to_title_case_uni() title-cases
86 * (rather than upper-cases) the first character, where the two differ.
89 * Returns: The number of characters after conversion.
92 glk_buffer_to_lower_case_uni(glui32 *buf, glui32 len, glui32 numchars)
94 g_return_val_if_fail(buf != NULL && (len > 0 || numchars > 0), 0);
95 g_return_val_if_fail(numchars <= len, 0);
97 /* g_unichar_tolower() maps _one_ UCS-4 character to exactly _one_ UCS-4
98 character, so the buffer is converted in place and its length cannot
99 change. */
101 for(ptr = buf; ptr < buf + numchars; ptr++)
102 *ptr = g_unichar_tolower(*ptr);
108 * glk_buffer_to_upper_case_uni:
109 * @buf: A character array in UCS-4.
110 * @len: Available length of @buf.
111 * @numchars: Number of characters in @buf.
113 * Converts the first @numchars characters of @buf to their uppercase
114 * equivalents, if there is such a thing. See glk_buffer_to_lower_case_uni().
116 * Returns: The number of characters after conversion.
119 glk_buffer_to_upper_case_uni(glui32 *buf, glui32 len, glui32 numchars)
121 g_return_val_if_fail(buf != NULL && (len > 0 || numchars > 0), 0);
122 g_return_val_if_fail(numchars <= len, 0);
124 /* g_unichar_toupper() maps _one_ UCS-4 character to exactly _one_ UCS-4
125 character, so the buffer is converted in place and its length cannot
126 change; expansions such as sharp s -> "SS" are therefore not applied. */
128 for(ptr = buf; ptr < buf + numchars; ptr++)
129 *ptr = g_unichar_toupper(*ptr);
135 * glk_buffer_to_title_case_uni:
136 * @buf: A character array in UCS-4.
137 * @len: Available length of @buf.
138 * @numchars: Number of characters in @buf.
139 * @lowerrest: %TRUE if the rest of @buf should be lowercased, %FALSE
142 * See glk_buffer_to_lower_case_uni(). The <code>title_case</code> function has
143 * an additional (boolean) flag. Its basic function is to change the first
144 * character of the buffer to upper-case, and leave the rest of the buffer
145 * unchanged. If @lowerrest is true, it changes all the non-first characters to
146 * lower-case (instead of leaving them alone).
149 * Earlier drafts of this spec had a separate function which title-cased the
150 * first character of every <emphasis>word</emphasis> in the buffer. I took
151 * this out after reading Unicode Standard Annex #29, which explains how
152 * to divide a string into words. If you want it, feel free to implement it.
155 * Returns: The number of characters after conversion.
158 glk_buffer_to_title_case_uni(glui32 *buf, glui32 len, glui32 numchars, glui32 lowerrest)
160 g_return_val_if_fail(buf != NULL && (len > 0 || numchars > 0), 0);
161 g_return_val_if_fail(numchars <= len, 0);
163 /* g_unichar_totitle() maps _one_ UCS-4 character to exactly _one_ UCS-4
164 character, so the first character is converted in place and the string
165 length cannot change. */
166 *buf = g_unichar_totitle(*buf);
167 /* Call lowercase on the rest of the string */
169 return glk_buffer_to_lower_case_uni(buf + 1, len - 1, numchars - 1) + 1;