5 /* Internal function: replace illegal (control) characters in a Latin-1 byte
6 string with the placeholder character PLACEHOLDER.
   s:   input bytes; exactly len of them are examined, so the input does not
        need a NUL terminator.
   len: number of bytes in s.
   The returned string is newly allocated and must be freed afterwards.
   NOTE(review): presumably bytes that are not control characters are copied
   through unchanged - confirm against the full function body. */
8 remove_latin1_control_characters(const unsigned char *s, const gsize len)
10 /* If len == 0, then return an empty string, not NULL */
14 gchar *retval = g_new0(gchar, len); /* zero-filled, exactly len bytes: no room for a trailing NUL */
16 for(i = 0; i < len; i++)
/* Treated as control bytes: everything below 32 except 10 (line feed), plus
   127..159 (DEL and the C1 control range). */
17 if( (s[i] < 32 && s[i] != 10) || (s[i] >= 127 && s[i] <= 159) )
18 retval[i] = PLACEHOLDER;
24 /* Internal function: convert a Latin-1 string to a UTF-8 string, replacing
25 Latin-1 control characters by a placeholder first. The UTF-8 string must be
26 freed afterwards. Returns NULL on error.
   s:   Latin-1 input; it is length-delimited by len.
   len: number of bytes of s to sanitize and convert.
   A warning containing s and the GError message is logged through IO_WARNING.
   NOTE(review): s is handed to IO_WARNING even though it is length-delimited
   here; if the macro formats it as a C string this relies on s being
   NUL-terminated - confirm.
   NOTE(review): confirm that canonical and error are released on every path. */
28 convert_latin1_to_utf8(const gchar *s, const gsize len)
/* Sanitize first. The cast only reinterprets the bytes as unsigned, which is
   what the helper's range checks operate on. */
31 gchar *canonical = remove_latin1_control_characters( (unsigned char *)s,
33 gchar *retval = g_convert(canonical, len, "UTF-8", "ISO-8859-1", NULL, NULL, &error); /* target codeset comes before source codeset; the two NULLs decline bytes_read / bytes_written */
37 IO_WARNING("Error during latin1->utf8 conversion of string", s, error->message);
42 /* Internal function: convert a Latin-1 string to a four-byte-per-character
43 big-endian string of gchars. The string must be freed afterwards.
   s:   Latin-1 input; len bytes are read.
   len: number of input bytes. The output buffer is exactly len * 4 bytes, so
        it carries no terminator of its own.
   NOTE(review): len * 4 is evaluated before g_new0() sees it, so g_new0()'s
   own overflow check does not cover this multiplication - only relevant for
   extremely large len. */
45 convert_latin1_to_ucs4be_string(const gchar *s, const gsize len)
47 /* "UCS-4BE" is also a conversion type in g_convert()... but this may be more efficient */
48 gchar *retval = g_new0(gchar, len * 4); /* zero-filled, so the three high-order bytes of every group are already 0 */
50 for(i = 0; i < len; i++)
51 retval[i * 4 + 3] = s[i]; /* big-endian: the Latin-1 byte goes in the last (least significant) byte of its group */
55 /* Internal function: convert a NUL-terminated UTF-8 string to a
56 NUL-terminated Latin-1 string, replacing characters that cannot be represented
57 in Latin-1 by a placeholder. If bytes_written is not NULL it will be filled with
58 the number of bytes returned, not counting the NUL terminator. The returned
59 string must be freed afterwards. Returns NULL on error.
   s:             NUL-terminated UTF-8 input.
   bytes_written: optional out-parameter, forwarded to g_convert_with_fallback().
   A warning containing s and the GError message is logged through IO_WARNING.
   NOTE(review): confirm that error is released after the warning. */
61 convert_utf8_to_latin1(const gchar *s, gsize *bytes_written)
64 gchar *retval = g_convert_with_fallback(s, -1, "ISO-8859-1", "UTF-8", PLACEHOLDER_STRING, NULL, bytes_written, &error); /* -1: input is NUL-terminated; PLACEHOLDER_STRING is the fallback for unrepresentable characters; NULL declines bytes_read */
67 IO_WARNING("Error during utf8->latin1 conversion of string", s, error->message);
72 /* Internal function: convert a NUL-terminated UTF-8 string to a
73 NUL-terminated UCS4 string. If items_written is not NULL it will be filled with
74 the number of code points returned, not counting the terminator. The returned
75 string must be freed afterwards. Returns NULL on error.
   s:             NUL-terminated UTF-8 input.
   items_written: optional out-parameter, forwarded to g_utf8_to_ucs4_fast().
   NOTE(review): GLib documents g_utf8_to_ucs4_fast() as assuming valid UTF-8
   and doing no error checking on its input, so malformed input is not reported
   as NULL here - confirm whether the validating g_utf8_to_ucs4() was
   intended. */
77 convert_utf8_to_ucs4(const gchar *s, glong *items_written)
79 gunichar *retval = g_utf8_to_ucs4_fast(s, -1, items_written); /* -1: input is NUL-terminated */
82 WARNING_S("Error during utf8->unicode conversion of string", s);
87 /* Internal function: Convert a Unicode buffer to a NUL-terminated UTF-8
88 string. The returned string must be freed afterwards. Returns NULL on error.
   buf: UCS-4 code points to convert.
   len: number of code points in buf, passed straight to g_ucs4_to_utf8().
   A warning containing the GError message is logged through WARNING_S.
   NOTE(review): confirm that error is released after the warning. */
90 convert_ucs4_to_utf8(const gunichar *buf, const glong len)
93 gchar *retval = g_ucs4_to_utf8(buf, len, NULL, NULL, &error); /* the two NULLs decline items_read / items_written */
96 WARNING_S("Error during unicode->utf8 conversion", error->message);
101 /* Internal function: Convert a Unicode buffer to a Latin-1 string. Do not do
102 any character processing, just return values > 255 as the placeholder character.
103 The returned string must be freed afterwards.
    buf: UCS-4 code points; len of them are read.
    len: number of code points, which is also the size of the result in bytes.
    The result buffer is exactly len bytes, so it carries no terminator of its
    own; callers must track the length themselves. */
105 convert_ucs4_to_latin1_binary(const gunichar *buf, const glong len)
107 gchar *retval = g_new0(gchar, len); /* one output byte per code point, no extra byte for a NUL */
109 for(foo = 0; foo < len; foo++)
110 retval[foo] = (buf[foo] > 255)? PLACEHOLDER : buf[foo]; /* values 0..255 pass through unchanged, control characters included */
114 /* Internal function: convert a Unicode buffer to a four-byte-per-character
115 big-endian string of gchars. The string must be freed afterwards.
    buf: UCS-4 code points; len of them are read.
    len: number of code points. The output buffer is exactly len * 4 bytes, so
         it carries no terminator of its own. */
117 convert_ucs4_to_ucs4be_string(const gunichar *buf, const glong len)
119 gchar *retval = g_new0(gchar, len * 4);
121 for(i = 0; i < len; i++)
/* Most significant byte first. ">>" binds tighter than "&", so each line below
   shifts first and then masks down to a single byte. */
123 retval[i * 4] = buf[i] >> 24 ;
124 retval[i * 4 + 1] = buf[i] >> 16 & 0xFF;
125 retval[i * 4 + 2] = buf[i] >> 8 & 0xFF;
126 retval[i * 4 + 3] = buf[i] & 0xFF;