X-Git-Url: https://git.stderr.nl/gitweb?a=blobdiff_plain;ds=inline;f=src%2Fcharset.c;fp=src%2Fcharset.c;h=0000000000000000000000000000000000000000;hb=0b85f1dd5993e2ed111ec2ba13bbbb4ebda06ada;hp=3f047cc1979c6c48b382b9fa0d1d65f927b7e005;hpb=08f8444e2ae5480eea1cf7e2c1e2eb57f46152db;p=rodin%2Fchimara.git diff --git a/src/charset.c b/src/charset.c deleted file mode 100644 index 3f047cc..0000000 --- a/src/charset.c +++ /dev/null @@ -1,129 +0,0 @@ -#include "charset.h" -#include "magic.h" -#include - -/* Internal function: change illegal (control) characters in a string to a -placeholder character. Must free returned string afterwards. */ -static gchar * -remove_latin1_control_characters(const unsigned char *s, const gsize len) -{ - /* If len == 0, then return an empty string, not NULL */ - if(len == 0) - return g_strdup(""); - - gchar *retval = g_new0(gchar, len); - int i; - for(i = 0; i < len; i++) - if( (s[i] < 32 && s[i] != 10) || (s[i] >= 127 && s[i] <= 159) ) - retval[i] = PLACEHOLDER; - else - retval[i] = s[i]; - return retval; -} - -/* Internal function: convert a Latin-1 string to a UTF-8 string, replacing -Latin-1 control characters by a placeholder first. The UTF-8 string must be -freed afterwards. Returns NULL on error. */ -gchar * -convert_latin1_to_utf8(const gchar *s, const gsize len) -{ - GError *error = NULL; - gchar *canonical = remove_latin1_control_characters( (unsigned char *)s, - len); - gchar *retval = g_convert(canonical, len, "UTF-8", "ISO-8859-1", NULL, NULL, &error); - g_free(canonical); - - if(retval == NULL) - IO_WARNING("Error during latin1->utf8 conversion of string", s, error->message); - - return retval; -} - -/* Internal function: convert a Latin-1 string to a four-byte-per-character -big-endian string of gchars. The string must be freed afterwards. */ -gchar * -convert_latin1_to_ucs4be_string(const gchar *s, const gsize len) -{ - /* "UCS-4BE" is also a conversion type in g_convert()... but this may be more efficient */ - gchar *retval = g_new0(gchar, len * 4); - int i; - for(i = 0; i < len; i++) - retval[i * 4 + 3] = s[i]; - return retval; -} - -/* Internal function: convert a null-terminated UTF-8 string to a -null-terminated Latin-1 string, replacing characters that cannot be represented -in Latin-1 by a placeholder. If bytes_written is not NULL it will be filled with -the number of bytes returned, not counting the NULL terminator. The returned -string must be freed afterwards. Returns NULL on error. */ -gchar * -convert_utf8_to_latin1(const gchar *s, gsize *bytes_written) -{ - GError *error = NULL; - gchar *retval = g_convert_with_fallback(s, -1, "ISO-8859-1", "UTF-8", PLACEHOLDER_STRING, NULL, bytes_written, &error); - - if(retval == NULL) - IO_WARNING("Error during utf8->latin1 conversion of string", s, error->message); - - return retval; -} - -/* Internal function: convert a null-terminated UTF-8 string to a -null-terminated UCS4 string. If items_written is not NULL it will be filled with -the number of code points returned, not counting the terminator. The returned -string must be freed afterwards. Returns NULL on error. */ -gunichar * -convert_utf8_to_ucs4(const gchar *s, glong *items_written) -{ - gunichar *retval = g_utf8_to_ucs4_fast(s, -1, items_written); - - if(retval == NULL) - WARNING_S("Error during utf8->unicode conversion of string", s); - - return retval; -} - -/* Internal function: Convert a Unicode buffer to a null-terminated UTF-8 -string. The returned string must be freed afterwards. Returns NULL on error. */ -gchar * -convert_ucs4_to_utf8(const gunichar *buf, const glong len) -{ - GError *error = NULL; - gchar *retval = g_ucs4_to_utf8(buf, len, NULL, NULL, &error); - - if(retval == NULL) - WARNING_S("Error during unicode->utf8 conversion", error->message); - - return retval; -} - -/* Internal function: Convert a Unicode buffer to a Latin-1 string. Do not do -any character processing, just return values > 255 as the placeholder character. -The returned string must be freed afterwards.*/ -gchar * -convert_ucs4_to_latin1_binary(const gunichar *buf, const glong len) -{ - gchar *retval = g_new0(gchar, len); - int foo; - for(foo = 0; foo < len; foo++) - retval[foo] = (buf[foo] > 255)? PLACEHOLDER : buf[foo]; - return retval; -} - -/* Internal function: convert a Unicode buffer to a four-byte-per-character -big-endian string of gchars. The string must be freed afterwards. */ -gchar * -convert_ucs4_to_ucs4be_string(const gunichar *buf, const glong len) -{ - gchar *retval = g_new0(gchar, len * 4); - int i; - for(i = 0; i < len; i++) - { - retval[i * 4] = buf[i] >> 24 ; - retval[i * 4 + 1] = buf[i] >> 16 & 0xFF; - retval[i * 4 + 2] = buf[i] >> 8 & 0xFF; - retval[i * 4 + 3] = buf[i] & 0xFF; - } - return retval; -}