X-Git-Url: http://erislabs.net/gitweb/?a=blobdiff_plain;f=lib%2Funilbrk%2Fulc-possible-linebreaks.c;h=1632010ba93ce0f4a5f575fee88598b7226c62d9;hb=5e117d03ff9c6ca545f1e8be91c92cfcca605e37;hp=444e9a9461559cb1d7474c1bc5900ad75287cdc6;hpb=9abda809d44be61025d67d5980a4e780010c7b5b;p=gnulib.git diff --git a/lib/unilbrk/ulc-possible-linebreaks.c b/lib/unilbrk/ulc-possible-linebreaks.c index 444e9a946..1632010ba 100644 --- a/lib/unilbrk/ulc-possible-linebreaks.c +++ b/lib/unilbrk/ulc-possible-linebreaks.c @@ -1,5 +1,5 @@ /* Line breaking of strings. - Copyright (C) 2001-2003, 2006-2008 Free Software Foundation, Inc. + Copyright (C) 2001-2003, 2006-2011 Free Software Foundation, Inc. Written by Bruno Haible , 2001. This program is free software: you can redistribute it and/or modify it @@ -21,13 +21,10 @@ #include "unilbrk.h" #include -#if HAVE_ICONV -# include -#endif +#include #include "c-ctype.h" -#include "streq.h" -#include "xsize.h" +#include "uniconv.h" #include "unilbrk/ulc-common.h" /* Line breaking of a string in an arbitrary encoding. @@ -45,94 +42,76 @@ void ulc_possible_linebreaks (const char *s, size_t n, const char *encoding, - char *p) + char *p) { - if (n == 0) - return; - if (is_utf8_encoding (encoding)) - u8_possible_linebreaks ((const uint8_t *) s, n, encoding, p); - else + if (n > 0) { -#if HAVE_ICONV - iconv_t to_utf8; - /* Avoid glibc-2.1 bug with EUC-KR. */ -# if (__GLIBC__ - 0 == 2 && __GLIBC_MINOR__ - 0 <= 1) && !defined _LIBICONV_VERSION - if (STREQ (encoding, "EUC-KR", 'E', 'U', 'C', '-', 'K', 'R', 0, 0, 0)) - to_utf8 = (iconv_t)(-1); + if (is_utf8_encoding (encoding)) + u8_possible_linebreaks ((const uint8_t *) s, n, encoding, p); else -# endif - /* Avoid Solaris 9 bug with GB2312, EUC-TW, BIG5, BIG5-HKSCS, GBK, - GB18030. */ -# if defined __sun && !defined _LIBICONV_VERSION - if ( STREQ (encoding, "GB2312", 'G', 'B', '2', '3', '1', '2', 0, 0, 0) - || STREQ (encoding, "EUC-TW", 'E', 'U', 'C', '-', 'T', 'W', 0, 0, 0) - || STREQ (encoding, "BIG5", 'B', 'I', 'G', '5', 0, 0, 0, 0, 0) - || STREQ (encoding, "BIG5-HKSCS", 'B', 'I', 'G', '5', '-', 'H', 'K', 'S', 'C') - || STREQ (encoding, "GBK", 'G', 'B', 'K', 0, 0, 0, 0, 0, 0) - || STREQ (encoding, "GB18030", 'G', 'B', '1', '8', '0', '3', '0', 0, 0)) - to_utf8 = (iconv_t)(-1); - else -# endif - to_utf8 = iconv_open (UTF8_NAME, encoding); - if (to_utf8 != (iconv_t)(-1)) - { - /* Determine the length of the resulting UTF-8 string. */ - size_t m = iconv_string_length (to_utf8, s, n); - if (m != (size_t)(-1)) - { - /* Convert the string to UTF-8 and build a translation table - from offsets into s to offsets into the translated string. */ - size_t memory_size = xsum3 (xtimes (n, sizeof (size_t)), m, m); - char *memory = - (size_in_bounds_p (memory_size) ? malloc (memory_size) : NULL); - if (memory != NULL) - { - size_t *offtable = (size_t *) memory; - char *t = (char *) (offtable + n); - char *q = (char *) (t + m); - size_t i; - - iconv_string_keeping_offsets (to_utf8, s, n, offtable, t, m); - - /* Determine the possible line breaks of the UTF-8 string. */ - u8_possible_linebreaks ((const uint8_t *) t, m, encoding, q); - - /* Translate the result back to the original string. */ - memset (p, UC_BREAK_PROHIBITED, n); - for (i = 0; i < n; i++) - if (offtable[i] != (size_t)(-1)) - p[i] = q[offtable[i]]; - - free (memory); - iconv_close (to_utf8); - return; - } - } - iconv_close (to_utf8); - } -#endif - /* Impossible to convert. */ + { + /* Convert the string to UTF-8 and build a translation table + from offsets into s to offsets into the translated string. */ + size_t *offsets = (size_t *) malloc (n * sizeof (size_t)); + + if (offsets != NULL) + { + uint8_t *t; + size_t m; + + t = u8_conv_from_encoding (encoding, iconveh_question_mark, + s, n, offsets, NULL, &m); + if (t != NULL) + { + char *q = (char *) (m > 0 ? malloc (m) : NULL); + + if (m == 0 || q != NULL) + { + size_t i; + + /* Determine the possible line breaks of the UTF-8 + string. */ + u8_possible_linebreaks (t, m, encoding, q); + + /* Translate the result back to the original string. */ + memset (p, UC_BREAK_PROHIBITED, n); + for (i = 0; i < n; i++) + if (offsets[i] != (size_t)(-1)) + p[i] = q[offsets[i]]; + + free (q); + free (t); + free (offsets); + return; + } + free (t); + } + free (offsets); + } + + /* Impossible to convert. */ #if C_CTYPE_ASCII - if (is_all_ascii (s, n)) - { - /* ASCII is a subset of UTF-8. */ - u8_possible_linebreaks ((const uint8_t *) s, n, encoding, p); - return; - } + if (is_all_ascii (s, n)) + { + /* ASCII is a subset of UTF-8. */ + u8_possible_linebreaks ((const uint8_t *) s, n, encoding, p); + return; + } #endif - /* We have a non-ASCII string and cannot convert it. - Don't produce line breaks except those already present in the - input string. All we assume here is that the encoding is - minimally ASCII compatible. */ - { - const char *s_end = s + n; - while (s < s_end) - { - *p = (*s == '\n' ? UC_BREAK_MANDATORY : UC_BREAK_PROHIBITED); - s++; - p++; - } - } + /* We have a non-ASCII string and cannot convert it. + Don't produce line breaks except those already present in the + input string. All we assume here is that the encoding is + minimally ASCII compatible. */ + { + const char *s_end = s + n; + while (s < s_end) + { + *p = (*s == '\n' ? UC_BREAK_MANDATORY : UC_BREAK_PROHIBITED); + s++; + p++; + } + } + } } } @@ -157,28 +136,28 @@ read_file (FILE *stream) while (! feof (stream)) { if (size + BUFSIZE > alloc) - { - alloc = alloc + alloc / 2; - if (alloc < size + BUFSIZE) - alloc = size + BUFSIZE; - buf = realloc (buf, alloc); - if (buf == NULL) - { - fprintf (stderr, "out of memory\n"); - exit (1); - } - } + { + alloc = alloc + alloc / 2; + if (alloc < size + BUFSIZE) + alloc = size + BUFSIZE; + buf = realloc (buf, alloc); + if (buf == NULL) + { + fprintf (stderr, "out of memory\n"); + exit (1); + } + } count = fread (buf + size, 1, BUFSIZE, stream); if (count == 0) - { - if (ferror (stream)) - { - perror ("fread"); - exit (1); - } - } + { + if (ferror (stream)) + { + perror ("fread"); + exit (1); + } + } else - size += count; + size += count; } buf = realloc (buf, size + 1); if (buf == NULL) @@ -206,21 +185,21 @@ main (int argc, char * argv[]) ulc_possible_linebreaks (input, length, locale_charset (), breaks); for (i = 0; i < length; i++) - { - switch (breaks[i]) - { - case UC_BREAK_POSSIBLE: - putc ('|', stdout); - break; - case UC_BREAK_MANDATORY: - break; - case UC_BREAK_PROHIBITED: - break; - default: - abort (); - } - putc (input[i], stdout); - } + { + switch (breaks[i]) + { + case UC_BREAK_POSSIBLE: + putc ('|', stdout); + break; + case UC_BREAK_MANDATORY: + break; + case UC_BREAK_PROHIBITED: + break; + default: + abort (); + } + putc (input[i], stdout); + } free (breaks);