X-Git-Url: http://erislabs.net/gitweb/?a=blobdiff_plain;f=lib%2Flinebreak.c;h=efb6cb06bacfa992110a44c2270e40314e934fd9;hb=6caf406fc9b7666a8bbdbc66dff4d7029a92ba07;hp=e1d47ea46ced1b0c20b21f8e88e786db5a4cb9ad;hpb=2fd230b5dc34baf4246d24eba3d4879aa934e730;p=gnulib.git diff --git a/lib/linebreak.c b/lib/linebreak.c index e1d47ea46..efb6cb06b 100644 --- a/lib/linebreak.c +++ b/lib/linebreak.c @@ -14,7 +14,7 @@ GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software -Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */ +Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */ #ifdef HAVE_CONFIG_H # include @@ -26,6 +26,7 @@ Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */ #include #include #include "c-ctype.h" +#include "xsize.h" #include "utf8-ucs4.h" @@ -233,7 +234,7 @@ static const unsigned char nonspacing_table_data[16*64] = { /* 0x0000-0x01ff */ 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, /* 0x0000-0x003f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x80, /* 0x0040-0x007f */ - 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, /* 0x0080-0x00bf */ + 0xff, 0xff, 0xff, 0xff, 0x00, 0x20, 0x00, 0x00, /* 0x0080-0x00bf */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x00c0-0x00ff */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x0100-0x013f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x0140-0x017f */ @@ -245,7 +246,7 @@ static const unsigned char nonspacing_table_data[16*64] = { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x0280-0x02bf */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x02c0-0x02ff */ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, /* 0x0300-0x033f */ - 0xff, 0xff, 0x00, 0x00, 0xff, 0xff, 0x00, 0x00, /* 0x0340-0x037f */ + 0xff, 0xff, 0xff, 0xe0, 0xff, 0xff, 0x00, 0x00, /* 0x0340-0x037f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x0380-0x03bf */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x03c0-0x03ff */ /* 0x0400-0x05ff */ @@ -258,8 +259,8 @@ static const unsigned char nonspacing_table_data[16*64] = { 0x00, 0x00, 0xfe, 0xff, 0xfb, 0xff, 0xff, 0xbb, /* 0x0580-0x05bf */ 0x16, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x05c0-0x05ff */ /* 0x0600-0x07ff */ - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x0600-0x063f */ - 0x00, 0xf8, 0x3f, 0x00, 0x00, 0x00, 0x01, 0x00, /* 0x0640-0x067f */ + 0x0f, 0x00, 0x3f, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x0600-0x063f */ + 0x00, 0xf8, 0xff, 0x01, 0x00, 0x00, 0x01, 0x00, /* 0x0640-0x067f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x0680-0x06bf */ 0x00, 0x00, 0xc0, 0xff, 0x9f, 0x3d, 0x00, 0x00, /* 0x06c0-0x06ff */ 0x00, 0x80, 0x02, 0x00, 0x00, 0x00, 0xff, 0xff, /* 0x0700-0x073f */ @@ -276,10 +277,10 @@ static const unsigned char nonspacing_table_data[16*64] = { 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x10, /* 0x0980-0x09bf */ 0x1e, 0x20, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00, /* 0x09c0-0x09ff */ /* 0x0a00-0x0bff */ - 0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x10, /* 0x0a00-0x0a3f */ + 0x06, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x10, /* 0x0a00-0x0a3f */ 0x86, 0x39, 0x00, 0x00, 0x00, 0x00, 0x03, 0x00, /* 0x0a40-0x0a7f */ 0x06, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x10, /* 0x0a80-0x0abf */ - 0xbe, 0x21, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x0ac0-0x0aff */ + 0xbe, 0x21, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00, /* 0x0ac0-0x0aff */ 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x90, /* 0x0b00-0x0b3f */ 0x0e, 0x20, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x0b40-0x0b7f */ 0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x0b80-0x0bbf */ @@ -287,8 +288,8 @@ static const unsigned char nonspacing_table_data[16*64] = { /* 0x0c00-0x0dff */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xc0, /* 0x0c00-0x0c3f */ 0xc1, 0x3d, 0x60, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x0c40-0x0c7f */ - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x80, /* 0x0c80-0x0cbf */ - 0x40, 0x30, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x0cc0-0x0cff */ + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x10, /* 0x0c80-0x0cbf */ + 0x00, 0x30, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x0cc0-0x0cff */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x0d00-0x0d3f */ 0x0e, 0x20, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x0d40-0x0d7f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x0d80-0x0dbf */ @@ -318,14 +319,14 @@ static const unsigned char nonspacing_table_data[16*64] = { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x16c0-0x16ff */ 0x00, 0x00, 0x1c, 0x00, 0x00, 0x00, 0x1c, 0x00, /* 0x1700-0x173f */ 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x0c, 0x00, /* 0x1740-0x177f */ - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x80, 0x3f, /* 0x1780-0x17bf */ - 0x40, 0xfe, 0x0f, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x17c0-0x17ff */ + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xb0, 0x3f, /* 0x1780-0x17bf */ + 0x40, 0xfe, 0x0f, 0x20, 0x00, 0x00, 0x00, 0x00, /* 0x17c0-0x17ff */ /* 0x1800-0x19ff */ - 0x00, 0x78, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x1800-0x183f */ + 0x00, 0x38, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x1800-0x183f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x1840-0x187f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, /* 0x1880-0x18bf */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x18c0-0x18ff */ - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x1900-0x193f */ + 0x00, 0x00, 0x00, 0x00, 0x87, 0x0f, 0x04, 0x0e, /* 0x1900-0x193f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x1940-0x197f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x1980-0x19bf */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x19c0-0x19ff */ @@ -419,7 +420,7 @@ uc_width (unsigned int uc, const char *encoding) if (ind >= 0) if ((nonspacing_table_data[64*ind + ((uc >> 3) & 63)] >> (uc & 7)) & 1) { - if (uc > 0 && uc < 0x100) + if (uc > 0 && uc < 0xa0) return -1; else return 0; @@ -427,7 +428,9 @@ uc_width (unsigned int uc, const char *encoding) } else if ((uc >> 9) == (0xe0000 >> 9)) { - if (uc >= 0xe0020 ? uc <= 0xe007f : uc == 0xe0001) + if (uc < 0xe0100 + ? (uc >= 0xe0020 ? uc <= 0xe007f : uc == 0xe0001) + : (uc <= 0xe01ef)) return 0; } /* Test for double-width character. @@ -436,15 +439,16 @@ uc_width (unsigned int uc, const char *encoding) */ if (uc >= 0x1100 && ((uc < 0x1160) /* Hangul Jamo */ - || (uc >= 0x2e80 && uc < 0xa4d0 /* CJK ... Yi */ + || (uc >= 0x2e80 && uc < 0x4dc0 /* CJK */ && !(uc == 0x303f)) + || (uc >= 0x4e00 && uc < 0xa4d0) /* CJK ... Yi */ || (uc >= 0xac00 && uc < 0xd7a4) /* Hangul Syllables */ || (uc >= 0xf900 && uc < 0xfb00) /* CJK Compatibility Ideographs */ || (uc >= 0xfe30 && uc < 0xfe70) /* CJK Compatibility Forms */ || (uc >= 0xff00 && uc < 0xff61) /* Fullwidth Forms */ || (uc >= 0xffe0 && uc < 0xffe7) - || (uc >= 0x20000 && uc <= 0x2a6d6) /* CJK */ - || (uc >= 0x2f800 && uc <= 0x2fa1d) /* CJK Compatibility Ideographs */ + || (uc >= 0x20000 && uc <= 0x2fffd) /* CJK, CJK Compatibility Ideographs */ + || (uc >= 0x30000 && uc <= 0x3fffd) ) ) return 2; /* In ancient CJK encodings, Cyrillic and most other characters are @@ -1020,7 +1024,7 @@ u8_width_linebreaks (const unsigned char *s, size_t n, /* No line break for the moment, may be turned into UC_BREAK_POSSIBLE later, via last_p. */ } - + *p = UC_BREAK_PROHIBITED; w = uc_width (uc, encoding); @@ -1104,7 +1108,7 @@ u16_width_linebreaks (const unsigned short *s, size_t n, /* No line break for the moment, may be turned into UC_BREAK_POSSIBLE later, via last_p. */ } - + *p = UC_BREAK_PROHIBITED; w = uc_width (uc, encoding); @@ -1187,7 +1191,7 @@ u32_width_linebreaks (const unsigned int *s, size_t n, /* No line break for the moment, may be turned into UC_BREAK_POSSIBLE later, via last_p. */ } - + *p = UC_BREAK_PROHIBITED; w = uc_width (uc, encoding); @@ -1376,11 +1380,11 @@ iconv_string_length (iconv_t cd, const char *s, size_t n) char *outptr = tmpbuf; size_t outsize = TMPBUFSIZE; size_t res = iconv (cd, (ICONV_CONST char **) &inptr, &insize, &outptr, &outsize); - if (res == (size_t)(-1)) + if (res == (size_t)(-1) && errno != E2BIG) return (size_t)(-1); count += outptr - tmpbuf; } - /* Avoid glibc-2.1 bug and Solaris 2.7-2.9 bug. */ + /* Avoid glibc-2.1 bug and Solaris 7 through 9 bug. */ #if defined _LIBICONV_VERSION \ || !((__GLIBC__ - 0 == 2 && __GLIBC_MINOR__ - 0 <= 1) || defined __sun) { @@ -1445,7 +1449,7 @@ iconv_string_keeping_offsets (iconv_t cd, const char *s, size_t n, if (res == (size_t)(-1)) abort (); } - /* Avoid glibc-2.1 bug and Solaris 2.7 bug. */ + /* Avoid glibc-2.1 bug and Solaris 7 bug. */ #if defined _LIBICONV_VERSION \ || !((__GLIBC__ - 0 == 2 && __GLIBC_MINOR__ - 0 <= 1) || defined __sun) if (iconv (cd, NULL, NULL, &outptr, &outsize) == (size_t)(-1)) @@ -1495,7 +1499,7 @@ mbs_possible_linebreaks (const char *s, size_t n, const char *encoding, to_utf8 = (iconv_t)(-1); else # endif - /* Avoid Solaris 2.9 bug with GB2312, EUC-TW, BIG5, BIG5-HKSCS, GBK, + /* Avoid Solaris 9 bug with GB2312, EUC-TW, BIG5, BIG5-HKSCS, GBK, GB18030. */ # if defined __sun && !defined _LIBICONV_VERSION if ( STREQ (encoding, "GB2312", 'G', 'B', '2', '3', '1', '2', 0, 0, 0) @@ -1516,7 +1520,9 @@ mbs_possible_linebreaks (const char *s, size_t n, const char *encoding, { /* Convert the string to UTF-8 and build a translation table from offsets into s to offsets into the translated string. */ - char *memory = malloc (n * sizeof (size_t) + m + m); + size_t memory_size = xsum3 (xtimes (n, sizeof (size_t)), m, m); + char *memory = + (size_in_bounds_p (memory_size) ? malloc (memory_size) : NULL); if (memory != NULL) { size_t *offtable = (size_t *) memory; @@ -1588,7 +1594,7 @@ mbs_width_linebreaks (const char *s, size_t n, to_utf8 = (iconv_t)(-1); else # endif - /* Avoid Solaris 2.9 bug with GB2312, EUC-TW, BIG5, BIG5-HKSCS, GBK, + /* Avoid Solaris 9 bug with GB2312, EUC-TW, BIG5, BIG5-HKSCS, GBK, GB18030. */ # if defined __sun && !defined _LIBICONV_VERSION if ( STREQ (encoding, "GB2312", 'G', 'B', '2', '3', '1', '2', 0, 0, 0) @@ -1609,7 +1615,11 @@ mbs_width_linebreaks (const char *s, size_t n, { /* Convert the string to UTF-8 and build a translation table from offsets into s to offsets into the translated string. */ - char *memory = malloc (n * sizeof (size_t) + m + m + (o != NULL ? m : 0)); + size_t memory_size = + xsum4 (xtimes (n, sizeof (size_t)), m, m, + (o != NULL ? m : 0)); + char *memory = + (size_in_bounds_p (memory_size) ? malloc (memory_size) : NULL); if (memory != NULL) { size_t *offtable = (size_t *) memory;