From 221d8efe25aa7f053d3a2d2dedb4f98886500784 Mon Sep 17 00:00:00 2001 From: Ben Pfaff Date: Sat, 1 Jan 2011 08:47:34 -0800 Subject: [PATCH] unigbrk: New modules for grapheme clusters. New module 'u8-grapheme-len'. * modules/unigbrk/u8-grapheme-len: New file. * modules/unigbrk/u8-grapheme-len-tests: New file. * lib/unigbrk.in.h: Add prototype for new function. * lib/unigbrk/u8-grapheme-len.c: New file. * tests/unigbrk/test-u8-grapheme-len.c: New file. New module 'u16-grapheme-len'. * modules/unigbrk/u16-grapheme-len: New file. * modules/unigbrk/u16-grapheme-len-tests: New file. * lib/unigbrk.in.h: Add prototype for new function. * lib/unigbrk/u16-grapheme-len.c: New file. * tests/unigbrk/test-u16-grapheme-len.c: New file. New module 'u32-grapheme-len'. * modules/unigbrk/u32-grapheme-len: New file. * modules/unigbrk/u32-grapheme-len-tests: New file. * lib/unigbrk.in.h: Add prototype for new function. * lib/unigbrk/u32-grapheme-len.c: New file. * tests/unigbrk/test-u32-grapheme-len.c: New file. New module 'u8-grapheme-next'. * modules/unigbrk/u8-grapheme-next: New file. * modules/unigbrk/u8-grapheme-next-tests: New file. * lib/unigbrk.in.h: Add prototype for new function. * lib/unigbrk/u8-grapheme-next.c: New file. * tests/unigbrk/test-u8-grapheme-next.c: New file. New module 'u16-grapheme-next'. * modules/unigbrk/u16-grapheme-next: New file. * modules/unigbrk/u16-grapheme-next-tests: New file. * lib/unigbrk.in.h: Add prototype for new function. * lib/unigbrk/u16-grapheme-next.c: New file. * tests/unigbrk/test-u16-grapheme-next.c: New file. New module 'u32-grapheme-next'. * modules/unigbrk/u32-grapheme-next: New file. * modules/unigbrk/u32-grapheme-next-tests: New file. * lib/unigbrk.in.h: Add prototype for new function. * lib/unigbrk/u32-grapheme-next.c: New file. * tests/unigbrk/test-u32-grapheme-next.c: New file. New module 'u8-grapheme-prev'. * modules/unigbrk/u8-grapheme-prev: New file. * modules/unigbrk/u8-grapheme-prev-tests: New file. 
* lib/unigbrk.in.h: Add prototype for new function. * lib/unigbrk/u8-grapheme-prev.c: New file. * tests/unigbrk/test-u8-grapheme-prev.c: New file. New module 'u16-grapheme-prev'. * modules/unigbrk/u16-grapheme-prev: New file. * modules/unigbrk/u16-grapheme-prev-tests: New file. * lib/unigbrk.in.h: Add prototype for new function. * lib/unigbrk/u16-grapheme-prev.c: New file. * tests/unigbrk/test-u16-grapheme-prev.c: New file. New module 'u32-grapheme-prev'. * modules/unigbrk/u32-grapheme-prev: New file. * modules/unigbrk/u32-grapheme-prev-tests: New file. * lib/unigbrk.in.h: Add prototype for new function. * lib/unigbrk/u32-grapheme-prev.c: New file. * tests/unigbrk/test-u32-grapheme-prev.c: New file. New module 'u8-grapheme-breaks'. * modules/unigbrk/u8-grapheme-breaks: New file. * modules/unigbrk/u8-grapheme-breaks-tests: New file. * lib/unigbrk.in.h: Add prototype for new function. * lib/unigbrk/u8-grapheme-breaks.c: New file. * tests/unigbrk/test-u8-grapheme-breaks.c: New file. New module 'u16-grapheme-breaks'. * modules/unigbrk/u16-grapheme-breaks: New file. * modules/unigbrk/u16-grapheme-breaks-tests: New file. * lib/unigbrk.in.h: Add prototype for new function. * lib/unigbrk/u16-grapheme-breaks.c: New file. * tests/unigbrk/test-u16-grapheme-breaks.c: New file. New module 'u32-grapheme-breaks'. * modules/unigbrk/u32-grapheme-breaks: New file. * modules/unigbrk/u32-grapheme-breaks-tests: New file. * lib/unigbrk.in.h: Add prototype for new function. * lib/unigbrk/u32-grapheme-breaks.c: New file. * tests/unigbrk/test-u32-grapheme-breaks.c: New file. New module 'ulc-grapheme-breaks'. * modules/unigbrk/ulc-grapheme-breaks: New file. * modules/unigbrk/ulc-grapheme-breaks-tests: New file. * m4/locale-ar.m4: New file. * lib/unigbrk/ulc-grapheme-breaks.c: New file. * tests/unigbrk/test-ulc-grapheme-breaks.c: New file. * tests/unigbrk/test-ulc-grapheme-breaks.sh: New file. 
--- ChangeLog | 94 +++++++++++++++++++++ lib/unigbrk.in.h | 45 +++++++++++ lib/unigbrk/u16-grapheme-breaks.c | 45 +++++++++++ lib/unigbrk/u16-grapheme-len.c | 47 +++++++++++ lib/unigbrk/u16-grapheme-next.c | 46 +++++++++++ lib/unigbrk/u16-grapheme-prev.c | 55 +++++++++++++ lib/unigbrk/u32-grapheme-breaks.c | 42 ++++++++++ lib/unigbrk/u32-grapheme-len.c | 47 +++++++++++ lib/unigbrk/u32-grapheme-next.c | 46 +++++++++++ lib/unigbrk/u32-grapheme-prev.c | 51 ++++++++++++ lib/unigbrk/u8-grapheme-breaks.c | 46 +++++++++++ lib/unigbrk/u8-grapheme-len.c | 47 +++++++++++ lib/unigbrk/u8-grapheme-next.c | 46 +++++++++++ lib/unigbrk/u8-grapheme-prev.c | 55 +++++++++++++ lib/unigbrk/ulc-grapheme-breaks.c | 130 ++++++++++++++++++++++++++++++ m4/locale-ar.m4 | 69 ++++++++++++++++ modules/unigbrk/u16-grapheme-breaks | 28 +++++++ modules/unigbrk/u16-grapheme-breaks-tests | 12 +++ modules/unigbrk/u16-grapheme-len | 28 +++++++ modules/unigbrk/u16-grapheme-len-tests | 12 +++ modules/unigbrk/u16-grapheme-next | 28 +++++++ modules/unigbrk/u16-grapheme-next-tests | 12 +++ modules/unigbrk/u16-grapheme-prev | 28 +++++++ modules/unigbrk/u16-grapheme-prev-tests | 12 +++ modules/unigbrk/u32-grapheme-breaks | 28 +++++++ modules/unigbrk/u32-grapheme-breaks-tests | 12 +++ modules/unigbrk/u32-grapheme-len | 28 +++++++ modules/unigbrk/u32-grapheme-len-tests | 12 +++ modules/unigbrk/u32-grapheme-next | 28 +++++++ modules/unigbrk/u32-grapheme-next-tests | 12 +++ modules/unigbrk/u32-grapheme-prev | 28 +++++++ modules/unigbrk/u32-grapheme-prev-tests | 12 +++ modules/unigbrk/u8-grapheme-breaks | 28 +++++++ modules/unigbrk/u8-grapheme-breaks-tests | 12 +++ modules/unigbrk/u8-grapheme-len | 28 +++++++ modules/unigbrk/u8-grapheme-len-tests | 12 +++ modules/unigbrk/u8-grapheme-next | 28 +++++++ modules/unigbrk/u8-grapheme-next-tests | 12 +++ modules/unigbrk/u8-grapheme-prev | 28 +++++++ modules/unigbrk/u8-grapheme-prev-tests | 12 +++ modules/unigbrk/ulc-grapheme-breaks | 30 +++++++ 
modules/unigbrk/ulc-grapheme-breaks-tests | 18 +++++ tests/unigbrk/test-u16-grapheme-breaks.c | 105 ++++++++++++++++++++++++ tests/unigbrk/test-u16-grapheme-len.c | 95 ++++++++++++++++++++++ tests/unigbrk/test-u16-grapheme-next.c | 102 +++++++++++++++++++++++ tests/unigbrk/test-u16-grapheme-prev.c | 104 ++++++++++++++++++++++++ tests/unigbrk/test-u32-grapheme-breaks.c | 105 ++++++++++++++++++++++++ tests/unigbrk/test-u32-grapheme-len.c | 96 ++++++++++++++++++++++ tests/unigbrk/test-u32-grapheme-next.c | 103 +++++++++++++++++++++++ tests/unigbrk/test-u32-grapheme-prev.c | 105 ++++++++++++++++++++++++ tests/unigbrk/test-u8-grapheme-breaks.c | 96 ++++++++++++++++++++++ tests/unigbrk/test-u8-grapheme-len.c | 52 ++++++++++++ tests/unigbrk/test-u8-grapheme-next.c | 78 ++++++++++++++++++ tests/unigbrk/test-u8-grapheme-prev.c | 79 ++++++++++++++++++ tests/unigbrk/test-ulc-grapheme-breaks.c | 86 ++++++++++++++++++++ tests/unigbrk/test-ulc-grapheme-breaks.sh | 15 ++++ 56 files changed, 2660 insertions(+) create mode 100644 lib/unigbrk/u16-grapheme-breaks.c create mode 100644 lib/unigbrk/u16-grapheme-len.c create mode 100644 lib/unigbrk/u16-grapheme-next.c create mode 100644 lib/unigbrk/u16-grapheme-prev.c create mode 100644 lib/unigbrk/u32-grapheme-breaks.c create mode 100644 lib/unigbrk/u32-grapheme-len.c create mode 100644 lib/unigbrk/u32-grapheme-next.c create mode 100644 lib/unigbrk/u32-grapheme-prev.c create mode 100644 lib/unigbrk/u8-grapheme-breaks.c create mode 100644 lib/unigbrk/u8-grapheme-len.c create mode 100644 lib/unigbrk/u8-grapheme-next.c create mode 100644 lib/unigbrk/u8-grapheme-prev.c create mode 100644 lib/unigbrk/ulc-grapheme-breaks.c create mode 100644 m4/locale-ar.m4 create mode 100644 modules/unigbrk/u16-grapheme-breaks create mode 100644 modules/unigbrk/u16-grapheme-breaks-tests create mode 100644 modules/unigbrk/u16-grapheme-len create mode 100644 modules/unigbrk/u16-grapheme-len-tests create mode 100644 modules/unigbrk/u16-grapheme-next create mode 
100644 modules/unigbrk/u16-grapheme-next-tests create mode 100644 modules/unigbrk/u16-grapheme-prev create mode 100644 modules/unigbrk/u16-grapheme-prev-tests create mode 100644 modules/unigbrk/u32-grapheme-breaks create mode 100644 modules/unigbrk/u32-grapheme-breaks-tests create mode 100644 modules/unigbrk/u32-grapheme-len create mode 100644 modules/unigbrk/u32-grapheme-len-tests create mode 100644 modules/unigbrk/u32-grapheme-next create mode 100644 modules/unigbrk/u32-grapheme-next-tests create mode 100644 modules/unigbrk/u32-grapheme-prev create mode 100644 modules/unigbrk/u32-grapheme-prev-tests create mode 100644 modules/unigbrk/u8-grapheme-breaks create mode 100644 modules/unigbrk/u8-grapheme-breaks-tests create mode 100644 modules/unigbrk/u8-grapheme-len create mode 100644 modules/unigbrk/u8-grapheme-len-tests create mode 100644 modules/unigbrk/u8-grapheme-next create mode 100644 modules/unigbrk/u8-grapheme-next-tests create mode 100644 modules/unigbrk/u8-grapheme-prev create mode 100644 modules/unigbrk/u8-grapheme-prev-tests create mode 100644 modules/unigbrk/ulc-grapheme-breaks create mode 100644 modules/unigbrk/ulc-grapheme-breaks-tests create mode 100644 tests/unigbrk/test-u16-grapheme-breaks.c create mode 100644 tests/unigbrk/test-u16-grapheme-len.c create mode 100644 tests/unigbrk/test-u16-grapheme-next.c create mode 100644 tests/unigbrk/test-u16-grapheme-prev.c create mode 100644 tests/unigbrk/test-u32-grapheme-breaks.c create mode 100644 tests/unigbrk/test-u32-grapheme-len.c create mode 100644 tests/unigbrk/test-u32-grapheme-next.c create mode 100644 tests/unigbrk/test-u32-grapheme-prev.c create mode 100644 tests/unigbrk/test-u8-grapheme-breaks.c create mode 100644 tests/unigbrk/test-u8-grapheme-len.c create mode 100644 tests/unigbrk/test-u8-grapheme-next.c create mode 100644 tests/unigbrk/test-u8-grapheme-prev.c create mode 100644 tests/unigbrk/test-ulc-grapheme-breaks.c create mode 100755 tests/unigbrk/test-ulc-grapheme-breaks.sh diff --git 
a/ChangeLog b/ChangeLog index 97214e7d4..7acf4b618 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,5 +1,99 @@ 2010-12-31 Ben Pfaff + New module 'u8-grapheme-len'. + * modules/unigbrk/u8-grapheme-len: New file. + * modules/unigbrk/u8-grapheme-len-tests: New file. + * lib/unigbrk.in.h: Add prototype for new function. + * lib/unigbrk/u8-grapheme-len.c: New file. + * tests/unigbrk/test-u8-grapheme-len.c: New file. + + New module 'u16-grapheme-len'. + * modules/unigbrk/u16-grapheme-len: New file. + * modules/unigbrk/u16-grapheme-len-tests: New file. + * lib/unigbrk.in.h: Add prototype for new function. + * lib/unigbrk/u16-grapheme-len.c: New file. + * tests/unigbrk/test-u16-grapheme-len.c: New file. + + New module 'u32-grapheme-len'. + * modules/unigbrk/u32-grapheme-len: New file. + * modules/unigbrk/u32-grapheme-len-tests: New file. + * lib/unigbrk.in.h: Add prototype for new function. + * lib/unigbrk/u32-grapheme-len.c: New file. + * tests/unigbrk/test-u32-grapheme-len.c: New file. + + New module 'u8-grapheme-next'. + * modules/unigbrk/u8-grapheme-next: New file. + * modules/unigbrk/u8-grapheme-next-tests: New file. + * lib/unigbrk.in.h: Add prototype for new function. + * lib/unigbrk/u8-grapheme-next.c: New file. + * tests/unigbrk/test-u8-grapheme-next.c: New file. + + New module 'u16-grapheme-next'. + * modules/unigbrk/u16-grapheme-next: New file. + * modules/unigbrk/u16-grapheme-next-tests: New file. + * lib/unigbrk.in.h: Add prototype for new function. + * lib/unigbrk/u16-grapheme-next.c: New file. + * tests/unigbrk/test-u16-grapheme-next.c: New file. + + New module 'u32-grapheme-next'. + * modules/unigbrk/u32-grapheme-next: New file. + * modules/unigbrk/u32-grapheme-next-tests: New file. + * lib/unigbrk.in.h: Add prototype for new function. + * lib/unigbrk/u32-grapheme-next.c: New file. + * tests/unigbrk/test-u32-grapheme-next.c: New file. + + New module 'u8-grapheme-prev'. + * modules/unigbrk/u8-grapheme-prev: New file. 
+ * modules/unigbrk/u8-grapheme-prev-tests: New file. + * lib/unigbrk.in.h: Add prototype for new function. + * lib/unigbrk/u8-grapheme-prev.c: New file. + * tests/unigbrk/test-u8-grapheme-prev.c: New file. + + New module 'u16-grapheme-prev'. + * modules/unigbrk/u16-grapheme-prev: New file. + * modules/unigbrk/u16-grapheme-prev-tests: New file. + * lib/unigbrk.in.h: Add prototype for new function. + * lib/unigbrk/u16-grapheme-prev.c: New file. + * tests/unigbrk/test-u16-grapheme-prev.c: New file. + + New module 'u32-grapheme-prev'. + * modules/unigbrk/u32-grapheme-prev: New file. + * modules/unigbrk/u32-grapheme-prev-tests: New file. + * lib/unigbrk.in.h: Add prototype for new function. + * lib/unigbrk/u32-grapheme-prev.c: New file. + * tests/unigbrk/test-u32-grapheme-prev.c: New file. + + New module 'u8-grapheme-breaks'. + * modules/unigbrk/u8-grapheme-breaks: New file. + * modules/unigbrk/u8-grapheme-breaks-tests: New file. + * lib/unigbrk.in.h: Add prototype for new function. + * lib/unigbrk/u8-grapheme-breaks.c: New file. + * tests/unigbrk/test-u8-grapheme-breaks.c: New file. + + New module 'u16-grapheme-breaks'. + * modules/unigbrk/u16-grapheme-breaks: New file. + * modules/unigbrk/u16-grapheme-breaks-tests: New file. + * lib/unigbrk.in.h: Add prototype for new function. + * lib/unigbrk/u16-grapheme-breaks.c: New file. + * tests/unigbrk/test-u16-grapheme-breaks.c: New file. + + New module 'u32-grapheme-breaks'. + * modules/unigbrk/u32-grapheme-breaks: New file. + * modules/unigbrk/u32-grapheme-breaks-tests: New file. + * lib/unigbrk.in.h: Add prototype for new function. + * lib/unigbrk/u32-grapheme-breaks.c: New file. + * tests/unigbrk/test-u32-grapheme-breaks.c: New file. + + New module 'ulc-grapheme-breaks'. + * modules/unigbrk/ulc-grapheme-breaks: New file. + * modules/unigbrk/ulc-grapheme-breaks-tests: New file. + * m4/locale-ar.m4: New file. + * lib/unigbrk/ulc-grapheme-breaks.c: New file. + * tests/unigbrk/test-ulc-grapheme-breaks.c: New file. 
+ * tests/unigbrk/test-ulc-grapheme-breaks.sh: New file. + +2010-12-31 Ben Pfaff + gbrkprop: Fix implementation of uc_graphemeclusterbreak_property. * lib/unigbrk/gbrkprop.h: Regenerate with gen-uni-tables.c. I had modified how this file was generated before I initially submitted diff --git a/lib/unigbrk.in.h b/lib/unigbrk.in.h index c6056b376..5310d6fc6 100644 --- a/lib/unigbrk.in.h +++ b/lib/unigbrk.in.h @@ -21,6 +21,9 @@ /* Get bool. */ #include <stdbool.h> +/* Get size_t. */ +#include <stddef.h> + #include "unitypes.h" #ifdef __cplusplus @@ -75,6 +78,48 @@ extern int extern bool uc_is_grapheme_cluster_break (ucs4_t a, ucs4_t b); +/* Returns the length (in units) of the first grapheme cluster in the N units + in S. If the return value is N, then extending the string could extend the + length of the grapheme cluster too. Returns 0 only if N is zero. */ +extern size_t + u8_grapheme_len (const uint8_t *s, size_t n); +extern size_t + u16_grapheme_len (const uint16_t *s, size_t n); +extern size_t + u32_grapheme_len (const uint32_t *s, size_t n); + +/* Returns the start of the next grapheme cluster following S, or NULL if the + end of the string has been reached. */ +extern const uint8_t * + u8_grapheme_next (const uint8_t *s, const uint8_t *end); +extern const uint16_t * + u16_grapheme_next (const uint16_t *s, const uint16_t *end); +extern const uint32_t * + u32_grapheme_next (const uint32_t *s, const uint32_t *end); + +/* Returns the start of the previous grapheme cluster before S, or NULL if the + start of the string has been reached. */ +extern const uint8_t * + u8_grapheme_prev (const uint8_t *s, const uint8_t *start); +extern const uint16_t * + u16_grapheme_prev (const uint16_t *s, const uint16_t *start); +extern const uint32_t * + u32_grapheme_prev (const uint32_t *s, const uint32_t *start); + +/* Determine the grapheme cluster boundaries in S, and store the result at + p[0..n-1]. p[i] = 1 means that a new grapheme cluster begins at s[i]. 
p[i] + = 0 means that s[i-1] and s[i] are part of the same grapheme cluster. p[0] + will always be 1. + */ +extern void + u8_grapheme_breaks (const uint8_t *s, size_t n, char *p); +extern void + u16_grapheme_breaks (const uint16_t *s, size_t n, char *p); +extern void + u32_grapheme_breaks (const uint32_t *s, size_t n, char *p); +extern void + ulc_grapheme_breaks (const char *s, size_t n, char *p); + /* ========================================================================= */ #ifdef __cplusplus diff --git a/lib/unigbrk/u16-grapheme-breaks.c b/lib/unigbrk/u16-grapheme-breaks.c new file mode 100644 index 000000000..38e01c29e --- /dev/null +++ b/lib/unigbrk/u16-grapheme-breaks.c @@ -0,0 +1,45 @@ +/* Grapheme cluster breaks function. + Copyright (C) 2010 Free Software Foundation, Inc. + Written by Ben Pfaff , 2010. + + This program is free software: you can redistribute it and/or modify it + under the terms of the GNU Lesser General Public License as published + by the Free Software Foundation; either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. */ + +#include <config.h> + +/* Specification. 
*/ +#include "unigbrk.h" + +#include "unistr.h" + +void +u16_grapheme_breaks (const uint16_t *s, size_t n, char *p) +{ + ucs4_t prev; + int mblen; + + prev = 0; + for (; n > 0; s += mblen, p += mblen, n -= mblen) + { + ucs4_t next; + int i; + + mblen = u16_mbtouc (&next, s, n); + + p[0] = uc_is_grapheme_cluster_break (prev, next); + if (mblen > 1) + p[1] = 0; + + prev = next; + } +} diff --git a/lib/unigbrk/u16-grapheme-len.c b/lib/unigbrk/u16-grapheme-len.c new file mode 100644 index 000000000..6960227de --- /dev/null +++ b/lib/unigbrk/u16-grapheme-len.c @@ -0,0 +1,47 @@ +/* Grapheme cluster length function. + Copyright (C) 2010 Free Software Foundation, Inc. + Written by Ben Pfaff , 2010. + + This program is free software: you can redistribute it and/or modify it + under the terms of the GNU Lesser General Public License as published + by the Free Software Foundation; either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. */ + +#include <config.h> + +/* Specification. 
*/ +#include "unigbrk.h" + +#include "unistr.h" + +size_t +u16_grapheme_len (const uint16_t *s, size_t n) +{ + ucs4_t prev; + size_t ofs; + int mblen; + + if (n == 0) + return 0; + + for (ofs = u16_mbtouc (&prev, s, n); ofs < n; ofs += mblen) + { + ucs4_t next; + + mblen = u16_mbtouc (&next, s + ofs, n - ofs); + if (uc_is_grapheme_cluster_break (prev, next)) + break; + + prev = next; + } + + return ofs; +} diff --git a/lib/unigbrk/u16-grapheme-next.c b/lib/unigbrk/u16-grapheme-next.c new file mode 100644 index 000000000..49bee8b1a --- /dev/null +++ b/lib/unigbrk/u16-grapheme-next.c @@ -0,0 +1,46 @@ +/* Next grapheme cluster function. + Copyright (C) 2010 Free Software Foundation, Inc. + Written by Ben Pfaff , 2010. + + This program is free software: you can redistribute it and/or modify it + under the terms of the GNU Lesser General Public License as published + by the Free Software Foundation; either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. */ + +#include <config.h> + +/* Specification. 
*/ +#include "unigbrk.h" + +#include "unistr.h" + +const uint16_t * +u16_grapheme_next (const uint16_t *s, const uint16_t *end) +{ + ucs4_t prev; + int mblen; + + if (s == end) + return NULL; + + for (s += u16_mbtouc (&prev, s, end - s); s != end; s += mblen) + { + ucs4_t next; + + mblen = u16_mbtouc (&next, s, end - s); + if (uc_is_grapheme_cluster_break (prev, next)) + break; + + prev = next; + } + + return s; +} diff --git a/lib/unigbrk/u16-grapheme-prev.c b/lib/unigbrk/u16-grapheme-prev.c new file mode 100644 index 000000000..08e74d887 --- /dev/null +++ b/lib/unigbrk/u16-grapheme-prev.c @@ -0,0 +1,55 @@ +/* Previous grapheme cluster function. + Copyright (C) 2010 Free Software Foundation, Inc. + Written by Ben Pfaff , 2010. + + This program is free software: you can redistribute it and/or modify it + under the terms of the GNU Lesser General Public License as published + by the Free Software Foundation; either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. */ + +#include <config.h> + +/* Specification. */ +#include "unigbrk.h" + +#include "unistr.h" + +const uint16_t * +u16_grapheme_prev (const uint16_t *s, const uint16_t *start) +{ + ucs4_t next; + int mblen; + + if (s == start) + return NULL; + + s = u16_prev (&next, s, start); + while (s != start) + { + const uint16_t *prev_s; + ucs4_t prev; + + prev_s = u16_prev (&prev, s, start); + if (prev_s == NULL) + { + /* Ill-formed UTF-16 encoding. 
*/ + return start; + } + + if (uc_is_grapheme_cluster_break (prev, next)) + break; + + s = prev_s; + next = prev; + } + + return s; +} diff --git a/lib/unigbrk/u32-grapheme-breaks.c b/lib/unigbrk/u32-grapheme-breaks.c new file mode 100644 index 000000000..358fd067b --- /dev/null +++ b/lib/unigbrk/u32-grapheme-breaks.c @@ -0,0 +1,42 @@ +/* Grapheme cluster breaks function. + Copyright (C) 2010 Free Software Foundation, Inc. + Written by Ben Pfaff , 2010. + + This program is free software: you can redistribute it and/or modify it + under the terms of the GNU Lesser General Public License as published + by the Free Software Foundation; either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. */ + +#include <config.h> + +/* Specification. */ +#include "unigbrk.h" + +#include "unistr.h" + +void +u32_grapheme_breaks (const uint32_t *s, size_t n, char *p) +{ + ucs4_t prev; + size_t i; + + prev = 0; + for (i = 0; i < n; i++) + { + ucs4_t next; + + u32_mbtouc (&next, &s[i], 1); + + p[i] = uc_is_grapheme_cluster_break (prev, next); + + prev = next; + } +} diff --git a/lib/unigbrk/u32-grapheme-len.c b/lib/unigbrk/u32-grapheme-len.c new file mode 100644 index 000000000..4a6f5e77e --- /dev/null +++ b/lib/unigbrk/u32-grapheme-len.c @@ -0,0 +1,47 @@ +/* Grapheme cluster length function. + Copyright (C) 2010 Free Software Foundation, Inc. + Written by Ben Pfaff , 2010. 
+ + This program is free software: you can redistribute it and/or modify it + under the terms of the GNU Lesser General Public License as published + by the Free Software Foundation; either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. */ + +#include <config.h> + +/* Specification. */ +#include "unigbrk.h" + +#include "unistr.h" + +size_t +u32_grapheme_len (const uint32_t *s, size_t n) +{ + ucs4_t prev; + size_t ofs; + + if (n == 0) + return 0; + + u32_mbtouc (&prev, s, n); + for (ofs = 1; ofs < n; ofs++) + { + ucs4_t next; + + u32_mbtouc (&next, s + ofs, n - ofs); + if (uc_is_grapheme_cluster_break (prev, next)) + break; + + prev = next; + } + + return ofs; +} diff --git a/lib/unigbrk/u32-grapheme-next.c b/lib/unigbrk/u32-grapheme-next.c new file mode 100644 index 000000000..d9b6ce40f --- /dev/null +++ b/lib/unigbrk/u32-grapheme-next.c @@ -0,0 +1,46 @@ +/* Next grapheme cluster function. + Copyright (C) 2010 Free Software Foundation, Inc. + Written by Ben Pfaff , 2010. + + This program is free software: you can redistribute it and/or modify it + under the terms of the GNU Lesser General Public License as published + by the Free Software Foundation; either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. 
*/ + +#include <config.h> + +/* Specification. */ +#include "unigbrk.h" + +#include "unistr.h" + +const uint32_t * +u32_grapheme_next (const uint32_t *s, const uint32_t *end) +{ + ucs4_t prev; + + if (s == end) + return NULL; + + u32_mbtouc (&prev, s, end - s); + for (s++; s != end; s++) + { + ucs4_t next; + + u32_mbtouc (&next, s, end - s); + if (uc_is_grapheme_cluster_break (prev, next)) + break; + + prev = next; + } + + return s; +} diff --git a/lib/unigbrk/u32-grapheme-prev.c b/lib/unigbrk/u32-grapheme-prev.c new file mode 100644 index 000000000..587ab4e22 --- /dev/null +++ b/lib/unigbrk/u32-grapheme-prev.c @@ -0,0 +1,51 @@ +/* Previous grapheme cluster function. + Copyright (C) 2010 Free Software Foundation, Inc. + Written by Ben Pfaff , 2010. + + This program is free software: you can redistribute it and/or modify it + under the terms of the GNU Lesser General Public License as published + by the Free Software Foundation; either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. */ + +#include <config.h> + +/* Specification. */ +#include "unigbrk.h" + +#include "unistr.h" + +const uint32_t * +u32_grapheme_prev (const uint32_t *s, const uint32_t *start) +{ + ucs4_t next; + + if (s == start) + return NULL; + + u32_prev (&next, s, start); + for (s--; s != start; s--) + { + ucs4_t prev; + + if (u32_prev (&prev, s, start) == NULL) + { + /* Ill-formed UTF-32 encoding. 
*/ + return start; + } + + if (uc_is_grapheme_cluster_break (prev, next)) + break; + + next = prev; + } + + return s; +} diff --git a/lib/unigbrk/u8-grapheme-breaks.c b/lib/unigbrk/u8-grapheme-breaks.c new file mode 100644 index 000000000..811d95ea9 --- /dev/null +++ b/lib/unigbrk/u8-grapheme-breaks.c @@ -0,0 +1,46 @@ +/* Grapheme cluster breaks function. + Copyright (C) 2001-2003, 2006-2010 Free Software Foundation, Inc. + Written by Ben Pfaff , 2010, + based on code written by Bruno Haible , 2009. + + This program is free software: you can redistribute it and/or modify it + under the terms of the GNU Lesser General Public License as published + by the Free Software Foundation; either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. */ + +#include <config.h> + +/* Specification. */ +#include "unigbrk.h" + +#include "unistr.h" + +void +u8_grapheme_breaks (const uint8_t *s, size_t n, char *p) +{ + ucs4_t prev; + int mblen; + + prev = 0; + for (; n > 0; s += mblen, p += mblen, n -= mblen) + { + ucs4_t next; + int i; + + mblen = u8_mbtouc (&next, s, n); + + p[0] = uc_is_grapheme_cluster_break (prev, next); + for (i = 1; i < mblen; i++) + p[i] = 0; + + prev = next; + } +} diff --git a/lib/unigbrk/u8-grapheme-len.c b/lib/unigbrk/u8-grapheme-len.c new file mode 100644 index 000000000..9ef3938d2 --- /dev/null +++ b/lib/unigbrk/u8-grapheme-len.c @@ -0,0 +1,47 @@ +/* Grapheme cluster length function. + Copyright (C) 2010 Free Software Foundation, Inc. + Written by Ben Pfaff , 2010. 
+ + This program is free software: you can redistribute it and/or modify it + under the terms of the GNU Lesser General Public License as published + by the Free Software Foundation; either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. */ + +#include <config.h> + +/* Specification. */ +#include "unigbrk.h" + +#include "unistr.h" + +size_t +u8_grapheme_len (const uint8_t *s, size_t n) +{ + ucs4_t prev; + size_t ofs; + int mblen; + + if (n == 0) + return 0; + + for (ofs = u8_mbtouc (&prev, s, n); ofs < n; ofs += mblen) + { + ucs4_t next; + + mblen = u8_mbtouc (&next, s + ofs, n - ofs); + if (uc_is_grapheme_cluster_break (prev, next)) + break; + + prev = next; + } + + return ofs; +} diff --git a/lib/unigbrk/u8-grapheme-next.c b/lib/unigbrk/u8-grapheme-next.c new file mode 100644 index 000000000..8511a6e6f --- /dev/null +++ b/lib/unigbrk/u8-grapheme-next.c @@ -0,0 +1,46 @@ +/* Next grapheme cluster function. + Copyright (C) 2010 Free Software Foundation, Inc. + Written by Ben Pfaff , 2010. + + This program is free software: you can redistribute it and/or modify it + under the terms of the GNU Lesser General Public License as published + by the Free Software Foundation; either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public License + along with this program. 
If not, see <http://www.gnu.org/licenses/>. */ + +#include <config.h> + +/* Specification. */ +#include "unigbrk.h" + +#include "unistr.h" + +const uint8_t * +u8_grapheme_next (const uint8_t *s, const uint8_t *end) +{ + ucs4_t prev; + int mblen; + + if (s == end) + return NULL; + + for (s += u8_mbtouc (&prev, s, end - s); s != end; s += mblen) + { + ucs4_t next; + + mblen = u8_mbtouc (&next, s, end - s); + if (uc_is_grapheme_cluster_break (prev, next)) + break; + + prev = next; + } + + return s; +} diff --git a/lib/unigbrk/u8-grapheme-prev.c b/lib/unigbrk/u8-grapheme-prev.c new file mode 100644 index 000000000..39943fdc6 --- /dev/null +++ b/lib/unigbrk/u8-grapheme-prev.c @@ -0,0 +1,55 @@ +/* Previous grapheme cluster function. + Copyright (C) 2010 Free Software Foundation, Inc. + Written by Ben Pfaff , 2010. + + This program is free software: you can redistribute it and/or modify it + under the terms of the GNU Lesser General Public License as published + by the Free Software Foundation; either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. */ + +#include <config.h> + +/* Specification. */ +#include "unigbrk.h" + +#include "unistr.h" + +const uint8_t * +u8_grapheme_prev (const uint8_t *s, const uint8_t *start) +{ + ucs4_t next; + int mblen; + + if (s == start) + return NULL; + + s = u8_prev (&next, s, start); + while (s != start) + { + const uint8_t *prev_s; + ucs4_t prev; + + prev_s = u8_prev (&prev, s, start); + if (prev_s == NULL) + { + /* Ill-formed UTF-8 encoding. 
*/ + return start; + } + + if (uc_is_grapheme_cluster_break (prev, next)) + break; + + s = prev_s; + next = prev; + } + + return s; +} diff --git a/lib/unigbrk/ulc-grapheme-breaks.c b/lib/unigbrk/ulc-grapheme-breaks.c new file mode 100644 index 000000000..d77490205 --- /dev/null +++ b/lib/unigbrk/ulc-grapheme-breaks.c @@ -0,0 +1,130 @@ +/* Grapheme cluster breaks function. + Copyright (C) 2001-2003, 2006-2010 Free Software Foundation, Inc. + Written by Ben Pfaff , 2010, + based on code written by Bruno Haible , 2009. + + This program is free software: you can redistribute it and/or modify it + under the terms of the GNU Lesser General Public License as published + by the Free Software Foundation; either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public License + along with this program. If not, see . */ + +#include + +/* Specification. */ +#include "unigbrk.h" + +#include +#include + +#include "c-ctype.h" +#include "c-strcaseeq.h" +#include "localcharset.h" +#include "uniconv.h" + +static int +is_utf8_encoding (const char *encoding) +{ + if (STRCASEEQ (encoding, "UTF-8", 'U', 'T', 'F', '-', '8', 0, 0, 0, 0)) + return 1; + return 0; +} + +#if C_CTYPE_ASCII +/* Assume that every ASCII character starts a new grapheme, which is often + true, except that CR-LF is a single grapheme. */ +static void +ascii_grapheme_breaks (const char *s, size_t n, char *p) +{ + size_t i; + + p[0] = 1; + for (i = 1; i < n; i++) + { + bool is_ascii = c_isprint (s[i]) || c_isspace (s[i]); + p[i] = is_ascii && (s[i] != '\n' || s[i - 1] != '\r'); + } +} +#endif + +/* Grapheme boundaries in a string in an arbitrary encoding. 
+
+   We convert the input string to Unicode.
+
+   The standardized Unicode encodings are UTF-8, UCS-2, UCS-4, UTF-16,
+   UTF-16BE, UTF-16LE, UTF-7.  UCS-2 supports only characters up to
+   \U0000FFFF.  UTF-16 and variants support only characters up to
+   \U0010FFFF.  UTF-7 is way too complex and not supported by glibc-2.1.
+   UCS-4 specification leaves doubts about endianness and byte order mark.
+   glibc currently interprets it as big endian without byte order mark,
+   but this is not backed by an RFC.  So we use UTF-8.  It supports
+   characters up to \U7FFFFFFF and is unambiguously defined.  */
+
+void
+ulc_grapheme_breaks (const char *s, size_t n, char *p)
+{
+  if (n > 0)
+    {
+      const char *encoding = locale_charset ();
+
+      if (is_utf8_encoding (encoding))
+        u8_grapheme_breaks ((const uint8_t *) s, n, p);
+      else
+        {
+          /* Convert the string to UTF-8 and build a translation table
+             from offsets into s to offsets into the translated string.  */
+          size_t *offsets = (size_t *) malloc (n * sizeof (size_t));
+
+          if (offsets != NULL)
+            {
+              uint8_t *t;
+              size_t m;
+
+              t = u8_conv_from_encoding (encoding, iconveh_question_mark,
+                                         s, n, offsets, NULL, &m);
+              if (t != NULL)
+                {
+                  char *q = (char *) (m > 0 ? malloc (m) : NULL);
+
+                  if (m == 0 || q != NULL)
+                    {
+                      size_t i;
+
+                      /* Determine the grapheme breaks of the UTF-8 string.  */
+                      u8_grapheme_breaks (t, m, q);
+
+                      /* Translate the result back to the original string.  */
+                      memset (p, 0, n);
+                      for (i = 0; i < n; i++)
+                        if (offsets[i] != (size_t)(-1))
+                          p[i] = q[offsets[i]];
+
+                      free (q);
+                      free (t);
+                      free (offsets);
+                      return;
+                    }
+                  free (t);
+                }
+              free (offsets);
+            }
+
+          /* Impossible to convert.  */
+#if C_CTYPE_ASCII
+          /* Fall back to ASCII as best we can.  */
+          ascii_grapheme_breaks (s, n, p);
+#else
+          /* We cannot make any assumptions.  */
+          p[0] = 1;
+          memset (p + 1, 0, n - 1);
+#endif
+        }
+    }
+}
diff --git a/m4/locale-ar.m4 b/m4/locale-ar.m4
new file mode 100644
index 000000000..beb8ab387
--- /dev/null
+++ b/m4/locale-ar.m4
@@ -0,0 +1,69 @@
+# locale-ar.m4 serial 1
+dnl Copyright (C) 2003, 2005-2010 Free Software Foundation, Inc.
+dnl This file is free software; the Free Software Foundation
+dnl gives unlimited permission to copy and/or distribute it,
+dnl with or without modifications, as long as this notice is preserved.
+
+dnl From Ben Pfaff, based on locale-fr.m4 by Bruno Haible.
+
+dnl Determine the name of an Arabic locale with traditional encoding.
+AC_DEFUN([gt_LOCALE_AR],
+[
+  AC_REQUIRE([AC_CANONICAL_HOST])
+  AC_REQUIRE([AM_LANGINFO_CODESET])
+  AC_CACHE_CHECK([for a traditional Arabic locale], [gt_cv_locale_ar], [
+    AC_LANG_CONFTEST([AC_LANG_SOURCE([
+changequote(,)dnl
+#include <locale.h>
+#include <time.h>
+#if HAVE_LANGINFO_CODESET
+# include <langinfo.h>
+#endif
+#include <stdlib.h>
+#include <string.h>
+struct tm t;
+char buf[16];
+int main () {
+  /* Check whether the given locale name is recognized by the system.  */
+  if (setlocale (LC_ALL, "") == NULL) return 1;
+  /* Check that nl_langinfo(CODESET) is nonempty and not "ASCII", "646" or "UTF-8".  */
+#if HAVE_LANGINFO_CODESET
+  {
+    const char *cs = nl_langinfo (CODESET);
+    if (cs[0] == '\0' || strcmp (cs, "ASCII") == 0 || strcmp (cs, "646") == 0
+        || strcmp (cs, "UTF-8") == 0)
+      return 1;
+  }
+#endif
+#ifdef __CYGWIN__
+  /* On Cygwin, avoid locale names without encoding suffix, because the
+     locale_charset() function relies on the encoding suffix.  Note that
+     LC_ALL is set on the command line.  */
+  if (strchr (getenv ("LC_ALL"), '.') == NULL) return 1;
+#endif
+  /* Check that the name of the first month begins with U+0643 (ك) as encoded
+     by ISO 8859-6.  This excludes the UTF-8 encoding.  */
+  t.tm_year = 1975 - 1900; t.tm_mon = 1 - 1; t.tm_mday = 4;
+  strftime (buf, sizeof (buf), "%B", &t);
+  if ((unsigned char) buf[0] != 0xe3) return 1;
+  return 0;
+}
+changequote([,])dnl
+      ])])
+    if AC_TRY_EVAL([ac_link]) && test -s conftest$ac_exeext; then
+      # Setting LC_ALL is not enough. Need to set LC_TIME to empty, because
+      # otherwise on MacOS X 10.3.5 the LC_TIME=C from the beginning of the
+      # configure script would override the LC_ALL setting. Likewise for
+      # LC_CTYPE, which is also set at the beginning of the configure script.
+      # Test for the usual locale name.
+      for gt_cv_locale_ar in ar_SA ar_SA.ISO-8859-6 ar_EG ar_EG.ISO-8859-6 none; do
+        if test $gt_cv_locale_ar = none || (LC_ALL=$gt_cv_locale_ar LC_TIME= LC_CTYPE= ./conftest; exit) 2>/dev/null; then
+          break
+        fi
+      done
+    fi
+    rm -fr conftest*
+  ])
+  LOCALE_AR=$gt_cv_locale_ar
+  AC_SUBST([LOCALE_AR])
+])
diff --git a/modules/unigbrk/u16-grapheme-breaks b/modules/unigbrk/u16-grapheme-breaks
new file mode 100644
index 000000000..61fd4526d
--- /dev/null
+++ b/modules/unigbrk/u16-grapheme-breaks
@@ -0,0 +1,28 @@
+Description:
+Find grapheme cluster breaks in UTF-16 string.
+ +Files: +lib/unigbrk/u16-grapheme-breaks.c +tests/macros.h + +Depends-on: +unigbrk/uc-is-grapheme-break +unistr/u16-mbtouc + +configure.ac: +gl_MODULE_INDICATOR([unigbrk/u16-grapheme-breaks]) +gl_LIBUNISTRING_MODULE([0.9.4], [unigbrk/u16-grapheme-breaks]) + +Makefile.am: +if LIBUNISTRING_COMPILE_UNIGBRK_U16_GRAPHEME_BREAKS +lib_SOURCES += unigbrk/u16-grapheme-breaks.c +endif + +Include: +"unigbrk.h" + +License: +LGPL + +Maintainer: +Ben Pfaff, Bruno Haible diff --git a/modules/unigbrk/u16-grapheme-breaks-tests b/modules/unigbrk/u16-grapheme-breaks-tests new file mode 100644 index 000000000..9f7f2dffe --- /dev/null +++ b/modules/unigbrk/u16-grapheme-breaks-tests @@ -0,0 +1,12 @@ +Files: +tests/unigbrk/test-u16-grapheme-breaks.c + +Depends-on: + +configure.ac: + +Makefile.am: +TESTS += test-u16-grapheme-breaks +check_PROGRAMS += test-u16-grapheme-breaks +test_u16_grapheme_breaks_SOURCES = unigbrk/test-u16-grapheme-breaks.c +test_u16_grapheme_breaks_LDADD = $(LDADD) $(LIBUNISTRING) diff --git a/modules/unigbrk/u16-grapheme-len b/modules/unigbrk/u16-grapheme-len new file mode 100644 index 000000000..06d064603 --- /dev/null +++ b/modules/unigbrk/u16-grapheme-len @@ -0,0 +1,28 @@ +Description: +Length of first grapheme cluster in UTF-16 string. 
+ +Files: +lib/unigbrk/u16-grapheme-len.c +tests/macros.h + +Depends-on: +unigbrk/uc-is-grapheme-break +unistr/u16-mbtouc + +configure.ac: +gl_MODULE_INDICATOR([unigbrk/u16-grapheme-len]) +gl_LIBUNISTRING_MODULE([0.9.4], [unigbrk/u16-grapheme-len]) + +Makefile.am: +if LIBUNISTRING_COMPILE_UNIGBRK_U16_GRAPHEME_LEN +lib_SOURCES += unigbrk/u16-grapheme-len.c +endif + +Include: +"unigbrk.h" + +License: +LGPL + +Maintainer: +Ben Pfaff, Bruno Haible diff --git a/modules/unigbrk/u16-grapheme-len-tests b/modules/unigbrk/u16-grapheme-len-tests new file mode 100644 index 000000000..66f7aac7d --- /dev/null +++ b/modules/unigbrk/u16-grapheme-len-tests @@ -0,0 +1,12 @@ +Files: +tests/unigbrk/test-u16-grapheme-len.c + +Depends-on: + +configure.ac: + +Makefile.am: +TESTS += test-u16-grapheme-len +check_PROGRAMS += test-u16-grapheme-len +test_u16_grapheme_len_SOURCES = unigbrk/test-u16-grapheme-len.c +test_u16_grapheme_len_LDADD = $(LDADD) $(LIBUNISTRING) diff --git a/modules/unigbrk/u16-grapheme-next b/modules/unigbrk/u16-grapheme-next new file mode 100644 index 000000000..3d3a9c7f2 --- /dev/null +++ b/modules/unigbrk/u16-grapheme-next @@ -0,0 +1,28 @@ +Description: +Find start of next grapheme cluster in UTF-16 string. 
+ +Files: +lib/unigbrk/u16-grapheme-next.c +tests/macros.h + +Depends-on: +unigbrk/uc-is-grapheme-break +unistr/u16-mbtouc + +configure.ac: +gl_MODULE_INDICATOR([unigbrk/u16-grapheme-next]) +gl_LIBUNISTRING_MODULE([0.9.4], [unigbrk/u16-grapheme-next]) + +Makefile.am: +if LIBUNISTRING_COMPILE_UNIGBRK_U16_GRAPHEME_NEXT +lib_SOURCES += unigbrk/u16-grapheme-next.c +endif + +Include: +"unigbrk.h" + +License: +LGPL + +Maintainer: +Ben Pfaff, Bruno Haible diff --git a/modules/unigbrk/u16-grapheme-next-tests b/modules/unigbrk/u16-grapheme-next-tests new file mode 100644 index 000000000..a947afe3f --- /dev/null +++ b/modules/unigbrk/u16-grapheme-next-tests @@ -0,0 +1,12 @@ +Files: +tests/unigbrk/test-u16-grapheme-next.c + +Depends-on: + +configure.ac: + +Makefile.am: +TESTS += test-u16-grapheme-next +check_PROGRAMS += test-u16-grapheme-next +test_u16_grapheme_next_SOURCES = unigbrk/test-u16-grapheme-next.c +test_u16_grapheme_next_LDADD = $(LDADD) $(LIBUNISTRING) diff --git a/modules/unigbrk/u16-grapheme-prev b/modules/unigbrk/u16-grapheme-prev new file mode 100644 index 000000000..a58219e93 --- /dev/null +++ b/modules/unigbrk/u16-grapheme-prev @@ -0,0 +1,28 @@ +Description: +Find start of previous grapheme cluster in UTF-16 string. 
+ +Files: +lib/unigbrk/u16-grapheme-prev.c +tests/macros.h + +Depends-on: +unigbrk/uc-is-grapheme-break +unistr/u16-prev + +configure.ac: +gl_MODULE_INDICATOR([unigbrk/u16-grapheme-prev]) +gl_LIBUNISTRING_MODULE([0.9.4], [unigbrk/u16-grapheme-prev]) + +Makefile.am: +if LIBUNISTRING_COMPILE_UNIGBRK_U16_GRAPHEME_PREV +lib_SOURCES += unigbrk/u16-grapheme-prev.c +endif + +Include: +"unigbrk.h" + +License: +LGPL + +Maintainer: +Ben Pfaff, Bruno Haible diff --git a/modules/unigbrk/u16-grapheme-prev-tests b/modules/unigbrk/u16-grapheme-prev-tests new file mode 100644 index 000000000..be62d2468 --- /dev/null +++ b/modules/unigbrk/u16-grapheme-prev-tests @@ -0,0 +1,12 @@ +Files: +tests/unigbrk/test-u16-grapheme-prev.c + +Depends-on: + +configure.ac: + +Makefile.am: +TESTS += test-u16-grapheme-prev +check_PROGRAMS += test-u16-grapheme-prev +test_u16_grapheme_prev_SOURCES = unigbrk/test-u16-grapheme-prev.c +test_u16_grapheme_prev_LDADD = $(LDADD) $(LIBUNISTRING) diff --git a/modules/unigbrk/u32-grapheme-breaks b/modules/unigbrk/u32-grapheme-breaks new file mode 100644 index 000000000..5ae699822 --- /dev/null +++ b/modules/unigbrk/u32-grapheme-breaks @@ -0,0 +1,28 @@ +Description: +Find grapheme cluster breaks in UTF-32 string. 
+ +Files: +lib/unigbrk/u32-grapheme-breaks.c +tests/macros.h + +Depends-on: +unigbrk/uc-is-grapheme-break +unistr/u32-mbtouc + +configure.ac: +gl_MODULE_INDICATOR([unigbrk/u32-grapheme-breaks]) +gl_LIBUNISTRING_MODULE([0.9.4], [unigbrk/u32-grapheme-breaks]) + +Makefile.am: +if LIBUNISTRING_COMPILE_UNIGBRK_U32_GRAPHEME_BREAKS +lib_SOURCES += unigbrk/u32-grapheme-breaks.c +endif + +Include: +"unigbrk.h" + +License: +LGPL + +Maintainer: +Ben Pfaff, Bruno Haible diff --git a/modules/unigbrk/u32-grapheme-breaks-tests b/modules/unigbrk/u32-grapheme-breaks-tests new file mode 100644 index 000000000..456134474 --- /dev/null +++ b/modules/unigbrk/u32-grapheme-breaks-tests @@ -0,0 +1,12 @@ +Files: +tests/unigbrk/test-u32-grapheme-breaks.c + +Depends-on: + +configure.ac: + +Makefile.am: +TESTS += test-u32-grapheme-breaks +check_PROGRAMS += test-u32-grapheme-breaks +test_u32_grapheme_breaks_SOURCES = unigbrk/test-u32-grapheme-breaks.c +test_u32_grapheme_breaks_LDADD = $(LDADD) $(LIBUNISTRING) diff --git a/modules/unigbrk/u32-grapheme-len b/modules/unigbrk/u32-grapheme-len new file mode 100644 index 000000000..4129fb1b2 --- /dev/null +++ b/modules/unigbrk/u32-grapheme-len @@ -0,0 +1,28 @@ +Description: +Length of first grapheme cluster in UTF-32 string. 
+ +Files: +lib/unigbrk/u32-grapheme-len.c +tests/macros.h + +Depends-on: +unigbrk/uc-is-grapheme-break +unistr/u32-mbtouc + +configure.ac: +gl_MODULE_INDICATOR([unigbrk/u32-grapheme-len]) +gl_LIBUNISTRING_MODULE([0.9.4], [unigbrk/u32-grapheme-len]) + +Makefile.am: +if LIBUNISTRING_COMPILE_UNIGBRK_U32_GRAPHEME_LEN +lib_SOURCES += unigbrk/u32-grapheme-len.c +endif + +Include: +"unigbrk.h" + +License: +LGPL + +Maintainer: +Ben Pfaff, Bruno Haible diff --git a/modules/unigbrk/u32-grapheme-len-tests b/modules/unigbrk/u32-grapheme-len-tests new file mode 100644 index 000000000..1618abda8 --- /dev/null +++ b/modules/unigbrk/u32-grapheme-len-tests @@ -0,0 +1,12 @@ +Files: +tests/unigbrk/test-u32-grapheme-len.c + +Depends-on: + +configure.ac: + +Makefile.am: +TESTS += test-u32-grapheme-len +check_PROGRAMS += test-u32-grapheme-len +test_u32_grapheme_len_SOURCES = unigbrk/test-u32-grapheme-len.c +test_u32_grapheme_len_LDADD = $(LDADD) $(LIBUNISTRING) diff --git a/modules/unigbrk/u32-grapheme-next b/modules/unigbrk/u32-grapheme-next new file mode 100644 index 000000000..d2fab5a47 --- /dev/null +++ b/modules/unigbrk/u32-grapheme-next @@ -0,0 +1,28 @@ +Description: +Find start of next grapheme cluster in UTF-32 string. 
+ +Files: +lib/unigbrk/u32-grapheme-next.c +tests/macros.h + +Depends-on: +unigbrk/uc-is-grapheme-break +unistr/u32-mbtouc + +configure.ac: +gl_MODULE_INDICATOR([unigbrk/u32-grapheme-next]) +gl_LIBUNISTRING_MODULE([0.9.4], [unigbrk/u32-grapheme-next]) + +Makefile.am: +if LIBUNISTRING_COMPILE_UNIGBRK_U32_GRAPHEME_NEXT +lib_SOURCES += unigbrk/u32-grapheme-next.c +endif + +Include: +"unigbrk.h" + +License: +LGPL + +Maintainer: +Ben Pfaff, Bruno Haible diff --git a/modules/unigbrk/u32-grapheme-next-tests b/modules/unigbrk/u32-grapheme-next-tests new file mode 100644 index 000000000..4ed8b1f94 --- /dev/null +++ b/modules/unigbrk/u32-grapheme-next-tests @@ -0,0 +1,12 @@ +Files: +tests/unigbrk/test-u32-grapheme-next.c + +Depends-on: + +configure.ac: + +Makefile.am: +TESTS += test-u32-grapheme-next +check_PROGRAMS += test-u32-grapheme-next +test_u32_grapheme_next_SOURCES = unigbrk/test-u32-grapheme-next.c +test_u32_grapheme_next_LDADD = $(LDADD) $(LIBUNISTRING) diff --git a/modules/unigbrk/u32-grapheme-prev b/modules/unigbrk/u32-grapheme-prev new file mode 100644 index 000000000..c1a637590 --- /dev/null +++ b/modules/unigbrk/u32-grapheme-prev @@ -0,0 +1,28 @@ +Description: +Find start of previous grapheme cluster in UTF-32 string. 
+ +Files: +lib/unigbrk/u32-grapheme-prev.c +tests/macros.h + +Depends-on: +unigbrk/uc-is-grapheme-break +unistr/u32-prev + +configure.ac: +gl_MODULE_INDICATOR([unigbrk/u32-grapheme-prev]) +gl_LIBUNISTRING_MODULE([0.9.4], [unigbrk/u32-grapheme-prev]) + +Makefile.am: +if LIBUNISTRING_COMPILE_UNIGBRK_U32_GRAPHEME_PREV +lib_SOURCES += unigbrk/u32-grapheme-prev.c +endif + +Include: +"unigbrk.h" + +License: +LGPL + +Maintainer: +Ben Pfaff, Bruno Haible diff --git a/modules/unigbrk/u32-grapheme-prev-tests b/modules/unigbrk/u32-grapheme-prev-tests new file mode 100644 index 000000000..090072279 --- /dev/null +++ b/modules/unigbrk/u32-grapheme-prev-tests @@ -0,0 +1,12 @@ +Files: +tests/unigbrk/test-u32-grapheme-prev.c + +Depends-on: + +configure.ac: + +Makefile.am: +TESTS += test-u32-grapheme-prev +check_PROGRAMS += test-u32-grapheme-prev +test_u32_grapheme_prev_SOURCES = unigbrk/test-u32-grapheme-prev.c +test_u32_grapheme_prev_LDADD = $(LDADD) $(LIBUNISTRING) diff --git a/modules/unigbrk/u8-grapheme-breaks b/modules/unigbrk/u8-grapheme-breaks new file mode 100644 index 000000000..6d0e98958 --- /dev/null +++ b/modules/unigbrk/u8-grapheme-breaks @@ -0,0 +1,28 @@ +Description: +Find grapheme cluster breaks in UTF-8 string. 
+ +Files: +lib/unigbrk/u8-grapheme-breaks.c +tests/macros.h + +Depends-on: +unigbrk/uc-is-grapheme-break +unistr/u8-mbtouc + +configure.ac: +gl_MODULE_INDICATOR([unigbrk/u8-grapheme-breaks]) +gl_LIBUNISTRING_MODULE([0.9.4], [unigbrk/u8-grapheme-breaks]) + +Makefile.am: +if LIBUNISTRING_COMPILE_UNIGBRK_U8_GRAPHEME_BREAKS +lib_SOURCES += unigbrk/u8-grapheme-breaks.c +endif + +Include: +"unigbrk.h" + +License: +LGPL + +Maintainer: +Ben Pfaff, Bruno Haible diff --git a/modules/unigbrk/u8-grapheme-breaks-tests b/modules/unigbrk/u8-grapheme-breaks-tests new file mode 100644 index 000000000..7d1f6d98d --- /dev/null +++ b/modules/unigbrk/u8-grapheme-breaks-tests @@ -0,0 +1,12 @@ +Files: +tests/unigbrk/test-u8-grapheme-breaks.c + +Depends-on: + +configure.ac: + +Makefile.am: +TESTS += test-u8-grapheme-breaks +check_PROGRAMS += test-u8-grapheme-breaks +test_u8_grapheme_breaks_SOURCES = unigbrk/test-u8-grapheme-breaks.c +test_u8_grapheme_breaks_LDADD = $(LDADD) $(LIBUNISTRING) diff --git a/modules/unigbrk/u8-grapheme-len b/modules/unigbrk/u8-grapheme-len new file mode 100644 index 000000000..04e6c40ef --- /dev/null +++ b/modules/unigbrk/u8-grapheme-len @@ -0,0 +1,28 @@ +Description: +Length of first grapheme cluster in UTF-8 string. 
+ +Files: +lib/unigbrk/u8-grapheme-len.c +tests/macros.h + +Depends-on: +unigbrk/uc-is-grapheme-break +unistr/u8-mbtouc + +configure.ac: +gl_MODULE_INDICATOR([unigbrk/u8-grapheme-len]) +gl_LIBUNISTRING_MODULE([0.9.4], [unigbrk/u8-grapheme-len]) + +Makefile.am: +if LIBUNISTRING_COMPILE_UNIGBRK_U8_GRAPHEME_LEN +lib_SOURCES += unigbrk/u8-grapheme-len.c +endif + +Include: +"unigbrk.h" + +License: +LGPL + +Maintainer: +Ben Pfaff, Bruno Haible diff --git a/modules/unigbrk/u8-grapheme-len-tests b/modules/unigbrk/u8-grapheme-len-tests new file mode 100644 index 000000000..4067abfe3 --- /dev/null +++ b/modules/unigbrk/u8-grapheme-len-tests @@ -0,0 +1,12 @@ +Files: +tests/unigbrk/test-u8-grapheme-len.c + +Depends-on: + +configure.ac: + +Makefile.am: +TESTS += test-u8-grapheme-len +check_PROGRAMS += test-u8-grapheme-len +test_u8_grapheme_len_SOURCES = unigbrk/test-u8-grapheme-len.c +test_u8_grapheme_len_LDADD = $(LDADD) $(LIBUNISTRING) diff --git a/modules/unigbrk/u8-grapheme-next b/modules/unigbrk/u8-grapheme-next new file mode 100644 index 000000000..355757f3b --- /dev/null +++ b/modules/unigbrk/u8-grapheme-next @@ -0,0 +1,28 @@ +Description: +Find start of next grapheme cluster in UTF-8 string. 
+ +Files: +lib/unigbrk/u8-grapheme-next.c +tests/macros.h + +Depends-on: +unigbrk/uc-is-grapheme-break +unistr/u8-mbtouc + +configure.ac: +gl_MODULE_INDICATOR([unigbrk/u8-grapheme-next]) +gl_LIBUNISTRING_MODULE([0.9.4], [unigbrk/u8-grapheme-next]) + +Makefile.am: +if LIBUNISTRING_COMPILE_UNIGBRK_U8_GRAPHEME_NEXT +lib_SOURCES += unigbrk/u8-grapheme-next.c +endif + +Include: +"unigbrk.h" + +License: +LGPL + +Maintainer: +Ben Pfaff, Bruno Haible diff --git a/modules/unigbrk/u8-grapheme-next-tests b/modules/unigbrk/u8-grapheme-next-tests new file mode 100644 index 000000000..d30709ea6 --- /dev/null +++ b/modules/unigbrk/u8-grapheme-next-tests @@ -0,0 +1,12 @@ +Files: +tests/unigbrk/test-u8-grapheme-next.c + +Depends-on: + +configure.ac: + +Makefile.am: +TESTS += test-u8-grapheme-next +check_PROGRAMS += test-u8-grapheme-next +test_u8_grapheme_next_SOURCES = unigbrk/test-u8-grapheme-next.c +test_u8_grapheme_next_LDADD = $(LDADD) $(LIBUNISTRING) diff --git a/modules/unigbrk/u8-grapheme-prev b/modules/unigbrk/u8-grapheme-prev new file mode 100644 index 000000000..182dbb0fe --- /dev/null +++ b/modules/unigbrk/u8-grapheme-prev @@ -0,0 +1,28 @@ +Description: +Find start of previous grapheme cluster in UTF-8 string. 
+ +Files: +lib/unigbrk/u8-grapheme-prev.c +tests/macros.h + +Depends-on: +unigbrk/uc-is-grapheme-break +unistr/u8-prev + +configure.ac: +gl_MODULE_INDICATOR([unigbrk/u8-grapheme-prev]) +gl_LIBUNISTRING_MODULE([0.9.4], [unigbrk/u8-grapheme-prev]) + +Makefile.am: +if LIBUNISTRING_COMPILE_UNIGBRK_U8_GRAPHEME_PREV +lib_SOURCES += unigbrk/u8-grapheme-prev.c +endif + +Include: +"unigbrk.h" + +License: +LGPL + +Maintainer: +Ben Pfaff, Bruno Haible diff --git a/modules/unigbrk/u8-grapheme-prev-tests b/modules/unigbrk/u8-grapheme-prev-tests new file mode 100644 index 000000000..137e8464f --- /dev/null +++ b/modules/unigbrk/u8-grapheme-prev-tests @@ -0,0 +1,12 @@ +Files: +tests/unigbrk/test-u8-grapheme-prev.c + +Depends-on: + +configure.ac: + +Makefile.am: +TESTS += test-u8-grapheme-prev +check_PROGRAMS += test-u8-grapheme-prev +test_u8_grapheme_prev_SOURCES = unigbrk/test-u8-grapheme-prev.c +test_u8_grapheme_prev_LDADD = $(LDADD) $(LIBUNISTRING) diff --git a/modules/unigbrk/ulc-grapheme-breaks b/modules/unigbrk/ulc-grapheme-breaks new file mode 100644 index 000000000..e13e7fde8 --- /dev/null +++ b/modules/unigbrk/ulc-grapheme-breaks @@ -0,0 +1,30 @@ +Description: +Grapheme cluster breaks in locale-encoded string. 
+ +Files: +lib/unigbrk/ulc-grapheme-breaks.c + +Depends-on: +unigbrk/base +unigbrk/u8-grapheme-breaks +uniconv/u8-conv-from-enc +c-ctype +localcharset + +configure.ac: +gl_MODULE_INDICATOR([unigbrk/ulc-grapheme-breaks]) +gl_LIBUNISTRING_MODULE([0.9.4], [unigbrk/ulc-grapheme-breaks]) + +Makefile.am: +if LIBUNISTRING_COMPILE_UNIGBRK_ULC_GRAPHEME_BREAKS +lib_SOURCES += unigbrk/ulc-grapheme-breaks.c +endif + +Include: +"unigbrk.h" + +License: +LGPL + +Maintainer: +Ben Pfaff, Bruno Haible diff --git a/modules/unigbrk/ulc-grapheme-breaks-tests b/modules/unigbrk/ulc-grapheme-breaks-tests new file mode 100644 index 000000000..60cd513f7 --- /dev/null +++ b/modules/unigbrk/ulc-grapheme-breaks-tests @@ -0,0 +1,18 @@ +Files: +tests/unigbrk/test-ulc-grapheme-breaks.sh +tests/unigbrk/test-ulc-grapheme-breaks.c +tests/macros.h +m4/locale-ar.m4 +m4/codeset.m4 + +Depends-on: + +configure.ac: +gt_LOCALE_AR + +Makefile.am: +TESTS += unigbrk/test-ulc-grapheme-breaks.sh +TESTS_ENVIRONMENT += LOCALE_AR='@LOCALE_AR@' +check_PROGRAMS += test-ulc-grapheme-breaks +test_ulc_grapheme_breaks_SOURCES = unigbrk/test-ulc-grapheme-breaks.c +test_ulc_grapheme_breaks_LDADD = $(LDADD) $(LIBUNISTRING) @LIBICONV@ diff --git a/tests/unigbrk/test-u16-grapheme-breaks.c b/tests/unigbrk/test-u16-grapheme-breaks.c new file mode 100644 index 000000000..98cd7631a --- /dev/null +++ b/tests/unigbrk/test-u16-grapheme-breaks.c @@ -0,0 +1,105 @@ +/* Grapheme cluster breaks test. + Copyright (C) 2010 Free Software Foundation, Inc. + + This program is free software: you can redistribute it and/or modify it + under the terms of the GNU Lesser General Public License as published + by the Free Software Foundation; either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public License
+   along with this program.  If not, see <http://www.gnu.org/licenses/>.  */
+
+/* Written by Ben Pfaff, 2010.  */
+
+#include <config.h>
+
+/* Specification.  */
+#include <unigbrk.h>
+
+#include <stdarg.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "macros.h"
+
+static void
+test_u16_grapheme_breaks (const char *expected, ...)
+{
+  size_t n = strlen (expected);
+  uint16_t s[16];
+  va_list args;
+  char breaks[16];
+  size_t i;
+
+  ASSERT (n <= 16);
+
+  memset (breaks, 0xcc, n); /* Neither 0 nor 1, so an unwritten entry mismatches.  */
+
+  va_start (args, expected);
+  for (i = 0; i < n; i++)
+    {
+      int unit = va_arg (args, int);
+      ASSERT (unit >= 0);
+      s[i] = unit;
+    }
+  ASSERT (va_arg (args, int) == -1);
+  va_end (args);
+
+  u16_grapheme_breaks (s, n, breaks);
+  for (i = 0; i < n; i++)
+    if (breaks[i] != (expected[i] == '#'))
+      {
+        size_t j;
+
+        fprintf (stderr, "wrong grapheme breaks:\n");
+
+        fprintf (stderr, " input:");
+        for (j = 0; j < n; j++)
+          fprintf (stderr, " %02x", s[j]);
+        putc ('\n', stderr);
+
+        fprintf (stderr, "expected:");
+        for (j = 0; j < n; j++)
+          fprintf (stderr, " %d", expected[j] == '#');
+        putc ('\n', stderr);
+
+        fprintf (stderr, " actual:");
+        for (j = 0; j < n; j++)
+          fprintf (stderr, " %d", breaks[j]);
+        putc ('\n', stderr);
+
+        abort ();
+      }
+}
+
+int
+main (void)
+{
+  static const char s[] = "abc";
+
+  /* Standalone 1-unit graphemes.  */
+  test_u16_grapheme_breaks ("#", 'a', -1);
+  test_u16_grapheme_breaks ("##", 'a', 'b', -1);
+  test_u16_grapheme_breaks ("###", 'a', 'b', 'c', -1);
+
+#define HIRAGANA_A 0x3042 /* あ: Hiragana letter 'a'.  */
+  test_u16_grapheme_breaks ("#", HIRAGANA_A, -1);
+  test_u16_grapheme_breaks ("##", HIRAGANA_A, 'x', -1);
+  test_u16_grapheme_breaks ("##", HIRAGANA_A, HIRAGANA_A, -1);
+
+  /* Combining accents.  */
+#define GRAVE 0x0300 /* Combining grave accent.  */
+#define ACUTE 0x0301 /* Combining acute accent.  */
+  test_u16_grapheme_breaks ("#_", 'e', ACUTE, -1);
+  test_u16_grapheme_breaks ("#__", 'e', ACUTE, GRAVE, -1);
+  test_u16_grapheme_breaks ("#_#", 'e', ACUTE, 'x', -1);
+  test_u16_grapheme_breaks ("#_#_", 'e', ACUTE, 'e', GRAVE, -1);
+
+  return 0;
+}
diff --git a/tests/unigbrk/test-u16-grapheme-len.c b/tests/unigbrk/test-u16-grapheme-len.c
new file mode 100644
index 000000000..cce9ea63a
--- /dev/null
+++ b/tests/unigbrk/test-u16-grapheme-len.c
@@ -0,0 +1,95 @@
+/* Grapheme cluster length test.
+   Copyright (C) 2010 Free Software Foundation, Inc.
+
+   This program is free software: you can redistribute it and/or modify it
+   under the terms of the GNU Lesser General Public License as published
+   by the Free Software Foundation; either version 3 of the License, or
+   (at your option) any later version.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public License
+   along with this program.  If not, see <http://www.gnu.org/licenses/>.  */
+
+/* Written by Ben Pfaff, 2010.  */
+
+#include <config.h>
+
+/* Specification.  */
+#include <unigbrk.h>
+
+#include <stdarg.h>
+#include <stdio.h>
+#include <stdlib.h>
+
+static void
+test_u16_grapheme_len (size_t len, ...)
+{
+  uint16_t s[16];
+  size_t retval;
+  va_list args;
+  size_t n;
+
+  va_start (args, len);
+  n = 0;
+  for (;;)
+    {
+      int unit = va_arg (args, int);
+      if (unit == -1)
+        break;
+      else if (n >= sizeof s / sizeof *s)
+        abort ();
+
+      s[n++] = unit;
+    }
+  va_end (args);
+
+  retval = u16_grapheme_len (s, n);
+  if (retval != len)
+    {
+      size_t i;
+
+      fprintf (stderr, "u16_grapheme_len counted %zu units, expected %zu:",
+               retval, len);
+      for (i = 0; i < n; i++)
+        fprintf (stderr, " %04x", s[i]);
+      putc ('\n', stderr);
+      abort ();
+    }
+}
+
+
+int
+main (void)
+{
+  /* Empty string.  */
+  test_u16_grapheme_len (0, -1);
+
+  /* Standalone 1-unit graphemes.  */
+  test_u16_grapheme_len (1, 'a', -1);
+  test_u16_grapheme_len (1, 'a', 'b', -1);
+  test_u16_grapheme_len (1, 'a', 'b', 'c', -1);
+
+  /* Non-ASCII, single code point graphemes (one unit each in UTF-16).  */
+#define HIRAGANA_A 0x3042 /* あ: Hiragana letter 'a'.  */
+  test_u16_grapheme_len (1, HIRAGANA_A, -1);
+  test_u16_grapheme_len (1, HIRAGANA_A, 'x', -1);
+  test_u16_grapheme_len (1, HIRAGANA_A, HIRAGANA_A, -1);
+
+  /* Combining accents.  */
+#define GRAVE 0x0300 /* Combining grave accent.  */
+#define ACUTE 0x0301 /* Combining acute accent.  */
+  test_u16_grapheme_len (2, 'e', ACUTE, -1);
+  test_u16_grapheme_len (3, 'e', ACUTE, GRAVE, -1);
+  test_u16_grapheme_len (2, 'e', ACUTE, 'x', -1);
+  test_u16_grapheme_len (2, 'e', ACUTE, 'e', ACUTE, -1);
+
+  /* Surrogate pairs.  */
+  test_u16_grapheme_len (2, 0xd83d, 0xde10, -1); /* 😐: neutral face.  */
+  test_u16_grapheme_len (3, 0xd83d, 0xde10, GRAVE, -1);
+
+  return 0;
+}
diff --git a/tests/unigbrk/test-u16-grapheme-next.c b/tests/unigbrk/test-u16-grapheme-next.c
new file mode 100644
index 000000000..b69bad75e
--- /dev/null
+++ b/tests/unigbrk/test-u16-grapheme-next.c
@@ -0,0 +1,102 @@
+/* Next grapheme cluster length test.
+   Copyright (C) 2010 Free Software Foundation, Inc.
+
+   This program is free software: you can redistribute it and/or modify it
+   under the terms of the GNU Lesser General Public License as published
+   by the Free Software Foundation; either version 3 of the License, or
+   (at your option) any later version.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public License
+   along with this program.  If not, see <http://www.gnu.org/licenses/>.  */
+
+/* Written by Ben Pfaff, 2010.  */
+
+#include <config.h>
+
+/* Specification.
*/ +#include + +#include +#include +#include + +#include "macros.h" + +static void +test_u16_grapheme_next (size_t len, ...) +{ + const uint16_t *next; + uint16_t s[16]; + va_list args; + size_t n; + + va_start (args, len); + n = 0; + for (;;) + { + int unit = va_arg (args, int); + if (unit == -1) + break; + else if (n >= sizeof s / sizeof *s) + abort (); + + s[n++] = unit; + } + va_end (args); + + next = u16_grapheme_next (s, s + n); + if (next != s + len) + { + size_t i; + + if (next == NULL) + fputs ("u16_grapheme_next returned NULL", stderr); + else + fprintf (stderr, "u16_grapheme_next skipped %zu units", next - s); + fprintf (stderr, ", expected %zu:\n", len); + for (i = 0; i < n; i++) + fprintf (stderr, " %04x", s[i]); + putc ('\n', stderr); + abort (); + } +} + +int +main (void) +{ + static const uint16_t s[] = { 'a', 'b', 'c' }; + + /* Empty string. */ + ASSERT (u16_grapheme_next (NULL, NULL) == NULL); + ASSERT (u16_grapheme_next (s, s) == NULL); + + /* Standalone 1-unit graphemes. */ + test_u16_grapheme_next (1, 'a', -1); + test_u16_grapheme_next (1, 'a', 'b', -1); + test_u16_grapheme_next (1, 'a', 'b', 'c', -1); + + /* Multi-unit, single code point graphemes. */ +#define HIRAGANA_A 0x3042 /* あ: Hiragana letter 'a'. */ + test_u16_grapheme_next (1, HIRAGANA_A, -1); + test_u16_grapheme_next (1, HIRAGANA_A, 'x', -1); + test_u16_grapheme_next (1, HIRAGANA_A, HIRAGANA_A, -1); + + /* Combining accents. */ +#define GRAVE 0x0300 /* Combining grave accent. */ +#define ACUTE 0x0301 /* Combining acute accent. */ + test_u16_grapheme_next (2, 'e', ACUTE, -1); + test_u16_grapheme_next (3, 'e', ACUTE, GRAVE, -1); + test_u16_grapheme_next (2, 'e', ACUTE, 'x', -1); + test_u16_grapheme_next (2, 'e', ACUTE, 'e', ACUTE, -1); + + /* Surrogate pairs. */ + test_u16_grapheme_next (2, 0xd83d, 0xde10, -1); /* 😐: neutral face. 
*/ + test_u16_grapheme_next (3, 0xd83d, 0xde10, GRAVE, -1); + + return 0; +} diff --git a/tests/unigbrk/test-u16-grapheme-prev.c b/tests/unigbrk/test-u16-grapheme-prev.c new file mode 100644 index 000000000..8d7ec6668 --- /dev/null +++ b/tests/unigbrk/test-u16-grapheme-prev.c @@ -0,0 +1,104 @@ +/* Previous grapheme cluster test. + Copyright (C) 2010 Free Software Foundation, Inc. + + This program is free software: you can redistribute it and/or modify it + under the terms of the GNU Lesser General Public License as published + by the Free Software Foundation; either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public License + along with this program. If not, see . */ + +/* Written by Ben Pfaff , 2010. */ + +#include + +/* Specification. */ +#include + +#include +#include +#include + +#include "macros.h" + +static void +test_u16_grapheme_prev (size_t len, ...) +{ + const uint16_t *prev; + const uint16_t *end; + uint16_t s[16]; + va_list args; + size_t n; + + va_start (args, len); + n = 0; + for (;;) + { + int unit = va_arg (args, int); + if (unit == -1) + break; + else if (n >= sizeof s / sizeof *s) + abort (); + + s[n++] = unit; + } + va_end (args); + + end = s + n; + prev = u16_grapheme_prev (end, s); + if (prev != end - len) + { + size_t i; + + if (prev == NULL) + fputs ("u16_grapheme_prev returned NULL", stderr); + else + fprintf (stderr, "u16_grapheme_prev skipped %zu units", end - prev); + fprintf (stderr, ", expected %zu:\n", len); + for (i = 0; i < n; i++) + fprintf (stderr, " %04x", s[i]); + putc ('\n', stderr); + abort (); + } +} + +int +main (void) +{ + static const uint16_t s[] = { 'a', 'b', 'c' }; + + /* Empty string. 
*/ + ASSERT (u16_grapheme_prev (NULL, NULL) == NULL); + ASSERT (u16_grapheme_prev (s, s) == NULL); + + /* Standalone 1-unit graphemes. */ + test_u16_grapheme_prev (1, 'a', -1); + test_u16_grapheme_prev (1, 'a', 'b', -1); + test_u16_grapheme_prev (1, 'a', 'b', 'c', -1); + + /* Single-unit, non-ASCII graphemes. */ +#define HIRAGANA_A 0x3042 /* あ: Hiragana letter 'a'. */ + test_u16_grapheme_prev (1, HIRAGANA_A, -1); + test_u16_grapheme_prev (1, HIRAGANA_A, 'x', -1); + test_u16_grapheme_prev (1, HIRAGANA_A, HIRAGANA_A, -1); + + /* Combining accents. */ +#define GRAVE 0x0300 /* Combining grave accent. */ +#define ACUTE 0x0301 /* Combining acute accent. */ + test_u16_grapheme_prev (2, 'e', ACUTE, -1); + test_u16_grapheme_prev (3, 'e', ACUTE, GRAVE, -1); + test_u16_grapheme_prev (1, 'e', ACUTE, 'x', -1); + test_u16_grapheme_prev (2, 'e', ACUTE, 'e', ACUTE, -1); + + /* Surrogate pairs. */ + test_u16_grapheme_prev (2, 0xd83d, 0xde10, -1); /* 😐: neutral face. */ + test_u16_grapheme_prev (3, 0xd83d, 0xde10, GRAVE, -1); + + return 0; +} diff --git a/tests/unigbrk/test-u32-grapheme-breaks.c b/tests/unigbrk/test-u32-grapheme-breaks.c new file mode 100644 index 000000000..e4e91ff1f --- /dev/null +++ b/tests/unigbrk/test-u32-grapheme-breaks.c @@ -0,0 +1,105 @@ +/* Grapheme cluster breaks test. + Copyright (C) 2010 Free Software Foundation, Inc. + + This program is free software: you can redistribute it and/or modify it + under the terms of the GNU Lesser General Public License as published + by the Free Software Foundation; either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public License + along with this program. If not, see . 
*/ + +/* Written by Ben Pfaff , 2010. */ + +#include + +/* Specification. */ +#include + +#include +#include +#include +#include + +#include "macros.h" + +static void +test_u32_grapheme_breaks (const char *expected, ...) +{ + size_t n = strlen (expected); + uint32_t s[16]; + va_list args; + char breaks[16]; + size_t i; + + ASSERT (n <= 16); + + memset (breaks, 0xcc, n); + + va_start (args, expected); + for (i = 0; i < n; i++) + { + int unit = va_arg (args, int); + ASSERT (unit >= 0); + s[i] = unit; + } + ASSERT (va_arg (args, int) == -1); + va_end (args); + + u32_grapheme_breaks (s, n, breaks); + for (i = 0; i < n; i++) + if (breaks[i] != (expected[i] == '#')) + { + size_t j; + + fprintf (stderr, "wrong grapheme breaks:\n"); + + fprintf (stderr, " input:"); + for (j = 0; j < n; j++) + fprintf (stderr, " %02x", s[j]); + putc ('\n', stderr); + + fprintf (stderr, "expected:"); + for (j = 0; j < n; j++) + fprintf (stderr, " %d", expected[j] == '#'); + putc ('\n', stderr); + + fprintf (stderr, " actual:"); + for (j = 0; j < n; j++) + fprintf (stderr, " %d", breaks[j]); + putc ('\n', stderr); + + abort (); + } +} + +int +main (void) +{ + static const char s[] = "abc"; + + /* Standalone 1-unit graphemes. */ + test_u32_grapheme_breaks ("#", 'a', -1); + test_u32_grapheme_breaks ("##", 'a', 'b', -1); + test_u32_grapheme_breaks ("###", 'a', 'b', 'c', -1); + +#define HIRAGANA_A 0x3042 /* あ: Hiragana letter 'a'. */ + test_u32_grapheme_breaks ("#", HIRAGANA_A, -1); + test_u32_grapheme_breaks ("##", HIRAGANA_A, 'x', -1); + test_u32_grapheme_breaks ("##", HIRAGANA_A, HIRAGANA_A, -1); + + /* Combining accents. */ +#define GRAVE 0x0300 /* Combining grave accent. */ +#define ACUTE 0x0301 /* Combining acute accent. 
*/ + test_u32_grapheme_breaks ("#_", 'e', ACUTE, -1); + test_u32_grapheme_breaks ("#__", 'e', ACUTE, GRAVE, -1); + test_u32_grapheme_breaks ("#_#", 'e', ACUTE, 'x', -1); + test_u32_grapheme_breaks ("#_#_", 'e', ACUTE, 'e', GRAVE, -1); + + return 0; +} diff --git a/tests/unigbrk/test-u32-grapheme-len.c b/tests/unigbrk/test-u32-grapheme-len.c new file mode 100644 index 000000000..af002f07e --- /dev/null +++ b/tests/unigbrk/test-u32-grapheme-len.c @@ -0,0 +1,96 @@ +/* Grapheme cluster length test. + Copyright (C) 2010 Free Software Foundation, Inc. + + This program is free software: you can redistribute it and/or modify it + under the terms of the GNU Lesser General Public License as published + by the Free Software Foundation; either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public License + along with this program. If not, see . */ + +/* Written by Ben Pfaff , 2010. */ + +#include + +/* Specification. */ +#include + +#include +#include +#include + +static void +test_u32_grapheme_len (size_t len, ...) +{ + uint32_t s[32]; + size_t retval; + va_list args; + size_t n; + + va_start (args, len); + n = 0; + for (;;) + { + int unit = va_arg (args, int); + if (unit == -1) + break; + else if (n >= sizeof s / sizeof *s) + abort (); + + s[n++] = unit; + } + va_end (args); + + retval = u32_grapheme_len (s, n); + if (retval != len) + { + size_t i; + + fprintf (stderr, "u32_grapheme_len counted %zu units, expected %zu:", + retval, len); + for (i = 0; i < n; i++) + fprintf (stderr, " %04x", (unsigned int) s[i]); + putc ('\n', stderr); + abort (); + } +} + + +int +main (void) +{ + /* Empty string. 
*/ + test_u32_grapheme_len (0, -1); + + /* Standalone 1-unit graphemes. */ + test_u32_grapheme_len (1, 'a', -1); + test_u32_grapheme_len (1, 'a', 'b', -1); + test_u32_grapheme_len (1, 'a', 'b', 'c', -1); + + /* Single-unit, non-ASCII graphemes. */ +#define HIRAGANA_A 0x3042 /* あ: Hiragana letter 'a'. */ + test_u32_grapheme_len (1, HIRAGANA_A, -1); + test_u32_grapheme_len (1, HIRAGANA_A, 'x', -1); + test_u32_grapheme_len (1, HIRAGANA_A, HIRAGANA_A, -1); + + /* Combining accents. */ +#define GRAVE 0x0300 /* Combining grave accent. */ +#define ACUTE 0x0301 /* Combining acute accent. */ + test_u32_grapheme_len (2, 'e', ACUTE, -1); + test_u32_grapheme_len (3, 'e', ACUTE, GRAVE, -1); + test_u32_grapheme_len (2, 'e', ACUTE, 'x', -1); + test_u32_grapheme_len (2, 'e', ACUTE, 'e', ACUTE, -1); + + /* Outside BMP. */ +#define NEUTRAL_FACE 0x1f610 /* 😐: neutral face. */ + test_u32_grapheme_len (1, NEUTRAL_FACE, -1); + test_u32_grapheme_len (2, NEUTRAL_FACE, GRAVE, -1); + + return 0; +} diff --git a/tests/unigbrk/test-u32-grapheme-next.c b/tests/unigbrk/test-u32-grapheme-next.c new file mode 100644 index 000000000..0c4017eb4 --- /dev/null +++ b/tests/unigbrk/test-u32-grapheme-next.c @@ -0,0 +1,103 @@ +/* Next grapheme cluster length test. + Copyright (C) 2010 Free Software Foundation, Inc. + + This program is free software: you can redistribute it and/or modify it + under the terms of the GNU Lesser General Public License as published + by the Free Software Foundation; either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public License + along with this program. If not, see . */ + +/* Written by Ben Pfaff , 2010. 
*/ + +#include + +/* Specification. */ +#include + +#include +#include +#include + +#include "macros.h" + +static void +test_u32_grapheme_next (size_t len, ...) +{ + const uint32_t *next; + uint32_t s[32]; + va_list args; + size_t n; + + va_start (args, len); + n = 0; + for (;;) + { + int unit = va_arg (args, int); + if (unit == -1) + break; + else if (n >= sizeof s / sizeof *s) + abort (); + + s[n++] = unit; + } + va_end (args); + + next = u32_grapheme_next (s, s + n); + if (next != s + len) + { + size_t i; + + if (next == NULL) + fputs ("u32_grapheme_next returned NULL", stderr); + else + fprintf (stderr, "u32_grapheme_next skipped %zu units", next - s); + fprintf (stderr, ", expected %zu:\n", len); + for (i = 0; i < n; i++) + fprintf (stderr, " %04x", s[i]); + putc ('\n', stderr); + abort (); + } +} + +int +main (void) +{ + static const uint32_t s[] = { 'a', 'b', 'c' }; + + /* Empty string. */ + ASSERT (u32_grapheme_next (NULL, NULL) == NULL); + ASSERT (u32_grapheme_next (s, s) == NULL); + + /* Standalone 1-unit graphemes. */ + test_u32_grapheme_next (1, 'a', -1); + test_u32_grapheme_next (1, 'a', 'b', -1); + test_u32_grapheme_next (1, 'a', 'b', 'c', -1); + + /* Single-unit, non-ASCII graphemes. */ +#define HIRAGANA_A 0x3042 /* あ: Hiragana letter 'a'. */ + test_u32_grapheme_next (1, HIRAGANA_A, -1); + test_u32_grapheme_next (1, HIRAGANA_A, 'x', -1); + test_u32_grapheme_next (1, HIRAGANA_A, HIRAGANA_A, -1); + + /* Combining accents. */ +#define GRAVE 0x0300 /* Combining grave accent. */ +#define ACUTE 0x0301 /* Combining acute accent. */ + test_u32_grapheme_next (2, 'e', ACUTE, -1); + test_u32_grapheme_next (3, 'e', ACUTE, GRAVE, -1); + test_u32_grapheme_next (2, 'e', ACUTE, 'x', -1); + test_u32_grapheme_next (2, 'e', ACUTE, 'e', ACUTE, -1); + + /* Outside BMP. */ +#define NEUTRAL_FACE 0x1f610 /* 😐: neutral face. 
*/ + test_u32_grapheme_next (1, NEUTRAL_FACE, -1); + test_u32_grapheme_next (2, NEUTRAL_FACE, GRAVE, -1); + + return 0; +} diff --git a/tests/unigbrk/test-u32-grapheme-prev.c b/tests/unigbrk/test-u32-grapheme-prev.c new file mode 100644 index 000000000..691ed8055 --- /dev/null +++ b/tests/unigbrk/test-u32-grapheme-prev.c @@ -0,0 +1,105 @@ +/* Previous grapheme cluster test. + Copyright (C) 2010 Free Software Foundation, Inc. + + This program is free software: you can redistribute it and/or modify it + under the terms of the GNU Lesser General Public License as published + by the Free Software Foundation; either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public License + along with this program. If not, see . */ + +/* Written by Ben Pfaff , 2010. */ + +#include + +/* Specification. */ +#include + +#include +#include +#include + +#include "macros.h" + +static void +test_u32_grapheme_prev (size_t len, ...) 
+{ + const uint32_t *prev; + const uint32_t *end; + uint32_t s[16]; + va_list args; + size_t n; + + va_start (args, len); + n = 0; + for (;;) + { + int unit = va_arg (args, int); + if (unit == -1) + break; + else if (n >= sizeof s / sizeof *s) + abort (); + + s[n++] = unit; + } + va_end (args); + + end = s + n; + prev = u32_grapheme_prev (end, s); + if (prev != end - len) + { + size_t i; + + if (prev == NULL) + fputs ("u32_grapheme_prev returned NULL", stderr); + else + fprintf (stderr, "u32_grapheme_prev skipped %zu units", end - prev); + fprintf (stderr, ", expected %zu:\n", len); + for (i = 0; i < n; i++) + fprintf (stderr, " %04x", s[i]); + putc ('\n', stderr); + abort (); + } +} + +int +main (void) +{ + static const uint32_t s[] = { 'a', 'b', 'c' }; + + /* Empty string. */ + ASSERT (u32_grapheme_prev (NULL, NULL) == NULL); + ASSERT (u32_grapheme_prev (s, s) == NULL); + + /* Standalone 1-unit graphemes. */ + test_u32_grapheme_prev (1, 'a', -1); + test_u32_grapheme_prev (1, 'a', 'b', -1); + test_u32_grapheme_prev (1, 'a', 'b', 'c', -1); + + /* Single-unit, non-ASCII graphemes. */ +#define HIRAGANA_A 0x3042 /* あ: Hiragana letter 'a'. */ + test_u32_grapheme_prev (1, HIRAGANA_A, -1); + test_u32_grapheme_prev (1, HIRAGANA_A, 'x', -1); + test_u32_grapheme_prev (1, HIRAGANA_A, HIRAGANA_A, -1); + + /* Combining accents. */ +#define GRAVE 0x0300 /* Combining grave accent. */ +#define ACUTE 0x0301 /* Combining acute accent. */ + test_u32_grapheme_prev (2, 'e', ACUTE, -1); + test_u32_grapheme_prev (3, 'e', ACUTE, GRAVE, -1); + test_u32_grapheme_prev (1, 'e', ACUTE, 'x', -1); + test_u32_grapheme_prev (2, 'e', ACUTE, 'e', ACUTE, -1); + + /* Outside BMP. */ +#define NEUTRAL_FACE 0x1f610 /* 😐: neutral face. 
*/ + test_u32_grapheme_prev (1, NEUTRAL_FACE, -1); + test_u32_grapheme_prev (2, NEUTRAL_FACE, GRAVE, -1); + + return 0; +} diff --git a/tests/unigbrk/test-u8-grapheme-breaks.c b/tests/unigbrk/test-u8-grapheme-breaks.c new file mode 100644 index 000000000..3bbebb29b --- /dev/null +++ b/tests/unigbrk/test-u8-grapheme-breaks.c @@ -0,0 +1,96 @@ +/* Grapheme cluster breaks test. + Copyright (C) 2010, 2011 Free Software Foundation, Inc. + + This program is free software: you can redistribute it and/or modify it + under the terms of the GNU Lesser General Public License as published + by the Free Software Foundation; either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public License + along with this program. If not, see . */ + +/* Written by Ben Pfaff , 2010. */ + +#include + +/* Specification. 
*/ +#include + +#include +#include +#include + +#include "macros.h" + +static void +test_u8_grapheme_breaks (const uint8_t *s, const char *expected) +{ + size_t n = strlen (expected); + char *breaks; + size_t i; + + breaks = malloc (n); + if (!breaks) + abort (); + memset (breaks, 0xcc, n); + + u8_grapheme_breaks (s, n, breaks); + for (i = 0; i < n; i++) + if (breaks[i] != (expected[i] == '#')) + { + size_t j; + + fprintf (stderr, "wrong grapheme breaks:\n"); + + fprintf (stderr, " input:"); + for (j = 0; j < n; j++) + fprintf (stderr, " %02x", s[j]); + putc ('\n', stderr); + + fprintf (stderr, "expected:"); + for (j = 0; j < n; j++) + fprintf (stderr, " %d", expected[j] == '#'); + putc ('\n', stderr); + + fprintf (stderr, " actual:"); + for (j = 0; j < n; j++) + fprintf (stderr, " %d", breaks[j]); + putc ('\n', stderr); + + abort (); + } + + free (breaks); +} + +int +main (void) +{ + static const char s[] = "abc"; + + /* Standalone 1-unit graphemes. */ + test_u8_grapheme_breaks ("a", "#"); + test_u8_grapheme_breaks ("ab", "##"); + test_u8_grapheme_breaks ("abc", "###"); + + /* Multi-unit, single code point graphemes. */ +#define HIRAGANA_A "\343\201\202" /* あ: Hiragana letter 'a'. */ + test_u8_grapheme_breaks (HIRAGANA_A, "#__"); + test_u8_grapheme_breaks (HIRAGANA_A"x", "#__#"); + test_u8_grapheme_breaks (HIRAGANA_A HIRAGANA_A, "#__#__"); + + /* Combining accents. */ +#define GRAVE "\314\200" /* Combining grave accent. */ +#define ACUTE "\314\201" /* Combining acute accent. */ + test_u8_grapheme_breaks ("e"ACUTE, "#__"); + test_u8_grapheme_breaks ("e"ACUTE GRAVE, "#____"); + test_u8_grapheme_breaks ("e"ACUTE"x", "#__#"); + test_u8_grapheme_breaks ("e"ACUTE "e"ACUTE, "#__#__"); + + return 0; +} diff --git a/tests/unigbrk/test-u8-grapheme-len.c b/tests/unigbrk/test-u8-grapheme-len.c new file mode 100644 index 000000000..19b7a543a --- /dev/null +++ b/tests/unigbrk/test-u8-grapheme-len.c @@ -0,0 +1,52 @@ +/* Grapheme cluster length test. 
+ Copyright (C) 2010, 2011 Free Software Foundation, Inc. + + This program is free software: you can redistribute it and/or modify it + under the terms of the GNU Lesser General Public License as published + by the Free Software Foundation; either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public License + along with this program. If not, see . */ + +/* Written by Ben Pfaff , 2010. */ + +#include + +/* Specification. */ +#include + +#include "macros.h" + +int +main (void) +{ + /* Empty string. */ + ASSERT (u8_grapheme_len ("", 0) == 0); + + /* Standalone 1-unit graphemes. */ + ASSERT (u8_grapheme_len ("a", 1) == 1); + ASSERT (u8_grapheme_len ("ab", 2) == 1); + ASSERT (u8_grapheme_len ("abc", 3) == 1); + + /* Multi-unit, single code point graphemes. */ +#define HIRAGANA_A "\343\201\202" /* あ: Hiragana letter 'a'. */ + ASSERT (u8_grapheme_len (HIRAGANA_A, 3) == 3); + ASSERT (u8_grapheme_len (HIRAGANA_A"x", 4) == 3); + ASSERT (u8_grapheme_len (HIRAGANA_A HIRAGANA_A, 6) == 3); + + /* Combining accents. */ +#define GRAVE "\314\200" /* Combining grave accent. */ +#define ACUTE "\314\201" /* Combining acute accent. */ + ASSERT (u8_grapheme_len ("e"ACUTE, 3) == 3); + ASSERT (u8_grapheme_len ("e"ACUTE GRAVE, 5) == 5); + ASSERT (u8_grapheme_len ("e"ACUTE"x", 4) == 3); + ASSERT (u8_grapheme_len ("e"ACUTE "e"ACUTE, 6) == 3); + + return 0; +} diff --git a/tests/unigbrk/test-u8-grapheme-next.c b/tests/unigbrk/test-u8-grapheme-next.c new file mode 100644 index 000000000..e67178ec7 --- /dev/null +++ b/tests/unigbrk/test-u8-grapheme-next.c @@ -0,0 +1,78 @@ +/* Next grapheme cluster length test. + Copyright (C) 2010, 2011 Free Software Foundation, Inc. 
+ + This program is free software: you can redistribute it and/or modify it + under the terms of the GNU Lesser General Public License as published + by the Free Software Foundation; either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public License + along with this program. If not, see . */ + +/* Written by Ben Pfaff , 2010. */ + +#include + +/* Specification. */ +#include + +#include +#include + +#include "macros.h" + +static void +test_u8_grapheme_next (const uint8_t *s, size_t n, size_t len) +{ + const uint8_t *next = u8_grapheme_next (s, s + n); + if (next != s + len) + { + size_t i; + + if (next == NULL) + fputs ("u8_grapheme_next returned NULL", stderr); + else + fprintf (stderr, "u8_grapheme_next skipped %zu bytes", next - s); + fprintf (stderr, ", expected %zu:\n", len); + for (i = 0; i < n; i++) + fprintf (stderr, " %02x", s[i]); + putc ('\n', stderr); + abort (); + } +} + +int +main (void) +{ + static const uint8_t s[] = "abc"; + + /* Empty string. */ + ASSERT (u8_grapheme_next (NULL, NULL) == NULL); + ASSERT (u8_grapheme_next (s, s) == NULL); + + /* Standalone 1-unit graphemes. */ + test_u8_grapheme_next ("a", 1, 1); + test_u8_grapheme_next ("ab", 2, 1); + test_u8_grapheme_next ("abc", 3, 1); + + /* Multi-unit, single code point graphemes. */ +#define HIRAGANA_A "\343\201\202" /* あ: Hiragana letter 'a'. */ + test_u8_grapheme_next (HIRAGANA_A, 3, 3); + test_u8_grapheme_next (HIRAGANA_A"x", 4, 3); + test_u8_grapheme_next (HIRAGANA_A HIRAGANA_A, 6, 3); + + /* Combining accents. */ +#define GRAVE "\314\200" /* Combining grave accent. */ +#define ACUTE "\314\201" /* Combining acute accent. 
*/ + test_u8_grapheme_next ("e"ACUTE, 3, 3); + test_u8_grapheme_next ("e"ACUTE GRAVE, 5, 5); + test_u8_grapheme_next ("e"ACUTE"x", 4, 3); + test_u8_grapheme_next ("e"ACUTE "e"ACUTE, 6, 3); + + return 0; +} diff --git a/tests/unigbrk/test-u8-grapheme-prev.c b/tests/unigbrk/test-u8-grapheme-prev.c new file mode 100644 index 000000000..2f1090131 --- /dev/null +++ b/tests/unigbrk/test-u8-grapheme-prev.c @@ -0,0 +1,79 @@ +/* Previous grapheme cluster test. + Copyright (C) 2010, 2011 Free Software Foundation, Inc. + + This program is free software: you can redistribute it and/or modify it + under the terms of the GNU Lesser General Public License as published + by the Free Software Foundation; either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public License + along with this program. If not, see . */ + +/* Written by Ben Pfaff , 2010. */ + +#include + +/* Specification. */ +#include + +#include +#include + +#include "macros.h" + +static void +test_u8_grapheme_prev (const uint8_t *s, size_t n, size_t len) +{ + const uint8_t *end = s + n; + const uint8_t *prev = u8_grapheme_prev (end, s); + if (prev != end - len) + { + size_t i; + + if (prev == NULL) + fputs ("u8_grapheme_prev returned NULL", stderr); + else + fprintf (stderr, "u8_grapheme_prev skipped %zu bytes", end - prev); + fprintf (stderr, ", expected %zu:\n", len); + for (i = 0; i < n; i++) + fprintf (stderr, " %02x", s[i]); + putc ('\n', stderr); + abort (); + } +} + +int +main (void) +{ + static const char s[] = "abc"; + + /* Empty string. */ + ASSERT (u8_grapheme_prev (NULL, NULL) == NULL); + ASSERT (u8_grapheme_prev (s, s) == NULL); + + /* Standalone 1-unit graphemes. 
*/ + test_u8_grapheme_prev ("a", 1, 1); + test_u8_grapheme_prev ("ab", 2, 1); + test_u8_grapheme_prev ("abc", 3, 1); + + /* Multi-unit, single code point graphemes. */ +#define HIRAGANA_A "\343\201\202" /* あ: Hiragana letter 'a'. */ + test_u8_grapheme_prev (HIRAGANA_A, 3, 3); + test_u8_grapheme_prev (HIRAGANA_A"x", 4, 1); + test_u8_grapheme_prev (HIRAGANA_A HIRAGANA_A, 6, 3); + + /* Combining accents. */ +#define GRAVE "\314\200" /* Combining grave accent. */ +#define ACUTE "\314\201" /* Combining acute accent. */ + test_u8_grapheme_prev ("e"ACUTE, 3, 3); + test_u8_grapheme_prev ("e"ACUTE GRAVE, 5, 5); + test_u8_grapheme_prev ("e"ACUTE"x", 4, 1); + test_u8_grapheme_prev ("e"ACUTE "e"ACUTE, 6, 3); + + return 0; +} diff --git a/tests/unigbrk/test-ulc-grapheme-breaks.c b/tests/unigbrk/test-ulc-grapheme-breaks.c new file mode 100644 index 000000000..5bb7bdada --- /dev/null +++ b/tests/unigbrk/test-ulc-grapheme-breaks.c @@ -0,0 +1,86 @@ +/* Grapheme cluster breaks test. + Copyright (C) 2009, 2010, 2011 Free Software Foundation, Inc. + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . */ + +/* Written by Ben Pfaff , 2010, + based on code by Bruno Haible , 2009. */ + +#include + +#include "unigbrk.h" + +#include +#include + +#include "macros.h" + +static bool +is_8859_6_break (unsigned char c) +{ + /* ISO-8859-6 has combining characters in positions 0xeb through 0xf2. 
*/ + return !(c >= 0xeb && c <= 0xf2); +} + +int +main () +{ + /* configure should already have checked that the locale is supported. */ + if (setlocale (LC_ALL, "") == NULL) + return 1; + + /* Test case n = 0. */ + ulc_grapheme_breaks (NULL, 0, NULL); + +#if HAVE_ICONV + { + /* This is just a random collection of bytes from ISO-8859-6. + + (We use ISO-8859-6 because it is one of very few non-UTF-8 locale + encodings supported by glibc that have combining characters.) */ + static const char s[] = "ZYX\352\353W\360\361V\362"; + enum { LENGTH = sizeof s - 1 }; + char p[LENGTH]; + size_t i; + + ulc_grapheme_breaks (s, LENGTH, p); + for (i = 0; i < LENGTH; i++) + if (p[i] != is_8859_6_break (s[i])) + { + size_t j; + + fprintf (stderr, "wrong grapheme breaks:\n"); + + fprintf (stderr, " input:"); + for (j = 0; j < LENGTH; j++) + fprintf (stderr, " %02x", (unsigned char) s[j]); + putc ('\n', stderr); + + fprintf (stderr, "expected:"); + for (j = 0; j < LENGTH; j++) + fprintf (stderr, " %d", is_8859_6_break (s[j])); + putc ('\n', stderr); + + fprintf (stderr, " actual:"); + for (j = 0; j < LENGTH; j++) + fprintf (stderr, " %d", p[j]); + putc ('\n', stderr); + + abort (); + } + } +#endif + + return 0; +} diff --git a/tests/unigbrk/test-ulc-grapheme-breaks.sh b/tests/unigbrk/test-ulc-grapheme-breaks.sh new file mode 100755 index 000000000..534df61b3 --- /dev/null +++ b/tests/unigbrk/test-ulc-grapheme-breaks.sh @@ -0,0 +1,15 @@ +#!/bin/sh + +# Test in an ISO-8859-6 locale. +: ${LOCALE_AR=ar_SA} +if test $LOCALE_AR = none; then + if test -f /usr/bin/localedef; then + echo "Skipping test: no traditional Arabic locale is installed" + else + echo "Skipping test: no traditional Arabic locale is supported" + fi + exit 77 +fi + +LC_ALL=$LOCALE_AR \ +./test-ulc-grapheme-breaks${EXEEXT} -- 2.11.0