From 29d6db73db13da13b1b1dd6a2635d3cc16effab0 Mon Sep 17 00:00:00 2001 From: Bruno Haible <bruno@clisp.org> Date: Sat, 21 Feb 2009 12:46:44 +0100 Subject: [PATCH] Tests for module 'uninorm/nfd'. --- ChangeLog | 13 ++ modules/uninorm/nfd-tests | 34 ++++ tests/uninorm/test-nfd.c | 38 ++++ tests/uninorm/test-u16-nfd.c | 326 ++++++++++++++++++++++++++++++ tests/uninorm/test-u32-nfd-big.c | 125 ++++++++++++ tests/uninorm/test-u32-nfd-big.sh | 2 + tests/uninorm/test-u32-nfd.c | 326 ++++++++++++++++++++++++++++++ tests/uninorm/test-u32-normalize-big.c | 315 +++++++++++++++++++++++++++++ tests/uninorm/test-u32-normalize-big.h | 70 +++++++ tests/uninorm/test-u8-nfd.c | 355 +++++++++++++++++++++++++++++++++ 10 files changed, 1604 insertions(+) create mode 100644 modules/uninorm/nfd-tests create mode 100644 tests/uninorm/test-nfd.c create mode 100644 tests/uninorm/test-u16-nfd.c create mode 100644 tests/uninorm/test-u32-nfd-big.c create mode 100755 tests/uninorm/test-u32-nfd-big.sh create mode 100644 tests/uninorm/test-u32-nfd.c create mode 100644 tests/uninorm/test-u32-normalize-big.c create mode 100644 tests/uninorm/test-u32-normalize-big.h create mode 100644 tests/uninorm/test-u8-nfd.c diff --git a/ChangeLog b/ChangeLog index eece730c0..2f60da33f 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,5 +1,18 @@ 2009-02-21 Bruno Haible <bruno@clisp.org> + Tests for module 'uninorm/nfd'. + * tests/uninorm/test-nfd.c: New file. + * tests/uninorm/test-u8-nfd.c: New file. + * tests/uninorm/test-u16-nfd.c: New file. + * tests/uninorm/test-u32-nfd.c: New file. + * tests/uninorm/test-u32-nfd-big.sh: New file. + * tests/uninorm/test-u32-nfd-big.c: New file. + * tests/uninorm/test-u32-normalize-big.h: New file. + * tests/uninorm/test-u32-normalize-big.c: New file. + * tests/uninorm/NormalizationTest.txt: New file, created from + Unicode 5.1.0 NormalizationTest.txt. + * modules/uninorm/nfd-tests: New file. + New module 'uninorm/nfd'. * lib/uninorm/nfd.c: New file. * modules/uninorm/nfd: New file. 
diff --git a/modules/uninorm/nfd-tests b/modules/uninorm/nfd-tests new file mode 100644 index 000000000..63bcbcee0 --- /dev/null +++ b/modules/uninorm/nfd-tests @@ -0,0 +1,34 @@ +Files: +tests/uninorm/test-nfd.c +tests/uninorm/test-u8-nfd.c +tests/uninorm/test-u16-nfd.c +tests/uninorm/test-u32-nfd.c +tests/uninorm/test-u32-nfd-big.sh +tests/uninorm/test-u32-nfd-big.c +tests/uninorm/test-u32-normalize-big.h +tests/uninorm/test-u32-normalize-big.c +tests/uninorm/NormalizationTest.txt + +Depends-on: +unistr/u8-cmp +unistr/u16-cmp +unistr/u32-cmp +unistr/u32-strlen +xalloc +progname + +configure.ac: +AC_CHECK_DECLS_ONCE([alarm]) + +Makefile.am: +TESTS += test-nfd uninorm/test-u32-nfd-big.sh +check_PROGRAMS += test-nfd test-u32-nfd-big +test_nfd_SOURCES = \ + uninorm/test-nfd.c \ + uninorm/test-u8-nfd.c \ + uninorm/test-u16-nfd.c \ + uninorm/test-u32-nfd.c +test_u32_nfd_big_SOURCES = \ + uninorm/test-u32-nfd-big.c \ + uninorm/test-u32-normalize-big.c + diff --git a/tests/uninorm/test-nfd.c b/tests/uninorm/test-nfd.c new file mode 100644 index 000000000..99580cb33 --- /dev/null +++ b/tests/uninorm/test-nfd.c @@ -0,0 +1,38 @@ +/* Test of canonical decomposition of Unicode strings. + Copyright (C) 2009 Free Software Foundation, Inc. + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. */ + +/* Written by Bruno Haible <bruno@clisp.org>, 2009. */ + +#include <config.h> + +#include "uninorm.h" + +/* Check that UNINORM_NFD is defined and links. 
*/ +uninorm_t n = UNINORM_NFD; + +extern void test_u8_nfd (void); +extern void test_u16_nfd (void); +extern void test_u32_nfd (void); + +int +main () +{ + test_u32_nfd (); + test_u16_nfd (); + test_u8_nfd (); + + return 0; +} diff --git a/tests/uninorm/test-u16-nfd.c b/tests/uninorm/test-u16-nfd.c new file mode 100644 index 000000000..84499f0d3 --- /dev/null +++ b/tests/uninorm/test-u16-nfd.c @@ -0,0 +1,326 @@ +/* Test of canonical decomposition of UTF-16 strings. + Copyright (C) 2009 Free Software Foundation, Inc. + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. */ + +/* Written by Bruno Haible <bruno@clisp.org>, 2009. */ + +#include <config.h> + +#if GNULIB_UNINORM_U16_NORMALIZE + +#include "uninorm.h" + +#include <signal.h> +#include <stdio.h> +#include <stdlib.h> +#include <unistd.h> + +#include "unistr.h" + +#define SIZEOF(array) (sizeof (array) / sizeof (array[0])) +#define ASSERT(expr) \ + do \ + { \ + if (!(expr)) \ + { \ + fprintf (stderr, "%s:%d: assertion failed\n", __FILE__, __LINE__); \ + fflush (stderr); \ + abort (); \ + } \ + } \ + while (0) + +static int /* 0 if all checks pass, else the number (1..11) of the first failed check */ +check (const uint16_t *input, size_t input_length, + const uint16_t *expected, size_t expected_length) +{ + size_t length; + uint16_t *result; + + /* Test return conventions with resultbuf == NULL. 
*/ + result = u16_normalize (UNINORM_NFD, input, input_length, NULL, &length); + if (!(result != NULL)) + return 1; + if (!(length == expected_length)) + return 2; + if (!(u16_cmp (result, expected, expected_length) == 0)) + return 3; + free (result); + + /* Test return conventions with resultbuf too small: the result must be a separate allocation, not the caller's buffer. */ + if (expected_length > 0) + { + uint16_t *preallocated; + + length = expected_length - 1; + preallocated = (uint16_t *) malloc (length * sizeof (uint16_t)); + result = u16_normalize (UNINORM_NFD, input, input_length, preallocated, &length); + if (!(result != NULL)) + return 4; + if (!(result != preallocated)) + return 5; + if (!(length == expected_length)) + return 6; + if (!(u16_cmp (result, expected, expected_length) == 0)) + return 7; + free (result); + free (preallocated); + } + + /* Test return conventions with resultbuf large enough: the result must be the caller's buffer itself. */ + { + uint16_t *preallocated; + + length = expected_length; + preallocated = (uint16_t *) malloc (length * sizeof (uint16_t)); + result = u16_normalize (UNINORM_NFD, input, input_length, preallocated, &length); + if (!(result != NULL)) + return 8; + if (!(result == preallocated)) + return 9; + if (!(length == expected_length)) + return 10; + if (!(u16_cmp (result, expected, expected_length) == 0)) + return 11; + free (preallocated); + } + + return 0; +} + +void +test_u16_nfd (void) +{ + { /* SPACE */ + static const uint16_t input[] = { 0x0020 }; + ASSERT (check (input, SIZEOF (input), input, SIZEOF (input)) == 0); + } + + { /* LATIN CAPITAL LETTER A WITH DIAERESIS */ + static const uint16_t input[] = { 0x00C4 }; + static const uint16_t expected[] = { 0x0041, 0x0308 }; + ASSERT (check (input, SIZEOF (input), expected, SIZEOF (expected)) == 0); + } + + { /* LATIN CAPITAL LETTER A WITH DIAERESIS AND MACRON */ + static const uint16_t input[] = { 0x01DE }; + static const uint16_t expected[] = { 0x0041, 0x0308, 0x0304 }; + ASSERT (check (input, SIZEOF (input), expected, SIZEOF (expected)) == 0); + } + + { /* GREEK DIALYTIKA 
AND PERISPOMENI */ + static const uint16_t input[] = { 0x1FC1 }; + static const uint16_t expected[] = { 0x00A8, 0x0342 }; + ASSERT (check (input, SIZEOF (input), expected, SIZEOF (expected)) == 0); + } + + { /* SCRIPT SMALL L */ + static const uint16_t input[] = { 0x2113 }; + ASSERT (check (input, SIZEOF (input), input, SIZEOF (input)) == 0); + } + + { /* NO-BREAK SPACE */ + static const uint16_t input[] = { 0x00A0 }; + ASSERT (check (input, SIZEOF (input), input, SIZEOF (input)) == 0); + } + + { /* ARABIC LETTER VEH INITIAL FORM */ + static const uint16_t input[] = { 0xFB6C }; + ASSERT (check (input, SIZEOF (input), input, SIZEOF (input)) == 0); + } + + { /* ARABIC LETTER VEH MEDIAL FORM */ + static const uint16_t input[] = { 0xFB6D }; + ASSERT (check (input, SIZEOF (input), input, SIZEOF (input)) == 0); + } + + { /* ARABIC LETTER VEH FINAL FORM */ + static const uint16_t input[] = { 0xFB6B }; + ASSERT (check (input, SIZEOF (input), input, SIZEOF (input)) == 0); + } + + { /* ARABIC LETTER VEH ISOLATED FORM */ + static const uint16_t input[] = { 0xFB6A }; + ASSERT (check (input, SIZEOF (input), input, SIZEOF (input)) == 0); + } + + { /* CIRCLED NUMBER FIFTEEN */ + static const uint16_t input[] = { 0x246E }; + ASSERT (check (input, SIZEOF (input), input, SIZEOF (input)) == 0); + } + + { /* TRADE MARK SIGN */ + static const uint16_t input[] = { 0x2122 }; + ASSERT (check (input, SIZEOF (input), input, SIZEOF (input)) == 0); + } + + { /* LATIN SUBSCRIPT SMALL LETTER I */ + static const uint16_t input[] = { 0x1D62 }; + ASSERT (check (input, SIZEOF (input), input, SIZEOF (input)) == 0); + } + + { /* PRESENTATION FORM FOR VERTICAL LEFT PARENTHESIS */ + static const uint16_t input[] = { 0xFE35 }; + ASSERT (check (input, SIZEOF (input), input, SIZEOF (input)) == 0); + } + + { /* FULLWIDTH LATIN CAPITAL LETTER A */ + static const uint16_t input[] = { 0xFF21 }; + ASSERT (check (input, SIZEOF (input), input, SIZEOF (input)) == 0); + } + + { /* HALFWIDTH IDEOGRAPHIC COMMA */ + 
static const uint16_t input[] = { 0xFF64 }; + ASSERT (check (input, SIZEOF (input), input, SIZEOF (input)) == 0); + } + + { /* SMALL IDEOGRAPHIC COMMA */ + static const uint16_t input[] = { 0xFE51 }; + ASSERT (check (input, SIZEOF (input), input, SIZEOF (input)) == 0); + } + + { /* SQUARE MHZ */ + static const uint16_t input[] = { 0x3392 }; + ASSERT (check (input, SIZEOF (input), input, SIZEOF (input)) == 0); + } + + { /* VULGAR FRACTION THREE EIGHTHS */ + static const uint16_t input[] = { 0x215C }; + ASSERT (check (input, SIZEOF (input), input, SIZEOF (input)) == 0); + } + + { /* MICRO SIGN */ + static const uint16_t input[] = { 0x00B5 }; + ASSERT (check (input, SIZEOF (input), input, SIZEOF (input)) == 0); + } + + { /* ARABIC LIGATURE SALLALLAHOU ALAYHE WASALLAM */ + static const uint16_t input[] = { 0xFDFA }; + ASSERT (check (input, SIZEOF (input), input, SIZEOF (input)) == 0); + } + + { /* HANGUL SYLLABLE GEUL */ + static const uint16_t input[] = { 0xAE00 }; + static const uint16_t expected[] = { 0x1100, 0x1173, 0x11AF }; + ASSERT (check (input, SIZEOF (input), expected, SIZEOF (expected)) == 0); + } + + { /* HANGUL SYLLABLE GEU */ + static const uint16_t input[] = { 0xADF8 }; + static const uint16_t expected[] = { 0x1100, 0x1173 }; + ASSERT (check (input, SIZEOF (input), expected, SIZEOF (expected)) == 0); + } + + { /* "Grüß Gott. Здравствуйте! 
x=(-b±sqrt(b²-4ac))/(2a) 日本語,中文,한글" */ + static const uint16_t input[] = + { 'G', 'r', 0x00FC, 0x00DF, ' ', 'G', 'o', 't', 't', '.', ' ', + 0x0417, 0x0434, 0x0440, 0x0430, 0x0432, 0x0441, 0x0442, 0x0432, 0x0443, + 0x0439, 0x0442, 0x0435, '!', ' ', + 'x', '=', '(', '-', 'b', 0x00B1, 's', 'q', 'r', 't', '(', 'b', 0x00B2, + '-', '4', 'a', 'c', ')', ')', '/', '(', '2', 'a', ')', ' ', ' ', + 0x65E5, 0x672C, 0x8A9E, ',', 0x4E2D, 0x6587, ',', 0xD55C, 0xAE00, '\n' + }; + static const uint16_t expected[] = + { 'G', 'r', 0x0075, 0x0308, 0x00DF, ' ', 'G', 'o', 't', 't', '.', ' ', + 0x0417, 0x0434, 0x0440, 0x0430, 0x0432, 0x0441, 0x0442, 0x0432, 0x0443, + 0x0438, 0x0306, 0x0442, 0x0435, '!', ' ', + 'x', '=', '(', '-', 'b', 0x00B1, 's', 'q', 'r', 't', '(', 'b', 0x00B2, + '-', '4', 'a', 'c', ')', ')', '/', '(', '2', 'a', ')', ' ', ' ', + 0x65E5, 0x672C, 0x8A9E, ',', 0x4E2D, 0x6587, ',', + 0x1112, 0x1161, 0x11AB, 0x1100, 0x1173, 0x11AF, '\n' + }; + ASSERT (check (input, SIZEOF (input), expected, SIZEOF (expected)) == 0); + } + +#if HAVE_DECL_ALARM + /* Declare failure if test takes too long, by using default abort + caused by SIGALRM. */ + signal (SIGALRM, SIG_DFL); + alarm (50); +#endif + + /* Check that the sorting is not O(n²) but O(n log n). */ + { + int pass; + for (pass = 0; pass < 3; pass++) + { + size_t repeat = 1; + size_t m = 100000; + uint16_t *input = (uint16_t *) malloc (2 * m * sizeof (uint16_t)); + if (input != NULL) + { + uint16_t *expected = input + m; + size_t m1 = m / 2; + size_t m2 = (m - 1) / 2; + /* NB: m1 + m2 == m - 1. 
*/ + uint16_t *p; + size_t i; + + input[0] = 0x0041; + p = input + 1; + switch (pass) + { + case 0: + for (i = 0; i < m1; i++) + *p++ = 0x0319; + for (i = 0; i < m2; i++) + *p++ = 0x0300; + break; + + case 1: + for (i = 0; i < m2; i++) + *p++ = 0x0300; + for (i = 0; i < m1; i++) + *p++ = 0x0319; + break; + + case 2: + for (i = 0; i < m2; i++) + { + *p++ = 0x0319; + *p++ = 0x0300; + } + for (; i < m1; i++) + *p++ = 0x0319; + break; + + default: + abort (); + } + + expected[0] = 0x0041; + p = expected + 1; + for (i = 0; i < m1; i++) + *p++ = 0x0319; + for (i = 0; i < m2; i++) + *p++ = 0x0300; + + for (; repeat > 0; repeat--) + ASSERT (check (input, m, expected, m) == 0); + + free (input); + } + } + } +} + +#else + +void +test_u16_nfd (void) +{ +} + +#endif diff --git a/tests/uninorm/test-u32-nfd-big.c b/tests/uninorm/test-u32-nfd-big.c new file mode 100644 index 000000000..9bfd0d895 --- /dev/null +++ b/tests/uninorm/test-u32-nfd-big.c @@ -0,0 +1,125 @@ +/* Test of Unicode compliance of canonical decomposition of UTF-32 strings. + Copyright (C) 2009 Free Software Foundation, Inc. + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. */ + +/* Written by Bruno Haible <bruno@clisp.org>, 2009. 
*/ + +#include <config.h> + +#if GNULIB_UNINORM_U32_NORMALIZE + +#include "uninorm.h" + +#include <stdlib.h> + +#include "unistr.h" +#include "progname.h" +#include "test-u32-normalize-big.h" + +static int /* 0 if all five checks pass, else the number (1..5) of the first failed check */ +check (const uint32_t *c1, size_t c1_length, + const uint32_t *c2, size_t c2_length, + const uint32_t *c3, size_t c3_length, + const uint32_t *c4, size_t c4_length, + const uint32_t *c5, size_t c5_length) +{ + /* Check + c3 == NFD(c1) == NFD(c2) == NFD(c3) + c5 == NFD(c4) == NFD(c5) + */ + { + size_t length; + uint32_t *result; + + result = u32_normalize (UNINORM_NFD, c1, c1_length, NULL, &length); + if (!(result != NULL + && length == c3_length + && u32_cmp (result, c3, c3_length) == 0)) + return 1; + free (result); + } + { + size_t length; + uint32_t *result; + + result = u32_normalize (UNINORM_NFD, c2, c2_length, NULL, &length); + if (!(result != NULL + && length == c3_length + && u32_cmp (result, c3, c3_length) == 0)) + return 2; + free (result); + } + { + size_t length; + uint32_t *result; + + result = u32_normalize (UNINORM_NFD, c3, c3_length, NULL, &length); + if (!(result != NULL + && length == c3_length + && u32_cmp (result, c3, c3_length) == 0)) + return 3; + free (result); + } + { + size_t length; + uint32_t *result; + + result = u32_normalize (UNINORM_NFD, c4, c4_length, NULL, &length); + if (!(result != NULL + && length == c5_length + && u32_cmp (result, c5, c5_length) == 0)) + return 4; + free (result); + } + { + size_t length; + uint32_t *result; + + result = u32_normalize (UNINORM_NFD, c5, c5_length, NULL, &length); + if (!(result != NULL + && length == c5_length + && u32_cmp (result, c5, c5_length) == 0)) + return 5; + free (result); + } + return 0; +} + +int +main (int argc, char *argv[]) +{ + struct normalization_test_file file; + + set_program_name (argv[0]); + read_normalization_test_file (argv[1], &file); /* NOTE(review): argv[1] is used without checking argc; the wrapper script test-u32-nfd-big.sh always passes the file name */ + + test_specific (&file, check); + test_other (&file, UNINORM_NFD); + + return 0; +} + +#else + +#include <stdio.h> + +int +main () +{ + fprintf (stderr, "Skipping 
test: uninorm/u32-normalize module not included.\n"); + return 77; +} + +#endif diff --git a/tests/uninorm/test-u32-nfd-big.sh b/tests/uninorm/test-u32-nfd-big.sh new file mode 100755 index 000000000..3cfe2c562 --- /dev/null +++ b/tests/uninorm/test-u32-nfd-big.sh @@ -0,0 +1,2 @@ +#!/bin/sh +exec ./test-u32-nfd-big${EXEEXT} "$srcdir/uninorm/NormalizationTest.txt" diff --git a/tests/uninorm/test-u32-nfd.c b/tests/uninorm/test-u32-nfd.c new file mode 100644 index 000000000..9aa820a3d --- /dev/null +++ b/tests/uninorm/test-u32-nfd.c @@ -0,0 +1,326 @@ +/* Test of canonical decomposition of UTF-32 strings. + Copyright (C) 2009 Free Software Foundation, Inc. + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. */ + +/* Written by Bruno Haible <bruno@clisp.org>, 2009. */ + +#include <config.h> + +#if GNULIB_UNINORM_U32_NORMALIZE + +#include "uninorm.h" + +#include <signal.h> +#include <stdio.h> +#include <stdlib.h> +#include <unistd.h> + +#include "unistr.h" + +#define SIZEOF(array) (sizeof (array) / sizeof (array[0])) +#define ASSERT(expr) \ + do \ + { \ + if (!(expr)) \ + { \ + fprintf (stderr, "%s:%d: assertion failed\n", __FILE__, __LINE__); \ + fflush (stderr); \ + abort (); \ + } \ + } \ + while (0) + +static int /* 0 if all checks pass, else the number (1..11) of the first failed check */ +check (const uint32_t *input, size_t input_length, + const uint32_t *expected, size_t expected_length) +{ + size_t length; + uint32_t *result; + + /* Test return conventions with resultbuf == NULL. 
*/ + result = u32_normalize (UNINORM_NFD, input, input_length, NULL, &length); + if (!(result != NULL)) + return 1; + if (!(length == expected_length)) + return 2; + if (!(u32_cmp (result, expected, expected_length) == 0)) + return 3; + free (result); + + /* Test return conventions with resultbuf too small: the result must be a separate allocation, not the caller's buffer. */ + if (expected_length > 0) + { + uint32_t *preallocated; + + length = expected_length - 1; + preallocated = (uint32_t *) malloc (length * sizeof (uint32_t)); + result = u32_normalize (UNINORM_NFD, input, input_length, preallocated, &length); + if (!(result != NULL)) + return 4; + if (!(result != preallocated)) + return 5; + if (!(length == expected_length)) + return 6; + if (!(u32_cmp (result, expected, expected_length) == 0)) + return 7; + free (result); + free (preallocated); + } + + /* Test return conventions with resultbuf large enough: the result must be the caller's buffer itself. */ + { + uint32_t *preallocated; + + length = expected_length; + preallocated = (uint32_t *) malloc (length * sizeof (uint32_t)); + result = u32_normalize (UNINORM_NFD, input, input_length, preallocated, &length); + if (!(result != NULL)) + return 8; + if (!(result == preallocated)) + return 9; + if (!(length == expected_length)) + return 10; + if (!(u32_cmp (result, expected, expected_length) == 0)) + return 11; + free (preallocated); + } + + return 0; +} + +void +test_u32_nfd (void) +{ + { /* SPACE */ + static const uint32_t input[] = { 0x0020 }; + ASSERT (check (input, SIZEOF (input), input, SIZEOF (input)) == 0); + } + + { /* LATIN CAPITAL LETTER A WITH DIAERESIS */ + static const uint32_t input[] = { 0x00C4 }; + static const uint32_t expected[] = { 0x0041, 0x0308 }; + ASSERT (check (input, SIZEOF (input), expected, SIZEOF (expected)) == 0); + } + + { /* LATIN CAPITAL LETTER A WITH DIAERESIS AND MACRON */ + static const uint32_t input[] = { 0x01DE }; + static const uint32_t expected[] = { 0x0041, 0x0308, 0x0304 }; + ASSERT (check (input, SIZEOF (input), expected, SIZEOF (expected)) == 0); + } + + { /* GREEK DIALYTIKA 
AND PERISPOMENI */ + static const uint32_t input[] = { 0x1FC1 }; + static const uint32_t expected[] = { 0x00A8, 0x0342 }; + ASSERT (check (input, SIZEOF (input), expected, SIZEOF (expected)) == 0); + } + + { /* SCRIPT SMALL L */ + static const uint32_t input[] = { 0x2113 }; + ASSERT (check (input, SIZEOF (input), input, SIZEOF (input)) == 0); + } + + { /* NO-BREAK SPACE */ + static const uint32_t input[] = { 0x00A0 }; + ASSERT (check (input, SIZEOF (input), input, SIZEOF (input)) == 0); + } + + { /* ARABIC LETTER VEH INITIAL FORM */ + static const uint32_t input[] = { 0xFB6C }; + ASSERT (check (input, SIZEOF (input), input, SIZEOF (input)) == 0); + } + + { /* ARABIC LETTER VEH MEDIAL FORM */ + static const uint32_t input[] = { 0xFB6D }; + ASSERT (check (input, SIZEOF (input), input, SIZEOF (input)) == 0); + } + + { /* ARABIC LETTER VEH FINAL FORM */ + static const uint32_t input[] = { 0xFB6B }; + ASSERT (check (input, SIZEOF (input), input, SIZEOF (input)) == 0); + } + + { /* ARABIC LETTER VEH ISOLATED FORM */ + static const uint32_t input[] = { 0xFB6A }; + ASSERT (check (input, SIZEOF (input), input, SIZEOF (input)) == 0); + } + + { /* CIRCLED NUMBER FIFTEEN */ + static const uint32_t input[] = { 0x246E }; + ASSERT (check (input, SIZEOF (input), input, SIZEOF (input)) == 0); + } + + { /* TRADE MARK SIGN */ + static const uint32_t input[] = { 0x2122 }; + ASSERT (check (input, SIZEOF (input), input, SIZEOF (input)) == 0); + } + + { /* LATIN SUBSCRIPT SMALL LETTER I */ + static const uint32_t input[] = { 0x1D62 }; + ASSERT (check (input, SIZEOF (input), input, SIZEOF (input)) == 0); + } + + { /* PRESENTATION FORM FOR VERTICAL LEFT PARENTHESIS */ + static const uint32_t input[] = { 0xFE35 }; + ASSERT (check (input, SIZEOF (input), input, SIZEOF (input)) == 0); + } + + { /* FULLWIDTH LATIN CAPITAL LETTER A */ + static const uint32_t input[] = { 0xFF21 }; + ASSERT (check (input, SIZEOF (input), input, SIZEOF (input)) == 0); + } + + { /* HALFWIDTH IDEOGRAPHIC COMMA */ + 
static const uint32_t input[] = { 0xFF64 }; + ASSERT (check (input, SIZEOF (input), input, SIZEOF (input)) == 0); + } + + { /* SMALL IDEOGRAPHIC COMMA */ + static const uint32_t input[] = { 0xFE51 }; + ASSERT (check (input, SIZEOF (input), input, SIZEOF (input)) == 0); + } + + { /* SQUARE MHZ */ + static const uint32_t input[] = { 0x3392 }; + ASSERT (check (input, SIZEOF (input), input, SIZEOF (input)) == 0); + } + + { /* VULGAR FRACTION THREE EIGHTHS */ + static const uint32_t input[] = { 0x215C }; + ASSERT (check (input, SIZEOF (input), input, SIZEOF (input)) == 0); + } + + { /* MICRO SIGN */ + static const uint32_t input[] = { 0x00B5 }; + ASSERT (check (input, SIZEOF (input), input, SIZEOF (input)) == 0); + } + + { /* ARABIC LIGATURE SALLALLAHOU ALAYHE WASALLAM */ + static const uint32_t input[] = { 0xFDFA }; + ASSERT (check (input, SIZEOF (input), input, SIZEOF (input)) == 0); + } + + { /* HANGUL SYLLABLE GEUL */ + static const uint32_t input[] = { 0xAE00 }; + static const uint32_t expected[] = { 0x1100, 0x1173, 0x11AF }; + ASSERT (check (input, SIZEOF (input), expected, SIZEOF (expected)) == 0); + } + + { /* HANGUL SYLLABLE GEU */ + static const uint32_t input[] = { 0xADF8 }; + static const uint32_t expected[] = { 0x1100, 0x1173 }; + ASSERT (check (input, SIZEOF (input), expected, SIZEOF (expected)) == 0); + } + + { /* "Grüß Gott. Здравствуйте! 
x=(-b±sqrt(b²-4ac))/(2a) 日本語,中文,한글" */ + static const uint32_t input[] = + { 'G', 'r', 0x00FC, 0x00DF, ' ', 'G', 'o', 't', 't', '.', ' ', + 0x0417, 0x0434, 0x0440, 0x0430, 0x0432, 0x0441, 0x0442, 0x0432, 0x0443, + 0x0439, 0x0442, 0x0435, '!', ' ', + 'x', '=', '(', '-', 'b', 0x00B1, 's', 'q', 'r', 't', '(', 'b', 0x00B2, + '-', '4', 'a', 'c', ')', ')', '/', '(', '2', 'a', ')', ' ', ' ', + 0x65E5, 0x672C, 0x8A9E, ',', 0x4E2D, 0x6587, ',', 0xD55C, 0xAE00, '\n' + }; + static const uint32_t expected[] = + { 'G', 'r', 0x0075, 0x0308, 0x00DF, ' ', 'G', 'o', 't', 't', '.', ' ', + 0x0417, 0x0434, 0x0440, 0x0430, 0x0432, 0x0441, 0x0442, 0x0432, 0x0443, + 0x0438, 0x0306, 0x0442, 0x0435, '!', ' ', + 'x', '=', '(', '-', 'b', 0x00B1, 's', 'q', 'r', 't', '(', 'b', 0x00B2, + '-', '4', 'a', 'c', ')', ')', '/', '(', '2', 'a', ')', ' ', ' ', + 0x65E5, 0x672C, 0x8A9E, ',', 0x4E2D, 0x6587, ',', + 0x1112, 0x1161, 0x11AB, 0x1100, 0x1173, 0x11AF, '\n' + }; + ASSERT (check (input, SIZEOF (input), expected, SIZEOF (expected)) == 0); + } + +#if HAVE_DECL_ALARM + /* Declare failure if test takes too long, by using default abort + caused by SIGALRM. */ + signal (SIGALRM, SIG_DFL); + alarm (50); +#endif + + /* Check that the sorting is not O(n²) but O(n log n). */ + { + int pass; + for (pass = 0; pass < 3; pass++) + { + size_t repeat = 1; + size_t m = 100000; + uint32_t *input = (uint32_t *) malloc (2 * m * sizeof (uint32_t)); + if (input != NULL) + { + uint32_t *expected = input + m; + size_t m1 = m / 2; + size_t m2 = (m - 1) / 2; + /* NB: m1 + m2 == m - 1. 
*/ + uint32_t *p; + size_t i; + + input[0] = 0x0041; + p = input + 1; + switch (pass) + { + case 0: + for (i = 0; i < m1; i++) + *p++ = 0x0319; + for (i = 0; i < m2; i++) + *p++ = 0x0300; + break; + + case 1: + for (i = 0; i < m2; i++) + *p++ = 0x0300; + for (i = 0; i < m1; i++) + *p++ = 0x0319; + break; + + case 2: + for (i = 0; i < m2; i++) + { + *p++ = 0x0319; + *p++ = 0x0300; + } + for (; i < m1; i++) + *p++ = 0x0319; + break; + + default: + abort (); + } + + expected[0] = 0x0041; + p = expected + 1; + for (i = 0; i < m1; i++) + *p++ = 0x0319; + for (i = 0; i < m2; i++) + *p++ = 0x0300; + + for (; repeat > 0; repeat--) + ASSERT (check (input, m, expected, m) == 0); + + free (input); + } + } + } +} + +#else + +void +test_u32_nfd (void) +{ +} + +#endif diff --git a/tests/uninorm/test-u32-normalize-big.c b/tests/uninorm/test-u32-normalize-big.c new file mode 100644 index 000000000..0b80ef652 --- /dev/null +++ b/tests/uninorm/test-u32-normalize-big.c @@ -0,0 +1,315 @@ +/* Test of Unicode compliance of normalization of UTF-32 strings. + Copyright (C) 2009 Free Software Foundation, Inc. + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. */ + +/* Written by Bruno Haible <bruno@clisp.org>, 2009. */ + +#include <config.h> + +/* Specification. 
*/ +#include "test-u32-normalize-big.h" + +#if GNULIB_UNINORM_U32_NORMALIZE + +#include +#include + +#include "xalloc.h" +#include "unistr.h" + +#define ASSERT(expr) \ + do \ + { \ + if (!(expr)) \ + { \ + fprintf (stderr, "%s:%d: assertion failed\n", __FILE__, __LINE__); \ + fflush (stderr); \ + abort (); \ + } \ + } \ + while (0) + +#define ASSERT_WITH_LINE(expr, file, line) \ + do \ + { \ + if (!(expr)) \ + { \ + fprintf (stderr, "%s:%d: assertion failed for %s:%u\n", \ + __FILE__, __LINE__, file, line); \ + fflush (stderr); \ + abort (); \ + } \ + } \ + while (0) + +static int +cmp_ucs4_t (const void *a, const void *b) +{ + ucs4_t a_value = *(const ucs4_t *)a; + ucs4_t b_value = *(const ucs4_t *)b; + return (a_value < b_value ? -1 : a_value > b_value ? 1 : 0); +} + +void +read_normalization_test_file (const char *filename, + struct normalization_test_file *file) +{ + FILE *stream; + unsigned int lineno; + int part_index; + struct normalization_test_line *lines; + size_t lines_length; + size_t lines_allocated; + + stream = fopen (filename, "r"); + if (stream == NULL) + { + fprintf (stderr, "error during fopen of '%s'\n", filename); + exit (1); + } + + for (part_index = 0; part_index < 4; part_index++) + { + file->parts[part_index].lines = NULL; + file->parts[part_index].lines_length = 0; + } + + lineno = 0; + + part_index = -1; + lines = NULL; + lines_length = 0; + lines_allocated = 0; + + for (;;) + { + char buf[1000+1]; + char *ptr; + int c; + struct normalization_test_line line; + size_t sequence_index; + + lineno++; + + /* Read a line. */ + ptr = buf; + do + { + c = getc (stream); + if (c == EOF || c == '\n') + break; + *ptr++ = c; + } + while (ptr < buf + 1000); + *ptr = '\0'; + if (c == EOF) + break; + + /* Ignore empty lines and comment lines. */ + if (buf[0] == '\0' || buf[0] == '#') + continue; + + /* Handle lines that introduce a new part. */ + if (buf[0] == '@') + { + /* Switch to the next part. 
*/ + if (part_index >= 0) + { + lines = + (struct normalization_test_line *) + xnrealloc (lines, lines_length, sizeof (struct normalization_test_line)); + file->parts[part_index].lines = lines; + file->parts[part_index].lines_length = lines_length; + } + part_index++; + lines = NULL; + lines_length = 0; + lines_allocated = 0; + continue; + } + + /* It's a line containing 5 sequences of Unicode characters. + Parse it and append it to the current part. */ + if (!(part_index >= 0 && part_index < 4)) + { + fprintf (stderr, "unexpected structure of '%s'\n", filename); + exit (1); + } + ptr = buf; + line.lineno = lineno; + for (sequence_index = 0; sequence_index < 5; sequence_index++) + line.sequences[sequence_index] = NULL; + for (sequence_index = 0; sequence_index < 5; sequence_index++) + { + uint32_t *sequence = XNMALLOC (1, uint32_t); + size_t sequence_length = 0; + + for (;;) + { + char *endptr; + unsigned int uc; + + uc = strtoul (ptr, &endptr, 16); + if (endptr == ptr) + break; + ptr = endptr; + + /* Append uc to the sequence. */ + sequence = + (uint32_t *) + xnrealloc (sequence, sequence_length + 2, sizeof (uint32_t)); + sequence[sequence_length] = uc; + sequence_length++; + + if (*ptr == ' ') + ptr++; + } + if (sequence_length == 0) + { + fprintf (stderr, "empty character sequence in '%s'\n", filename); + exit (1); + } + sequence[sequence_length] = 0; /* terminator */ + + line.sequences[sequence_index] = sequence; + + if (*ptr != ';') + { + fprintf (stderr, "error parsing '%s'\n", filename); + exit (1); + } + ptr++; + } + + /* Append the line to the current part. 
*/ + if (lines_length == lines_allocated) + { + lines_allocated = 2 * lines_allocated; + if (lines_allocated < 7) + lines_allocated = 7; + lines = + (struct normalization_test_line *) + xnrealloc (lines, lines_allocated, sizeof (struct normalization_test_line)); + } + lines[lines_length] = line; + lines_length++; + } + + if (part_index >= 0) + { + lines = + (struct normalization_test_line *) + xnrealloc (lines, lines_length, sizeof (struct normalization_test_line)); + file->parts[part_index].lines = lines; + file->parts[part_index].lines_length = lines_length; + } + + { + /* Collect all c1 values from the part 1 in an array. */ + const struct normalization_test_part *p = &file->parts[1]; + ucs4_t *c1_array = XNMALLOC (p->lines_length + 1, ucs4_t); + size_t line_index; + + for (line_index = 0; line_index < p->lines_length; line_index++) + { + const unsigned int *sequence = p->lines[line_index].sequences[0]; + /* In part 1, every sequences[0] consists of a single character. */ + if (!(sequence[0] != 0 && sequence[1] == 0)) + abort (); + c1_array[line_index] = sequence[0]; + } + + /* Sort this array. */ + qsort (c1_array, p->lines_length, sizeof (ucs4_t), cmp_ucs4_t); + + /* Add the sentinel at the end. 
*/ + c1_array[p->lines_length] = 0x110000; + + file->part1_c1_sorted = c1_array; + } + + file->filename = xstrdup (filename); + + if (ferror (stream) || fclose (stream)) + { + fprintf (stderr, "error reading from '%s'\n", filename); + exit (1); + } +}
+
+/* Call CHECK on the five sequences of every line in every part of FILE,
+   passing each sequence together with its length as computed by u32_strlen,
+   and assert through ASSERT_WITH_LINE (which is given the file name and the
+   line number of the test line) that CHECK returns 0.  */
+void
+test_specific (const struct normalization_test_file *file,
+               int (*check) (const uint32_t *c1, size_t c1_length,
+                             const uint32_t *c2, size_t c2_length,
+                             const uint32_t *c3, size_t c3_length,
+                             const uint32_t *c4, size_t c4_length,
+                             const uint32_t *c5, size_t c5_length))
+{
+  /* Take the number of parts from the array itself rather than repeating
+     the dimension that the header declares.  */
+  const size_t parts_count = sizeof (file->parts) / sizeof (file->parts[0]);
+  size_t part_index;
+
+  for (part_index = 0; part_index < parts_count; part_index++)
+    {
+      const struct normalization_test_part *p = &file->parts[part_index];
+      size_t line_index;
+
+      for (line_index = 0; line_index < p->lines_length; line_index++)
+        {
+          const struct normalization_test_line *l = &p->lines[line_index];
+
+          ASSERT_WITH_LINE (check (l->sequences[0], u32_strlen (l->sequences[0]),
+                                   l->sequences[1], u32_strlen (l->sequences[1]),
+                                   l->sequences[2], u32_strlen (l->sequences[2]),
+                                   l->sequences[3], u32_strlen (l->sequences[3]),
+                                   l->sequences[4], u32_strlen (l->sequences[4]))
+                            == 0,
+                            file->filename, l->lineno);
+        }
+    }
+}
+
+/* Check that every code point which is neither a surrogate nor listed as a
+   c1 value in part 1 of the NormalizationTest.txt file maps to itself in
+   the normalization form NF.  */
+void
+test_other (const struct normalization_test_file *file, uninorm_t nf)
+{
+  /* Cursor into the sorted c1 values of part 1.  The array ends with the
+     sentinel 0x110000, which no uc in the loop below can reach, so the
+     cursor never moves past the end.  */
+  const ucs4_t *p = file->part1_c1_sorted;
+  ucs4_t uc;
+
+  for (uc = 0; uc < 0x110000; uc++)
+    {
+      if (uc >= 0xD800 && uc < 0xE000)
+        {
+          /* A surrogate, not a character.  Skip uc.  */
+        }
+      else if (uc == *p)
+        {
+          /* Listed in part 1.  Skip uc and advance to the next listed
+             value.  */
+          p++;
+        }
+      else
+        {
+          uint32_t input[1];
+          size_t length;
+          uint32_t *result;
+
+          input[0] = uc;
+          result = u32_normalize (nf, input, 1, NULL, &length);
+          ASSERT (result != NULL && length == 1 && result[0] == uc);
+          /* No result buffer was passed in, so the result was allocated by
+             u32_normalize.  Release it; otherwise this loop leaks one
+             buffer per tested code point.  */
+          free (result);
+        }
+    }
+}
+
+#endif
diff --git a/tests/uninorm/test-u32-normalize-big.h b/tests/uninorm/test-u32-normalize-big.h
new file mode 100644
index 000000000..f154daa91
--- /dev/null
+++ b/tests/uninorm/test-u32-normalize-big.h
@@ -0,0 +1,70 @@
+/* Test of Unicode compliance of normalization of UTF-32 strings.
+   Copyright (C) 2009 Free Software Foundation, Inc.
+
+   This program is free software: you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation; either version 3 of the License, or
+   (at your option) any later version.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program.  If not, see <http://www.gnu.org/licenses/>.  */
+
+#include <stddef.h>
+
+#include "unitypes.h"
+#include "uninorm.h"
+
+/* The NormalizationTest.txt is from www.unicode.org, with stripped comments:
+     sed -e 's| *#.*||' < .../ucd/NormalizationTest.txt \
+         > tests/uninorm/NormalizationTest.txt
+   It is only used to verify the compliance of this implementation of the
+   Unicode normalization forms.  It is not used by the library code, only
+   by the unit tests.  */
+
+/* Representation of a line in the NormalizationTest.txt file.  */
+struct normalization_test_line
+{
+  /* Line number in the file; passed along when a check on this line is
+     asserted.  */
+  unsigned int lineno;
+  /* The five sequences c1..c5 of the line.  Each one ends with a 0 element,
+     so that its length can be taken with u32_strlen.  */
+  uint32_t *sequences[5];
+};
+
+/* Representation of a delimited part of the NormalizationTest.txt file.  */
+struct normalization_test_part
+{
+  /* The lines of this part, in file order.  */
+  struct normalization_test_line *lines;
+  /* The number of elements of LINES.  */
+  size_t lines_length;
+};
+
+/* Representation of the entire NormalizationTest.txt file.
*/
+struct normalization_test_file
+{
+  /* The delimited parts of the file, indexed by part number.  */
+  struct normalization_test_part parts[4];
+  /* All c1 values of part 1 in ascending order.  One extra element, the
+     sentinel 0x110000, terminates the array.  */
+  ucs4_t *part1_c1_sorted;
+  /* Name of the NormalizationTest.txt file that was read.  */
+  const char *filename;
+};
+
+/* Read the NormalizationTest.txt file named FILENAME and store its contents
+   in *FILE.  If reading fails, an error message is printed to stderr and
+   the program exits with status 1.  */
+void read_normalization_test_file (const char *filename,
+                                   struct normalization_test_file *file);
+
+/* First compliance test: call CHECK on the five sequences c1..c5 (each with
+   its length) of every line of every part of FILE, and require that it
+   returns 0 each time.  */
+void test_specific (const struct normalization_test_file *file,
+                    int (*check) (const uint32_t *c1, size_t c1_length,
+                                  const uint32_t *c2, size_t c2_length,
+                                  const uint32_t *c3, size_t c3_length,
+                                  const uint32_t *c4, size_t c4_length,
+                                  const uint32_t *c5, size_t c5_length));
+
+/* Second compliance test: require that every code point which is neither a
+   surrogate nor a c1 value of part 1 of FILE is left unchanged by the
+   normalization form NF.  */
+void test_other (const struct normalization_test_file *file, uninorm_t nf);
diff --git a/tests/uninorm/test-u8-nfd.c b/tests/uninorm/test-u8-nfd.c
new file mode 100644
index 000000000..9dfde02c2
--- /dev/null
+++ b/tests/uninorm/test-u8-nfd.c
@@ -0,0 +1,355 @@
+/* Test of canonical decomposition of UTF-8 strings.
+   Copyright (C) 2009 Free Software Foundation, Inc.
+
+   This program is free software: you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation; either version 3 of the License, or
+   (at your option) any later version.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program.  If not, see <http://www.gnu.org/licenses/>.  */
+
+/* Written by Bruno Haible <bruno@clisp.org>, 2009.
*/
+
+#include <config.h>
+
+#if GNULIB_UNINORM_U8_NORMALIZE
+
+#include "uninorm.h"
+
+#include <signal.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+
+#include "unistr.h"
+
+/* Number of elements of an array object (not of a pointer).  */
+#define SIZEOF(array) (sizeof (array) / sizeof (array[0]))
+/* Abort with a "file:line: assertion failed" message on stderr when EXPR
+   is false.  */
+#define ASSERT(expr) \
+  do \
+    { \
+      if (!(expr)) \
+        { \
+          fprintf (stderr, "%s:%d: assertion failed\n", __FILE__, __LINE__); \
+          fflush (stderr); \
+          abort (); \
+        } \
+    } \
+  while (0)
+
+/* Normalize the INPUT_LENGTH units at INPUT to NFD with u8_normalize and
+   compare the outcome with the EXPECTED_LENGTH units at EXPECTED, once for
+   each of three ways of passing the result buffer: none, one that is too
+   small, and one that is large enough.
+   Return 0 if everything matches, otherwise the number (1..11) of the first
+   sub-check that failed.  Every buffer obtained here is released again on
+   all paths, including the failing ones.  */
+static int
+check (const uint8_t *input, size_t input_length,
+       const uint8_t *expected, size_t expected_length)
+{
+  size_t length;
+  uint8_t *result;
+  int failure;
+
+  /* Test return conventions with resultbuf == NULL.  */
+  result = u8_normalize (UNINORM_NFD, input, input_length, NULL, &length);
+  if (result == NULL)
+    failure = 1;
+  else if (length != expected_length)
+    failure = 2;
+  else if (u8_cmp (result, expected, expected_length) != 0)
+    failure = 3;
+  else
+    failure = 0;
+  free (result);
+  if (failure != 0)
+    return failure;
+
+  /* Test return conventions with resultbuf too small.  */
+  if (expected_length > 0)
+    {
+      uint8_t *preallocated;
+
+      length = expected_length - 1;
+      preallocated = (uint8_t *) malloc (length * sizeof (uint8_t));
+      result = u8_normalize (UNINORM_NFD, input, input_length, preallocated, &length);
+      if (result == NULL)
+        failure = 4;
+      else if (result == preallocated)
+        failure = 5;
+      else if (length != expected_length)
+        failure = 6;
+      else if (u8_cmp (result, expected, expected_length) != 0)
+        failure = 7;
+      else
+        failure = 0;
+      /* The result is a separate allocation unless the (wrong) case 5
+         occurred; never free the same buffer twice.  */
+      if (result != preallocated)
+        free (result);
+      free (preallocated);
+      if (failure != 0)
+        return failure;
+    }
+
+  /* Test return conventions with resultbuf large enough.  */
+  {
+    uint8_t *preallocated;
+
+    length = expected_length;
+    preallocated = (uint8_t *) malloc (length * sizeof (uint8_t));
+    /* Without a buffer this convention cannot be exercised.  Skip it then,
+       like the large-input test in test_u8_nfd does when its allocation
+       fails, instead of reporting a bogus failure 9.  */
+    if (preallocated != NULL)
+      {
+        result = u8_normalize (UNINORM_NFD, input, input_length, preallocated, &length);
+        if (result == NULL)
+          failure = 8;
+        else if (result != preallocated)
+          failure = 9;
+        else if (length != expected_length)
+          failure = 10;
+        else if (u8_cmp (result, expected, expected_length) != 0)
+          failure = 11;
+        else
+          failure = 0;
+        if (result != preallocated)
+          free (result);
+        free (preallocated);
+        if (failure != 0)
+          return failure;
+      }
+  }
+
+  return 0;
+}
+
+/* Run check on a series of fixed UTF-8 inputs, some of which must come out
+   unchanged and some of which have a different expected decomposition, and
+   then on three large inputs of mixed combining marks whose expected output
+   is the same sorted sequence.  Aborts through ASSERT on the first
+   mismatch.  */
+void
+test_u8_nfd (void)
+{
+  { /* SPACE */
+    static const uint8_t input[] = { 0x20 };
+    ASSERT (check (input, SIZEOF (input), input, SIZEOF (input)) == 0);
+  }
+
+  { /* LATIN CAPITAL LETTER A WITH DIAERESIS */
+    static const uint8_t input[] = { 0xC3, 0x84 };
+    static const uint8_t expected[] = { 0x41, 0xCC, 0x88 };
+    ASSERT (check (input, SIZEOF (input), expected, SIZEOF (expected)) == 0);
+  }
+
+  { /* LATIN CAPITAL LETTER A WITH DIAERESIS AND MACRON */
+    static const uint8_t input[] = { 0xC7, 0x9E };
+    static const uint8_t expected[] = { 0x41, 0xCC, 0x88, 0xCC, 0x84 };
+    ASSERT (check (input, SIZEOF (input), expected, SIZEOF (expected)) == 0);
+  }
+
+  { /* GREEK DIALYTIKA AND PERISPOMENI */
+    static const uint8_t input[] = { 0xE1, 0xBF, 0x81 };
+    static const uint8_t expected[] = { 0xC2, 0xA8, 0xCD, 0x82 };
+    ASSERT (check (input, SIZEOF (input), expected, SIZEOF (expected)) == 0);
+  }
+
+  { /* SCRIPT SMALL L */
+    static const uint8_t input[] = { 0xE2, 0x84, 0x93 };
+    ASSERT (check (input, SIZEOF (input), input, SIZEOF (input)) == 0);
+  }
+
+  { /* NO-BREAK SPACE */
+    static const uint8_t input[] = { 0xC2, 0xA0 };
+    ASSERT (check (input, SIZEOF (input), input, SIZEOF (input)) == 0);
+  }
+
+  { /* ARABIC LETTER VEH INITIAL FORM */
+    static const uint8_t input[] = { 0xEF, 0xAD, 0xAC };
+    ASSERT (check (input, SIZEOF (input), input, SIZEOF (input)) == 0);
+  }
+
+  { /* ARABIC LETTER VEH MEDIAL FORM */
+    static const uint8_t input[] = { 0xEF, 0xAD, 0xAD };
+    ASSERT (check (input, SIZEOF (input), input, SIZEOF (input)) == 0);
+  }
+
+  { /* ARABIC LETTER VEH FINAL FORM */
+    static const uint8_t input[] = { 0xEF, 0xAD, 0xAB };
+    ASSERT (check (input, SIZEOF (input), input, SIZEOF (input)) == 0);
+  }
+
+  { /* ARABIC LETTER VEH ISOLATED FORM */
+    static const uint8_t input[] = { 0xEF, 0xAD, 0xAA };
+    ASSERT (check (input, SIZEOF (input), input, SIZEOF (input)) == 0);
+  }
+
+  { /* CIRCLED NUMBER FIFTEEN */
+    static const uint8_t input[] = { 0xE2, 0x91, 0xAE };
+    ASSERT (check (input, SIZEOF (input), input, SIZEOF (input)) == 0);
+  }
+
+  { /* TRADE MARK SIGN */
+    static const uint8_t input[] = { 0xE2, 0x84, 0xA2 };
+    ASSERT (check (input, SIZEOF (input), input, SIZEOF (input)) == 0);
+  }
+
+  { /* LATIN SUBSCRIPT SMALL LETTER I */
+    static const uint8_t input[] = { 0xE1, 0xB5, 0xA2 };
+    ASSERT (check (input, SIZEOF (input), input, SIZEOF (input)) == 0);
+  }
+
+  { /* PRESENTATION FORM FOR VERTICAL LEFT PARENTHESIS */
+    static const uint8_t input[] = { 0xEF, 0xB8, 0xB5 };
+    ASSERT (check (input, SIZEOF (input), input, SIZEOF (input)) == 0);
+  }
+
+  { /* FULLWIDTH LATIN CAPITAL LETTER A */
+    static const uint8_t input[] = { 0xEF, 0xBC, 0xA1 };
+    ASSERT (check (input, SIZEOF (input), input, SIZEOF (input)) == 0);
+  }
+
+  { /* HALFWIDTH IDEOGRAPHIC COMMA */
+    static const uint8_t input[] = { 0xEF, 0xBD, 0xA4 };
+    ASSERT (check (input, SIZEOF (input), input, SIZEOF (input)) == 0);
+  }
+
+  { /* SMALL IDEOGRAPHIC COMMA */
+    static const uint8_t input[] = { 0xEF, 0xB9, 0x91 };
+    ASSERT (check (input, SIZEOF (input), input, SIZEOF (input)) == 0);
+  }
+
+  { /* SQUARE MHZ */
+    static const uint8_t input[] = { 0xE3, 0x8E, 0x92 };
+    ASSERT (check (input, SIZEOF (input), input, SIZEOF (input)) == 0);
+  }
+
+  { /* VULGAR FRACTION THREE EIGHTHS */
+    static const uint8_t input[] = { 0xE2, 0x85, 0x9C };
+    ASSERT (check (input, SIZEOF (input), input, SIZEOF (input)) == 0);
+  }
+
+  { /* MICRO SIGN */
+    static const uint8_t input[] = { 0xC2, 0xB5 };
+    ASSERT (check (input, SIZEOF (input), input, SIZEOF (input)) == 0);
+  }
+
+  { /* ARABIC LIGATURE SALLALLAHOU ALAYHE WASALLAM */
+    static const uint8_t input[] = { 0xEF, 0xB7, 0xBA };
+    ASSERT (check (input, SIZEOF (input), input, SIZEOF (input)) == 0);
+  }
+
+  { /* HANGUL SYLLABLE GEUL */
+    static const uint8_t input[] = { 0xEA, 0xB8, 0x80 };
+    static const uint8_t expected[] =
+      { 0xE1, 0x84, 0x80, 0xE1, 0x85, 0xB3, 0xE1, 0x86, 0xAF };
+    ASSERT (check (input, SIZEOF (input), expected, SIZEOF (expected)) == 0);
+  }
+
+  { /* HANGUL SYLLABLE GEU */
+    static const uint8_t input[] = { 0xEA, 0xB7, 0xB8 };
+    static const uint8_t expected[] = { 0xE1, 0x84, 0x80, 0xE1, 0x85, 0xB3 };
+    ASSERT (check (input, SIZEOF (input), expected, SIZEOF (expected)) == 0);
+  }
+
+  { /* "Grüß Gott. Здравствуйте! x=(-b±sqrt(b²-4ac))/(2a)  日本語,中文,한글" */
+    static const uint8_t input[] =
+      { 'G', 'r', 0xC3, 0xBC, 0xC3, 0x9F, ' ', 'G', 'o', 't', 't', '.',
+        ' ', 0xD0, 0x97, 0xD0, 0xB4, 0xD1, 0x80, 0xD0, 0xB0, 0xD0, 0xB2, 0xD1,
+        0x81, 0xD1, 0x82, 0xD0, 0xB2, 0xD1, 0x83, 0xD0, 0xB9,
+        0xD1, 0x82, 0xD0, 0xB5, '!', ' ', 'x', '=', '(', '-', 'b', 0xC2, 0xB1,
+        's', 'q', 'r', 't', '(', 'b', 0xC2, 0xB2, '-', '4', 'a', 'c', ')', ')',
+        '/', '(', '2', 'a', ')', ' ', ' ', 0xE6, 0x97, 0xA5, 0xE6, 0x9C, 0xAC,
+        0xE8, 0xAA, 0x9E, ',', 0xE4, 0xB8, 0xAD, 0xE6, 0x96, 0x87, ',',
+        0xED, 0x95, 0x9C,
+        0xEA, 0xB8, 0x80, '\n'
+      };
+    static const uint8_t expected[] =
+      { 'G', 'r', 0x75, 0xCC, 0x88, 0xC3, 0x9F, ' ', 'G', 'o', 't', 't', '.',
+        ' ', 0xD0, 0x97, 0xD0, 0xB4, 0xD1, 0x80, 0xD0, 0xB0, 0xD0, 0xB2, 0xD1,
+        0x81, 0xD1, 0x82, 0xD0, 0xB2, 0xD1, 0x83, 0xD0, 0xB8, 0xCC, 0x86,
+        0xD1, 0x82, 0xD0, 0xB5, '!', ' ', 'x', '=', '(', '-', 'b', 0xC2, 0xB1,
+        's', 'q', 'r', 't', '(', 'b', 0xC2, 0xB2, '-', '4', 'a', 'c', ')', ')',
+        '/', '(', '2', 'a', ')', ' ', ' ', 0xE6, 0x97, 0xA5, 0xE6, 0x9C, 0xAC,
+        0xE8, 0xAA, 0x9E, ',', 0xE4, 0xB8, 0xAD, 0xE6, 0x96, 0x87, ',',
+        0xE1, 0x84, 0x92, 0xE1, 0x85, 0xA1, 0xE1, 0x86, 0xAB,
+        0xE1, 0x84, 0x80, 0xE1, 0x85, 0xB3, 0xE1, 0x86, 0xAF, '\n'
+      };
+    ASSERT (check (input, SIZEOF (input), expected, SIZEOF (expected)) == 0);
+  }
+
+#if HAVE_DECL_ALARM
+  /* Declare failure if test takes too long, by using default abort
+     caused by SIGALRM.  */
+  signal (SIGALRM, SIG_DFL);
+  alarm (50);
+#endif
+
+  /* Check that the sorting is not O(n²) but O(n log n).  */
+  {
+    int pass;
+    for (pass = 0; pass < 3; pass++)
+      {
+        size_t repeat = 1;
+        size_t m = 100000;
+        /* One allocation holds both the input and the expected output,
+           2 * m - 1 bytes each.  The pass is skipped if it fails.  */
+        uint8_t *input = (uint8_t *) malloc (2 * (2 * m - 1) * sizeof (uint8_t));
+        if (input != NULL)
+          {
+            uint8_t *expected = input + (2 * m - 1);
+            size_t m1 = m / 2;
+            size_t m2 = (m - 1) / 2;
+            /* NB: m1 + m2 == m - 1.  */
+            uint8_t *p;
+            size_t i;
+
+            /* The input is 'A' followed by m1 times 0xCC 0x99 (U+0319) and
+               m2 times 0xCC 0x80 (U+0300); only their order depends on the
+               pass.  */
+            input[0] = 0x41;
+            p = input + 1;
+            switch (pass)
+              {
+              case 0:
+                /* Already in the expected order.  */
+                for (i = 0; i < m1; i++)
+                  {
+                    *p++ = 0xCC;
+                    *p++ = 0x99;
+                  }
+                for (i = 0; i < m2; i++)
+                  {
+                    *p++ = 0xCC;
+                    *p++ = 0x80;
+                  }
+                break;
+
+              case 1:
+                /* The two runs swapped.  */
+                for (i = 0; i < m2; i++)
+                  {
+                    *p++ = 0xCC;
+                    *p++ = 0x80;
+                  }
+                for (i = 0; i < m1; i++)
+                  {
+                    *p++ = 0xCC;
+                    *p++ = 0x99;
+                  }
+                break;
+
+              case 2:
+                /* The two marks alternating, then the remaining
+                   m1 - m2 copies of the first one.  */
+                for (i = 0; i < m2; i++)
+                  {
+                    *p++ = 0xCC;
+                    *p++ = 0x99;
+                    *p++ = 0xCC;
+                    *p++ = 0x80;
+                  }
+                for (; i < m1; i++)
+                  {
+                    *p++ = 0xCC;
+                    *p++ = 0x99;
+                  }
+                break;
+
+              default:
+                abort ();
+              }
+
+            /* In every pass the expected output is 'A', then all copies of
+               0xCC 0x99, then all copies of 0xCC 0x80.  */
+            expected[0] = 0x41;
+            p = expected + 1;
+            for (i = 0; i < m1; i++)
+              {
+                *p++ = 0xCC;
+                *p++ = 0x99;
+              }
+            for (i = 0; i < m2; i++)
+              {
+                *p++ = 0xCC;
+                *p++ = 0x80;
+              }
+
+            for (; repeat > 0; repeat--)
+              ASSERT (check (input, 2 * m - 1, expected, 2 * m - 1) == 0);
+
+            free (input);
+          }
+      }
+  }
+}
+
+#else
+
+/* Nothing to test when u8_normalize is not part of the build.  */
+void
+test_u8_nfd (void)
+{
+}
+
+#endif
-- 
2.11.0