From dff731fd63fefabc0daf15cdf59fb122d471146c Mon Sep 17 00:00:00 2001 From: Bruno Haible Date: Sat, 7 Mar 2009 16:51:37 +0100 Subject: [PATCH] New module 'uninorm/u8-normcoll'. --- ChangeLog | 7 ++++++ lib/uninorm.h | 16 ++++++++++++ lib/uninorm/u-normcoll.h | 61 +++++++++++++++++++++++++++++++++++++++++++++ lib/uninorm/u8-normcoll.c | 31 +++++++++++++++++++++++ modules/uninorm/u8-normcoll | 26 +++++++++++++++++++ 5 files changed, 141 insertions(+) create mode 100644 lib/uninorm/u-normcoll.h create mode 100644 lib/uninorm/u8-normcoll.c create mode 100644 modules/uninorm/u8-normcoll diff --git a/ChangeLog b/ChangeLog index 97d59d6f7..4a9d9acbf 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,5 +1,12 @@ 2009-03-07 Bruno Haible + New module 'uninorm/u8-normcoll'. + * lib/uninorm.h (u8_normcoll, u16_normcoll, u32_normcoll): New + declarations. + * lib/uninorm/u8-normcoll.c: New file. + * lib/uninorm/u-normcoll.h: New file. + * modules/uninorm/u8-normcoll: New file. + New module 'uninorm/u32-normxfrm'. * lib/uninorm/u32-normxfrm.c: New file. * modules/uninorm/u32-normxfrm: New file. diff --git a/lib/uninorm.h b/lib/uninorm.h index 6680f819c..e4153cf63 100644 --- a/lib/uninorm.h +++ b/lib/uninorm.h @@ -179,6 +179,22 @@ extern char * char *resultbuf, size_t *lengthp); +/* Compare S1 and S2, ignoring differences in normalization, using the + collation rules of the current locale. + NF must be either UNINORM_NFC or UNINORM_NFKC. + If successful, set *RESULT to -1 if S1 < S2, 0 if S1 = S2, 1 if S1 > S2, and + return 0. Upon failure, return the error number. */ +extern int + u8_normcoll (const uint8_t *s1, size_t n1, const uint8_t *s2, size_t n2, + uninorm_t nf, int *result); +extern int + u16_normcoll (const uint16_t *s1, size_t n1, const uint16_t *s2, size_t n2, + uninorm_t nf, int *result); +extern int + u32_normcoll (const uint32_t *s1, size_t n1, const uint32_t *s2, size_t n2, + uninorm_t nf, int *result); + + /* Normalization of a stream of Unicode characters. 
A "stream of Unicode characters" is essentially a function that accepts an diff --git a/lib/uninorm/u-normcoll.h b/lib/uninorm/u-normcoll.h new file mode 100644 index 000000000..5af764eba --- /dev/null +++ b/lib/uninorm/u-normcoll.h @@ -0,0 +1,61 @@ +/* Locale dependent, normalization insensitive comparison of Unicode strings. + Copyright (C) 2009 Free Software Foundation, Inc. + Written by Bruno Haible , 2009. + + This program is free software: you can redistribute it and/or modify it + under the terms of the GNU Lesser General Public License as published + by the Free Software Foundation; either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. */ +/* Function template: the including file (lib/uninorm/u8-normcoll.c in this patch) defines FUNC (the function name), UNIT (the element type of the strings) and U_NORMXFRM (the transformation function to call) and then includes this file. Compares S1 (N1 units) with S2 (N2 units) by passing each through U_NORMXFRM with normalization form NF and comparing the two results with memcmp2. On success, stores -1, 0 or 1 in *RESULT and returns 0. If U_NORMXFRM returns NULL for either string, returns the value of errno and leaves *RESULT untouched. */ +int +FUNC (const UNIT *s1, size_t n1, const UNIT *s2, size_t n2, + uninorm_t nf, int *result) +{ + char buf1[2048]; + char buf2[2048]; + char *transformed1; + size_t transformed1_length; + char *transformed2; + size_t transformed2_length; + int cmp; + + /* Normalize and transform S1. buf1 and its size are passed as the result buffer arguments of U_NORMXFRM. A NULL return is a failure; nothing needs to be freed at this point, so errno is returned directly. */ + transformed1_length = sizeof (buf1); + transformed1 = U_NORMXFRM (s1, n1, nf, buf1, &transformed1_length); + if (transformed1 == NULL) + return errno; + + /* Normalize and transform S2, using buf2 the same way. On failure, errno is saved before free () is called, the result for S1 is released unless it is buf1 itself, and the saved error number is returned. */ + transformed2_length = sizeof (buf2); + transformed2 = U_NORMXFRM (s2, n2, nf, buf2, &transformed2_length); + if (transformed2 == NULL) + { + int saved_errno = errno; + if (transformed1 != buf1) + free (transformed1); + return saved_errno; + } + + /* Compare the transformed strings.
The value returned by memcmp2 is reduced to -1, 0 or 1 before it is stored in *RESULT. */ + cmp = memcmp2 (transformed1, transformed1_length, + transformed2, transformed2_length); + if (cmp < 0) + cmp = -1; + else if (cmp > 0) + cmp = 1; +/* Release each transformed string unless it is the corresponding stack buffer. */ + if (transformed2 != buf2) + free (transformed2); + if (transformed1 != buf1) + free (transformed1); + *result = cmp; + return 0; +} diff --git a/lib/uninorm/u8-normcoll.c b/lib/uninorm/u8-normcoll.c new file mode 100644 index 000000000..4d163d41c --- /dev/null +++ b/lib/uninorm/u8-normcoll.c @@ -0,0 +1,31 @@ +/* Locale dependent, normalization insensitive comparison of UTF-8 strings. + Copyright (C) 2009 Free Software Foundation, Inc. + Written by Bruno Haible , 2009. + + This program is free software: you can redistribute it and/or modify it + under the terms of the GNU Lesser General Public License as published + by the Free Software Foundation; either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. */ + +#include +/* NOTE(review): the header name of the directive above is missing in this copy (text in angle brackets appears to have been stripped); restore it from the upstream file. */ +/* Specification. */ +#include "uninorm.h" + +#include +#include +/* NOTE(review): the two header names above are missing as well; u-normcoll.h uses errno and free, so presumably they were the errno and stdlib headers - confirm against upstream. */ +#include "memcmp2.h" +/* Instantiate the u-normcoll.h template as u8_normcoll, operating on uint8_t units and calling u8_normxfrm. */ +#define FUNC u8_normcoll +#define UNIT uint8_t +#define U_NORMXFRM u8_normxfrm +#include "u-normcoll.h" diff --git a/modules/uninorm/u8-normcoll b/modules/uninorm/u8-normcoll new file mode 100644 index 000000000..143e18ed2 --- /dev/null +++ b/modules/uninorm/u8-normcoll @@ -0,0 +1,26 @@ +Description: +Locale dependent, normalization insensitive comparison of UTF-8 strings.
+ +Files: +lib/uninorm/u8-normcoll.c +lib/uninorm/u-normcoll.h + +Depends-on: +uninorm/base +uninorm/u8-normxfrm +memcmp2 + +configure.ac: + +Makefile.am: +lib_SOURCES += uninorm/u8-normcoll.c + +Include: +"uninorm.h" + +License: +LGPL + +Maintainer: +Bruno Haible + -- 2.11.0