From 2acbd879db6eed728272f219f374da1b22bd5df8 Mon Sep 17 00:00:00 2001
From: Bruno Haible <bruno@clisp.org>
Date: Tue, 30 Jun 2009 00:02:45 +0200
Subject: [PATCH] New module 'unicase/u8-ct-casefold'.

---
 ChangeLog                      |   6 +++
 lib/unicase/u-ct-casefold.h    | 107 +++++++++++++++++++++++++++++++++++++++++
 lib/unicase/u8-ct-casefold.c   |  35 ++++++++++++++
 modules/unicase/u8-ct-casefold |  30 ++++++++++++
 4 files changed, 178 insertions(+)
 create mode 100644 lib/unicase/u-ct-casefold.h
 create mode 100644 lib/unicase/u8-ct-casefold.c
 create mode 100644 modules/unicase/u8-ct-casefold

diff --git a/ChangeLog b/ChangeLog
index c361ce97c..0b3885ad6 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,5 +1,11 @@
 2009-06-29  Bruno Haible  <bruno@clisp.org>
 
+	New module 'unicase/u8-ct-casefold'.
+	* lib/unicase/u8-ct-casefold.c: New file.
+	* lib/unicase/u-ct-casefold.h: New file, derived from
+	lib/unicase/u-casefold.h.
+	* modules/unicase/u8-ct-casefold: New file.
+
 	New module 'unicase/u32-ct-totitle'.
 	* lib/unicase/u32-ct-totitle.c: New file.
 	* modules/unicase/u32-ct-totitle: New file.
diff --git a/lib/unicase/u-ct-casefold.h b/lib/unicase/u-ct-casefold.h
new file mode 100644
index 000000000..d97be264b
--- /dev/null
+++ b/lib/unicase/u-ct-casefold.h
@@ -0,0 +1,107 @@
+/* Casefolding mapping for Unicode substrings (locale dependent).
+   Copyright (C) 2009 Free Software Foundation, Inc.
+   Written by Bruno Haible <bruno@clisp.org>, 2009.
+
+   This program is free software: you can redistribute it and/or modify it
+   under the terms of the GNU Lesser General Public License as published
+   by the Free Software Foundation; either version 3 of the License, or
+   (at your option) any later version.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public License
+   along with this program.  If not, see <http://www.gnu.org/licenses/>.  */
+
+UNIT *
+FUNC (const UNIT *s, size_t n,
+      casing_prefix_context_t prefix_context,
+      casing_suffix_context_t suffix_context,
+      const char *iso639_language,
+      uninorm_t nf,
+      UNIT *resultbuf, size_t *lengthp)
+{
+  /* Implement the three definitions of caseless matching, as described in
+     Unicode 5.0, section "Default caseless matching":
+       - If no normalization is requested, simply apply the casefolding.
+           X -> toCasefold(X).
+       - If canonical normalization is requested, apply it, and apply an NFD
+         before.
+           X -> NFD(toCasefold(NFD(X))).
+       - If compatibility normalization is requested, apply it twice, apply
+         the normalization after each, and apply an NFD before:
+           X -> NFKD(toCasefold(NFKD(toCasefold(NFD(X))))).  */
+  if (nf == NULL)
+    /* X -> toCasefold(X) */
+    return U_CASEMAP (s, n, prefix_context, suffix_context, iso639_language,
+                      uc_tocasefold, offsetof (struct special_casing_rule, casefold[0]),
+                      NULL,
+                      resultbuf, lengthp);
+  else
+    {
+      uninorm_t nfd = uninorm_decomposing_form (nf);
+      /* X -> nf(toCasefold(NFD(X))) or
+         X -> nf(toCasefold(nfd(toCasefold(NFD(X)))))  */
+      int repeat = (uninorm_is_compat_decomposing (nf) ? 2 : 1);
+      UNIT tmpbuf1[2048 / sizeof (UNIT)];
+      UNIT tmpbuf2[2048 / sizeof (UNIT)];
+      UNIT *tmp1;
+      size_t tmp1_length;
+      UNIT *tmp2;
+      size_t tmp2_length;
+
+      tmp1_length = sizeof (tmpbuf1) / sizeof (UNIT);
+      tmp1 = U_NORMALIZE (UNINORM_NFD, s, n, tmpbuf1, &tmp1_length);
+      if (tmp1 == NULL)
+        /* errno is set here.  */
+        return NULL;
+
+      do
+        {
+          tmp2_length = sizeof (tmpbuf2) / sizeof (UNIT);
+          tmp2 = U_CASEMAP (tmp1, tmp1_length,
+                            prefix_context, suffix_context, iso639_language,
+                            uc_tocasefold, offsetof (struct special_casing_rule, casefold[0]),
+                            NULL,
+                            tmpbuf2, &tmp2_length);
+          if (tmp2 == NULL)
+            {
+              int saved_errno = errno;
+              if (tmp1 != tmpbuf1)
+                free (tmp1);
+              errno = saved_errno;
+              return NULL;
+            }
+
+          if (tmp1 != tmpbuf1)
+            free (tmp1);
+
+          if (repeat > 1)
+            {
+              tmp1_length = sizeof (tmpbuf1) / sizeof (UNIT);
+              tmp1 = U_NORMALIZE (nfd, tmp2, tmp2_length,
+                                  tmpbuf1, &tmp1_length);
+            }
+          else
+            /* Last run through this loop.  */
+            tmp1 = U_NORMALIZE (nf, tmp2, tmp2_length,
+                                resultbuf, lengthp);
+          if (tmp1 == NULL)
+            {
+              int saved_errno = errno;
+              if (tmp2 != tmpbuf2)
+                free (tmp2);
+              errno = saved_errno;
+              return NULL;
+            }
+
+          if (tmp2 != tmpbuf2)
+            free (tmp2);
+        }
+      while (--repeat > 0);
+
+      return tmp1;
+    }
+}
diff --git a/lib/unicase/u8-ct-casefold.c b/lib/unicase/u8-ct-casefold.c
new file mode 100644
index 000000000..4cc149c36
--- /dev/null
+++ b/lib/unicase/u8-ct-casefold.c
@@ -0,0 +1,35 @@
+/* Casefolding mapping for UTF-8 substrings (locale dependent).
+   Copyright (C) 2009 Free Software Foundation, Inc.
+   Written by Bruno Haible <bruno@clisp.org>, 2009.
+
+   This program is free software: you can redistribute it and/or modify it
+   under the terms of the GNU Lesser General Public License as published
+   by the Free Software Foundation; either version 3 of the License, or
+   (at your option) any later version.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public License
+   along with this program.  If not, see <http://www.gnu.org/licenses/>.  */
+
+#include <config.h>
+
+/* Specification.  */
+#include "unicase.h"
+
+#include <errno.h>
+#include <stddef.h>
+#include <stdlib.h>
+
+#include "unicasemap.h"
+#include "special-casing.h"
+#include "casefold.h"
+
+#define FUNC u8_ct_casefold
+#define UNIT uint8_t
+#define U_CASEMAP u8_casemap
+#define U_NORMALIZE u8_normalize
+#include "u-ct-casefold.h"
diff --git a/modules/unicase/u8-ct-casefold b/modules/unicase/u8-ct-casefold
new file mode 100644
index 000000000..d82086cbc
--- /dev/null
+++ b/modules/unicase/u8-ct-casefold
@@ -0,0 +1,30 @@
+Description:
+Casefolding mapping for UTF-8 substrings (locale dependent).
+
+Files:
+lib/unicase/u8-ct-casefold.c
+lib/unicase/u-ct-casefold.h
+
+Depends-on:
+unicase/base
+unicase/u8-casemap
+unicase/special-casing
+unicase/tocasefold
+uninorm/decomposing-form
+uninorm/u8-normalize
+uninorm/nfd
+
+configure.ac:
+
+Makefile.am:
+lib_SOURCES += unicase/u8-ct-casefold.c
+
+Include:
+"unicase.h"
+
+License:
+LGPL
+
+Maintainer:
+Bruno Haible
+
-- 
2.11.0