/* Provenance: gnulib commit by Bruno Haible, Tue, 9 Jan 2007 14:09:39 +0000.
   Subject: New modules 'unistr/u8-strcmp', 'unistr/u16-strcmp',
   'unistr/u32-strcmp'.
   Commit 3aa8af7b48c544b3c4c9b63103eb32670368874f
   (parent c02c2a767745cd9ae29179e5d9c788bd31d361c8),
   X-Git-Tag: cvs-readonly~1387.  */

/* ------------------------- lib/unistr/u16-strcmp.c ------------------------- */

/* Compare UTF-16 strings.
   Copyright (C) 1999, 2002, 2006 Free Software Foundation, Inc.
   Written by Bruno Haible, 2002.

   This program is free software; you can redistribute it and/or modify it
   under the terms of the GNU Library General Public License as published
   by the Free Software Foundation; either version 2, or (at your option)
   any later version.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Library General Public License for more details.

   You should have received a copy of the GNU Library General Public
   License along with this program; if not, write to the Free Software
   Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301,
   USA.  */

/* NOTE(review): in the extracted patch the first #include of each file has
   lost its header name (presumably <config.h> -- confirm against the
   repository).  The patch also includes "unistr.h", marked "Specification.",
   which is where these functions are expected to be declared.  <stdint.h>
   is included here so that the fixed-width types are available directly.  */
#include <stdint.h>

/* Compare the NUL-terminated UTF-16 strings S1 and S2.

   Returns 0 if the strings are equal, a negative value if S1 sorts before
   S2, and a positive value if S1 sorts after S2.

   Note that the UTF-16 encoding does NOT preserve lexicographic order:
   if uc1 is a 16-bit character and [uc2a,uc2b] is a surrogate pair, we must
   enforce uc1 < [uc2a,uc2b], even if uc1 > uc2a.  Therefore, at the first
   differing position, a surrogate unit always sorts after a non-surrogate
   unit; only units of the same kind are compared by value.  */
int
u16_strcmp (const uint16_t *s1, const uint16_t *s2)
{
  for (;;)
    {
      uint16_t c1 = *s1++;
      uint16_t c2 = *s2++;
      /* Surrogates are exactly the units 0xd800..0xdfff, i.e. those whose
         top five bits are 11011.  */
      int c1_is_surrogate = (c1 & 0xf800) == 0xd800;
      int c2_is_surrogate = (c2 & 0xf800) == 0xd800;

      /* Same non-NUL unit on both sides: nothing decided yet.  */
      if (c1 != 0 && c1 == c2)
        continue;

      /* Exactly one side is a surrogate: that side denotes a character
         beyond U+FFFF and is therefore the greater one.  */
      if (c1_is_surrogate != c2_is_surrogate)
        return c1_is_surrogate ? 1 : -1;

      /* Both units are of the same kind (or both strings ended).
         16-bit values cannot overflow the int subtraction.
         > 0 if c1 > c2, < 0 if c1 < c2, 0 if both are NUL.  */
      return (int) c1 - (int) c2;
    }
}

/* ------------------------- lib/unistr/u32-strcmp.c ------------------------- */

/* Compare UTF-32 strings.
   Copyright (C) 1999, 2002, 2006 Free Software Foundation, Inc.
   Written by Bruno Haible, 2002.

   This program is free software; you can redistribute it and/or modify it
   under the terms of the GNU Library General Public License as published
   by the Free Software Foundation; either version 2, or (at your option)
   any later version.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Library General Public License for more details.

   You should have received a copy of the GNU Library General Public
   License along with this program; if not, write to the Free Software
   Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301,
   USA.  */

/* Compare the NUL-terminated UTF-32 strings S1 and S2.

   Returns 0 if the strings are equal, a negative value if S1 sorts before
   S2, and a positive value if S1 sorts after S2.  When both differing
   units fit in 31 bits the result is their difference, as before.  */
int
u32_strcmp (const uint32_t *s1, const uint32_t *s2)
{
  for (;;)
    {
      uint32_t uc1 = *s1++;
      uint32_t uc2 = *s2++;

      /* Same non-NUL unit on both sides: nothing decided yet.  */
      if (uc1 != 0 && uc1 == uc2)
        continue;

      /* Well-formed input has at most 31 bits per unit, but malformed
         input may have bit 31 set.  Converting such a unit to int makes it
         negative, so the subtraction below would report the wrong order
         (or overflow).  Compare as unsigned in that case.  The units differ
         here, because equal non-NUL units were handled above and NUL fits
         in 31 bits.  */
      if (uc1 > 0x7fffffff || uc2 > 0x7fffffff)
        return uc1 < uc2 ? -1 : 1;

      /* Both values are in 0..0x7fffffff, so the difference fits in int.
         > 0 if uc1 > uc2, < 0 if uc1 < uc2, 0 if both are NUL.  */
      return (int) uc1 - (int) uc2;
    }
}
/* ------------------------- lib/unistr/u8-strcmp.c -------------------------- */

/* Compare UTF-8 strings.
   Copyright (C) 2002, 2006 Free Software Foundation, Inc.
   Written by Bruno Haible, 2002.

   This program is free software; you can redistribute it and/or modify it
   under the terms of the GNU Library General Public License as published
   by the Free Software Foundation; either version 2, or (at your option)
   any later version.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Library General Public License for more details.

   You should have received a copy of the GNU Library General Public
   License along with this program; if not, write to the Free Software
   Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301,
   USA.  */

/* NOTE(review): in the extracted patch both angle-bracket #include lines
   have lost their header names.  The first is presumably <config.h>
   (confirm against the repository); the second must supply strcmp and is
   restored as <string.h> below.  The patch also includes "unistr.h", marked
   "Specification.", which is where this function is expected to be
   declared.  <stdint.h> is included so that uint8_t is available
   directly.  */
#include <stdint.h>
#include <string.h>

/* Compare the NUL-terminated UTF-8 strings S1 and S2.

   Returns 0 if the strings are equal, a negative value if S1 sorts before
   S2, and a positive value if S1 sorts after S2.  */
int
u8_strcmp (const uint8_t *s1, const uint8_t *s2)
{
  /* Use the fact that the UTF-8 encoding preserves lexicographic order:
     strcmp compares the bytes as unsigned char values, so a plain bytewise
     comparison already yields the desired order.  */
  return strcmp ((const char *) s1, (const char *) s2);
}

/* The same commit adds the gnulib module description
   modules/unistr/u16-strcmp:

     Description:
     Compare UTF-16 strings.

     Files:
     lib/unistr/u16-strcmp.c

     Depends-on:
     unistr/base

     configure.ac:

     Makefile.am:
     lib_SOURCES += unistr/u16-strcmp.c

     Include:
     "unistr.h"

     License:
     LGPL

     Maintainer:
     Bruno Haible

   and begins modules/unistr/u32-strcmp with:

     Description:
     Compare UTF-32 strings.  */
+ +Files: +lib/unistr/u32-strcmp.c + +Depends-on: +unistr/base + +configure.ac: + +Makefile.am: +lib_SOURCES += unistr/u32-strcmp.c + +Include: +"unistr.h" + +License: +LGPL + +Maintainer: +Bruno Haible + diff --git a/modules/unistr/u8-strcmp b/modules/unistr/u8-strcmp new file mode 100644 index 000000000..05248489f --- /dev/null +++ b/modules/unistr/u8-strcmp @@ -0,0 +1,23 @@ +Description: +Compare UTF-8 strings. + +Files: +lib/unistr/u8-strcmp.c + +Depends-on: +unistr/base + +configure.ac: + +Makefile.am: +lib_SOURCES += unistr/u8-strcmp.c + +Include: +"unistr.h" + +License: +LGPL + +Maintainer: +Bruno Haible +