X-Git-Url: http://erislabs.net/gitweb/?a=blobdiff_plain;f=lib%2Funistr.h;h=26a3e33e458fa42943924eb90dd6709ad7516e47;hb=e8269e90e35140ebb75f39669d39bd021e364105;hp=5643ea72b42b26d7ac02916e074dae8ba62b8663;hpb=2d92c4fc77ddf2ccb407258878a62b917b67fbfd;p=gnulib.git diff --git a/lib/unistr.h b/lib/unistr.h index 5643ea72b..26a3e33e4 100644 --- a/lib/unistr.h +++ b/lib/unistr.h @@ -1,26 +1,27 @@ /* Elementary Unicode string functions. - Copyright (C) 2002, 2005-2007 Free Software Foundation, Inc. + Copyright (C) 2001-2002, 2005-2010 Free Software Foundation, Inc. - This program is free software; you can redistribute it and/or modify it - under the terms of the GNU Library General Public License as published - by the Free Software Foundation; either version 2, or (at your option) - any later version. + This program is free software: you can redistribute it and/or modify it + under the terms of the GNU Lesser General Public License as published + by the Free Software Foundation; either version 3 of the License, or + (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Library General Public License for more details. + Lesser General Public License for more details. - You should have received a copy of the GNU Library General Public - License along with this program; if not, write to the Free Software - Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, - USA. */ + You should have received a copy of the GNU Lesser General Public License + along with this program. If not, see . */ #ifndef _UNISTR_H #define _UNISTR_H #include "unitypes.h" +/* Get common macros for C. */ +#include "unused-parameter.h" + /* Get bool. */ #include @@ -83,32 +84,32 @@ extern const uint32_t * /* Convert an UTF-8 string to an UTF-16 string. */ extern uint16_t * u8_to_u16 (const uint8_t *s, size_t n, uint16_t *resultbuf, - size_t *lengthp); + size_t *lengthp); /* Convert an UTF-8 string to an UCS-4 string. */ extern uint32_t * u8_to_u32 (const uint8_t *s, size_t n, uint32_t *resultbuf, - size_t *lengthp); + size_t *lengthp); /* Convert an UTF-16 string to an UTF-8 string. */ extern uint8_t * u16_to_u8 (const uint16_t *s, size_t n, uint8_t *resultbuf, - size_t *lengthp); + size_t *lengthp); /* Convert an UTF-16 string to an UCS-4 string. */ extern uint32_t * u16_to_u32 (const uint16_t *s, size_t n, uint32_t *resultbuf, - size_t *lengthp); + size_t *lengthp); /* Convert an UCS-4 string to an UTF-8 string. */ extern uint8_t * u32_to_u8 (const uint32_t *s, size_t n, uint8_t *resultbuf, - size_t *lengthp); + size_t *lengthp); /* Convert an UCS-4 string to an UTF-16 string. */ extern uint16_t * u32_to_u16 (const uint32_t *s, size_t n, uint16_t *resultbuf, - size_t *lengthp); + size_t *lengthp); /* Elementary string functions. */ @@ -126,9 +127,10 @@ extern int /* Return the length (number of units) of the first character in S, putting its 'ucs4_t' representation in *PUC. Upon failure, *PUC is set to 0xfffd, - and an appropriate number of units is returned. */ -/* Similar to mbtowc(), except that puc and s must not be NULL, and the NUL - character is not treated specially. */ + and an appropriate number of units is returned. + The number of available units, N, must be > 0. */ +/* Similar to mbtowc(), except that puc and s must not be NULL, n must be > 0, + and the NUL character is not treated specially. */ /* The variants with _safe suffix are safe, even if the library is compiled without --enable-safety. */ @@ -137,7 +139,21 @@ extern int extern int u8_mbtouc_unsafe (ucs4_t *puc, const uint8_t *s, size_t n); # else -# include "utf8-ucs4-unsafe.h" +extern int + u8_mbtouc_unsafe_aux (ucs4_t *puc, const uint8_t *s, size_t n); +static inline int +u8_mbtouc_unsafe (ucs4_t *puc, const uint8_t *s, size_t n) +{ + uint8_t c = *s; + + if (c < 0x80) + { + *puc = c; + return 1; + } + else + return u8_mbtouc_unsafe_aux (puc, s, n); +} # endif #endif @@ -146,7 +162,21 @@ extern int extern int u16_mbtouc_unsafe (ucs4_t *puc, const uint16_t *s, size_t n); # else -# include "utf16-ucs4-unsafe.h" +extern int + u16_mbtouc_unsafe_aux (ucs4_t *puc, const uint16_t *s, size_t n); +static inline int +u16_mbtouc_unsafe (ucs4_t *puc, const uint16_t *s, size_t n) +{ + uint16_t c = *s; + + if (c < 0xd800 || c >= 0xe000) + { + *puc = c; + return 1; + } + else + return u16_mbtouc_unsafe_aux (puc, s, n); +} # endif #endif @@ -156,7 +186,8 @@ extern int u32_mbtouc_unsafe (ucs4_t *puc, const uint32_t *s, size_t n); # else static inline int -u32_mbtouc_unsafe (ucs4_t *puc, const uint32_t *s, size_t n) +u32_mbtouc_unsafe (ucs4_t *puc, + const uint32_t *s, size_t n _GL_UNUSED_PARAMETER) { uint32_t c = *s; @@ -179,7 +210,21 @@ u32_mbtouc_unsafe (ucs4_t *puc, const uint32_t *s, size_t n) extern int u8_mbtouc (ucs4_t *puc, const uint8_t *s, size_t n); # else -# include "utf8-ucs4.h" +extern int + u8_mbtouc_aux (ucs4_t *puc, const uint8_t *s, size_t n); +static inline int +u8_mbtouc (ucs4_t *puc, const uint8_t *s, size_t n) +{ + uint8_t c = *s; + + if (c < 0x80) + { + *puc = c; + return 1; + } + else + return u8_mbtouc_aux (puc, s, n); +} # endif #endif @@ -188,7 +233,21 @@ extern int extern int u16_mbtouc (ucs4_t *puc, const uint16_t *s, size_t n); # else -# include "utf16-ucs4.h" +extern int + u16_mbtouc_aux (ucs4_t *puc, const uint16_t *s, size_t n); +static inline int +u16_mbtouc (ucs4_t *puc, const uint16_t *s, size_t n) +{ + uint16_t c = *s; + + if (c < 0xd800 || c >= 0xe000) + { + *puc = c; + return 1; + } + else + return u16_mbtouc_aux (puc, s, n); +} # endif #endif @@ -198,7 +257,7 @@ extern int u32_mbtouc (ucs4_t *puc, const uint32_t *s, size_t n); # else static inline int -u32_mbtouc (ucs4_t *puc, const uint32_t *s, size_t n) +u32_mbtouc (ucs4_t *puc, const uint32_t *s, size_t n _GL_UNUSED_PARAMETER) { uint32_t c = *s; @@ -212,6 +271,29 @@ u32_mbtouc (ucs4_t *puc, const uint32_t *s, size_t n) # endif #endif +/* Return the length (number of units) of the first character in S, putting + its 'ucs4_t' representation in *PUC. Upon failure, *PUC is set to 0xfffd, + and -1 is returned for an invalid sequence of units, -2 is returned for an + incomplete sequence of units. + The number of available units, N, must be > 0. */ +/* Similar to u*_mbtouc(), except that the return value gives more details + about the failure, similar to mbrtowc(). */ + +#ifdef GNULIB_UNISTR_U8_MBTOUCR +extern int + u8_mbtoucr (ucs4_t *puc, const uint8_t *s, size_t n); +#endif + +#ifdef GNULIB_UNISTR_U16_MBTOUCR +extern int + u16_mbtoucr (ucs4_t *puc, const uint16_t *s, size_t n); +#endif + +#ifdef GNULIB_UNISTR_U32_MBTOUCR +extern int + u32_mbtoucr (ucs4_t *puc, const uint32_t *s, size_t n); +#endif + /* Put the multibyte character represented by UC in S, returning its length. Return -1 upon failure, -2 if the number of available units, N, is too small. The latter case cannot occur if N >= 6/2/1, respectively. */ @@ -219,20 +301,46 @@ u32_mbtouc (ucs4_t *puc, const uint32_t *s, size_t n) must be specified. */ #ifdef GNULIB_UNISTR_U8_UCTOMB +/* Auxiliary function, also used by u8_chr, u8_strchr, u8_strrchr. */ +extern int + u8_uctomb_aux (uint8_t *s, ucs4_t uc, int n); # if !HAVE_INLINE extern int u8_uctomb (uint8_t *s, ucs4_t uc, int n); # else -# include "ucs4-utf8.h" +static inline int +u8_uctomb (uint8_t *s, ucs4_t uc, int n) +{ + if (uc < 0x80 && n > 0) + { + s[0] = uc; + return 1; + } + else + return u8_uctomb_aux (s, uc, n); +} # endif #endif #ifdef GNULIB_UNISTR_U16_UCTOMB +/* Auxiliary function, also used by u16_chr, u16_strchr, u16_strrchr. */ +extern int + u16_uctomb_aux (uint16_t *s, ucs4_t uc, int n); # if !HAVE_INLINE extern int u16_uctomb (uint16_t *s, ucs4_t uc, int n); # else -# include "ucs4-utf16.h" +static inline int +u16_uctomb (uint16_t *s, ucs4_t uc, int n) +{ + if (uc < 0xd800 && n > 0) + { + s[0] = uc; + return 1; + } + else + return u16_uctomb_aux (s, uc, n); +} # endif #endif @@ -247,12 +355,12 @@ u32_uctomb (uint32_t *s, ucs4_t uc, int n) if (uc < 0xd800 || (uc >= 0xe000 && uc < 0x110000)) { if (n > 0) - { - *s = uc; - return 1; - } + { + *s = uc; + return 1; + } else - return -2; + return -2; } else return -1; @@ -298,6 +406,15 @@ extern int extern int u32_cmp (const uint32_t *s1, const uint32_t *s2, size_t n); +/* Compare S1 and S2. */ +/* Similar to the gnulib function memcmp2(). */ +extern int + u8_cmp2 (const uint8_t *s1, size_t n1, const uint8_t *s2, size_t n2); +extern int + u16_cmp2 (const uint16_t *s1, size_t n1, const uint16_t *s2, size_t n2); +extern int + u32_cmp2 (const uint32_t *s1, size_t n1, const uint32_t *s2, size_t n2); + /* Search the string at S for UC. */ /* Similar to memchr(). */ extern uint8_t * @@ -307,6 +424,15 @@ extern uint16_t * extern uint32_t * u32_chr (const uint32_t *s, size_t n, ucs4_t uc); +/* Count the number of Unicode characters in the N units from S. */ +/* Similar to mbsnlen(). */ +extern size_t + u8_mbsnlen (const uint8_t *s, size_t n); +extern size_t + u16_mbsnlen (const uint16_t *s, size_t n); +extern size_t + u32_mbsnlen (const uint32_t *s, size_t n); + /* Elementary string functions with memory allocation. */ /* Make a freshly allocated copy of S, of length N. */ @@ -403,8 +529,8 @@ extern uint16_t * extern uint32_t * u32_strncpy (uint32_t *dest, const uint32_t *src, size_t n); -/* Copy no more than N characters of SRC to DEST, returning the address of - the last character written into DEST. */ +/* Copy no more than N units of SRC to DEST. Return a pointer past the last + non-NUL unit written into DEST. */ /* Similar to stpncpy(). */ extern uint8_t * u8_stpncpy (uint8_t *dest, const uint8_t *src, size_t n); @@ -440,6 +566,17 @@ extern int extern int u32_strcmp (const uint32_t *s1, const uint32_t *s2); +/* Compare S1 and S2 using the collation rules of the current locale. + Return -1 if S1 < S2, 0 if S1 = S2, 1 if S1 > S2. + Upon failure, set errno and return any value. */ +/* Similar to strcoll(), wcscoll(). */ +extern int + u8_strcoll (const uint8_t *s1, const uint8_t *s2); +extern int + u16_strcoll (const uint16_t *s1, const uint16_t *s2); +extern int + u32_strcoll (const uint32_t *s1, const uint32_t *s2); + /* Compare no more than N units of S1 and S2. */ /* Similar to strncmp(), wcsncmp(). */ extern int