X-Git-Url: http://erislabs.net/gitweb/?a=blobdiff_plain;f=lib%2Funicase.h;h=7438ad05a8c08f0e179d663627d8efda2891138d;hb=28e80472ab11689bbc4dfc878cf114fe57371e48;hp=ca3ab32e4c480ea3fb82fd478084c4419e7b9632;hpb=0cd58ed61ba28249f1b9029a697d5f5d9287adae;p=gnulib.git diff --git a/lib/unicase.h b/lib/unicase.h index ca3ab32e4..7438ad05a 100644 --- a/lib/unicase.h +++ b/lib/unicase.h @@ -1,5 +1,5 @@ /* Unicode character case mappings. - Copyright (C) 2002, 2009 Free Software Foundation, Inc. + Copyright (C) 2002, 2009, 2010 Free Software Foundation, Inc. This program is free software: you can redistribute it and/or modify it under the terms of the GNU Lesser General Public License as published @@ -19,6 +19,7 @@ #include "unitypes.h" +/* Get bool. */ #include /* Get size_t. */ @@ -90,64 +91,218 @@ extern const char * case-mapping. It can also be NULL, for no normalization. */ extern uint8_t * u8_toupper (const uint8_t *s, size_t n, const char *iso639_language, - uninorm_t nf, - uint8_t *resultbuf, size_t *lengthp); + uninorm_t nf, + uint8_t *resultbuf, size_t *lengthp); extern uint16_t * u16_toupper (const uint16_t *s, size_t n, const char *iso639_language, - uninorm_t nf, - uint16_t *resultbuf, size_t *lengthp); + uninorm_t nf, + uint16_t *resultbuf, size_t *lengthp); extern uint32_t * u32_toupper (const uint32_t *s, size_t n, const char *iso639_language, - uninorm_t nf, - uint32_t *resultbuf, size_t *lengthp); + uninorm_t nf, + uint32_t *resultbuf, size_t *lengthp); /* Return the lowercase mapping of a string. The nf argument identifies the normalization form to apply after the case-mapping. It can also be NULL, for no normalization. */ extern uint8_t * u8_tolower (const uint8_t *s, size_t n, const char *iso639_language, - uninorm_t nf, - uint8_t *resultbuf, size_t *lengthp); + uninorm_t nf, + uint8_t *resultbuf, size_t *lengthp); extern uint16_t * u16_tolower (const uint16_t *s, size_t n, const char *iso639_language, - uninorm_t nf, - uint16_t *resultbuf, size_t *lengthp); + uninorm_t nf, + uint16_t *resultbuf, size_t *lengthp); extern uint32_t * u32_tolower (const uint32_t *s, size_t n, const char *iso639_language, - uninorm_t nf, - uint32_t *resultbuf, size_t *lengthp); + uninorm_t nf, + uint32_t *resultbuf, size_t *lengthp); /* Return the titlecase mapping of a string. The nf argument identifies the normalization form to apply after the case-mapping. It can also be NULL, for no normalization. */ extern uint8_t * u8_totitle (const uint8_t *s, size_t n, const char *iso639_language, - uninorm_t nf, - uint8_t *resultbuf, size_t *lengthp); + uninorm_t nf, + uint8_t *resultbuf, size_t *lengthp); extern uint16_t * u16_totitle (const uint16_t *s, size_t n, const char *iso639_language, - uninorm_t nf, - uint16_t *resultbuf, size_t *lengthp); + uninorm_t nf, + uint16_t *resultbuf, size_t *lengthp); extern uint32_t * u32_totitle (const uint32_t *s, size_t n, const char *iso639_language, - uninorm_t nf, - uint32_t *resultbuf, size_t *lengthp); + uninorm_t nf, + uint32_t *resultbuf, size_t *lengthp); + +/* The case-mapping context given by a prefix string. */ +typedef struct casing_prefix_context + { + /* These fields are private, undocumented. */ + uint32_t last_char_except_ignorable; + uint32_t last_char_normal_or_above; + } + casing_prefix_context_t; +/* The case-mapping context of the empty prefix string. */ +extern const casing_prefix_context_t unicase_empty_prefix_context; +/* Return the case-mapping context of a given prefix string. */ +extern casing_prefix_context_t + u8_casing_prefix_context (const uint8_t *s, size_t n); +extern casing_prefix_context_t + u16_casing_prefix_context (const uint16_t *s, size_t n); +extern casing_prefix_context_t + u32_casing_prefix_context (const uint32_t *s, size_t n); +/* Return the case-mapping context of the prefix concat(A, S), given the + case-mapping context of the prefix A. */ +extern casing_prefix_context_t + u8_casing_prefixes_context (const uint8_t *s, size_t n, + casing_prefix_context_t a_context); +extern casing_prefix_context_t + u16_casing_prefixes_context (const uint16_t *s, size_t n, + casing_prefix_context_t a_context); +extern casing_prefix_context_t + u32_casing_prefixes_context (const uint32_t *s, size_t n, + casing_prefix_context_t a_context); + +/* The case-mapping context given by a suffix string. */ +typedef struct casing_suffix_context + { + /* These fields are private, undocumented. */ + uint32_t first_char_except_ignorable; + uint32_t bits; + } + casing_suffix_context_t; +/* The case-mapping context of the empty suffix string. */ +extern const casing_suffix_context_t unicase_empty_suffix_context; +/* Return the case-mapping context of a given suffix string. */ +extern casing_suffix_context_t + u8_casing_suffix_context (const uint8_t *s, size_t n); +extern casing_suffix_context_t + u16_casing_suffix_context (const uint16_t *s, size_t n); +extern casing_suffix_context_t + u32_casing_suffix_context (const uint32_t *s, size_t n); +/* Return the case-mapping context of the suffix concat(S, A), given the + case-mapping context of the suffix A. */ +extern casing_suffix_context_t + u8_casing_suffixes_context (const uint8_t *s, size_t n, + casing_suffix_context_t a_context); +extern casing_suffix_context_t + u16_casing_suffixes_context (const uint16_t *s, size_t n, + casing_suffix_context_t a_context); +extern casing_suffix_context_t + u32_casing_suffixes_context (const uint32_t *s, size_t n, + casing_suffix_context_t a_context); + +/* Return the uppercase mapping of a string that is surrounded by a prefix + and a suffix. */ +extern uint8_t * + u8_ct_toupper (const uint8_t *s, size_t n, + casing_prefix_context_t prefix_context, + casing_suffix_context_t suffix_context, + const char *iso639_language, + uninorm_t nf, + uint8_t *resultbuf, size_t *lengthp); +extern uint16_t * + u16_ct_toupper (const uint16_t *s, size_t n, + casing_prefix_context_t prefix_context, + casing_suffix_context_t suffix_context, + const char *iso639_language, + uninorm_t nf, + uint16_t *resultbuf, size_t *lengthp); +extern uint32_t * + u32_ct_toupper (const uint32_t *s, size_t n, + casing_prefix_context_t prefix_context, + casing_suffix_context_t suffix_context, + const char *iso639_language, + uninorm_t nf, + uint32_t *resultbuf, size_t *lengthp); + +/* Return the lowercase mapping of a string that is surrounded by a prefix + and a suffix. */ +extern uint8_t * + u8_ct_tolower (const uint8_t *s, size_t n, + casing_prefix_context_t prefix_context, + casing_suffix_context_t suffix_context, + const char *iso639_language, + uninorm_t nf, + uint8_t *resultbuf, size_t *lengthp); +extern uint16_t * + u16_ct_tolower (const uint16_t *s, size_t n, + casing_prefix_context_t prefix_context, + casing_suffix_context_t suffix_context, + const char *iso639_language, + uninorm_t nf, + uint16_t *resultbuf, size_t *lengthp); +extern uint32_t * + u32_ct_tolower (const uint32_t *s, size_t n, + casing_prefix_context_t prefix_context, + casing_suffix_context_t suffix_context, + const char *iso639_language, + uninorm_t nf, + uint32_t *resultbuf, size_t *lengthp); + +/* Return the titlecase mapping of a string that is surrounded by a prefix + and a suffix. */ +extern uint8_t * + u8_ct_totitle (const uint8_t *s, size_t n, + casing_prefix_context_t prefix_context, + casing_suffix_context_t suffix_context, + const char *iso639_language, + uninorm_t nf, + uint8_t *resultbuf, size_t *lengthp); +extern uint16_t * + u16_ct_totitle (const uint16_t *s, size_t n, + casing_prefix_context_t prefix_context, + casing_suffix_context_t suffix_context, + const char *iso639_language, + uninorm_t nf, + uint16_t *resultbuf, size_t *lengthp); +extern uint32_t * + u32_ct_totitle (const uint32_t *s, size_t n, + casing_prefix_context_t prefix_context, + casing_suffix_context_t suffix_context, + const char *iso639_language, + uninorm_t nf, + uint32_t *resultbuf, size_t *lengthp); /* Return the case folded string. + Comparing uN_casefold (S1) and uN_casefold (S2) with uN_cmp2() is equivalent + to comparing S1 and S2 with uN_casecmp(). The nf argument identifies the normalization form to apply after the case-mapping. It can also be NULL, for no normalization. */ extern uint8_t * u8_casefold (const uint8_t *s, size_t n, const char *iso639_language, - uninorm_t nf, - uint8_t *resultbuf, size_t *lengthp); + uninorm_t nf, + uint8_t *resultbuf, size_t *lengthp); extern uint16_t * u16_casefold (const uint16_t *s, size_t n, const char *iso639_language, - uninorm_t nf, - uint16_t *resultbuf, size_t *lengthp); + uninorm_t nf, + uint16_t *resultbuf, size_t *lengthp); extern uint32_t * u32_casefold (const uint32_t *s, size_t n, const char *iso639_language, - uninorm_t nf, - uint32_t *resultbuf, size_t *lengthp); + uninorm_t nf, + uint32_t *resultbuf, size_t *lengthp); +/* Likewise, for a string that is surrounded by a prefix and a suffix. */ +extern uint8_t * + u8_ct_casefold (const uint8_t *s, size_t n, + casing_prefix_context_t prefix_context, + casing_suffix_context_t suffix_context, + const char *iso639_language, + uninorm_t nf, + uint8_t *resultbuf, size_t *lengthp); +extern uint16_t * + u16_ct_casefold (const uint16_t *s, size_t n, + casing_prefix_context_t prefix_context, + casing_suffix_context_t suffix_context, + const char *iso639_language, + uninorm_t nf, + uint16_t *resultbuf, size_t *lengthp); +extern uint32_t * + u32_ct_casefold (const uint32_t *s, size_t n, + casing_prefix_context_t prefix_context, + casing_suffix_context_t suffix_context, + const char *iso639_language, + uninorm_t nf, + uint32_t *resultbuf, size_t *lengthp); /* Compare S1 and S2, ignoring differences in case and normalization. The nf argument identifies the normalization form to apply after the @@ -156,37 +311,37 @@ extern uint32_t * return 0. Upon failure, return -1 with errno set. */ extern int u8_casecmp (const uint8_t *s1, size_t n1, - const uint8_t *s2, size_t n2, - const char *iso639_language, uninorm_t nf, int *resultp); + const uint8_t *s2, size_t n2, + const char *iso639_language, uninorm_t nf, int *resultp); extern int u16_casecmp (const uint16_t *s1, size_t n1, - const uint16_t *s2, size_t n2, - const char *iso639_language, uninorm_t nf, int *resultp); + const uint16_t *s2, size_t n2, + const char *iso639_language, uninorm_t nf, int *resultp); extern int u32_casecmp (const uint32_t *s1, size_t n1, - const uint32_t *s2, size_t n2, - const char *iso639_language, uninorm_t nf, int *resultp); + const uint32_t *s2, size_t n2, + const char *iso639_language, uninorm_t nf, int *resultp); extern int ulc_casecmp (const char *s1, size_t n1, - const char *s2, size_t n2, - const char *iso639_language, uninorm_t nf, int *resultp); + const char *s2, size_t n2, + const char *iso639_language, uninorm_t nf, int *resultp); -/* Converts the string S of length N to a string in locale encoding, in such a - way that comparing uN_casexfrm (S1) and uN_casexfrm (S2) with memcmp2() is - equivalent to comparing S1 and S2 with uN_casecoll(). +/* Convert the string S of length N to a NUL-terminated byte sequence, in such + a way that comparing uN_casexfrm (S1) and uN_casexfrm (S2) with the gnulib + function memcmp2() is equivalent to comparing S1 and S2 with uN_casecoll(). NF must be either UNINORM_NFC, UNINORM_NFKC, or NULL for no normalization. */ extern char * u8_casexfrm (const uint8_t *s, size_t n, const char *iso639_language, - uninorm_t nf, char *resultbuf, size_t *lengthp); + uninorm_t nf, char *resultbuf, size_t *lengthp); extern char * u16_casexfrm (const uint16_t *s, size_t n, const char *iso639_language, - uninorm_t nf, char *resultbuf, size_t *lengthp); + uninorm_t nf, char *resultbuf, size_t *lengthp); extern char * u32_casexfrm (const uint32_t *s, size_t n, const char *iso639_language, - uninorm_t nf, char *resultbuf, size_t *lengthp); + uninorm_t nf, char *resultbuf, size_t *lengthp); extern char * ulc_casexfrm (const char *s, size_t n, const char *iso639_language, - uninorm_t nf, char *resultbuf, size_t *lengthp); + uninorm_t nf, char *resultbuf, size_t *lengthp); /* Compare S1 and S2, ignoring differences in case and normalization, using the collation rules of the current locale. @@ -197,77 +352,81 @@ extern char * return 0. Upon failure, return -1 with errno set. */ extern int u8_casecoll (const uint8_t *s1, size_t n1, - const uint8_t *s2, size_t n2, - const char *iso639_language, uninorm_t nf, int *resultp); + const uint8_t *s2, size_t n2, + const char *iso639_language, uninorm_t nf, int *resultp); extern int u16_casecoll (const uint16_t *s1, size_t n1, - const uint16_t *s2, size_t n2, - const char *iso639_language, uninorm_t nf, int *resultp); + const uint16_t *s2, size_t n2, + const char *iso639_language, uninorm_t nf, int *resultp); extern int u32_casecoll (const uint32_t *s1, size_t n1, - const uint32_t *s2, size_t n2, - const char *iso639_language, uninorm_t nf, int *resultp); + const uint32_t *s2, size_t n2, + const char *iso639_language, uninorm_t nf, int *resultp); +extern int + ulc_casecoll (const char *s1, size_t n1, + const char *s2, size_t n2, + const char *iso639_language, uninorm_t nf, int *resultp); /* Set *RESULTP to true if mapping NFD(S) to upper case is a no-op, or to false otherwise, and return 0. Upon failure, return -1 with errno set. */ extern int u8_is_uppercase (const uint8_t *s, size_t n, - const char *iso639_language, - bool *resultp); + const char *iso639_language, + bool *resultp); extern int u16_is_uppercase (const uint16_t *s, size_t n, - const char *iso639_language, - bool *resultp); + const char *iso639_language, + bool *resultp); extern int u32_is_uppercase (const uint32_t *s, size_t n, - const char *iso639_language, - bool *resultp); + const char *iso639_language, + bool *resultp); /* Set *RESULTP to true if mapping NFD(S) to lower case is a no-op, or to false otherwise, and return 0. Upon failure, return -1 with errno set. */ extern int u8_is_lowercase (const uint8_t *s, size_t n, - const char *iso639_language, - bool *resultp); + const char *iso639_language, + bool *resultp); extern int u16_is_lowercase (const uint16_t *s, size_t n, - const char *iso639_language, - bool *resultp); + const char *iso639_language, + bool *resultp); extern int u32_is_lowercase (const uint32_t *s, size_t n, - const char *iso639_language, - bool *resultp); + const char *iso639_language, + bool *resultp); /* Set *RESULTP to true if mapping NFD(S) to title case is a no-op, or to false otherwise, and return 0. Upon failure, return -1 with errno set. */ extern int u8_is_titlecase (const uint8_t *s, size_t n, - const char *iso639_language, - bool *resultp); + const char *iso639_language, + bool *resultp); extern int u16_is_titlecase (const uint16_t *s, size_t n, - const char *iso639_language, - bool *resultp); + const char *iso639_language, + bool *resultp); extern int u32_is_titlecase (const uint32_t *s, size_t n, - const char *iso639_language, - bool *resultp); + const char *iso639_language, + bool *resultp); /* Set *RESULTP to true if applying case folding to NFD(S) is a no-op, or to false otherwise, and return 0. Upon failure, return -1 with errno set. */ extern int u8_is_casefolded (const uint8_t *s, size_t n, - const char *iso639_language, - bool *resultp); + const char *iso639_language, + bool *resultp); extern int u16_is_casefolded (const uint16_t *s, size_t n, - const char *iso639_language, - bool *resultp); + const char *iso639_language, + bool *resultp); extern int u32_is_casefolded (const uint32_t *s, size_t n, - const char *iso639_language, - bool *resultp); + const char *iso639_language, + bool *resultp); /* Set *RESULTP to true if case matters for S, that is, if mapping NFD(S) to either upper case or lower case or title case is not a no-op. @@ -277,16 +436,16 @@ extern int Upon failure, return -1 with errno set. */ extern int u8_is_cased (const uint8_t *s, size_t n, - const char *iso639_language, - bool *resultp); + const char *iso639_language, + bool *resultp); extern int u16_is_cased (const uint16_t *s, size_t n, - const char *iso639_language, - bool *resultp); + const char *iso639_language, + bool *resultp); extern int u32_is_cased (const uint32_t *s, size_t n, - const char *iso639_language, - bool *resultp); + const char *iso639_language, + bool *resultp); /* ========================================================================= */