/* Unicode character classification and properties.
- Copyright (C) 2002, 2005-2007 Free Software Foundation, Inc.
+ Copyright (C) 2002, 2005-2010 Free Software Foundation, Inc.
This program is free software: you can redistribute it and/or modify it
under the terms of the GNU Lesser General Public License as published
/* Bits and bit masks denoting General category values. UnicodeData-3.2.0.html
says a 32-bit integer will always suffice to represent them.
- These bit masks are just informative; you cannot use them in any API. */
+ These bit masks can only be used with the uc_is_general_category_withtable
+ function. */
enum
{
UC_CATEGORY_MASK_L = 0x0000001f,
This corresponds to the unions of the two sets of characters. */
extern uc_general_category_t
uc_general_category_or (uc_general_category_t category1,
- uc_general_category_t category2);
+ uc_general_category_t category2);
/* Return the intersection of two general categories as bit masks.
This *does*not* correspond to the intersection of the two sets of
characters. */
extern uc_general_category_t
uc_general_category_and (uc_general_category_t category1,
- uc_general_category_t category2);
+ uc_general_category_t category2);
/* Return the intersection of a general category with the complement of a
second general category, as bit masks.
viewing the categories as sets of characters. */
extern uc_general_category_t
uc_general_category_and_not (uc_general_category_t category1,
- uc_general_category_t category2);
+ uc_general_category_t category2);
/* Return the name of a general category. */
extern const char *
uc_general_category (ucs4_t uc);
/* Test whether a Unicode character belongs to a given category.
- The CATEGORY argument can be the combination of several built-in
+ The CATEGORY argument can be the combination of several predefined
general categories. */
extern bool
uc_is_general_category (ucs4_t uc, uc_general_category_t category);
UC_CCC_AR = 232, /* Above Right */
UC_CCC_DB = 233, /* Double Below */
UC_CCC_DA = 234, /* Double Above */
- UC_CCC_IS = 240, /* Iota Subscript */
+ UC_CCC_IS = 240 /* Iota Subscript */
};
/* Return the canonical combining class of a Unicode character. */
/* ========================================================================= */
/* Field 5 of Unicode Character Database: Character decomposition mapping.
- See "unicomp.h". */
+ See "uninorm.h". */
/* ========================================================================= */
extern uc_property_t
uc_property_byname (const char *property_name);
+/* Test whether a property is valid, i.e. whether its test_fn member is
+   non-NULL.  PROPERTY is accessed with '.', so pass the structure itself
+   (presumably a uc_property_t), not a pointer to it.  */
+#define uc_property_is_valid(property) ((property).test_fn != NULL)
+
/* Test whether a Unicode character has a given property. */
extern bool
uc_is_property (ucs4_t uc, uc_property_t property);
/* ========================================================================= */
-/* Subdivision of the the Unicode characters into scripts. */
+/* Subdivision of the Unicode characters into scripts. */
typedef struct
{
extern bool
uc_is_upper (ucs4_t uc);
-/* Test for any character that corresponds to a hexadecimal-digit character
- equivalent to that performed by the functions described in the previous
- subclause. */
+/* Test for any character that corresponds to a hexadecimal-digit
+ character. */
extern bool
uc_is_xdigit (ucs4_t uc);