From 90656bd76c6dc0ca59a12715b2d92e580f9e2eae Mon Sep 17 00:00:00 2001 From: Bruno Haible Date: Fri, 25 Mar 2011 23:23:09 +0100 Subject: [PATCH] unictype/category-byname: Recognize long names as well. * lib/unictype.in.h (uc_general_category_byname): Allow argument to be a long name. * lib/unictype/categ_byname.c: Include <stdlib.h>, <string.h>, unictype/categ_byname.h. (UC_CATEGORY_INDEX_*): New enumeration values. (uc_general_category_byname): Use uc_general_category_lookup and convert from index to value. * lib/unictype/categ_byname.gperf: New file. * modules/unictype/category-byname (Files): Add lib/unictype/categ_byname.gperf. (Depends-on): Add gperf. (Makefile.am): Add rule for generating unictype/categ_byname.h. * tests/unictype/test-categ_byname.c (main): Test the recognition of long names. --- ChangeLog | 16 +++ lib/unictype.in.h | 3 +- lib/unictype/categ_byname.c | 231 +++++++++++++++++++++++++------------ lib/unictype/categ_byname.gperf | 116 +++++++++++++++++++ modules/unictype/category-byname | 10 ++ tests/unictype/test-categ_byname.c | 134 ++++++++++++++++++++- 6 files changed, 435 insertions(+), 75 deletions(-) create mode 100644 lib/unictype/categ_byname.gperf diff --git a/ChangeLog b/ChangeLog index 9199bea5e..66193bef1 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,5 +1,21 @@ 2011-03-25 Bruno Haible + unictype/category-byname: Recognize long names as well. + * lib/unictype.in.h (uc_general_category_byname): Allow argument to be + a long name. + * lib/unictype/categ_byname.c: Include <stdlib.h>, <string.h>, + unictype/categ_byname.h. + (UC_CATEGORY_INDEX_*): New enumeration values. + (uc_general_category_byname): Use uc_general_category_lookup and + convert from index to value. + * lib/unictype/categ_byname.gperf: New file. + * modules/unictype/category-byname (Files): Add + lib/unictype/categ_byname.gperf. + (Depends-on): Add gperf. + (Makefile.am): Add rule for generating unictype/categ_byname.h. + * tests/unictype/test-categ_byname.c (main): Test the recognition of + long names. 
+ Tests for module 'unictype/category-longname'. * modules/unictype/category-longname-tests: New file. * tests/unictype/test-categ_longname.c: New file. diff --git a/lib/unictype.in.h b/lib/unictype.in.h index ebaa616ab..1285dc6ba 100644 --- a/lib/unictype.in.h +++ b/lib/unictype.in.h @@ -211,7 +211,8 @@ extern const char * extern const char * uc_general_category_long_name (uc_general_category_t category); -/* Return the general category given by name, e.g. "Lu". */ +/* Return the general category given by name, e.g. "Lu", or by long name, + e.g. "Uppercase Letter". */ extern uc_general_category_t uc_general_category_byname (const char *category_name); diff --git a/lib/unictype/categ_byname.c b/lib/unictype/categ_byname.c index d2da64e43..80e97ab32 100644 --- a/lib/unictype/categ_byname.c +++ b/lib/unictype/categ_byname.c @@ -20,87 +20,172 @@ /* Specification. */ #include "unictype.h" +#include <stdlib.h> +#include <string.h> + +/* Indices stored in the 'struct named_category' elements of the perfect hash + table. We don't use uc_general_category_t values or their addresses + directly, because this would introduce load-time relocations. 
*/ +enum +{ + UC_CATEGORY_INDEX_L, + UC_CATEGORY_INDEX_LC, + UC_CATEGORY_INDEX_Lu, + UC_CATEGORY_INDEX_Ll, + UC_CATEGORY_INDEX_Lt, + UC_CATEGORY_INDEX_Lm, + UC_CATEGORY_INDEX_Lo, + UC_CATEGORY_INDEX_M, + UC_CATEGORY_INDEX_Mn, + UC_CATEGORY_INDEX_Mc, + UC_CATEGORY_INDEX_Me, + UC_CATEGORY_INDEX_N, + UC_CATEGORY_INDEX_Nd, + UC_CATEGORY_INDEX_Nl, + UC_CATEGORY_INDEX_No, + UC_CATEGORY_INDEX_P, + UC_CATEGORY_INDEX_Pc, + UC_CATEGORY_INDEX_Pd, + UC_CATEGORY_INDEX_Ps, + UC_CATEGORY_INDEX_Pe, + UC_CATEGORY_INDEX_Pi, + UC_CATEGORY_INDEX_Pf, + UC_CATEGORY_INDEX_Po, + UC_CATEGORY_INDEX_S, + UC_CATEGORY_INDEX_Sm, + UC_CATEGORY_INDEX_Sc, + UC_CATEGORY_INDEX_Sk, + UC_CATEGORY_INDEX_So, + UC_CATEGORY_INDEX_Z, + UC_CATEGORY_INDEX_Zs, + UC_CATEGORY_INDEX_Zl, + UC_CATEGORY_INDEX_Zp, + UC_CATEGORY_INDEX_C, + UC_CATEGORY_INDEX_Cc, + UC_CATEGORY_INDEX_Cf, + UC_CATEGORY_INDEX_Cs, + UC_CATEGORY_INDEX_Co, + UC_CATEGORY_INDEX_Cn +}; + +#include "unictype/categ_byname.h" + uc_general_category_t uc_general_category_byname (const char *category_name) { - if (category_name[0] != '\0' - && (category_name[1] == '\0' || category_name[2] == '\0')) - switch (category_name[0]) + size_t len; + + len = strlen (category_name); + if (len <= MAX_WORD_LENGTH) + { + char buf[MAX_WORD_LENGTH + 1]; + const struct named_category *found; + + /* Copy category_name into buf, converting '_' and '-' to ' '. 
*/ { - case 'L': - switch (category_name[1]) - { - case '\0': return UC_CATEGORY_L; - case 'C': return UC_CATEGORY_LC; - case 'u': return UC_CATEGORY_Lu; - case 'l': return UC_CATEGORY_Ll; - case 't': return UC_CATEGORY_Lt; - case 'm': return UC_CATEGORY_Lm; - case 'o': return UC_CATEGORY_Lo; - } - break; - case 'M': - switch (category_name[1]) - { - case '\0': return UC_CATEGORY_M; - case 'n': return UC_CATEGORY_Mn; - case 'c': return UC_CATEGORY_Mc; - case 'e': return UC_CATEGORY_Me; - } - break; - case 'N': - switch (category_name[1]) - { - case '\0': return UC_CATEGORY_N; - case 'd': return UC_CATEGORY_Nd; - case 'l': return UC_CATEGORY_Nl; - case 'o': return UC_CATEGORY_No; - } - break; - case 'P': - switch (category_name[1]) - { - case '\0': return UC_CATEGORY_P; - case 'c': return UC_CATEGORY_Pc; - case 'd': return UC_CATEGORY_Pd; - case 's': return UC_CATEGORY_Ps; - case 'e': return UC_CATEGORY_Pe; - case 'i': return UC_CATEGORY_Pi; - case 'f': return UC_CATEGORY_Pf; - case 'o': return UC_CATEGORY_Po; - } - break; - case 'S': - switch (category_name[1]) - { - case '\0': return UC_CATEGORY_S; - case 'm': return UC_CATEGORY_Sm; - case 'c': return UC_CATEGORY_Sc; - case 'k': return UC_CATEGORY_Sk; - case 'o': return UC_CATEGORY_So; - } - break; - case 'Z': - switch (category_name[1]) + const char *p = category_name; + char *q = buf; + + for (;; p++, q++) { - case '\0': return UC_CATEGORY_Z; - case 's': return UC_CATEGORY_Zs; - case 'l': return UC_CATEGORY_Zl; - case 'p': return UC_CATEGORY_Zp; + char c = *p; + + if (c == '_' || c == '-') + c = ' '; + *q = c; + if (c == '\0') + break; } - break; - case 'C': - switch (category_name[1]) + } + /* Here q == buf + len. */ + + /* Do a hash table lookup, with case-insensitive comparison. */ + found = uc_general_category_lookup (buf, len); + if (found != NULL) + /* Use a 'switch' statement here, because a table would introduce + load-time relocations. 
*/ + switch (found->category_index) { - case '\0': return UC_CATEGORY_C; - case 'c': return UC_CATEGORY_Cc; - case 'f': return UC_CATEGORY_Cf; - case 's': return UC_CATEGORY_Cs; - case 'o': return UC_CATEGORY_Co; - case 'n': return UC_CATEGORY_Cn; + case UC_CATEGORY_INDEX_L: + return UC_CATEGORY_L; + case UC_CATEGORY_INDEX_LC: + return UC_CATEGORY_LC; + case UC_CATEGORY_INDEX_Lu: + return UC_CATEGORY_Lu; + case UC_CATEGORY_INDEX_Ll: + return UC_CATEGORY_Ll; + case UC_CATEGORY_INDEX_Lt: + return UC_CATEGORY_Lt; + case UC_CATEGORY_INDEX_Lm: + return UC_CATEGORY_Lm; + case UC_CATEGORY_INDEX_Lo: + return UC_CATEGORY_Lo; + case UC_CATEGORY_INDEX_M: + return UC_CATEGORY_M; + case UC_CATEGORY_INDEX_Mn: + return UC_CATEGORY_Mn; + case UC_CATEGORY_INDEX_Mc: + return UC_CATEGORY_Mc; + case UC_CATEGORY_INDEX_Me: + return UC_CATEGORY_Me; + case UC_CATEGORY_INDEX_N: + return UC_CATEGORY_N; + case UC_CATEGORY_INDEX_Nd: + return UC_CATEGORY_Nd; + case UC_CATEGORY_INDEX_Nl: + return UC_CATEGORY_Nl; + case UC_CATEGORY_INDEX_No: + return UC_CATEGORY_No; + case UC_CATEGORY_INDEX_P: + return UC_CATEGORY_P; + case UC_CATEGORY_INDEX_Pc: + return UC_CATEGORY_Pc; + case UC_CATEGORY_INDEX_Pd: + return UC_CATEGORY_Pd; + case UC_CATEGORY_INDEX_Ps: + return UC_CATEGORY_Ps; + case UC_CATEGORY_INDEX_Pe: + return UC_CATEGORY_Pe; + case UC_CATEGORY_INDEX_Pi: + return UC_CATEGORY_Pi; + case UC_CATEGORY_INDEX_Pf: + return UC_CATEGORY_Pf; + case UC_CATEGORY_INDEX_Po: + return UC_CATEGORY_Po; + case UC_CATEGORY_INDEX_S: + return UC_CATEGORY_S; + case UC_CATEGORY_INDEX_Sm: + return UC_CATEGORY_Sm; + case UC_CATEGORY_INDEX_Sc: + return UC_CATEGORY_Sc; + case UC_CATEGORY_INDEX_Sk: + return UC_CATEGORY_Sk; + case UC_CATEGORY_INDEX_So: + return UC_CATEGORY_So; + case UC_CATEGORY_INDEX_Z: + return UC_CATEGORY_Z; + case UC_CATEGORY_INDEX_Zs: + return UC_CATEGORY_Zs; + case UC_CATEGORY_INDEX_Zl: + return UC_CATEGORY_Zl; + case UC_CATEGORY_INDEX_Zp: + return UC_CATEGORY_Zp; + case UC_CATEGORY_INDEX_C: + 
return UC_CATEGORY_C; + case UC_CATEGORY_INDEX_Cc: + return UC_CATEGORY_Cc; + case UC_CATEGORY_INDEX_Cf: + return UC_CATEGORY_Cf; + case UC_CATEGORY_INDEX_Cs: + return UC_CATEGORY_Cs; + case UC_CATEGORY_INDEX_Co: + return UC_CATEGORY_Co; + case UC_CATEGORY_INDEX_Cn: + return UC_CATEGORY_Cn; + default: + abort (); } - break; - } + } /* Invalid category name. */ return _UC_CATEGORY_NONE; } diff --git a/lib/unictype/categ_byname.gperf b/lib/unictype/categ_byname.gperf new file mode 100644 index 000000000..54d1996ef --- /dev/null +++ b/lib/unictype/categ_byname.gperf @@ -0,0 +1,116 @@ +/* Categories of Unicode characters. */ +struct named_category { int name; unsigned int category_index; }; +%struct-type +%ignore-case +%language=ANSI-C +%define hash-function-name general_category_hash +%define lookup-function-name uc_general_category_lookup +%readonly-tables +%global-table +%define word-array-name general_category_names +%pic +%define string-pool-name general_category_stringpool +%% +L, UC_CATEGORY_INDEX_L +LC, UC_CATEGORY_INDEX_LC +Lu, UC_CATEGORY_INDEX_Lu +Ll, UC_CATEGORY_INDEX_Ll +Lt, UC_CATEGORY_INDEX_Lt +Lm, UC_CATEGORY_INDEX_Lm +Lo, UC_CATEGORY_INDEX_Lo +M, UC_CATEGORY_INDEX_M +Mn, UC_CATEGORY_INDEX_Mn +Mc, UC_CATEGORY_INDEX_Mc +Me, UC_CATEGORY_INDEX_Me +N, UC_CATEGORY_INDEX_N +Nd, UC_CATEGORY_INDEX_Nd +Nl, UC_CATEGORY_INDEX_Nl +No, UC_CATEGORY_INDEX_No +P, UC_CATEGORY_INDEX_P +Pc, UC_CATEGORY_INDEX_Pc +Pd, UC_CATEGORY_INDEX_Pd +Ps, UC_CATEGORY_INDEX_Ps +Pe, UC_CATEGORY_INDEX_Pe +Pi, UC_CATEGORY_INDEX_Pi +Pf, UC_CATEGORY_INDEX_Pf +Po, UC_CATEGORY_INDEX_Po +S, UC_CATEGORY_INDEX_S +Sm, UC_CATEGORY_INDEX_Sm +Sc, UC_CATEGORY_INDEX_Sc +Sk, UC_CATEGORY_INDEX_Sk +So, UC_CATEGORY_INDEX_So +Z, UC_CATEGORY_INDEX_Z +Zs, UC_CATEGORY_INDEX_Zs +Zl, UC_CATEGORY_INDEX_Zl +Zp, UC_CATEGORY_INDEX_Zp +C, UC_CATEGORY_INDEX_C +Cc, UC_CATEGORY_INDEX_Cc +Cf, UC_CATEGORY_INDEX_Cf +Cs, UC_CATEGORY_INDEX_Cs +Co, UC_CATEGORY_INDEX_Co +Cn, UC_CATEGORY_INDEX_Cn +Letter, UC_CATEGORY_INDEX_L 
+Cased Letter, UC_CATEGORY_INDEX_LC +CasedLetter, UC_CATEGORY_INDEX_LC +Uppercase Letter, UC_CATEGORY_INDEX_Lu +UppercaseLetter, UC_CATEGORY_INDEX_Lu +Lowercase Letter, UC_CATEGORY_INDEX_Ll +LowercaseLetter, UC_CATEGORY_INDEX_Ll +Titlecase Letter, UC_CATEGORY_INDEX_Lt +TitlecaseLetter, UC_CATEGORY_INDEX_Lt +Modifier Letter, UC_CATEGORY_INDEX_Lm +ModifierLetter, UC_CATEGORY_INDEX_Lm +Other Letter, UC_CATEGORY_INDEX_Lo +OtherLetter, UC_CATEGORY_INDEX_Lo +Mark, UC_CATEGORY_INDEX_M +Nonspacing Mark, UC_CATEGORY_INDEX_Mn +NonspacingMark, UC_CATEGORY_INDEX_Mn +Spacing Mark, UC_CATEGORY_INDEX_Mc +SpacingMark, UC_CATEGORY_INDEX_Mc +Enclosing Mark, UC_CATEGORY_INDEX_Me +EnclosingMark, UC_CATEGORY_INDEX_Me +Number, UC_CATEGORY_INDEX_N +Decimal Number, UC_CATEGORY_INDEX_Nd +DecimalNumber, UC_CATEGORY_INDEX_Nd +Letter Number, UC_CATEGORY_INDEX_Nl +LetterNumber, UC_CATEGORY_INDEX_Nl +Other Number, UC_CATEGORY_INDEX_No +OtherNumber, UC_CATEGORY_INDEX_No +Punctuation, UC_CATEGORY_INDEX_P +Connector Punctuation, UC_CATEGORY_INDEX_Pc +ConnectorPunctuation, UC_CATEGORY_INDEX_Pc +Dash Punctuation, UC_CATEGORY_INDEX_Pd +DashPunctuation, UC_CATEGORY_INDEX_Pd +Open Punctuation, UC_CATEGORY_INDEX_Ps +OpenPunctuation, UC_CATEGORY_INDEX_Ps +Close Punctuation, UC_CATEGORY_INDEX_Pe +ClosePunctuation, UC_CATEGORY_INDEX_Pe +Initial Punctuation, UC_CATEGORY_INDEX_Pi +InitialPunctuation, UC_CATEGORY_INDEX_Pi +Final Punctuation, UC_CATEGORY_INDEX_Pf +FinalPunctuation, UC_CATEGORY_INDEX_Pf +Other Punctuation, UC_CATEGORY_INDEX_Po +OtherPunctuation, UC_CATEGORY_INDEX_Po +Symbol, UC_CATEGORY_INDEX_S +Math Symbol, UC_CATEGORY_INDEX_Sm +MathSymbol, UC_CATEGORY_INDEX_Sm +Currency Symbol, UC_CATEGORY_INDEX_Sc +CurrencySymbol, UC_CATEGORY_INDEX_Sc +Modifier Symbol, UC_CATEGORY_INDEX_Sk +ModifierSymbol, UC_CATEGORY_INDEX_Sk +Other Symbol, UC_CATEGORY_INDEX_So +OtherSymbol, UC_CATEGORY_INDEX_So +Separator, UC_CATEGORY_INDEX_Z +Space Separator, UC_CATEGORY_INDEX_Zs +SpaceSeparator, UC_CATEGORY_INDEX_Zs 
+Line Separator, UC_CATEGORY_INDEX_Zl +LineSeparator, UC_CATEGORY_INDEX_Zl +Paragraph Separator, UC_CATEGORY_INDEX_Zp +ParagraphSeparator, UC_CATEGORY_INDEX_Zp +Other, UC_CATEGORY_INDEX_C +Control, UC_CATEGORY_INDEX_Cc +Format, UC_CATEGORY_INDEX_Cf +Surrogate, UC_CATEGORY_INDEX_Cs +Private Use, UC_CATEGORY_INDEX_Co +PrivateUse, UC_CATEGORY_INDEX_Co +Unassigned, UC_CATEGORY_INDEX_Cn diff --git a/modules/unictype/category-byname b/modules/unictype/category-byname index 3d19aa8ac..b9538fc7e 100644 --- a/modules/unictype/category-byname +++ b/modules/unictype/category-byname @@ -3,6 +3,7 @@ Find a Unicode character category, given its name. Files: lib/unictype/categ_byname.c +lib/unictype/categ_byname.gperf Depends-on: unictype/base @@ -45,6 +46,7 @@ unictype/category-Zl unictype/category-Zp unictype/category-Zs unictype/category-none +gperf configure.ac: gl_LIBUNISTRING_MODULE([0.9.4], [unictype/category-byname]) @@ -54,6 +56,14 @@ if LIBUNISTRING_COMPILE_UNICTYPE_CATEGORY_BYNAME lib_SOURCES += unictype/categ_byname.c endif +unictype/categ_byname.h: unictype/categ_byname.gperf + $(GPERF) -m 10 $(srcdir)/unictype/categ_byname.gperf > $(srcdir)/unictype/categ_byname.h-t + mv $(srcdir)/unictype/categ_byname.h-t $(srcdir)/unictype/categ_byname.h +BUILT_SOURCES += unictype/categ_byname.h +MOSTLYCLEANFILES += unictype/categ_byname.h-t +MAINTAINERCLEANFILES += unictype/categ_byname.h +EXTRA_DIST += unictype/categ_byname.h + Include: "unictype.h" diff --git a/tests/unictype/test-categ_byname.c b/tests/unictype/test-categ_byname.c index 0da16259c..b07962a27 100644 --- a/tests/unictype/test-categ_byname.c +++ b/tests/unictype/test-categ_byname.c @@ -1,5 +1,5 @@ /* Test the Unicode character type functions. - Copyright (C) 2007-2010 Free Software Foundation, Inc. + Copyright (C) 2007-2011 Free Software Foundation, Inc. 
This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -38,6 +38,7 @@ int main () { ASSERT (category_equals (uc_general_category_byname ("L"), UC_CATEGORY_L)); + ASSERT (category_equals (uc_general_category_byname ("LC"), UC_CATEGORY_LC)); ASSERT (category_equals (uc_general_category_byname ("Lu"), UC_CATEGORY_Lu)); ASSERT (category_equals (uc_general_category_byname ("Ll"), UC_CATEGORY_Ll)); ASSERT (category_equals (uc_general_category_byname ("Lt"), UC_CATEGORY_Lt)); @@ -75,6 +76,137 @@ main () ASSERT (category_equals (uc_general_category_byname ("Co"), UC_CATEGORY_Co)); ASSERT (category_equals (uc_general_category_byname ("Cn"), UC_CATEGORY_Cn)); + ASSERT (category_equals (uc_general_category_byname ("LETTER"), UC_CATEGORY_L)); + ASSERT (category_equals (uc_general_category_byname ("Letter"), UC_CATEGORY_L)); + ASSERT (category_equals (uc_general_category_byname ("CASED LETTER"), UC_CATEGORY_LC)); + ASSERT (category_equals (uc_general_category_byname ("Cased Letter"), UC_CATEGORY_LC)); + ASSERT (category_equals (uc_general_category_byname ("Cased_Letter"), UC_CATEGORY_LC)); + ASSERT (category_equals (uc_general_category_byname ("CasedLetter"), UC_CATEGORY_LC)); + ASSERT (category_equals (uc_general_category_byname ("UPPERCASE LETTER"), UC_CATEGORY_Lu)); + ASSERT (category_equals (uc_general_category_byname ("Uppercase Letter"), UC_CATEGORY_Lu)); + ASSERT (category_equals (uc_general_category_byname ("Uppercase_Letter"), UC_CATEGORY_Lu)); + ASSERT (category_equals (uc_general_category_byname ("UppercaseLetter"), UC_CATEGORY_Lu)); + ASSERT (category_equals (uc_general_category_byname ("LOWERCASE LETTER"), UC_CATEGORY_Ll)); + ASSERT (category_equals (uc_general_category_byname ("Lowercase Letter"), UC_CATEGORY_Ll)); + ASSERT (category_equals (uc_general_category_byname ("Lowercase_Letter"), UC_CATEGORY_Ll)); + ASSERT (category_equals (uc_general_category_byname 
("LowercaseLetter"), UC_CATEGORY_Ll)); + ASSERT (category_equals (uc_general_category_byname ("TITLECASE LETTER"), UC_CATEGORY_Lt)); + ASSERT (category_equals (uc_general_category_byname ("Titlecase Letter"), UC_CATEGORY_Lt)); + ASSERT (category_equals (uc_general_category_byname ("Titlecase_Letter"), UC_CATEGORY_Lt)); + ASSERT (category_equals (uc_general_category_byname ("TitlecaseLetter"), UC_CATEGORY_Lt)); + ASSERT (category_equals (uc_general_category_byname ("MODIFIER LETTER"), UC_CATEGORY_Lm)); + ASSERT (category_equals (uc_general_category_byname ("Modifier Letter"), UC_CATEGORY_Lm)); + ASSERT (category_equals (uc_general_category_byname ("Modifier_Letter"), UC_CATEGORY_Lm)); + ASSERT (category_equals (uc_general_category_byname ("ModifierLetter"), UC_CATEGORY_Lm)); + ASSERT (category_equals (uc_general_category_byname ("OTHER LETTER"), UC_CATEGORY_Lo)); + ASSERT (category_equals (uc_general_category_byname ("Other Letter"), UC_CATEGORY_Lo)); + ASSERT (category_equals (uc_general_category_byname ("Other_Letter"), UC_CATEGORY_Lo)); + ASSERT (category_equals (uc_general_category_byname ("OtherLetter"), UC_CATEGORY_Lo)); + ASSERT (category_equals (uc_general_category_byname ("MARK"), UC_CATEGORY_M)); + ASSERT (category_equals (uc_general_category_byname ("Mark"), UC_CATEGORY_M)); + ASSERT (category_equals (uc_general_category_byname ("NONSPACING MARK"), UC_CATEGORY_Mn)); + ASSERT (category_equals (uc_general_category_byname ("Nonspacing Mark"), UC_CATEGORY_Mn)); + ASSERT (category_equals (uc_general_category_byname ("Nonspacing_Mark"), UC_CATEGORY_Mn)); + ASSERT (category_equals (uc_general_category_byname ("NonspacingMark"), UC_CATEGORY_Mn)); + ASSERT (category_equals (uc_general_category_byname ("SPACING MARK"), UC_CATEGORY_Mc)); + ASSERT (category_equals (uc_general_category_byname ("Spacing Mark"), UC_CATEGORY_Mc)); + ASSERT (category_equals (uc_general_category_byname ("Spacing_Mark"), UC_CATEGORY_Mc)); + ASSERT (category_equals 
(uc_general_category_byname ("SpacingMark"), UC_CATEGORY_Mc)); + ASSERT (category_equals (uc_general_category_byname ("ENCLOSING MARK"), UC_CATEGORY_Me)); + ASSERT (category_equals (uc_general_category_byname ("Enclosing Mark"), UC_CATEGORY_Me)); + ASSERT (category_equals (uc_general_category_byname ("Enclosing_Mark"), UC_CATEGORY_Me)); + ASSERT (category_equals (uc_general_category_byname ("EnclosingMark"), UC_CATEGORY_Me)); + ASSERT (category_equals (uc_general_category_byname ("NUMBER"), UC_CATEGORY_N)); + ASSERT (category_equals (uc_general_category_byname ("Number"), UC_CATEGORY_N)); + ASSERT (category_equals (uc_general_category_byname ("DECIMAL NUMBER"), UC_CATEGORY_Nd)); + ASSERT (category_equals (uc_general_category_byname ("Decimal Number"), UC_CATEGORY_Nd)); + ASSERT (category_equals (uc_general_category_byname ("Decimal_Number"), UC_CATEGORY_Nd)); + ASSERT (category_equals (uc_general_category_byname ("DecimalNumber"), UC_CATEGORY_Nd)); + ASSERT (category_equals (uc_general_category_byname ("LETTER NUMBER"), UC_CATEGORY_Nl)); + ASSERT (category_equals (uc_general_category_byname ("Letter Number"), UC_CATEGORY_Nl)); + ASSERT (category_equals (uc_general_category_byname ("Letter_Number"), UC_CATEGORY_Nl)); + ASSERT (category_equals (uc_general_category_byname ("LetterNumber"), UC_CATEGORY_Nl)); + ASSERT (category_equals (uc_general_category_byname ("OTHER NUMBER"), UC_CATEGORY_No)); + ASSERT (category_equals (uc_general_category_byname ("Other Number"), UC_CATEGORY_No)); + ASSERT (category_equals (uc_general_category_byname ("Other_Number"), UC_CATEGORY_No)); + ASSERT (category_equals (uc_general_category_byname ("OtherNumber"), UC_CATEGORY_No)); + ASSERT (category_equals (uc_general_category_byname ("PUNCTUATION"), UC_CATEGORY_P)); + ASSERT (category_equals (uc_general_category_byname ("Punctuation"), UC_CATEGORY_P)); + ASSERT (category_equals (uc_general_category_byname ("CONNECTOR PUNCTUATION"), UC_CATEGORY_Pc)); + ASSERT (category_equals 
(uc_general_category_byname ("Connector Punctuation"), UC_CATEGORY_Pc)); + ASSERT (category_equals (uc_general_category_byname ("Connector_Punctuation"), UC_CATEGORY_Pc)); + ASSERT (category_equals (uc_general_category_byname ("ConnectorPunctuation"), UC_CATEGORY_Pc)); + ASSERT (category_equals (uc_general_category_byname ("DASH PUNCTUATION"), UC_CATEGORY_Pd)); + ASSERT (category_equals (uc_general_category_byname ("Dash Punctuation"), UC_CATEGORY_Pd)); + ASSERT (category_equals (uc_general_category_byname ("Dash_Punctuation"), UC_CATEGORY_Pd)); + ASSERT (category_equals (uc_general_category_byname ("DashPunctuation"), UC_CATEGORY_Pd)); + ASSERT (category_equals (uc_general_category_byname ("OPEN PUNCTUATION"), UC_CATEGORY_Ps)); + ASSERT (category_equals (uc_general_category_byname ("Open Punctuation"), UC_CATEGORY_Ps)); + ASSERT (category_equals (uc_general_category_byname ("Open_Punctuation"), UC_CATEGORY_Ps)); + ASSERT (category_equals (uc_general_category_byname ("OpenPunctuation"), UC_CATEGORY_Ps)); + ASSERT (category_equals (uc_general_category_byname ("CLOSE PUNCTUATION"), UC_CATEGORY_Pe)); + ASSERT (category_equals (uc_general_category_byname ("Close Punctuation"), UC_CATEGORY_Pe)); + ASSERT (category_equals (uc_general_category_byname ("Close_Punctuation"), UC_CATEGORY_Pe)); + ASSERT (category_equals (uc_general_category_byname ("ClosePunctuation"), UC_CATEGORY_Pe)); + ASSERT (category_equals (uc_general_category_byname ("INITIAL PUNCTUATION"), UC_CATEGORY_Pi)); + ASSERT (category_equals (uc_general_category_byname ("Initial Punctuation"), UC_CATEGORY_Pi)); + ASSERT (category_equals (uc_general_category_byname ("Initial_Punctuation"), UC_CATEGORY_Pi)); + ASSERT (category_equals (uc_general_category_byname ("InitialPunctuation"), UC_CATEGORY_Pi)); + ASSERT (category_equals (uc_general_category_byname ("FINAL PUNCTUATION"), UC_CATEGORY_Pf)); + ASSERT (category_equals (uc_general_category_byname ("Final Punctuation"), UC_CATEGORY_Pf)); + ASSERT 
(category_equals (uc_general_category_byname ("Final_Punctuation"), UC_CATEGORY_Pf)); + ASSERT (category_equals (uc_general_category_byname ("FinalPunctuation"), UC_CATEGORY_Pf)); + ASSERT (category_equals (uc_general_category_byname ("OTHER PUNCTUATION"), UC_CATEGORY_Po)); + ASSERT (category_equals (uc_general_category_byname ("Other Punctuation"), UC_CATEGORY_Po)); + ASSERT (category_equals (uc_general_category_byname ("Other_Punctuation"), UC_CATEGORY_Po)); + ASSERT (category_equals (uc_general_category_byname ("OtherPunctuation"), UC_CATEGORY_Po)); + ASSERT (category_equals (uc_general_category_byname ("SYMBOL"), UC_CATEGORY_S)); + ASSERT (category_equals (uc_general_category_byname ("Symbol"), UC_CATEGORY_S)); + ASSERT (category_equals (uc_general_category_byname ("MATH SYMBOL"), UC_CATEGORY_Sm)); + ASSERT (category_equals (uc_general_category_byname ("Math Symbol"), UC_CATEGORY_Sm)); + ASSERT (category_equals (uc_general_category_byname ("Math_Symbol"), UC_CATEGORY_Sm)); + ASSERT (category_equals (uc_general_category_byname ("MathSymbol"), UC_CATEGORY_Sm)); + ASSERT (category_equals (uc_general_category_byname ("CURRENCY SYMBOL"), UC_CATEGORY_Sc)); + ASSERT (category_equals (uc_general_category_byname ("Currency Symbol"), UC_CATEGORY_Sc)); + ASSERT (category_equals (uc_general_category_byname ("Currency_Symbol"), UC_CATEGORY_Sc)); + ASSERT (category_equals (uc_general_category_byname ("CurrencySymbol"), UC_CATEGORY_Sc)); + ASSERT (category_equals (uc_general_category_byname ("MODIFIER SYMBOL"), UC_CATEGORY_Sk)); + ASSERT (category_equals (uc_general_category_byname ("Modifier Symbol"), UC_CATEGORY_Sk)); + ASSERT (category_equals (uc_general_category_byname ("Modifier_Symbol"), UC_CATEGORY_Sk)); + ASSERT (category_equals (uc_general_category_byname ("ModifierSymbol"), UC_CATEGORY_Sk)); + ASSERT (category_equals (uc_general_category_byname ("OTHER SYMBOL"), UC_CATEGORY_So)); + ASSERT (category_equals (uc_general_category_byname ("Other Symbol"), 
UC_CATEGORY_So)); + ASSERT (category_equals (uc_general_category_byname ("Other_Symbol"), UC_CATEGORY_So)); + ASSERT (category_equals (uc_general_category_byname ("OtherSymbol"), UC_CATEGORY_So)); + ASSERT (category_equals (uc_general_category_byname ("SEPARATOR"), UC_CATEGORY_Z)); + ASSERT (category_equals (uc_general_category_byname ("Separator"), UC_CATEGORY_Z)); + ASSERT (category_equals (uc_general_category_byname ("SPACE SEPARATOR"), UC_CATEGORY_Zs)); + ASSERT (category_equals (uc_general_category_byname ("Space Separator"), UC_CATEGORY_Zs)); + ASSERT (category_equals (uc_general_category_byname ("Space_Separator"), UC_CATEGORY_Zs)); + ASSERT (category_equals (uc_general_category_byname ("SpaceSeparator"), UC_CATEGORY_Zs)); + ASSERT (category_equals (uc_general_category_byname ("LINE SEPARATOR"), UC_CATEGORY_Zl)); + ASSERT (category_equals (uc_general_category_byname ("Line Separator"), UC_CATEGORY_Zl)); + ASSERT (category_equals (uc_general_category_byname ("Line_Separator"), UC_CATEGORY_Zl)); + ASSERT (category_equals (uc_general_category_byname ("LineSeparator"), UC_CATEGORY_Zl)); + ASSERT (category_equals (uc_general_category_byname ("PARAGRAPH SEPARATOR"), UC_CATEGORY_Zp)); + ASSERT (category_equals (uc_general_category_byname ("Paragraph Separator"), UC_CATEGORY_Zp)); + ASSERT (category_equals (uc_general_category_byname ("Paragraph_Separator"), UC_CATEGORY_Zp)); + ASSERT (category_equals (uc_general_category_byname ("ParagraphSeparator"), UC_CATEGORY_Zp)); + ASSERT (category_equals (uc_general_category_byname ("OTHER"), UC_CATEGORY_C)); + ASSERT (category_equals (uc_general_category_byname ("Other"), UC_CATEGORY_C)); + ASSERT (category_equals (uc_general_category_byname ("CONTROL"), UC_CATEGORY_Cc)); + ASSERT (category_equals (uc_general_category_byname ("Control"), UC_CATEGORY_Cc)); + ASSERT (category_equals (uc_general_category_byname ("FORMAT"), UC_CATEGORY_Cf)); + ASSERT (category_equals (uc_general_category_byname ("Format"), UC_CATEGORY_Cf)); + 
ASSERT (category_equals (uc_general_category_byname ("SURROGATE"), UC_CATEGORY_Cs)); + ASSERT (category_equals (uc_general_category_byname ("Surrogate"), UC_CATEGORY_Cs)); + ASSERT (category_equals (uc_general_category_byname ("PRIVATE USE"), UC_CATEGORY_Co)); + ASSERT (category_equals (uc_general_category_byname ("Private Use"), UC_CATEGORY_Co)); + ASSERT (category_equals (uc_general_category_byname ("Private_Use"), UC_CATEGORY_Co)); + ASSERT (category_equals (uc_general_category_byname ("PrivateUse"), UC_CATEGORY_Co)); + ASSERT (category_equals (uc_general_category_byname ("UNASSIGNED"), UC_CATEGORY_Cn)); + ASSERT (category_equals (uc_general_category_byname ("Unassigned"), UC_CATEGORY_Cn)); + uc_general_category_byname ("Nl"); { -- 2.11.0