From: Bruno Haible Date: Sat, 26 Mar 2011 12:38:00 +0000 (+0100) Subject: unictype/joiningtype-byname: Recognize long names as well. X-Git-Tag: v0.1~3027 X-Git-Url: http://erislabs.net/gitweb/?p=gnulib.git;a=commitdiff_plain;h=81aea5d9bec07d25f49b227d80605f46986397b5 unictype/joiningtype-byname: Recognize long names as well. * lib/unictype.in.h (uc_joiningtype_class_byname): Allow argument to be a long name. * lib/unictype/joiningtype_byname.c: Include <string.h>, unictype/joiningtype_byname.h. (uc_joiningtype_class_byname): Use uc_joining_type_lookup. * lib/unictype/joiningtype_byname.gperf: New file. * modules/unictype/joiningtype-byname (Files): Add lib/unictype/joiningtype_byname.gperf. (Depends-on): Add gperf. (Makefile.am): Add rule for generating unictype/joiningtype_byname.h. * tests/unictype/test-joiningtype_byname.c (main): Test the recognition of long names. --- diff --git a/ChangeLog b/ChangeLog index e9a8da59e..5742cd3f3 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,5 +1,19 @@ 2011-03-26 Bruno Haible + unictype/joiningtype-byname: Recognize long names as well. + * lib/unictype.in.h (uc_joiningtype_class_byname): Allow argument to be + a long name. + * lib/unictype/joiningtype_byname.c: Include <string.h>, + unictype/joiningtype_byname.h. + (uc_joiningtype_class_byname): Use uc_joining_type_lookup. + * lib/unictype/joiningtype_byname.gperf: New file. + * modules/unictype/joiningtype-byname (Files): Add + lib/unictype/joiningtype_byname.gperf. + (Depends-on): Add gperf. + (Makefile.am): Add rule for generating unictype/joiningtype_byname.h. + * tests/unictype/test-joiningtype_byname.c (main): Test the recognition of + long names. + Tests for module 'unictype/joiningtype-longname'. * modules/unictype/joiningtype-longname-tests: New file. * tests/unictype/test-joiningtype_longname.c: New file. 
diff --git a/lib/unictype.in.h b/lib/unictype.in.h index dd5503666..19faea12c 100644 --- a/lib/unictype.in.h +++ b/lib/unictype.in.h @@ -419,7 +419,8 @@ extern const char * extern const char * uc_joining_type_long_name (int joining_type); -/* Return the joining type given by name, e.g. "D". */ +/* Return the joining type given by name, e.g. "D", or by long name, e.g. + "Dual Joining". */ extern int uc_joining_type_byname (const char *joining_type_name); diff --git a/lib/unictype/joiningtype_byname.c b/lib/unictype/joiningtype_byname.c index c618a7e98..11b4f69db 100644 --- a/lib/unictype/joiningtype_byname.c +++ b/lib/unictype/joiningtype_byname.c @@ -20,25 +20,44 @@ /* Specification. */ #include "unictype.h" +#include <string.h> + +#include "unictype/joiningtype_byname.h" + int uc_joining_type_byname (const char *joining_type_name) { - if (joining_type_name[0] != '\0' && joining_type_name[1] == '\0') - switch (joining_type_name[0]) + size_t len; + + len = strlen (joining_type_name); + if (len <= MAX_WORD_LENGTH) + { + char buf[MAX_WORD_LENGTH + 1]; + const struct named_joining_type *found; + + /* Copy joining_type_name into buf, converting '_' and '-' to ' '. */ { - case 'C': - return UC_JOINING_TYPE_C; - case 'D': - return UC_JOINING_TYPE_D; - case 'L': - return UC_JOINING_TYPE_L; - case 'R': - return UC_JOINING_TYPE_R; - case 'T': - return UC_JOINING_TYPE_T; - case 'U': - return UC_JOINING_TYPE_U; + const char *p = joining_type_name; + char *q = buf; + + for (;; p++, q++) + { + char c = *p; + + if (c == '_' || c == '-') + c = ' '; + *q = c; + if (c == '\0') + break; + } } + /* Here q == buf + len. */ + + /* Do a hash table lookup, with case-insensitive comparison. */ + found = uc_joining_type_lookup (buf, len); + if (found != NULL) + return found->joining_type; + } /* Invalid joining type name. 
*/ return -1; } diff --git a/lib/unictype/joiningtype_byname.gperf b/lib/unictype/joiningtype_byname.gperf new file mode 100644 index 000000000..6b1e7d719 --- /dev/null +++ b/lib/unictype/joiningtype_byname.gperf @@ -0,0 +1,30 @@ +/* Arabic joining type of Unicode characters. */ +struct named_joining_type { int name; int joining_type; }; +%struct-type +%ignore-case +%language=ANSI-C +%define hash-function-name joining_type_hash +%define lookup-function-name uc_joining_type_lookup +%readonly-tables +%global-table +%define word-array-name joining_type_names +%pic +%define string-pool-name joining_type_stringpool +%% +C, UC_JOINING_TYPE_C +D, UC_JOINING_TYPE_D +L, UC_JOINING_TYPE_L +R, UC_JOINING_TYPE_R +T, UC_JOINING_TYPE_T +U, UC_JOINING_TYPE_U +Join Causing, UC_JOINING_TYPE_C +JoinCausing, UC_JOINING_TYPE_C +Dual Joining, UC_JOINING_TYPE_D +DualJoining, UC_JOINING_TYPE_D +Left Joining, UC_JOINING_TYPE_L +LeftJoining, UC_JOINING_TYPE_L +Right Joining, UC_JOINING_TYPE_R +RightJoining, UC_JOINING_TYPE_R +Transparent, UC_JOINING_TYPE_T +Non Joining, UC_JOINING_TYPE_U +NonJoining, UC_JOINING_TYPE_U diff --git a/modules/unictype/joiningtype-byname b/modules/unictype/joiningtype-byname index 1f6931b71..cff0f91e2 100644 --- a/modules/unictype/joiningtype-byname +++ b/modules/unictype/joiningtype-byname @@ -3,9 +3,11 @@ Find a Unicode character Arabic joining type, given its name. 
Files: lib/unictype/joiningtype_byname.c +lib/unictype/joiningtype_byname.gperf Depends-on: unictype/base +gperf configure.ac: gl_LIBUNISTRING_MODULE([0.9.4], [unictype/joiningtype-byname]) @@ -15,6 +17,14 @@ if LIBUNISTRING_COMPILE_UNICTYPE_JOININGTYPE_BYNAME lib_SOURCES += unictype/joiningtype_byname.c endif +unictype/joiningtype_byname.h: unictype/joiningtype_byname.gperf + $(GPERF) -m 10 $(srcdir)/unictype/joiningtype_byname.gperf > $(srcdir)/unictype/joiningtype_byname.h-t + mv $(srcdir)/unictype/joiningtype_byname.h-t $(srcdir)/unictype/joiningtype_byname.h +BUILT_SOURCES += unictype/joiningtype_byname.h +MOSTLYCLEANFILES += unictype/joiningtype_byname.h-t +MAINTAINERCLEANFILES += unictype/joiningtype_byname.h +EXTRA_DIST += unictype/joiningtype_byname.h + Include: "unictype.h" diff --git a/tests/unictype/test-joiningtype_byname.c b/tests/unictype/test-joiningtype_byname.c index 900b38a8a..1065bff20 100644 --- a/tests/unictype/test-joiningtype_byname.c +++ b/tests/unictype/test-joiningtype_byname.c @@ -33,6 +33,30 @@ main () ASSERT (uc_joining_type_byname ("L") == UC_JOINING_TYPE_L); ASSERT (uc_joining_type_byname ("R") == UC_JOINING_TYPE_R); ASSERT (uc_joining_type_byname ("D") == UC_JOINING_TYPE_D); + + ASSERT (uc_joining_type_byname ("JOIN CAUSING") == UC_JOINING_TYPE_C); + ASSERT (uc_joining_type_byname ("Join Causing") == UC_JOINING_TYPE_C); + ASSERT (uc_joining_type_byname ("Join_Causing") == UC_JOINING_TYPE_C); + ASSERT (uc_joining_type_byname ("JoinCausing") == UC_JOINING_TYPE_C); + ASSERT (uc_joining_type_byname ("DUAL JOINING") == UC_JOINING_TYPE_D); + ASSERT (uc_joining_type_byname ("Dual Joining") == UC_JOINING_TYPE_D); + ASSERT (uc_joining_type_byname ("Dual_Joining") == UC_JOINING_TYPE_D); + ASSERT (uc_joining_type_byname ("DualJoining") == UC_JOINING_TYPE_D); + ASSERT (uc_joining_type_byname ("LEFT JOINING") == UC_JOINING_TYPE_L); + ASSERT (uc_joining_type_byname ("Left Joining") == UC_JOINING_TYPE_L); + ASSERT (uc_joining_type_byname 
("Left_Joining") == UC_JOINING_TYPE_L); + ASSERT (uc_joining_type_byname ("LeftJoining") == UC_JOINING_TYPE_L); + ASSERT (uc_joining_type_byname ("RIGHT JOINING") == UC_JOINING_TYPE_R); + ASSERT (uc_joining_type_byname ("Right Joining") == UC_JOINING_TYPE_R); + ASSERT (uc_joining_type_byname ("Right_Joining") == UC_JOINING_TYPE_R); + ASSERT (uc_joining_type_byname ("RightJoining") == UC_JOINING_TYPE_R); + ASSERT (uc_joining_type_byname ("TRANSPARENT") == UC_JOINING_TYPE_T); + ASSERT (uc_joining_type_byname ("Transparent") == UC_JOINING_TYPE_T); + ASSERT (uc_joining_type_byname ("NON JOINING") == UC_JOINING_TYPE_U); + ASSERT (uc_joining_type_byname ("Non Joining") == UC_JOINING_TYPE_U); + ASSERT (uc_joining_type_byname ("Non_Joining") == UC_JOINING_TYPE_U); + ASSERT (uc_joining_type_byname ("NonJoining") == UC_JOINING_TYPE_U); + ASSERT (uc_joining_type_byname ("X") < 0); ASSERT (uc_joining_type_byname ("") < 0);