unictype/joiningtype-byname: Recognize long names as well.
author Bruno Haible <bruno@clisp.org>
Sat, 26 Mar 2011 12:38:00 +0000 (13:38 +0100)
committer Bruno Haible <bruno@clisp.org>
Sat, 26 Mar 2011 12:38:00 +0000 (13:38 +0100)
* lib/unictype.in.h (uc_joining_type_byname): Allow argument to be
a long name.
* lib/unictype/joiningtype_byname.c: Include <string.h>,
unictype/joiningtype_byname.h.
(uc_joining_type_byname): Use uc_joining_type_lookup.
* lib/unictype/joiningtype_byname.gperf: New file.
* modules/unictype/joiningtype-byname (Files): Add
lib/unictype/joiningtype_byname.gperf.
(Depends-on): Add gperf.
(Makefile.am): Add rule for generating unictype/joiningtype_byname.h.
* tests/unictype/test-joiningtype_byname.c (main): Test the recognition of
long names.

ChangeLog
lib/unictype.in.h
lib/unictype/joiningtype_byname.c
lib/unictype/joiningtype_byname.gperf [new file with mode: 0644]
modules/unictype/joiningtype-byname
tests/unictype/test-joiningtype_byname.c

index e9a8da5..5742cd3 100644 (file)
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,5 +1,19 @@
 2011-03-26  Bruno Haible  <bruno@clisp.org>
 
+       unictype/joiningtype-byname: Recognize long names as well.
+       * lib/unictype.in.h (uc_joining_type_byname): Allow argument to be
+       a long name.
+       * lib/unictype/joiningtype_byname.c: Include <string.h>,
+       unictype/joiningtype_byname.h.
+       (uc_joining_type_byname): Use uc_joining_type_lookup.
+       * lib/unictype/joiningtype_byname.gperf: New file.
+       * modules/unictype/joiningtype-byname (Files): Add
+       lib/unictype/joiningtype_byname.gperf.
+       (Depends-on): Add gperf.
+       (Makefile.am): Add rule for generating unictype/joiningtype_byname.h.
+       * tests/unictype/test-joiningtype_byname.c (main): Test the recognition of
+       long names.
+
        Tests for module 'unictype/joiningtype-longname'.
        * modules/unictype/joiningtype-longname-tests: New file.
        * tests/unictype/test-joiningtype_longname.c: New file.
index dd55036..19faea1 100644 (file)
@@ -419,7 +419,8 @@ extern const char *
 extern const char *
        uc_joining_type_long_name (int joining_type);
 
-/* Return the joining type given by name, e.g. "D".  */
+/* Return the joining type given by name, e.g. "D", or by long name, e.g.
+   "Dual Joining".  */
 extern int
        uc_joining_type_byname (const char *joining_type_name);
 
index c618a7e..11b4f69 100644 (file)
 /* Specification.  */
 #include "unictype.h"
 
+#include <string.h>
+
+#include "unictype/joiningtype_byname.h"
+
 int
 uc_joining_type_byname (const char *joining_type_name)
 {
-  if (joining_type_name[0] != '\0' && joining_type_name[1] == '\0')
-    switch (joining_type_name[0])
+  size_t len;
+
+  len = strlen (joining_type_name);
+  if (len <= MAX_WORD_LENGTH)
+    {
+      char buf[MAX_WORD_LENGTH + 1];
+      const struct named_joining_type *found;
+
+      /* Copy joining_type_name into buf, converting '_' and '-' to ' '.  */
       {
-      case 'C':
-        return UC_JOINING_TYPE_C;
-      case 'D':
-        return UC_JOINING_TYPE_D;
-      case 'L':
-        return UC_JOINING_TYPE_L;
-      case 'R':
-        return UC_JOINING_TYPE_R;
-      case 'T':
-        return UC_JOINING_TYPE_T;
-      case 'U':
-        return UC_JOINING_TYPE_U;
+        const char *p = joining_type_name;
+        char *q = buf;
+
+        for (;; p++, q++)
+          {
+            char c = *p;
+
+            if (c == '_' || c == '-')
+              c = ' ';
+            *q = c;
+            if (c == '\0')
+              break;
+          }
       }
+      /* Here q == buf + len.  */
+
+      /* Do a hash table lookup, with case-insensitive comparison.  */
+      found = uc_joining_type_lookup (buf, len);
+      if (found != NULL)
+        return found->joining_type;
+    }
   /* Invalid joining type name.  */
   return -1;
 }
diff --git a/lib/unictype/joiningtype_byname.gperf b/lib/unictype/joiningtype_byname.gperf
new file mode 100644 (file)
index 0000000..6b1e7d7
--- /dev/null
@@ -0,0 +1,30 @@
+/* Arabic joining type of Unicode characters.  */
+struct named_joining_type { int name; int joining_type; };
+%struct-type
+%ignore-case
+%language=ANSI-C
+%define hash-function-name joining_type_hash
+%define lookup-function-name uc_joining_type_lookup
+%readonly-tables
+%global-table
+%define word-array-name joining_type_names
+%pic
+%define string-pool-name joining_type_stringpool
+%%
+C, UC_JOINING_TYPE_C
+D, UC_JOINING_TYPE_D
+L, UC_JOINING_TYPE_L
+R, UC_JOINING_TYPE_R
+T, UC_JOINING_TYPE_T
+U, UC_JOINING_TYPE_U
+Join Causing, UC_JOINING_TYPE_C
+JoinCausing, UC_JOINING_TYPE_C
+Dual Joining, UC_JOINING_TYPE_D
+DualJoining, UC_JOINING_TYPE_D
+Left Joining, UC_JOINING_TYPE_L
+LeftJoining, UC_JOINING_TYPE_L
+Right Joining, UC_JOINING_TYPE_R
+RightJoining, UC_JOINING_TYPE_R
+Transparent, UC_JOINING_TYPE_T
+Non Joining, UC_JOINING_TYPE_U
+NonJoining, UC_JOINING_TYPE_U
index 1f6931b..cff0f91 100644 (file)
@@ -3,9 +3,11 @@ Find a Unicode character Arabic joining type, given its name.
 
 Files:
 lib/unictype/joiningtype_byname.c
+lib/unictype/joiningtype_byname.gperf
 
 Depends-on:
 unictype/base
+gperf
 
 configure.ac:
 gl_LIBUNISTRING_MODULE([0.9.4], [unictype/joiningtype-byname])
@@ -15,6 +17,14 @@ if LIBUNISTRING_COMPILE_UNICTYPE_JOININGTYPE_BYNAME
 lib_SOURCES += unictype/joiningtype_byname.c
 endif
 
+unictype/joiningtype_byname.h: unictype/joiningtype_byname.gperf
+       $(GPERF) -m 10 $(srcdir)/unictype/joiningtype_byname.gperf > $(srcdir)/unictype/joiningtype_byname.h-t
+       mv $(srcdir)/unictype/joiningtype_byname.h-t $(srcdir)/unictype/joiningtype_byname.h
+BUILT_SOURCES        += unictype/joiningtype_byname.h
+MOSTLYCLEANFILES     += unictype/joiningtype_byname.h-t
+MAINTAINERCLEANFILES += unictype/joiningtype_byname.h
+EXTRA_DIST           += unictype/joiningtype_byname.h
+
 Include:
 "unictype.h"
 
index 900b38a..1065bff 100644 (file)
@@ -33,6 +33,30 @@ main ()
   ASSERT (uc_joining_type_byname ("L") == UC_JOINING_TYPE_L);
   ASSERT (uc_joining_type_byname ("R") == UC_JOINING_TYPE_R);
   ASSERT (uc_joining_type_byname ("D") == UC_JOINING_TYPE_D);
+
+  ASSERT (uc_joining_type_byname ("JOIN CAUSING") == UC_JOINING_TYPE_C);
+  ASSERT (uc_joining_type_byname ("Join Causing") == UC_JOINING_TYPE_C);
+  ASSERT (uc_joining_type_byname ("Join_Causing") == UC_JOINING_TYPE_C);
+  ASSERT (uc_joining_type_byname ("JoinCausing") == UC_JOINING_TYPE_C);
+  ASSERT (uc_joining_type_byname ("DUAL JOINING") == UC_JOINING_TYPE_D);
+  ASSERT (uc_joining_type_byname ("Dual Joining") == UC_JOINING_TYPE_D);
+  ASSERT (uc_joining_type_byname ("Dual_Joining") == UC_JOINING_TYPE_D);
+  ASSERT (uc_joining_type_byname ("DualJoining") == UC_JOINING_TYPE_D);
+  ASSERT (uc_joining_type_byname ("LEFT JOINING") == UC_JOINING_TYPE_L);
+  ASSERT (uc_joining_type_byname ("Left Joining") == UC_JOINING_TYPE_L);
+  ASSERT (uc_joining_type_byname ("Left_Joining") == UC_JOINING_TYPE_L);
+  ASSERT (uc_joining_type_byname ("LeftJoining") == UC_JOINING_TYPE_L);
+  ASSERT (uc_joining_type_byname ("RIGHT JOINING") == UC_JOINING_TYPE_R);
+  ASSERT (uc_joining_type_byname ("Right Joining") == UC_JOINING_TYPE_R);
+  ASSERT (uc_joining_type_byname ("Right_Joining") == UC_JOINING_TYPE_R);
+  ASSERT (uc_joining_type_byname ("RightJoining") == UC_JOINING_TYPE_R);
+  ASSERT (uc_joining_type_byname ("TRANSPARENT") == UC_JOINING_TYPE_T);
+  ASSERT (uc_joining_type_byname ("Transparent") == UC_JOINING_TYPE_T);
+  ASSERT (uc_joining_type_byname ("NON JOINING") == UC_JOINING_TYPE_U);
+  ASSERT (uc_joining_type_byname ("Non Joining") == UC_JOINING_TYPE_U);
+  ASSERT (uc_joining_type_byname ("Non_Joining") == UC_JOINING_TYPE_U);
+  ASSERT (uc_joining_type_byname ("NonJoining") == UC_JOINING_TYPE_U);
+
   ASSERT (uc_joining_type_byname ("X") < 0);
   ASSERT (uc_joining_type_byname ("") < 0);