New module 'unictype/combining-class-byname'.
author Bruno Haible <bruno@clisp.org>
Sat, 26 Mar 2011 01:39:24 +0000 (02:39 +0100)
committer Bruno Haible <bruno@clisp.org>
Sat, 26 Mar 2011 01:39:24 +0000 (02:39 +0100)
* lib/unictype.in.h (uc_combining_class_byname): New declaration.
* lib/unictype/combiningclass_byname.c: New file.
* lib/unictype/combiningclass_byname.gperf: New file.
* modules/unictype/combining-class-byname: New file.

ChangeLog
lib/unictype.in.h
lib/unictype/combiningclass_byname.c [new file with mode: 0644]
lib/unictype/combiningclass_byname.gperf [new file with mode: 0644]
modules/unictype/combining-class-byname [new file with mode: 0644]

index c67be25..7dd9ef1 100644 (file)
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,5 +1,11 @@
 2011-03-25  Bruno Haible  <bruno@clisp.org>
 
+       New module 'unictype/combining-class-byname'.
+       * lib/unictype.in.h (uc_combining_class_byname): New declaration.
+       * lib/unictype/combiningclass_byname.c: New file.
+       * lib/unictype/combiningclass_byname.gperf: New file.
+       * modules/unictype/combining-class-byname: New file.
+
        Tests for module 'unictype/combining-class-longname'.
        * modules/unictype/combining-class-longname-tests: New file.
        * tests/unictype/test-combiningclass_longname.c: New file.
index 293e18f..ce0957a 100644 (file)
@@ -272,6 +272,11 @@ extern const char *
 extern const char *
        uc_combining_class_long_name (int ccc);
 
+/* Return the canonical combining class given by name, e.g. "BL", or by long
+   name, e.g. "Below Left".  Return -1 if the name is not recognized.  */
+extern int
+       uc_combining_class_byname (const char *ccc_name);
+
 /* ========================================================================= */
 
 /* Field 4 of Unicode Character Database: Bidi class.
diff --git a/lib/unictype/combiningclass_byname.c b/lib/unictype/combiningclass_byname.c
new file mode 100644 (file)
index 0000000..3880523
--- /dev/null
@@ -0,0 +1,63 @@
+/* Canonical combining classes of Unicode characters.
+   Copyright (C) 2011 Free Software Foundation, Inc.
+   Written by Bruno Haible <bruno@clisp.org>, 2011.
+
+   This program is free software: you can redistribute it and/or modify it
+   under the terms of the GNU Lesser General Public License as published
+   by the Free Software Foundation; either version 3 of the License, or
+   (at your option) any later version.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public License
+   along with this program.  If not, see <http://www.gnu.org/licenses/>.  */
+
+#include <config.h>
+
+/* Specification.  */
+#include "unictype.h"
+
+#include <string.h>
+
+#include "unictype/combiningclass_byname.h"
+
+/* Map CCC_NAME, a short name such as "BL" or a long name such as
+   "Below Left", to its canonical combining class value.  Return -1 when
+   the name is not found.  */
+int
+uc_combining_class_byname (const char *ccc_name)
+{
+  size_t name_len = strlen (ccc_name);
+  char normalized[MAX_WORD_LENGTH + 1];
+  const struct named_combining_class *entry;
+  size_t i;
+
+  /* Reject names that would not fit, together with their terminating NUL,
+     into the normalized buffer.  */
+  if (name_len > MAX_WORD_LENGTH)
+    return -1;
+
+  /* Copy the name including its terminating NUL.  The separators '_' and
+     '-' become ' ', which is the separator used by the long names in the
+     lookup table.  */
+  for (i = 0; i <= name_len; i++)
+    {
+      char ch = ccc_name[i];
+
+      if (ch == '_' || ch == '-')
+        normalized[i] = ' ';
+      else
+        normalized[i] = ch;
+    }
+
+  /* Do a hash table lookup, with case-insensitive comparison.  */
+  entry = uc_combining_class_lookup (normalized, name_len);
+  if (entry == NULL)
+    /* Invalid combining class name.  */
+    return -1;
+
+  return entry->combining_class;
+}
diff --git a/lib/unictype/combiningclass_byname.gperf b/lib/unictype/combiningclass_byname.gperf
new file mode 100644 (file)
index 0000000..4096850
--- /dev/null
@@ -0,0 +1,66 @@
+/* Canonical combining classes of Unicode characters, looked up by name.  */
+struct named_combining_class { int name; int combining_class; };
+%struct-type
+%ignore-case
+%language=ANSI-C
+%define hash-function-name combining_class_hash
+%define lookup-function-name uc_combining_class_lookup
+%readonly-tables
+%global-table
+%define word-array-name combining_class_names
+%pic
+%define string-pool-name combining_class_stringpool
+%%
+NR, UC_CCC_NR
+OV, UC_CCC_OV
+NK, UC_CCC_NK
+KV, UC_CCC_KV
+VR, UC_CCC_VR
+ATBL, UC_CCC_ATBL
+ATB, UC_CCC_ATB
+ATA, UC_CCC_ATA
+ATAR, UC_CCC_ATAR
+BL, UC_CCC_BL
+B, UC_CCC_B
+BR, UC_CCC_BR
+L, UC_CCC_L
+R, UC_CCC_R
+AL, UC_CCC_AL
+A, UC_CCC_A
+AR, UC_CCC_AR
+DB, UC_CCC_DB
+DA, UC_CCC_DA
+IS, UC_CCC_IS
+Not Reordered, UC_CCC_NR
+NotReordered, UC_CCC_NR
+Overlay, UC_CCC_OV
+Nukta, UC_CCC_NK
+Kana Voicing, UC_CCC_KV
+KanaVoicing, UC_CCC_KV
+Virama, UC_CCC_VR
+Attached Below Left, UC_CCC_ATBL
+AttachedBelowLeft, UC_CCC_ATBL
+Attached Below, UC_CCC_ATB
+AttachedBelow, UC_CCC_ATB
+Attached Above, UC_CCC_ATA
+AttachedAbove, UC_CCC_ATA
+Attached Above Right, UC_CCC_ATAR
+AttachedAboveRight, UC_CCC_ATAR
+Below Left, UC_CCC_BL
+BelowLeft, UC_CCC_BL
+Below, UC_CCC_B
+Below Right, UC_CCC_BR
+BelowRight, UC_CCC_BR
+Left, UC_CCC_L
+Right, UC_CCC_R
+Above Left, UC_CCC_AL
+AboveLeft, UC_CCC_AL
+Above, UC_CCC_A
+Above Right, UC_CCC_AR
+AboveRight, UC_CCC_AR
+Double Below, UC_CCC_DB
+DoubleBelow, UC_CCC_DB
+Double Above, UC_CCC_DA
+DoubleAbove, UC_CCC_DA
+Iota Subscript, UC_CCC_IS
+IotaSubscript, UC_CCC_IS
diff --git a/modules/unictype/combining-class-byname b/modules/unictype/combining-class-byname
new file mode 100644 (file)
index 0000000..a0524b8
--- /dev/null
@@ -0,0 +1,35 @@
+Description:
+Find a Unicode character canonical combining class, given its name.
+
+Files:
+lib/unictype/combiningclass_byname.c
+lib/unictype/combiningclass_byname.gperf
+
+Depends-on:
+unictype/base
+gperf
+
+configure.ac:
+gl_LIBUNISTRING_MODULE([0.9.4], [unictype/combining-class-byname])
+
+Makefile.am:
+if LIBUNISTRING_COMPILE_UNICTYPE_COMBINING_CLASS_BYNAME
+lib_SOURCES += unictype/combiningclass_byname.c
+endif
+
+unictype/combiningclass_byname.h: unictype/combiningclass_byname.gperf
+       $(GPERF) -m 10 $(srcdir)/unictype/combiningclass_byname.gperf > $(srcdir)/unictype/combiningclass_byname.h-t
+       mv $(srcdir)/unictype/combiningclass_byname.h-t $(srcdir)/unictype/combiningclass_byname.h
+BUILT_SOURCES        += unictype/combiningclass_byname.h
+MOSTLYCLEANFILES     += unictype/combiningclass_byname.h-t
+MAINTAINERCLEANFILES += unictype/combiningclass_byname.h
+EXTRA_DIST           += unictype/combiningclass_byname.h
+
+Include:
+"unictype.h"
+
+License:
+LGPL
+
+Maintainer:
+Bruno Haible