New module 'unictype/category-longname'.
author Bruno Haible <bruno@clisp.org>
Fri, 25 Mar 2011 22:14:10 +0000 (23:14 +0100)
committer Bruno Haible <bruno@clisp.org>
Fri, 25 Mar 2011 22:14:10 +0000 (23:14 +0100)
* lib/unictype.in.h (uc_general_category_long_name): New declaration.
* lib/unictype/categ_longname.c: New file.
* modules/unictype/category-longname: New file.
* modules/unictype/category-all (Depends-on): Add it.

ChangeLog
lib/unictype.in.h
lib/unictype/categ_longname.c [new file with mode: 0644]
modules/unictype/category-all
modules/unictype/category-longname [new file with mode: 0644]

index a1da946..b33f027 100644 (file)
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,5 +1,13 @@
 2011-03-25  Bruno Haible  <bruno@clisp.org>
 
+       New module 'unictype/category-longname'.
+       * lib/unictype.in.h (uc_general_category_long_name): New declaration.
+       * lib/unictype/categ_longname.c: New file.
+       * modules/unictype/category-longname: New file.
+       * modules/unictype/category-all (Depends-on): Add it.
+
+2011-03-25  Bruno Haible  <bruno@clisp.org>
+
        Tests for module 'unictype/category-LC'.
        * modules/unictype/category-LC-tests: New file.
        * tests/unictype/test-categ_LC.c: New file, automatically generated.
index b25a461..ebaa616 100644 (file)
@@ -207,6 +207,10 @@ extern uc_general_category_t
 extern const char *
        uc_general_category_name (uc_general_category_t category);
 
+/* Return the long name of a general category, e.g. "Uppercase Letter",
+   or NULL if the category has no long name.  */
+extern const char *
+       uc_general_category_long_name (uc_general_category_t category);
+
 /* Return the general category given by name, e.g. "Lu".  */
 extern uc_general_category_t
        uc_general_category_byname (const char *category_name);
diff --git a/lib/unictype/categ_longname.c b/lib/unictype/categ_longname.c
new file mode 100644 (file)
index 0000000..58a653b
--- /dev/null
@@ -0,0 +1,106 @@
+/* Categories of Unicode characters.
+   Copyright (C) 2002, 2006-2007, 2011 Free Software Foundation, Inc.
+   Written by Bruno Haible <bruno@clisp.org>, 2011.
+
+   This program is free software: you can redistribute it and/or modify it
+   under the terms of the GNU Lesser General Public License as published
+   by the Free Software Foundation; either version 3 of the License, or
+   (at your option) any later version.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public License
+   along with this program.  If not, see <http://www.gnu.org/licenses/>.  */
+
+#include <config.h>
+
+/* Specification.  */
+#include "unictype.h"
+
+/* Long names of the single-bit general categories.  The row index is the
+   position of the category's bit in the bitmask, as computed by
+   uc_general_category_long_name.  Each row has room for the longest name,
+   "Connector Punctuation" (21 characters), plus the terminating NUL.  */
+static const char u_category_long_name[30][22] =
+{
+  "Uppercase Letter",
+  "Lowercase Letter",
+  "Titlecase Letter",
+  "Modifier Letter",
+  "Other Letter",
+  "Nonspacing Mark",
+  "Spacing Mark",
+  "Enclosing Mark",
+  "Decimal Number",
+  "Letter Number",
+  "Other Number",
+  "Connector Punctuation",
+  "Dash Punctuation",
+  "Open Punctuation",
+  "Close Punctuation",
+  "Initial Punctuation",
+  "Final Punctuation",
+  "Other Punctuation",
+  "Math Symbol",
+  "Currency Symbol",
+  "Modifier Symbol",
+  "Other Symbol",
+  "Space Separator",
+  "Line Separator",
+  "Paragraph Separator",
+  "Control",
+  "Format",
+  "Surrogate",
+  "Private Use",
+  "Unassigned"
+};
+
+/* Return the long name of CATEGORY, for example "Uppercase Letter" for a
+   single-bit category or "Letter" for the union UC_CATEGORY_MASK_L.
+   Return NULL if the bitmask is empty, is a single bit that has no entry
+   in u_category_long_name, or is a multi-bit mask other than the
+   UC_CATEGORY_MASK_* unions tested below.
+   The returned string is statically allocated; the caller must not free
+   it.  */
+const char *
+uc_general_category_long_name (uc_general_category_t category)
+{
+  uint32_t bitmask = category.bitmask;
+  /* An empty bitmask names no category.  */
+  if (bitmask != 0)
+    {
+      if ((bitmask & (bitmask - 1)) == 0)
+        {
+          /* Exactly one bit is set: the name is the table entry at that
+             bit's position.  */
+          int bit;
+          /* Take log2 using a variant of Robert Harley's method.
+             Found by Bruno Haible 1996.
+             The shifts and adds below multiply N by 0x450FBAF modulo 2^32;
+             the top 6 bits of the product index ord2_tab, which holds the
+             bit position for each power of two and -1 in the other slots.
+             The element type is 'signed char', not plain 'char', so that
+             the -1 fillers stay negative on platforms where 'char' is
+             unsigned.  */
+          uint32_t n = bitmask;
+          static const signed char ord2_tab[64] =
+            {
+              -1,  0,  1, 12,  2,  6, -1, 13,  3, -1,  7, -1, -1, -1, -1, 14,
+              10,  4, -1, -1,  8, -1, -1, 25, -1, -1, -1, -1, -1, 21, 27, 15,
+              31, 11,  5, -1, -1, -1, -1, -1,  9, -1, -1, 24, -1, -1, 20, 26,
+              30, -1, -1, -1, -1, 23, -1, 19, 29, -1, 22, 18, 28, 17, 16, -1
+            };
+          n += n << 4;
+          n += n << 6;
+          n = (n << 16) - n;
+          bit = ord2_tab[n >> 26];
+
+          /* Reject -1 and bits beyond the last named category explicitly,
+             instead of relying on the conversion of a negative 'int' to an
+             unsigned type in a comparison against sizeof.  */
+          if (bit >= 0
+              && bit < (int) (sizeof (u_category_long_name)
+                              / sizeof (u_category_long_name[0])))
+            return u_category_long_name[bit];
+        }
+      else
+        {
+          /* Several bits are set: only these unions have a long name.  */
+          if (bitmask == UC_CATEGORY_MASK_L)
+            return "Letter";
+          if (bitmask == UC_CATEGORY_MASK_LC)
+            return "Cased Letter";
+          if (bitmask == UC_CATEGORY_MASK_M)
+            return "Mark";
+          if (bitmask == UC_CATEGORY_MASK_N)
+            return "Number";
+          if (bitmask == UC_CATEGORY_MASK_P)
+            return "Punctuation";
+          if (bitmask == UC_CATEGORY_MASK_S)
+            return "Symbol";
+          if (bitmask == UC_CATEGORY_MASK_Z)
+            return "Separator";
+          if (bitmask == UC_CATEGORY_MASK_C)
+            return "Other";
+        }
+    }
+  return NULL;
+}
index 76ea162..2a5cbac 100644 (file)
@@ -45,6 +45,7 @@ unictype/category-Zs
 unictype/category-and
 unictype/category-and-not
 unictype/category-byname
+unictype/category-longname
 unictype/category-name
 unictype/category-of
 unictype/category-or
diff --git a/modules/unictype/category-longname b/modules/unictype/category-longname
new file mode 100644 (file)
index 0000000..76db6d9
--- /dev/null
@@ -0,0 +1,25 @@
+Description:
+Long name of Unicode character category.
+
+Files:
+lib/unictype/categ_longname.c
+
+Depends-on:
+unictype/base
+
+configure.ac:
+gl_LIBUNISTRING_MODULE([0.9.4], [unictype/category-longname])
+
+Makefile.am:
+if LIBUNISTRING_COMPILE_UNICTYPE_CATEGORY_LONGNAME
+lib_SOURCES += unictype/categ_longname.c
+endif
+
+Include:
+"unictype.h"
+
+License:
+LGPL
+
+Maintainer:
+Bruno Haible