Add context arguments to u*_casemap functions.
author Bruno Haible <bruno@clisp.org>
Mon, 29 Jun 2009 21:51:02 +0000 (23:51 +0200)
committer Bruno Haible <bruno@clisp.org>
Mon, 29 Jun 2009 21:51:02 +0000 (23:51 +0200)
ChangeLog
lib/unicase/u-casemap.h
lib/unicase/u16-casemap.c
lib/unicase/u32-casemap.c
lib/unicase/u8-casemap.c
lib/unicase/unicasemap.h
modules/unicase/u16-casemap
modules/unicase/u32-casemap
modules/unicase/u8-casemap

index 204a0d1..e5624fd 100644 (file)
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,5 +1,23 @@
 2009-06-29  Bruno Haible  <bruno@clisp.org>
 
+       Add context arguments to u*_casemap functions.
+       * lib/unicase/unicasemap.h: Include unicase.h.
+       (u8_casemap, u16_casemap, u32_casemap): Add prefix_context and
+       suffix_context arguments.
+       * lib/unicase/u-casemap.h (is_cased, is_case_ignorable): Remove
+       functions.
+       (FUNC): Add prefix_context and suffix_context arguments. Use
+       uc_is_cased and uc_is_case_ignorable.
+       * lib/unicase/u8-casemap.c: Include caseprop.h and context.h.
+       * lib/unicase/u16-casemap.c: Likewise.
+       * lib/unicase/u32-casemap.c: Likewise.
+       * modules/unicase/u8-casemap (Files): Add lib/unicase/context.h.
+       (Depends-on): Add unicase/cased, unicase/ignorable. Clean up.
+       * modules/unicase/u16-casemap (Files): Add lib/unicase/context.h.
+       (Depends-on): Add unicase/cased, unicase/ignorable. Clean up.
+       * modules/unicase/u32-casemap (Files): Add lib/unicase/context.h.
+       (Depends-on): Add unicase/cased, unicase/ignorable. Clean up.
+
        New module 'unicase/u32-suffix-context'.
        * lib/unicase/u32-suffix-context.c: New file.
        * modules/unicase/u32-suffix-context: New file.
index 760fca7..d904eb4 100644 (file)
    You should have received a copy of the GNU Lesser General Public License
    along with this program.  If not, see <http://www.gnu.org/licenses/>.  */
 
-/* Quoting the Unicode standard:
-     Definition: A character is defined to be "cased" if it has the Lowercase or
-     Uppercase property or has a General_Category value of Titlecase_Letter.  */
-static inline bool
-is_cased (ucs4_t uc)
-{
-  return (uc_is_property_lowercase (uc)
-         || uc_is_property_uppercase (uc)
-         || uc_is_general_category (uc, UC_TITLECASE_LETTER));
-}
-
-/* Quoting the Unicode standard:
-     Definition: A character is defined to be "case-ignorable" if it has the
-     value MidLetter {or the value MidNumLet} for the Word_Break property or
-     its General_Category is one of Nonspacing_Mark (Mn), Enclosing_Mark (Me),
-     Format (Cf), Modifier_Letter (Lm), or Modifier_Symbol (Sk).
-   The text marked in braces was added in Unicode 5.1.0, see
-   <http://www.unicode.org/versions/Unicode5.1.0/> section "Update of
-   Definition of case-ignorable".   */
-static inline bool
-is_case_ignorable (ucs4_t uc)
-{
-  int wbp = uc_wordbreak_property (uc);
-
-  return (wbp == WBP_MIDLETTER || wbp == WBP_MIDNUMLET
-         || uc_is_general_category_withtable (uc, UC_CATEGORY_MASK_Mn
-                                                  | UC_CATEGORY_MASK_Me
-                                                  | UC_CATEGORY_MASK_Cf
-                                                  | UC_CATEGORY_MASK_Lm
-                                                  | UC_CATEGORY_MASK_Sk));
-}
-
 UNIT *
-FUNC (const UNIT *s, size_t n, const char *iso639_language,
+FUNC (const UNIT *s, size_t n,
+      casing_prefix_context_t prefix_context,
+      casing_suffix_context_t suffix_context,
+      const char *iso639_language,
       ucs4_t (*single_character_map) (ucs4_t),
       size_t offset_in_rule, /* offset in 'struct special_casing_rule' */
       uninorm_t nf,
@@ -77,11 +48,13 @@ FUNC (const UNIT *s, size_t n, const char *iso639_language,
 
     /* Helper for evaluating the FINAL_SIGMA condition:
        Last character that was not case-ignorable.  */
-    ucs4_t last_char_except_ignorable = 0xFFFD;
+    ucs4_t last_char_except_ignorable =
+      prefix_context.last_char_except_ignorable;
 
     /* Helper for evaluating the AFTER_SOFT_DOTTED and AFTER_I conditions:
        Last character that was of combining class 230 ("Above") or 0.  */
-    ucs4_t last_char_normal_or_above = 0xFFFD;
+    ucs4_t last_char_normal_or_above =
+      prefix_context.last_char_normal_or_above;
 
     while (s < s_end)
       {
@@ -134,23 +107,31 @@ FUNC (const UNIT *s, size_t n, const char *iso639_language,
                           consisting of a case-ignorable sequence and then a
                           cased letter.  */
                        /* Test the "before" condition.  */
-                       applies = is_cased (last_char_except_ignorable);
+                       applies = uc_is_cased (last_char_except_ignorable);
                        /* Test the "after" condition.  */
                        if (applies)
                          {
                            const UNIT *s2 = s + count;
-                           while (s2 < s_end)
+                           for (;;)
                              {
-                               ucs4_t uc2;
-                               int count2 = U_MBTOUC_UNSAFE (&uc2, s2, s_end - s2);
-                               if (is_cased (uc2))
+                               if (s2 < s_end)
                                  {
-                                   applies = false;
+                                   ucs4_t uc2;
+                                   int count2 = U_MBTOUC_UNSAFE (&uc2, s2, s_end - s2);
+                                   if (uc_is_cased (uc2))
+                                     {
+                                       applies = false;
+                                       break;
+                                     }
+                                   if (!uc_is_case_ignorable (uc2))
+                                     break;
+                                   s2 += count2;
+                                 }
+                               else
+                                 {
+                                   applies = ((suffix_context.bits & SCC_FINAL_SIGMA_MASK) == 0);
                                    break;
                                  }
-                               if (!is_case_ignorable (uc2))
-                                 break;
-                               s2 += count2;
                              }
                          }
                        break;
@@ -171,19 +152,27 @@ FUNC (const UNIT *s, size_t n, const char *iso639_language,
                        {
                          const UNIT *s2 = s + count;
                          applies = false;
-                         while (s2 < s_end)
+                         for (;;)
                            {
-                             ucs4_t uc2;
-                             int count2 = U_MBTOUC_UNSAFE (&uc2, s2, s_end - s2);
-                             int ccc = uc_combining_class (uc2);
-                             if (ccc == UC_CCC_A)
+                             if (s2 < s_end)
                                {
-                                 applies = true;
+                                 ucs4_t uc2;
+                                 int count2 = U_MBTOUC_UNSAFE (&uc2, s2, s_end - s2);
+                                 int ccc = uc_combining_class (uc2);
+                                 if (ccc == UC_CCC_A)
+                                   {
+                                     applies = true;
+                                     break;
+                                   }
+                                 if (ccc == UC_CCC_NR)
+                                   break;
+                                 s2 += count2;
+                               }
+                             else
+                               {
+                                 applies = ((suffix_context.bits & SCC_MORE_ABOVE_MASK) != 0);
                                  break;
                                }
-                             if (ccc == UC_CCC_NR)
-                               break;
-                             s2 += count2;
                            }
                        }
                        break;
@@ -198,21 +187,29 @@ FUNC (const UNIT *s, size_t n, const char *iso639_language,
                        {
                          const UNIT *s2 = s + count;
                          applies = false;
-                         while (s2 < s_end)
+                         for (;;)
                            {
-                             ucs4_t uc2;
-                             int count2 = U_MBTOUC_UNSAFE (&uc2, s2, s_end - s2);
-                             if (uc2 == 0x0307) /* COMBINING DOT ABOVE */
+                             if (s2 < s_end)
                                {
-                                 applies = true;
-                                 break;
+                                 ucs4_t uc2;
+                                 int count2 = U_MBTOUC_UNSAFE (&uc2, s2, s_end - s2);
+                                 if (uc2 == 0x0307) /* COMBINING DOT ABOVE */
+                                   {
+                                     applies = true;
+                                     break;
+                                   }
+                                 {
+                                   int ccc = uc_combining_class (uc2);
+                                   if (ccc == UC_CCC_A || ccc == UC_CCC_NR)
+                                     break;
+                                 }
+                                 s2 += count2;
                                }
-                             {
-                               int ccc = uc_combining_class (uc2);
-                               if (ccc == UC_CCC_A || ccc == UC_CCC_NR)
+                             else
+                               {
+                                 applies = ((suffix_context.bits & SCC_BEFORE_DOT_MASK) != 0);
                                  break;
-                             }
-                             s2 += count2;
+                               }
                            }
                        }
                        break;
@@ -354,7 +351,7 @@ FUNC (const UNIT *s, size_t n, const char *iso639_language,
            }
        }
 
-       if (!is_case_ignorable (uc))
+       if (!uc_is_case_ignorable (uc))
          last_char_except_ignorable = uc;
 
        {
index 4221aaa..1b1952e 100644 (file)
@@ -28,6 +28,8 @@
 #include "unictype.h"
 #include "uniwbrk.h"
 #include "uninorm.h"
+#include "caseprop.h"
+#include "context.h"
 #include "special-casing.h"
 
 #define FUNC u16_casemap
index 084f8f6..f628493 100644 (file)
@@ -28,6 +28,8 @@
 #include "unictype.h"
 #include "uniwbrk.h"
 #include "uninorm.h"
+#include "caseprop.h"
+#include "context.h"
 #include "special-casing.h"
 
 #define FUNC u32_casemap
index 96268a3..52c8f45 100644 (file)
@@ -28,6 +28,8 @@
 #include "unictype.h"
 #include "uniwbrk.h"
 #include "uninorm.h"
+#include "caseprop.h"
+#include "context.h"
 #include "special-casing.h"
 
 #define FUNC u8_casemap
index 4581cd6..8da8c51 100644 (file)
 #include <stddef.h>
 
 #include "unitypes.h"
+#include "unicase.h"
 #include "uninorm.h"
 
 extern uint8_t *
-       u8_casemap (const uint8_t *s, size_t n, const char *iso639_language,
+       u8_casemap (const uint8_t *s, size_t n,
+                  casing_prefix_context_t prefix_context,
+                  casing_suffix_context_t suffix_context,
+                  const char *iso639_language,
                   ucs4_t (*single_character_map) (ucs4_t),
                   size_t offset_in_rule, /* offset in 'struct special_casing_rule' */
                   uninorm_t nf,
                   uint8_t *resultbuf, size_t *lengthp);
 
 extern uint16_t *
-       u16_casemap (const uint16_t *s, size_t n, const char *iso639_language,
+       u16_casemap (const uint16_t *s, size_t n,
+                   casing_prefix_context_t prefix_context,
+                   casing_suffix_context_t suffix_context,
+                   const char *iso639_language,
                    ucs4_t (*single_character_map) (ucs4_t),
                    size_t offset_in_rule, /* offset in 'struct special_casing_rule' */
                    uninorm_t nf,
                    uint16_t *resultbuf, size_t *lengthp);
 
 extern uint32_t *
-       u32_casemap (const uint32_t *s, size_t n, const char *iso639_language,
+       u32_casemap (const uint32_t *s, size_t n,
+                   casing_prefix_context_t prefix_context,
+                   casing_suffix_context_t suffix_context,
+                   const char *iso639_language,
                    ucs4_t (*single_character_map) (ucs4_t),
                    size_t offset_in_rule, /* offset in 'struct special_casing_rule' */
                    uninorm_t nf,
index 5e3910c..cb2e6c4 100644 (file)
@@ -5,17 +5,14 @@ Files:
 lib/unicase/unicasemap.h
 lib/unicase/u16-casemap.c
 lib/unicase/u-casemap.h
+lib/unicase/context.h
 
 Depends-on:
 unicase/base
+unicase/cased
+unicase/ignorable
 unicase/special-casing
-uniwbrk/wordbreak-property
-unictype/category-of
-unictype/category-test
-unictype/category-Lt
 unictype/combining-class
-unictype/property-lowercase
-unictype/property-uppercase
 unictype/property-soft-dotted
 unistr/u16-mbtouc-unsafe
 unistr/u16-uctomb
index f2b6345..4285d1c 100644 (file)
@@ -5,17 +5,14 @@ Files:
 lib/unicase/unicasemap.h
 lib/unicase/u32-casemap.c
 lib/unicase/u-casemap.h
+lib/unicase/context.h
 
 Depends-on:
 unicase/base
+unicase/cased
+unicase/ignorable
 unicase/special-casing
-uniwbrk/wordbreak-property
-unictype/category-of
-unictype/category-test
-unictype/category-Lt
 unictype/combining-class
-unictype/property-lowercase
-unictype/property-uppercase
 unictype/property-soft-dotted
 unistr/u32-mbtouc-unsafe
 unistr/u32-uctomb
index a84e479..3c482da 100644 (file)
@@ -5,17 +5,14 @@ Files:
 lib/unicase/unicasemap.h
 lib/unicase/u8-casemap.c
 lib/unicase/u-casemap.h
+lib/unicase/context.h
 
 Depends-on:
 unicase/base
+unicase/cased
+unicase/ignorable
 unicase/special-casing
-uniwbrk/wordbreak-property
-unictype/category-of
-unictype/category-test
-unictype/category-Lt
 unictype/combining-class
-unictype/property-lowercase
-unictype/property-uppercase
 unictype/property-soft-dotted
 unistr/u8-mbtouc-unsafe
 unistr/u8-uctomb