From 27819fa43544f9f2ddce39f689d5a9faf9a0368e Mon Sep 17 00:00:00 2001 From: Bruno Haible Date: Sat, 26 Mar 2011 14:10:30 +0100 Subject: [PATCH] unictype/joininggroup-byname: Allow hyphens, omitted word separators. * lib/unictype/joininggroup_byname.c (uc_joining_group_byname): Convert also hyphens to space. * lib/unictype/joininggroup_byname.gperf: Recognize the names also without spaces. * tests/unictype/test-joininggroup_byname.c (main): Add more tests. --- ChangeLog | 9 +++++++++ lib/unictype/joininggroup_byname.c | 4 ++-- lib/unictype/joininggroup_byname.gperf | 15 +++++++++++++++ tests/unictype/test-joininggroup_byname.c | 17 +++++++++++++++++ 4 files changed, 43 insertions(+), 2 deletions(-) diff --git a/ChangeLog b/ChangeLog index 5742cd3f3..afcd124e5 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,5 +1,14 @@ 2011-03-26 Bruno Haible + unictype/joininggroup-byname: Allow hyphens, omitted word separators. + * lib/unictype/joininggroup_byname.c (uc_joining_group_byname): Convert + also hyphens to space. + * lib/unictype/joininggroup_byname.gperf: Recognize the names also + without spaces. + * tests/unictype/test-joininggroup_byname.c (main): Add more tests. + +2011-03-26 Bruno Haible + unictype/joiningtype-byname: Recognize long names as well. * lib/unictype.in.h (uc_joiningtype_class_byname): Allow argument to be a long name. diff --git a/lib/unictype/joininggroup_byname.c b/lib/unictype/joininggroup_byname.c index ca9f1a659..063ec2df5 100644 --- a/lib/unictype/joininggroup_byname.c +++ b/lib/unictype/joininggroup_byname.c @@ -35,7 +35,7 @@ uc_joining_group_byname (const char *joining_group_name) char buf[MAX_WORD_LENGTH + 1]; const struct named_joining_group *found; - /* Copy joining_group_name into buf, converting '_' to ' '. */ + /* Copy joining_group_name into buf, converting '_' and '-' to ' '. */ { const char *p = joining_group_name; char *q = buf; @@ -44,7 +44,7 @@ uc_joining_group_byname (const char *joining_group_name) { char c = *p; - if (c == '_') + if (c == '_' || c == '-') c = ' '; *q = c; if (c == '\0') diff --git a/lib/unictype/joininggroup_byname.gperf b/lib/unictype/joininggroup_byname.gperf index 518d1c9cd..bc2fbc8d9 100644 --- a/lib/unictype/joininggroup_byname.gperf +++ b/lib/unictype/joininggroup_byname.gperf @@ -12,30 +12,37 @@ struct named_joining_group { int name; int joining_group; }; %define string-pool-name joining_group_stringpool %% No Joining Group, UC_JOINING_GROUP_NONE +NoJoiningGroup, UC_JOINING_GROUP_NONE Ain, UC_JOINING_GROUP_AIN Alaph, UC_JOINING_GROUP_ALAPH Alef, UC_JOINING_GROUP_ALEF Beh, UC_JOINING_GROUP_BEH Beth, UC_JOINING_GROUP_BETH Burushaski Yeh Barree, UC_JOINING_GROUP_BURUSHASKI_YEH_BARREE +BurushaskiYehBarree, UC_JOINING_GROUP_BURUSHASKI_YEH_BARREE Dal, UC_JOINING_GROUP_DAL Dalath Rish, UC_JOINING_GROUP_DALATH_RISH +DalathRish, UC_JOINING_GROUP_DALATH_RISH E, UC_JOINING_GROUP_E Farsi Yeh, UC_JOINING_GROUP_FARSI_YEH +FarsiYeh, UC_JOINING_GROUP_FARSI_YEH Fe, UC_JOINING_GROUP_FE Feh, UC_JOINING_GROUP_FEH Final Semkath, UC_JOINING_GROUP_FINAL_SEMKATH +FinalSemkath, UC_JOINING_GROUP_FINAL_SEMKATH Gaf, UC_JOINING_GROUP_GAF Gamal, UC_JOINING_GROUP_GAMAL Hah, UC_JOINING_GROUP_HAH He, UC_JOINING_GROUP_HE Heh, UC_JOINING_GROUP_HEH Heh Goal, UC_JOINING_GROUP_HEH_GOAL +HehGoal, UC_JOINING_GROUP_HEH_GOAL Heth, UC_JOINING_GROUP_HETH Kaf, UC_JOINING_GROUP_KAF Kaph, UC_JOINING_GROUP_KAPH Khaph, UC_JOINING_GROUP_KHAPH Knotted Heh, UC_JOINING_GROUP_KNOTTED_HEH +KnottedHeh, UC_JOINING_GROUP_KNOTTED_HEH Lam, UC_JOINING_GROUP_LAM Lamadh, UC_JOINING_GROUP_LAMADH Meem, UC_JOINING_GROUP_MEEM @@ -48,23 +55,31 @@ Qaf, UC_JOINING_GROUP_QAF Qaph, UC_JOINING_GROUP_QAPH Reh, UC_JOINING_GROUP_REH Reversed Pe, UC_JOINING_GROUP_REVERSED_PE +ReversedPe, UC_JOINING_GROUP_REVERSED_PE Sad, UC_JOINING_GROUP_SAD Sadhe, UC_JOINING_GROUP_SADHE Seen, UC_JOINING_GROUP_SEEN Semkath, UC_JOINING_GROUP_SEMKATH Shin, UC_JOINING_GROUP_SHIN Swash Kaf, UC_JOINING_GROUP_SWASH_KAF +SwashKaf, UC_JOINING_GROUP_SWASH_KAF Syriac Waw, UC_JOINING_GROUP_SYRIAC_WAW +SyriacWaw, UC_JOINING_GROUP_SYRIAC_WAW Tah, UC_JOINING_GROUP_TAH Taw, UC_JOINING_GROUP_TAW Teh Marbuta, UC_JOINING_GROUP_TEH_MARBUTA +TehMarbuta, UC_JOINING_GROUP_TEH_MARBUTA Teh Marbuta Goal, UC_JOINING_GROUP_TEH_MARBUTA_GOAL +TehMarbutaGoal, UC_JOINING_GROUP_TEH_MARBUTA_GOAL Teth, UC_JOINING_GROUP_TETH Waw, UC_JOINING_GROUP_WAW Yeh, UC_JOINING_GROUP_YEH Yeh Barree, UC_JOINING_GROUP_YEH_BARREE +YehBarree, UC_JOINING_GROUP_YEH_BARREE Yeh with tail, UC_JOINING_GROUP_YEH_WITH_TAIL +YehWithTail, UC_JOINING_GROUP_YEH_WITH_TAIL Yudh, UC_JOINING_GROUP_YUDH Yudh He, UC_JOINING_GROUP_YUDH_HE +YudhHe, UC_JOINING_GROUP_YUDH_HE Zain, UC_JOINING_GROUP_ZAIN Zhain, UC_JOINING_GROUP_ZHAIN diff --git a/tests/unictype/test-joininggroup_byname.c b/tests/unictype/test-joininggroup_byname.c index 31b073bed..9f2119af6 100644 --- a/tests/unictype/test-joininggroup_byname.c +++ b/tests/unictype/test-joininggroup_byname.c @@ -30,6 +30,7 @@ main () ASSERT (uc_joining_group_byname ("NO JOINING GROUP") == UC_JOINING_GROUP_NONE); ASSERT (uc_joining_group_byname ("No Joining Group") == UC_JOINING_GROUP_NONE); ASSERT (uc_joining_group_byname ("No_Joining_Group") == UC_JOINING_GROUP_NONE); + ASSERT (uc_joining_group_byname ("NoJoiningGroup") == UC_JOINING_GROUP_NONE); ASSERT (uc_joining_group_byname ("AIN") == UC_JOINING_GROUP_AIN); ASSERT (uc_joining_group_byname ("Ain") == UC_JOINING_GROUP_AIN); ASSERT (uc_joining_group_byname ("ALAPH") == UC_JOINING_GROUP_ALAPH); @@ -43,15 +44,18 @@ main () ASSERT (uc_joining_group_byname ("BURUSHASKI YEH BARREE") == UC_JOINING_GROUP_BURUSHASKI_YEH_BARREE); ASSERT (uc_joining_group_byname ("Burushaski Yeh Barree") == UC_JOINING_GROUP_BURUSHASKI_YEH_BARREE); ASSERT (uc_joining_group_byname ("Burushaski_Yeh_Barree") == UC_JOINING_GROUP_BURUSHASKI_YEH_BARREE); + ASSERT (uc_joining_group_byname ("BurushaskiYehBarree") == UC_JOINING_GROUP_BURUSHASKI_YEH_BARREE); ASSERT (uc_joining_group_byname ("DAL") == UC_JOINING_GROUP_DAL); ASSERT (uc_joining_group_byname ("Dal") == UC_JOINING_GROUP_DAL); ASSERT (uc_joining_group_byname ("DALATH RISH") == UC_JOINING_GROUP_DALATH_RISH); ASSERT (uc_joining_group_byname ("Dalath Rish") == UC_JOINING_GROUP_DALATH_RISH); ASSERT (uc_joining_group_byname ("Dalath_Rish") == UC_JOINING_GROUP_DALATH_RISH); + ASSERT (uc_joining_group_byname ("DalathRish") == UC_JOINING_GROUP_DALATH_RISH); ASSERT (uc_joining_group_byname ("E") == UC_JOINING_GROUP_E); ASSERT (uc_joining_group_byname ("FARSI YEH") == UC_JOINING_GROUP_FARSI_YEH); ASSERT (uc_joining_group_byname ("Farsi Yeh") == UC_JOINING_GROUP_FARSI_YEH); ASSERT (uc_joining_group_byname ("Farsi_Yeh") == UC_JOINING_GROUP_FARSI_YEH); + ASSERT (uc_joining_group_byname ("FarsiYeh") == UC_JOINING_GROUP_FARSI_YEH); ASSERT (uc_joining_group_byname ("FE") == UC_JOINING_GROUP_FE); ASSERT (uc_joining_group_byname ("Fe") == UC_JOINING_GROUP_FE); ASSERT (uc_joining_group_byname ("FEH") == UC_JOINING_GROUP_FEH); @@ -59,6 +63,7 @@ main () ASSERT (uc_joining_group_byname ("FINAL SEMKATH") == UC_JOINING_GROUP_FINAL_SEMKATH); ASSERT (uc_joining_group_byname ("Final Semkath") == UC_JOINING_GROUP_FINAL_SEMKATH); ASSERT (uc_joining_group_byname ("Final_Semkath") == UC_JOINING_GROUP_FINAL_SEMKATH); + ASSERT (uc_joining_group_byname ("FinalSemkath") == UC_JOINING_GROUP_FINAL_SEMKATH); ASSERT (uc_joining_group_byname ("GAF") == UC_JOINING_GROUP_GAF); ASSERT (uc_joining_group_byname ("Gaf") == UC_JOINING_GROUP_GAF); ASSERT (uc_joining_group_byname ("GAMAL") == UC_JOINING_GROUP_GAMAL); @@ -72,6 +77,7 @@ main () ASSERT (uc_joining_group_byname ("HEH GOAL") == UC_JOINING_GROUP_HEH_GOAL); ASSERT (uc_joining_group_byname ("Heh Goal") == UC_JOINING_GROUP_HEH_GOAL); ASSERT (uc_joining_group_byname ("Heh_Goal") == UC_JOINING_GROUP_HEH_GOAL); + ASSERT (uc_joining_group_byname ("HehGoal") == UC_JOINING_GROUP_HEH_GOAL); ASSERT (uc_joining_group_byname ("HETH") == UC_JOINING_GROUP_HETH); ASSERT (uc_joining_group_byname ("Heth") == UC_JOINING_GROUP_HETH); ASSERT (uc_joining_group_byname ("KAF") == UC_JOINING_GROUP_KAF); @@ -83,6 +89,7 @@ main () ASSERT (uc_joining_group_byname ("KNOTTED HEH") == UC_JOINING_GROUP_KNOTTED_HEH); ASSERT (uc_joining_group_byname ("Knotted Heh") == UC_JOINING_GROUP_KNOTTED_HEH); ASSERT (uc_joining_group_byname ("Knotted_Heh") == UC_JOINING_GROUP_KNOTTED_HEH); + ASSERT (uc_joining_group_byname ("KnottedHeh") == UC_JOINING_GROUP_KNOTTED_HEH); ASSERT (uc_joining_group_byname ("LAM") == UC_JOINING_GROUP_LAM); ASSERT (uc_joining_group_byname ("Lam") == UC_JOINING_GROUP_LAM); ASSERT (uc_joining_group_byname ("LAMADH") == UC_JOINING_GROUP_LAMADH); @@ -108,6 +115,7 @@ main () ASSERT (uc_joining_group_byname ("REVERSED PE") == UC_JOINING_GROUP_REVERSED_PE); ASSERT (uc_joining_group_byname ("Reversed Pe") == UC_JOINING_GROUP_REVERSED_PE); ASSERT (uc_joining_group_byname ("Reversed_Pe") == UC_JOINING_GROUP_REVERSED_PE); + ASSERT (uc_joining_group_byname ("ReversedPe") == UC_JOINING_GROUP_REVERSED_PE); ASSERT (uc_joining_group_byname ("SAD") == UC_JOINING_GROUP_SAD); ASSERT (uc_joining_group_byname ("Sad") == UC_JOINING_GROUP_SAD); ASSERT (uc_joining_group_byname ("SADHE") == UC_JOINING_GROUP_SADHE); @@ -121,8 +129,11 @@ main () ASSERT (uc_joining_group_byname ("SWASH KAF") == UC_JOINING_GROUP_SWASH_KAF); ASSERT (uc_joining_group_byname ("Swash Kaf") == UC_JOINING_GROUP_SWASH_KAF); ASSERT (uc_joining_group_byname ("Swash_Kaf") == UC_JOINING_GROUP_SWASH_KAF); + ASSERT (uc_joining_group_byname ("SwashKaf") == UC_JOINING_GROUP_SWASH_KAF); ASSERT (uc_joining_group_byname ("SYRIAC WAW") == UC_JOINING_GROUP_SYRIAC_WAW); ASSERT (uc_joining_group_byname ("Syriac Waw") == UC_JOINING_GROUP_SYRIAC_WAW); + ASSERT (uc_joining_group_byname ("Syriac_Waw") == UC_JOINING_GROUP_SYRIAC_WAW); + ASSERT (uc_joining_group_byname ("SyriacWaw") == UC_JOINING_GROUP_SYRIAC_WAW); ASSERT (uc_joining_group_byname ("TAH") == UC_JOINING_GROUP_TAH); ASSERT (uc_joining_group_byname ("Tah") == UC_JOINING_GROUP_TAH); ASSERT (uc_joining_group_byname ("TAW") == UC_JOINING_GROUP_TAW); @@ -130,9 +141,11 @@ main () ASSERT (uc_joining_group_byname ("TEH MARBUTA") == UC_JOINING_GROUP_TEH_MARBUTA); ASSERT (uc_joining_group_byname ("Teh Marbuta") == UC_JOINING_GROUP_TEH_MARBUTA); ASSERT (uc_joining_group_byname ("Teh_Marbuta") == UC_JOINING_GROUP_TEH_MARBUTA); + ASSERT (uc_joining_group_byname ("TehMarbuta") == UC_JOINING_GROUP_TEH_MARBUTA); ASSERT (uc_joining_group_byname ("TEH MARBUTA GOAL") == UC_JOINING_GROUP_TEH_MARBUTA_GOAL); ASSERT (uc_joining_group_byname ("Teh Marbuta Goal") == UC_JOINING_GROUP_TEH_MARBUTA_GOAL); ASSERT (uc_joining_group_byname ("Teh_Marbuta_Goal") == UC_JOINING_GROUP_TEH_MARBUTA_GOAL); + ASSERT (uc_joining_group_byname ("TehMarbutaGoal") == UC_JOINING_GROUP_TEH_MARBUTA_GOAL); ASSERT (uc_joining_group_byname ("TETH") == UC_JOINING_GROUP_TETH); ASSERT (uc_joining_group_byname ("Teth") == UC_JOINING_GROUP_TETH); ASSERT (uc_joining_group_byname ("WAW") == UC_JOINING_GROUP_WAW); @@ -142,13 +155,17 @@ main () ASSERT (uc_joining_group_byname ("YEH BARREE") == UC_JOINING_GROUP_YEH_BARREE); ASSERT (uc_joining_group_byname ("Yeh Barree") == UC_JOINING_GROUP_YEH_BARREE); ASSERT (uc_joining_group_byname ("Yeh_Barree") == UC_JOINING_GROUP_YEH_BARREE); + ASSERT (uc_joining_group_byname ("YehBarree") == UC_JOINING_GROUP_YEH_BARREE); ASSERT (uc_joining_group_byname ("YEH WITH TAIL") == UC_JOINING_GROUP_YEH_WITH_TAIL); ASSERT (uc_joining_group_byname ("Yeh with tail") == UC_JOINING_GROUP_YEH_WITH_TAIL); ASSERT (uc_joining_group_byname ("Yeh_with_tail") == UC_JOINING_GROUP_YEH_WITH_TAIL); + ASSERT (uc_joining_group_byname ("YehWithTail") == UC_JOINING_GROUP_YEH_WITH_TAIL); ASSERT (uc_joining_group_byname ("YUDH") == UC_JOINING_GROUP_YUDH); ASSERT (uc_joining_group_byname ("Yudh") == UC_JOINING_GROUP_YUDH); ASSERT (uc_joining_group_byname ("YUDH HE") == UC_JOINING_GROUP_YUDH_HE); + ASSERT (uc_joining_group_byname ("Yudh He") == UC_JOINING_GROUP_YUDH_HE); ASSERT (uc_joining_group_byname ("Yudh_He") == UC_JOINING_GROUP_YUDH_HE); + ASSERT (uc_joining_group_byname ("YudhHe") == UC_JOINING_GROUP_YUDH_HE); ASSERT (uc_joining_group_byname ("ZAIN") == UC_JOINING_GROUP_ZAIN); ASSERT (uc_joining_group_byname ("Zain") == UC_JOINING_GROUP_ZAIN); ASSERT (uc_joining_group_byname ("ZHAIN") == UC_JOINING_GROUP_ZHAIN); -- 2.11.0