Commit facb79abdc20572cb727deb42a4d2e70563450de

Werner Lemberg 2015-12-06T19:34:59

[autofit] Add support for Khmer script.

We split Khmer into two auto-hinter scripts: `Khmer' (`khmr') and `Khmer symbols' (`khms', U+19E0-U+19FF).

* src/autofit/afblue.dat: Add blue zone data for Khmer.
* src/autofit/afblue.c, src/autofit/afblue.h: Regenerated.
* src/autofit/afscript.h: Add Khmer standard characters.
* src/autofit/afranges.c: Add Khmer data.
* src/autofit/afstyles.h: Add Khmer data.

diff --git a/ChangeLog b/ChangeLog
index ec4890f..2d710a9 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,5 +1,22 @@
 2015-12-06  Werner Lemberg  <wl@gnu.org>
 
+	[autofit] Add support for Khmer script.
+
+	We split Khmer into two auto-hinter scripts: `Khmer' (`khmr') and
+	`Khmer symbols' (`khms', U+19E0-U+19FF).
+
+	* src/autofit/afblue.dat: Add blue zone data for Khmer.
+
+	* src/autofit/afblue.c, src/autofit/afblue.h: Regenerated.
+
+	* src/autofit/afscript.h: Add Khmer standard characters.
+
+	* src/autofit/afranges.c: Add Khmer data.
+
+	* src/autofit/afstyles.h: Add Khmer data.
+
+2015-12-06  Werner Lemberg  <wl@gnu.org>
+
 	[autofit] Rewrite HarfBuzz interface to support character clusters.
 
 	Scripts like Khmer have blue zones that can't be directly
diff --git a/src/autofit/afblue.c b/src/autofit/afblue.c
index f528908..be8099e 100644
--- a/src/autofit/afblue.c
+++ b/src/autofit/afblue.c
@@ -64,6 +64,18 @@
     '\0',
     '\xD7', '\xA7', ' ', '\xD7', '\x9A', ' ', '\xD7', '\x9F', ' ', '\xD7', '\xA3', ' ', '\xD7', '\xA5',  /* ק ך ן ף ץ */
     '\0',
+    '\xE1', '\x9E', '\x81', ' ', '\xE1', '\x9E', '\x91', ' ', '\xE1', '\x9E', '\x93', ' ', '\xE1', '\x9E', '\xA7', ' ', '\xE1', '\x9E', '\xA9', ' ', '\xE1', '\x9E', '\xB6',  /* ខ ទ ន ឧ ឩ ា */
+    '\0',
+    '\xE1', '\x9E', '\x81', ' ', '\xE1', '\x9E', '\x83', ' ', '\xE1', '\x9E', '\x85', ' ', '\xE1', '\x9E', '\x8B', ' ', '\xE1', '\x9E', '\x94', ' ', '\xE1', '\x9E', '\x98', ' ', '\xE1', '\x9E', '\x99', ' ', '\xE1', '\x9E', '\xB2',  /* ខ ឃ ច ឋ ប ម យ ឲ */
+    '\0',
+    '\xE1', '\x9E', '\x8F', '\xE1', '\x9F', '\x92', '\xE1', '\x9E', '\x9A', ' ', '\xE1', '\x9E', '\x9A', '\xE1', '\x9F', '\x80', ' ', '\xE1', '\x9E', '\xB2', '\xE1', '\x9F', '\x92', '\xE1', '\x9E', '\x99',  /* ត្រ រៀ ឲ្យ */
+    '\0',
+    '\xE1', '\x9E', '\x84', '\xE1', '\x9F', '\x92', '\xE1', '\x9E', '\x81', '\xE1', '\x9F', '\x92', '\xE1', '\x9E', '\x99', ' ', '\xE1', '\x9E', '\x80', '\xE1', '\x9F', '\x92', '\xE1', '\x9E', '\x94', '\xE1', '\x9F', '\x80',  /* ង្ខ្យ ក្បៀ */
+    '\0',
+    '\xE1', '\xA7', '\xA0', ' ', '\xE1', '\xA7', '\xA1',  /* ᧠ ᧡ */
+    '\0',
+    '\xE1', '\xA7', '\xB6', ' ', '\xE1', '\xA7', '\xB9',  /* ᧶ ᧹ */
+    '\0',
     '\xE0', '\xBA', '\xB2', ' ', '\xE0', '\xBA', '\x94', ' ', '\xE0', '\xBA', '\xAD', ' ', '\xE0', '\xBA', '\xA1', ' ', '\xE0', '\xBA', '\xA5', ' ', '\xE0', '\xBA', '\xA7', ' ', '\xE0', '\xBA', '\xA3', ' ', '\xE0', '\xBA', '\x87',  /* າ ດ ອ ມ ລ ວ ຣ ງ */
     '\0',
     '\xE0', '\xBA', '\xB2', ' ', '\xE0', '\xBA', '\xAD', ' ', '\xE0', '\xBA', '\x9A', ' ', '\xE0', '\xBA', '\x8D', ' ', '\xE0', '\xBA', '\xA3', ' ', '\xE0', '\xBA', '\xAE', ' ', '\xE0', '\xBA', '\xA7', ' ', '\xE0', '\xBA', '\xA2',  /* າ ອ ບ ຍ ຣ ຮ ວ ຢ */
@@ -203,6 +215,16 @@
     { AF_BLUE_STRING_HEBREW_BOTTOM,    0                             },
     { AF_BLUE_STRING_HEBREW_DESCENDER, 0                             },
     { AF_BLUE_STRING_MAX,              0                             },
+    { AF_BLUE_STRING_KHMER_TOP,             AF_BLUE_PROPERTY_LATIN_TOP      |
+                                            AF_BLUE_PROPERTY_LATIN_X_HEIGHT   },
+    { AF_BLUE_STRING_KHMER_BOTTOM,          0                                 },
+    { AF_BLUE_STRING_KHMER_DESCENDER,       0                                 },
+    { AF_BLUE_STRING_KHMER_LARGE_DESCENDER, 0                                 },
+    { AF_BLUE_STRING_MAX,                   0                                 },
+    { AF_BLUE_STRING_KHMER_SYMBOLS_WAXING_TOP,    AF_BLUE_PROPERTY_LATIN_TOP      |
+                                                  AF_BLUE_PROPERTY_LATIN_X_HEIGHT   },
+    { AF_BLUE_STRING_KHMER_SYMBOLS_WANING_BOTTOM, 0                                 },
+    { AF_BLUE_STRING_MAX,                         0                                 },
     { AF_BLUE_STRING_LAO_TOP,             AF_BLUE_PROPERTY_LATIN_TOP      |
                                           AF_BLUE_PROPERTY_LATIN_X_HEIGHT   },
     { AF_BLUE_STRING_LAO_BOTTOM,          0                                 },
diff --git a/src/autofit/afblue.dat b/src/autofit/afblue.dat
index 117f025..ae85efb 100644
--- a/src/autofit/afblue.dat
+++ b/src/autofit/afblue.dat
@@ -125,6 +125,20 @@ AF_BLUE_STRING_ENUM AF_BLUE_STRINGS_ARRAY AF_BLUE_STRING_MAX_LEN:
   AF_BLUE_STRING_HEBREW_DESCENDER
     "ק ך ן ף ץ"
 
+  AF_BLUE_STRING_KHMER_TOP
+    "ខ ទ ន ឧ ឩ ា"
+  AF_BLUE_STRING_KHMER_BOTTOM
+    "ខ ឃ ច ឋ ប ម យ ឲ"
+  AF_BLUE_STRING_KHMER_DESCENDER
+    "ត្រ រៀ ឲ្យ"
+  AF_BLUE_STRING_KHMER_LARGE_DESCENDER
+    "ង្ខ្យ ក្បៀ"
+
+  AF_BLUE_STRING_KHMER_SYMBOLS_WAXING_TOP
+    "᧠ ᧡"
+  AF_BLUE_STRING_KHMER_SYMBOLS_WANING_BOTTOM
+    "᧶ ᧹"
+
   AF_BLUE_STRING_LAO_TOP
     "າ ດ ອ ມ ລ ວ ຣ ງ"
   AF_BLUE_STRING_LAO_BOTTOM
@@ -265,6 +279,14 @@ AF_BLUE_STRING_ENUM AF_BLUE_STRINGS_ARRAY AF_BLUE_STRING_MAX_LEN:
 // respectively.  Only horizontal blue zones (i.e., adjusting vertical
 // coordinate values) are supported.
 //
+// Some scripts like Khmer need character composition to get all necessary
+// blue zones, since Unicode only provides an abstract data model that
+// doesn't represent all possible glyph shapes.  For such character
+// clusters, the HarfBuzz library is used to convert them into the
+// corresponding glyphs.  The largest glyph element (where `largest' can be
+// either `largest ascender' or `largest descender') then defines the
+// corresponding flat or round extremum.
+//
 // For the latin auto-hinter, the overshoot should be larger than the
 // reference for top zones, and vice versa for bottom zones.
 //
@@ -376,6 +398,20 @@ AF_BLUE_STRINGSET_ENUM AF_BLUE_STRINGSETS_ARRAY AF_BLUE_STRINGSET_MAX_LEN:
     { AF_BLUE_STRING_HEBREW_DESCENDER, 0                             }
     { AF_BLUE_STRING_MAX,              0                             }
 
+  AF_BLUE_STRINGSET_KHMR
+    { AF_BLUE_STRING_KHMER_TOP,             AF_BLUE_PROPERTY_LATIN_TOP      |
+                                            AF_BLUE_PROPERTY_LATIN_X_HEIGHT   }
+    { AF_BLUE_STRING_KHMER_BOTTOM,          0                                 }
+    { AF_BLUE_STRING_KHMER_DESCENDER,       0                                 }
+    { AF_BLUE_STRING_KHMER_LARGE_DESCENDER, 0                                 }
+    { AF_BLUE_STRING_MAX,                   0                                 }
+
+  AF_BLUE_STRINGSET_KHMS
+    { AF_BLUE_STRING_KHMER_SYMBOLS_WAXING_TOP,    AF_BLUE_PROPERTY_LATIN_TOP      |
+                                                  AF_BLUE_PROPERTY_LATIN_X_HEIGHT   }
+    { AF_BLUE_STRING_KHMER_SYMBOLS_WANING_BOTTOM, 0                                 }
+    { AF_BLUE_STRING_MAX,                         0                                 }
+
   AF_BLUE_STRINGSET_LAO
     { AF_BLUE_STRING_LAO_TOP,             AF_BLUE_PROPERTY_LATIN_TOP      |
                                           AF_BLUE_PROPERTY_LATIN_X_HEIGHT   }
diff --git a/src/autofit/afblue.h b/src/autofit/afblue.h
index 2ee1ed1..a31ec71 100644
--- a/src/autofit/afblue.h
+++ b/src/autofit/afblue.h
@@ -96,36 +96,42 @@ FT_BEGIN_HEADER
     AF_BLUE_STRING_HEBREW_TOP = 326,
     AF_BLUE_STRING_HEBREW_BOTTOM = 350,
     AF_BLUE_STRING_HEBREW_DESCENDER = 368,
-    AF_BLUE_STRING_LAO_TOP = 383,
-    AF_BLUE_STRING_LAO_BOTTOM = 415,
-    AF_BLUE_STRING_LAO_ASCENDER = 447,
-    AF_BLUE_STRING_LAO_LARGE_ASCENDER = 463,
-    AF_BLUE_STRING_LAO_DESCENDER = 475,
-    AF_BLUE_STRING_LATIN_CAPITAL_TOP = 499,
-    AF_BLUE_STRING_LATIN_CAPITAL_BOTTOM = 515,
-    AF_BLUE_STRING_LATIN_SMALL_F_TOP = 531,
-    AF_BLUE_STRING_LATIN_SMALL = 545,
-    AF_BLUE_STRING_LATIN_SMALL_DESCENDER = 559,
-    AF_BLUE_STRING_LATIN_SUBS_CAPITAL_TOP = 569,
-    AF_BLUE_STRING_LATIN_SUBS_CAPITAL_BOTTOM = 589,
-    AF_BLUE_STRING_LATIN_SUBS_SMALL_F_TOP = 609,
-    AF_BLUE_STRING_LATIN_SUBS_SMALL = 629,
-    AF_BLUE_STRING_LATIN_SUBS_SMALL_DESCENDER = 665,
-    AF_BLUE_STRING_LATIN_SUPS_CAPITAL_TOP = 685,
-    AF_BLUE_STRING_LATIN_SUPS_CAPITAL_BOTTOM = 716,
-    AF_BLUE_STRING_LATIN_SUPS_SMALL_F_TOP = 745,
-    AF_BLUE_STRING_LATIN_SUPS_SMALL = 771,
-    AF_BLUE_STRING_LATIN_SUPS_SMALL_DESCENDER = 796,
-    AF_BLUE_STRING_TELUGU_TOP = 807,
-    AF_BLUE_STRING_TELUGU_BOTTOM = 835,
-    AF_BLUE_STRING_THAI_TOP = 863,
-    AF_BLUE_STRING_THAI_BOTTOM = 887,
-    AF_BLUE_STRING_THAI_ASCENDER = 915,
-    AF_BLUE_STRING_THAI_LARGE_ASCENDER = 927,
-    AF_BLUE_STRING_THAI_DESCENDER = 939,
-    AF_BLUE_STRING_THAI_LARGE_DESCENDER = 955,
-    AF_BLUE_STRING_THAI_DIGIT_TOP = 963,
-    af_blue_1_1 = 974,
+    AF_BLUE_STRING_KHMER_TOP = 383,
+    AF_BLUE_STRING_KHMER_BOTTOM = 407,
+    AF_BLUE_STRING_KHMER_DESCENDER = 439,
+    AF_BLUE_STRING_KHMER_LARGE_DESCENDER = 466,
+    AF_BLUE_STRING_KHMER_SYMBOLS_WAXING_TOP = 495,
+    AF_BLUE_STRING_KHMER_SYMBOLS_WANING_BOTTOM = 503,
+    AF_BLUE_STRING_LAO_TOP = 511,
+    AF_BLUE_STRING_LAO_BOTTOM = 543,
+    AF_BLUE_STRING_LAO_ASCENDER = 575,
+    AF_BLUE_STRING_LAO_LARGE_ASCENDER = 591,
+    AF_BLUE_STRING_LAO_DESCENDER = 603,
+    AF_BLUE_STRING_LATIN_CAPITAL_TOP = 627,
+    AF_BLUE_STRING_LATIN_CAPITAL_BOTTOM = 643,
+    AF_BLUE_STRING_LATIN_SMALL_F_TOP = 659,
+    AF_BLUE_STRING_LATIN_SMALL = 673,
+    AF_BLUE_STRING_LATIN_SMALL_DESCENDER = 687,
+    AF_BLUE_STRING_LATIN_SUBS_CAPITAL_TOP = 697,
+    AF_BLUE_STRING_LATIN_SUBS_CAPITAL_BOTTOM = 717,
+    AF_BLUE_STRING_LATIN_SUBS_SMALL_F_TOP = 737,
+    AF_BLUE_STRING_LATIN_SUBS_SMALL = 757,
+    AF_BLUE_STRING_LATIN_SUBS_SMALL_DESCENDER = 793,
+    AF_BLUE_STRING_LATIN_SUPS_CAPITAL_TOP = 813,
+    AF_BLUE_STRING_LATIN_SUPS_CAPITAL_BOTTOM = 844,
+    AF_BLUE_STRING_LATIN_SUPS_SMALL_F_TOP = 873,
+    AF_BLUE_STRING_LATIN_SUPS_SMALL = 899,
+    AF_BLUE_STRING_LATIN_SUPS_SMALL_DESCENDER = 924,
+    AF_BLUE_STRING_TELUGU_TOP = 935,
+    AF_BLUE_STRING_TELUGU_BOTTOM = 963,
+    AF_BLUE_STRING_THAI_TOP = 991,
+    AF_BLUE_STRING_THAI_BOTTOM = 1015,
+    AF_BLUE_STRING_THAI_ASCENDER = 1043,
+    AF_BLUE_STRING_THAI_LARGE_ASCENDER = 1055,
+    AF_BLUE_STRING_THAI_DESCENDER = 1067,
+    AF_BLUE_STRING_THAI_LARGE_DESCENDER = 1083,
+    AF_BLUE_STRING_THAI_DIGIT_TOP = 1091,
+    af_blue_1_1 = 1102,
 #ifdef AF_CONFIG_OPTION_CJK
     AF_BLUE_STRING_CJK_TOP = af_blue_1_1 + 1,
     AF_BLUE_STRING_CJK_BOTTOM = af_blue_1_1 + 203,
@@ -188,13 +194,15 @@ FT_BEGIN_HEADER
     AF_BLUE_STRINGSET_DEVA = 10,
     AF_BLUE_STRINGSET_GREK = 16,
     AF_BLUE_STRINGSET_HEBR = 23,
-    AF_BLUE_STRINGSET_LAO = 27,
-    AF_BLUE_STRINGSET_LATN = 33,
-    AF_BLUE_STRINGSET_LATB = 40,
-    AF_BLUE_STRINGSET_LATP = 47,
-    AF_BLUE_STRINGSET_TELU = 54,
-    AF_BLUE_STRINGSET_THAI = 57,
-    af_blue_2_1 = 65,
+    AF_BLUE_STRINGSET_KHMR = 27,
+    AF_BLUE_STRINGSET_KHMS = 32,
+    AF_BLUE_STRINGSET_LAO = 35,
+    AF_BLUE_STRINGSET_LATN = 41,
+    AF_BLUE_STRINGSET_LATB = 48,
+    AF_BLUE_STRINGSET_LATP = 55,
+    AF_BLUE_STRINGSET_TELU = 62,
+    AF_BLUE_STRINGSET_THAI = 65,
+    af_blue_2_1 = 73,
 #ifdef AF_CONFIG_OPTION_CJK
     AF_BLUE_STRINGSET_HANI = af_blue_2_1 + 0,
     af_blue_2_1_1 = af_blue_2_1 + 2,
diff --git a/src/autofit/afranges.c b/src/autofit/afranges.c
index 13c2213..64edc79 100644
--- a/src/autofit/afranges.c
+++ b/src/autofit/afranges.c
@@ -179,6 +179,34 @@
   };
 
 
+  const AF_Script_UniRangeRec  af_khmr_uniranges[] =
+  {
+    AF_UNIRANGE_REC(  0x1780UL,  0x17FFUL ),  /* Khmer */
+    AF_UNIRANGE_REC(       0UL,       0UL )
+  };
+
+  const AF_Script_UniRangeRec  af_khmr_nonbase_uniranges[] =
+  {
+    AF_UNIRANGE_REC(  0x17B7UL,  0x17BDUL ),
+    AF_UNIRANGE_REC(  0x17C6UL,  0x17C6UL ),
+    AF_UNIRANGE_REC(  0x17C9UL,  0x17D3UL ),
+    AF_UNIRANGE_REC(  0x17DDUL,  0x17DDUL ),
+    AF_UNIRANGE_REC(       0UL,       0UL )
+  };
+
+
+  const AF_Script_UniRangeRec  af_khms_uniranges[] =
+  {
+    AF_UNIRANGE_REC(  0x19E0UL,  0x19FFUL ),  /* Khmer Symbols */
+    AF_UNIRANGE_REC(       0UL,       0UL )
+  };
+
+  const AF_Script_UniRangeRec  af_khms_nonbase_uniranges[] =
+  {
+    AF_UNIRANGE_REC( 0UL, 0UL )
+  };
+
+
   const AF_Script_UniRangeRec  af_lao_uniranges[] =
   {
     AF_UNIRANGE_REC(  0x0E80UL,  0x0EFFUL ),  /* Lao */
diff --git a/src/autofit/afscript.h b/src/autofit/afscript.h
index d249098..77763e4 100644
--- a/src/autofit/afscript.h
+++ b/src/autofit/afscript.h
@@ -61,6 +61,17 @@
           HB_SCRIPT_LAO,
           "\xE0\xBB\x90" ) /* ໐ */
 
+  /* only digit zero has a simple shape in the Khmer script */
+  SCRIPT( khmr, KHMR,
+          "Khmer",
+          HB_SCRIPT_KHMER,
+          "\xE1\x9F\xA0" ) /* ០ */
+
+  SCRIPT( khms, KHMS,
+          "Khmer Symbols",
+          HB_SCRIPT_INVALID,
+          "\xE1\xA7\xA1 \xE1\xA7\xAA" ) /* ᧡ ᧪ */
+
   SCRIPT( latn, LATN,
           "Latin",
           HB_SCRIPT_LATIN,
diff --git a/src/autofit/afstyles.h b/src/autofit/afstyles.h
index 87663c9..16105c6 100644
--- a/src/autofit/afstyles.h
+++ b/src/autofit/afstyles.h
@@ -108,6 +108,20 @@
          AF_BLUE_STRINGSET_HEBR,
          AF_COVERAGE_DEFAULT )
 
+  STYLE( khmr_dflt, KHMR_DFLT,
+         "Khmer default style",
+         AF_WRITING_SYSTEM_LATIN,
+         AF_SCRIPT_KHMR,
+         AF_BLUE_STRINGSET_KHMR,
+         AF_COVERAGE_DEFAULT )
+
+  STYLE( khms_dflt, KHMS_DFLT,
+         "Khmer Symbols default style",
+         AF_WRITING_SYSTEM_LATIN,
+         AF_SCRIPT_KHMS,
+         AF_BLUE_STRINGSET_KHMS,
+         AF_COVERAGE_DEFAULT )
+
   STYLE( lao_dflt, LAO_DFLT,
          "Lao default style",
          AF_WRITING_SYSTEM_LATIN,