Hash :
07cfbe21
Author :
Date :
2012-09-06T01:16:39
[OT] Streamline Arabic fallback shaping table
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197
#!/usr/bin/python
import sys
import os.path
# Expect exactly two arguments: ArabicShaping.txt and UnicodeData.txt.
if len (sys.argv) != 3:
    sys.stderr.write ("usage: ./gen-arabic-table.py ArabicShaping.txt UnicodeData.txt\n")
    sys.exit (1)

# Both inputs are opened up front and stay open for the rest of the script;
# files[0] is ArabicShaping.txt and files[1] is UnicodeData.txt.
files = [open (x) for x in sys.argv[1:]]

# The first two lines of ArabicShaping.txt are remembered so they can be
# echoed into the comment at the top of the generated output.
headers = [[files[0].readline (), files[0].readline ()]]
headers.append (["UnicodeData.txt does not have a header."])

# Skip the rest of the ArabicShaping.txt preamble, up to and including the
# separator line made of '#' characters.
while True:
    preamble_line = files[0].readline ()
    if not preamble_line:
        # readline() returns '' at end of file; without this check an input
        # that lacks the separator would spin here forever.
        raise Exception ("Separator line not found in input", sys.argv[1])
    if preamble_line.find ('##################') >= 0:
        break
def print_joining_table(f):
    """Print the C `joining_table` array and its FIRST/LAST bounds to stdout.

    f is an iterable of text lines made of ';'-separated fields. Field 0 is a
    hexadecimal code point, field 2 becomes the JOINING_TYPE_ suffix and
    field 3 is checked for the two groups that get a JOINING_GROUP_ value
    instead. Lines starting with '#' are comments, and lines without any ';'
    are ignored. U+200C and U+200D are left out of the table.

    Code points missing between two consecutive entries are filled with
    JOINING_TYPE_X so that the array can be indexed by (u - FIRST).

    Raises Exception if the code points are not strictly increasing, or if
    fewer than 40% of the slots between FIRST and LAST are real entries.
    """

    def emit (text = ''):
        sys.stdout.write (text + '\n')

    emit ()
    emit ("static const uint8_t joining_table[] =")
    emit ("{")

    min_u = 0x110000
    max_u = 0
    num = 0
    last = -1
    block = ''
    for line in f:

        if line[0] == '#':
            # Remember a "... characters" heading so it can be printed in
            # front of the next entry. str.find() returns -1 (truthy) on a
            # miss, so it must not be used as a boolean; a membership test is
            # used instead. The match ignores letter case so that both
            # "characters" and "Characters" headings are recognized.
            if " characters" in line.lower ():
                block = line[2:].strip ()
            continue

        fields = [x.strip () for x in line.split (';')]
        if len (fields) == 1:
            continue

        u = int (fields[0], 16)
        if u == 0x200C or u == 0x200D:
            continue
        # A repeated code point would add an extra row and shift every later
        # entry, so equality is rejected as well.
        if u <= last:
            raise Exception ("Input data character not sorted", u)
        min_u = min (min_u, u)
        max_u = max (max_u, u)
        num += 1

        if block:
            emit ("\n /* %s */\n" % block)
            block = ''

        if last != -1:
            # Pad the gap since the previous entry.
            last += 1
            while last < u:
                emit (" JOINING_TYPE_X, /* %04X */" % last)
                last += 1
        else:
            last = u

        if fields[3] in ["ALAPH", "DALATH RISH"]:
            value = "JOINING_GROUP_" + fields[3].replace(' ', '_')
        else:
            value = "JOINING_TYPE_" + fields[2]
        emit (" %s, /* %s */" % (value, '; '.join(fields)))

    emit ()
    emit ("};")
    emit ()
    emit ("#define JOINING_TABLE_FIRST 0x%04X" % min_u)
    emit ("#define JOINING_TABLE_LAST 0x%04X" % max_u)
    emit ()

    # Floor division keeps this an integer percentage on any Python version.
    occupancy = num * 100 // (max_u - min_u + 1)
    # Maintain at least 40% occupancy in the table
    if occupancy < 40:
        raise Exception ("Table too sparse, please investigate: ", occupancy)
def print_shaping_table(f):
    """Print the C `shaping_table` and `ligature_table` arrays to stdout.

    f is an iterable of text lines made of ';'-separated fields. Field 0 is a
    hexadecimal code point, field 1 its name, and field 5 is only used when
    it looks like "<shape> HEX [HEX ...]" with shape being one of initial,
    medial, isolated or final. Every other line is skipped.

    Entries with a single HEX value record the shaped form of that base
    character. Entries with two values are kept only for lam (U+0644)
    followed by one of the four alef variants, and become ligature pairs.

    Raises ValueError (from min/max) when no shapes or no ligatures were
    found, KeyError when a ligature needs a form that was not recorded, and
    Exception when a ligature carries a shape other than isolated or final.
    """

    def emit (text = ''):
        sys.stdout.write (text + '\n')

    known_shapes = ('initial', 'medial', 'isolated', 'final')
    alef_variants = (0x0622, 0x0623, 0x0625, 0x0627)

    shapes = {}
    ligatures = {}
    names = {}
    for record in f:
        fields = [part.strip () for part in record.split (';')]
        mapping = fields[5]
        if not mapping.startswith ('<'):
            continue

        tokens = mapping.split (' ')
        shape = tokens[0][1:-1]
        # Parsed before the shape test, so non-hex values fail either way.
        bases = tuple (int (tok, 16) for tok in tokens[1:])
        if shape not in known_shapes:
            continue

        c = int (fields[0], 16)

        if len (bases) == 1:
            # Save shape; the name kept for a base character is the common
            # prefix of the names of all of its shaped forms.
            u = bases[0]
            if u in names:
                names[u] = os.path.commonprefix ([names[u], fields[1]]).strip ()
            else:
                names[u] = fields[1]
            shapes.setdefault (u, {})[shape] = c
            continue

        # We only care about lam-alef ligatures
        if len (bases) != 2 or bases[0] != 0x0644 or bases[1] not in alef_variants:
            continue

        # Save ligature
        names[c] = fields[1]
        ligatures.setdefault (bases, {})[shape] = c

    emit ()
    emit ("static const uint16_t shaping_table[][4] =")
    emit ("{")

    min_u = min (shapes)
    max_u = max (shapes)
    for u in range (min_u, max_u + 1):
        forms = shapes.get (u, {})
        # Column order of the C table; a missing form is written as 0.
        cells = ["0x%04X" % forms.get (shape, 0)
                 for shape in ('initial', 'medial', 'final', 'isolated')]
        emit (" {%s}, /* U+%04X %s */" % (', '.join (cells), u, names.get (u, "")))

    emit ("};")
    emit ()
    emit ("#define SHAPING_TABLE_FIRST 0x%04X" % min_u)
    emit ("#define SHAPING_TABLE_LAST 0x%04X" % max_u)
    emit ()

    # Which form of the first character pairs with the final form of the
    # second one, keyed by the shape of the resulting ligature.
    first_form_for = {'isolated': 'initial', 'final': 'medial'}

    ligas = {}
    for pair, by_shape in ligatures.items ():
        for shape, c in by_shape.items ():
            if shape not in first_form_for:
                raise Exception ("Unexpected shape", shape)
            first = shapes[pair[0]][first_form_for[shape]]
            second = shapes[pair[1]]['final']
            ligas.setdefault (first, []).append ((second, c))

    max_i = max (len (entries) for entries in ligas.values ())
    emit ()
    emit ("static const struct ligature_set_t {")
    emit (" uint16_t first;")
    emit (" struct ligature_pairs_t {")
    emit (" uint16_t second;")
    emit (" uint16_t ligature;")
    emit (" } ligatures[%d];" % max_i)
    emit ("} ligature_table[] =")
    emit ("{")

    for first in sorted (ligas):
        emit (" { 0x%04X, {" % (first))
        for second, ligature in ligas[first]:
            emit (" { 0x%04X, 0x%04X }, /* %s */" % (second, ligature, names[ligature]))
        emit (" }},")

    emit ("};")
    emit ()
# Opening banner: how the table was generated and from which inputs,
# followed by the include guard of the generated header.
banner = [
    "/* == Start of generated table == */",
    "/*",
    " * The following table is generated by running:",
    " *",
    " * ./gen-arabic-table.py ArabicShaping.txt UnicodeData.txt",
    " *",
    " * on files with these headers:",
    " *",
]
banner.extend (" * %s" % (l.strip()) for h in headers for l in h)
banner.extend ([
    " */",
    "",
    "#ifndef HB_OT_SHAPE_COMPLEX_ARABIC_TABLE_HH",
    "#define HB_OT_SHAPE_COMPLEX_ARABIC_TABLE_HH",
    "",
])
for text in banner:
    sys.stdout.write (text + "\n")

# The two tables, each read from its own input file.
print_joining_table (files[0])
print_shaping_table (files[1])

# Close the include guard and mark the end of the generated section.
for text in ("",
             "#endif /* HB_OT_SHAPE_COMPLEX_ARABIC_TABLE_HH */",
             "",
             "/* == End of generated table == */"):
    sys.stdout.write (text + "\n")