Branch
Hash :
ef4dcd41
Author :
Date :
2024-01-09T02:54:25
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65
#!/usr/bin/env python3
import os
import sys
import textwrap
# Generates the C table PUNCT_MAP from unicode/DerivedGeneralCategory.txt
# (looked up next to this script) and prints it to standard output.
#
# The work is split into small functions so that each step can be used
# and tested without touching the filesystem; the script only runs when
# executed directly.


def parse_codepoints(lines):
    """Yield every code point whose category starts with 'P' or 'S'.

    ``lines`` is any iterable of text lines in the
    ``<codepoint-or-range> ; <category> # comment`` layout.  Comments and
    blank lines are ignored.  A range ``XXXX..YYYY`` is expanded to all of
    its code points, both ends included.

    Raises ValueError when a data line does not consist of exactly two
    ``;``-separated fields, or when a code point is not valid hexadecimal.
    """
    for raw_line in lines:
        # Everything after '#' is a comment.
        entry = raw_line.partition("#")[0].strip()
        if not entry:
            continue

        char_range, category = entry.split(";")
        char_range = char_range.strip()
        category = category.strip()

        # startswith() also copes with an empty category field, which is
        # skipped instead of raising IndexError.
        if not category.startswith(("P", "S")):
            continue

        first, delim, last = char_range.partition("..")
        start = int(first, 16)
        end = int(last, 16) if delim else start
        for codepoint in range(start, end + 1):
            yield codepoint


def build_records(codepoints):
    """Collapse code points into a sorted list of table record strings.

    Each maximal run of consecutive code points becomes
    ``R(0x<first>,0x<last>)``; an isolated code point becomes
    ``S(0x<value>)``.  Returns an empty list for empty input.
    """
    # De-duplicate so that repeated or overlapping input ranges cannot
    # produce overlapping records.
    ordered = sorted(set(codepoints))
    total = len(ordered)

    records = []
    run_start = 0
    while run_start < total:
        # Advance run_end to one past the last consecutive code point.
        run_end = run_start + 1
        while run_end < total and ordered[run_end] == ordered[run_end - 1] + 1:
            run_end += 1

        first = ordered[run_start]
        last = ordered[run_end - 1]
        if first != last:
            # Range of codepoints
            records.append("R(0x{:04x},0x{:04x})".format(first, last))
        else:
            # Single codepoint
            records.append("S(0x{:04x})".format(first))

        run_start = run_end
    return records


def format_table(records):
    """Return the complete C source text of the PUNCT_MAP array.

    The records are joined with ", " and wrapped to 110 columns.
    """
    # NOTE(review): the indent strings are reproduced exactly as they appear
    # in this file; confirm the intended indentation width of the output.
    wrapped = textwrap.wrap(", ".join(records), 110,
                            initial_indent=" ", subsequent_indent=" ")
    return ("static const unsigned PUNCT_MAP[] = {\n"
            + "\n".join(wrapped)
            + "\n};\n\n")


def main():
    """Read the category file next to this script and print the table."""
    self_path = os.path.dirname(os.path.realpath(__file__))
    data_path = os.path.join(self_path, "unicode", "DerivedGeneralCategory.txt")

    # The context manager closes the file even if parsing raises; the
    # explicit encoding keeps reading independent of the locale.
    with open(data_path, "r", encoding="utf-8") as f:
        records = build_records(parse_codepoints(f))

    sys.stdout.write(format_table(records))


if __name__ == "__main__":
    main()