Hash :
dc63e5f8
Author :
Date :
2025-07-07T12:28:24
Ensure config.h is always included first. While `config.h` may not be necessary in every file, it ensures consistency and makes code refactoring safer.
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435
#!/usr/bin/env python3
"""
Generate C file to handle keysym names
"""
import argparse
import itertools
import random
import re
import sys
from collections import defaultdict
from dataclasses import dataclass
from enum import Enum
from pathlib import Path
from typing import DefaultDict, Generator, Iterable, Iterator
import perfect_hash
# Location of this script; the project root is the parent of the directory
# that contains it.
SCRIPT = Path(__file__)
ROOT = SCRIPT.parent.parent


def _build_parser() -> argparse.ArgumentParser:
    """Create the command-line parser: two positional file paths."""
    cli = argparse.ArgumentParser(
        description="Generate C file to handle keysym names"
    )
    cli.add_argument(
        "c_header", type=Path, help="Path to the libxkbcommon keysym header"
    )
    cli.add_argument("gperf", type=Path, help="Path to the gperf file")
    return cli


# Parse commands
parser = _build_parser()
args = parser.parse_args()

# Seed the random generator with a fixed value, so that successive runs draw
# the same sequence and the diff of the generated output stays small.
random.seed(b"libxkbcommon")
# Matches one `#define XKB_KEY_<name> <hex value>` line, optionally followed by
# the beginning of a C comment. Named groups:
# - name: the keysym name, without the `XKB_KEY_` prefix;
# - value: the hexadecimal keysym value, including its `0x` prefix;
# - deprecated: set when the comment starts with the word `deprecated`;
# - unicode_alt_semantics: set when the comment starts with `(<U+XXXX>)`.
#   NOTE: the capture includes the closing `>`; parse_keysyms only tests it
#   for truthiness;
# - deprecated_unicode: set when the comment starts with `(U+XXXX`, followed
#   by a whitespace, then whitespace/word/hyphen characters and `)`.
# Any other comment content is accepted and not captured.
KEYSYM_ENTRY_PATTERN = re.compile(
    r"""
^\#define\s+
XKB_KEY_(?P<name>\w+)\s+
(?P<value>0x[0-9a-fA-F]+)\s*
(?:/\*\s*
(?:
(?P<deprecated>deprecated)|
\(<U\+(?P<unicode_alt_semantics>[0-9a-fA-F]{4,}>)\)|
\(U\+(?P<deprecated_unicode>[0-9a-fA-F]{4,})\s(?:\s|\w|-)+\)|
.*
)
)?
""",
    re.VERBOSE,
)
class Deprecation(Enum):
    """Deprecation status of a keysym name."""

    # No deprecation
    NONE = "none"
    # Explicit deprecation in comment: /* deprecated */
    EXPLICIT = "explicit"
    # Implicit deprecation: the keysym has already been defined with a previous
    # name, and the present name has not been declared explicitly as an alias.
    IMPLICIT = "implicit"
@dataclass
class Keysym:
    """One keysym name definition, as produced by parse_keysyms."""

    # Keysym name, without the `XKB_KEY_` prefix
    name: str
    # Numeric keysym value
    value: int
    # Deprecation status of this name
    deprecated: Deprecation
    # Whether the header marks this name as an alias: an "alias for" comment
    # or a `(<U+XXXX>)` comment
    alias: bool
def parse_keysyms(path: Path) -> Iterator[Keysym]:
    """
    Parse the keysym definitions of a C header.

    The file is read as UTF-8 text, line by line. Each line matching
    KEYSYM_ENTRY_PATTERN yields one Keysym, in file order; every other line
    is skipped.

    A keysym is flagged as explicitly deprecated when its comment matches the
    `deprecated` or `deprecated_unicode` group of the pattern. It is flagged
    as an alias when its line contains "alias for" (case-insensitive) or when
    its comment matches the `unicode_alt_semantics` group.
    """
    with path.open("rt", encoding="utf-8") as fd:
        for line in fd:
            m = KEYSYM_ENTRY_PATTERN.match(line)
            if m is None:
                continue
            explicitly_deprecated = bool(
                m.group("deprecated") or m.group("deprecated_unicode")
            )
            yield Keysym(
                name=m.group("name"),
                value=int(m.group("value"), 16),
                deprecated=(
                    Deprecation.EXPLICIT
                    if explicitly_deprecated
                    else Deprecation.NONE
                ),
                # Coerce to bool: a regex group is a str or None, while the
                # field is declared as a bool.
                alias=bool(
                    "alias for" in line.casefold()
                    or m.group("unicode_alt_semantics")
                ),
            )
def get_keysyms(path: Path) -> dict[int, list[Keysym]]:
    """
    Group the keysyms of a header by value, in definition order.

    A name is flagged as implicitly deprecated when all of these hold:
    its value has already been defined, it is neither explicitly deprecated
    nor an alias, and at least one earlier name of the same value is not
    deprecated.
    """
    by_value: DefaultDict[int, list[Keysym]] = defaultdict(list)
    for keysym in parse_keysyms(path):
        earlier_names = by_value[keysym.value]
        is_plain_name = keysym.deprecated is Deprecation.NONE and not keysym.alias
        if (
            earlier_names
            and is_plain_name
            # deal with first name being deprecated
            and any(k.deprecated is Deprecation.NONE for k in earlier_names)
        ):
            keysym.deprecated = Deprecation.IMPLICIT
        earlier_names.append(keysym)
    return by_value
keysyms_by_value = get_keysyms(args.c_header)
# Flatten the groups into one sequence, keeping the order of the groups and
# the order of the names within each group.
entries = tuple(ks for group in keysyms_by_value.values() for ks in group)

# Sort based on the keysym name:
# 1. Sort by the casefolded name: e.g. kana_ya < kana_YO.
# 2. If same casefolded name, then sort by cased name, i.e. for
#    ASCII: upper before lower: e.g. kana_YA < kana_ya.
# E.g. kana_YA < kana_ya < kana_YO < kana_yo
# WARNING: this sort must not be changed, as some functions e.g.
# xkb_keysym_from_name rely on upper case variant occurring first.
entries_isorted = list(entries)
entries_isorted.sort(key=lambda ks: (ks.name.casefold(), ks.name))

# Sort based on keysym value. Sort is stable so in case of duplicate, the first
# keysym occurrence stays first.
entries_kssorted = list(entries)
entries_kssorted.sort(key=lambda ks: ks.value)
# Emit the banner comment of the generated header, followed by `#pragma once`.
print(
    f"""
/**
* This file comes from libxkbcommon and was generated by {SCRIPT.name}
* You can always fetch the latest version from:
* https://raw.github.com/xkbcommon/libxkbcommon/master/src/ks_tables.h
*/
#pragma once
"""
)
# Offset of each keysym name within the emitted `keysym_names` C string,
# indexed by keysym name.
entry_offsets: dict[str, int] = {}
UINT16_MAX = (1 << 16) - 1
# Sentinels emitted in place of a name offset for deprecated keysyms
# (see make_deprecated_entry); they take the two highest uint16_t values.
UNICODE_KEYSYM = UINT16_MAX - 1
DEPRECATED_KEYSYM = UINT16_MAX
# Exclusive upper bounds checked by make_deprecated_entry for the start index
# and the count of explicit deprecated aliases.
MAX_EXPLICIT_DEPRECATED_ALIAS_INDEX_LOG2 = 8
MAX_EXPLICIT_DEPRECATED_ALIAS_INDEX = 1 << MAX_EXPLICIT_DEPRECATED_ALIAS_INDEX_LOG2
MAX_EXPLICIT_DEPRECATED_ALIAS_COUNT_LOG2 = 4
MAX_EXPLICIT_DEPRECATED_ALIAS_COUNT = 1 << MAX_EXPLICIT_DEPRECATED_ALIAS_COUNT_LOG2
# Name offsets must stay below this bound, hence below both sentinels.
MAX_OFFSET = UNICODE_KEYSYM - 1
# Inclusive range of keysym values reported with the UNICODE_KEYSYM sentinel
# when deprecated.
XKB_KEYSYM_UNICODE_MIN = 0x01000100
XKB_KEYSYM_UNICODE_MAX = 0x0110FFFF
# Emit the includes and open the definition of the `keysym_names` C string.
# The GCC pragmas silence -Woverlength-strings around that definition.
print(
    """
#include "config.h"
#include <stddef.h>
#include <stdint.h>
#include "xkbcommon/xkbcommon.h"
#ifdef __GNUC__
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Woverlength-strings"
#endif
static const char *keysym_names =
""".strip()
)
# Emit every name as a NUL-terminated chunk of one concatenated C string,
# in the order of `entries_isorted`, recording the offset of each name.
offs = 0
for keysym in entries_isorted:
    # Fail before recording an offset that reaches MAX_OFFSET
    if offs >= MAX_OFFSET:
        raise ValueError(f"Offset must be kept under {MAX_OFFSET}, got: {offs}.")
    entry_offsets[keysym.name] = offs
    print(f' "{keysym.name}\\0"')
    # +1 accounts for the `\0` emitted after the name
    offs += len(keysym.name) + 1
# Close the string definition and restore the GCC diagnostics
print(
    """
;
#ifdef __GNUC__
#pragma GCC diagnostic pop
#endif
""".strip()
)
# C code template handed to `perfect_hash.generate_code` below.
# NOTE(review): the `$G`, `$NG`, `$NS`, `$S1` and `$S2` placeholders are
# presumably substituted by perfect_hash -- confirm against that module.
template = r"""
static const uint16_t keysym_name_G[] = {
$G
};
static size_t
keysym_name_hash_f(const char *key, const char *T)
{
size_t sum = 0;
for (size_t i = 0; key[i] != '\0'; i++)
sum += (size_t) (T[i % $NS] * key[i]);
return sum % $NG;
}
static inline size_t
keysym_name_perfect_hash(const char *key)
{
return (
keysym_name_G[keysym_name_hash_f(key, "$S1")] +
keysym_name_G[keysym_name_hash_f(key, "$S2")]
) % $NG;
}
"""
# Emit the code generated from the template, using the keysym names in the
# order of `entries_isorted` as keys.
print(
    perfect_hash.generate_code(
        keys=[keysym.name for keysym in entries_isorted],
        template=template,
    )
)
# Emit the C type of the entries printed by print_entries: a keysym value
# paired with the offset of a name.
print(
    """
struct name_keysym {
xkb_keysym_t keysym;
uint16_t offset;
};\n"""
)
def print_entries(entries: Iterable[Keysym]):
    """
    Print one C initializer line per keysym: its value as 8 hexadecimal
    digits, the offset of its name looked up in the module-level
    `entry_offsets`, and its name in a trailing comment.
    """
    for ks in entries:
        name_offset = entry_offsets[ks.name]
        print(f" {{ 0x{ks.value:08x}, {name_offset} }}, /* {ks.name} */")
# Table of all the names, in the order of `entries_isorted`
print("static const struct name_keysym name_to_keysym[] = {")
print_entries(entries_isorted)
print("};\n")

# The sort by value is stable, so the first entry of each group of equal
# values is the first name defined for that keysym.
print("static const struct name_keysym keysym_to_name[] = {")
first_names = (
    next(same_value)
    for _, same_value in itertools.groupby(entries_kssorted, key=lambda ks: ks.value)
)
print_entries(first_names)
print("};\n")
def make_deprecated_entry(
    value,
    keysyms: list[Keysym],
    entry_offsets: dict[str, int],
    explicit_deprecated_aliases_index: int,
) -> tuple[str | None, tuple[int, ...]]:
    """
    Build the `deprecated_keysyms` C entry of one keysym value, if required.

    `keysyms` holds all the names defined for `value`; it must not be empty.

    Return a pair:
    - the C initializer line, or None when no entry is required, i.e. when
      some name is not deprecated and every name after the first one is a
      non-deprecated alias;
    - the name offsets of the deprecated names found after the first name,
      but only when some name after the first one is a non-deprecated alias;
      otherwise an empty tuple.
    """
    assert keysyms

    def is_current(ks: Keysym) -> bool:
        return ks.deprecated is Deprecation.NONE

    def is_current_alias(ks: Keysym) -> bool:
        return bool(ks.alias) and is_current(ks)

    others = keysyms[1:]
    current = tuple(filter(is_current, keysyms))
    alias_offsets: tuple[int, ...] = ()

    if not current:
        # Keysym is deprecated: there is no reference name, so the offset
        # field holds a sentinel instead.
        reference = ""
        index_field = (
            "UNICODE_KEYSYM"
            if XKB_KEYSYM_UNICODE_MIN <= value <= XKB_KEYSYM_UNICODE_MAX
            else "DEPRECATED_KEYSYM"
        )
        deprecated_names = keysyms
        explicit_deprecated_aliases_index = 0
    else:
        # Keysym is not deprecated. Check if none of its aliases are.
        if len(keysyms) == 1 or all(map(is_current_alias, others)):
            return None, ()
        ref = current[0].name
        reference = f"Reference: {ref}. "
        assert ref in entry_offsets
        index_field = str(entry_offsets[ref])
        deprecated_names = tuple(ks for ks in keysyms if not is_current(ks))
        if any(map(is_current_alias, others)):
            # keysym has both explicit and deprecated aliases
            alias_offsets = tuple(
                entry_offsets[ks.name] for ks in others if not is_current(ks)
            )
            assert (
                explicit_deprecated_aliases_index < MAX_EXPLICIT_DEPRECATED_ALIAS_INDEX
            )
            assert len(alias_offsets) < MAX_EXPLICIT_DEPRECATED_ALIAS_COUNT
        else:
            explicit_deprecated_aliases_index = 0

    # Human-readable summary placed in the trailing C comment
    extra_current = current[1:]
    non_deprecated = (
        f"Non deprecated aliases: {', '.join(ks.name for ks in extra_current)}. "
        if extra_current
        else ""
    )
    deprecated = ", ".join(ks.name for ks in deprecated_names)
    comment = f"{reference}{non_deprecated}Deprecated: {deprecated}"
    c_entry = f" {{ 0x{value:0>8x}, {index_field: <17}, {explicit_deprecated_aliases_index}, {len(alias_offsets)} }}, /* {comment} */"
    return c_entry, alias_offsets
def generate_deprecated_keysyms(
    keysyms_by_value: dict[int, list[Keysym]], entry_offsets: dict[str, int]
) -> Generator[tuple[int, ...], None, None]:
    """
    Print the `deprecated_keysyms` C entries, in increasing keysym value order.

    Yield each non-empty group of explicit deprecated alias offsets returned
    by make_deprecated_entry. The start index passed for each keysym is the
    total number of offsets yielded so far.
    """
    next_alias_index = 0
    for value, keysyms in sorted(keysyms_by_value.items(), key=lambda item: item[0]):
        assert keysyms
        c_entry, alias_offsets = make_deprecated_entry(
            value, keysyms, entry_offsets, next_alias_index
        )
        if c_entry is not None:
            print(c_entry)
        if not alias_offsets:
            continue
        yield alias_offsets
        next_alias_index += len(alias_offsets)
def generate_mixed_aliases(aliases: Iterable[Iterable[int]]):
    """Print every item of every group of `aliases`, one C array item per line."""
    for offset in itertools.chain.from_iterable(aliases):
        print(f" {offset},")
# Emit the sentinel values as C macros, in hexadecimal
print(f"#define UNICODE_KEYSYM 0x{UNICODE_KEYSYM:x}")
print(f"#define DEPRECATED_KEYSYM 0x{DEPRECATED_KEYSYM:x}")
# NOTE: Alternative implementation, useful the day the indices do not fit uint16_t.
# NOTE(review): MAX_OFFSET_LOG2 below is not defined in the visible code; it
# would have to be added before enabling this alternative.
# print(f"""
# struct deprecated_keysym {{
# xkb_keysym_t keysym;
# union {{
# uint32_t offset;
# struct {{
# uint32_t offset:{MAX_OFFSET_LOG2};
# /* Explicit deprecated aliases start index & count */
# uint8_t explicit_index:{MAX_EXPLICIT_DEPRECATED_ALIAS_INDEX_LOG2};
# uint8_t explicit_count:{MAX_EXPLICIT_DEPRECATED_ALIAS_COUNT_LOG2};
# }} details;
# }};
# }};
# """)
# Emit the C type of the entries built by make_deprecated_entry
print("""
struct deprecated_keysym {
xkb_keysym_t keysym;
uint16_t offset;
/* Explicit deprecated aliases start index & count */
uint8_t explicit_index;
uint8_t explicit_count;
};
""")
print("static const struct deprecated_keysym deprecated_keysyms[] = {")
# Exhausting the generator prints the table entries as a side effect and
# collects the groups of explicit deprecated aliases for the next table.
explicit_deprecated_aliases = tuple(
    generate_deprecated_keysyms(keysyms_by_value, entry_offsets)
)
print("};\n")
print("static const uint32_t explicit_deprecated_aliases[] = {")
generate_mixed_aliases(explicit_deprecated_aliases)
print("};")
# Report the highest name offset on stderr, outside of the generated output
print(f"max name offset: {max(entry_offsets.values())}", file=sys.stderr)
# Check that the keywords of our XKB parser that clash with keysyms are handled properly
def parse_gperf_keywords(path: Path) -> Iterator[str]:
    """
    Yield the casefolded keywords declared in a gperf file.

    The file is read as UTF-8 text. The keywords section is delimited by two
    lines starting with `%%`; lines outside of it are ignored. Within the
    section, the keyword of a line is the text before its first comma, with
    the surrounding whitespace stripped. Lines without any keyword text, such
    as blank lines, are skipped.

    Raise ValueError, while iterating, if the end of the file is reached
    before the closing boundary of the keywords section.
    """
    with path.open("rt", encoding="utf-8") as fd:
        in_keyword_section = False
        for line in fd:
            if line.startswith("%%"):
                # This is a boundary of the keywords section
                if in_keyword_section:
                    break
                in_keyword_section = True
            elif in_keyword_section:
                # Parse the keyword; skip lines that do not hold any
                keyword = line.partition(",")[0].strip()
                if keyword:
                    yield keyword.casefold()
            # Skip any line until we reach the keywords
        else:
            # The loop ended without meeting the closing boundary
            raise ValueError("Parse error: keywords section boundary not found")
# Keywords that are expected to clash with a keysym name. Any other keyword
# matching the casefolded name of a keysym is reported as an error.
SUPPORTED_KEYWORDS_CLASHES = {"section"}
UNSUPPORTED_KEYWORDS_CLASHES = frozenset(parse_gperf_keywords(args.gperf)).difference(
    SUPPORTED_KEYWORDS_CLASHES
)
# Names of the keysyms found to clash with a supported keyword, as expected
expected_clashes: set[str] = set()
errors = 0
for entry in entries:
    if entry.name.casefold() in UNSUPPORTED_KEYWORDS_CLASHES:
        print(
            f"ERROR: keysym “{entry.name}” (0x{entry.value:0>4x}) clashes with keywords",
            "and cannot be parsed properly.",
            file=sys.stderr,
        )
        errors += 1
    elif (lower := entry.name.lower()) in SUPPORTED_KEYWORDS_CLASHES:
        if not entry.name.islower():
            # Keywords' atoms are registered in *lower* case, so the keysym will be
            # replaced by the keysym with the corresponding name, but they may not match.
            if any(
                e.name == lower
                for e in keysyms_by_value[entry.value]
                # Skip the entry itself. This used to compare the name with
                # `entry.value`, a str with an int, which is always unequal.
                if e.name != entry.name
            ):
                # There is a keysym in lower case that is an alias
                print(
                    f"WARNING: keysym “{entry.name}”",
                    f"will be parsed as “{lower}” (expected)",
                    file=sys.stderr,
                )
            else:
                # Lookup the keysym mismatch; fall back to a placeholder when
                # no keysym has the lower case name.
                entry2: Keysym = next(
                    (e for e in entries if e.name == lower),
                    Keysym("NoSymbol", 0, Deprecation.NONE, False),
                )
                print(
                    f"ERROR: keysym “{entry.name}” (0x{entry.value:0>4x})",
                    r"clashes with keywords and will be replaced by",
                    f"“{entry2.name}” (0x{entry2.value:0>4x}).",
                    file=sys.stderr,
                )
                errors += 1
        else:
            print(
                f"WARNING: keysym “{entry.name}” clashing with keywords (expected)",
                file=sys.stderr,
            )
            expected_clashes.add(entry.name)
# Every supported clash must have been met, else the list above is outdated
if diff := SUPPORTED_KEYWORDS_CLASHES.difference(expected_clashes):
    print(f"ERROR: Unexpected missing clashing keysyms: {diff}", file=sys.stderr)
    errors += 1
if errors:
    print(
        f" {errors} ERRORS ".center(80, "-"),
        "Please update the parser file `parser.y` to handle keysyms causing clashes.",
        "The relevant entries are:",
        "- Keysym",
        "- Element (for modmap, parsed via: Expr -> Term -> Lhs -> FieldSpec -> Element)",
        file=sys.stderr,
        sep="\n",
    )
    # Use sys.exit: the bare `exit` builtin is injected by the `site` module
    # and is not available when the interpreter runs without it.
    sys.exit(1)