Hash :
53d9881e
Author :
Date :
2024-03-05T10:28:11
keysyms: Fix inconsistent case-insensitive name lookup
`xkb_keysym_from_name` has inconsistent behavior when used with the flag
`XKB_KEYSYM_CASE_INSENSITIVE`:
```c
xkb_keysym_from_name("a", XKB_KEYSYM_CASE_INSENSITIVE) == XKB_KEY_a;
xkb_keysym_from_name("A", XKB_KEYSYM_CASE_INSENSITIVE) == XKB_KEY_a;
xkb_keysym_from_name("dead_a", XKB_KEYSYM_CASE_INSENSITIVE) == XKB_KEY_dead_A;
xkb_keysym_from_name("dead_A", XKB_KEYSYM_CASE_INSENSITIVE) == XKB_KEY_dead_A;
xkb_keysym_from_name("dead_o", XKB_KEYSYM_CASE_INSENSITIVE) == XKB_KEY_dead_o;
xkb_keysym_from_name("dead_O", XKB_KEYSYM_CASE_INSENSITIVE) == XKB_KEY_dead_o;
xkb_keysym_from_name("KANA_tsu", XKB_KEYSYM_CASE_INSENSITIVE) == XKB_KEY_kana_tsu;
xkb_keysym_from_name("KANA_TSU", XKB_KEYSYM_CASE_INSENSITIVE) == XKB_KEY_kana_tsu;
xkb_keysym_from_name("KANA_ya", XKB_KEYSYM_CASE_INSENSITIVE) == XKB_KEY_kana_YA;
xkb_keysym_from_name("KANA_YA", XKB_KEYSYM_CASE_INSENSITIVE) == XKB_KEY_kana_YA;
xkb_keysym_from_name("XF86Screensaver", XKB_KEYSYM_CASE_INSENSITIVE) == XKB_KEY_XF86ScreenSaver;
xkb_keysym_from_name("XF86ScreenSaver", XKB_KEYSYM_CASE_INSENSITIVE) == XKB_KEY_XF86ScreenSaver;
```
So currently, if two keysym names differ only by case, then the
lower-case *keysym* is returned, not the keysym corresponding to the
lower-case keysym *name*. Indeed, `xkb_keysym_from_name` uses
`xkb_keysym_is_lower` to test if a keysym is a lower-case keysym.
Let’s look at the example for keysyms `a` and `A`: we get the keysym `a`
not because its name is lower case, but because `xkb_keysym_is_lower(XKB_KEY_a)`
returns true and `xkb_keysym_is_lower(XKB_KEY_A)` returns false.
So the results are correct according to the doc:
- Katakana is not a bicameral script, so e.g. `kana_ya` is *not* the lower
case of `kana_YA`.
- As for the `dead_*` keysyms, they are not cased either because they do
not correspond to characters.
- `XF86ScreenSaver` and `XF86Screensaver` are two different keysyms.
But this is also very counter-intuitive: `xkb_keysym_is_lower` is not
the right function to use in this case, because one would expect to check
only the name, not the corresponding character case:
```c
xkb_keysym_from_name("KANA_YA", XKB_KEYSYM_CASE_INSENSITIVE) == XKB_KEY_kana_ya;
xkb_keysym_from_name("XF86ScreenSaver", XKB_KEYSYM_CASE_INSENSITIVE) == XKB_KEY_XF86Screensaver;
```
Fixed by making the order of the keysym names consistent in `src/ks_tables.h`:
1. Sort by the casefolded name: e.g. `kana_ya` < `kana_YO`.
2. If the casefolded names are equal, then sort by the cased name, i.e. for
ASCII: upper case before lower case, e.g. `kana_YA` < `kana_ya`.
Thus we now have e.g. `kana_YA` < `kana_ya` < `kana_YO` < `kana_yo`.
The lookup logic has also been simplified.
Added an exhaustive test for ambiguous case-insensitive names.
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135
#!/usr/bin/env python3
import argparse
from collections import defaultdict
from pathlib import Path
import re
import sys
from typing import Any, TypeAlias
import jinja2
KEYSYM_PATTERN = re.compile(
r"^#define\s+XKB_KEY_(?P<name>\w+)\s+(?P<value>0x[0-9a-fA-F]+)\s"
)
MAX_AMBIGUOUS_NAMES = 3
KeysymsBounds: TypeAlias = dict[str, int]
KeysymsCaseFoldedNames: TypeAlias = dict[str, list[str]]
def load_keysyms(path: Path) -> tuple[KeysymsBounds, KeysymsCaseFoldedNames]:
    """Parse the keysyms header and compute keysym bounds and ambiguous names.

    Every line of *path* matching ``KEYSYM_PATTERN`` contributes one
    (name, value) pair. The first name encountered for a given value is
    kept as the canonical name of that value.

    Returns a pair made of:

    - the bounds: minimum/maximum explicitly defined values, the same
      bounds widened to include the Unicode keysym range, the number of
      distinct explicit values and the maximum size of a keysym name;
    - the ambiguous case-insensitive names: a mapping from each casefolded
      name shared by at least two keysym names to the sorted list of these
      names.

    Raises ``ValueError`` if no keysym definition is found in *path*, or if
    more than ``MAX_AMBIGUOUS_NAMES`` names share the same casefolded name.
    """
    # Range of the Unicode keysyms, which are not listed in the header
    min_unicode_keysym = 0x01000100
    max_unicode_keysym = 0x0110FFFF
    # Longest names that are not explicitly defined in the header
    max_unicode_name = "U10FFFF"
    max_keysym_name = "0x1fffffff"  # XKB_KEYSYM_MAX

    keysym_min = sys.maxsize
    keysym_max = 0
    canonical_names: dict[int, str] = {}
    names_by_casefold: dict[str, list[str]] = defaultdict(list)

    # Load the keysyms header
    with path.open("rt", encoding="utf-8") as fd:
        for line in fd:
            m = KEYSYM_PATTERN.match(line)
            if m is None:
                continue
            value = int(m.group("value"), 16)
            name = m.group("name")
            keysym_min = min(keysym_min, value)
            keysym_max = max(keysym_max, value)
            names_by_casefold[name.casefold()].append(name)
            # Only the first name of a value is canonical
            canonical_names.setdefault(value, name)

    # Fail early with a clear message: without any definition the explicit
    # bounds would be meaningless and `max()` below would raise an opaque
    # "empty sequence" error.
    if not canonical_names:
        raise ValueError(f"No keysym definition found in: {path}")

    # Keep only ambiguous case-insensitive names and sort them. A plain dict
    # is returned so that missing-key lookups by consumers do not silently
    # create entries, as they would with the defaultdict.
    ambiguous_names: dict[str, list[str]] = {}
    for casefolded, names in names_by_casefold.items():
        count = len(names)
        if count < 2:
            continue
        if count > MAX_AMBIGUOUS_NAMES:
            raise ValueError(
                f"""Expected max {MAX_AMBIGUOUS_NAMES} keysyms for "{casefolded}", got: {count}"""
            )
        # Default string order: for ASCII, upper case sorts before lower case
        ambiguous_names[casefolded] = sorted(names)

    return (
        {
            "XKB_KEYSYM_MIN_ASSIGNED": min(keysym_min, min_unicode_keysym),
            "XKB_KEYSYM_MAX_ASSIGNED": max(keysym_max, max_unicode_keysym),
            "XKB_KEYSYM_MIN_EXPLICIT": keysym_min,
            "XKB_KEYSYM_MAX_EXPLICIT": keysym_max,
            "XKB_KEYSYM_COUNT_EXPLICIT": len(canonical_names),
            "XKB_KEYSYM_NAME_MAX_SIZE": max(
                max(len(name) for name in canonical_names.values()),
                len(max_unicode_name),
                len(max_keysym_name),
            ),
        },
        ambiguous_names,
    )
def generate(
    env: jinja2.Environment,
    data: dict[str, Any],
    root: Path,
    file: Path,
) -> None:
    """Generate a file from its Jinja2 template.

    The template name is *file* with ``.jinja`` appended to its suffix
    (e.g. ``src/keysym.h`` -> ``src/keysym.h.jinja``) and is resolved by
    *env*. The template is rendered with the entries of *data* as variables
    and the result is written, UTF-8 encoded, to ``root / file``.
    """
    template_path = file.with_suffix(f"{file.suffix}.jinja")
    # Jinja template names always use "/" as separator, whatever the
    # platform: `str(path)` would produce backslashes on Windows and the
    # template would not be found.
    template = env.get_template(template_path.as_posix())
    path = root / file
    with path.open("wt", encoding="utf-8") as fd:
        # Stream the rendered chunks instead of building the whole output
        fd.writelines(template.generate(**data))
# Default root of the project: the parent of the directory containing this script
ROOT = Path(__file__).parent.parent

# Command-line interface
parser = argparse.ArgumentParser(
    description="Generate C header files related to keysyms bounds"
)
parser.add_argument(
    "--root",
    default=ROOT,
    type=Path,
    help="Path to the root of the project (default: %(default)s)",
)
args = parser.parse_args()


def _format_keysym(ks):
    """Format a keysym as `0x` followed by at least 8 zero-padded hex digits."""
    return f"0x{ks:0>8x}"


# Jinja environment: templates are loaded from the project root
template_loader = jinja2.FileSystemLoader(args.root, encoding="utf-8")
jinja_env = jinja2.Environment(
    loader=template_loader,
    keep_trailing_newline=True,
    trim_blocks=True,
    lstrip_blocks=True,
)
jinja_env.filters["keysym"] = _format_keysym

# Keysyms data extracted from the public header
keysyms_bounds, keysyms_ambiguous_case_insensitive_names = load_keysyms(
    args.root / "include/xkbcommon/xkbcommon-keysyms.h"
)

# Generate each file, paired with the data passed to its template
for header, template_data in (
    (Path("src/keysym.h"), keysyms_bounds),
    (
        Path("test/keysym.h"),
        {
            **keysyms_bounds,
            "ambiguous_case_insensitive_names": keysyms_ambiguous_case_insensitive_names,
            "MAX_AMBIGUOUS_NAMES": MAX_AMBIGUOUS_NAMES,
        },
    ),
):
    generate(jinja_env, template_data, args.root, header)