Support translation of Unicode codepoints to keysyms. In order to support features like auto-type and UI automation, the relevant tools need to be able to invert the keycode->keysym->text transformation. To facilitate that, a new API was added. It allows querying the keysyms that correspond to particular Unicode codepoints. For all practical purposes, it can be thought of as an inverse of xkb_keysym_to_utf32().
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170
diff --git a/src/keysym-utf.c b/src/keysym-utf.c
index 61e0a06..a49944a 100644
--- a/src/keysym-utf.c
+++ b/src/keysym-utf.c
@@ -897,6 +897,35 @@ xkb_keysym_to_utf32(xkb_keysym_t keysym)
return bin_search(keysymtab, ARRAY_SIZE(keysymtab) - 1, keysym);
}
+/* Map a Unicode (UTF-32) code point to a keysym, or XKB_KEY_NoSymbol. */
+XKB_EXPORT xkb_keysym_t
+xkb_utf32_to_keysym(uint32_t ucs)
+{
+    /* first check for Latin-1 characters (1:1 mapping) */
+    if ((ucs >= 0x0020 && ucs <= 0x007e) ||
+        (ucs >= 0x00a0 && ucs <= 0x00ff))
+        return ucs;
+
+    /*
+     * special keysyms: control characters whose value equals the low
+     * 7 bits of the matching keysym are mapped by setting 0xff00
+     */
+    if ((ucs >= (XKB_KEY_BackSpace & 0x7f) && ucs <= (XKB_KEY_Clear & 0x7f)) ||
+        ucs == (XKB_KEY_Return & 0x7f) || ucs == (XKB_KEY_Escape & 0x7f))
+        return ucs | 0xff00;
+    if (ucs == (XKB_KEY_Delete & 0x7f))
+        return XKB_KEY_Delete;
+
+    /*
+     * Surrogates (never valid as UTF-32 values), Unicode noncharacters
+     * and code points outside the Unicode planes have no keysym.
+     */
+    if ((ucs >= 0xd800 && ucs <= 0xdfff) ||
+        (ucs >= 0xfdd0 && ucs <= 0xfdef) ||
+        ucs > 0x10ffff || (ucs & 0xfffe) == 0xfffe)
+        return XKB_KEY_NoSymbol;
+
+    /* search main table; the first entry matching the code point wins */
+    for (size_t i = 0; i < ARRAY_SIZE(keysymtab); i++)
+        if (keysymtab[i].ucs == ucs)
+            return keysymtab[i].keysym;
+
+    /* Use direct encoding if everything else fails */
+    return ucs | 0x01000000;
+}
+
/*
* Copyright © 2012 Intel Corporation
*
diff --git a/test/keysym.c b/test/keysym.c
index 2a8ca90..bb2ec28 100644
--- a/test/keysym.c
+++ b/test/keysym.c
@@ -104,6 +104,29 @@ test_github_issue_42(void)
setlocale(LC_CTYPE, "C");
}
+/*
+ * Fill @buffer (of @size bytes) with the name of @keysym. When
+ * xkb_keysym_get_name() reports failure (negative return), a
+ * "(unknown: 0x...)" placeholder with the raw value is written instead.
+ */
+static void
+get_keysym_name(xkb_keysym_t keysym, char *buffer, size_t size)
+{
+    if (xkb_keysym_get_name(keysym, buffer, size) >= 0)
+        return;
+
+    snprintf(buffer, size, "(unknown: 0x%lx)", (unsigned long)keysym);
+}
+
+/*
+ * Convert @ucs with xkb_utf32_to_keysym(), log the expected and actual
+ * keysym names to stderr, and return non-zero iff the result equals
+ * @expected.
+ */
+static int
+test_utf32_to_keysym(uint32_t ucs, xkb_keysym_t expected)
+{
+    char wanted[64];
+    char got[64];
+    const xkb_keysym_t result = xkb_utf32_to_keysym(ucs);
+
+    get_keysym_name(expected, wanted, sizeof(wanted));
+    get_keysym_name(result, got, sizeof(got));
+    fprintf(stderr, "Code point 0x%lx: expected keysym: %s, actual: %s\n\n",
+            (unsigned long)ucs, wanted, got);
+
+    return result == expected;
+}
+
int
main(void)
{
@@ -182,6 +205,49 @@ main(void)
assert(test_utf8(0x110ffff, "\xf4\x8f\xbf\xbf"));
assert(test_utf8(0x1110000, NULL) == 0);
+ assert(test_utf32_to_keysym('y', XKB_KEY_y));
+ assert(test_utf32_to_keysym('u', XKB_KEY_u));
+ assert(test_utf32_to_keysym('m', XKB_KEY_m));
+ assert(test_utf32_to_keysym(0x43c, XKB_KEY_Cyrillic_em));
+ assert(test_utf32_to_keysym(0x443, XKB_KEY_Cyrillic_u));
+ assert(test_utf32_to_keysym('!', XKB_KEY_exclam));
+ assert(test_utf32_to_keysym(0xF8, XKB_KEY_oslash));
+ assert(test_utf32_to_keysym(0x5D0, XKB_KEY_hebrew_aleph));
+ assert(test_utf32_to_keysym(0x634, XKB_KEY_Arabic_sheen));
+ assert(test_utf32_to_keysym(0x1F609, 0x0101F609)); // ;) emoji
+
+ assert(test_utf32_to_keysym('\b', XKB_KEY_BackSpace));
+ assert(test_utf32_to_keysym('\t', XKB_KEY_Tab));
+ assert(test_utf32_to_keysym('\n', XKB_KEY_Linefeed));
+ assert(test_utf32_to_keysym(0x0b, XKB_KEY_Clear));
+ assert(test_utf32_to_keysym('\r', XKB_KEY_Return));
+ assert(test_utf32_to_keysym(0x1b, XKB_KEY_Escape));
+ assert(test_utf32_to_keysym(0x7f, XKB_KEY_Delete));
+
+ assert(test_utf32_to_keysym(' ', XKB_KEY_space));
+ assert(test_utf32_to_keysym(',', XKB_KEY_comma));
+ assert(test_utf32_to_keysym('.', XKB_KEY_period));
+ assert(test_utf32_to_keysym('=', XKB_KEY_equal));
+ assert(test_utf32_to_keysym('9', XKB_KEY_9));
+ assert(test_utf32_to_keysym('*', XKB_KEY_asterisk));
+ assert(test_utf32_to_keysym(0xd7, XKB_KEY_multiply));
+ assert(test_utf32_to_keysym('-', XKB_KEY_minus));
+ assert(test_utf32_to_keysym(0x10fffd, 0x110fffd));
+
+ // Unicode non-characters
+ assert(test_utf32_to_keysym(0xfdd0, XKB_KEY_NoSymbol));
+ assert(test_utf32_to_keysym(0xfdef, XKB_KEY_NoSymbol));
+ assert(test_utf32_to_keysym(0xfffe, XKB_KEY_NoSymbol));
+ assert(test_utf32_to_keysym(0xffff, XKB_KEY_NoSymbol));
+ assert(test_utf32_to_keysym(0x7fffe, XKB_KEY_NoSymbol));
+ assert(test_utf32_to_keysym(0x7ffff, XKB_KEY_NoSymbol));
+ assert(test_utf32_to_keysym(0xafffe, XKB_KEY_NoSymbol));
+ assert(test_utf32_to_keysym(0xaffff, XKB_KEY_NoSymbol));
+
+ // Codepoints outside the Unicode planes
+ assert(test_utf32_to_keysym(0x110000, XKB_KEY_NoSymbol));
+ assert(test_utf32_to_keysym(0xdeadbeef, XKB_KEY_NoSymbol));
+
assert(xkb_keysym_is_lower(XKB_KEY_a));
assert(xkb_keysym_is_lower(XKB_KEY_Greek_lambda));
assert(xkb_keysym_is_lower(xkb_keysym_from_name("U03b1", 0))); /* GREEK SMALL LETTER ALPHA */
diff --git a/xkbcommon.map b/xkbcommon.map
index f28f68f..eede3e7 100644
--- a/xkbcommon.map
+++ b/xkbcommon.map
@@ -103,3 +103,8 @@ global:
xkb_keysym_to_lower;
xkb_keysym_to_upper;
} V_0.7.0;
+
+V_0.11.0 {
+global:
+ xkb_utf32_to_keysym;
+} V_0.8.0;
diff --git a/xkbcommon/xkbcommon.h b/xkbcommon/xkbcommon.h
index 9d3c548..9d18121 100644
--- a/xkbcommon/xkbcommon.h
+++ b/xkbcommon/xkbcommon.h
@@ -494,6 +494,28 @@ uint32_t
xkb_keysym_to_utf32(xkb_keysym_t keysym);
/**
+ * Get the keysym corresponding to a Unicode/UTF-32 codepoint.
+ *
+ * @param ucs The Unicode/UTF-32 codepoint to convert.
+ *
+ * @returns The keysym corresponding to the specified Unicode
+ * codepoint, or XKB_KEY_NoSymbol if there is none.
+ *
+ * This function is the inverse of @ref xkb_keysym_to_utf32. In cases
+ * where a single codepoint corresponds to multiple keysyms, returns
+ * the keysym with the lowest value.
+ *
+ * Unicode codepoints which do not have a special (legacy) keysym
+ * encoding use a direct encoding scheme. These keysyms don't usually
+ * have an associated keysym constant (XKB_KEY_*).
+ *
+ * For noncharacter Unicode codepoints and codepoints outside of the
+ * defined Unicode planes this function returns XKB_KEY_NoSymbol.
+ *
+ * @sa xkb_keysym_to_utf32()
+ */
+xkb_keysym_t
+xkb_utf32_to_keysym(uint32_t ucs);
+
+/**
* Convert a keysym to its uppercase form.
*
* If there is no such form, the keysym is returned unchanged.