Commit 0345aba082c83e9950f9dd8b7ea3bf91fe566a02

Jaroslaw Kubik 2020-02-12T23:44:42

Support translation of Unicode codepoints to keysyms. In order to support features like auto-type and UI automation, the relevant tools need to be able to invert the keycode->keysym->text transformation. To facilitate that, a new API was added. It allows querying the keysyms that correspond to particular Unicode codepoints. For all practical purposes, it can be thought of as an inverse of xkb_keysym_to_utf32().

diff --git a/src/keysym-utf.c b/src/keysym-utf.c
index 61e0a06..a49944a 100644
--- a/src/keysym-utf.c
+++ b/src/keysym-utf.c
@@ -897,6 +897,35 @@ xkb_keysym_to_utf32(xkb_keysym_t keysym)
     return bin_search(keysymtab, ARRAY_SIZE(keysymtab) - 1, keysym);
 }
 
+XKB_EXPORT xkb_keysym_t
+xkb_utf32_to_keysym(uint32_t ucs)
+{
+    /* Printable Latin-1 code points map 1:1 onto keysyms of equal value. */
+    if ((ucs >= 0x0020 && ucs <= 0x007e) ||
+        (ucs >= 0x00a0 && ucs <= 0x00ff))
+        return ucs;
+
+    /* Control characters with dedicated keysyms; Delete is the odd one out. */
+    if ((ucs >= (XKB_KEY_BackSpace & 0x7f) && ucs <= (XKB_KEY_Clear & 0x7f)) ||
+        ucs == (XKB_KEY_Return & 0x7f) || ucs == (XKB_KEY_Escape & 0x7f))
+        return ucs | 0xff00;
+    if (ucs == (XKB_KEY_Delete & 0x7f))
+        return XKB_KEY_Delete;
+
+    /* Surrogates, noncharacters and values above U+10FFFF have no keysym. */
+    if ((ucs >= 0xd800 && ucs <= 0xdfff) || (ucs >= 0xfdd0 && ucs <= 0xfdef) ||
+        ucs > 0x10ffff || (ucs & 0xfffe) == 0xfffe)
+        return XKB_KEY_NoSymbol;
+
+    /* Legacy keysyms: scan the table in order, the first match wins. */
+    for (size_t i = 0; i < ARRAY_SIZE(keysymtab); i++)
+        if (keysymtab[i].ucs == ucs)
+            return keysymtab[i].keysym;
+
+    /* No legacy keysym: fall back to the direct Unicode encoding. */
+    return ucs | 0x01000000;
+}
+
 /*
  * Copyright © 2012 Intel Corporation
  *
diff --git a/test/keysym.c b/test/keysym.c
index 2a8ca90..bb2ec28 100644
--- a/test/keysym.c
+++ b/test/keysym.c
@@ -104,6 +104,29 @@ test_github_issue_42(void)
     setlocale(LC_CTYPE, "C");
 }
 
+static void
+get_keysym_name(xkb_keysym_t keysym, char *buffer, size_t size)
+{
+    /* Use a hex placeholder when the name lookup returns a negative value. */
+    if (xkb_keysym_get_name(keysym, buffer, size) >= 0)
+        return;
+    snprintf(buffer, size, "(unknown: 0x%lx)", (unsigned long)keysym);
+}
+
+static int
+test_utf32_to_keysym(uint32_t ucs, xkb_keysym_t expected)
+{
+    /* Logs expected vs. actual keysym names; returns nonzero on a match. */
+    char want[64];
+    char got[64];
+    const xkb_keysym_t actual = xkb_utf32_to_keysym(ucs);
+    get_keysym_name(expected, want, sizeof(want));
+    get_keysym_name(actual, got, sizeof(got));
+    fprintf(stderr, "Code point 0x%lx: expected keysym: %s, actual: %s\n\n",
+            (unsigned long)ucs, want, got);
+    return actual == expected;
+}
+
 int
 main(void)
 {
@@ -182,6 +205,49 @@ main(void)
     assert(test_utf8(0x110ffff, "\xf4\x8f\xbf\xbf"));
     assert(test_utf8(0x1110000, NULL) == 0);
 
+    assert(test_utf32_to_keysym('y', XKB_KEY_y));
+    assert(test_utf32_to_keysym('u', XKB_KEY_u));
+    assert(test_utf32_to_keysym('m', XKB_KEY_m));
+    assert(test_utf32_to_keysym(0x43c, XKB_KEY_Cyrillic_em));
+    assert(test_utf32_to_keysym(0x443, XKB_KEY_Cyrillic_u));
+    assert(test_utf32_to_keysym('!', XKB_KEY_exclam));
+    assert(test_utf32_to_keysym(0xF8, XKB_KEY_oslash));
+    assert(test_utf32_to_keysym(0x5D0, XKB_KEY_hebrew_aleph));
+    assert(test_utf32_to_keysym(0x634, XKB_KEY_Arabic_sheen));
+    assert(test_utf32_to_keysym(0x1F609, 0x0101F609)); // ;) emoji
+
+    assert(test_utf32_to_keysym('\b', XKB_KEY_BackSpace));
+    assert(test_utf32_to_keysym('\t', XKB_KEY_Tab));
+    assert(test_utf32_to_keysym('\n', XKB_KEY_Linefeed));
+    assert(test_utf32_to_keysym(0x0b, XKB_KEY_Clear));
+    assert(test_utf32_to_keysym('\r', XKB_KEY_Return));
+    assert(test_utf32_to_keysym(0x1b, XKB_KEY_Escape));
+    assert(test_utf32_to_keysym(0x7f, XKB_KEY_Delete));
+
+    assert(test_utf32_to_keysym(' ', XKB_KEY_space));
+    assert(test_utf32_to_keysym(',', XKB_KEY_comma));
+    assert(test_utf32_to_keysym('.', XKB_KEY_period));
+    assert(test_utf32_to_keysym('=', XKB_KEY_equal));
+    assert(test_utf32_to_keysym('9', XKB_KEY_9));
+    assert(test_utf32_to_keysym('*', XKB_KEY_asterisk));
+    assert(test_utf32_to_keysym(0xd7, XKB_KEY_multiply));
+    assert(test_utf32_to_keysym('-', XKB_KEY_minus));
+    assert(test_utf32_to_keysym(0x10fffd, 0x110fffd));
+
+    // Unicode non-characters
+    assert(test_utf32_to_keysym(0xfdd0, XKB_KEY_NoSymbol));
+    assert(test_utf32_to_keysym(0xfdef, XKB_KEY_NoSymbol));
+    assert(test_utf32_to_keysym(0xfffe, XKB_KEY_NoSymbol));
+    assert(test_utf32_to_keysym(0xffff, XKB_KEY_NoSymbol));
+    assert(test_utf32_to_keysym(0x7fffe, XKB_KEY_NoSymbol));
+    assert(test_utf32_to_keysym(0x7ffff, XKB_KEY_NoSymbol));
+    assert(test_utf32_to_keysym(0xafffe, XKB_KEY_NoSymbol));
+    assert(test_utf32_to_keysym(0xaffff, XKB_KEY_NoSymbol));
+
+    // Codepoints outside the Unicode planes
+    assert(test_utf32_to_keysym(0x110000, XKB_KEY_NoSymbol));
+    assert(test_utf32_to_keysym(0xdeadbeef, XKB_KEY_NoSymbol));
+
     assert(xkb_keysym_is_lower(XKB_KEY_a));
     assert(xkb_keysym_is_lower(XKB_KEY_Greek_lambda));
     assert(xkb_keysym_is_lower(xkb_keysym_from_name("U03b1", 0))); /* GREEK SMALL LETTER ALPHA */
diff --git a/xkbcommon.map b/xkbcommon.map
index f28f68f..eede3e7 100644
--- a/xkbcommon.map
+++ b/xkbcommon.map
@@ -103,3 +103,8 @@ global:
 	xkb_keysym_to_lower;
 	xkb_keysym_to_upper;
 } V_0.7.0;
+
+V_0.11.0 {
+global:
+	xkb_utf32_to_keysym;
+} V_0.8.0;
diff --git a/xkbcommon/xkbcommon.h b/xkbcommon/xkbcommon.h
index 9d3c548..9d18121 100644
--- a/xkbcommon/xkbcommon.h
+++ b/xkbcommon/xkbcommon.h
@@ -494,6 +494,28 @@ uint32_t
 xkb_keysym_to_utf32(xkb_keysym_t keysym);
 
 /**
+ * Get the keysym corresponding to a Unicode/UTF-32 codepoint.
+ *
+ * @param ucs The Unicode/UTF-32 codepoint to translate.
+ *
+ * @returns The keysym corresponding to the specified Unicode
+ * codepoint, or XKB_KEY_NoSymbol if there is none.
+ *
+ * This function is the inverse of @ref xkb_keysym_to_utf32. In cases
+ * where a single codepoint corresponds to multiple keysyms, returns
+ * the keysym with the lowest value.
+ *
+ * Codepoints without a special (legacy) keysym use a direct encoding
+ * scheme; such keysyms usually have no keysym constant (XKB_KEY_*).
+ * Noncharacter codepoints and codepoints outside of the defined
+ * Unicode planes yield XKB_KEY_NoSymbol.
+ *
+ * @sa xkb_keysym_to_utf32()
+ */
+xkb_keysym_t
+xkb_utf32_to_keysym(uint32_t ucs);
+
+/**
  * Convert a keysym to its uppercase form.
  *
  * If there is no such form, the keysym is returned unchanged.