Commit b973d71e82e9ec042d604226ccf0ba641773f38f

Ran Benita 2014-03-21T23:00:17

state: add xkb_state_key_get_{utf8,utf32}() API functions These functions generally have the same effect as xkb_state_key_get_syms() + xkb_keysym_to_utf{8,32}(). So why add them? - They provide a slightly nicer interface, especially if the string is the only interest. - It makes the handling of multiple-keysyms-to-utf8 transparent. For the designated use-case of multiple-keysyms (unicode combining characters), this is a must. We also validate the UTF-8, which the user might not otherwise do. - We will need to apply some transformation on the resulting string which depend on the xkb_state. This is not possible with the xkb_keysym_* functions. With these functions, the existing xkb_keysym_to_utf{8,32}() are not expected to be used by a typical user; they are "raw" functions. Signed-off-by: Ran Benita <ran234@gmail.com>

diff --git a/src/state.c b/src/state.c
index f409a3e..ebd0ca6 100644
--- a/src/state.c
+++ b/src/state.c
@@ -61,6 +61,7 @@
 
 #include "keymap.h"
 #include "keysym.h"
+#include "utf8.h"
 
 struct xkb_filter {
     union xkb_action action;
@@ -870,6 +871,70 @@ xkb_state_key_get_one_sym(struct xkb_state *state, xkb_keycode_t kc)
     return sym;
 }
 
+/*
+ * Store the UTF-8 string for key kc in buffer (size bytes, NUL included) and
+ * return the full length without the NUL, as snprintf(3) does.  A too-small
+ * buffer keeps only whole keysym encodings.  If a keysym has no UTF-8 form,
+ * or the complete string is not valid UTF-8, store "" and return 0.
+ */
+XKB_EXPORT int
+xkb_state_key_get_utf8(struct xkb_state *state, xkb_keycode_t kc,
+                       char *buffer, size_t size)
+{
+    xkb_keysym_t sym;
+    const xkb_keysym_t *syms;
+    int nsyms;
+    int offset = 0;  /* Bytes needed so far, excluding the NUL. */
+    int written = 0; /* Bytes actually stored in buffer. */
+    char tmp[7];
+
+    /* Make sure the keysym transformations are applied. */
+    sym = xkb_state_key_get_one_sym(state, kc);
+    if (sym != XKB_KEY_NoSymbol) {
+        nsyms = 1; syms = &sym;
+    } else {
+        nsyms = xkb_state_key_get_syms(state, kc, &syms);
+    }
+
+    for (int i = 0; i < nsyms; i++) {
+        int ret = xkb_keysym_to_utf8(syms[i], tmp, sizeof(tmp));
+        if (ret <= 0)
+            goto err_bad;
+
+        /* Store only whole sequences, and always leave room for the NUL. */
+        ret--;
+        if ((size_t) (offset + ret) < size) {
+            memcpy(buffer + offset, tmp, ret);
+            written = offset + ret;
+        }
+        offset += ret;
+    }
+
+    /* With size == 0 (size query, buffer may be NULL) nothing is written. */
+    if (size > 0)
+        buffer[written] = '\0';
+    if (written == offset && size > 0 && !is_valid_utf8(buffer, offset))
+        goto err_bad;
+    return offset;
+
+err_bad:
+    if (size > 0)
+        buffer[0] = '\0';
+    return 0;
+}
+
+/*
+ * Look up the key's keysym with xkb_state_key_get_one_sym() and return
+ * whatever xkb_keysym_to_utf32() yields for it.
+ */
+XKB_EXPORT uint32_t
+xkb_state_key_get_utf32(struct xkb_state *state, xkb_keycode_t kc)
+{
+    const xkb_keysym_t one_sym = xkb_state_key_get_one_sym(state, kc);
+
+    return xkb_keysym_to_utf32(one_sym);
+}
+
 /**
  * Serialises the requested modifier state into an xkb_mod_mask_t, with all
  * the same disclaimers as in xkb_state_update_mask.
diff --git a/test/common.c b/test/common.c
index fd013ca..8b3f954 100644
--- a/test/common.c
+++ b/test/common.c
@@ -371,18 +371,8 @@ test_print_keycode_state(struct xkb_state *state, xkb_keycode_t keycode)
         printf("] ");
     }
 
-    /*
-     * Only do this if wchar_t is UCS-4, so we can be lazy and print
-     * with %lc.
-     */
-#ifdef __STDC_ISO_10646__
-    printf("unicode [ ");
-    for (int i = 0; i < nsyms; i++) {
-        uint32_t unicode = xkb_keysym_to_utf32(syms[i]);
-        printf("%lc ", (int) (unicode > 32 ? unicode : L' '));
-    }
-    printf("] ");
-#endif
+    xkb_state_key_get_utf8(state, keycode, s, sizeof(s));
+    printf("unicode [ %s ] ", s);
 
     layout = xkb_state_key_get_layout(state, keycode);
     printf("layout [ %s (%d) ] ",
diff --git a/test/state.c b/test/state.c
index 950b423..95852b2 100644
--- a/test/state.c
+++ b/test/state.c
@@ -428,6 +428,81 @@ test_caps_keysym_transformation(struct xkb_keymap *keymap)
     xkb_state_unref(state);
 }
 
+/* Exercise xkb_state_key_get_utf8()/xkb_state_key_get_utf32() on several keys. */
+static void
+test_get_utf8_utf32(struct xkb_keymap *keymap)
+{
+    char buf[256];
+    struct xkb_state *state = xkb_state_new(keymap);
+    assert(state);
+
+#define TEST_KEY(key, expected_utf8, expected_utf32) do { \
+    assert(xkb_state_key_get_utf8(state, (key) + EVDEV_OFFSET, NULL, 0) == (int) strlen(expected_utf8)); \
+    assert(xkb_state_key_get_utf8(state, (key) + EVDEV_OFFSET, buf, sizeof(buf)) == (int) strlen(expected_utf8)); \
+    assert(strcmp(buf, expected_utf8) == 0); \
+    assert(xkb_state_key_get_utf32(state, (key) + EVDEV_OFFSET) == (expected_utf32)); \
+} while (0)
+
+    /* Simple ASCII. */
+    TEST_KEY(KEY_A, "a", 0x61);
+    TEST_KEY(KEY_ESC, "\x1B", 0x1B);
+    TEST_KEY(KEY_1, "1", 0x31);
+
+    /* Invalid. */
+    TEST_KEY(XKB_KEYCODE_INVALID - EVDEV_OFFSET, "", 0);
+    TEST_KEY(300, "", 0);
+
+    /* No string. */
+    TEST_KEY(KEY_LEFTCTRL, "", 0);
+    TEST_KEY(KEY_NUMLOCK, "", 0);
+
+    /* Multiple keysyms. */
+    TEST_KEY(KEY_6, "HELLO", 0);
+    TEST_KEY(KEY_7, "YES THIS IS DOG", 0);
+
+    /* Check truncation: the return value is always the full length. */
+    memset(buf, 'X', sizeof(buf));
+    assert(xkb_state_key_get_utf8(state, KEY_6 + EVDEV_OFFSET, buf, 0) == (int) strlen("HELLO"));
+    assert(memcmp(buf, "X", 1) == 0);
+    assert(xkb_state_key_get_utf8(state, KEY_6 + EVDEV_OFFSET, buf, 1) == (int) strlen("HELLO"));
+    assert(memcmp(buf, "", 1) == 0);
+    assert(xkb_state_key_get_utf8(state, KEY_6 + EVDEV_OFFSET, buf, 2) == (int) strlen("HELLO"));
+    assert(memcmp(buf, "H", 2) == 0);
+    assert(xkb_state_key_get_utf8(state, KEY_6 + EVDEV_OFFSET, buf, 3) == (int) strlen("HELLO"));
+    assert(memcmp(buf, "HE", 3) == 0);
+    assert(xkb_state_key_get_utf8(state, KEY_6 + EVDEV_OFFSET, buf, 5) == (int) strlen("HELLO"));
+    assert(memcmp(buf, "HELL", 5) == 0);
+    assert(xkb_state_key_get_utf8(state, KEY_6 + EVDEV_OFFSET, buf, 6) == (int) strlen("HELLO"));
+    assert(memcmp(buf, "HELLO", 6) == 0);
+    assert(xkb_state_key_get_utf8(state, KEY_6 + EVDEV_OFFSET, buf, 7) == (int) strlen("HELLO"));
+    assert(memcmp(buf, "HELLO\0X", 7) == 0);
+
+    /* Switch to ru layout */
+    xkb_state_update_key(state, KEY_COMPOSE + EVDEV_OFFSET, XKB_KEY_DOWN);
+    xkb_state_update_key(state, KEY_COMPOSE + EVDEV_OFFSET, XKB_KEY_UP);
+    assert(xkb_state_key_get_layout(state, KEY_A + EVDEV_OFFSET) == 1);
+
+    /* Non ASCII. */
+    TEST_KEY(KEY_ESC, "\x1B", 0x1B);
+    TEST_KEY(KEY_A, "ф", 0x0444);
+    TEST_KEY(KEY_Z, "я", 0x044F);
+
+    /* Switch back to us layout */
+    xkb_state_update_key(state, KEY_COMPOSE + EVDEV_OFFSET, XKB_KEY_DOWN);
+    xkb_state_update_key(state, KEY_COMPOSE + EVDEV_OFFSET, XKB_KEY_UP);
+    assert(xkb_state_key_get_layout(state, KEY_A + EVDEV_OFFSET) == 0);
+
+    xkb_state_update_key(state, KEY_LEFTSHIFT + EVDEV_OFFSET, XKB_KEY_DOWN);
+    TEST_KEY(KEY_A, "A", 0x41);
+    TEST_KEY(KEY_ESC, "\x1B", 0x1B);
+    TEST_KEY(KEY_1, "!", 0x21);
+    xkb_state_update_key(state, KEY_LEFTSHIFT + EVDEV_OFFSET, XKB_KEY_UP);
+    TEST_KEY(KEY_6, "HELLO", 0);
+    TEST_KEY(KEY_7, "YES THIS IS DOG", 0);
+#undef TEST_KEY
+    xkb_state_unref(state);
+}
+
 int
 main(void)
 {
@@ -449,6 +524,7 @@ main(void)
     test_repeat(keymap);
     test_consume(keymap);
     test_range(keymap);
+    test_get_utf8_utf32(keymap);
 
     xkb_keymap_unref(keymap);
     keymap = test_compile_rules(context, "evdev", NULL, "ch", "fr", NULL);
diff --git a/xkbcommon/xkbcommon.h b/xkbcommon/xkbcommon.h
index ab3be9f..36251db 100644
--- a/xkbcommon/xkbcommon.h
+++ b/xkbcommon/xkbcommon.h
@@ -433,6 +433,11 @@ xkb_keysym_from_name(const char *name, enum xkb_keysym_flags flags);
  * @returns The number of bytes written to the buffer (including the
  * terminating byte).  If the keysym does not have a Unicode
  * representation, returns 0.  If the buffer is too small, returns -1.
+ *
+ * Prefer not to use this function on keysyms obtained from an
+ * xkb_state.  In this case, use xkb_state_key_get_utf8() instead.
+ *
+ * @sa xkb_state_key_get_utf8()
  */
 int
 xkb_keysym_to_utf8(xkb_keysym_t keysym, char *buffer, size_t size);
@@ -443,6 +448,11 @@ xkb_keysym_to_utf8(xkb_keysym_t keysym, char *buffer, size_t size);
  * @returns The Unicode/UTF-32 representation of keysym, which is also
  * compatible with UCS-4.  If the keysym does not have a Unicode
  * representation, returns 0.
+ *
+ * Prefer not to use this function on keysyms obtained from an
+ * xkb_state.  In this case, use xkb_state_key_get_utf32() instead.
+ *
+ * @sa xkb_state_key_get_utf32()
  */
 uint32_t
 xkb_keysym_to_utf32(xkb_keysym_t keysym);
@@ -1269,6 +1279,44 @@ xkb_state_key_get_syms(struct xkb_state *state, xkb_keycode_t key,
                        const xkb_keysym_t **syms_out);
 
 /**
+ * Get the Unicode/UTF-8 string obtained from pressing a particular key
+ * in a given keyboard state.
+ *
+ * @param[in]  state  The keyboard state object.
+ * @param[in]  key    The keycode of the key.
+ * @param[out] buffer A buffer to write the string into.
+ * @param[in]  size   Size of the buffer.
+ *
+ * @warning If the buffer passed is too small, the string is truncated
+ * (though still NUL-terminated).
+ *
+ * @returns The number of bytes required for the string, excluding the
+ * NUL byte.  If there is nothing to write, returns 0.
+ *
+ * You may check if truncation has occurred by comparing the return value
+ * with the size of @p buffer, similarly to the snprintf(3) function.
+ * You may safely pass NULL and 0 to @p buffer and @p size to find the
+ * required size (without the NUL-byte).
+ *
+ * @memberof xkb_state
+ */
+int
+xkb_state_key_get_utf8(struct xkb_state *state, xkb_keycode_t key,
+                       char *buffer, size_t size);
+
+/**
+ * Get the Unicode/UTF-32 codepoint obtained from pressing a particular
+ * key in a given keyboard state.
+ *
+ * @returns The UTF-32 representation for the key, if it consists of only
+ * a single codepoint.  Otherwise, returns 0.
+ *
+ * @memberof xkb_state
+ */
+uint32_t
+xkb_state_key_get_utf32(struct xkb_state *state, xkb_keycode_t key);
+
+/**
  * Get the single keysym obtained from pressing a particular key in a
  * given keyboard state.
  *