state: add xkb_state_key_get_{utf8,utf32}() API functions These functions generally have the same effect as xkb_state_key_get_syms() + xkb_keysym_to_utf{8,32}(). So why add them? - They provide a slightly nicer interface, especially if the string is the only interest. - They make the handling of multiple-keysyms-to-utf8 transparent. For the designated use-case of multiple-keysyms (Unicode combining characters), this is a must. We also validate the UTF-8, which the user might not otherwise do. - We will need to apply some transformations on the resulting string which depend on the xkb_state. This is not possible with the xkb_keysym_* functions. With these functions, the existing xkb_keysym_to_utf{8,32}() are not expected to be used by a typical user; they are "raw" functions. Signed-off-by: Ran Benita <ran234@gmail.com>
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275
diff --git a/src/state.c b/src/state.c
index f409a3e..ebd0ca6 100644
--- a/src/state.c
+++ b/src/state.c
@@ -61,6 +61,7 @@
#include "keymap.h"
#include "keysym.h"
+#include "utf8.h"
struct xkb_filter {
union xkb_action action;
@@ -870,6 +871,70 @@ xkb_state_key_get_one_sym(struct xkb_state *state, xkb_keycode_t kc)
return sym;
}
+/**
+ * Write the UTF-8 string produced by key @p kc in @p state into @p buffer.
+ *
+ * The string is the concatenation of the UTF-8 encodings of the key's
+ * keysyms.  At most @p size bytes are written, including the terminating
+ * NUL.  When the complete string does not fit, it is cut between keysyms,
+ * so no encoded sequence is ever left incomplete.
+ *
+ * @returns The length in bytes of the complete string, excluding the NUL
+ * byte, even when the output was truncated (compare it with @p size to
+ * detect truncation).  Returns 0 and stores an empty string (if @p size is
+ * greater than 0) when a keysym cannot be converted or the result is not
+ * valid UTF-8.
+ */
+XKB_EXPORT int
+xkb_state_key_get_utf8(struct xkb_state *state, xkb_keycode_t kc,
+                       char *buffer, size_t size)
+{
+    xkb_keysym_t sym;
+    const xkb_keysym_t *syms;
+    int nsyms;
+    int offset;
+    /* End of the last whole sequence copied into buffer. */
+    size_t written;
+    /* Scratch space for one keysym's NUL-terminated UTF-8 encoding. */
+    char tmp[7];
+
+    /* Make sure the keysym transformations are applied. */
+    sym = xkb_state_key_get_one_sym(state, kc);
+    if (sym != XKB_KEY_NoSymbol) {
+        nsyms = 1; syms = &sym;
+    }
+    else {
+        /* No single keysym is available; use the key's full keysym list. */
+        nsyms = xkb_state_key_get_syms(state, kc, &syms);
+    }
+
+    /* Make sure not to truncate in the middle of a UTF-8 sequence. */
+    offset = 0;
+    written = 0;
+    for (int i = 0; i < nsyms; i++) {
+        int ret = xkb_keysym_to_utf8(syms[i], tmp, sizeof(tmp));
+        if (ret <= 0)
+            goto err_bad;
+
+        /* The return value counts the NUL byte; drop it. */
+        ret--;
+        /*
+         * Copy a sequence only if it fits whole and still leaves room for
+         * the terminating NUL.  Otherwise the NUL written on truncation
+         * would overwrite the last byte of a multi-byte sequence.
+         */
+        if ((size_t) (offset + ret) < size) {
+            memcpy(buffer + offset, tmp, ret);
+            written = (size_t) (offset + ret);
+        }
+        /* Keep counting so the caller learns the full required length. */
+        offset += ret;
+    }
+
+    if ((size_t) offset >= size)
+        goto err_trunc;
+    buffer[offset] = '\0';
+
+    if (!is_valid_utf8(buffer, offset))
+        goto err_bad;
+
+    return offset;
+
+err_trunc:
+    /* written < size whenever size > 0, so this stays in bounds. */
+    if (size > 0)
+        buffer[written] = '\0';
+    return offset;
+
+err_bad:
+    if (size > 0)
+        buffer[0] = '\0';
+    return 0;
+}
+
+/**
+ * Return the UTF-32 codepoint for key @p kc in @p state.
+ *
+ * The value is the key's single keysym, as obtained from
+ * xkb_state_key_get_one_sym(), converted with xkb_keysym_to_utf32().
+ */
+XKB_EXPORT uint32_t
+xkb_state_key_get_utf32(struct xkb_state *state, xkb_keycode_t kc)
+{
+    const xkb_keysym_t one_sym = xkb_state_key_get_one_sym(state, kc);
+
+    return xkb_keysym_to_utf32(one_sym);
+}
+
/**
* Serialises the requested modifier state into an xkb_mod_mask_t, with all
* the same disclaimers as in xkb_state_update_mask.
diff --git a/test/common.c b/test/common.c
index fd013ca..8b3f954 100644
--- a/test/common.c
+++ b/test/common.c
@@ -371,18 +371,8 @@ test_print_keycode_state(struct xkb_state *state, xkb_keycode_t keycode)
printf("] ");
}
- /*
- * Only do this if wchar_t is UCS-4, so we can be lazy and print
- * with %lc.
- */
-#ifdef __STDC_ISO_10646__
- printf("unicode [ ");
- for (int i = 0; i < nsyms; i++) {
- uint32_t unicode = xkb_keysym_to_utf32(syms[i]);
- printf("%lc ", (int) (unicode > 32 ? unicode : L' '));
- }
- printf("] ");
-#endif
+ xkb_state_key_get_utf8(state, keycode, s, sizeof(s));
+ printf("unicode [ %s ] ", s);
layout = xkb_state_key_get_layout(state, keycode);
printf("layout [ %s (%d) ] ",
diff --git a/test/state.c b/test/state.c
index 950b423..95852b2 100644
--- a/test/state.c
+++ b/test/state.c
@@ -428,6 +428,81 @@ test_caps_keysym_transformation(struct xkb_keymap *keymap)
xkb_state_unref(state);
}
+static void
+test_get_utf8_utf32(struct xkb_keymap *keymap)
+{ /* Exercises xkb_state_key_get_utf8() and xkb_state_key_get_utf32() on a fresh state created from keymap. */
+ char buf[256]; /* Output buffer handed to xkb_state_key_get_utf8(). */
+ struct xkb_state *state = xkb_state_new(keymap);
+ assert(state);
+ /*
+  * TEST_KEY(key, expected_utf8, expected_utf32) asserts, for keycode
+  * key + 8, that:
+  *  - the size query (NULL buffer, size 0) returns strlen(expected_utf8);
+  *  - the buffered call returns the same length and fills buf with
+  *    expected_utf8; sizeof() on the string literal makes memcmp() cover
+  *    the terminating NUL as well;
+  *  - xkb_state_key_get_utf32() returns expected_utf32.
+  */
+#define TEST_KEY(key, expected_utf8, expected_utf32) do { \
+ assert(xkb_state_key_get_utf8(state, key + 8, NULL, 0) == strlen(expected_utf8)); \
+ assert(xkb_state_key_get_utf8(state, key + 8, buf, sizeof(buf)) == strlen(expected_utf8)); \
+ assert(memcmp(buf, expected_utf8, sizeof(expected_utf8)) == 0); \
+ assert(xkb_state_key_get_utf32(state, key + 8) == expected_utf32); \
+} while (0)
+
+ /* Simple ASCII. */
+ TEST_KEY(KEY_A, "a", 0x61);
+ TEST_KEY(KEY_ESC, "\x1B", 0x1B);
+ TEST_KEY(KEY_1, "1", 0x31);
+
+ /* Invalid keycodes: expect an empty string and codepoint 0. The "- 8" cancels the "+ 8" added by TEST_KEY. */
+ TEST_KEY(XKB_KEYCODE_INVALID - 8, "", 0);
+ TEST_KEY(300, "", 0);
+
+ /* No string: these keys are expected to give "" and codepoint 0. */
+ TEST_KEY(KEY_LEFTCTRL, "", 0);
+ TEST_KEY(KEY_NUMLOCK, "", 0);
+
+ /* Multiple keysyms: UTF-8 is the concatenated string, UTF-32 is expected to be 0. */
+ TEST_KEY(KEY_6, "HELLO", 0);
+ TEST_KEY(KEY_7, "YES THIS IS DOG", 0);
+
+ /* Check truncation: the return value stays strlen("HELLO"); buf gets at most size - 1 bytes plus a NUL, and is untouched when size is 0. */
+ memset(buf, 'X', sizeof(buf));
+ assert(xkb_state_key_get_utf8(state, KEY_6 + 8, buf, 0) == strlen("HELLO"));
+ assert(memcmp(buf, "X", 1) == 0);
+ assert(xkb_state_key_get_utf8(state, KEY_6 + 8, buf, 1) == strlen("HELLO"));
+ assert(memcmp(buf, "", 1) == 0);
+ assert(xkb_state_key_get_utf8(state, KEY_6 + 8, buf, 2) == strlen("HELLO"));
+ assert(memcmp(buf, "H", 2) == 0);
+ assert(xkb_state_key_get_utf8(state, KEY_6 + 8, buf, 3) == strlen("HELLO"));
+ assert(memcmp(buf, "HE", 3) == 0);
+ assert(xkb_state_key_get_utf8(state, KEY_6 + 8, buf, 5) == strlen("HELLO"));
+ assert(memcmp(buf, "HELL", 5) == 0);
+ assert(xkb_state_key_get_utf8(state, KEY_6 + 8, buf, 6) == strlen("HELLO"));
+ assert(memcmp(buf, "HELLO", 6) == 0);
+ assert(xkb_state_key_get_utf8(state, KEY_6 + 8, buf, 7) == strlen("HELLO"));
+ assert(memcmp(buf, "HELLO\0X", 7) == 0); /* The byte after the NUL must keep its 'X' fill. */
+
+ /* Switch to ru layout (layout index 1, asserted below). */
+ xkb_state_update_key(state, KEY_COMPOSE + EVDEV_OFFSET, XKB_KEY_DOWN);
+ xkb_state_update_key(state, KEY_COMPOSE + EVDEV_OFFSET, XKB_KEY_UP);
+ assert(xkb_state_key_get_layout(state, KEY_A + 8) == 1);
+
+ /* Non ASCII. */
+ TEST_KEY(KEY_ESC, "\x1B", 0x1B);
+ TEST_KEY(KEY_A, "ф", 0x0444);
+ TEST_KEY(KEY_Z, "я", 0x044F);
+
+ /* Switch back to us layout (layout index 0, asserted below). */
+ xkb_state_update_key(state, KEY_COMPOSE + EVDEV_OFFSET, XKB_KEY_DOWN);
+ xkb_state_update_key(state, KEY_COMPOSE + EVDEV_OFFSET, XKB_KEY_UP);
+ assert(xkb_state_key_get_layout(state, KEY_A + 8) == 0);
+ /* Repeat some checks while left Shift is held down. */
+ xkb_state_update_key(state, KEY_LEFTSHIFT + EVDEV_OFFSET, XKB_KEY_DOWN);
+ TEST_KEY(KEY_A, "A", 0x41);
+ TEST_KEY(KEY_ESC, "\x1B", 0x1B);
+ TEST_KEY(KEY_1, "!", 0x21);
+ xkb_state_update_key(state, KEY_LEFTSHIFT + EVDEV_OFFSET, XKB_KEY_UP);
+ /* With Shift released, the multi-keysym keys give the same results as before. */
+ TEST_KEY(KEY_6, "HELLO", 0);
+ TEST_KEY(KEY_7, "YES THIS IS DOG", 0);
+
+ xkb_state_unref(state);
+}
+
int
main(void)
{
@@ -449,6 +524,7 @@ main(void)
test_repeat(keymap);
test_consume(keymap);
test_range(keymap);
+ test_get_utf8_utf32(keymap);
xkb_keymap_unref(keymap);
keymap = test_compile_rules(context, "evdev", NULL, "ch", "fr", NULL);
diff --git a/xkbcommon/xkbcommon.h b/xkbcommon/xkbcommon.h
index ab3be9f..36251db 100644
--- a/xkbcommon/xkbcommon.h
+++ b/xkbcommon/xkbcommon.h
@@ -433,6 +433,11 @@ xkb_keysym_from_name(const char *name, enum xkb_keysym_flags flags);
* @returns The number of bytes written to the buffer (including the
* terminating byte). If the keysym does not have a Unicode
* representation, returns 0. If the buffer is too small, returns -1.
+ *
+ * Prefer not to use this function on keysyms obtained from an
+ * xkb_state. In this case, use xkb_state_key_get_utf8() instead.
+ *
+ * @sa xkb_state_key_get_utf8()
*/
int
xkb_keysym_to_utf8(xkb_keysym_t keysym, char *buffer, size_t size);
@@ -443,6 +448,11 @@ xkb_keysym_to_utf8(xkb_keysym_t keysym, char *buffer, size_t size);
* @returns The Unicode/UTF-32 representation of keysym, which is also
* compatible with UCS-4. If the keysym does not have a Unicode
* representation, returns 0.
+ *
+ * Prefer not to use this function on keysyms obtained from an
+ * xkb_state. In this case, use xkb_state_key_get_utf32() instead.
+ *
+ * @sa xkb_state_key_get_utf32()
*/
uint32_t
xkb_keysym_to_utf32(xkb_keysym_t keysym);
@@ -1269,6 +1279,44 @@ xkb_state_key_get_syms(struct xkb_state *state, xkb_keycode_t key,
const xkb_keysym_t **syms_out);
/**
+ * Get the Unicode/UTF-8 string obtained from pressing a particular key
+ * in a given keyboard state.
+ *
+ * @param[in] state The keyboard state object.
+ * @param[in] key The keycode of the key.
+ * @param[out] buffer A buffer to write the string into.
+ * @param[in] size Size of the buffer.
+ *
+ * @warning If the buffer passed is too small, the string is truncated
+ * (though still NUL-terminated).
+ *
+ * @returns The number of bytes required for the string, excluding the
+ * NUL byte. If there is nothing to write, returns 0.
+ * Also returns 0 (leaving an empty string in @p buffer if @p size is
+ * greater than 0) when a keysym of the key cannot be converted to UTF-8
+ * or the resulting string is not valid UTF-8.
+ *
+ * You may check if truncation has occurred by comparing the return value
+ * with the size of @p buffer, similarly to the snprintf(3) function.
+ * You may safely pass NULL and 0 to @p buffer and @p size to find the
+ * required size (without the NUL-byte).
+ *
+ * @memberof xkb_state
+ */
+int
+xkb_state_key_get_utf8(struct xkb_state *state, xkb_keycode_t key,
+ char *buffer, size_t size);
+
+/**
+ * Get the Unicode/UTF-32 codepoint obtained from pressing a particular
+ * key in a given keyboard state.
+ *
+ * @param[in] state The keyboard state object.
+ * @param[in] key The keycode of the key.
+ *
+ * @returns The UTF-32 representation for the key, if it consists of only
+ * a single codepoint. Otherwise, returns 0.
+ *
+ * @memberof xkb_state
+ */
+uint32_t
+xkb_state_key_get_utf32(struct xkb_state *state, xkb_keycode_t key);
+
+/**
* Get the single keysym obtained from pressing a particular key in a
* given keyboard state.
*