xkbcomp: early detection of invalid encoding
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109
diff --git a/src/xkbcomp/scanner.c b/src/xkbcomp/scanner.c
index 8bff375..7db9a7f 100644
--- a/src/xkbcomp/scanner.c
+++ b/src/xkbcomp/scanner.c
@@ -200,9 +200,18 @@ XkbParseString(struct xkb_context *ctx, const char *string, size_t len,
struct scanner scanner;
scanner_init(&scanner, ctx, string, len, file_name, NULL);
- /* Skip UTF-8 encoded BOM (U+FEFF) */
- /* See: https://www.unicode.org/faq/utf_bom.html#bom5 */
- scanner_str(&scanner, "\xef\xbb\xbf", 3);
+ /* Basic detection of wrong character encoding.
+ The first character relevant to the grammar must be ASCII:
+ whitespace, section, comment */
+ if (!scanner_check_supported_char_encoding(&scanner)) {
+ scanner_err(&scanner,
+ "This could be a file encoding issue. "
+ "Supported encodings must be backward compatible with ASCII.");
+ scanner_err(&scanner,
+ "E.g. ISO/CEI 8859 and UTF-8 are supported "
+ "but UTF-16, UTF-32 and CP1026 are not.");
+ return NULL;
+ }
return parse(ctx, &scanner, map);
}
diff --git a/test/buffercomp.c b/test/buffercomp.c
index b9b5e9d..091a876 100644
--- a/test/buffercomp.c
+++ b/test/buffercomp.c
@@ -31,6 +31,59 @@
#define DATA_PATH "keymaps/stringcomp.data"
+static bool
+test_encodings(struct xkb_context *ctx)
+{
+ struct xkb_keymap *keymap;
+
+ /* A keymap prefixed with the UTF-8 encoded BOM (U+FEFF, bytes EF BB BF) must still compile. */
+ const char utf8_with_bom[] =
+ "\xef\xbb\xbfxkb_keymap {"
+ " xkb_keycodes { include \"evdev\" };"
+ " xkb_types { include \"complete\" };"
+ " xkb_compat { include \"complete\" };"
+ " xkb_symbols { include \"pc\" };"
+ "};";
+ keymap = test_compile_buffer(ctx, utf8_with_bom, sizeof(utf8_with_bom)); /* sizeof of the array also counts the terminating NUL */
+ assert(keymap);
+ xkb_keymap_unref(keymap);
+
+ /* A keymap encoded as UTF-16LE (every ASCII character followed by a NUL byte) must fail to compile. */
+ const char utf16_le[] =
+ "x\0k\0b\0_\0k\0e\0y\0m\0a\0p\0 \0{\0\n\0"
+ " \0 \0x\0k\0b\0_\0k\0e\0y\0c\0o\0d\0e\0s\0 \0{\0 \0i\0n\0c\0l\0u\0d\0e\0 \0\"\0e\0v\0d\0e\0v\0\"\0 \0}\0;\0\n\0"
+ " \0 \0x\0k\0b\0_\0t\0y\0p\0e\0s\0 \0{\0 \0i\0n\0c\0l\0u\0d\0e\0 \0\"\0c\0o\0m\0p\0l\0e\0t\0e\0\"\0 \0}\0;\0\n\0"
+ " \0 \0x\0k\0b\0_\0c\0o\0m\0p\0a\0t\0 \0{\0 \0i\0n\0c\0l\0u\0d\0e\0 \0\"\0c\0o\0m\0p\0l\0e\0t\0e\0\"\0 \0}\0;\0\n\0"
+ " \0 \0x\0k\0b\0_\0s\0y\0m\0b\0o\0l\0s\0 \0{\0 \0i\0n\0c\0l\0u\0d\0e\0 \0\"\0p\0c\0\"\0 \0}\0;\0\n\0"
+ "}\0;\0";
+ keymap = test_compile_buffer(ctx, utf16_le, sizeof(utf16_le));
+ assert(!keymap);
+
+ /* The UTF-16LE keymap preceded by its BOM (bytes FF FE) must fail to compile as well. */
+ const char utf16_le_with_bom[] =
+ "\xff\xfex\0k\0b\0_\0k\0e\0y\0m\0a\0p\0 \0{\0\n\0"
+ " \0 \0x\0k\0b\0_\0k\0e\0y\0c\0o\0d\0e\0s\0 \0{\0 \0i\0n\0c\0l\0u\0d\0e\0 \0\"\0e\0v\0d\0e\0v\0\"\0 \0}\0;\0\n\0"
+ " \0 \0x\0k\0b\0_\0t\0y\0p\0e\0s\0 \0{\0 \0i\0n\0c\0l\0u\0d\0e\0 \0\"\0c\0o\0m\0p\0l\0e\0t\0e\0\"\0 \0}\0;\0\n\0"
+ " \0 \0x\0k\0b\0_\0c\0o\0m\0p\0a\0t\0 \0{\0 \0i\0n\0c\0l\0u\0d\0e\0 \0\"\0c\0o\0m\0p\0l\0e\0t\0e\0\"\0 \0}\0;\0\n\0"
+ " \0 \0x\0k\0b\0_\0s\0y\0m\0b\0o\0l\0s\0 \0{\0 \0i\0n\0c\0l\0u\0d\0e\0 \0\"\0p\0c\0\"\0 \0}\0;\0\n\0"
+ "}\0;\0";
+ keymap = test_compile_buffer(ctx, utf16_le_with_bom, sizeof(utf16_le_with_bom));
+ assert(!keymap);
+
+ /* A keymap encoded as UTF-16BE (a NUL byte before every ASCII character) must fail to compile. */
+ const char utf16_be[] =
+ "\0x\0k\0b\0_\0k\0e\0y\0m\0a\0p\0 \0{\0\n\0"
+ " \0 \0x\0k\0b\0_\0k\0e\0y\0c\0o\0d\0e\0s\0 \0{\0 \0i\0n\0c\0l\0u\0d\0e\0 \0\"\0e\0v\0d\0e\0v\0\"\0 \0}\0;\0\n\0"
+ " \0 \0x\0k\0b\0_\0t\0y\0p\0e\0s\0 \0{\0 \0i\0n\0c\0l\0u\0d\0e\0 \0\"\0c\0o\0m\0p\0l\0e\0t\0e\0\"\0 \0}\0;\0\n\0"
+ " \0 \0x\0k\0b\0_\0c\0o\0m\0p\0a\0t\0 \0{\0 \0i\0n\0c\0l\0u\0d\0e\0 \0\"\0c\0o\0m\0p\0l\0e\0t\0e\0\"\0 \0}\0;\0\n\0"
+ " \0 \0x\0k\0b\0_\0s\0y\0m\0b\0o\0l\0s\0 \0{\0 \0i\0n\0c\0l\0u\0d\0e\0 \0\"\0p\0c\0\"\0 \0}\0;\0\n\0"
+ "}\0;";
+ keymap = test_compile_buffer(ctx, utf16_be, sizeof(utf16_be));
+ assert(!keymap);
+
+ return true; /* reached only when none of the asserts above aborted */
+}
+
int
main(int argc, char *argv[])
{
@@ -78,17 +131,7 @@ main(int argc, char *argv[])
keymap = test_compile_buffer(ctx, "", 0);
assert(!keymap);
- /* Accept UTF-8 encoded BOM (U+FEFF) */
- const char *bom =
- "\xef\xbb\xbfxkb_keymap {"
- " xkb_keycodes { include \"evdev\" };"
- " xkb_types { include \"complete\" };"
- " xkb_compat { include \"complete\" };"
- " xkb_symbols { include \"pc\" };"
- "};";
- keymap = test_compile_buffer(ctx, bom, strlen(bom));
- assert(keymap);
- xkb_keymap_unref(keymap);
+ assert(test_encodings(ctx));
/* Make sure we can recompile our output for a normal keymap from rules. */
keymap = test_compile_rules(ctx, NULL, NULL,