xkbcomp: skip leading UTF-8 encoded BOM (U+FEFF)

A leading BOM is legal and is used as a signature — an indication that an otherwise unmarked text file is in UTF-8. See https://www.unicode.org/faq/utf_bom.html#bom5 for further details.
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39
diff --git a/src/xkbcomp/scanner.c b/src/xkbcomp/scanner.c
index 57babbb..8bff375 100644
--- a/src/xkbcomp/scanner.c
+++ b/src/xkbcomp/scanner.c
@@ -199,6 +199,11 @@ XkbParseString(struct xkb_context *ctx, const char *string, size_t len,
{
struct scanner scanner;
scanner_init(&scanner, ctx, string, len, file_name, NULL);
+
+ /* Skip UTF-8 encoded BOM (U+FEFF) */
+ /* See: https://www.unicode.org/faq/utf_bom.html#bom5 */
+ scanner_str(&scanner, "\xef\xbb\xbf", 3);
+
return parse(ctx, &scanner, map);
}
diff --git a/test/buffercomp.c b/test/buffercomp.c
index 9a76036..b9b5e9d 100644
--- a/test/buffercomp.c
+++ b/test/buffercomp.c
@@ -78,6 +78,18 @@ main(int argc, char *argv[])
keymap = test_compile_buffer(ctx, "", 0);
assert(!keymap);
+ /* Accept UTF-8 encoded BOM (U+FEFF) */
+ const char *bom =
+ "\xef\xbb\xbfxkb_keymap {"
+ " xkb_keycodes { include \"evdev\" };"
+ " xkb_types { include \"complete\" };"
+ " xkb_compat { include \"complete\" };"
+ " xkb_symbols { include \"pc\" };"
+ "};";
+ keymap = test_compile_buffer(ctx, bom, strlen(bom));
+ assert(keymap);
+ xkb_keymap_unref(keymap);
+
/* Make sure we can recompile our output for a normal keymap from rules. */
keymap = test_compile_rules(ctx, NULL, NULL,
"ru,ca,de,us", ",multix,neo,intl", NULL);