Commit f937c30820766e22c2ba5ad905eaa8cb4878294c

Pierre Le Marre 2023-10-29T07:31:34

xkbcomp: skip leading UTF-8 encoded BOM (U+FEFF). A leading BOM is legal and is used as a signature — an indication that an otherwise unmarked text file is in UTF-8. See https://www.unicode.org/faq/utf_bom.html#bom5 for further details.

diff --git a/src/xkbcomp/scanner.c b/src/xkbcomp/scanner.c
index 57babbb..8bff375 100644
--- a/src/xkbcomp/scanner.c
+++ b/src/xkbcomp/scanner.c
@@ -199,6 +199,11 @@ XkbParseString(struct xkb_context *ctx, const char *string, size_t len,
 {
     struct scanner scanner;
     scanner_init(&scanner, ctx, string, len, file_name, NULL);
+
+    /* Skip UTF-8 encoded BOM (U+FEFF) */
+    /* See: https://www.unicode.org/faq/utf_bom.html#bom5 */
+    scanner_str(&scanner, "\xef\xbb\xbf", 3);
+
     return parse(ctx, &scanner, map);
 }
 
diff --git a/test/buffercomp.c b/test/buffercomp.c
index 9a76036..b9b5e9d 100644
--- a/test/buffercomp.c
+++ b/test/buffercomp.c
@@ -78,6 +78,18 @@ main(int argc, char *argv[])
     keymap = test_compile_buffer(ctx, "", 0);
     assert(!keymap);
 
+    /* Accept UTF-8 encoded BOM (U+FEFF) */
+    const char *bom =
+        "\xef\xbb\xbfxkb_keymap {"
+        "  xkb_keycodes { include \"evdev\" };"
+        "  xkb_types { include \"complete\" };"
+        "  xkb_compat { include \"complete\" };"
+        "  xkb_symbols { include \"pc\" };"
+        "};";
+    keymap = test_compile_buffer(ctx, bom, strlen(bom));
+    assert(keymap);
+    xkb_keymap_unref(keymap);
+
     /* Make sure we can recompile our output for a normal keymap from rules. */
     keymap = test_compile_rules(ctx, NULL, NULL,
                                 "ru,ca,de,us", ",multix,neo,intl", NULL);