rules: skip leading UTF-8 encoded BOM (U+FEFF). A leading BOM is legal and is used as a signature — an indication that an otherwise unmarked text file is in UTF-8. See https://www.unicode.org/faq/utf_bom.html#bom5 for further details.
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66
diff --git a/src/xkbcomp/rules.c b/src/xkbcomp/rules.c
index f5d9c49..daa4f3d 100644
--- a/src/xkbcomp/rules.c
+++ b/src/xkbcomp/rules.c
@@ -1099,6 +1099,10 @@ read_rules_file(struct xkb_context *ctx,
scanner_init(&scanner, matcher->ctx, string, size, path, NULL);
+ /* Skip UTF-8 encoded BOM (U+FEFF) */
+ /* See: https://www.unicode.org/faq/utf_bom.html#bom5 */
+ scanner_str(&scanner, "\xef\xbb\xbf", 3);
+
ret = matcher_match(matcher, &scanner, include_depth, string, size, path);
unmap_file(string, size);
diff --git a/test/data/rules/utf-8_with_bom b/test/data/rules/utf-8_with_bom
new file mode 100644
index 0000000..a3c3a73
--- /dev/null
+++ b/test/data/rules/utf-8_with_bom
@@ -0,0 +1,22 @@
+// NOTE: this file is encoded in UTF-8 with a leading BOM (U+FEFF)
+! model = keycodes
+ my_model = my_keycodes
+ * = default_keycodes
+
+! layout variant = symbols
+ my_layout my_variant = my_symbols+extra_variant
+
+! layout = symbols
+ my_layout = my_symbols
+ * = default_symbols
+
+! model = types
+ my_model = my_types
+ * = default_types
+
+! model = compat
+ my_model = my_compat
+ * = default_compat
+
+! option = compat
+ my_option = |some:compat
diff --git a/test/rules-file.c b/test/rules-file.c
index d217ba9..302aa68 100644
--- a/test/rules-file.c
+++ b/test/rules-file.c
@@ -94,6 +94,18 @@ main(int argc, char *argv[])
ctx = test_get_context(0);
assert(ctx);
+ struct test_data test_utf_8_with_bom = {
+ .rules = "utf-8_with_bom",
+
+ .model = "my_model", .layout = "my_layout", .variant = "my_variant",
+ .options = "my_option",
+
+ .keycodes = "my_keycodes", .types = "my_types",
+ .compat = "my_compat|some:compat",
+ .symbols = "my_symbols+extra_variant",
+ };
+ assert(test_rules(ctx, &test_utf_8_with_bom));
+
struct test_data test1 = {
.rules = "simple",