Commit 59886e4183f07a90b485d88603cc34a398fa618f

Pierre Le Marre 2023-10-29T07:20:29

Compose: skip leading UTF-8 encoded BOM (U+FEFF). A leading BOM is legal and is used as a signature — an indication that an otherwise unmarked text file is in UTF-8. See https://www.unicode.org/faq/utf_bom.html#bom5 for further details.

diff --git a/src/compose/parser.c b/src/compose/parser.c
index 5545a33..6740f21 100644
--- a/src/compose/parser.c
+++ b/src/compose/parser.c
@@ -534,6 +534,10 @@ initial:
     production.mods = 0;
     production.modmask = 0;
 
+    /* Skip UTF-8 encoded BOM (U+FEFF) */
+    /* See: https://www.unicode.org/faq/utf_bom.html#bom5 */
+    scanner_str(s, "\xef\xbb\xbf", 3);
+
     /* fallthrough */
 
 initial_eol:
diff --git a/test/compose.c b/test/compose.c
index 8c633d7..d7192f6 100644
--- a/test/compose.c
+++ b/test/compose.c
@@ -173,6 +173,16 @@ test_compose_seq_buffer(struct xkb_context *ctx, const char *buffer, ...)
 }
 
 static void
+test_compose_utf8_bom(struct xkb_context *ctx)
+{
+    const char *buffer = "\xef\xbb\xbf<A> : X";
+    assert(test_compose_seq_buffer(ctx, buffer,
+        XKB_KEY_A, XKB_COMPOSE_FEED_ACCEPTED, XKB_COMPOSE_COMPOSED, "X", XKB_KEY_X,
+        XKB_KEY_NoSymbol));
+}
+
+
+static void
 test_seqs(struct xkb_context *ctx)
 {
     struct xkb_compose_table *table;
@@ -723,6 +733,7 @@ main(int argc, char *argv[])
     unsetenv("XLOCALEDIR");
 #endif
 
+    test_compose_utf8_bom(ctx);
     test_seqs(ctx);
     test_conflicting(ctx);
     test_XCOMPOSEFILE(ctx);