Compose: skip leading UTF-8 encoded BOM (U+FEFF). A leading BOM is legal and is used as a signature — an indication that an otherwise unmarked text file is in UTF-8. See https://www.unicode.org/faq/utf_bom.html#bom5 for further details.
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44
diff --git a/src/compose/parser.c b/src/compose/parser.c
index 5545a33..6740f21 100644
--- a/src/compose/parser.c
+++ b/src/compose/parser.c
@@ -534,6 +534,10 @@ initial:
production.mods = 0;
production.modmask = 0;
+ /* Skip UTF-8 encoded BOM (U+FEFF) */
+ /* See: https://www.unicode.org/faq/utf_bom.html#bom5 */
+ scanner_str(s, "\xef\xbb\xbf", 3);
+
/* fallthrough */
initial_eol:
diff --git a/test/compose.c b/test/compose.c
index 8c633d7..d7192f6 100644
--- a/test/compose.c
+++ b/test/compose.c
@@ -173,6 +173,16 @@ test_compose_seq_buffer(struct xkb_context *ctx, const char *buffer, ...)
}
static void
+test_compose_utf8_bom(struct xkb_context *ctx)
+{
+ const char *buffer = "\xef\xbb\xbf<A> : X";
+ assert(test_compose_seq_buffer(ctx, buffer,
+ XKB_KEY_A, XKB_COMPOSE_FEED_ACCEPTED, XKB_COMPOSE_COMPOSED, "X", XKB_KEY_X,
+ XKB_KEY_NoSymbol));
+}
+
+
+static void
test_seqs(struct xkb_context *ctx)
{
struct xkb_compose_table *table;
@@ -723,6 +733,7 @@ main(int argc, char *argv[])
unsetenv("XLOCALEDIR");
#endif
+ test_compose_utf8_bom(ctx);
test_seqs(ctx);
test_conflicting(ctx);
test_XCOMPOSEFILE(ctx);