rules: early detection of invalid encoding
diff --git a/src/xkbcomp/rules.c b/src/xkbcomp/rules.c
index daa4f3d..918fe31 100644
--- a/src/xkbcomp/rules.c
+++ b/src/xkbcomp/rules.c
@@ -1084,29 +1084,36 @@ read_rules_file(struct xkb_context *ctx,
FILE *file,
const char *path)
{
- bool ret = false;
+ bool ret;
char *string;
size_t size;
struct scanner scanner;
- ret = map_file(file, &string, &size);
- if (!ret) {
+ if (!map_file(file, &string, &size)) {
log_err(ctx, XKB_LOG_MESSAGE_NO_ID,
"Couldn't read rules file \"%s\": %s\n",
path, strerror(errno));
- goto out;
+ return false;
}
scanner_init(&scanner, matcher->ctx, string, size, path, NULL);
- /* Skip UTF-8 encoded BOM (U+FEFF) */
- /* See: https://www.unicode.org/faq/utf_bom.html#bom5 */
- scanner_str(&scanner, "\xef\xbb\xbf", 3);
+ /* Basic detection of wrong character encoding.
+ The first character relevant to the grammar must be ASCII:
+ whitespace, !, / (for comment) */
+ if (!scanner_check_supported_char_encoding(&scanner)) {
+ scanner_err(&scanner,
+ "This could be a file encoding issue. "
+ "Supported encodings must be backward compatible with ASCII.");
+ scanner_err(&scanner,
+ "E.g. ISO/CEI 8859 and UTF-8 are supported "
+ "but UTF-16, UTF-32 and CP1026 are not.");
+ unmap_file(string, size);
+ return false;
+ }
ret = matcher_match(matcher, &scanner, include_depth, string, size, path);
-
unmap_file(string, size);
-out:
return ret;
}
diff --git a/test/data/rules/utf-16be_with_bom b/test/data/rules/utf-16be_with_bom
new file mode 100644
index 0000000..ea44bab
Binary files /dev/null and b/test/data/rules/utf-16be_with_bom differ
diff --git a/test/data/rules/utf-16le_with_bom b/test/data/rules/utf-16le_with_bom
new file mode 100644
index 0000000..9faf37e
Binary files /dev/null and b/test/data/rules/utf-16le_with_bom differ
diff --git a/test/data/rules/utf-32be b/test/data/rules/utf-32be
new file mode 100644
index 0000000..588e32e
Binary files /dev/null and b/test/data/rules/utf-32be differ
diff --git a/test/rules-file.c b/test/rules-file.c
index 302aa68..726ec89 100644
--- a/test/rules-file.c
+++ b/test/rules-file.c
@@ -106,6 +106,42 @@ main(int argc, char *argv[])
};
assert(test_rules(ctx, &test_utf_8_with_bom));
+ struct test_data test_utf_16le_with_bom = {
+ .rules = "utf-16le_with_bom",
+
+ .model = "my_model", .layout = "my_layout", .variant = "my_variant",
+ .options = "my_option",
+
+ .keycodes = "my_keycodes", .types = "my_types",
+ .compat = "my_compat|some:compat",
+ .symbols = "my_symbols+extra_variant",
+ };
+ assert(!test_rules(ctx, &test_utf_16le_with_bom));
+
+ struct test_data test_utf_16be_with_bom = {
+ .rules = "utf-16be_with_bom",
+
+ .model = "my_model", .layout = "my_layout", .variant = "my_variant",
+ .options = "my_option",
+
+ .keycodes = "my_keycodes", .types = "my_types",
+ .compat = "my_compat|some:compat",
+ .symbols = "my_symbols+extra_variant",
+ };
+ assert(!test_rules(ctx, &test_utf_16be_with_bom));
+
+ struct test_data test_utf_32be = {
+ .rules = "utf-32be",
+
+ .model = "my_model", .layout = "my_layout", .variant = "my_variant",
+ .options = "my_option",
+
+ .keycodes = "my_keycodes", .types = "my_types",
+ .compat = "my_compat|some:compat",
+ .symbols = "my_symbols+extra_variant",
+ };
+ assert(!test_rules(ctx, &test_utf_32be));
+
struct test_data test1 = {
.rules = "simple",