Commit ca7aa69cc0fb102cfa41edf4f9b5f1c68b2c49e5

Pierre Le Marre 2023-09-26T17:05:05

Disallow producing NULL character with escape sequences NULL usually terminates the strings; allowing to produce it via escape sequences may lead to undefined behaviour. - Make NULL escape sequences (e.g. `\0` and `\x0`) invalid. - Add corresponding test. - Introduce the new message: XKB_WARNING_INVALID_ESCAPE_SEQUENCE.

diff --git a/doc/message-registry.md b/doc/message-registry.md
index a3be5c1..24b2ffc 100644
--- a/doc/message-registry.md
+++ b/doc/message-registry.md
@@ -6,7 +6,7 @@ NOTE: This file has been generated automatically by “update-message-registry.p
 -->
 
 This page lists the warnings and errors generated by xkbcommon.
-There are currently 52 entries.
+There are currently 53 entries.
 
 @todo The documentation of the log messages is a work in progress.
 
@@ -24,6 +24,7 @@ There are currently 52 entries.
 | [XKB-150] | `wrong-statement-type` | The type of the statement is not allowed in the context | Error |
 | [XKB-172] | `unsupported-geometry-section` | Geometry sections are not supported | Warning |
 | [XKB-183] | `cannot-infer-key-type` | Warn if no key type can be inferred | Warning |
+| [XKB-193] | `invalid-escape-sequence` | Invalid escape sequence in a string | Warning |
 | [XKB-195] | `illegal-key-type-preserve-result` | The result of a key type “preserve” entry must be a subset of its input modifiers. | Warning |
 | [XKB-203] | `invalid-include-statement` | Syntax error in the include statement | Error |
 | [XKB-206] | `invalid-modmap-entry` | A modmap entry is invalid | Error |
@@ -198,6 +199,14 @@ key <AB08> {[ comma, semicolon, periodcentered, multiply ]};
   <dt>Summary</dt><dd>Warn if no key type can be inferred</dd>
 </dl>
 
+### XKB-193 – Invalid escape sequence {#XKB-193}
+
+<dl>
+  <dt>Since</dt><dd>1.0.0</dd>
+  <dt>Type</dt><dd>Warning</dd>
+  <dt>Summary</dt><dd>Invalid escape sequence in a string</dd>
+</dl>
+
 ### XKB-195 – Illegal key type preserve result {#XKB-195}
 
 <dl>
@@ -624,6 +633,7 @@ The modifiers used in `map` or `preserve` entries should be declared using the e
 [XKB-150]: @ref XKB-150
 [XKB-172]: @ref XKB-172
 [XKB-183]: @ref XKB-183
+[XKB-193]: @ref XKB-193
 [XKB-195]: @ref XKB-195
 [XKB-203]: @ref XKB-203
 [XKB-206]: @ref XKB-206
diff --git a/doc/message-registry.yaml b/doc/message-registry.yaml
index 880c2f6..9f4c450 100644
--- a/doc/message-registry.yaml
+++ b/doc/message-registry.yaml
@@ -100,6 +100,11 @@
   added: ALWAYS
   type: warning
   description: "Warn if no key type can be inferred"
+- id: "invalid-escape-sequence"
+  code: 193
+  added: ALWAYS
+  type: warning
+  description: "Invalid escape sequence in a string"
 - id: "illegal-key-type-preserve-result"
   code: 195
   added: ALWAYS
diff --git a/src/compose/parser.c b/src/compose/parser.c
index e1b81de..57be1bd 100644
--- a/src/compose/parser.c
+++ b/src/compose/parser.c
@@ -178,13 +178,24 @@ skip_more_whitespace_and_comments:
                     scanner_buf_append(s, '"');
                 }
                 else if (scanner_chr(s, 'x') || scanner_chr(s, 'X')) {
-                    if (scanner_hex(s, &o))
+                    if (scanner_hex(s, &o) && is_valid_char((char) o)) {
                         scanner_buf_append(s, (char) o);
-                    else
-                        scanner_warn(s, "illegal hexadecimal escape sequence in string literal");
+                    } else {
+                        // [TODO] actually show the sequence
+                        scanner_warn_with_code(s,
+                            XKB_WARNING_INVALID_ESCAPE_SEQUENCE,
+                            "illegal hexadecimal escape sequence in string literal");
+                    }
                 }
                 else if (scanner_oct(s, &o)) {
-                    scanner_buf_append(s, (char) o);
+                    if (is_valid_char((char) o)) {
+                        scanner_buf_append(s, (char) o);
+                    } else {
+                        // [TODO] actually show the sequence
+                        scanner_warn_with_code(s,
+                            XKB_WARNING_INVALID_ESCAPE_SEQUENCE,
+                            "illegal octal escape sequence in string literal");
+                    }
                 }
                 else {
                     scanner_warn(s, "unknown escape sequence (%c) in string literal", scanner_peek(s));
diff --git a/src/messages-codes.h b/src/messages-codes.h
index 5d8c84c..af7bb93 100644
--- a/src/messages-codes.h
+++ b/src/messages-codes.h
@@ -36,6 +36,8 @@ enum xkb_message_code {
     XKB_WARNING_UNSUPPORTED_GEOMETRY_SECTION = 172,
     /** Warn if no key type can be inferred */
     XKB_WARNING_CANNOT_INFER_KEY_TYPE = 183,
+    /** Invalid escape sequence in a string */
+    XKB_WARNING_INVALID_ESCAPE_SEQUENCE = 193,
     /** The result of a key type “preserve” entry must be a subset of its input modifiers. */
     XKB_WARNING_ILLEGAL_KEY_TYPE_PRESERVE_RESULT = 195,
     /** Syntax error in the include statement */
diff --git a/src/utils.h b/src/utils.h
index fdf758b..aa7969c 100644
--- a/src/utils.h
+++ b/src/utils.h
@@ -65,6 +65,15 @@
 #define STRINGIFY(x) #x
 #define STRINGIFY2(x) STRINGIFY(x)
 
+/* Check if a character is valid in a string literal */
+static inline bool
+is_valid_char(char c)
+{
+    /* Currently we only check for NULL character, but this could be extended
+     * in the future to further ASCII control characters. */
+    return c != 0;
+}
+
 char
 to_lower(char c);
 
diff --git a/src/xkbcomp/scanner.c b/src/xkbcomp/scanner.c
index e9d503c..800d2d0 100644
--- a/src/xkbcomp/scanner.c
+++ b/src/xkbcomp/scanner.c
@@ -98,7 +98,15 @@ skip_more_whitespace_and_comments:
                 else if (scanner_chr(s, 'f'))  scanner_buf_append(s, '\f');
                 else if (scanner_chr(s, 'v'))  scanner_buf_append(s, '\v');
                 else if (scanner_chr(s, 'e'))  scanner_buf_append(s, '\033');
-                else if (scanner_oct(s, &o))   scanner_buf_append(s, (char) o);
+                else if (scanner_oct(s, &o)) {
+                    if (is_valid_char((char) o)) {
+                        scanner_buf_append(s, (char) o);
+                    } else {
+                        scanner_warn_with_code(s,
+                            XKB_WARNING_INVALID_ESCAPE_SEQUENCE,
+                            "invalid octal escape sequence: \\%o", o);
+                    }
+                }
                 else {
                     // TODO: display actual sequence! See: scanner_peek(s).
                     //       require escaping any potential control character
diff --git a/test/compose.c b/test/compose.c
index 5e7cba0..3d45805 100644
--- a/test/compose.c
+++ b/test/compose.c
@@ -684,6 +684,17 @@ test_traverse(struct xkb_context *ctx)
     xkb_compose_table_unref(table);
 }
 
+static void
+test_escape_sequences(struct xkb_context *ctx)
+{
+    const char *table_string = "<o> <e> : \"f\\x0o\\0o\" X\n";
+
+    assert(test_compose_seq_buffer(ctx, table_string,
+        XKB_KEY_o, XKB_COMPOSE_FEED_ACCEPTED, XKB_COMPOSE_COMPOSING,  "",     XKB_KEY_NoSymbol,
+        XKB_KEY_e, XKB_COMPOSE_FEED_ACCEPTED, XKB_COMPOSE_COMPOSED,   "foo",  XKB_KEY_X,
+        XKB_KEY_NoSymbol));
+}
+
 int
 main(int argc, char *argv[])
 {
@@ -717,6 +728,7 @@ main(int argc, char *argv[])
     test_include(ctx);
     test_override(ctx);
     test_traverse(ctx);
+    test_escape_sequences(ctx);
 
     xkb_context_unref(ctx);
     return 0;
diff --git a/test/data/keymaps/invalid-escape-sequence.xkb b/test/data/keymaps/invalid-escape-sequence.xkb
new file mode 100644
index 0000000..99349ec
--- /dev/null
+++ b/test/data/keymaps/invalid-escape-sequence.xkb
@@ -0,0 +1,9 @@
+xkb_keymap {
+    // The following include statement has an octal escape sequence that
+    // must be ignored. Else it would insert a NULL character and thus
+    // truncates the string to "evde", while we expect "evdev+aliases(qwerty)".
+    xkb_keycodes  { include "evde\0v+aliases(qwerty)" };
+    xkb_types     { include "complete" };
+    xkb_compat    { include "complete" };
+    xkb_symbols   { include "pc+us" };
+};
diff --git a/test/filecomp.c b/test/filecomp.c
index 5fc7477..e73dc4c 100644
--- a/test/filecomp.c
+++ b/test/filecomp.c
@@ -48,6 +48,7 @@ main(void)
     assert(test_file(ctx, "keymaps/quartz.xkb"));
     assert(test_file(ctx, "keymaps/no-aliases.xkb"));
     assert(test_file(ctx, "keymaps/modmap-none.xkb"));
+    assert(test_file(ctx, "keymaps/invalid-escape-sequence.xkb"));
 
     assert(!test_file(ctx, "keymaps/divide-by-zero.xkb"));
     assert(!test_file(ctx, "keymaps/bad.xkb"));
diff --git a/tools/messages.c b/tools/messages.c
index fc3b411..9230dfd 100644
--- a/tools/messages.c
+++ b/tools/messages.c
@@ -48,6 +48,7 @@ static const struct xkb_message_entry xkb_messages[] = {
     {XKB_ERROR_WRONG_STATEMENT_TYPE, "Wrong statement type"},
     {XKB_WARNING_UNSUPPORTED_GEOMETRY_SECTION, "Unsupported geometry section"},
     {XKB_WARNING_CANNOT_INFER_KEY_TYPE, "Cannot infer key type"},
+    {XKB_WARNING_INVALID_ESCAPE_SEQUENCE, "Invalid escape sequence"},
     {XKB_WARNING_ILLEGAL_KEY_TYPE_PRESERVE_RESULT, "Illegal key type preserve result"},
     {XKB_ERROR_INVALID_INCLUDE_STATEMENT, "Invalid include statement"},
     {XKB_ERROR_INVALID_MODMAP_ENTRY, "Invalid modmap entry"},