Edit

kc3-lang/libxkbcommon/src/compose/parser.c

Branch :

  • Show log

    Commit

  • Author : Ran Benita
    Date : 2025-01-29 15:35:22
    Hash : e120807b
    Message : Update license notices to SDPX short identifiers + update LICENSE Fix #628. Signed-off-by: Ran Benita <ran@unusedvar.com>

  • src/compose/parser.c
  • /*
     * For HPND-sell-variant:
     * Copyright 1992 by Oki Technosystems Laboratory, Inc.
     * Copyright 1992 by Fuji Xerox Co., Ltd.
     *
     * For MIT:
     * Copyright © 2013 Ran Benita <ran234@gmail.com>
     *
     * SPDX-License-Identifier: HPND-sell-variant AND MIT
     *
     * Author: Yasuhiro Kawai        Oki Technosystems Laboratory
     * Author: Kazunori Nishihara    Fuji Xerox
     */
    
    #include "config.h"
    
    #include <errno.h>
    
    #include "utils.h"
    #include "table.h"
    #include "scanner-utils.h"
    #include "paths.h"
    #include "utf8.h"
    #include "parser.h"
    #include "keysym.h"
    
    /*
     * Grammar adapted from libX11/modules/im/ximcp/imLcPrs.c.
     * See also the XCompose(5) manpage.
     *
     * FILE          ::= { [PRODUCTION] [COMMENT] "\n" | INCLUDE }
     * INCLUDE       ::= "include" '"' INCLUDE_STRING '"'
     * PRODUCTION    ::= LHS ":" RHS [ COMMENT ]
     * COMMENT       ::= "#" {<any character except null or newline>}
     * LHS           ::= EVENT { EVENT }
     * EVENT         ::= [MODIFIER_LIST] "<" keysym ">"
     * MODIFIER_LIST ::= (["!"] {MODIFIER} ) | "None"
     * MODIFIER      ::= ["~"] MODIFIER_NAME
     * MODIFIER_NAME ::= ("Ctrl"|"Lock"|"Caps"|"Shift"|"Alt"|"Meta")
     * RHS           ::= ( STRING | keysym | STRING keysym )
     * STRING        ::= '"' { CHAR } '"'
     * CHAR          ::= GRAPHIC_CHAR | ESCAPED_CHAR
     * GRAPHIC_CHAR  ::= locale (codeset) dependent code
     * ESCAPED_CHAR  ::= ('\\' | '\"' | OCTAL | HEX )
     * OCTAL         ::= '\' OCTAL_CHAR [OCTAL_CHAR [OCTAL_CHAR]]
     * OCTAL_CHAR    ::= (0|1|2|3|4|5|6|7)
     * HEX           ::= '\' (x|X) HEX_CHAR [HEX_CHAR]
     * HEX_CHAR      ::= (0|1|2|3|4|5|6|7|8|9|A|B|C|D|E|F|a|b|c|d|e|f)
     *
     * INCLUDE_STRING is a filesystem path, with the following %-expansions:
     *     %% - '%'.
     *     %H - The user's home directory (the $HOME environment variable).
     *     %L - The name of the locale specific Compose file (e.g.,
     *          "/usr/share/X11/locale/<localename>/Compose").
     *     %S - The name of the system directory for Compose files (e.g.,
     *          "/usr/share/X11/locale").
     */
    
    enum rules_token {
        TOK_END_OF_FILE = 0,
        TOK_END_OF_LINE,
        TOK_INCLUDE,
        TOK_INCLUDE_STRING,
        TOK_LHS_KEYSYM,
        TOK_COLON,
        TOK_BANG,
        TOK_TILDE,
        TOK_STRING,
        TOK_IDENT,
        TOK_ERROR
    };
    
    /*
     * Values returned with some tokens, like yylval.
     *
     * The lexers fill this in for TOK_LHS_KEYSYM, TOK_STRING, TOK_IDENT and
     * TOK_INCLUDE_STRING; for other tokens it is left untouched.
     */
    union lvalue {
        struct {
            /* Still \0-terminated. Points into the scanner's buffer (s->buf). */
            const char *str;
            /*
             * Set from s->buf_pos right after the terminating '\0' was
             * appended, so this appears to count the terminator as well
             * (NOTE(review): confirm against scanner_buf_append()).
             */
            size_t len;
        } string;
    };
    
    /*
     * Scan the next token of a Compose file.
     *
     * Leading whitespace and '#' comments are skipped. A newline met while
     * skipping is reported as TOK_END_OF_LINE, and end of input as
     * TOK_END_OF_FILE. For TOK_LHS_KEYSYM, TOK_STRING and TOK_IDENT the token
     * text is accumulated in the scanner's buffer and exposed through `val`;
     * for all other tokens `val` is not written.
     *
     * On a lexing failure an error is logged through the scanner and TOK_ERROR
     * is returned.
     */
    static enum rules_token
    lex(struct scanner *s, union lvalue *val)
    {
    skip_more_whitespace_and_comments:
        /* Skip spaces; a newline is itself a token. */
        while (is_space(scanner_peek(s)))
            if (scanner_next(s) == '\n')
                return TOK_END_OF_LINE;

        /* Skip comments, then look for more whitespace after them. */
        if (scanner_chr(s, '#')) {
            scanner_skip_to_eol(s);
            goto skip_more_whitespace_and_comments;
        }

        /* See if we're done. */
        if (scanner_eof(s)) return TOK_END_OF_FILE;

        /* New token: remember where it starts and reset the text buffer. */
        s->token_pos = s->pos;
        s->buf_pos = 0;

        /* LHS Keysym: everything between '<' and '>' on the same line. */
        if (scanner_chr(s, '<')) {
            while (scanner_peek(s) != '>' && !scanner_eol(s) && !scanner_eof(s))
                scanner_buf_append(s, scanner_next(s));
            if (!scanner_chr(s, '>')) {
                scanner_err(s, XKB_LOG_MESSAGE_NO_ID,
                            "unterminated keysym literal");
                return TOK_ERROR;
            }
            /*
             * Append results inside the loop above are not checked; only this
             * final append of the terminator is, and its failure is reported
             * as the literal being too long.
             */
            if (!scanner_buf_append(s, '\0')) {
                scanner_err(s, XKB_LOG_MESSAGE_NO_ID,
                            "keysym literal is too long");
                return TOK_ERROR;
            }
            val->string.str = s->buf;
            val->string.len = s->buf_pos;
            return TOK_LHS_KEYSYM;
        }

        /* Single-character tokens: colon, bang, tilde. */
        if (scanner_chr(s, ':'))
            return TOK_COLON;
        if (scanner_chr(s, '!'))
            return TOK_BANG;
        if (scanner_chr(s, '~'))
            return TOK_TILDE;

        /* String literal. */
        if (scanner_chr(s, '\"')) {
            while (!scanner_eof(s) && !scanner_eol(s) && scanner_peek(s) != '\"') {
                if (scanner_chr(s, '\\')) {
                    uint8_t o;
                    /* Position just after the backslash, used for diagnostics. */
                    size_t start_pos = s->pos;
                    if (scanner_chr(s, '\\')) {
                        scanner_buf_append(s, '\\');
                    }
                    else if (scanner_chr(s, '"')) {
                        scanner_buf_append(s, '"');
                    }
                    else if (scanner_chr(s, 'x') || scanner_chr(s, 'X')) {
                        if (scanner_hex(s, &o) && is_valid_char((char) o)) {
                            scanner_buf_append(s, (char) o);
                        } else {
                            /*
                             * Invalid hex escape: warn and drop it. The printed
                             * span starts at the backslash (start_pos - 1).
                             */
                            scanner_warn(s, XKB_WARNING_INVALID_ESCAPE_SEQUENCE,
                                         "illegal hexadecimal escape sequence (%.*s) "
                                         "in string literal",
                                         (int) (s->pos - start_pos + 1),
                                         &s->s[start_pos - 1]);
                        }
                    }
                    else if (scanner_oct(s, &o) && is_valid_char((char) o)) {
                        scanner_buf_append(s, (char) o);
                    }
                    else if (s->pos > start_pos) {
                        /*
                         * The scanner advanced past the backslash but the
                         * result was rejected above: report it as a bad octal
                         * escape.
                         */
                        scanner_warn(s, XKB_WARNING_INVALID_ESCAPE_SEQUENCE,
                                     "illegal octal escape sequence (%.*s) "
                                     "in string literal",
                                     (int) (s->pos - start_pos + 1),
                                     &s->s[start_pos - 1]);
                        /* Ignore. */
                    }
                    else {
                        /*
                         * Nothing was consumed after the backslash. Only the
                         * backslash is dropped; the peeked character is left
                         * for the next loop iteration.
                         */
                        scanner_warn(s, XKB_WARNING_UNKNOWN_CHAR_ESCAPE_SEQUENCE,
                                     "unknown escape sequence (\\%c) in string literal",
                                     scanner_peek(s));
                        /* Ignore. */
                    }
                } else {
                    scanner_buf_append(s, scanner_next(s));
                }
            }
            if (!scanner_chr(s, '\"')) {
                scanner_err(s, XKB_LOG_MESSAGE_NO_ID,
                            "unterminated string literal");
                return TOK_ERROR;
            }
            if (!scanner_buf_append(s, '\0')) {
                scanner_err(s, XKB_LOG_MESSAGE_NO_ID,
                            "string literal is too long");
                return TOK_ERROR;
            }
            /* Validate the unescaped bytes, excluding the terminator. */
            if (!is_valid_utf8(s->buf, s->buf_pos - 1)) {
                scanner_err(s, XKB_LOG_MESSAGE_NO_ID,
                            "string literal is not a valid UTF-8 string");
                return TOK_ERROR;
            }
            val->string.str = s->buf;
            val->string.len = s->buf_pos;
            return TOK_STRING;
        }

        /* Identifier or include: a letter or '_' followed by alnum or '_'. */
        if (is_alpha(scanner_peek(s)) || scanner_peek(s) == '_') {
            s->buf_pos = 0;
            while (is_alnum(scanner_peek(s)) || scanner_peek(s) == '_')
                scanner_buf_append(s, scanner_next(s));
            if (!scanner_buf_append(s, '\0')) {
                scanner_err(s, XKB_LOG_MESSAGE_NO_ID,
                            "identifier is too long");
                return TOK_ERROR;
            }

            /* "include" is a keyword; `val` is not set for it. */
            if (streq(s->buf, "include"))
                return TOK_INCLUDE;

            val->string.str = s->buf;
            val->string.len = s->buf_pos;
            return TOK_IDENT;
        }

        scanner_err(s, XKB_LOG_MESSAGE_NO_ID,
                    "unrecognized token");
        /* Discard rest of line. */
        scanner_skip_to_eol(s);
        return TOK_ERROR;
    }
    
    /*
     * Scan the quoted path that follows an "include" keyword.
     *
     * The path is copied into the scanner's buffer with the %-expansions
     * applied (%% -> '%', %H -> $HOME, %L -> locale Compose file path,
     * %S -> X locale directory). On success the expanded path is handed back
     * through `val_out` and TOK_INCLUDE_STRING is returned. If a newline is
     * found before the opening quote, TOK_END_OF_LINE is returned. Every
     * failure logs an error and returns TOK_ERROR.
     */
    static enum rules_token
    lex_include_string(struct scanner *s, struct xkb_compose_table *table,
                       union lvalue *val_out)
    {
        /* Blanks before the path are irrelevant, but a newline ends the line. */
        while (is_space(scanner_peek(s))) {
            if (scanner_next(s) == '\n')
                return TOK_END_OF_LINE;
        }

        s->token_pos = s->pos;
        s->buf_pos = 0;

        if (!scanner_chr(s, '\"')) {
            scanner_err(s, XKB_LOG_MESSAGE_NO_ID,
                        "include statement must be followed by a path");
            return TOK_ERROR;
        }

        for (;;) {
            /* Stop at end of input, end of line or the closing quote. */
            if (scanner_eof(s) || scanner_eol(s) || scanner_peek(s) == '\"')
                break;

            /* Ordinary character: copy it through unchanged. */
            if (!scanner_chr(s, '%')) {
                scanner_buf_append(s, scanner_next(s));
                continue;
            }

            /* "%%": a literal percent sign. */
            if (scanner_chr(s, '%')) {
                scanner_buf_append(s, '%');
                continue;
            }

            /* "%H": the HOME environment variable. */
            if (scanner_chr(s, 'H')) {
                const char *home = xkb_context_getenv(table->ctx, "HOME");
                if (!home) {
                    scanner_err(s, XKB_LOG_MESSAGE_NO_ID,
                                "%%H was used in an include statement, "
                                "but the HOME environment variable is not set");
                    return TOK_ERROR;
                }
                if (!scanner_buf_appends(s, home)) {
                    scanner_err(s, XKB_LOG_MESSAGE_NO_ID,
                                "include path after expanding %%H is too long");
                    return TOK_ERROR;
                }
                continue;
            }

            /* "%L": the Compose file of the table's locale (heap-allocated). */
            if (scanner_chr(s, 'L')) {
                bool appended;
                char *path = get_locale_compose_file_path(table->ctx, table->locale);
                if (!path) {
                    scanner_err(s, XKB_LOG_MESSAGE_NO_ID,
                                "failed to expand %%L to the locale Compose file");
                    return TOK_ERROR;
                }
                appended = scanner_buf_appends(s, path);
                free(path);
                if (!appended) {
                    scanner_err(s, XKB_LOG_MESSAGE_NO_ID,
                                "include path after expanding %%L is too long");
                    return TOK_ERROR;
                }
                continue;
            }

            /* "%S": the system directory for Compose files. */
            if (scanner_chr(s, 'S')) {
                const char *xlocaledir = get_xlocaledir_path(table->ctx);
                if (!scanner_buf_appends(s, xlocaledir)) {
                    scanner_err(s, XKB_LOG_MESSAGE_NO_ID,
                                "include path after expanding %%S is too long");
                    return TOK_ERROR;
                }
                continue;
            }

            /* Anything else after '%' is not a known expansion. */
            scanner_err(s, XKB_LOG_MESSAGE_NO_ID,
                        "unknown %% format (%c) in include statement", scanner_peek(s));
            return TOK_ERROR;
        }

        if (!scanner_chr(s, '\"')) {
            scanner_err(s, XKB_LOG_MESSAGE_NO_ID,
                        "unterminated include statement");
            return TOK_ERROR;
        }

        /* Terminate the path; a failure here means the buffer overflowed. */
        if (!scanner_buf_append(s, '\0')) {
            scanner_err(s, XKB_LOG_MESSAGE_NO_ID,
                        "include path is too long");
            return TOK_ERROR;
        }

        val_out->string.str = s->buf;
        val_out->string.len = s->buf_pos;
        return TOK_INCLUDE_STRING;
    }
    
    /*
     * One parsed Compose line ("LHS : RHS"), built up by parse() and handed to
     * add_production().
     */
    struct production {
        /* Left-hand side keysym sequence; only the first `len` entries are set. */
        xkb_keysym_t lhs[MAX_LHS_LEN];
        /* Number of keysyms stored in `lhs`. */
        unsigned int len;
        /* Right-hand side keysym; meaningful only when has_keysym is true. */
        xkb_keysym_t keysym;
        /* Right-hand side string; meaningful only when has_string is true. */
        char string[XKB_COMPOSE_MAX_STRING_SIZE];
        /* At least one of these is true. */
        bool has_keysym;
        bool has_string;

        /* The matching is as follows: (active_mods & modmask) == mods. */
        xkb_mod_mask_t modmask;
        xkb_mod_mask_t mods;
    };
    
    static void
    add_production(struct xkb_compose_table *table, struct scanner *s,
                   const struct production *production)
    {
        unsigned lhs_pos = 0;
        uint32_t curr = darray_size(table->nodes) == 1 ? 0 : 1;
        uint32_t *pptr = NULL;
        struct compose_node *node = NULL;
    
        /* Warn before potentially going over the limit, discard silently after. */
        if (darray_size(table->nodes) + production->len + MAX_LHS_LEN > MAX_COMPOSE_NODES)
            scanner_warn(s, XKB_LOG_MESSAGE_NO_ID,
                         "too many sequences for one Compose file; "
                         "will ignore further lines");
        if (darray_size(table->nodes) + production->len >= MAX_COMPOSE_NODES)
            return;
    
        /*
         * Insert the sequence to the ternary search tree, creating new nodes as
         * needed.
         *
         * TODO: We insert in the order given, this means some inputs can create
         * long O(n) chains, which results in total O(n^2) parsing time. We should
         * ensure the tree is reasonably balanced somehow.
         */
        while (true) {
            const xkb_keysym_t keysym = production->lhs[lhs_pos];
            const bool last = lhs_pos + 1 == production->len;
    
            if (curr == 0) {
                /*
                 * Create a new node and update the parent pointer to it.
                 * Update the pointer first because the append invalidates it.
                 */
                struct compose_node new = {
                    .keysym = keysym,
                    .lokid = 0,
                    .hikid = 0,
                    .internal = {
                        .eqkid = 0,
                        .is_leaf = false,
                    },
                };
                curr = darray_size(table->nodes);
                if (pptr != NULL) {
                    *pptr = curr;
                    pptr = NULL;
                }
                darray_append(table->nodes, new);
            }
    
            node = &darray_item(table->nodes, curr);
    
            if (keysym < node->keysym) {
                pptr = &node->lokid;
                curr = node->lokid;
            } else if (keysym > node->keysym) {
                pptr = &node->hikid;
                curr = node->hikid;
            } else if (!last) {
                if (node->is_leaf) {
                    scanner_warn(s, XKB_LOG_MESSAGE_NO_ID,
                                 "a sequence already exists which is a prefix of "
                                 "this sequence; overriding");
                    node->internal.eqkid = 0;
                    node->internal.is_leaf = false;
                }
                lhs_pos++;
                pptr = &node->internal.eqkid;
                curr = node->internal.eqkid;
            } else {
                if (node->is_leaf) {
                    bool same_string =
                        (node->leaf.utf8 == 0 && !production->has_string) ||
                        (
                            node->leaf.utf8 != 0 && production->has_string &&
                            streq(&darray_item(table->utf8, node->leaf.utf8),
                                  production->string)
                        );
                    bool same_keysym =
                        (node->leaf.keysym == XKB_KEY_NoSymbol && !production->has_keysym) ||
                        (
                            node->leaf.keysym != XKB_KEY_NoSymbol && production->has_keysym &&
                            node->leaf.keysym == production->keysym
                        );
                    if (same_string && same_keysym) {
                        scanner_warn(s, XKB_LOG_MESSAGE_NO_ID,
                                     "this compose sequence is a duplicate of another; "
                                     "skipping line");
                        return;
                    } else {
                        scanner_warn(s, XKB_LOG_MESSAGE_NO_ID,
                                     "this compose sequence already exists; overriding");
                    }
                } else if (node->internal.eqkid != 0) {
                    scanner_warn(s, XKB_LOG_MESSAGE_NO_ID,
                                 "this compose sequence is a prefix of another; "
                                 "skipping line");
                    return;
                }
                node->is_leaf = true;
                if (production->has_string) {
                    node->leaf.utf8 = darray_size(table->utf8);
                    darray_append_items(table->utf8, production->string,
                                        (unsigned)strlen(production->string) + 1);
                }
                if (production->has_keysym) {
                    node->leaf.keysym = production->keysym;
                }
                return;
            }
        }
    }
    
    /* Should match resolve_modifier(): one bit per index it can return (0..3). */
    #define ALL_MODS_MASK ((1u << 0) | (1u << 1) | (1u << 2) | (1u << 3))

    /*
     * Translate a Compose modifier name into its modifier index.
     *
     * "Shift" is 0, "Lock"/"Caps" are 1, "Ctrl" is 2 and "Alt"/"Meta" are 3.
     * Names are compared exactly (case-sensitive). Any other name yields
     * XKB_MOD_INVALID.
     */
    static xkb_mod_index_t
    resolve_modifier(const char *name)
    {
        if (streq(name, "Shift"))
            return 0;

        if (streq(name, "Lock") || streq(name, "Caps"))
            return 1;

        if (streq(name, "Ctrl"))
            return 2;

        if (streq(name, "Alt") || streq(name, "Meta"))
            return 3;

        return XKB_MOD_INVALID;
    }
    
    /*
     * Lex a single string literal ("...") from `string` and return a newly
     * allocated copy of its unescaped content; the caller owns the result.
     * If the first token is anything other than a string literal, the input is
     * printed to stderr and NULL is returned.
     *
     * This exists only so that tests can exercise character (un)escaping.
     */
    char *
    parse_string_literal(struct xkb_context *ctx, const char *string)
    {
        struct scanner s;
        union lvalue val;

        scanner_init(&s, ctx, string, strlen(string), "(unamed)", NULL);

        if (lex(&s, &val) != TOK_STRING) {
            fprintf(stderr, "ERROR: %s\n", s.s);
            return NULL;
        }

        return strdup(val.string.str);
    }
    
    /* Declared early: do_include() and parse() call each other. */
    static bool
    parse(struct xkb_compose_table *table, struct scanner *s,
          unsigned include_depth);

    /*
     * Parse the Compose file at `path` into `table` on behalf of an "include"
     * statement.
     *
     * s:             scanner of the including file; used for error reporting,
     *                and its `priv` pointer is passed on to the nested scanner.
     * include_depth: nesting level of the including file; the included file is
     *                parsed at include_depth + 1.
     *
     * Returns false if the maximum include depth is reached, if the file
     * cannot be opened or read, or if parsing it fails.
     */
    static bool
    do_include(struct xkb_compose_table *table, struct scanner *s,
               const char *path, unsigned include_depth)
    {
        FILE *file;
        char *string;
        size_t size;
        bool ok;

        /* Guard against runaway or circular includes. */
        if (include_depth >= MAX_INCLUDE_DEPTH) {
            scanner_err(s, XKB_LOG_MESSAGE_NO_ID,
                        "maximum include depth (%d) exceeded; maybe there is an include loop?",
                        MAX_INCLUDE_DEPTH);
            return false;
        }

        file = fopen(path, "rb");
        if (!file) {
            scanner_err(s, XKB_LOG_MESSAGE_NO_ID,
                        "failed to open included Compose file \"%s\": %s",
                        path, strerror(errno));
            return false;
        }

        ok = map_file(file, &string, &size);
        if (ok) {
            /* The nested scanner only lives for the duration of this parse. */
            struct scanner new_s;

            scanner_init(&new_s, table->ctx, string, size, path, s->priv);
            ok = parse(table, &new_s, include_depth + 1);
            unmap_file(string, size);
        } else {
            scanner_err(s, XKB_LOG_MESSAGE_NO_ID,
                        "failed to read included Compose file \"%s\": %s",
                        path, strerror(errno));
        }

        fclose(file);
        return ok;
    }
    
    /*
     * Parse a whole Compose file from scanner `s` into `table`.
     *
     * The parser is a state machine written with labels and gotos: each label
     * is a state, and labels ending in `_tok` are entry points for a token that
     * has already been read into `tok`.
     *
     * include_depth is the current include nesting level, passed on to
     * do_include().
     *
     * Lines that are merely skipped (warnings) do not count as errors. Errors
     * are counted; up to MAX_ERRORS of them are tolerated by skipping the
     * offending line, one more makes the parse fail. A failed include fails
     * the parse immediately.
     *
     * Returns true when the end of the file is reached, false otherwise.
     */
    static bool
    parse(struct xkb_compose_table *table, struct scanner *s,
          unsigned include_depth)
    {
        enum rules_token tok;
        union lvalue val;
        xkb_keysym_t keysym;
        struct production production;
        enum { MAX_ERRORS = 10 };
        int num_errors = 0;

        /* Basic detection of wrong character encoding.
           The first character relevant to the grammar must be ASCII:
           whitespace, include, modifier list, keysym, comment */
        if (!scanner_check_supported_char_encoding(s)) {
            scanner_err(s, XKB_LOG_MESSAGE_NO_ID,
                        "This could be a file encoding issue. "
                        "Supported file encodings are ASCII and UTF-8.");
            goto fail;
        }

    /* Start of a new line: reset the production being built. */
    initial:
        production.len = 0;
        production.has_keysym = false;
        production.has_string = false;
        production.mods = 0;
        production.modmask = 0;

        /* fallthrough */

    /* Skip empty lines, then dispatch on the first real token of the line. */
    initial_eol:
        switch (tok = lex(s, &val)) {
        case TOK_END_OF_LINE:
            goto initial_eol;
        case TOK_END_OF_FILE:
            goto finished;
        case TOK_INCLUDE:
            goto include;
        default:
            goto lhs_tok;
        }

    /* After "include": expect the quoted path. */
    include:
        switch (tok = lex_include_string(s, table, &val)) {
        case TOK_INCLUDE_STRING:
            goto include_eol;
        default:
            goto unexpected;
        }

    /* After the include path: expect the end of the line, then include. */
    include_eol:
        switch (tok = lex(s, &val)) {
        case TOK_END_OF_LINE:
            /*
             * `val` still holds the path set by lex_include_string(): lex()
             * returns TOK_END_OF_LINE before touching `val` or the buffer.
             */
            if (!do_include(table, s, val.string.str, include_depth))
                goto fail;
            goto initial;
        default:
            goto unexpected;
        }

    /* Left-hand side: a list of events up to the colon. */
    lhs:
        tok = lex(s, &val);
    lhs_tok:
        switch (tok) {
        case TOK_COLON:
            if (production.len <= 0) {
                scanner_warn(s, XKB_LOG_MESSAGE_NO_ID,
                             "expected at least one keysym on left-hand side; "
                             "skipping line");
                goto skip;
            }
            goto rhs;
        case TOK_IDENT:
            /* "None" is a complete modifier list: all known mods must be off. */
            if (streq(val.string.str, "None")) {
                production.mods = 0;
                production.modmask = ALL_MODS_MASK;
                goto lhs_keysym;
            }
            goto lhs_mod_list_tok;
        case TOK_TILDE:
            goto lhs_mod_list_tok;
        case TOK_BANG:
            /* "!" makes the list exact: every known modifier is in the mask. */
            production.modmask = ALL_MODS_MASK;
            goto lhs_mod_list;
        default:
            goto lhs_keysym_tok;
        }

    /* The "<keysym>" part of an event. */
    lhs_keysym:
        tok = lex(s, &val);
    lhs_keysym_tok:
        switch (tok) {
        case TOK_LHS_KEYSYM:
            keysym = xkb_keysym_from_name(val.string.str, XKB_KEYSYM_NO_FLAGS);
            if (keysym == XKB_KEY_NoSymbol) {
                scanner_err(s, XKB_LOG_MESSAGE_NO_ID,
                            "unrecognized keysym \"%s\" on left-hand side",
                            val.string.str);
                goto error;
            }
            check_deprecated_keysyms(scanner_warn, s, s->ctx,
                                     keysym, val.string.str, val.string.str, "%s", "");
            if (production.len + 1 > MAX_LHS_LEN) {
                scanner_warn(s, XKB_LOG_MESSAGE_NO_ID,
                             "too many keysyms (%d) on left-hand side; skipping line",
                             MAX_LHS_LEN + 1);
                goto skip;
            }
            production.lhs[production.len++] = keysym;
            /*
             * The modifiers parsed for this event are reset here, so they never
             * reach add_production(), which does not read them either.
             */
            production.mods = 0;
            production.modmask = 0;
            goto lhs;
        default:
            goto unexpected;
        }

    /* Modifier list of an event: a run of [~]NAME items before "<keysym>". */
    lhs_mod_list:
        tok = lex(s, &val);
    lhs_mod_list_tok: {
            bool tilde = false;
            xkb_mod_index_t mod;

            /* Anything else ends the list; it must be the event's keysym. */
            if (tok != TOK_TILDE && tok != TOK_IDENT)
                goto lhs_keysym_tok;

            if (tok == TOK_TILDE) {
                tilde = true;
                tok = lex(s, &val);
            }

            if (tok != TOK_IDENT)
                goto unexpected;

            mod = resolve_modifier(val.string.str);
            if (mod == XKB_MOD_INVALID) {
                scanner_err(s, XKB_LOG_MESSAGE_NO_ID,
                            "unrecognized modifier \"%s\"",
                            val.string.str);
                goto error;
            }

            /* A named modifier is always matched on; "~" requires it unset. */
            production.modmask |= UINT32_C(1) << mod;
            if (tilde)
                production.mods &= ~(UINT32_C(1) << mod);
            else
                production.mods |= UINT32_C(1) << mod;

            goto lhs_mod_list;
        }

    /* Right-hand side: an optional string followed by an optional keysym. */
    rhs:
        switch (tok = lex(s, &val)) {
        case TOK_STRING:
            if (production.has_string) {
                scanner_warn(s, XKB_LOG_MESSAGE_NO_ID,
                             "right-hand side can have at most one string; "
                             "skipping line");
                goto skip;
            }
            /*
             * NOTE(review): lex() sets string.len after appending the
             * terminator, so this looks like it can never be true and "" would
             * be accepted -- confirm whether len counts the '\0'.
             */
            if (val.string.len <= 0) {
                scanner_warn(s, XKB_LOG_MESSAGE_NO_ID,
                             "right-hand side string must not be empty; "
                             "skipping line");
                goto skip;
            }
            if (val.string.len > sizeof(production.string)) {
                scanner_warn(s, XKB_LOG_MESSAGE_NO_ID,
                             "right-hand side string is too long: "
                             "expected max: %d, got: %d; skipping line",
                             (int)sizeof(production.string) - 1,
                             (int)val.string.len - 1);
                goto skip;
            }
            memcpy(production.string, val.string.str, val.string.len);
            production.has_string = true;
            goto rhs;
        case TOK_IDENT:
            keysym = xkb_keysym_from_name(val.string.str, XKB_KEYSYM_NO_FLAGS);
            if (keysym == XKB_KEY_NoSymbol) {
                scanner_err(s, XKB_LOG_MESSAGE_NO_ID,
                            "unrecognized keysym \"%s\" on right-hand side",
                            val.string.str);
                goto error;
            }
            check_deprecated_keysyms(scanner_warn, s, s->ctx,
                                     keysym, val.string.str, val.string.str, "%s", "");
            if (production.has_keysym) {
                scanner_warn(s, XKB_LOG_MESSAGE_NO_ID,
                             "right-hand side can have at most one keysym; "
                             "skipping line");
                goto skip;
            }
            production.keysym = keysym;
            production.has_keysym = true;
            /*
             * A keysym completes the production at once; whatever follows on
             * the line is lexed as the start of the next line.
             */
            /* fallthrough */
        case TOK_END_OF_LINE:
            if (!production.has_string && !production.has_keysym) {
                scanner_warn(s, XKB_LOG_MESSAGE_NO_ID,
                             "right-hand side must have at least one of string "
                             "or keysym; skipping line");
                goto skip;
            }
            add_production(table, s, &production);
            goto initial;
        default:
            /*
             * NOTE(review): TOK_END_OF_FILE lands here too, so a last line
             * with a string-only right-hand side and no trailing newline is
             * reported as an error and dropped -- confirm this is intended.
             */
            goto unexpected;
        }

    /* A token that does not fit the grammar; lexer errors were already logged. */
    unexpected:
        if (tok != TOK_ERROR)
            scanner_err(s, XKB_LOG_MESSAGE_NO_ID,
                        "unexpected token");
    /* Count the error; tolerate up to MAX_ERRORS by skipping the line. */
    error:
        num_errors++;
        if (num_errors <= MAX_ERRORS)
            goto skip;

        scanner_err(s, XKB_LOG_MESSAGE_NO_ID,
                    "too many errors");
        goto fail;

    fail:
        scanner_err(s, XKB_LOG_MESSAGE_NO_ID,
                    "failed to parse file");
        return false;

    /* Discard the remaining tokens of the current line, then start afresh. */
    skip:
        while (tok != TOK_END_OF_LINE && tok != TOK_END_OF_FILE)
            tok = lex(s, &val);
        goto initial;

    finished:
        return true;
    }
    
    /*
     * Parse Compose source held in memory (`string`, `len` bytes) into `table`.
     * `file_name` is handed to the scanner together with the source.
     *
     * Returns true on success. Only then are the table's arrays shrunk; on
     * failure they are left as they are.
     */
    bool
    parse_string(struct xkb_compose_table *table, const char *string, size_t len,
                 const char *file_name)
    {
        struct scanner s;
        bool ok;

        scanner_init(&s, table->ctx, string, len, file_name, NULL);

        ok = parse(table, &s, 0);
        if (ok) {
            /* Maybe the allocator can use the excess space. */
            darray_shrink(table->nodes);
            darray_shrink(table->utf8);
        }

        return ok;
    }
    
    /*
     * Parse the Compose file `file` into `table`; `file_name` is used in
     * messages.
     *
     * The content is obtained with map_file() and released again before
     * returning. The FILE itself is not closed here. Returns false if the file
     * cannot be read or if parsing fails.
     */
    bool
    parse_file(struct xkb_compose_table *table, FILE *file, const char *file_name)
    {
        char *string;
        size_t size;
        bool ok;

        if (!map_file(file, &string, &size)) {
            log_err(table->ctx, XKB_LOG_MESSAGE_NO_ID,
                    "Couldn't read Compose file %s: %s\n",
                    file_name, strerror(errno));
            return false;
        }

        ok = parse_string(table, string, size, file_name);
        unmap_file(string, size);

        return ok;
    }