Edit

kc3-lang/libxkbcommon/src/compose/table.h

Branch :

  • Show log

    Commit

  • Author : Ran Benita
    Date : 2021-03-29 16:05:14
    Hash : 02b9cabf
    Message : compose: use a ternary tree instead of a regular trie Previously we used a simple trie with a linked list for each chain. Unfortunately, most compose files have very long chains, which means the construction performs an almost quadratic number of comparisons. Switch to using a ternary search tree instead. This is very similar to a trie, only the linked list is essentially replaced with a binary tree. On the en_US/Compose file, the perf diff is the following (the modified function is `parse`): Event 'cycles:u' Baseline Delta Abs Shared Object Symbol ........ ......... ................ ................................. 39.91% -17.62% bench-compose [.] parse.constprop.0 20.54% +6.47% bench-compose [.] lex 17.28% +5.55% libc-2.33.so [.] __strcmp_avx2 12.78% +4.01% bench-compose [.] xkb_keysym_from_name 2.30% +0.83% libc-2.33.so [.] __GI_____strtoull_l_internal 3.36% +0.78% bench-compose [.] strcmp@plt Thanks to some careful packing, the memory usage is pretty much the same. Signed-off-by: Ran Benita <ran@unusedvar.com>

  • src/compose/table.h
  • /*
     * Copyright © 2013,2021 Ran Benita <ran234@gmail.com>
     *
     * Permission is hereby granted, free of charge, to any person obtaining a
     * copy of this software and associated documentation files (the "Software"),
     * to deal in the Software without restriction, including without limitation
     * the rights to use, copy, modify, merge, publish, distribute, sublicense,
     * and/or sell copies of the Software, and to permit persons to whom the
     * Software is furnished to do so, subject to the following conditions:
     *
     * The above copyright notice and this permission notice (including the next
     * paragraph) shall be included in all copies or substantial portions of the
     * Software.
     *
     * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
     * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
     * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
     * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
     * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
     * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
     * DEALINGS IN THE SOFTWARE.
     */
    
    #ifndef COMPOSE_COMPOSE_H
    #define COMPOSE_COMPOSE_H
    
    #include "xkbcommon/xkbcommon-compose.h"
    #include "utils.h"
    #include "context.h"
    
    /*
     * The compose table data structure is a ternary search tree.
     *
     * Reference: https://www.drdobbs.com/database/ternary-search-trees/184410528
     * Visualization: https://www.cs.usfca.edu/~galles/visualization/TST.html
     *
     * Short example. Given these sequences:
     *
     *      <B> <C>        : "first"  dead_a
     *      <B> <D> <E>    : "second" dead_b
     *      <A> <F>        : "third"  dead_c
     *
     * the tree would look like:
     *
     *          -------- [<B>]---------
     *          |          |          #
     *          v          v
     *     -- [<A>] --   [<C>] --------
     *     #    |    #     |          |
     *          v          #     -- [<D>] --
     *     -- [<F>] --           #    |    #
     *     #    |    #                v
     *          #                -- [<E>] --
     *                           #    |    #
     *                                #
     *
     * where:
     * - [<X>] is a node for a sequence keysym <X>.
     * - right arrows are `hikid` pointers.
     * - left arrows are `lokid` pointers.
     * - down arrows are `eqkid` pointers.
     * - # is a nil pointer.
     *
     * The nodes are all kept in a contiguous array.  Pointers are represented
     * as integer offsets into this array.  A nil pointer is represented as 0
     * (which, helpfully, is the offset of an empty dummy node).
     *
     * Nodes without an eqkid are leaf nodes.  Since a sequence cannot be a
     * prefix of another, these are exactly the nodes which terminate the
     * sequences (in a bijective manner).
     *
     * A leaf contains the result data of its sequence.  The result keysym is
     * contained in the node struct itself; the result UTF-8 string is a byte
     * offset into an array of the form "\0first\0second\0third" (the initial
     * \0 is so offset 0 points to an empty string).
     */
    
    /*
     * Limit on the number of compose nodes.  65535 is the largest value a
     * uint16_t can hold, and uint16_t is the type of the node offsets
     * (lokid/hikid/eqkid) stored in struct compose_node.  Independently of
     * that, it is a good idea to have some limit.
     *
     * NOTE(review): where the limit is enforced is not visible in this header.
     */
    #define MAX_COMPOSE_NODES 65535
    
    /*
     * One node of the ternary search tree described at the top of this file.
     *
     * Every node carries the sequence keysym it stands for plus the two
     * sideways links (lokid/hikid).  The rest of the node is a union with
     * three views over the same storage:
     *
     *  - the anonymous struct, which exposes only `is_leaf`, so the node kind
     *    can be tested before choosing one of the named views;
     *  - `internal`, for nodes that have an eqkid;
     *  - `leaf`, for nodes without an eqkid, which hold the result data of
     *    the sequence they terminate.
     *
     * All three views start with the same two bit-fields (a 31-bit field
     * followed by the 1-bit `is_leaf`), which is what lets `is_leaf` be read
     * through any of them.  Keep these leading fields identical in type, width
     * and order when modifying the struct.
     */
    struct compose_node {
        /* The sequence keysym this node is for (the <X> in the diagram). */
        xkb_keysym_t keysym;
    
        /* Offset into xkb_compose_table::nodes or 0. */
        /* This is the "left arrow" link of the diagram; 0 is the nil pointer. */
        uint16_t lokid;
        /* Offset into xkb_compose_table::nodes or 0. */
        /* This is the "right arrow" link of the diagram; 0 is the nil pointer. */
        uint16_t hikid;
    
        union {
            /* Kind-agnostic view: only the discriminator is meaningful. */
            struct {
                /* Occupies the bits that `leaf.utf8` uses in the leaf view. */
                uint32_t _pad:31;
                /* Discriminator: selects between the `internal` and `leaf` views. */
                bool is_leaf:1;
            };
            /* View for a node that has an eqkid. */
            struct {
                /* Unused in this view; mirrors the leading fields of the other views. */
                uint32_t _pad:31;
                /* Same bit as the anonymous view's `is_leaf`. */
                bool is_leaf:1;
                /* Offset into xkb_compose_table::nodes or 0. */
                /* This is the "down arrow" link of the diagram. */
                uint16_t eqkid;
            } internal;
            /* View for a node without an eqkid, i.e. one that terminates a sequence. */
            struct {
                /* Offset into xkb_compose_table::utf8. */
                /* Byte offset of the result string; offset 0 is the empty string. */
                uint32_t utf8:31;
                /* Same bit as the anonymous view's `is_leaf`. */
                bool is_leaf:1;
                /*
                 * The result keysym of the sequence.  Distinct from the outer
                 * `keysym` member, which is the sequence keysym of this node.
                 */
                xkb_keysym_t keysym;
            } leaf;
        };
    };
    
    /*
     * A compose table: the ternary search tree of compose sequences together
     * with the storage for their result strings.
     */
    struct xkb_compose_table {
        /* Reference count -- presumably managed by ref/unref code not visible here. */
        int refcnt;
        /* NOTE(review): presumably the format the table was created with -- confirm. */
        enum xkb_compose_format format;
        /* NOTE(review): presumably the compile flags given at creation -- confirm. */
        enum xkb_compose_compile_flags flags;
        /* Associated context; ownership/refcounting is not visible in this header. */
        struct xkb_context *ctx;
    
        /* Locale string; NOTE(review): presumably owned by the table -- confirm. */
        char *locale;
    
        /*
         * Result UTF-8 strings laid out back to back, e.g.
         * "\0first\0second\0third".  The initial \0 makes offset 0 an empty
         * string.  Indexed by compose_node::leaf.utf8.
         */
        darray_char utf8;
        /*
         * All tree nodes in one contiguous array.  Links between nodes are
         * offsets into this array; offset 0 is an empty dummy node and doubles
         * as the nil pointer.
         */
        darray(struct compose_node) nodes;
    };
    
    #endif