keysym: fix locale dependence in xkb_keysym_from_name()

We currently use strcasecmp, which is locale-dependent. In particular, one well-known surprise, even when restricted to just ASCII input, is found in the tr_TR (Turkish) locale; see e.g. https://msdn.microsoft.com/en-us/library/ms973919.aspx#stringsinnet20_topic5

We already knew to avoid locale-dependent functions, but in this case we forgot. Fix it by implementing our own simple ASCII-only strcasecmp/strncasecmp. It might have been possible to use strcasecmp_l() with the C locale, but we went the easy route.

A side advantage is that even this non-optimized version is faster than the optimized libc one (__strcasecmp_l_sse42), since it doesn't need to do the locale stuff. xkb_keysym_from_name(), which uses strcasecmp heavily, becomes faster, and so, for example, Compose file parsing, which uses xkb_keysym_from_name() heavily, becomes ~20% faster.

Resolves https://github.com/xkbcommon/libxkbcommon/issues/42

Signed-off-by: Ran Benita <ran234@gmail.com>
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208
diff --git a/configure.ac b/configure.ac
index fdab4ab..3484c8c 100644
--- a/configure.ac
+++ b/configure.ac
@@ -68,12 +68,6 @@ AS_IF([test ! -f "src/xkbcomp/parser.c" -a "x$YACC_INST" = x], [
])
# Checks for library functions.
-AC_CHECK_FUNCS([strcasecmp strncasecmp])
-AS_IF([test "x$ac_cv_func_strcasecmp" = xno -o \
- "x$ac_cv_func_strncasecmp" = xno], [
- AC_MSG_ERROR([C library does not support strcasecmp/strncasecmp])
-])
-
AC_CHECK_FUNCS([eaccess euidaccess mmap])
AC_CHECK_FUNCS([secure_getenv __secure_getenv])
diff --git a/src/keysym.c b/src/keysym.c
index db0e973..9e7b4fb 100644
--- a/src/keysym.c
+++ b/src/keysym.c
@@ -76,7 +76,7 @@ compare_by_name(const void *a, const void *b)
{
const char *key = a;
const struct name_keysym *entry = b;
- return strcasecmp(key, get_name(entry));
+ return istrcmp(key, get_name(entry));
}
XKB_EXPORT int
@@ -109,7 +109,7 @@ xkb_keysym_get_name(xkb_keysym_t ks, char *buffer, size_t size)
/*
* Find the correct keysym if one case-insensitive match is given.
*
- * The name_to_keysym table is sorted by strcasecmp(). So bsearch() may return
+ * The name_to_keysym table is sorted by istrcmp(). So bsearch() may return
* _any_ of all possible case-insensitive duplicates. This function searches the
* returned entry @entry, all previous and all next entries that match by
* case-insensitive comparison and returns the exact match to @name. If @icase
@@ -138,7 +138,7 @@ find_sym(const struct name_keysym *entry, const char *name, bool icase)
for (iter = entry - 1; iter >= name_to_keysym; --iter) {
if (!icase && strcmp(get_name(iter), name) == 0)
return iter;
- if (strcasecmp(get_name(iter), get_name(entry)) != 0)
+ if (istrcmp(get_name(iter), get_name(entry)) != 0)
break;
if (icase && xkb_keysym_is_lower(iter->keysym))
return iter;
@@ -148,7 +148,7 @@ find_sym(const struct name_keysym *entry, const char *name, bool icase)
for (iter = entry + 1; iter < last; ++iter) {
if (!icase && strcmp(get_name(iter), name) == 0)
return iter;
- if (strcasecmp(get_name(iter), get_name(entry)) != 0)
+ if (istrcmp(get_name(iter), get_name(entry)) != 0)
break;
if (icase && xkb_keysym_is_lower(iter->keysym))
return iter;
diff --git a/src/utils.c b/src/utils.c
index d725bbd..a71b570 100644
--- a/src/utils.c
+++ b/src/utils.c
@@ -108,3 +108,56 @@ unmap_file(char *str, size_t size)
}
#endif
+
+// ASCII lower-case map: entry i is i itself, except 65-90 ('A'-'Z'), which map to 97-122 ('a'-'z').
+static const unsigned char lower_map[] = {
+ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20,
+ 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39,
+ 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58,
+ 59, 60, 61, 62, 63, 64, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107,
+ 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122,
+ 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107,
+ 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122,
+ 123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 134, 135, 136, 137,
+ 138, 139, 140, 141, 142, 143, 144, 145, 146, 147, 148, 149, 150, 151, 152,
+ 153, 154, 155, 156, 157, 158, 159, 160, 161, 162, 163, 164, 165, 166, 167,
+ 168, 169, 170, 171, 172, 173, 174, 175, 176, 177, 178, 179, 180, 181, 182,
+ 183, 184, 185, 186, 187, 188, 189, 190, 191, 192, 193, 194, 195, 196, 197,
+ 198, 199, 200, 201, 202, 203, 204, 205, 206, 207, 208, 209, 210, 211, 212,
+ 213, 214, 215, 216, 217, 218, 219, 220, 221, 222, 223, 224, 225, 226, 227,
+ 228, 229, 230, 231, 232, 233, 234, 235, 236, 237, 238, 239, 240, 241, 242,
+ 243, 244, 245, 246, 247, 248, 249, 250, 251, 252, 253, 254, 255
+};
+
+// ASCII tolower (to avoid locale issues): looks up c, taken as an unsigned char, in lower_map.
+char
+to_lower(char c)
+{
+ return (char) lower_map[(unsigned char) c];
+}
+
+// ASCII strcasecmp (to avoid locale issues): returns 0 if a and b are equal ignoring ASCII case, else the difference of the first differing to_lower()ed chars. NOTE(review): the difference is taken on plain char values, so the ordering of bytes >= 0x80 depends on the platform's char signedness.
+int
+istrcmp(const char *a, const char *b)
+{
+ for (size_t i = 0; ; i++) {
+ if (to_lower(a[i]) != to_lower(b[i]))
+ return (int) to_lower(a[i]) - (int) to_lower(b[i]);
+ if (!a[i])
+ break;
+ }
+ return 0;
+}
+
+// ASCII strncasecmp (to avoid locale issues): compares at most n chars ignoring ASCII case, stopping early at a NUL; returns 0 on a match, else the difference of the first differing to_lower()ed chars (taken on plain char values).
+int
+istrncmp(const char *a, const char *b, size_t n)
+{
+ for (size_t i = 0; i < n; i++) {
+ if (to_lower(a[i]) != to_lower(b[i]))
+ return (int) to_lower(a[i]) - (int) to_lower(b[i]);
+ if (!a[i])
+ break;
+ }
+ return 0;
+}
diff --git a/src/utils.h b/src/utils.h
index d63d23a..cb98e8e 100644
--- a/src/utils.h
+++ b/src/utils.h
@@ -29,7 +29,6 @@
#include <stdbool.h>
#include <stdio.h>
#include <string.h>
-#include <strings.h>
#include "darray.h"
@@ -44,6 +43,15 @@
switch (0) { case 0: case (expr): ; } \
} while (0)
+char
+to_lower(char c);
+
+int
+istrcmp(const char *a, const char *b);
+
+int
+istrncmp(const char *a, const char *b, size_t n);
+
static inline bool
streq(const char *s1, const char *s2)
{
@@ -61,13 +69,13 @@ streq_not_null(const char *s1, const char *s2)
static inline bool
istreq(const char *s1, const char *s2)
{
- return strcasecmp(s1, s2) == 0;
+ return istrcmp(s1, s2) == 0;
}
static inline bool
istreq_prefix(const char *s1, const char *s2)
{
- return strncasecmp(s1, s2, strlen(s1)) == 0;
+ return istrncmp(s1, s2, strlen(s1)) == 0;
}
static inline char *
diff --git a/test/keysym.c b/test/keysym.c
index 439622c..4414523 100644
--- a/test/keysym.c
+++ b/test/keysym.c
@@ -20,6 +20,7 @@
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*/
+#include <locale.h>
#include "test.h"
#include "keysym.h" /* For unexported is_lower/upper/keypad() */
@@ -81,6 +82,25 @@ test_utf8(xkb_keysym_t keysym, const char *expected)
return streq(s, expected);
}
+static void
+test_github_issue_42(void)
+{
+ // Verify that we do not depend on the locale (the Turkish-i problem in particular) by running the checks below under a Turkish LC_CTYPE.
+ if (setlocale(LC_CTYPE, "tr_TR.UTF-8") == NULL) {
+ // setlocale() failed, probably because the locale is not available; skip the test.
+ return;
+ }
+
+ assert(test_string("i", XKB_KEY_i));
+ assert(test_string("I", XKB_KEY_I));
+ assert(test_casestring("i", XKB_KEY_i));
+ assert(test_casestring("I", XKB_KEY_i));
+ assert(xkb_keysym_to_upper(XKB_KEY_i) == XKB_KEY_I);
+ assert(xkb_keysym_to_lower(XKB_KEY_I) == XKB_KEY_i);
+
+ setlocale(LC_CTYPE, "C");
+}
+
int
main(void)
{
@@ -196,5 +216,7 @@ main(void)
assert(xkb_keysym_to_upper(XKB_KEY_eacute) == XKB_KEY_Eacute);
assert(xkb_keysym_to_lower(XKB_KEY_Eacute) == XKB_KEY_eacute);
+ test_github_issue_42();
+
return 0;
}