handle EILSEQ in tog(1)
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163
diff --git a/include/got_utf8.h b/include/got_utf8.h
new file mode 100644
index 0000000..2c3703e
--- /dev/null
+++ b/include/got_utf8.h
@@ -0,0 +1,17 @@
+/*
+ * Copyright (c) 2018 Stefan Sperling <stsp@openbsd.org>
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+const struct got_error *got_mbsavis(char**, int *, const char *);
diff --git a/lib/utf8.c b/lib/utf8.c
new file mode 100644
index 0000000..d1db624
--- /dev/null
+++ b/lib/utf8.c
@@ -0,0 +1,85 @@
+/*
+ * Copyright (c) 2015 Ingo Schwarze <schwarze@openbsd.org>
+ * Copyright (c) 2018 Stefan Sperling <stsp@openbsd.org>
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+#include <err.h>
+#include <stdlib.h>
+#include <string.h>
+#include <wchar.h>
+
+#include "got_error.h"
+#include "got_utf8.h"
+
+/*
+ * Copy the multibyte string mbs into a newly allocated buffer, replacing
+ * each byte that mbtowc() cannot decode, and each character for which
+ * wcwidth() returns -1, with a single '?'.
+ *
+ * On success, return NULL, point *outp at the NUL-terminated copy and store
+ * its display width in *widthp. The buffer comes from malloc() and is not
+ * freed here. If the allocation fails, *outp is NULL, *widthp is left
+ * unmodified and the result of got_error_from_errno() is returned.
+ */
+const struct got_error *
+got_mbsavis(char **outp, int *widthp, const char *mbs)
+{
+	const char *src;	/* Iterate mbs. */
+	char *dst;		/* Iterate *outp. */
+	wchar_t wc;
+	size_t size;		/* Size in bytes of mbs, including the NUL. */
+	int total_width;	/* Display width of the whole string. */
+	int width;		/* Display width of a single Unicode char. */
+	int len;		/* Length in bytes of one multibyte char. */
+
+	size = strlen(mbs) + 1;
+	if ((*outp = malloc(size)) == NULL)
+		return got_error_from_errno();
+
+	/* One byte per character: copy verbatim, one column per byte. */
+	if (MB_CUR_MAX == 1) {
+		memcpy(*outp, mbs, size);
+		*widthp = (int)(size - 1);
+		return NULL;
+	}
+
+	src = mbs;
+	dst = *outp;
+	total_width = 0;
+	while (*src != '\0') {
+		if ((len = mbtowc(&wc, src, MB_CUR_MAX)) == -1) {
+			/*
+			 * A failed mbtowc() may leave its conversion state
+			 * undefined; reset it, then skip the bad byte.
+			 */
+			(void)mbtowc(NULL, NULL, MB_CUR_MAX);
+			total_width++;
+			*dst++ = '?';
+			src++;
+		} else if ((width = wcwidth(wc)) == -1) {
+			/* Decodable but no display width: one '?'. */
+			total_width++;
+			*dst++ = '?';
+			src += len;
+		} else {
+			total_width += width;
+			while (len-- > 0)
+				*dst++ = *src++;
+		}
+	}
+	*dst = '\0';
+	*widthp = total_width;
+	return NULL;
+}
diff --git a/tog/Makefile b/tog/Makefile
index 730e0f0..ec1cc5e 100644
--- a/tog/Makefile
+++ b/tog/Makefile
@@ -1,9 +1,10 @@
.PATH:${.CURDIR}/../lib
PROG= tog
-SRCS= tog.c delta.c diff.c diffreg.c error.c fileindex.c object.c \
- opentemp.c path.c pack.c privsep.c reference.c repository.c \
- sha1.c worktree.c zbuf.c object_idset.c commit_graph.c
+SRCS= tog.c commit_graph.c delta.c diff.c diffreg.c error.c \
+ fileindex.c object.c object_idset.c opentemp.c path.c \
+ pack.c privsep.c reference.c repository.c sha1.c worktree.c \
+ utf8.c zbuf.c
CPPFLAGS = -I${.CURDIR}/../include -I${.CURDIR}/../lib
LDADD = -lpanel -lncursesw -lutil -lz
diff --git a/tog/tog.c b/tog/tog.c
index 9bf2f9c..111605a 100644
--- a/tog/tog.c
+++ b/tog/tog.c
@@ -39,6 +39,7 @@
#include "got_diff.h"
#include "got_opentemp.h"
#include "got_commit_graph.h"
+#include "got_utf8.h"
#ifndef MIN
#define MIN(_a,_b) ((_a) < (_b) ? (_a) : (_b))
@@ -97,20 +98,33 @@ usage_log(void)
static const struct got_error *
mbs2ws(wchar_t **ws, size_t *wlen, const char *s)
{
+ char *vis = NULL;
const struct got_error *err = NULL;
*ws = NULL;
*wlen = mbstowcs(NULL, s, 0);
- if (*wlen == (size_t)-1)
- return got_error_from_errno();
+ if (*wlen == (size_t)-1) {
+ int vislen;
+ if (errno != EILSEQ)
+ return got_error_from_errno();
+
+ /* byte string invalid in current encoding; try to "fix" it */
+ err = got_mbsavis(&vis, &vislen, s);
+ if (err)
+ return err;
+ *wlen = mbstowcs(NULL, vis, 0);
+ if (*wlen == (size_t)-1)
+ return got_error_from_errno(); /* give up */
+ }
*ws = calloc(*wlen + 1, sizeof(*ws));
if (*ws == NULL)
return got_error_from_errno();
- if (mbstowcs(*ws, s, *wlen) != *wlen)
+ if (mbstowcs(*ws, vis ? vis : s, *wlen) != *wlen)
err = got_error_from_errno();
+ free(vis);
if (err) {
free(*ws);
*ws = NULL;