utf8: use size_t for length of buffer

`git__utf8_charlen` now takes the buffer length as a `size_t`, since that argument holds the full remaining length of the buffer from the current position. The function now returns `-1` in every case where the UTF-8 sequence is invalid (instead of a negative offset), since callers only care about either the length of a valid sequence of code units or the fact that the current position is not valid UTF-8.
diff --git a/src/util.c b/src/util.c
index 508dce5..48cbc13 100644
--- a/src/util.c
+++ b/src/util.c
@@ -802,23 +802,23 @@ static const int8_t utf8proc_utf8class[256] = {
4, 4, 4, 4, 4, 4, 4, 4, 0, 0, 0, 0, 0, 0, 0, 0
};
-int git__utf8_charlen(const uint8_t *str, int str_len)
+int git__utf8_charlen(const uint8_t *str, size_t str_len)
{
- int length, i;
+ size_t length, i;
length = utf8proc_utf8class[str[0]];
if (!length)
return -1;
- if (str_len >= 0 && length > str_len)
- return -str_len;
+ if (str_len > 0 && length > str_len)
+ return -1;
for (i = 1; i < length; i++) {
if ((str[i] & 0xC0) != 0x80)
- return -i;
+ return -1;
}
- return length;
+ return (int)length;
}
int git__utf8_iterate(const uint8_t *str, int str_len, int32_t *dst)