Merge pull request #1726 from crazymaster/development: git_buf_text_gather_stats doesn't work for multi-byte characters
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54
diff --git a/src/buf_text.c b/src/buf_text.c
index 443454b..472339d 100644
--- a/src/buf_text.c
+++ b/src/buf_text.c
@@ -262,7 +262,7 @@ bool git_buf_text_gather_stats(
while (scan < end) {
unsigned char c = *scan++;
- if ((c > 0x1F && c < 0x7F) || c > 0x9f)
+ if (c > 0x1F && c != 0x7F)
stats->printable++;
else switch (c) {
case '\0':
diff --git a/tests-clar/object/blob/filter.c b/tests-clar/object/blob/filter.c
index 042bdda..2b3954d 100644
--- a/tests-clar/object/blob/filter.c
+++ b/tests-clar/object/blob/filter.c
@@ -5,7 +5,7 @@
#include "buf_text.h"
static git_repository *g_repo = NULL;
-#define NUM_TEST_OBJECTS 8
+#define NUM_TEST_OBJECTS 9
static git_oid g_oids[NUM_TEST_OBJECTS];
static const char *g_raw[NUM_TEST_OBJECTS] = {
"",
@@ -15,9 +15,10 @@ static const char *g_raw[NUM_TEST_OBJECTS] = {
"foo\nbar\rboth\r\nreversed\n\ragain\nproblems\r",
"123\n\000\001\002\003\004abc\255\254\253\r\n",
"\xEF\xBB\xBFThis is UTF-8\n",
+ "\xEF\xBB\xBF\xE3\x81\xBB\xE3\x81\x92\xE3\x81\xBB\xE3\x81\x92\r\n\xE3\x81\xBB\xE3\x81\x92\xE3\x81\xBB\xE3\x81\x92\r\n",
"\xFE\xFF\x00T\x00h\x00i\x00s\x00!"
};
-static git_off_t g_len[NUM_TEST_OBJECTS] = { -1, -1, -1, -1, -1, 17, -1, 12 };
+static git_off_t g_len[NUM_TEST_OBJECTS] = { -1, -1, -1, -1, -1, 17, -1, -1, 12 };
static git_buf_text_stats g_stats[NUM_TEST_OBJECTS] = {
{ 0, 0, 0, 0, 0, 0, 0 },
{ 0, 0, 0, 2, 0, 6, 0 },
@@ -26,6 +27,7 @@ static git_buf_text_stats g_stats[NUM_TEST_OBJECTS] = {
{ 0, 0, 4, 4, 1, 31, 0 },
{ 0, 1, 1, 2, 1, 9, 5 },
{ GIT_BOM_UTF8, 0, 0, 1, 0, 16, 0 },
+ { GIT_BOM_UTF8, 0, 2, 2, 2, 27, 0 },
{ GIT_BOM_UTF16_BE, 5, 0, 0, 0, 7, 5 },
};
static git_buf g_crlf_filtered[NUM_TEST_OBJECTS] = {
@@ -36,6 +38,7 @@ static git_buf g_crlf_filtered[NUM_TEST_OBJECTS] = {
{ "foo\nbar\rboth\nreversed\n\ragain\nproblems\r", 0, 38 },
{ "123\n\000\001\002\003\004abc\255\254\253\n", 0, 16 },
{ "\xEF\xBB\xBFThis is UTF-8\n", 0, 17 },
+ { "\xEF\xBB\xBF\xE3\x81\xBB\xE3\x81\x92\xE3\x81\xBB\xE3\x81\x92\n\xE3\x81\xBB\xE3\x81\x92\xE3\x81\xBB\xE3\x81\x92\n", 0, 29 },
{ "\xFE\xFF\x00T\x00h\x00i\x00s\x00!", 0, 12 }
};