Commit 97309dd025e8cd4ee0b4a90774f73e8423ad1ebc

Ben Straub 2013-07-19T10:43:53

Merge pull request #1726 from crazymaster/development: git_buf_text_gather_stats doesn't work for multi-byte characters

diff --git a/src/buf_text.c b/src/buf_text.c
index 443454b..472339d 100644
--- a/src/buf_text.c
+++ b/src/buf_text.c
@@ -262,7 +262,7 @@ bool git_buf_text_gather_stats(
 	while (scan < end) {
 		unsigned char c = *scan++;
 
-		if ((c > 0x1F && c < 0x7F) || c > 0x9f)
+		if (c > 0x1F && c != 0x7F)
 			stats->printable++;
 		else switch (c) {
 			case '\0':
diff --git a/tests-clar/object/blob/filter.c b/tests-clar/object/blob/filter.c
index 042bdda..2b3954d 100644
--- a/tests-clar/object/blob/filter.c
+++ b/tests-clar/object/blob/filter.c
@@ -5,7 +5,7 @@
 #include "buf_text.h"
 
 static git_repository *g_repo = NULL;
-#define NUM_TEST_OBJECTS 8
+#define NUM_TEST_OBJECTS 9
 static git_oid g_oids[NUM_TEST_OBJECTS];
 static const char *g_raw[NUM_TEST_OBJECTS] = {
 	"",
@@ -15,9 +15,10 @@ static const char *g_raw[NUM_TEST_OBJECTS] = {
 	"foo\nbar\rboth\r\nreversed\n\ragain\nproblems\r",
 	"123\n\000\001\002\003\004abc\255\254\253\r\n",
 	"\xEF\xBB\xBFThis is UTF-8\n",
+	"\xEF\xBB\xBF\xE3\x81\xBB\xE3\x81\x92\xE3\x81\xBB\xE3\x81\x92\r\n\xE3\x81\xBB\xE3\x81\x92\xE3\x81\xBB\xE3\x81\x92\r\n",
 	"\xFE\xFF\x00T\x00h\x00i\x00s\x00!"
 };
-static git_off_t g_len[NUM_TEST_OBJECTS] = { -1, -1, -1, -1, -1, 17, -1, 12 };
+static git_off_t g_len[NUM_TEST_OBJECTS] = { -1, -1, -1, -1, -1, 17, -1, -1, 12 };
 static git_buf_text_stats g_stats[NUM_TEST_OBJECTS] = {
 	{ 0, 0, 0, 0, 0, 0, 0 },
 	{ 0, 0, 0, 2, 0, 6, 0 },
@@ -26,6 +27,7 @@ static git_buf_text_stats g_stats[NUM_TEST_OBJECTS] = {
 	{ 0, 0, 4, 4, 1, 31, 0 },
 	{ 0, 1, 1, 2, 1, 9, 5 },
 	{ GIT_BOM_UTF8, 0, 0, 1, 0, 16, 0 },
+	{ GIT_BOM_UTF8, 0, 2, 2, 2, 27, 0 },
 	{ GIT_BOM_UTF16_BE, 5, 0, 0, 0, 7, 5 },
 };
 static git_buf g_crlf_filtered[NUM_TEST_OBJECTS] = {
@@ -36,6 +38,7 @@ static git_buf g_crlf_filtered[NUM_TEST_OBJECTS] = {
 	{ "foo\nbar\rboth\nreversed\n\ragain\nproblems\r", 0, 38 },
 	{ "123\n\000\001\002\003\004abc\255\254\253\n", 0, 16 },
 	{ "\xEF\xBB\xBFThis is UTF-8\n", 0, 17 },
+	{ "\xEF\xBB\xBF\xE3\x81\xBB\xE3\x81\x92\xE3\x81\xBB\xE3\x81\x92\n\xE3\x81\xBB\xE3\x81\x92\xE3\x81\xBB\xE3\x81\x92\n", 0, 29 },
 	{ "\xFE\xFF\x00T\x00h\x00i\x00s\x00!", 0, 12 }
 };