Commit 0161e096a30912e0721cf3e6446595d3400d55b7

Sven Strickroth 2014-11-13T19:30:47

Make binary detection work similarly to vanilla git. Main change: don't treat chars >= 128 as non-printable (they are common in UTF-8 files). Signed-off-by: Sven Strickroth <email@cs-ware.de>

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 96bd9a1..e1c02f9 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -3,6 +3,9 @@ v0.22 + 1
 
 ### Changes or improvements
 
+* Updated binary identification in CRLF filtering to avoid false positives in
+  UTF-8 files.
+
 * Rename and copy detection is enabled for small files.
 
 ### API additions
diff --git a/src/buf_text.c b/src/buf_text.c
index cead599..cb3661e 100644
--- a/src/buf_text.c
+++ b/src/buf_text.c
@@ -191,7 +191,10 @@ bool git_buf_text_is_binary(const git_buf *buf)
 	while (scan < end) {
 		unsigned char c = *scan++;
 
-		if (c > 0x1F && c < 0x7F)
+		/* Printable characters are SPACE (0x20) and above, excluding DEL
+		 * (0x7F), and additionally including BS, ESC and FF.
+		 */
+		if ((c > 0x1F && c != 127) || c == '\b' || c == '\033' || c == '\014')
 			printable++;
 		else if (c == '\0')
 			return true;
diff --git a/tests/core/buffer.c b/tests/core/buffer.c
index 87dec46..d28aa21 100644
--- a/tests/core/buffer.c
+++ b/tests/core/buffer.c
@@ -830,7 +830,7 @@ void test_core_buffer__classify_with_utf8(void)
 	cl_assert(!git_buf_text_contains_nul(&b));
 
 	b.ptr = data1; b.size = b.asize = data1len;
-	cl_assert(git_buf_text_is_binary(&b));
+	cl_assert(!git_buf_text_is_binary(&b));
 	cl_assert(!git_buf_text_contains_nul(&b));
 
 	b.ptr = data2; b.size = b.asize = data2len;