Commit 160e4fb792b070e14c7094893e390c53d788648c

Vicent Martí 2013-01-11T11:35:09

Merge pull request #1230 from arrbee/match-core-git-diff-binary-detection Match binary file check of core git in diff

diff --git a/src/buf_text.c b/src/buf_text.c
index a7122dc..0104a90 100644
--- a/src/buf_text.c
+++ b/src/buf_text.c
@@ -109,6 +109,11 @@ bool git_buf_text_is_binary(const git_buf *buf)
 	return ((printable >> 7) < nonprintable);
 }
 
+bool git_buf_text_contains_nul(const git_buf *buf)
+{	/* true iff a NUL byte lies within the first buf->size bytes of buf->ptr */
+	return (memchr(buf->ptr, '\0', buf->size) != NULL); /* memchr is ISO C; strnlen is POSIX.1-2008 only */
+}
+
 int git_buf_text_detect_bom(git_bom_t *bom, const git_buf *buf, size_t offset)
 {
 	const char *ptr;
diff --git a/src/buf_text.h b/src/buf_text.h
index ae5e6ca..458ee33 100644
--- a/src/buf_text.h
+++ b/src/buf_text.h
@@ -71,6 +71,14 @@ extern int git_buf_text_common_prefix(git_buf *buf, const git_strarray *strs);
 extern bool git_buf_text_is_binary(const git_buf *buf);
 
 /**
+ * Check quickly if buffer contains a NUL byte
+ *
+ * @param buf Buffer to check; only its first `size` bytes are examined
+ * @return true if a NUL byte occurs within those bytes, false otherwise
+ */
+extern bool git_buf_text_contains_nul(const git_buf *buf);
+
+/**
  * Check if a buffer begins with a UTF BOM
  *
  * @param bom Set to the type of BOM detected or GIT_BOM_NONE
diff --git a/src/diff_output.c b/src/diff_output.c
index d75a7bb..933d44e 100644
--- a/src/diff_output.c
+++ b/src/diff_output.c
@@ -142,7 +142,12 @@ static int diff_delta_is_binary_by_content(
 	GIT_UNUSED(ctxt);
 
 	if ((file->flags & KNOWN_BINARY_FLAGS) == 0) {
-		if (git_buf_text_is_binary(&search))
+		/* TODO: provide encoding / binary detection callbacks that can
+		 * be UTF-8 aware, etc.  For now, instead of trying to be smart,
+		 * let's just use the simple NUL-byte detection that core git uses.
+		 */
+		/* previously was: if (git_buf_text_is_binary(&search)) */
+		if (git_buf_text_contains_nul(&search))
 			file->flags |= GIT_DIFF_FILE_BINARY;
 		else
 			file->flags |= GIT_DIFF_FILE_NOT_BINARY;
diff --git a/tests-clar/core/buffer.c b/tests-clar/core/buffer.c
index 40fc4c5..5d9b785 100644
--- a/tests-clar/core/buffer.c
+++ b/tests-clar/core/buffer.c
@@ -704,3 +704,26 @@ void test_core_buffer__base64(void)
 
 	git_buf_free(&buf);
 }
+
+void test_core_buffer__classify_with_utf8(void)
+{
+	/* Fixtures are arrays so that sizeof yields the true byte length: the
+	 * ellipsis is multi-byte in UTF-8 and data2 carries an embedded NUL. */
+	char data0[] = "Simple text\n";
+	char data1[] = "Is that UTF-8 data I see…\nYep!\n";
+	char data2[] = "Internal NUL!!!\000\n\nI see you!\n";
+	git_buf b;
+
+	/* plain ASCII: neither heuristic flags it */
+	b.ptr = data0; b.size = b.asize = sizeof(data0) - 1;
+	cl_assert(!git_buf_text_is_binary(&b));
+	cl_assert(!git_buf_text_contains_nul(&b));
+	/* UTF-8 text: flagged by the printable-ratio check, but holds no NUL */
+	b.ptr = data1; b.size = b.asize = sizeof(data1) - 1;
+	cl_assert(git_buf_text_is_binary(&b));
+	cl_assert(!git_buf_text_contains_nul(&b));
+	/* embedded NUL: both checks report binary */
+	b.ptr = data2; b.size = b.asize = sizeof(data2) - 1;
+	cl_assert(git_buf_text_is_binary(&b));
+	cl_assert(git_buf_text_contains_nul(&b));
+}