Merge pull request #1230 from arrbee/match-core-git-diff-binary-detection Match binary file check of core git in diff
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84
diff --git a/src/buf_text.c b/src/buf_text.c
index a7122dc..0104a90 100644
--- a/src/buf_text.c
+++ b/src/buf_text.c
@@ -109,6 +109,11 @@ bool git_buf_text_is_binary(const git_buf *buf)
return ((printable >> 7) < nonprintable);
}
+bool git_buf_text_contains_nul(const git_buf *buf)
+{
+ return (strnlen(buf->ptr, buf->size) != buf->size);
+}
+
int git_buf_text_detect_bom(git_bom_t *bom, const git_buf *buf, size_t offset)
{
const char *ptr;
diff --git a/src/buf_text.h b/src/buf_text.h
index ae5e6ca..458ee33 100644
--- a/src/buf_text.h
+++ b/src/buf_text.h
@@ -71,6 +71,14 @@ extern int git_buf_text_common_prefix(git_buf *buf, const git_strarray *strs);
extern bool git_buf_text_is_binary(const git_buf *buf);
/**
+ * Check quickly if buffer contains a NUL byte
+ *
+ * @param buf Buffer to check
+ * @return true if buffer contains a NUL byte
+ */
+extern bool git_buf_text_contains_nul(const git_buf *buf);
+
+/**
* Check if a buffer begins with a UTF BOM
*
* @param bom Set to the type of BOM detected or GIT_BOM_NONE
diff --git a/src/diff_output.c b/src/diff_output.c
index d75a7bb..933d44e 100644
--- a/src/diff_output.c
+++ b/src/diff_output.c
@@ -142,7 +142,12 @@ static int diff_delta_is_binary_by_content(
GIT_UNUSED(ctxt);
if ((file->flags & KNOWN_BINARY_FLAGS) == 0) {
- if (git_buf_text_is_binary(&search))
+ /* TODO: provide encoding / binary detection callbacks that can
+ * be UTF-8 aware, etc. For now, instead of trying to be smart,
+ * let's just use the simple NUL-byte detection that core git uses.
+ */
+ /* previously was: if (git_buf_text_is_binary(&search)) */
+ if (git_buf_text_contains_nul(&search))
file->flags |= GIT_DIFF_FILE_BINARY;
else
file->flags |= GIT_DIFF_FILE_NOT_BINARY;
diff --git a/tests-clar/core/buffer.c b/tests-clar/core/buffer.c
index 40fc4c5..5d9b785 100644
--- a/tests-clar/core/buffer.c
+++ b/tests-clar/core/buffer.c
@@ -704,3 +704,26 @@ void test_core_buffer__base64(void)
git_buf_free(&buf);
}
+
+void test_core_buffer__classify_with_utf8(void)
+{
+ char *data0 = "Simple text\n";
+ size_t data0len = 12;
+ char *data1 = "Is that UTF-8 data I see…\nYep!\n";
+ size_t data1len = 31;
+ char *data2 = "Internal NUL!!!\000\n\nI see you!\n";
+ size_t data2len = 29;
+ git_buf b;
+
+ b.ptr = data0; b.size = b.asize = data0len;
+ cl_assert(!git_buf_text_is_binary(&b));
+ cl_assert(!git_buf_text_contains_nul(&b));
+
+ b.ptr = data1; b.size = b.asize = data1len;
+ cl_assert(git_buf_text_is_binary(&b));
+ cl_assert(!git_buf_text_contains_nul(&b));
+
+ b.ptr = data2; b.size = b.asize = data2len;
+ cl_assert(git_buf_text_is_binary(&b));
+ cl_assert(git_buf_text_contains_nul(&b));
+}