Merge pull request #2362 from libgit2/rb/update-4k-to-8k Test and fix Git diff binary detection compatibility
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168
diff --git a/src/blob.c b/src/blob.c
index ab7dec6..30d5b70 100644
--- a/src/blob.c
+++ b/src/blob.c
@@ -334,7 +334,8 @@ int git_blob_is_binary(const git_blob *blob)
assert(blob);
content.ptr = blob->odb_object->buffer;
- content.size = min(blob->odb_object->cached.size, 4000);
+ content.size =
+ min(blob->odb_object->cached.size, GIT_FILTER_BYTES_TO_CHECK_NUL);
content.asize = 0;
return git_buf_text_is_binary(&content);
diff --git a/src/diff_driver.c b/src/diff_driver.c
index dc8e79e..c3c5f36 100644
--- a/src/diff_driver.c
+++ b/src/diff_driver.c
@@ -397,7 +397,11 @@ void git_diff_driver_update_options(
int git_diff_driver_content_is_binary(
git_diff_driver *driver, const char *content, size_t content_len)
{
- const git_buf search = { (char *)content, 0, min(content_len, 4000) };
+ git_buf search;
+
+ search.ptr = (char *)content;
+ search.size = min(content_len, GIT_FILTER_BYTES_TO_CHECK_NUL);
+ search.asize = 0;
GIT_UNUSED(driver);
diff --git a/src/filter.h b/src/filter.h
index d0ace0f..5a36610 100644
--- a/src/filter.h
+++ b/src/filter.h
@@ -10,6 +10,10 @@
#include "common.h"
#include "git2/filter.h"
+/* Number of leading bytes of a file to examine for a NUL byte when checking binary-ness */
+#define GIT_FILTER_BYTES_TO_CHECK_NUL 8000
+
+/* Possible CRLF values */
typedef enum {
GIT_CRLF_GUESS = -1,
GIT_CRLF_BINARY = 0,
diff --git a/tests/diff/workdir.c b/tests/diff/workdir.c
index a6d48ab..f82bb00 100644
--- a/tests/diff/workdir.c
+++ b/tests/diff/workdir.c
@@ -1580,3 +1580,117 @@ void test_diff_workdir__can_update_index(void)
git_diff_free(diff);
}
+
+/* Fixed-length ASCII literals used to place a NUL byte at an exact offset. */
+#define STR7     "0123456"
+#define STR8     STR7 "7"
+#define STR40    STR8 STR8 STR8 STR8 STR8
+#define STR200   STR40 STR40 STR40 STR40 STR40
+#define STR1000  STR200 STR200 STR200 STR200 STR200
+#define STR4000  STR1000 STR1000 STR1000 STR1000
+/* 999 text bytes (800 + 160 + 32 + 7) followed by an explicit NUL */
+#define STR999Z  STR200 STR200 STR200 STR200 \
+	STR40 STR40 STR40 STR40 \
+	STR8 STR8 STR8 STR8 \
+	STR7 "\0"
+/* 3999 text bytes followed by an explicit NUL */
+#define STR3999Z STR1000 STR1000 STR1000 STR999Z
+
+/*
+ * Build the patch for delta number `idx` of `diff` and assert that its
+ * GIT_DIFF_FLAG_BINARY flag matches `is_binary` (the two are compared
+ * with cl_assert_equal_b). The temporary patch is freed before returning.
+ */
+static void assert_delta_binary(git_diff *diff, size_t idx, int is_binary)
+{
+	const git_diff_delta *delta;
+	git_patch *generated = NULL;
+
+	cl_git_pass(git_patch_from_diff(&generated, diff, idx));
+
+	delta = git_patch_get_delta(generated);
+	cl_assert_equal_b((delta->flags & GIT_DIFF_FLAG_BINARY), is_binary);
+
+	git_patch_free(generated);
+}
+
+/*
+ * Exercise diff binary detection on ten sample payloads, in both
+ * directions: first with the payloads in the workdir and plain ASCII in
+ * the index, then with the payloads in the index and plain ASCII in the
+ * workdir.
+ *
+ * Payloads 3-6 put a NUL byte at increasing offsets; the expectations
+ * below treat a NUL at byte 8000 as binary and a NUL at byte 8001 as
+ * text. The expected flags were checked against command-line Git.
+ */
+void test_diff_workdir__binary_detection(void)
+{
+	git_index *idx;
+	git_diff *diff = NULL;
+	git_buf b = GIT_BUF_INIT;
+	int i;
+	/* A size of 0 means "use strlen(ptr)"; it is filled in below. */
+	git_buf data[10] = {
+		{ "1234567890", 0, 0 },         /* 0 - all ascii text control */
+		{ "Åü†HøπΩ", 0, 0 },            /* 1 - UTF-8 multibyte text */
+		{ "\xEF\xBB\xBFÜ⤒ƒ8£€", 0, 0 }, /* 2 - UTF-8 with BOM */
+		{ STR999Z, 0, 1000 },           /* 3 - ASCII with NUL at 1000 */
+		{ STR3999Z, 0, 4000 },          /* 4 - ASCII with NUL at 4000 */
+		{ STR4000 STR3999Z "x", 0, 8001 }, /* 5 - ASCII with NUL at 8000 */
+		{ STR4000 STR4000 "\0", 0, 8001 }, /* 6 - ASCII with NUL at 8001 */
+		{ "\x00\xDC\x00\x6E\x21\x39\xFE\x0E\x00\x63\x00\xF8"
+		  "\x00\x64\x00\x65\x20\x48", 0, 18 }, /* 7 - UTF-16 text */
+		{ "\x01\x02\x03\x04\x05\x06\x07\x08\x09\x0a\x0b\x0c\x0d"
+		  "\x01\x02\x03\x04\x05\x06\x07\x08\x09\x0a\x0b\x0c\x0d",
+		  0, 26 }, /* 8 - All non-printable characters (no NUL) */
+		{ "Hello \x01\x02\x03\x04\x05\x06 World!\x01\x02\x03\x04"
+		  "\x05\x06\x07", 0, 26 }, /* 9 - 50-50 non-printable (no NUL) */
+	};
+
+	g_repo = cl_git_sandbox_init("empty_standard_repo");
+	cl_git_pass(git_repository_index(&idx, g_repo));
+
+	/* We start with ASCII in index and test data in workdir,
+	 * then we will try with test data in index and ASCII in workdir.
+	 */
+
+	cl_git_pass(git_buf_sets(&b, "empty_standard_repo/0"));
+	for (i = 0; i < 10; ++i) {
+		/* the last path character doubles as the file name: "0".."9" */
+		b.ptr[b.size - 1] = '0' + i;
+		cl_git_mkfile(b.ptr, "baseline");
+		cl_git_pass(git_index_add_bypath(idx, &b.ptr[b.size - 1]));
+
+		if (data[i].size == 0)
+			data[i].size = strlen(data[i].ptr);
+		cl_git_write2file(
+			b.ptr, data[i].ptr, data[i].size, O_WRONLY|O_TRUNC, 0664);
+	}
+	/* do not let a failed index write go unnoticed */
+	cl_git_pass(git_index_write(idx));
+
+	cl_git_pass(git_diff_index_to_workdir(&diff, g_repo, NULL, NULL));
+
+	cl_assert_equal_i(10, git_diff_num_deltas(diff));
+
+	/* using diff binary detection (i.e. looking for NUL byte) */
+	assert_delta_binary(diff, 0, false);
+	assert_delta_binary(diff, 1, false);
+	assert_delta_binary(diff, 2, false);
+	assert_delta_binary(diff, 3, true);
+	assert_delta_binary(diff, 4, true);
+	assert_delta_binary(diff, 5, true);
+	assert_delta_binary(diff, 6, false);
+	assert_delta_binary(diff, 7, true);
+	assert_delta_binary(diff, 8, false);
+	assert_delta_binary(diff, 9, false);
+	/* The above have been checked to match command-line Git */
+
+	git_diff_free(diff);
+
+	/* Second pass: stage the test data, then put ASCII in the workdir. */
+	cl_git_pass(git_buf_sets(&b, "empty_standard_repo/0"));
+	for (i = 0; i < 10; ++i) {
+		b.ptr[b.size - 1] = '0' + i;
+		cl_git_pass(git_index_add_bypath(idx, &b.ptr[b.size - 1]));
+
+		cl_git_write2file(b.ptr, "baseline\n", 9, O_WRONLY|O_TRUNC, 0664);
+	}
+	/* do not let a failed index write go unnoticed */
+	cl_git_pass(git_index_write(idx));
+
+	cl_git_pass(git_diff_index_to_workdir(&diff, g_repo, NULL, NULL));
+
+	cl_assert_equal_i(10, git_diff_num_deltas(diff));
+
+	/* using diff binary detection (i.e. looking for NUL byte) */
+	assert_delta_binary(diff, 0, false);
+	assert_delta_binary(diff, 1, false);
+	assert_delta_binary(diff, 2, false);
+	assert_delta_binary(diff, 3, true);
+	assert_delta_binary(diff, 4, true);
+	assert_delta_binary(diff, 5, true);
+	assert_delta_binary(diff, 6, false);
+	assert_delta_binary(diff, 7, true);
+	assert_delta_binary(diff, 8, false);
+	assert_delta_binary(diff, 9, false);
+
+	git_diff_free(diff);
+
+	git_index_free(idx);
+	git_buf_free(&b);
+}