Make binary detection work similar to vanilla git Main change: Don't treat chars > 128 as non-printable (common in UTF-8 files) Signed-off-by: Sven Strickroth <email@cs-ware.de>
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 96bd9a1..e1c02f9 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -3,6 +3,9 @@ v0.22 + 1
### Changes or improvements
+* Updated binary identification in CRLF filtering to avoid false positives in
+ UTF-8 files.
+
* Rename and copy detection is enabled for small files.
### API additions
diff --git a/src/buf_text.c b/src/buf_text.c
index cead599..cb3661e 100644
--- a/src/buf_text.c
+++ b/src/buf_text.c
@@ -191,7 +191,10 @@ bool git_buf_text_is_binary(const git_buf *buf)
while (scan < end) {
unsigned char c = *scan++;
- if (c > 0x1F && c < 0x7F)
+ /* Printable characters are those above SPACE (0x1F) excluding DEL,
+ * and including BS, ESC and FF.
+ */
+ if ((c > 0x1F && c != 127) || c == '\b' || c == '\033' || c == '\014')
printable++;
else if (c == '\0')
return true;
diff --git a/tests/core/buffer.c b/tests/core/buffer.c
index 87dec46..d28aa21 100644
--- a/tests/core/buffer.c
+++ b/tests/core/buffer.c
@@ -830,7 +830,7 @@ void test_core_buffer__classify_with_utf8(void)
cl_assert(!git_buf_text_contains_nul(&b));
b.ptr = data1; b.size = b.asize = data1len;
- cl_assert(git_buf_text_is_binary(&b));
+ cl_assert(!git_buf_text_is_binary(&b));
cl_assert(!git_buf_text_contains_nul(&b));
b.ptr = data2; b.size = b.asize = data2len;