Commit 4591e76a2d1aa07dc80eaa002b4ed7c58d81c242

Edward Thomson 2021-12-10T15:19:59

blob: identify binary content Introduce `git_blob_data_is_binary` to examine a blob's data, instead of the blob itself. A replacement for `git_buf_is_binary`.

diff --git a/include/git2/blob.h b/include/git2/blob.h
index 4922b08..59fac9e 100644
--- a/include/git2/blob.h
+++ b/include/git2/blob.h
@@ -285,6 +285,18 @@ GIT_EXTERN(int) git_blob_create_from_buffer(
 GIT_EXTERN(int) git_blob_is_binary(const git_blob *blob);
 
 /**
+ * Determine if the given content is most certainly binary or not;
+ * this is the same mechanism used by `git_blob_is_binary` but only
+ * looking at raw data.
+ *
+ * @param data The raw data whose content should be analyzed
+ * @param len The length of the data, in bytes
+ * @return 1 if the given data is detected
+ * as binary; 0 otherwise.
+ */
+GIT_EXTERN(int) git_blob_data_is_binary(const char *data, size_t len);
+
+/**
  * Create an in-memory copy of a blob. The copy must be explicitly
  * free'd or it will leak.
  *
diff --git a/src/blob.c b/src/blob.c
index 65841ab..19ce8b3 100644
--- a/src/blob.c
+++ b/src/blob.c
@@ -404,6 +404,15 @@ int git_blob_is_binary(const git_blob *blob)
 	return git_str_is_binary(&content);
 }
 
+int git_blob_data_is_binary(const char *data, size_t len)
+{
+	git_str content = GIT_STR_INIT;
+
+	/* Wrap the raw bytes in a not-owned git_str and reuse its binary check. */
+	git_str_attach_notowned(&content, data, len);
+	return git_str_is_binary(&content);
+}
+
 int git_blob_filter_options_init(
 	git_blob_filter_options *opts,
 	unsigned int version)
diff --git a/tests/diff/blob.c b/tests/diff/blob.c
index 9f71e4e..d2f4220 100644
--- a/tests/diff/blob.c
+++ b/tests/diff/blob.c
@@ -604,12 +604,28 @@ void test_diff_blob__can_correctly_detect_a_binary_blob_as_binary(void)
 	cl_assert_equal_i(true, git_blob_is_binary(alien));
 }
 
+void test_diff_blob__can_correctly_detect_binary_blob_data_as_binary(void)
+{
+	/* alien.png: pass the blob's raw bytes and size to the data-based check */
+	const char *content = git_blob_rawcontent(alien);
+	size_t len = (size_t)git_blob_rawsize(alien);
+	cl_assert_equal_i(true, git_blob_data_is_binary(content, len));
+}
+
 void test_diff_blob__can_correctly_detect_a_textual_blob_as_non_binary(void)
 {
 	/* tests/resources/attr/root_test4.txt */
 	cl_assert_equal_i(false, git_blob_is_binary(d));
 }
 
+void test_diff_blob__can_correctly_detect_textual_blob_data_as_non_binary(void)
+{
+	/* tests/resources/attr/root_test4.txt: raw bytes go to the data-based check */
+	const char *content = git_blob_rawcontent(d);
+	size_t len = (size_t)git_blob_rawsize(d);
+	cl_assert_equal_i(false, git_blob_data_is_binary(content, len));
+}
+
 /*
  * git_diff_blob_to_buffer tests
  */