Commit 0d10e79dd9b4c5dee72066526a6a3c99e19c545b

Vicent Martí 2012-12-17T10:13:36

Merge pull request #1149 from nulltoken/topic/blob_isbinary Introduce git_blob_is_binary()

diff --git a/include/git2/blob.h b/include/git2/blob.h
index a68c78b..30055b6 100644
--- a/include/git2/blob.h
+++ b/include/git2/blob.h
@@ -183,6 +183,19 @@ GIT_EXTERN(int) git_blob_create_fromchunks(
  */
 GIT_EXTERN(int) git_blob_create_frombuffer(git_oid *oid, git_repository *repo, const void *buffer, size_t len);
 
+/**
+ * Determine whether the blob content is most likely binary.
+ *
+ * The heuristic used to guess if a file is binary is taken from core git:
+ * Searching for NUL bytes and looking for a reasonable ratio of printable
+ * to non-printable characters among the first 4000 bytes.
+ *
+ * @param blob The blob whose content should be analyzed
+ * @return 1 if the content of the blob is detected
+ * as binary; 0 otherwise.
+ */
+GIT_EXTERN(int) git_blob_is_binary(git_blob *blob);
+
 /** @} */
 GIT_END_DECL
 #endif
diff --git a/src/blob.c b/src/blob.c
index b168df1..811bd85 100644
--- a/src/blob.c
+++ b/src/blob.c
@@ -296,3 +296,15 @@ cleanup:
 	git__free(content);
 	return error;
 }
+
+int git_blob_is_binary(git_blob *blob)
+{
+	git_buf content;
+
+	assert(blob);
+
+	content.ptr = blob->odb_object->raw.data;
+	content.size = min(blob->odb_object->raw.len, 4000);
+
+	return git_buf_text_is_binary(&content);
+}
diff --git a/src/indexer.c b/src/indexer.c
index 2fb7804..b9240f3 100644
--- a/src/indexer.c
+++ b/src/indexer.c
@@ -201,7 +201,7 @@ static void hash_header(git_hash_ctx *ctx, git_off_t len, git_otype type)
 	char buffer[64];
 	size_t hdrlen;
 
-	hdrlen = git_odb__format_object_header(buffer, sizeof(buffer), len, type);
+	hdrlen = git_odb__format_object_header(buffer, sizeof(buffer), (size_t)len, type);
 	git_hash_update(ctx, buffer, hdrlen);
 }
 
@@ -269,11 +269,11 @@ static int crc_object(uint32_t *crc_out, git_mwindow_file *mwf, git_off_t start,
 
 	crc = crc32(0L, Z_NULL, 0);
 	while (size) {
-		ptr = git_mwindow_open(mwf, &w, start, size, &left);
+		ptr = git_mwindow_open(mwf, &w, start, (size_t)size, &left);
 		if (ptr == NULL)
 			return -1;
 
-		len = min(left, size);
+		len = min(left, (size_t)size);
 		crc = crc32(crc, ptr, len);
 		size -= len;
 		start += len;
diff --git a/src/win32/posix_w32.c b/src/win32/posix_w32.c
index 3a4398b..2ce92b9 100644
--- a/src/win32/posix_w32.c
+++ b/src/win32/posix_w32.c
@@ -59,7 +59,6 @@ static int do_lstat(
 {
 	WIN32_FILE_ATTRIBUTE_DATA fdata;
 	wchar_t fbuf[GIT_WIN_PATH], lastch;
-	DWORD last_error;
 	int flen;
 
 	flen = git__utf8_to_16(fbuf, GIT_WIN_PATH, file_name);
diff --git a/tests-clar/diff/blob.c b/tests-clar/diff/blob.c
index d7fdba0..8300cb7 100644
--- a/tests-clar/diff/blob.c
+++ b/tests-clar/diff/blob.c
@@ -335,3 +335,15 @@ void test_diff_blob__checks_options_version_too_high(void)
 	err = giterr_last();
 	cl_assert_equal_i(GITERR_INVALID, err->klass);
 }
+
+void test_diff_blob__can_correctly_detect_a_binary_blob_as_binary(void)
+{
+	/* alien.png */
+	cl_assert_equal_i(true, git_blob_is_binary(alien));
+}
+
+void test_diff_blob__can_correctly_detect_a_textual_blob_as_non_binary(void)
+{
+	/* tests/resources/attr/root_test4.txt */
+	cl_assert_equal_i(false, git_blob_is_binary(d));
+}