Commit a94a54021c118df5d0cf526c0ace4bd96a442f39

Edward Thomson 2017-07-19T13:28:32

Merge pull request #4272 from pks-t/pks/patch-id Patch ID calculation

diff --git a/include/git2/diff.h b/include/git2/diff.h
index 4f0871d..40536cb 100644
--- a/include/git2/diff.h
+++ b/include/git2/diff.h
@@ -1400,6 +1400,51 @@ GIT_EXTERN(int) git_diff_format_email_init_options(
 	git_diff_format_email_options *opts,
 	unsigned int version);
 
+/**
+ * Patch ID options structure
+ *
+ * Initialize with `GIT_DIFF_PATCHID_OPTIONS_INIT` macro to
+ * correctly set the default values and version.
+ */
+typedef struct git_diff_patchid_options {
+	unsigned int version; /**< Set to `GIT_DIFF_PATCHID_OPTIONS_VERSION` by `GIT_DIFF_PATCHID_OPTIONS_INIT` */
+} git_diff_patchid_options;
+
+#define GIT_DIFF_PATCHID_OPTIONS_VERSION 1
+#define GIT_DIFF_PATCHID_OPTIONS_INIT { GIT_DIFF_PATCHID_OPTIONS_VERSION }
+
+/**
+ * Initialize `git_diff_patchid_options` structure.
+ *
+ * Initializes the structure with default values. Equivalent to
+ * creating an instance with `GIT_DIFF_PATCHID_OPTIONS_INIT`.
+ */
+GIT_EXTERN(int) git_diff_patchid_init_options(
+	git_diff_patchid_options *opts,
+	unsigned int version);
+
+/**
+ * Calculate the patch ID for the given patch.
+ *
+ * Calculate a stable patch ID for the given patch by summing the
+ * hash of the file diffs, ignoring whitespace and line numbers.
+ * This can be used to derive whether two diffs are the same with
+ * a high probability.
+ *
+ * Currently, this function only calculates stable patch IDs, as
+ * defined in git-patch-id(1), and should in fact generate the
+ * same IDs as the upstream git project does.
+ *
+ * @param out Pointer where the calculated patch ID should be
+ *  stored
+ * @param diff The diff to calculate the ID for
+ * @param opts Options for how to calculate the patch ID. This is
+ *  intended for future changes, as currently no options are
+ *  available.
+ * @return 0 on success, an error code otherwise.
+ */
+GIT_EXTERN(int) git_diff_patchid(git_oid *out, git_diff *diff, git_diff_patchid_options *opts);
+
 GIT_END_DECL
 
 /** @} */
diff --git a/src/diff.c b/src/diff.c
index a93bd4c..bc40743 100644
--- a/src/diff.c
+++ b/src/diff.c
@@ -19,6 +19,12 @@
 #define DIFF_FLAG_SET(DIFF,FLAG,VAL) (DIFF)->opts.flags = \
 	(VAL) ? ((DIFF)->opts.flags | (FLAG)) : ((DIFF)->opts.flags & ~(VAL))
 
+struct patch_id_args {
+	git_hash_ctx ctx;	/* running hash fed by file_cb and line_cb */
+	git_oid result;		/* byte-wise sum of the hashes flushed so far */
+	int first_file;		/* non-zero until file_cb has seen its first file */
+};
+
 GIT_INLINE(const char *) diff_delta__path(const git_diff_delta *delta)
 {
 	const char *str = delta->old_file.path;
@@ -374,3 +380,141 @@ int git_diff_format_email_init_options(
 	return 0;
 }
 
+/* Finalize and restart ctx; add the digest byte-wise, with carry, to result. */
+static int flush_hunk(git_oid *result, git_hash_ctx *ctx)
+{
+	git_oid digest;
+	unsigned short sum, overflow = 0;
+	int pos, error = git_hash_final(&digest, ctx);
+
+	if (error >= 0)
+		error = git_hash_init(ctx);
+	if (error < 0)
+		return error;
+	for (pos = 0; pos < GIT_OID_RAWSZ; pos++) {
+		sum = overflow + result->id[pos] + digest.id[pos];
+		result->id[pos] = sum;
+		overflow = sum >> 8;
+	}
+	return 0;
+}
+
+/* Remove every whitespace character from buf in place, up to its first NUL. */
+static void strip_spaces(git_buf *buf)
+{
+	const char *in;
+	char *out = buf->ptr;
+
+	for (in = buf->ptr; *in != '\0'; in++) {
+		if (git__isspace(*in))
+			continue;
+		*out++ = *in;
+	}
+
+	/* out - buf->ptr is the number of characters that were kept */
+	git_buf_truncate(buf, (size_t)(out - buf->ptr));
+}
+
+/* Per-file callback: flush the previous file's hash and hash the new header. */
+static int file_cb(
+	const git_diff_delta *delta,
+	float progress,
+	void *payload)
+{
+	struct patch_id_args *args = payload;
+	git_buf header = GIT_BUF_INIT;
+	const char *old_path = delta->old_file.path;
+	const char *new_path = delta->new_file.path;
+	int error;
+
+	GIT_UNUSED(progress);
+
+	/* Before the first file nothing has been hashed, so nothing is flushed */
+	if (args->first_file)
+		args->first_file = 0;
+	else if ((error = flush_hunk(&args->result, &args->ctx)) < 0)
+		goto out;
+
+	/* The header is hashed with whitespace removed, like the diff lines */
+	error = git_buf_printf(&header,
+		"diff--gita/%sb/%s---a/%s+++b/%s",
+		old_path, new_path, old_path, new_path);
+	if (error >= 0) {
+		strip_spaces(&header);
+		error = git_hash_update(&args->ctx, header.ptr, header.size);
+	}
+
+out:
+	git_buf_free(&header);
+	return error;
+}
+
+/* Per-line callback: hash one '+', '-' or context line, whitespace removed.
+ * Returns -1 for any other origin, else the buffer write / hash update result. */
+static int line_cb(
+	const git_diff_delta *delta,
+	const git_diff_hunk *hunk,
+	const git_diff_line *line,
+	void *payload)
+{
+	struct patch_id_args *args = (struct patch_id_args *) payload;
+	git_buf buf = GIT_BUF_INIT;
+	int error = 0;
+
+	GIT_UNUSED(delta);
+	GIT_UNUSED(hunk);
+
+	switch (line->origin) {
+	    case GIT_DIFF_LINE_ADDITION:
+		error = git_buf_putc(&buf, '+');
+		break;
+	    case GIT_DIFF_LINE_DELETION:
+		error = git_buf_putc(&buf, '-');
+		break;
+	    case GIT_DIFF_LINE_CONTEXT:
+		break;
+	    default:
+		giterr_set(GITERR_PATCH, "invalid line origin for patch");
+		return -1;
+	}
+	/* A failed buffer write must abort instead of hashing a partial line */
+	if (error >= 0 &&
+	    (error = git_buf_put(&buf, line->content, line->content_len)) >= 0) {
+		strip_spaces(&buf);
+		error = git_hash_update(&args->ctx, buf.ptr, buf.size);
+	}
+	git_buf_free(&buf);
+	return error;
+}
+
+int git_diff_patchid_init_options(git_diff_patchid_options *opts, unsigned int version)
+{
+	GIT_INIT_STRUCTURE_FROM_TEMPLATE(
+		opts, version, git_diff_patchid_options, GIT_DIFF_PATCHID_OPTIONS_INIT);
+	return 0; /* NOTE(review): the macro above presumably returns early on a version mismatch -- confirm */
+}
+
+/* Compute the patch ID of diff into out; opts is only version-checked here.
+ * Returns 0 on success, an error code otherwise. */
+int git_diff_patchid(git_oid *out, git_diff *diff, git_diff_patchid_options *opts)
+{
+	struct patch_id_args args;
+	int error;
+
+	GITERR_CHECK_VERSION(
+		opts, GIT_DIFF_PATCHID_OPTIONS_VERSION, "git_diff_patchid_options");
+
+	memset(&args, 0, sizeof(args));
+	args.first_file = 1;
+	if ((error = git_hash_ctx_init(&args.ctx)) < 0)
+		return error;
+
+	/* file_cb flushes between files; flush once more for the last file */
+	error = git_diff_foreach(diff, file_cb, NULL, NULL, line_cb, &args);
+	if (error >= 0 && (error = flush_hunk(&args.result, &args.ctx)) >= 0)
+		git_oid_cpy(out, &args.result);
+
+	/* Release the hash context on success and failure alike */
+	git_hash_ctx_cleanup(&args.ctx);
+	return error;
+}
diff --git a/tests/core/structinit.c b/tests/core/structinit.c
index 78503fc..8feba86 100644
--- a/tests/core/structinit.c
+++ b/tests/core/structinit.c
@@ -176,4 +176,8 @@ void test_core_structinit__compare(void)
 	CHECK_MACRO_FUNC_INIT_EQUAL( \
 		git_proxy_options, GIT_PROXY_OPTIONS_VERSION, \
 		GIT_PROXY_OPTIONS_INIT, git_proxy_init_options);
+
+	CHECK_MACRO_FUNC_INIT_EQUAL( \
+		git_diff_patchid_options, GIT_DIFF_PATCHID_OPTIONS_VERSION, \
+		GIT_DIFF_PATCHID_OPTIONS_INIT, git_diff_patchid_init_options);
 }
diff --git a/tests/diff/patchid.c b/tests/diff/patchid.c
new file mode 100644
index 0000000..75a2aa8
--- /dev/null
+++ b/tests/diff/patchid.c
@@ -0,0 +1,60 @@
+#include "clar_libgit2.h"
+#include "patch/patch_common.h"
+
+/* Parse diff_content and assert that its patch ID equals expected_id. */
+static void verify_patch_id(const char *diff_content, const char *expected_id)
+{
+	size_t content_len = strlen(diff_content);
+	git_diff *parsed;
+	git_oid want, got;
+
+	cl_git_pass(git_oid_fromstr(&want, expected_id));
+	cl_git_pass(git_diff_from_buffer(&parsed, diff_content, content_len));
+	cl_git_pass(git_diff_patchid(&got, parsed, NULL));
+	cl_assert_equal_oid(&want, &got);
+	git_diff_free(parsed);
+}
+
+void test_diff_patchid__simple_commit(void)
+{
+	verify_patch_id(PATCH_SIMPLE_COMMIT, "06094b1948b878b7d9ff7560b4eae672a014b0ec");
+}
+/* NOTE(review): named "filename_with_spaces" but uses PATCH_APPEND_NO_NL -- confirm the fixture */
+void test_diff_patchid__filename_with_spaces(void)
+{
+	verify_patch_id(PATCH_APPEND_NO_NL, "f0ba05413beaef743b630e796153839462ee477a");
+}
+
+void test_diff_patchid__multiple_hunks(void)
+{
+	verify_patch_id(PATCH_MULTIPLE_HUNKS, "81e26c34643d17f521e57c483a6a637e18ba1f57");
+}
+
+void test_diff_patchid__multiple_files(void)
+{
+	verify_patch_id(PATCH_MULTIPLE_FILES, "192d1f49d23f2004517963aecd3f8a6c467f50ff");
+}
+
+void test_diff_patchid__same_diff_with_differing_whitespace_has_same_id(void)
+{
+	const char *expected = "11efdd13c30f7a1056eac2ae2fb952da475e2c23";
+	const char *with_tabs =
+	    "diff --git a/file.txt b/file.txt\n"
+	    "index 8fecc09..1d43a92 100644\n"
+	    "--- a/file.txt\n"
+	    "+++ b/file.txt\n"
+	    "@@ -1 +1 @@\n"
+	    "-old text\n"
+	    "+		new text\n";
+	const char *with_spaces =
+	    "diff --git a/file.txt b/file.txt\n"
+	    "index 8fecc09..1d43a92 100644\n"
+	    "--- a/file.txt\n"
+	    "+++ b/file.txt\n"
+	    "@@ -1 +1 @@\n"
+	    "-old text\n"
+	    "+        new text\n";
+	/* The two diffs differ only in whitespace and must yield the same ID */
+	verify_patch_id(with_tabs, expected);
+	verify_patch_id(with_spaces, expected);
+}
diff --git a/tests/patch/patch_common.h b/tests/patch/patch_common.h
index 6ec5546..a20ebd6 100644
--- a/tests/patch/patch_common.h
+++ b/tests/patch/patch_common.h
@@ -253,7 +253,66 @@
 	"@@ -9,0 +10 @@ below it!\n" \
 	"+insert at end\n"
 
-/* An insertion at the beginning and end of file (and the resultant patch) */
+#define PATCH_SIMPLE_COMMIT /* commit header and message followed by a one-hunk patch */ \
+	"commit 15e119375018fba121cf58e02a9f17fe22df0df8\n" \
+	"Author: Edward Thomson <ethomson@edwardthomson.com>\n" \
+	"Date:   Wed Jun 14 13:31:20 2017 +0200\n" \
+	"\n" \
+	"    CHANGELOG: document git_filter_init and GIT_FILTER_INIT\n" \
+	"\n" \
+	"diff --git a/CHANGELOG.md b/CHANGELOG.md\n" \
+	"index 1b9e0c90a..24ecba426 100644\n" \
+	"--- a/CHANGELOG.md\n" \
+	"+++ b/CHANGELOG.md\n" \
+	"@@ -96,6 +96,9 @@ v0.26\n" \
+	" * `git_transport_smart_proxy_options()' enables you to get the proxy options for\n" \
+	"   smart transports.\n" \
+	"\n" \
+	"+* The `GIT_FILTER_INIT` macro and the `git_filter_init` function are provided\n" \
+	"+  to initialize a `git_filter` structure.\n" \
+	"+\n" \
+	" ### Breaking API changes\n" \
+	"\n" \
+	" * `clone_checkout_strategy` has been removed from\n"
+/* A patch to a single file ("x") that consists of two hunks */
+#define PATCH_MULTIPLE_HUNKS \
+	"diff --git a/x b/x\n" \
+	"index 0719398..fa0350c 100644\n" \
+	"--- a/x\n" \
+	"+++ b/x\n" \
+	"@@ -1,5 +1,4 @@\n" \
+	" 1\n" \
+	"-2\n" \
+	" 3\n" \
+	" 4\n" \
+	" 5\n" \
+	"@@ -7,3 +6,4 @@\n" \
+	" 7\n" \
+	" 8\n" \
+	" 9\n" \
+	"+10\n"
+/* A patch touching two files ("x" and "y") with one hunk each */
+#define PATCH_MULTIPLE_FILES \
+	"diff --git a/x b/x\n" \
+	"index 8a1218a..7059ba5 100644\n" \
+	"--- a/x\n" \
+	"+++ b/x\n" \
+	"@@ -1,5 +1,4 @@\n" \
+	" 1\n" \
+	" 2\n" \
+	"-3\n" \
+	" 4\n" \
+	" 5\n" \
+	"diff --git a/y b/y\n" \
+	"index e006065..9405325 100644\n" \
+	"--- a/y\n" \
+	"+++ b/y\n" \
+	"@@ -1,4 +1,5 @@\n" \
+	" a\n" \
+	" b\n" \
+	"+c\n" \
+	" d\n" \
+	" e\n"
 
 #define FILE_PREPEND_AND_APPEND \
 	"first and\n" \