Commit 5b8d5a22e702e4862ebfc56e37d18ca11bfebf02

Edward Thomson 2018-07-01T13:42:53

apply: use preimage as the checkout baseline. Use the preimage as the checkout's baseline. This allows us to support applying patches to files that are modified in the working directory (those that differ from the HEAD and index). Without this, such files will be reported as (checkout) conflicts. With this, we expect the data that was on disk when we began the patch application (the "preimage") to still be on disk during checkout. We could also have simply passed the `FORCE` flag to checkout to achieve a similar effect. However, `FORCE` ignores all differences, while providing a preimage ensures that we will only overwrite the file contents that we actually read. To support this, modify the reader interface to also provide the OID of the contents that it read.

diff --git a/src/apply.c b/src/apply.c
index 1a20278..0810c02 100644
--- a/src/apply.c
+++ b/src/apply.c
@@ -386,6 +386,7 @@ done:
 static int apply_one(
 	git_repository *repo,
 	git_reader *preimage_reader,
+	git_index *preimage,
 	git_index *postimage,
 	git_diff *diff,
 	size_t i)
@@ -395,8 +396,8 @@ static int apply_one(
 	const git_diff_delta *delta;
 	char *filename = NULL;
 	unsigned int mode;
-	git_oid blob_id;
-	git_index_entry index_entry;
+	git_oid pre_id, post_id;
+	git_index_entry pre_entry, post_entry;
 	int error;
 
 	if ((error = git_patch_from_diff(&patch, diff, i)) < 0)
@@ -404,11 +405,8 @@ static int apply_one(
 
 	delta = git_patch_get_delta(patch);
 
-	if (delta->status == GIT_DELTA_DELETED)
-		goto done;
-
 	if (delta->status != GIT_DELTA_ADDED) {
-		if ((error = git_reader_read(&pre_contents,
+		if ((error = git_reader_read(&pre_contents, &pre_id,
 		    preimage_reader, delta->old_file.path)) < 0) {
 
 			/* ENOTFOUND is really an application error */
@@ -417,21 +415,44 @@ static int apply_one(
 
 			goto done;
 		}
+
+		/*
+		 * We need to populate the preimage data structure with the
+		 * contents that we are using as the preimage for this file.
+		 * This allows us to apply patches to files that have been
+		 * modified in the working directory.  During checkout,
+		 * we will use this expected preimage as the baseline, and
+		 * limit checkout to only the paths affected by patch
+		 * application.  (Without this, we would fail to write the
+		 * postimage contents to any file that had been modified
+		 * from HEAD on-disk, even if the patch application succeeded.)
+		 */
+		if (preimage) {
+			memset(&pre_entry, 0, sizeof(git_index_entry));
+			pre_entry.path = delta->old_file.path;
+			pre_entry.mode = delta->old_file.mode;
+			git_oid_cpy(&pre_entry.id, &pre_id);
+
+			if ((error = git_index_add(preimage, &pre_entry)) < 0)
+				goto done;
+		}
 	}
 
-	if ((error = git_apply__patch(&post_contents, &filename, &mode,
-			pre_contents.ptr, pre_contents.size, patch)) < 0 ||
-		(error = git_blob_create_frombuffer(&blob_id, repo,
-			post_contents.ptr, post_contents.size)) < 0)
-		goto done;
+	if (delta->status != GIT_DELTA_DELETED) {
+		if ((error = git_apply__patch(&post_contents, &filename, &mode,
+				pre_contents.ptr, pre_contents.size, patch)) < 0 ||
+			(error = git_blob_create_frombuffer(&post_id, repo,
+				post_contents.ptr, post_contents.size)) < 0)
+			goto done;
 
-	memset(&index_entry, 0, sizeof(git_index_entry));
-	index_entry.path = filename;
-	index_entry.mode = mode;
-	git_oid_cpy(&index_entry.id, &blob_id);
+		memset(&post_entry, 0, sizeof(git_index_entry));
+		post_entry.path = filename;
+		post_entry.mode = mode;
+		git_oid_cpy(&post_entry.id, &post_id);
 
-	if ((error = git_index_add(postimage, &index_entry)) < 0)
-		goto done;
+		if ((error = git_index_add(postimage, &post_entry)) < 0)
+			goto done;
+	}
 
 done:
 	git_buf_dispose(&pre_contents);
@@ -483,7 +504,7 @@ int git_apply_to_tree(
 	}
 
 	for (i = 0; i < git_diff_num_deltas(diff); i++) {
-		if ((error = apply_one(repo, pre_reader, postimage, diff, i)) < 0)
+		if ((error = apply_one(repo, pre_reader, NULL, postimage, diff, i)) < 0)
 			goto done;
 	}
 
@@ -501,6 +522,7 @@ done:
 static int git_apply__to_workdir(
 	git_repository *repo,
 	git_diff *diff,
+	git_index *preimage,
 	git_index *postimage,
 	git_apply_options *opts)
 {
@@ -537,6 +559,8 @@ static int git_apply__to_workdir(
 	checkout_opts.paths.strings = (char **)paths.contents;
 	checkout_opts.paths.count = paths.length;
 
+	checkout_opts.baseline_index = preimage;
+
 	error = git_checkout_index(repo, postimage, &checkout_opts);
 
 	/*
@@ -554,6 +578,7 @@ done:
 static int git_apply__to_index(
 	git_repository *repo,
 	git_diff *diff,
+	git_index *preimage,
 	git_index *postimage,
 	git_apply_options *opts)
 {
@@ -563,6 +588,7 @@ static int git_apply__to_index(
 	size_t i;
 	int error;
 
+	GIT_UNUSED(preimage);
 	GIT_UNUSED(opts);
 
 	if ((error = git_repository_index(&index, repo)) < 0)
@@ -615,7 +641,7 @@ int git_apply(
 	git_diff *diff,
 	git_apply_options *given_opts)
 {
-	git_index *postimage = NULL;
+	git_index *preimage = NULL, *postimage = NULL;
 	git_reader *pre_reader = NULL;
 	git_apply_options opts = GIT_APPLY_OPTIONS_INIT;
 	size_t i;
@@ -643,28 +669,30 @@ int git_apply(
 		goto done;
 
 	/*
-	 * Build the postimage differences.  Note that this is not the
-	 * complete postimage, it only contains the new files created
-	 * during the application.  We will limit checkout to only write
-	 * the files affected by this diff.
+	 * Build the preimage and postimage (differences).  Note that
+	 * this is not the complete preimage or postimage, it only
+	 * contains the files affected by the patch.  We want to avoid
+	 * having the full repo index, so we will limit our checkout
+	 * to only write these files that were affected by the diff.
 	 */
-	if ((error = git_index_new(&postimage)) < 0)
+	if ((error = git_index_new(&preimage)) < 0 ||
+	    (error = git_index_new(&postimage)) < 0)
 		goto done;
 
 	for (i = 0; i < git_diff_num_deltas(diff); i++) {
-		if ((error = apply_one(repo, pre_reader, postimage, diff, i)) < 0)
+		if ((error = apply_one(repo, pre_reader, preimage, postimage, diff, i)) < 0)
 			goto done;
 	}
 
 	switch (opts.location) {
 	case GIT_APPLY_LOCATION_BOTH:
-		error = git_apply__to_workdir(repo, diff, postimage, &opts);
+		error = git_apply__to_workdir(repo, diff, preimage, postimage, &opts);
 		break;
 	case GIT_APPLY_LOCATION_INDEX:
-		error = git_apply__to_index(repo, diff, postimage, &opts);
+		error = git_apply__to_index(repo, diff, preimage, postimage, &opts);
 		break;
 	case GIT_APPLY_LOCATION_WORKDIR:
-		error = git_apply__to_workdir(repo, diff, postimage, &opts);
+		error = git_apply__to_workdir(repo, diff, preimage, postimage, &opts);
 		break;
 	default:
 		assert(false);
@@ -675,6 +703,7 @@ int git_apply(
 
 done:
 	git_index_free(postimage);
+	git_index_free(preimage);
 	git_reader_free(pre_reader);
 
 	return error;
diff --git a/src/reader.c b/src/reader.c
index 754b90a..32900df 100644
--- a/src/reader.c
+++ b/src/reader.c
@@ -24,6 +24,7 @@ typedef struct {
 
 static int tree_reader_read(
 	git_buf *out,
+	git_oid *out_id,
 	git_reader *_reader,
 	const char *filename)
 {
@@ -37,6 +38,9 @@ static int tree_reader_read(
 	    (error = git_buf_set(out, git_blob_rawcontent(blob), git_blob_rawsize(blob))) < 0)
 		goto done;
 
+	if (out_id)
+		git_oid_cpy(out_id, git_tree_entry_id(tree_entry));
+
 done:
 	git_blob_free(blob);
 	git_tree_entry_free(tree_entry);
@@ -74,6 +78,7 @@ typedef struct {
 
 static int workdir_reader_read(
 	git_buf *out,
+	git_oid *out_id,
 	git_reader *_reader,
 	const char *filename)
 {
@@ -86,7 +91,11 @@ static int workdir_reader_read(
 		goto done;
 
 	/* TODO: should we read the filtered data? */
-	error = git_futils_readbuffer(out, path.ptr);
+	if ((error = git_futils_readbuffer(out, path.ptr)) < 0)
+		goto done;
+
+	if (out_id)
+		error = git_odb_hash(out_id, out->ptr, out->size, GIT_OBJ_BLOB);
 
 done:
 	git_buf_dispose(&path);
@@ -125,6 +134,7 @@ typedef struct {
 
 static int index_reader_read(
 	git_buf *out,
+	git_oid *out_id,
 	git_reader *_reader,
 	const char *filename)
 {
@@ -139,6 +149,9 @@ static int index_reader_read(
 	if ((error = git_blob_lookup(&blob, reader->repo, &entry->id)) < 0)
 		goto done;
 
+	if (out_id)
+		git_oid_cpy(out_id, &entry->id);
+
 	error = git_blob__getbuf(out, blob);
 
 done:
@@ -185,11 +198,15 @@ int git_reader_for_index(
 
 /* generic */
 
-int git_reader_read(git_buf *out, git_reader *reader, const char *filename)
+int git_reader_read(
+	git_buf *out,
+	git_oid *out_id,
+	git_reader *reader,
+	const char *filename)
 {
 	assert(out && reader && filename);
 
-	return reader->read(out, reader, filename);
+	return reader->read(out, out_id, reader, filename);
 }
 
 void git_reader_free(git_reader *reader)
diff --git a/src/reader.h b/src/reader.h
index 18c7f59..7bb60e1 100644
--- a/src/reader.h
+++ b/src/reader.h
@@ -22,7 +22,7 @@ typedef struct git_reader git_reader;
  * reader after disposing the underlying object that it reads.
  */
 struct git_reader {
-	int (*read)(git_buf *out, git_reader *reader, const char *filename);
+	int (*read)(git_buf *out, git_oid *out_oid, git_reader *reader, const char *filename);
 	void (*free)(git_reader *reader);
 };
 
@@ -89,6 +89,7 @@ extern int git_reader_for_workdir(
  */
 extern int git_reader_read(
 	git_buf *out,
+	git_oid *out_id,
 	git_reader *reader,
 	const char *filename);