Commit a16e41729d5dec4acd30302c4a217622de00d290

Russell Belfer 2013-07-25T12:27:39

Fix rename detection to use actual blob size The size data in the index may not reflect the actual size of the blob data from the ODB when content filtering comes into play. This commit fixes rename detection to use the actual blob size when calculating data signatures instead of the value from the index. Because of a misunderstanding on my part, I first converted the git_index_add_bypath API to use the post-filtered blob data size in creating the index entry. I backed that change out, but I kept the overall refactoring of that routine and the new internal git_blob__create_from_paths API because it eliminates an extra stat() call from the code that adds a file to the index. The existing tests actually cover this code path, at least when running on Windows, so at this point I'm not adding new tests to cover the changes.

diff --git a/src/blob.c b/src/blob.c
index 2e4d5f4..0f1c970 100644
--- a/src/blob.c
+++ b/src/blob.c
@@ -105,6 +105,7 @@ static int write_file_stream(
 
 static int write_file_filtered(
 	git_oid *oid,
+	git_off_t *size,
 	git_odb *odb,
 	const char *full_path,
 	git_vector *filters)
@@ -123,8 +124,11 @@ static int write_file_filtered(
 	git_buf_free(&source);
 
 	/* Write the file to disk if it was properly filtered */
-	if (!error)
+	if (!error) {
+		*size = dest.size;
+
 		error = git_odb_write(oid, odb, dest.ptr, dest.size, GIT_OBJ_BLOB);
+	}
 
 	git_buf_free(&dest);
 	return error;
@@ -152,21 +156,46 @@ static int write_symlink(
 	return error;
 }
 
-static int blob_create_internal(git_oid *oid, git_repository *repo, const char *content_path, const char *hint_path, bool try_load_filters)
+int git_blob__create_from_paths(
+	git_oid *oid,
+	struct stat *out_st,
+	git_repository *repo,
+	const char *content_path,
+	const char *hint_path,
+	mode_t hint_mode,
+	bool try_load_filters)
 {
 	int error;
 	struct stat st;
 	git_odb *odb = NULL;
 	git_off_t size;
+	mode_t mode;
+	git_buf path = GIT_BUF_INIT;
 
 	assert(hint_path || !try_load_filters);
 
-	if ((error = git_path_lstat(content_path, &st)) < 0 || (error = git_repository_odb__weakptr(&odb, repo)) < 0)
-		return error;
+	if (!content_path) {
+		if (git_repository__ensure_not_bare(repo, "create blob from file") < 0)
+			return GIT_EBAREREPO;
+
+		if (git_buf_joinpath(
+				&path, git_repository_workdir(repo), hint_path) < 0)
+			return -1;
+
+		content_path = path.ptr;
+	}
+
+	if ((error = git_path_lstat(content_path, &st)) < 0 ||
+		(error = git_repository_odb(&odb, repo)) < 0)
+		goto done;
+
+	if (out_st)
+		memcpy(out_st, &st, sizeof(st));
 
 	size = st.st_size;
+	mode = hint_mode ? hint_mode : st.st_mode;
 
-	if (S_ISLNK(st.st_mode)) {
+	if (S_ISLNK(hint_mode)) {
 		error = write_symlink(oid, odb, content_path, (size_t)size);
 	} else {
 		git_vector write_filters = GIT_VECTOR_INIT;
@@ -187,7 +216,8 @@ static int blob_create_internal(git_oid *oid, git_repository *repo, const char *
 			error = write_file_stream(oid, odb, content_path, size);
 		} else {
 			/* We need to apply one or more filters */
-			error = write_file_filtered(oid, odb, content_path, &write_filters);
+			error = write_file_filtered(
+				oid, &size, odb, content_path, &write_filters);
 		}
 
 		git_filters_free(&write_filters);
@@ -207,34 +237,21 @@ static int blob_create_internal(git_oid *oid, git_repository *repo, const char *
 		 */
 	}
 
+done:
+	git_odb_free(odb);
+	git_buf_free(&path);
+
 	return error;
 }
 
-int git_blob_create_fromworkdir(git_oid *oid, git_repository *repo, const char *path)
+int git_blob_create_fromworkdir(
+	git_oid *oid, git_repository *repo, const char *path)
 {
-	git_buf full_path = GIT_BUF_INIT;
-	const char *workdir;
-	int error;
-
-	if ((error = git_repository__ensure_not_bare(repo, "create blob from file")) < 0)
-		return error;
-
-	workdir = git_repository_workdir(repo);
-
-	if (git_buf_joinpath(&full_path, workdir, path) < 0) {
-		git_buf_free(&full_path);
-		return -1;
-	}
-
-	error = blob_create_internal(
-		oid, repo, git_buf_cstr(&full_path),
-		git_buf_cstr(&full_path) + strlen(workdir), true);
-
-	git_buf_free(&full_path);
-	return error;
+	return git_blob__create_from_paths(oid, NULL, repo, NULL, path, 0, true);
 }
 
-int git_blob_create_fromdisk(git_oid *oid, git_repository *repo, const char *path)
+int git_blob_create_fromdisk(
+	git_oid *oid, git_repository *repo, const char *path)
 {
 	int error;
 	git_buf full_path = GIT_BUF_INIT;
@@ -251,8 +268,8 @@ int git_blob_create_fromdisk(git_oid *oid, git_repository *repo, const char *pat
 	if (workdir && !git__prefixcmp(hintpath, workdir))
 		hintpath += strlen(workdir);
 
-	error = blob_create_internal(
-		oid, repo, git_buf_cstr(&full_path), hintpath, true);
+	error = git_blob__create_from_paths(
+		oid, NULL, repo, git_buf_cstr(&full_path), hintpath, 0, true);
 
 	git_buf_free(&full_path);
 	return error;
@@ -272,12 +289,9 @@ int git_blob_create_fromchunks(
 	git_filebuf file = GIT_FILEBUF_INIT;
 	git_buf path = GIT_BUF_INIT;
 
-	if (git_buf_join_n(
-		&path, '/', 3, 
-		git_repository_path(repo),
-		GIT_OBJECTS_DIR, 
-		"streamed") < 0)
-			goto cleanup;
+	if (git_buf_joinpath(
+			&path, git_repository_path(repo), GIT_OBJECTS_DIR "streamed") < 0)
+		goto cleanup;
 
 	content = git__malloc(BUFFER_SIZE);
 	GITERR_CHECK_ALLOC(content);
@@ -303,7 +317,8 @@ int git_blob_create_fromchunks(
 	if (git_filebuf_flush(&file) < 0)
 		goto cleanup;
 
-	error = blob_create_internal(oid, repo, file.path_lock, hintpath, hintpath != NULL);
+	error = git_blob__create_from_paths(
+		oid, NULL, repo, file.path_lock, hintpath, 0, hintpath != NULL);
 
 cleanup:
 	git_buf_free(&path);
diff --git a/src/blob.h b/src/blob.h
index 22e37cc..4cd9f1e 100644
--- a/src/blob.h
+++ b/src/blob.h
@@ -21,4 +21,13 @@ void git_blob__free(void *blob);
 int git_blob__parse(void *blob, git_odb_object *obj);
 int git_blob__getbuf(git_buf *buffer, git_blob *blob);
 
+extern int git_blob__create_from_paths(
+	git_oid *out_oid,
+	struct stat *out_st,
+	git_repository *repo,
+	const char *full_path,
+	const char *hint_path,
+	mode_t hint_mode,
+	bool apply_filters);
+
 #endif
diff --git a/src/diff_tform.c b/src/diff_tform.c
index 8fd2a4f..d4b8cf3 100644
--- a/src/diff_tform.c
+++ b/src/diff_tform.c
@@ -473,7 +473,13 @@ static int similarity_calc(
 			/* if lookup fails, just skip this item in similarity calc */
 			giterr_clear();
 		} else {
-			size_t sz = (size_t)(git__is_sizet(file->size) ? file->size : -1);
+			size_t sz;
+
+			/* index size may not be actual blob size if filtered */
+			if (file->size != git_blob_rawsize(info->blob))
+				file->size = git_blob_rawsize(info->blob);
+
+			sz = (size_t)(git__is_sizet(file->size) ? file->size : -1);
 
 			error = opts->metric->buffer_signature(
 				&cache[info->idx], info->file,
diff --git a/src/index.c b/src/index.c
index 0610eb5..cbdd43b 100644
--- a/src/index.c
+++ b/src/index.c
@@ -16,6 +16,7 @@
 #include "iterator.h"
 #include "pathspec.h"
 #include "ignore.h"
+#include "blob.h"
 
 #include "git2/odb.h"
 #include "git2/oid.h"
@@ -604,42 +605,23 @@ int git_index_entry__cmp_icase(const void *a, const void *b)
 	return strcasecmp(entry_a->path, entry_b->path);
 }
 
-static int index_entry_init(git_index_entry **entry_out, git_index *index, const char *rel_path)
+static int index_entry_init(
+	git_index_entry **entry_out, git_index *index, const char *rel_path)
 {
+	int error = 0;
 	git_index_entry *entry = NULL;
 	struct stat st;
 	git_oid oid;
-	const char *workdir;
-	git_buf full_path = GIT_BUF_INIT;
-	int error;
 
 	if (INDEX_OWNER(index) == NULL)
 		return create_index_error(-1,
 			"Could not initialize index entry. "
 			"Index is not backed up by an existing repository.");
 
-	workdir = git_repository_workdir(INDEX_OWNER(index));
-
-	if (!workdir)
-		return create_index_error(GIT_EBAREREPO,
-			"Could not initialize index entry. Repository is bare");
-
-	if ((error = git_buf_joinpath(&full_path, workdir, rel_path)) < 0)
-		return error;
-
-	if ((error = git_path_lstat(full_path.ptr, &st)) < 0) {
-		git_buf_free(&full_path);
-		return error;
-	}
-
-	git_buf_free(&full_path); /* done with full path */
-
-	/* There is no need to validate the rel_path here, since it will be
-	 * immediately validated by the call to git_blob_create_fromfile.
-	 */
-
-	/* write the blob to disk and get the oid */
-	if ((error = git_blob_create_fromworkdir(&oid, INDEX_OWNER(index), rel_path)) < 0)
+	/* write the blob to disk and get the oid and stat info */
+	error = git_blob__create_from_paths(
+		&oid, &st, INDEX_OWNER(index), NULL, rel_path, 0, true);
+	if (error < 0)
 		return error;
 
 	entry = git__calloc(1, sizeof(git_index_entry));