Commit d2458af7b7227f0b378e1509d7dfeb958c339590

Edward Thomson 2022-01-22T14:19:13

indexer: use a byte array for checksum The index's checksum is not an object ID, so we should not use the `git_oid` type. Use a byte array for checksum calculation and storage. Deprecate the `git_indexer_hash` function. Callers should use the new `git_indexer_name` function which provides a unique packfile name.

diff --git a/examples/index-pack.c b/examples/index-pack.c
index c58ac03..df37dd0 100644
--- a/examples/index-pack.c
+++ b/examples/index-pack.c
@@ -17,7 +17,6 @@ int lg2_index_pack(git_repository *repo, int argc, char **argv)
 	git_indexer *idx;
 	git_indexer_progress stats = {0, 0};
 	int error;
-	char hash[GIT_OID_HEXSZ + 1] = {0};
 	int fd;
 	ssize_t read_bytes;
 	char buf[512];
@@ -61,8 +60,7 @@ int lg2_index_pack(git_repository *repo, int argc, char **argv)
 
 	printf("\rIndexing %u of %u\n", stats.indexed_objects, stats.total_objects);
 
-	git_oid_fmt(hash, git_indexer_hash(idx));
-	puts(hash);
+	puts(git_indexer_name(idx));
 
  cleanup:
 	close(fd);
diff --git a/include/git2/indexer.h b/include/git2/indexer.h
index 4bacbd3..ffe9bf3 100644
--- a/include/git2/indexer.h
+++ b/include/git2/indexer.h
@@ -129,16 +129,30 @@ GIT_EXTERN(int) git_indexer_append(git_indexer *idx, const void *data, size_t si
  */
 GIT_EXTERN(int) git_indexer_commit(git_indexer *idx, git_indexer_progress *stats);
 
+#ifndef GIT_DEPRECATE_HARD
 /**
  * Get the packfile's hash
  *
  * A packfile's name is derived from the sorted hashing of all object
  * names. This is only correct after the index has been finalized.
  *
+ * @deprecated use git_indexer_name
  * @param idx the indexer instance
  * @return the packfile's hash
  */
 GIT_EXTERN(const git_oid *) git_indexer_hash(const git_indexer *idx);
+#endif
+
+/**
+ * Get the unique name for the resulting packfile.
+ *
+ * The packfile's name is derived from the packfile's content.
+ * This is only correct after the index has been finalized.
+ *
+ * @param idx the indexer instance
+ * @return a NUL terminated string for the packfile name
+ */
+GIT_EXTERN(const char *) git_indexer_name(const git_indexer *idx);
 
 /**
  * Free the indexer and its resources
diff --git a/src/indexer.c b/src/indexer.c
index 16db9ca..f9a32e7 100644
--- a/src/indexer.c
+++ b/src/indexer.c
@@ -55,7 +55,8 @@ struct git_indexer {
 	git_vector deltas;
 	unsigned int fanout[256];
 	git_hash_ctx hash_ctx;
-	git_oid hash;
+	unsigned char checksum[GIT_HASH_SHA1_SIZE];
+	char name[(GIT_HASH_SHA1_SIZE * 2) + 1];
 	git_indexer_progress_cb progress_cb;
 	void *progress_payload;
 	char objbuf[8*1024];
@@ -76,9 +77,16 @@ struct delta_info {
 	off64_t delta_off;
 };
 
+#ifndef GIT_DEPRECATE_HARD
 const git_oid *git_indexer_hash(const git_indexer *idx)
 {
-	return &idx->hash;
+	return (git_oid *)idx->checksum;
+}
+#endif
+
+const char *git_indexer_name(const git_indexer *idx)
+{
+	return idx->name;
 }
 
 static int parse_header(struct git_pack_header *hdr, struct git_pack_file *pack)
@@ -897,8 +905,7 @@ static int index_path(git_str *path, git_indexer *idx, const char *suffix)
 
 	git_str_truncate(path, slash);
 	git_str_puts(path, prefix);
-	git_oid_fmt(path->ptr + git_str_len(path), &idx->hash);
-	path->size += GIT_OID_HEXSZ;
+	git_str_puts(path, idx->name);
 	git_str_puts(path, suffix);
 
 	return git_str_oom(path) ? -1 : 0;
@@ -919,12 +926,13 @@ static int inject_object(git_indexer *idx, git_oid *id)
 	git_odb_object *obj = NULL;
 	struct entry *entry = NULL;
 	struct git_pack_entry *pentry = NULL;
-	git_oid foo = {{0}};
+	unsigned char empty_checksum[GIT_HASH_SHA1_SIZE] = {0};
 	unsigned char hdr[64];
 	git_str buf = GIT_STR_INIT;
 	off64_t entry_start;
 	const void *data;
 	size_t len, hdr_len;
+	size_t checksum_size = GIT_HASH_SHA1_SIZE;
 	int error;
 
 	if ((error = seek_back_trailer(idx)) < 0)
@@ -966,7 +974,7 @@ static int inject_object(git_indexer *idx, git_oid *id)
 
 	/* Write a fake trailer so the pack functions play ball */
 
-	if ((error = append_to_pack(idx, &foo, GIT_OID_RAWSZ)) < 0)
+	if ((error = append_to_pack(idx, empty_checksum, checksum_size)) < 0)
 		goto cleanup;
 
 	idx->pack->mwf.size += GIT_OID_RAWSZ;
@@ -1160,9 +1168,11 @@ int git_indexer_commit(git_indexer *idx, git_indexer_progress *stats)
 	struct git_pack_idx_header hdr;
 	git_str filename = GIT_STR_INIT;
 	struct entry *entry;
-	git_oid trailer_hash, file_hash;
+	unsigned char checksum[GIT_HASH_SHA1_SIZE];
 	git_filebuf index_file = {0};
 	void *packfile_trailer;
+	size_t checksum_size = GIT_HASH_SHA1_SIZE;
+	bool mismatch;
 
 	if (!idx->parsed_header) {
 		git_error_set(GIT_ERROR_INDEXER, "incomplete pack header");
@@ -1170,27 +1180,27 @@ int git_indexer_commit(git_indexer *idx, git_indexer_progress *stats)
 	}
 
 	/* Test for this before resolve_deltas(), as it plays with idx->off */
-	if (idx->off + 20 < idx->pack->mwf.size) {
+	if (idx->off + (ssize_t)checksum_size < idx->pack->mwf.size) {
 		git_error_set(GIT_ERROR_INDEXER, "unexpected data at the end of the pack");
 		return -1;
 	}
-	if (idx->off + 20 > idx->pack->mwf.size) {
+	if (idx->off + (ssize_t)checksum_size > idx->pack->mwf.size) {
 		git_error_set(GIT_ERROR_INDEXER, "missing trailer at the end of the pack");
 		return -1;
 	}
 
-	packfile_trailer = git_mwindow_open(&idx->pack->mwf, &w, idx->pack->mwf.size - GIT_OID_RAWSZ, GIT_OID_RAWSZ, &left);
+	packfile_trailer = git_mwindow_open(&idx->pack->mwf, &w, idx->pack->mwf.size - checksum_size, checksum_size, &left);
 	if (packfile_trailer == NULL) {
 		git_mwindow_close(&w);
 		goto on_error;
 	}
 
 	/* Compare the packfile trailer as it was sent to us and what we calculated */
-	git_oid_fromraw(&file_hash, packfile_trailer);
+	git_hash_final(checksum, &idx->trailer);
+	mismatch = !!memcmp(checksum, packfile_trailer, checksum_size);
 	git_mwindow_close(&w);
 
-	git_hash_final(trailer_hash.id, &idx->trailer);
-	if (git_oid_cmp(&file_hash, &trailer_hash)) {
+	if (mismatch) {
 		git_error_set(GIT_ERROR_INDEXER, "packfile trailer mismatch");
 		return -1;
 	}
@@ -1210,8 +1220,8 @@ int git_indexer_commit(git_indexer *idx, git_indexer_progress *stats)
 		if (update_header_and_rehash(idx, stats) < 0)
 			return -1;
 
-		git_hash_final(trailer_hash.id, &idx->trailer);
-		write_at(idx, &trailer_hash, idx->pack->mwf.size - GIT_OID_RAWSZ, GIT_OID_RAWSZ);
+		git_hash_final(checksum, &idx->trailer);
+		write_at(idx, checksum, idx->pack->mwf.size - checksum_size, checksum_size);
 	}
 
 	/*
@@ -1230,7 +1240,9 @@ int git_indexer_commit(git_indexer *idx, git_indexer_progress *stats)
 
 	/* Use the trailer hash as the pack file name to ensure
 	 * files with different contents have different names */
-	git_oid_cpy(&idx->hash, &trailer_hash);
+	memcpy(idx->checksum, checksum, checksum_size);
+	if (git_hash_fmt(idx->name, checksum, checksum_size) < 0)
+		return -1;
 
 	git_str_sets(&filename, idx->pack->pack_name);
 	git_str_shorten(&filename, strlen("pack"));
@@ -1291,14 +1303,14 @@ int git_indexer_commit(git_indexer *idx, git_indexer_progress *stats)
 	}
 
 	/* Write out the packfile trailer to the index */
-	if (git_filebuf_write(&index_file, &trailer_hash, GIT_OID_RAWSZ) < 0)
+	if (git_filebuf_write(&index_file, checksum, checksum_size) < 0)
 		goto on_error;
 
 	/* Write out the hash of the idx */
-	if (git_filebuf_hash(trailer_hash.id, &index_file) < 0)
+	if (git_filebuf_hash(checksum, &index_file) < 0)
 		goto on_error;
 
-	git_filebuf_write(&index_file, &trailer_hash, sizeof(git_oid));
+	git_filebuf_write(&index_file, checksum, checksum_size);
 
 	/* Figure out what the final name should be */
 	if (index_path(&filename, idx, ".idx") < 0)
diff --git a/tests/pack/indexer.c b/tests/pack/indexer.c
index 954bee9..ec48ffd 100644
--- a/tests/pack/indexer.c
+++ b/tests/pack/indexer.c
@@ -169,8 +169,7 @@ void test_pack_indexer__fix_thin(void)
 	cl_assert_equal_i(stats.indexed_objects, 2);
 	cl_assert_equal_i(stats.local_objects, 1);
 
-	git_oid_fromstr(&should_id, "fefdb2d740a3a6b6c03a0c7d6ce431c6d5810e13");
-	cl_assert_equal_oid(&should_id, git_indexer_hash(idx));
+	cl_assert_equal_s("fefdb2d740a3a6b6c03a0c7d6ce431c6d5810e13", git_indexer_name(idx));
 
 	git_indexer_free(idx);
 	git_odb_free(odb);
diff --git a/tests/pack/packbuilder.c b/tests/pack/packbuilder.c
index 5b1f7b9..73bc667 100644
--- a/tests/pack/packbuilder.c
+++ b/tests/pack/packbuilder.c
@@ -5,6 +5,7 @@
 #include "iterator.h"
 #include "vector.h"
 #include "posix.h"
+#include "hash.h"
 
 static git_repository *_repo;
 static git_revwalk *_revwalker;
@@ -98,8 +99,8 @@ void test_pack_packbuilder__create_pack(void)
 	git_indexer_progress stats;
 	git_str buf = GIT_STR_INIT, path = GIT_STR_INIT;
 	git_hash_ctx ctx;
-	git_oid hash;
-	char hex[GIT_OID_HEXSZ+1]; hex[GIT_OID_HEXSZ] = '\0';
+	unsigned char hash[GIT_HASH_SHA1_SIZE];
+	char hex[(GIT_HASH_SHA1_SIZE * 2) + 1];
 
 	seed_packbuilder();
 
@@ -107,8 +108,7 @@ void test_pack_packbuilder__create_pack(void)
 	cl_git_pass(git_packbuilder_foreach(_packbuilder, feed_indexer, &stats));
 	cl_git_pass(git_indexer_commit(_indexer, &stats));
 
-	git_oid_fmt(hex, git_indexer_hash(_indexer));
-	git_str_printf(&path, "pack-%s.pack", hex);
+	git_str_printf(&path, "pack-%s.pack", git_indexer_name(_indexer));
 
 	/*
 	 * By default, packfiles are created with only one thread.
@@ -128,14 +128,13 @@ void test_pack_packbuilder__create_pack(void)
 
 	cl_git_pass(git_hash_ctx_init(&ctx, GIT_HASH_ALGORITHM_SHA1));
 	cl_git_pass(git_hash_update(&ctx, buf.ptr, buf.size));
-	cl_git_pass(git_hash_final(hash.id, &ctx));
+	cl_git_pass(git_hash_final(hash, &ctx));
 	git_hash_ctx_cleanup(&ctx);
 
 	git_str_dispose(&path);
 	git_str_dispose(&buf);
 
-	git_oid_fmt(hex, &hash);
-
+	git_hash_fmt(hex, hash, GIT_HASH_SHA1_SIZE);
 	cl_assert_equal_s(hex, "5d410bdf97cf896f9007681b92868471d636954b");
 }