Commit ea285904dcb1350d99703df86a5f38662935cbbc

lhchavez 2020-02-18T00:02:13

midx: Introduce git_odb_write_multi_pack_index() This change introduces git_odb_write_multi_pack_index(), which creates a `multi-pack-index` file from all the `.pack` files that have been loaded in the ODB. Fixes: #5399

diff --git a/include/git2/odb.h b/include/git2/odb.h
index 702e1bd..dd48455 100644
--- a/include/git2/odb.h
+++ b/include/git2/odb.h
@@ -391,6 +391,20 @@ GIT_EXTERN(int) git_odb_write_pack(
 	void *progress_payload);
 
 /**
+ * Write a `multi-pack-index` file from all the `.pack` files in the ODB.
+ *
+ * If the ODB layer understands pack files, this creates a file called
+ * `multi-pack-index` next to the `.pack` and `.idx` files, containing an
+ * index of all objects stored in `.pack` files. This allows O(log n) lookup
+ * for n objects, regardless of how many packfiles exist.
+ *
+ * @param db object database where the `multi-pack-index` file will be written.
+ * @return 0 on success or an error code
+ */
+GIT_EXTERN(int) git_odb_write_multi_pack_index(
+	git_odb *db);
+
+/**
  * Determine the object-ID (sha1 hash) of a data buffer
  *
  * The resulting SHA-1 OID will be the identifier for the data
diff --git a/include/git2/sys/odb_backend.h b/include/git2/sys/odb_backend.h
index 4dba460..9ae0ed9 100644
--- a/include/git2/sys/odb_backend.h
+++ b/include/git2/sys/odb_backend.h
@@ -85,6 +85,13 @@ struct git_odb_backend {
 		git_indexer_progress_cb progress_cb, void *progress_payload);
 
 	/**
+	 * If the backend supports pack files, this will create a
+	 * `multi-pack-index` file which will contain an index of all objects
+	 * across all the `.pack` files.
+	 */
+	int GIT_CALLBACK(writemidx)(git_odb_backend *);
+
+	/**
 	 * "Freshens" an already existing object, updating its last-used
 	 * time.  This occurs when `git_odb_write` was called, but the
 	 * object already existed (and will not be re-written).  The
diff --git a/src/odb.c b/src/odb.c
index e3a5381..7834e5f 100644
--- a/src/odb.c
+++ b/src/odb.c
@@ -1703,6 +1703,35 @@ int git_odb_write_pack(struct git_odb_writepack **out, git_odb *db, git_indexer_
 	return error;
 }
 
+/* Ask the ODB's non-alternate backends to write a `multi-pack-index`. */
+int git_odb_write_multi_pack_index(git_odb *db)
+{
+	size_t pos, attempts = 0;
+	int error = GIT_ERROR;
+
+	GIT_ASSERT_ARG(db);
+
+	/* Keep trying backends until one of them returns a non-negative result. */
+	for (pos = 0; pos < db->backends.length && error < 0; ++pos) {
+		backend_internal *entry = git_vector_get(&db->backends, pos);
+		git_odb_backend *target = entry->backend;
+
+		/* Alternates are never written to; backends without writemidx are skipped. */
+		if (entry->is_alternate || target->writemidx == NULL)
+			continue;
+
+		++attempts;
+		error = target->writemidx(target);
+	}
+
+	if (error == GIT_PASSTHROUGH)
+		return 0;
+	if (error < 0 && attempts == 0)
+		return git_odb__error_unsupported_in_backend("write multi-pack-index");
+
+	return error;
+}
+
 void *git_odb_backend_data_alloc(git_odb_backend *backend, size_t len)
 {
 	GIT_UNUSED(backend);
diff --git a/src/odb_pack.c b/src/odb_pack.c
index 3df8a42..f4cb9a5 100644
--- a/src/odb_pack.c
+++ b/src/odb_pack.c
@@ -402,7 +402,6 @@ static int process_multi_pack_index_pack(
 		const char *packfile_name)
 {
 	int error;
-	size_t cmp_len = strlen(packfile_name);
 	struct git_pack_file *pack;
 	size_t found_position;
 	git_buf pack_path = GIT_BUF_INIT, index_prefix = GIT_BUF_INIT;
@@ -411,12 +410,11 @@ static int process_multi_pack_index_pack(
 	if (error < 0)
 		return error;
 
-	/* This is ensured by midx__parse_packfile_name() */
-	if (cmp_len <= strlen(".idx") || git__suffixcmp(git_buf_cstr(&pack_path), ".idx") != 0)
+	/* This is ensured by midx_parse_packfile_name() */
+	if (git_buf_len(&pack_path) <= strlen(".idx") || git__suffixcmp(git_buf_cstr(&pack_path), ".idx") != 0)
 		return git_odb__error_notfound("midx file contained a non-index", NULL, 0);
 
-	cmp_len -= strlen(".idx");
-	git_buf_attach_notowned(&index_prefix, git_buf_cstr(&pack_path), cmp_len);
+	git_buf_attach_notowned(&index_prefix, git_buf_cstr(&pack_path), git_buf_len(&pack_path) - strlen(".idx"));
 
 	if (git_vector_search2(&found_position, &backend->packs, packfile_byname_search_cmp, &index_prefix) == 0) {
 		/* Pack was found in the packs list. Moving it to the midx_packs list. */
@@ -744,6 +742,81 @@ static int pack_backend__writepack(struct git_odb_writepack **out,
 	return 0;
 }
 
+/* Set `idx_path` to the prettified path of pack `p` with `.pack` replaced by `.idx`. */
+static int get_idx_path(
+		git_buf *idx_path,
+		struct pack_backend *backend,
+		struct git_pack_file *p)
+{
+	const size_t pack_ext_len = strlen(".pack");
+	int error;
+
+	if ((error = git_path_prettify(idx_path, p->pack_name, backend->pack_folder)) < 0)
+		return error;
+
+	/* Refuse paths that are not a non-empty prefix followed by `.pack`. */
+	if (git_buf_len(idx_path) <= pack_ext_len || git__suffixcmp(git_buf_cstr(idx_path), ".pack") != 0)
+		return git_odb__error_notfound("packfile does not end in .pack", NULL, 0);
+
+	/* Splice `.idx` over the trailing `.pack`. */
+	error = git_buf_splice(
+		idx_path, git_buf_len(idx_path) - pack_ext_len, pack_ext_len, ".idx", strlen(".idx"));
+	return (error < 0) ? error : 0;
+}
+
+/* Write a `multi-pack-index` covering the backend's `midx_packs` and `packs` lists. */
+static int pack_backend__writemidx(git_odb_backend *_backend)
+{
+	struct pack_backend *backend;
+	git_midx_writer *w = NULL;
+	struct git_pack_file *p;
+	size_t i;
+	int error = 0;
+
+	GIT_ASSERT_ARG(_backend);
+
+	backend = (struct pack_backend *)_backend;
+
+	error = git_midx_writer_new(&w, backend->pack_folder);
+	if (error < 0)
+		return error;
+
+	/* Register the index of every pack: `midx_packs` first, then `packs`. */
+	git_vector_foreach(&backend->midx_packs, i, p) {
+		git_buf idx_path = GIT_BUF_INIT;
+		error = get_idx_path(&idx_path, backend, p);
+		if (error >= 0)
+			error = git_midx_writer_add(w, git_buf_cstr(&idx_path));
+		/* Dispose on every path: get_idx_path() can fail with the buffer filled. */
+		git_buf_dispose(&idx_path);
+		if (error < 0)
+			goto cleanup;
+	}
+	git_vector_foreach(&backend->packs, i, p) {
+		git_buf idx_path = GIT_BUF_INIT;
+		error = get_idx_path(&idx_path, backend, p);
+		if (error >= 0)
+			error = git_midx_writer_add(w, git_buf_cstr(&idx_path));
+		/* Dispose on every path: get_idx_path() can fail with the buffer filled. */
+		git_buf_dispose(&idx_path);
+		if (error < 0)
+			goto cleanup;
+	}
+
+	/* Invalidate the previous midx before writing the new one. */
+	error = remove_multi_pack_index(backend);
+	if (error < 0)
+		goto cleanup;
+	error = git_midx_writer_commit(w);
+	if (error < 0)
+		goto cleanup;
+	error = refresh_multi_pack_index(backend);
+
+cleanup:
+	git_midx_writer_free(w);
+	return error;
+}
+
 static void pack_backend__free(git_odb_backend *_backend)
 {
 	struct pack_backend *backend;
@@ -792,6 +865,7 @@ static int pack_backend__alloc(struct pack_backend **out, size_t initial_size)
 	backend->parent.refresh = &pack_backend__refresh;
 	backend->parent.foreach = &pack_backend__foreach;
 	backend->parent.writepack = &pack_backend__writepack;
+	backend->parent.writemidx = &pack_backend__writemidx;
 	backend->parent.freshen = &pack_backend__freshen;
 	backend->parent.free = &pack_backend__free;
 
diff --git a/tests/pack/midx.c b/tests/pack/midx.c
index 6e6c1a6..9e7bdb0 100644
--- a/tests/pack/midx.c
+++ b/tests/pack/midx.c
@@ -3,6 +3,7 @@
 #include <git2.h>
 #include <git2/sys/midx.h>
 
+#include "futils.h"
 #include "midx.h"
 
 void test_pack_midx__parse(void)
@@ -74,3 +75,36 @@ void test_pack_midx__writer(void)
 	git_midx_writer_free(w);
 	git_repository_free(repo);
 }
+
+/* git_odb_write_multi_pack_index() on a fresh bare clone must create the expected midx. */
+void test_pack_midx__odb_create(void)
+{
+	git_repository *repo;
+	git_odb *odb;
+	git_clone_options opts = GIT_CLONE_OPTIONS_INIT;
+	git_buf midx = GIT_BUF_INIT, expected_midx = GIT_BUF_INIT, midx_path = GIT_BUF_INIT;
+	struct stat st;
+
+	opts.bare = true;
+	opts.local = GIT_CLONE_LOCAL;
+	cl_git_pass(git_clone(&repo, cl_fixture("testrepo/.gitted"), "./clone.git", &opts));
+	cl_git_pass(git_buf_joinpath(&midx_path, git_repository_path(repo), "objects/pack/multi-pack-index"));
+	cl_git_fail(p_stat(git_buf_cstr(&midx_path), &st));
+	/* The stat above must fail (no midx yet); after the ODB write it must succeed. */
+	cl_git_pass(git_repository_odb(&odb, repo));
+	cl_git_pass(git_odb_write_multi_pack_index(odb));
+	git_odb_free(odb);
+	cl_git_pass(p_stat(git_buf_cstr(&midx_path), &st));
+	/* The written file must match the midx stored in the `testrepo.git` fixture. */
+	cl_git_pass(git_futils_readbuffer(&expected_midx, cl_fixture("testrepo.git/objects/pack/multi-pack-index")));
+	cl_git_pass(git_futils_readbuffer(&midx, git_buf_cstr(&midx_path)));
+	cl_assert_equal_i(git_buf_len(&midx), git_buf_len(&expected_midx));
+	cl_assert_equal_strn(git_buf_cstr(&midx), git_buf_cstr(&expected_midx), git_buf_len(&midx));
+
+	git_repository_free(repo);
+	git_buf_dispose(&midx);
+	git_buf_dispose(&midx_path);
+	git_buf_dispose(&expected_midx);
+
+	cl_git_pass(git_futils_rmdir_r("./clone.git", NULL, GIT_RMDIR_REMOVE_FILES));
+}