Commit d185ab24af9498cfce524e53bb82229079ad2af5

Edward Thomson 2020-12-13T15:18:12

Merge pull request #5727 from lhchavez/make-git-diff-fast Cache the parsed submodule config when diffing

diff --git a/src/diff_generate.c b/src/diff_generate.c
index 745e5ae..f05ae37 100644
--- a/src/diff_generate.c
+++ b/src/diff_generate.c
@@ -680,6 +680,8 @@ typedef struct {
 	git_iterator *new_iter;
 	const git_index_entry *oitem;
 	const git_index_entry *nitem;
+	git_strmap *submodule_cache;
+	bool submodule_cache_initialized;
 } diff_in_progress;
 
 #define MODE_BITS_MASK 0000777
@@ -694,6 +696,7 @@ static int maybe_modified_submodule(
 	git_submodule *sub;
 	unsigned int sm_status = 0;
 	git_submodule_ignore_t ign = diff->base.opts.ignore_submodules;
+	git_strmap *submodule_cache = NULL;
 
 	*status = GIT_DELTA_UNMODIFIED;
 
@@ -701,8 +704,23 @@ static int maybe_modified_submodule(
 		ign == GIT_SUBMODULE_IGNORE_ALL)
 		return 0;
 
-	if ((error = git_submodule_lookup(
-			&sub, diff->base.repo, info->nitem->path)) < 0) {
+	if (diff->base.repo->submodule_cache != NULL) {
+		submodule_cache = diff->base.repo->submodule_cache;
+	} else {
+		if (!info->submodule_cache_initialized) {
+			info->submodule_cache_initialized = true;
+			/*
+			 * Try to cache the submodule information to avoid having to parse it for
+			 * every submodule. It is okay if it fails, the cache will still be NULL
+			 * and the submodules will be attempted to be looked up individually.
+			 */
+			git_submodule_cache_init(&info->submodule_cache, diff->base.repo);
+		}
+		submodule_cache = info->submodule_cache;
+	}
+
+	if ((error = git_submodule__lookup_with_cache(
+			&sub, diff->base.repo, info->nitem->path, submodule_cache)) < 0) {
 
 		/* GIT_EEXISTS means dir with .git in it was found - ignore it */
 		if (error == GIT_EEXISTS) {
@@ -1192,7 +1210,7 @@ int git_diff__from_iterators(
 	const git_diff_options *opts)
 {
 	git_diff_generated *diff;
-	diff_in_progress info;
+	diff_in_progress info = {0};
 	int error = 0;
 
 	*out = NULL;
@@ -1260,6 +1278,8 @@ cleanup:
 		*out = &diff->base;
 	else
 		git_diff_free(&diff->base);
+	if (info.submodule_cache)
+		git_submodule_cache_free(info.submodule_cache);
 
 	return error;
 }
diff --git a/src/repository.c b/src/repository.c
index 3b6bcbe..948413d 100644
--- a/src/repository.c
+++ b/src/repository.c
@@ -3055,30 +3055,16 @@ int git_repository_set_ident(git_repository *repo, const char *name, const char 
 
 int git_repository_submodule_cache_all(git_repository *repo)
 {
-	int error;
-
 	GIT_ASSERT_ARG(repo);
-
-	if ((error = git_strmap_new(&repo->submodule_cache)))
-		return error;
-
-	error = git_submodule__map(repo, repo->submodule_cache);
-	return error;
+	return git_submodule_cache_init(&repo->submodule_cache, repo);
 }
 
 int git_repository_submodule_cache_clear(git_repository *repo)
 {
-	git_submodule *sm;
-
+	int error = 0;
 	GIT_ASSERT_ARG(repo);
 
-	if (repo->submodule_cache == NULL) {
-		return 0;
-	}
-	git_strmap_foreach_value(repo->submodule_cache, sm, {
-		git_submodule_free(sm);
-	});
-	git_strmap_free(repo->submodule_cache);
-	repo->submodule_cache = 0;
-	return 0;
+	error = git_submodule_cache_free(repo->submodule_cache);
+	repo->submodule_cache = NULL;
+	return error;
 }
diff --git a/src/submodule.c b/src/submodule.c
index 0c901fa..50bde2c 100644
--- a/src/submodule.c
+++ b/src/submodule.c
@@ -249,11 +249,48 @@ out:
 	return error;
 }
 
+int git_submodule_cache_init(git_strmap **out, git_repository *repo)
+{
+	int error = 0;
+	git_strmap *cache = NULL;
+	GIT_ASSERT_ARG(out);
+	GIT_ASSERT_ARG(repo);
+	if ((error = git_strmap_new(&cache)) < 0)
+		return error;
+	if ((error = git_submodule__map(repo, cache)) < 0) {
+		git_submodule_cache_free(cache);
+		return error;
+	}
+	*out = cache;
+	return error;
+}
+
+int git_submodule_cache_free(git_strmap *cache)
+{
+	git_submodule *sm = NULL;
+	if (cache == NULL)
+		return 0;
+	git_strmap_foreach_value(cache, sm, {
+		git_submodule_free(sm);
+	});
+	git_strmap_free(cache);
+	return 0;
+}
+
 int git_submodule_lookup(
 	git_submodule **out, /* NULL if user only wants to test existence */
 	git_repository *repo,
 	const char *name)    /* trailing slash is allowed */
 {
+	return git_submodule__lookup_with_cache(out, repo, name, repo->submodule_cache);
+}
+
+int git_submodule__lookup_with_cache(
+	git_submodule **out, /* NULL if user only wants to test existence */
+	git_repository *repo,
+	const char *name,    /* trailing slash is allowed */
+	git_strmap *cache)
+{
 	int error;
 	unsigned int location;
 	git_submodule *sm;
@@ -266,8 +303,8 @@ int git_submodule_lookup(
 		return -1;
 	}
 
-	if (repo->submodule_cache != NULL) {
-		if ((sm = git_strmap_get(repo->submodule_cache, name)) != NULL) {
+	if (cache != NULL) {
+		if ((sm = git_strmap_get(cache, name)) != NULL) {
 			if (out) {
 				*out = sm;
 				GIT_REFCOUNT_INC(*out);
diff --git a/src/submodule.h b/src/submodule.h
index 57d95c3..b01ff68 100644
--- a/src/submodule.h
+++ b/src/submodule.h
@@ -101,12 +101,6 @@ struct git_submodule {
 	git_oid wd_oid;
 };
 
-/* Force revalidation of submodule data cache (alloc as needed) */
-extern int git_submodule_cache_refresh(git_repository *repo);
-
-/* Release all submodules */
-extern void git_submodule_cache_free(git_repository *repo);
-
 /* Additional flags on top of public GIT_SUBMODULE_STATUS values */
 enum {
 	GIT_SUBMODULE_STATUS__WD_SCANNED          = (1u << 20),
@@ -122,9 +116,15 @@ enum {
 #define GIT_SUBMODULE_STATUS__CLEAR_INTERNAL(S) \
 	((S) & ~(0xFFFFFFFFu << 20))
 
-/* Internal lookup does not attempt to refresh cached data */
-extern int git_submodule__lookup(
-	git_submodule **out, git_repository *repo, const char *path);
+/* Initialize an external submodule cache for the provided repo. */
+extern int git_submodule_cache_init(git_strmap **out, git_repository *repo);
+
+/* Release the resources of the submodule cache. */
+extern int git_submodule_cache_free(git_strmap *cache);
+
+/* Submodule lookup with an explicit cache */
+extern int git_submodule__lookup_with_cache(
+	git_submodule **out, git_repository *repo, const char *path, git_strmap *cache);
 
 /* Internal status fn returns status and optionally the various OIDs */
 extern int git_submodule__status(