Commit 41da4e16eb01a7bb19c52bbc4251963be192ba69

lhchavez 2020-12-10T19:52:01

Cache the parsed submodule config when diffing This change makes that anything that calls `git_diff__from_iterators` (any of the `git_diff_xxx` functions) only need to parse the `.gitmodules` file once. This can be avoided by calling `git_repository_submodule_cache_all(...)`, but we can do that safely for the user with no change in semantics. Fixes: #5725

diff --git a/src/diff_generate.c b/src/diff_generate.c
index 745e5ae..bc7c266 100644
--- a/src/diff_generate.c
+++ b/src/diff_generate.c
@@ -680,6 +680,8 @@ typedef struct {
 	git_iterator *new_iter;
 	const git_index_entry *oitem;
 	const git_index_entry *nitem;
+	git_strmap *submodule_cache;
+	bool submodule_cache_initialized;
 } diff_in_progress;
 
 #define MODE_BITS_MASK 0000777
@@ -694,6 +696,7 @@ static int maybe_modified_submodule(
 	git_submodule *sub;
 	unsigned int sm_status = 0;
 	git_submodule_ignore_t ign = diff->base.opts.ignore_submodules;
+	git_strmap *submodule_cache = NULL;
 
 	*status = GIT_DELTA_UNMODIFIED;
 
@@ -701,8 +704,29 @@ static int maybe_modified_submodule(
 		ign == GIT_SUBMODULE_IGNORE_ALL)
 		return 0;
 
-	if ((error = git_submodule_lookup(
-			&sub, diff->base.repo, info->nitem->path)) < 0) {
+	if (diff->base.repo->submodule_cache != NULL) {
+		submodule_cache = diff->base.repo->submodule_cache;
+	} else {
+		if (!info->submodule_cache_initialized) {
+			info->submodule_cache_initialized = true;
+			/* Cache the submodule information to avoid having to parse it for every submodule. */
+			if (git_strmap_new(&info->submodule_cache) == 0) {
+				if (git_submodule__map(diff->base.repo, info->submodule_cache) < 0) {
+					/* If the caching failed for whatever reason, bail out and clean up. */
+					git_submodule *sm = NULL;
+					git_strmap_foreach_value(info->submodule_cache, sm, {
+						git_submodule_free(sm);
+					});
+					git_strmap_free(info->submodule_cache);
+					info->submodule_cache = NULL;
+				}
+			}
+		}
+		submodule_cache = info->submodule_cache;
+	}
+
+	if ((error = git_submodule__lookup_with_cache(
+			&sub, diff->base.repo, info->nitem->path, submodule_cache)) < 0) {
 
 		/* GIT_EEXISTS means dir with .git in it was found - ignore it */
 		if (error == GIT_EEXISTS) {
@@ -1192,7 +1216,7 @@ int git_diff__from_iterators(
 	const git_diff_options *opts)
 {
 	git_diff_generated *diff;
-	diff_in_progress info;
+	diff_in_progress info = {0};
 	int error = 0;
 
 	*out = NULL;
@@ -1260,6 +1284,13 @@ cleanup:
 		*out = &diff->base;
 	else
 		git_diff_free(&diff->base);
+	if (info.submodule_cache) {
+		git_submodule *sm = NULL;
+		git_strmap_foreach_value(info.submodule_cache, sm, {
+			git_submodule_free(sm);
+		});
+		git_strmap_free(info.submodule_cache);
+	}
 
 	return error;
 }
diff --git a/src/submodule.c b/src/submodule.c
index 0c901fa..3bbdeed 100644
--- a/src/submodule.c
+++ b/src/submodule.c
@@ -254,6 +254,15 @@ int git_submodule_lookup(
 	git_repository *repo,
 	const char *name)    /* trailing slash is allowed */
 {
+	return git_submodule__lookup_with_cache(out, repo, name, repo->submodule_cache);
+}
+
+int git_submodule__lookup_with_cache(
+	git_submodule **out, /* NULL if user only wants to test existence */
+	git_repository *repo,
+	const char *name,    /* trailing slash is allowed */
+	git_strmap *cache)
+{
 	int error;
 	unsigned int location;
 	git_submodule *sm;
@@ -266,8 +275,8 @@ int git_submodule_lookup(
 		return -1;
 	}
 
-	if (repo->submodule_cache != NULL) {
-		if ((sm = git_strmap_get(repo->submodule_cache, name)) != NULL) {
+	if (cache != NULL) {
+		if ((sm = git_strmap_get(cache, name)) != NULL) {
 			if (out) {
 				*out = sm;
 				GIT_REFCOUNT_INC(*out);
diff --git a/src/submodule.h b/src/submodule.h
index 57d95c3..f32f818 100644
--- a/src/submodule.h
+++ b/src/submodule.h
@@ -122,9 +122,9 @@ enum {
 #define GIT_SUBMODULE_STATUS__CLEAR_INTERNAL(S) \
 	((S) & ~(0xFFFFFFFFu << 20))
 
-/* Internal lookup does not attempt to refresh cached data */
-extern int git_submodule__lookup(
-	git_submodule **out, git_repository *repo, const char *path);
+/* Submodule lookup with an explicit cache */
+extern int git_submodule__lookup_with_cache(
+	git_submodule **out, git_repository *repo, const char *path, git_strmap *cache);
 
 /* Internal status fn returns status and optionally the various OIDs */
 extern int git_submodule__status(