Allow for caching of submodules. Added `git_repository_submodule_cache_all` to initialze a cache of submodules on the repository so that operations looking up N submodules are O(N) and not O(N^2). Added a `git_repository_submodule_cache_clear` function to remove the cache. Also optimized the function that loads all submodules as it was itself O(N^2) w.r.t the number of submodules, having to loop through the `.gitmodules` file once per submodule. I changed it to process the `.gitmodules` file once, into a map. Signed-off-by: David Turner <dturner@twosigma.com>
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384
diff --git a/include/git2/sys/repository.h b/include/git2/sys/repository.h
index 800396c..0c91421 100644
--- a/include/git2/sys/repository.h
+++ b/include/git2/sys/repository.h
@@ -135,6 +135,35 @@ GIT_EXTERN(void) git_repository_set_index(git_repository *repo, git_index *index
*/
GIT_EXTERN(int) git_repository_set_bare(git_repository *repo);
+/**
+ * Load and cache all submodules.
+ *
+ * Because the `.gitmodules` file is unstructured, loading submodules is an
+ * O(N) operation. Any operation (such as `git_rebase_init`) that requires
+ * accessing all submodules is O(N^2) in the number of submodules, if it
+ * has to look each one up individually. This function loads all submodules
+ * and caches them so that subsequent calls to `git_submodule_lookup` are O(1).
+ *
+ * @param repo the repository whose submodules will be cached.
+ */
+GIT_EXTERN(int) git_repository_submodule_cache_all(
+ git_repository *repo);
+
+/**
+ * Clear the submodule cache.
+ *
+ * Clear the submodule cache populated by `git_repository_submodule_cache_all`.
+ * If there is no cache, do nothing.
+ *
+ * The cache incorporates data from the repository's configuration, as well
+ * as the state of the working tree, the index, and HEAD. So any time any
+ * of these has changed, the cache might become invalid.
+ *
+ * @param repo the repository whose submodule cache will be cleared
+ */
+GIT_EXTERN(int) git_repository_submodule_cache_clear(
+ git_repository *repo);
+
/** @} */
GIT_END_DECL
#endif
diff --git a/src/repository.c b/src/repository.c
index 6029919..2185632 100644
--- a/src/repository.c
+++ b/src/repository.c
@@ -27,6 +27,10 @@
#include "merge.h"
#include "diff_driver.h"
#include "annotated_commit.h"
+#include "submodule.h"
+
+GIT__USE_STRMAP
+#include "strmap.h"
#ifdef GIT_WIN32
# include "win32/w32_util.h"
@@ -109,6 +113,7 @@ void git_repository__cleanup(git_repository *repo)
{
assert(repo);
+ git_repository_submodule_cache_clear(repo);
git_cache_clear(&repo->objects);
git_attr_cache_flush(repo);
@@ -2541,3 +2546,31 @@ int git_repository_set_ident(git_repository *repo, const char *name, const char
return 0;
}
+
+int git_repository_submodule_cache_all(git_repository *repo)
+{
+ int error;
+
+ assert(repo);
+
+ if ((error = git_strmap_alloc(&repo->submodule_cache)))
+ return error;
+
+ error = git_submodule__map(repo, repo->submodule_cache);
+ return error;
+}
+
+int git_repository_submodule_cache_clear(git_repository *repo)
+{
+ git_submodule *sm;
+ assert(repo);
+ if (repo->submodule_cache == NULL) {
+ return 0;
+ }
+ git_strmap_foreach_value(repo->submodule_cache, sm, {
+ git_submodule_free(sm);
+ });
+ git_strmap_free(repo->submodule_cache);
+ repo->submodule_cache = 0;
+ return 0;
+}
diff --git a/src/repository.h b/src/repository.h
index b259bea..9d276f3 100644
--- a/src/repository.h
+++ b/src/repository.h
@@ -143,6 +143,7 @@ struct git_repository {
git_atomic attr_session_key;
git_cvar_value cvar_cache[GIT_CVAR_CACHE_MAX];
+ git_strmap *submodule_cache;
};
GIT_INLINE(git_attr_cache *) git_repository_attr_cache(git_repository *repo)
diff --git a/src/submodule.c b/src/submodule.c
index 46462f1..6996006 100644
--- a/src/submodule.c
+++ b/src/submodule.c
@@ -149,40 +149,53 @@ static int find_by_path(const git_config_entry *entry, void *payload)
}
/**
- * Find out the name of a submodule from its path
+ * Release the name map returned by 'load_submodule_names'.
*/
-static int name_from_path(git_buf *out, git_config *cfg, const char *path)
+static void free_submodule_names(git_strmap *names)
+{
+ git_buf *name = 0;
+ if (names == NULL)
+ return;
+ git_strmap_foreach_value(names, name, {
+ git__free(name);
+ });
+ git_strmap_free(names);
+ return;
+}
+
+/**
+ * Map submodule paths to names.
+ * TODO: for some use-cases, this might need case-folding on a
+ * case-insensitive filesystem
+ */
+static int load_submodule_names(git_strmap *out, git_config *cfg)
{
const char *key = "submodule\\..*\\.path";
git_config_iterator *iter;
git_config_entry *entry;
- int error;
+ git_buf buf = GIT_BUF_INIT;
+ int rval;
+ int error = 0;
if ((error = git_config_iterator_glob_new(&iter, cfg, key)) < 0)
return error;
- while ((error = git_config_next(&entry, iter)) == 0) {
+ while (git_config_next(&entry, iter) == 0) {
const char *fdot, *ldot;
- /* TODO: this should maybe be strcasecmp on a case-insensitive fs */
- if (strcmp(path, entry->value) != 0)
- continue;
-
fdot = strchr(entry->name, '.');
ldot = strrchr(entry->name, '.');
- git_buf_clear(out);
- git_buf_put(out, fdot + 1, ldot - fdot - 1);
- goto cleanup;
- }
-
- if (error == GIT_ITEROVER) {
- giterr_set(GITERR_SUBMODULE, "could not find a submodule name for '%s'", path);
- error = GIT_ENOTFOUND;
+ git_buf_put(&buf, fdot + 1, ldot - fdot - 1);
+ git_strmap_insert(out, entry->value, git_buf_detach(&buf), rval);
+ if (rval < 0) {
+ giterr_set(GITERR_NOMEMORY, "Error inserting submodule into hash table");
+ free_submodule_names(out);
+ return -1;
+ }
}
-cleanup:
git_config_iterator_free(iter);
- return error;
+ return 0;
}
int git_submodule_lookup(
@@ -196,6 +209,17 @@ int git_submodule_lookup(
assert(repo && name);
+ if (repo->submodule_cache != NULL) {
+ khiter_t pos = git_strmap_lookup_index(repo->submodule_cache, name);
+ if (git_strmap_valid_index(repo->submodule_cache, pos)) {
+ if (out) {
+ *out = git_strmap_value_at(repo->submodule_cache, pos);
+ GIT_REFCOUNT_INC(*out);
+ }
+ return 0;
+ }
+ }
+
if ((error = submodule_alloc(&sm, repo, name)) < 0)
return error;
@@ -327,20 +351,18 @@ static int submodules_from_index(git_strmap *map, git_index *idx, git_config *cf
int error;
git_iterator *i;
const git_index_entry *entry;
- git_buf name = GIT_BUF_INIT;
+ git_strmap *names = 0;
+ git_strmap_alloc(&names);
+ if ((error = load_submodule_names(names, cfg)))
+ goto done;
if ((error = git_iterator_for_index(&i, git_index_owner(idx), idx, NULL)) < 0)
- return error;
+ goto done;
while (!(error = git_iterator_advance(&entry, i))) {
khiter_t pos = git_strmap_lookup_index(map, entry->path);
git_submodule *sm;
- git_buf_clear(&name);
- if (!name_from_path(&name, cfg, entry->path)) {
- git_strmap_lookup_index(map, name.ptr);
- }
-
if (git_strmap_valid_index(map, pos)) {
sm = git_strmap_value_at(map, pos);
@@ -349,7 +371,17 @@ static int submodules_from_index(git_strmap *map, git_index *idx, git_config *cf
else
sm->flags |= GIT_SUBMODULE_STATUS__INDEX_NOT_SUBMODULE;
} else if (S_ISGITLINK(entry->mode)) {
- if (!submodule_get_or_create(&sm, git_index_owner(idx), map, name.ptr ? name.ptr : entry->path)) {
+ khiter_t name_pos;
+ const char *name;
+
+ name_pos = git_strmap_lookup_index(names, entry->path);
+ if (git_strmap_valid_index(names, name_pos)) {
+ name = git_strmap_value_at(names, name_pos);
+ } else {
+ name = entry->path;
+ }
+
+ if (!submodule_get_or_create(&sm, git_index_owner(idx), map, name)) {
submodule_update_from_index_entry(sm, entry);
git_submodule_free(sm);
}
@@ -359,8 +391,9 @@ static int submodules_from_index(git_strmap *map, git_index *idx, git_config *cf
if (error == GIT_ITEROVER)
error = 0;
- git_buf_free(&name);
+done:
git_iterator_free(i);
+ free_submodule_names(names);
return error;
}
@@ -370,20 +403,18 @@ static int submodules_from_head(git_strmap *map, git_tree *head, git_config *cfg
int error;
git_iterator *i;
const git_index_entry *entry;
- git_buf name = GIT_BUF_INIT;
+ git_strmap *names = 0;
+ git_strmap_alloc(&names);
+ if ((error = load_submodule_names(names, cfg)))
+ goto done;
if ((error = git_iterator_for_tree(&i, head, NULL)) < 0)
- return error;
+ goto done;
while (!(error = git_iterator_advance(&entry, i))) {
khiter_t pos = git_strmap_lookup_index(map, entry->path);
git_submodule *sm;
- git_buf_clear(&name);
- if (!name_from_path(&name, cfg, entry->path)) {
- git_strmap_lookup_index(map, name.ptr);
- }
-
if (git_strmap_valid_index(map, pos)) {
sm = git_strmap_value_at(map, pos);
@@ -392,7 +423,17 @@ static int submodules_from_head(git_strmap *map, git_tree *head, git_config *cfg
else
sm->flags |= GIT_SUBMODULE_STATUS__HEAD_NOT_SUBMODULE;
} else if (S_ISGITLINK(entry->mode)) {
- if (!submodule_get_or_create(&sm, git_tree_owner(head), map, name.ptr ? name.ptr : entry->path)) {
+ khiter_t name_pos;
+ const char *name;
+
+ name_pos = git_strmap_lookup_index(names, entry->path);
+ if (git_strmap_valid_index(names, name_pos)) {
+ name = git_strmap_value_at(names, name_pos);
+ } else {
+ name = entry->path;
+ }
+
+ if (!submodule_get_or_create(&sm, git_tree_owner(head), map, name)) {
submodule_update_from_head_data(
sm, entry->mode, &entry->id);
git_submodule_free(sm);
@@ -403,8 +444,9 @@ static int submodules_from_head(git_strmap *map, git_tree *head, git_config *cfg
if (error == GIT_ITEROVER)
error = 0;
- git_buf_free(&name);
+done:
git_iterator_free(i);
+ free_submodule_names(names);
return error;
}
@@ -416,7 +458,7 @@ typedef struct {
git_repository *repo;
} lfc_data;
-static int all_submodules(git_repository *repo, git_strmap *map)
+int git_submodule__map(git_repository *repo, git_strmap *map)
{
int error = 0;
git_index *idx = NULL;
@@ -509,7 +551,7 @@ int git_submodule_foreach(
if ((error = git_strmap_alloc(&submodules)) < 0)
return error;
- if ((error = all_submodules(repo, submodules)) < 0)
+ if ((error = git_submodule__map(repo, submodules)) < 0)
goto done;
if (!(error = git_vector_init(
@@ -1566,7 +1608,6 @@ int git_submodule_location(unsigned int *location, git_submodule *sm)
location, NULL, NULL, NULL, sm, GIT_SUBMODULE_IGNORE_ALL);
}
-
/*
* INTERNAL FUNCTIONS
*/
diff --git a/src/submodule.h b/src/submodule.h
index 2ef2031..456a939 100644
--- a/src/submodule.h
+++ b/src/submodule.h
@@ -143,4 +143,7 @@ extern int git_submodule_parse_ignore(
extern int git_submodule_parse_update(
git_submodule_update_t *out, const char *value);
+extern int git_submodule__map(
+ git_repository *repo,
+ git_strmap *map);
#endif
diff --git a/tests/submodule/lookup.c b/tests/submodule/lookup.c
index 148f927..e36fc44 100644
--- a/tests/submodule/lookup.c
+++ b/tests/submodule/lookup.c
@@ -388,3 +388,28 @@ void test_submodule_lookup__renamed(void)
cl_git_pass(git_submodule_foreach(g_repo, sm_lookup_cb, &data));
cl_assert_equal_i(8, data.count);
}
+
+void test_submodule_lookup_cached(void) {
+ git_submodule *sm;
+ git_submodule *sm2;
+ /* See that the simple tests still pass. */
+
+ git_repository_submodule_cache_all(g_repo);
+ test_submodule_lookup__simple_lookup();
+ git_repository_submodule_cache_clear(g_repo);
+ test_submodule_lookup__simple_lookup();
+
+ /* Check that subsequent calls return different objects when cached. */
+ git_repository_submodule_cache_all(g_repo);
+ cl_git_pass(git_submodule_lookup(&sm, g_repo, "sm_unchanged"));
+ cl_git_pass(git_submodule_lookup(&sm2, g_repo, "sm_unchanged"));
+ cl_assert_equal_p(sm, sm2);
+ git_submodule_free(sm2);
+
+ /* and that we get new objects again after clearing the cache. */
+ git_repository_submodule_cache_clear(g_repo);
+ cl_git_pass(git_submodule_lookup(&sm2, g_repo, "sm_unchanged"));
+ cl_assert(sm != sm2);
+ git_submodule_free(sm);
+ git_submodule_free(sm2);
+}