Commit 79b11c6222d6aeffe4aa7d06aa4bf039f97100a3

Stefan Sperling 2018-03-09T17:01:49

add a pack index cache; speeds tree listing up quite a lot

diff --git a/lib/got_repository_priv.h b/lib/got_repository_priv.h
new file mode 100644
index 0000000..104167f
--- /dev/null
+++ b/lib/got_repository_priv.h
@@ -0,0 +1,25 @@
+/*
+ * Copyright (c) 2018 Stefan Sperling <stsp@openbsd.org>
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+#define GOT_PACKIDX_CACHE_SIZE	64	/* slots in packidx_cache below */
+
+struct got_repository {
+	char *path;	/* released with free() in got_repo_close() */
+	/* The pack index cache speeds up search for packed objects. */
+	/* Entries fill from slot 0; lookups stop at the first NULL slot. */
+	struct got_packidx_v2_hdr *packidx_cache[GOT_PACKIDX_CACHE_SIZE];
+};
+
diff --git a/lib/pack.c b/lib/pack.c
index 57e1edc..f6ad8c9 100644
--- a/lib/pack.c
+++ b/lib/pack.c
@@ -39,6 +39,11 @@
 #include "got_delta_priv.h"
 #include "got_zb_priv.h"
 #include "got_object_priv.h"
+#include "got_repository_priv.h"
+
+#ifndef nitems	/* fallback, used only when nitems is not already defined */
+#define nitems(_a) (sizeof(_a) / sizeof((_a)[0]))	/* true arrays only */
+#endif
 
 #define GOT_PACK_PREFIX		"pack-"
 #define GOT_PACKFILE_SUFFIX	".pack"
@@ -308,6 +313,81 @@ get_object_idx(struct got_packidx_v2_hdr *packidx, struct got_object_id *id)
 	return -1;
 }
 
+/* Deep-copy packidx and its tables; returns NULL if an allocation fails. */
+static struct got_packidx_v2_hdr *
+dup_packidx(struct got_packidx_v2_hdr *packidx)
+{
+	struct got_packidx_v2_hdr *p;
+	size_t nobj;
+
+	p = calloc(1, sizeof(*p));
+	if (p == NULL)
+		return NULL;
+	/* Clear the copied table pointers so err: never frees packidx's tables. */
+	memcpy(p, packidx, sizeof(*p));
+	p->sorted_ids = NULL;
+	p->crc32 = NULL;
+	p->offsets = NULL;
+	p->large_offsets = NULL;
+	/* The last fanout entry (stored big-endian) is the table entry count. */
+	nobj = betoh32(p->fanout_table[0xff]);
+	p->sorted_ids = calloc(nobj, sizeof(*p->sorted_ids));
+	if (p->sorted_ids == NULL)
+		goto err;
+	memcpy(p->sorted_ids, packidx->sorted_ids, nobj * sizeof(*p->sorted_ids));
+
+	p->crc32 = calloc(nobj, sizeof(*p->crc32));
+	if (p->crc32 == NULL)
+		goto err;
+	memcpy(p->crc32, packidx->crc32, nobj * sizeof(*p->crc32));
+
+	p->offsets = calloc(nobj, sizeof(*p->offsets));
+	if (p->offsets == NULL)
+		goto err;
+	memcpy(p->offsets, packidx->offsets, nobj * sizeof(*p->offsets));
+	/* Test the source table; p->large_offsets was cleared above. */
+	if (packidx->large_offsets) {
+		/* NOTE(review): assumes nobj entries here too -- confirm. */
+		p->large_offsets = calloc(nobj, sizeof(*p->large_offsets));
+		if (p->large_offsets == NULL)
+			goto err;
+		memcpy(p->large_offsets, packidx->large_offsets,
+		    nobj * sizeof(*p->large_offsets));
+	}
+
+	return p;
+err:
+	free(p->large_offsets);
+	free(p->offsets);
+	free(p->crc32);
+	free(p->sorted_ids);
+	free(p);
+	return NULL;
+}
+
+/* Cache a private copy of packidx; on a failed copy the cache is untouched. */
+static void
+cache_packidx(struct got_packidx_v2_hdr *packidx,
+    struct got_repository *repo)
+{
+	struct got_packidx_v2_hdr *p = dup_packidx(packidx);
+	size_t i, n = nitems(repo->packidx_cache);
+
+	/* Lookups stop at the first NULL slot, so never store a failed copy. */
+	if (p == NULL)
+		return;
+	for (i = 0; i < n && repo->packidx_cache[i] != NULL; i++)
+		continue;
+	if (i == n) {
+		/* Full: close the last entry, shift the rest up, reuse slot 0. */
+		got_packidx_close(repo->packidx_cache[n - 1]);
+		memmove(&repo->packidx_cache[1], &repo->packidx_cache[0],
+		    (n - 1) * sizeof(repo->packidx_cache[0]));
+		i = 0;
+	}
+	repo->packidx_cache[i] = p;
+}
+
 static const struct got_error *
 search_packidx(struct got_packidx_v2_hdr **packidx, int *idx,
     struct got_repository *repo, struct got_object_id *id)
@@ -317,6 +397,21 @@ search_packidx(struct got_packidx_v2_hdr **packidx, int *idx,
 	DIR *packdir;
 	struct dirent *dent;
 	char *path_packidx;
+	int i;
+
+	/* Search pack index cache. */
+	for (i = 0; i < nitems(repo->packidx_cache); i++) {
+		if (repo->packidx_cache[i] == NULL)
+			break;
+		*idx = get_object_idx(repo->packidx_cache[i], id);
+		if (*idx != -1) {
+			*packidx = dup_packidx(repo->packidx_cache[i]);
+			if (*packidx == NULL)
+				*idx = -1;
+			return NULL;
+		}
+	}
+	/* No luck. Search the filesystem. */
 
 	path_packdir = got_repo_get_path_objects_pack(repo);
 	if (path_packdir == NULL)
@@ -346,6 +441,7 @@ search_packidx(struct got_packidx_v2_hdr **packidx, int *idx,
 		*idx = get_object_idx(*packidx, id);
 		if (*idx != -1) {
 			err = NULL; /* found the object */
+			cache_packidx(*packidx, repo);
 			goto done;
 		}
 
diff --git a/lib/repository.c b/lib/repository.c
index 677e411..9037504 100644
--- a/lib/repository.c
+++ b/lib/repository.c
@@ -14,21 +14,29 @@
  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
  */
 
+#include <sys/queue.h>
+
 #include <limits.h>
 #include <stdlib.h>
 #include <stdio.h>
 #include <sha1.h>
 #include <string.h>
+#include <zlib.h>
 
 #include "got_error.h"
 #include "got_refs.h"
 #include "got_repository.h"
 
 #include "got_path_priv.h"
+#include "got_repository_priv.h"
+#include "got_zb_priv.h"
+#include "got_delta_priv.h"
+#include "got_object_priv.h"
+#include "got_pack_priv.h"
 
-struct got_repository {
-	char *path;
-};
+#ifndef nitems	/* fallback, used only when nitems is not already defined */
+#define nitems(_a) (sizeof(_a) / sizeof((_a)[0]))	/* true arrays only */
+#endif
 
 #define GOT_GIT_DIR	".git"
 
@@ -151,6 +159,13 @@ done:
 void
 got_repo_close(struct got_repository *repo)
 {
+	size_t i;
+
+	/* Close every cached pack index; entries need not be contiguous. */
+	for (i = 0; i < nitems(repo->packidx_cache); i++) {
+		if (repo->packidx_cache[i] != NULL)
+			got_packidx_close(repo->packidx_cache[i]);
+	}
 	free(repo->path);
 	free(repo);
 }