Commit c0f4a0118dd3821447512bf3b404be69c773eaf8

Carlos Martín Nieto 2012-12-19T16:48:12

pack: introduce a delta base cache Many delta bases are re-used. Cache them to avoid inflating the same data repeatedly. This version doesn't limit the number of entries to store, so it can end up using a considerable amount of memory.

diff --git a/src/offmap.h b/src/offmap.h
new file mode 100644
index 0000000..cd46fd6
--- /dev/null
+++ b/src/offmap.h
@@ -0,0 +1,65 @@
+/*
+ * Copyright (C) 2012 the libgit2 contributors
+ *
+ * This file is part of libgit2, distributed under the GNU GPL v2 with
+ * a Linking Exception. For full terms see the included COPYING file.
+ */
+#ifndef INCLUDE_offmap_h__
+#define INCLUDE_offmap_h__
+
+#include "common.h"
+#include "git2/types.h"
+
+#define kmalloc git__malloc
+#define kcalloc git__calloc
+#define krealloc git__realloc
+#define kfree git__free
+#include "khash.h"
+/* git_offmap: khash-based map from git_off_t keys to void * values. */
+__KHASH_TYPE(off, git_off_t, void *);
+typedef khash_t(off) git_offmap;
+/* Expands to the khash implementation for the "off" map, declared static kh_inline. */
+#define GIT__USE_OFFMAP \
+	__KHASH_IMPL(off, static kh_inline, git_off_t, void *, 1, kh_int64_hash_func, kh_int64_hash_equal);
+/* NOTE: several macros below evaluate an argument more than once; avoid side effects. */
+#define git_offmap_alloc()  kh_init(off)
+#define git_offmap_free(h)  (kh_destroy(off, h), (h) = NULL)
+#define git_offmap_clear(h) kh_clear(off, h)
+
+#define git_offmap_num_entries(h) kh_size(h)
+
+#define git_offmap_lookup_index(h, k)  kh_get(off, h, k)
+#define git_offmap_valid_index(h, idx) ((idx) != kh_end(h))
+
+#define git_offmap_exists(h, k) (kh_get(off, h, k) != kh_end(h))
+
+#define git_offmap_value_at(h, idx)        kh_val(h, idx)
+#define git_offmap_set_value_at(h, idx, v) (kh_val(h, idx) = (v))
+#define git_offmap_delete_at(h, idx)       kh_del(off, h, idx)
+/* Store val under key; rval gets kh_put's status and nothing is stored if it is negative. */
+#define git_offmap_insert(h, key, val, rval) do { \
+	khiter_t __pos = kh_put(off, h, key, &(rval)); \
+	if ((rval) >= 0) { \
+		if ((rval) == 0) kh_key(h, __pos) = (key); \
+		kh_val(h, __pos) = (val); \
+	} } while (0)
+/* Like git_offmap_insert, but oldv gets the value at an rval == 0 slot, else NULL. */
+#define git_offmap_insert2(h, key, val, oldv, rval) do { \
+	khiter_t __pos = kh_put(off, h, key, &(rval)); \
+	if ((rval) >= 0) { \
+		if ((rval) == 0) { \
+			(oldv) = kh_val(h, __pos); \
+			kh_key(h, __pos) = (key); \
+		} else { (oldv) = NULL; } \
+		kh_val(h, __pos) = (val); \
+	} } while (0)
+/* Delete the entry for key when the lookup finds one; otherwise do nothing. */
+#define git_offmap_delete(h, key) do { \
+	khiter_t __pos = git_offmap_lookup_index(h, key); \
+	if (git_offmap_valid_index(h, __pos)) \
+		git_offmap_delete_at(h, __pos); } while (0)
+
+#define git_offmap_foreach		kh_foreach
+#define git_offmap_foreach_value	kh_foreach_value
+
+#endif
diff --git a/src/pack.c b/src/pack.c
index d4f8d72..f7ef42d 100644
--- a/src/pack.c
+++ b/src/pack.c
@@ -46,6 +46,29 @@ static int packfile_error(const char *message)
 	return -1;
 }
 
+
+static git_pack_cache_entry *new_cache_object(git_off_t off, git_rawobj *source)
+{
+	git_pack_cache_entry *entry = git__malloc(sizeof(*entry));
+
+	if (entry == NULL)
+		return NULL;
+	/* Struct copy: the entry now shares source's data pointer. */
+	entry->raw = *source;
+	entry->off = off;
+	return entry;
+}
+
+static void free_cache_object(void *o)
+{
+	git_pack_cache_entry *entry = o;
+
+	if (!entry) /* NULL is accepted and ignored */
+		return;
+	git__free(entry->raw.data); /* release the object buffer, then the entry */
+	git__free(entry);
+}
+
 /***********************************************************
  *
  * PACK INDEX METHODS
@@ -336,9 +359,11 @@ static int packfile_unpack_delta(
 		git_otype delta_type,
 		git_off_t obj_offset)
 {
-	git_off_t base_offset;
+	git_off_t base_offset, base_key;
 	git_rawobj base, delta;
-	int error;
+	git_pack_cache_entry *cached;
+	int error, found_base = 0;
+	khiter_t k;
 
 	base_offset = get_delta_base(p, w_curs, curpos, delta_type, obj_offset);
 	git_mwindow_close(w_curs);
@@ -347,33 +372,56 @@ static int packfile_unpack_delta(
 	if (base_offset < 0) /* must actually be an error code */
 		return (int)base_offset;
 
-	error = git_packfile_unpack(&base, p, &base_offset);
+	if (!p->bases) {
+		p->bases = git_offmap_alloc();
+		GITERR_CHECK_ALLOC(p->bases);
+	}
 
-	/*
-	 * TODO: git.git tries to load the base from other packfiles
-	 * or loose objects.
-	 *
-	 * We'll need to do this in order to support thin packs.
-	 */
-	if (error < 0)
-		return error;
+	base_key = base_offset; /* git_packfile_unpack modifies base_offset */
+	k = kh_get(off, p->bases, base_offset);
+	if (k != kh_end(p->bases)) { /* found it */
+		cached = kh_value(p->bases, k);
+		found_base = 1;
+		memcpy(&base, &cached->raw, sizeof(git_rawobj));
+	} else { /* have to inflate it */
+		error = git_packfile_unpack(&base, p, &base_offset);
+
+		/*
+		 * TODO: git.git tries to load the base from other packfiles
+		 * or loose objects.
+		 *
+		 * We'll need to do this in order to support thin packs.
+		 */
+		if (error < 0)
+			return error;
+	}
 
 	error = packfile_unpack_compressed(&delta, p, w_curs, curpos, delta_size, delta_type);
 	git_mwindow_close(w_curs);
+
 	if (error < 0) {
-		git__free(base.data);
+		if (!found_base)
+			git__free(base.data);
 		return error;
 	}
 
 	obj->type = base.type;
 	error = git__delta_apply(obj, base.data, base.len, delta.data, delta.len);
+	if (error < 0)
+		goto on_error;
+
+	if (!found_base) {
+		cached = new_cache_object(base_key, &base);
+		if (cached) {
+			k = kh_put(off, p->bases, base_key, &error);
+			assert(error != 0);
+			kh_value(p->bases, k) = cached;
+		}
+	}
 
-	git__free(base.data);
+on_error:
 	git__free(delta.data);
 
-	/* TODO: we might want to cache this. eventually */
-	//add_delta_base_cache(p, base_offset, base, base_size, *type);
-
 	return error; /* error set by git__delta_apply */
 }
 
@@ -651,9 +699,19 @@ static struct git_pack_file *packfile_alloc(size_t extra)
 
 void packfile_free(struct git_pack_file *p)
 {
+	khiter_t k;
 	assert(p);
 
-	/* clear_delta_base_cache(); */
+	if (p->bases) {
+		for (k = kh_begin(p->bases); k != kh_end(p->bases); k++) {
+			if (kh_exist(p->bases, k))
+				free_cache_object(kh_value(p->bases, k));
+		}
+
+		git_offmap_free(p->bases);
+	}
+
+
 	git_mwindow_free_all(&p->mwf);
 	git_mwindow_file_deregister(&p->mwf);
 
@@ -678,6 +736,9 @@ static int packfile_open(struct git_pack_file *p)
 	if (!p->index_map.data && pack_index_open(p) < 0)
 		return git_odb__error_notfound("failed to open packfile", NULL);
 
+	p->bases = git_offmap_alloc();
+	GITERR_CHECK_ALLOC(p->bases);
+
 	/* TODO: open with noatime */
 	p->mwf.fd = git_futils_open_ro(p->pack_name);
 	if (p->mwf.fd < 0) {
diff --git a/src/pack.h b/src/pack.h
index bbfcca5..0f795f6 100644
--- a/src/pack.h
+++ b/src/pack.h
@@ -53,6 +53,15 @@ struct git_pack_idx_header {
 	uint32_t idx_version;
 };
 
+typedef struct git_pack_cache_entry {
+	git_off_t off; /* offset of the cached delta base; also used as its key in the offmap */
+	git_rawobj raw; /* the unpacked base object; raw.data is released when the entry is freed */
+} git_pack_cache_entry;
+
+#include "offmap.h"
+
+GIT__USE_OFFMAP; /* expand the khash implementation behind git_offmap */
+
+
 struct git_pack_file {
 	git_mwindow_file mwf;
 	git_map index_map;
@@ -68,6 +77,8 @@ struct git_pack_file {
 	git_vector cache;
 	git_oid **oids;
 
+	git_offmap *bases; /* delta base cache */
+
 	/* something like ".git/objects/pack/xxxxx.pack" */
 	char pack_name[GIT_FLEX_ARRAY]; /* more */
 };