pack: use a cache for delta bases when unpacking

Bring back the use of the delta base cache for unpacking objects. When generating the delta chain, we stop when we find a delta base in the pack's cache and use that as the starting point.
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240
diff --git a/src/pack.c b/src/pack.c
index 523905f..c1d7592 100644
--- a/src/pack.c
+++ b/src/pack.c
@@ -42,8 +42,9 @@ static int pack_entry_find_offset(
/**
* Generate the chain of dependencies which we need to get to the
- * object at `off`. As we use a stack, the latest is the base object,
- * the rest are deltas.
+ * object at `off`. `chain` is used as a stack; popping gives the right
+ * order to apply deltas on. If an object is found in the pack's base
+ * cache, we stop calculating there.
*/
static int pack_dependency_chain(git_dependency_chain *chain, struct git_pack_file *p, git_off_t off);
@@ -521,67 +522,6 @@ int git_packfile_resolve_header(
return error;
}
-static int packfile_unpack_delta(
- git_rawobj *obj,
- struct git_pack_file *p,
- git_mwindow **w_curs,
- git_off_t *curpos,
- size_t delta_size,
- git_otype delta_type,
- git_off_t obj_offset)
-{
- git_off_t base_offset, base_key;
- git_rawobj base, delta;
- git_pack_cache_entry *cached = NULL;
- int error, found_base = 0;
-
- base_offset = get_delta_base(p, w_curs, curpos, delta_type, obj_offset);
- git_mwindow_close(w_curs);
- if (base_offset == 0)
- return packfile_error("delta offset is zero");
- if (base_offset < 0) /* must actually be an error code */
- return (int)base_offset;
-
- if (!p->bases.entries && (cache_init(&p->bases) < 0))
- return -1;
-
- base_key = base_offset; /* git_packfile_unpack modifies base_offset */
- if ((cached = cache_get(&p->bases, base_offset)) != NULL) {
- memcpy(&base, &cached->raw, sizeof(git_rawobj));
- found_base = 1;
- }
-
- if (!cached) { /* have to inflate it */
- error = git_packfile_unpack(&base, p, &base_offset);
- if (error < 0)
- return error;
- }
-
- error = packfile_unpack_compressed(&delta, p, w_curs, curpos, delta_size, delta_type);
- git_mwindow_close(w_curs);
-
- if (error < 0) {
- if (!found_base)
- git__free(base.data);
- return error;
- }
-
- obj->type = base.type;
- error = git__delta_apply(obj, base.data, base.len, delta.data, delta.len);
- if (error < 0)
- goto on_error;
-
- if (found_base)
- git_atomic_dec(&cached->refcount);
- else if (cache_add(&p->bases, &base, base_key) < 0)
- git__free(base.data);
-
-on_error:
- git__free(delta.data);
-
- return error; /* error set by git__delta_apply */
-}
-
int git_packfile_unpack(
git_rawobj *obj,
struct git_pack_file *p,
@@ -589,10 +529,10 @@ int git_packfile_unpack(
{
git_mwindow *w_curs = NULL;
git_off_t curpos = *obj_offset;
- int error;
- git_dependency_chain chain;
+ int error, free_base = 0;
+ git_dependency_chain chain = GIT_ARRAY_INIT;
struct pack_chain_elem *elem;
-
+ git_pack_cache_entry *cached = NULL;
git_otype base_type;
/*
@@ -609,16 +549,38 @@ int git_packfile_unpack(
/* the first one is the base, so we expand that one */
elem = git_array_pop(chain);
- curpos = elem->offset;
- error = packfile_unpack_compressed(obj, p, &w_curs, &curpos, elem->size, elem->type);
- git_mwindow_close(&w_curs);
+ if (elem->cached) {
+ cached = elem->cached_entry;
+ memcpy(obj, &cached->raw, sizeof(git_rawobj));
+ base_type = obj->type;
+ } else {
+ curpos = elem->offset;
+ error = packfile_unpack_compressed(obj, p, &w_curs, &curpos, elem->size, elem->type);
+ git_mwindow_close(&w_curs);
+ base_type = elem->type;
+ free_base = 1;
+ }
if (error < 0)
goto cleanup;
- base_type = elem->type;
+ /*
+ * Finding the object we want as the base element is
+ * problematic, as we need to make sure we don't accidentally
+ * give the caller the cached object, which it would then feel
+ * free to free, so we need to copy the data.
+ */
+ if (cached && git_array_size(chain) == 0) {
+ void *data = obj->data;
+ obj->data = git__malloc(obj->len + 1);
+ GITERR_CHECK_ALLOC(obj->data);
+ memcpy(obj->data, data, obj->len + 1);
+ git_atomic_dec(&cached->refcount);
+ goto cleanup;
+ }
+
/* we now apply each consecutive delta until we run out */
- while (git_array_size(chain) > 0) {
+ while (git_array_size(chain) > 0 && !error) {
git_rawobj base, delta;
elem = git_array_pop(chain);
@@ -636,16 +598,39 @@ int git_packfile_unpack(
obj->type = GIT_OBJ_BAD;
error = git__delta_apply(obj, base.data, base.len, delta.data, delta.len);
+ obj->type = base_type;
+ /*
+ * We usually don't want to free the base at this
+ * point, as we put it into the cache in the previous
+ * iteration. free_base lets us know that we got the
+ * base object directly from the packfile, so we can free it.
+ */
git__free(delta.data);
- git__free(base.data);
+ if (free_base) {
+ free_base = 0;
+ git__free(base.data);
+ }
+
+ if (cached) {
+ git_atomic_dec(&cached->refcount);
+ cached = NULL;
+ }
if (error < 0)
break;
- obj->type = base_type;
+ /* only try to cache if we're not handing this buffer off to the caller */
+ if (git_array_size(chain) > 0 &&
+ (error = cache_add(&p->bases, obj, elem->base_key)) < 0)
+ goto cleanup;
}
cleanup:
+ if (error < 0)
+ git__free(obj->data);
+
+ *obj_offset = elem->offset;
+
git_array_clear(chain);
return error;
}
@@ -1248,8 +1233,12 @@ static int pack_dependency_chain(git_dependency_chain *chain_out, struct git_pac
size_t size;
git_otype type;
+ if (!p->bases.entries && (cache_init(&p->bases) < 0))
+ return -1;
+
while (!found_base && error == 0) {
struct pack_chain_elem *elem;
+ git_pack_cache_entry *cached = NULL;
curpos = obj_offset;
elem = git_array_alloc(chain);
@@ -1262,13 +1251,23 @@ static int pack_dependency_chain(git_dependency_chain *chain_out, struct git_pac
if (error < 0)
return error;
+ elem->cached = 0;
elem->offset = curpos;
elem->size = size;
elem->type = type;
+ elem->base_key = obj_offset;
switch (type) {
case GIT_OBJ_OFS_DELTA:
case GIT_OBJ_REF_DELTA:
+ /* if we have a base cached, we can stop here instead */
+ if ((cached = cache_get(&p->bases, obj_offset)) != NULL) {
+ elem->cached_entry = cached;
+ elem->cached = 1;
+ found_base = 1;
+ break;
+ }
+
base_offset = get_delta_base(p, &w_curs, &curpos, type, obj_offset);
git_mwindow_close(&w_curs);
diff --git a/src/pack.h b/src/pack.h
index a2ea384..e86889d 100644
--- a/src/pack.h
+++ b/src/pack.h
@@ -62,9 +62,14 @@ typedef struct git_pack_cache_entry {
} git_pack_cache_entry;
struct pack_chain_elem {
+ int cached;
+ git_off_t base_key;
+ /* if we don't have it cached we have this */
git_off_t offset;
size_t size;
git_otype type;
+ /* if cached, we have this instead */
+ git_pack_cache_entry *cached_entry;
};
typedef git_array_t(struct pack_chain_elem) git_dependency_chain;