Merge pull request #2330 from libgit2/cmn/pack-unpack-loop Make pack object lookup use loops
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387
diff --git a/src/pack.c b/src/pack.c
index de038a4..d93ee25 100644
--- a/src/pack.c
+++ b/src/pack.c
@@ -514,72 +514,105 @@ int git_packfile_resolve_header(
return error;
}
-static int packfile_unpack_delta(
- git_rawobj *obj,
- struct git_pack_file *p,
- git_mwindow **w_curs,
- git_off_t *curpos,
- size_t delta_size,
- git_otype delta_type,
- git_off_t obj_offset)
-{
- git_off_t base_offset, base_key;
- git_rawobj base, delta;
- git_pack_cache_entry *cached = NULL;
- int error, found_base = 0;
+#define SMALL_STACK_SIZE 64
- base_offset = get_delta_base(p, w_curs, curpos, delta_type, obj_offset);
- git_mwindow_close(w_curs);
- if (base_offset == 0)
- return packfile_error("delta offset is zero");
- if (base_offset < 0) /* must actually be an error code */
- return (int)base_offset;
+/**
+ * Generate the chain of dependencies which we need to get to the
+ * object at `off`. `chain` is used a stack, popping gives the right
+ * order to apply deltas on. If an object is found in the pack's base
+ * cache, we stop calculating there.
+ */
+static int pack_dependency_chain(git_dependency_chain *chain_out,
+ git_pack_cache_entry **cached_out, git_off_t *cached_off,
+ struct pack_chain_elem *small_stack, size_t *stack_sz,
+ struct git_pack_file *p, git_off_t obj_offset)
+{
+ git_dependency_chain chain = GIT_ARRAY_INIT;
+ git_mwindow *w_curs = NULL;
+ git_off_t curpos = obj_offset, base_offset;
+ int error = 0, use_heap = 0;
+ size_t size, elem_pos;
+ git_otype type;
if (!p->bases.entries && (cache_init(&p->bases) < 0))
return -1;
- base_key = base_offset; /* git_packfile_unpack modifies base_offset */
- if ((cached = cache_get(&p->bases, base_offset)) != NULL) {
- memcpy(&base, &cached->raw, sizeof(git_rawobj));
- found_base = 1;
- }
+ elem_pos = 0;
+ while (true) {
+ struct pack_chain_elem *elem;
+ git_pack_cache_entry *cached = NULL;
- if (!cached) { /* have to inflate it */
- error = git_packfile_unpack(&base, p, &base_offset);
+ /* if we have a base cached, we can stop here instead */
+ if ((cached = cache_get(&p->bases, obj_offset)) != NULL) {
+ *cached_out = cached;
+ *cached_off = obj_offset;
+ break;
+ }
+
+ /* if we run out of space on the small stack, use the array */
+ if (elem_pos == SMALL_STACK_SIZE) {
+ git_array_init_to_size(chain, elem_pos);
+ GITERR_CHECK_ARRAY(chain);
+ memcpy(chain.ptr, small_stack, elem_pos * sizeof(struct pack_chain_elem));
+ chain.size = elem_pos;
+ use_heap = 1;
+ }
+
+ curpos = obj_offset;
+ if (!use_heap) {
+ elem = &small_stack[elem_pos];
+ } else {
+ elem = git_array_alloc(chain);
+ if (!elem) {
+ error = -1;
+ goto on_error;
+ }
+ }
+
+ elem->base_key = obj_offset;
+
+ error = git_packfile_unpack_header(&size, &type, &p->mwf, &w_curs, &curpos);
+ git_mwindow_close(&w_curs);
- /*
- * TODO: git.git tries to load the base from other packfiles
- * or loose objects.
- *
- * We'll need to do this in order to support thin packs.
- */
if (error < 0)
- return error;
- }
+ goto on_error;
- error = packfile_unpack_compressed(&delta, p, w_curs, curpos, delta_size, delta_type);
- git_mwindow_close(w_curs);
+ elem->offset = curpos;
+ elem->size = size;
+ elem->type = type;
+ elem->base_key = obj_offset;
- if (error < 0) {
- if (!found_base)
- git__free(base.data);
- return error;
- }
+ if (type != GIT_OBJ_OFS_DELTA && type != GIT_OBJ_REF_DELTA)
+ break;
- obj->type = base.type;
- error = git__delta_apply(obj, base.data, base.len, delta.data, delta.len);
- if (error < 0)
- goto on_error;
+ base_offset = get_delta_base(p, &w_curs, &curpos, type, obj_offset);
+ git_mwindow_close(&w_curs);
- if (found_base)
- git_atomic_dec(&cached->refcount);
- else if (cache_add(&p->bases, &base, base_key) < 0)
- git__free(base.data);
+ if (base_offset == 0) {
+ error = packfile_error("delta offset is zero");
+ goto on_error;
+ }
+ if (base_offset < 0) { /* must actually be an error code */
+ error = (int)base_offset;
+ goto on_error;
+ }
-on_error:
- git__free(delta.data);
+ /* we need to pass the pos *after* the delta-base bit */
+ elem->offset = curpos;
- return error; /* error set by git__delta_apply */
+ /* go through the loop again, but with the new object */
+ obj_offset = base_offset;
+ elem_pos++;
+ }
+
+
+ *stack_sz = elem_pos + 1;
+ *chain_out = chain;
+ return error;
+
+on_error:
+ git_array_clear(chain);
+ return error;
}
int git_packfile_unpack(
@@ -589,48 +622,138 @@ int git_packfile_unpack(
{
git_mwindow *w_curs = NULL;
git_off_t curpos = *obj_offset;
- int error;
-
- size_t size = 0;
- git_otype type;
+ int error, free_base = 0;
+ git_dependency_chain chain = GIT_ARRAY_INIT;
+ struct pack_chain_elem *elem = NULL, *stack;
+ git_pack_cache_entry *cached = NULL;
+ struct pack_chain_elem small_stack[SMALL_STACK_SIZE];
+ size_t stack_size, elem_pos;
+ git_otype base_type;
/*
* TODO: optionally check the CRC on the packfile
*/
+ error = pack_dependency_chain(&chain, &cached, obj_offset, small_stack, &stack_size, p, *obj_offset);
+ if (error < 0)
+ return error;
+
obj->data = NULL;
obj->len = 0;
obj->type = GIT_OBJ_BAD;
- error = git_packfile_unpack_header(&size, &type, &p->mwf, &w_curs, &curpos);
- git_mwindow_close(&w_curs);
+ /* let's point to the right stack */
+ stack = chain.ptr ? chain.ptr : small_stack;
- if (error < 0)
- return error;
+ elem_pos = stack_size;
+ if (cached) {
+ memcpy(obj, &cached->raw, sizeof(git_rawobj));
+ base_type = obj->type;
+ elem_pos--; /* stack_size includes the base, which isn't actually there */
+ } else {
+ elem = &stack[--elem_pos];
+ base_type = elem->type;
+ }
- switch (type) {
- case GIT_OBJ_OFS_DELTA:
- case GIT_OBJ_REF_DELTA:
- error = packfile_unpack_delta(
- obj, p, &w_curs, &curpos,
- size, type, *obj_offset);
- break;
+ if (error < 0)
+ goto cleanup;
+ switch (base_type) {
case GIT_OBJ_COMMIT:
case GIT_OBJ_TREE:
case GIT_OBJ_BLOB:
case GIT_OBJ_TAG:
- error = packfile_unpack_compressed(
- obj, p, &w_curs, &curpos,
- size, type);
+ if (!cached) {
+ curpos = elem->offset;
+ error = packfile_unpack_compressed(obj, p, &w_curs, &curpos, elem->size, elem->type);
+ git_mwindow_close(&w_curs);
+ base_type = elem->type;
+ }
+ if (error < 0)
+ goto cleanup;
break;
-
+ case GIT_OBJ_OFS_DELTA:
+ case GIT_OBJ_REF_DELTA:
+ error = packfile_error("dependency chain ends in a delta");
+ goto cleanup;
default:
- error = packfile_error("invalid packfile type in header");;
- break;
+ error = packfile_error("invalid packfile type in header");
+ goto cleanup;
}
- *obj_offset = curpos;
+ /*
+ * Finding the object we want a cached base element is
+ * problematic, as we need to make sure we don't accidentally
+ * give the caller the cached object, which it would then feel
+ * free to free, so we need to copy the data.
+ */
+ if (cached && stack_size == 1) {
+ void *data = obj->data;
+ obj->data = git__malloc(obj->len + 1);
+ GITERR_CHECK_ALLOC(obj->data);
+ memcpy(obj->data, data, obj->len + 1);
+ git_atomic_dec(&cached->refcount);
+ goto cleanup;
+ }
+
+ /* we now apply each consecutive delta until we run out */
+ while (elem_pos > 0 && !error) {
+ git_rawobj base, delta;
+
+ /*
+ * We can now try to add the base to the cache, as
+ * long as it's not already the cached one.
+ */
+ if (!cached)
+ free_base = !!cache_add(&p->bases, obj, elem->base_key);
+
+ elem = &stack[elem_pos - 1];
+ curpos = elem->offset;
+ error = packfile_unpack_compressed(&delta, p, &w_curs, &curpos, elem->size, elem->type);
+ git_mwindow_close(&w_curs);
+
+ if (error < 0)
+ break;
+
+ /* the current object becomes the new base, on which we apply the delta */
+ base = *obj;
+ obj->data = NULL;
+ obj->len = 0;
+ obj->type = GIT_OBJ_BAD;
+
+ error = git__delta_apply(obj, base.data, base.len, delta.data, delta.len);
+ obj->type = base_type;
+ /*
+ * We usually don't want to free the base at this
+ * point, as we put it into the cache in the previous
+ * iteration. free_base lets us know that we got the
+ * base object directly from the packfile, so we can free it.
+ */
+ git__free(delta.data);
+ if (free_base) {
+ free_base = 0;
+ git__free(base.data);
+ }
+
+ if (cached) {
+ git_atomic_dec(&cached->refcount);
+ cached = NULL;
+ }
+
+ if (error < 0)
+ break;
+
+ elem_pos--;
+ }
+
+cleanup:
+ if (error < 0)
+ git__free(obj->data);
+
+ if (elem)
+ *obj_offset = elem->offset;
+
+ git_array_clear(chain);
return error;
}
@@ -660,7 +783,7 @@ int git_packfile_stream_open(git_packfile_stream *obj, struct git_pack_file *p,
st = inflateInit(&obj->zstream);
if (st != Z_OK) {
git__free(obj);
- giterr_set(GITERR_ZLIB, "Failed to inflate packfile");
+ giterr_set(GITERR_ZLIB, "failed to init packfile stream");
return -1;
}
@@ -691,7 +814,7 @@ ssize_t git_packfile_stream_read(git_packfile_stream *obj, void *buffer, size_t
written = len - obj->zstream.avail_out;
if (st != Z_OK && st != Z_STREAM_END) {
- giterr_set(GITERR_ZLIB, "Failed to inflate packfile");
+ giterr_set(GITERR_ZLIB, "error reading from the zlib stream");
return -1;
}
@@ -736,7 +859,7 @@ int packfile_unpack_compressed(
st = inflateInit(&stream);
if (st != Z_OK) {
git__free(buffer);
- giterr_set(GITERR_ZLIB, "Failed to inflate packfile");
+ giterr_set(GITERR_ZLIB, "failed to init zlib stream on unpack");
return -1;
}
@@ -763,7 +886,7 @@ int packfile_unpack_compressed(
if ((st != Z_STREAM_END) || stream.total_out != size) {
git__free(buffer);
- giterr_set(GITERR_ZLIB, "Failed to inflate packfile");
+ giterr_set(GITERR_ZLIB, "error inflating zlib stream");
return -1;
}
diff --git a/src/pack.h b/src/pack.h
index 58f81e2..610e70c 100644
--- a/src/pack.h
+++ b/src/pack.h
@@ -17,6 +17,7 @@
#include "mwindow.h"
#include "odb.h"
#include "oidmap.h"
+#include "array.h"
#define GIT_PACK_FILE_MODE 0444
@@ -60,6 +61,15 @@ typedef struct git_pack_cache_entry {
git_rawobj raw;
} git_pack_cache_entry;
+struct pack_chain_elem {
+ git_off_t base_key;
+ git_off_t offset;
+ size_t size;
+ git_otype type;
+};
+
+typedef git_array_t(struct pack_chain_elem) git_dependency_chain;
+
#include "offmap.h"
GIT__USE_OFFMAP;