Commit e0ab43e7e317d5415f5aa9899502da398dda1072

Stefan Sperling 2018-03-16T19:20:15

avoid a round-trip of data through a temp file when reading trees

diff --git a/lib/got_pack_lib.h b/lib/got_pack_lib.h
index 348ae1a..f84a8a1 100644
--- a/lib/got_pack_lib.h
+++ b/lib/got_pack_lib.h
@@ -128,3 +128,5 @@ const struct got_error *got_packfile_open_object(struct got_object **,
     struct got_object_id *, struct got_repository *);
 const struct got_error *got_packfile_extract_object(FILE **,
     struct got_object *, struct got_repository *);
+const struct got_error *got_packfile_extract_object_to_mem(uint8_t **, size_t *,
+    struct got_object *, struct got_repository *);
diff --git a/lib/object.c b/lib/object.c
index 6190d26..66fcf36 100644
--- a/lib/object.c
+++ b/lib/object.c
@@ -501,7 +501,7 @@ done:
 
 static const struct got_error *
 parse_tree_object(struct got_tree_object **tree, struct got_repository *repo,
-    char *buf, size_t len)
+    uint8_t *buf, size_t len)
 {
 	const struct got_error *err;
 	size_t remain = len;
@@ -684,20 +684,26 @@ got_object_tree_open(struct got_tree_object **tree,
     struct got_repository *repo, struct got_object *obj)
 {
 	const struct got_error *err = NULL;
-	FILE *f;
 
 	if (obj->type != GOT_OBJ_TYPE_TREE)
 		return got_error(GOT_ERR_OBJ_TYPE);
 
-	if (obj->flags & GOT_OBJ_FLAG_PACKED)
-		err = got_packfile_extract_object(&f, obj, repo);
-	else
+	if (obj->flags & GOT_OBJ_FLAG_PACKED) {
+		uint8_t *buf;
+		size_t len;
+		err = got_packfile_extract_object_to_mem(&buf, &len, obj, repo);
+		if (err)
+			return err;
+		err = parse_tree_object(tree, repo, buf + obj->hdrlen, len);
+		free(buf);
+	} else {
+		FILE *f;
 		err = open_loose_object(&f, obj, repo);
-	if (err)
-		return err;
-
-	err = read_tree_object(tree, repo, obj, f);
-	fclose(f);
+		if (err)
+			return err;
+		err = read_tree_object(tree, repo, obj, f);
+		fclose(f);
+	}
 	return err;
 }
 
diff --git a/lib/pack.c b/lib/pack.c
index 569429d..67a4d28 100644
--- a/lib/pack.c
+++ b/lib/pack.c
@@ -1250,6 +1250,135 @@ done:
 	return err;
 }
 
+/*
+ * Rebuild a deltified object in memory: inflate the plain base object (the
+ * first chain entry), then apply every later delta in chain order. On success
+ * the caller must free *outbuf (*outlen bytes); on error *outbuf stays NULL.
+ */
+static const struct got_error *
+dump_delta_chain_to_mem(uint8_t **outbuf, size_t *outlen,
+    struct got_delta_chain *deltas, const char *path_packfile,
+    struct got_repository *repo)
+{
+	const struct got_error *err = NULL;
+	struct got_delta *delta;
+	uint8_t *base_buf = NULL, *accum_buf = NULL;
+	size_t accum_size = 0;
+	uint64_t max_size;
+	int n = 0;
+
+	*outbuf = NULL;
+	*outlen = 0;
+
+	if (SIMPLEQ_EMPTY(&deltas->entries))
+		return got_error(GOT_ERR_BAD_DELTA_CHAIN);
+
+	err = get_delta_chain_max_size(&max_size, deltas);
+	if (err)
+		return err;
+	accum_buf = malloc(max_size);
+	if (accum_buf == NULL)
+		return got_error(GOT_ERR_NO_MEM);
+
+	/* Deltas are ordered in ascending order. */
+	SIMPLEQ_FOREACH(delta, &deltas->entries, entry) {
+		uint8_t *delta_buf = NULL;
+		size_t delta_len = 0;
+
+		if (n == 0) {
+			FILE *delta_file;
+			size_t base_len = 0;
+
+			/* Plain object types are the delta base. */
+			if (delta->type != GOT_OBJ_TYPE_COMMIT &&
+			    delta->type != GOT_OBJ_TYPE_TREE &&
+			    delta->type != GOT_OBJ_TYPE_BLOB &&
+			    delta->type != GOT_OBJ_TYPE_TAG) {
+				err = got_error(GOT_ERR_BAD_DELTA_CHAIN);
+				goto done;
+			}
+			delta_file = fopen(delta->path_packfile, "rb");
+			if (delta_file == NULL) {
+				err = got_error_from_errno();
+				goto done;
+			}
+			if (fseeko(delta_file, delta->offset + delta->tslen,
+			    SEEK_SET) != 0) {
+				err = got_error_from_errno(); /* keep errno */
+				fclose(delta_file);
+				goto done;
+			}
+			err = got_inflate_to_mem(&base_buf, &base_len,
+			    delta_file);
+			fclose(delta_file);
+			if (err) /* don't trust base_len on failure */
+				goto done;
+			/* base_buf becomes the output buffer after a swap. */
+			if (base_len < max_size) {
+				uint8_t *p = realloc(base_buf, max_size);
+				if (p == NULL) {
+					err = got_error(GOT_ERR_NO_MEM);
+					goto done;
+				}
+				base_buf = p;
+			}
+			n++;
+			continue;
+		}
+
+		get_cached_delta(&delta_buf, &delta_len, delta->data_offset,
+		    path_packfile, repo);
+		if (delta_buf == NULL) {
+			FILE *delta_file = fopen(delta->path_packfile, "rb");
+			if (delta_file == NULL) {
+				err = got_error_from_errno();
+				goto done;
+			}
+			if (fseeko(delta_file, delta->data_offset, SEEK_SET)
+			    != 0) {
+				err = got_error_from_errno(); /* keep errno */
+				fclose(delta_file);
+				goto done;
+			}
+			/* Delta streams should always fit in memory. */
+			err = got_inflate_to_mem(&delta_buf, &delta_len,
+			    delta_file);
+			fclose(delta_file);
+			if (err)
+				goto done;
+			err = cache_delta(delta->data_offset, delta_buf,
+			    delta_len, path_packfile, repo);
+			if (err)
+				goto done;
+		}
+		/* delta_buf is now cached */
+
+		err = got_delta_apply_in_mem(base_buf, delta_buf,
+		    delta_len, accum_buf, &accum_size);
+		n++;
+		if (err)
+			goto done;
+
+		if (n < deltas->nentries) {
+			/* Accumulated delta becomes the new base. */
+			uint8_t *tmp = accum_buf;
+			accum_buf = base_buf;
+			base_buf = tmp;
+		}
+	}
+	if (n < 2) /* base only: no delta ever filled accum_buf */
+		err = got_error(GOT_ERR_BAD_DELTA_CHAIN);
+done:
+	free(base_buf);
+	if (err) {
+		free(accum_buf);
+		return err;
+	}
+	*outbuf = accum_buf;
+	*outlen = accum_size;
+	return NULL;
+}
+
 const struct got_error *
 got_packfile_extract_object(FILE **f, struct got_object *obj,
     struct got_repository *repo)
@@ -1289,3 +1418,38 @@ done:
 		fclose(*f);
 	return err;
 }
+
+/*
+ * Extract a packed tree object into memory at *buf / *len, resolving its
+ * delta chain if deltified. Non-tree and non-packed objects are rejected.
+ */
+const struct got_error *
+got_packfile_extract_object_to_mem(uint8_t **buf, size_t *len,
+    struct got_object *obj, struct got_repository *repo)
+{
+	const struct got_error *error;
+	FILE *pack;
+
+	if (obj->type != GOT_OBJ_TYPE_TREE)
+		return got_error(GOT_ERR_OBJ_TYPE);
+	if (!(obj->flags & GOT_OBJ_FLAG_PACKED))
+		return got_error(GOT_ERR_OBJ_NOT_PACKED);
+
+	/* Deltified objects are rebuilt from their delta chain. */
+	if (obj->flags & GOT_OBJ_FLAG_DELTIFIED)
+		return dump_delta_chain_to_mem(buf, len, &obj->deltas,
+		    obj->path_packfile, repo);
+
+	/* Plain objects are inflated straight out of the pack file. */
+	pack = fopen(obj->path_packfile, "rb");
+	if (pack == NULL)
+		return got_error_from_errno();
+
+	if (fseeko(pack, obj->pack_offset, SEEK_SET) == 0)
+		error = got_inflate_to_mem(buf, len, pack);
+	else
+		error = got_error_from_errno();
+
+	fclose(pack);
+	return error;
+}