Commit 42c69117cce2e1658d5b5aabbc383ce7252cf167

Stefan Sperling 2019-11-10T15:51:05

stop storing decompressed delta data in struct got_delta; fetch it on demand

diff --git a/lib/delta.c b/lib/delta.c
index 42f628d..9abb0d4 100644
--- a/lib/delta.c
+++ b/lib/delta.c
@@ -40,7 +40,7 @@
 
 struct got_delta *
 got_delta_open(off_t offset, size_t tslen, int type, size_t size,
-    off_t data_offset, uint8_t *delta_buf, size_t delta_len)
+    off_t data_offset)
 {
 	struct got_delta *delta;
 
@@ -53,8 +53,6 @@ got_delta_open(off_t offset, size_t tslen, int type, size_t size,
 	delta->tslen = tslen;
 	delta->size = size;
 	delta->data_offset = data_offset;
-	delta->delta_buf = delta_buf;
-	delta->delta_len = delta_len;
 	return delta;
 }
 
diff --git a/lib/got_lib_delta.h b/lib/got_lib_delta.h
index 8b79413..b04e243 100644
--- a/lib/got_lib_delta.h
+++ b/lib/got_lib_delta.h
@@ -21,8 +21,6 @@ struct got_delta {
 	int type;
 	size_t size;
 	off_t data_offset;
-	uint8_t *delta_buf;
-	size_t delta_len;
 };
 
 struct got_delta_chain {
@@ -32,8 +30,7 @@ struct got_delta_chain {
 
 #define GOT_DELTA_CHAIN_RECURSION_MAX	500
 
-struct got_delta *got_delta_open(off_t, size_t, int, size_t, off_t,
-    uint8_t *, size_t);
+struct got_delta *got_delta_open(off_t, size_t, int, size_t, off_t);
 const struct got_error *got_delta_chain_get_base_type(int *,
     struct got_delta_chain *);
 const struct got_error *got_delta_get_sizes(uint64_t *, uint64_t *,
diff --git a/lib/got_lib_pack.h b/lib/got_lib_pack.h
index 654dfa8..5ccd979 100644
--- a/lib/got_lib_pack.h
+++ b/lib/got_lib_pack.h
@@ -168,7 +168,7 @@ const struct got_error *got_packidx_match_id_str_prefix(
 const struct got_error *got_packfile_open_object(struct got_object **,
     struct got_pack *, struct got_packidx *, int, struct got_object_id *);
 const struct got_error *got_pack_get_max_delta_object_size(uint64_t *,
-    struct got_object *);
+    struct got_object *, struct got_pack *);
 const struct got_error *got_packfile_extract_object(struct got_pack *,
     struct got_object *, FILE *, FILE *, FILE *);
 const struct got_error *got_packfile_extract_object_to_mem(uint8_t **, size_t *,
diff --git a/lib/object_cache.c b/lib/object_cache.c
index 6d87621..c67ff75 100644
--- a/lib/object_cache.c
+++ b/lib/object_cache.c
@@ -84,9 +84,9 @@ get_size_obj(struct got_object *obj)
 		return size;
 
 	SIMPLEQ_FOREACH(delta, &obj->deltas.entries, entry) {
-		if (SIZE_MAX - (sizeof(*delta) + delta->delta_len) < size)
+		if (SIZE_MAX - sizeof(*delta) < size)
 			return SIZE_MAX;
-		size += sizeof(*delta) + delta->delta_len;
+		size += sizeof(*delta);
 	}
 
 	return size;
diff --git a/lib/object_parse.c b/lib/object_parse.c
index a037ceb..9a37c6f 100644
--- a/lib/object_parse.c
+++ b/lib/object_parse.c
@@ -113,7 +113,6 @@ got_object_close(struct got_object *obj)
 		while (!SIMPLEQ_EMPTY(&obj->deltas.entries)) {
 			delta = SIMPLEQ_FIRST(&obj->deltas.entries);
 			SIMPLEQ_REMOVE_HEAD(&obj->deltas.entries, entry);
-			free(delta->delta_buf);
 			free(delta);
 		}
 	}
diff --git a/lib/pack.c b/lib/pack.c
index fb5f114..433976a 100644
--- a/lib/pack.c
+++ b/lib/pack.c
@@ -707,15 +707,32 @@ resolve_delta_chain(struct got_delta_chain *, struct got_packidx *,
     struct got_pack *, off_t, size_t, int, size_t, unsigned int);
 
 static const struct got_error *
+read_delta_data(uint8_t **delta_buf, size_t *delta_len,
+    size_t delta_data_offset, struct got_pack *pack)
+{
+	const struct got_error *err = NULL;
+
+	if (pack->map) {
+		if (delta_data_offset >= pack->filesize)
+			return got_error(GOT_ERR_PACK_OFFSET);
+		err = got_inflate_to_mem_mmap(delta_buf, delta_len, pack->map,
+		    delta_data_offset, pack->filesize - delta_data_offset);
+	} else {
+		if (lseek(pack->fd, delta_data_offset, SEEK_SET) == -1)
+			return got_error_from_errno("lseek");
+		err = got_inflate_to_mem_fd(delta_buf, delta_len, pack->fd);
+	}
+	return err;
+}
+
+static const struct got_error *
 add_delta(struct got_delta_chain *deltas, off_t delta_offset, size_t tslen,
-    int delta_type, size_t delta_size, size_t delta_data_offset,
-    uint8_t *delta_buf, size_t delta_len)
+    int delta_type, size_t delta_size, size_t delta_data_offset)
 {
 	struct got_delta *delta;
 
 	delta = got_delta_open(delta_offset, tslen, delta_type, delta_size,
-	    delta_data_offset, delta_buf,
-	    delta_len);
+	    delta_data_offset);
 	if (delta == NULL)
 		return got_error_from_errno("got_delta_open");
 	/* delta is freed in got_object_close() */
@@ -736,8 +753,7 @@ resolve_offset_delta(struct got_delta_chain *deltas,
 	uint64_t base_size;
 	size_t base_tslen;
 	off_t delta_data_offset;
-	uint8_t *delta_buf;
-	size_t delta_len, consumed;
+	size_t consumed;
 
 	err = parse_offset_delta(&base_offset, &consumed, pack,
 	    delta_offset, tslen);
@@ -754,21 +770,8 @@ resolve_offset_delta(struct got_delta_chain *deltas,
 			return got_error_from_errno("lseek");
 	}
 
-	if (pack->map) {
-		size_t mapoff = (size_t)delta_data_offset;
-		err = got_inflate_to_mem_mmap(&delta_buf, &delta_len, pack->map,
-		    mapoff, pack->filesize - mapoff);
-		if (err)
-			return err;
-	} else {
-
-		err = got_inflate_to_mem_fd(&delta_buf, &delta_len, pack->fd);
-		if (err)
-			return err;
-	}
-
 	err = add_delta(deltas, delta_offset, tslen, delta_type, delta_size,
-	    delta_data_offset, delta_buf, delta_len);
+	    delta_data_offset);
 	if (err)
 		return err;
 
@@ -801,28 +804,27 @@ resolve_ref_delta(struct got_delta_chain *deltas, struct got_packidx *packidx,
 	uint8_t *delta_buf;
 	size_t delta_len;
 
-	if (delta_offset >= pack->filesize)
-		return got_error(GOT_ERR_PACK_OFFSET);
-	delta_data_offset = delta_offset + tslen;
-	if (delta_data_offset >= pack->filesize)
+	if (delta_offset + tslen >= pack->filesize)
 		return got_error(GOT_ERR_PACK_OFFSET);
 
 	if (pack->map == NULL) {
-		delta_data_offset = lseek(pack->fd, 0, SEEK_CUR);
-		if (delta_data_offset == -1)
-			return got_error_from_errno("lseek");
 	}
 
 	if (pack->map) {
-		size_t mapoff = (size_t)delta_data_offset;
+		size_t mapoff = delta_offset + tslen;
 		memcpy(&id, pack->map + mapoff, sizeof(id));
 		mapoff += sizeof(id);
+		delta_data_offset = (off_t)mapoff;
 		err = got_inflate_to_mem_mmap(&delta_buf, &delta_len, pack->map,
 		    mapoff, pack->filesize - mapoff);
 		if (err)
 			return err;
 	} else {
-		ssize_t n = read(pack->fd, &id, sizeof(id));
+		ssize_t n;
+		delta_data_offset = lseek(pack->fd, 0, SEEK_CUR);
+		if (delta_data_offset == -1)
+			return got_error_from_errno("lseek");
+		n = read(pack->fd, &id, sizeof(id));
 		if (n < 0)
 			return got_error_from_errno("read");
 		if (n != sizeof(id))
@@ -833,7 +835,7 @@ resolve_ref_delta(struct got_delta_chain *deltas, struct got_packidx *packidx,
 	}
 
 	err = add_delta(deltas, delta_offset, tslen, delta_type, delta_size,
-	    delta_data_offset, delta_buf, delta_len);
+	    delta_data_offset);
 	if (err)
 		return err;
 
@@ -875,7 +877,7 @@ resolve_delta_chain(struct got_delta_chain *deltas, struct got_packidx *packidx,
 	case GOT_OBJ_TYPE_TAG:
 		/* Plain types are the final delta base. Recursion ends. */
 		err = add_delta(deltas, delta_offset, tslen, delta_type,
-		    delta_size, 0, NULL, 0);
+		    delta_size, 0);
 		break;
 	case GOT_OBJ_TYPE_OFFSET_DELTA:
 		err = resolve_offset_delta(deltas, packidx, pack,
@@ -980,7 +982,8 @@ got_packfile_open_object(struct got_object **obj, struct got_pack *pack,
 }
 
 static const struct got_error *
-get_delta_chain_max_size(uint64_t *max_size, struct got_delta_chain *deltas)
+get_delta_chain_max_size(uint64_t *max_size, struct got_delta_chain *deltas,
+    struct got_pack *pack)
 {
 	struct got_delta *delta;
 	uint64_t base_size = 0, result_size = 0;
@@ -993,8 +996,16 @@ get_delta_chain_max_size(uint64_t *max_size, struct got_delta_chain *deltas)
 		    delta->type != GOT_OBJ_TYPE_BLOB &&
 		    delta->type != GOT_OBJ_TYPE_TAG) {
 			const struct got_error *err;
+			uint8_t *delta_buf;
+			size_t delta_len;
+
+			err = read_delta_data(&delta_buf, &delta_len,
+			    delta->data_offset, pack);
+			if (err)
+				return err;
 			err = got_delta_get_sizes(&base_size, &result_size,
-			    delta->delta_buf, delta->delta_len);
+			    delta_buf, delta_len);
+			free(delta_buf);
 			if (err)
 				return err;
 		} else
@@ -1009,12 +1020,13 @@ get_delta_chain_max_size(uint64_t *max_size, struct got_delta_chain *deltas)
 }
 
 const struct got_error *
-got_pack_get_max_delta_object_size(uint64_t *size, struct got_object *obj)
+got_pack_get_max_delta_object_size(uint64_t *size, struct got_object *obj,
+    struct got_pack *pack)
 {
 	if ((obj->flags & GOT_OBJ_FLAG_DELTIFIED) == 0)
 		return got_error(GOT_ERR_OBJ_TYPE);
 
-	return get_delta_chain_max_size(size, &obj->deltas);
+	return get_delta_chain_max_size(size, &obj->deltas, pack);
 }
 
 static const struct got_error *
@@ -1023,8 +1035,8 @@ dump_delta_chain_to_file(size_t *result_size, struct got_delta_chain *deltas,
 {
 	const struct got_error *err = NULL;
 	struct got_delta *delta;
-	uint8_t *base_buf = NULL, *accum_buf = NULL;
-	size_t base_bufsz = 0, accum_size = 0;
+	uint8_t *base_buf = NULL, *accum_buf = NULL, *delta_buf;
+	size_t base_bufsz = 0, accum_size = 0, delta_len;
 	uint64_t max_size;
 	int n = 0;
 
@@ -1034,7 +1046,7 @@ dump_delta_chain_to_file(size_t *result_size, struct got_delta_chain *deltas,
 		return got_error(GOT_ERR_BAD_DELTA_CHAIN);
 
 	/* We process small enough files entirely in memory for speed. */
-	err = get_delta_chain_max_size(&max_size, deltas);
+	err = get_delta_chain_max_size(&max_size, deltas, pack);
 	if (err)
 		return err;
 	if (max_size < GOT_DELTA_RESULT_SIZE_CACHED_MAX) {
@@ -1099,18 +1111,23 @@ dump_delta_chain_to_file(size_t *result_size, struct got_delta_chain *deltas,
 			continue;
 		}
 
+		err = read_delta_data(&delta_buf, &delta_len,
+		    delta->data_offset, pack);
+		if (err)
+			goto done;
 		if (base_buf) {
 			err = got_delta_apply_in_mem(base_buf, base_bufsz,
-			    delta->delta_buf, delta->delta_len, accum_buf,
+			    delta_buf, delta_len, accum_buf,
 			    &accum_size, max_size);
 			n++;
 		} else {
-			err = got_delta_apply(base_file, delta->delta_buf,
-			    delta->delta_len,
+			err = got_delta_apply(base_file, delta_buf,
+			    delta_len,
 			    /* Final delta application writes to output file. */
 			    ++n < deltas->nentries ? accum_file : outfile,
 			    &accum_size);
 		}
+		free(delta_buf);
 		if (err)
 			goto done;
 
@@ -1167,8 +1184,8 @@ dump_delta_chain_to_mem(uint8_t **outbuf, size_t *outlen,
 {
 	const struct got_error *err = NULL;
 	struct got_delta *delta;
-	uint8_t *base_buf = NULL, *accum_buf = NULL;
-	size_t base_bufsz = 0, accum_size = 0;
+	uint8_t *base_buf = NULL, *accum_buf = NULL, *delta_buf;
+	size_t base_bufsz = 0, accum_size = 0, delta_len;
 	uint64_t max_size;
 	int n = 0;
 
@@ -1178,7 +1195,7 @@ dump_delta_chain_to_mem(uint8_t **outbuf, size_t *outlen,
 	if (SIMPLEQ_EMPTY(&deltas->entries))
 		return got_error(GOT_ERR_BAD_DELTA_CHAIN);
 
-	err = get_delta_chain_max_size(&max_size, deltas);
+	err = get_delta_chain_max_size(&max_size, deltas, pack);
 	if (err)
 		return err;
 	accum_buf = malloc(max_size);
@@ -1224,9 +1241,14 @@ dump_delta_chain_to_mem(uint8_t **outbuf, size_t *outlen,
 			continue;
 		}
 
+		err = read_delta_data(&delta_buf, &delta_len,
+		    delta->data_offset, pack);
+		if (err)
+			goto done;
 		err = got_delta_apply_in_mem(base_buf, base_bufsz,
-		    delta->delta_buf, delta->delta_len, accum_buf,
+		    delta_buf, delta_len, accum_buf,
 		    &accum_size, max_size);
+		free(delta_buf);
 		n++;
 		if (err)
 			goto done;
diff --git a/libexec/got-read-pack/got-read-pack.c b/libexec/got-read-pack/got-read-pack.c
index faa1ca0..cb7464c 100644
--- a/libexec/got-read-pack/got-read-pack.c
+++ b/libexec/got-read-pack/got-read-pack.c
@@ -289,7 +289,7 @@ blob_request(struct imsg *imsg, struct imsgbuf *ibuf, struct got_pack *pack,
 		goto done;
 
 	if (obj->flags & GOT_OBJ_FLAG_DELTIFIED) {
-		err = got_pack_get_max_delta_object_size(&blob_size, obj);
+		err = got_pack_get_max_delta_object_size(&blob_size, obj, pack);
 		if (err)
 			goto done;
 	} else