Commit 4788f1cebb9b14790dd4899b6f9007c721eec61e

Stefan Sperling 2020-03-18T16:13:46

extract large objects to a temporary file in got-index-pack

diff --git a/lib/fetch.c b/lib/fetch.c
index db72516..325ffec 100644
--- a/lib/fetch.c
+++ b/lib/fetch.c
@@ -346,6 +346,7 @@ got_fetch_pack(struct got_object_id **pack_hash, struct got_pathlist_head *refs,
 {
 	int imsg_fetchfds[2], imsg_idxfds[2];
 	int packfd = -1, npackfd = -1, idxfd = -1, nidxfd = -1, nfetchfd = -1;
+	int tmpfd = -1;
 	int fetchstatus, idxstatus, done = 0;
 	const struct got_error *err;
 	struct imsgbuf fetchibuf, idxibuf;
@@ -391,6 +392,12 @@ got_fetch_pack(struct got_object_id **pack_hash, struct got_pathlist_head *refs,
 		goto done;
 	}
 
+	tmpfd = got_opentempfd();
+	if (tmpfd == -1) {
+		err = got_error_from_errno("got_opentempfd");
+		goto done;
+	}
+
 	if (socketpair(AF_UNIX, SOCK_STREAM, PF_UNSPEC, imsg_fetchfds) == -1) {
 		err = got_error_from_errno("socketpair");
 		goto done;
@@ -506,6 +513,10 @@ got_fetch_pack(struct got_object_id **pack_hash, struct got_pathlist_head *refs,
 	if (err != NULL)
 		goto done;
 	nidxfd = -1;
+	err = got_privsep_send_tmpfd(&idxibuf, tmpfd);
+	if (err != NULL)
+		goto done;
+	tmpfd = -1;
 	done = 0;
 	while (!done) {
 		int nobj_total, nobj_indexed, nobj_loose, nobj_resolved;
@@ -566,6 +577,8 @@ done:
 		err = got_error_from_errno("close");
 	if (idxfd != -1 && close(idxfd) == -1 && err == NULL)
 		err = got_error_from_errno("close");
+	if (tmpfd != -1 && close(tmpfd) == -1 && err == NULL)
+		err = got_error_from_errno("close");
 	free(tmppackpath);
 	free(tmpidxpath);
 	free(idxpath);
diff --git a/lib/got_lib_inflate.h b/lib/got_lib_inflate.h
index 7a61421..48821e9 100644
--- a/lib/got_lib_inflate.h
+++ b/lib/got_lib_inflate.h
@@ -44,7 +44,8 @@ const struct got_error *got_inflate_to_mem_fd(uint8_t **, size_t *, size_t *,
 const struct got_error *got_inflate_to_mem_mmap(uint8_t **, size_t *, size_t *,
     uint32_t *, uint8_t *, size_t, size_t);
 const struct got_error *got_inflate_to_file(size_t *, FILE *, FILE *);
-const struct got_error *got_inflate_to_file_fd(size_t *, int, FILE *);
+const struct got_error *got_inflate_to_file_fd(size_t *, size_t *, uint32_t *,
+    int, FILE *);
 const struct got_error *got_inflate_to_fd(size_t *, FILE *, int);
-const struct got_error *got_inflate_to_file_mmap(size_t *, uint8_t *, size_t,
-    size_t, FILE *);
+const struct got_error *got_inflate_to_file_mmap(size_t *, size_t *,
+    uint32_t *, uint8_t *, size_t, size_t, FILE *);
diff --git a/lib/got_lib_pack.h b/lib/got_lib_pack.h
index 15e9a29..f95085e 100644
--- a/lib/got_lib_pack.h
+++ b/lib/got_lib_pack.h
@@ -179,6 +179,8 @@ const struct got_error *got_packfile_open_object(struct got_object **,
     struct got_pack *, struct got_packidx *, int, struct got_object_id *);
 const struct got_error *got_pack_get_max_delta_object_size(uint64_t *,
     struct got_object *, struct got_pack *);
+const struct got_error *got_pack_dump_delta_chain_to_file(size_t *,
+    struct got_delta_chain *, struct got_pack *, FILE *, FILE *, FILE *);
 const struct got_error *got_pack_dump_delta_chain_to_mem(uint8_t **, size_t *,
     struct got_delta_chain *, struct got_pack *);
 const struct got_error *got_packfile_extract_object(struct got_pack *,
diff --git a/lib/inflate.c b/lib/inflate.c
index e94e223..50d5e77 100644
--- a/lib/inflate.c
+++ b/lib/inflate.c
@@ -506,20 +506,22 @@ done:
 }
 
 const struct got_error *
-got_inflate_to_file_fd(size_t *outlen, int infd, FILE *outfile)
+got_inflate_to_file_fd(size_t *outlen, size_t *consumed_total,
+    uint32_t *input_crc, int infd, FILE *outfile)
 {
 	const struct got_error *err;
-	size_t avail;
+	size_t avail, consumed;
 	struct got_inflate_buf zb;
 
-	err = got_inflate_init(&zb, NULL, GOT_INFLATE_BUFSIZE, NULL);
+	err = got_inflate_init(&zb, NULL, GOT_INFLATE_BUFSIZE, input_crc);
 	if (err)
 		goto done;
 
 	*outlen = 0;
-
+	if (consumed_total)
+		*consumed_total = 0;
 	do {
-		err = got_inflate_read_fd(&zb, infd, &avail, NULL);
+		err = got_inflate_read_fd(&zb, infd, &avail, &consumed);
 		if (err)
 			goto done;
 		if (avail > 0) {
@@ -530,6 +532,8 @@ got_inflate_to_file_fd(size_t *outlen, int infd, FILE *outfile)
 				goto done;
 			}
 			*outlen += avail;
 		}
+		if (consumed_total)
+			*consumed_total += consumed;
 	} while (zb.flags & GOT_INFLATE_F_HAVE_MORE);
 
@@ -541,26 +545,29 @@ done:
 }
 
 const struct got_error *
-got_inflate_to_file_mmap(size_t *outlen, uint8_t *map, size_t offset,
-    size_t len, FILE *outfile)
+got_inflate_to_file_mmap(size_t *outlen, size_t *consumed_total,
+    uint32_t *input_crc, uint8_t *map, size_t offset, size_t len,
+    FILE *outfile)
 {
 	const struct got_error *err;
-	size_t avail;
+	size_t avail, consumed;
 	struct got_inflate_buf zb;
-	size_t consumed;
 
-	err = got_inflate_init(&zb, NULL, GOT_INFLATE_BUFSIZE, NULL);
+	err = got_inflate_init(&zb, NULL, GOT_INFLATE_BUFSIZE, input_crc);
 	if (err)
 		goto done;
 
 	*outlen = 0;
-
+	if (consumed_total)
+		*consumed_total = 0;
 	do {
 		err = got_inflate_read_mmap(&zb, map, offset, len, &avail,
 		    &consumed);
 		if (err)
 			goto done;
 		offset += consumed;
+		if (consumed_total)
+			*consumed_total += consumed;
 		len -= consumed;
 		if (avail > 0) {
 			size_t n;
diff --git a/lib/pack.c b/lib/pack.c
index 8375932..9dc1a83 100644
--- a/lib/pack.c
+++ b/lib/pack.c
@@ -1030,8 +1030,9 @@ got_pack_get_max_delta_object_size(uint64_t *size, struct got_object *obj,
 }
 
 const struct got_error *
-dump_delta_chain_to_file(size_t *result_size, struct got_delta_chain *deltas,
-    struct got_pack *pack, FILE *outfile, FILE *base_file, FILE *accum_file)
+got_pack_dump_delta_chain_to_file(size_t *result_size,
+    struct got_delta_chain *deltas, struct got_pack *pack, FILE *outfile,
+    FILE *base_file, FILE *accum_file)
 {
 	const struct got_error *err = NULL;
 	struct got_delta *delta;
@@ -1089,11 +1090,13 @@ dump_delta_chain_to_file(size_t *result_size, struct got_delta_chain *deltas,
 				if (pack->map) {
 					mapoff = (size_t)delta_data_offset;
 					err = got_inflate_to_file_mmap(
-					    &base_bufsz, pack->map, mapoff,
-					    pack->filesize - mapoff, base_file);
+					    &base_bufsz, NULL, NULL, pack->map,
+					    mapoff, pack->filesize - mapoff,
+					    base_file);
 				} else
 					err = got_inflate_to_file_fd(
-					    &base_bufsz, pack->fd, base_file);
+					    &base_bufsz, NULL, NULL, pack->fd,
+					    base_file);
 			} else {
 				if (pack->map) {
 					mapoff = (size_t)delta_data_offset;
@@ -1338,17 +1341,18 @@ got_packfile_extract_object(struct got_pack *pack, struct got_object *obj,
 
 		if (pack->map) {
 			size_t mapoff = (size_t)obj->pack_offset;
-			err = got_inflate_to_file_mmap(&obj->size, pack->map,
-			    mapoff, pack->filesize - mapoff, outfile);
+			err = got_inflate_to_file_mmap(&obj->size, NULL, NULL,
+			    pack->map, mapoff, pack->filesize - mapoff,
+			    outfile);
 		} else {
 			if (lseek(pack->fd, obj->pack_offset, SEEK_SET) == -1)
 				return got_error_from_errno("lseek");
-			err = got_inflate_to_file_fd(&obj->size, pack->fd,
-			    outfile);
+			err = got_inflate_to_file_fd(&obj->size, NULL, NULL,
+			    pack->fd, outfile);
 		}
 	} else
-		err = dump_delta_chain_to_file(&obj->size, &obj->deltas, pack,
-		    outfile, base_file, accum_file);
+		err = got_pack_dump_delta_chain_to_file(&obj->size,
+		    &obj->deltas, pack, outfile, base_file, accum_file);
 
 	return err;
 }
diff --git a/libexec/got-index-pack/got-index-pack.c b/libexec/got-index-pack/got-index-pack.c
index 4adf115..650dd1e 100644
--- a/libexec/got-index-pack/got-index-pack.c
+++ b/libexec/got-index-pack/got-index-pack.c
@@ -145,11 +145,31 @@ read_crc(uint32_t *crc, int fd, size_t len)
 }
 
 static const struct got_error *
-read_packed_object(struct got_pack *pack, struct got_indexed_object *obj)
+read_file_sha1(SHA1_CTX *ctx, FILE *f)
+{
+	uint8_t buf[8192];
+	size_t r;
+
+	/*
+	 * The caller just wrote to f: rewind so hashing starts at offset 0
+	 * (the seek also legalizes the write->read switch on the stream).
+	 * NOTE(review): hashes up to EOF; confirm f holds no stale tail.
+	 */
+	if (fseek(f, 0L, SEEK_SET) == -1)
+		return got_error_from_errno("fseek");
+	while ((r = fread(buf, 1, sizeof(buf), f)) > 0)
+		SHA1Update(ctx, buf, r);
+
+	return feof(f) ? NULL : got_ferror(f, GOT_ERR_IO);
+}
+
+static const struct got_error *
+read_packed_object(struct got_pack *pack, struct got_indexed_object *obj,
+    FILE *tmpfile)
 {
 	const struct got_error *err = NULL;
 	SHA1_CTX ctx;
-	uint8_t *data;
+	uint8_t *data = NULL;
 	size_t datalen;
 	ssize_t n;
 	char *header;
@@ -179,14 +199,28 @@ read_packed_object(struct got_pack *pack, struct got_indexed_object *obj)
 	case GOT_OBJ_TYPE_COMMIT:
 	case GOT_OBJ_TYPE_TREE:
 	case GOT_OBJ_TYPE_TAG:
-		/* XXX TODO reading large objects into memory is bad! */
-		if (pack->map) {
-			err = got_inflate_to_mem_mmap(&data, &datalen,
-			    &obj->len, &obj->crc, pack->map, mapoff,
-			    pack->filesize - mapoff);
+		if (obj->size > GOT_DELTA_RESULT_SIZE_CACHED_MAX) {
+			if (fseek(tmpfile, 0L, SEEK_SET) == -1) {
+				err = got_error_from_errno("fseek");
+				break;
+			}
+			if (pack->map) {
+				err = got_inflate_to_file_mmap(&datalen,
+				    &obj->len, &obj->crc, pack->map, mapoff,
+				    pack->filesize - mapoff, tmpfile);
+			} else {
+				err = got_inflate_to_file_fd(&datalen,
+				    &obj->len, &obj->crc, pack->fd, tmpfile);
+			}
 		} else {
-			err = got_inflate_to_mem_fd(&data, &datalen,
-			    &obj->len, &obj->crc, obj->size, pack->fd);
+			if (pack->map) {
+				err = got_inflate_to_mem_mmap(&data, &datalen,
+				    &obj->len, &obj->crc, pack->map, mapoff,
+				    pack->filesize - mapoff);
+			} else {
+				err = got_inflate_to_mem_fd(&data, &datalen,
+				    &obj->len, &obj->crc, obj->size, pack->fd);
+			}
 		}
 		if (err)
 			break;
@@ -203,7 +237,12 @@ read_packed_object(struct got_pack *pack, struct got_indexed_object *obj)
 		}
 		headerlen = strlen(header) + 1;
 		SHA1Update(&ctx, header, headerlen);
-		SHA1Update(&ctx, data, datalen);
+		if (obj->size > GOT_DELTA_RESULT_SIZE_CACHED_MAX) {
+			err = read_file_sha1(&ctx, tmpfile);
+			if (err)
+				break;
+		} else
+			SHA1Update(&ctx, data, datalen);
 		SHA1Final(obj->id.sha1, &ctx);
 		free(header);
 		free(data);
@@ -509,8 +548,8 @@ update_packidx(struct got_packidx *packidx, int nobj,
 }
 
 static const struct got_error *
-index_pack(struct got_pack *pack, int idxfd, uint8_t *pack_hash,
-    struct imsgbuf *ibuf)
+index_pack(struct got_pack *pack, int idxfd, FILE *tmpfile,
+    uint8_t *pack_hash, struct imsgbuf *ibuf)
 {
 	const struct got_error *err;
 	struct got_packfile_hdr hdr;
@@ -629,7 +668,7 @@ index_pack(struct got_pack *pack, int idxfd, uint8_t *pack_hash,
 			}
 		}
 
-		err = read_packed_object(pack, obj);
+		err = read_packed_object(pack, obj, tmpfile);
 		if (err)
 			goto done;
 
@@ -786,7 +825,8 @@ main(int argc, char **argv)
 	const struct got_error *err = NULL, *close_err;
 	struct imsgbuf ibuf;
 	struct imsg imsg;
-	int idxfd = -1;
+	int idxfd = -1, tmpfd = -1;
+	FILE *tmpfile = NULL;
 	struct got_pack pack;
 	uint8_t pack_hash[SHA1_DIGEST_LENGTH];
 	off_t packfile_size;
@@ -845,6 +885,27 @@ main(int argc, char **argv)
 	}
 	idxfd = imsg.fd;
 
+	err = got_privsep_recv_imsg(&imsg, &ibuf, 0);
+	if (err)
+		goto done;
+	if (imsg.hdr.type == GOT_IMSG_STOP)
+		goto done;
+	if (imsg.hdr.type != GOT_IMSG_TMPFD) {
+		err = got_error(GOT_ERR_PRIVSEP_MSG);
+		goto done;
+	}
+	if (imsg.hdr.len - IMSG_HEADER_SIZE != 0) {
+		err = got_error(GOT_ERR_PRIVSEP_LEN);
+		goto done;
+	}
+	tmpfd = imsg.fd;
+	tmpfile = fdopen(tmpfd, "w+");
+	if (tmpfile == NULL) {
+		err = got_error_from_errno("fdopen");
+		goto done;
+	}
+	tmpfd = -1;
+
 	if (lseek(pack.fd, 0, SEEK_END) == -1) {
 		err = got_error_from_errno("lseek");
 		goto done;
@@ -867,13 +928,15 @@ main(int argc, char **argv)
 	if (pack.map == MAP_FAILED)
 		pack.map = NULL; /* fall back to read(2) */
 #endif
-	err = index_pack(&pack, idxfd, pack_hash, &ibuf);
+	err = index_pack(&pack, idxfd, tmpfile, pack_hash, &ibuf);
 done:
 	close_err = got_pack_close(&pack);
 	if (close_err && err == NULL)
 		err = close_err;
 	if (idxfd != -1 && close(idxfd) == -1 && err == NULL)
 		err = got_error_from_errno("close");
+	if (tmpfd != -1 && close(tmpfd) == -1 && err == NULL)
+		err = got_error_from_errno("close");
 
 	if (err == NULL)
 		err = got_privsep_send_index_pack_done(&ibuf);