Commit 57b35b75575f6066ee8cacd4916b638f8df8e269

Stefan Sperling 2018-06-22T20:12:26

access pack index files with mmap()

diff --git a/lib/got_lib_pack.h b/lib/got_lib_pack.h
index 6658cfd..f0d6dcd 100644
--- a/lib/got_lib_pack.h
+++ b/lib/got_lib_pack.h
@@ -30,13 +30,17 @@ struct got_packidx_trailer {
 	u_int8_t	packidx_sha1[SHA1_DIGEST_LENGTH];
 } __attribute__((__packed__));
 
+struct got_packidx_object_id {
+	u_int8_t sha1[SHA1_DIGEST_LENGTH];	/* SHA1 bytes; get_object_idx() compares them with memcmp() */
+} __attribute__((__packed__));	/* packed: sorted_ids points straight into the mapped index */
+
 /* Ignore pack index version 1 which is no longer written by Git. */
 #define GOT_PACKIDX_VERSION 2
 
 struct got_packidx_v2_hdr {
-	uint32_t	magic;		/* big endian */
+	uint32_t	*magic;		/* big endian */
 #define GOT_PACKIDX_V2_MAGIC 0xff744f63	/* "\377t0c" */
-	uint32_t	version;
+	uint32_t	*version;
 
 	/* 
 	 * Each entry N in the fanout table contains the number of objects in
@@ -46,10 +50,11 @@ struct got_packidx_v2_hdr {
 	 * total number of objects in the pack file. All pointer variables
 	 * below point to tables with a corresponding number of entries.
 	 */
-	uint32_t	fanout_table[0xff + 1];	/* values are big endian */
+	uint32_t	*fanout_table;	/* values are big endian */
+#define GOT_PACKIDX_V2_FANOUT_TABLE_ITEMS (0xff + 1)
 
 	/* Sorted SHA1 checksums for each object in the pack file. */
-	struct got_object_id *sorted_ids;
+	struct got_packidx_object_id *sorted_ids;
 
 	/* CRC32 of the packed representation of each object. */
 	uint32_t	*crc32;
@@ -62,13 +67,16 @@ struct got_packidx_v2_hdr {
 	/* Large offsets table is empty for pack files < 2 GB. */
 	uint64_t	*large_offsets;		/* values are big endian */
 
-	struct got_packidx_trailer trailer;
+	struct got_packidx_trailer *trailer;
 };
 
 /* An open pack index file. */
 struct got_packidx {
 	char *path_packidx; /* actual on-disk path */
-	struct got_packidx_v2_hdr hdr;
+	int fd;	/* from open(2) in got_packidx_open(); closed by got_packidx_close() */
+	uint8_t *map;	/* PROT_READ, MAP_PRIVATE mapping of fd; NULL until mmap(2) is attempted */
+	size_t len;	/* length given to mmap(2)/munmap(2); NOTE(review): set by get_packfile_size() -- confirm it is the index file's size */
+	struct got_packidx_v2_hdr hdr; /* convenient pointers into map */
 };
 
 struct got_packfile_hdr {
@@ -137,7 +145,7 @@ struct got_packfile_obj_data {
 
 const struct got_error *got_packidx_open(struct got_packidx **,
     const char *);
-void got_packidx_close(struct got_packidx *);
+const struct got_error* got_packidx_close(struct got_packidx *);
 
 const struct got_error *got_packfile_open_object(struct got_object **,
     struct got_object_id *, struct got_repository *);
diff --git a/lib/pack.c b/lib/pack.c
index d46cc6d..a191b9a 100644
--- a/lib/pack.c
+++ b/lib/pack.c
@@ -17,6 +17,7 @@
 #include <sys/types.h>
 #include <sys/stat.h>
 #include <sys/queue.h>
+#include <sys/mman.h>
 
 #include <dirent.h>
 #include <fcntl.h>
@@ -122,9 +123,8 @@ got_packidx_open(struct got_packidx **packidx, const char *path)
 {
 	struct got_packidx *p;
 	struct got_packidx_v2_hdr *h;
-	FILE *f;
 	const struct got_error *err = NULL;
-	size_t n, nobj, packfile_size;
+	size_t nobj, len_fanout, len_ids, offset, remain;
 	SHA1_CTX ctx;
 	uint8_t sha1[SHA1_DIGEST_LENGTH];
 
@@ -132,141 +132,140 @@ got_packidx_open(struct got_packidx **packidx, const char *path)
 
 	SHA1Init(&ctx);
 
-	f = fopen(path, "rb");
-	if (f == NULL)
+	p = calloc(1, sizeof(*p));
+	if (p == NULL)
 		return got_error_from_errno();
 
-	err = get_packfile_size(&packfile_size, path);
-	if (err)
+	p->fd = open(path, O_RDONLY | O_NOFOLLOW, GOT_DEFAULT_FILE_MODE);
+	if (p->fd == -1)
+		return got_error_from_errno();
+
+	err = get_packfile_size(&p->len, path);
+	if (err) {
+		close(p->fd);
+		free(p);
+		return err;
+	}
+	if (p->len < sizeof(p->hdr)) {
+		err = got_error(GOT_ERR_BAD_PACKIDX);
+		close(p->fd);
+		free(p);
 		return err;
+	}
 
-	p = calloc(1, sizeof(*p));
-	if (p == NULL)
-		return got_error_from_errno();
 	p->path_packidx = strdup(path);
 	if (p->path_packidx == NULL) {
 		err = got_error_from_errno();
-		free(p->path_packidx);
-		free(p);
-		return err;
+		goto done;
 	}
 
-	h = &p->hdr;
-	n = fread(&h->magic, sizeof(h->magic), 1, f);
-	if (n != 1) {
-		err = got_ferror(f, GOT_ERR_BAD_PACKIDX);
+	p->map = mmap(NULL, p->len, PROT_READ, MAP_PRIVATE, p->fd, 0);
+	if (p->map == MAP_FAILED) {
+		err = got_error_from_errno();
 		goto done;
 	}
+	h = &p->hdr;
+	offset = 0;
+	remain = p->len;
 
-	if (betoh32(h->magic) != GOT_PACKIDX_V2_MAGIC) {
+	if (remain < sizeof(*h->magic)) {
 		err = got_error(GOT_ERR_BAD_PACKIDX);
 		goto done;
 	}
+	h->magic = (uint32_t *)(p->map + offset);
+	if (betoh32(*h->magic) != GOT_PACKIDX_V2_MAGIC) {
+		err = got_error(GOT_ERR_BAD_PACKIDX);
+		goto done;
+	}
+	offset += sizeof(*h->magic);
+	remain -= sizeof(*h->magic);
 
-	SHA1Update(&ctx, (uint8_t *)&h->magic, sizeof(h->magic));
+	SHA1Update(&ctx, (uint8_t *)h->magic, sizeof(*h->magic));
 
-	n = fread(&h->version, sizeof(h->version), 1, f);
-	if (n != 1) {
-		err = got_ferror(f, GOT_ERR_BAD_PACKIDX);
+	if (remain < sizeof(*h->version)) {
+		err = got_error(GOT_ERR_BAD_PACKIDX);
 		goto done;
 	}
-
-	if (betoh32(h->version) != GOT_PACKIDX_VERSION) {
+	h->version = (uint32_t *)(p->map + offset);
+	if (betoh32(*h->version) != GOT_PACKIDX_VERSION) {
 		err = got_error(GOT_ERR_BAD_PACKIDX);
 		goto done;
 	}
+	offset += sizeof(*h->version);
+	remain -= sizeof(*h->version);
 
-	SHA1Update(&ctx, (uint8_t *)&h->version, sizeof(h->version));
+	SHA1Update(&ctx, (uint8_t *)h->version, sizeof(*h->version));
 
-	n = fread(&h->fanout_table, sizeof(h->fanout_table), 1, f);
-	if (n != 1) {
-		err = got_ferror(f, GOT_ERR_BAD_PACKIDX);
+	len_fanout =
+	    sizeof(*h->fanout_table) * GOT_PACKIDX_V2_FANOUT_TABLE_ITEMS;
+	if (remain < len_fanout) {
+		err = got_error(GOT_ERR_BAD_PACKIDX);
 		goto done;
 	}
-
+	h->fanout_table = (uint32_t *)(p->map + offset);
 	err = verify_fanout_table(h->fanout_table);
 	if (err)
 		goto done;
-
-	SHA1Update(&ctx, (uint8_t *)h->fanout_table, sizeof(h->fanout_table));
+	SHA1Update(&ctx, (uint8_t *)h->fanout_table, len_fanout);
+	offset += len_fanout;
+	remain -= len_fanout;
 
 	nobj = betoh32(h->fanout_table[0xff]);
-
-	h->sorted_ids = calloc(nobj, sizeof(*h->sorted_ids));
-	if (h->sorted_ids == NULL) {
-		err = got_error_from_errno();
-		goto done;
-	}
-
-	n = fread(h->sorted_ids, sizeof(*h->sorted_ids), nobj, f);
-	if (n != nobj) {
-		err = got_ferror(f, GOT_ERR_BAD_PACKIDX);
-		goto done;
-	}
-
-	SHA1Update(&ctx, (uint8_t *)h->sorted_ids,
-	    nobj * sizeof(*h->sorted_ids));
-
-	h->crc32 = calloc(nobj, sizeof(*h->crc32));
-	if (h->crc32 == NULL) {
-		err = got_error_from_errno();
+	len_ids = nobj * sizeof(*h->sorted_ids);
+	if (len_ids <= nobj || len_ids > remain) {
+		err = got_error(GOT_ERR_BAD_PACKIDX);
 		goto done;
 	}
+	h->sorted_ids =
+	    (struct got_packidx_object_id *)((uint8_t*)(p->map + offset));
+	SHA1Update(&ctx, (uint8_t *)h->sorted_ids, len_ids);
+	offset += len_ids;
+	remain -= len_ids;
 
-	n = fread(h->crc32, sizeof(*h->crc32), nobj, f);
-	if (n != nobj) {
-		err = got_ferror(f, GOT_ERR_BAD_PACKIDX);
+	if (remain < nobj * sizeof(*h->crc32)) {
+		err = got_error(GOT_ERR_BAD_PACKIDX);
 		goto done;
 	}
-
+	h->crc32 = (uint32_t *)((uint8_t*)(p->map + offset));
 	SHA1Update(&ctx, (uint8_t *)h->crc32, nobj * sizeof(*h->crc32));
+	remain -= nobj * sizeof(*h->crc32);
+	offset += nobj * sizeof(*h->crc32);
 
-	h->offsets = calloc(nobj, sizeof(*h->offsets));
-	if (h->offsets == NULL) {
-		err = got_error_from_errno();
-		goto done;
-	}
-
-	n = fread(h->offsets, sizeof(*h->offsets), nobj, f);
-	if (n != nobj) {
-		err = got_ferror(f, GOT_ERR_BAD_PACKIDX);
+	if (remain < nobj * sizeof(*h->offsets)) {
+		err = got_error(GOT_ERR_BAD_PACKIDX);
 		goto done;
 	}
-
+	h->offsets = (uint32_t *)((uint8_t*)(p->map + offset));
 	SHA1Update(&ctx, (uint8_t *)h->offsets, nobj * sizeof(*h->offsets));
+	remain -= nobj * sizeof(*h->offsets);
+	offset += nobj * sizeof(*h->offsets);
 
 	/* Large file offsets are contained only in files > 2GB. */
-	if (packfile_size <= 0x80000000)
+	if (p->len <= 0x80000000)
 		goto checksum;
 
-	h->large_offsets = calloc(nobj, sizeof(*h->large_offsets));
-	if (h->large_offsets == NULL) {
-		err = got_error_from_errno();
-		goto done;
-	}
-
-	n = fread(h->large_offsets, sizeof(*h->large_offsets), nobj, f);
-	if (n != nobj) {
-		err = got_ferror(f, GOT_ERR_BAD_PACKIDX);
+	if (remain < nobj * sizeof(*h->large_offsets)) {
+		err = got_error(GOT_ERR_BAD_PACKIDX);
 		goto done;
 	}
-
+	h->large_offsets = (uint64_t *)((uint8_t*)(p->map + offset));
 	SHA1Update(&ctx, (uint8_t*)h->large_offsets,
 	    nobj * sizeof(*h->large_offsets));
+	remain -= nobj * sizeof(*h->large_offsets);
+	offset += nobj * sizeof(*h->large_offsets);
 
 checksum:
-	n = fread(&h->trailer, sizeof(h->trailer), 1, f);
-	if (n != 1) {
-		err = got_ferror(f, GOT_ERR_BAD_PACKIDX);
+	if (remain < sizeof(*h->trailer)) {
+		err = got_error(GOT_ERR_BAD_PACKIDX);
 		goto done;
 	}
-
-	SHA1Update(&ctx, h->trailer.packfile_sha1, SHA1_DIGEST_LENGTH);
+	h->trailer =
+	    (struct got_packidx_trailer *)((uint8_t*)(p->map + offset));
+	SHA1Update(&ctx, h->trailer->packfile_sha1, SHA1_DIGEST_LENGTH);
 	SHA1Final(sha1, &ctx);
-	if (memcmp(h->trailer.packidx_sha1, sha1, SHA1_DIGEST_LENGTH) != 0)
+	if (memcmp(h->trailer->packidx_sha1, sha1, SHA1_DIGEST_LENGTH) != 0)
 		err = got_error(GOT_ERR_PACKIDX_CSUM);
 done:
-	fclose(f);
 	if (err)
 		got_packidx_close(p);
 	else
@@ -274,15 +273,20 @@ done:
 	return err;
 }
 
-void
+const struct got_error *
 got_packidx_close(struct got_packidx *packidx)
 {
-	free(packidx->hdr.sorted_ids);
-	free(packidx->hdr.offsets);
-	free(packidx->hdr.crc32);
-	free(packidx->hdr.large_offsets);
+	const struct got_error *err = NULL;	/* stays NULL unless munmap() fails */
+
 	free(packidx->path_packidx);
+	if (packidx->map != NULL && packidx->map != MAP_FAILED) {	/* map is NULL or MAP_FAILED if open bailed out early */
+		if (munmap(packidx->map, packidx->len) == -1)	/* hdr pointers point into map, so nothing else to free */
+			err = got_error_from_errno();
+	}
+	close(packidx->fd);	/* return value is not checked */
 	free(packidx);
+
+	return err;	/* packidx has been freed in either case */
 }
 
 static int
@@ -330,12 +334,12 @@ get_object_idx(struct got_packidx *packidx, struct got_object_id *id,
 		left = betoh32(packidx->hdr.fanout_table[id0 - 1]);
 
 	while (left <= right) {
-		struct got_object_id *oid;
+		struct got_packidx_object_id *oid;
 		int i, cmp;
 
 		i = ((left + right) / 2);
 		oid = &packidx->hdr.sorted_ids[i];
-		cmp = got_object_id_cmp(id, oid);
+		cmp = memcmp(id->sha1, oid->sha1, SHA1_DIGEST_LENGTH);
 		if (cmp == 0)
 			return i;
 		else if (cmp > 0)
@@ -347,9 +351,10 @@ get_object_idx(struct got_packidx *packidx, struct got_object_id *id,
 	return -1;
 }
 
-static void
+static const struct got_error *
 cache_packidx(struct got_packidx *packidx, struct got_repository *repo)
 {
+	const struct got_error *err = NULL;
 	int i;
 
 	for (i = 0; i < nitems(repo->packidx_cache); i++) {
@@ -358,7 +363,9 @@ cache_packidx(struct got_packidx *packidx, struct got_repository *repo)
 	}
 
 	if (i == nitems(repo->packidx_cache)) {
-		got_packidx_close(repo->packidx_cache[i - 1]);
+		err = got_packidx_close(repo->packidx_cache[i - 1]);
+		if (err)
+			return err;
 		memmove(&repo->packidx_cache[1], &repo->packidx_cache[0],
 		    sizeof(repo->packidx_cache) -
 		    sizeof(repo->packidx_cache[0]));
@@ -366,6 +373,7 @@ cache_packidx(struct got_packidx *packidx, struct got_repository *repo)
 	}
 
 	repo->packidx_cache[i] = packidx;
+	return NULL;
 }
 
 static const struct got_error *
@@ -419,12 +427,14 @@ search_packidx(struct got_packidx **packidx, int *idx,
 		*idx = get_object_idx(*packidx, id, repo);
 		if (*idx != -1) {
 			err = NULL; /* found the object */
-			cache_packidx(*packidx, repo);
+			err = cache_packidx(*packidx, repo);
 			goto done;
 		}
 
-		got_packidx_close(*packidx);
+		err = got_packidx_close(*packidx);
 		*packidx = NULL;
+		if (err)
+			goto done;
 	}
 
 	err = got_error(GOT_ERR_NO_OBJ);