Commit 0a0a30486325aded3095a281a9238a8dcc9b16a7

Stefan Sperling 2018-01-10T21:15:21

open pack file index

diff --git a/include/got_error.h b/include/got_error.h
index 398a3f9..fbfadea 100644
--- a/include/got_error.h
+++ b/include/got_error.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017 Stefan Sperling <stsp@openbsd.org>
+ * Copyright (c) 2018 Stefan Sperling <stsp@openbsd.org>
  *
  * Permission to use, copy, modify, and distribute this software for any
  * purpose with or without fee is hereby granted, provided that the above
@@ -29,6 +29,7 @@
 #define GOT_ERR_OBJ_TYPE	0x0011
 #define GOT_ERR_BAD_OBJ_DATA	0x0012
 #define GOT_ERR_FILE_OPEN	0x0013
+#define GOT_ERR_BAD_PACKIDX	0x0014
 
 static const struct got_error {
 	int code;
@@ -48,6 +49,7 @@ static const struct got_error {
 	{ GOT_ERR_OBJ_TYPE,	"wrong type of object" },
 	{ GOT_ERR_BAD_OBJ_DATA,	"bad object data" },
 	{ GOT_ERR_FILE_OPEN,	"could not open file" },
+	{ GOT_ERR_BAD_PACKIDX,	"bad pack index file" },
 };
 
 const struct got_error * got_error(int code);
diff --git a/lib/pack.c b/lib/pack.c
new file mode 100644
index 0000000..8267925
--- /dev/null
+++ b/lib/pack.c
@@ -0,0 +1,240 @@
+/*
+ * Copyright (c) 2018 Stefan Sperling <stsp@openbsd.org>
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+#include <sys/stat.h>
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <limits.h>
+#include <sha1.h>
+#include <endian.h>
+
+#include "got_error.h"
+#include "pack.h"
+
+/*
+ * Read nitems items of size bytes each from f into buf. A short read is
+ * reported as GOT_ERR_IO if the stream's error indicator is set, and as
+ * GOT_ERR_BAD_PACKIDX otherwise (the file ended too early).
+ */
+static const struct got_error *
+read_items(void *buf, size_t size, size_t nitems, FILE *f)
+{
+	if (fread(buf, size, nitems, f) != nitems)
+		return got_error(ferror(f) ? GOT_ERR_IO : GOT_ERR_BAD_PACKIDX);
+
+	return NULL;
+}
+
+/*
+ * Check that the entries of a pack index fanout table never decrease.
+ * The table holds 0xff + 1 values in big endian byte order, so each value
+ * is converted to host byte order before it is compared.
+ */
+static const struct got_error *
+verify_fanout_table(uint32_t *fanout_table)
+{
+	int i;
+
+	/* Cover every adjacent pair, up to and including [0xfe], [0xff]. */
+	for (i = 0; i < 0xff; i++) {
+		if (betoh32(fanout_table[i]) > betoh32(fanout_table[i + 1]))
+			return got_error(GOT_ERR_BAD_PACKIDX);
+	}
+
+	return NULL;
+}
+
+/*
+ * Look up the size of the pack file which belongs to a pack index file.
+ * The pack file path is derived from path_idx by replacing everything
+ * after the last '.' with "pack". On success the size reported by stat(2)
+ * is stored in *size and NULL is returned.
+ */
+const struct got_error *
+get_packfile_size(size_t *size, const char *path_idx)
+{
+	struct stat sb;
+	char *path_pack;
+	char base_path[PATH_MAX];
+	char *dot;
+
+	/* A strlcpy() result >= the buffer size means truncation. */
+	if (strlcpy(base_path, path_idx, PATH_MAX) >= PATH_MAX)
+		return got_error(GOT_ERR_NO_SPACE);
+
+	dot = strrchr(base_path, '.');
+	if (dot == NULL)
+		return got_error(GOT_ERR_BAD_PATH);
+	*dot = '\0';
+	if (asprintf(&path_pack, "%s.pack", base_path) == -1)
+		return got_error(GOT_ERR_NO_MEM);
+
+	if (stat(path_pack, &sb) != 0) {
+		free(path_pack);
+		return got_error(GOT_ERR_IO);
+	}
+
+	free(path_pack);
+	*size = sb.st_size;
+	return NULL;
+}
+
+/*
+ * Open the pack index file at path and load it into a newly allocated
+ * structure, which is returned in *packidx on success and should be
+ * released with got_packidx_close(). On failure an error is returned
+ * and *packidx is set to NULL.
+ *
+ * The matching pack file must exist, since its size is used to decide
+ * whether a large offsets table is present. Table values are kept in
+ * big endian byte order. The trailer is read but not verified yet.
+ */
+const struct got_error *
+got_packidx_open(struct got_packidx_v2_hdr **packidx, const char *path)
+{
+	struct got_packidx_v2_hdr *p = NULL;
+	FILE *f;
+	const struct got_error *err = NULL;
+	size_t i, nobj, nlarge = 0, packfile_size;
+
+	/* Never hand an indeterminate pointer back to the caller. */
+	*packidx = NULL;
+
+	f = fopen(path, "rb");
+	if (f == NULL)
+		return got_error(GOT_ERR_BAD_PATH);
+
+	/* From here on every error path goes through 'done' to close f. */
+	err = get_packfile_size(&packfile_size, path);
+	if (err)
+		goto done;
+
+	p = calloc(1, sizeof(*p));
+	if (p == NULL) {
+		err = got_error(GOT_ERR_NO_MEM);
+		goto done;
+	}
+
+	err = read_items(&p->magic, sizeof(p->magic), 1, f);
+	if (err)
+		goto done;
+	if (betoh32(p->magic) != GOT_PACKIDX_V2_MAGIC) {
+		err = got_error(GOT_ERR_BAD_PACKIDX);
+		goto done;
+	}
+
+	err = read_items(&p->version, sizeof(p->version), 1, f);
+	if (err)
+		goto done;
+	if (betoh32(p->version) != GOT_PACKIDX_VERSION) {
+		err = got_error(GOT_ERR_BAD_PACKIDX);
+		goto done;
+	}
+
+	err = read_items(&p->fanout_table, sizeof(p->fanout_table), 1, f);
+	if (err)
+		goto done;
+	err = verify_fanout_table(p->fanout_table);
+	if (err)
+		goto done;
+
+	/* The last fanout table entry is the total number of objects. */
+	nobj = betoh32(p->fanout_table[0xff]);
+
+	p->sorted_ids = calloc(nobj, sizeof(*p->sorted_ids));
+	if (p->sorted_ids == NULL) {
+		err = got_error(GOT_ERR_NO_MEM);
+		goto done;
+	}
+	err = read_items(p->sorted_ids, sizeof(*p->sorted_ids), nobj, f);
+	if (err)
+		goto done;
+
+	/* In an index file the CRC32 table precedes the offsets table. */
+	p->crc32 = calloc(nobj, sizeof(*p->crc32));
+	if (p->crc32 == NULL) {
+		err = got_error(GOT_ERR_NO_MEM);
+		goto done;
+	}
+	err = read_items(p->crc32, sizeof(*p->crc32), nobj, f);
+	if (err)
+		goto done;
+
+	p->offsets = calloc(nobj, sizeof(*p->offsets));
+	if (p->offsets == NULL) {
+		err = got_error(GOT_ERR_NO_MEM);
+		goto done;
+	}
+	err = read_items(p->offsets, sizeof(*p->offsets), nobj, f);
+	if (err)
+		goto done;
+
+	/*
+	 * Large file offsets are contained only in files > 2GB. Their table
+	 * has one entry per offset with the most significant bit set, which
+	 * is generally fewer entries than there are objects.
+	 */
+	if (packfile_size >= 0x80000000) {
+		for (i = 0; i < nobj; i++) {
+			if (betoh32(p->offsets[i]) & 0x80000000)
+				nlarge++;
+		}
+	}
+	if (nlarge > 0) {
+		p->large_offsets = calloc(nlarge, sizeof(*p->large_offsets));
+		if (p->large_offsets == NULL) {
+			err = got_error(GOT_ERR_NO_MEM);
+			goto done;
+		}
+		err = read_items(p->large_offsets, sizeof(*p->large_offsets),
+		    nlarge, f);
+		if (err)
+			goto done;
+	}
+
+	err = read_items(&p->trailer, sizeof(p->trailer), 1, f);
+	if (err)
+		goto done;
+
+	/* TODO verify checksum */
+
+done:
+	fclose(f);
+	if (err)
+		got_packidx_close(p);
+	else
+		*packidx = p;
+	return err;
+}
+
+/*
+ * Free a pack index and all tables which were loaded for it.
+ * Like free(3), this accepts a NULL pointer, in which case it does nothing.
+ */
+void
+got_packidx_close(struct got_packidx_v2_hdr *packidx)
+{
+	if (packidx == NULL)
+		return;
+
+	free(packidx->sorted_ids);
+	free(packidx->offsets);
+	free(packidx->crc32);
+	free(packidx->large_offsets);
+	free(packidx);
+}
diff --git a/lib/pack.h b/lib/pack.h
index 99603fc..4d508b3 100644
--- a/lib/pack.h
+++ b/lib/pack.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017 Stefan Sperling <stsp@openbsd.org>
+ * Copyright (c) 2018 Stefan Sperling <stsp@openbsd.org>
  *
  * Permission to use, copy, modify, and distribute this software for any
  * purpose with or without fee is hereby granted, provided that the above
@@ -20,7 +20,7 @@ struct got_pack_obj_id {
 	u_int8_t sha1[SHA1_DIGEST_LENGTH];
 } __attribute__((__packed__));
 
-struct got_pack_idx_trailer {
+struct got_packidx_trailer {
 	u_int8_t	pack_file_sha1[SHA1_DIGEST_LENGTH];
 	u_int8_t	pack_idx_sha1[SHA1_DIGEST_LENGTH];
 } __attribute__((__packed__));
@@ -41,7 +41,7 @@ struct got_packidx_v2_hdr {
 	 * total number of objects in the pack file. All pointer variables
 	 * below point to tables with a corresponding number of entries.
 	 */
-	uint32_t	fanout_table[0xff];	/* values are big endian */
+	uint32_t	fanout_table[0xff + 1];	/* values are big endian */
 
 	/* Sorted SHA1 checksums for each object in the pack file. */
 	struct got_pack_obj_id *sorted_ids;
@@ -57,7 +57,7 @@ struct got_packidx_v2_hdr {
 	/* Large offsets table is empty for pack files < 2 GB. */
 	uint64_t	*large_offsets;		/* values are big endian */
 
-	struct got_pack_idx_trailer trailer;
+	struct got_packidx_trailer trailer;
 };
 
 struct got_packfile_hdr {
@@ -114,8 +114,12 @@ struct got_packfile_object_data_offset_delta {
 
 struct got_packfile_obj_data {
 	union {
-		struct got_packfile_object_data;
-		struct got_packfile_object_data_ref_delta;
-		struct got_packfile_object_data_offset_delta;
+		struct got_packfile_object_data data;
+		struct got_packfile_object_data_ref_delta ref_delta;
+		struct got_packfile_object_data_offset_delta offset_delta;
 	} __attribute__((__packed__));
 } __attribute__((__packed__));
+
+const struct got_error *got_packidx_open(struct got_packidx_v2_hdr **,
+    const char *);
+void got_packidx_close(struct got_packidx_v2_hdr *);
diff --git a/regress/packfiles/Makefile b/regress/packfiles/Makefile
new file mode 100644
index 0000000..829b06d
--- /dev/null
+++ b/regress/packfiles/Makefile
@@ -0,0 +1,12 @@
+.PATH:${.CURDIR}/../../lib
+
+PROG = packfile_test
+SRCS = error.c pack.c packfile_test.c
+
+CPPFLAGS = -I${.CURDIR}/../../include -I${.CURDIR}/../../lib
+LDADD = 
+DEBUG = -O0 -g
+
+NOMAN = yes
+
+.include <bsd.regress.mk>
diff --git a/regress/packfiles/packfile_test.c b/regress/packfiles/packfile_test.c
new file mode 100644
index 0000000..fbe9c74
--- /dev/null
+++ b/regress/packfiles/packfile_test.c
@@ -0,0 +1,91 @@
+/*
+ * Copyright (c) 2018 Stefan Sperling <stsp@openbsd.org>
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+#include <sys/stat.h>
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <sha1.h>
+#include <zlib.h>
+
+#include "got_error.h"
+#include "pack.h"
+
+/*
+ * Evaluate a test expression and, if it is false, report the named test
+ * as failed and set the 'failure' variable of the enclosing function.
+ */
+#define RUN_TEST(expr, name) \
+	do { \
+		if (!(expr)) { \
+			printf("test %s failed\n", (name)); \
+			failure = 1; \
+		} \
+	} while (0)
+
+/* Repository used if no path is given on the command line. */
+#define GOT_REPO_PATH "../../../"
+
+/*
+ * Try to open a pack index file below repo_path. The name of the pack
+ * is hard-coded. Returns 1 if the index could be opened and 0 otherwise.
+ */
+static int
+packfile_read_idx(const char *repo_path)
+{
+	const struct got_error *err;
+	struct got_packidx_v2_hdr *packidx = NULL;
+	const char *pack_checksum = "5414c35e56c54294d2515863832bf46ad0e321d7";
+	const char *pack_prefix = ".git/objects/pack/pack";
+	char *fullpath;
+	int ret = 1;
+
+	if (asprintf(&fullpath, "%s/%s-%s.idx", repo_path, pack_prefix,
+	    pack_checksum) == -1)
+		return 0;
+
+	err = got_packidx_open(&packidx, fullpath);
+	if (err) {
+		/* Nothing was opened, so there is nothing to close. */
+		printf("got_packidx_open: %s\n", err->msg);
+		ret = 0;
+	} else
+		got_packidx_close(packidx);
+
+	free(fullpath);
+	return ret;
+}
+
+/* Run all tests; the exit status is 0 only if every test passed. */
+int
+main(int argc, const char *argv[])
+{
+	int failure = 0;
+	const char *repo_path;
+
+	if (argc == 1)
+		repo_path = GOT_REPO_PATH;
+	else if (argc == 2)
+		repo_path = argv[1];
+	else {
+		fprintf(stderr, "usage: packfile_test [REPO_PATH]\n");
+		return 1;
+	}
+
+	RUN_TEST(packfile_read_idx(repo_path), "packfile_read_idx");
+
+	return failure ? 1 : 0;
+}