Commit b7c44096ae12c47085978a4992d4f8cdf7946db4

Carlos Martín Nieto 2011-07-28T23:35:39

Implement the indexer Only v2 index files are supported. Signed-off-by: Carlos Martín Nieto <carlos@cmartin.tk>

diff --git a/include/git2/indexer.h b/include/git2/indexer.h
index f32b1ef..1f59ee3 100644
--- a/include/git2/indexer.h
+++ b/include/git2/indexer.h
@@ -2,6 +2,7 @@
 #define _INCLUDE_git_indexer_h__
 
 #include "git2/common.h"
+#include "git2/oid.h"
 
 typedef struct git_indexer_stats {
 	unsigned int total;
@@ -12,7 +13,8 @@ typedef struct git_indexer_stats {
 typedef struct git_indexer git_indexer;
 
 GIT_EXTERN(int) git_indexer_new(git_indexer **out, const char *packname);
-GIT_EXTERN(int) git_indexer_run(git_indexer *idx, int (*cb)(const git_indexer_stats *, void *), void *data);
+GIT_EXTERN(int) git_indexer_run(git_indexer *idx, git_indexer_stats *stats);
+GIT_EXTERN(const git_oid *) git_indexer_result(git_indexer *idx);
 GIT_EXTERN(void) git_indexer_free(git_indexer *idx);
 
 
diff --git a/src/indexer.c b/src/indexer.c
index 6de0fec..4def1af 100644
--- a/src/indexer.c
+++ b/src/indexer.c
@@ -26,14 +26,20 @@
 #include "git2/indexer.h"
 #include "git2/object.h"
 #include "git2/zlib.h"
+#include "git2/oid.h"
 
 #include "common.h"
 #include "pack.h"
 #include "mwindow.h"
 #include "posix.h"
+#include "pack.h"
+#include "filebuf.h"
+#include "sha1.h"
+
+#define UINT31_MAX (0x7FFFFFFF)
 
 struct entry {
-	unsigned char sha[GIT_OID_RAWSZ];
+	git_oid oid;
 	uint32_t crc;
 	uint32_t offset;
 	uint64_t offset_long;
@@ -42,11 +48,19 @@ struct entry {
 typedef struct git_indexer {
 	struct git_pack_file *pack;
 	struct stat st;
-	git_indexer_stats stats;
 	struct git_pack_header hdr;
-	struct entry *objects;
+	size_t nr_objects;
+	git_vector objects;
+	git_filebuf file;
+	unsigned int fanout[256];
+	git_oid hash;
 } git_indexer;
 
+const git_oid *git_indexer_hash(git_indexer *idx)
+{
+	return &idx->hash;
+}
+
 static int parse_header(git_indexer *idx)
 {
 	int error;
@@ -55,7 +69,7 @@ static int parse_header(git_indexer *idx)
 	if ((error = p_read(idx->pack->mwf.fd, &idx->hdr, sizeof(idx->hdr))) < GIT_SUCCESS)
 		return git__rethrow(error, "Failed to read in pack header");
 
-	if (idx->hdr.hdr_signature != htonl(PACK_SIGNATURE))
+	if (idx->hdr.hdr_signature != ntohl(PACK_SIGNATURE))
 		return git__throw(GIT_EOBJCORRUPTED, "Wrong pack signature");
 
 	if (!pack_version_ok(idx->hdr.hdr_version))
@@ -65,12 +79,23 @@ static int parse_header(git_indexer *idx)
 	return GIT_SUCCESS;
 }
 
+int objects_cmp(const void *a, const void *b)
+{
+	const struct entry *entrya = a;
+	const struct entry *entryb = b;
+
+	return git_oid_cmp(&entrya->oid, &entryb->oid);
+}
+
 int git_indexer_new(git_indexer **out, const char *packname)
 {
 	git_indexer *idx;
 	unsigned int namelen;
 	int ret, error;
 
+	if (git_path_root(packname) < 0)
+		return git__throw(GIT_EINVALIDPATH, "Path is not absolute");
+
 	idx = git__malloc(sizeof(git_indexer));
 	if (idx == NULL)
 		return GIT_ENOMEM;
@@ -83,7 +108,7 @@ int git_indexer_new(git_indexer **out, const char *packname)
 		goto cleanup;
 
 	memset(idx->pack, 0x0, sizeof(struct git_pack_file));
-	memcpy(idx->pack->pack_name, packname, namelen);
+	memcpy(idx->pack->pack_name, packname, namelen + 1);
 
 	ret = p_stat(packname, &idx->st);
 	if (ret < 0) {
@@ -102,6 +127,7 @@ int git_indexer_new(git_indexer **out, const char *packname)
 	}
 
 	idx->pack->mwf.fd = ret;
+	idx->pack->mwf.size = idx->st.st_size;
 
 	error = parse_header(idx);
 	if (error < GIT_SUCCESS) {
@@ -109,61 +135,187 @@ int git_indexer_new(git_indexer **out, const char *packname)
 		goto cleanup;
 	}
 
-	idx->objects = git__calloc(sizeof(struct entry), idx->hdr.hdr_entries);
-	if (idx->objects == NULL) {
-		error = GIT_ENOMEM;
+	idx->nr_objects = ntohl(idx->hdr.hdr_entries);
+
+	error = git_vector_init(&idx->objects, idx->nr_objects, objects_cmp);
+	if (error < GIT_SUCCESS) {
 		goto cleanup;
 	}
 
-	idx->stats.total = idx->hdr.hdr_entries;
-
 	*out = idx;
 
 	return GIT_SUCCESS;
 
 cleanup:
-	free(idx->pack);
-	free(idx);
+	git_indexer_free(idx);
 
 	return error;
 }
 
-/*
- * Create the index. Every time something interesting happens
- * (something has been parse or resolved), the callback gets called
- * with some stats so it can tell the user how hard we're working
- */
-int git_indexer_run(git_indexer *idx, int (*cb)(const git_indexer_stats *, void *), void *cb_data)
+static void index_path(char *path, git_indexer *idx)
 {
-	git_mwindow_file *mwf = &idx->pack->mwf;
-	off_t off = 0;
+	char *ptr;
+	const char prefix[] = "pack-", suffix[] = ".idx\0";
+
+	ptr = strrchr(path, '/') + 1;
+
+	memcpy(ptr, prefix, STRLEN(prefix));
+	ptr += STRLEN(prefix);
+	git_oid_fmt(ptr, &idx->hash);
+	ptr += GIT_OID_HEXSZ;
+	memcpy(ptr, suffix, STRLEN(suffix));
+}
+
+static int write_index(git_indexer *idx)
+{
+	git_mwindow *w = NULL;
+	int error, namelen;
+	unsigned int i, long_offsets, left;
+	struct git_pack_idx_header hdr;
+	char filename[GIT_PATH_MAX];
+	struct entry *entry;
+	void *packfile_hash;
+	git_oid file_hash;
+	SHA_CTX ctx;
+
+	git_vector_sort(&idx->objects);
+
+	namelen = strlen(idx->pack->pack_name);
+	memcpy(filename, idx->pack->pack_name, namelen);
+	memcpy(filename + namelen - STRLEN("pack"), "idx\0", STRLEN("idx\0"));
+
+	error = git_filebuf_open(&idx->file, filename, GIT_FILEBUF_HASH_CONTENTS);
+
+	/* Write out the header */
+	hdr.idx_signature = htonl(PACK_IDX_SIGNATURE);
+	hdr.idx_version = htonl(2);
+	error = git_filebuf_write(&idx->file, &hdr, sizeof(hdr));
+
+	/* Write out the fanout table */
+	for (i = 0; i < 256; ++i) {
+		uint32_t n = htonl(idx->fanout[i]);
+		error = git_filebuf_write(&idx->file, &n, sizeof(n));
+		if (error < GIT_SUCCESS)
+			goto cleanup;
+	}
+
+	/* Write out the object names (SHA-1 hashes) */
+	SHA1_Init(&ctx);
+	git_vector_foreach(&idx->objects, i, entry) {
+		error = git_filebuf_write(&idx->file, &entry->oid, sizeof(git_oid));
+		SHA1_Update(&ctx, &entry->oid, GIT_OID_RAWSZ);
+		if (error < GIT_SUCCESS)
+			goto cleanup;
+	}
+	SHA1_Final(idx->hash.id, &ctx);
+
+	/* Write out the CRC32 values */
+	git_vector_foreach(&idx->objects, i, entry) {
+		error = git_filebuf_write(&idx->file, &entry->crc, sizeof(uint32_t));
+		if (error < GIT_SUCCESS)
+			goto cleanup;
+	}
+
+	/* Write out the offsets */
+	git_vector_foreach(&idx->objects, i, entry) {
+		uint32_t n;
+
+		if (entry->offset == UINT32_MAX)
+			n = htonl(0x80000000 | long_offsets++);
+		else
+			n = htonl(entry->offset);
+
+		error = git_filebuf_write(&idx->file, &n, sizeof(uint32_t));
+		if (error < GIT_SUCCESS)
+			goto cleanup;
+	}
+
+	/* Write out the long offsets */
+	git_vector_foreach(&idx->objects, i, entry) {
+		uint32_t split[2];
+
+		if (entry->offset != UINT32_MAX)
+			continue;
+
+		split[0] = htonl(entry->offset_long >> 32);
+		split[1] = htonl(entry->offset_long & 0xffffffff);
+
+		error = git_filebuf_write(&idx->file, &split, sizeof(uint32_t) * 2);
+		if (error < GIT_SUCCESS)
+			goto cleanup;
+	}
+
+	/* Write out the packfile trailer */
+
+	packfile_hash = git_mwindow_open(&idx->pack->mwf, &w, idx->st.st_size - GIT_OID_RAWSZ, GIT_OID_RAWSZ, &left);
+	if (packfile_hash == NULL) {
+		error = git__rethrow(GIT_ENOMEM, "Failed to open window to packfile hash");
+		goto cleanup;
+	}
+
+	memcpy(&file_hash, packfile_hash, GIT_OID_RAWSZ);
+
+	git_mwindow_close(&w);
+
+	error = git_filebuf_write(&idx->file, &file_hash, sizeof(git_oid));
+
+	/* Write out the index sha */
+	error = git_filebuf_hash(&file_hash, &idx->file);
+	if (error < GIT_SUCCESS)
+		goto cleanup;
+
+	error = git_filebuf_write(&idx->file, &file_hash, sizeof(git_oid));
+	if (error < GIT_SUCCESS)
+		goto cleanup;
+
+	/* Figure out what the final name should be */
+	index_path(filename, idx);
+	/* Commit file */
+	error = git_filebuf_commit_at(&idx->file, filename);
+
+cleanup:
+	if (error < GIT_SUCCESS)
+		git_filebuf_cleanup(&idx->file);
+
+	return error;
+}
+
+int git_indexer_run(git_indexer *idx, git_indexer_stats *stats)
+{
+	git_mwindow_file *mwf;
+	off_t off = sizeof(struct git_pack_header);
 	int error;
-	unsigned int fanout[256] = {0};
+	struct entry *entry;
+	unsigned int left, processed;
 
-	/* FIXME: Write the keep file */
+	assert(idx && stats);
 
+	mwf = &idx->pack->mwf;
 	error = git_mwindow_file_register(mwf);
 	if (error < GIT_SUCCESS)
 		return git__rethrow(error, "Failed to register mwindow file");
 
-	/* Notify before the first one */
-	if (cb)
-		cb(&idx->stats, cb_data);
+	stats->total = idx->nr_objects;
+	stats->processed = processed = 0;
 
-	while (idx->stats.processed < idx->stats.total) {
+	while (processed < idx->nr_objects) {
 		git_rawobj obj;
 		git_oid oid;
-		struct entry entry;
+		git_mwindow *w = NULL;
 		char hdr[512] = {0}; /* FIXME: How long should this be? */
 		int i, hdr_len;
+		off_t entry_start = off;
+		void *packed;
+		size_t entry_size;
 
-		memset(&entry, 0x0, sizeof(entry)); /* Necessary? */
+		entry = git__malloc(sizeof(struct entry));
+		memset(entry, 0x0, sizeof(struct entry));
 
 		if (off > UINT31_MAX) {
-			entry.offset = ~0ULL;
-			entry.offset_long = off;
+			entry->offset = UINT32_MAX;
+			entry->offset_long = off;
 		} else {
-			entry.offset = off;
+			entry->offset = off;
 		}
 
 		error = git_packfile_unpack(&obj, idx->pack, &off);
@@ -178,30 +330,40 @@ int git_indexer_run(git_indexer *idx, int (*cb)(const git_indexer_stats *, void 
 			goto cleanup;
 		}
 
-		memcpy(&entry.sha, oid.id, GIT_OID_RAWSZ);
-		/* entry.crc = crc32(obj.data) */
+		git_oid_cpy(&entry->oid, &oid);
+		entry->crc = crc32(0L, Z_NULL, 0);
+
+		entry_size = off - entry_start;
+		packed = git_mwindow_open(mwf, &w, entry_start, entry_size, &left);
+		if (packed == NULL) {
+			error = git__rethrow(error, "Failed to open window to read packed data");
+			goto cleanup;
+		}
+		entry->crc = htonl(crc32(entry->crc, packed, entry_size));
+		git_mwindow_close(&w);
 
 		/* Add the object to the list */
-		//memcpy(&idx->objects[idx->stats.processed], &entry, sizeof(entry));
-		idx->objects[idx->stats.processed] = entry;
+		error = git_vector_insert(&idx->objects, entry);
+		if (error < GIT_SUCCESS) {
+			error = git__rethrow(error, "Failed to add entry to list");
+			goto cleanup;
+		}
 
 		for (i = oid.id[0]; i < 256; ++i) {
-			fanout[i]++;
+			idx->fanout[i]++;
 		}
 
 		free(obj.data);
 
-		idx->stats.processed++;
-
-		if (cb)
-			cb(&idx->stats, cb_data);
-
+		stats->processed = ++processed;
 	}
 
 	/*
 	 * All's gone well, so let's write the index file.
 	 */
+	error = write_index(idx);
 
+	/* Delete keep file */
 cleanup:
 	git_mwindow_free_all(mwf);
 
@@ -211,8 +373,13 @@ cleanup:
 
 void git_indexer_free(git_indexer *idx)
 {
+	unsigned int i;
+	struct entry *e;
+
 	p_close(idx->pack->mwf.fd);
-	free(idx->objects);
+	git_vector_foreach(&idx->objects, i, e)
+		free(e);
+	git_vector_free(&idx->objects);
 	free(idx->pack);
 	free(idx);
 }