Commit 4cf9323be1ed856e8d8b3d5c41390ea89e0583d3

Vicent Martí 2013-09-17T18:49:47

Merge pull request #1860 from libgit2/cmn/indexer-hash indexer: check the packfile trailer

diff --git a/src/indexer.c b/src/indexer.c
index 09f9629..aaaf093 100644
--- a/src/indexer.c
+++ b/src/indexer.c
@@ -47,6 +47,11 @@ struct git_indexer_stream {
 	git_transfer_progress_callback progress_cb;
 	void *progress_payload;
 	char objbuf[8*1024];
+
+	/* Fields for calculating the packfile trailer (hash of everything before it) */
+	char inbuf[GIT_OID_RAWSZ];
+	int inbuf_len;
+	git_hash_ctx trailer;
 };
 
 struct delta_info {
@@ -121,6 +126,7 @@ int git_indexer_stream_new(
 	GITERR_CHECK_ALLOC(idx);
 	idx->progress_cb = progress_cb;
 	idx->progress_payload = progress_payload;
+	git_hash_ctx_init(&idx->trailer);
 
 	error = git_buf_joinpath(&path, prefix, suff);
 	if (error < 0)
@@ -322,7 +328,6 @@ static int hash_and_save(git_indexer_stream *idx, git_rawobj *obj, git_off_t ent
 		entry->offset = (uint32_t)entry_start;
 	}
 
-	/* FIXME: Parse the object instead of hashing it */
 	if (git_odb__hashobj(&oid, obj) < 0) {
 		giterr_set(GITERR_INDEXER, "Failed to hash object");
 		goto on_error;
@@ -370,6 +375,43 @@ static int do_progress_callback(git_indexer_stream *idx, git_transfer_progress *
 	return idx->progress_cb(stats, idx->progress_payload);
 }
 
+/* Hash everything but the last 20B of input */
+static void hash_partially(git_indexer_stream *idx, const void *data, size_t size)
+{
+	int to_expell, to_keep;
+
+	if (size == 0)
+		return;
+
+	/* Easy case, dump the buffer and the data minus the last 20 bytes */
+	if (size >= 20) {
+		git_hash_update(&idx->trailer, idx->inbuf, idx->inbuf_len);
+		git_hash_update(&idx->trailer, data, size - GIT_OID_RAWSZ);
+
+		data += size - GIT_OID_RAWSZ;
+		memcpy(idx->inbuf, data, GIT_OID_RAWSZ);
+		idx->inbuf_len = GIT_OID_RAWSZ;
+		return;
+	}
+
+	/* We can just append */
+	if (idx->inbuf_len + size <= GIT_OID_RAWSZ) {
+		memcpy(idx->inbuf + idx->inbuf_len, data, size);
+		idx->inbuf_len += size;
+		return;
+	}
+
+	/* We need to partially drain the buffer and then append */
+	to_expell = abs(size - (GIT_OID_RAWSZ - idx->inbuf_len));
+	to_keep = abs(idx->inbuf_len - to_expell);
+
+	git_hash_update(&idx->trailer, idx->inbuf, to_expell);
+
+	memmove(idx->inbuf, idx->inbuf + to_expell, to_keep);
+	memcpy(idx->inbuf + to_keep, data, size);
+	idx->inbuf_len += size - to_expell;
+}
+
 int git_indexer_stream_add(git_indexer_stream *idx, const void *data, size_t size, git_transfer_progress *stats)
 {
 	int error = -1;
@@ -384,6 +426,8 @@ int git_indexer_stream_add(git_indexer_stream *idx, const void *data, size_t siz
 	if (git_filebuf_write(&idx->pack_file, data, size) < 0)
 		return -1;
 
+	hash_partially(idx, data, size);
+
 	/* Make sure we set the new size of the pack */
 	if (idx->opened_pack) {
 		idx->pack->mwf.size += size;
@@ -576,17 +620,33 @@ int git_indexer_stream_finalize(git_indexer_stream *idx, git_transfer_progress *
 	struct git_pack_idx_header hdr;
 	git_buf filename = GIT_BUF_INIT;
 	struct entry *entry;
-	void *packfile_hash;
-	git_oid file_hash;
+	git_oid trailer_hash, file_hash;
 	git_hash_ctx ctx;
 	git_filebuf index_file = {0};
+	void *packfile_trailer;
 
 	if (git_hash_ctx_init(&ctx) < 0)
 		return -1;
 
 	/* Test for this before resolve_deltas(), as it plays with idx->off */
-	if (idx->off < idx->pack->mwf.size - GIT_OID_RAWSZ) {
-		giterr_set(GITERR_INDEXER, "Indexing error: unexpected data at the end of the pack");
+	if (idx->off < idx->pack->mwf.size - 20) {
+		giterr_set(GITERR_INDEXER, "unexpected data at the end of the pack");
+		return -1;
+	}
+
+	packfile_trailer = git_mwindow_open(&idx->pack->mwf, &w, idx->pack->mwf.size - GIT_OID_RAWSZ, GIT_OID_RAWSZ, &left);
+	if (packfile_trailer == NULL) {
+		git_mwindow_close(&w);
+		goto on_error;
+	}
+
+	/* Compare the packfile trailer as it was sent to us and what we calculated */
+	git_oid_fromraw(&file_hash, packfile_trailer);
+	git_mwindow_close(&w);
+
+	git_hash_final(&trailer_hash, &idx->trailer);
+	if (git_oid_cmp(&file_hash, &trailer_hash)) {
+		giterr_set(GITERR_INDEXER, "packfile trailer mismatch");
 		return -1;
 	}
 
@@ -595,7 +655,7 @@ int git_indexer_stream_finalize(git_indexer_stream *idx, git_transfer_progress *
 			return -1;
 
 	if (stats->indexed_objects != stats->total_objects) {
-		giterr_set(GITERR_INDEXER, "Indexing error: early EOF");
+		giterr_set(GITERR_INDEXER, "early EOF");
 		return -1;
 	}
 
@@ -658,23 +718,15 @@ int git_indexer_stream_finalize(git_indexer_stream *idx, git_transfer_progress *
 		git_filebuf_write(&index_file, &split, sizeof(uint32_t) * 2);
 	}
 
-	/* Write out the packfile trailer */
-	packfile_hash = git_mwindow_open(&idx->pack->mwf, &w, idx->pack->mwf.size - GIT_OID_RAWSZ, GIT_OID_RAWSZ, &left);
-	if (packfile_hash == NULL) {
-		git_mwindow_close(&w);
+	/* Write out the packfile trailer to the index */
+	if (git_filebuf_write(&index_file, &trailer_hash, GIT_OID_RAWSZ) < 0)
 		goto on_error;
-	}
-
-	memcpy(&file_hash, packfile_hash, GIT_OID_RAWSZ);
-	git_mwindow_close(&w);
-
-	git_filebuf_write(&index_file, &file_hash, sizeof(git_oid));
 
-	/* Write out the packfile trailer to the idx file as well */
-	if (git_filebuf_hash(&file_hash, &index_file) < 0)
+	/* Write out the hash of the idx */
+	if (git_filebuf_hash(&trailer_hash, &index_file) < 0)
 		goto on_error;
 
-	git_filebuf_write(&index_file, &file_hash, sizeof(git_oid));
+	git_filebuf_write(&index_file, &trailer_hash, sizeof(git_oid));
 
 	/* Figure out what the final name should be */
 	if (index_path_stream(&filename, idx, ".idx") < 0)