Commit c1af5a3935025f486156cdfe3b006700e73f0a49

Carlos Martín Nieto 2011-08-06T00:35:20

Implement cooperative caching. When indexing a file with ref deltas, a temporary cache for the offsets has to be built, as we don't have an index file yet. If the user takes the responsibility for filling the cache, the packing code will look there first when it finds a ref delta. Signed-off-by: Carlos Martín Nieto <carlos@cmartin.tk>

diff --git a/src/indexer.c b/src/indexer.c
index b874a35..0ab54f7 100644
--- a/src/indexer.c
+++ b/src/indexer.c
@@ -79,7 +79,7 @@ static int parse_header(git_indexer *idx)
 	return GIT_SUCCESS;
 }
 
-int objects_cmp(const void *a, const void *b)
+static int objects_cmp(const void *a, const void *b)
 {
 	const struct entry *entrya = a;
 	const struct entry *entryb = b;
@@ -87,6 +87,15 @@ int objects_cmp(const void *a, const void *b)
 	return git_oid_cmp(&entrya->oid, &entryb->oid);
 }
 
+static int cache_cmp(const void *a, const void *b)
+{
+	const struct git_pack_entry *ea = a;
+	const struct git_pack_entry *eb = b;
+
+	return git_oid_cmp(&ea->sha1, &eb->sha1);
+}
+
+
 int git_indexer_new(git_indexer **out, const char *packname)
 {
 	git_indexer *idx;
@@ -139,10 +148,14 @@ int git_indexer_new(git_indexer **out, const char *packname)
 
 	idx->nr_objects = ntohl(idx->hdr.hdr_entries);
 
+	error = git_vector_init(&idx->pack->cache, idx->nr_objects, cache_cmp);
+	if (error < GIT_SUCCESS)
+		goto cleanup;
+
+	idx->pack->has_cache = 1;
 	error = git_vector_init(&idx->objects, idx->nr_objects, objects_cmp);
-	if (error < GIT_SUCCESS) {
+	if (error < GIT_SUCCESS)
 		goto cleanup;
-	}
 
 	*out = idx;
 
@@ -250,6 +263,7 @@ int git_indexer_write(git_indexer *idx)
 	/* Write out the packfile trailer */
 
 	packfile_hash = git_mwindow_open(&idx->pack->mwf, &w, idx->st.st_size - GIT_OID_RAWSZ, GIT_OID_RAWSZ, &left);
+	git_mwindow_close(&w);
 	if (packfile_hash == NULL) {
 		error = git__rethrow(GIT_ENOMEM, "Failed to open window to packfile hash");
 		goto cleanup;
@@ -276,6 +290,7 @@ int git_indexer_write(git_indexer *idx)
 	error = git_filebuf_commit_at(&idx->file, filename);
 
 cleanup:
+	git_mwindow_free_all(&idx->pack->mwf);
 	if (error < GIT_SUCCESS)
 		git_filebuf_cleanup(&idx->file);
 
@@ -303,6 +318,7 @@ int git_indexer_run(git_indexer *idx, git_indexer_stats *stats)
 	while (processed < idx->nr_objects) {
 		git_rawobj obj;
 		git_oid oid;
+		struct git_pack_entry *pentry;
 		git_mwindow *w = NULL;
 		char hdr[512] = {0}; /* FIXME: How long should this be? */
 		int i, hdr_len;
@@ -326,12 +342,24 @@ int git_indexer_run(git_indexer *idx, git_indexer_stats *stats)
 			goto cleanup;
 		}
 
+		/* FIXME: Parse the object instead of hashing it */
 		error = git_odb__hash_obj(&oid, hdr, sizeof(hdr), &hdr_len, &obj);
 		if (error < GIT_SUCCESS) {
 			error = git__rethrow(error, "Failed to hash object");
 			goto cleanup;
 		}
 
+		pentry = git__malloc(sizeof(struct git_pack_entry));
+		if (pentry == NULL) {
+			error = GIT_ENOMEM;
+			goto cleanup;
+		}
+		git_oid_cpy(&pentry->sha1, &oid);
+		pentry->offset = entry_start;
+		error = git_vector_insert(&idx->pack->cache, pentry);
+		if (error < GIT_SUCCESS)
+			goto cleanup;
+
 		git_oid_cpy(&entry->oid, &oid);
 		entry->crc = crc32(0L, Z_NULL, 0);
 
@@ -371,11 +399,15 @@ void git_indexer_free(git_indexer *idx)
 {
 	unsigned int i;
 	struct entry *e;
+	struct git_pack_entry *pe;
 
 	p_close(idx->pack->mwf.fd);
 	git_vector_foreach(&idx->objects, i, e)
 		free(e);
 	git_vector_free(&idx->objects);
+	git_vector_foreach(&idx->pack->cache, i, pe)
+		free(pe);
+	git_vector_free(&idx->pack->cache);
 	free(idx->pack);
 	free(idx);
 }
diff --git a/src/pack.c b/src/pack.c
index f0ebf9d..4b43e7c 100644
--- a/src/pack.c
+++ b/src/pack.c
@@ -473,6 +473,18 @@ off_t get_delta_base(
 			return 0;  /* out of bound */
 		*curpos += used;
 	} else if (type == GIT_OBJ_REF_DELTA) {
+		/* If we have the cooperative cache, search in it first */
+		if (p->has_cache) {
+			int pos;
+			struct git_pack_entry key;
+
+			git_oid_fromraw(&key.sha1, base_info);
+			pos = git_vector_bsearch(&p->cache, &key);
+			if (pos >= 0) {
+				*curpos += 20;
+				return ((struct git_pack_entry *)git_vector_get(&p->cache, pos))->offset;
+			}
+		}
 		/* The base entry _must_ be in the same pack */
 		if (pack_entry_find_offset(&base_offset, &unused, p, (git_oid *)base_info, GIT_OID_HEXSZ) < GIT_SUCCESS)
 			return git__rethrow(GIT_EPACKCORRUPTED, "Base entry delta is not in the same pack");
diff --git a/src/pack.h b/src/pack.h
index a7112a6..164086f 100644
--- a/src/pack.h
+++ b/src/pack.h
@@ -77,8 +77,9 @@ struct git_pack_file {
 
 	int index_version;
 	git_time_t mtime;
-	unsigned pack_local:1, pack_keep:1;
+	unsigned pack_local:1, pack_keep:1, has_cache:1;
 	git_oid sha1;
+	git_vector cache;
 
 	/* something like ".git/objects/pack/xxxxx.pack" */
 	char pack_name[GIT_FLEX_ARRAY]; /* more */