Implement cooperative caching When indexing a file with ref deltas, a temporary cache for the offsets has to be built, as we don't have an index file yet. If the user takes the responsibility for filling the cache, the packing code will look there first when it finds a ref delta. Signed-off-by: Carlos Martín Nieto <carlos@cmartin.tk>
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149
diff --git a/src/indexer.c b/src/indexer.c
index b874a35..0ab54f7 100644
--- a/src/indexer.c
+++ b/src/indexer.c
@@ -79,7 +79,7 @@ static int parse_header(git_indexer *idx)
return GIT_SUCCESS;
}
-int objects_cmp(const void *a, const void *b)
+static int objects_cmp(const void *a, const void *b)
{
const struct entry *entrya = a;
const struct entry *entryb = b;
@@ -87,6 +87,15 @@ int objects_cmp(const void *a, const void *b)
return git_oid_cmp(&entrya->oid, &entryb->oid);
}
+static int cache_cmp(const void *a, const void *b)
+{
+ const struct git_pack_entry *ea = a;
+ const struct git_pack_entry *eb = b;
+
+ return git_oid_cmp(&ea->sha1, &eb->sha1);
+}
+
+
int git_indexer_new(git_indexer **out, const char *packname)
{
git_indexer *idx;
@@ -139,10 +148,14 @@ int git_indexer_new(git_indexer **out, const char *packname)
idx->nr_objects = ntohl(idx->hdr.hdr_entries);
+ error = git_vector_init(&idx->pack->cache, idx->nr_objects, cache_cmp);
+ if (error < GIT_SUCCESS)
+ goto cleanup;
+
+ idx->pack->has_cache = 1;
error = git_vector_init(&idx->objects, idx->nr_objects, objects_cmp);
- if (error < GIT_SUCCESS) {
+ if (error < GIT_SUCCESS)
goto cleanup;
- }
*out = idx;
@@ -250,6 +263,7 @@ int git_indexer_write(git_indexer *idx)
/* Write out the packfile trailer */
packfile_hash = git_mwindow_open(&idx->pack->mwf, &w, idx->st.st_size - GIT_OID_RAWSZ, GIT_OID_RAWSZ, &left);
+ git_mwindow_close(&w);
if (packfile_hash == NULL) {
error = git__rethrow(GIT_ENOMEM, "Failed to open window to packfile hash");
goto cleanup;
@@ -276,6 +290,7 @@ int git_indexer_write(git_indexer *idx)
error = git_filebuf_commit_at(&idx->file, filename);
cleanup:
+ git_mwindow_free_all(&idx->pack->mwf);
if (error < GIT_SUCCESS)
git_filebuf_cleanup(&idx->file);
@@ -303,6 +318,7 @@ int git_indexer_run(git_indexer *idx, git_indexer_stats *stats)
while (processed < idx->nr_objects) {
git_rawobj obj;
git_oid oid;
+ struct git_pack_entry *pentry;
git_mwindow *w = NULL;
char hdr[512] = {0}; /* FIXME: How long should this be? */
int i, hdr_len;
@@ -326,12 +342,24 @@ int git_indexer_run(git_indexer *idx, git_indexer_stats *stats)
goto cleanup;
}
+ /* FIXME: Parse the object instead of hashing it */
error = git_odb__hash_obj(&oid, hdr, sizeof(hdr), &hdr_len, &obj);
if (error < GIT_SUCCESS) {
error = git__rethrow(error, "Failed to hash object");
goto cleanup;
}
+ pentry = git__malloc(sizeof(struct git_pack_entry));
+ if (pentry == NULL) {
+ error = GIT_ENOMEM;
+ goto cleanup;
+ }
+ git_oid_cpy(&pentry->sha1, &oid);
+ pentry->offset = entry_start;
+ error = git_vector_insert(&idx->pack->cache, pentry);
+ if (error < GIT_SUCCESS)
+ goto cleanup;
+
git_oid_cpy(&entry->oid, &oid);
entry->crc = crc32(0L, Z_NULL, 0);
@@ -371,11 +399,15 @@ void git_indexer_free(git_indexer *idx)
{
unsigned int i;
struct entry *e;
+ struct git_pack_entry *pe;
p_close(idx->pack->mwf.fd);
git_vector_foreach(&idx->objects, i, e)
free(e);
git_vector_free(&idx->objects);
+ git_vector_foreach(&idx->pack->cache, i, pe)
+ free(pe);
+ git_vector_free(&idx->pack->cache);
free(idx->pack);
free(idx);
}
diff --git a/src/pack.c b/src/pack.c
index f0ebf9d..4b43e7c 100644
--- a/src/pack.c
+++ b/src/pack.c
@@ -473,6 +473,18 @@ off_t get_delta_base(
return 0; /* out of bound */
*curpos += used;
} else if (type == GIT_OBJ_REF_DELTA) {
+ /* If we have the cooperative cache, search in it first */
+ if (p->has_cache) {
+ int pos;
+ struct git_pack_entry key;
+
+ git_oid_fromraw(&key.sha1, base_info);
+ pos = git_vector_bsearch(&p->cache, &key);
+ if (pos >= 0) {
+ *curpos += 20;
+ return ((struct git_pack_entry *)git_vector_get(&p->cache, pos))->offset;
+ }
+ }
/* The base entry _must_ be in the same pack */
if (pack_entry_find_offset(&base_offset, &unused, p, (git_oid *)base_info, GIT_OID_HEXSZ) < GIT_SUCCESS)
return git__rethrow(GIT_EPACKCORRUPTED, "Base entry delta is not in the same pack");
diff --git a/src/pack.h b/src/pack.h
index a7112a6..164086f 100644
--- a/src/pack.h
+++ b/src/pack.h
@@ -77,8 +77,9 @@ struct git_pack_file {
int index_version;
git_time_t mtime;
- unsigned pack_local:1, pack_keep:1;
+ unsigned pack_local:1, pack_keep:1, has_cache:1;
git_oid sha1;
+ git_vector cache;
/* something like ".git/objects/pack/xxxxx.pack" */
char pack_name[GIT_FLEX_ARRAY]; /* more */