indexer: make use of streaming also for deltas Up to now, deltas needed to be entirely in the packfile, and we tried to decompress them in their entirety over and over again. Adjust the logic so we read them as they come, just as we do for full objects. This also allows us to simplify the logic and have less nested code. The delta resolving phase still needs to decompress the whole object into memory, as there is not yet any streaming delta-apply support, but it helps in speeding up the downloading process and reduces the amount of memory allocations we need to do.
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217
diff --git a/src/indexer.c b/src/indexer.c
index c331a44..7266676 100644
--- a/src/indexer.c
+++ b/src/indexer.c
@@ -39,7 +39,8 @@ struct git_indexer {
struct git_indexer_stream {
unsigned int parsed_header :1,
opened_pack :1,
- have_stream :1;
+ have_stream :1,
+ have_delta :1;
struct git_pack_file *pack;
git_filebuf pack_file;
git_filebuf index_file;
@@ -180,39 +181,13 @@ cleanup:
}
/* Try to store the delta so we can try to resolve it later */
-static int store_delta(git_indexer_stream *idx, git_off_t entry_start, size_t entry_size, git_otype type)
+static int store_delta(git_indexer_stream *idx)
{
- git_mwindow *w = NULL;
struct delta_info *delta;
- git_rawobj obj;
- int error;
-
- assert(type == GIT_OBJ_REF_DELTA || type == GIT_OBJ_OFS_DELTA);
-
- if (type == GIT_OBJ_REF_DELTA) {
- idx->off += GIT_OID_RAWSZ;
- } else {
- git_off_t base_off;
-
- base_off = get_delta_base(idx->pack, &w, &idx->off, type, entry_start);
- git_mwindow_close(&w);
- if (base_off < 0)
- return (int)base_off;
- }
-
- error = packfile_unpack_compressed(&obj, idx->pack, &w, &idx->off, entry_size, type);
- if (error == GIT_EBUFS) {
- idx->off = entry_start;
- return GIT_EBUFS;
- } else if (error < 0){
- return -1;
- }
delta = git__calloc(1, sizeof(struct delta_info));
GITERR_CHECK_ALLOC(delta);
- delta->delta_off = entry_start;
-
- git__free(obj.data);
+ delta->delta_off = idx->entry_start;
if (git_vector_insert(&idx->deltas, delta) < 0)
return -1;
@@ -249,7 +224,44 @@ static int hash_object_stream(git_hash_ctx *ctx, git_packfile_stream *stream)
return 0;
}
-static int store_cache(git_indexer_stream *idx, git_hash_ctx *ctx, git_off_t entry_start)
+/* In order to create the packfile stream, we need to skip over the delta base description */
+static int advance_delta_offset(git_indexer_stream *idx, git_otype type)
+{
+ git_mwindow *w = NULL;
+
+ assert(type == GIT_OBJ_REF_DELTA || type == GIT_OBJ_OFS_DELTA);
+
+ if (type == GIT_OBJ_REF_DELTA) {
+ idx->off += GIT_OID_RAWSZ;
+ } else {
+ git_off_t base_off = get_delta_base(idx->pack, &w, &idx->off, type, idx->entry_start);
+ git_mwindow_close(&w);
+ if (base_off < 0)
+ return (int)base_off;
+ }
+
+ return 0;
+}
+
+/* Read from the stream and discard any output */
+static int read_object_stream(git_packfile_stream *stream)
+{
+ char buffer[4*1024];
+ ssize_t read;
+
+ assert(stream);
+
+ do {
+ read = git_packfile_stream_read(stream, buffer, sizeof(buffer));
+ } while (read > 0);
+
+ if (read < 0)
+ return (int)read;
+
+ return 0;
+}
+
+static int store_object(git_indexer_stream *idx)
{
int i;
git_oid oid;
@@ -258,8 +270,10 @@ static int store_cache(git_indexer_stream *idx, git_hash_ctx *ctx, git_off_t ent
struct entry *entry;
git_off_t entry_size;
git_mwindow *w = NULL;
- git_mwindow_file *mwf = &idx->pack->mwf;
struct git_pack_entry *pentry;
+ git_hash_ctx *ctx = &idx->hash_ctx;
+ git_mwindow_file *mwf = &idx->pack->mwf;
+ git_off_t entry_start = idx->entry_start;
entry = git__calloc(1, sizeof(*entry));
GITERR_CHECK_ALLOC(entry);
@@ -278,8 +292,10 @@ static int store_cache(git_indexer_stream *idx, git_hash_ctx *ctx, git_off_t ent
git_oid_cpy(&pentry->sha1, &oid);
pentry->offset = entry_start;
- if (git_vector_insert(&idx->pack->cache, pentry) < 0)
+ if (git_vector_insert(&idx->pack->cache, pentry) < 0) {
+ git__free(pentry);
goto on_error;
+ }
git_oid_cpy(&entry->oid, &oid);
entry->crc = crc32(0L, Z_NULL, 0);
@@ -302,7 +318,6 @@ static int store_cache(git_indexer_stream *idx, git_hash_ctx *ctx, git_off_t ent
return 0;
on_error:
- git__free(pentry);
git__free(entry);
return -1;
@@ -461,44 +476,59 @@ int git_indexer_stream_add(git_indexer_stream *idx, const void *data, size_t siz
git_mwindow_close(&w);
idx->entry_start = entry_start;
+ git_hash_ctx_init(&idx->hash_ctx);
if (type == GIT_OBJ_REF_DELTA || type == GIT_OBJ_OFS_DELTA) {
- error = store_delta(idx, entry_start, entry_size, type);
+ error = advance_delta_offset(idx, type);
if (error == GIT_EBUFS) {
idx->off = entry_start;
return 0;
}
if (error < 0)
- return error;
+ return -1;
- stats->received_objects++;
- do_progress_callback(idx, stats);
- continue;
+ idx->have_delta = 1;
+ } else {
+ idx->have_delta = 0;
+ hash_header(&idx->hash_ctx, entry_size, type);
}
- /* If we got this far, we create the stream for our object */
idx->have_stream = 1;
- git_hash_ctx_init(&idx->hash_ctx);
- hash_header(&idx->hash_ctx, entry_size, type);
- idx->entry_start = entry_start;
if (git_packfile_stream_open(stream, idx->pack, idx->off) < 0)
goto on_error;
+
}
- error = hash_object_stream(&idx->hash_ctx, stream);
- idx->off = idx->stream.curpos;
+ if (idx->have_delta) {
+ error = read_object_stream(stream);
+ } else {
+ error = hash_object_stream(&idx->hash_ctx, stream);
+ }
+
+ idx->off = stream->curpos;
if (error == GIT_EBUFS)
return 0;
+
+ /* We want to free the stream resources no matter what here */
+ idx->have_stream = 0;
+ git_packfile_stream_free(stream);
+
if (error < 0)
goto on_error;
- git_packfile_stream_free(&idx->stream);
- if (store_cache(idx, &idx->hash_ctx, idx->entry_start) < 0)
+ if (idx->have_delta) {
+ error = store_delta(idx);
+ } else {
+ error = store_object(idx);
+ }
+
+ if (error < 0)
goto on_error;
- stats->indexed_objects = (unsigned int)++processed;
+ if (!idx->have_delta) {
+ stats->indexed_objects = (unsigned int)++processed;
+ }
stats->received_objects++;
- idx->have_stream = 0;
do_progress_callback(idx, stats);
}
@@ -506,7 +536,6 @@ int git_indexer_stream_add(git_indexer_stream *idx, const void *data, size_t siz
return 0;
on_error:
- git_packfile_stream_free(&idx->stream);
git_mwindow_free_all(mwf);
return -1;
}