Commit 447ae791e564ed887fb4abe752f38a1a9ada1267

Carlos Martín Nieto 2013-03-03T15:19:21

indexer: kill git_indexer

This was the first implementation, and its only goal was to have something that worked. It is slow and by now is just taking up space. Remove it and switch its one known user over to the streaming indexer.

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
diff --git a/include/git2/indexer.h b/include/git2/indexer.h
index 151f5b4..dfe6ae5 100644
--- a/include/git2/indexer.h
+++ b/include/git2/indexer.h
@@ -33,7 +33,6 @@ typedef struct git_transfer_progress {
  */
 typedef int (*git_transfer_progress_callback)(const git_transfer_progress *stats, void *payload);
 
-typedef struct git_indexer git_indexer;
 typedef struct git_indexer_stream git_indexer_stream;
 
 /**
@@ -86,53 +85,6 @@ GIT_EXTERN(const git_oid *) git_indexer_stream_hash(const git_indexer_stream *id
  */
 GIT_EXTERN(void) git_indexer_stream_free(git_indexer_stream *idx);
 
-/**
- * Create a new indexer instance
- *
- * @param out where to store the indexer instance
- * @param packname the absolute filename of the packfile to index
- */
-GIT_EXTERN(int) git_indexer_new(git_indexer **out, const char *packname);
-
-/**
- * Iterate over the objects in the packfile and extract the information
- *
- * Indexing a packfile can be very expensive so this function is
- * expected to be run in a worker thread and the stats used to provide
- * feedback the user.
- *
- * @param idx the indexer instance
- * @param stats storage for the running state
- */
-GIT_EXTERN(int) git_indexer_run(git_indexer *idx, git_transfer_progress *stats);
-
-/**
- * Write the index file to disk.
- *
- * The file will be stored as pack-$hash.idx in the same directory as
- * the packfile.
- *
- * @param idx the indexer instance
- */
-GIT_EXTERN(int) git_indexer_write(git_indexer *idx);
-
-/**
- * Get the packfile's hash
- *
- * A packfile's name is derived from the sorted hashing of all object
- * names. This is only correct after the index has been written to disk.
- *
- * @param idx the indexer instance
- */
-GIT_EXTERN(const git_oid *) git_indexer_hash(const git_indexer *idx);
-
-/**
- * Free the indexer and its resources
- *
- * @param idx the indexer to free
- */
-GIT_EXTERN(void) git_indexer_free(git_indexer *idx);
-
 GIT_END_DECL
 
 #endif
diff --git a/src/indexer.c b/src/indexer.c
index c4648e4..1600d1c 100644
--- a/src/indexer.c
+++ b/src/indexer.c
@@ -27,15 +27,6 @@ struct entry {
 	uint64_t offset_long;
 };
 
-struct git_indexer {
-	struct git_pack_file *pack;
-	size_t nr_objects;
-	git_vector objects;
-	git_filebuf file;
-	unsigned int fanout[256];
-	git_oid hash;
-};
-
 struct git_indexer_stream {
 	unsigned int parsed_header :1,
 		opened_pack :1,
@@ -61,11 +52,6 @@ struct delta_info {
 	git_off_t delta_off;
 };
 
-const git_oid *git_indexer_hash(const git_indexer *idx)
-{
-	return &idx->hash;
-}
-
 const git_oid *git_indexer_stream_hash(const git_indexer_stream *idx)
 {
 	return &idx->hash;
@@ -451,7 +437,7 @@ int git_indexer_stream_add(git_indexer_stream *idx, const void *data, size_t siz
 			return -1;
 
 		stats->received_objects = 0;
-		stats->indexed_objects = 0;
+		processed = stats->indexed_objects = 0;
 		stats->total_objects = (unsigned int)idx->nr_objects;
 		do_progress_callback(idx, stats);
 	}
@@ -755,315 +741,3 @@ void git_indexer_stream_free(git_indexer_stream *idx)
 	git_filebuf_cleanup(&idx->pack_file);
 	git__free(idx);
 }
-
-int git_indexer_new(git_indexer **out, const char *packname)
-{
-	git_indexer *idx;
-	struct git_pack_header hdr;
-	int error;
-
-	assert(out && packname);
-
-	idx = git__calloc(1, sizeof(git_indexer));
-	GITERR_CHECK_ALLOC(idx);
-
-	open_pack(&idx->pack, packname);
-
-	if ((error = parse_header(&hdr, idx->pack)) < 0)
-		goto cleanup;
-
-	idx->nr_objects = ntohl(hdr.hdr_entries);
-
-	/* for now, limit to 2^32 objects */
-	assert(idx->nr_objects == (size_t)((unsigned int)idx->nr_objects));
-
-	error = git_vector_init(&idx->pack->cache, (unsigned int)idx->nr_objects, cache_cmp);
-	if (error < 0)
-		goto cleanup;
-
-	idx->pack->has_cache = 1;
-	error = git_vector_init(&idx->objects, (unsigned int)idx->nr_objects, objects_cmp);
-	if (error < 0)
-		goto cleanup;
-
-	*out = idx;
-
-	return 0;
-
-cleanup:
-	git_indexer_free(idx);
-
-	return -1;
-}
-
-static int index_path(git_buf *path, git_indexer *idx)
-{
-	const char prefix[] = "pack-", suffix[] = ".idx";
-	size_t slash = (size_t)path->size;
-
-	/* search backwards for '/' */
-	while (slash > 0 && path->ptr[slash - 1] != '/')
-		slash--;
-
-	if (git_buf_grow(path, slash + 1 + strlen(prefix) +
-					 GIT_OID_HEXSZ + strlen(suffix) + 1) < 0)
-		return -1;
-
-	git_buf_truncate(path, slash);
-	git_buf_puts(path, prefix);
-	git_oid_fmt(path->ptr + git_buf_len(path), &idx->hash);
-	path->size += GIT_OID_HEXSZ;
-	git_buf_puts(path, suffix);
-
-	return git_buf_oom(path) ? -1 : 0;
-}
-
-int git_indexer_write(git_indexer *idx)
-{
-	git_mwindow *w = NULL;
-	int error;
-	unsigned int i, long_offsets = 0, left;
-	struct git_pack_idx_header hdr;
-	git_buf filename = GIT_BUF_INIT;
-	struct entry *entry;
-	void *packfile_hash;
-	git_oid file_hash;
-	git_hash_ctx ctx;
-
-	if (git_hash_ctx_init(&ctx) < 0)
-		return -1;
-
-	git_vector_sort(&idx->objects);
-
-	git_buf_sets(&filename, idx->pack->pack_name);
-	git_buf_truncate(&filename, filename.size - strlen("pack"));
-	git_buf_puts(&filename, "idx");
-	if (git_buf_oom(&filename))
-		return -1;
-
-	error = git_filebuf_open(&idx->file, filename.ptr, GIT_FILEBUF_HASH_CONTENTS);
-	if (error < 0)
-		goto cleanup;
-
-	/* Write out the header */
-	hdr.idx_signature = htonl(PACK_IDX_SIGNATURE);
-	hdr.idx_version = htonl(2);
-	error = git_filebuf_write(&idx->file, &hdr, sizeof(hdr));
-	if (error < 0)
-		goto cleanup;
-
-	/* Write out the fanout table */
-	for (i = 0; i < 256; ++i) {
-		uint32_t n = htonl(idx->fanout[i]);
-		error = git_filebuf_write(&idx->file, &n, sizeof(n));
-		if (error < 0)
-			goto cleanup;
-	}
-
-	/* Write out the object names (SHA-1 hashes) */
-	git_vector_foreach(&idx->objects, i, entry) {
-		if ((error = git_filebuf_write(&idx->file, &entry->oid, sizeof(git_oid))) < 0 ||
-			(error = git_hash_update(&ctx, &entry->oid, GIT_OID_RAWSZ)) < 0)
-			goto cleanup;
-	}
-
-	if ((error = git_hash_final(&idx->hash, &ctx)) < 0)
-		goto cleanup;
-
-	/* Write out the CRC32 values */
-	git_vector_foreach(&idx->objects, i, entry) {
-		error = git_filebuf_write(&idx->file, &entry->crc, sizeof(uint32_t));
-		if (error < 0)
-			goto cleanup;
-	}
-
-	/* Write out the offsets */
-	git_vector_foreach(&idx->objects, i, entry) {
-		uint32_t n;
-
-		if (entry->offset == UINT32_MAX)
-			n = htonl(0x80000000 | long_offsets++);
-		else
-			n = htonl(entry->offset);
-
-		error = git_filebuf_write(&idx->file, &n, sizeof(uint32_t));
-		if (error < 0)
-			goto cleanup;
-	}
-
-	/* Write out the long offsets */
-	git_vector_foreach(&idx->objects, i, entry) {
-		uint32_t split[2];
-
-		if (entry->offset != UINT32_MAX)
-			continue;
-
-		split[0] = htonl(entry->offset_long >> 32);
-		split[1] = htonl(entry->offset_long & 0xffffffff);
-
-		error = git_filebuf_write(&idx->file, &split, sizeof(uint32_t) * 2);
-		if (error < 0)
-			goto cleanup;
-	}
-
-	/* Write out the packfile trailer */
-
-	packfile_hash = git_mwindow_open(&idx->pack->mwf, &w, idx->pack->mwf.size - GIT_OID_RAWSZ, GIT_OID_RAWSZ, &left);
-	git_mwindow_close(&w);
-	if (packfile_hash == NULL) {
-		error = -1;
-		goto cleanup;
-	}
-
-	memcpy(&file_hash, packfile_hash, GIT_OID_RAWSZ);
-
-	git_mwindow_close(&w);
-
-	error = git_filebuf_write(&idx->file, &file_hash, sizeof(git_oid));
-	if (error < 0)
-		goto cleanup;
-
-	/* Write out the index sha */
-	error = git_filebuf_hash(&file_hash, &idx->file);
-	if (error < 0)
-		goto cleanup;
-
-	error = git_filebuf_write(&idx->file, &file_hash, sizeof(git_oid));
-	if (error < 0)
-		goto cleanup;
-
-	/* Figure out what the final name should be */
-	error = index_path(&filename, idx);
-	if (error < 0)
-		goto cleanup;
-
-	/* Commit file */
-	error = git_filebuf_commit_at(&idx->file, filename.ptr, GIT_PACK_FILE_MODE);
-
-cleanup:
-	git_mwindow_free_all(&idx->pack->mwf);
-	git_mwindow_file_deregister(&idx->pack->mwf);
-	if (error < 0)
-		git_filebuf_cleanup(&idx->file);
-	git_buf_free(&filename);
-	git_hash_ctx_cleanup(&ctx);
-
-	return error;
-}
-
-int git_indexer_run(git_indexer *idx, git_transfer_progress *stats)
-{
-	git_mwindow_file *mwf;
-	git_off_t off = sizeof(struct git_pack_header);
-	int error;
-	struct entry *entry;
-	unsigned int left, processed;
-
-	assert(idx && stats);
-
-	mwf = &idx->pack->mwf;
-	error = git_mwindow_file_register(mwf);
-	if (error < 0)
-		return error;
-
-	stats->total_objects = (unsigned int)idx->nr_objects;
-	stats->indexed_objects = processed = 0;
-
-	while (processed < idx->nr_objects) {
-		git_rawobj obj;
-		git_oid oid;
-		struct git_pack_entry *pentry;
-		git_mwindow *w = NULL;
-		int i;
-		git_off_t entry_start = off;
-		void *packed;
-		size_t entry_size;
-		char fmt[GIT_OID_HEXSZ] = {0};
-
-		entry = git__calloc(1, sizeof(*entry));
-		GITERR_CHECK_ALLOC(entry);
-
-		if (off > UINT31_MAX) {
-			entry->offset = UINT32_MAX;
-			entry->offset_long = off;
-		} else {
-			entry->offset = (uint32_t)off;
-		}
-
-		error = git_packfile_unpack(&obj, idx->pack, &off);
-		if (error < 0)
-			goto cleanup;
-
-		/* FIXME: Parse the object instead of hashing it */
-		error = git_odb__hashobj(&oid, &obj);
-		if (error < 0) {
-			giterr_set(GITERR_INDEXER, "Failed to hash object");
-			goto cleanup;
-		}
-
-		pentry = git__malloc(sizeof(struct git_pack_entry));
-		if (pentry == NULL) {
-			error = -1;
-			goto cleanup;
-		}
-
-		git_oid_cpy(&pentry->sha1, &oid);
-		pentry->offset = entry_start;
-		git_oid_fmt(fmt, &oid);
-		error = git_vector_insert(&idx->pack->cache, pentry);
-		if (error < 0)
-			goto cleanup;
-
-		git_oid_cpy(&entry->oid, &oid);
-		entry->crc = crc32(0L, Z_NULL, 0);
-
-		entry_size = (size_t)(off - entry_start);
-		packed = git_mwindow_open(mwf, &w, entry_start, entry_size, &left);
-		if (packed == NULL) {
-			error = -1;
-			goto cleanup;
-		}
-		entry->crc = htonl(crc32(entry->crc, packed, (uInt)entry_size));
-		git_mwindow_close(&w);
-
-		/* Add the object to the list */
-		error = git_vector_insert(&idx->objects, entry);
-		if (error < 0)
-			goto cleanup;
-
-		for (i = oid.id[0]; i < 256; ++i) {
-			idx->fanout[i]++;
-		}
-
-		git__free(obj.data);
-
-		stats->indexed_objects = ++processed;
-	}
-
-cleanup:
-	git_mwindow_free_all(mwf);
-
-	return error;
-
-}
-
-void git_indexer_free(git_indexer *idx)
-{
-	unsigned int i;
-	struct entry *e;
-	struct git_pack_entry *pe;
-
-	if (idx == NULL)
-		return;
-
-	git_mwindow_file_deregister(&idx->pack->mwf);
-	git_vector_foreach(&idx->objects, i, e)
-		git__free(e);
-	git_vector_free(&idx->objects);
-	git_vector_foreach(&idx->pack->cache, i, pe)
-		git__free(pe);
-	git_vector_free(&idx->pack->cache);
-	git_packfile_free(idx->pack);
-	git__free(idx);
-}
-
diff --git a/tests-clar/pack/packbuilder.c b/tests-clar/pack/packbuilder.c
index 6dc1c76..764fba2 100644
--- a/tests-clar/pack/packbuilder.c
+++ b/tests-clar/pack/packbuilder.c
@@ -8,7 +8,7 @@
 static git_repository *_repo;
 static git_revwalk *_revwalker;
 static git_packbuilder *_packbuilder;
-static git_indexer *_indexer;
+static git_indexer_stream *_indexer;
 static git_vector _commits;
 static int _commits_is_initialized;
 
@@ -40,7 +40,7 @@ void test_pack_packbuilder__cleanup(void)
 	git_revwalk_free(_revwalker);
 	_revwalker = NULL;
 
-	git_indexer_free(_indexer);
+	git_indexer_stream_free(_indexer);
 	_indexer = NULL;
 
 	cl_git_sandbox_cleanup();
@@ -75,20 +75,29 @@ static void seed_packbuilder(void)
 	}
 }
 
+static int feed_indexer(void *ptr, size_t len, void *payload)
+{
+	git_transfer_progress *stats = (git_transfer_progress *)payload;
+
+	return git_indexer_stream_add(_indexer, ptr, len, stats);
+}
+
 void test_pack_packbuilder__create_pack(void)
 {
 	git_transfer_progress stats;
-	git_buf buf = GIT_BUF_INIT;
+	git_buf buf = GIT_BUF_INIT, path = GIT_BUF_INIT;
 	git_hash_ctx ctx;
 	git_oid hash;
 	char hex[41]; hex[40] = '\0';
 
 	seed_packbuilder();
-	cl_git_pass(git_packbuilder_write(_packbuilder, "testpack.pack"));
 
-	cl_git_pass(git_indexer_new(&_indexer, "testpack.pack"));
-	cl_git_pass(git_indexer_run(_indexer, &stats));
-	cl_git_pass(git_indexer_write(_indexer));
+	cl_git_pass(git_indexer_stream_new(&_indexer, ".", NULL, NULL));
+	cl_git_pass(git_packbuilder_foreach(_packbuilder, feed_indexer, &stats));
+	cl_git_pass(git_indexer_stream_finalize(_indexer, &stats));
+
+	git_oid_fmt(hex, git_indexer_stream_hash(_indexer));
+	git_buf_printf(&path, "pack-%s.pack", hex);
 
 	/*
 	 * By default, packfiles are created with only one thread.
@@ -104,13 +113,14 @@ void test_pack_packbuilder__create_pack(void)
 	 *
 	 */
 
-	cl_git_pass(git_futils_readbuffer(&buf, "testpack.pack"));
+	cl_git_pass(git_futils_readbuffer(&buf, git_buf_cstr(&path)));
 
 	cl_git_pass(git_hash_ctx_init(&ctx));
 	cl_git_pass(git_hash_update(&ctx, buf.ptr, buf.size));
 	cl_git_pass(git_hash_final(&hash, &ctx));
 	git_hash_ctx_cleanup(&ctx);
 
+	git_buf_free(&path);
 	git_buf_free(&buf);
 
 	git_oid_fmt(hex, &hash);