Commit c1b370e9313a13df350974e2237997cc651a5d67

Edward Thomson 2016-08-17T09:24:44

Merge pull request #3837 from novalis/dturner/indexv4 Support index v4

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 92bc0c1..e4fd68d 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -13,6 +13,8 @@ v0.24 + 1
 * Do not fail when deleting remotes in the presence of broken
   global configs which contain branches.
 
+* Support for reading and writing git index v4 files
+
 ### API additions
 
 * You can now get the user-agent used by libgit2 using the
@@ -49,6 +51,9 @@ v0.24 + 1
 * `git_diff_from_buffer` can create a `git_diff` object from the contents
   of a git-style patch file.
 
+* `git_index_version()` and `git_index_set_version()` to get and set
+  the index version
+
 ### API removals
 
 * `git_blob_create_fromchunks()` has been removed in favour of
diff --git a/include/git2/index.h b/include/git2/index.h
index 466765b..e58b328 100644
--- a/include/git2/index.h
+++ b/include/git2/index.h
@@ -252,6 +252,31 @@ GIT_EXTERN(int) git_index_caps(const git_index *index);
 GIT_EXTERN(int) git_index_set_caps(git_index *index, int caps);
 
 /**
+ * Get index on-disk version.
+ *
+ * Valid return values are 2, 3, or 4.  If 3 is returned, an index
+ * with version 2 may be written instead, if the extension data in
+ * version 3 is not necessary.
+ *
+ * @param index An existing index object
+ * @return the index version
+ */
+GIT_EXTERN(unsigned int) git_index_version(git_index *index);
+
+/**
+ * Set index on-disk version.
+ *
+ * Valid values are 2, 3, or 4.  If 2 is given, git_index_write may
+ * write an index with version 3 instead, if necessary to accurately
+ * represent the index.
+ *
+ * @param index An existing index object
+ * @param version The new version number
+ * @return 0 on success, -1 on failure
+ */
+GIT_EXTERN(int) git_index_set_version(git_index *index, unsigned int version);
+
+/**
  * Update the contents of an existing index object in memory by reading
  * from the hard disk.
  *
diff --git a/src/index.c b/src/index.c
index 9908ba6..bc15959 100644
--- a/src/index.c
+++ b/src/index.c
@@ -19,6 +19,7 @@
 #include "blob.h"
 #include "idxmap.h"
 #include "diff.h"
+#include "varint.h"
 
 #include "git2/odb.h"
 #include "git2/oid.h"
@@ -65,8 +66,11 @@ static int index_apply_to_wd_diff(git_index *index, int action, const git_strarr
 static const size_t INDEX_FOOTER_SIZE = GIT_OID_RAWSZ;
 static const size_t INDEX_HEADER_SIZE = 12;
 
-static const unsigned int INDEX_VERSION_NUMBER = 2;
+static const unsigned int INDEX_VERSION_NUMBER_DEFAULT = 2;
+static const unsigned int INDEX_VERSION_NUMBER_LB = 2;
 static const unsigned int INDEX_VERSION_NUMBER_EXT = 3;
+static const unsigned int INDEX_VERSION_NUMBER_COMP = 4;
+static const unsigned int INDEX_VERSION_NUMBER_UB = 4;
 
 static const unsigned int INDEX_HEADER_SIG = 0x44495243;
 static const char INDEX_EXT_TREECACHE_SIG[] = {'T', 'R', 'E', 'E'};
@@ -434,6 +438,7 @@ int git_index_open(git_index **index_out, const char *index_path)
 	index->entries_search = git_index_entry_srch;
 	index->entries_search_path = index_entry_srch_path;
 	index->reuc_search = reuc_srch;
+	index->version = INDEX_VERSION_NUMBER_DEFAULT;
 
 	if (index_path != NULL && (error = git_index_read(index, true)) < 0)
 		goto fail;
@@ -747,6 +752,28 @@ done:
 	return 0;
 }
 
+unsigned int git_index_version(git_index *index)
+{
+	assert(index);
+	/* Report the version currently stored on the index object. */
+	return index->version;
+}
+
+int git_index_set_version(git_index *index, unsigned int version)
+{
+	assert(index);
+
+	/* Store the version only when it lies within [LB, UB]. */
+	if (version >= INDEX_VERSION_NUMBER_LB &&
+	    version <= INDEX_VERSION_NUMBER_UB) {
+		index->version = version;
+		return 0;
+	}
+
+	giterr_set(GITERR_INDEX, "Invalid version number");
+	return -1;
+}
+
 int git_index_write(git_index *index)
 {
 	git_indexwriter writer = GIT_INDEXWRITER_INIT;
@@ -2262,12 +2289,15 @@ static size_t read_entry(
 	git_index_entry **out,
 	git_index *index,
 	const void *buffer,
-	size_t buffer_size)
+	size_t buffer_size,
+	const char **last)
 {
 	size_t path_length, entry_size;
 	const char *path_ptr;
 	struct entry_short source;
 	git_index_entry entry = {{0}};
+	bool compressed = index->version >= INDEX_VERSION_NUMBER_COMP;
+	char *tmp_path = NULL;
 
 	if (INDEX_FOOTER_SIZE + minimal_entry_size > buffer_size)
 		return 0;
@@ -2302,33 +2332,56 @@ static size_t read_entry(
 	} else
 		path_ptr = (const char *) buffer + offsetof(struct entry_short, path);
 
-	path_length = entry.flags & GIT_IDXENTRY_NAMEMASK;
-
-	/* if this is a very long string, we must find its
-	 * real length without overflowing */
-	if (path_length == 0xFFF) {
-		const char *path_end;
+	if (!compressed) {
+		path_length = entry.flags & GIT_IDXENTRY_NAMEMASK;
 
-		path_end = memchr(path_ptr, '\0', buffer_size);
-		if (path_end == NULL)
-			return 0;
+		/* if this is a very long string, we must find its
+		 * real length without overflowing */
+		if (path_length == 0xFFF) {
+			const char *path_end;
 
-		path_length = path_end - path_ptr;
-	}
+			path_end = memchr(path_ptr, '\0', buffer_size);
+			if (path_end == NULL)
+				return 0;
 
-	if (entry.flags & GIT_IDXENTRY_EXTENDED)
-		entry_size = long_entry_size(path_length);
-	else
-		entry_size = short_entry_size(path_length);
+			path_length = path_end - path_ptr;
+		}
 
-	if (INDEX_FOOTER_SIZE + entry_size > buffer_size)
-		return 0;
+		if (entry.flags & GIT_IDXENTRY_EXTENDED)
+			entry_size = long_entry_size(path_length);
+		else
+			entry_size = short_entry_size(path_length);
 
-	entry.path = (char *)path_ptr;
+		if (INDEX_FOOTER_SIZE + entry_size > buffer_size)
+			return 0;
 
-	if (index_entry_dup(out, index, &entry) < 0)
+		entry.path = (char *)path_ptr;
+	} else {
+		size_t varint_len;
+		size_t shared = git_decode_varint((const unsigned char *)path_ptr, 
+						  &varint_len);
+		size_t len = strlen(path_ptr + varint_len);
+		size_t last_len = strlen(*last);
+		size_t tmp_path_len;
+
+		if (varint_len == 0)
+			return index_error_invalid("incorrect prefix length");
+
+		GITERR_CHECK_ALLOC_ADD(&tmp_path_len, shared, len + 1);
+		tmp_path = git__malloc(tmp_path_len);
+		GITERR_CHECK_ALLOC(tmp_path);
+		memcpy(tmp_path, last, last_len);
+		memcpy(tmp_path + last_len, path_ptr + varint_len, len);
+		entry_size = long_entry_size(shared + len);
+		entry.path = tmp_path;
+	}
+
+	if (index_entry_dup(out, index, &entry) < 0) {
+		git__free(tmp_path);
 		return 0;
+	}
 
+	git__free(tmp_path);
 	return entry_size;
 }
 
@@ -2341,8 +2394,8 @@ static int read_header(struct index_header *dest, const void *buffer)
 		return index_error_invalid("incorrect header signature");
 
 	dest->version = ntohl(source->version);
-	if (dest->version != INDEX_VERSION_NUMBER_EXT &&
-		dest->version != INDEX_VERSION_NUMBER)
+	if (dest->version < INDEX_VERSION_NUMBER_LB ||
+		dest->version > INDEX_VERSION_NUMBER_UB)
 		return index_error_invalid("incorrect header version");
 
 	dest->entry_count = ntohl(source->entry_count);
@@ -2395,6 +2448,8 @@ static int parse_index(git_index *index, const char *buffer, size_t buffer_size)
 	unsigned int i;
 	struct index_header header = { 0 };
 	git_oid checksum_calculated, checksum_expected;
+	const char **last = NULL;
+	const char *empty = "";
 
 #define seek_forward(_increase) { \
 	if (_increase >= buffer_size) { \
@@ -2415,6 +2470,10 @@ static int parse_index(git_index *index, const char *buffer, size_t buffer_size)
 	if ((error = read_header(&header, buffer)) < 0)
 		return error;
 
+	index->version = header.version;
+	if (index->version >= INDEX_VERSION_NUMBER_COMP)
+		last = &empty;
+
 	seek_forward(INDEX_HEADER_SIZE);
 
 	assert(!index->entries.length);
@@ -2427,7 +2486,7 @@ static int parse_index(git_index *index, const char *buffer, size_t buffer_size)
 	/* Parse all the entries */
 	for (i = 0; i < header.entry_count && buffer_size > INDEX_FOOTER_SIZE; ++i) {
 		git_index_entry *entry;
-		size_t entry_size = read_entry(&entry, index, buffer, buffer_size);
+		size_t entry_size = read_entry(&entry, index, buffer, buffer_size, last);
 
 		/* 0 bytes read means an object corruption */
 		if (entry_size == 0) {
@@ -2518,15 +2577,31 @@ static bool is_index_extended(git_index *index)
 	return (extended > 0);
 }
 
-static int write_disk_entry(git_filebuf *file, git_index_entry *entry)
+static int write_disk_entry(git_filebuf *file, git_index_entry *entry, const char **last)
 {
 	void *mem = NULL;
 	struct entry_short *ondisk;
 	size_t path_len, disk_size;
 	char *path;
+	const char *path_start = entry->path;
+	size_t same_len = 0;
 
 	path_len = ((struct entry_internal *)entry)->pathlen;
 
+	if (last) {
+		const char *last_c = *last;
+
+		while (*path_start == *last_c) {
+			if (!*path_start || !*last_c)
+				break;
+			++path_start;
+			++last_c;
+			++same_len;
+		}
+		path_len -= same_len;
+		*last = entry->path;
+	}
+
 	if (entry->flags & GIT_IDXENTRY_EXTENDED)
 		disk_size = long_entry_size(path_len);
 	else
@@ -2574,7 +2649,12 @@ static int write_disk_entry(git_filebuf *file, git_index_entry *entry)
 	else
 		path = ondisk->path;
 
-	memcpy(path, entry->path, path_len);
+	if (last) {
+		path += git_encode_varint((unsigned char *) path,
+					  disk_size,
+					  path_len - same_len);
+	}
+	memcpy(path, path_start, path_len);
 
 	return 0;
 }
@@ -2585,6 +2665,8 @@ static int write_entries(git_index *index, git_filebuf *file)
 	size_t i;
 	git_vector case_sorted, *entries;
 	git_index_entry *entry;
+	const char **last = NULL;
+	const char *empty = "";
 
 	/* If index->entries is sorted case-insensitively, then we need
 	 * to re-sort it case-sensitively before writing */
@@ -2596,8 +2678,11 @@ static int write_entries(git_index *index, git_filebuf *file)
 		entries = &index->entries;
 	}
 
+	if (index->version >= INDEX_VERSION_NUMBER_COMP)
+		last = &empty;
+
 	git_vector_foreach(entries, i, entry)
-		if ((error = write_disk_entry(file, entry)) < 0)
+		if ((error = write_disk_entry(file, entry, last)) < 0)
 			break;
 
 	if (index->ignore_case)
@@ -2762,8 +2847,12 @@ static int write_index(git_oid *checksum, git_index *index, git_filebuf *file)
 
 	assert(index && file);
 
-	is_extended = is_index_extended(index);
-	index_version_number = is_extended ? INDEX_VERSION_NUMBER_EXT : INDEX_VERSION_NUMBER;
+	if (index->version <= INDEX_VERSION_NUMBER_EXT)  {
+		is_extended = is_index_extended(index);
+		index_version_number = is_extended ? INDEX_VERSION_NUMBER_EXT : INDEX_VERSION_NUMBER_LB;
+	} else {
+		index_version_number = index->version;
+	}
 
 	header.signature = htonl(INDEX_HEADER_SIG);
 	header.version = htonl(index_version_number);
diff --git a/src/index.h b/src/index.h
index 8b9b494..9918f14 100644
--- a/src/index.h
+++ b/src/index.h
@@ -46,6 +46,8 @@ struct git_index {
 	git_vector_cmp entries_search;
 	git_vector_cmp entries_search_path;
 	git_vector_cmp reuc_search;
+
+	unsigned int version;
 };
 
 struct git_index_conflict_iterator {
diff --git a/src/varint.c b/src/varint.c
new file mode 100644
index 0000000..2f86860
--- /dev/null
+++ b/src/varint.c
@@ -0,0 +1,77 @@
+/*
+ * Copyright (C) the libgit2 contributors. All rights reserved.
+ *
+ * This file is part of libgit2, distributed under the GNU GPL v2 with
+ * a Linking Exception. For full terms see the included COPYING file.
+ */
+
+#include "common.h"
+#include "varint.h"
+
+/**
+ * Decode a variable-length integer from the start of `bufp`.
+ *
+ * Every byte contributes its low 7 bits; a set high bit means another
+ * byte follows.  Before each following byte is folded in, the running
+ * value is incremented by one and then shifted left by 7 bits.
+ *
+ * No buffer length is taken: bytes are read until one with a clear high
+ * bit is found, so the caller must ensure such a byte is present.
+ *
+ * @param bufp Start of the encoded bytes
+ * @param varint_len Receives the number of bytes consumed, or 0 if the
+ *                   value is rejected as an overflow
+ * @return the decoded value; 0 (with `*varint_len` set to 0) on overflow
+ */
+uintmax_t git_decode_varint(const unsigned char *bufp, size_t *varint_len)
+{
+	const unsigned char *buf = bufp;
+	unsigned char c = *buf++;
+	uintmax_t val = c & 127;
+
+	while (c & 128) {
+		val += 1;
+		if (!val || MSB(val, 7)) {
+			/* A zero length is never valid, so it signals the error */
+			*varint_len = 0;
+			return 0; /* overflow */
+		}
+		c = *buf++;
+		val = (val << 7) + (c & 127);
+	}
+	*varint_len = buf - bufp;
+	return val;
+}
+
+/**
+ * Encode `value` in the format read by `git_decode_varint`.
+ *
+ * @param buf Destination buffer, or NULL to only compute the length
+ * @param bufsize Number of bytes available in `buf`
+ * @param value The value to encode
+ * @return the encoded length in bytes, or -1 if `buf` is non-NULL and
+ *         too small to hold the encoding
+ */
+int git_encode_varint(unsigned char *buf, size_t bufsize, uintmax_t value)
+{
+	unsigned char varint[16];
+	size_t pos = sizeof(varint) - 1;
+	size_t len;
+
+	/* Build the encoding backwards from the end of the scratch array. */
+	varint[pos] = value & 127;
+	while (value >>= 7)
+		varint[--pos] = 128 | (--value & 127);
+	len = sizeof(varint) - pos;
+
+	if (buf) {
+		/*
+		 * Check against the encoded length: `pos` is only an offset
+		 * into the scratch array, not the space the caller needs.
+		 */
+		if (bufsize < len)
+			return -1;
+		memcpy(buf, varint + pos, len);
+	}
+	return (int)len;
+}
diff --git a/src/varint.h b/src/varint.h
new file mode 100644
index 0000000..650ec7d
--- /dev/null
+++ b/src/varint.h
@@ -0,0 +1,34 @@
+/*
+ * Copyright (C) the libgit2 contributors. All rights reserved.
+ *
+ * This file is part of libgit2, distributed under the GNU GPL v2 with
+ * a Linking Exception. For full terms see the included COPYING file.
+ */
+#ifndef INCLUDE_varint_h__
+#define INCLUDE_varint_h__
+
+#include <stddef.h>
+#include <stdint.h>
+
+/**
+ * Encode `value` as a variable-length integer.
+ *
+ * @param buf Destination buffer, or NULL to only compute the length
+ * @param bufsize Number of bytes available in `buf`
+ * @param value The value to encode
+ * @return the encoded length in bytes, or -1 if `buf` is given but
+ *         rejected as too small
+ */
+extern int git_encode_varint(unsigned char *buf, size_t bufsize, uintmax_t value);
+
+/**
+ * Decode a variable-length integer from the start of `bufp`.
+ *
+ * @param bufp Start of the encoded bytes
+ * @param varint_len Receives the number of bytes consumed, or 0 on
+ *                   overflow
+ * @return the decoded value, or 0 on overflow
+ */
+extern uintmax_t git_decode_varint(const unsigned char *bufp, size_t *varint_len);
+
+#endif
diff --git a/tests/core/encoding.c b/tests/core/encoding.c
new file mode 100644
index 0000000..7d91720
--- /dev/null
+++ b/tests/core/encoding.c
@@ -0,0 +1,39 @@
+#include "clar_libgit2.h"
+#include "varint.h"
+
+void test_core_encoding__decode(void)
+{
+	const unsigned char *buf = (unsigned char *)"AB";
+	size_t size;
+	/* 'A' (65) has its high bit clear, so it decodes as a single byte. */
+	cl_assert(git_decode_varint(buf, &size) == 65);
+	cl_assert(size == 1);
+	/* Three continuation bytes then 'X': four bytes consumed, 'Y' untouched. */
+	buf = (unsigned char *)"\xfe\xdc\xbaXY";
+	cl_assert(git_decode_varint(buf, &size) == 267869656);
+	cl_assert(size == 4);
+	/* Two more continuation bytes in front: six bytes consumed. */
+	buf = (unsigned char *)"\xaa\xaa\xfe\xdc\xbaXY";
+	cl_assert(git_decode_varint(buf, &size) == 1489279344088ULL);
+	cl_assert(size == 6);
+	/* Ten continuation bytes: expected to be reported as overflow (0, 0). */
+	buf = (unsigned char *)"\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xfe\xdc\xbaXY";
+	cl_assert(git_decode_varint(buf, &size) == 0);
+	cl_assert(size == 0);
+
+}
+
+void test_core_encoding__encode(void)
+{
+	unsigned char buf[100];
+	cl_assert(git_encode_varint(buf, 100, 65) == 1);
+	cl_assert(buf[0] == 'A');
+	/* Same values as the decode test, checked in the other direction. */
+	cl_assert(git_encode_varint(buf, 100, 267869656) == 4);
+	cl_assert(!memcmp(buf, "\xfe\xdc\xbaX", 4));
+	/* A larger value needs six bytes. */
+	cl_assert(git_encode_varint(buf, 100, 1489279344088ULL) == 6);
+	cl_assert(!memcmp(buf, "\xaa\xaa\xfe\xdc\xbaX", 6));
+	/* A six-byte encoding must be refused when only one byte is available. */
+	cl_assert(git_encode_varint(buf, 1, 1489279344088ULL) == -1);
+}
diff --git a/tests/index/version.c b/tests/index/version.c
new file mode 100644
index 0000000..3fd240d
--- /dev/null
+++ b/tests/index/version.c
@@ -0,0 +1,41 @@
+#include "clar_libgit2.h"
+#include "index.h"
+
+static git_repository *g_repo = NULL;
+
+void test_index_version__can_write_v4(void)
+{
+	git_index *index;
+	const git_index_entry *entry;
+	/* Load the fixture index, write it as v4, then fetch the index again. */
+	g_repo = cl_git_sandbox_init("filemodes");
+	cl_git_pass(git_repository_index(&index, g_repo));
+	/* The "filemodes" fixture is expected to carry an on-disk v2 index. */
+	cl_assert(index->on_disk);
+	cl_assert(git_index_version(index) == 2);
+
+	cl_assert(git_index_entrycount(index) == 6);
+	/* Request version 4 for the next write. */
+	cl_git_pass(git_index_set_version(index, 4));
+	/* Write it out and release this reference to the index. */
+	cl_git_pass(git_index_write(index));
+	git_index_free(index);
+	/* NOTE(review): confirm this lookup re-parses the file, not a cached index. */
+	cl_git_pass(git_repository_index(&index, g_repo));
+	cl_assert(git_index_version(index) == 4);
+	/* Spot-check three of the entries by path. */
+	entry = git_index_get_bypath(index, "exec_off", 0);
+	cl_assert(entry);
+	entry = git_index_get_bypath(index, "exec_off2on_staged", 0);
+	cl_assert(entry);
+	entry = git_index_get_bypath(index, "exec_on", 0);
+	cl_assert(entry);
+
+	git_index_free(index);
+}
+
+void test_index_version__cleanup(void)
+{
+	cl_git_sandbox_cleanup();
+	g_repo = NULL; /* do not keep the pointer once the sandbox is torn down */
+}