Commit bd670abd23944a20c6a84978ea590c8fd4258cb2

Edward Thomson 2015-06-23T23:30:58

Merge pull request #3226 from libgit2/cmn/racy-diff-again racy-git, the missing link

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
diff --git a/CHANGELOG.md b/CHANGELOG.md
index eb7ae84..1340f78 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -69,6 +69,9 @@ support for HTTPS connections insead of OpenSSL.
   and `git_diff_buffers` now accept a new binary callback of type
   `git_diff_binary_cb` that includes the binary diff information.
 
+* The race condition mitigations described in `racy-git.txt` have been
+  implemented.
+
 ### API additions
 
 * The `git_merge_options` gained a `file_flags` member.
diff --git a/src/diff.c b/src/diff.c
index d7365ef..cc93f57 100644
--- a/src/diff.c
+++ b/src/diff.c
@@ -816,11 +816,11 @@ static int maybe_modified(
 	} else if (git_oid_iszero(&nitem->id) && new_is_workdir) {
 		bool use_ctime = ((diff->diffcaps & GIT_DIFFCAPS_TRUST_CTIME) != 0);
 		bool use_nanos = ((diff->diffcaps & GIT_DIFFCAPS_TRUST_NANOSECS) != 0);
+		git_index *index;
+		git_iterator_index(&index, info->new_iter);
 
 		status = GIT_DELTA_UNMODIFIED;
 
-		/* TODO: add check against index file st_mtime to avoid racy-git */
-
 		if (S_ISGITLINK(nmode)) {
 			if ((error = maybe_modified_submodule(&status, &noid, diff, info)) < 0)
 				return error;
@@ -839,7 +839,8 @@ static int maybe_modified(
 			 !diff_time_eq(&oitem->ctime, &nitem->ctime, use_nanos)) ||
 			oitem->ino != nitem->ino ||
 			oitem->uid != nitem->uid ||
-			oitem->gid != nitem->gid)
+			oitem->gid != nitem->gid ||
+			(index && nitem->mtime.seconds >= index->stamp.mtime))
 		{
 			status = GIT_DELTA_MODIFIED;
 			modified_uncertain = true;
diff --git a/src/index.c b/src/index.c
index 1fb3c48..5ce5522 100644
--- a/src/index.c
+++ b/src/index.c
@@ -688,20 +688,59 @@ int git_index__changed_relative_to(
 	return !!git_oid_cmp(&index->checksum, checksum);
 }
 
+static bool is_racy_timestamp(git_time_t stamp, git_index_entry *entry)
+{
+	/* Git special-cases submodules in the check */
+	if (S_ISGITLINK(entry->mode))
+		return false;
+
+	/* If we never read the index, we can't have this race either */
+	if (stamp == 0)
+		return false;
+
+	/* If the timestamp is the same or newer than the index, it's racy */
+	return ((int32_t) stamp) <= entry->mtime.seconds;
+}
+
 /*
  * Force the next diff to take a look at those entries which have the
  * same timestamp as the current index.
  */
-static void truncate_racily_clean(git_index *index)
+static int truncate_racily_clean(git_index *index)
 {
 	size_t i;
+	int error;
 	git_index_entry *entry;
 	git_time_t ts = index->stamp.mtime;
+	git_diff_options diff_opts = GIT_DIFF_OPTIONS_INIT;
+	git_diff *diff;
 
+	/* Nothing to do if there's no repo to talk about */
+	if (!INDEX_OWNER(index))
+		return 0;
+
+	/* If there's no workdir, we can't know where to even check */
+	if (!git_repository_workdir(INDEX_OWNER(index)))
+		return 0;
+
+	diff_opts.flags |= GIT_DIFF_INCLUDE_TYPECHANGE | GIT_DIFF_IGNORE_SUBMODULES | GIT_DIFF_DISABLE_PATHSPEC_MATCH;
 	git_vector_foreach(&index->entries, i, entry) {
-		if (entry->mtime.seconds == ts || ts == 0)
+		if (!is_racy_timestamp(ts, entry))
+			continue;
+
+		diff_opts.pathspec.count = 1;
+		diff_opts.pathspec.strings = (char **) &entry->path;
+
+		if ((error = git_diff_index_to_workdir(&diff, INDEX_OWNER(index), index, &diff_opts)) < 0)
+			return error;
+
+		if (git_diff_num_deltas(diff) > 0)
 			entry->file_size = 0;
+
+		git_diff_free(diff);
 	}
+
+	return 0;
 }
 
 int git_index_write(git_index *index)
diff --git a/src/iterator.c b/src/iterator.c
index 7807a16..d5f7eec 100644
--- a/src/iterator.c
+++ b/src/iterator.c
@@ -1762,6 +1762,18 @@ int git_iterator_current_workdir_path(git_buf **path, git_iterator *iter)
 	return 0;
 }
 
+int git_iterator_index(git_index **out, git_iterator *iter)
+{
+	workdir_iterator *wi = (workdir_iterator *)iter;
+
+	if (iter->type != GIT_ITERATOR_TYPE_WORKDIR)
+		*out = NULL;
+
+	*out = wi->index;
+
+	return 0;
+}
+
 int git_iterator_advance_over_with_status(
 	const git_index_entry **entryptr,
 	git_iterator_status_t *status,
diff --git a/src/iterator.h b/src/iterator.h
index db1f325..57f8241 100644
--- a/src/iterator.h
+++ b/src/iterator.h
@@ -11,6 +11,7 @@
 #include "git2/index.h"
 #include "vector.h"
 #include "buffer.h"
+#include "ignore.h"
 
 typedef struct git_iterator git_iterator;
 
@@ -286,4 +287,11 @@ typedef enum {
 extern int git_iterator_advance_over_with_status(
 	const git_index_entry **entry, git_iterator_status_t *status, git_iterator *iter);
 
+/**
+ * Retrieve the index stored in the iterator.
+ *
+ * Only implemented for the workdir iterator
+ */
+extern int git_iterator_index(git_index **out, git_iterator *iter);
+
 #endif
diff --git a/tests/diff/racy.c b/tests/diff/racy.c
deleted file mode 100644
index a109f8c..0000000
--- a/tests/diff/racy.c
+++ /dev/null
@@ -1,39 +0,0 @@
-#include "clar_libgit2.h"
-
-#include "buffer.h"
-
-static git_repository *g_repo;
-
-void test_diff_racy__initialize(void)
-{
-	cl_git_pass(git_repository_init(&g_repo, "diff_racy", false));
-}
-
-void test_diff_racy__cleanup(void)
-{
-	cl_git_sandbox_cleanup();
-}
-
-void test_diff_racy__diff(void)
-{
-	git_index *index;
-	git_diff *diff;
-	git_buf path = GIT_BUF_INIT;
-
-	cl_git_pass(git_buf_joinpath(&path, git_repository_workdir(g_repo), "A"));
-	cl_git_mkfile(path.ptr, "A");
-
-	/* Put 'A' into the index */
-	cl_git_pass(git_repository_index(&index, g_repo));
-	cl_git_pass(git_index_add_bypath(index, "A"));
-	cl_git_pass(git_index_write(index));
-
-	cl_git_pass(git_diff_index_to_workdir(&diff, g_repo, index, NULL));
-	cl_assert_equal_i(0, git_diff_num_deltas(diff));
-
-	/* Change its contents quickly, so we get the same timestamp */
-	cl_git_mkfile(path.ptr, "B");
-
-	cl_git_pass(git_diff_index_to_workdir(&diff, g_repo, index, NULL));
-	cl_assert_equal_i(1, git_diff_num_deltas(diff));
-}
diff --git a/tests/diff/workdir.c b/tests/diff/workdir.c
index ecc556c..13de6a9 100644
--- a/tests/diff/workdir.c
+++ b/tests/diff/workdir.c
@@ -1623,6 +1623,8 @@ void test_diff_workdir__can_update_index(void)
 
 	/* now if we do it again, we should see fewer OID calculations */
 
+	/* tick again as the index updating from the previous diff might have reset the timestamp */
+	tick_index(index);
 	basic_diff_status(&diff, &opts);
 
 	cl_git_pass(git_diff_get_perfdata(&perf, diff));
diff --git a/tests/index/racy.c b/tests/index/racy.c
new file mode 100644
index 0000000..3a4bc43
--- /dev/null
+++ b/tests/index/racy.c
@@ -0,0 +1,143 @@
+#include "clar_libgit2.h"
+#include "../checkout/checkout_helpers.h"
+
+#include "buffer.h"
+#include "index.h"
+
+static git_repository *g_repo;
+
+void test_index_racy__initialize(void)
+{
+	cl_git_pass(git_repository_init(&g_repo, "diff_racy", false));
+}
+
+void test_index_racy__cleanup(void)
+{
+	git_repository_free(g_repo);
+	g_repo = NULL;
+
+	cl_fixture_cleanup("diff_racy");
+}
+
+void test_index_racy__diff(void)
+{
+	git_index *index;
+	git_diff *diff;
+	git_buf path = GIT_BUF_INIT;
+
+	cl_git_pass(git_buf_joinpath(&path, git_repository_workdir(g_repo), "A"));
+	cl_git_mkfile(path.ptr, "A");
+
+	/* Put 'A' into the index */
+	cl_git_pass(git_repository_index(&index, g_repo));
+	cl_git_pass(git_index_add_bypath(index, "A"));
+	cl_git_pass(git_index_write(index));
+
+	cl_git_pass(git_diff_index_to_workdir(&diff, g_repo, index, NULL));
+	cl_assert_equal_i(0, git_diff_num_deltas(diff));
+	git_diff_free(diff);
+
+	/* Change its contents quickly, so we get the same timestamp */
+	cl_git_mkfile(path.ptr, "B");
+
+	cl_git_pass(git_diff_index_to_workdir(&diff, g_repo, index, NULL));
+	cl_assert_equal_i(1, git_diff_num_deltas(diff));
+
+	git_index_free(index);
+	git_diff_free(diff);
+	git_buf_free(&path);
+}
+
+void test_index_racy__write_index_just_after_file(void)
+{
+	git_index *index;
+	git_diff *diff;
+	git_buf path = GIT_BUF_INIT;
+	struct timeval times[2];
+
+	/* Make sure we do have a timestamp */
+	cl_git_pass(git_repository_index(&index, g_repo));
+	cl_git_pass(git_index_write(index));
+
+	cl_git_pass(git_buf_joinpath(&path, git_repository_workdir(g_repo), "A"));
+	cl_git_mkfile(path.ptr, "A");
+	/* Force the file's timestamp to be a second after we wrote the index */
+	times[0].tv_sec = index->stamp.mtime + 1;
+	times[0].tv_usec = 0;
+	times[1].tv_sec = index->stamp.mtime + 1;
+	times[1].tv_usec = 0;
+	cl_git_pass(p_utimes(path.ptr, times));
+
+	/*
+	 * Put 'A' into the index, the size field will be filled,
+	 * because the index' on-disk timestamp does not match the
+	 * file's timestamp.
+	 */
+	cl_git_pass(git_index_add_bypath(index, "A"));
+	cl_git_pass(git_index_write(index));
+
+	cl_git_mkfile(path.ptr, "B");
+	/*
+	 * Pretend this index' modification happend a second after the
+	 * file update, and rewrite the file in that same second.
+	 */
+	times[0].tv_sec = index->stamp.mtime + 2;
+	times[0].tv_usec = 0;
+	times[1].tv_sec = index->stamp.mtime + 2;
+	times[0].tv_usec = 0;
+
+	cl_git_pass(p_utimes(git_index_path(index), times));
+	cl_git_pass(p_utimes(path.ptr, times));
+
+	cl_git_pass(git_index_read(index, true));
+
+	cl_git_pass(git_diff_index_to_workdir(&diff, g_repo, index, NULL));
+	cl_assert_equal_i(1, git_diff_num_deltas(diff));
+
+	git_buf_free(&path);
+	git_diff_free(diff);
+	git_index_free(index);
+}
+
+void test_index_racy__empty_file_after_smudge(void)
+{
+	git_index *index;
+	git_diff *diff;
+	git_buf path = GIT_BUF_INIT;
+	int i, found_race = 0;
+	const git_index_entry *entry;
+
+	/* Make sure we do have a timestamp */
+	cl_git_pass(git_repository_index(&index, g_repo));
+	cl_git_pass(git_index_write(index));
+
+	cl_git_pass(git_buf_joinpath(&path, git_repository_workdir(g_repo), "A"));
+
+	/* Make sure writing the file, adding and rewriting happen in the same second */
+	for (i = 0; i < 10; i++) {
+		struct stat st;
+		cl_git_mkfile(path.ptr, "A");
+
+		cl_git_pass(git_index_add_bypath(index, "A"));
+		cl_git_mkfile(path.ptr, "B");
+		cl_git_pass(git_index_write(index));
+
+		cl_git_mkfile(path.ptr, "");
+
+		cl_git_pass(p_stat(path.ptr, &st));
+		cl_assert(entry = git_index_get_bypath(index, "A", 0));
+		if (entry->mtime.seconds == (int32_t) st.st_mtime) {
+			found_race = 1;
+			break;
+		}
+
+	}
+
+	if (!found_race)
+		cl_fail("failed to find race after 10 attempts");
+
+	cl_assert_equal_i(0, entry->file_size);
+
+	cl_git_pass(git_diff_index_to_workdir(&diff, g_repo, index, NULL));
+	cl_assert_equal_i(1, git_diff_num_deltas(diff));
+}
diff --git a/tests/merge/workdir/dirty.c b/tests/merge/workdir/dirty.c
index 30c404b..4bf984c 100644
--- a/tests/merge/workdir/dirty.c
+++ b/tests/merge/workdir/dirty.c
@@ -133,12 +133,25 @@ static void hack_index(char *files[])
 	struct stat statbuf;
 	git_buf path = GIT_BUF_INIT;
 	git_index_entry *entry;
+	struct timeval times[2];
+	time_t now;
 	size_t i;
 
 	/* Update the index to suggest that checkout placed these files on
 	 * disk, keeping the object id but updating the cache, which will
 	 * emulate a Git implementation's different filter.
+	 *
+	 * We set the file's timestamp to before now to pretend that
+	 * it was an old checkout so we don't trigger the racy
+	 * protections would would check the content.
 	 */
+
+	now = time(NULL);
+	times[0].tv_sec  = now - 5;
+	times[0].tv_usec = 0;
+	times[1].tv_sec  = now - 5;
+	times[1].tv_usec = 0;
+
 	for (i = 0, filename = files[i]; filename; filename = files[++i]) {
 		git_buf_clear(&path);
 
@@ -146,6 +159,7 @@ static void hack_index(char *files[])
 			git_index_get_bypath(repo_index, filename, 0));
 
 		cl_git_pass(git_buf_printf(&path, "%s/%s", TEST_REPO_PATH, filename));
+		cl_git_pass(p_utimes(path.ptr, times));
 		cl_git_pass(p_stat(path.ptr, &statbuf));
 
 		entry->ctime.seconds = (git_time_t)statbuf.st_ctime;
@@ -245,7 +259,6 @@ static int merge_differently_filtered_files(char *files[])
 	write_files(files);
 	hack_index(files);
 
-	repo_index->stamp.mtime = time(NULL) + 1;
 	cl_git_pass(git_index_write(repo_index));
 
 	error = merge_branch();
diff --git a/tests/status/worktree.c b/tests/status/worktree.c
index 56f98a8..75c7b71 100644
--- a/tests/status/worktree.c
+++ b/tests/status/worktree.c
@@ -985,6 +985,8 @@ void test_status_worktree__update_stat_cache_0(void)
 
 	opts.flags &= ~GIT_STATUS_OPT_UPDATE_INDEX;
 
+	/* tick again as the index updating from the previous diff might have reset the timestamp */
+	tick_index(index);
 	cl_git_pass(git_status_list_new(&status, repo, &opts));
 	check_status0(status);
 	cl_git_pass(git_status_list_get_perfdata(&perf, status));