Commit 49d34c1c0c706eea09380b2165bb3ad4e506dc30

Russell Belfer 2012-09-13T13:17:38

Fix problems in diff iterator record chaining There is a bug in building the linked list of line records in the diff iterator and also an off by one element error in the hunk counts. This fixes both of these, adds some test data with more complex sets of hunk and line diffs to exercise this code better.

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
diff --git a/src/diff_output.c b/src/diff_output.c
index ea40c33..50e3cc1 100644
--- a/src/diff_output.c
+++ b/src/diff_output.c
@@ -1204,13 +1204,17 @@ static int diffiter_hunk_cb(
 	if (info->last_hunk)
 		info->last_hunk->next = hunk;
 	info->last_hunk = hunk;
+	info->last_line = NULL;
 
 	memcpy(&hunk->range, range, sizeof(hunk->range));
 
 	iter->hunk_count++;
 
-	if (iter->hunk_head == NULL)
-		iter->hunk_curr = iter->hunk_head = hunk;
+	/* adding first hunk to list */
+	if (iter->hunk_head == NULL) {
+		iter->hunk_head = hunk;
+		iter->hunk_curr = NULL;
+	}
 
 	return 0;
 }
@@ -1345,9 +1349,14 @@ int git_diff_iterator_num_hunks_in_file(git_diff_iterator *iter)
 int git_diff_iterator_num_lines_in_hunk(git_diff_iterator *iter)
 {
 	int error = diffiter_do_diff_file(iter);
-	if (!error && iter->hunk_curr)
-		error = iter->hunk_curr->line_count;
-	return error;
+	if (error)
+		return error;
+
+	if (iter->hunk_curr)
+		return iter->hunk_curr->line_count;
+	if (iter->hunk_head)
+		return iter->hunk_head->line_count;
+	return 0;
 }
 
 int git_diff_iterator_next_file(
@@ -1386,7 +1395,7 @@ int git_diff_iterator_next_file(
 	}
 
 	if (iter->ctxt.delta == NULL) {
-		iter->hunk_curr = NULL;
+		iter->hunk_curr = iter->hunk_head = NULL;
 		iter->line_curr = NULL;
 	}
 
@@ -1409,11 +1418,13 @@ int git_diff_iterator_next_hunk(
 		return error;
 
 	if (iter->hunk_curr == NULL) {
-		if (range_ptr) *range_ptr = NULL;
-		if (header) *header = NULL;
-		if (header_len) *header_len = 0;
-		iter->line_curr = NULL;
-		return GIT_ITEROVER;
+		if (iter->hunk_head == NULL)
+			goto no_more_hunks;
+		iter->hunk_curr = iter->hunk_head;
+	} else {
+		if (iter->hunk_curr->next == NULL)
+			goto no_more_hunks;
+		iter->hunk_curr = iter->hunk_curr->next;
 	}
 
 	range = &iter->hunk_curr->range;
@@ -1436,9 +1447,16 @@ int git_diff_iterator_next_hunk(
 	}
 
 	iter->line_curr = iter->hunk_curr->line_head;
-	iter->hunk_curr = iter->hunk_curr->next;
 
 	return error;
+
+no_more_hunks:
+	if (range_ptr) *range_ptr = NULL;
+	if (header) *header = NULL;
+	if (header_len) *header_len = 0;
+	iter->line_curr = NULL;
+
+	return GIT_ITEROVER;
 }
 
 int git_diff_iterator_next_line(
@@ -1453,7 +1471,7 @@ int git_diff_iterator_next_line(
 		return error;
 
 	/* if the user has not called next_hunk yet, call it implicitly (OK?) */
-	if (iter->hunk_curr == iter->hunk_head) {
+	if (iter->hunk_curr == NULL) {
 		error = git_diff_iterator_next_hunk(NULL, NULL, NULL, iter);
 		if (error)
 			return error;
diff --git a/tests-clar/diff/tree.c b/tests-clar/diff/tree.c
index 3003374..f5e72ca 100644
--- a/tests-clar/diff/tree.c
+++ b/tests-clar/diff/tree.c
@@ -256,3 +256,85 @@ void test_diff_tree__merge(void)
 
 	git_diff_list_free(diff1);
 }
+
+void test_diff_tree__larger_hunks(void)
+{
+	const char *a_commit = "d70d245ed97ed2aa596dd1af6536e4bfdb047b69";
+	const char *b_commit = "7a9e0b02e63179929fed24f0a3e0f19168114d10";
+	git_tree *a, *b;
+	git_diff_options opts = {0};
+	git_diff_list *diff = NULL;
+	git_diff_iterator *iter = NULL;
+	git_diff_delta *delta;
+	diff_expects exp;
+	int error, num_files = 0;
+
+	g_repo = cl_git_sandbox_init("diff");
+
+	cl_assert((a = resolve_commit_oid_to_tree(g_repo, a_commit)) != NULL);
+	cl_assert((b = resolve_commit_oid_to_tree(g_repo, b_commit)) != NULL);
+
+	opts.context_lines = 1;
+	opts.interhunk_lines = 0;
+
+	memset(&exp, 0, sizeof(exp));
+
+	cl_git_pass(git_diff_tree_to_tree(g_repo, &opts, a, b, &diff));
+	cl_git_pass(git_diff_iterator_new(&iter, diff));
+
+	/* this should be exact */
+	cl_assert(git_diff_iterator_progress(iter) == 0.0f);
+
+	/* You wouldn't actually structure an iterator loop this way, but
+	 * I have here for testing purposes of the return value
+	 */
+	while (!(error = git_diff_iterator_next_file(&delta, iter))) {
+		git_diff_range *range;
+		const char *header;
+		size_t header_len;
+		int actual_hunks = 0, num_hunks;
+		float expected_progress;
+
+		num_files++;
+
+		expected_progress = (float)num_files / 2.0f;
+		cl_assert(expected_progress == git_diff_iterator_progress(iter));
+
+		num_hunks = git_diff_iterator_num_hunks_in_file(iter);
+
+		while (!(error = git_diff_iterator_next_hunk(
+					&range, &header, &header_len, iter)))
+		{
+			int actual_lines = 0;
+			int num_lines = git_diff_iterator_num_lines_in_hunk(iter);
+			char origin;
+			const char *line;
+			size_t line_len;
+
+			while (!(error = git_diff_iterator_next_line(
+						&origin, &line, &line_len, iter)))
+			{
+				actual_lines++;
+			}
+
+			cl_assert_equal_i(GIT_ITEROVER, error);
+			cl_assert_equal_i(actual_lines, num_lines);
+
+			actual_hunks++;
+		}
+
+		cl_assert_equal_i(GIT_ITEROVER, error);
+		cl_assert_equal_i(actual_hunks, num_hunks);
+	}
+
+	cl_assert_equal_i(GIT_ITEROVER, error);
+	cl_assert_equal_i(2, num_files);
+	cl_assert(git_diff_iterator_progress(iter) == 1.0f);
+
+	git_diff_iterator_free(iter);
+	git_diff_list_free(diff);
+	diff = NULL;
+
+	git_tree_free(a);
+	git_tree_free(b);
+}
diff --git a/tests-clar/diff/workdir.c b/tests-clar/diff/workdir.c
index eac7eb8..40a8885 100644
--- a/tests-clar/diff/workdir.c
+++ b/tests-clar/diff/workdir.c
@@ -670,3 +670,94 @@ void test_diff_workdir__eof_newline_changes(void)
  *
  * Expect 13 files, 0 ADD, 4 DEL, 4 MOD, 1 IGN, 4 UNTR
  */
+
+
+void test_diff_workdir__larger_hunks(void)
+{
+	const char *a_commit = "d70d245ed97ed2aa596dd1af6536e4bfdb047b69";
+	const char *b_commit = "7a9e0b02e63179929fed24f0a3e0f19168114d10";
+	git_tree *a, *b;
+	git_diff_options opts = {0};
+	int i, error;
+
+	g_repo = cl_git_sandbox_init("diff");
+
+	cl_assert((a = resolve_commit_oid_to_tree(g_repo, a_commit)) != NULL);
+	cl_assert((b = resolve_commit_oid_to_tree(g_repo, b_commit)) != NULL);
+
+	opts.context_lines = 1;
+	opts.interhunk_lines = 0;
+
+	for (i = 0; i <= 2; ++i) {
+		git_diff_list *diff = NULL;
+		git_diff_iterator *iter = NULL;
+		git_diff_delta *delta;
+		int num_files = 0;
+
+		/* okay, this is a bit silly, but oh well */
+		switch (i) {
+		case 0:
+			cl_git_pass(git_diff_workdir_to_index(g_repo, &opts, &diff));
+			break;
+		case 1:
+			cl_git_pass(git_diff_workdir_to_tree(g_repo, &opts, a, &diff));
+			break;
+		case 2:
+			cl_git_pass(git_diff_workdir_to_tree(g_repo, &opts, b, &diff));
+			break;
+		}
+
+		cl_git_pass(git_diff_iterator_new(&iter, diff));
+
+		cl_assert(git_diff_iterator_progress(iter) == 0.0f);
+
+		while (!(error = git_diff_iterator_next_file(&delta, iter))) {
+			git_diff_range *range;
+			const char *header;
+			size_t header_len;
+			int actual_hunks = 0, num_hunks;
+			float expected_progress;
+
+			num_files++;
+
+			expected_progress = (float)num_files / 2.0f;
+			cl_assert(expected_progress == git_diff_iterator_progress(iter));
+
+			num_hunks = git_diff_iterator_num_hunks_in_file(iter);
+
+			while (!(error = git_diff_iterator_next_hunk(
+						 &range, &header, &header_len, iter)))
+			{
+				int actual_lines = 0;
+				int num_lines = git_diff_iterator_num_lines_in_hunk(iter);
+				char origin;
+				const char *line;
+				size_t line_len;
+
+				while (!(error = git_diff_iterator_next_line(
+							 &origin, &line, &line_len, iter)))
+				{
+					actual_lines++;
+				}
+
+				cl_assert_equal_i(GIT_ITEROVER, error);
+				cl_assert_equal_i(actual_lines, num_lines);
+
+				actual_hunks++;
+			}
+
+			cl_assert_equal_i(GIT_ITEROVER, error);
+			cl_assert_equal_i(actual_hunks, num_hunks);
+		}
+
+		cl_assert_equal_i(GIT_ITEROVER, error);
+		cl_assert_equal_i(2, num_files);
+		cl_assert(git_diff_iterator_progress(iter) == 1.0f);
+
+		git_diff_iterator_free(iter);
+		git_diff_list_free(diff);
+	}
+
+	git_tree_free(a);
+	git_tree_free(b);
+}
diff --git a/tests-clar/resources/diff/.gitted/HEAD b/tests-clar/resources/diff/.gitted/HEAD
new file mode 100644
index 0000000..cb089cd
--- /dev/null
+++ b/tests-clar/resources/diff/.gitted/HEAD
@@ -0,0 +1 @@
+ref: refs/heads/master
diff --git a/tests-clar/resources/diff/.gitted/config b/tests-clar/resources/diff/.gitted/config
new file mode 100644
index 0000000..77a27ef
--- /dev/null
+++ b/tests-clar/resources/diff/.gitted/config
@@ -0,0 +1,6 @@
+[core]
+	repositoryformatversion = 0
+	filemode = true
+	bare = false
+	logallrefupdates = true
+	ignorecase = false
diff --git a/tests-clar/resources/diff/.gitted/description b/tests-clar/resources/diff/.gitted/description
new file mode 100644
index 0000000..498b267
--- /dev/null
+++ b/tests-clar/resources/diff/.gitted/description
@@ -0,0 +1 @@
+Unnamed repository; edit this file 'description' to name the repository.
diff --git a/tests-clar/resources/diff/.gitted/index b/tests-clar/resources/diff/.gitted/index
new file mode 100644
index 0000000..e107187
Binary files /dev/null and b/tests-clar/resources/diff/.gitted/index differ
diff --git a/tests-clar/resources/diff/.gitted/info/exclude b/tests-clar/resources/diff/.gitted/info/exclude
new file mode 100644
index 0000000..a5196d1
--- /dev/null
+++ b/tests-clar/resources/diff/.gitted/info/exclude
@@ -0,0 +1,6 @@
+# git ls-files --others --exclude-from=.git/info/exclude
+# Lines that start with '#' are comments.
+# For a project mostly in C, the following would be a good set of
+# exclude patterns (uncomment them if you want to use them):
+# *.[oa]
+# *~
diff --git a/tests-clar/resources/diff/.gitted/logs/HEAD b/tests-clar/resources/diff/.gitted/logs/HEAD
new file mode 100644
index 0000000..8c6f6fd
--- /dev/null
+++ b/tests-clar/resources/diff/.gitted/logs/HEAD
@@ -0,0 +1,2 @@
+0000000000000000000000000000000000000000 d70d245ed97ed2aa596dd1af6536e4bfdb047b69 Russell Belfer <rb@github.com> 1347559804 -0700	commit (initial): initial commit
+d70d245ed97ed2aa596dd1af6536e4bfdb047b69 7a9e0b02e63179929fed24f0a3e0f19168114d10 Russell Belfer <rb@github.com> 1347560491 -0700	commit: some changes
diff --git a/tests-clar/resources/diff/.gitted/logs/refs/heads/master b/tests-clar/resources/diff/.gitted/logs/refs/heads/master
new file mode 100644
index 0000000..8c6f6fd
--- /dev/null
+++ b/tests-clar/resources/diff/.gitted/logs/refs/heads/master
@@ -0,0 +1,2 @@
+0000000000000000000000000000000000000000 d70d245ed97ed2aa596dd1af6536e4bfdb047b69 Russell Belfer <rb@github.com> 1347559804 -0700	commit (initial): initial commit
+d70d245ed97ed2aa596dd1af6536e4bfdb047b69 7a9e0b02e63179929fed24f0a3e0f19168114d10 Russell Belfer <rb@github.com> 1347560491 -0700	commit: some changes
diff --git a/tests-clar/resources/diff/.gitted/objects/29/ab7053bb4dde0298e03e2c179e890b7dd465a7 b/tests-clar/resources/diff/.gitted/objects/29/ab7053bb4dde0298e03e2c179e890b7dd465a7
new file mode 100644
index 0000000..94f9a67
Binary files /dev/null and b/tests-clar/resources/diff/.gitted/objects/29/ab7053bb4dde0298e03e2c179e890b7dd465a7 differ
diff --git a/tests-clar/resources/diff/.gitted/objects/3e/5bcbad2a68e5bc60a53b8388eea53a1a7ab847 b/tests-clar/resources/diff/.gitted/objects/3e/5bcbad2a68e5bc60a53b8388eea53a1a7ab847
new file mode 100644
index 0000000..9fed523
Binary files /dev/null and b/tests-clar/resources/diff/.gitted/objects/3e/5bcbad2a68e5bc60a53b8388eea53a1a7ab847 differ
diff --git a/tests-clar/resources/diff/.gitted/objects/54/6c735f16a3b44d9784075c2c0dab2ac9bf1989 b/tests-clar/resources/diff/.gitted/objects/54/6c735f16a3b44d9784075c2c0dab2ac9bf1989
new file mode 100644
index 0000000..d7df4d6
Binary files /dev/null and b/tests-clar/resources/diff/.gitted/objects/54/6c735f16a3b44d9784075c2c0dab2ac9bf1989 differ
diff --git a/tests-clar/resources/diff/.gitted/objects/7a/9e0b02e63179929fed24f0a3e0f19168114d10 b/tests-clar/resources/diff/.gitted/objects/7a/9e0b02e63179929fed24f0a3e0f19168114d10
new file mode 100644
index 0000000..9bc25eb
Binary files /dev/null and b/tests-clar/resources/diff/.gitted/objects/7a/9e0b02e63179929fed24f0a3e0f19168114d10 differ
diff --git a/tests-clar/resources/diff/.gitted/objects/7b/808f723a8ca90df319682c221187235af76693 b/tests-clar/resources/diff/.gitted/objects/7b/808f723a8ca90df319682c221187235af76693
new file mode 100644
index 0000000..2fd266b
Binary files /dev/null and b/tests-clar/resources/diff/.gitted/objects/7b/808f723a8ca90df319682c221187235af76693 differ
diff --git a/tests-clar/resources/diff/.gitted/objects/88/789109439c1e1c3cd45224001edee5304ed53c b/tests-clar/resources/diff/.gitted/objects/88/789109439c1e1c3cd45224001edee5304ed53c
new file mode 100644
index 0000000..7598b59
--- /dev/null
+++ b/tests-clar/resources/diff/.gitted/objects/88/789109439c1e1c3cd45224001edee5304ed53c
@@ -0,0 +1 @@
+x+)JMU07g040031QH/H-+(a)[wz{j%;ʊRSrS4W4そN+a
\ No newline at end of file
diff --git a/tests-clar/resources/diff/.gitted/objects/cb/8294e696339863df760b2ff5d1e275bee72455 b/tests-clar/resources/diff/.gitted/objects/cb/8294e696339863df760b2ff5d1e275bee72455
new file mode 100644
index 0000000..86ebe04
Binary files /dev/null and b/tests-clar/resources/diff/.gitted/objects/cb/8294e696339863df760b2ff5d1e275bee72455 differ
diff --git a/tests-clar/resources/diff/.gitted/objects/d7/0d245ed97ed2aa596dd1af6536e4bfdb047b69 b/tests-clar/resources/diff/.gitted/objects/d7/0d245ed97ed2aa596dd1af6536e4bfdb047b69
new file mode 100644
index 0000000..99304c4
--- /dev/null
+++ b/tests-clar/resources/diff/.gitted/objects/d7/0d245ed97ed2aa596dd1af6536e4bfdb047b69
@@ -0,0 +1 @@
+x	!m_RB:XkVpWp 9{ ,^z#7JygԚA i1Y2VyR)𢒨'm[;-lO_#%v8
\ No newline at end of file
diff --git a/tests-clar/resources/diff/.gitted/refs/heads/master b/tests-clar/resources/diff/.gitted/refs/heads/master
new file mode 100644
index 0000000..a83afc3
--- /dev/null
+++ b/tests-clar/resources/diff/.gitted/refs/heads/master
@@ -0,0 +1 @@
+7a9e0b02e63179929fed24f0a3e0f19168114d10
diff --git a/tests-clar/resources/diff/another.txt b/tests-clar/resources/diff/another.txt
new file mode 100644
index 0000000..d0e0bae
--- /dev/null
+++ b/tests-clar/resources/diff/another.txt
@@ -0,0 +1,38 @@
+Git is fast. With Git, nearly all operations are performed locally, giving
+it an huge speed advantage on centralized systems that constantly have to
+communicate with a server somewh3r3.
+
+For testing, large AWS instances were set up in the same availability
+zone. Git and SVN were installed on both machines, the Ruby repository was
+copied to both Git and SVN servers, and common operations were performed on
+both.
+
+In some cases the commands don't match up exactly. Here, matching on the
+lowest common denominator was attempted. For example, the 'commit' tests
+also include the time to push for Git, though most of the time you would not
+actually be pushing to the server immediately after a commit where the two
+commands cannot be separated in SVN.
+
+Note that this is the best case scenario for SVN - a server with no load
+with an 80MB/s bandwidth connection to the client machine. Nearly all of
+these times would be even worse for SVN if that connection was slower, while
+many of the Git times would not be affected.
+
+Clearly, in many of these common version control operations, Git is one or
+two orders of magnitude faster than SVN, even under ideal conditions for
+SVN.
+
+Let's see how common operations stack up against Subversion, a common
+centralized version control system that is similar to CVS or
+Perforce. Smaller is faster.
+
+One place where Git is slower is in the initial clone operation. Here, Git
+One place where Git is slower is in the initial clone operation. Here, Git
+One place where Git is slower is in the initial clone operation. Here, Git
+seen in the above charts, it's not considerably slower for an operation that
+is only performed once.
+
+It's also interesting to note that the size of the data on the client side
+is very similar even though Git also has every version of every file for the
+entire history of the project. This illustrates how efficient it is at
+compressing and storing data on the client side.
\ No newline at end of file
diff --git a/tests-clar/resources/diff/readme.txt b/tests-clar/resources/diff/readme.txt
new file mode 100644
index 0000000..beedf28
--- /dev/null
+++ b/tests-clar/resources/diff/readme.txt
@@ -0,0 +1,36 @@
+The Git feature that r3ally mak3s it stand apart from n3arly 3v3ry other SCM
+out there is its branching model.
+
+Git allows and encourages you to have multiple local branches that can be
+entirely independent of each other. The creation, merging, and deletion of
+those lines of development takes seconds.
+
+Git allows and encourages you to have multiple local branches that can be
+entirely independent of each other. The creation, merging, and deletion of
+those lines of development takes seconds.
+
+This means that you can do things like:
+
+Role-Bas3d Codelin3s. Have a branch that always contains only what goes to
+production, another that you merge work into for testing, and several
+smaller ones for day to day work.
+
+Feature Based Workflow. Create new branches for each new feature you're
+working on so you can seamlessly switch back and forth between them, then
+delete each branch when that feature gets merged into your main line.
+
+Disposable Experimentation. Create a branch to experiment in, realize it's
+not going to work, and just delete it - abandoning the work—with nobody else
+ever seeing it (even if you've pushed other branches in the meantime).
+
+Notably, when you push to a remote repository, you do not have to push all
+share it with others.
+
+Git allows and encourages you to have multiple local branches that can be
+entirely independent of each other. The creation, merging, and deletion of
+those lines of development takes seconds.
+
+There are ways to accomplish some of this with other systems, but the work
+involved is much more difficult and error-prone. Git makes this process
+incredibly easy and it changes the way most developers work when they learn
+it.!