Commit 4a0dbeb0d35343ded24b51906f2a8f8ef6c7910b

Edward Thomson 2015-08-30T17:06:26

diff: use new iterator pathlist handling

When using literal pathspecs in diff with `GIT_DIFF_DISABLE_PATHSPEC_MATCH`, turn on the faster iterator pathlist handling.

Updates iterator pathspecs to include directory prefixes (e.g., `foo/`) for compatibility with `GIT_DIFF_DISABLE_PATHSPEC_MATCH`.

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
diff --git a/src/diff.c b/src/diff.c
index d87738f..32c1d4a 100644
--- a/src/diff.c
+++ b/src/diff.c
@@ -80,14 +80,13 @@ static bool diff_pathspec_match(
 	/* The iterator has filtered out paths for us, so the fact that we're
 	 * seeing this patch means that it must match the given path list.
 	 */
-	if (DIFF_FLAG_IS_SET(diff, GIT_DIFF_ENABLE_FILELIST_MATCH)) {
+	if (DIFF_FLAG_IS_SET(diff, GIT_DIFF_DISABLE_PATHSPEC_MATCH)) {
 		*matched_pathspec = path;
 		return true;
 	}
 
 	return git_pathspec__match(
-		&diff->pathspec, path,
-		DIFF_FLAG_IS_SET(diff, GIT_DIFF_DISABLE_PATHSPEC_MATCH),
+		&diff->pathspec, path, false,
 		DIFF_FLAG_IS_SET(diff, GIT_DIFF_IGNORE_CASE),
 		matched_pathspec, NULL);
 }
@@ -1063,6 +1062,12 @@ static int handle_unmatched_new_item(
 					&info->nitem, &untracked_state, info->new_iter)) < 0)
 				return error;
 
+			/* if we found nothing that matched our pathlist filter, exclude */
+			if (untracked_state == GIT_ITERATOR_STATUS_FILTERED) {
+				git_vector_pop(&diff->deltas);
+				git__free(last);
+			}
+
 			/* if we found nothing or just ignored items, update the record */
 			if (untracked_state == GIT_ITERATOR_STATUS_IGNORED ||
 				untracked_state == GIT_ITERATOR_STATUS_EMPTY) {
@@ -1276,8 +1281,7 @@ cleanup:
 
 #define DIFF_FROM_ITERATORS(MAKE_FIRST, FLAGS_FIRST, MAKE_SECOND, FLAGS_SECOND) do { \
 	git_iterator *a = NULL, *b = NULL; \
-	git_vector pathlist = GIT_VECTOR_INIT; \
-	char *pfx = (opts && !(opts->flags & GIT_DIFF_ENABLE_FILELIST_MATCH)) ? \
+	char *pfx = (opts && !(opts->flags & GIT_DIFF_DISABLE_PATHSPEC_MATCH)) ? \
 		git_pathspec_prefix(&opts->pathspec) : NULL; \
 	git_iterator_options a_opts = GIT_ITERATOR_OPTIONS_INIT, \
 		b_opts = GIT_ITERATOR_OPTIONS_INIT; \
@@ -1288,19 +1292,15 @@ cleanup:
 	b_opts.start = pfx; \
 	b_opts.end = pfx; \
 	GITERR_CHECK_VERSION(opts, GIT_DIFF_OPTIONS_VERSION, "git_diff_options"); \
-	if (opts && (opts->flags & GIT_DIFF_ENABLE_FILELIST_MATCH) && opts->pathspec.count) { \
-		size_t __i; \
-		error = git_vector_init(&pathlist, opts->pathspec.count, NULL); \
-		for (__i = 0; !error && __i < opts->pathspec.count; __i++) { \
-			error = git_vector_insert(&pathlist, opts->pathspec.strings[__i]); \
-		} \
-		a_opts.pathlist = &pathlist; \
-		b_opts.pathlist = &pathlist; \
+	if (opts && (opts->flags & GIT_DIFF_DISABLE_PATHSPEC_MATCH)) { \
+		a_opts.pathlist.strings = opts->pathspec.strings; \
+		a_opts.pathlist.count = opts->pathspec.count; \
+		b_opts.pathlist.strings = opts->pathspec.strings; \
+		b_opts.pathlist.count = opts->pathspec.count; \
 	} \
 	if (!error && !(error = MAKE_FIRST) && !(error = MAKE_SECOND)) \
 		error = git_diff__from_iterators(diff, repo, a, b, opts); \
 	git__free(pfx); git_iterator_free(a); git_iterator_free(b); \
-	git_vector_free(&pathlist); \
 } while (0)
 
 int git_diff_tree_to_tree(
diff --git a/src/iterator.c b/src/iterator.c
index 9fa7cab..bad24d1 100644
--- a/src/iterator.c
+++ b/src/iterator.c
@@ -38,12 +38,15 @@
 	if ((options && options->start && !(P)->base.start) || \
 		(options && options->end && !(P)->base.end)) { \
 		git__free(P); return -1; } \
+	(P)->base.strcomp = git__strcmp; \
+	(P)->base.strncomp = git__strncmp; \
 	(P)->base.prefixcomp = git__prefixcmp; \
 	(P)->base.flags = options ? options->flags & ~ITERATOR_CASE_FLAGS : 0; \
 	if ((P)->base.flags & GIT_ITERATOR_DONT_AUTOEXPAND) \
 		(P)->base.flags |= GIT_ITERATOR_INCLUDE_TREES; \
-	if (options && options->pathlist) \
-		(P)->base.pathlist = options->pathlist; \
+	if (options && options->pathlist.count && \
+		iterator_pathlist__init(&P->base, &options->pathlist) < 0) { \
+		git__free(P); return -1; } \
 	} while (0)
 
 #define iterator__flag(I,F) ((((git_iterator *)(I))->flags & GIT_ITERATOR_ ## F) != 0)
@@ -61,6 +64,82 @@
 	(iterator__end(I) && ((git_iterator *)(I))->prefixcomp((PATH),iterator__end(I)) > 0)
 
 
+typedef enum {
+	ITERATOR_PATHLIST_NONE = 0,
+	ITERATOR_PATHLIST_MATCH = 1,
+	ITERATOR_PATHLIST_MATCH_DIRECTORY = 2,
+	ITERATOR_PATHLIST_MATCH_CHILD = 3,
+} iterator_pathlist__match_t;
+
+static int iterator_pathlist__init(git_iterator *iter, git_strarray *pathspec)
+{
+	size_t i;
+
+	if (git_vector_init(&iter->pathlist, pathspec->count, iter->strcomp) < 0)
+		return -1;
+
+	for (i = 0; i < pathspec->count; i++) {
+		if (!pathspec->strings[i])
+			continue;
+
+		if (git_vector_insert(&iter->pathlist, pathspec->strings[i]) < 0)
+			return -1;
+	}
+
+	git_vector_sort(&iter->pathlist);
+
+	return 0;
+}
+
+static iterator_pathlist__match_t iterator_pathlist__match(
+	git_iterator *iter, const char *path, size_t path_len)
+{
+	const char *p;
+	size_t idx;
+	int error;
+
+	error = git_vector_bsearch2(&idx, &iter->pathlist, iter->strcomp, path);
+
+	if (error == 0)
+		return ITERATOR_PATHLIST_MATCH;
+
+	/* at this point, the path we're examining may be a directory (though we
+	 * don't know that yet, since we're avoiding a stat unless it's necessary)
+	 * so see if the pathlist contains a file beneath this directory.
+	 */
+	while ((p = git_vector_get(&iter->pathlist, idx)) != NULL) {
+		if (iter->prefixcomp(p, path) != 0)
+			break;
+
+		/* an exact match would have been matched by the bsearch above */
+		assert(p[path_len]);
+
+		/* is this a literal directory entry (eg `foo/`) or a file beneath */
+		if (p[path_len] == '/') {
+			while (p[path_len] == '/')
+				path_len++;
+
+			return (p[path_len] == '\0') ?
+				ITERATOR_PATHLIST_MATCH_DIRECTORY :
+				ITERATOR_PATHLIST_MATCH_CHILD;
+		}
+
+		if (p[path_len] > '/')
+			break;
+
+		idx++;
+	}
+
+	return ITERATOR_PATHLIST_NONE;
+}
+
+static void iterator_pathlist__update_ignore_case(git_iterator *iter)
+{
+	git_vector_set_cmp(&iter->pathlist, iter->strcomp);
+	git_vector_sort(&iter->pathlist);
+}
+
+
 static int iterator__reset_range(
 	git_iterator *iter, const char *start, const char *end)
 {
@@ -87,7 +166,8 @@ static int iterator__update_ignore_case(
 	git_iterator *iter,
 	git_iterator_flag_t flags)
 {
-	int error = 0, ignore_case = -1;
+	bool ignore_case;
+	int error;
 
 	if ((flags & GIT_ITERATOR_IGNORE_CASE) != 0)
 		ignore_case = true;
@@ -96,25 +176,29 @@ static int iterator__update_ignore_case(
 	else {
 		git_index *index;
 
-		if (!(error = git_repository_index__weakptr(&index, iter->repo)))
-			ignore_case = (index->ignore_case != false);
+		if ((error = git_repository_index__weakptr(&index, iter->repo)) < 0)
+			return error;
+
+		ignore_case = (index->ignore_case == 1);
 	}
 
-	if (ignore_case > 0)
+	if (ignore_case) {
 		iter->flags = (iter->flags | GIT_ITERATOR_IGNORE_CASE);
-	else if (ignore_case == 0)
-		iter->flags = (iter->flags & ~GIT_ITERATOR_IGNORE_CASE);
 
-	iter->prefixcomp = iterator__ignore_case(iter) ?
-		git__prefixcmp_icase : git__prefixcmp;
+		iter->strcomp = git__strcasecmp;
+		iter->strncomp = git__strncasecmp;
+		iter->prefixcomp = git__prefixcmp_icase;
+	} else {
+		iter->flags = (iter->flags & ~GIT_ITERATOR_IGNORE_CASE);
 
-	if (iter->pathlist) {
-		git_vector_set_cmp(iter->pathlist, iterator__ignore_case(iter) ?
-			git__strcasecmp : git__strcmp);
-		git_vector_sort(iter->pathlist);
+		iter->strcomp = git__strcmp;
+		iter->strncomp = git__strncmp;
+		iter->prefixcomp = git__prefixcmp;
 	}
 
-	return error;
+	iterator_pathlist__update_ignore_case(iter);
+
+	return 0;
 }
 
 GIT_INLINE(void) iterator__clear_entry(const git_index_entry **entry)
@@ -210,7 +294,6 @@ typedef struct {
 	int path_ambiguities;
 	bool path_has_filename;
 	bool entry_is_current;
-	int (*strncomp)(const char *a, const char *b, size_t sz);
 } tree_iterator;
 
 static char *tree_iterator__current_filename(
@@ -280,7 +363,7 @@ static int tree_iterator__search_cmp(const void *key, const void *val, void *p)
 	return git_path_cmp(
 		tf->start, tf->startlen, false,
 		te->filename, te->filename_len, te->attr == GIT_FILEMODE_TREE,
-		((tree_iterator *)p)->strncomp);
+		((git_iterator *)p)->strncomp);
 }
 
 static bool tree_iterator__move_to_next(
@@ -312,7 +395,7 @@ static int tree_iterator__set_next(tree_iterator *ti, tree_iterator_frame *tf)
 	for (; tf->next < tf->n_entries; tf->next++, last = te) {
 		te = tf->entries[tf->next]->te;
 
-		if (last && tree_iterator__te_cmp(last, te, ti->strncomp))
+		if (last && tree_iterator__te_cmp(last, te, ti->base.strncomp))
 			break;
 
 		/* try to load trees for items in [current,next) range */
@@ -624,9 +707,6 @@ int git_iterator_for_tree(
 	if (tree == NULL)
 		return git_iterator_for_nothing(iter, options);
 
-	/* not yet supported */
-	assert (!options || !options->pathlist);
-
 	if ((error = git_object_dup((git_object **)&tree, (git_object *)tree)) < 0)
 		return error;
 
@@ -637,7 +717,6 @@ int git_iterator_for_tree(
 
 	if ((error = iterator__update_ignore_case((git_iterator *)ti, options ? options->flags : 0)) < 0)
 		goto fail;
-	ti->strncomp = iterator__ignore_case(ti) ? git__strncasecmp : git__strncmp;
 
 	if ((error = git_pool_init(&ti->pool, sizeof(tree_iterator_entry),0)) < 0 ||
 		(error = tree_iterator__create_root_frame(ti, tree)) < 0 ||
@@ -660,6 +739,8 @@ typedef struct {
 	git_vector entries;
 	git_vector_cmp entry_srch;
 	size_t current;
+	/* when limiting with a pathlist, this is the current index into it */
+	size_t pathlist_idx;
 	/* when not in autoexpand mode, use these to represent "tree" state */
 	git_buf partial;
 	size_t partial_pos;
@@ -679,10 +760,12 @@ static const git_index_entry *index_iterator__index_entry(index_iterator *ii)
 	return ie;
 }
 
-static const git_index_entry *index_iterator__advance_over_unwanted(index_iterator *ii)
+static const git_index_entry *index_iterator__advance_over_unwanted(
+	index_iterator *ii)
 {
 	const git_index_entry *ie = index_iterator__index_entry(ii);
 	const char *p;
+	size_t p_len;
 	int cmp;
 
 	while (ie) {
@@ -697,24 +780,48 @@ static const git_index_entry *index_iterator__advance_over_unwanted(index_iterat
 		 * returned.  otherwise, advance the pathlist entry or the iterator
 		 * until we find the next path that we want to return.
 		 */
-		if (ii->base.pathlist) {
-			if (ii->base.pathlist_idx >= ii->base.pathlist->length) {
+		if (ii->base.pathlist.length) {
+
+			if (ii->pathlist_idx >= ii->base.pathlist.length) {
 				ii->current = SIZE_MAX;
 				ie = NULL;
 				break;
 			}
 
-			p = ii->base.pathlist->contents[ii->base.pathlist_idx];
-			cmp = ii->base.pathlist->_cmp(p, ie->path);
+			p = git_vector_get(&ii->base.pathlist, ii->pathlist_idx);
+
+			/* trim trailing slashes that indicate an exact directory match */
+			p_len = strlen(p);
+
+			while (p_len && p[p_len-1] == '/')
+				p_len--;
+
+			cmp = ii->base.strncomp(ie->path, p, p_len);
+
+			/* we've matched the prefix - if the pathlist entry is equal to
+			 * this entry, or if the pathlist entry is a folder (eg `foo/`)
+			 * and this entry was beneath that, then continue.  otherwise,
+			 * sort the index entry path against the pathlist entry.
+			 */
+			if (cmp == 0) {
+				if (ie->path[p_len] == 0)
+					;
+				else if (ie->path[p_len] == '/')
+					;
+				else if (ie->path[p_len] < '/')
+					cmp = -1;
+				else if (ie->path[p_len] > '/')
+					cmp = 1;
+			}
 
 			if (cmp < 0) {
-				ii->base.pathlist_idx++;
+				ii->current++;
+				ie = index_iterator__index_entry(ii);
 				continue;
 			}
 
 			if (cmp > 0) {
-				ii->current++;
-				ie = index_iterator__index_entry(ii);
+				ii->pathlist_idx++;
 				continue;
 			}
 		}
@@ -861,13 +968,12 @@ static int index_iterator__reset(
 {
 	index_iterator *ii = (index_iterator *)self;
 	const git_index_entry *ie;
-	size_t pathlist_idx = 0;
 
 	if (iterator__reset_range(self, start, end) < 0)
 		return -1;
 
 	ii->current = 0;
-	ii->base.pathlist_idx = 0;
+	ii->pathlist_idx = 0;
 
 	/* if we're given a start prefix, find it; if we're given a pathlist, find
 	 * the first of those.  start at the later of the two.
@@ -961,6 +1067,7 @@ struct fs_iterator {
 	size_t root_len;
 	uint32_t dirload_flags;
 	int depth;
+	iterator_pathlist__match_t pathlist_match;
 
 	int (*enter_dir_cb)(fs_iterator *self);
 	int (*leave_dir_cb)(fs_iterator *self);
@@ -971,6 +1078,7 @@ struct fs_iterator {
 
 typedef struct {
 	struct stat st;
+	iterator_pathlist__match_t pathlist_match;
 	size_t      path_len;
 	char        path[GIT_FLEX_ARRAY];
 } fs_iterator_path_with_stat;
@@ -1052,72 +1160,22 @@ static void fs_iterator__seek_frame_start(
 		ff->index = 0;
 }
 
-typedef enum {
-	DIRLOAD_PATHLIST_NONE = 0,
-	DIRLOAD_PATHLIST_EXACT = 1,
-	DIRLOAD_PATHLIST_DIRECTORY = 2,
-} dirload_pathlist_match_t;
-
-static dirload_pathlist_match_t dirload_pathlist_match(
-	git_vector *pathlist,
-	const char *path,
-	size_t path_len,
-	int (*prefixcomp)(const char *a, const char *b))
-{
-	const char *matched;
-	size_t idx;
-
-	if (git_vector_bsearch2(
-			&idx, pathlist, pathlist->_cmp, path) != GIT_ENOTFOUND)
-		return DIRLOAD_PATHLIST_EXACT;
-
-	/* the explicit path that we've seen in the directory iterator was
-	 * not found - however, we may have hit a subdirectory in the directory
-	 * iterator.  examine the pathlist to see if it contains children of the
-	 * current path.  if so, indicate that we've found a subdirectory that
-	 * is worth examining.
-	 */
-	while ((matched = git_vector_get(pathlist, idx)) != NULL &&
-		prefixcomp(matched, path) == 0) {
-
-		if (matched[path_len] == '/')
-			return DIRLOAD_PATHLIST_DIRECTORY;
-		else if (matched[path_len] > '/')
-			break;
-
-		idx++;
-	}
-
-	return DIRLOAD_PATHLIST_NONE;
-}
-
-static int dirload_with_stat(
-	git_vector *contents,
-	const char *dirpath,
-	size_t prefix_len,
-	unsigned int flags,
-	const char *start_stat,
-	const char *end_stat,
-	git_vector *pathlist)
+static int dirload_with_stat(git_vector *contents, size_t *filtered, fs_iterator *fi)
 {
 	git_path_diriter diriter = GIT_PATH_DIRITER_INIT;
 	const char *path;
-	int (*strncomp)(const char *a, const char *b, size_t sz);
-	int (*prefixcomp)(const char *a, const char *b);
-	size_t start_len = start_stat ? strlen(start_stat) : 0;
-	size_t end_len = end_stat ? strlen(end_stat) : 0;
+	size_t start_len = fi->base.start ? strlen(fi->base.start) : 0;
+	size_t end_len = fi->base.end ? strlen(fi->base.end) : 0;
 	fs_iterator_path_with_stat *ps;
 	size_t path_len, cmp_len, ps_size;
-	dirload_pathlist_match_t pathlist_match = DIRLOAD_PATHLIST_EXACT;
+	iterator_pathlist__match_t pathlist_match = ITERATOR_PATHLIST_MATCH;
 	int error;
 
-	strncomp = (flags & GIT_PATH_DIR_IGNORE_CASE) != 0 ?
-		git__strncasecmp : git__strncmp;
-	prefixcomp = (flags & GIT_PATH_DIR_IGNORE_CASE) != 0 ?
-		git__prefixcmp_icase : git__prefixcmp;
+	*filtered = 0;
 
 	/* Any error here is equivalent to the dir not existing, skip over it */
-	if ((error = git_path_diriter_init(&diriter, dirpath, flags)) < 0) {
+	if ((error = git_path_diriter_init(
+			&diriter, fi->path.ptr, fi->dirload_flags)) < 0) {
 		error = GIT_ENOTFOUND;
 		goto done;
 	}
@@ -1126,29 +1184,35 @@ static int dirload_with_stat(
 		if ((error = git_path_diriter_fullpath(&path, &path_len, &diriter)) < 0)
 			goto done;
 
-		assert(path_len > prefix_len);
+		assert(path_len > fi->root_len);
 
 		/* remove the prefix if requested */
-		path += prefix_len;
-		path_len -= prefix_len;
+		path += fi->root_len;
+		path_len -= fi->root_len;
 
 		/* skip if before start_stat or after end_stat */
 		cmp_len = min(start_len, path_len);
-		if (cmp_len && strncomp(path, start_stat, cmp_len) < 0)
+		if (cmp_len && fi->base.strncomp(path, fi->base.start, cmp_len) < 0)
 			continue;
 		/* skip if after end_stat */
 		cmp_len = min(end_len, path_len);
-		if (cmp_len && strncomp(path, end_stat, cmp_len) > 0)
+		if (cmp_len && fi->base.strncomp(path, fi->base.end, cmp_len) > 0)
 			continue;
 
-		/* skip if we have a pathlist and this isn't in it.  note that we
-		 * haven't stat'd yet to know if it's a file or a directory, so this
-		 * match for files like `foo` when we're looking for `foo/bar`
+		/* if we have a pathlist that we're limiting to, examine this path.
+		 * if the frame has already deemed us inside the path (eg, we're in
+		 * `foo/bar` and the pathlist previously was detected to say `foo/`)
+		 * then simply continue.  otherwise, examine the pathlist looking for
+		 * this path or children of this path.
 		 */
-		if (pathlist &&
-				!(pathlist_match = dirload_pathlist_match(
-					pathlist, path, path_len, prefixcomp)))
+		if (fi->base.pathlist.length &&
+			fi->pathlist_match != ITERATOR_PATHLIST_MATCH &&
+			fi->pathlist_match != ITERATOR_PATHLIST_MATCH_DIRECTORY &&
+			!(pathlist_match = iterator_pathlist__match(&fi->base, path, path_len))) {
+
+			*filtered++;
 			continue;
+		}
 
 		/* Make sure to append two bytes, one for the path's null
 		 * termination, one for a possible trailing '/' for folders.
@@ -1170,7 +1234,7 @@ static int dirload_with_stat(
 				continue;
 			}
 
-			if (pathlist_match == DIRLOAD_PATHLIST_DIRECTORY) {
+			if (pathlist_match == ITERATOR_PATHLIST_MATCH_DIRECTORY) {
 				/* were looking for a directory, but this is a file */
 				git__free(ps);
 				continue;
@@ -1192,6 +1256,11 @@ static int dirload_with_stat(
 			continue;
 		}
 
+		/* record whether this path was explicitly found in the path list
+		 * or whether we're only examining it because something beneath it
+		 * is in the path list.
+		 */
+		ps->pathlist_match = pathlist_match;
 		git_vector_insert(contents, ps);
 	}
 
@@ -1211,6 +1280,7 @@ static int fs_iterator__expand_dir(fs_iterator *fi)
 {
 	int error;
 	fs_iterator_frame *ff;
+	size_t filtered = 0;
 
 	if (fi->depth > FS_MAX_DEPTH) {
 		giterr_set(GITERR_REPOSITORY,
@@ -1221,9 +1291,7 @@ static int fs_iterator__expand_dir(fs_iterator *fi)
 	ff = fs_iterator__alloc_frame(fi);
 	GITERR_CHECK_ALLOC(ff);
 
-	error = dirload_with_stat(&ff->entries,
-		fi->path.ptr, fi->root_len, fi->dirload_flags,
-		fi->base.start, fi->base.end, fi->base.pathlist);
+	error = dirload_with_stat(&ff->entries, &filtered, fi);
 
 	if (error < 0) {
 		git_error_state last_error = { 0 };
@@ -1418,6 +1486,7 @@ static int fs_iterator__update_entry(fs_iterator *fi)
 		return GIT_ITEROVER;
 
 	fi->entry.path = ps->path;
+	fi->pathlist_match = ps->pathlist_match;
 	git_index_entry__init_from_stat(&fi->entry, &ps->st, true);
 
 	/* need different mode here to keep directories during iteration */
@@ -1450,6 +1519,7 @@ static int fs_iterator__initialize(
 		return -1;
 	}
 	fi->root_len = fi->path.size;
+	fi->pathlist_match = ITERATOR_PATHLIST_MATCH_CHILD;
 
 	fi->dirload_flags =
 		(iterator__ignore_case(fi) ? GIT_PATH_DIR_IGNORE_CASE : 0) |
@@ -1721,6 +1791,7 @@ void git_iterator_free(git_iterator *iter)
 
 	iter->cb->free(iter);
 
+	git_vector_free(&iter->pathlist);
 	git__free(iter->start);
 	git__free(iter->end);
 
@@ -1790,7 +1861,7 @@ int git_iterator_current_parent_tree(
 		if (!(tf = tf->down) ||
 			tf->current >= tf->n_entries ||
 			!(te = tf->entries[tf->current]->te) ||
-			ti->strncomp(scan, te->filename, te->filename_len) != 0)
+			ti->base.strncomp(scan, te->filename, te->filename_len) != 0)
 			return 0;
 
 		scan += te->filename_len;
@@ -1923,9 +1994,18 @@ int git_iterator_advance_over_with_status(
 
 			if (!error)
 				continue;
+			
 			else if (error == GIT_ENOTFOUND) {
+				/* we entered this directory only hoping to find child matches to
+				 * our pathlist (eg, this is `foo` and we had a pathlist entry for
+				 * `foo/bar`).  it should not be ignored, it should be excluded.
+				 */
+				if (wi->fi.pathlist_match == ITERATOR_PATHLIST_MATCH_CHILD)
+					*status = GIT_ITERATOR_STATUS_FILTERED;
+				else
+					wi->is_ignored = GIT_IGNORE_TRUE; /* mark empty dirs ignored */
+
 				error = 0;
-				wi->is_ignored = GIT_IGNORE_TRUE; /* mark empty dirs ignored */
 			} else
 				break; /* real error, stop here */
 		} else {
diff --git a/src/iterator.h b/src/iterator.h
index 0ea2bc0..d2d61fb 100644
--- a/src/iterator.h
+++ b/src/iterator.h
@@ -38,15 +38,14 @@ typedef enum {
 	GIT_ITERATOR_INCLUDE_CONFLICTS = (1u << 5),
 } git_iterator_flag_t;
 
-
 typedef struct {
 	const char *start;
 	const char *end;
 
-	/* paths to include in the iterator (literal).  any paths not listed
-	 * will be excluded.  note that this vector may be resorted!
+	/* paths to include in the iterator (literal).  if set, any paths not
+	 * listed here will be excluded from iteration.
 	 */
-	git_vector *pathlist;
+	git_strarray pathlist;
 
 	/* flags, from above */
 	unsigned int flags;
@@ -70,8 +69,9 @@ struct git_iterator {
 	git_repository *repo;
 	char *start;
 	char *end;
-	git_vector *pathlist;
-	size_t pathlist_idx;
+	git_vector pathlist;
+	int (*strcomp)(const char *a, const char *b);
+	int (*strncomp)(const char *a, const char *b, size_t n);
 	int (*prefixcomp)(const char *str, const char *prefix);
 	size_t stat_calls;
 	unsigned int flags;
@@ -277,7 +277,8 @@ extern git_index *git_iterator_get_index(git_iterator *iter);
 typedef enum {
 	GIT_ITERATOR_STATUS_NORMAL = 0,
 	GIT_ITERATOR_STATUS_IGNORED = 1,
-	GIT_ITERATOR_STATUS_EMPTY = 2
+	GIT_ITERATOR_STATUS_EMPTY = 2,
+	GIT_ITERATOR_STATUS_FILTERED = 3
 } git_iterator_status_t;
 
 /* Advance over a directory and check if it contains no files or just
diff --git a/src/merge.c b/src/merge.c
index 1460a50..5ba263b 100644
--- a/src/merge.c
+++ b/src/merge.c
@@ -2357,7 +2357,8 @@ static int merge_check_index(size_t *conflicts, git_repository *repo, git_index 
 	}
 
 	iter_opts.flags = GIT_ITERATOR_DONT_IGNORE_CASE;
-	iter_opts.pathlist = &staged_paths;
+	iter_opts.pathlist.strings = (char **)staged_paths.contents;
+	iter_opts.pathlist.count = staged_paths.length;
 
 	if ((error = git_iterator_for_index(&iter_repo, index_repo, &iter_opts)) < 0 ||
 		(error = git_iterator_for_index(&iter_new, index_new, &iter_opts)) < 0 ||
diff --git a/tests/diff/workdir.c b/tests/diff/workdir.c
index 503d674..336f959 100644
--- a/tests/diff/workdir.c
+++ b/tests/diff/workdir.c
@@ -486,7 +486,7 @@ void test_diff_workdir__to_index_with_pathlist_disabling_fnmatch(void)
 	/* ensure that a single NULL pathspec is filtered out (like when using
 	 * fnmatch filtering)
 	 */
-	opts.pathspec.strings = &pathspec;
+
 	opts.pathspec.count   = 1;
 
 	cl_git_pass(git_diff_index_to_workdir(&diff, g_repo, NULL, &opts));
@@ -581,6 +581,30 @@ void test_diff_workdir__to_index_with_pathlist_disabling_fnmatch(void)
 
 	git_diff_free(diff);
 
+	/* ensure that multiple trailing slashes are ignored */
+	pathspec = "subdir//////";
+
+	cl_git_pass(git_diff_index_to_workdir(&diff, g_repo, NULL, &opts));
+
+	for (use_iterator = 0; use_iterator <= 1; use_iterator++) {
+		memset(&exp, 0, sizeof(exp));
+
+		if (use_iterator)
+			cl_git_pass(diff_foreach_via_iterator(
+				diff, diff_file_cb, NULL, NULL, NULL, &exp));
+		else
+			cl_git_pass(git_diff_foreach(diff, diff_file_cb, NULL, NULL, NULL, &exp));
+
+		cl_assert_equal_i(3, exp.files);
+		cl_assert_equal_i(0, exp.file_status[GIT_DELTA_ADDED]);
+		cl_assert_equal_i(1, exp.file_status[GIT_DELTA_DELETED]);
+		cl_assert_equal_i(1, exp.file_status[GIT_DELTA_MODIFIED]);
+		cl_assert_equal_i(0, exp.file_status[GIT_DELTA_IGNORED]);
+		cl_assert_equal_i(1, exp.file_status[GIT_DELTA_UNTRACKED]);
+	}
+
+	git_diff_free(diff);
+
 	/* ensure that fnmatching is completely disabled */
 	pathspec = "subdir/*";
 
diff --git a/tests/repo/iterator.c b/tests/repo/iterator.c
index 84dfbe1..5420aad 100644
--- a/tests/repo/iterator.c
+++ b/tests/repo/iterator.c
@@ -26,7 +26,7 @@ static void expect_iterator_items(
 	const git_index_entry *entry;
 	int count, error;
 	int no_trees = !(git_iterator_flags(i) & GIT_ITERATOR_INCLUDE_TREES);
-	bool v = true;
+	bool v = false;
 
 	if (expected_flat < 0) { v = true; expected_flat = -expected_flat; }
 	if (expected_total < 0) { v = true; expected_total = -expected_total; }
@@ -1099,7 +1099,8 @@ void test_repo_iterator__indexfilelist(void)
 	/* In this test we DO NOT force a case setting on the index. */
 	default_icase = ((git_index_caps(index) & GIT_INDEXCAP_IGNORE_CASE) != 0);
 
-	i_opts.pathlist = &filelist;
+	i_opts.pathlist.strings = (char **)filelist.contents;
+	i_opts.pathlist.count = filelist.length;
 
 	/* All indexfilelist iterator tests are "autoexpand with no tree entries" */
 
@@ -1147,7 +1148,8 @@ void test_repo_iterator__indexfilelist_2(void)
 	cl_git_pass(git_vector_insert(&filelist, "e"));
 	cl_git_pass(git_vector_insert(&filelist, "k/a"));
 
-	i_opts.pathlist = &filelist;
+	i_opts.pathlist.strings = (char **)filelist.contents;
+	i_opts.pathlist.count = filelist.length;
 
 	i_opts.start = "b";
 	i_opts.end = "k/D";
@@ -1188,7 +1190,8 @@ void test_repo_iterator__indexfilelist_icase(void)
 
 	/* All indexfilelist iterator tests are "autoexpand with no tree entries" */
 
-	i_opts.pathlist = &filelist;
+	i_opts.pathlist.strings = (char **)filelist.contents;
+	i_opts.pathlist.count = filelist.length;
 
 	i_opts.start = "c";
 	i_opts.end = "k/D";
@@ -1248,7 +1251,8 @@ void test_repo_iterator__workdirfilelist(void)
 	/* All indexfilelist iterator tests are "autoexpand with no tree entries" */
 	/* In this test we DO NOT force a case on the iteratords and verify default behavior. */
 
-	i_opts.pathlist = &filelist;
+	i_opts.pathlist.strings = (char **)filelist.contents;
+	i_opts.pathlist.count = filelist.length;
 
 	cl_git_pass(git_iterator_for_workdir(&i, g_repo, NULL, NULL, &i_opts));
 	expect_iterator_items(i, 8, NULL, 8, NULL);
@@ -1297,7 +1301,8 @@ void test_repo_iterator__workdirfilelist_icase(void)
 	g_repo = cl_git_sandbox_init("icase");
 
 	i_opts.flags = GIT_ITERATOR_DONT_IGNORE_CASE;
-	i_opts.pathlist = &filelist;
+	i_opts.pathlist.strings = (char **)filelist.contents;
+	i_opts.pathlist.count = filelist.length;
 
 	i_opts.start = "c";
 	i_opts.end = "k/D";