Commit b16a36e111018e46e00fce3cd4400038bdf16f62

Edward Thomson 2021-08-29T22:53:49

Merge pull request #6011 from libgit2/ethomson/filter_apply filter: filter drivers stop taking git_buf as user input

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
diff --git a/include/git2/sys/filter.h b/include/git2/sys/filter.h
index e43fde5..b375941 100644
--- a/include/git2/sys/filter.h
+++ b/include/git2/sys/filter.h
@@ -167,17 +167,18 @@ typedef void GIT_CALLBACK(git_filter_shutdown_fn)(git_filter *self);
  *
  * The `payload` will be a pointer to a reference payload for the filter.
  * This will start as NULL, but `check` can assign to this pointer for
- * later use by the `apply` callback.  Note that the value should be heap
- * allocated (not stack), so that it doesn't go away before the `apply`
+ * later use by the `stream` callback.  Note that the value should be heap
+ * allocated (not stack), so that it doesn't go away before the `stream`
  * callback can use it.  If a filter allocates and assigns a value to the
  * `payload`, it will need a `cleanup` callback to free the payload.
  */
 typedef int GIT_CALLBACK(git_filter_check_fn)(
-	git_filter  *self,
-	void       **payload, /* points to NULL ptr on entry, may be set */
+	git_filter              *self,
+	void                   **payload, /* NULL on entry, may be set */
 	const git_filter_source *src,
-	const char **attr_values);
+	const char             **attr_values);
 
+#ifndef GIT_DEPRECATE_HARD
 /**
  * Callback to actually perform the data filtering
  *
@@ -189,32 +190,45 @@ typedef int GIT_CALLBACK(git_filter_check_fn)(
  *
  * The `payload` value will refer to any payload that was set by the
  * `check` callback.  It may be read from or written to as needed.
+ *
+ * @deprecated use git_filter_stream_fn
  */
 typedef int GIT_CALLBACK(git_filter_apply_fn)(
-	git_filter    *self,
-	void         **payload, /* may be read and/or set */
-	git_buf       *to,
-	const git_buf *from,
+	git_filter              *self,
+	void                   **payload, /* may be read and/or set */
+	git_buf                 *to,
+	const git_buf           *from,
 	const git_filter_source *src);
+#endif
 
+/**
+ * Callback to perform the data filtering.
+ *
+ * Specified as `filter.stream`, this is a callback that filters data
+ * in a streaming manner.  This function will provide a
+ * `git_writestream` that will the original data will be written to;
+ * with that data, the `git_writestream` will then perform the filter
+ * translation and stream the filtered data out to the `next` location.
+ */
 typedef int GIT_CALLBACK(git_filter_stream_fn)(
-	git_writestream **out,
-	git_filter *self,
-	void **payload,
+	git_writestream        **out,
+	git_filter              *self,
+	void                   **payload,
 	const git_filter_source *src,
-	git_writestream *next);
+	git_writestream         *next);
 
 /**
  * Callback to clean up after filtering has been applied
  *
  * Specified as `filter.cleanup`, this is an optional callback invoked
- * after the filter has been applied.  If the `check` or `apply` callbacks
- * allocated a `payload` to keep per-source filter state, use this
- * callback to free that payload and release resources as required.
+ * after the filter has been applied.  If the `check`, `apply`, or
+ * `stream` callbacks allocated a `payload` to keep per-source filter
+ * state, use this callback to free that payload and release resources
+ * as required.
  */
 typedef void GIT_CALLBACK(git_filter_cleanup_fn)(
-	git_filter *self,
-	void       *payload);
+	git_filter              *self,
+	void                    *payload);
 
 /**
  * Filter structure used to register custom filters.
@@ -248,21 +262,28 @@ struct git_filter {
 	/**
 	 * Called to determine whether the filter should be invoked for a
 	 * given file.  If this function returns `GIT_PASSTHROUGH` then the
-	 * `apply` function will not be invoked and the contents will be passed
-	 * through unmodified.
+	 * `stream` or `apply` functions will not be invoked and the
+	 * contents will be passed through unmodified.
 	 */
 	git_filter_check_fn    check;
 
+#ifdef GIT_DEPRECATE_HARD
+	void *reserved;
+#else
 	/**
-	 * Called to actually apply the filter to file contents.  If this
-	 * function returns `GIT_PASSTHROUGH` then the contents will be passed
-	 * through unmodified.
+	 * Provided for backward compatibility; this will apply the
+	 * filter to the given contents in a `git_buf`.  Callers should
+	 * provide a `stream` function instead.
 	 */
 	git_filter_apply_fn    apply;
+#endif
 
 	/**
-	 * Called to apply the filter in a streaming manner.  If this is not
-	 * specified then the system will call `apply` with the whole buffer.
+	 * Called to apply the filter, this function will provide a
+	 * `git_writestream` that will the original data will be
+	 * written to; with that data, the `git_writestream` will then
+	 * perform the filter translation and stream the filtered data
+	 * out to the `next` location.
 	 */
 	git_filter_stream_fn   stream;
 
@@ -289,9 +310,9 @@ GIT_EXTERN(int) git_filter_init(git_filter *filter, unsigned int version);
  * As mentioned elsewhere, the initialize callback will not be invoked
  * immediately.  It is deferred until the filter is used in some way.
  *
- * A filter's attribute checks and `check` and `apply` callbacks will be
- * issued in order of `priority` on smudge (to workdir), and in reverse
- * order of `priority` on clean (to odb).
+ * A filter's attribute checks and `check` and `stream` (or `apply`)
+ * callbacks will be issued in order of `priority` on smudge (to
+ * workdir), and in reverse order of `priority` on clean (to odb).
  *
  * Two filters are preregistered with libgit2:
  * - GIT_FILTER_CRLF with priority 0
diff --git a/src/crlf.c b/src/crlf.c
index 1de9d8c..406f714 100644
--- a/src/crlf.c
+++ b/src/crlf.c
@@ -386,6 +386,17 @@ static int crlf_apply(
 		return crlf_apply_to_odb(*payload, to, from, src);
 }
 
+static int crlf_stream(
+	git_writestream **out,
+	git_filter *self,
+	void **payload,
+	const git_filter_source *src,
+	git_writestream *next)
+{
+	return git_filter_buffered_stream_new(out,
+		self, crlf_apply, NULL, payload, src, next);
+}
+
 static void crlf_cleanup(
 	git_filter *self,
 	void       *payload)
@@ -405,7 +416,7 @@ git_filter *git_crlf_filter_new(void)
 	f->f.initialize = NULL;
 	f->f.shutdown = git_filter_free;
 	f->f.check    = crlf_check;
-	f->f.apply    = crlf_apply;
+	f->f.stream   = crlf_stream;
 	f->f.cleanup  = crlf_cleanup;
 
 	return (git_filter *)f;
diff --git a/src/filter.c b/src/filter.c
index eed175e..f1d9614 100644
--- a/src/filter.c
+++ b/src/filter.c
@@ -839,9 +839,10 @@ int git_filter_list_apply_to_blob(
 	return error;
 }
 
-struct proxy_stream {
+struct buffered_stream {
 	git_writestream parent;
 	git_filter *filter;
+	int (*write_fn)(git_filter *, void **, git_buf *, const git_buf *, const git_filter_source *);
 	const git_filter_source *source;
 	void **payload;
 	git_buf input;
@@ -850,92 +851,120 @@ struct proxy_stream {
 	git_writestream *target;
 };
 
-static int proxy_stream_write(
+static int buffered_stream_write(
 	git_writestream *s, const char *buffer, size_t len)
 {
-	struct proxy_stream *proxy_stream = (struct proxy_stream *)s;
-	GIT_ASSERT_ARG(proxy_stream);
+	struct buffered_stream *buffered_stream = (struct buffered_stream *)s;
+	GIT_ASSERT_ARG(buffered_stream);
 
-	return git_buf_put(&proxy_stream->input, buffer, len);
+	return git_buf_put(&buffered_stream->input, buffer, len);
 }
 
-static int proxy_stream_close(git_writestream *s)
+static int buffered_stream_close(git_writestream *s)
 {
-	struct proxy_stream *proxy_stream = (struct proxy_stream *)s;
+	struct buffered_stream *buffered_stream = (struct buffered_stream *)s;
 	git_buf *writebuf;
 	git_error_state error_state = {0};
 	int error;
 
-	GIT_ASSERT_ARG(proxy_stream);
+	GIT_ASSERT_ARG(buffered_stream);
 
-	error = proxy_stream->filter->apply(
-		proxy_stream->filter,
-		proxy_stream->payload,
-		proxy_stream->output,
-		&proxy_stream->input,
-		proxy_stream->source);
+	error = buffered_stream->write_fn(
+		buffered_stream->filter,
+		buffered_stream->payload,
+		buffered_stream->output,
+		&buffered_stream->input,
+		buffered_stream->source);
 
 	if (error == GIT_PASSTHROUGH) {
-		writebuf = &proxy_stream->input;
+		writebuf = &buffered_stream->input;
 	} else if (error == 0) {
-		if ((error = git_buf_sanitize(proxy_stream->output)) < 0)
+		if ((error = git_buf_sanitize(buffered_stream->output)) < 0)
 			return error;
 
-		writebuf = proxy_stream->output;
+		writebuf = buffered_stream->output;
 	} else {
 		/* close stream before erroring out taking care
 		 * to preserve the original error */
 		git_error_state_capture(&error_state, error);
-		proxy_stream->target->close(proxy_stream->target);
+		buffered_stream->target->close(buffered_stream->target);
 		git_error_state_restore(&error_state);
 		return error;
 	}
 
-	if ((error = proxy_stream->target->write(
-			proxy_stream->target, writebuf->ptr, writebuf->size)) == 0)
-		error = proxy_stream->target->close(proxy_stream->target);
+	if ((error = buffered_stream->target->write(
+			buffered_stream->target, writebuf->ptr, writebuf->size)) == 0)
+		error = buffered_stream->target->close(buffered_stream->target);
 
 	return error;
 }
 
-static void proxy_stream_free(git_writestream *s)
+static void buffered_stream_free(git_writestream *s)
 {
-	struct proxy_stream *proxy_stream = (struct proxy_stream *)s;
+	struct buffered_stream *buffered_stream = (struct buffered_stream *)s;
 
-	if (proxy_stream) {
-		git_buf_dispose(&proxy_stream->input);
-		git_buf_dispose(&proxy_stream->temp_buf);
-		git__free(proxy_stream);
+	if (buffered_stream) {
+		git_buf_dispose(&buffered_stream->input);
+		git_buf_dispose(&buffered_stream->temp_buf);
+		git__free(buffered_stream);
 	}
 }
 
-static int proxy_stream_init(
+int git_filter_buffered_stream_new(
 	git_writestream **out,
 	git_filter *filter,
+	int (*write_fn)(git_filter *, void **, git_buf *, const git_buf *, const git_filter_source *),
 	git_buf *temp_buf,
 	void **payload,
 	const git_filter_source *source,
 	git_writestream *target)
 {
-	struct proxy_stream *proxy_stream = git__calloc(1, sizeof(struct proxy_stream));
-	GIT_ERROR_CHECK_ALLOC(proxy_stream);
-
-	proxy_stream->parent.write = proxy_stream_write;
-	proxy_stream->parent.close = proxy_stream_close;
-	proxy_stream->parent.free = proxy_stream_free;
-	proxy_stream->filter = filter;
-	proxy_stream->payload = payload;
-	proxy_stream->source = source;
-	proxy_stream->target = target;
-	proxy_stream->output = temp_buf ? temp_buf : &proxy_stream->temp_buf;
+	struct buffered_stream *buffered_stream = git__calloc(1, sizeof(struct buffered_stream));
+	GIT_ERROR_CHECK_ALLOC(buffered_stream);
+
+	buffered_stream->parent.write = buffered_stream_write;
+	buffered_stream->parent.close = buffered_stream_close;
+	buffered_stream->parent.free = buffered_stream_free;
+	buffered_stream->filter = filter;
+	buffered_stream->write_fn = write_fn;
+	buffered_stream->output = temp_buf ? temp_buf : &buffered_stream->temp_buf;
+	buffered_stream->payload = payload;
+	buffered_stream->source = source;
+	buffered_stream->target = target;
 
 	if (temp_buf)
 		git_buf_clear(temp_buf);
 
-	*out = (git_writestream *)proxy_stream;
+	*out = (git_writestream *)buffered_stream;
 	return 0;
 }
 
+static int setup_stream(
+	git_writestream **out,
+	git_filter_entry *fe,
+	git_filter_list *filters,
+	git_writestream *last_stream)
+{
+#ifndef GIT_DEPRECATE_HARD
+	GIT_ASSERT(fe->filter->stream || fe->filter->apply);
+
+	/*
+	 * If necessary, create a stream that proxies the traditional
+	 * application.
+	 */
+	if (!fe->filter->stream) {
+		/* Create a stream that proxies the one-shot apply */
+		return git_filter_buffered_stream_new(out,
+			fe->filter, fe->filter->apply, filters->temp_buf,
+			&fe->payload, &filters->source, last_stream);
+	}
+#endif
+
+	GIT_ASSERT(fe->filter->stream);
+	return fe->filter->stream(out, fe->filter,
+		&fe->payload, &filters->source, last_stream);
+}
+
 static int stream_list_init(
 	git_writestream **out,
 	git_vector *streams,
@@ -957,22 +986,11 @@ static int stream_list_init(
 	for (i = 0; i < git_array_size(filters->filters); ++i) {
 		size_t filter_idx = (filters->source.mode == GIT_FILTER_TO_WORKTREE) ?
 			git_array_size(filters->filters) - 1 - i : i;
+
 		git_filter_entry *fe = git_array_get(filters->filters, filter_idx);
 		git_writestream *filter_stream;
 
-		GIT_ASSERT(fe->filter->stream || fe->filter->apply);
-
-		/* If necessary, create a stream that proxies the traditional
-		 * application.
-		 */
-		if (fe->filter->stream)
-			error = fe->filter->stream(&filter_stream, fe->filter,
-				&fe->payload, &filters->source, last_stream);
-		else
-			/* Create a stream that proxies the one-shot apply */
-			error = proxy_stream_init(&filter_stream, fe->filter,
-				filters->temp_buf, &fe->payload, &filters->source,
-				last_stream);
+		error = setup_stream(&filter_stream, fe, filters, last_stream);
 
 		if (error < 0)
 			goto out;
diff --git a/src/filter.h b/src/filter.h
index 55ed50e..2417912 100644
--- a/src/filter.h
+++ b/src/filter.h
@@ -11,6 +11,7 @@
 
 #include "attr_file.h"
 #include "git2/filter.h"
+#include "git2/sys/filter.h"
 
 /* Amount of file to examine for NUL byte when checking binary-ness */
 #define GIT_FILTER_BYTES_TO_CHECK_NUL 8000
@@ -51,4 +52,13 @@ extern int git_filter_list__convert_buf(
 extern git_filter *git_crlf_filter_new(void);
 extern git_filter *git_ident_filter_new(void);
 
+extern int git_filter_buffered_stream_new(
+	git_writestream **out,
+	git_filter *filter,
+	int (*write_fn)(git_filter *, void **, git_buf *, const git_buf *, const git_filter_source *),
+	git_buf *temp_buf,
+	void **payload,
+	const git_filter_source *source,
+	git_writestream *target);
+
 #endif
diff --git a/src/ident.c b/src/ident.c
index ae3ef1b..e5aab80 100644
--- a/src/ident.c
+++ b/src/ident.c
@@ -113,6 +113,17 @@ static int ident_apply(
 		return ident_remove_id(to, from);
 }
 
+static int ident_stream(
+	git_writestream **out,
+	git_filter *self,
+	void **payload,
+	const git_filter_source *src,
+	git_writestream *next)
+{
+	return git_filter_buffered_stream_new(out,
+		self, ident_apply, NULL, payload, src, next);
+}
+
 git_filter *git_ident_filter_new(void)
 {
 	git_filter *f = git__calloc(1, sizeof(git_filter));
@@ -122,7 +133,7 @@ git_filter *git_ident_filter_new(void)
 	f->version = GIT_FILTER_VERSION;
 	f->attributes = "+ident"; /* apply to files with ident attribute set */
 	f->shutdown = git_filter_free;
-	f->apply    = ident_apply;
+	f->stream   = ident_stream;
 
 	return f;
 }
diff --git a/tests/filter/custom_helpers.c b/tests/filter/custom_helpers.c
index 233ba32..ee3b635 100644
--- a/tests/filter/custom_helpers.c
+++ b/tests/filter/custom_helpers.c
@@ -37,6 +37,17 @@ int bitflip_filter_apply(
 	return 0;
 }
 
+static int bitflip_filter_stream(
+	git_writestream **out,
+	git_filter *self,
+	void **payload,
+	const git_filter_source *src,
+	git_writestream *next)
+{
+	return git_filter_buffered_stream_new(out,
+		self, bitflip_filter_apply, NULL, payload, src, next);
+}
+
 static void bitflip_filter_free(git_filter *f)
 {
 	git__free(f);
@@ -50,7 +61,7 @@ git_filter *create_bitflip_filter(void)
 	filter->version = GIT_FILTER_VERSION;
 	filter->attributes = "+bitflip";
 	filter->shutdown = bitflip_filter_free;
-	filter->apply = bitflip_filter_apply;
+	filter->stream = bitflip_filter_stream;
 
 	return filter;
 }
@@ -88,6 +99,17 @@ int reverse_filter_apply(
 	return 0;
 }
 
+static int reverse_filter_stream(
+	git_writestream **out,
+	git_filter *self,
+	void **payload,
+	const git_filter_source *src,
+	git_writestream *next)
+{
+	return git_filter_buffered_stream_new(out,
+		self, reverse_filter_apply, NULL, payload, src, next);
+}
+
 static void reverse_filter_free(git_filter *f)
 {
 	git__free(f);
@@ -101,7 +123,7 @@ git_filter *create_reverse_filter(const char *attrs)
 	filter->version = GIT_FILTER_VERSION;
 	filter->attributes = attrs;
 	filter->shutdown = reverse_filter_free;
-	filter->apply = reverse_filter_apply;
+	filter->stream = reverse_filter_stream;
 
 	return filter;
 }
diff --git a/tests/filter/wildcard.c b/tests/filter/wildcard.c
index ca1ba1a..0c9c13b 100644
--- a/tests/filter/wildcard.c
+++ b/tests/filter/wildcard.c
@@ -93,6 +93,17 @@ static int wildcard_filter_apply(
 	return GIT_PASSTHROUGH;
 }
 
+static int wildcard_filter_stream(
+	git_writestream **out,
+	git_filter *self,
+	void **payload,
+	const git_filter_source *src,
+	git_writestream *next)
+{
+	return git_filter_buffered_stream_new(out,
+		self, wildcard_filter_apply, NULL, payload, src, next);
+}
+
 static void wildcard_filter_cleanup(git_filter *self, void *payload)
 {
 	GIT_UNUSED(self);
@@ -112,7 +123,7 @@ static git_filter *create_wildcard_filter(void)
 	filter->version = GIT_FILTER_VERSION;
 	filter->attributes = "filter=*";
 	filter->check = wildcard_filter_check;
-	filter->apply = wildcard_filter_apply;
+	filter->stream = wildcard_filter_stream;
 	filter->cleanup = wildcard_filter_cleanup;
 	filter->shutdown = wildcard_filter_free;