Commit 26846f4cdbaca7eac5f1030f1c2a25c8b2f8c274

Edward Thomson 2021-05-06T15:19:58

filter: remove git_buf sharing in `git_filter_list_apply_to_data`. The API `git_filter_list_apply_to_data` shares data between its out and in parameters to avoid unnecessarily copying it when there are no filters to apply. However, it does so in a manner that is potentially confusing, leaving both `git_buf`s populated with data. This is risky for end-users who have to know how to deal with this. Instead, we remove this optimization - users who want to avoid unnecessary copies can use the longstanding streaming API or check the filter status before invoking the filters.

diff --git a/include/git2/filter.h b/include/git2/filter.h
index 641391b..45aa6c5 100644
--- a/include/git2/filter.h
+++ b/include/git2/filter.h
@@ -122,18 +122,6 @@ GIT_EXTERN(int) git_filter_list_contains(
 /**
  * Apply filter list to a data buffer.
  *
- * See `git2/buffer.h` for background on `git_buf` objects.
- *
- * If the `in` buffer holds data allocated by libgit2 (i.e. `in->asize` is
- * not zero), then it will be overwritten when applying the filters.  If
- * not, then it will be left untouched.
- *
- * If there are no filters to apply (or `filters` is NULL), then the `out`
- * buffer will reference the `in` buffer data (with `asize` set to zero)
- * instead of allocating data.  This keeps allocations to a minimum, but
- * it means you have to be careful about freeing the `in` data since `out`
- * may be pointing to it!
- *
  * @param out Buffer to store the result of the filtering
  * @param filters A loaded git_filter_list (or NULL)
  * @param in Buffer containing the data to filter
diff --git a/src/filter.c b/src/filter.c
index f2ad837..72a426e 100644
--- a/src/filter.c
+++ b/src/filter.c
@@ -720,31 +720,39 @@ static void buf_stream_init(struct buf_stream *writer, git_buf *target)
 	git_buf_clear(target);
 }
 
-int git_filter_list_apply_to_data(
-	git_buf *tgt, git_filter_list *filters, git_buf *src)
+static int git_filter_list_apply_to_buffer(
+	git_buf *out,
+	git_filter_list *filters,
+	const char *in,
+	size_t in_len)
 {
 	struct buf_stream writer;
 	int error;
 
-	if ((error = git_buf_sanitize(tgt)) < 0 ||
-	    (error = git_buf_sanitize(src)) < 0)
-	    return error;
-
-	if (!filters) {
-		git_buf_attach_notowned(tgt, src->ptr, src->size);
-		return 0;
-	}
+	if ((error = git_buf_sanitize(out)) < 0)
+		return error;
 
-	buf_stream_init(&writer, tgt);
+	buf_stream_init(&writer, out);
 
 	if ((error = git_filter_list_stream_buffer(filters,
-		src->ptr, src->size, &writer.parent)) < 0)
+		in, in_len, &writer.parent)) < 0)
 			return error;
 
 	GIT_ASSERT(writer.complete);
 	return error;
 }
 
+int git_filter_list_apply_to_data(
+	git_buf *tgt, git_filter_list *filters, git_buf *src)
+{
+	int error;
+
+	if ((error = git_buf_sanitize(src)) < 0)
+	    return error;
+
+	return git_filter_list_apply_to_buffer(tgt, filters, src->ptr, src->size);
+}
+
 int git_filter_list_apply_to_file(
 	git_buf *out,
 	git_filter_list *filters,