Commit ce49c7a8a902bd3a74a59a356dd11886e83d2e92

Russell Belfer 2012-03-02T15:09:40

Add filter tests and fix some bugs

This adds some initial unit tests for file filtering and fixes some simple bugs in filter application.

diff --git a/src/blob.c b/src/blob.c
index e1f4a7f..b67f8af 100644
--- a/src/blob.c
+++ b/src/blob.c
@@ -25,6 +25,12 @@ size_t git_blob_rawsize(git_blob *blob)
 	return blob->odb_object->raw.len;
 }
 
+int git_blob__getbuf(git_buf *buffer, git_blob *blob)
+{
+	return git_buf_set(
+		buffer, blob->odb_object->raw.data, blob->odb_object->raw.len);
+}
+
 void git_blob__free(git_blob *blob)
 {
 	git_odb_object_free(blob->odb_object);
diff --git a/src/blob.h b/src/blob.h
index f810b50..0305e94 100644
--- a/src/blob.h
+++ b/src/blob.h
@@ -19,5 +19,6 @@ struct git_blob {
 
 void git_blob__free(git_blob *blob);
 int git_blob__parse(git_blob *blob, git_odb_object *obj);
+int git_blob__getbuf(git_buf *buffer, git_blob *blob);
 
 #endif
diff --git a/src/buffer.c b/src/buffer.c
index 68cc393..3098f6d 100644
--- a/src/buffer.c
+++ b/src/buffer.c
@@ -389,3 +389,10 @@ void git_buf_rtrim(git_buf *buf)
 
 	buf->ptr[buf->size] = '\0';
 }
+
+int git_buf_cmp(const git_buf *a, const git_buf *b)
+{
+	int result = memcmp(a->ptr, b->ptr, min(a->size, b->size));
+	return (result != 0) ? result :
+		(a->size < b->size) ? -1 : (a->size > b->size) ? 1 : 0;
+}
diff --git a/src/buffer.h b/src/buffer.h
index 3e9cb17..3cdd794 100644
--- a/src/buffer.h
+++ b/src/buffer.h
@@ -118,4 +118,6 @@ GIT_INLINE(int) git_buf_rfind_next(git_buf *buf, char ch)
 /* Remove whitespace from the end of the buffer */
 void git_buf_rtrim(git_buf *buf);
 
+int git_buf_cmp(const git_buf *a, const git_buf *b);
+
 #endif
diff --git a/src/crlf.c b/src/crlf.c
index 404156d..f0ec7b7 100644
--- a/src/crlf.c
+++ b/src/crlf.c
@@ -104,52 +104,32 @@ static int crlf_load_attributes(struct crlf_attrs *ca, git_repository *repo, con
 
 static int drop_crlf(git_buf *dest, const git_buf *source)
 {
-	size_t psize = source->size - 1;
-	size_t i = 0;
+	const char *scan = source->ptr, *next;
+	const char *scan_end = source->ptr + source->size;
 
-	/* Initial scan: see if we can reach the end of the document
-	 * without finding a single carriage return */
-	while (i < psize && source->ptr[i] != '\r')
-		i++;
-
-	/* Clean file? Tell the library to skip this filter */
-	if (i == psize)
-		return -1;
-
-	/* Main scan loop. Keep moving forward until we find a carriage
-	 * return, and then copy the whole chunk to the destination
-	 * buffer.
-	 *
-	 * Note that we only scan until `size - 1`, because we cannot drop a
-	 * carriage return if it's the last character in the file (what a weird
-	 * file, anyway)
+	/* Main scan loop.  Find the next carriage return and copy the
+	 * whole chunk up to that point to the destination buffer.
 	 */
-	while (i < psize) {
-		size_t org = i;
+	while ((next = memchr(scan, '\r', scan_end - scan)) != NULL) {
+		/* copy input up to \r */
+		if (next > scan)
+			git_buf_put(dest, scan, next - scan);
 
-		while (i < psize && source->ptr[i] != '\r')
-			i++;
-
-		if (i > org)
-			git_buf_put(dest, source->ptr + org, i - org);
-
-		/* We found a carriage return. Is the next character a newline?
-		 * If it is, we just keep moving. The newline will be copied
-		 * to the dest in the next chunk.
-		 *
-		 * If it's not a newline, we need to insert the carriage return
-		 * into the dest buffer, because we don't drop lone CRs.
-		 */
-		if (source->ptr[i + 1] != '\n') {
+		/* Do not drop \r unless it is followed by \n */
+		if (*(next + 1) != '\n')
 			git_buf_putc(dest, '\r');
-		}
-		
-		i++;
+
+		scan = next + 1;
 	}
 
-	/* Copy the last character in the file */
-	git_buf_putc(dest, source->ptr[psize]);
-	return 0;
+	/* If there was no \r, then tell the library to skip this filter */
+	if (scan == source->ptr)
+		return -1;
+
+	/* Copy remaining input into dest */
+	git_buf_put(dest, scan, scan_end - scan);
+
+	return git_buf_lasterror(dest);
 }
 
 static int crlf_apply_to_odb(git_filter *self, git_buf *dest, const git_buf *source)
diff --git a/src/filter.c b/src/filter.c
index f93730a..f0ee1ad 100644
--- a/src/filter.c
+++ b/src/filter.c
@@ -12,7 +12,7 @@
 #include "repository.h"
 #include "git2/config.h"
 
-/* Fresh from Core Git. I wonder what we could use this for... */
+/* Tweaked from Core Git. I wonder what we could use this for... */
 void git_text_gather_stats(git_text_stats *stats, const git_buf *text)
 {
 	size_t i;
@@ -27,20 +27,20 @@ void git_text_gather_stats(git_text_stats *stats, const git_buf *text)
 
 			if (i + 1 < text->size && text->ptr[i + 1] == '\n')
 				stats->crlf++;
-
-			continue;
 		}
 
-		if (c == '\n') {
+		else if (c == '\n')
 			stats->lf++;
-			continue;
-		}
 
-		if (c == 127)
+		else if (c == 0x85)
+			/* Unicode CR+LF */
+			stats->crlf++;
+
+		else if (c == 127)
 			/* DEL */
 			stats->nonprintable++;
 
-		else if (c < 32) {
+		else if (c <= 0x1F || (c >= 0x80 && c <= 0x9F)) {
 			switch (c) {
 				/* BS, HT, ESC and FF */
 			case '\b': case '\t': case '\033': case '\014':
@@ -53,6 +53,7 @@ void git_text_gather_stats(git_text_stats *stats, const git_buf *text)
 				stats->nonprintable++;
 			}
 		}
+
 		else
 			stats->printable++;
 	}
@@ -118,7 +119,7 @@ void git_filters_free(git_vector *filters)
 
 int git_filters_apply(git_buf *dest, git_buf *source, git_vector *filters)
 {
-	unsigned int src, dst, i;
+	unsigned int i, src;
 	git_buf *dbuffer[2];
 
 	dbuffer[0] = source;
@@ -138,28 +139,26 @@ int git_filters_apply(git_buf *dest, git_buf *source, git_vector *filters)
 
 	for (i = 0; i < filters->length; ++i) {
 		git_filter *filter = git_vector_get(filters, i);
-		dst = (src + 1) % 2;
+		unsigned int dst = 1 - src;
 
 		git_buf_clear(dbuffer[dst]);
 
-		/* Apply the filter, from dbuffer[src] to dbuffer[dst];
+		/* Apply the filter from dbuffer[src] to the other buffer;
 		 * if the filtering is canceled by the user mid-filter,
 		 * we skip to the next filter without changing the source
 		 * of the double buffering (so that the text goes through
 		 * cleanly).
 		 */
-		if (filter->apply(filter, dbuffer[dst], dbuffer[src]) == 0) {
-			src = (src + 1) % 2;
-		}
+		if (filter->apply(filter, dbuffer[dst], dbuffer[src]) == 0)
+			src = dst;
 
 		if (git_buf_oom(dbuffer[dst]))
 			return GIT_ENOMEM;
 	}
 
 	/* Ensure that the output ends up in dbuffer[1] (i.e. the dest) */
-	if (dst != 1) {
+	if (src != 1)
 		git_buf_swap(dest, source);
-	}
 
 	return GIT_SUCCESS;
 }
diff --git a/tests-clar/clar_helpers.c b/tests-clar/clar_helpers.c
index eea8bc8..1ef5a9b 100644
--- a/tests-clar/clar_helpers.c
+++ b/tests-clar/clar_helpers.c
@@ -27,3 +27,15 @@ void cl_git_mkfile(const char *filename, const char *content)
 
 	cl_must_pass(p_close(fd));
 }
+
+void cl_git_append2file(const char *filename, const char *new_content)
+{
+	int fd = p_open(filename, O_WRONLY | O_APPEND | O_CREAT);
+	cl_assert(fd != 0);
+	if (!new_content)
+		new_content = "\n";
+	cl_must_pass(p_write(fd, new_content, strlen(new_content)));
+	cl_must_pass(p_close(fd));
+	cl_must_pass(p_chmod(filename, 0644));
+}
+
diff --git a/tests-clar/clar_libgit2.h b/tests-clar/clar_libgit2.h
index 73ef668..fd5c16a 100644
--- a/tests-clar/clar_libgit2.h
+++ b/tests-clar/clar_libgit2.h
@@ -53,5 +53,6 @@ GIT_INLINE(void) cl_assert_strequal_internal(
 
 /* Write the contents of a buffer to disk */
 void cl_git_mkfile(const char *filename, const char *content);
+void cl_git_append2file(const char *filename, const char *new_content);
 
 #endif
diff --git a/tests-clar/object/blob/filter.c b/tests-clar/object/blob/filter.c
new file mode 100644
index 0000000..0b87b2b
--- /dev/null
+++ b/tests-clar/object/blob/filter.c
@@ -0,0 +1,125 @@
+#include "clar_libgit2.h"
+#include "posix.h"
+#include "blob.h"
+#include "filter.h"
+
+static git_repository *g_repo = NULL;
+#define NUM_TEST_OBJECTS 6
+static git_oid g_oids[NUM_TEST_OBJECTS];
+static const char *g_raw[NUM_TEST_OBJECTS] = {
+	"",
+	"foo\nbar\n",
+	"foo\rbar\r",
+	"foo\r\nbar\r\n",
+	"foo\nbar\rboth\r\nreversed\n\ragain\nproblems\r",
+	"123\n\000\001\002\003\004abc\255\254\253\r\n"
+};
+static int g_len[NUM_TEST_OBJECTS] = { -1, -1, -1, -1, -1, 17 };
+static git_text_stats g_stats[NUM_TEST_OBJECTS] = {
+	{ 0, 0, 0, 0, 0, 0 },
+	{ 0, 0, 2, 0, 6, 0 },
+	{ 0, 2, 0, 0, 6, 0 },
+	{ 0, 2, 2, 2, 6, 0 },
+	{ 0, 4, 4, 1, 31, 0 },
+	{ 1, 1, 2, 1, 9, 5 }
+};
+static git_buf g_crlf_filtered[NUM_TEST_OBJECTS] = {
+	{ "", 0, 0 },
+	{ "foo\nbar\n", 0, 8 },
+	{ "foo\rbar\r", 0, 8 },
+	{ "foo\nbar\n", 0, 8 },
+	{ "foo\nbar\rboth\nreversed\n\ragain\nproblems\r", 0, 38 },
+	{ "123\n\000\001\002\003\004abc\255\254\253\n", 0, 16 }
+};
+
+void test_object_blob_filter__initialize(void)
+{
+	int i;
+
+	cl_fixture_sandbox("empty_standard_repo");
+	cl_git_pass(p_rename(
+		"empty_standard_repo/.gitted", "empty_standard_repo/.git"));
+	cl_git_pass(git_repository_open(&g_repo, "empty_standard_repo"));
+
+	for (i = 0; i < NUM_TEST_OBJECTS; i++) {
+		size_t len = (g_len[i] < 0) ? strlen(g_raw[i]) : (size_t)g_len[i];
+		g_len[i] = (int)len;
+
+		cl_git_pass(
+			git_blob_create_frombuffer(&g_oids[i], g_repo, g_raw[i], len)
+		);
+	}
+}
+
+void test_object_blob_filter__cleanup(void)
+{
+	git_repository_free(g_repo);
+	g_repo = NULL;
+	cl_fixture_cleanup("empty_standard_repo");
+}
+
+void test_object_blob_filter__unfiltered(void)
+{
+	int i;
+	git_blob *blob;
+
+	for (i = 0; i < NUM_TEST_OBJECTS; i++) {
+		cl_git_pass(git_blob_lookup(&blob, g_repo, &g_oids[i]));
+		cl_assert((size_t)g_len[i] == git_blob_rawsize(blob));
+		cl_assert(memcmp(git_blob_rawcontent(blob), g_raw[i], g_len[i]) == 0);
+		git_blob_free(blob);
+	}
+}
+
+void test_object_blob_filter__stats(void)
+{
+	int i;
+	git_blob *blob;
+	git_buf buf = GIT_BUF_INIT;
+	git_text_stats stats;
+
+	for (i = 0; i < NUM_TEST_OBJECTS; i++) {
+		cl_git_pass(git_blob_lookup(&blob, g_repo, &g_oids[i]));
+		cl_git_pass(git_blob__getbuf(&buf, blob));
+		git_text_gather_stats(&stats, &buf);
+		cl_assert(memcmp(&g_stats[i], &stats, sizeof(stats)) == 0);
+		git_blob_free(blob);
+	}
+
+	git_buf_free(&buf);
+}
+
+void test_object_blob_filter__to_odb(void)
+{
+	git_vector filters = GIT_VECTOR_INIT;
+	git_config *cfg;
+	int i;
+	git_blob *blob;
+	git_buf orig = GIT_BUF_INIT, out = GIT_BUF_INIT;
+
+	cl_git_pass(git_repository_config(&cfg, g_repo));
+	cl_assert(cfg);
+
+	git_attr_cache_flush(g_repo);
+	cl_git_append2file("empty_standard_repo/.gitattributes", "*.txt text\n");
+
+	cl_assert(git_filters_load(
+		&filters, g_repo, "filename.txt", GIT_FILTER_TO_ODB) > 0);
+	cl_assert(filters.length == 1);
+
+	for (i = 0; i < NUM_TEST_OBJECTS; i++) {
+		cl_git_pass(git_blob_lookup(&blob, g_repo, &g_oids[i]));
+		cl_git_pass(git_blob__getbuf(&orig, blob));
+
+		cl_git_pass(git_filters_apply(&out, &orig, &filters));
+		cl_assert(git_buf_cmp(&out, &g_crlf_filtered[i]) == 0);
+
+		git_blob_free(blob);
+	}
+
+	git_filters_free(&filters);
+	git_buf_free(&orig);
+	git_buf_free(&out);
+	git_config_free(cfg);
+}
+