Commit 30c8e26074925fde4e2a0a6f5bb6a5a0398c277f

Edward Thomson 2015-12-14T13:53:26

Merge pull request #3521 from pks-t/blame-line-overflow Line count overflow in git_blame_hunk and git_blame__entry

diff --git a/include/git2/blame.h b/include/git2/blame.h
index 173e999..84bb7f9 100644
--- a/include/git2/blame.h
+++ b/include/git2/blame.h
@@ -74,8 +74,8 @@ typedef struct git_blame_options {
 	uint16_t min_match_characters;
 	git_oid newest_commit;
 	git_oid oldest_commit;
-	uint32_t min_line;
-	uint32_t max_line;
+	size_t min_line;
+	size_t max_line;
 } git_blame_options;
 
 #define GIT_BLAME_OPTIONS_VERSION 1
@@ -113,15 +113,15 @@ GIT_EXTERN(int) git_blame_init_options(
  *   root, or the commit specified in git_blame_options.oldest_commit)
  */
 typedef struct git_blame_hunk {
-	uint16_t lines_in_hunk;
+	size_t lines_in_hunk;
 
 	git_oid final_commit_id;
-	uint16_t final_start_line_number;
+	size_t final_start_line_number;
 	git_signature *final_signature;
 
 	git_oid orig_commit_id;
 	const char *orig_path;
-	uint16_t orig_start_line_number;
+	size_t orig_start_line_number;
 	git_signature *orig_signature;
 
 	char boundary;
@@ -156,7 +156,7 @@ GIT_EXTERN(const git_blame_hunk*) git_blame_get_hunk_byindex(
  */
 GIT_EXTERN(const git_blame_hunk*) git_blame_get_hunk_byline(
 		git_blame *blame,
-		uint32_t lineno);
+		size_t lineno);
 
 /**
  * Get the blame for a single file.
diff --git a/src/blame.c b/src/blame.c
index 08a90dc..2daf915 100644
--- a/src/blame.c
+++ b/src/blame.c
@@ -23,8 +23,8 @@ static int hunk_byfinalline_search_cmp(const void *key, const void *entry)
 	git_blame_hunk *hunk = (git_blame_hunk*)entry;
 
 	size_t lineno = *(size_t*)key;
-	size_t lines_in_hunk = (size_t)hunk->lines_in_hunk;
-	size_t final_start_line_number = (size_t)hunk->final_start_line_number;
+	size_t lines_in_hunk = hunk->lines_in_hunk;
+	size_t final_start_line_number = hunk->final_start_line_number;
 
 	if (lineno < final_start_line_number)
 		return -1;
@@ -44,7 +44,7 @@ static int hunk_cmp(const void *_a, const void *_b)
 
 static bool hunk_ends_at_or_before_line(git_blame_hunk *hunk, size_t line)
 {
-	return line >= (size_t)(hunk->final_start_line_number + hunk->lines_in_hunk - 1);
+	return line >= (hunk->final_start_line_number + hunk->lines_in_hunk - 1);
 }
 
 static bool hunk_starts_at_or_after_line(git_blame_hunk *hunk, size_t line)
@@ -53,9 +53,9 @@ static bool hunk_starts_at_or_after_line(git_blame_hunk *hunk, size_t line)
 }
 
 static git_blame_hunk* new_hunk(
-		uint16_t start,
-		uint16_t lines,
-		uint16_t orig_start,
+		size_t start,
+		size_t lines,
+		size_t orig_start,
 		const char *path)
 {
 	git_blame_hunk *hunk = git__calloc(1, sizeof(git_blame_hunk));
@@ -166,9 +166,9 @@ const git_blame_hunk *git_blame_get_hunk_byindex(git_blame *blame, uint32_t inde
 	return (git_blame_hunk*)git_vector_get(&blame->hunks, index);
 }
 
-const git_blame_hunk *git_blame_get_hunk_byline(git_blame *blame, uint32_t lineno)
+const git_blame_hunk *git_blame_get_hunk_byline(git_blame *blame, size_t lineno)
 {
-	size_t i, new_lineno = (size_t)lineno;
+	size_t i, new_lineno = lineno;
 	assert(blame);
 
 	if (!git_vector_bsearch2(&i, &blame->hunks, hunk_byfinalline_search_cmp, &new_lineno)) {
@@ -223,8 +223,8 @@ static git_blame_hunk *split_hunk_in_vector(
 	}
 
 	new_line_count = hunk->lines_in_hunk - rel_line;
-	nh = new_hunk((uint16_t)(hunk->final_start_line_number+rel_line), (uint16_t)new_line_count,
-			(uint16_t)(hunk->orig_start_line_number+rel_line), hunk->orig_path);
+	nh = new_hunk(hunk->final_start_line_number + rel_line, new_line_count,
+			hunk->orig_start_line_number + rel_line, hunk->orig_path);
 
 	if (!nh)
 		return NULL;
@@ -233,7 +233,7 @@ static git_blame_hunk *split_hunk_in_vector(
 	git_oid_cpy(&nh->orig_commit_id, &hunk->orig_commit_id);
 
 	/* Adjust hunk that was split */
-	hunk->lines_in_hunk -= (uint16_t)new_line_count;
+	hunk->lines_in_hunk -= new_line_count;
 	git_vector_insert_sorted(vec, nh, NULL);
 	{
 		git_blame_hunk *ret = return_new ? nh : hunk;
@@ -442,7 +442,7 @@ static int buffer_line_cb(
 		} else {
 			/* Create a new buffer-blame hunk with this line */
 			shift_hunks_by(&blame->hunks, blame->current_diff_line, 1);
-			blame->current_hunk = new_hunk((uint16_t)blame->current_diff_line, 1, 0, blame->path);
+			blame->current_hunk = new_hunk(blame->current_diff_line, 1, 0, blame->path);
 			GITERR_CHECK_ALLOC(blame->current_hunk);
 
 			git_vector_insert_sorted(&blame->hunks, blame->current_hunk, NULL);
diff --git a/src/blame.h b/src/blame.h
index 7e23de8..d8db8d5 100644
--- a/src/blame.h
+++ b/src/blame.h
@@ -31,10 +31,10 @@ typedef struct git_blame__entry {
 	/* the first line of this group in the final image;
 	 * internally all line numbers are 0 based.
 	 */
-	int lno;
+	size_t lno;
 
 	/* how many lines this group has */
-	int num_lines;
+	size_t num_lines;
 
 	/* the commit that introduced this group into the final image */
 	git_blame__origin *suspect;
@@ -51,7 +51,7 @@ typedef struct git_blame__entry {
 	/* the line number of the first line of this group in the
 	 * suspect's file; internally all line numbers are 0 based.
 	 */
-	int s_lno;
+	size_t s_lno;
 
 	/* how significant this entry is -- cached to avoid
 	 * scanning the lines over and over.
diff --git a/src/blame_git.c b/src/blame_git.c
index 67bae23..b8b5682 100644
--- a/src/blame_git.c
+++ b/src/blame_git.c
@@ -93,18 +93,25 @@ static bool same_suspect(git_blame__origin *a, git_blame__origin *b)
 }
 
 /* find the line number of the last line the target is suspected for */
-static int find_last_in_target(git_blame *blame, git_blame__origin *target)
+static bool find_last_in_target(size_t *out, git_blame *blame, git_blame__origin *target)
 {
 	git_blame__entry *e;
-	int last_in_target = -1;
+	size_t last_in_target = 0;
+	bool found = false;
+
+	*out = 0;
 
 	for (e=blame->ent; e; e=e->next) {
 		if (e->guilty || !same_suspect(e->suspect, target))
 			continue;
-		if (last_in_target < e->s_lno + e->num_lines)
+		if (last_in_target < e->s_lno + e->num_lines) {
+			found = true;
 			last_in_target = e->s_lno + e->num_lines;
+		}
 	}
-	return last_in_target;
+
+	*out = last_in_target;
+	return found;
 }
 
 /*
@@ -122,9 +129,9 @@ static int find_last_in_target(git_blame *blame, git_blame__origin *target)
  * to be blamed for the parent, and after that portion.
  */
 static void split_overlap(git_blame__entry *split, git_blame__entry *e,
-		int tlno, int plno, int same, git_blame__origin *parent)
+		size_t tlno, size_t plno, size_t same, git_blame__origin *parent)
 {
-	int chunk_end_lno;
+	size_t chunk_end_lno;
 
 	if (e->s_lno < tlno) {
 		/* there is a pre-chunk part not blamed on the parent */
@@ -265,9 +272,9 @@ static void decref_split(git_blame__entry *split)
 static void blame_overlap(
 		git_blame *blame,
 		git_blame__entry *e,
-		int tlno,
-		int plno,
-		int same,
+		size_t tlno,
+		size_t plno,
+		size_t same,
 		git_blame__origin *parent)
 {
 	git_blame__entry split[3] = {{0}};
@@ -285,9 +292,9 @@ static void blame_overlap(
  */
 static void blame_chunk(
 		git_blame *blame,
-		int tlno,
-		int plno,
-		int same,
+		size_t tlno,
+		size_t plno,
+		size_t same,
 		git_blame__origin *target,
 		git_blame__origin *parent)
 {
@@ -314,7 +321,7 @@ static int my_emit(
 	blame_chunk(d->blame, d->tlno, d->plno, start_b, d->target, d->parent);
 	d->plno = start_a + count_a;
 	d->tlno = start_b + count_b;
-	
+
 	return 0;
 }
 
@@ -376,12 +383,11 @@ static int pass_blame_to_parent(
 		git_blame__origin *target,
 		git_blame__origin *parent)
 {
-	int last_in_target;
+	size_t last_in_target;
 	mmfile_t file_p, file_o;
 	blame_chunk_cb_data d = { blame, target, parent, 0, 0 };
 
-	last_in_target = find_last_in_target(blame, target);
-	if (last_in_target < 0)
+	if (!find_last_in_target(&last_in_target, blame, target))
 		return 1; /* nothing remains for this target */
 
 	fill_origin_blob(parent, &file_p);
diff --git a/tests/blame/blame_helpers.c b/tests/blame/blame_helpers.c
index b305ba1..61e8735 100644
--- a/tests/blame/blame_helpers.c
+++ b/tests/blame/blame_helpers.c
@@ -4,7 +4,7 @@ void hunk_message(size_t idx, const git_blame_hunk *hunk, const char *fmt, ...)
 {
 	va_list arglist;
 
-	printf("Hunk %"PRIuZ" (line %d +%d): ", idx,
+	printf("Hunk %"PRIuZ" (line %"PRIuZ" +%"PRIuZ"): ", idx,
 			hunk->final_start_line_number, hunk->lines_in_hunk-1);
 
 	va_start(arglist, fmt);
@@ -15,7 +15,7 @@ void hunk_message(size_t idx, const git_blame_hunk *hunk, const char *fmt, ...)
 }
 
 void check_blame_hunk_index(git_repository *repo, git_blame *blame, int idx,
-		int start_line, int len, char boundary, const char *commit_id, const char *orig_path)
+		size_t start_line, size_t len, char boundary, const char *commit_id, const char *orig_path)
 {
 	char expected[GIT_OID_HEXSZ+1] = {0}, actual[GIT_OID_HEXSZ+1] = {0};
 	const git_blame_hunk *hunk = git_blame_get_hunk_byindex(blame, idx);
diff --git a/tests/blame/blame_helpers.h b/tests/blame/blame_helpers.h
index 94321a5..fd5a35d 100644
--- a/tests/blame/blame_helpers.h
+++ b/tests/blame/blame_helpers.h
@@ -7,10 +7,8 @@ void check_blame_hunk_index(
 		git_repository *repo,
 		git_blame *blame,
 		int idx,
-		int start_line,
-		int len,
+		size_t start_line,
+		size_t len,
 		char boundary,
 		const char *commit_id,
 		const char *orig_path);
-
-
diff --git a/tests/blame/simple.c b/tests/blame/simple.c
index 83e5e05..30b7816 100644
--- a/tests/blame/simple.c
+++ b/tests/blame/simple.c
@@ -281,6 +281,18 @@ void test_blame_simple__can_restrict_lines_both(void)
 	check_blame_hunk_index(g_repo, g_blame, 2,  6, 2, 0, "63d671eb", "b.txt");
 }
 
+void test_blame_simple__can_blame_huge_file(void)
+{
+	git_blame_options opts = GIT_BLAME_OPTIONS_INIT;
+
+	cl_git_pass(git_repository_open(&g_repo, cl_fixture("blametest.git")));
+
+	cl_git_pass(git_blame_file(&g_blame, g_repo, "huge.txt", &opts));
+	cl_assert_equal_i(2, git_blame_get_hunk_count(g_blame));
+	check_blame_hunk_index(g_repo, g_blame, 0, 1,     65536, 0, "4eecfea", "huge.txt");
+	check_blame_hunk_index(g_repo, g_blame, 1, 65537, 1,     0, "6653ff4", "huge.txt");
+}
+
 /*
  * $ git blame -n branch_file.txt be3563a..HEAD
  *    orig line no                          final line no
diff --git a/tests/resources/blametest.git/objects/37/681a80ca21064efd5c3bf2ef41eb3d05a1428b b/tests/resources/blametest.git/objects/37/681a80ca21064efd5c3bf2ef41eb3d05a1428b
new file mode 100644
index 0000000..a6ca0fb
Binary files /dev/null and b/tests/resources/blametest.git/objects/37/681a80ca21064efd5c3bf2ef41eb3d05a1428b differ
diff --git a/tests/resources/blametest.git/objects/4e/ecfea484f8005d101e547f6bfb07c99e2b114e b/tests/resources/blametest.git/objects/4e/ecfea484f8005d101e547f6bfb07c99e2b114e
new file mode 100644
index 0000000..79e0ada
Binary files /dev/null and b/tests/resources/blametest.git/objects/4e/ecfea484f8005d101e547f6bfb07c99e2b114e differ
diff --git a/tests/resources/blametest.git/objects/5a/572e2e94825f54b95417eacaa089d560c5a5e9 b/tests/resources/blametest.git/objects/5a/572e2e94825f54b95417eacaa089d560c5a5e9
new file mode 100644
index 0000000..348beab
Binary files /dev/null and b/tests/resources/blametest.git/objects/5a/572e2e94825f54b95417eacaa089d560c5a5e9 differ
diff --git a/tests/resources/blametest.git/objects/66/53ff42313eb5c82806f145391b18a9699800c7 b/tests/resources/blametest.git/objects/66/53ff42313eb5c82806f145391b18a9699800c7
new file mode 100644
index 0000000..1f11409
Binary files /dev/null and b/tests/resources/blametest.git/objects/66/53ff42313eb5c82806f145391b18a9699800c7 differ
diff --git a/tests/resources/blametest.git/objects/ad/9cb4eac23df2fe5e1264287a5872ea2a1ff8b2 b/tests/resources/blametest.git/objects/ad/9cb4eac23df2fe5e1264287a5872ea2a1ff8b2
new file mode 100644
index 0000000..077e658
Binary files /dev/null and b/tests/resources/blametest.git/objects/ad/9cb4eac23df2fe5e1264287a5872ea2a1ff8b2 differ
diff --git a/tests/resources/blametest.git/objects/de/9fe35f9906e1994e083cc59c87232bf418795b b/tests/resources/blametest.git/objects/de/9fe35f9906e1994e083cc59c87232bf418795b
new file mode 100644
index 0000000..11ec90d
Binary files /dev/null and b/tests/resources/blametest.git/objects/de/9fe35f9906e1994e083cc59c87232bf418795b differ
diff --git a/tests/resources/blametest.git/refs/heads/master b/tests/resources/blametest.git/refs/heads/master
index b763025..d1bc4ca 100644
--- a/tests/resources/blametest.git/refs/heads/master
+++ b/tests/resources/blametest.git/refs/heads/master
@@ -1 +1 @@
-bc7c5ac2bafe828a68e9d1d460343718d6fbe136
+6653ff42313eb5c82806f145391b18a9699800c7