Commit 33b1d3fd62c3702ff6cc85c95dc01f372253851e

lhchavez 2022-04-05T13:10:33

[midx] Fix an undefined behavior (left-shift signed overflow) There was a missing check to ensure that the `off64_t` (which is a signed value) didn't overflow when parsing it from the midx file. This shouldn't have huge repercusions since the parsed value is immediately validated afterwards, but then again, there is no such thing as "benign" undefined behavior. This change makes all the bitwise arithmetic happen with unsigned types and is only casted to `off64_t` until the very end. Thanks to Taotao Gu for finding and reporting this!

diff --git a/fuzzers/corpora/midx/666a779eed16847c6930a71c0547a34e52db409e b/fuzzers/corpora/midx/666a779eed16847c6930a71c0547a34e52db409e
new file mode 100644
index 0000000..ed9e0d0
Binary files /dev/null and b/fuzzers/corpora/midx/666a779eed16847c6930a71c0547a34e52db409e differ
diff --git a/src/libgit2/midx.c b/src/libgit2/midx.c
index eb99e73..0092601 100644
--- a/src/libgit2/midx.c
+++ b/src/libgit2/midx.c
@@ -225,8 +225,13 @@ int git_midx_parse(
 	chunk_hdr = data + sizeof(struct git_midx_header);
 	last_chunk = NULL;
 	for (i = 0; i < hdr->chunks; ++i, chunk_hdr += 12) {
-		chunk_offset = ((off64_t)ntohl(*((uint32_t *)(chunk_hdr + 4)))) << 32 |
-				((off64_t)ntohl(*((uint32_t *)(chunk_hdr + 8))));
+		uint32_t chunk_id = ntohl(*((uint32_t *)(chunk_hdr + 0)));
+		uint64_t high_offset = ((uint64_t)ntohl(*((uint32_t *)(chunk_hdr + 4)))) & 0xffffffffu;
+		uint64_t low_offset = ((uint64_t)ntohl(*((uint32_t *)(chunk_hdr + 8)))) & 0xffffffffu;
+
+		if (high_offset >= INT32_MAX)
+			return midx_error("chunk offset out of range");
+		chunk_offset = (off64_t)(high_offset << 32 | low_offset);
 		if (chunk_offset < last_chunk_offset)
 			return midx_error("chunks are non-monotonic");
 		if (chunk_offset >= trailer_offset)
@@ -235,7 +240,7 @@ int git_midx_parse(
 			last_chunk->length = (size_t)(chunk_offset - last_chunk_offset);
 		last_chunk_offset = chunk_offset;
 
-		switch (ntohl(*((uint32_t *)(chunk_hdr + 0)))) {
+		switch (chunk_id) {
 		case MIDX_PACKFILE_NAMES_ID:
 			chunk_packfile_names.offset = last_chunk_offset;
 			last_chunk = &chunk_packfile_names;