Commit d0656ac80f33d54967425e782cbe71a0f1b963db

lhchavez 2020-06-27T12:15:26

Make the tests run cleanly under UndefinedBehaviorSanitizer This change makes the tests run cleanly under `-fsanitize=undefined,nullability` and comprises of: * Avoids some arithmetic with NULL pointers (which UBSan does not like). * Avoids an overflow in a shift, due to an uint8_t being implicitly converted to a signed 32-bit signed integer after being shifted by a 32-bit signed integer. * Avoids a unaligned read in libgit2. * Ignores unaligned reads in the SHA1 library, since it only happens on Intel processors, where it is _still_ undefined behavior, but the semantics are moderately well-understood. Of notable omission is `-fsanitize=integer`, since there are lots of warnings in zlib and the SHA1 library which probably don't make sense to fix and I could not figure out how to silence easily. libgit2 itself also has ~100s of warnings which are mostly innocuous (e.g. use of enum constants that only fit on an `uint32_t`, but there is no way to do that in a simple fashion because the data type chosen for enumerated types is implementation-defined), and investigating whether there are worrying warnings would need reducing the noise significantly.

diff --git a/src/apply.c b/src/apply.c
index f901663..b0be2d8 100644
--- a/src/apply.c
+++ b/src/apply.c
@@ -66,6 +66,9 @@ static int patch_image_init_fromstr(
 	if (git_pool_init(&out->pool, sizeof(git_diff_line)) < 0)
 		return -1;
 
+	if (!in_len)
+		return 0;
+
 	for (start = in; start < in + in_len; start = end) {
 		end = memchr(start, '\n', in_len - (start - in));
 
diff --git a/src/buffer.c b/src/buffer.c
index 3ee2775..c203650 100644
--- a/src/buffer.c
+++ b/src/buffer.c
@@ -365,7 +365,7 @@ int git_buf_encode_base85(git_buf *buf, const char *data, size_t len)
 
 		for (i = 24; i >= 0; i -= 8) {
 			uint8_t ch = *data++;
-			acc |= ch << i;
+			acc |= (uint32_t)ch << i;
 
 			if (--len == 0)
 				break;
@@ -759,7 +759,8 @@ int git_buf_join(
 	ssize_t offset_a = -1;
 
 	/* not safe to have str_b point internally to the buffer */
-	assert(str_b < buf->ptr || str_b >= buf->ptr + buf->size);
+	if (buf->size)
+		assert(str_b < buf->ptr || str_b >= buf->ptr + buf->size);
 
 	/* figure out if we need to insert a separator */
 	if (separator && strlen_a) {
@@ -769,7 +770,7 @@ int git_buf_join(
 	}
 
 	/* str_a could be part of the buffer */
-	if (str_a >= buf->ptr && str_a < buf->ptr + buf->size)
+	if (buf->size && str_a >= buf->ptr && str_a < buf->ptr + buf->size)
 		offset_a = str_a - buf->ptr;
 
 	GIT_ERROR_CHECK_ALLOC_ADD(&alloc_len, strlen_a, strlen_b);
diff --git a/src/index.c b/src/index.c
index 361288b..d5afc9b 100644
--- a/src/index.c
+++ b/src/index.c
@@ -2781,17 +2781,19 @@ static int write_disk_entry(git_filebuf *file, git_index_entry *entry, const cha
 	ondisk.flags = htons(entry->flags);
 
 	if (entry->flags & GIT_INDEX_ENTRY_EXTENDED) {
+		const size_t path_offset = offsetof(struct entry_long, path);
 		struct entry_long ondisk_ext;
 		memcpy(&ondisk_ext, &ondisk, sizeof(struct entry_short));
 		ondisk_ext.flags_extended = htons(entry->flags_extended &
 			GIT_INDEX_ENTRY_EXTENDED_FLAGS);
-		memcpy(mem, &ondisk_ext, offsetof(struct entry_long, path));
-		path = ((struct entry_long*)mem)->path;
-		disk_size -= offsetof(struct entry_long, path);
+		memcpy(mem, &ondisk_ext, path_offset);
+		path = (char *)mem + path_offset;
+		disk_size -= path_offset;
 	} else {
-		memcpy(mem, &ondisk, offsetof(struct entry_short, path));
-		path = ((struct entry_short*)mem)->path;
-		disk_size -= offsetof(struct entry_short, path);
+		const size_t path_offset = offsetof(struct entry_short, path);
+		memcpy(mem, &ondisk, path_offset);
+		path = (char *)mem + path_offset;
+		disk_size -= path_offset;
 	}
 
 	if (last) {