Commit 0f4c61754bd123b3bee997b397187c9b813ca3e4

Vicent Marti 2012-08-28T22:19:08

Add bounds checking to UTF-8 conversion

diff --git a/src/fileops.c b/src/fileops.c
index 6adccdd..95eacb5 100644
--- a/src/fileops.c
+++ b/src/fileops.c
@@ -56,7 +56,7 @@ int git_futils_creat_locked(const char *path, const mode_t mode)
 #ifdef GIT_WIN32
 	wchar_t buf[GIT_WIN_PATH];
 
-	git__utf8_to_16(buf, path);
+	git__utf8_to_16(buf, GIT_WIN_PATH, path);
 	fd = _wopen(buf, O_WRONLY | O_CREAT | O_TRUNC | O_BINARY | O_EXCL, mode);
 #else
 	fd = open(path, O_WRONLY | O_CREAT | O_TRUNC | O_BINARY | O_EXCL, mode);
@@ -381,7 +381,7 @@ static int win32_expand_path(struct win32_path *s_root, const wchar_t *templ)
 
 static int win32_find_file(git_buf *path, const struct win32_path *root, const char *filename)
 {
-	size_t len;
+	size_t len, alloc_len;
 	wchar_t *file_utf16 = NULL;
 	char file_utf8[GIT_PATH_MAX];
 
@@ -389,7 +389,8 @@ static int win32_find_file(git_buf *path, const struct win32_path *root, const c
 		return GIT_ENOTFOUND;
 
 	/* allocate space for wchar_t path to file */
-	file_utf16 = git__calloc(root->len + len + 2, sizeof(wchar_t));
+	alloc_len = root->len + len + 2;
+	file_utf16 = git__calloc(alloc_len, sizeof(wchar_t));
 	GITERR_CHECK_ALLOC(file_utf16);
 
 	/* append root + '\\' + filename as wchar_t */
@@ -398,7 +399,7 @@ static int win32_find_file(git_buf *path, const struct win32_path *root, const c
 	if (*filename == '/' || *filename == '\\')
 		filename++;
 
-	git__utf8_to_16(file_utf16 + root->len - 1, filename);
+	git__utf8_to_16(file_utf16 + root->len - 1, alloc_len, filename);
 
 	/* check access */
 	if (_waccess(file_utf16, F_OK) < 0) {
diff --git a/src/path.c b/src/path.c
index 1518885..09556bd 100644
--- a/src/path.c
+++ b/src/path.c
@@ -432,14 +432,14 @@ bool git_path_is_empty_dir(const char *path)
 {
 	git_buf pathbuf = GIT_BUF_INIT;
 	HANDLE hFind = INVALID_HANDLE_VALUE;
-	wchar_t *wbuf;
+	wchar_t wbuf[GIT_WIN_PATH];
 	WIN32_FIND_DATAW ffd;
 	bool retval = true;
 
 	if (!git_path_isdir(path)) return false;
 
 	git_buf_printf(&pathbuf, "%s\\*", path);
-	wbuf = gitwin_to_utf16(git_buf_cstr(&pathbuf));
+	git__utf8_to_16(wbuf, GIT_WIN_PATH, git_buf_cstr(&pathbuf));
 
 	hFind = FindFirstFileW(wbuf, &ffd);
 	if (INVALID_HANDLE_VALUE == hFind) {
@@ -455,7 +455,6 @@ bool git_path_is_empty_dir(const char *path)
 
 	FindClose(hFind);
 	git_buf_free(&pathbuf);
-	git__free(wbuf);
 	return retval;
 }
 
diff --git a/src/win32/dir.c b/src/win32/dir.c
index 8b4f896..5cb1082 100644
--- a/src/win32/dir.c
+++ b/src/win32/dir.c
@@ -40,7 +40,7 @@ git__DIR *git__opendir(const char *dir)
 	if (!new->dir)
 		goto fail;
 
-	git__utf8_to_16(filter_w, filter);
+	git__utf8_to_16(filter_w, GIT_WIN_PATH, filter);
 	new->h = FindFirstFileW(filter_w, &new->f);
 
 	if (new->h == INVALID_HANDLE_VALUE) {
@@ -116,7 +116,7 @@ void git__rewinddir(git__DIR *d)
 	if (!init_filter(filter, sizeof(filter), d->dir))
 		return;
 
-	git__utf8_to_16(filter_w, filter);
+	git__utf8_to_16(filter_w, GIT_WIN_PATH, filter);
 	d->h = FindFirstFileW(filter_w, &d->f);
 
 	if (d->h == INVALID_HANDLE_VALUE)
diff --git a/src/win32/posix.h b/src/win32/posix.h
index ddab88a..da46cf5 100644
--- a/src/win32/posix.h
+++ b/src/win32/posix.h
@@ -23,7 +23,7 @@ GIT_INLINE(int) p_mkdir(const char *path, mode_t mode)
 {
 	wchar_t buf[GIT_WIN_PATH];
 	GIT_UNUSED(mode);
-	git__utf8_to_16(buf, path);
+	git__utf8_to_16(buf, GIT_WIN_PATH, path);
 	return _wmkdir(buf);
 }
 
diff --git a/src/win32/posix_w32.c b/src/win32/posix_w32.c
index 682a40a..649fe9b 100644
--- a/src/win32/posix_w32.c
+++ b/src/win32/posix_w32.c
@@ -16,7 +16,7 @@
 int p_unlink(const char *path)
 {
 	wchar_t buf[GIT_WIN_PATH];
-	git__utf8_to_16(buf, path);
+	git__utf8_to_16(buf, GIT_WIN_PATH, path);
 	_wchmod(buf, 0666);
 	return _wunlink(buf);
 }
@@ -58,7 +58,7 @@ static int do_lstat(const char *file_name, struct stat *buf)
 	wchar_t fbuf[GIT_WIN_PATH];
 	DWORD last_error;
 
-	git__utf8_to_16(fbuf, file_name);
+	git__utf8_to_16(fbuf, GIT_WIN_PATH, file_name);
 
 	if (GetFileAttributesExW(fbuf, GetFileExInfoStandard, &fdata)) {
 		int fMode = S_IREAD;
@@ -157,7 +157,7 @@ int p_readlink(const char *link, char *target, size_t target_len)
 		}
 	}
 
-	git__utf8_to_16(link_w, link);
+	git__utf8_to_16(link_w, GIT_WIN_PATH, link);
 
 	hFile = CreateFileW(link_w,			// file to open
 			GENERIC_READ,			// open for reading
@@ -226,7 +226,7 @@ int p_open(const char *path, int flags, ...)
 	wchar_t buf[GIT_WIN_PATH];
 	mode_t mode = 0;
 
-	git__utf8_to_16(buf, path);
+	git__utf8_to_16(buf, GIT_WIN_PATH, path);
 
 	if (flags & O_CREAT) {
 		va_list arg_list;
@@ -242,7 +242,7 @@ int p_open(const char *path, int flags, ...)
 int p_creat(const char *path, mode_t mode)
 {
 	wchar_t buf[GIT_WIN_PATH];
-	git__utf8_to_16(buf, path);
+	git__utf8_to_16(buf, GIT_WIN_PATH, path);
 	return _wopen(buf, _O_WRONLY | _O_CREAT | _O_TRUNC | _O_BINARY, mode);
 }
 
@@ -274,28 +274,28 @@ int p_stat(const char* path, struct stat* buf)
 int p_chdir(const char* path)
 {
 	wchar_t buf[GIT_WIN_PATH];
-	git__utf8_to_16(buf, path);
+	git__utf8_to_16(buf, GIT_WIN_PATH, path);
 	return _wchdir(buf);
 }
 
 int p_chmod(const char* path, mode_t mode)
 {
 	wchar_t buf[GIT_WIN_PATH];
-	git__utf8_to_16(buf, path);
+	git__utf8_to_16(buf, GIT_WIN_PATH, path);
 	return _wchmod(buf, mode);
 }
 
 int p_rmdir(const char* path)
 {
 	wchar_t buf[GIT_WIN_PATH];
-	git__utf8_to_16(buf, path);
+	git__utf8_to_16(buf, GIT_WIN_PATH, path);
 	return _wrmdir(buf);
 }
 
 int p_hide_directory__w32(const char *path)
 {
 	wchar_t buf[GIT_WIN_PATH];
-	git__utf8_to_16(buf, path);
+	git__utf8_to_16(buf, GIT_WIN_PATH, path);
 	return (SetFileAttributesW(buf, FILE_ATTRIBUTE_HIDDEN) != 0) ? 0 : -1;
 }
 
@@ -305,7 +305,7 @@ char *p_realpath(const char *orig_path, char *buffer)
 	wchar_t orig_path_w[GIT_WIN_PATH];
 	wchar_t buffer_w[GIT_WIN_PATH];
 
-	git__utf8_to_16(orig_path_w, orig_path);
+	git__utf8_to_16(orig_path_w, GIT_WIN_PATH, orig_path);
 	ret = GetFullPathNameW(orig_path_w, GIT_WIN_PATH, buffer_w, NULL);
 
 	/* According to MSDN, a return value equals to zero means a failure. */
@@ -399,7 +399,7 @@ int p_setenv(const char* name, const char* value, int overwrite)
 int p_access(const char* path, mode_t mode)
 {
 	wchar_t buf[GIT_WIN_PATH];
-	git__utf8_to_16(buf, path);
+	git__utf8_to_16(buf, GIT_WIN_PATH, path);
 	return _waccess(buf, mode);
 }
 
@@ -408,8 +408,8 @@ int p_rename(const char *from, const char *to)
 	wchar_t wfrom[GIT_WIN_PATH];
 	wchar_t wto[GIT_WIN_PATH];
 
-	git__utf8_to_16(wfrom, from);
-	git__utf8_to_16(wto, to);
+	git__utf8_to_16(wfrom, GIT_WIN_PATH, from);
+	git__utf8_to_16(wto, GIT_WIN_PATH, to);
 	return MoveFileExW(wfrom, wto, MOVEFILE_REPLACE_EXISTING | MOVEFILE_COPY_ALLOWED) ? 0 : -1;
 }
 
diff --git a/src/win32/utf-conv.c b/src/win32/utf-conv.c
index a98e814..88a8414 100644
--- a/src/win32/utf-conv.c
+++ b/src/win32/utf-conv.c
@@ -11,83 +11,52 @@
 #define U16_LEAD(c) (wchar_t)(((c)>>10)+0xd7c0)
 #define U16_TRAIL(c) (wchar_t)(((c)&0x3ff)|0xdc00)
 
-void git__utf8_to_16(wchar_t *dest, const char *src)
+#if 0
+void git__utf8_to_16(wchar_t *dest, size_t length, const char *src)
 {
 	wchar_t *pDest = dest;
 	uint32_t ch;
 	const uint8_t* pSrc = (uint8_t*) src;
-	const uint8_t *pSrcLimit = pSrc + strlen(src);
 
-	assert(dest && src);
+	assert(dest && src && length);
 
-	if ((pSrcLimit - pSrc) >= 4) {
-		pSrcLimit -= 3; /* temporarily reduce pSrcLimit */
+	length--;
 
-		/* in this loop, we can always access at least 4 bytes, up to pSrc+3 */
-		do {
-			ch = *pSrc++;
-			if(ch < 0xc0) {
-				/*
-				 * ASCII, or a trail byte in lead position which is treated like
-				 * a single-byte sequence for better character boundary
-				 * resynchronization after illegal sequences.
-				 */
-				*pDest++=(wchar_t)ch;
-			} else if(ch < 0xe0) { /* U+0080..U+07FF */
-				/* 0x3080 = (0xc0 << 6) + 0x80 */
-				*pDest++ = (wchar_t)((ch << 6) + *pSrc++ - 0x3080);
-			} else if(ch < 0xf0) { /* U+0800..U+FFFF */
-				/* no need for (ch & 0xf) because the upper bits are truncated after <<12 in the cast to (UChar) */
-				/* 0x2080 = (0x80 << 6) + 0x80 */
-				ch = (ch << 12) + (*pSrc++ << 6);
-				*pDest++ = (wchar_t)(ch + *pSrc++ - 0x2080);
-			} else /* f0..f4 */ { /* U+10000..U+10FFFF */
-				/* 0x3c82080 = (0xf0 << 18) + (0x80 << 12) + (0x80 << 6) + 0x80 */
-				ch = (ch << 18) + (*pSrc++ << 12);
-				ch += *pSrc++ << 6;
-				ch += *pSrc++ - 0x3c82080;
-				*(pDest++) = U16_LEAD(ch);
-				*(pDest++) = U16_TRAIL(ch);
-			}
-		} while(pSrc < pSrcLimit);
-
-		pSrcLimit += 3; /* restore original pSrcLimit */
-	}
-
-	while(pSrc < pSrcLimit) {
+	while(*pSrc && length > 0) {
 		ch = *pSrc++;
+		length--;
+
 		if(ch < 0xc0) {
 			/*
 			 * ASCII, or a trail byte in lead position which is treated like
 			 * a single-byte sequence for better character boundary
 			 * resynchronization after illegal sequences.
 			 */
-			*pDest++=(wchar_t)ch;
+			*pDest++ = (wchar_t)ch;
 			continue;
 		} else if(ch < 0xe0) { /* U+0080..U+07FF */
-			if(pSrc < pSrcLimit) {
+			if (pSrc[0]) {
 				/* 0x3080 = (0xc0 << 6) + 0x80 */
 				*pDest++ = (wchar_t)((ch << 6) + *pSrc++ - 0x3080);
 				continue;
 			}
 		} else if(ch < 0xf0) { /* U+0800..U+FFFF */
-			if((pSrcLimit - pSrc) >= 2) {
+			if (pSrc[0] && pSrc[1]) {
 				/* no need for (ch & 0xf) because the upper bits are truncated after <<12 in the cast to (UChar) */
 				/* 0x2080 = (0x80 << 6) + 0x80 */
 				ch = (ch << 12) + (*pSrc++ << 6);
 				*pDest++ = (wchar_t)(ch + *pSrc++ - 0x2080);
-				pSrc += 3;
 				continue;
 			}
 		} else /* f0..f4 */ { /* U+10000..U+10FFFF */
-			if((pSrcLimit - pSrc) >= 3) {
+			if (length >= 1 && pSrc[0] && pSrc[1] && pSrc[2]) {
 				/* 0x3c82080 = (0xf0 << 18) + (0x80 << 12) + (0x80 << 6) + 0x80 */
 				ch = (ch << 18) + (*pSrc++ << 12);
 				ch += *pSrc++ << 6;
 				ch += *pSrc++ - 0x3c82080;
 				*(pDest++) = U16_LEAD(ch);
 				*(pDest++) = U16_TRAIL(ch);
-				pSrc += 4;
+				length--; /* two bytes for this character */
 				continue;
 			}
 		}
@@ -99,6 +68,12 @@ void git__utf8_to_16(wchar_t *dest, const char *src)
 
 	*pDest++ = 0x0;
 }
+#endif
+
+void git__utf8_to_16(wchar_t *dest, size_t length, const char *src)
+{
+	MultiByteToWideChar(CP_UTF8, 0, src, -1, dest, length);
+}
 
 void git__utf16_to_8(char *out, const wchar_t *input)
 {
diff --git a/src/win32/utf-conv.h b/src/win32/utf-conv.h
index c0cfffe..3bd1549 100644
--- a/src/win32/utf-conv.h
+++ b/src/win32/utf-conv.h
@@ -12,7 +12,7 @@
 
 #define GIT_WIN_PATH (260 + 1)
 
-void git__utf8_to_16(wchar_t *dest, const char *src);
+void git__utf8_to_16(wchar_t *dest, size_t length, const char *src);
 void git__utf16_to_8(char *dest, const wchar_t *src);
 
 #endif
diff --git a/tests-clar/clar_helpers.c b/tests-clar/clar_helpers.c
index 125f785..fa48ac8 100644
--- a/tests-clar/clar_helpers.c
+++ b/tests-clar/clar_helpers.c
@@ -60,7 +60,7 @@ char *cl_getenv(const char *name)
 	wchar_t *value_utf16;
 	char *value_utf8;
 
-	git__utf8_to_16(name_utf16, name);
+	git__utf8_to_16(name_utf16, GIT_WIN_PATH, name);
 	alloc_len = GetEnvironmentVariableW(name_utf16, NULL, 0);
 	if (alloc_len <= 0)
 		return NULL;
@@ -83,10 +83,10 @@ int cl_setenv(const char *name, const char *value)
 	wchar_t name_utf16[GIT_WIN_PATH];
 	wchar_t value_utf16[GIT_WIN_PATH];
 
-	git__utf8_to_16(name_utf16, name);
+	git__utf8_to_16(name_utf16, GIT_WIN_PATH, name);
 
 	if (value != NULL)
-		git__utf8_to_16(value_utf16, value);
+		git__utf8_to_16(value_utf16, GIT_WIN_PATH, value);
 
 	cl_assert(SetEnvironmentVariableW(name_utf16, value ? value_utf16 : NULL));
 	return 0;