Commit 9e35d7fd6ee0b6dc0008982ab84668fbb2478939

Russell Belfer 2012-05-24T13:44:24

Fix bugs in UTF-8 <-> UTF-16 conversion. The function to convert UTF-16 to UTF-8 was only allocating a buffer of wcslen(utf16str) bytes for the UTF-8 string, but that is not sufficient if you have multibyte characters, and so when those occurred, the conversion was failing. This updates the conversion functions to use the Win APIs to calculate the correct buffer lengths. Also fixes a comparison in the unit tests that would fail if you did not have a particular environment variable set.

diff --git a/src/win32/utf-conv.c b/src/win32/utf-conv.c
index 76f1e42..0a705c0 100644
--- a/src/win32/utf-conv.c
+++ b/src/win32/utf-conv.c
@@ -32,19 +32,16 @@ void gitwin_set_utf8(void)
 wchar_t* gitwin_to_utf16(const char* str)
 {
 	wchar_t* ret;
-	size_t cb;
+	int cb;
 
 	if (!str)
 		return NULL;
 
-	cb = strlen(str) * sizeof(wchar_t);
+	cb = MultiByteToWideChar(_active_codepage, 0, str, -1, NULL, 0);
 	if (cb == 0)
 		return (wchar_t *)git__calloc(1, sizeof(wchar_t));
 
-	/* Add space for null terminator */
-	cb += sizeof(wchar_t);
-
-	ret = (wchar_t *)git__malloc(cb);
+	ret = (wchar_t *)git__malloc(cb * sizeof(wchar_t));
 	if (!ret)
 		return NULL;
 
@@ -59,7 +56,8 @@ wchar_t* gitwin_to_utf16(const char* str)
 
 int gitwin_append_utf16(wchar_t *buffer, const char *str, size_t len)
 {
-	int result = MultiByteToWideChar(_active_codepage, 0, str, -1, buffer, (int)len);
+	int result = MultiByteToWideChar(
+		_active_codepage, 0, str, -1, buffer, (int)len);
 	if (result == 0)
 		giterr_set(GITERR_OS, "Could not convert string to UTF-16");
 	return result;
@@ -68,23 +66,22 @@ int gitwin_append_utf16(wchar_t *buffer, const char *str, size_t len)
 char* gitwin_from_utf16(const wchar_t* str)
 {
 	char* ret;
-	size_t cb;
+	int cb;
 
 	if (!str)
 		return NULL;
 
-	cb = wcslen(str) * sizeof(char);
+	cb = WideCharToMultiByte(_active_codepage, 0, str, -1, NULL, 0, NULL, NULL);
 	if (cb == 0)
 		return (char *)git__calloc(1, sizeof(char));
 
-	/* Add space for null terminator */
-	cb += sizeof(char);
-
 	ret = (char*)git__malloc(cb);
 	if (!ret)
 		return NULL;
 
-	if (WideCharToMultiByte(_active_codepage, 0, str, -1, ret, (int)cb, NULL, NULL) == 0) {
+	if (WideCharToMultiByte(
+		_active_codepage, 0, str, -1, ret, (int)cb, NULL, NULL) == 0)
+	{
 		giterr_set(GITERR_OS, "Could not convert string to UTF-8");
 		git__free(ret);
 		ret = NULL;
diff --git a/tests-clar/core/env.c b/tests-clar/core/env.c
index bd1a942..abe7bf8 100644
--- a/tests-clar/core/env.c
+++ b/tests-clar/core/env.c
@@ -15,12 +15,16 @@ static char *cl_getenv(const char *name)
 
 	cl_assert(name_utf16);
 	alloc_len = GetEnvironmentVariableW(name_utf16, NULL, 0);
-	if (alloc_len < 0)
+	if (alloc_len <= 0)
 		return NULL;
+
 	cl_assert(value_utf16 = git__calloc(alloc_len, sizeof(wchar_t)));
+
 	value_len = GetEnvironmentVariableW(name_utf16, value_utf16, alloc_len);
 	cl_assert_equal_i(value_len, alloc_len - 1);
+
 	cl_assert(value_utf8 = gitwin_from_utf16(value_utf16));
+
 	git__free(value_utf16);
 
 	return value_utf8;