Commit cd39e4e2f3cc27a2976c9a8e0058f9e32d1f1c8f

Edward Thomson 2015-04-29T18:12:51

git_buf_put_w: introduce utf16->utf8 conversion

diff --git a/src/path.c b/src/path.c
index 6c9852b..5b9fb93 100644
--- a/src/path.c
+++ b/src/path.c
@@ -10,6 +10,7 @@
 #include "repository.h"
 #ifdef GIT_WIN32
 #include "win32/posix.h"
+#include "win32/buffer.h"
 #include "win32/w32_util.h"
 #else
 #include <dirent.h>
@@ -1134,7 +1135,7 @@ int git_path_diriter_init(
 	return 0;
 }
 
-static int diriter_update_utf16(git_path_diriter *diriter)
+static int diriter_update_paths(git_path_diriter *diriter)
 {
 	size_t filename_len, path_len;
 
@@ -1156,29 +1157,9 @@ static int diriter_update_utf16(git_path_diriter *diriter)
 		diriter->current.cFileName, filename_len * sizeof(wchar_t));
 	diriter->path[path_len-1] = L'\0';
 
-	return 0;
-}
-
-static int diriter_update_utf8(git_path_diriter *diriter)
-{
-	git_win32_utf8_path filename_utf8;
-	wchar_t *filename_utf16;
-	int filename_utf8_len;
-
-	/* Don't copy the full UTF-16 path into the UTF-8 path, only do the
-	 * UTF16 -> UTF8 conversion of the filename portion. This prevents us
-	 * from trying to encode the parent path differently, which would be
-	 * bad since we do arithmetic based on the already computed parent len.
-	 */
-
-	filename_utf16 = &diriter->path[diriter->parent_len + 1];
-
-	if ((filename_utf8_len = git_win32_path_to_utf8(filename_utf8, filename_utf16)) < 0)
-		return filename_utf8_len;
-
 	git_buf_truncate(&diriter->path_utf8, diriter->parent_utf8_len);
 	git_buf_putc(&diriter->path_utf8, '/');
-	git_buf_put(&diriter->path_utf8, filename_utf8, (size_t)filename_utf8_len);
+	git_buf_put_w(&diriter->path_utf8, diriter->current.cFileName, filename_len);
 
 	if (git_buf_oom(&diriter->path_utf8))
 		return -1;
@@ -1200,7 +1181,7 @@ int git_path_diriter_next(git_path_diriter *diriter)
 			return GIT_ITEROVER;
 	} while (skip_dot && git_path_is_dot_or_dotdotW(diriter->current.cFileName));
 
-	if (diriter_update_utf16(diriter) < 0 || diriter_update_utf8(diriter) < 0)
+	if (diriter_update_paths(diriter) < 0)
 		return -1;
 
 	return 0;
diff --git a/src/win32/buffer.c b/src/win32/buffer.c
new file mode 100644
index 0000000..7495018
--- /dev/null
+++ b/src/win32/buffer.c
@@ -0,0 +1,80 @@
+/*
+ * Copyright (C) the libgit2 contributors. All rights reserved.
+ *
+ * This file is part of libgit2, distributed under the GNU GPL v2 with
+ * a Linking Exception. For full terms see the included COPYING file.
+ */
+
+#include "common.h"
+#include "buffer.h"
+#include "../buffer.h"
+#include "utf-conv.h"
+
+#include <limits.h>
+
+/*
+ * Map the Win32 last-error code left by a failed WideCharToMultiByte call
+ * onto errno: ERROR_INSUFFICIENT_BUFFER becomes ENAMETOOLONG, anything else
+ * becomes EINVAL.  Always returns -1 so that callers can simply
+ * `return handle_wc_error();`.
+ */
+GIT_INLINE(int) handle_wc_error(void)
+{
+	if (GetLastError() == ERROR_INSUFFICIENT_BUFFER)
+		errno = ENAMETOOLONG;
+	else
+		errno = EINVAL;
+
+	return -1;
+}
+
+/*
+ * Convert `len_w` wide characters starting at `string_w` to UTF-8 and append
+ * the result to `buf`, leaving the buffer NUL-terminated.
+ *
+ * Returns 0 on success (an empty input is a successful no-op) and -1 on
+ * failure.  When the conversion itself fails, or `len_w` does not fit in the
+ * int that WideCharToMultiByte accepts, errno is set and nothing is appended.
+ */
+int git_buf_put_w(git_buf *buf, const wchar_t *string_w, size_t len_w)
+{
+	int utf8_len, utf8_write_len;
+	size_t new_size;
+
+	if (!len_w)
+		return 0;
+
+	assert(string_w);
+
+	/* WideCharToMultiByte takes the input length as an int; reject lengths
+	 * that the cast below would truncate or turn negative. */
+	if (len_w > INT_MAX) {
+		errno = ENAMETOOLONG;
+		return -1;
+	}
+
+	/* Measure the string necessary for conversion. The input is non-empty
+	 * here, so a zero result means the conversion failed. */
+	if ((utf8_len = WideCharToMultiByte(CP_UTF8, WC_ERR_INVALID_CHARS, string_w, (int)len_w, NULL, 0, NULL, NULL)) == 0)
+		return handle_wc_error();
+
+	assert(utf8_len > 0);
+
+	/* Required size: current contents + converted text + NUL terminator */
+	GITERR_CHECK_ALLOC_ADD(&new_size, buf->size, (size_t)utf8_len);
+	GITERR_CHECK_ALLOC_ADD(&new_size, new_size, 1);
+
+	if (git_buf_grow(buf, new_size) < 0)
+		return -1;
+
+	if ((utf8_write_len = WideCharToMultiByte(
+			CP_UTF8, WC_ERR_INVALID_CHARS, string_w, (int)len_w, &buf->ptr[buf->size], utf8_len, NULL, NULL)) == 0)
+		return handle_wc_error();
+
+	assert(utf8_write_len == utf8_len);
+
+	buf->size += utf8_write_len;
+	buf->ptr[buf->size] = '\0';
+	return 0;
+}
+
diff --git a/src/win32/buffer.h b/src/win32/buffer.h
new file mode 100644
index 0000000..6224398
--- /dev/null
+++ b/src/win32/buffer.h
@@ -0,0 +1,18 @@
+/*
+ * Copyright (C) the libgit2 contributors. All rights reserved.
+ *
+ * This file is part of libgit2, distributed under the GNU GPL v2 with
+ * a Linking Exception. For full terms see the included COPYING file.
+ */
+#ifndef INCLUDE_git_win32_buffer_h__
+#define INCLUDE_git_win32_buffer_h__
+
+#include "../buffer.h"
+
+/**
+ * Convert `len_w` wide characters of `string_w` to UTF-8 and append them
+ * to `buf`.  Returns 0 on success, or -1 on error.
+ */
+int git_buf_put_w(git_buf *buf, const wchar_t *string_w, size_t len_w);
+
+#endif
diff --git a/src/win32/utf-conv.c b/src/win32/utf-conv.c
index 0dad4ea..f1b674e 100644
--- a/src/win32/utf-conv.c
+++ b/src/win32/utf-conv.c
@@ -8,10 +8,6 @@
 #include "common.h"
 #include "utf-conv.h"
 
-#ifndef WC_ERR_INVALID_CHARS
-# define WC_ERR_INVALID_CHARS	0x80
-#endif
-
 GIT_INLINE(DWORD) get_wc_flags(void)
 {
 	static char inited = 0;
diff --git a/src/win32/utf-conv.h b/src/win32/utf-conv.h
index 89cdb96..33b95f5 100644
--- a/src/win32/utf-conv.h
+++ b/src/win32/utf-conv.h
@@ -10,6 +10,10 @@
 #include <wchar.h>
 #include "common.h"
 
+#ifndef WC_ERR_INVALID_CHARS
+# define WC_ERR_INVALID_CHARS	0x80
+#endif
+
 /**
  * Converts a UTF-8 string to wide characters.
  *