Commit 280adb3f942a1ce4f4939b7058209d0cd0467062

Edward Thomson 2015-08-04T16:51:00

index: canonicalize directory case when adding

On case-insensitive systems, when given a user-provided path in the higher-level index addition functions (e.g. `git_index_add_bypath` / `git_index_add_frombuffer`), examine the index to try to match the given path to an existing directory.

Various mechanisms can cause the on-disk representation of a folder to not match the representation in HEAD or the index - for example, a case-changing rename of some file `a/file.txt` to `A/file.txt` will update the paths in the index, but not rename the folder on disk. If a user subsequently adds `a/other.txt`, then this should be stored in the index as `A/other.txt`.

diff --git a/src/index.c b/src/index.c
index 6be73d2..be86f16 100644
--- a/src/index.c
+++ b/src/index.c
@@ -1102,6 +1102,74 @@ static int check_file_directory_collision(git_index *index,
 	return 0;
 }
 
+/* On a case-insensitive index, rewrite `entry->path` in place so its case
+ * matches the index: reuse an existing stage-0 entry's path, else the deepest
+ * directory prefix shared with one.  Returns 0, or -1 if the path copy fails.
+ */
+static int canonicalize_directory_path(
+	git_index *index, git_index_entry *entry)
+{
+	const git_index_entry *match, *best = NULL;
+	char *search, *sep;
+	size_t pos, search_len, best_len = 0;
+
+	if (!index->ignore_case)
+		return 0;
+
+	/* item already exists in the index, simply re-use the existing case */
+	if ((match = git_index_get_bypath(index, entry->path, 0)) != NULL) {
+		memcpy((char *)entry->path, match->path, strlen(entry->path));
+		return 0;
+	}
+
+	/* no directory component, so nothing to canonicalize */
+	if (strchr(entry->path, '/') == NULL)
+		return 0;
+
+	if ((search = git__strdup(entry->path)) == NULL)
+		return -1;
+
+	/* walk up from the parent directory until one is found in the index */
+	while (!best && (sep = strrchr(search, '/'))) {
+		sep[1] = '\0'; /* keep the '/' so only whole directory names match */
+		search_len = strlen(search);
+
+		git_vector_bsearch2(
+			&pos, &index->entries, index->entries_search_path, search);
+
+		while ((match = git_vector_get(&index->entries, pos))) {
+			if (GIT_IDXENTRY_STAGE(match) != 0) {
+				/* conflicts do not contribute to canonical paths */
+			} else if (strncmp(search, match->path, search_len) == 0) {
+				/* exact-case match wins; strncmp stops at a short path's NUL */
+				best = match;
+				best_len = search_len;
+				break;
+			} else if (strncasecmp(search, match->path, search_len) == 0) {
+				/* keep the first case-insensitive match as a fallback, but
+				 * keep walking: an exact-case match may appear later.
+				 */
+				if (!best) {
+					best = match;
+					best_len = search_len;
+				}
+			} else {
+				break;
+			}
+
+			pos++;
+		}
+
+		sep[0] = '\0'; /* drop this directory; the next pass tries its parent */
+	}
+
+	if (best)
+		memcpy((char *)entry->path, best->path, best_len);
+
+	git__free(search);
+	return 0;
+}
+
 static int index_no_dups(void **old, void *new)
 {
 	const git_index_entry *entry = new;
@@ -1115,10 +1183,17 @@ static int index_no_dups(void **old, void *new)
  * it, then it will return an error **and also free the entry**.  When
  * it replaces an existing entry, it will update the entry_ptr with the
  * actual entry in the index (and free the passed in one).
+ * trust_path is whether we use the given path, or whether (on case
+ * insensitive systems only) we try to canonicalize the given path to
+ * be within an existing directory.
  * trust_mode is whether we trust the mode in entry_ptr.
  */
 static int index_insert(
-	git_index *index, git_index_entry **entry_ptr, int replace, bool trust_mode)
+	git_index *index,
+	git_index_entry **entry_ptr,
+	int replace,
+	bool trust_path,
+	bool trust_mode)
 {
 	int error = 0;
 	size_t path_length, position;
@@ -1156,8 +1231,14 @@ static int index_insert(
 			entry->mode = index_merge_mode(index, existing, entry->mode);
 	}
 
+	/* canonicalize the directory name */
+	if (!trust_path)
+		error = canonicalize_directory_path(index, entry);
+
 	/* look for tree / blob name collisions, removing conflicts if requested */
-	error = check_file_directory_collision(index, entry, position, replace);
+	if (!error)
+		error = check_file_directory_collision(index, entry, position, replace);
+
 	if (error < 0)
 		/* skip changes */;
 
@@ -1258,7 +1339,7 @@ int git_index_add_frombuffer(
 	git_oid_cpy(&entry->id, &id);
 	entry->file_size = len;
 
-	if ((error = index_insert(index, &entry, 1, true)) < 0)
+	if ((error = index_insert(index, &entry, 1, true, true)) < 0)
 		return error;
 
 	/* Adding implies conflict was resolved, move conflict entries to REUC */
@@ -1317,7 +1398,7 @@ int git_index_add_bypath(git_index *index, const char *path)
 	assert(index && path);
 
 	if ((ret = index_entry_init(&entry, index, path)) == 0)
-		ret = index_insert(index, &entry, 1, false);
+		ret = index_insert(index, &entry, 1, false, false);
 
 	/* If we were given a directory, let's see if it's a submodule */
 	if (ret < 0 && ret != GIT_EDIRECTORY)
@@ -1343,7 +1424,7 @@ int git_index_add_bypath(git_index *index, const char *path)
 			if ((ret = add_repo_as_submodule(&entry, index, path)) < 0)
 				return ret;
 
-			if ((ret = index_insert(index, &entry, 1, false)) < 0)
+			if ((ret = index_insert(index, &entry, 1, false, false)) < 0)
 				return ret;
 		} else if (ret < 0) {
 			return ret;
@@ -1394,7 +1475,7 @@ int git_index_add(git_index *index, const git_index_entry *source_entry)
 	}
 
 	if ((ret = index_entry_dup(&entry, INDEX_OWNER(index), source_entry)) < 0 ||
-		(ret = index_insert(index, &entry, 1, true)) < 0)
+		(ret = index_insert(index, &entry, 1, true, true)) < 0)
 		return ret;
 
 	git_tree_cache_invalidate_path(index->tree, entry->path);
@@ -1555,7 +1636,7 @@ int git_index_conflict_add(git_index *index,
 		/* Make sure stage is correct */
 		GIT_IDXENTRY_STAGE_SET(entries[i], i + 1);
 
-		if ((ret = index_insert(index, &entries[i], 0, true)) < 0)
+		if ((ret = index_insert(index, &entries[i], 0, true, true)) < 0)
 			goto on_error;
 
 		entries[i] = NULL; /* don't free if later entry fails */
diff --git a/tests/index/bypath.c b/tests/index/bypath.c
index b607e17..17bba6a 100644
--- a/tests/index/bypath.c
+++ b/tests/index/bypath.c
@@ -72,3 +72,151 @@ void test_index_bypath__add_hidden(void)
 	cl_assert_equal_i(GIT_FILEMODE_BLOB, entry->mode);
 #endif
 }
+
+void test_index_bypath__add_honors_existing_case(void)
+{
+	const git_index_entry *entry;
+	/* needs core.ignorecase: bypath adds reuse the index's directory case */
+	if (!cl_repo_get_bool(g_repo, "core.ignorecase"))
+		clar__skip();
+	/* create the working-directory files under submod2/just_a_dir/ */
+	cl_git_mkfile("submod2/just_a_dir/file1.txt", "This is a file");
+	cl_git_mkfile("submod2/just_a_dir/file2.txt", "This is another file");
+	cl_git_mkfile("submod2/just_a_dir/file3.txt", "This is another file");
+	cl_git_mkfile("submod2/just_a_dir/file4.txt", "And another file");
+	/* add three of them, each with a different directory case */
+	cl_git_pass(git_index_add_bypath(g_idx, "just_a_dir/File1.txt"));
+	cl_git_pass(git_index_add_bypath(g_idx, "JUST_A_DIR/file2.txt"));
+	cl_git_pass(git_index_add_bypath(g_idx, "Just_A_Dir/FILE3.txt"));
+	/* directory case follows the first add; file name case is kept as given */
+	cl_assert(entry = git_index_get_bypath(g_idx, "just_a_dir/File1.txt", 0));
+	cl_assert_equal_s("just_a_dir/File1.txt", entry->path);
+
+	cl_assert(entry = git_index_get_bypath(g_idx, "JUST_A_DIR/file2.txt", 0));
+	cl_assert_equal_s("just_a_dir/file2.txt", entry->path);
+
+	cl_assert(entry = git_index_get_bypath(g_idx, "Just_A_Dir/FILE3.txt", 0));
+	cl_assert_equal_s("just_a_dir/FILE3.txt", entry->path);
+	/* re-adding an existing entry with another case keeps the stored path */
+	cl_git_rewritefile("submod2/just_a_dir/file3.txt", "Rewritten");
+	cl_git_pass(git_index_add_bypath(g_idx, "Just_A_Dir/file3.txt"));
+
+	cl_assert(entry = git_index_get_bypath(g_idx, "Just_A_Dir/file3.txt", 0));
+	cl_assert_equal_s("just_a_dir/FILE3.txt", entry->path);
+}
+
+void test_index_bypath__add_honors_existing_case_2(void)
+{
+	git_index_entry dummy = { { 0 } };
+	const git_index_entry *entry;
+	/* needs core.ignorecase: an already-indexed directory case is kept as given */
+	if (!cl_repo_get_bool(g_repo, "core.ignorecase"))
+		clar__skip();
+
+	dummy.mode = GIT_FILEMODE_BLOB;
+
+	/* note that `git_index_add` does no checking to canonical directories */
+	dummy.path = "Just_a_dir/file0.txt";
+	cl_git_pass(git_index_add(g_idx, &dummy));
+
+	dummy.path = "just_a_dir/fileA.txt";
+	cl_git_pass(git_index_add(g_idx, &dummy));
+
+	dummy.path = "Just_A_Dir/fileB.txt";
+	cl_git_pass(git_index_add(g_idx, &dummy));
+
+	dummy.path = "JUST_A_DIR/fileC.txt";
+	cl_git_pass(git_index_add(g_idx, &dummy));
+
+	dummy.path = "just_A_dir/fileD.txt";
+	cl_git_pass(git_index_add(g_idx, &dummy));
+
+	dummy.path = "JUST_a_DIR/fileE.txt";
+	cl_git_pass(git_index_add(g_idx, &dummy));
+	/* create the working-directory files under submod2/just_a_dir/ */
+	cl_git_mkfile("submod2/just_a_dir/file1.txt", "This is a file");
+	cl_git_mkfile("submod2/just_a_dir/file2.txt", "This is another file");
+	cl_git_mkfile("submod2/just_a_dir/file3.txt", "This is another file");
+	cl_git_mkfile("submod2/just_a_dir/file4.txt", "And another file");
+	/* the fourth directory spelling below was not seeded above */
+	cl_git_pass(git_index_add_bypath(g_idx, "just_a_dir/File1.txt"));
+	cl_git_pass(git_index_add_bypath(g_idx, "JUST_A_DIR/file2.txt"));
+	cl_git_pass(git_index_add_bypath(g_idx, "Just_A_Dir/FILE3.txt"));
+	cl_git_pass(git_index_add_bypath(g_idx, "JusT_A_DIR/FILE4.txt"));
+	/* directory spellings that were seeded are stored exactly as given */
+	cl_assert(entry = git_index_get_bypath(g_idx, "just_a_dir/File1.txt", 0));
+	cl_assert_equal_s("just_a_dir/File1.txt", entry->path);
+
+	cl_assert(entry = git_index_get_bypath(g_idx, "JUST_A_DIR/file2.txt", 0));
+	cl_assert_equal_s("JUST_A_DIR/file2.txt", entry->path);
+
+	cl_assert(entry = git_index_get_bypath(g_idx, "Just_A_Dir/FILE3.txt", 0));
+	cl_assert_equal_s("Just_A_Dir/FILE3.txt", entry->path);
+	/* re-adding with a different file name case keeps the stored path */
+	cl_git_rewritefile("submod2/just_a_dir/file3.txt", "Rewritten");
+	cl_git_pass(git_index_add_bypath(g_idx, "Just_A_Dir/file3.txt"));
+
+	cl_assert(entry = git_index_get_bypath(g_idx, "Just_A_Dir/file3.txt", 0));
+	cl_assert_equal_s("Just_A_Dir/FILE3.txt", entry->path);
+}
+
+void test_index_bypath__add_honors_existing_case_3(void)
+{
+	git_index_entry dummy = { { 0 } };
+	const git_index_entry *entry;
+	/* needs core.ignorecase: the caller's directory case is kept if indexed */
+	if (!cl_repo_get_bool(g_repo, "core.ignorecase"))
+		clar__skip();
+
+	dummy.mode = GIT_FILEMODE_BLOB;
+	/* seed the index with four spellings of the same directory */
+	dummy.path = "just_a_dir/filea.txt";
+	cl_git_pass(git_index_add(g_idx, &dummy));
+
+	dummy.path = "Just_A_Dir/fileB.txt";
+	cl_git_pass(git_index_add(g_idx, &dummy));
+
+	dummy.path = "just_A_DIR/FILEC.txt";
+	cl_git_pass(git_index_add(g_idx, &dummy));
+
+	dummy.path = "Just_a_DIR/FileD.txt";
+	cl_git_pass(git_index_add(g_idx, &dummy));
+	/* the file is created on disk under yet another spelling */
+	cl_git_mkfile("submod2/JuSt_A_DiR/fILEE.txt", "This is a file");
+
+	cl_git_pass(git_index_add_bypath(g_idx, "just_a_dir/fILEE.txt"));
+	/* looked up with a different case; the stored path uses "just_a_dir/" */
+	cl_assert(entry = git_index_get_bypath(g_idx, "JUST_A_DIR/fILEE.txt", 0));
+	cl_assert_equal_s("just_a_dir/fILEE.txt", entry->path);
+}
+
+void test_index_bypath__add_honors_existing_case_4(void)
+{
+	git_index_entry dummy = { { 0 } };
+	const git_index_entry *entry;
+	/* needs core.ignorecase: only the deepest indexed directory is recased */
+	if (!cl_repo_get_bool(g_repo, "core.ignorecase"))
+		clar__skip();
+
+	dummy.mode = GIT_FILEMODE_BLOB;
+	/* seed two entries that share "just_a_dir/a/" but differ in case below it */
+	dummy.path = "just_a_dir/a/b/c/d/e/file1.txt";
+	cl_git_pass(git_index_add(g_idx, &dummy));
+
+	dummy.path = "just_a_dir/a/B/C/D/E/file2.txt";
+	cl_git_pass(git_index_add(g_idx, &dummy));
+	/* build just_a_dir/a/b/z/y/x on disk; the index has nothing under z/ */
+	cl_must_pass(p_mkdir("submod2/just_a_dir/a", 0777));
+	cl_must_pass(p_mkdir("submod2/just_a_dir/a/b", 0777));
+	cl_must_pass(p_mkdir("submod2/just_a_dir/a/b/z", 0777));
+	cl_must_pass(p_mkdir("submod2/just_a_dir/a/b/z/y", 0777));
+	cl_must_pass(p_mkdir("submod2/just_a_dir/a/b/z/y/x", 0777));
+
+	cl_git_mkfile("submod2/just_a_dir/a/b/z/y/x/FOO.txt", "This is a file");
+
+	cl_git_pass(git_index_add_bypath(g_idx, "just_a_dir/A/b/Z/y/X/foo.txt"));
+	/* "A/b/" becomes the indexed "a/b/"; "Z/y/X/foo.txt" is kept as given */
+	cl_assert(entry = git_index_get_bypath(g_idx, "just_a_dir/A/b/Z/y/X/foo.txt", 0));
+	cl_assert_equal_s("just_a_dir/a/b/Z/y/X/foo.txt", entry->path);
+}
+