Hash :
d7f58eab
Author :
Date :
2019-06-21T11:55:21
config_file: implement stat cache to avoid repeated rehashing To decide whether a config file has changed, we always hash its complete contents. This is unnecessarily expensive, as well-behaved filesystems will always update stat information for files which have changed. So before computing the hash, we should first check whether the stat info has actually changed for either the configuration file or any of its includes. This avoids having to re-read the configuration file and its includes every time when we check whether it's been modified. Tracing the for-each-ref example previous to this commit, one can see that we repeatedly re-open both the repo configuration as well as the global configuration: $ strace lg2 for-each-ref |& grep config access("/home/pks/.gitconfig", F_OK) = -1 ENOENT (No such file or directory) access("/home/pks/.config/git/config", F_OK) = 0 access("/etc/gitconfig", F_OK) = -1 ENOENT (No such file or directory) stat("/tmp/repo/.git/config", {st_mode=S_IFREG|0644, st_size=92, ...}) = 0 access("/tmp/repo/.git/config", F_OK) = 0 stat("/tmp/repo/.git/config", {st_mode=S_IFREG|0644, st_size=92, ...}) = 0 open("/tmp/repo/.git/config", O_RDONLY|O_CLOEXEC) = 3 stat("/home/pks/.gitconfig", 0x7ffd15c05290) = -1 ENOENT (No such file or directory) access("/home/pks/.gitconfig", F_OK) = -1 ENOENT (No such file or directory) stat("/home/pks/.config/git/config", {st_mode=S_IFREG|0644, st_size=1154, ...}) = 0 access("/home/pks/.config/git/config", F_OK) = 0 stat("/home/pks/.config/git/config", {st_mode=S_IFREG|0644, st_size=1154, ...}) = 0 open("/home/pks/.config/git/config", O_RDONLY|O_CLOEXEC) = 3 stat("/tmp/repo/.git/config", {st_mode=S_IFREG|0644, st_size=92, ...}) = 0 open("/tmp/repo/.git/config", O_RDONLY|O_CLOEXEC) = 3 stat("/home/pks/.gitconfig", 0x7ffd15c051f0) = -1 ENOENT (No such file or directory) stat("/home/pks/.config/git/config", {st_mode=S_IFREG|0644, st_size=1154, ...}) = 0 open("/home/pks/.config/git/config", O_RDONLY|O_CLOEXEC) = 3 stat("/tmp/repo/.git/config", {st_mode=S_IFREG|0644, st_size=92, ...}) = 0 open("/tmp/repo/.git/config", O_RDONLY|O_CLOEXEC) = 3 stat("/home/pks/.gitconfig", 0x7ffd15c05090) = -1 ENOENT (No such file or directory) stat("/home/pks/.config/git/config", {st_mode=S_IFREG|0644, st_size=1154, ...}) = 0 open("/home/pks/.config/git/config", O_RDONLY|O_CLOEXEC) = 3 stat("/tmp/repo/.git/config", {st_mode=S_IFREG|0644, st_size=92, ...}) = 0 open("/tmp/repo/.git/config", O_RDONLY|O_CLOEXEC) = 3 stat("/home/pks/.gitconfig", 0x7ffd15c05090) = -1 ENOENT (No such file or directory) stat("/home/pks/.config/git/config", {st_mode=S_IFREG|0644, st_size=1154, ...}) = 0 open("/home/pks/.config/git/config", O_RDONLY|O_CLOEXEC) = 3 stat("/tmp/repo/.git/config", {st_mode=S_IFREG|0644, st_size=92, ...}) = 0 open("/tmp/repo/.git/config", O_RDONLY|O_CLOEXEC) = 3 stat("/home/pks/.gitconfig", 0x7ffd15c05090) = -1 ENOENT (No such file or directory) stat("/home/pks/.config/git/config", {st_mode=S_IFREG|0644, st_size=1154, ...}) = 0 open("/home/pks/.config/git/config", O_RDONLY|O_CLOEXEC) = 3 With the change, we only do stats for those files and open them a single time, only: $ strace lg2 for-each-ref |& grep config access("/home/pks/.gitconfig", F_OK) = -1 ENOENT (No such file or directory) access("/home/pks/.config/git/config", F_OK) = 0 access("/etc/gitconfig", F_OK) = -1 ENOENT (No such file or directory) stat("/tmp/repo/.git/config", {st_mode=S_IFREG|0644, st_size=92, ...}) = 0 access("/tmp/repo/.git/config", F_OK) = 0 stat("/tmp/repo/.git/config", {st_mode=S_IFREG|0644, st_size=92, ...}) = 0 stat("/tmp/repo/.git/config", {st_mode=S_IFREG|0644, st_size=92, ...}) = 0 open("/tmp/repo/.git/config", O_RDONLY|O_CLOEXEC) = 3 stat("/home/pks/.gitconfig", 0x7ffe70540d20) = -1 ENOENT (No such file or directory) access("/home/pks/.gitconfig", F_OK) = -1 ENOENT (No such file or directory) stat("/home/pks/.config/git/config", {st_mode=S_IFREG|0644, st_size=1154, ...}) = 0 access("/home/pks/.config/git/config", F_OK) = 0 stat("/home/pks/.config/git/config", {st_mode=S_IFREG|0644, st_size=1154, ...}) = 0 stat("/home/pks/.config/git/config", {st_mode=S_IFREG|0644, st_size=1154, ...}) = 0 open("/home/pks/.config/git/config", O_RDONLY|O_CLOEXEC) = 3 stat("/tmp/repo/.git/config", {st_mode=S_IFREG|0644, st_size=92, ...}) = 0 stat("/home/pks/.gitconfig", 0x7ffe70540ca0) = -1 ENOENT (No such file or directory) stat("/home/pks/.gitconfig", 0x7ffe70540c80) = -1 ENOENT (No such file or directory) stat("/home/pks/.config/git/config", {st_mode=S_IFREG|0644, st_size=1154, ...}) = 0 stat("/tmp/repo/.git/config", {st_mode=S_IFREG|0644, st_size=92, ...}) = 0 stat("/home/pks/.gitconfig", 0x7ffe70540b40) = -1 ENOENT (No such file or directory) stat("/home/pks/.gitconfig", 0x7ffe70540b20) = -1 ENOENT (No such file or directory) stat("/home/pks/.config/git/config", {st_mode=S_IFREG|0644, st_size=1154, ...}) = 0 stat("/tmp/repo/.git/config", {st_mode=S_IFREG|0644, st_size=92, ...}) = 0 stat("/home/pks/.gitconfig", 0x7ffe70540b40) = -1 ENOENT (No such file or directory) stat("/home/pks/.gitconfig", 0x7ffe70540b20) = -1 ENOENT (No such file or directory) stat("/home/pks/.config/git/config", {st_mode=S_IFREG|0644, st_size=1154, ...}) = 0 stat("/tmp/repo/.git/config", {st_mode=S_IFREG|0644, st_size=92, ...}) = 0 stat("/home/pks/.gitconfig", 0x7ffe70540b40) = -1 ENOENT (No such file or directory) stat("/home/pks/.gitconfig", 0x7ffe70540b20) = -1 ENOENT (No such file or directory) stat("/home/pks/.config/git/config", {st_mode=S_IFREG|0644, st_size=1154, ...}) = 0 The following benchmark has been performed with and without the stat cache in a best-of-ten run: ``` int lg2_repro(git_repository *repo, int argc, char **argv) { git_config *cfg; int32_t dummy; int i; UNUSED(argc); UNUSED(argv); check_lg2(git_repository_config(&cfg, repo), "Could not obtain config", NULL); for (i = 1; i < 100000; ++i) git_config_get_int32(&dummy, cfg, "foo.bar"); git_config_free(cfg); return 0; } ``` Without stat cache: $ time lg2 repro real 0m1.528s user 0m0.568s sys 0m0.944s With stat cache: $ time lg2 repro real 0m0.526s user 0m0.268s sys 0m0.258s This benchmark shows a nearly three-fold performance improvement. This change requires that we check our configuration stress tests as we're now in fact becoming more racy. If somebody is writing a configuration file at nearly the same time (there is a window of 100ns on Windows-based systems), then it might be that we realize that this file has actually changed and thus may not re-read it. This will only happen if either an external process is rewriting the configuration file or if the same process has multiple `git_config` structures pointing to the same time, where one of both is being used to write and the other one is used to read values.
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133
#include "clar_libgit2.h"
#include "filebuf.h"
#include "fileops.h"
#include "posix.h"
#define TEST_CONFIG "git-test-config"
static git_buf buf = GIT_BUF_INIT;
void test_config_stress__initialize(void)
{
git_filebuf file = GIT_FILEBUF_INIT;
cl_git_pass(git_filebuf_open(&file, TEST_CONFIG, 0, 0666));
git_filebuf_printf(&file, "[color]\n\tui = auto\n");
git_filebuf_printf(&file, "[core]\n\teditor = \n");
cl_git_pass(git_filebuf_commit(&file));
}
void test_config_stress__cleanup(void)
{
git_buf_dispose(&buf);
p_unlink(TEST_CONFIG);
}
void test_config_stress__dont_break_on_invalid_input(void)
{
git_config *config;
cl_assert(git_path_exists(TEST_CONFIG));
cl_git_pass(git_config_open_ondisk(&config, TEST_CONFIG));
cl_git_pass(git_config_get_string_buf(&buf, config, "color.ui"));
cl_git_pass(git_config_get_string_buf(&buf, config, "core.editor"));
git_config_free(config);
}
void assert_config_value(git_config *config, const char *key, const char *value)
{
git_buf_clear(&buf);
cl_git_pass(git_config_get_string_buf(&buf, config, key));
cl_assert_equal_s(value, git_buf_cstr(&buf));
}
void test_config_stress__comments(void)
{
git_config *config;
cl_git_pass(git_config_open_ondisk(&config, cl_fixture("config/config12")));
assert_config_value(config, "some.section.test2", "hello");
assert_config_value(config, "some.section.test3", "welcome");
assert_config_value(config, "some.section.other", "hello! \" ; ; ; ");
assert_config_value(config, "some.section.other2", "cool! \" # # # ");
assert_config_value(config, "some.section.multi", "hi, this is a ; multiline comment # with ;\n special chars and other stuff !@#");
assert_config_value(config, "some.section.multi2", "good, this is a ; multiline comment # with ;\n special chars and other stuff !@#");
assert_config_value(config, "some.section.back", "this is \ba phrase");
git_config_free(config);
}
void test_config_stress__escape_subsection_names(void)
{
git_config *config;
cl_assert(git_path_exists("git-test-config"));
cl_git_pass(git_config_open_ondisk(&config, TEST_CONFIG));
cl_git_pass(git_config_set_string(config, "some.sec\\tion.other", "foo"));
git_config_free(config);
cl_git_pass(git_config_open_ondisk(&config, TEST_CONFIG));
assert_config_value(config, "some.sec\\tion.other", "foo");
git_config_free(config);
}
void test_config_stress__trailing_backslash(void)
{
git_config *config;
const char *path = "C:\\iam\\some\\windows\\path\\";
cl_assert(git_path_exists("git-test-config"));
cl_git_pass(git_config_open_ondisk(&config, TEST_CONFIG));
cl_git_pass(git_config_set_string(config, "windows.path", path));
git_config_free(config);
cl_git_pass(git_config_open_ondisk(&config, TEST_CONFIG));
assert_config_value(config, "windows.path", path);
git_config_free(config);
}
void test_config_stress__complex(void)
{
git_config *config;
const char *path = "./config-immediate-multiline";
cl_git_mkfile(path, "[imm]\n multi = \"\\\nfoo\"");
cl_git_pass(git_config_open_ondisk(&config, path));
assert_config_value(config, "imm.multi", "foo");
git_config_free(config);
}
void test_config_stress__quick_write(void)
{
git_config *config_w, *config_r;
const char *path = "./config-quick-write";
const char *key = "quick.write";
int32_t i;
/* Create an external writer for one instance with the other one */
cl_git_pass(git_config_open_ondisk(&config_w, path));
cl_git_pass(git_config_open_ondisk(&config_r, path));
/* Write and read in the same second (repeat to increase the chance of it happening) */
for (i = 0; i < 10; i++) {
int32_t val;
cl_git_pass(git_config_set_int32(config_w, key, i));
cl_msleep(1);
cl_git_pass(git_config_get_int32(&val, config_r, key));
cl_assert_equal_i(i, val);
}
git_config_free(config_r);
git_config_free(config_w);
}