Commit 73dab7692e780e1df96093a54854795428eb66b4

Edward Thomson 2016-08-04T16:16:16

Merge pull request #3861 from libgit2/ethomson/refresh_objects odb: freshen existing objects when writing

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 241c7be..36cd42b 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -63,6 +63,13 @@ v0.24 + 1
 * `git_diff_file` now includes an `id_abbrev` field that reflects the
   number of nibbles set in the `id` field.
 
+* `git_odb_backend` now has a `freshen` function pointer.  This optional
+  function pointer is similar to the `exists` function, but it will update
+  a last-used marker.  For filesystem-based object databases, this updates
+  the timestamp of the file containing the object, to indicate "freshness".
+  If this is `NULL`, then it will not be called and the `exists` function
+  will be used instead.
+
 v0.24
 -------
 
diff --git a/include/git2/sys/odb_backend.h b/include/git2/sys/odb_backend.h
index e423a92..9bcc50d 100644
--- a/include/git2/sys/odb_backend.h
+++ b/include/git2/sys/odb_backend.h
@@ -84,6 +84,17 @@ struct git_odb_backend {
 		git_transfer_progress_cb progress_cb, void *progress_payload);
 
 	/**
+	 * "Freshens" an already existing object, updating its last-used
+	 * time.  This occurs when `git_odb_write` was called, but the
+	 * object already existed (and will not be re-written).  The
+	 * underlying implementation may want to update last-used timestamps.
+	 *
+	 * If a backend implements this, it should return `0` if the object
+	 * exists and was freshened, and non-zero otherwise.
+	 */
+	int (* freshen)(git_odb_backend *, const git_oid *);
+
+	/**
 	 * Frees any resources held by the odb (including the `git_odb_backend`
 	 * itself). An odb backend implementation must provide this function.
 	 */
diff --git a/src/fileops.c b/src/fileops.c
index 22868b4..fcc0301 100644
--- a/src/fileops.c
+++ b/src/fileops.c
@@ -837,6 +837,19 @@ int git_futils_cp(const char *from, const char *to, mode_t filemode)
 	return cp_by_fd(ifd, ofd, true);
 }
 
+int git_futils_touch(const char *path, time_t *when)
+{
+	struct p_timeval times[2];
+	int ret;
+
+	times[0].tv_sec =  times[1].tv_sec  = when ? *when : time(NULL);
+	times[0].tv_usec = times[1].tv_usec = 0;
+
+	ret = p_utimes(path, times);
+
+	return (ret < 0) ? git_path_set_error(errno, path, "touch") : 0;
+}
+
 static int cp_link(const char *from, const char *to, size_t link_size)
 {
 	int error = 0;
diff --git a/src/fileops.h b/src/fileops.h
index 6c6c49d..54e3bd4 100644
--- a/src/fileops.h
+++ b/src/fileops.h
@@ -185,6 +185,12 @@ extern int git_futils_cp(
 	mode_t filemode);
 
 /**
+ * Set the file's atime and mtime to the given time, or the current time
+ * if `when` is NULL.
+ */
+extern int git_futils_touch(const char *path, time_t *when);
+
+/**
  * Flags that can be passed to `git_futils_cp_r`.
  *
  * - GIT_CPDIR_CREATE_EMPTY_DIRS: create directories even if there are no
diff --git a/src/odb.c b/src/odb.c
index 02391c4..253023c 100644
--- a/src/odb.c
+++ b/src/odb.c
@@ -654,7 +654,10 @@ void git_odb_free(git_odb *db)
 	GIT_REFCOUNT_DEC(db, odb_free);
 }
 
-static int odb_exists_1(git_odb *db, const git_oid *id, bool only_refreshed)
+static int odb_exists_1(
+	git_odb *db,
+	const git_oid *id,
+	bool only_refreshed)
 {
 	size_t i;
 	bool found = false;
@@ -673,6 +676,44 @@ static int odb_exists_1(git_odb *db, const git_oid *id, bool only_refreshed)
 	return (int)found;
 }
 
+static int odb_freshen_1(
+	git_odb *db,
+	const git_oid *id,
+	bool only_refreshed)
+{
+	size_t i;
+	bool found = false;
+
+	for (i = 0; i < db->backends.length && !found; ++i) {
+		backend_internal *internal = git_vector_get(&db->backends, i);
+		git_odb_backend *b = internal->backend;
+
+		if (only_refreshed && !b->refresh)
+			continue;
+
+		if (b->freshen != NULL)
+			found = !b->freshen(b, id);
+		else if (b->exists != NULL)
+			found = b->exists(b, id);
+	}
+
+	return (int)found;
+}
+
+static int odb_freshen(git_odb *db, const git_oid *id)
+{
+	assert(db && id);
+
+	if (odb_freshen_1(db, id, false))
+		return 1;
+
+	if (!git_odb_refresh(db))
+		return odb_freshen_1(db, id, true);
+
+	/* Failed to refresh, hence not found */
+	return 0;
+}
+
 int git_odb_exists(git_odb *db, const git_oid *id)
 {
 	git_odb_object *object;
@@ -1131,7 +1172,7 @@ int git_odb_write(
 	assert(oid && db);
 
 	git_odb_hash(oid, data, len, type);
-	if (git_odb_exists(db, oid))
+	if (odb_freshen(db, oid))
 		return 0;
 
 	for (i = 0; i < db->backends.length && error < 0; ++i) {
@@ -1257,7 +1298,7 @@ int git_odb_stream_finalize_write(git_oid *out, git_odb_stream *stream)
 
 	git_hash_final(out, stream->hash_ctx);
 
-	if (git_odb_exists(stream->backend->odb, out))
+	if (odb_freshen(stream->backend->odb, out))
 		return 0;
 
 	return stream->finalize_write(stream, out);
diff --git a/src/odb_loose.c b/src/odb_loose.c
index 228d4c3..f312b9c 100644
--- a/src/odb_loose.c
+++ b/src/odb_loose.c
@@ -918,6 +918,23 @@ cleanup:
 	return error;
 }
 
+static int loose_backend__freshen(
+	git_odb_backend *_backend,
+	const git_oid *oid)
+{
+	loose_backend *backend = (loose_backend *)_backend;
+	git_buf path = GIT_BUF_INIT;
+	int error;
+
+	if (object_file_name(&path, backend, oid) < 0)
+		return -1;
+
+	error = git_futils_touch(path.ptr, NULL);
+	git_buf_free(&path);
+
+	return error;
+}
+
 static void loose_backend__free(git_odb_backend *_backend)
 {
 	loose_backend *backend;
@@ -975,6 +992,7 @@ int git_odb_backend_loose(
 	backend->parent.exists = &loose_backend__exists;
 	backend->parent.exists_prefix = &loose_backend__exists_prefix;
 	backend->parent.foreach = &loose_backend__foreach;
+	backend->parent.freshen = &loose_backend__freshen;
 	backend->parent.free = &loose_backend__free;
 
 	*backend_out = (git_odb_backend *)backend;
diff --git a/src/odb_pack.c b/src/odb_pack.c
index 005d072..b80d033 100644
--- a/src/odb_pack.c
+++ b/src/odb_pack.c
@@ -20,6 +20,9 @@
 
 #include "git2/odb_backend.h"
 
+/* re-freshen a given pack file at most once every 2 seconds */
+#define FRESHEN_FREQUENCY 2
+
 struct pack_backend {
 	git_odb_backend parent;
 	git_vector packs;
@@ -363,6 +366,28 @@ static int pack_backend__read_header(
 	return git_packfile_resolve_header(len_p, type_p, e.p, e.offset);
 }
 
+static int pack_backend__freshen(
+	git_odb_backend *backend, const git_oid *oid)
+{
+	struct git_pack_entry e;
+	time_t now;
+	int error;
+
+	if ((error = pack_entry_find(&e, (struct pack_backend *)backend, oid)) < 0)
+		return error;
+
+	now = time(NULL);
+
+	if (e.p->last_freshen > now - FRESHEN_FREQUENCY)
+		return 0;
+
+	if ((error = git_futils_touch(e.p->pack_name, &now)) < 0)
+		return error;
+
+	e.p->last_freshen = now;
+	return 0;
+}
+
 static int pack_backend__read(
 	void **buffer_p, size_t *len_p, git_otype *type_p,
 	git_odb_backend *backend, const git_oid *oid)
@@ -560,6 +585,7 @@ static int pack_backend__alloc(struct pack_backend **out, size_t initial_size)
 	backend->parent.refresh = &pack_backend__refresh;
 	backend->parent.foreach = &pack_backend__foreach;
 	backend->parent.writepack = &pack_backend__writepack;
+	backend->parent.freshen = &pack_backend__freshen;
 	backend->parent.free = &pack_backend__free;
 
 	*out = backend;
diff --git a/src/pack.h b/src/pack.h
index d15247b..5302db5 100644
--- a/src/pack.h
+++ b/src/pack.h
@@ -102,6 +102,8 @@ struct git_pack_file {
 
 	git_pack_cache bases; /* delta base cache */
 
+	time_t last_freshen; /* last time the packfile was freshened */
+
 	/* something like ".git/objects/pack/xxxxx.pack" */
 	char pack_name[GIT_FLEX_ARRAY]; /* more */
 };
diff --git a/tests/odb/freshen.c b/tests/odb/freshen.c
new file mode 100644
index 0000000..d8d6c02
--- /dev/null
+++ b/tests/odb/freshen.c
@@ -0,0 +1,93 @@
+#include "clar_libgit2.h"
+#include "odb.h"
+#include "posix.h"
+
+static git_repository *repo;
+static git_odb *odb;
+
+void test_odb_freshen__initialize(void)
+{
+	repo = cl_git_sandbox_init("testrepo.git");
+	cl_git_pass(git_repository_odb(&odb, repo));
+}
+
+void test_odb_freshen__cleanup(void)
+{
+	git_odb_free(odb);
+	cl_git_sandbox_cleanup();
+}
+
+#define LOOSE_STR "hey\n"
+#define LOOSE_ID  "1385f264afb75a56a5bec74243be9b367ba4ca08"
+#define LOOSE_FN  "13/85f264afb75a56a5bec74243be9b367ba4ca08"
+
+void test_odb_freshen__loose_object(void)
+{
+	git_oid expected_id, id;
+	struct stat before, after;
+	struct p_timeval old_times[2];
+
+	cl_git_pass(git_oid_fromstr(&expected_id, LOOSE_ID));
+
+	old_times[0].tv_sec = 1234567890;
+	old_times[0].tv_usec = 0;
+	old_times[1].tv_sec = 1234567890;
+	old_times[1].tv_usec = 0;
+
+	/* set time to way back */
+	cl_must_pass(p_utimes("testrepo.git/objects/" LOOSE_FN, old_times));
+	cl_must_pass(p_lstat("testrepo.git/objects/" LOOSE_FN, &before));
+
+	cl_git_pass(git_odb_write(&id, odb, LOOSE_STR, CONST_STRLEN(LOOSE_STR),
+		GIT_OBJ_BLOB));
+	cl_assert_equal_oid(&expected_id, &id);
+	cl_must_pass(p_lstat("testrepo.git/objects/" LOOSE_FN, &after));
+
+	cl_assert(before.st_atime < after.st_atime);
+	cl_assert(before.st_mtime < after.st_mtime);
+}
+
+#define PACKED_STR "Testing a readme.txt\n"
+#define PACKED_ID  "6336846bd5c88d32f93ae57d846683e61ab5c530"
+#define PACKED_FN  "pack-d85f5d483273108c9d8dd0e4728ccf0b2982423a.pack"
+
+void test_odb_freshen__packed_object(void)
+{
+	git_oid expected_id, id;
+	struct stat before, after;
+	struct p_timeval old_times[2];
+
+	cl_git_pass(git_oid_fromstr(&expected_id, PACKED_ID));
+
+	old_times[0].tv_sec = 1234567890;
+	old_times[0].tv_usec = 0;
+	old_times[1].tv_sec = 1234567890;
+	old_times[1].tv_usec = 0;
+
+	/* set time to way back */
+	cl_must_pass(p_utimes("testrepo.git/objects/pack/" PACKED_FN, old_times));
+	cl_must_pass(p_lstat("testrepo.git/objects/pack/" PACKED_FN, &before));
+
+	/* ensure that packfile is freshened */
+	cl_git_pass(git_odb_write(&id, odb, PACKED_STR,
+		CONST_STRLEN(PACKED_STR), GIT_OBJ_BLOB));
+	cl_assert_equal_oid(&expected_id, &id);
+	cl_must_pass(p_lstat("testrepo.git/objects/pack/" PACKED_FN, &after));
+
+	cl_assert(before.st_atime < after.st_atime);
+	cl_assert(before.st_mtime < after.st_mtime);
+
+	memcpy(&before, &after, sizeof(struct stat));
+
+	/* ensure that the pack file is not freshened again immediately */
+	cl_git_pass(git_odb_write(&id, odb, PACKED_STR,
+		CONST_STRLEN(PACKED_STR), GIT_OBJ_BLOB));
+	cl_assert_equal_oid(&expected_id, &id);
+	cl_must_pass(p_lstat("testrepo.git/objects/pack/" PACKED_FN, &after));
+
+	cl_assert(before.st_atime == after.st_atime);
+	cl_assert(before.st_atime_nsec == after.st_atime_nsec);
+	cl_assert(before.st_mtime == after.st_mtime);
+	cl_assert(before.st_mtime_nsec == after.st_mtime_nsec);
+}
+