Commit c23841c833ab35d82b32553d0659e815859e6c01

Shawn O. Pearce 2009-01-03T04:21:30

Add the binary delta apply algorithm for pack style deltas The git__delta_apply() function can be used to apply a Git style delta, such as those used in pack files or in git patch files, to recover the original object stream. Signed-off-by: Shawn O. Pearce <spearce@spearce.org>

diff --git a/src/delta-apply.c b/src/delta-apply.c
new file mode 100644
index 0000000..f924461
--- /dev/null
+++ b/src/delta-apply.c
@@ -0,0 +1,104 @@
+#include "common.h"
+#include "git/odb.h"
+
+/*
+ * This file was heavily cribbed from BinaryDelta.java in JGit, which
+ * itself was heavily cribbed from <code>patch-delta.c</code> in the
+ * GIT project.   The original delta patching code was written by
+ * Nicolas Pitre <nico@cam.org>.
+ */
+
+static size_t hdr_sz(
+	const unsigned char **delta,
+	const unsigned char *end)
+{
+	const unsigned char *d = *delta;
+	size_t r = 0;
+	unsigned int c, shift = 0;
+
+	do {
+		if (d == end)
+			return -1;
+		c = *d++;
+		r |= (c & 0x7f) << shift;
+		shift += 7;
+	} while (c & 0x80);
+	*delta = d;
+	return r;
+}
+
+int git__delta_apply(
+	git_obj *out,
+	const unsigned char *base,
+	size_t base_len,
+	const unsigned char *delta,
+	size_t delta_len)
+{
+	const unsigned char *delta_end = delta + delta_len;
+	size_t res_sz;
+	unsigned char *res_dp;
+
+	/* Check that the base size matches the data we were given;
+	 * if not we would underflow while accessing data from the
+	 * base object, resulting in data corruption or segfault.
+	 */
+	if (base_len != hdr_sz(&delta, delta_end))
+		return GIT_ERROR;
+
+	res_sz = hdr_sz(&delta, delta_end);
+	if (!(res_dp = git__malloc(res_sz + 1)))
+		return GIT_ERROR;
+	res_dp[res_sz] = '\0';
+	out->data = res_dp;
+	out->len = res_sz;
+
+	while (delta < delta_end) {
+		unsigned char cmd = *delta++;
+		if (cmd & 0x80) {
+			/* cmd is a copy instruction; copy from the base.
+			 */
+			size_t off = 0, len = 0;
+
+			if (cmd & 0x01) off  = *delta++;
+			if (cmd & 0x02) off |= *delta++ <<  8;
+			if (cmd & 0x04) off |= *delta++ << 16;
+			if (cmd & 0x08) off |= *delta++ << 24;
+
+			if (cmd & 0x10) len  = *delta++;
+			if (cmd & 0x20) len |= *delta++ <<  8;
+			if (cmd & 0x40) len |= *delta++ << 16;
+			if (!len)       len  = 0x10000;
+
+			if (base_len < off + len || res_sz < len)
+				goto fail;
+			memcpy(res_dp, base + off, len);
+			res_dp += len;
+			res_sz -= len;
+
+		} else if (cmd) {
+			/* cmd is a literal insert instruction; copy from
+			 * the delta stream itself.
+			 */
+			if (delta_end - delta < cmd || res_sz < cmd)
+				goto fail;
+			memcpy(res_dp, delta, cmd);
+			delta  += cmd;
+			res_dp += cmd;
+			res_sz -= cmd;
+
+		} else {
+			/* cmd == 0 is reserved for future encodings.
+			 */
+			goto fail;
+		}
+	}
+
+	if (delta != delta_end || res_sz)
+		goto fail;
+	return GIT_SUCCESS;
+
+fail:
+	free(out->data);
+	out->data = NULL;
+	return GIT_ERROR;
+}
diff --git a/src/delta-apply.h b/src/delta-apply.h
new file mode 100644
index 0000000..498bccd
--- /dev/null
+++ b/src/delta-apply.h
@@ -0,0 +1,25 @@
+#ifndef INCLUDE_delta_apply_h__
+#define INCLUDE_delta_apply_h__
+
+/**
+ * Apply a git binary delta to recover the original content.
+ *
+ * @param out the output buffer to receive the original data.
+ *		Only out->data and out->len are populated, as this is
+ *		the only information available in the delta.
+ * @param base the base to copy from during copy instructions.
+ * @param base_len number of bytes available at base.
+ * @param delta the delta to execute copy/insert instructions from.
+ * @param delta_len total number of bytes in the delta.
+ * @return
+ * - GIT_SUCCESS on a successful delta unpack.
+ * - GIT_ERROR if the delta is corrupt or doesn't match the base.
+ */
+extern int git__delta_apply(
+	git_obj *out,
+	const unsigned char *base,
+	size_t base_len,
+	const unsigned char *delta,
+	size_t delta_len);
+
+#endif