Add the binary delta apply algorithm for pack style deltas The git__delta_apply() function can be used to apply a Git style delta, such as those used in pack files or in git patch files, to recover the original object stream. Signed-off-by: Shawn O. Pearce <spearce@spearce.org>
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141
diff --git a/src/delta-apply.c b/src/delta-apply.c
new file mode 100644
index 0000000..f924461
--- /dev/null
+++ b/src/delta-apply.c
@@ -0,0 +1,104 @@
+#include "common.h"
+#include "git/odb.h"
+
+/*
+ * This file was heavily cribbed from BinaryDelta.java in JGit, which
+ * itself was heavily cribbed from <code>patch-delta.c</code> in the
+ * GIT project. The original delta patching code was written by
+ * Nicolas Pitre <nico@cam.org>.
+ */
+
+static size_t hdr_sz(
+ const unsigned char **delta,
+ const unsigned char *end)
+{
+ const unsigned char *d = *delta;
+ size_t r = 0;
+ unsigned int c, shift = 0;
+
+ do {
+ if (d == end)
+ return -1;
+ c = *d++;
+ r |= (c & 0x7f) << shift;
+ shift += 7;
+ } while (c & 0x80);
+ *delta = d;
+ return r;
+}
+
+int git__delta_apply(
+ git_obj *out,
+ const unsigned char *base,
+ size_t base_len,
+ const unsigned char *delta,
+ size_t delta_len)
+{
+ const unsigned char *delta_end = delta + delta_len;
+ size_t res_sz;
+ unsigned char *res_dp;
+
+ /* Check that the base size matches the data we were given;
+ * if not we would underflow while accessing data from the
+ * base object, resulting in data corruption or segfault.
+ */
+ if (base_len != hdr_sz(&delta, delta_end))
+ return GIT_ERROR;
+
+ res_sz = hdr_sz(&delta, delta_end);
+ if (!(res_dp = git__malloc(res_sz + 1)))
+ return GIT_ERROR;
+ res_dp[res_sz] = '\0';
+ out->data = res_dp;
+ out->len = res_sz;
+
+ while (delta < delta_end) {
+ unsigned char cmd = *delta++;
+ if (cmd & 0x80) {
+ /* cmd is a copy instruction; copy from the base.
+ */
+ size_t off = 0, len = 0;
+
+ if (cmd & 0x01) off = *delta++;
+ if (cmd & 0x02) off |= *delta++ << 8;
+ if (cmd & 0x04) off |= *delta++ << 16;
+ if (cmd & 0x08) off |= *delta++ << 24;
+
+ if (cmd & 0x10) len = *delta++;
+ if (cmd & 0x20) len |= *delta++ << 8;
+ if (cmd & 0x40) len |= *delta++ << 16;
+ if (!len) len = 0x10000;
+
+ if (base_len < off + len || res_sz < len)
+ goto fail;
+ memcpy(res_dp, base + off, len);
+ res_dp += len;
+ res_sz -= len;
+
+ } else if (cmd) {
+ /* cmd is a literal insert instruction; copy from
+ * the delta stream itself.
+ */
+ if (delta_end - delta < cmd || res_sz < cmd)
+ goto fail;
+ memcpy(res_dp, delta, cmd);
+ delta += cmd;
+ res_dp += cmd;
+ res_sz -= cmd;
+
+ } else {
+ /* cmd == 0 is reserved for future encodings.
+ */
+ goto fail;
+ }
+ }
+
+ if (delta != delta_end || res_sz)
+ goto fail;
+ return GIT_SUCCESS;
+
+fail:
+ free(out->data);
+ out->data = NULL;
+ return GIT_ERROR;
+}
diff --git a/src/delta-apply.h b/src/delta-apply.h
new file mode 100644
index 0000000..498bccd
--- /dev/null
+++ b/src/delta-apply.h
@@ -0,0 +1,25 @@
+#ifndef INCLUDE_delta_apply_h__
+#define INCLUDE_delta_apply_h__
+
+/**
+ * Apply a git binary delta to recover the original content.
+ *
+ * @param out the output buffer to receive the original data.
+ * Only out->data and out->len are populated, as this is
+ * the only information available in the delta.
+ * @param base the base to copy from during copy instructions.
+ * @param base_len number of bytes available at base.
+ * @param delta the delta to execute copy/insert instructions from.
+ * @param delta_len total number of bytes in the delta.
+ * @return
+ * - GIT_SUCCESS on a successful delta unpack.
+ * - GIT_ERROR if the delta is corrupt or doesn't match the base.
+ */
+extern int git__delta_apply(
+ git_obj *out,
+ const unsigned char *base,
+ size_t base_len,
+ const unsigned char *delta,
+ size_t delta_len);
+
+#endif