Hash :
a97b769a
Author :
Date :
2016-04-27T12:00:31
odb: avoid inflating the full delta to read the header When we read the header, we want to know the size and type of the object. We're currently inflating the full delta in order to read the first few bytes. This can mean hundreds of kB needlessly inflated for large objects. Instead use a packfile stream to read just enough so we can read the two varints in the header and avoid inflating most of the delta.
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166
/*
* Copyright (C) the libgit2 contributors. All rights reserved.
*
* This file is part of libgit2, distributed under the GNU GPL v2 with
* a Linking Exception. For full terms see the included COPYING file.
*/
#include "common.h"
#include "git2/odb.h"
#include "delta-apply.h"
/*
* This file was heavily cribbed from BinaryDelta.java in JGit, which
* itself was heavily cribbed from <code>patch-delta.c</code> in the
* GIT project. The original delta patching code was written by
* Nicolas Pitre <nico@cam.org>.
*/
static int hdr_sz(
size_t *size,
const unsigned char **delta,
const unsigned char *end)
{
const unsigned char *d = *delta;
size_t r = 0;
unsigned int c, shift = 0;
do {
if (d == end)
return -1;
c = *d++;
r |= (c & 0x7f) << shift;
shift += 7;
} while (c & 0x80);
*delta = d;
*size = r;
return 0;
}
int git__delta_read_header(
const unsigned char *delta,
size_t delta_len,
size_t *base_sz,
size_t *res_sz)
{
const unsigned char *delta_end = delta + delta_len;
if ((hdr_sz(base_sz, &delta, delta_end) < 0) ||
(hdr_sz(res_sz, &delta, delta_end) < 0))
return -1;
return 0;
}
#define DELTA_HEADER_BUFFER_LEN 16
int git__delta_read_header_fromstream(size_t *base_sz, size_t *res_sz, git_packfile_stream *stream)
{
static const size_t buffer_len = DELTA_HEADER_BUFFER_LEN;
unsigned char buffer[DELTA_HEADER_BUFFER_LEN];
const unsigned char *delta, *delta_end;
size_t len;
ssize_t read;
len = read = 0;
while (len < buffer_len) {
read = git_packfile_stream_read(stream, &buffer[len], buffer_len - len);
if (read == 0)
break;
if (read == GIT_EBUFS)
continue;
len += read;
}
delta = buffer;
delta_end = delta + len;
if ((hdr_sz(base_sz, &delta, delta_end) < 0) ||
(hdr_sz(res_sz, &delta, delta_end) < 0))
return -1;
return 0;
}
int git__delta_apply(
git_rawobj *out,
const unsigned char *base,
size_t base_len,
const unsigned char *delta,
size_t delta_len)
{
const unsigned char *delta_end = delta + delta_len;
size_t base_sz, res_sz, alloc_sz;
unsigned char *res_dp;
/* Check that the base size matches the data we were given;
* if not we would underflow while accessing data from the
* base object, resulting in data corruption or segfault.
*/
if ((hdr_sz(&base_sz, &delta, delta_end) < 0) || (base_sz != base_len)) {
giterr_set(GITERR_INVALID, "Failed to apply delta. Base size does not match given data");
return -1;
}
if (hdr_sz(&res_sz, &delta, delta_end) < 0) {
giterr_set(GITERR_INVALID, "Failed to apply delta. Base size does not match given data");
return -1;
}
GITERR_CHECK_ALLOC_ADD(&alloc_sz, res_sz, 1);
res_dp = git__malloc(alloc_sz);
GITERR_CHECK_ALLOC(res_dp);
res_dp[res_sz] = '\0';
out->data = res_dp;
out->len = res_sz;
while (delta < delta_end) {
unsigned char cmd = *delta++;
if (cmd & 0x80) {
/* cmd is a copy instruction; copy from the base.
*/
size_t off = 0, len = 0;
if (cmd & 0x01) off = *delta++;
if (cmd & 0x02) off |= *delta++ << 8;
if (cmd & 0x04) off |= *delta++ << 16;
if (cmd & 0x08) off |= *delta++ << 24;
if (cmd & 0x10) len = *delta++;
if (cmd & 0x20) len |= *delta++ << 8;
if (cmd & 0x40) len |= *delta++ << 16;
if (!len) len = 0x10000;
if (base_len < off + len || res_sz < len)
goto fail;
memcpy(res_dp, base + off, len);
res_dp += len;
res_sz -= len;
} else if (cmd) {
/* cmd is a literal insert instruction; copy from
* the delta stream itself.
*/
if (delta_end - delta < cmd || res_sz < cmd)
goto fail;
memcpy(res_dp, delta, cmd);
delta += cmd;
res_dp += cmd;
res_sz -= cmd;
} else {
/* cmd == 0 is reserved for future encodings.
*/
goto fail;
}
}
if (delta != delta_end || res_sz)
goto fail;
return 0;
fail:
git__free(out->data);
out->data = NULL;
giterr_set(GITERR_INVALID, "Failed to apply delta");
return -1;
}