Merge pull request #1203 from phkelley/reverse_dak Revert changes from git/git diff-delta.c by dak@gnu.org, proski@gnu.org
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223
diff --git a/src/delta.c b/src/delta.c
index 2514dcc..3db319c 100644
--- a/src/delta.c
+++ b/src/delta.c
@@ -1,16 +1,8 @@
/*
- * diff-delta.c: generate a delta between two buffers
+ * Copyright (C) 2009-2012 the libgit2 contributors
*
- * This code was greatly inspired by parts of LibXDiff from Davide Libenzi
- * http://www.xmailserver.org/xdiff-lib.html
- *
- * Rewritten for GIT by Nicolas Pitre <nico@fluxnic.net>, (C) 2005-2007
- *
- * Modified for libgit2 by Michael Schubert <schu@schu.io>, (C) 2012
- *
- * This code is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
+ * This file is part of libgit2, distributed under the GNU GPL v2 with
+ * a Linking Exception. For full terms see the included COPYING file.
*/
#include "delta.h"
@@ -116,11 +108,7 @@ static const unsigned int U[256] = {
struct index_entry {
const unsigned char *ptr;
unsigned int val;
-};
-
-struct unpacked_index_entry {
- struct index_entry entry;
- struct unpacked_index_entry *next;
+ struct index_entry *next;
};
struct git_delta_index {
@@ -137,8 +125,7 @@ git_delta_create_index(const void *buf, unsigned long bufsize)
unsigned int i, hsize, hmask, entries, prev_val, *hash_count;
const unsigned char *data, *buffer = buf;
struct git_delta_index *index;
- struct unpacked_index_entry *entry, **hash;
- struct index_entry *packed_entry, **packed_hash;
+ struct index_entry *entry, **hash;
void *mem;
unsigned long memsize;
@@ -146,7 +133,7 @@ git_delta_create_index(const void *buf, unsigned long bufsize)
return NULL;
/* Determine index hash size. Note that indexing skips the
- first byte to allow for optimizing the Rabin's polynomial
+ first byte to allow for optimizing the rabin polynomial
initialization in create_delta(). */
entries = (unsigned int)(bufsize - 1) / RABIN_WINDOW;
if (bufsize >= 0xffffffffUL) {
@@ -162,21 +149,28 @@ git_delta_create_index(const void *buf, unsigned long bufsize)
hmask = hsize - 1;
/* allocate lookup index */
- memsize = sizeof(*hash) * hsize +
+ memsize = sizeof(*index) +
+ sizeof(*hash) * hsize +
sizeof(*entry) * entries;
mem = git__malloc(memsize);
if (!mem)
return NULL;
+ index = mem;
+ mem = index->hash;
hash = mem;
mem = hash + hsize;
entry = mem;
+ index->memsize = memsize;
+ index->src_buf = buf;
+ index->src_size = bufsize;
+ index->hash_mask = hmask;
memset(hash, 0, hsize * sizeof(*hash));
/* allocate an array to count hash entries */
hash_count = calloc(hsize, sizeof(*hash_count));
if (!hash_count) {
- git__free(hash);
+ git__free(index);
return NULL;
}
@@ -190,13 +184,12 @@ git_delta_create_index(const void *buf, unsigned long bufsize)
val = ((val << 8) | data[i]) ^ T[val >> RABIN_SHIFT];
if (val == prev_val) {
/* keep the lowest of consecutive identical blocks */
- entry[-1].entry.ptr = data + RABIN_WINDOW;
- --entries;
+ entry[-1].ptr = data + RABIN_WINDOW;
} else {
prev_val = val;
i = val & hmask;
- entry->entry.ptr = data + RABIN_WINDOW;
- entry->entry.val = val;
+ entry->ptr = data + RABIN_WINDOW;
+ entry->val = val;
entry->next = hash[i];
hash[i] = entry++;
hash_count[i]++;
@@ -205,7 +198,7 @@ git_delta_create_index(const void *buf, unsigned long bufsize)
/*
* Determine a limit on the number of entries in the same hash
- * bucket. This guards us against pathological data sets causing
+ * bucket. This guard us against patological data sets causing
* really bad hash distribution with most entries in the same hash
* bucket that would bring us to O(m*n) computing costs (m and n
* corresponding to reference and target buffer sizes).
@@ -216,84 +209,21 @@ git_delta_create_index(const void *buf, unsigned long bufsize)
* the reference buffer.
*/
for (i = 0; i < hsize; i++) {
- int acc;
-
- if (hash_count[i] <= HASH_LIMIT)
+ if (hash_count[i] < HASH_LIMIT)
continue;
- /* We leave exactly HASH_LIMIT entries in the bucket */
- entries -= hash_count[i] - HASH_LIMIT;
-
entry = hash[i];
- acc = 0;
-
- /*
- * Assume that this loop is gone through exactly
- * HASH_LIMIT times and is entered and left with
- * acc==0. So the first statement in the loop
- * contributes (hash_count[i]-HASH_LIMIT)*HASH_LIMIT
- * to the accumulator, and the inner loop consequently
- * is run (hash_count[i]-HASH_LIMIT) times, removing
- * one element from the list each time. Since acc
- * balances out to 0 at the final run, the inner loop
- * body can't be left with entry==NULL. So we indeed
- * encounter entry==NULL in the outer loop only.
- */
do {
- acc += hash_count[i] - HASH_LIMIT;
- if (acc > 0) {
- struct unpacked_index_entry *keep = entry;
- do {
- entry = entry->next;
- acc -= HASH_LIMIT;
- } while (acc > 0);
- keep->next = entry->next;
- }
- entry = entry->next;
+ struct index_entry *keep = entry;
+ int skip = hash_count[i] / HASH_LIMIT / 2;
+ do {
+ entry = entry->next;
+ } while(--skip && entry);
+ keep->next = entry;
} while (entry);
}
git__free(hash_count);
- /*
- * Now create the packed index in array form
- * rather than linked lists.
- */
- memsize = sizeof(*index)
- + sizeof(*packed_hash) * (hsize+1)
- + sizeof(*packed_entry) * entries;
- mem = git__malloc(memsize);
- if (!mem) {
- git__free(hash);
- return NULL;
- }
-
- index = mem;
- index->memsize = memsize;
- index->src_buf = buf;
- index->src_size = bufsize;
- index->hash_mask = hmask;
-
- mem = index->hash;
- packed_hash = mem;
- mem = packed_hash + (hsize+1);
- packed_entry = mem;
-
- for (i = 0; i < hsize; i++) {
- /*
- * Coalesce all entries belonging to one linked list
- * into consecutive array entries.
- */
- packed_hash[i] = packed_entry;
- for (entry = hash[i]; entry; entry = entry->next)
- *packed_entry++ = entry->entry;
- }
-
- /* Sentinel value to indicate the length of the last hash bucket */
- packed_hash[hsize] = packed_entry;
-
- assert(packed_entry - (struct index_entry *)mem == entries);
- git__free(hash);
-
return index;
}
@@ -312,7 +242,7 @@ unsigned long git_delta_sizeof_index(struct git_delta_index *index)
/*
* The maximum size for any opcode sequence, including the initial header
- * plus Rabin window plus biggest copy.
+ * plus rabin window plus biggest copy.
*/
#define MAX_OP_SIZE (5 + 5 + 1 + RABIN_WINDOW + 7)
@@ -377,7 +307,7 @@ git_delta_create(
val ^= U[data[-RABIN_WINDOW]];
val = ((val << 8) | *data) ^ T[val >> RABIN_SHIFT];
i = val & index->hash_mask;
- for (entry = index->hash[i]; entry < index->hash[i+1]; entry++) {
+ for (entry = index->hash[i]; entry; entry = entry->next) {
const unsigned char *ref = entry->ptr;
const unsigned char *src = data;
unsigned int ref_size = (unsigned int)(ref_top - ref);