Hash :
fe621944
Author :
Date :
2020-11-10T22:54:37
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182
/* Generic infrastructure to implement various diff algorithms. */
/*
* Copyright (c) 2020 Neels Hofmeyr <neels@hofmeyr.de>
*
* Permission to use, copy, modify, and distribute this software for any
* purpose with or without fee is hereby granted, provided that the above
* copyright notice and this permission notice appear in all copies.
*
* THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
* WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
* MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
* ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
* ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
* OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
*/
struct diff_range {
int start;
int end;
};
/* List of all possible return codes of a diff invocation. */
#define DIFF_RC_USE_DIFF_ALGO_FALLBACK -1
#define DIFF_RC_OK 0
/* Any positive return values are errno values from sys/errno.h */
struct diff_atom;
/* For each file, there is a "root" struct diff_data referencing the entire
* file, which the atoms are parsed from. In recursion of diff algorithm, there
* may be "child" struct diff_data only referencing a subsection of the file,
* re-using the atoms parsing. For "root" structs, atoms_allocated will be
* nonzero, indicating that the array of atoms is owned by that struct. For
* "child" structs, atoms_allocated == 0, to indicate that the struct is
* referencing a subset of atoms. */
struct diff_data {
FILE *f; /* if root diff_data and not memory-mapped */
off_t pos; /* if not memory-mapped */
const uint8_t *data; /* if memory-mapped */
off_t len;
ARRAYLIST(struct diff_atom) atoms;
struct diff_data *root;
struct diff_data *current;
void *algo_data;
int diff_flags;
int err;
};
#define DIFF_FLAG_IGNORE_WHITESPACE 0x00000001
#define DIFF_FLAG_SHOW_PROTOTYPES 0x00000002
void diff_data_free(struct diff_data *diff_data);
struct diff_chunk;
typedef ARRAYLIST(struct diff_chunk) diff_chunk_arraylist_t;
struct diff_result {
int rc;
/*
* Pointers to diff data passed in via diff_main.
* Do not free these diff_data before freeing the diff_result struct.
*/
struct diff_data *left;
struct diff_data *right;
diff_chunk_arraylist_t chunks;
};
struct diff_state;
/* Signature of a utility function to divide a file into diff atoms.
* An example is diff_atomize_text_by_line() in diff_atomize_text.c.
*
* func_data: context pointer (free to be used by implementation).
* d: struct diff_data with d->data and d->len already set up, and
* d->atoms to be created.
*/
typedef int (*diff_atomize_func_t)(void *func_data, struct diff_data *d);
extern int diff_atomize_text_by_line(void *func_data, struct diff_data *d);
struct diff_algo_config;
typedef int (*diff_algo_impl_t)(
const struct diff_algo_config *algo_config, struct diff_state *state);
/* Form a result with all left-side removed and all right-side added, i.e. no
* actual diff algorithm involved. */
int diff_algo_none(const struct diff_algo_config *algo_config,
struct diff_state *state);
/* Myers Diff tracing from the start all the way through to the end, requiring
* quadratic amounts of memory. This can fail if the required space surpasses
* algo_config->permitted_state_size. */
extern int diff_algo_myers(const struct diff_algo_config *algo_config,
struct diff_state *state);
/* Myers "Divide et Impera": tracing forwards from the start and backwards from
* the end to find a midpoint that divides the problem into smaller chunks.
* Requires only linear amounts of memory. */
extern int diff_algo_myers_divide(
const struct diff_algo_config *algo_config, struct diff_state *state);
/* Patience Diff algorithm, which divides a larger diff into smaller chunks. For
* very specific scenarios, it may lead to a complete diff result by itself, but
* needs a fallback algo to solve chunks that don't have common-unique atoms. */
extern int diff_algo_patience(
const struct diff_algo_config *algo_config, struct diff_state *state);
/* Diff algorithms to use, possibly nested. For example:
*
* struct diff_algo_config myers, patience, myers_divide;
*
* myers = (struct diff_algo_config){
* .impl = diff_algo_myers,
* .permitted_state_size = 32 * 1024 * 1024,
* // When too large, do diff_algo_patience:
* .fallback_algo = &patience,
* };
*
* const struct diff_algo_config patience = (struct diff_algo_config){
* .impl = diff_algo_patience,
* // After subdivision, do Patience again:
* .inner_algo = &patience,
* // If subdivision failed, do Myers Divide et Impera:
* .fallback_algo = &myers_then_myers_divide,
* };
*
* const struct diff_algo_config myers_divide = (struct diff_algo_config){
* .impl = diff_algo_myers_divide,
* // When division succeeded, start from the top:
* .inner_algo = &myers_then_myers_divide,
* // (fallback_algo = NULL implies diff_algo_none).
* };
* struct diff_config config = {
* .algo = &myers,
* ...
* };
* diff_main(&config, ...);
*/
struct diff_algo_config {
diff_algo_impl_t impl;
/* Fail this algo if it would use more than this amount of memory, and
* instead use fallback_algo (diff_algo_myers). permitted_state_size ==
* 0 means no limitation. */
size_t permitted_state_size;
/* For algorithms that divide into smaller chunks, use this algorithm to
* solve the divided chunks. */
const struct diff_algo_config *inner_algo;
/* If the algorithm fails (e.g. diff_algo_myers_if_small needs too large
* state, or diff_algo_patience can't find any common-unique atoms),
* then use this algorithm instead. */
const struct diff_algo_config *fallback_algo;
};
struct diff_config {
diff_atomize_func_t atomize_func;
void *atomize_func_data;
const struct diff_algo_config *algo;
/* How deep to step into subdivisions of a source file, a paranoia /
* safety measure to guard against infinite loops through diff
* algorithms. When the maximum recursion is reached, employ
* diff_algo_none (i.e. remove all left atoms and add all right atoms).
*/
unsigned int max_recursion_depth;
};
int diff_atomize_file(struct diff_data *d, const struct diff_config *config,
FILE *f, const uint8_t *data, off_t len, int diff_flags);
struct diff_result *diff_main(const struct diff_config *config,
struct diff_data *left,
struct diff_data *right);
void diff_result_free(struct diff_result *result);