sync files from diff.git 29916bb6c0c248ca6fa5486cb9e081d92112e86c
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182
diff --git a/lib/diff_atomize_text.c b/lib/diff_atomize_text.c
index 1bdb997..1da34c6 100644
--- a/lib/diff_atomize_text.c
+++ b/lib/diff_atomize_text.c
@@ -29,6 +29,12 @@
#include "diff_internal.h"
#include "diff_debug.h"
+unsigned int
+diff_atom_hash_update(unsigned int hash, unsigned char atom_byte)
+{
+ return hash * 23 + atom_byte; /* fold one byte into the running hash; unsigned, so overflow wraps */
+}
+
static int
diff_data_atomize_text_lines_fd(struct diff_data *d)
{
@@ -63,7 +69,8 @@ diff_data_atomize_text_lines_fd(struct diff_data *d)
if (buf[i] != '\r' && buf[i] != '\n') {
if (!ignore_whitespace
|| !isspace(buf[i]))
- hash = hash * 23 + buf[i];
+ hash = diff_atom_hash_update(
+ hash, buf[i]);
line_end++;
} else
eol = buf[i];
diff --git a/lib/diff_internal.h b/lib/diff_internal.h
index 94ef28c..699cdbd 100644
--- a/lib/diff_internal.h
+++ b/lib/diff_internal.h
@@ -56,72 +56,12 @@ diff_range_len(const struct diff_range *r)
#define DIFF_RC_OK 0
/* Any positive return values are errno values from sys/errno.h */
-struct diff_data;
-
-struct diff_atom {
- struct diff_data *root; /* back pointer to root diff data */
-
- off_t pos; /* if not memory-mapped */
- const uint8_t *at; /* if memory-mapped */
- off_t len;
-
- /* This hash is just a very cheap speed up for finding *mismatching*
- * atoms. When hashes match, we still need to compare entire atoms to
- * find out whether they are indeed identical or not. */
- unsigned int hash;
-};
-
-int
-diff_atom_cmp(int *cmp,
- const struct diff_atom *left,
- const struct diff_atom *right);
-
/* Indicate whether two given diff atoms match. */
int
diff_atom_same(bool *same,
const struct diff_atom *left,
const struct diff_atom *right);
-/* The atom's index in the entire file. For atoms divided by lines of text, this
- * yields the line number (starting with 0). Also works for diff_data that
- * reference only a subsection of a file, always reflecting the global position
- * in the file (and not the relative position within the subsection). */
-#define diff_atom_root_idx(DIFF_DATA, ATOM) \
- ((ATOM) && ((ATOM) >= (DIFF_DATA)->root->atoms.head) \
- ? (unsigned int)((ATOM) - ((DIFF_DATA)->root->atoms.head)) \
- : (DIFF_DATA)->root->atoms.len)
-
-/* The atom's index within DIFF_DATA. For atoms divided by lines of text, this
- * yields the line number (starting with 0). */
-#define diff_atom_idx(DIFF_DATA, ATOM) \
- ((ATOM) && ((ATOM) >= (DIFF_DATA)->atoms.head) \
- ? (unsigned int)((ATOM) - ((DIFF_DATA)->atoms.head)) \
- : (DIFF_DATA)->atoms.len)
-
-#define foreach_diff_atom(ATOM, FIRST_ATOM, COUNT) \
- for ((ATOM) = (FIRST_ATOM); \
- (ATOM) \
- && ((ATOM) >= (FIRST_ATOM)) \
- && ((ATOM) - (FIRST_ATOM) < (COUNT)); \
- (ATOM)++)
-
-#define diff_data_foreach_atom(ATOM, DIFF_DATA) \
- foreach_diff_atom(ATOM, (DIFF_DATA)->atoms.head, (DIFF_DATA)->atoms.len)
-
-#define diff_data_foreach_atom_from(FROM, ATOM, DIFF_DATA) \
- for ((ATOM) = (FROM); \
- (ATOM) \
- && ((ATOM) >= (DIFF_DATA)->atoms.head) \
- && ((ATOM) - (DIFF_DATA)->atoms.head < (DIFF_DATA)->atoms.len); \
- (ATOM)++)
-
-#define diff_data_foreach_atom_backwards_from(FROM, ATOM, DIFF_DATA) \
- for ((ATOM) = (FROM); \
- (ATOM) \
- && ((ATOM) >= (DIFF_DATA)->atoms.head) \
- && ((ATOM) - (DIFF_DATA)->atoms.head >= 0); \
- (ATOM)--)
-
/* A diff chunk represents a set of atoms on the left and/or a set of atoms on
* the right.
*
diff --git a/lib/diff_main.h b/lib/diff_main.h
index 4014216..bd94a91 100644
--- a/lib/diff_main.h
+++ b/lib/diff_main.h
@@ -25,7 +25,72 @@ struct diff_range {
#define DIFF_RC_OK 0
/* Any positive return values are errno values from sys/errno.h */
-struct diff_atom;
+struct diff_atom {
+ struct diff_data *root; /* back pointer to root diff data */
+
+ off_t pos; /* atom's position; used if not memory-mapped */
+ const uint8_t *at; /* atom's data pointer; used if memory-mapped */
+ off_t len; /* atom length (presumably in bytes -- TODO confirm against the atomizer) */
+
+ /* This hash is just a very cheap speed up for finding *mismatching*
+ * atoms. When hashes match, we still need to compare entire atoms to
+ * find out whether they are indeed identical or not.
+ * Calculated with diff_atom_hash_update(); the text atomizer leaves out end-of-line bytes and, when ignoring whitespace, whitespace bytes. */
+ unsigned int hash;
+};
+
+/* Mix another atom_byte into the provided hash value and return the result.
+ * The hash value passed in for the first byte of the atom must be zero. */
+unsigned int
+diff_atom_hash_update(unsigned int hash, unsigned char atom_byte);
+
+/* Compare two atoms. Return 0 on success, or an errno value on failure.
+ * Set *cmp to -1, 0, or 1, just like strcmp(). */
+int
+diff_atom_cmp(int *cmp,
+ const struct diff_atom *left,
+ const struct diff_atom *right);
+
+
+/* The atom's index in the entire file. For atoms divided by lines of text, this
+ * yields the line number (starting with 0). Also works for diff_data that
+ * reference only a subsection of a file, always reflecting the global position
+ * in the file (and not the relative position within the subsection). */
+#define diff_atom_root_idx(DIFF_DATA, ATOM) \
+ ((ATOM) && ((ATOM) >= (DIFF_DATA)->root->atoms.head) \
+ ? (unsigned int)((ATOM) - ((DIFF_DATA)->root->atoms.head)) \
+ : (DIFF_DATA)->root->atoms.len)
+
+/* The atom's index within DIFF_DATA. For atoms divided by lines of text, this
+ * yields the line number (starting with 0). */
+#define diff_atom_idx(DIFF_DATA, ATOM) \
+ ((ATOM) && ((ATOM) >= (DIFF_DATA)->atoms.head) \
+ ? (unsigned int)((ATOM) - ((DIFF_DATA)->atoms.head)) \
+ : (DIFF_DATA)->atoms.len)
+
+#define foreach_diff_atom(ATOM, FIRST_ATOM, COUNT) \
+ for ((ATOM) = (FIRST_ATOM); \
+ (ATOM) \
+ && ((ATOM) >= (FIRST_ATOM)) \
+ && ((ATOM) - (FIRST_ATOM) < (COUNT)); \
+ (ATOM)++)
+
+#define diff_data_foreach_atom(ATOM, DIFF_DATA) \
+ foreach_diff_atom(ATOM, (DIFF_DATA)->atoms.head, (DIFF_DATA)->atoms.len)
+
+#define diff_data_foreach_atom_from(FROM, ATOM, DIFF_DATA) \
+ for ((ATOM) = (FROM); \
+ (ATOM) \
+ && ((ATOM) >= (DIFF_DATA)->atoms.head) \
+ && ((ATOM) - (DIFF_DATA)->atoms.head < (DIFF_DATA)->atoms.len); \
+ (ATOM)++)
+
+#define diff_data_foreach_atom_backwards_from(FROM, ATOM, DIFF_DATA) \
+ for ((ATOM) = (FROM); \
+ (ATOM) \
+ && ((ATOM) >= (DIFF_DATA)->atoms.head) \
+ && ((ATOM) - (DIFF_DATA)->atoms.head >= 0); \
+ (ATOM)--)
/* For each file, there is a "root" struct diff_data referencing the entire
* file, which the atoms are parsed from. In recursion of diff algorithm, there