sync files from diff.git e51ebd83fa731d197ee4074ee2e94dbc0581078c
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179
diff --git a/lib/diff_atomize_text.c b/lib/diff_atomize_text.c
index 1da34c6..0531fab 100644
--- a/lib/diff_atomize_text.c
+++ b/lib/diff_atomize_text.c
@@ -43,6 +43,7 @@ diff_data_atomize_text_lines_fd(struct diff_data *d)
unsigned int array_size_estimate = d->len / 50;
unsigned int pow2 = 1;
bool ignore_whitespace = (d->diff_flags & DIFF_FLAG_IGNORE_WHITESPACE);
+ bool embedded_nul = false;
while (array_size_estimate >>= 1)
pow2++;
@@ -71,6 +72,8 @@ diff_data_atomize_text_lines_fd(struct diff_data *d)
|| !isspace(buf[i]))
hash = diff_atom_hash_update(
hash, buf[i]);
+ if (buf[i] == '\0')
+ embedded_nul = true;
line_end++;
} else
eol = buf[i];
@@ -112,6 +115,10 @@ diff_data_atomize_text_lines_fd(struct diff_data *d)
return errno;
}
+ /* Files are considered binary if they contain embedded '\0' bytes. */
+ if (embedded_nul)
+ d->atomizer_flags |= DIFF_ATOMIZER_FOUND_BINARY_DATA;
+
return DIFF_RC_OK;
}
@@ -121,7 +128,7 @@ diff_data_atomize_text_lines_mmap(struct diff_data *d)
const uint8_t *pos = d->data;
const uint8_t *end = pos + d->len;
bool ignore_whitespace = (d->diff_flags & DIFF_FLAG_IGNORE_WHITESPACE);
-
+ bool embedded_nul = false;
unsigned int array_size_estimate = d->len / 50;
unsigned int pow2 = 1;
while (array_size_estimate >>= 1)
@@ -137,6 +144,8 @@ diff_data_atomize_text_lines_mmap(struct diff_data *d)
if (!ignore_whitespace
|| !isspace(*line_end))
hash = hash * 23 + *line_end;
+ if (*line_end == '\0')
+ embedded_nul = true;
line_end++;
}
@@ -167,6 +176,10 @@ diff_data_atomize_text_lines_mmap(struct diff_data *d)
pos = line_end;
}
+ /* Files are considered binary if they contain embedded '\0' bytes. */
+ if (embedded_nul)
+ d->atomizer_flags |= DIFF_ATOMIZER_FOUND_BINARY_DATA;
+
return DIFF_RC_OK;
}
diff --git a/lib/diff_main.h b/lib/diff_main.h
index bd94a91..5e816ae 100644
--- a/lib/diff_main.h
+++ b/lib/diff_main.h
@@ -105,6 +105,7 @@ struct diff_data {
const uint8_t *data; /* if memory-mapped */
off_t len;
+ int atomizer_flags;
ARRAYLIST(struct diff_atom) atoms;
struct diff_data *root;
struct diff_data *current;
@@ -115,8 +116,13 @@ struct diff_data {
int err;
};
+/* Flags set by file atomizer. */
+#define DIFF_ATOMIZER_FOUND_BINARY_DATA 0x00000001
+
+/* Flags set by caller of diff_main(). */
#define DIFF_FLAG_IGNORE_WHITESPACE 0x00000001
#define DIFF_FLAG_SHOW_PROTOTYPES 0x00000002
+#define DIFF_FLAG_FORCE_TEXT_DATA 0x00000004
void diff_data_free(struct diff_data *diff_data);
@@ -143,7 +149,7 @@ struct diff_state;
*
* func_data: context pointer (free to be used by implementation).
* d: struct diff_data with d->data and d->len already set up, and
- * d->atoms to be created.
+ * d->atoms to be created and d->atomizer_flags to be set up.
*/
typedef int (*diff_atomize_func_t)(void *func_data, struct diff_data *d);
diff --git a/lib/diff_output_edscript.c b/lib/diff_output_edscript.c
index 116c0d8..677f5ba 100644
--- a/lib/diff_output_edscript.c
+++ b/lib/diff_output_edscript.c
@@ -110,6 +110,12 @@ diff_output_edscript(struct diff_output_info **output_info,
{
struct diff_output_info *outinfo = NULL;
struct diff_chunk_context cc = {};
+ int atomizer_flags = (result->left->atomizer_flags|
+ result->right->atomizer_flags);
+ int flags = (result->left->root->diff_flags |
+ result->right->root->diff_flags);
+ bool force_text = (flags & DIFF_FLAG_FORCE_TEXT_DATA);
+ bool have_binary = (atomizer_flags & DIFF_ATOMIZER_FOUND_BINARY_DATA);
int i, rc;
if (!result)
@@ -124,6 +130,23 @@ diff_output_edscript(struct diff_output_info **output_info,
outinfo = *output_info;
}
+ if (have_binary && !force_text) {
+ for (i = 0; i < result->chunks.len; i++) {
+ struct diff_chunk *c = &result->chunks.head[i];
+ enum diff_chunk_type t = diff_chunk_type(c);
+
+ if (t != CHUNK_MINUS && t != CHUNK_PLUS)
+ continue;
+
+ fprintf(dest, "Binary files %s and %s differ\n",
+ info->left_path ? : "a",
+ info->right_path ? : "b");
+ break;
+ }
+
+ return DIFF_RC_OK;
+ }
+
for (i = 0; i < result->chunks.len; i++) {
struct diff_chunk *chunk = &result->chunks.head[i];
enum diff_chunk_type t = diff_chunk_type(chunk);
diff --git a/lib/diff_output_unidiff.c b/lib/diff_output_unidiff.c
index 2f178e7..520dc91 100644
--- a/lib/diff_output_unidiff.c
+++ b/lib/diff_output_unidiff.c
@@ -412,9 +412,13 @@ diff_output_unidiff(struct diff_output_info **output_info,
struct diff_output_unidiff_state *state;
struct diff_chunk_context cc = {};
struct diff_output_info *outinfo = NULL;
+ int atomizer_flags = (result->left->atomizer_flags|
+ result->right->atomizer_flags);
int flags = (result->left->root->diff_flags |
result->right->root->diff_flags);
bool show_function_prototypes = (flags & DIFF_FLAG_SHOW_PROTOTYPES);
+ bool force_text = (flags & DIFF_FLAG_FORCE_TEXT_DATA);
+ bool have_binary = (atomizer_flags & DIFF_ATOMIZER_FOUND_BINARY_DATA);
int i;
if (!result)
@@ -429,6 +433,23 @@ diff_output_unidiff(struct diff_output_info **output_info,
outinfo = *output_info;
}
+ if (have_binary && !force_text) {
+ for (i = 0; i < result->chunks.len; i++) {
+ struct diff_chunk *c = &result->chunks.head[i];
+ enum diff_chunk_type t = diff_chunk_type(c);
+
+ if (t != CHUNK_MINUS && t != CHUNK_PLUS)
+ continue;
+
+ fprintf(dest, "Binary files %s and %s differ\n",
+ info->left_path ? : "a",
+ info->right_path ? : "b");
+ break;
+ }
+
+ return DIFF_RC_OK;
+ }
+
state = diff_output_unidiff_state_alloc();
if (state == NULL) {
if (output_info) {