Commit 520287f63a86c5d284ff4d23a2d5d61decbb8685

Vicent Martí 2013-08-19T02:17:00

Merge pull request #1785 from libgit2/cmn/odb-hash-frontend odb: move hashing to the frontend for streaming

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
diff --git a/include/git2/odb.h b/include/git2/odb.h
index b3e9a57..3e93a93 100644
--- a/include/git2/odb.h
+++ b/include/git2/odb.h
@@ -219,16 +219,9 @@ GIT_EXTERN(int) git_odb_write(git_oid *out, git_odb *odb, const void *data, size
  * The type and final length of the object must be specified
  * when opening the stream.
  *
- * The returned stream will be of type `GIT_STREAM_WRONLY` and
- * will have the following methods:
- *
- *		- stream->write: write `n` bytes into the stream
- *		- stream->finalize_write: close the stream and store the object in
- *			the odb
- *		- stream->free: free the stream
- *
- * The streaming write won't be effective until `stream->finalize_write`
- * is called and returns without an error
+ * The returned stream will be of type `GIT_STREAM_WRONLY`, and it
+ * won't be effective until `git_odb_stream_finalize_write` is called
+ * and returns without an error
  *
  * The stream must always be free'd or will leak memory.
  *
@@ -243,6 +236,42 @@ GIT_EXTERN(int) git_odb_write(git_oid *out, git_odb *odb, const void *data, size
 GIT_EXTERN(int) git_odb_open_wstream(git_odb_stream **out, git_odb *db, size_t size, git_otype type);
 
 /**
+ * Write to an odb stream
+ *
+ * @param stream the stream
+ * @param buffer the data to write
+ * @param len the buffer's length
+ * @return 0 if the write succeeded; error code otherwise
+ */
+GIT_EXTERN(int) git_odb_stream_write(git_odb_stream *stream, const char *buffer, size_t len);
+
+/**
+ * Finish writing to an odb stream
+ *
+ * The object will take its final name and will be available to the
+ * odb.
+ *
+ * @param out pointer to store the resulting object's id
+ * @param stream the stream
+ * @return 0 on success; an error code otherwise
+ */
+GIT_EXTERN(int) git_odb_stream_finalize_write(git_oid *out, git_odb_stream *stream);
+
+/**
+ * Read from an odb stream
+ *
+ * Most backends don't implement streaming reads
+ */
+GIT_EXTERN(int) git_odb_stream_read(git_odb_stream *stream, char *buffer, size_t len);
+
+/**
+ * Free an odb stream
+ *
+ * @param stream the stream to free
+ */
+GIT_EXTERN(void) git_odb_stream_free(git_odb_stream *stream);
+
+/**
  * Open a stream to read an object from the ODB
  *
  * Note that most backends do *not* support streaming reads
diff --git a/include/git2/odb_backend.h b/include/git2/odb_backend.h
index af1e3e5..dca4995 100644
--- a/include/git2/odb_backend.h
+++ b/include/git2/odb_backend.h
@@ -65,14 +65,41 @@ typedef enum {
 	GIT_STREAM_RW = (GIT_STREAM_RDONLY | GIT_STREAM_WRONLY),
 } git_odb_stream_t;
 
-/** A stream to read/write from a backend */
+typedef struct git_hash_ctx git_hash_ctx;
+
+/**
+ * A stream to read/write from a backend.
+ *
+ * This represents a stream of data being written to or read from a
+ * backend. When writing, the frontend functions take care of
+ * calculating the object's id and all `finalize_write` needs to do is
+ * store the object with the id it is passed.
+ */
 struct git_odb_stream {
 	git_odb_backend *backend;
 	unsigned int mode;
+	git_hash_ctx *hash_ctx;
 
+	/**
+	 * Write at most `len` bytes into `buffer` and advance the
+	 * stream.
+	 */
 	int (*read)(git_odb_stream *stream, char *buffer, size_t len);
+
+	/**
+	 * Write `len` bytes from `buffer` into the stream.
+	 */
 	int (*write)(git_odb_stream *stream, const char *buffer, size_t len);
-	int (*finalize_write)(git_oid *oid_p, git_odb_stream *stream);
+
+	/**
+	 * Store the contents of the stream as an object with the id
+	 * specified in `oid`.
+	 */
+	int (*finalize_write)(git_odb_stream *stream, const git_oid *oid);
+
+	/**
+	 * Free the stream's memory.
+	 */
 	void (*free)(git_odb_stream *stream);
 };
 
diff --git a/include/git2/sys/odb_backend.h b/include/git2/sys/odb_backend.h
index 3cd2734..31ffe1c 100644
--- a/include/git2/sys/odb_backend.h
+++ b/include/git2/sys/odb_backend.h
@@ -48,12 +48,12 @@ struct git_odb_backend {
 	int (* read_header)(
 		size_t *, git_otype *, git_odb_backend *, const git_oid *);
 
-	/* The writer may assume that the object
-	 * has already been hashed and is passed
-	 * in the first parameter.
+	/**
+	 * Write an object into the backend. The id of the object has
+	 * already been calculated and is passed in.
 	 */
 	int (* write)(
-		git_oid *, git_odb_backend *, const void *, size_t, git_otype);
+		git_odb_backend *, const git_oid *, const void *, size_t, git_otype);
 
 	int (* writestream)(
 		git_odb_stream **, git_odb_backend *, size_t, git_otype);
diff --git a/src/blob.c b/src/blob.c
index 5bb51f7..6a289f4 100644
--- a/src/blob.c
+++ b/src/blob.c
@@ -60,10 +60,10 @@ int git_blob_create_frombuffer(git_oid *oid, git_repository *repo, const void *b
 		(error = git_odb_open_wstream(&stream, odb, len, GIT_OBJ_BLOB)) < 0)
 		return error;
 
-	if ((error = stream->write(stream, buffer, len)) == 0)
-		error = stream->finalize_write(oid, stream);
+	if ((error = git_odb_stream_write(stream, buffer, len)) == 0)
+		error = git_odb_stream_finalize_write(oid, stream);
 
-	stream->free(stream);
+	git_odb_stream_free(stream);
 	return error;
 }
 
@@ -80,12 +80,12 @@ static int write_file_stream(
 		return error;
 
 	if ((fd = git_futils_open_ro(path)) < 0) {
-		stream->free(stream);
+		git_odb_stream_free(stream);
 		return -1;
 	}
 
 	while (!error && (read_len = p_read(fd, buffer, sizeof(buffer))) > 0) {
-		error = stream->write(stream, buffer, read_len);
+		error = git_odb_stream_write(stream, buffer, read_len);
 		written += read_len;
 	}
 
@@ -97,9 +97,9 @@ static int write_file_stream(
 	}
 
 	if (!error)
-		error = stream->finalize_write(oid, stream);
+		error = git_odb_stream_finalize_write(oid, stream);
 
-	stream->free(stream);
+	git_odb_stream_free(stream);
 	return error;
 }
 
diff --git a/src/odb.c b/src/odb.c
index 6969cf7..21b46bf 100644
--- a/src/odb.c
+++ b/src/odb.c
@@ -291,10 +291,10 @@ typedef struct {
 	git_otype type;
 } fake_wstream;
 
-static int fake_wstream__fwrite(git_oid *oid, git_odb_stream *_stream)
+static int fake_wstream__fwrite(git_odb_stream *_stream, const git_oid *oid)
 {
 	fake_wstream *stream = (fake_wstream *)_stream;
-	return _stream->backend->write(oid, _stream->backend, stream->buffer, stream->size, stream->type);
+	return _stream->backend->write(_stream->backend, oid, stream->buffer, stream->size, stream->type);
 }
 
 static int fake_wstream__write(git_odb_stream *_stream, const char *data, size_t len)
@@ -851,7 +851,7 @@ int git_odb_write(
 			continue;
 
 		if (b->write != NULL)
-			error = b->write(oid, b, data, len, type);
+			error = b->write(b, oid, data, len, type);
 	}
 
 	if (!error || error == GIT_PASSTHROUGH)
@@ -865,17 +865,27 @@ int git_odb_write(
 		return error;
 
 	stream->write(stream, data, len);
-	error = stream->finalize_write(oid, stream);
-	stream->free(stream);
+	error = stream->finalize_write(stream, oid);
+	git_odb_stream_free(stream);
 
 	return error;
 }
 
+static void hash_header(git_hash_ctx *ctx, size_t size, git_otype type)
+{
+	char header[64];
+	int hdrlen;
+
+	hdrlen = git_odb__format_object_header(header, sizeof(header), size, type);
+	git_hash_update(ctx, header, hdrlen);
+}
+
 int git_odb_open_wstream(
 	git_odb_stream **stream, git_odb *db, size_t size, git_otype type)
 {
 	size_t i, writes = 0;
 	int error = GIT_ERROR;
+	git_hash_ctx *ctx;
 
 	assert(stream && db);
 
@@ -901,9 +911,40 @@ int git_odb_open_wstream(
 	if (error < 0 && !writes)
 		error = git_odb__error_unsupported_in_backend("write object");
 
+	ctx = git__malloc(sizeof(git_hash_ctx));
+	GITERR_CHECK_ALLOC(ctx);
+
+
+	git_hash_ctx_init(ctx);
+	hash_header(ctx, size, type);
+	(*stream)->hash_ctx = ctx;
+
 	return error;
 }
 
+int git_odb_stream_write(git_odb_stream *stream, const char *buffer, size_t len)
+{
+	git_hash_update(stream->hash_ctx, buffer, len);
+	return stream->write(stream, buffer, len);
+}
+
+int git_odb_stream_finalize_write(git_oid *out, git_odb_stream *stream)
+{
+	git_hash_final(out, stream->hash_ctx);
+	return stream->finalize_write(stream, out);
+}
+
+int git_odb_stream_read(git_odb_stream *stream, char *buffer, size_t len)
+{
+	return stream->read(stream, buffer, len);
+}
+
+void git_odb_stream_free(git_odb_stream *stream)
+{
+	git__free(stream->hash_ctx);
+	stream->free(stream);
+}
+
 int git_odb_open_rstream(git_odb_stream **stream, git_odb *db, const git_oid *oid)
 {
 	size_t i, reads = 0;
diff --git a/src/odb_loose.c b/src/odb_loose.c
index 90e2587..abf46a1 100644
--- a/src/odb_loose.c
+++ b/src/odb_loose.c
@@ -771,15 +771,14 @@ static int loose_backend__foreach(git_odb_backend *_backend, git_odb_foreach_cb 
 	return state.cb_error ? state.cb_error : error;
 }
 
-static int loose_backend__stream_fwrite(git_oid *oid, git_odb_stream *_stream)
+static int loose_backend__stream_fwrite(git_odb_stream *_stream, const git_oid *oid)
 {
 	loose_writestream *stream = (loose_writestream *)_stream;
 	loose_backend *backend = (loose_backend *)_stream->backend;
 	git_buf final_path = GIT_BUF_INIT;
 	int error = 0;
 
-	if (git_filebuf_hash(oid, &stream->fbuf) < 0 ||
-		object_file_name(&final_path, backend, oid) < 0 ||
+	if (object_file_name(&final_path, backend, oid) < 0 ||
 		object_mkdir(&final_path, backend) < 0)
 		error = -1;
 	/*
@@ -812,17 +811,6 @@ static void loose_backend__stream_free(git_odb_stream *_stream)
 	git__free(stream);
 }
 
-static int format_object_header(char *hdr, size_t n, size_t obj_len, git_otype obj_type)
-{
-	const char *type_str = git_object_type2string(obj_type);
-	int len = snprintf(hdr, n, "%s %"PRIuZ, type_str, obj_len);
-
-	assert(len > 0);				/* otherwise snprintf() is broken */
-	assert(((size_t)len) < n);		/* otherwise the caller is broken! */
-
-	return len+1;
-}
-
 static int loose_backend__stream(git_odb_stream **stream_out, git_odb_backend *_backend, size_t length, git_otype type)
 {
 	loose_backend *backend;
@@ -836,7 +824,7 @@ static int loose_backend__stream(git_odb_stream **stream_out, git_odb_backend *_
 	backend = (loose_backend *)_backend;
 	*stream_out = NULL;
 
-	hdrlen = format_object_header(hdr, sizeof(hdr), length, type);
+	hdrlen = git_odb__format_object_header(hdr, sizeof(hdr), length, type);
 
 	stream = git__calloc(1, sizeof(loose_writestream));
 	GITERR_CHECK_ALLOC(stream);
@@ -850,7 +838,6 @@ static int loose_backend__stream(git_odb_stream **stream_out, git_odb_backend *_
 
 	if (git_buf_joinpath(&tmp_path, backend->objects_dir, "tmp_object") < 0 ||
 		git_filebuf_open(&stream->fbuf, tmp_path.ptr,
-			GIT_FILEBUF_HASH_CONTENTS |
 			GIT_FILEBUF_TEMPORARY |
 			(backend->object_zlib_level << GIT_FILEBUF_DEFLATE_SHIFT)) < 0 ||
 		stream->stream.write((git_odb_stream *)stream, hdr, hdrlen) < 0)
@@ -865,7 +852,7 @@ static int loose_backend__stream(git_odb_stream **stream_out, git_odb_backend *_
 	return !stream ? -1 : 0;
 }
 
-static int loose_backend__write(git_oid *oid, git_odb_backend *_backend, const void *data, size_t len, git_otype type)
+static int loose_backend__write(git_odb_backend *_backend, const git_oid *oid, const void *data, size_t len, git_otype type)
 {
 	int error = 0, header_len;
 	git_buf final_path = GIT_BUF_INIT;
@@ -876,7 +863,7 @@ static int loose_backend__write(git_oid *oid, git_odb_backend *_backend, const v
 	backend = (loose_backend *)_backend;
 
 	/* prepare the header for the file */
-	header_len = format_object_header(header, sizeof(header), len, type);
+	header_len = git_odb__format_object_header(header, sizeof(header), len, type);
 
 	if (git_buf_joinpath(&final_path, backend->objects_dir, "tmp_object") < 0 ||
 		git_filebuf_open(&fbuf, final_path.ptr,
diff --git a/src/tag.c b/src/tag.c
index 71f4c1e..31a3c8b 100644
--- a/src/tag.c
+++ b/src/tag.c
@@ -366,10 +366,10 @@ int git_tag_create_frombuffer(git_oid *oid, git_repository *repo, const char *bu
 	if (git_odb_open_wstream(&stream, odb, strlen(buffer), GIT_OBJ_TAG) < 0)
 		return -1;
 
-	stream->write(stream, buffer, strlen(buffer));
+	git_odb_stream_write(stream, buffer, strlen(buffer));
 
-	error = stream->finalize_write(oid, stream);
-	stream->free(stream);
+	error = git_odb_stream_finalize_write(oid, stream);
+	git_odb_stream_free(stream);
 
 	if (error < 0) {
 		git_buf_free(&ref_name);
diff --git a/src/transports/local.c b/src/transports/local.c
index a9da814..8a75de7 100644
--- a/src/transports/local.c
+++ b/src/transports/local.c
@@ -287,9 +287,9 @@ static int local_push_copy_object(
 		odb_obj_size, odb_obj_type)) < 0)
 		goto on_error;
 
-	if (odb_stream->write(odb_stream, (char *)git_odb_object_data(odb_obj),
+	if (git_odb_stream_write(odb_stream, (char *)git_odb_object_data(odb_obj),
 		odb_obj_size) < 0 ||
-		odb_stream->finalize_write(&remote_odb_obj_oid, odb_stream) < 0) {
+		git_odb_stream_finalize_write(&remote_odb_obj_oid, odb_stream) < 0) {
 		error = -1;
 	} else if (git_oid__cmp(&obj->id, &remote_odb_obj_oid) != 0) {
 		giterr_set(GITERR_ODB, "Error when writing object to remote odb "
@@ -298,7 +298,7 @@ static int local_push_copy_object(
 		error = -1;
 	}
 
-	odb_stream->free(odb_stream);
+	git_odb_stream_free(odb_stream);
 
 on_error:
 	git_odb_object_free(odb_obj);
diff --git a/tests-clar/object/raw/write.c b/tests-clar/object/raw/write.c
index 9709c03..273f08f 100644
--- a/tests-clar/object/raw/write.c
+++ b/tests-clar/object/raw/write.c
@@ -31,9 +31,9 @@ static void streaming_write(git_oid *oid, git_odb *odb, git_rawobj *raw)
    int error;
 
    cl_git_pass(git_odb_open_wstream(&stream, odb, raw->len, raw->type));
-   stream->write(stream, raw->data, raw->len);
-   error = stream->finalize_write(oid, stream);
-   stream->free(stream);
+   git_odb_stream_write(stream, raw->data, raw->len);
+   error = git_odb_stream_finalize_write(oid, stream);
+   git_odb_stream_free(stream);
    cl_git_pass(error);
 }