blob: implement function to parse raw data Currently, parsing objects is strictly tied to having an ODB object available. This makes it hard to parse an object when all that is available is its raw object and size. Furthermore, hacking around that limitation by directly creating an ODB structure either on stack or on heap does not really work that well due to ODB objects being reference counted and then automatically freed when reaching a reference count of zero. On some occasions parsing raw objects without touching the ODB is actually required, though. One use case is for example object verification, where we want to ensure that an object is valid before inserting it into the ODB or writing it into the git repository. As a first step towards that, introduce a distinction between raw and ODB objects for blobs. Creation of ODB objects stays the same by simply using `git_blob__parse`, but a new function `git_blob__parse_raw` has been added that creates a blob from a pair of data and size. By setting a new flag inside of the blob, we can now distinguish whether it is a raw or ODB object and treat it accordingly in several places. Note that the blob data passed in is not being copied. Because of that, callers need to make sure to keep it alive during the blob's lifetime. This is being used to avoid unnecessarily increasing the memory footprint when parsing largish blobs.
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88
diff --git a/src/blob.c b/src/blob.c
index b1c0280..bcd3f41 100644
--- a/src/blob.c
+++ b/src/blob.c
@@ -19,13 +19,19 @@
const void *git_blob_rawcontent(const git_blob *blob)
{
assert(blob);
- return git_odb_object_data(blob->odb_object);
+ if (blob->raw)
+ return blob->data.raw.data;
+ else
+ return git_odb_object_data(blob->data.odb);
}
git_off_t git_blob_rawsize(const git_blob *blob)
{
assert(blob);
- return (git_off_t)git_odb_object_size(blob->odb_object);
+ if (blob->raw)
+ return blob->data.raw.size;
+ else
+ return (git_off_t)git_odb_object_size(blob->data.odb);
}
int git_blob__getbuf(git_buf *buffer, git_blob *blob)
@@ -36,17 +42,31 @@ int git_blob__getbuf(git_buf *buffer, git_blob *blob)
git_blob_rawsize(blob));
}
-void git_blob__free(void *blob)
+void git_blob__free(void *_blob)
{
- git_odb_object_free(((git_blob *)blob)->odb_object);
+ git_blob *blob = (git_blob *) _blob;
+ if (!blob->raw)
+ git_odb_object_free(blob->data.odb);
git__free(blob);
}
-int git_blob__parse(void *blob, git_odb_object *odb_obj)
+int git_blob__parse_raw(void *_blob, const char *data, size_t size)
{
+ git_blob *blob = (git_blob *) _blob;
+ assert(blob);
+ blob->raw = 1;
+ blob->data.raw.data = data;
+ blob->data.raw.size = size;
+ return 0;
+}
+
+int git_blob__parse(void *_blob, git_odb_object *odb_obj)
+{
+ git_blob *blob = (git_blob *) _blob;
assert(blob);
git_cached_obj_incref((git_cached_obj *)odb_obj);
- ((git_blob *)blob)->odb_object = odb_obj;
+ blob->raw = 0;
+ blob->data.odb = odb_obj;
return 0;
}
diff --git a/src/blob.h b/src/blob.h
index 3f1f977..f644ec5 100644
--- a/src/blob.h
+++ b/src/blob.h
@@ -16,11 +16,20 @@
struct git_blob {
git_object object;
- git_odb_object *odb_object;
+
+ union {
+ git_odb_object *odb;
+ struct {
+ const char *data;
+ git_off_t size;
+ } raw;
+ } data;
+ unsigned int raw:1;
};
void git_blob__free(void *blob);
int git_blob__parse(void *blob, git_odb_object *obj);
+int git_blob__parse_raw(void *blob, const char *data, size_t size);
int git_blob__getbuf(git_buf *buffer, git_blob *blob);
extern int git_blob__create_from_paths(