Hash :
28a0741f
Author :
Date :
2017-04-10T09:30:08
odb: verify object hashes The upstream git.git project verifies objects when looking them up from disk. This avoids scenarios where objects have somehow become corrupt on disk, e.g. due to hardware failures or bit flips. While our mantra is usually to follow upstream behavior, we do not do so in this case, as we never check hashes of objects we have just read from disk. To fix this, we create a new error class `GIT_EMISMATCH` which denotes that we have looked up an object with a hashsum mismatch. `odb_read_1` will then, after having read the object from its backend, hash the object and compare the resulting hash to the expected hash. If hashes do not match, it will return an error. This obviously introduces another computation of checksums and could potentially impact performance. Note though that we usually perform I/O operations directly before doing this computation, and as such the actual overhead should be drowned out by I/O. Running our test suite seems to confirm this guess. On a Linux system with best-of-five timings, we had 21.592s with the check enabled and 21.590s with the check disabled. Note though that our test suite mostly contains very small blobs only. It is expected that repositories with bigger blobs may notice an increased hit by this check. In addition to a new test, we also had to change the odb::backend::nonrefreshing test suite, which now triggers a hashsum mismatch when looking up the commit "deadbeef...". This is expected, as the fake backend allocated inside of the test will return an empty object for the OID "deadbeef...", which will obviously not hash back to "deadbeef..." again. We can simply adjust the hash to equal the hash of the empty object here to fix this test.
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132
/*
* Copyright (C) the libgit2 contributors. All rights reserved.
*
* This file is part of libgit2, distributed under the GNU GPL v2 with
* a Linking Exception. For full terms see the included COPYING file.
*/
#ifndef INCLUDE_odb_h__
#define INCLUDE_odb_h__
#include "git2/odb.h"
#include "git2/oid.h"
#include "git2/types.h"
#include "vector.h"
#include "cache.h"
#include "posix.h"
#include "filter.h"
#define GIT_OBJECTS_DIR "objects/"
#define GIT_OBJECT_DIR_MODE 0777
#define GIT_OBJECT_FILE_MODE 0444
/*
 * Global boolean declared here and defined elsewhere.
 * NOTE(review): judging by its name, it presumably toggles the verification
 * of object hashes when objects are read from the ODB -- confirm against
 * its definition.
 */
extern bool git_odb__strict_hash_verification;
/*
 * A raw object: a data buffer together with its length and its object type.
 *
 * DO NOT EXPORT
 */
typedef struct {
void *data; /**< Raw, decompressed object data. */
size_t len; /**< Total number of bytes in data. */
git_otype type; /**< Type of this object. */
} git_rawobj;
/*
 * An object of the object database.
 *
 * EXPORT
 */
struct git_odb_object {
/* Cache bookkeeping. NOTE(review): presumably what allows the object to be stored in a git_cache -- confirm. */
git_cached_obj cached;
/* Pointer to the object's buffer. NOTE(review): ownership is not visible here -- confirm who frees it. */
void *buffer;
};
/*
 * The object database.
 *
 * EXPORT
 */
struct git_odb {
/* Reference count of this database. */
git_refcount rc;
/* The backends attached to this database. */
git_vector backends;
/* Cache belonging to this database. */
git_cache own_cache;
/* One-bit flag. NOTE(review): presumably requests an fsync when writing objects -- confirm. */
unsigned int do_fsync :1;
};
/*
 * Special capability values for the object database.
 * NOTE(review): GIT_ODB_CAP_FROM_OWNER (-1) is presumably a sentinel meaning
 * "take the capabilities from the owner" -- confirm where it is consumed.
 */
typedef enum {
GIT_ODB_CAP_FROM_OWNER = -1,
} git_odb_cap_t;
/*
 * Set the capabilities for the object database.
 *
 * `odb` is the database to modify and `caps` the capabilities to set.
 * NOTE(review): the accepted values of `caps` and the meaning of the
 * returned int are not visible in this header -- confirm.
 */
int git_odb__set_caps(git_odb *odb, int caps);
/*
 * Add the default loose and packed backends for a database.
 *
 * `db` is the database the backends are added to.
 * NOTE(review): `objects_dir` is presumably the path of the objects
 * directory the backends operate on, and `as_alternates` /
 * `alternate_depth` presumably control handling of alternates -- confirm.
 */
int git_odb__add_default_backends(
git_odb *db, const char *objects_dir,
bool as_alternates, int alternate_depth);
/*
 * Hash a git_rawobj internally.
 * The `git_rawobj` must have been initialized beforehand.
 * NOTE(review): the resulting hash is presumably written to `id` -- confirm.
 */
int git_odb__hashobj(git_oid *id, git_rawobj *obj);
/*
 * Format the object header as it would appear in the on-disk object.
 *
 * The header describes an object of length `obj_len` and type `obj_type`.
 * NOTE(review): `hdr` is presumably the output buffer and `n` its size in
 * bytes; the meaning of the returned int is not visible here -- confirm.
 */
int git_odb__format_object_header(char *hdr, size_t n, git_off_t obj_len, git_otype obj_type);
/*
 * Hash an open file descriptor.
 * This is a performance call for when the contents of a fd need to be
 * hashed, but the fd is already open and we have the size of the contents.
 *
 * Saves us some `stat` calls.
 *
 * The fd is never closed, not even on error. It must be opened and closed
 * by the caller.
 */
int git_odb__hashfd(git_oid *out, git_file fd, size_t size, git_otype type);
/*
 * Hash an open file descriptor, applying an array of filters.
 * Acts just like git_odb__hashfd, with the addition of the filters in `fl`.
 */
int git_odb__hashfd_filtered(
git_oid *out, git_file fd, size_t len, git_otype type, git_filter_list *fl);
/*
 * Hash a `path`, assuming it could be a POSIX symlink: if the path is a
 * symlink, then the raw contents of the symlink will be hashed. Otherwise,
 * this will fall back to `git_odb__hashfd`.
 *
 * The hash type for this call is always `GIT_OBJ_BLOB` because symlinks may
 * only point to blobs.
 */
int git_odb__hashlink(git_oid *out, const char *path);
/*
 * Generate a GIT_EMISMATCH error for the ODB, i.e. report that an object
 * which was looked up has a hashsum mismatch.
 *
 * `expected` is the id the object was expected to have, `actual` the id
 * that was actually obtained.
 */
int git_odb__error_mismatch(
const git_oid *expected, const git_oid *actual);
/*
 * Generate a GIT_ENOTFOUND error for the ODB.
 *
 * NOTE(review): `oid_len` presumably gives the number of significant
 * characters of `oid` (to allow for abbreviated ids) -- confirm against
 * the implementation.
 */
int git_odb__error_notfound(
const char *message, const git_oid *oid, size_t oid_len);
/*
 * Generate a GIT_EAMBIGUOUS error for the ODB, described by `message`.
 */
int git_odb__error_ambiguous(const char *message);
/*
 * Attempt to read only the object header; if the header could not be read,
 * return the whole object instead.
 *
 * NOTE(review): `len_p` and `type_p` presumably receive the object's length
 * and type, and `out` presumably receives the object only when the whole
 * object had to be read -- confirm against the implementation.
 */
int git_odb__read_header_or_object(
git_odb_object **out, size_t *len_p, git_otype *type_p,
git_odb *db, const git_oid *id);
/*
 * Freshen the entry identified by `id` in the object database `db`.
 * NOTE(review): what "freshen" entails and the meaning of the returned int
 * are not visible in this header -- confirm against the implementation.
 */
int git_odb__freshen(git_odb *db, const git_oid *id);
/*
 * Fully free the object; internal method, DO NOT EXPORT.
 * NOTE(review): takes a `void *`, presumably so that it can be used as a
 * generic free callback -- confirm at the call sites.
 */
void git_odb_object__free(void *object);
#endif