Hash :
f56f8585
Author :
Date :
2012-11-19T22:23:16
indexer: use the packfile streaming API The new API allows us to read the object bit by bit from the packfile, instead of needing it all at once in the packfile. This also allows us to hash the object as it comes in from the network instead of having to try to read it all and failing repeatedly for larger objects. This is only the first step, but it already shows huge improvements when dealing with objects over a few megabytes in size. It reduces the memory needs in some cases, but delta objects still need to be completely in memory and the old inefficent method is still used for that.
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99
/*
* Copyright (C) 2009-2012 the libgit2 contributors
*
* This file is part of libgit2, distributed under the GNU GPL v2 with
* a Linking Exception. For full terms see the included COPYING file.
*/
#ifndef INCLUDE_odb_h__
#define INCLUDE_odb_h__
#include "git2/odb.h"
#include "git2/oid.h"
#include "git2/types.h"
#include "vector.h"
#include "cache.h"
#include "posix.h"
#define GIT_OBJECTS_DIR "objects/"
#define GIT_OBJECT_DIR_MODE 0777
#define GIT_OBJECT_FILE_MODE 0444
/* DO NOT EXPORT */
typedef struct {
void *data; /**< Raw, decompressed object data. */
size_t len; /**< Total number of bytes in data. */
git_otype type; /**< Type of this object. */
} git_rawobj;
/* EXPORT */
struct git_odb_object {
git_cached_obj cached;
git_rawobj raw;
};
/* EXPORT */
struct git_odb {
git_refcount rc;
git_vector backends;
git_cache cache;
};
/*
* Hash a git_rawobj internally.
* The `git_rawobj` is supposed to be previously initialized
*/
int git_odb__hashobj(git_oid *id, git_rawobj *obj);
/*
* Format the object header such as it would appear in the on-disk object
*/
int git_odb__format_object_header(char *hdr, size_t n, size_t obj_len, git_otype obj_type);
/*
* Hash an open file descriptor.
* This is a performance call when the contents of a fd need to be hashed,
* but the fd is already open and we have the size of the contents.
*
* Saves us some `stat` calls.
*
* The fd is never closed, not even on error. It must be opened and closed
* by the caller
*/
int git_odb__hashfd(git_oid *out, git_file fd, size_t size, git_otype type);
/*
* Hash an open file descriptor applying an array of filters
* Acts just like git_odb__hashfd with the addition of filters...
*/
int git_odb__hashfd_filtered(
git_oid *out, git_file fd, size_t len, git_otype type, git_vector *filters);
/*
* Hash a `path`, assuming it could be a POSIX symlink: if the path is a
* symlink, then the raw contents of the symlink will be hashed. Otherwise,
* this will fallback to `git_odb__hashfd`.
*
* The hash type for this call is always `GIT_OBJ_BLOB` because symlinks may
* only point to blobs.
*/
int git_odb__hashlink(git_oid *out, const char *path);
/*
* Generate a GIT_ENOTFOUND error for the ODB.
*/
int git_odb__error_notfound(const char *message, const git_oid *oid);
/*
* Generate a GIT_EAMBIGUOUS error for the ODB.
*/
int git_odb__error_ambiguous(const char *message);
/*
* Attempt to read object header or just return whole object if it could
* not be read.
*/
int git_odb__read_header_or_object(
git_odb_object **out, size_t *len_p, git_otype *type_p,
git_odb *db, const git_oid *id);
#endif