odb: Proper symlink hashing
diff --git a/src/blob.c b/src/blob.c
index 7497ba7..4e95bd9 100644
--- a/src/blob.c
+++ b/src/blob.c
@@ -68,10 +68,7 @@ int git_blob_create_frombuffer(git_oid *oid, git_repository *repo, const void *b
int git_blob_create_fromfile(git_oid *oid, git_repository *repo, const char *path)
{
int error = GIT_SUCCESS;
- int islnk = 0;
- int fd = 0;
git_buf full_path = GIT_BUF_INIT;
- char buffer[2048];
git_off_t size;
git_odb_stream *stream = NULL;
struct stat st;
@@ -92,39 +89,59 @@ int git_blob_create_fromfile(git_oid *oid, git_repository *repo, const char *pat
goto cleanup;
}
- islnk = S_ISLNK(st.st_mode);
size = st.st_size;
error = git_repository_odb__weakptr(&odb, repo);
if (error < GIT_SUCCESS)
goto cleanup;
- if (!islnk) {
- if ((fd = p_open(full_path.ptr, O_RDONLY)) < 0) {
- error = git__throw(GIT_ENOTFOUND, "Failed to create blob. Could not open '%s'", full_path.ptr
-);
- goto cleanup;
- }
- }
-
if ((error = git_odb_open_wstream(&stream, odb, (size_t)size, GIT_OBJ_BLOB)) < GIT_SUCCESS)
goto cleanup;
- while (size > 0) {
+ if (S_ISLNK(st.st_mode)) {
+ char *link_data;
ssize_t read_len;
- if (!islnk)
- read_len = p_read(fd, buffer, sizeof(buffer));
- else
- read_len = p_readlink(full_path.ptr, buffer, sizeof(buffer));
+ link_data = git__malloc(size);
+ if (!link_data) {
+ error = GIT_ENOMEM;
+ goto cleanup;
+ }
+
+ read_len = p_readlink(full_path.ptr, link_data, size);
- if (read_len < 0) {
- error = git__throw(GIT_EOSERR, "Failed to create blob. Can't read full file");
+ if (read_len != (ssize_t)size) {
+ error = git__throw(GIT_EOSERR, "Failed to create blob. Can't read symlink");
+ free(link_data);
goto cleanup;
}
- stream->write(stream, buffer, read_len);
- size -= read_len;
+ stream->write(stream, link_data, size);
+ free(link_data);
+
+ } else {
+ int fd;
+ char buffer[2048];
+
+ if ((fd = p_open(full_path.ptr, O_RDONLY)) < 0) {
+ error = git__throw(GIT_ENOTFOUND, "Failed to create blob. Could not open '%s'", full_path.ptr);
+ goto cleanup;
+ }
+
+ while (size > 0) {
+ ssize_t read_len = p_read(fd, buffer, sizeof(buffer));
+
+ if (read_len < 0) {
+ error = git__throw(GIT_EOSERR, "Failed to create blob. Can't read full file");
+ p_close(fd);
+ goto cleanup;
+ }
+
+ stream->write(stream, buffer, read_len);
+ size -= read_len;
+ }
+
+ p_close(fd);
}
error = stream->finalize_write(oid, stream);
@@ -132,11 +149,9 @@ int git_blob_create_fromfile(git_oid *oid, git_repository *repo, const char *pat
cleanup:
if (stream)
stream->free(stream);
- if (!islnk && fd)
- p_close(fd);
+
git_buf_free(&full_path);
- return error == GIT_SUCCESS ? GIT_SUCCESS :
- git__rethrow(error, "Failed to create blob");
+ return error;
}
diff --git a/src/odb.c b/src/odb.c
index 43a7523..ef3ced3 100644
--- a/src/odb.c
+++ b/src/odb.c
@@ -145,6 +145,68 @@ int git_odb__hashfd(git_oid *out, git_file fd, size_t size, git_otype type)
return GIT_SUCCESS;
}
+/*
+ * Hash the filesystem entry at `path` as a blob without following symlinks.
+ *
+ * A symlink is hashed over the target string read with `p_readlink`; any
+ * other entry is opened and passed to `git_odb__hashfd`. In both cases the
+ * length comes from `p_lstat`.
+ *
+ * Returns the result of the hash call, GIT_ENOMEM if the link buffer cannot
+ * be allocated, or a git__throw error if the entry cannot be stat'ed,
+ * opened or read in full.
+ */
+int git_odb__hashlink(git_oid *out, const char *path)
+{
+	struct stat st;
+	int error;
+	git_off_t size;
+
+	/* lstat rather than stat, so that a symlink is examined itself */
+	error = p_lstat(path, &st);
+	if (error < 0)
+		return git__throw(GIT_EOSERR, "Failed to stat blob. %s", strerror(errno));
+
+	size = st.st_size;
+
+	if (!git__is_sizet(size))
+		return git__throw(GIT_EOSERR, "File size overflow for 32-bit systems");
+
+	if (S_ISLNK(st.st_mode)) {
+		char *link_data;
+		ssize_t read_len;
+
+		/*
+		 * Allocate the extra byte that p_readlink is allowed to fill.
+		 * Assuming readlink(2) semantics, a target that grew since the
+		 * lstat then yields size + 1 bytes and is rejected below
+		 * rather than being silently truncated.
+		 */
+		link_data = git__malloc((size_t)size + 1);
+		if (link_data == NULL)
+			return GIT_ENOMEM;
+
+		read_len = p_readlink(path, link_data, (size_t)size + 1);
+		if (read_len != (ssize_t)size) {
+			free(link_data);
+			return git__throw(GIT_EOSERR, "Failed to read symlink data");
+		}
+
+		error = git_odb_hash(out, link_data, (size_t)size, GIT_OBJ_BLOB);
+		free(link_data);
+	} else {
+		int fd;
+
+		if ((fd = p_open(path, O_RDONLY)) < 0)
+			return git__throw(GIT_ENOTFOUND, "Could not open '%s'", path);
+
+		error = git_odb__hashfd(out, fd, (size_t)size, GIT_OBJ_BLOB);
+		p_close(fd);
+	}
+
+	return error;
+}
+
int git_odb_hashfile(git_oid *out, const char *path, git_otype type)
{
int fd, error;
diff --git a/src/odb.h b/src/odb.h
index fd0787e..d5340ef 100644
--- a/src/odb.h
+++ b/src/odb.h
@@ -39,7 +39,32 @@ struct git_odb {
git_cache cache;
};
+/*
+ * Hash a git_rawobj internally.
+ * The `git_rawobj` is supposed to be previously initialized
+ */
int git_odb__hashobj(git_oid *id, git_rawobj *obj);
+
+/*
+ * Hash an open file descriptor.
+ * This is a performance call when the contents of a fd need to be hashed,
+ * but the fd is already open and we have the size of the contents.
+ *
+ * Saves us some `stat` calls.
+ *
+ * The fd is never closed, not even on error. It must be opened and closed
+ * by the caller
+ */
int git_odb__hashfd(git_oid *out, git_file fd, size_t size, git_otype type);
+/*
+ * Hash a `path`, assuming it could be a POSIX symlink: if the path is a symlink,
+ * then the raw contents of the symlink will be hashed. Otherwise, this will
+ * fall back to `git_odb__hashfd`.
+ *
+ * The hash type for this call is always `GIT_OBJ_BLOB` because symlinks may only
+ * point to blobs.
+ */
+int git_odb__hashlink(git_oid *out, const char *path);
+
#endif