Commit f19e3ca28835eab8dbef62915c475caa18f355fe

Vicent Martí 2012-02-10T20:16:42

odb: Proper symlink hashing

diff --git a/src/blob.c b/src/blob.c
index 7497ba7..4e95bd9 100644
--- a/src/blob.c
+++ b/src/blob.c
@@ -68,10 +68,7 @@ int git_blob_create_frombuffer(git_oid *oid, git_repository *repo, const void *b
 int git_blob_create_fromfile(git_oid *oid, git_repository *repo, const char *path)
 {
 	int error = GIT_SUCCESS;
-	int islnk = 0;
-	int fd = 0;
 	git_buf full_path = GIT_BUF_INIT;
-	char buffer[2048];
 	git_off_t size;
 	git_odb_stream *stream = NULL;
 	struct stat st;
@@ -92,39 +89,59 @@ int git_blob_create_fromfile(git_oid *oid, git_repository *repo, const char *pat
 		goto cleanup;
 	}
 
-	islnk = S_ISLNK(st.st_mode);
 	size = st.st_size;
 
 	error = git_repository_odb__weakptr(&odb, repo);
 	if (error < GIT_SUCCESS)
 		goto cleanup;
 
-	if (!islnk) {
-		if ((fd = p_open(full_path.ptr, O_RDONLY)) < 0) {
-			error = git__throw(GIT_ENOTFOUND, "Failed to create blob. Could not open '%s'", full_path.ptr
-);
-			goto cleanup;
-		}
-	}
-
 	if ((error = git_odb_open_wstream(&stream, odb, (size_t)size, GIT_OBJ_BLOB)) < GIT_SUCCESS)
 		goto cleanup;
 
-	while (size > 0) {
+	if (S_ISLNK(st.st_mode)) {
+		char *link_data;
 		ssize_t read_len;
 
-		if (!islnk)
-			read_len = p_read(fd, buffer, sizeof(buffer));
-		else
-			read_len = p_readlink(full_path.ptr, buffer, sizeof(buffer));
+		link_data = git__malloc(size);
+		if (!link_data) {
+			error = GIT_ENOMEM;
+			goto cleanup;
+		}
+
+		read_len = p_readlink(full_path.ptr, link_data, size);
 
-		if (read_len < 0) {
-			error = git__throw(GIT_EOSERR, "Failed to create blob. Can't read full file");
+		if (read_len != (ssize_t)size) {
+			error = git__throw(GIT_EOSERR, "Failed to create blob. Can't read symlink");
+			free(link_data);
 			goto cleanup;
 		}
 
-		stream->write(stream, buffer, read_len);
-		size -= read_len;
+		stream->write(stream, link_data, size);
+		free(link_data);
+
+	} else {
+		int fd;
+		char buffer[2048];
+
+		if ((fd = p_open(full_path.ptr, O_RDONLY)) < 0) {
+			error = git__throw(GIT_ENOTFOUND, "Failed to create blob. Could not open '%s'", full_path.ptr);
+			goto cleanup;
+		}
+
+		while (size > 0) {
+			ssize_t read_len = p_read(fd, buffer, sizeof(buffer));
+
+			if (read_len < 0) {
+				error = git__throw(GIT_EOSERR, "Failed to create blob. Can't read full file");
+				p_close(fd);
+				goto cleanup;
+			}
+
+			stream->write(stream, buffer, read_len);
+			size -= read_len;
+		}
+
+		p_close(fd);
 	}
 
 	error = stream->finalize_write(oid, stream);
@@ -132,11 +149,9 @@ int git_blob_create_fromfile(git_oid *oid, git_repository *repo, const char *pat
 cleanup:
 	if (stream)
 		stream->free(stream);
-	if (!islnk && fd)
-		p_close(fd);
+
 	git_buf_free(&full_path);
 
-	return error == GIT_SUCCESS ? GIT_SUCCESS :
-		git__rethrow(error, "Failed to create blob");
+	return error;
 }
 
diff --git a/src/odb.c b/src/odb.c
index 43a7523..ef3ced3 100644
--- a/src/odb.c
+++ b/src/odb.c
@@ -145,6 +145,48 @@ int git_odb__hashfd(git_oid *out, git_file fd, size_t size, git_otype type)
 	return GIT_SUCCESS;
 }
 
+/* Hash `path` as a blob: a symlink's target text, else the file contents. */
+int git_odb__hashlink(git_oid *out, const char *path)
+{
+	struct stat st;
+	git_off_t size;
+	int error;
+
+	if (p_lstat(path, &st) < 0)
+		return git__throw(GIT_EOSERR, "Failed to stat blob. %s", strerror(errno));
+
+	size = st.st_size;
+	if (!git__is_sizet(size))
+		return git__throw(GIT_EOSERR, "File size overflow for 32-bit systems");
+
+	if (S_ISLNK(st.st_mode)) {
+		ssize_t read_len;
+		/* One spare byte so a link that grew after lstat() is detected below. */
+		char *link_data = git__malloc((size_t)size + 1);
+
+		if (link_data == NULL)
+			return GIT_ENOMEM;
+
+		read_len = p_readlink(path, link_data, (size_t)size + 1);
+		if (read_len != (ssize_t)size) {
+			free(link_data); /* release the buffer on the error path too */
+			return git__throw(GIT_EOSERR, "Failed to read symlink data");
+		}
+
+		error = git_odb_hash(out, link_data, (size_t)size, GIT_OBJ_BLOB);
+		free(link_data);
+	} else {
+		int fd = p_open(path, O_RDONLY);
+		if (fd < 0)
+			return git__throw(GIT_ENOTFOUND, "Could not open '%s'", path);
+
+		error = git_odb__hashfd(out, fd, (size_t)size, GIT_OBJ_BLOB);
+		p_close(fd);
+	}
+
+	return error;
+}
+
 int git_odb_hashfile(git_oid *out, const char *path, git_otype type)
 {
 	int fd, error;
diff --git a/src/odb.h b/src/odb.h
index fd0787e..d5340ef 100644
--- a/src/odb.h
+++ b/src/odb.h
@@ -39,7 +39,32 @@ struct git_odb {
 	git_cache cache;
 };
 
+/*
+ * Hash a git_rawobj internally.
+ * The `git_rawobj` is supposed to be previously initialized
+ */
 int git_odb__hashobj(git_oid *id, git_rawobj *obj);
+
+/*
+ * Hash an open file descriptor.
+ * This is a performance call when the contents of a fd need to be hashed,
+ * but the fd is already open and we have the size of the contents.
+ *
+ * Saves us some `stat` calls.
+ *
+ * The fd is never closed, not even on error. It must be opened and closed
+ * by the caller.
+ */
 int git_odb__hashfd(git_oid *out, git_file fd, size_t size, git_otype type);
 
+/*
+ * Hash a `path`, assuming it could be a POSIX symlink: if the path is a symlink,
+ * then the raw contents of the symlink will be hashed. Otherwise, this will
+ * fall back to `git_odb__hashfd`.
+ *
+ * The hash type for this call is always `GIT_OBJ_BLOB`: both the raw symlink
+ * data and the contents of a regular file are hashed as blobs.
+ */
+int git_odb__hashlink(git_oid *out, const char *path);
+
 #endif