Commit 4277420a1f45ca37345733120cbe89d19c550f92

Stefan Sperling 2019-06-29T12:58:30

speed up matching of abbreviated commit IDs in pack files

diff --git a/lib/got_lib_pack.h b/lib/got_lib_pack.h
index 7af741d..a5191eb 100644
--- a/lib/got_lib_pack.h
+++ b/lib/got_lib_pack.h
@@ -159,12 +159,10 @@ struct got_packfile_obj_data {
 const struct got_error *got_packidx_init_hdr(struct got_packidx *, int);
 const struct got_error *got_packidx_open(struct got_packidx **,
     const char *, int);
-const struct got_error* got_packidx_close(struct got_packidx *);
+const struct got_error *got_packidx_close(struct got_packidx *);
 int got_packidx_get_object_idx(struct got_packidx *, struct got_object_id *);
-typedef const struct got_error *(*got_packidx_for_each_id_cb)(void *,
-    struct got_object_id *);
-const struct got_error *got_packidx_for_each_id(struct got_packidx *,
-    got_packidx_for_each_id_cb cb, void *);
+const struct got_error *got_packidx_match_id_str_prefix(struct got_object_id **,
+    struct got_packidx *, const char *);
 
 const struct got_error *got_packfile_open_object(struct got_object **,
     struct got_pack *, struct got_packidx *, int, struct got_object_id *);
diff --git a/lib/got_lib_sha1.h b/lib/got_lib_sha1.h
index d207ee3..141cb25 100644
--- a/lib/got_lib_sha1.h
+++ b/lib/got_lib_sha1.h
@@ -16,5 +16,6 @@
 
 #define GOT_SHA1_STRING_ZERO "0000000000000000000000000000000000000000"
 
+int got_parse_xdigit(uint8_t *, const char *);
 int got_parse_sha1_digest(uint8_t *, const char *);
 char *got_sha1_digest_to_str(const uint8_t *, char *, size_t);
diff --git a/lib/pack.c b/lib/pack.c
index 269e6fa..b2200e9 100644
--- a/lib/pack.c
+++ b/lib/pack.c
@@ -444,24 +444,57 @@ got_packidx_get_object_idx(struct got_packidx *packidx, struct got_object_id *id
 }
 
 const struct got_error *
-got_packidx_for_each_id(struct got_packidx *packidx,
-    got_packidx_for_each_id_cb cb, void *cb_arg)
+got_packidx_match_id_str_prefix(struct got_object_id **unique_id,
+    struct got_packidx *packidx, const char *id_str_prefix)
 {
-	const struct got_error *err = NULL;
-	uint32_t nobj = betoh32(packidx->hdr.fanout_table[0xff]);
+	u_int8_t id0;
+	uint32_t totobj = betoh32(packidx->hdr.fanout_table[0xff]);
+	char hex[3];
+	size_t prefix_len = strlen(id_str_prefix);
 	struct got_packidx_object_id *oid;
-	struct got_object_id id;
 	int i;
 
-	for (i = 0; i < nobj; i++) {
-		oid = &packidx->hdr.sorted_ids[i];
-		memcpy(&id, oid->sha1, SHA1_DIGEST_LENGTH);
-		err = cb(cb_arg, &id);
-		if (err)
+	*unique_id = NULL;
+
+	if (prefix_len < 2)
+		return got_error(GOT_ERR_BAD_OBJ_ID_STR);
+
+	hex[0] = id_str_prefix[0];
+	hex[1] = id_str_prefix[1];
+	hex[2] = '\0';
+	if (!got_parse_xdigit(&id0, hex))
+		return got_error(GOT_ERR_BAD_OBJ_ID_STR);
+
+	i = betoh32(packidx->hdr.fanout_table[id0 - 1]);
+	if (i == 0)
+		return NULL;
+
+	oid = &packidx->hdr.sorted_ids[i];
+	while (i < totobj && oid->sha1[0] == id0) {
+		char id_str[SHA1_DIGEST_STRING_LENGTH];
+		int cmp;
+
+		if (!got_sha1_digest_to_str(oid->sha1, id_str, sizeof(id_str)))
+		        return got_error(GOT_ERR_NO_SPACE);
+
+		cmp = strncmp(id_str, id_str_prefix, prefix_len);
+		if (cmp < 0) {
+			oid = &packidx->hdr.sorted_ids[++i];
+			continue;
+		} else if (cmp > 0)
 			break;
+
+		if (*unique_id != NULL)
+			return got_error(GOT_ERR_AMBIGUOUS_ID);
+		*unique_id = malloc(sizeof(**unique_id));
+		if (*unique_id == NULL)
+			return got_error_from_errno("malloc");
+		memcpy((*unique_id)->sha1, oid->sha1, SHA1_DIGEST_LENGTH);
+
+		oid = &packidx->hdr.sorted_ids[++i];
 	}
 
-	return err;
+	return NULL;
 }
 
 const struct got_error *
diff --git a/lib/repository.c b/lib/repository.c
index f4cf798..7eafcbd 100644
--- a/lib/repository.c
+++ b/lib/repository.c
@@ -878,47 +878,16 @@ got_repo_init(const char *repo_path)
 	return NULL;
 }
 
-struct match_packidx_id_args {
-	const char *id_str_prefix;
-	struct got_object_id *unique_id;
-};
-
-static const struct got_error *
-match_packidx_id(void *arg, struct got_object_id *id)
-{
-	const struct got_error *err;
-	struct match_packidx_id_args *a = arg;
-	char *id_str;
-
-	err = got_object_id_str(&id_str, id);
-	if (err)
-		return err;
-
-	if (strncmp(a->id_str_prefix, id_str, strlen(a->id_str_prefix)) == 0) {
-		if (a->unique_id == NULL) {
-			a->unique_id = got_object_id_dup(id);
-			if (a->unique_id == NULL)
-				err = got_error_from_errno("got_object_id_dup");
-		}
-	}
-
-	free(id_str);
-	return err;
-}
-
 static const struct got_error *
-match_packed_object_id_prefix(struct got_object_id **unique_id,
+match_packed_object(struct got_object_id **unique_id,
     struct got_repository *repo, const char *id_str_prefix)
 {
 	const struct got_error *err = NULL;
-	struct match_packidx_id_args args;
 	char *path_packdir;
 	DIR *packdir;
 	struct dirent *dent;
 	char *path_packidx;
 
-	*unique_id = NULL;
-
 	path_packdir = got_repo_get_path_objects_pack(repo);
 	if (path_packdir == NULL)
 		return got_error_from_errno("got_repo_get_path_objects_pack");
@@ -931,7 +900,7 @@ match_packed_object_id_prefix(struct got_object_id **unique_id,
 
 	while ((dent = readdir(packdir)) != NULL) {
 		struct got_packidx *packidx;
-		const struct got_error *cb_err;
+		struct got_object_id *unique_id_in_pack;
 
 		if (!is_packidx_filename(dent->d_name, dent->d_namlen))
 			continue;
@@ -939,30 +908,31 @@ match_packed_object_id_prefix(struct got_object_id **unique_id,
 		if (asprintf(&path_packidx, "%s/%s", path_packdir,
 		    dent->d_name) == -1) {
 			err = got_error_from_errno("asprintf");
-			goto done;
+			break;
 		}
 
 		err = got_packidx_open(&packidx, path_packidx, 0);
 		free(path_packidx);
 		if (err)
-			goto done;
+			break;
 
-		args.id_str_prefix = id_str_prefix;
-		args.unique_id = NULL;
-		cb_err = got_packidx_for_each_id(packidx, match_packidx_id,
-		    &args);
-		err = got_packidx_close(packidx);
-		if (err)
+		err = got_packidx_match_id_str_prefix(&unique_id_in_pack,
+		    packidx, id_str_prefix);
+		if (err) {
+			got_packidx_close(packidx);
 			break;
-		if (cb_err) {
-			err = cb_err;
+		}
+		err = got_packidx_close(packidx);
+		if (err) {
+			free(unique_id_in_pack);
 			break;
 		}
 
-		if (args.unique_id) {
-			if (*unique_id == NULL)
-				*unique_id = args.unique_id;
-			else {
+		if (unique_id_in_pack) {
+			if (*unique_id == NULL) {
+				*unique_id = unique_id_in_pack;
+			} else {
+				free(unique_id_in_pack);
 				err = got_error(GOT_ERR_AMBIGUOUS_ID);
 				break;
 			}
@@ -980,7 +950,7 @@ done:
 }
 
 static const struct got_error *
-match_object_id(struct got_object_id **unique_id, const char *path_objects,
+match_loose_object(struct got_object_id **unique_id, const char *path_objects,
     const char *object_dir, const char *id_str_prefix,
     struct got_repository *repo)
 {
@@ -989,20 +959,7 @@ match_object_id(struct got_object_id **unique_id, const char *path_objects,
 	DIR *dir = NULL;
 	struct dirent *dent;
 	struct got_object_id id;
-	int i;
 
-	for (i = 0; i < strlen(id_str_prefix); i++) {
-		if (isxdigit((unsigned char)id_str_prefix[i]))
-			continue;
-		return got_error(GOT_ERR_BAD_OBJ_ID_STR);
-	}
-
-	/* Search pack index. */
-	err = match_packed_object_id_prefix(unique_id, repo, id_str_prefix);
-	if (err)
-		return err;
-
-	/* Search on-disk directories. */
 	if (asprintf(&path, "%s/%s", path_objects, object_dir) == -1) {
 		err = got_error_from_errno("asprintf");
 		goto done;
@@ -1010,6 +967,10 @@ match_object_id(struct got_object_id **unique_id, const char *path_objects,
 
 	dir = opendir(path);
 	if (dir == NULL) {
+		if (errno == ENOENT) {
+			err = NULL;
+			goto done;
+		}
 		err = got_error_from_errno2("opendir", path);
 		goto done;
 	}
@@ -1046,9 +1007,6 @@ match_object_id(struct got_object_id **unique_id, const char *path_objects,
 			goto done;
 		}
 	}
-
-	if (err == NULL && *unique_id == NULL)
-		err = got_error(GOT_ERR_NO_OBJ);
 done:
 	if (err) {
 		free(*unique_id);
@@ -1061,20 +1019,32 @@ done:
 }
 
 const struct got_error *
-got_repo_match_object_id_prefix(struct got_object_id **id, const char *id_str_prefix,
-    struct got_repository *repo)
+got_repo_match_object_id_prefix(struct got_object_id **id,
+    const char *id_str_prefix, struct got_repository *repo)
 {
 	const struct got_error *err = NULL;
 	char *path_objects = got_repo_get_path_objects(repo);
 	char *object_dir = NULL;
 	size_t len;
+	int i;
+
+	*id = NULL;
+
+	for (i = 0; i < strlen(id_str_prefix); i++) {
+		if (isxdigit((unsigned char)id_str_prefix[i]))
+			continue;
+		return got_error(GOT_ERR_BAD_OBJ_ID_STR);
+	}
 
 	len = strlen(id_str_prefix);
 	if (len >= 2) {
+		err = match_packed_object(id, repo, id_str_prefix);
+		if (err)
+			return err;
 		object_dir = strndup(id_str_prefix, 2);
 		if (object_dir == NULL)
 			return got_error_from_errno("strdup");
-		err = match_object_id(id, path_objects, object_dir,
+		err = match_loose_object(id, path_objects, object_dir,
 		    id_str_prefix, repo);
 	} else if (len == 1) {
 		int i;
@@ -1082,7 +1052,10 @@ got_repo_match_object_id_prefix(struct got_object_id **id, const char *id_str_pr
 			if (asprintf(&object_dir, "%s%.1x", id_str_prefix, i)
 			    == -1)
 				return got_error_from_errno("asprintf");
-			err = match_object_id(id, path_objects, object_dir,
+			err = match_packed_object(id, repo, object_dir);
+			if (err)
+				return err;
+			err = match_loose_object(id, path_objects, object_dir,
 			    id_str_prefix, repo);
 			if (err)
 				break;
@@ -1091,5 +1064,11 @@ got_repo_match_object_id_prefix(struct got_object_id **id, const char *id_str_pr
 		return got_error(GOT_ERR_BAD_OBJ_ID_STR);
 
 	free(object_dir);
+	if (err) {
+		free(*id);
+		*id = NULL;
+	} else if (*id == NULL)
+		err = got_error(GOT_ERR_NO_OBJ);
+
 	return err;
 }
diff --git a/lib/sha1.c b/lib/sha1.c
index d5e11f0..1f25f1b 100644
--- a/lib/sha1.c
+++ b/lib/sha1.c
@@ -23,8 +23,8 @@
 
 #include "got_lib_sha1.h"
 
-static int
-parse_xdigit(uint8_t *val, const char *hex)
+int
+got_parse_xdigit(uint8_t *val, const char *hex)
 {
 	char *ep;
 	long lval;
@@ -54,7 +54,7 @@ got_parse_sha1_digest(uint8_t *digest, const char *line)
 			hex[j] = *line;
 			line++;
 		}
-		if (!parse_xdigit(&b, hex))
+		if (!got_parse_xdigit(&b, hex))
 			return 0;
 		digest[i] = b;
 	}