Hash :
9bc8be3d
Author :
Date :
2013-02-19T10:25:41
Refine pluggable similarity API This plugs in the three basic similarity strategies for handling whitespace via internal use of the pluggable API. In so doing, I realized that the use of git_buf in the hashsig API was not needed and actually just made it harder to use, so I tweaked that API as well. Note that the similarity metric is still not hooked up in the find_similarity code - this is just setting out the function that will be used.
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72
/*
* Copyright (C) the libgit2 contributors. All rights reserved.
*
* This file is part of libgit2, distributed under the GNU GPL v2 with
* a Linking Exception. For full terms see the included COPYING file.
*/
#ifndef INCLUDE_hashsig_h__
#define INCLUDE_hashsig_h__
#include "common.h"
/**
* Similarity signature of line hashes for a buffer
*/
typedef struct git_hashsig git_hashsig;
typedef enum {
GIT_HASHSIG_NORMAL = 0, /* use all data */
GIT_HASHSIG_IGNORE_WHITESPACE = 1, /* ignore whitespace */
GIT_HASHSIG_SMART_WHITESPACE = 2, /* ignore \r and all space after \n */
} git_hashsig_option_t;
/**
* Build a similarity signature for a buffer
*
* If you have passed a whitespace-ignoring buffer, then the whitespace
* will be removed from the buffer while it is being processed, modifying
* the buffer in place. Sorry about that!
*
* This will return an error if the buffer doesn't contain enough data to
* compute a valid signature.
*
* @param out The array of hashed runs representing the file content
* @param buf The contents of the file to hash
* @param buflen The length of the data at `buf`
* @param generate_pairwise_hashes Should pairwise runs be hashed
*/
extern int git_hashsig_create(
git_hashsig **out,
const char *buf,
size_t buflen,
git_hashsig_option_t opts);
/**
* Build a similarity signature from a file
*
* This walks through the file, only loading a maximum of 4K of file data at
* a time. Otherwise, it acts just like `git_hashsig_create`.
*
* This will return an error if the file doesn't contain enough data to
* compute a valid signature.
*/
extern int git_hashsig_create_fromfile(
git_hashsig **out,
const char *path,
git_hashsig_option_t opts);
/**
* Release memory for a content similarity signature
*/
extern void git_hashsig_free(git_hashsig *sig);
/**
* Measure similarity between two files
*
* @return <0 for error, [0 to 100] as similarity score
*/
extern int git_hashsig_compare(
const git_hashsig *a,
const git_hashsig *b);
#endif