Hash :
9c454b00
Author :
Date :
2013-01-11T22:13:02
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156
/*
* Copyright (C) the libgit2 contributors. All rights reserved.
*
* This file is part of libgit2, distributed under the GNU GPL v2 with
* a Linking Exception. For full terms see the included COPYING file.
*/
#ifndef INCLUDE_buf_text_h__
#define INCLUDE_buf_text_h__
#include "buffer.h"
typedef enum {
GIT_BOM_NONE = 0,
GIT_BOM_UTF8 = 1,
GIT_BOM_UTF16_LE = 2,
GIT_BOM_UTF16_BE = 3,
GIT_BOM_UTF32_LE = 4,
GIT_BOM_UTF32_BE = 5
} git_bom_t;
typedef struct {
git_bom_t bom; /* BOM found at head of text */
unsigned int nul, cr, lf, crlf; /* NUL, CR, LF and CRLF counts */
unsigned int printable, nonprintable; /* These are just approximations! */
} git_buf_text_stats;
/**
* Append string to buffer, prefixing each character from `esc_chars` with
* `esc_with` string.
*
* @param buf Buffer to append data to
* @param string String to escape and append
* @param esc_chars Characters to be escaped
* @param esc_with String to insert in from of each found character
* @return 0 on success, <0 on failure (probably allocation problem)
*/
extern int git_buf_text_puts_escaped(
git_buf *buf,
const char *string,
const char *esc_chars,
const char *esc_with);
/**
* Append string escaping characters that are regex special
*/
GIT_INLINE(int) git_buf_text_puts_escape_regex(git_buf *buf, const char *string)
{
return git_buf_text_puts_escaped(buf, string, "^.[]$()|*+?{}\\", "\\");
}
/**
* Unescape all characters in a buffer in place
*
* I.e. remove backslashes
*/
extern void git_buf_text_unescape(git_buf *buf);
/**
* Fill buffer with the common prefix of a array of strings
*
* Buffer will be set to empty if there is no common prefix
*/
extern int git_buf_text_common_prefix(git_buf *buf, const git_strarray *strs);
/**
* Check quickly if buffer looks like it contains binary data
*
* @param buf Buffer to check
* @return true if buffer looks like non-text data
*/
extern bool git_buf_text_is_binary(const git_buf *buf);
/**
* Check quickly if buffer contains a NUL byte
*
* @param buf Buffer to check
* @return true if buffer contains a NUL byte
*/
extern bool git_buf_text_contains_nul(const git_buf *buf);
/**
* Check if a buffer begins with a UTF BOM
*
* @param bom Set to the type of BOM detected or GIT_BOM_NONE
* @param buf Buffer in which to check the first bytes for a BOM
* @param offset Offset into buffer to look for BOM
* @return Number of bytes of BOM data (or 0 if no BOM found)
*/
extern int git_buf_text_detect_bom(
git_bom_t *bom, const git_buf *buf, size_t offset);
/**
* Gather stats for a piece of text
*
* Fill the `stats` structure with counts of unreadable characters, carriage
* returns, etc, so it can be used in heuristics. This automatically skips
* a trailing EOF (\032 character). Also it will look for a BOM at the
* start of the text and can be told to skip that as well.
*
* @param stats Structure to be filled in
* @param buf Text to process
* @param skip_bom Exclude leading BOM from stats if true
* @return Does the buffer heuristically look like binary data
*/
extern bool git_buf_text_gather_stats(
git_buf_text_stats *stats, const git_buf *buf, bool skip_bom);
/**
* Similarity signature of line hashes for a buffer
*/
typedef struct git_buf_text_hashsig git_buf_text_hashsig;
/**
* Build a similarity signature for a buffer
*
* This can either generate a simple array of hashed lines/runs in the
* file, or it can also keep hashes of pairs of runs in sequence. Adding
* the pairwise runs means the final score will be sensitive to line
* ordering changes as well as individual line contents.
*
* @param out The array of hashed runs representing the file content
* @param buf The contents of the file to hash
* @param generate_pairwise_hashes Should pairwise runs be hashed
*/
extern int git_buf_text_hashsig_create(
git_buf_text_hashsig **out,
const git_buf *buf,
bool generate_pairwise_hashes);
/**
* Build a similarity signature from a file
*
* This walks through the file, only loading a maximum of 4K of file data at
* a time. Otherwise, it acts just like `git_buf_text_hashsig_create`.
*/
extern int git_buf_text_hashsig_create_fromfile(
git_buf_text_hashsig **out,
const char *path,
bool generate_pairwise_hashes);
/**
* Release memory for a content similarity signature
*/
extern void git_buf_text_hashsig_free(git_buf_text_hashsig *sig);
/**
* Measure similarity between two files
*
* @return <0 for error, [0 to scale] as similarity score
*/
extern int git_buf_text_hashsig_compare(
const git_buf_text_hashsig *a,
const git_buf_text_hashsig *b,
int scale);
#endif