Hash :
3658e81e
Author :
Date :
2013-03-25T14:20:07
Move crlf conversion into buf_text. This adds crlf/lf conversion functions into buf_text with more efficient implementations that bypass the high-level buffer functions. They attempt to minimize the number of reallocations done, and they write the buffer data directly when they know that enough memory is allocated to memcpy the data. Tests are added for these new functions. The crlf.c code is updated to use the new functions. Removed the include of buf_text.h from filter.h and just include it more narrowly in the places that need it.
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122
/*
* Copyright (C) the libgit2 contributors. All rights reserved.
*
* This file is part of libgit2, distributed under the GNU GPL v2 with
* a Linking Exception. For full terms see the included COPYING file.
*/
#ifndef INCLUDE_buf_text_h__
#define INCLUDE_buf_text_h__
#include "buffer.h"
/**
* Kinds of byte-order mark (BOM) that git_buf_text_detect_bom() can report.
*
* GIT_BOM_NONE means that no BOM was detected; the other values name the
* kind of BOM that was found.
*/
typedef enum {
GIT_BOM_NONE = 0,
GIT_BOM_UTF8 = 1,
GIT_BOM_UTF16_LE = 2,
GIT_BOM_UTF16_BE = 3,
GIT_BOM_UTF32_LE = 4,
GIT_BOM_UTF32_BE = 5
} git_bom_t;
/**
* Statistics about a piece of text, as filled in by
* git_buf_text_gather_stats(), for use in heuristics.
*/
typedef struct {
git_bom_t bom; /* BOM found at head of text */
unsigned int nul, cr, lf, crlf; /* NUL, CR, LF and CRLF counts */
unsigned int printable, nonprintable; /* printable / non-printable counts; these are just approximations! */
} git_buf_text_stats;
/**
* Append string to buffer, prefixing each character from `esc_chars` with
* the `esc_with` string.
*
* @param buf Buffer to append data to
* @param string String to escape and append
* @param esc_chars Characters to be escaped
* @param esc_with String to insert in front of each found character
* @return 0 on success, <0 on failure (probably allocation problem)
*/
extern int git_buf_text_puts_escaped(
git_buf *buf,
const char *string,
const char *esc_chars,
const char *esc_with);
/**
 * Append `string` to `buf`, inserting a backslash in front of every
 * character that is special in a regular expression.
 *
 * This is a thin wrapper around git_buf_text_puts_escaped().
 *
 * @param buf Buffer to append data to
 * @param string String to escape and append
 * @return Whatever git_buf_text_puts_escaped() returns
 */
GIT_INLINE(int) git_buf_text_puts_escape_regex(git_buf *buf, const char *string)
{
	/* Characters that receive the escape prefix. */
	const char *regex_special = "^.[]$()|*+?{}\\";
	/* A single backslash is placed before each special character. */
	const char *escape_prefix = "\\";

	return git_buf_text_puts_escaped(buf, string, regex_special, escape_prefix);
}
/**
* Unescape all characters in a buffer in place
*
* I.e. remove backslashes
*
* @param buf Buffer to unescape; its contents are modified in place
*/
extern void git_buf_text_unescape(git_buf *buf);
/**
* Replace all \r\n with \n (or do nothing if no \r\n are found)
*
* @param tgt Target buffer (presumably receives the converted text --
*            confirm against the implementation)
* @param src Source buffer; declared const, so it is not written through
* @return 0 on success, GIT_ENOTFOUND if no \r\n, -1 on memory error
*/
extern int git_buf_text_crlf_to_lf(git_buf *tgt, const git_buf *src);
/**
* Replace all \n with \r\n (or do nothing if no \n are found)
*
* @param tgt Target buffer (presumably receives the converted text --
*            confirm against the implementation)
* @param src Source buffer; declared const, so it is not written through
* @return 0 on success, GIT_ENOTFOUND if no \n, -1 on memory error
*/
extern int git_buf_text_lf_to_crlf(git_buf *tgt, const git_buf *src);
/**
* Fill buffer with the common prefix of an array of strings
*
* Buffer will be set to empty if there is no common prefix
*
* @param buf Buffer to fill with the common prefix
* @param strs Array of strings whose common prefix is computed
* @return NOTE(review): not documented here; presumably 0 on success and
*         <0 on failure, like the other int-returning functions -- confirm
*/
extern int git_buf_text_common_prefix(git_buf *buf, const git_strarray *strs);
/**
* Check quickly if buffer looks like it contains binary data
*
* @param buf Buffer to check (const; not written through)
* @return true if buffer looks like non-text data
*/
extern bool git_buf_text_is_binary(const git_buf *buf);
/**
* Check quickly if buffer contains a NUL byte
*
* @param buf Buffer to check (const; not written through)
* @return true if buffer contains a NUL byte
*/
extern bool git_buf_text_contains_nul(const git_buf *buf);
/**
* Check if a buffer begins with a UTF BOM
*
* @param bom Output: set to the type of BOM detected or GIT_BOM_NONE
* @param buf Buffer in which to check the first bytes for a BOM
* @param offset Offset into buffer to look for BOM
* @return Number of bytes of BOM data (or 0 if no BOM found)
*/
extern int git_buf_text_detect_bom(
git_bom_t *bom, const git_buf *buf, size_t offset);
/**
* Gather stats for a piece of text
*
* Fill the `stats` structure with counts of unreadable characters, carriage
* returns, etc., so it can be used in heuristics. This automatically skips
* a trailing EOF (\032 character). It will also look for a BOM at the
* start of the text and can be told to skip that as well.
*
* @param stats Structure to be filled in (see git_buf_text_stats)
* @param buf Text to process
* @param skip_bom Exclude leading BOM from stats if true
* @return true if the buffer heuristically looks like binary data
*/
extern bool git_buf_text_gather_stats(
git_buf_text_stats *stats, const git_buf *buf, bool skip_bom);
#endif