speed up 'tog diff' get_filestream_info() a bit. With this, 'tog diff' is able to display clang 10 commits. However, such huge diffs still take a rather long time to open. get_filestream_info() is a hack. Ideally, diff line-offset information needed by tog should be part of the result of the diff operation, rather than forcing tog to calculate line offsets during a post-processing step.
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123
diff --git a/tog/tog.c b/tog/tog.c
index 5a7f312..8eb6e6b 100644
--- a/tog/tog.c
+++ b/tog/tog.c
@@ -3023,18 +3023,15 @@ get_filestream_info(size_t *filesize, int *nlines, off_t **line_offsets,
FILE *infile)
{
const struct got_error *err = NULL;
- size_t len;
- char *buf = NULL;
+ size_t len, remain;
+ char buf[32768];
int i;
- size_t noffsets = 0;
+ size_t nalloc = 0;
off_t off = 0;
- if (line_offsets)
- *line_offsets = NULL;
- if (filesize)
- *filesize = 0;
- if (nlines)
- *nlines = 0;
+ *line_offsets = NULL;
+ *filesize = 0;
+ *nlines = 0;
if (fseek(infile, 0, SEEK_END) == -1)
return got_error_from_errno("fseek");
@@ -3046,22 +3043,26 @@ get_filestream_info(size_t *filesize, int *nlines, off_t **line_offsets,
if (len == 0)
return NULL;
- if ((buf = calloc(len, sizeof(char *))) == NULL)
- return got_error_from_errno("calloc");
- fread(buf, 1, len, infile);
- if (ferror(infile)) {
- err = got_error_from_errno("fread");
- goto done;
- }
+ remain = len;
+ while (remain > 0) {
+ size_t r, n = MIN(remain, sizeof(buf));
+ r = fread(buf, 1, n, infile);
+ if (r == 0) {
+ if (ferror(infile)) {
+ err = got_error_from_errno("fread");
+ goto done;
+ }
+ break;
+ }
+ i = 0;
+ remain -= r;
- i = 0;
- if (line_offsets && nlines) {
if (*line_offsets == NULL) {
/* Have some data but perhaps no '\n'. */
- noffsets = 1;
*nlines = 1;
- *line_offsets = calloc(1, sizeof(**line_offsets));
+ nalloc = len / 40; /* 40-char average line length */
+ *line_offsets = calloc(nalloc, sizeof(**line_offsets));
if (*line_offsets == NULL) {
err = got_error_from_errno("calloc");
goto done;
@@ -3073,24 +3074,25 @@ get_filestream_info(size_t *filesize, int *nlines, off_t **line_offsets,
i++;
}
}
+
/* Scan '\n' offsets in remaining chunk of data. */
- while (i < len) {
+ while (i < r) {
if (buf[i] != '\n') {
i++;
continue;
}
(*nlines)++;
- if (noffsets < *nlines) {
+ if (nalloc < *nlines) {
+ size_t nallocnew = *nlines + (remain / 40);
off_t *o = recallocarray(*line_offsets,
- noffsets, *nlines,
- sizeof(**line_offsets));
+ nalloc, nallocnew, sizeof(**line_offsets));
if (o == NULL) {
err = got_error_from_errno(
"recallocarray");
goto done;
}
*line_offsets = o;
- noffsets = *nlines;
+ nalloc = nallocnew;
}
off = i + 1;
(*line_offsets)[*nlines - 1] = off;
@@ -3104,19 +3106,13 @@ get_filestream_info(size_t *filesize, int *nlines, off_t **line_offsets,
}
rewind(infile);
- if (filesize)
- *filesize = len;
+ *filesize = len;
done:
- free(buf);
if (err) {
- if (line_offsets) {
- free(*line_offsets);
- *line_offsets = NULL;
- }
- if (filesize)
- *filesize = 0;
- if (nlines)
- *nlines = 0;
+ free(*line_offsets);
+ *line_offsets = NULL;
+ *filesize = 0;
+ *nlines = 0;
}
return NULL;
}