Edit

IABSD.fr/src/usr.bin/diff3/diff3prog.c

Branch :

  • Show log

    Commit

  • Author : stsp
    Date : 2021-04-13 14:20:23
    Hash : 08d4acf6
    Message : Fix merging of files that lack a final \n at EOF after a block of common lines. Problem reported by Josh Rickmar. ok millert@

  • usr.bin/diff3/diff3prog.c
  • /*	$OpenBSD: diff3prog.c,v 1.21 2021/04/13 14:20:23 stsp Exp $	*/
    
    /*
     * Copyright (C) Caldera International Inc.  2001-2002.
     * All rights reserved.
     *
     * Redistribution and use in source and binary forms, with or without
     * modification, are permitted provided that the following conditions
     * are met:
     * 1. Redistributions of source code and documentation must retain the above
     *    copyright notice, this list of conditions and the following disclaimer.
     * 2. Redistributions in binary form must reproduce the above copyright
     *    notice, this list of conditions and the following disclaimer in the
     *    documentation and/or other materials provided with the distribution.
     * 3. All advertising materials mentioning features or use of this software
     *    must display the following acknowledgement:
     *	This product includes software developed or owned by Caldera
     *	International, Inc.
     * 4. Neither the name of Caldera International, Inc. nor the names of other
     *    contributors may be used to endorse or promote products derived from
     *    this software without specific prior written permission.
     *
     * USE OF THE SOFTWARE PROVIDED FOR UNDER THIS LICENSE BY CALDERA
     * INTERNATIONAL, INC. AND CONTRIBUTORS ``AS IS'' AND ANY EXPRESS OR
     * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
     * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
     * IN NO EVENT SHALL CALDERA INTERNATIONAL, INC. BE LIABLE FOR ANY DIRECT,
     * INDIRECT INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
     * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
     * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
     * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
     * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
     * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
     * POSSIBILITY OF SUCH DAMAGE.
     */
    /*-
     * Copyright (c) 1991, 1993
     *	The Regents of the University of California.  All rights reserved.
     *
     * Redistribution and use in source and binary forms, with or without
     * modification, are permitted provided that the following conditions
     * are met:
     * 1. Redistributions of source code must retain the above copyright
     *    notice, this list of conditions and the following disclaimer.
     * 2. Redistributions in binary form must reproduce the above copyright
     *    notice, this list of conditions and the following disclaimer in the
     *    documentation and/or other materials provided with the distribution.
     * 3. Neither the name of the University nor the names of its contributors
     *    may be used to endorse or promote products derived from this software
     *    without specific prior written permission.
     *
     * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
     * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
     * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
     * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
     * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
     * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
     * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
     * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
     * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
     * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
     * SUCH DAMAGE.
     *
     *	@(#)diff3.c	8.1 (Berkeley) 6/6/93
     */
    
    #include <ctype.h>
    #include <err.h>
    #include <stdio.h>
    #include <stdlib.h>
    #include <string.h>
    #include <unistd.h>
    
    /* diff3 - 3-way differential file comparison */
    
    /* diff3 [-ex3EX] d13 d23 f1 f2 f3 [m1 m3]
     *
     * d13 = diff report on f1 vs f3
     * d23 = diff report on f2 vs f3
     * f1, f2, f3 the 3 files
     * if changes in f1 overlap with changes in f3, m1 and m3 are used
     * to mark the overlaps; otherwise, the file names f1 and f3 are used
     * (only for options E and X).
     */
    
    /*
     * "from" is first in range of changed lines; "to" is last+1
     * from=to=line after point of insertion for added lines.
     */
    struct  range {
    	int from;
    	int to;
    };
    struct diff {
    	struct range old;
    	struct range new;
    };
    
    size_t szchanges;
    
    struct diff *d13;
    struct diff *d23;
    /*
     * "de" is used to gather editing scripts.  These are later spewed out in
     * reverse order.  Its first element must be all zero, the "new" component
     * of "de" contains line positions or byte positions depending on when you
     * look (!?).  Array overlap indicates which sections in "de" correspond to
     * lines that are different in all three files.
     */
    struct diff *de;
    char *overlap;
    int  overlapcnt;
    FILE *fp[3];
    int cline[3];		/* # of the last-read line in each file (0-2) */
    /*
     * the latest known correspondence between line numbers of the 3 files
     * is stored in last[1-3];
     */
    int last[4];
    int eflag;
    int oflag;		/* indicates whether to mark overlaps (-E or -X)*/
    int debug  = 0;
    char f1mark[40], f3mark[40];	/* markers for -E and -X */
    
    int duplicate(struct range *, struct range *);
    int edit(struct diff *, int, int);
    char *getchange(FILE *);
    char *get_line(FILE *, size_t *);
    int number(char **);
    int readin(char *, struct diff **);
    int skip(int, int, char *);
    void change(int, struct range *, int);
    void keep(int, struct range *);
    void merge(int, int);
    void prange(struct range *);
    void repos(int);
    void separate(const char *);
    __dead void edscript(int);
    __dead void trouble(void);
    void increase(void);
    __dead void usage(void);
    
    int
    main(int argc, char **argv)
    {
    	int ch, i, m, n;
    
    	if (pledge("stdio rpath", NULL) == -1)
    		err(1, "pledge");
    
    	eflag = 0;
    	oflag = 0;
    	while ((ch = getopt(argc, argv, "EeXx3")) != -1) {
    		switch (ch) {
    		case 'E':
    			eflag = 3;
    			oflag = 1;
    			break;
    		case 'e':
    			eflag = 3;
    			break;
    		case 'X':
    			oflag = eflag = 1;
    			break;
    		case 'x':
    			eflag = 1;
    			break;
    		case '3':
    			eflag = 2;
    			break;
    		}
    	}
    	argc -= optind;
    	argv += optind;
    	/* XXX - argc usage seems wrong here */
    	if (argc < 5)
    		usage();
    
    	if (oflag) {
    		(void)snprintf(f1mark, sizeof(f1mark), "<<<<<<< %s",
    		    argc >= 6 ? argv[5] : argv[2]);
    		(void)snprintf(f3mark, sizeof(f3mark), ">>>>>>> %s",
    		    argc >= 7 ? argv[6] : argv[4]);
    	}
    
    	increase();
    	m = readin(argv[0], &d13);
    	n = readin(argv[1], &d23);
    	for (i = 0; i <= 2; i++) {
    		if ((fp[i] = fopen(argv[i + 2], "r")) == NULL)
    			err(EXIT_FAILURE, "can't open %s", argv[i + 2]);
    	}
    	merge(m, n);
    	exit(EXIT_SUCCESS);
    }
    
    /*
     * Pick up the line numbers of all changes from one change file.
     * (This puts the numbers in a vector, which is not strictly necessary,
     * since the vector is processed in one sequential pass.
     * The vector could be optimized out of existence)
     */
    int
    readin(char *name, struct diff **dd)
    {
    	int a, b, c, d, i;
    	char kind, *p;
    
    	fp[0] = fopen(name, "r");
    	if (fp[0] == NULL)
    		err(EXIT_FAILURE, "can't open %s", name);
    	for (i=0; (p = getchange(fp[0])); i++) {
    		if (i >= szchanges - 1)
    			increase();
    		a = b = number(&p);
    		if (*p == ',') {
    			p++;
    			b = number(&p);
    		}
    		kind = *p++;
    		c = d = number(&p);
    		if (*p==',') {
    			p++;
    			d = number(&p);
    		}
    		if (kind == 'a')
    			a++;
    		if (kind == 'd')
    			c++;
    		b++;
    		d++;
    		(*dd)[i].old.from = a;
    		(*dd)[i].old.to = b;
    		(*dd)[i].new.from = c;
    		(*dd)[i].new.to = d;
    	}
    	if (i) {
    		(*dd)[i].old.from = (*dd)[i-1].old.to;
    		(*dd)[i].new.from = (*dd)[i-1].new.to;
    	}
    	(void)fclose(fp[0]);
    	return (i);
    }
    
    int
    number(char **lc)
    {
    	int nn;
    	nn = 0;
    	while (isdigit((unsigned char)(**lc)))
    		nn = nn*10 + *(*lc)++ - '0';
    	return (nn);
    }
    
    char *
    getchange(FILE *b)
    {
    	char *line;
    
    	while ((line = get_line(b, NULL))) {
    		if (isdigit((unsigned char)line[0]))
    			return (line);
    	}
    	return (NULL);
    }
    
    char *
    get_line(FILE *b, size_t *n)
    {
    	char *cp;
    	size_t len;
    	static char *buf;
    	static size_t bufsize;
    
    	if ((cp = fgetln(b, &len)) == NULL)
    		return (NULL);
    
    	if (cp[len - 1] != '\n')
    		len++;
    	if (len + 1 > bufsize) {
    		do {
    			bufsize += 1024;
    		} while (len + 1 > bufsize);
    		if ((buf = realloc(buf, bufsize)) == NULL)
    			err(EXIT_FAILURE, NULL);
    	}
    	memcpy(buf, cp, len - 1);
    	buf[len - 1] = '\n';
    	buf[len] = '\0';
    	if (n != NULL)
    		*n = len;
    	return (buf);
    }
    
    void
    merge(int m1, int m2)
    {
    	struct diff *d1, *d2, *d3;
    	int dup, j, t1, t2;
    
    	d1 = d13;
    	d2 = d23;
    	j = 0;
    	for (;;) {
    		t1 = (d1 < d13 + m1);
    		t2 = (d2 < d23 + m2);
    		if (!t1 && !t2)
    			break;
    
    		if (debug) {
    			printf("%d,%d=%d,%d %d,%d=%d,%d\n",
    			d1->old.from,d1->old.to,
    			d1->new.from,d1->new.to,
    			d2->old.from,d2->old.to,
    			d2->new.from,d2->new.to);
    		}
    		/* first file is different from others */
    		if (!t2 || (t1 && d1->new.to < d2->new.from)) {
    			/* stuff peculiar to 1st file */
    			if (eflag==0) {
    				separate("1");
    				change(1, &d1->old, 0);
    				keep(2, &d1->new);
    				change(3, &d1->new, 0);
    			}
    			d1++;
    			continue;
    		}
    		/* second file is different from others */
    		if (!t1 || (t2 && d2->new.to < d1->new.from)) {
    			if (eflag==0) {
    				separate("2");
    				keep(1, &d2->new);
    				change(2, &d2->old, 0);
    				change(3, &d2->new, 0);
    			}
    			d2++;
    			continue;
    		}
    		/*
    		 * Merge overlapping changes in first file
    		 * this happens after extension (see below).
    		 */
    		if (d1 + 1 < d13 + m1 && d1->new.to >= d1[1].new.from) {
    			d1[1].old.from = d1->old.from;
    			d1[1].new.from = d1->new.from;
    			d1++;
    			continue;
    		}
    
    		/* merge overlapping changes in second */
    		if (d2 + 1 < d23 + m2 && d2->new.to >= d2[1].new.from) {
    			d2[1].old.from = d2->old.from;
    			d2[1].new.from = d2->new.from;
    			d2++;
    			continue;
    		}
    		/* stuff peculiar to third file or different in all */
    		if (d1->new.from == d2->new.from && d1->new.to == d2->new.to) {
    			dup = duplicate(&d1->old,&d2->old);
    			/*
    			 * dup = 0 means all files differ
    			 * dup = 1 means files 1 and 2 identical
    			 */
    			if (eflag==0) {
    				separate(dup ? "3" : "");
    				change(1, &d1->old, dup);
    				change(2, &d2->old, 0);
    				d3 = d1->old.to > d1->old.from ? d1 : d2;
    				change(3, &d3->new, 0);
    			} else
    				j = edit(d1, dup, j);
    			d1++;
    			d2++;
    			continue;
    		}
    		/*
    		 * Overlapping changes from file 1 and 2; extend changes
    		 * appropriately to make them coincide.
    		 */
    		if (d1->new.from < d2->new.from) {
    			d2->old.from -= d2->new.from-d1->new.from;
    			d2->new.from = d1->new.from;
    		} else if (d2->new.from < d1->new.from) {
    			d1->old.from -= d1->new.from-d2->new.from;
    			d1->new.from = d2->new.from;
    		}
    		if (d1->new.to > d2->new.to) {
    			d2->old.to += d1->new.to - d2->new.to;
    			d2->new.to = d1->new.to;
    		} else if (d2->new.to > d1->new.to) {
    			d1->old.to += d2->new.to - d1->new.to;
    			d1->new.to = d2->new.to;
    		}
    	}
    	if (eflag)
    		edscript(j);
    }
    
    void
    separate(const char *s)
    {
    	printf("====%s\n", s);
    }
    
    /*
     * The range of lines rold.from thru rold.to in file i is to be changed.
     * It is to be printed only if it does not duplicate something to be
     * printed later.
     */
    void
    change(int i, struct range *rold, int dup)
    {
    	printf("%d:", i);
    	last[i] = rold->to;
    	prange(rold);
    	if (dup || debug)
    		return;
    	i--;
    	(void)skip(i, rold->from, NULL);
    	(void)skip(i, rold->to, "  ");
    }
    
    /*
     * print the range of line numbers, rold.from thru rold.to, as n1,n2 or n1
     */
    void
    prange(struct range *rold)
    {
    	if (rold->to <= rold->from)
    		printf("%da\n", rold->from - 1);
    	else {
    		printf("%d", rold->from);
    		if (rold->to > rold->from+1)
    			printf(",%d", rold->to - 1);
    		printf("c\n");
    	}
    }
    
    /*
     * No difference was reported by diff between file 1 (or 2) and file 3,
     * and an artificial dummy difference (trange) must be ginned up to
     * correspond to the change reported in the other file.
     */
    void
    keep(int i, struct range *rnew)
    {
    	int delta;
    	struct range trange;
    
    	delta = last[3] - last[i];
    	trange.from = rnew->from - delta;
    	trange.to = rnew->to - delta;
    	change(i, &trange, 1);
    }
    
    /*
     * skip to just before line number from in file "i".  If "pr" is non-NULL,
     * print all skipped stuff with string pr as a prefix.
     */
    int
    skip(int i, int from, char *pr)
    {
    	size_t j, n;
    	char *line;
    
    	for (n = 0; cline[i] < from - 1; n += j) {
    		if ((line = get_line(fp[i], &j)) == NULL)
    			trouble();
    		if (pr != NULL)
    			printf("%s%s", pr, line);
    		cline[i]++;
    	}
    	return ((int) n);
    }
    
    /*
     * Return 1 or 0 according as the old range (in file 1) contains exactly
     * the same data as the new range (in file 2).
     */
    int
    duplicate(struct range *r1, struct range *r2)
    {
    	int c,d;
    	int nchar;
    	int nline;
    
    	if (r1->to-r1->from != r2->to-r2->from)
    		return (0);
    	(void)skip(0, r1->from, NULL);
    	(void)skip(1, r2->from, NULL);
    	nchar = 0;
    	for (nline=0; nline < r1->to - r1->from; nline++) {
    		do {
    			c = getc(fp[0]);
    			d = getc(fp[1]);
    			if (c == -1 && d == -1)
    				break;
    			if (c == -1 || d== -1)
    				trouble();
    			nchar++;
    			if (c != d) {
    				repos(nchar);
    				return (0);
    			}
    		} while (c != '\n');
    	}
    	repos(nchar);
    	return (1);
    }
    
    void
    repos(int nchar)
    {
    	int i;
    
    	for (i = 0; i < 2; i++)
    		(void)fseek(fp[i], (long)-nchar, SEEK_CUR);
    }
    
    __dead void
    trouble(void)
    {
    	errx(EXIT_FAILURE, "logic error");
    }
    
    /*
     * collect an editing script for later regurgitation
     */
    int
    edit(struct diff *diff, int dup, int j)
    {
    	if (((dup + 1) & eflag) == 0)
    		return (j);
    	j++;
    	overlap[j] = !dup;
    	if (!dup)
    		overlapcnt++;
    	de[j].old.from = diff->old.from;
    	de[j].old.to = diff->old.to;
    	de[j].new.from = de[j-1].new.to + skip(2, diff->new.from, NULL);
    	de[j].new.to = de[j].new.from + skip(2, diff->new.to, NULL);
    	return (j);
    }
    
    /* regurgitate */
    __dead void
    edscript(int n)
    {
    	int j,k;
    	char block[BUFSIZ];
    
    	for (; n > 0; n--) {
    		if (!oflag || !overlap[n])
    			prange(&de[n].old);
    		else
    			printf("%da\n=======\n", de[n].old.to -1);
    		(void)fseek(fp[2], (long)de[n].new.from, SEEK_SET);
    		for (k = de[n].new.to-de[n].new.from; k > 0; k-= j) {
    			size_t r;
    			j = k > BUFSIZ ? BUFSIZ : k;
    			r = fread(block, 1, j, fp[2]);
    			if (r == 0) {
    				if (feof(fp[2]))
    					break;
    				trouble();
    			}
    			if (r != j)
    				j = r;
    			(void)fwrite(block, 1, j, stdout);
    		}
    		if (!oflag || !overlap[n])
    			printf(".\n");
    		else {
    			printf("%s\n.\n", f3mark);
    			printf("%da\n%s\n.\n", de[n].old.from - 1, f1mark);
    		}
    	}
    	exit(overlapcnt);
    }
    
    void
    increase(void)
    {
    	struct diff *p;
    	char *q;
    	size_t newsz, incr;
    
    	/* are the memset(3) calls needed? */
    	newsz = szchanges == 0 ? 64 : 2 * szchanges;
    	incr = newsz - szchanges;
    
    	p = reallocarray(d13, newsz, sizeof(struct diff));
    	if (p == NULL)
    		err(1, NULL);
    	memset(p + szchanges, 0, incr * sizeof(struct diff));
    	d13 = p;
    	p = reallocarray(d23, newsz, sizeof(struct diff));
    	if (p == NULL)
    		err(1, NULL);
    	memset(p + szchanges, 0, incr * sizeof(struct diff));
    	d23 = p;
    	p = reallocarray(de, newsz, sizeof(struct diff));
    	if (p == NULL)
    		err(1, NULL);
    	memset(p + szchanges, 0, incr * sizeof(struct diff));
    	de = p;
    	q = reallocarray(overlap, newsz, sizeof(char));
    	if (q == NULL)
    		err(1, NULL);
    	memset(q + szchanges, 0, incr * sizeof(char));
    	overlap = q;
    	szchanges = newsz;
    }
    
    
    __dead void
    usage(void)
    {
    	extern char *__progname;
    
    	fprintf(stderr, "usage: %s [-exEX3] /tmp/d3a.?????????? "
    	    "/tmp/d3b.?????????? file1 file2 file3\n", __progname);
    	exit(EXIT_FAILURE);
    }