/* diff3 - 3-way differential file comparison*/
#include <u.h>
#include <libc.h>
#include <bio.h>
#include <ctype.h>

/*
 * diff3 [-ex3] d13 d23 f1 f2 f3 
 *
 * d13 = diff report on f1 vs f3
 * d23 = diff report on f2 vs f3
 * f1, f2, f3 the 3 files
*/

struct  range {
	int from;	/* first in range of changed lines */
	int to;		/* from=to=line after point of insertion for lines */
};

struct diff {
	struct range old;
	struct range new;
};

#define NC 2000		/* max number of edits per file */
/* de is used to gather editing scripts,
 * that are later spewed out in reverse order.
 * its first element must be all zero
 * the "new" component of de contains line positions
 * or byte positions depending on when you look(!?)
*/

struct diff d13[NC];
struct diff d23[NC];
struct diff de[NC];
char line[1024];
Biobuf *bp[3];
int linct[3] = { 0,0,0};

/*	the number of the last-read line in each file
 *	is kept in cline[0-2]
*/
int cline[3];

/*	the latest known correspondence between line
 *	numbers of the 3 files is stored in last[1-3]
*/
int last[4];
int eflag;
int debug  = 0;

Biobuf bout;

static int readin(char *, struct diff *);
static int number(char **);
static int getline(Biobuf *);
static int getchange(Biobuf *);
static void merge(int, int );
static void separate(char *);
static void change(int, struct range *, int );
static void prange(struct range *);
static void keep(int, struct range *, struct range *);
static int skip(int, int, char *);
static int duplicate(struct range *, struct range *);
static void repos(int);
static int edit(struct diff *, int, int);
static void edscript(int);
static void usage(void);

void
main(int argc, char *argv[])
{
	int i, m, n;

	argv0 = argv[0];
	if (argc > 1 && *argv[1] == '-') {
		switch(argv[1][1]) {
		case 'e':
			eflag = 3;
			break;
		case '3':
			eflag = 2;
			break;
		case 'x':
			eflag = 1;
			break;
		default:
			usage();
		}
		argv++;
		argc--;
	}

	if (argc < 6)
		usage();

	Binit(&bout, 1, OWRITE);

	m = readin(argv[1], d13);
	n = readin(argv[2], d23);

	for (i = 0; i <= 2; i++)
		if ((bp[i] = Bopen(argv[i+3], OREAD)) == nil)
			sysfatal("%s - can't open", argv[i+3]);
	merge(m, n);
	exits(nil);
}

/*pick up the line numbers of allcahnges from
 * one change file
 * (this puts the numbers in a vector, which is not
 * strictly necessary, since the vector is processed
 * in one sequential pass. The vector could be optimized
 * out of existence)
*/

static int
readin(char *name, struct diff *dd)
{
	int i;
	int a,b,c,d;
	char *p, kind;

	bp[0] = Bopen(name, OREAD);
	for (i = 0; getchange(bp[0]); i++) {
		if (i >= NC)
			sysfatal("too many changes\n");
		p = line;
		a = b = number(&p);
		if (*p == ',') {
			p++;
			b = number(&p);
		}
		kind = *p++;
		c = d = number(&p);
		if (*p == ',') {
			p++;
			d = number(&p);
		}
		if (kind == 'a')
			a++;
		if (kind == 'd')
			c++;
		b++;
		d++;
		dd[i].old.from = a;
		dd[i].old.to = b;
		dd[i].new.from = c;
		dd[i].new.to = d;
	}
	dd[i].old.from = dd[i-1].old.to;
	dd[i].new.from = dd[i-1].new.to;
	Bterm(bp[0]);

	return i;
}

static int
number(char **lc)
{
	int nn = 0;

	while (isdigit(**lc))
		nn = nn*10 + *(*lc)++ - '0';
	return nn;
}


static int
getchange(Biobuf *b)
{
	while(getline(b))
		if (isdigit(*line))
			return 1;
	return 0;

}

static int
getline(Biobuf *b)
{
	int l;
	char *p;
	
	if ((p = Brdline(b, '\n')) == nil)
		return 0;
	l = Blinelen(b);
	if (l >= sizeof(line))
		l = sizeof(line) -1;
	strncpy(line, p, l);
	line[l] = 0;
	return l;
}

static void
merge(int m1, int m2)
{
	int dup;
	int j;
	int t1,t2;
	struct diff *d1, *d2, *d3;

	j = 0;
	d1 = d13;
	d2 = d23;

	for (;(t1 = d1 < d13+m1) | (t2 = d2 < d23+m2);) {
		if (debug) {
			fprint(2, "%d,%d=%d,%d %d,%d=%d,%d\n",
				d1->old.from, d1->old.to, 
				d1->new.from, d1->new.to,
				d2->old.from, d2->old.to,
				d2->new.from, d2->new.to);
		}

		/* first file is different from others*/
		if (!t2 || t1 && d1->new.to < d2->new.from) {
			/* stuff peculiar to 1st file */
			if (eflag==0) {
				separate("1");
				change(1,&d1->old,0);
				keep(2,&d1->old,&d1->new);
				change(3,&d1->new,0);
			}
			d1++;
			continue;
		}

		/* second file is different from others*/
		if (!t1 || t2 && d2->new.to < d1->new.from) {
			if (eflag==0) {
				separate("2");
				keep(1,&d2->old,&d2->new);
				change(2,&d2->old,0);
				change(3,&d2->new,0);
			}
			d2++;
			continue;
		}
		/* merge overlapping changes in first file
		 * this happens after extension see below*/
		if (d1+1 < d13+m1 && d1->new.to >= d1[1].new.from) {
			d1[1].old.from = d1->old.from;
			d1[1].new.from = d1->new.from;
			d1++;
			continue;
		}

		/* merge overlapping changes in second*/
		if (d2+1<d23+m2 && d2->new.to>=d2[1].new.from) {
			d2[1].old.from = d2->old.from;
			d2[1].new.from = d2->new.from;
			d2++;
			continue;
		}
		/*			stuff peculiar to third file or different in all*/
		if (d1->new.from == d2->new.from && d1->new.to == d2->new.to) {
			dup = duplicate(&d1->old, &d2->old);
			/* dup=0 means all files differ
			 * dup =1 meands files 1&2 identical*/
			if (eflag == 0) {
				separate(dup?"3":"");
				change(1, &d1->old, dup);
				change(2, &d2->old, 0);
				d3 = d1->old.to>d1->old.from? d1: d2;
				change(3, &d3->new,0);
			} else
				j = edit(d1, dup, j);
			d1++;
			d2++;
			continue;
		}
		/* overlapping changes from file1 & 2
		 * extend changes appropriately to
		 * make them coincide*/
		if (d1->new.from < d2->new.from) {
			d2->old.from -= d2->new.from-d1->new.from;
			d2->new.from = d1->new.from;
		}
		else if(d2->new.from < d1->new.from) {
			d1->old.from -= d1->new.from-d2->new.from;
			d1->new.from = d2->new.from;
		}
		if (d1->new.to > d2->new.to) {
			d2->old.to += d1->new.to - d2->new.to;
			d2->new.to = d1->new.to;
		}
		else if(d2->new.to > d1->new.to) {
			d1->old.to += d2->new.to - d1->new.to;
			d1->new.to = d2->new.to;
		}
	}
	if (eflag)
		edscript(j);
}

static void
separate(char *s)
{
	Bprint(&bout, "====%s\n", s);
}

/*	the range of ines rold.from thru rold.to in file i
 *	is to be changed. it is to be printed only if
 *	it does not duplicate something to be printed later
*/
static void
change(int i, struct range *rold, int dup)
{
	Bprint(&bout, "%d:", i);

	last[i] = rold->to;
	prange(rold);
	if (dup)
		return;
	if (debug)
		return;
	i--;
	skip(i, rold->from, (char *)0);
	skip(i, rold->to, "  ");
}

/*	print the range of line numbers, rold.from  thru rold.to
 *	as n1,n2 or n1
 */
static void
prange(struct range *rold)
{
	if (rold->to <= rold->from)
		Bprint(&bout, "%da\n", rold->from-1);
	else {
		Bprint(&bout, "%d", rold->from);
		if (rold->to > rold->from+1)
			Bprint(&bout, ",%d", rold->to-1);
		Bprint(&bout, "c\n");
	}
}

/*	no difference was reported by diff between file 1(or 2)
 *	and file 3, and an artificial dummy difference (trange)
 *	must be ginned up to correspond to the change reported
 *	in the other file
 */
static void
keep(int i, struct range *rold, struct range *rnew)
{
	int delta;
	struct range trange;
	USED(rold);
	delta = last[3] - last[i];
	trange.from = rnew->from - delta;
	trange.to = rnew->to - delta;
	change(i, &trange, 1);
}

/*	skip to just befor line number from in file i
 *	if "pr" is nonzero, print all skipped stuff
 * 	with string pr as a prefix
 */
static int
skip(int i, int from, char *pr)
{
	int j, n;

	for (n = 0; cline[i] < from-1; n += j) {
		if ((j = getline(bp[i])) == 0)
			sysfatal("unexpected EOF");
		line[Blinelen(bp[i])] = 0;
		if (pr)
			Bprint(&bout, "%s%s", pr, line);
		cline[i]++;
	}
	return n;
}

/*	return 1 or 0 according as the old range
 *	(in file 1) contains exactly the same data
 *	as the new range (in file 2)
*/
static int
duplicate(struct range *r1, struct range *r2)
{
	int c,d;
	int nchar;
	int nline;

	if (r1->to-r1->from != r2->to-r2->from)
		return 0;
	skip(0, r1->from,(char *)0);
	skip(1, r2->from,(char *)0);
	nchar = 0;
	for (nline = 0; nline < r1->to - r1->from; nline++) {
		do {
			c = Bgetc(bp[0]);
			d = Bgetc(bp[1]);
			if (c== -1 || d== -1)
				sysfatal("unexpected EOF");
			nchar++;
			if (c != d) {
				repos(nchar);
				return 0;
			}
		} while(c != '\n');
	}
	repos(nchar);
	return 1;
}

static void
repos(int nchar)
{
	int i;
	for (i = 0; i < 2; i++)
		Bseek(bp[i], (long)-nchar, 1);
}

/*	collect an editing script for later regurgitation
*/
static int
edit(struct diff *diff, int dup, int j)
{
	if (((dup+1) & eflag) == 0)
		return j;
	j++;
	de[j].old.from = diff->old.from;
	de[j].old.to = diff->old.to;
	de[j].new.from = de[j-1].new.to + skip(2,diff->new.from,(char *)0);
	de[j].new.to = de[j].new.from + skip(2,diff->new.to,(char *)0);
	return j;
}

/* regurgitate */
static void
edscript(int n)
{
	int j,k;
	char block[512];

	for (n = n; n > 0; n--) {
		prange(&de[n].old);
		Bseek(bp[2], (long)de[n].new.from, 0);
		for (k = de[n].new.to - de[n].new.from; k > 0; k-= j) {
			j = (k > 512) ? 512: k;
			if (Bread(bp[2], block, j) != j)
				sysfatal("read failed");
			if (Bwrite(&bout, block, j) != j)
				sysfatal("write failed");
		}
		Bprint(&bout, ".\n");
	}
}

static void
usage(void)
{
	fprint(2, "usage: %s [-ex3] diff-13 diff-23 f1 f2 f3\n", argv0);
	exits("usage");
}