Home | History | Annotate | Download | only in diff
      1 /*
      2  * CDDL HEADER START
      3  *
      4  * The contents of this file are subject to the terms of the
      5  * Common Development and Distribution License, Version 1.0 only
      6  * (the "License").  You may not use this file except in compliance
      7  * with the License.
      8  *
      9  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
     10  * or http://www.opensolaris.org/os/licensing.
     11  * See the License for the specific language governing permissions
     12  * and limitations under the License.
     13  *
     14  * When distributing Covered Code, include this CDDL HEADER in each
     15  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
     16  * If applicable, add the following below this CDDL HEADER, with the
     17  * fields enclosed by brackets "[]" replaced with your own identifying
     18  * information: Portions Copyright [yyyy] [name of copyright owner]
     19  *
     20  * CDDL HEADER END
     21  */
     22 /*
     23  * Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
     24  * Use is subject to license terms.
     25  */
     26 
     27 /*	Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T	*/
     28 /*	  All Rights Reserved  	*/
     29 
     30 /*
     31  * University Copyright- Copyright (c) 1982, 1986, 1988
     32  * The Regents of the University of California
     33  * All Rights Reserved
     34  *
     35  * University Acknowledgment- Portions of this document are derived from
     36  * software developed by the University of California, Berkeley, and its
     37  * contributors.
     38  */
     39 
     40 #pragma ident	"%Z%%M%	%I%	%E% SMI"
     41 
     42 /*
     43  *	diff - differential file comparison
     44  *
     45  *	Uses an algorithm  which finds
     46  *	a pair of longest identical subsequences in the two
     47  *	files.
     48  *
     49  *	The major goal is to generate the match vector J.
     50  *	J[i] is the index of the line in file1 corresponding
     51  *	to line i file0. J[i] = 0 if there is no
     52  *	such line in file1.
     53  *
     54  *	Lines are hashed so as to work in core. All potential
     55  *	matches are located by sorting the lines of each file
     56  *	on the hash (called value). In particular, this
     57  *	collects the equivalence classes in file1 together.
     58  *	Subroutine equiv  replaces the value of each line in
     59  *	file0 by the index of the first element of its
     60  *	matching equivalence in (the reordered) file1.
     61  *	To save space equiv squeezes file1 into a single
     62  *	array member in which the equivalence classes
     63  *	are simply concatenated, except that their first
     64  *	members are flagged by changing sign.
     65  *
     66  *	Next the indices that point into member are unsorted into
     67  *	array class according to the original order of file0.
     68  *
     69  *	The cleverness lies in routine stone. This marches
     70  *	through the lines of file0, developing a vector klist
     71  *	of "k-candidates". At step i a k-candidate is a matched
     72  *	pair of lines x,y (x in file0 y in file1) such that
     73  *	there is a common subsequence of lenght k
     74  *	between the first i lines of file0 and the first y
     75  *	lines of file1, but there is no such subsequence for
     76  *	any smaller y. x is the earliest possible mate to y
     77  *	that occurs in such a subsequence.
     78  *
     79  *	Whenever any of the members of the equivalence class of
     80  *	lines in file1 matable to a line in file0 has serial number
     81  *	less than the y of some k-candidate, that k-candidate
     82  *	with the smallest such y is replaced. The new
     83  *	k-candidate is chained (via pred) to the current
     84  *	k-1 candidate so that the actual subsequence can
     85  *	be recovered. When a member has serial number greater
     86  *	that the y of all k-candidates, the klist is extended.
     87  *	At the end, the longest subsequence is pulled out
     88  *	and placed in the array J by unravel.
     89  *
     90  *	With J in hand, the matches there recorded are
     91  *	checked against reality to assure that no spurious
     92  *	matches have crept in due to hashing. If they have,
     93  *	they are broken, and "jackpot " is recorded--a harmless
     94  *	matter except that a true match for a spuriously
     95  *	mated line may now be unnecessarily reported as a change.
     96  *
     97  *	Much of the complexity of the program comes simply
     98  *	from trying to minimize core utilization and
     99  *	maximize the range of doable problems by dynamically
    100  *	allocating what is needed and reusing what is not.
    101  *	The core requirements for problems larger than somewhat
    102  *	are (in words) 2*length(file0) + length(file1) +
    103  *	3*(number of k-candidates installed),  typically about
    104  *	6n words for files of length n.
    105  */
    106 #include <stdio.h>
    107 #include <wchar.h>
    108 #include <ctype.h>
    109 #include <stdlib.h>
    110 #include <limits.h>
    111 #include <sys/types.h>
    112 #include <sys/stat.h>
    113 #include <sys/wait.h>
    114 #include <unistd.h>
    115 #include <signal.h>
    116 #include <fcntl.h>
    117 #include <dirent.h>
    118 #include <locale.h>
    119 #include <stdarg.h>
    120 #include <errno.h>
    121 #include <string.h>
    122 #include "diff.h"
    123 
    124 #define	CHRTRAN(x)	(iflag ? (iswupper(x) ? towlower(x) : (x)) : (x))
    125 #define	NCCHRTRAN(x)	(iswupper(x) ? towlower(x) : (x))
    126 #define	max(a, b)	((a) < (b) ? (b) : (a))
    127 #define	min(a, b)	((a) > (b) ? (b) : (a))
    128 
    129 int pref, suff;		/* length of prefix and suffix */
    130 int *class;		/* will be overlaid on file[0] */
    131 int *member;		/* will be overlaid on file[1] */
    132 int *klist;		/* will be overlaid on file[0] after class */
    133 struct cand *clist;	/* merely a free storage pot for candidates */
    134 int clen = 0;
    135 int *J;			/* will be overlaid on class */
    136 long *ixold;		/* will be overlaid on klist */
    137 long *ixnew;		/* will be overlaid on file[1] */
    138 
    139 static int	mbcurmax;
    140 
    141 static void error(const char *);
    142 static void unravel(int);
    143 static void	check(void);
    144 static void	output(void);
    145 static void	change(int, int, int, int);
    146 static void	range(int, int, char *);
    147 static void	fetch(long *, int, int, int, char *, int);
    148 static void	dump_context_vec(void);
    149 static void	diffdir(char **);
    150 static void	setfile(char **, char **, char *);
    151 static void	scanpr(struct dir *, int, char *, char *,
    152 	char *, char *, char *);
    153 static void	only(struct dir *, int);
    154 static void	sort(struct line *, int);
    155 static void	unsort(struct line *, int, int *);
    156 static void	filename(char **, char **, struct stat *, char **);
    157 static void	prepare(int, char *);
    158 static void	prune(void);
    159 static void	equiv(struct line *, int, struct line *, int, int *);
    160 static void	done(void);
    161 static void	noroom(void);
    162 static void	usage(void);
    163 static void	initbuf(FILE *, int, long);
    164 static void	resetbuf(int);
    165 
    166 static int	stone(int *, int, int *, int *);
    167 static int	newcand(int, int, int);
    168 static int	search(int *, int, int);
    169 static int	skipline(int);
    170 static int	readhash(FILE *, int, char *);
    171 static int	entcmp(struct dir *, struct dir *);
    172 static int	compare(struct dir *);
    173 static int	calldiff(char *);
    174 static int	binary(int);
    175 static int	filebinary(FILE *);
    176 static int	isbinary(char *, int);
    177 static int	useless(char *);
    178 static char	*copytemp(char *);
    179 static char *pfiletype(mode_t);
    180 static struct dir *setupdir(char *);
    181 static wint_t	getbufwchar(int, int *);
    182 static wint_t	wcput(wint_t);
    183 static long	ftellbuf(int);
    184 
    185 
    186 /*
    187  * error message string constants
    188  */
    189 #define	BAD_MB_ERR	"invalid multibyte character encountered"
    190 #define	NO_PROCS_ERR	"no more processes"
    191 #define	NO_MEM_ERR	"out of memory"
    192 
    193 static void *
    194 talloc(size_t n)
    195 {
    196 	void *p;
    197 	p = malloc(n);
    198 	if (p == NULL)
    199 		noroom();
    200 	return (p);
    201 }
    202 
    203 static void *
    204 ralloc(void *p, size_t n)	/* compacting reallocation */
    205 {
    206 	void	*q;
    207 #if 0
    208 	free(p);
    209 #endif
    210 	q = realloc(p, n);
    211 	if (q == NULL)
    212 		noroom();
    213 	return (q);
    214 }
    215 
    216 
    217 int
    218 main(int argc, char **argv)
    219 {
    220 	int k;
    221 	char *argp;
    222 	int flag;			/* option flag read by getopt() */
    223 	int i, j;
    224 	char buf1[BUFSIZ], buf2[BUFSIZ];
    225 
    226 
    227 	(void) setlocale(LC_ALL, "");
    228 #if !defined(TEXT_DOMAIN)		/* Should be defined by cc -D */
    229 #define	TEXT_DOMAIN	"SYS_TEST"	/* Use this only if it weren't */
    230 #endif
    231 	(void) textdomain(TEXT_DOMAIN);
    232 
    233 	mbcurmax = MB_CUR_MAX;
    234 
    235 	diffargv = argv;
    236 	whichtemp = 0;
    237 	while ((flag = getopt(argc, argv, "bitwcuefhnlrsC:D:S:U:")) != EOF) {
    238 		switch (flag) {
    239 		case 'D':
    240 			opt = D_IFDEF;
    241 			wantelses = 1;
    242 			ifdef1 = "";
    243 			ifdef2 = optarg;
    244 			break;
    245 
    246 		case 'b':
    247 			bflag = 1;
    248 			break;
    249 
    250 		case 'C':
    251 		case 'U':
    252 			opt = D_CONTEXT;
    253 			argp = optarg;
    254 			context = 0;
    255 			while (*argp >= '0' && *argp <= '9')
    256 				context *= 10, context += *argp++ - '0';
    257 			if (*argp)
    258 				error(gettext("use [ -C num | -U num ]"));
    259 			if (flag == 'U')
    260 				uflag++;
    261 			else
    262 				uflag = 0;
    263 			break;
    264 
    265 		case 'c':
    266 		case 'u':
    267 			opt = D_CONTEXT;
    268 			context = 3;
    269 			if (flag == 'u')
    270 				uflag++;
    271 			else
    272 				uflag = 0;
    273 			break;
    274 
    275 		case 'e':
    276 			opt = D_EDIT;
    277 			break;
    278 
    279 		case 'f':
    280 			opt = D_REVERSE;
    281 			break;
    282 
    283 		case 'h':
    284 			hflag++;
    285 			break;
    286 
    287 		case 'i':
    288 			iflag = 1;
    289 			break;
    290 
    291 		case 'l':
    292 			lflag = 1;
    293 			break;
    294 
    295 		case 'n':
    296 			opt = D_NREVERSE;
    297 			break;
    298 
    299 		case 'r':
    300 			rflag = 1;
    301 			break;
    302 
    303 		case 'S':
    304 			(void) strcpy(start, optarg);
    305 			break;
    306 
    307 		case 's':
    308 			sflag = 1;
    309 			break;
    310 
    311 		case 't':
    312 			tflag = 1;
    313 			break;
    314 
    315 		case 'w':
    316 			wflag = 1;
    317 			break;
    318 
    319 		case '?':
    320 			usage();
    321 			break;
    322 
    323 		default:
    324 			/* Not sure how it would get here, but just in case */
    325 			(void) fprintf(stderr, "diff: ");
    326 			(void) fprintf(stderr,
    327 				gettext("invalid option -%c\n"), flag);
    328 			usage();
    329 		}
    330 	}
    331 
    332 	argc -= optind;
    333 	argv = &argv[optind];
    334 
    335 	if (opt != D_CONTEXT && uflag)
    336 		uflag = 0;
    337 
    338 	if (argc != 2)
    339 		error(gettext("two filename arguments required"));
    340 
    341 	file1 = argv[0];
    342 	file2 = argv[1];
    343 
    344 	if (hflag) {
    345 		if (opt) {
    346 			error(
    347 gettext("-h doesn't support -e, -f, -n, -c, or -I"));
    348 		} else {
    349 			diffargv[0] = "diffh";
    350 			(void) execv(diffh, diffargv);
    351 			(void) fprintf(stderr, "diffh: ");
    352 			perror(diffh);
    353 			status = 2;
    354 			done();
    355 		}
    356 
    357 	}
    358 
    359 	if (strcmp(file1, "-") == 0) {
    360 		if (fstat(fileno(stdin), &stb1) == 0)
    361 			stb1.st_mode = S_IFREG;
    362 		else {
    363 			(void) fprintf(stderr, "diff: ");
    364 			perror("stdin");
    365 			done();
    366 		}
    367 	} else if (stat(file1, &stb1) < 0) {
    368 		(void) fprintf(stderr, "diff: ");
    369 		perror(file1);
    370 		done();
    371 	}
    372 
    373 	if (strcmp(file2, "-") == 0) {
    374 		if (strcmp(file1, "-") == 0)
    375 			error(gettext("cannot specify - -"));
    376 		else {
    377 			if (fstat(fileno(stdin), &stb2) == 0)
    378 				stb2.st_mode = S_IFREG;
    379 			else {
    380 				(void) fprintf(stderr, "diff: ");
    381 				perror("stdin");
    382 				done();
    383 			}
    384 		}
    385 	} else if (stat(file2, &stb2) < 0) {
    386 		(void) fprintf(stderr, "diff: ");
    387 		perror(file2);
    388 		done();
    389 	}
    390 
    391 	if ((stb1.st_mode & S_IFMT) == S_IFDIR &&
    392 	    (stb2.st_mode & S_IFMT) == S_IFDIR) {
    393 		diffdir(argv);
    394 		done();
    395 	    }
    396 
    397 	filename(&file1, &file2, &stb1, &input_file1);
    398 	filename(&file2, &file1, &stb2, &input_file2);
    399 	if ((input[0] = fopen(file1, "r")) == NULL) {
    400 		(void) fprintf(stderr, "diff: ");
    401 		perror(file1);
    402 		status = 2;
    403 		done();
    404 	}
    405 	initbuf(input[0], 0, 0);
    406 
    407 	if ((input[1] = fopen(file2, "r")) == NULL) {
    408 		(void) fprintf(stderr, "diff: ");
    409 		perror(file2);
    410 		status = 2;
    411 		done();
    412 	}
    413 	initbuf(input[1], 1, 0);
    414 
    415 	if (stb1.st_size != stb2.st_size)
    416 		goto notsame;
    417 
    418 	for (;;) {
    419 		i = fread(buf1, 1, BUFSIZ, input[0]);
    420 		j = fread(buf2, 1, BUFSIZ, input[1]);
    421 		if (ferror(input[0]) || ferror(input[1])) {
    422 			(void) fprintf(stderr, "diff: ");
    423 			(void) fprintf(stderr, gettext("Error reading "));
    424 			perror(ferror(input[0])? file1:file2);
    425 			(void) fclose(input[0]);
    426 			(void) fclose(input[1]);
    427 			status = 2;
    428 			done();
    429 		}
    430 		if (i != j)
    431 			goto notsame;
    432 		if (i == 0 && j == 0) {
    433 			/* files are the same; diff -D needs to print one */
    434 			if (opt == D_IFDEF) {
    435 				rewind(input[0]);
    436 				while (i = fread(buf1, 1, BUFSIZ, input[0]))
    437 					(void) fwrite(buf1, 1, i, stdout);
    438 			}
    439 			(void) fclose(input[0]);
    440 			(void) fclose(input[1]);
    441 			status = 0;
    442 			goto same;		/* files don't differ */
    443 		}
    444 		for (j = 0; j < i; j++)
    445 			if (buf1[j] != buf2[j])
    446 				goto notsame;
    447 	}
    448 
    449 notsame:
    450 	status = 1;
    451 	if (filebinary(input[0]) || filebinary(input[1])) {
    452 		if (ferror(input[0]) || ferror(input[1])) {
    453 			(void) fprintf(stderr, "diff: ");
    454 			(void) fprintf(stderr, gettext("Error reading "));
    455 			perror(ferror(input[0])? file1:file2);
    456 			(void) fclose(input[0]);
    457 			(void) fclose(input[1]);
    458 			status = 2;
    459 			done();
    460 		}
    461 		(void) printf(gettext("Binary files %s and %s differ\n"),
    462 		    file1, file2);
    463 		(void) fclose(input[0]);
    464 		(void) fclose(input[1]);
    465 		done();
    466 	}
    467 	prepare(0, file1);
    468 	prepare(1, file2);
    469 	prune();
    470 	sort(sfile[0], slen[0]);
    471 	sort(sfile[1], slen[1]);
    472 
    473 	member = (int *)file[1];
    474 	equiv(sfile[0], slen[0], sfile[1], slen[1], member);
    475 	member = (int *)ralloc((void *)member, (slen[1] + 2) * sizeof (int));
    476 
    477 	class = (int *)file[0];
    478 	unsort(sfile[0], slen[0], class);
    479 	class = (int *)ralloc((void *)class, (slen[0] + 2) * sizeof (int));
    480 
    481 	klist = (int *)talloc((slen[0] + 2) * sizeof (int));
    482 	clist = (struct cand *)talloc(sizeof (cand));
    483 	k = stone(class, slen[0], member, klist);
    484 	free((void *)member);
    485 	free((void *)class);
    486 
    487 	J = (int *)talloc((len[0] + 2) * sizeof (int));
    488 	unravel(klist[k]);
    489 	free((char *)clist);
    490 	free((char *)klist);
    491 
    492 	ixold = (long *)talloc((len[0] + 2) * sizeof (long));
    493 	ixnew = (long *)talloc((len[1] + 2) * sizeof (long));
    494 	check();
    495 	output();
    496 	status = anychange;
    497 
    498 same:
    499 	if (opt == D_CONTEXT && anychange == 0)
    500 		(void) printf(gettext("No differences encountered\n"));
    501 	done();
    502 	/*NOTREACHED*/
    503 	return (0);
    504 }
    505 
    506 static int
    507 stone(int *a, int n, int *b, int *c)
    508 {
    509 	int i, k, y;
    510 	int j, l;
    511 	int oldc, tc;
    512 	int oldl;
    513 
    514 	k = 0;
    515 	c[0] = newcand(0, 0, 0);
    516 	for (i = 1; i <= n; i++) {
    517 		j = a[i];
    518 		if (j == 0)
    519 			continue;
    520 		y = -b[j];
    521 		oldl = 0;
    522 		oldc = c[0];
    523 		do {
    524 			if (y <= clist[oldc].y)
    525 				continue;
    526 			l = search(c, k, y);
    527 			if (l != oldl+1)
    528 				oldc = c[l-1];
    529 			if (l <= k) {
    530 				if (clist[c[l]].y <= y)
    531 					continue;
    532 				tc = c[l];
    533 				c[l] = newcand(i, y, oldc);
    534 				oldc = tc;
    535 				oldl = l;
    536 			} else {
    537 				c[l] = newcand(i, y, oldc);
    538 				k++;
    539 				break;
    540 			}
    541 		} while ((y = b[++j]) > 0);
    542 	}
    543 	return (k);
    544 }
    545 
    546 static int
    547 newcand(int x, int y, int pred)
    548 {
    549 	struct cand *q;
    550 
    551 	clist = (struct cand *)ralloc((void *)clist, ++clen * sizeof (cand));
    552 	q = clist + clen -1;
    553 	q->x = x;
    554 	q->y = y;
    555 	q->pred = pred;
    556 	return (clen - 1);
    557 }
    558 
    559 static int
    560 search(int *c, int k, int y)
    561 {
    562 	int i, j, l;
    563 	int t;
    564 
    565 	if (clist[c[k]].y < y)	/* quick look for typical case */
    566 		return (k + 1);
    567 	i = 0;
    568 	j = k+1;
    569 	while ((l = (i + j) / 2) > i) {
    570 		t = clist[c[l]].y;
    571 		if (t > y)
    572 			j = l;
    573 		else if (t < y)
    574 			i = l;
    575 		else
    576 			return (l);
    577 	}
    578 	return (l + 1);
    579 }
    580 
    581 static void
    582 unravel(int p)
    583 {
    584 	int i;
    585 	struct cand *q;
    586 
    587 	for (i = 0; i <= len[0]; i++)
    588 		J[i] = i <= pref ? i :
    589 			i > len[0] - suff ? i + len[1] - len[0]:
    590 			0;
    591 	for (q = clist + p; q->y != 0; q = clist + q->pred)
    592 		J[q->x + pref] = q->y + pref;
    593 }
    594 
    595 /*
    596  * check does double duty:
    597  * 1. ferret out any fortuitous correspondences due to confounding by
    598  * hashing (which result in "jackpot")
    599  * 2. collect random access indexes to the two files
    600  */
    601 
    602 static void
    603 check(void)
    604 {
    605 	wint_t	c, d;
    606 	int i, j;
    607 	/* int jackpot; */
    608 	int	mlen;
    609 	long ctold, ctnew;
    610 
    611 	resetbuf(0);
    612 	resetbuf(1);
    613 
    614 	j = 1;
    615 	ixold[0] = ixnew[0] = 0;
    616 	/* jackpot = 0; */
    617 
    618 	/*
    619 	 * ctold and ctnew are byte positions within the file (suitable for
    620 	 * lseek()).  After we get a character with getwc(), instead of
    621 	 * just incrementing the byte position by 1, we have to determine
    622 	 * how many bytes the character actually is.  This is the reason for
    623 	 * the wctomb() calls here and in skipline().
    624 	 */
    625 	ctold = ctnew = 0;
    626 	for (i = 1; i <= len[0]; i++) {
    627 		if (J[i] == 0) {
    628 			ixold[i] = ctold += skipline(0);
    629 			continue;
    630 		}
    631 		while (j < J[i]) {
    632 			ixnew[j] = ctnew += skipline(1);
    633 			j++;
    634 		}
    635 		if (bflag || wflag || iflag) {
    636 			for (;;) {
    637 				c = getbufwchar(0, &mlen);
    638 				ctold += mlen;
    639 				d = getbufwchar(1, &mlen);
    640 				ctnew += mlen;
    641 
    642 				if (bflag && iswspace(c) && iswspace(d)) {
    643 					while (iswspace(c)) {
    644 						if (c == '\n' || c == WEOF)
    645 							break;
    646 
    647 						c = getbufwchar(0, &mlen);
    648 						ctold += mlen;
    649 					}
    650 					while (iswspace(d)) {
    651 						if (d == '\n' || d == WEOF)
    652 							break;
    653 
    654 						d = getbufwchar(1, &mlen);
    655 						ctnew += mlen;
    656 					}
    657 				} else if (wflag) {
    658 					while (iswspace(c) && c != '\n') {
    659 						c = getbufwchar(0, &mlen);
    660 						ctold += mlen;
    661 					}
    662 					while (iswspace(d) && d != '\n') {
    663 						d = getbufwchar(1, &mlen);
    664 						ctnew += mlen;
    665 					}
    666 				}
    667 				if (c == WEOF || d == WEOF) {
    668 					if (c != d) {
    669 						/* jackpot++; */
    670 						J[i] = 0;
    671 						if (c != '\n' && c != WEOF)
    672 							ctold += skipline(0);
    673 						if (d != '\n' && d != WEOF)
    674 							ctnew += skipline(1);
    675 						break;
    676 					}
    677 					break;
    678 				} else {
    679 					if (CHRTRAN(c) != CHRTRAN(d)) {
    680 						/* jackpot++; */
    681 						J[i] = 0;
    682 						if (c != '\n')
    683 							ctold += skipline(0);
    684 						if (d != '\n')
    685 							ctnew += skipline(1);
    686 						break;
    687 					}
    688 					if (c == '\n')
    689 						break;
    690 				}
    691 			}
    692 		} else {
    693 			for (;;) {
    694 				c = getbufwchar(0, &mlen);
    695 				ctold += mlen;
    696 				d = getbufwchar(1, &mlen);
    697 				ctnew += mlen;
    698 				if (c != d) {
    699 					/* jackpot++; */
    700 					J[i] = 0;
    701 					if (c != '\n' && c != WEOF)
    702 						ctold += skipline(0);
    703 					if (d != '\n' && d != WEOF)
    704 						ctnew += skipline(1);
    705 					break;
    706 				}
    707 				if (c == '\n' || c == WEOF)
    708 					break;
    709 			}
    710 		}
    711 		ixold[i] = ctold;
    712 		ixnew[j] = ctnew;
    713 		j++;
    714 	}
    715 	for (; j <= len[1]; j++) {
    716 		ixnew[j] = ctnew += skipline(1);
    717 	}
    718 
    719 /*	if(jackpot)			*/
    720 /*		fprintf(stderr, "diff: jackpot\n");	*/
    721 }
    722 
    723 static int
    724 skipline(int f)
    725 {
    726 	int i;
    727 	wint_t c;
    728 	int	mlen;
    729 
    730 	for (i = 1; c = getbufwchar(f, &mlen); ) {
    731 		if (c == '\n' || c == WEOF)
    732 			return (i);
    733 		i += mlen;
    734 	}
    735 	return (i);
    736 }
    737 
    738 static void
    739 output(void)
    740 {
    741 	int m;
    742 	wint_t	wc;
    743 	int i0, i1, j1;
    744 	int j0;
    745 	int	mlen;
    746 
    747 	resetbuf(0);
    748 	resetbuf(1);
    749 
    750 	m = len[0];
    751 	J[0] = 0;
    752 	J[m + 1] = len[1] + 1;
    753 	if (opt != D_EDIT)
    754 		for (i0 = 1; i0 <= m; i0 = i1+1) {
    755 			while (i0 <= m && J[i0] == J[i0 - 1] + 1)
    756 				i0++;
    757 			j0 = J[i0 - 1] + 1;
    758 			i1 = i0 - 1;
    759 			while (i1 < m && J[i1 + 1] == 0)
    760 				i1++;
    761 			j1 = J[i1 + 1] - 1;
    762 			J[i1] = j1;
    763 			change(i0, i1, j0, j1);
    764 		} else for (i0 = m; i0 >= 1; i0 = i1 - 1) {
    765 			while (i0 >= 1 && J[i0] == J[i0 + 1] - 1 && J[i0] != 0)
    766 				i0--;
    767 			j0 = J[i0 + 1] - 1;
    768 			i1 = i0 + 1;
    769 			while (i1 > 1 && J[i1 - 1] == 0)
    770 				i1--;
    771 			j1 = J[i1 - 1] + 1;
    772 			J[i1] = j1;
    773 			change(i1, i0, j1, j0);
    774 		}
    775 	if (m == 0)
    776 		change(1, 0, 1, len[1]);
    777 	if (opt == D_IFDEF) {
    778 		for (;;) {
    779 			wc = getbufwchar(0, &mlen);
    780 			if (wc == WEOF)
    781 				return;
    782 			(void) wcput(wc);
    783 		}
    784 	}
    785 	if (anychange && opt == D_CONTEXT)
    786 		dump_context_vec();
    787 }
    788 
    789 
    790 /*
    791  * indicate that there is a difference between lines a and b of the from file
    792  * to get to lines c to d of the to file.
    793  * If a is greater then b then there are no lines in the from file involved
    794  * and this means that there were lines appended (beginning at b).
    795  * If c is greater than d then there are lines missing from the to file.
    796  */
    797 static void
    798 change(int a, int b, int c, int d)
    799 {
    800 	char	time_buf[BUFSIZ];
    801 	char	*dcmsg;
    802 
    803 	if (opt != D_IFDEF && a > b && c > d)
    804 		return;
    805 	if (anychange == 0) {
    806 		anychange = 1;
    807 		if (opt == D_CONTEXT) {
    808 			/*
    809 			 * TRANSLATION_NOTE_FOR_DC
    810 			 * This message is the format of file
    811 			 * timestamps written with the -C and
    812 			 * -c options.
    813 			 * %a -- locale's abbreviated weekday name
    814 			 * %b -- locale's abbreviated month name
    815 			 * %e -- day of month [1,31]
    816 			 * %T -- Time as %H:%M:%S
    817 			 * %Y -- Year, including the century
    818 			 */
    819 			dcmsg = dcgettext(NULL, "%a %b %e %T %Y", LC_TIME);
    820 			(void) cftime(time_buf, dcmsg, &stb1.st_mtime);
    821 			if (uflag)
    822 				(void) printf("--- %s	%s\n", input_file1,
    823 				    time_buf);
    824 			else
    825 				(void) printf("*** %s	%s\n", input_file1,
    826 				    time_buf);
    827 			(void) cftime(time_buf, dcmsg, &stb2.st_mtime);
    828 			if (uflag)
    829 				(void) printf("+++ %s	%s\n", input_file2,
    830 				    time_buf);
    831 			else
    832 				(void) printf("--- %s	%s\n", input_file2,
    833 				    time_buf);
    834 
    835 			context_vec_start = (struct context_vec *)
    836 					    malloc(MAX_CONTEXT *
    837 					    sizeof (struct context_vec));
    838 			if (context_vec_start == NULL)
    839 				error(gettext(NO_MEM_ERR));
    840 
    841 			context_vec_end = context_vec_start + (MAX_CONTEXT - 1);
    842 			context_vec_ptr = context_vec_start - 1;
    843 		}
    844 	}
    845 
    846 	if (opt == D_CONTEXT) {
    847 		/*
    848 		 * if this new change is within 'context' lines of
    849 		 * the previous change, just add it to the change
    850 		 * record.  If the record is full or if this
    851 		 * change is more than 'context' lines from the previous
    852 		 * change, dump the record, reset it & add the new change.
    853 		 */
    854 		if (context_vec_ptr >= context_vec_end ||
    855 		    (context_vec_ptr >= context_vec_start &&
    856 		    a > (context_vec_ptr->b + 2 * context) &&
    857 		    c > (context_vec_ptr->d + 2 * context)))
    858 			dump_context_vec();
    859 
    860 		context_vec_ptr++;
    861 		context_vec_ptr->a = a;
    862 		context_vec_ptr->b = b;
    863 		context_vec_ptr->c = c;
    864 		context_vec_ptr->d = d;
    865 		return;
    866 	}
    867 
    868 	switch (opt) {
    869 	case D_NORMAL:
    870 	case D_EDIT:
    871 		range(a, b, ",");
    872 		(void) putchar(a > b ? 'a' : c > d ? 'd' : 'c');
    873 		if (opt == D_NORMAL) range(c, d, ",");
    874 		(void) printf("\n");
    875 		break;
    876 	case D_REVERSE:
    877 		(void) putchar(a > b ? 'a' : c > d ? 'd' : 'c');
    878 		range(a, b, " ");
    879 		(void) printf("\n");
    880 		break;
    881 	case D_NREVERSE:
    882 		if (a > b)
    883 			(void) printf("a%d %d\n", b, d - c + 1);
    884 		else {
    885 			(void) printf("d%d %d\n", a, b - a + 1);
    886 			if (!(c > d))
    887 				/* add changed lines */
    888 				(void) printf("a%d %d\n", b, d - c + 1);
    889 		}
    890 		break;
    891 	}
    892 	if (opt == D_NORMAL || opt == D_IFDEF) {
    893 		fetch(ixold, a, b, 0, "< ", 1);
    894 		if (a <= b && c <= d && opt == D_NORMAL)
    895 			(void) prints("---\n");
    896 	}
    897 	fetch(ixnew, c, d, 1, opt == D_NORMAL?"> ":empty, 0);
    898 	if ((opt == D_EDIT || opt == D_REVERSE) && c <= d)
    899 		(void) prints(".\n");
    900 	if (inifdef) {
    901 		(void) fprintf(stdout, "#endif /* %s */\n", endifname);
    902 		inifdef = 0;
    903 	}
    904 }
    905 
    906 static void
    907 range(int a, int b, char *separator)
    908 {
    909 	(void) printf("%d", a > b ? b : a);
    910 	if (a < b) {
    911 		(void) printf("%s%d", separator, b);
    912 	}
    913 }
    914 
    915 static void
    916 fetch(long *f, int a, int b, int filen, char *s, int oldfile)
    917 {
    918 	int i;
    919 	int col;
    920 	int nc;
    921 	int mlen = 0;
    922 	wint_t	ch;
    923 	FILE	*lb;
    924 
    925 	lb = input[filen];
    926 	/*
    927 	 * When doing #ifdef's, copy down to current line
    928 	 * if this is the first file, so that stuff makes it to output.
    929 	 */
    930 	if (opt == D_IFDEF && oldfile) {
    931 		long curpos = ftellbuf(filen);
    932 		/* print through if append (a>b), else to (nb: 0 vs 1 orig) */
    933 		nc = f[(a > b) ? b : (a - 1) ] - curpos;
    934 		for (i = 0; i < nc; i += mlen) {
    935 			ch = getbufwchar(filen, &mlen);
    936 			if (ch == WEOF) {
    937 				(void) putchar('\n');
    938 				break;
    939 			} else {
    940 				(void) wcput(ch);
    941 			}
    942 		}
    943 	}
    944 	if (a > b)
    945 		return;
    946 	if (opt == D_IFDEF) {
    947 		int oneflag = (*ifdef1 != '\0') != (*ifdef2 != '\0');
    948 		if (inifdef)
    949 			(void) fprintf(stdout, "#else /* %s%s */\n",
    950 			    oneflag && oldfile == 1 ? "!" : "", ifdef2);
    951 		else {
    952 			if (oneflag) {
    953 				/* There was only one ifdef given */
    954 				endifname = ifdef2;
    955 				if (oldfile)
    956 					(void) fprintf(stdout,
    957 					    "#ifndef %s\n", endifname);
    958 				else
    959 					(void) fprintf(stdout,
    960 					    "#ifdef %s\n", endifname);
    961 			} else {
    962 				endifname = oldfile ? ifdef1 : ifdef2;
    963 				(void) fprintf(stdout,
    964 					"#ifdef %s\n", endifname);
    965 			}
    966 		}
    967 		inifdef = 1 + oldfile;
    968 	}
    969 
    970 	for (i = a; i <= b; i++) {
    971 		(void) fseek(lb, f[i - 1], SEEK_SET);
    972 		initbuf(lb, filen, f[i - 1]);
    973 		if (opt != D_IFDEF)
    974 			(void) prints(s);
    975 		col = 0;
    976 		while (ch = getbufwchar(filen, &mlen)) {
    977 			if (ch != '\n' && ch != WEOF) {
    978 				if (ch == '\t' && tflag)
    979 					do
    980 						(void) putchar(' ');
    981 					while (++col & 7);
    982 				else {
    983 					(void) wcput(ch);
    984 					col++;
    985 				}
    986 			} else
    987 				break;
    988 		}
    989 		(void) putchar('\n');
    990 	}
    991 }
    992 
    993 /*
    994  * hashing has the effect of
    995  * arranging line in 7-bit bytes and then
    996  * summing 1-s complement in 16-bit hunks
    997  */
    998 
    999 static int
   1000 readhash(FILE *f, int filen, char *str)
   1001 {
   1002 	long sum;
   1003 	unsigned int	shift;
   1004 	int space;
   1005 	int t;
   1006 	wint_t	wt;
   1007 	int	mlen;
   1008 
   1009 	sum = 1;
   1010 	space = 0;
   1011 	if (!bflag && !wflag) {
   1012 		if (iflag)
   1013 			if (mbcurmax == 1) {
   1014 				/* In this case, diff doesn't have to take */
   1015 				/* care of multibyte characters. */
   1016 				for (shift = 0; (t = getc(f)) != '\n';
   1017 					shift += 7) {
   1018 					if (t == EOF) {
   1019 						if (shift) {
   1020 							(void) fprintf(stderr,
   1021 	gettext("Warning: missing newline at end of file %s\n"), str);
   1022 							break;
   1023 						} else
   1024 							return (0);
   1025 					}
   1026 					sum += (isupper(t) ? tolower(t) : t) <<
   1027 						(shift &= HALFMASK);
   1028 				}
   1029 			} else {
   1030 				/* In this case, diff needs to take care of */
   1031 				/* multibyte characters. */
   1032 				for (shift = 0;
   1033 				(wt = getbufwchar(filen, &mlen)) != '\n';
   1034 					shift += 7) {
   1035 					if (wt == WEOF) {
   1036 						if (shift) {
   1037 							(void) fprintf(stderr,
   1038 	gettext("Warning: missing newline at end of file %s\n"), str);
   1039 							break;
   1040 						} else
   1041 							return (0);
   1042 					}
   1043 					sum += NCCHRTRAN(wt) <<
   1044 						(shift &= HALFMASK);
   1045 				}
   1046 			}
   1047 		else
   1048 			/* In this case, diff doesn't have to take care of */
   1049 			/* multibyte characters. */
   1050 			for (shift = 0; (t = getc(f)) != '\n'; shift += 7) {
   1051 				if (t == EOF) {
   1052 					if (shift) {
   1053 						(void) fprintf(stderr,
   1054 	gettext("Warning: missing newline at end of file %s\n"), str);
   1055 						break;
   1056 					} else
   1057 						return (0);
   1058 				}
   1059 				sum += (long)t << (shift &= HALFMASK);
   1060 			}
   1061 	} else {
   1062 		/* In this case, diff needs to take care of */
   1063 		/* multibyte characters. */
   1064 		for (shift = 0; ; ) {
   1065 			wt = getbufwchar(filen, &mlen);
   1066 
   1067 			if (wt != '\n' && iswspace(wt)) {
   1068 				space++;
   1069 				continue;
   1070 			} else {
   1071 				switch (wt) {
   1072 				case WEOF:
   1073 					if (shift) {
   1074 						(void) fprintf(stderr,
   1075 	gettext("Warning: missing newline at end of file %s\n"), str);
   1076 						break;
   1077 					} else
   1078 						return (0);
   1079 				default:
   1080 					if (space && !wflag) {
   1081 						shift += 7;
   1082 						space = 0;
   1083 					}
   1084 					sum += CHRTRAN(wt) <<
   1085 						(shift &= HALFMASK);
   1086 					shift += 7;
   1087 					continue;
   1088 				case L'\n':
   1089 					break;
   1090 				}
   1091 			}
   1092 			break;
   1093 		}
   1094 	}
   1095 	return (sum);
   1096 }
   1097 
   1098 
   1099 /* dump accumulated "context" diff changes */
   1100 static void
   1101 dump_context_vec(void)
   1102 {
   1103 	int	a, b, c, d;
   1104 	char	ch;
   1105 	struct	context_vec *cvp = context_vec_start;
   1106 	int	lowa, upb, lowc, upd;
   1107 	int	do_output;
   1108 
   1109 	if (cvp > context_vec_ptr)
   1110 		return;
   1111 
   1112 	lowa = max(1, cvp->a - context);
   1113 	upb  = min(len[0], context_vec_ptr->b + context);
   1114 	lowc = max(1, cvp->c - context);
   1115 	upd  = min(len[1], context_vec_ptr->d + context);
   1116 
   1117 	if (uflag) {
   1118 		(void) printf("@@ -%d,%d +%d,%d @@\n",
   1119 		    lowa, upb - lowa + 1,
   1120 		    lowc, upd - lowc + 1);
   1121 	} else {
   1122 		(void) printf("***************\n*** ");
   1123 		range(lowa, upb, ",");
   1124 		(void) printf(" ****\n");
   1125 	}
   1126 
   1127 	/*
   1128 	 * output changes to the "old" file.  The first loop suppresses
   1129 	 * output if there were no changes to the "old" file (we'll see
   1130 	 * the "old" lines as context in the "new" list).
   1131 	 */
   1132 	if (uflag)
   1133 		do_output = 1;
   1134 	else
   1135 		for (do_output = 0; cvp <= context_vec_ptr; cvp++)
   1136 			if (cvp->a <= cvp->b) {
   1137 				cvp = context_vec_start;
   1138 				do_output++;
   1139 				break;
   1140 			}
   1141 
   1142 	if (do_output) {
   1143 		while (cvp <= context_vec_ptr) {
   1144 			a = cvp->a; b = cvp->b; c = cvp->c; d = cvp->d;
   1145 
   1146 			if (a <= b && c <= d)
   1147 				ch = 'c';
   1148 			else
   1149 				ch = (a <= b) ? 'd' : 'a';
   1150 
   1151 			if (ch == 'a') {
   1152 				/* The last argument should not affect */
   1153 				/* the behavior of fetch() */
   1154 				fetch(ixold, lowa, b, 0, uflag ? " " : "  ", 1);
   1155 				if (uflag)
   1156 					fetch(ixnew, c, d, 1, "+", 0);
   1157 			} else if (ch == 'd') {
   1158 				fetch(ixold, lowa, a - 1, 0, uflag ? " " :
   1159 					    "  ", 1);
   1160 				fetch(ixold, a, b, 0, uflag ? "-" : "- ", 1);
   1161 			} else {
   1162 				/* The last argument should not affect */
   1163 				/* the behavior of fetch() */
   1164 				fetch(ixold, lowa, a-1, 0, uflag ? " " : "  ",
   1165 				    1);
   1166 				if (uflag) {
   1167 					fetch(ixold, a, b, 0, "-", 1);
   1168 					fetch(ixnew, c, d, 1, "+", 0);
   1169 				} else
   1170 					fetch(ixold, a, b, 0, "! ", 1);
   1171 			}
   1172 			lowa = b + 1;
   1173 			cvp++;
   1174 		}
   1175 		/* The last argument should not affect the behavior */
   1176 		/* of fetch() */
   1177 		fetch(ixold, b+1, upb, 0, uflag ? " " : "  ", 1);
   1178 	}
   1179 
   1180 	if (uflag) {
   1181 		context_vec_ptr = context_vec_start - 1;
   1182 		return;
   1183 	}
   1184 
   1185 	/* output changes to the "new" file */
   1186 	(void) printf("--- ");
   1187 	range(lowc, upd, ",");
   1188 	(void) printf(" ----\n");
   1189 
   1190 	do_output = 0;
   1191 	for (cvp = context_vec_start; cvp <= context_vec_ptr; cvp++)
   1192 		if (cvp->c <= cvp->d) {
   1193 			cvp = context_vec_start;
   1194 			do_output++;
   1195 			break;
   1196 		}
   1197 
   1198 	if (do_output) {
   1199 		while (cvp <= context_vec_ptr) {