Home | History | Annotate | Download | only in bdiff
      1 /*
      2  * CDDL HEADER START
      3  *
      4  * The contents of this file are subject to the terms of the
      5  * Common Development and Distribution License, Version 1.0 only
      6  * (the "License").  You may not use this file except in compliance
      7  * with the License.
      8  *
      9  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
     10  * or http://www.opensolaris.org/os/licensing.
     11  * See the License for the specific language governing permissions
     12  * and limitations under the License.
     13  *
     14  * When distributing Covered Code, include this CDDL HEADER in each
     15  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
     16  * If applicable, add the following below this CDDL HEADER, with the
     17  * fields enclosed by brackets "[]" replaced with your own identifying
     18  * information: Portions Copyright [yyyy] [name of copyright owner]
     19  *
     20  * CDDL HEADER END
     21  */
     22 /*	Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T	*/
     23 /*	  All Rights Reserved  	*/
     24 
     25 
     26 /*
     27  * Copyright 2004 Sun Microsystems, Inc.  All rights reserved.
     28  * Use is subject to license terms.
     29  */
     30 
     31 #pragma ident	"%Z%%M%	%I%	%E% SMI"
     32 
     33 #include <fatal.h>
     34 #include <signal.h>
     35 #include <sys/types.h>
     36 #include <unistd.h>
     37 #include <stdio.h>
     38 #include <ctype.h>
     39 #include <string.h>
     40 #include <stdlib.h>
     41 #include <wait.h>
     42 
     43 #define	ONSIG	16
     44 
     45 /*
     46  *	This program segments two files into pieces of <= seglim lines
     47  *	(which is passed as a third argument or defaulted to some number)
     48  *	and then executes diff upon the pieces. The output of
     49  *	'diff' is then processed to make it look as if 'diff' had
     50  *	processed the files whole. The reason for all this is that seglim
     51  *	is a reasonable upper limit on the size of files that diff can
     52  *	process.
     53  *	NOTE -- by segmenting the files in this manner, it cannot be
     54  *	guaranteed that the 'diffing' of the segments will generate
     55  *	a minimal set of differences.
     56  *	This process is most definitely not equivalent to 'diffing'
     57  *	the files whole, assuming 'diff' could handle such large files.
     58  *
     59  *	'diff' is executed by a child process, generated by forking,
     60  *	and communicates with this program through pipes.
     61  */
     62 
     63 static char Error[128];
     64 
     65 static int seglim;	/* limit of size of file segment to be generated */
     66 
     67 static char diff[]  =  "/usr/bin/diff";
     68 static char tempskel[] = "/tmp/bdXXXXXX"; /* used to generate temp file names */
     69 static char tempfile[32];
     70 static char otmp[32], ntmp[32];
     71 static int	fflags;
     72 static int	fatal_num = 1;		/* exit number for fatal exit */
     73 static offset_t	linenum;
     74 static size_t obufsiz, nbufsiz, dbufsiz;
     75 static char *readline(char **, size_t *, FILE *);
     76 static void addgen(char **, size_t *, FILE *);
     77 static void delgen(char **, size_t *, FILE *);
     78 static void fixnum(char *);
     79 static void fatal(char *);
     80 static void setsig(void);
     81 static void setsig1(int);
     82 static char *satoi(char *, offset_t *);
     83 static FILE *maket(char *);
     84 
     85 static char *prognam;
     86 
     87 int
     88 main(int argc, char *argv[])
     89 {
     90 	FILE *poldfile, *pnewfile;
     91 	char *oline, *nline, *diffline;
     92 	char *olp, *nlp, *dp;
     93 	int otcnt, ntcnt;
     94 	pid_t i;
     95 	int pfd[2];
     96 	FILE *poldtemp, *pnewtemp, *pipeinp;
     97 	int status;
     98 
     99 	prognam = argv[0];
    100 	/*
    101 	 * Set flags for 'fatal' so that it will clean up,
    102 	 * produce a message, and terminate.
    103 	 */
    104 	fflags = FTLMSG | FTLCLN | FTLEXIT;
    105 
    106 	setsig();
    107 
    108 	if (argc < 3 || argc > 5)
    109 		fatal("arg count");
    110 
    111 	if (strcmp(argv[1], "-") == 0 && strcmp(argv[2], "-") == 0)
    112 		fatal("both files standard input");
    113 	if (strcmp(argv[1], "-") == 0)
    114 		poldfile = stdin;
    115 	else
    116 		if ((poldfile = fopen(argv[1], "r")) == NULL) {
    117 			(void) snprintf(Error, sizeof (Error),
    118 				"Can not open '%s'", argv[1]);
    119 			fatal(Error);
    120 		}
    121 	if (strcmp(argv[2], "-") == 0)
    122 		pnewfile = stdin;
    123 	else
    124 		if ((pnewfile = fopen(argv[2], "r")) == NULL) {
    125 			(void) snprintf(Error, sizeof (Error),
    126 				"Can not open '%s'", argv[2]);
    127 			fatal(Error);
    128 		}
    129 
    130 	seglim = 3500;
    131 
    132 	if (argc > 3) {
    133 		if (argv[3][0] == '-' && argv[3][1] == 's')
    134 			fflags &= ~FTLMSG;
    135 		else {
    136 			if ((seglim = atoi(argv[3])) == 0)
    137 				fatal("non-numeric limit");
    138 			if (argc == 5 && argv[4][0] == '-' &&
    139 					argv[4][1] == 's')
    140 				fflags &= ~FTLMSG;
    141 		}
    142 	}
    143 
    144 	linenum = 0;
    145 
    146 	/* Allocate the buffers and initialize their lengths */
    147 
    148 	obufsiz = BUFSIZ;
    149 	nbufsiz = BUFSIZ;
    150 	dbufsiz = BUFSIZ;
    151 
    152 	if ((oline = (char *)malloc(obufsiz)) == NULL ||
    153 	    (nline = (char *)malloc(nbufsiz)) == NULL ||
    154 	    (diffline = (char *)malloc(dbufsiz)) == NULL)
    155 		fatal("Out of memory");
    156 
    157 	/*
    158 	 * The following while-loop will prevent any lines
    159 	 * common to the beginning of both files from being
    160 	 * sent to 'diff'. Since the running time of 'diff' is
    161 	 * non-linear, this will help improve performance.
    162 	 * If, during this process, both files reach EOF, then
    163 	 * the files are equal and the program will terminate.
    164 	 * If either file reaches EOF before the other, the
    165 	 * program will generate the appropriate 'diff' output
    166 	 * itself, since this can be easily determined and will
    167 	 * avoid executing 'diff' completely.
    168 	 */
    169 	for (;;) {
    170 		olp = readline(&oline, &obufsiz, poldfile);
    171 		nlp = readline(&nline, &nbufsiz, pnewfile);
    172 
    173 		if (!olp && !nlp)	/* EOF found on both:  files equal */
    174 			return (0);
    175 
    176 		if (!olp) {
    177 			/*
    178 			 * The entire old file is a prefix of the
    179 			 * new file. Generate the appropriate "append"
    180 			 * 'diff'-like output, which is of the form:
    181 			 * 		nan, n
    182 			 * where 'n' represents a line-number.
    183 			 */
    184 			addgen(&nline, &nbufsiz, pnewfile);
    185 		}
    186 
    187 		if (!nlp) {
    188 			/*
    189 			 * The entire new file is a prefix of the
    190 			 * old file. Generate the appropriate "delete"
    191 			 * 'diff'-like output, which is of the form:
    192 			 * 		n, ndn
    193 			 * where 'n' represents a line-number.
    194 			 */
    195 			delgen(&oline, &obufsiz, poldfile);
    196 		}
    197 
    198 		if (strcmp(olp, nlp) == 0)
    199 			linenum++;
    200 		else
    201 			break;
    202 	}
    203 
    204 	/*
    205 	 * Here, first 'linenum' lines are equal.
    206 	 * The following while-loop segments both files into
    207 	 * seglim segments, forks and executes 'diff' on the
    208 	 * segments, and processes the resulting output of
    209 	 * 'diff', which is read from a pipe.
    210 	 */
    211 	for (;;) {
    212 		/* If both files are at EOF, everything is done. */
    213 		if (!olp && !nlp)	/* finished */
    214 			return (0);
    215 
    216 		if (!olp) {
    217 			/*
    218 			 * Generate appropriate "append"
    219 			 * output without executing 'diff'.
    220 			 */
    221 			addgen(&nline, &nbufsiz, pnewfile);
    222 		}
    223 
    224 		if (!nlp) {
    225 			/*
    226 			 * Generate appropriate "delete"
    227 			 * output without executing 'diff'.
    228 			 */
    229 			delgen(&oline, &obufsiz, poldfile);
    230 		}
    231 
    232 		/*
    233 		 * Create a temporary file to hold a segment
    234 		 * from the old file, and write it.
    235 		 */
    236 		poldtemp = maket(otmp);
    237 		otcnt = 0;
    238 		while (olp && otcnt < seglim) {
    239 			(void) fputs(oline, poldtemp);
    240 			if (ferror(poldtemp) != 0) {
    241 				fflags |= FTLMSG;
    242 				fatal("Can not write to temporary file");
    243 			}
    244 			olp = readline(&oline, &obufsiz, poldfile);
    245 			otcnt++;
    246 		}
    247 		(void) fclose(poldtemp);
    248 
    249 		/*
    250 		 * Create a temporary file to hold a segment
    251 		 * from the new file, and write it.
    252 		 */
    253 		pnewtemp = maket(ntmp);
    254 		ntcnt = 0;
    255 		while (nlp && ntcnt < seglim) {
    256 			(void) fputs(nline, pnewtemp);
    257 			if (ferror(pnewtemp) != 0) {
    258 				fflags |= FTLMSG;
    259 				fatal("Can not write to temporary file");
    260 			}
    261 			nlp = readline(&nline, &nbufsiz, pnewfile);
    262 			ntcnt++;
    263 		}
    264 		(void) fclose(pnewtemp);
    265 
    266 		/* Create pipes and fork.  */
    267 		if ((pipe(pfd)) == -1)
    268 			fatal("Can not create pipe");
    269 		if ((i = fork()) < (pid_t)0) {
    270 			(void) close(pfd[0]);
    271 			(void) close(pfd[1]);
    272 			fatal("Can not fork, try again");
    273 		} else if (i == (pid_t)0) {	/* child process */
    274 			(void) close(pfd[0]);
    275 			(void) close(1);
    276 			(void) dup(pfd[1]);
    277 			(void) close(pfd[1]);
    278 
    279 			/* Execute 'diff' on the segment files. */
    280 			(void) execlp(diff, diff, otmp, ntmp, 0);
    281 
    282 			/*
    283 			 * Exit code here must be > 1.
    284 			 * Parent process treats exit code of 1 from the child
    285 			 * as non-error because the child process "diff" exits
    286 			 * with a status of 1 when a difference is encountered.
    287 			 * The error here is a true error--the parent process
    288 			 * needs to detect it and exit with a non-zero status.
    289 			 */
    290 			(void) close(1);
    291 			(void) snprintf(Error, sizeof (Error),
    292 			    "Can not execute '%s'", diff);
    293 			fatal_num = 2;
    294 			fatal(Error);
    295 		} else {			/* parent process */
    296 			(void) close(pfd[1]);
    297 			pipeinp = fdopen(pfd[0], "r");
    298 
    299 			/* Process 'diff' output. */
    300 			while ((dp = readline(&diffline, &dbufsiz, pipeinp))) {
    301 				if (isdigit(*dp))
    302 					fixnum(diffline);
    303 				else
    304 					(void) printf("%s", diffline);
    305 			}
    306 
    307 			(void) fclose(pipeinp);
    308 
    309 			/* EOF on pipe. */
    310 			(void) wait(&status);
    311 			if (status&~0x100) {
    312 				(void) snprintf(Error, sizeof (Error),
    313 				    "'%s' failed", diff);
    314 				fatal(Error);
    315 			}
    316 		}
    317 		linenum += seglim;
    318 
    319 		/* Remove temporary files. */
    320 		(void) unlink(otmp);
    321 		(void) unlink(ntmp);
    322 	}
    323 }
    324 
    325 /* Routine to save remainder of a file. */
    326 static void
    327 saverest(char **linep, size_t *bufsizp, FILE *iptr)
    328 {
    329 	char *lp;
    330 	FILE *temptr;
    331 
    332 	temptr = maket(tempfile);
    333 
    334 	lp = *linep;
    335 
    336 	while (lp) {
    337 		(void) fputs(*linep, temptr);
    338 		linenum++;
    339 		lp = readline(linep, bufsizp, iptr);
    340 	}
    341 	(void) fclose(temptr);
    342 }
    343 
    344 /* Routine to write out data saved by 'saverest' and to remove the file. */
    345 static void
    346 putsave(char **linep, size_t *bufsizp, char type)
    347 {
    348 	FILE *temptr;
    349 
    350 	if ((temptr = fopen(tempfile, "r")) == NULL) {
    351 		(void) snprintf(Error, sizeof (Error),
    352 		    "Can not open tempfile ('%s')", tempfile); fatal(Error);
    353 	}
    354 
    355 	while (readline(linep, bufsizp, temptr))
    356 		(void) printf("%c %s", type, *linep);
    357 
    358 	(void) fclose(temptr);
    359 
    360 	(void) unlink(tempfile);
    361 }
    362 
    363 static void
    364 fixnum(char *lp)
    365 {
    366 	offset_t num;
    367 
    368 	while (*lp) {
    369 		switch (*lp) {
    370 
    371 		case 'a':
    372 		case 'c':
    373 		case 'd':
    374 		case ',':
    375 		case '\n':
    376 			(void) printf("%c", *lp);
    377 			lp++;
    378 			break;
    379 
    380 		default:
    381 			lp = satoi(lp, &num);
    382 			num += linenum;
    383 			(void) printf("%lld", num);
    384 		}
    385 	}
    386 }
    387 
    388 static void
    389 addgen(char **lpp, size_t *bufsizp, FILE *fp)
    390 {
    391 	offset_t oldline;
    392 	(void) printf("%llda%lld", linenum, linenum+1);
    393 
    394 	/* Save lines of new file. */
    395 	oldline = linenum + 1;
    396 	saverest(lpp, bufsizp, fp);
    397 
    398 	if (oldline < linenum)
    399 		(void) printf(",%lld\n", linenum);
    400 	else
    401 		(void) printf("\n");
    402 
    403 	/* Output saved lines, as 'diff' would. */
    404 	putsave(lpp, bufsizp, '>');
    405 
    406 	exit(0);
    407 }
    408 
    409 static void
    410 delgen(char **lpp, size_t *bufsizp, FILE *fp)
    411 {
    412 	offset_t savenum;
    413 
    414 	(void) printf("%lld", linenum+1);
    415 	savenum = linenum;
    416 
    417 	/* Save lines of old file. */
    418 	saverest(lpp, bufsizp, fp);
    419 
    420 	if (savenum +1 != linenum)
    421 		(void) printf(",%lldd%lld\n", linenum, savenum);
    422 	else
    423 		(void) printf("d%lld\n", savenum);
    424 
    425 	/* Output saved lines, as 'diff' would.  */
    426 	putsave(lpp, bufsizp, '<');
    427 
    428 	exit(0);
    429 }
    430 
    431 static void
    432 clean_up()
    433 {
    434 	(void) unlink(tempfile);
    435 	(void) unlink(otmp);
    436 	(void) unlink(ntmp);
    437 }
    438 
    439 static FILE *
    440 maket(char *file)
    441 {
    442 	FILE *iop;
    443 	int fd;
    444 
    445 	(void) strcpy(file, tempskel);
    446 	if ((fd = mkstemp(file)) == -1 ||
    447 		(iop = fdopen(fd, "w+")) == NULL) {
    448 		(void) snprintf(Error, sizeof (Error),
    449 		    "Can not open/create temp file ('%s')", file);
    450 		fatal(Error);
    451 	}
    452 	return (iop);
    453 }
    454 
    455 static void
    456 fatal(char *msg)
    457 /*
    458  *	General purpose error handler.
    459  *
    460  *	The argument to fatal is a pointer to an error message string.
    461  *	The action of this routine is driven completely from
    462  *	the "fflags" global word (see <fatal.h>).
    463  *
    464  *	The FTLMSG bit controls the writing of the error
    465  *	message on file descriptor 2.  A newline is written
    466  *	after the user supplied message.
    467  *
    468  *	If the FTLCLN bit is on, clean_up is called.
    469  */
    470 {
    471 	if (fflags & FTLMSG)
    472 		(void) fprintf(stderr, "%s: %s\n", prognam, msg);
    473 	if (fflags & FTLCLN)
    474 		clean_up();
    475 	if (fflags & FTLEXIT)
    476 		exit(fatal_num);
    477 }
    478 
    479 static void
    480 setsig()
    481 /*
    482  *	General-purpose signal setting routine.
    483  *	All non-ignored, non-caught signals are caught.
    484  *	If a signal other than hangup, interrupt, or quit is caught,
    485  *	a "user-oriented" message is printed on file descriptor 2.
    486  *	If hangup, interrupt or quit is caught, that signal
    487  *	is set to ignore.
    488  *	Termination is like that of "fatal",
    489  *	via "clean_up()"
    490  */
    491 {
    492 	void (*act)(int);
    493 	int j;
    494 
    495 	for (j = 1; j < ONSIG; j++) {
    496 		act = signal(j, setsig1);
    497 		if (act == SIG_ERR)
    498 			continue;
    499 		if (act == SIG_DFL)
    500 			continue;
    501 		(void) signal(j, act);
    502 	}
    503 }
    504 
    505 static void
    506 setsig1(int sig)
    507 {
    508 
    509 	(void) signal(sig, SIG_IGN);
    510 	clean_up();
    511 	exit(1);
    512 }
    513 
    514 static char *
    515 satoi(char *p, offset_t *ip)
    516 {
    517 	offset_t sum;
    518 
    519 	sum = 0;
    520 	while (isdigit(*p))
    521 		sum = sum * 10 + (*p++ - '0');
    522 	*ip = sum;
    523 	return (p);
    524 }
    525 
    526 /*
    527  * Read a line of data from a file.  If the current buffer is not large enough
    528  * to contain the line, double the size of the buffer and continue reading.
    529  * Loop until either the entire line is read or until there is no more space
    530  * to be malloc'd.
    531  */
    532 
    533 static char *
    534 readline(char **bufferp, size_t *bufsizp, FILE *filep)
    535 {
    536 	char *bufp;
    537 	size_t newsize;		/* number of bytes to make buffer */
    538 	size_t oldsize;
    539 
    540 	(*bufferp)[*bufsizp - 1] = '\t'; /* arbitrary non-zero character */
    541 	(*bufferp)[*bufsizp - 2] = ' ';	/* arbitrary non-newline char */
    542 	bufp = fgets(*bufferp, *bufsizp, filep);
    543 	if (bufp == NULL)
    544 		return (bufp);
    545 	while ((*bufferp)[*bufsizp -1] == '\0' &&
    546 	    (*bufferp)[*bufsizp - 2] != '\n' &&
    547 	    strlen(*bufferp) == *bufsizp - 1) {
    548 		newsize = 2 * (*bufsizp);
    549 		bufp = (char *)realloc((void *)*bufferp, newsize);
    550 		if (bufp == NULL)
    551 			fatal("Out of memory");
    552 		oldsize = *bufsizp;
    553 		*bufsizp = newsize;
    554 		*bufferp = bufp;
    555 		(*bufferp)[*bufsizp - 1] = '\t';
    556 		(*bufferp)[*bufsizp - 2] = ' ';
    557 		bufp = fgets(*bufferp + oldsize -1, oldsize + 1, filep);
    558 		if (bufp == NULL) {
    559 			if (filep->_flag & _IOEOF) {
    560 				bufp = *bufferp;
    561 				break;
    562 			} else
    563 				fatal("Read error");
    564 		} else
    565 			bufp = *bufferp;
    566 	}
    567 	return (bufp);
    568 }
    569