Home | History | Annotate | Download | only in cut
      1 /*
      2  * CDDL HEADER START
      3  *
      4  * The contents of this file are subject to the terms of the
      5  * Common Development and Distribution License (the "License").
      6  * You may not use this file except in compliance with the License.
      7  *
      8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
      9  * or http://www.opensolaris.org/os/licensing.
     10  * See the License for the specific language governing permissions
     11  * and limitations under the License.
     12  *
     13  * When distributing Covered Code, include this CDDL HEADER in each
     14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
     15  * If applicable, add the following below this CDDL HEADER, with the
     16  * fields enclosed by brackets "[]" replaced with your own identifying
     17  * information: Portions Copyright [yyyy] [name of copyright owner]
     18  *
     19  * CDDL HEADER END
     20  */
     21 
     22 /*
     23  * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
     24  * Use is subject to license terms.
     25  */
     26 
     27 /*	Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T	*/
     28 /*	  All Rights Reserved  	*/
     29 
     30 #
     31 /* cut : cut and paste columns of a table (projection of a relation) */
     32 /* Release 1.5; handles single backspaces as produced by nroff    */
     33 
     34 #include <stdio.h>	/* make: cc cut.c */
     35 #include <ctype.h>
     36 #include <limits.h>
     37 #include <locale.h>
     38 #include <wchar.h>
     39 #include <stdlib.h>
     40 #include <unistd.h>
     41 #include <string.h>
     42 #include <errno.h>
     43 
     44 #define	MAX_RANGES	MAX_INPUT	/* maximum number of ranges */
     45 					/* as input args */
     46 
     47 static void	bfunc(void);
     48 static void	bnfunc(void);
     49 static void	cfunc(void);
     50 static void	ffunc(void);
     51 static wchar_t	*read_line(FILE *);
     52 static void	process_list(char *);
     53 static void	diag(const char *);
     54 static void	usage(void);
     55 
     56 static wchar_t wdel = '\t';	/* -f field delimiter; tab unless -d replaces it */
     57 static int	dellen;		/* mbtowc() result for the -d argument */
     58 static int	supflag = 0;	/* set by -s: ffunc drops lines lacking a delimiter */
     59 static int	rstart[MAX_RANGES];	/* first position of each selected range */
     60 static int	rend[MAX_RANGES];	/* last position of each selected range */
     61 static int	nranges = 0;	/* number of rstart[]/rend[] entries in use */
     62 static FILE	*inptr;		/* input stream currently being processed */
     63 static char	dummy[MB_LEN_MAX];	/* wctomb() scratch space used by bnfunc */
     64 
     65 static wchar_t	*linebuf = NULL;	/* read_line's heap buffer, grown on demand */
     66 static int	bufsiz;		/* linebuf capacity in wide chars, not counting the terminator slot */
     67 
     68 int
     69 main(int argc, char **argv)
     70 {
     71 	int	c;
     72 	char	*list;
     73 	int	status = 0;
     74 	int	bflag, nflag, cflag, fflag, dflag, filenr;
     75 	void	(*funcp)();
     76 
     77 	bflag = nflag = cflag = fflag = dflag = 0;
     78 
     79 	(void) setlocale(LC_ALL, "");
     80 
     81 #if !defined(TEXT_DOMAIN)		/* Should be defined by cc -D */
     82 #define	TEXT_DOMAIN	"SYS_TEST"	/* Use this only if it weren't. */
     83 #endif
     84 	(void) textdomain(TEXT_DOMAIN);
     85 
     86 	while ((c = getopt(argc, argv, "b:c:d:f:ns")) != EOF)
     87 		switch (c) {
     88 			case 'b':
     89 				if (fflag || cflag)
     90 					usage();
     91 				bflag++;
     92 				list = optarg;
     93 				break;
     94 
     95 			case 'c':
     96 				if (fflag || bflag)
     97 					usage();
     98 				cflag++;
     99 				list = optarg;
    100 				break;
    101 
    102 			case 'd':
    103 				dellen = mbtowc(&wdel, optarg, MB_CUR_MAX);
    104 				if (dellen == -1) {
    105 					diag("no delimiter specified");
    106 				} else if (dellen != strlen(optarg))
    107 					diag("invalid delimiter");
    108 				dflag++;
    109 				break;
    110 
    111 			case 'f':
    112 				if (bflag || cflag)
    113 					usage();
    114 				fflag++;
    115 				list = optarg;
    116 				break;
    117 
    118 			case 'n':
    119 				nflag++;
    120 				break;
    121 
    122 			case 's':
    123 				supflag++;
    124 				break;
    125 
    126 			case '?':
    127 				usage();
    128 		}
    129 
    130 	argv = &argv[optind];
    131 	argc -= optind;
    132 
    133 	/* you must use one and only one option -b, -c, or -f */
    134 	if (!(cflag || fflag || bflag))
    135 		usage();
    136 
    137 	/*
    138 	 * Make sure combination of options is correct
    139 	 */
    140 	if (nflag) {
    141 		if (cflag || fflag) {
    142 			(void) fprintf(stderr, gettext(
    143 			    "cut: -n may only be used with -b\n"));
    144 			usage();
    145 		}
    146 	}
    147 
    148 	if (dflag || supflag) {
    149 		if (bflag || cflag) {
    150 			if (dflag)
    151 				(void) fprintf(stderr, gettext(
    152 				    "cut: -d may only be used with -f\n"));
    153 			if (supflag)
    154 				(void) fprintf(stderr, gettext(
    155 				    "cut: -s may only be used with -f\n"));
    156 			usage();
    157 		}
    158 	}
    159 
    160 	process_list(list);
    161 
    162 	if (cflag) {
    163 		funcp = cfunc;
    164 	} else if (bflag) {
    165 		if (nflag)
    166 			funcp = bnfunc;
    167 		else
    168 			funcp = bfunc;
    169 	} else { /* fflag */
    170 		funcp = ffunc;
    171 	}
    172 
    173 	if (nranges == 0)
    174 		diag("no list specified");
    175 
    176 	filenr = 0;
    177 	do {	/* for all input files */
    178 		if (argc == 0 || strcmp(argv[filenr], "-") == 0)
    179 			inptr = stdin;
    180 		else {
    181 			if ((inptr = fopen(argv[filenr], "r")) == NULL) {
    182 				(void) fprintf(stderr, "cut: %s: %s\n",
    183 				    argv[filenr], strerror(errno));
    184 				status = 1;
    185 				continue;
    186 			}
    187 			/*
    188 			 * ftell() is used to check whether the file is an
    189 			 * open file descriptor and if the file is associate
    190 			 * with the a pipe, a FIFO,or a socket if file cannot
    191 			 * be opened ftell() can be used to check the status
    192 			 * of the file
    193 			 */
    194 			if (ftell(inptr) == -1) {
    195 				(void) fprintf(stderr, "cut: %s: %s\n",
    196 				    argv[filenr], strerror(errno));
    197 				status = 1;
    198 				continue;
    199 			}
    200 		}
    201 		(*funcp)();
    202 		(void) fclose(inptr);
    203 	} while (++filenr < argc);
    204 	return (status);
    205 }
    206 
    207 /* parse range list argument and set-up rstart/rend array */
    208 void
    209 process_list(char *list)
    210 {
    211 	int inrange = 0;
    212 	int start = 0;
    213 	int num = 0;
    214 	char *rlist = list;
    215 	char *p;
    216 	int i, j;
    217 	int tmp;
    218 
    219 	/* first, parse list of ranges */
    220 	do {
    221 		p = rlist;
    222 		switch (*p) {
    223 			case '-':
    224 				if (inrange)
    225 					diag("invalid range specifier");
    226 
    227 				inrange = 1;
    228 				if (num == 0)
    229 					start = 1;
    230 				else {
    231 					start = num;
    232 					num = 0;
    233 				}
    234 				break;
    235 
    236 			case '\0':
    237 			case ',':
    238 			case ' ':
    239 			case '\t':
    240 				/*
    241 				 * this is temporary - it will change
    242 				 * when the isblank() routine becomes
    243 				 * available.
    244 				 */
    245 				if (nranges == MAX_RANGES)
    246 					diag("too many ranges specified");
    247 
    248 				if (inrange) {
    249 					if (num == 0)
    250 						num = INT_MAX;
    251 					if (num < start)
    252 						diag("ranges must be "
    253 						    "increasing");
    254 					rstart[nranges] = start;
    255 					rend[nranges] = num;
    256 					nranges++;
    257 				} else {
    258 					rstart[nranges] = num;
    259 					rend[nranges] = num;
    260 					nranges++;
    261 				}
    262 
    263 				num = 0;
    264 				start = 0;
    265 				inrange = 0;
    266 
    267 				if (*p == '\0')
    268 					continue;
    269 				break;
    270 
    271 			default:
    272 				if (!isdigit(*p))
    273 					diag("invalid character in range");
    274 				num = atoi(p);
    275 				while (isdigit(*rlist))
    276 					rlist++;
    277 				continue;
    278 		}
    279 		rlist++;
    280 	} while (*p != '\0');
    281 
    282 	/* then, consolidate ranges where possible */
    283 	for (i = 0; i < (nranges - 1); i++) {
    284 		for (j = i + 1; j < nranges; j++) {
    285 			if (rstart[i] != 0 && rend[i] != 0 &&
    286 			    (!(rend[i] < rstart[j] || rstart[i] > rend[j]))) {
    287 				if (rstart[i] < rstart[j])
    288 					rstart[j] = rstart[i];
    289 				if (rend[i] > rend[j])
    290 					rend[j] = rend[i];
    291 				rstart[i] = 0;
    292 				rend[i] = 0;
    293 				break;
    294 			}
    295 		}
    296 	}
    297 
    298 	/* then, weed out the zero'ed/consolidated entries */
    299 	for (i = 0; i < nranges; ) {
    300 		if (rstart[i] == 0 && rend[i] == 0) {
    301 			for (j = i; j < (nranges - 1); j++) {
    302 				rstart[j] = rstart[j+1];
    303 				rend[j] = rend[j+1];
    304 			}
    305 			nranges--;
    306 		} else if (rstart[i] == 0 || rend[i] == 0) {
    307 			diag("Internal error processing input");
    308 		} else {
    309 			i++;
    310 		}
    311 	}
    312 
    313 	/* finally, sort the remaining entries */
    314 	for (i = 0; i < (nranges - 1); i++) {
    315 		for (j = i+1; j < nranges; j++) {
    316 			if (rstart[i] > rend[j]) {
    317 				tmp = rstart[i];
    318 				rstart[i] = rstart[j];
    319 				rstart[j] = tmp;
    320 
    321 				tmp = rend[i];
    322 				rend[i] = rend[j];
    323 				rend[j] = tmp;
    324 			}
    325 		}
    326 	}
    327 
    328 #ifdef DEBUG
    329 	/* dump ranges */
    330 	for (i = 0; i < nranges; i++) {
    331 		(void) printf("Range %d - start: %d end: %d\n", i, rstart[i],
    332 		    rend[i]);
    333 	}
    334 #endif
    335 }
    336 
    337 /* called when -c is used */
    338 /* print out those characters selected */
    339 
    340 void
    341 cfunc(void)
    342 {
    343 	wint_t	c;		/* current character */
    344 	int	pos = 0;	/* current position within line */
    345 	int	inrange = 0;	/* is 'pos' within a range */
    346 	int	rndx = 0;	/* current index into range table */
    347 
    348 	while ((c = fgetwc(inptr)) != EOF) {
    349 		if (c == '\n') {
    350 			(void) putchar('\n');
    351 
    352 			/* reset per-line variables */
    353 			pos = 0;
    354 			inrange = 0;
    355 			rndx = 0;
    356 		} else {
    357 			pos++;
    358 
    359 			/*
    360 			 * check if current character is within range and,
    361 			 * if so, print it.
    362 			 */
    363 			if (!inrange)
    364 				if (pos == rstart[rndx])
    365 					inrange = 1;
    366 
    367 			if (inrange) {
    368 				(void) putwchar(c);
    369 				if (pos == rend[rndx]) {
    370 					inrange = 0;
    371 					rndx++;
    372 					/*
    373 					 * optimization -
    374 					 * check for last range index
    375 					 * and eat chars until newline
    376 					 * if so.
    377 					 */
    378 				}
    379 			}
    380 		}
    381 	}
    382 }
    383 
    384 void
    385 bfunc(void) /* called when -b is used but -n is not */
    386 {
    387 	int	c;		/* current character */
    388 	int	pos = 0;	/* current position within line */
    389 	int	inrange = 0;	/* is 'pos' within a range */
    390 	int	rndx = 0;	/* current index into range table */
    391 
    392 	while ((c = getc(inptr)) != EOF) {
    393 		if (c == L'\n') {
    394 			(void) putchar('\n');
    395 
    396 			/* reset per-line variables */
    397 			pos = 0;
    398 			inrange = 0;
    399 			rndx = 0;
    400 		} else {
    401 			pos++;
    402 
    403 			/*
    404 			 * check if current character is within range and,
    405 			 * if so, print it.
    406 			 */
    407 			if (!inrange)
    408 				if (pos == rstart[rndx])
    409 					inrange = 1;
    410 
    411 			if (inrange) {
    412 				(void) putchar(c);
    413 				if (pos == rend[rndx]) {
    414 					inrange = 0;
    415 					rndx++;
    416 					/*
    417 					 * optimization -
    418 					 * check for last range index
    419 					 * and eat chars until newline
    420 					 * if so.
    421 					 */
    422 				}
    423 			}
    424 		}
    425 	}
    426 }
    427 
    428 
    429 void
    430 bnfunc(void) /* called when -b -n is used */
    431 {
    432 	wint_t	c;		/* current character */
    433 	int	pos = 0;	/* current position within line */
    434 	int	inrange = 0;	/* is 'pos' within a range */
    435 	int	rndx = 0;	/* current index into range table */
    436 	int	wlen;		/* byte length of current wide char */
    437 
    438 	while ((c = fgetwc(inptr)) != EOF) {
    439 		if (c == '\n') {
    440 			(void) putchar('\n');
    441 
    442 			/* reset per-line variables */
    443 			pos = 0;
    444 			inrange = 0;
    445 			rndx = 0;
    446 		} else {
    447 			if (rndx >= nranges)
    448 				continue;
    449 
    450 			if ((wlen = wctomb(dummy, c)) < 0)
    451 				diag("invalid multibyte character");
    452 			pos += wlen;
    453 
    454 			/*
    455 			 * when trying to figure this out, remember that
    456 			 * pos is actually pointing to the start byte of
    457 			 * the next char.
    458 			 */
    459 
    460 			/*
    461 			 * if char starts after beginning of range,
    462 			 * for the moment, consider it in range.
    463 			 */
    464 			if (!inrange && pos < rstart[rndx])
    465 				continue;
    466 
    467 			/*
    468 			 * If tail of the multibyte is out of the range.
    469 			 * do not print the character.
    470 			 * (See XCU4)
    471 			 */
    472 			if (pos <= rend[rndx]) {
    473 				inrange = 1;
    474 				(void) putwchar(c);
    475 				continue;
    476 			}
    477 			inrange = 0;
    478 			while (++rndx < nranges && pos >= rstart[rndx]) {
    479 				if (pos <= rend[rndx]) {
    480 					inrange = 1;
    481 					(void) putwchar(c);
    482 					break;
    483 				}
    484 			}
    485 		}
    486 	}
    487 }
    488 
    489 wchar_t *
    490 read_line(FILE *fp)
    491 {
    492 	wint_t	c;
    493 	wchar_t	*cp;
    494 	int charcnt;
    495 
    496 	/* alloc the line buffer if it isn't already there */
    497 	if (linebuf == NULL) {
    498 		bufsiz = BUFSIZ - 1;
    499 		if ((linebuf = (wchar_t *)malloc((bufsiz + 1) *
    500 		    sizeof (wchar_t))) == NULL)
    501 			diag("unable to allocate enough memory");
    502 	}
    503 
    504 	cp = linebuf;
    505 	charcnt = 0;
    506 	while ((c = fgetwc(fp)) != EOF) {
    507 		if (c == '\n') {
    508 			*cp = NULL;
    509 			return (linebuf);
    510 		} else {
    511 			charcnt++;
    512 			if (charcnt == bufsiz) {
    513 				/*
    514 				 * there is no line length limitation so we
    515 				 * have to be ready to expand the line buffer.
    516 				 */
    517 				bufsiz += BUFSIZ;
    518 				if ((linebuf = (wchar_t *)realloc(linebuf,
    519 				    (bufsiz + 1) * sizeof (wchar_t))) == NULL)
    520 					diag("unable to allocate "
    521 					    "enough memory");
    522 
    523 				cp = linebuf + charcnt - 1;
    524 			}
    525 			*cp++ = c;
    526 		}
    527 	}
    528 
    529 	if (cp != linebuf) {
    530 		*cp = NULL;
    531 		return (linebuf);
    532 	} else
    533 		return (NULL);
    534 }
    535 
    536 void
    537 ffunc(void)  /* called when -f is used */
    538 {
    539 	int	fpos;		/* current field position within line */
    540 	int	inrange;	/* is 'pos' within a range */
    541 	int	rndx;		/* current index into range table */
    542 	int	need_del;	/* need to put a delimiter char in output */
    543 	wchar_t	*linep;		/* pointer to line buffer */
    544 	wchar_t	*cp, *ncp;	/* working pointers into linebuf */
    545 
    546 	while ((linep = read_line(inptr)) != NULL) {
    547 
    548 		/* first, prune out line with no delimiters */
    549 		if (wcschr(linep, wdel) == NULL) {
    550 #if !defined(__lint)	/* lint doesn't grok "%ws" */
    551 			if (!supflag)
    552 				(void) printf("%ws\n", linep);
    553 #endif
    554 			continue;
    555 		}
    556 
    557 		/* init per-line variable */
    558 		fpos = 1;
    559 		inrange = 0;
    560 		rndx = 0;
    561 		need_del = 0;
    562 
    563 		for (ncp = cp = linep; ncp != NULL; fpos++) {
    564 			/* why continue processing if no more ranges? */
    565 			if (rndx >= nranges)
    566 				break;
    567 
    568 			/* find the next field delimiter */
    569 			ncp = wcschr(cp, wdel);
    570 
    571 			if (!inrange)
    572 				if (fpos == rstart[rndx])
    573 					inrange = 1;
    574 
    575 			if (inrange) {
    576 				if (need_del)
    577 					(void) putwchar(wdel);
    578 
    579 				if (ncp == NULL) {
    580 					/*
    581 					 * if there are no more delimiters
    582 					 * and we are in the range, print
    583 					 * out the rest of the line.
    584 					 */
    585 #if !defined(__lint)	/* lint doesn't grok "%ws" */
    586 					(void) printf("%ws", cp);
    587 #endif
    588 					break;
    589 				}
    590 				else
    591 					while (cp != ncp)
    592 						(void) putwchar(*cp++);
    593 				need_del = 1;
    594 
    595 				if (fpos == rend[rndx]) {
    596 					inrange = 0;
    597 					rndx++;
    598 				}
    599 			}
    600 
    601 			if (ncp != NULL)
    602 				cp = ncp + 1;
    603 		}
    604 		(void) putchar('\n');
    605 	}
    606 }
    607 
    608 
    609 void
    610 diag(const char *s)
    611 {
    612 	(void) fprintf(stderr, "cut: ");
    613 	(void) fprintf(stderr, gettext(s));
    614 	(void) fprintf(stderr, "\n");
    615 	exit(2);
    616 }
    617 
    618 
    619 void
    620 usage(void)
    621 {
    622 	(void) fprintf(stderr, gettext(
    623 	    "usage: cut -b list [-n] [filename ...]\n"
    624 	    "       cut -c list [filename ...]\n"
    625 	    "       cut -f list [-d delim] [-s] [filename]\n"));
    626 	exit(2);
    627 }
    628