1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 22 /* 23 * Copyright 2008 Sun Microsystems, Inc. All rights reserved. 24 * Use is subject to license terms. 25 */ 26 27 /* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */ 28 /* All Rights Reserved */ 29 30 # 31 /* cut : cut and paste columns of a table (projection of a relation) */ 32 /* Release 1.5; handles single backspaces as produced by nroff */ 33 34 #include <stdio.h> /* make: cc cut.c */ 35 #include <ctype.h> 36 #include <limits.h> 37 #include <locale.h> 38 #include <wchar.h> 39 #include <stdlib.h> 40 #include <unistd.h> 41 #include <string.h> 42 #include <errno.h> 43 44 #define MAX_RANGES MAX_INPUT /* maximum number of ranges */ 45 /* as input args */ 46 47 static void bfunc(void); 48 static void bnfunc(void); 49 static void cfunc(void); 50 static void ffunc(void); 51 static wchar_t *read_line(FILE *); 52 static void process_list(char *); 53 static void diag(const char *); 54 static void usage(void); 55 56 static wchar_t wdel = '\t'; 57 static int dellen; 58 static int supflag = 0; 59 static int rstart[MAX_RANGES]; 60 static int rend[MAX_RANGES]; 61 static int nranges = 0; 62 static FILE *inptr; 63 static char dummy[MB_LEN_MAX]; 64 65 static wchar_t *linebuf = NULL; 66 static int bufsiz; 67 68 int 69 main(int argc, char **argv) 70 { 71 int c; 72 char *list; 73 int status = 0; 74 int bflag, nflag, cflag, fflag, dflag, filenr; 75 void (*funcp)(); 76 77 bflag = nflag = cflag = fflag = dflag = 0; 78 79 (void) setlocale(LC_ALL, ""); 80 81 #if !defined(TEXT_DOMAIN) /* Should be defined by cc -D */ 82 #define TEXT_DOMAIN "SYS_TEST" /* Use this only if it weren't. */ 83 #endif 84 (void) textdomain(TEXT_DOMAIN); 85 86 while ((c = getopt(argc, argv, "b:c:d:f:ns")) != EOF) 87 switch (c) { 88 case 'b': 89 if (fflag || cflag) 90 usage(); 91 bflag++; 92 list = optarg; 93 break; 94 95 case 'c': 96 if (fflag || bflag) 97 usage(); 98 cflag++; 99 list = optarg; 100 break; 101 102 case 'd': 103 dellen = mbtowc(&wdel, optarg, MB_CUR_MAX); 104 if (dellen == -1) { 105 diag("no delimiter specified"); 106 } else if (dellen != strlen(optarg)) 107 diag("invalid delimiter"); 108 dflag++; 109 break; 110 111 case 'f': 112 if (bflag || cflag) 113 usage(); 114 fflag++; 115 list = optarg; 116 break; 117 118 case 'n': 119 nflag++; 120 break; 121 122 case 's': 123 supflag++; 124 break; 125 126 case '?': 127 usage(); 128 } 129 130 argv = &argv[optind]; 131 argc -= optind; 132 133 /* you must use one and only one option -b, -c, or -f */ 134 if (!(cflag || fflag || bflag)) 135 usage(); 136 137 /* 138 * Make sure combination of options is correct 139 */ 140 if (nflag) { 141 if (cflag || fflag) { 142 (void) fprintf(stderr, gettext( 143 "cut: -n may only be used with -b\n")); 144 usage(); 145 } 146 } 147 148 if (dflag || supflag) { 149 if (bflag || cflag) { 150 if (dflag) 151 (void) fprintf(stderr, gettext( 152 "cut: -d may only be used with -f\n")); 153 if (supflag) 154 (void) fprintf(stderr, gettext( 155 "cut: -s may only be used with -f\n")); 156 usage(); 157 } 158 } 159 160 process_list(list); 161 162 if (cflag) { 163 funcp = cfunc; 164 } else if (bflag) { 165 if (nflag) 166 funcp = bnfunc; 167 else 168 funcp = bfunc; 169 } else { /* fflag */ 170 funcp = ffunc; 171 } 172 173 if (nranges == 0) 174 diag("no list specified"); 175 176 filenr = 0; 177 do { /* for all input files */ 178 if (argc == 0 || strcmp(argv[filenr], "-") == 0) 179 inptr = stdin; 180 else { 181 if ((inptr = fopen(argv[filenr], "r")) == NULL) { 182 (void) fprintf(stderr, "cut: %s: %s\n", 183 argv[filenr], strerror(errno)); 184 status = 1; 185 continue; 186 } 187 /* 188 * ftell() is used to check whether the file is an 189 * open file descriptor and if the file is associate 190 * with the a pipe, a FIFO,or a socket if file cannot 191 * be opened ftell() can be used to check the status 192 * of the file 193 */ 194 if (ftell(inptr) == -1) { 195 (void) fprintf(stderr, "cut: %s: %s\n", 196 argv[filenr], strerror(errno)); 197 status = 1; 198 continue; 199 } 200 } 201 (*funcp)(); 202 (void) fclose(inptr); 203 } while (++filenr < argc); 204 return (status); 205 } 206 207 /* parse range list argument and set-up rstart/rend array */ 208 void 209 process_list(char *list) 210 { 211 int inrange = 0; 212 int start = 0; 213 int num = 0; 214 char *rlist = list; 215 char *p; 216 int i, j; 217 int tmp; 218 219 /* first, parse list of ranges */ 220 do { 221 p = rlist; 222 switch (*p) { 223 case '-': 224 if (inrange) 225 diag("invalid range specifier"); 226 227 inrange = 1; 228 if (num == 0) 229 start = 1; 230 else { 231 start = num; 232 num = 0; 233 } 234 break; 235 236 case '\0': 237 case ',': 238 case ' ': 239 case '\t': 240 /* 241 * this is temporary - it will change 242 * when the isblank() routine becomes 243 * available. 244 */ 245 if (nranges == MAX_RANGES) 246 diag("too many ranges specified"); 247 248 if (inrange) { 249 if (num == 0) 250 num = INT_MAX; 251 if (num < start) 252 diag("ranges must be " 253 "increasing"); 254 rstart[nranges] = start; 255 rend[nranges] = num; 256 nranges++; 257 } else { 258 rstart[nranges] = num; 259 rend[nranges] = num; 260 nranges++; 261 } 262 263 num = 0; 264 start = 0; 265 inrange = 0; 266 267 if (*p == '\0') 268 continue; 269 break; 270 271 default: 272 if (!isdigit(*p)) 273 diag("invalid character in range"); 274 num = atoi(p); 275 while (isdigit(*rlist)) 276 rlist++; 277 continue; 278 } 279 rlist++; 280 } while (*p != '\0'); 281 282 /* then, consolidate ranges where possible */ 283 for (i = 0; i < (nranges - 1); i++) { 284 for (j = i + 1; j < nranges; j++) { 285 if (rstart[i] != 0 && rend[i] != 0 && 286 (!(rend[i] < rstart[j] || rstart[i] > rend[j]))) { 287 if (rstart[i] < rstart[j]) 288 rstart[j] = rstart[i]; 289 if (rend[i] > rend[j]) 290 rend[j] = rend[i]; 291 rstart[i] = 0; 292 rend[i] = 0; 293 break; 294 } 295 } 296 } 297 298 /* then, weed out the zero'ed/consolidated entries */ 299 for (i = 0; i < nranges; ) { 300 if (rstart[i] == 0 && rend[i] == 0) { 301 for (j = i; j < (nranges - 1); j++) { 302 rstart[j] = rstart[j+1]; 303 rend[j] = rend[j+1]; 304 } 305 nranges--; 306 } else if (rstart[i] == 0 || rend[i] == 0) { 307 diag("Internal error processing input"); 308 } else { 309 i++; 310 } 311 } 312 313 /* finally, sort the remaining entries */ 314 for (i = 0; i < (nranges - 1); i++) { 315 for (j = i+1; j < nranges; j++) { 316 if (rstart[i] > rend[j]) { 317 tmp = rstart[i]; 318 rstart[i] = rstart[j]; 319 rstart[j] = tmp; 320 321 tmp = rend[i]; 322 rend[i] = rend[j]; 323 rend[j] = tmp; 324 } 325 } 326 } 327 328 #ifdef DEBUG 329 /* dump ranges */ 330 for (i = 0; i < nranges; i++) { 331 (void) printf("Range %d - start: %d end: %d\n", i, rstart[i], 332 rend[i]); 333 } 334 #endif 335 } 336 337 /* called when -c is used */ 338 /* print out those characters selected */ 339 340 void 341 cfunc(void) 342 { 343 wint_t c; /* current character */ 344 int pos = 0; /* current position within line */ 345 int inrange = 0; /* is 'pos' within a range */ 346 int rndx = 0; /* current index into range table */ 347 348 while ((c = fgetwc(inptr)) != EOF) { 349 if (c == '\n') { 350 (void) putchar('\n'); 351 352 /* reset per-line variables */ 353 pos = 0; 354 inrange = 0; 355 rndx = 0; 356 } else { 357 pos++; 358 359 /* 360 * check if current character is within range and, 361 * if so, print it. 362 */ 363 if (!inrange) 364 if (pos == rstart[rndx]) 365 inrange = 1; 366 367 if (inrange) { 368 (void) putwchar(c); 369 if (pos == rend[rndx]) { 370 inrange = 0; 371 rndx++; 372 /* 373 * optimization - 374 * check for last range index 375 * and eat chars until newline 376 * if so. 377 */ 378 } 379 } 380 } 381 } 382 } 383 384 void 385 bfunc(void) /* called when -b is used but -n is not */ 386 { 387 int c; /* current character */ 388 int pos = 0; /* current position within line */ 389 int inrange = 0; /* is 'pos' within a range */ 390 int rndx = 0; /* current index into range table */ 391 392 while ((c = getc(inptr)) != EOF) { 393 if (c == L'\n') { 394 (void) putchar('\n'); 395 396 /* reset per-line variables */ 397 pos = 0; 398 inrange = 0; 399 rndx = 0; 400 } else { 401 pos++; 402 403 /* 404 * check if current character is within range and, 405 * if so, print it. 406 */ 407 if (!inrange) 408 if (pos == rstart[rndx]) 409 inrange = 1; 410 411 if (inrange) { 412 (void) putchar(c); 413 if (pos == rend[rndx]) { 414 inrange = 0; 415 rndx++; 416 /* 417 * optimization - 418 * check for last range index 419 * and eat chars until newline 420 * if so. 421 */ 422 } 423 } 424 } 425 } 426 } 427 428 429 void 430 bnfunc(void) /* called when -b -n is used */ 431 { 432 wint_t c; /* current character */ 433 int pos = 0; /* current position within line */ 434 int inrange = 0; /* is 'pos' within a range */ 435 int rndx = 0; /* current index into range table */ 436 int wlen; /* byte length of current wide char */ 437 438 while ((c = fgetwc(inptr)) != EOF) { 439 if (c == '\n') { 440 (void) putchar('\n'); 441 442 /* reset per-line variables */ 443 pos = 0; 444 inrange = 0; 445 rndx = 0; 446 } else { 447 if (rndx >= nranges) 448 continue; 449 450 if ((wlen = wctomb(dummy, c)) < 0) 451 diag("invalid multibyte character"); 452 pos += wlen; 453 454 /* 455 * when trying to figure this out, remember that 456 * pos is actually pointing to the start byte of 457 * the next char. 458 */ 459 460 /* 461 * if char starts after beginning of range, 462 * for the moment, consider it in range. 463 */ 464 if (!inrange && pos < rstart[rndx]) 465 continue; 466 467 /* 468 * If tail of the multibyte is out of the range. 469 * do not print the character. 470 * (See XCU4) 471 */ 472 if (pos <= rend[rndx]) { 473 inrange = 1; 474 (void) putwchar(c); 475 continue; 476 } 477 inrange = 0; 478 while (++rndx < nranges && pos >= rstart[rndx]) { 479 if (pos <= rend[rndx]) { 480 inrange = 1; 481 (void) putwchar(c); 482 break; 483 } 484 } 485 } 486 } 487 } 488 489 wchar_t * 490 read_line(FILE *fp) 491 { 492 wint_t c; 493 wchar_t *cp; 494 int charcnt; 495 496 /* alloc the line buffer if it isn't already there */ 497 if (linebuf == NULL) { 498 bufsiz = BUFSIZ - 1; 499 if ((linebuf = (wchar_t *)malloc((bufsiz + 1) * 500 sizeof (wchar_t))) == NULL) 501 diag("unable to allocate enough memory"); 502 } 503 504 cp = linebuf; 505 charcnt = 0; 506 while ((c = fgetwc(fp)) != EOF) { 507 if (c == '\n') { 508 *cp = NULL; 509 return (linebuf); 510 } else { 511 charcnt++; 512 if (charcnt == bufsiz) { 513 /* 514 * there is no line length limitation so we 515 * have to be ready to expand the line buffer. 516 */ 517 bufsiz += BUFSIZ; 518 if ((linebuf = (wchar_t *)realloc(linebuf, 519 (bufsiz + 1) * sizeof (wchar_t))) == NULL) 520 diag("unable to allocate " 521 "enough memory"); 522 523 cp = linebuf + charcnt - 1; 524 } 525 *cp++ = c; 526 } 527 } 528 529 if (cp != linebuf) { 530 *cp = NULL; 531 return (linebuf); 532 } else 533 return (NULL); 534 } 535 536 void 537 ffunc(void) /* called when -f is used */ 538 { 539 int fpos; /* current field position within line */ 540 int inrange; /* is 'pos' within a range */ 541 int rndx; /* current index into range table */ 542 int need_del; /* need to put a delimiter char in output */ 543 wchar_t *linep; /* pointer to line buffer */ 544 wchar_t *cp, *ncp; /* working pointers into linebuf */ 545 546 while ((linep = read_line(inptr)) != NULL) { 547 548 /* first, prune out line with no delimiters */ 549 if (wcschr(linep, wdel) == NULL) { 550 #if !defined(__lint) /* lint doesn't grok "%ws" */ 551 if (!supflag) 552 (void) printf("%ws\n", linep); 553 #endif 554 continue; 555 } 556 557 /* init per-line variable */ 558 fpos = 1; 559 inrange = 0; 560 rndx = 0; 561 need_del = 0; 562 563 for (ncp = cp = linep; ncp != NULL; fpos++) { 564 /* why continue processing if no more ranges? */ 565 if (rndx >= nranges) 566 break; 567 568 /* find the next field delimiter */ 569 ncp = wcschr(cp, wdel); 570 571 if (!inrange) 572 if (fpos == rstart[rndx]) 573 inrange = 1; 574 575 if (inrange) { 576 if (need_del) 577 (void) putwchar(wdel); 578 579 if (ncp == NULL) { 580 /* 581 * if there are no more delimiters 582 * and we are in the range, print 583 * out the rest of the line. 584 */ 585 #if !defined(__lint) /* lint doesn't grok "%ws" */ 586 (void) printf("%ws", cp); 587 #endif 588 break; 589 } 590 else 591 while (cp != ncp) 592 (void) putwchar(*cp++); 593 need_del = 1; 594 595 if (fpos == rend[rndx]) { 596 inrange = 0; 597 rndx++; 598 } 599 } 600 601 if (ncp != NULL) 602 cp = ncp + 1; 603 } 604 (void) putchar('\n'); 605 } 606 } 607 608 609 void 610 diag(const char *s) 611 { 612 (void) fprintf(stderr, "cut: "); 613 (void) fprintf(stderr, gettext(s)); 614 (void) fprintf(stderr, "\n"); 615 exit(2); 616 } 617 618 619 void 620 usage(void) 621 { 622 (void) fprintf(stderr, gettext( 623 "usage: cut -b list [-n] [filename ...]\n" 624 " cut -c list [filename ...]\n" 625 " cut -f list [-d delim] [-s] [filename]\n")); 626 exit(2); 627 } 628