1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2007 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 26 /* 27 * Copyright 1986, 1994 by Mortice Kern Systems Inc. All rights reserved. 28 */ 29 30 #pragma ident "%Z%%M% %I% %E% SMI" 31 32 /* 33 * awk -- mainline, yylex, etc. 34 * 35 * Based on MKS awk(1) ported to be /usr/xpg4/bin/awk with POSIX/XCU4 changes 36 */ 37 38 #include "awk.h" 39 #include "y.tab.h" 40 #include <stdarg.h> 41 #include <unistd.h> 42 #include <locale.h> 43 #include <search.h> 44 45 static char *progfiles[NPFILE]; /* Programmes files for yylex */ 46 static char **progfilep = &progfiles[0]; /* Pointer to last file */ 47 static wchar_t *progptr; /* In-memory programme */ 48 static int proglen; /* Length of progptr */ 49 static wchar_t context[NCONTEXT]; /* Circular buffer of context */ 50 static wchar_t *conptr = &context[0]; /* context ptr */ 51 static FILE *progfp; /* Stdio stream for programme */ 52 static char *filename; 53 #ifdef DEBUG 54 static int dflag; 55 #endif 56 57 #define AWK_EXEC_MAGIC "<MKS AWKC>" 58 #define LEN_EXEC_MAGIC 10 59 60 static char unbal[] = "unbalanced E char"; 61 62 static void awkarginit(int c, char **av); 63 static int lexid(wint_t c); 64 static int lexnumber(wint_t c); 65 static int lexstring(wint_t endc); 66 static int lexregexp(wint_t endc); 67 68 static void awkvarinit(void); 69 static wint_t lexgetc(void); 70 static void lexungetc(wint_t c); 71 static size_t lexescape(wint_t endc, int regx, int cmd_line_operand); 72 static void awkierr(int perr, char *fmt, va_list ap); 73 static int usage(void); 74 void strescape(wchar_t *str); 75 static const char *toprint(wint_t); 76 char *_cmdname; 77 static wchar_t *mbconvert(char *str); 78 79 extern int isclvar(wchar_t *arg); 80 81 /* 82 * mainline for awk 83 */ 84 int 85 main(int argc, char *argv[]) 86 { 87 wchar_t *ap; 88 char *cmd; 89 90 cmd = argv[0]; 91 _cmdname = cmd; 92 93 linebuf = emalloc(NLINE * sizeof (wchar_t)); 94 95 /* 96 * At this point only messaging should be internationalized. 97 * numbers are still scanned as in the Posix locale. 98 */ 99 (void) setlocale(LC_ALL, ""); 100 (void) setlocale(LC_NUMERIC, "C"); 101 #if !defined(TEXT_DOMAIN) 102 #define TEXT_DOMAIN "SYS_TEST" 103 #endif 104 (void) textdomain(TEXT_DOMAIN); 105 106 awkvarinit(); 107 /* running = 1; */ 108 while (argc > 1 && *argv[1] == '-') { 109 void *save_ptr = NULL; 110 ap = mbstowcsdup(&argv[1][1]); 111 if (ap == NULL) 112 break; 113 if (*ap == '\0') { 114 free(ap); 115 break; 116 } 117 save_ptr = (void *) ap; 118 ++argv; 119 --argc; 120 if (*ap == '-' && ap[1] == '\0') 121 break; 122 for (; *ap != '\0'; ++ap) { 123 switch (*ap) { 124 #ifdef DEBUG 125 case 'd': 126 dflag = 1; 127 continue; 128 129 #endif 130 case 'f': 131 if (argc < 2) { 132 (void) fprintf(stderr, 133 gettext("Missing script file\n")); 134 return (1); 135 } 136 *progfilep++ = argv[1]; 137 --argc; 138 ++argv; 139 continue; 140 141 case 'F': 142 if (ap[1] == '\0') { 143 if (argc < 2) { 144 (void) fprintf(stderr, 145 gettext("Missing field separator\n")); 146 return (1); 147 } 148 ap = mbstowcsdup(argv[1]); 149 --argc; 150 ++argv; 151 } else 152 ++ap; 153 strescape(ap); 154 strassign(varFS, linebuf, FALLOC, 155 wcslen(linebuf)); 156 break; 157 158 case 'v': { 159 wchar_t *vp; 160 wchar_t *arg; 161 162 if (argc < 2) { 163 (void) fprintf(stderr, 164 gettext("Missing variable assignment\n")); 165 return (1); 166 } 167 arg = mbconvert(argv[1]); 168 /* 169 * Ensure the variable expression 170 * is valid (correct form). 171 */ 172 if (((vp = wcschr(arg, '=')) != NULL) && 173 isclvar(arg)) { 174 *vp = '\0'; 175 strescape(vp+1); 176 strassign(vlook(arg), linebuf, 177 FALLOC|FSENSE, 178 wcslen(linebuf)); 179 *vp = '='; 180 } else { 181 (void) fprintf(stderr, gettext( 182 "Invalid form for variable " 183 "assignment: %S\n"), arg); 184 return (1); 185 } 186 --argc; 187 ++argv; 188 continue; 189 } 190 191 default: 192 (void) fprintf(stderr, 193 gettext("Unknown option \"-%S\"\n"), ap); 194 return (usage()); 195 } 196 break; 197 } 198 if (save_ptr) 199 free(save_ptr); 200 } 201 if (progfilep == &progfiles[0]) { 202 if (argc < 2) 203 return (usage()); 204 filename = "[command line]"; /* BUG: NEEDS TRANSLATION */ 205 progptr = mbstowcsdup(argv[1]); 206 proglen = wcslen(progptr); 207 --argc; 208 ++argv; 209 } 210 211 argv[0] = cmd; 212 213 awkarginit(argc, argv); 214 215 /* running = 0; */ 216 (void) yyparse(); 217 218 lineno = 0; 219 /* 220 * Ok, done parsing, so now activate the rest of the nls stuff, set 221 * the radix character. 222 */ 223 (void) setlocale(LC_ALL, ""); 224 radixpoint = *localeconv()->decimal_point; 225 awk(); 226 /* NOTREACHED */ 227 return (0); 228 } 229 230 /* 231 * Do initial setup of buffers, etc. 232 * This must be called before most processing 233 * and especially before lexical analysis. 234 * Variables initialised here will be overruled by command 235 * line parameter initialisation. 236 */ 237 static void 238 awkvarinit() 239 { 240 NODE *np; 241 242 (void) setvbuf(stderr, NULL, _IONBF, 0); 243 244 if ((NIOSTREAM = sysconf(_SC_OPEN_MAX) - 4) <= 0) { 245 (void) fprintf(stderr, 246 gettext("not enough available file descriptors")); 247 exit(1); 248 } 249 ofiles = (OFILE *)emalloc(sizeof (OFILE)*NIOSTREAM); 250 #ifdef A_ZERO_POINTERS 251 (void) memset((wchar_t *)ofiles, 0, sizeof (OFILE) * NIOSTREAM); 252 #else 253 { 254 /* initialize file descriptor table */ 255 OFILE *fp; 256 for (fp = ofiles; fp < &ofiles[NIOSTREAM]; fp += 1) { 257 fp->f_fp = FNULL; 258 fp->f_mode = 0; 259 fp->f_name = (char *)0; 260 } 261 } 262 #endif 263 constant = intnode((INT)0); 264 265 const0 = intnode((INT)0); 266 const1 = intnode((INT)1); 267 constundef = emptynode(CONSTANT, 0); 268 constundef->n_flags = FSTRING|FVINT; 269 constundef->n_string = _null; 270 constundef->n_strlen = 0; 271 inc_oper = emptynode(ADD, 0); 272 inc_oper->n_right = const1; 273 asn_oper = emptynode(ADD, 0); 274 field0 = node(FIELD, const0, NNULL); 275 276 { 277 RESFUNC near*rp; 278 279 for (rp = &resfuncs[0]; rp->rf_name != (LOCCHARP)NULL; ++rp) { 280 np = finstall(rp->rf_name, rp->rf_func, rp->rf_type); 281 } 282 } 283 { 284 RESERVED near*rp; 285 286 for (rp = &reserved[0]; rp->r_name != (LOCCHARP)NULL; ++rp) { 287 switch (rp->r_type) { 288 case SVAR: 289 case VAR: 290 running = 1; 291 np = vlook(rp->r_name); 292 if (rp->r_type == SVAR) 293 np->n_flags |= FSPECIAL; 294 if (rp->r_svalue != NULL) 295 strassign(np, rp->r_svalue, FSTATIC, 296 (size_t)rp->r_ivalue); 297 else { 298 constant->n_int = rp->r_ivalue; 299 (void) assign(np, constant); 300 } 301 running = 0; 302 break; 303 304 case KEYWORD: 305 kinstall(rp->r_name, (int)rp->r_ivalue); 306 break; 307 } 308 } 309 } 310 311 varNR = vlook(s_NR); 312 varFNR = vlook(s_FNR); 313 varNF = vlook(s_NF); 314 varOFMT = vlook(s_OFMT); 315 varCONVFMT = vlook(s_CONVFMT); 316 varOFS = vlook(s_OFS); 317 varORS = vlook(s_ORS); 318 varRS = vlook(s_RS); 319 varFS = vlook(s_FS); 320 varARGC = vlook(s_ARGC); 321 varSUBSEP = vlook(s_SUBSEP); 322 varENVIRON = vlook(s_ENVIRON); 323 varFILENAME = vlook(s_FILENAME); 324 varSYMTAB = vlook(s_SYMTAB); 325 incNR = node(ASG, varNR, node(ADD, varNR, const1)); 326 incFNR = node(ASG, varFNR, node(ADD, varFNR, const1)); 327 clrFNR = node(ASG, varFNR, const0); 328 } 329 330 /* 331 * Initialise awk ARGC, ARGV variables. 332 */ 333 static void 334 awkarginit(int ac, char **av) 335 { 336 int i; 337 wchar_t *cp; 338 339 ARGVsubi = node(INDEX, vlook(s_ARGV), constant); 340 running = 1; 341 constant->n_int = ac; 342 (void) assign(varARGC, constant); 343 for (i = 0; i < ac; ++i) { 344 cp = mbstowcsdup(av[i]); 345 constant->n_int = i; 346 strassign(exprreduce(ARGVsubi), cp, 347 FSTATIC|FSENSE, wcslen(cp)); 348 } 349 running = 0; 350 } 351 352 /* 353 * Clean up when done parsing a function. 354 * All formal parameters, because of a deal (funparm) in 355 * yylex, get put into the symbol table in front of any 356 * global variable of the same name. When the entire 357 * function is parsed, remove these formal dummy nodes 358 * from the symbol table but retain the nodes because 359 * the generated tree points at them. 360 */ 361 void 362 uexit(NODE *np) 363 { 364 NODE *formal; 365 366 while ((formal = getlist(&np)) != NNULL) 367 delsymtab(formal, 0); 368 } 369 370 /* 371 * The lexical analyzer. 372 */ 373 int 374 yylex() 375 #ifdef DEBUG 376 { 377 int l; 378 379 l = yyhex(); 380 if (dflag) 381 (void) printf("%d\n", l); 382 return (l); 383 } 384 yyhex() 385 #endif 386 { 387 wint_t c, c1; 388 int i; 389 static int savetoken = 0; 390 static int wasfield; 391 static int isfuncdef; 392 static int nbrace, nparen, nbracket; 393 static struct ctosymstruct { 394 wint_t c, sym; 395 } ctosym[] = { 396 { '|', BAR }, { '^', CARAT }, 397 { '~', TILDE }, { '<', LANGLE }, 398 { '>', RANGLE }, { '+', PLUSC }, 399 { '-', HYPHEN }, { '*', STAR }, 400 { '/', SLASH }, { '%', PERCENT }, 401 { '!', EXCLAMATION }, { '$', DOLLAR }, 402 { '[', LSQUARE }, { ']', RSQUARE }, 403 { '(', LPAREN }, { ')', RPAREN }, 404 { ';', SEMI }, { '{', LBRACE }, 405 { '}', RBRACE }, { 0, 0 } 406 }; 407 408 if (savetoken) { 409 c = savetoken; 410 savetoken = 0; 411 } else if (redelim != '\0') { 412 c = redelim; 413 redelim = 0; 414 catterm = 0; 415 savetoken = c; 416 return (lexlast = lexregexp(c)); 417 } else while ((c = lexgetc()) != WEOF) { 418 if (iswalpha(c) || c == '_') { 419 c = lexid(c); 420 } else if (iswdigit(c) || c == '.') { 421 c = lexnumber(c); 422 } else if (isWblank(c)) { 423 continue; 424 } else switch (c) { 425 #if DOS || OS2 426 case 032: /* ^Z */ 427 continue; 428 #endif 429 430 case '"': 431 c = lexstring(c); 432 break; 433 434 case '#': 435 while ((c = lexgetc()) != '\n' && c != WEOF) 436 ; 437 lexungetc(c); 438 continue; 439 440 case '+': 441 if ((c1 = lexgetc()) == '+') 442 c = INC; 443 else if (c1 == '=') 444 c = AADD; 445 else 446 lexungetc(c1); 447 break; 448 449 case '-': 450 if ((c1 = lexgetc()) == '-') 451 c = DEC; 452 else if (c1 == '=') 453 c = ASUB; 454 else 455 lexungetc(c1); 456 break; 457 458 case '*': 459 if ((c1 = lexgetc()) == '=') 460 c = AMUL; 461 else if (c1 == '*') { 462 if ((c1 = lexgetc()) == '=') 463 c = AEXP; 464 else { 465 c = EXP; 466 lexungetc(c1); 467 } 468 } else 469 lexungetc(c1); 470 break; 471 472 case '^': 473 if ((c1 = lexgetc()) == '=') { 474 c = AEXP; 475 } else { 476 c = EXP; 477 lexungetc(c1); 478 } 479 break; 480 481 case '/': 482 if ((c1 = lexgetc()) == '=' && 483 lexlast != RE && lexlast != NRE && 484 lexlast != ';' && lexlast != '\n' && 485 lexlast != ',' && lexlast != '(') 486 c = ADIV; 487 else 488 lexungetc(c1); 489 break; 490 491 case '%': 492 if ((c1 = lexgetc()) == '=') 493 c = AREM; 494 else 495 lexungetc(c1); 496 break; 497 498 case '&': 499 if ((c1 = lexgetc()) == '&') 500 c = AND; 501 else 502 lexungetc(c1); 503 break; 504 505 case '|': 506 if ((c1 = lexgetc()) == '|') 507 c = OR; 508 else { 509 lexungetc(c1); 510 if (inprint) 511 c = PIPE; 512 } 513 break; 514 515 case '>': 516 if ((c1 = lexgetc()) == '=') 517 c = GE; 518 else if (c1 == '>') 519 c = APPEND; 520 else { 521 lexungetc(c1); 522 if (nparen == 0 && inprint) 523 c = WRITE; 524 } 525 break; 526 527 case '<': 528 if ((c1 = lexgetc()) == '=') 529 c = LE; 530 else 531 lexungetc(c1); 532 break; 533 534 case '!': 535 if ((c1 = lexgetc()) == '=') 536 c = NE; 537 else if (c1 == '~') 538 c = NRE; 539 else 540 lexungetc(c1); 541 break; 542 543 case '=': 544 if ((c1 = lexgetc()) == '=') 545 c = EQ; 546 else { 547 lexungetc(c1); 548 c = ASG; 549 } 550 break; 551 552 case '\n': 553 switch (lexlast) { 554 case ')': 555 if (catterm || inprint) { 556 c = ';'; 557 break; 558 } 559 /*FALLTHRU*/ 560 case AND: 561 case OR: 562 case COMMA: 563 case '{': 564 case ELSE: 565 case ';': 566 case DO: 567 continue; 568 569 case '}': 570 if (nbrace != 0) 571 continue; 572 573 default: 574 c = ';'; 575 break; 576 } 577 break; 578 579 case ELSE: 580 if (lexlast != ';') { 581 savetoken = ELSE; 582 c = ';'; 583 } 584 break; 585 586 case '(': 587 ++nparen; 588 break; 589 590 case ')': 591 if (--nparen < 0) 592 awkerr(unbal, "()"); 593 break; 594 595 case '{': 596 nbrace++; 597 break; 598 599 case '}': 600 if (--nbrace < 0) { 601 char brk[3]; 602 603 brk[0] = '{'; 604 brk[1] = '}'; 605 brk[2] = '\0'; 606 awkerr(unbal, brk); 607 } 608 if (lexlast != ';') { 609 savetoken = c; 610 c = ';'; 611 } 612 break; 613 614 case '[': 615 ++nbracket; 616 break; 617 618 case ']': 619 if (--nbracket < 0) { 620 char brk[3]; 621 622 brk[0] = '['; 623 brk[1] = ']'; 624 brk[2] = '\0'; 625 awkerr(unbal, brk); 626 } 627 break; 628 629 case '\\': 630 if ((c1 = lexgetc()) == '\n') 631 continue; 632 lexungetc(c1); 633 break; 634 635 case ',': 636 c = COMMA; 637 break; 638 639 case '?': 640 c = QUEST; 641 break; 642 643 case ':': 644 c = COLON; 645 break; 646 647 default: 648 if (!iswprint(c)) 649 awkerr( 650 gettext("invalid character \"%s\""), 651 toprint(c)); 652 break; 653 } 654 break; 655 } 656 657 switch (c) { 658 case ']': 659 ++catterm; 660 break; 661 662 case VAR: 663 if (catterm) { 664 savetoken = c; 665 c = CONCAT; 666 catterm = 0; 667 } else if (!isfuncdef) { 668 if ((c1 = lexgetc()) != '(') 669 ++catterm; 670 lexungetc(c1); 671 } 672 isfuncdef = 0; 673 break; 674 675 case PARM: 676 case CONSTANT: 677 if (catterm) { 678 savetoken = c; 679 c = CONCAT; 680 catterm = 0; 681 } else { 682 if (lexlast == '$') 683 wasfield = 2; 684 ++catterm; 685 } 686 break; 687 688 case INC: 689 case DEC: 690 if (!catterm || lexlast != CONSTANT || wasfield) 691 break; 692 693 /*FALLTHRU*/ 694 case UFUNC: 695 case FUNC: 696 case GETLINE: 697 case '!': 698 case '$': 699 case '(': 700 if (catterm) { 701 savetoken = c; 702 c = CONCAT; 703 catterm = 0; 704 } 705 break; 706 707 /* { */ case '}': 708 if (nbrace == 0) 709 savetoken = ';'; 710 /*FALLTHRU*/ 711 case ';': 712 inprint = 0; 713 /*FALLTHRU*/ 714 default: 715 if (c == DEFFUNC) 716 isfuncdef = 1; 717 catterm = 0; 718 } 719 lexlast = c; 720 if (wasfield) 721 wasfield--; 722 /* 723 * Map character constants to symbolic names. 724 */ 725 for (i = 0; ctosym[i].c != 0; i++) 726 if (c == ctosym[i].c) { 727 c = ctosym[i].sym; 728 break; 729 } 730 return ((int)c); 731 } 732 733 /* 734 * Read a number for the lexical analyzer. 735 * Input is the first character of the number. 736 * Return value is the lexical type. 737 */ 738 static int 739 lexnumber(wint_t c) 740 { 741 wchar_t *cp; 742 int dotfound = 0; 743 int efound = 0; 744 INT number; 745 746 cp = linebuf; 747 do { 748 if (iswdigit(c)) 749 ; 750 else if (c == '.') { 751 if (dotfound++) 752 break; 753 } else if (c == 'e' || c == 'E') { 754 if ((c = lexgetc()) != '-' && c != '+') { 755 lexungetc(c); 756 c = 'e'; 757 } else 758 *cp++ = 'e'; 759 if (efound++) 760 break; 761 } else 762 break; 763 *cp++ = c; 764 } while ((c = lexgetc()) != WEOF); 765 *cp = '\0'; 766 if (dotfound && cp == linebuf+1) 767 return (DOT); 768 lexungetc(c); 769 errno = 0; 770 if (!dotfound && !efound && 771 ((number = wcstol(linebuf, (wchar_t **)0, 10)), errno != ERANGE)) 772 yylval.node = intnode(number); 773 else 774 yylval.node = realnode((REAL)wcstod(linebuf, (wchar_t **)0)); 775 return (CONSTANT); 776 } 777 778 /* 779 * Read an identifier. 780 * Input is first character of identifier. 781 * Return VAR. 782 */ 783 static int 784 lexid(wint_t c) 785 { 786 wchar_t *cp; 787 size_t i; 788 NODE *np; 789 790 cp = linebuf; 791 do { 792 *cp++ = c; 793 c = lexgetc(); 794 } while (iswalpha(c) || iswdigit(c) || c == '_'); 795 *cp = '\0'; 796 lexungetc(c); 797 yylval.node = np = vlook(linebuf); 798 799 switch (np->n_type) { 800 case KEYWORD: 801 switch (np->n_keywtype) { 802 case PRINT: 803 case PRINTF: 804 ++inprint; 805 default: 806 return ((int)np->n_keywtype); 807 } 808 /* NOTREACHED */ 809 810 case ARRAY: 811 case VAR: 812 /* 813 * If reading the argument list, create a dummy node 814 * for the duration of that function. These variables 815 * can be removed from the symbol table at function end 816 * but they must still exist because the execution tree 817 * knows about them. 818 */ 819 if (funparm) { 820 do_funparm: 821 np = emptynode(PARM, i = (cp-linebuf)); 822 np->n_flags = FSTRING; 823 np->n_string = _null; 824 np->n_strlen = 0; 825 (void) memcpy(np->n_name, linebuf, 826 (i+1) * sizeof (wchar_t)); 827 addsymtab(np); 828 yylval.node = np; 829 } else if (np == varNF || (np == varFS && 830 (!doing_begin || begin_getline))) { 831 /* 832 * If the user program references NF or sets 833 * FS either outside of a begin block or 834 * in a begin block after a getline then the 835 * input line will be split immediately upon read 836 * rather than when a field is first referenced. 837 */ 838 needsplit = 1; 839 } else if (np == varENVIRON) 840 needenviron = 1; 841 /*FALLTHRU*/ 842 case PARM: 843 return (VAR); 844 845 case UFUNC: 846 /* 847 * It is ok to redefine functions as parameters 848 */ 849 if (funparm) goto do_funparm; 850 /*FALLTHRU*/ 851 case FUNC: 852 case GETLINE: 853 /* 854 * When a getline is encountered, clear the 'doing_begin' flag. 855 * This will force the 'needsplit' flag to be set, even inside 856 * a begin block, if FS is altered. (See VAR case above) 857 */ 858 if (doing_begin) 859 begin_getline = 1; 860 return (np->n_type); 861 } 862 /* NOTREACHED */ 863 return (0); 864 } 865 866 /* 867 * Read a string for the lexical analyzer. 868 * `endc' terminates the string. 869 */ 870 static int 871 lexstring(wint_t endc) 872 { 873 size_t length = lexescape(endc, 0, 0); 874 875 yylval.node = stringnode(linebuf, FALLOC, length); 876 return (CONSTANT); 877 } 878 879 /* 880 * Read a regular expression. 881 */ 882 static int 883 lexregexp(wint_t endc) 884 { 885 (void) lexescape(endc, 1, 0); 886 yylval.node = renode(linebuf); 887 return (URE); 888 } 889 890 /* 891 * Process a string, converting the escape characters as required by 892 * 1003.2. The processed string ends up in the global linebuf[]. This 893 * routine also changes the value of 'progfd' - the program file 894 * descriptor, so it should be used with some care. It is presently used to 895 * process -v (awk1.c) and var=str type arguments (awk2.c, nextrecord()). 896 */ 897 void 898 strescape(wchar_t *str) 899 { 900 progptr = str; 901 proglen = wcslen(str) + 1; /* Include \0 */ 902 (void) lexescape('\0', 0, 1); 903 progptr = NULL; 904 } 905 906 /* 907 * Read a string or regular expression, terminated by ``endc'', 908 * for lexical analyzer, processing escape sequences. 909 * Return string length. 910 */ 911 static size_t 912 lexescape(wint_t endc, int regx, int cmd_line_operand) 913 { 914 static char nlre[256]; 915 static char nlstr[256]; 916 static char eofre[256]; 917 static char eofstr[256]; 918 int first_time = 1; 919 wint_t c; 920 wchar_t *cp; 921 int n, max; 922 923 if (first_time == 1) { 924 (void) strcpy(nlre, gettext("Newline in regular expression\n")); 925 (void) strcpy(nlstr, gettext("Newline in string\n")); 926 (void) strcpy(eofre, gettext("EOF in regular expression\n")); 927 (void) strcpy(eofstr, gettext("EOF in string\n")); 928 first_time = 0; 929 } 930 931 cp = linebuf; 932 while ((c = lexgetc()) != endc) { 933 if (c == '\n') 934 awkerr(regx ? nlre : nlstr); 935 if (c == '\\') { 936 switch (c = lexgetc(), c) { 937 case '\\': 938 if (regx) 939 *cp++ = '\\'; 940 break; 941 942 case '/': 943 c = '/'; 944 break; 945 946 case 'n': 947 c = '\n'; 948 break; 949 950 case 'b': 951 c = '\b'; 952 break; 953 954 case 't': 955 c = '\t'; 956 break; 957 958 case 'r': 959 c = '\r'; 960 break; 961 962 case 'f': 963 c = '\f'; 964 break; 965 966 case 'v': 967 c = '\v'; 968 break; 969 970 case 'a': 971 c = (char)0x07; 972 break; 973 974 case 'x': 975 n = 0; 976 while (iswxdigit(c = lexgetc())) { 977 if (iswdigit(c)) 978 c -= '0'; 979 else if (iswupper(c)) 980 c -= 'A'-10; 981 else 982 c -= 'a'-10; 983 n = (n<<4) + c; 984 } 985 lexungetc(c); 986 c = n; 987 break; 988 989 case '0': 990 case '1': 991 case '2': 992 case '3': 993 case '4': 994 case '5': 995 case '6': 996 case '7': 997 #if 0 998 /* 999 * Posix.2 draft 10 disallows the use of back-referencing - it explicitly 1000 * requires processing of the octal escapes both in strings and 1001 * regular expressions. The following code is disabled instead of 1002 * removed as back-referencing may be reintroduced in a future draft 1003 * of the standard. 1004 */ 1005 /* 1006 * For regular expressions, we disallow 1007 * \ooo to mean octal character, in favour 1008 * of back referencing. 1009 */ 1010 if (regx) { 1011 *cp++ = '\\'; 1012 break; 1013 } 1014 #endif 1015 max = 3; 1016 n = 0; 1017 do { 1018 n = (n<<3) + c-'0'; 1019 if ((c = lexgetc()) > '7' || c < '0') 1020 break; 1021 } while (--max); 1022 lexungetc(c); 1023 /* 1024 * an octal escape sequence must have at least 1025 * 2 digits after the backslash, otherwise 1026 * it gets passed straight thru for possible 1027 * use in backreferencing. 1028 */ 1029 if (max == 3) { 1030 *cp++ = '\\'; 1031 n += '0'; 1032 } 1033 c = n; 1034 break; 1035 1036 case '\n': 1037 continue; 1038 1039 default: 1040 if (c != endc || cmd_line_operand) { 1041 *cp++ = '\\'; 1042 if (c == endc) 1043 lexungetc(c); 1044 } 1045 } 1046 } 1047 if (c == WEOF) 1048 awkerr(regx ? eofre : eofstr); 1049 *cp++ = c; 1050 } 1051 *cp = '\0'; 1052 return (cp - linebuf); 1053 } 1054 1055 /* 1056 * Build a regular expression NODE. 1057 * Argument is the string holding the expression. 1058 */ 1059 NODE * 1060 renode(wchar_t *s) 1061 { 1062 NODE *np; 1063 int n; 1064 1065 np = emptynode(RE, 0); 1066 np->n_left = np->n_right = NNULL; 1067 if ((n = REGWCOMP(&np->n_regexp, s)) != REG_OK) { 1068 int m; 1069 char *p; 1070 1071 m = REGWERROR(n, np->n_regexp, NULL, 0); 1072 p = (char *)emalloc(m); 1073 REGWERROR(n, np->n_regexp, p, m); 1074 awkerr("/%S/: %s", s, p); 1075 } 1076 return (np); 1077 } 1078 /* 1079 * Get a character for the lexical analyser routine. 1080 */ 1081 static wint_t 1082 lexgetc() 1083 { 1084 wint_t c; 1085 static char **files = &progfiles[0]; 1086 1087 if (progfp != FNULL && (c = fgetwc(progfp)) != WEOF) 1088 ; 1089 else { 1090 if (progptr != NULL) { 1091 if (proglen-- <= 0) 1092 c = WEOF; 1093 else 1094 c = *progptr++; 1095 } else { 1096 if (progfp != FNULL) 1097 if (progfp != stdin) 1098 (void) fclose(progfp); 1099 else 1100 clearerr(progfp); 1101 progfp = FNULL; 1102 if (files < progfilep) { 1103 filename = *files++; 1104 lineno = 1; 1105 if (filename[0] == '-' && filename[1] == '\0') 1106 progfp = stdin; 1107 else if ((progfp = fopen(filename, r)) 1108 == FNULL) { 1109 (void) fprintf(stderr, 1110 gettext("script file \"%s\""), filename); 1111 exit(1); 1112 } 1113 c = fgetwc(progfp); 1114 } 1115 } 1116 } 1117 if (c == '\n') 1118 ++lineno; 1119 if (conptr >= &context[NCONTEXT]) 1120 conptr = &context[0]; 1121 if (c != WEOF) 1122 *conptr++ = c; 1123 return (c); 1124 } 1125 1126 /* 1127 * Return a character for lexical analyser. 1128 * Only one returned character is (not enforced) legitimite. 1129 */ 1130 static void 1131 lexungetc(wint_t c) 1132 { 1133 if (c == '\n') 1134 --lineno; 1135 if (c != WEOF) { 1136 if (conptr == &context[0]) 1137 conptr = &context[NCONTEXT]; 1138 *--conptr = '\0'; 1139 } 1140 if (progfp != FNULL) { 1141 (void) ungetwc(c, progfp); 1142 return; 1143 } 1144 if (c == WEOF) 1145 return; 1146 *--progptr = c; 1147 proglen++; 1148 } 1149 1150 /* 1151 * Syntax errors during parsing. 1152 */ 1153 void 1154 yyerror(char *s, ...) 1155 { 1156 if (lexlast == FUNC || lexlast == GETLINE || lexlast == KEYWORD) 1157 if (lexlast == KEYWORD) 1158 awkerr(gettext("inadmissible use of reserved keyword")); 1159 else 1160 awkerr(gettext("attempt to redefine builtin function")); 1161 awkerr(s); 1162 } 1163 1164 /* 1165 * Error routine for all awk errors. 1166 */ 1167 /* ARGSUSED */ 1168 void 1169 awkerr(char *fmt, ...) 1170 { 1171 va_list args; 1172 1173 va_start(args, fmt); 1174 awkierr(0, fmt, args); 1175 va_end(args); 1176 } 1177 1178 /* 1179 * Error routine like "awkerr" except that it prints out 1180 * a message that includes an errno-specific indication. 1181 */ 1182 /* ARGSUSED */ 1183 void 1184 awkperr(char *fmt, ...) 1185 { 1186 va_list args; 1187 1188 va_start(args, fmt); 1189 awkierr(1, fmt, args); 1190 va_end(args); 1191 } 1192 1193 /* 1194 * Common internal routine for awkerr, awkperr 1195 */ 1196 static void 1197 awkierr(int perr, char *fmt, va_list ap) 1198 { 1199 static char sep1[] = "\n>>>\t"; 1200 static char sep2[] = "\t<<<"; 1201 int saveerr = errno; 1202 1203 (void) fprintf(stderr, "%s: ", _cmdname); 1204 if (running) { 1205 (void) fprintf(stderr, gettext("line %u ("), 1206 curnode == NNULL ? 0 : curnode->n_lineno); 1207 if (phase == 0) 1208 (void) fprintf(stderr, "NR=%lld): ", 1209 (INT)exprint(varNR)); 1210 else 1211 (void) fprintf(stderr, "%s): ", 1212 phase == BEGIN ? s_BEGIN : s_END); 1213 } else if (lineno != 0) { 1214 (void) fprintf(stderr, gettext("file \"%s\": "), filename); 1215 (void) fprintf(stderr, gettext("line %u: "), lineno); 1216 } 1217 (void) vfprintf(stderr, gettext(fmt), ap); 1218 if (perr == 1) 1219 (void) fprintf(stderr, ": %s", strerror(saveerr)); 1220 if (perr != 2 && !running) { 1221 wchar_t *cp; 1222 int n; 1223 int c; 1224 1225 (void) fprintf(stderr, gettext(" Context is:%s"), sep1); 1226 cp = conptr; 1227 n = NCONTEXT; 1228 do { 1229 if (cp >= &context[NCONTEXT]) 1230 cp = &context[0]; 1231 if ((c = *cp++) != '\0') 1232 (void) fputs(c == '\n' ? sep1 : toprint(c), 1233 stderr); 1234 } while (--n != 0); 1235 (void) fputs(sep2, stderr); 1236 } 1237 (void) fprintf(stderr, "\n"); 1238 exit(1); 1239 } 1240 1241 wchar_t * 1242 emalloc(unsigned n) 1243 { 1244 wchar_t *cp; 1245 1246 if ((cp = malloc(n)) == NULL) 1247 awkerr(nomem); 1248 return (cp); 1249 } 1250 1251 wchar_t * 1252 erealloc(wchar_t *p, unsigned n) 1253 { 1254 wchar_t *cp; 1255 1256 if ((cp = realloc(p, n)) == NULL) 1257 awkerr(nomem); 1258 return (cp); 1259 } 1260 1261 1262 /* 1263 * usage message for awk 1264 */ 1265 static int 1266 usage()