Home | History | Annotate | Download | only in awk
      1 %{
      2 /*
      3  * CDDL HEADER START
      4  *
      5  * The contents of this file are subject to the terms of the
      6  * Common Development and Distribution License, Version 1.0 only
      7  * (the "License").  You may not use this file except in compliance
      8  * with the License.
      9  *
     10  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
     11  * or http://www.opensolaris.org/os/licensing.
     12  * See the License for the specific language governing permissions
     13  * and limitations under the License.
     14  *
     15  * When distributing Covered Code, include this CDDL HEADER in each
     16  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
     17  * If applicable, add the following below this CDDL HEADER, with the
     18  * fields enclosed by brackets "[]" replaced with your own identifying
     19  * information: Portions Copyright [yyyy] [name of copyright owner]
     20  *
     21  * CDDL HEADER END
     22  */
     23 
     24 /*
     25  * Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
     26  * Use is subject to license terms.
     27  */
     28 
     29 /*	Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T	*/
     30 /*	  All Rights Reserved  	*/
     31 %}
     32 
     33 %{
     34 #pragma ident	"%Z%%M%	%I%	%E% SMI"
     35 %}
     36 
     37 %Start A str sc reg comment
     38 
     39 %{
     40 
     41 #include	<sys/types.h>
     42 #include	"awk.h"
     43 #include	"y.tab.h"
     44 
     45 #undef	input	/* defeat lex */
     46 #undef	unput
     47 
     48 static void unput(int);
     49 static void unputstr(char *);
     50 
     51 extern YYSTYPE	yylval;
     52 extern int	infunc;
     53 
     54 off_t	lineno	= 1;
     55 int	bracecnt = 0;
     56 int	brackcnt  = 0;
     57 int	parencnt = 0;
     58 #define DEBUG
     59 #ifdef	DEBUG
     60 #	define	RET(x)	{if(dbg)printf("lex %s [%s]\n", tokname(x), yytext); return(x); }
     61 #else
     62 #	define	RET(x)	return(x)
     63 #endif
     64 
     65 /*
     66  * The standards (SUSV2) requires that Record size be atleast LINE_MAX.
     67  * LINE_MAX is a standard variable defined in limits.h.
     68  * Though nawk is not standards compliant, we let RECSIZE
     69  * grow with LINE_MAX instead of the magic number 1024.
     70  */
     71 #define	CBUFLEN	(3 * LINE_MAX)
     72 
     73 #define	CADD	cbuf[clen++] = yytext[0]; \
     74 		if (clen >= CBUFLEN-1) { \
     75 			ERROR "string/reg expr %.10s... too long", cbuf SYNTAX; \
     76 			BEGIN A; \
     77 		}
     78 
     79 static uchar	cbuf[CBUFLEN];
     80 static uchar	*s;
     81 static int	clen, cflag;
     82 %}
     83 
     84 A	[a-zA-Z_]
     85 B	[a-zA-Z0-9_]
     86 D	[0-9]
     87 O	[0-7]
     88 H	[0-9a-fA-F]
     89 WS	[ \t]
     90 
     91 %%
     92 	switch (yybgin-yysvec-1) {	/* witchcraft */
     93 	case 0:
     94 		BEGIN A;
     95 		break;
     96 	case sc:
     97 		BEGIN A;
     98 		RET('}');
     99 	}
    100 
    101 <A>\n		{ lineno++; RET(NL); }
    102 <A>#.*		{ ; }	/* strip comments */
    103 <A>{WS}+	{ ; }
    104 <A>;		{ RET(';'); }
    105 
    106 <A>"\\"\n	{ lineno++; }
    107 <A>BEGIN	{ RET(XBEGIN); }
    108 <A>END		{ RET(XEND); }
    109 <A>func(tion)?	{ if (infunc) ERROR "illegal nested function" SYNTAX; RET(FUNC); }
    110 <A>return	{ if (!infunc) ERROR "return not in function" SYNTAX; RET(RETURN); }
    111 <A>"&&"		{ RET(AND); }
    112 <A>"||"		{ RET(BOR); }
    113 <A>"!"		{ RET(NOT); }
    114 <A>"!="		{ yylval.i = NE; RET(NE); }
    115 <A>"~"		{ yylval.i = MATCH; RET(MATCHOP); }
    116 <A>"!~"		{ yylval.i = NOTMATCH; RET(MATCHOP); }
    117 <A>"<"		{ yylval.i = LT; RET(LT); }
    118 <A>"<="		{ yylval.i = LE; RET(LE); }
    119 <A>"=="		{ yylval.i = EQ; RET(EQ); }
    120 <A>">="		{ yylval.i = GE; RET(GE); }
    121 <A>">"		{ yylval.i = GT; RET(GT); }
    122 <A>">>"		{ yylval.i = APPEND; RET(APPEND); }
    123 <A>"++"		{ yylval.i = INCR; RET(INCR); }
    124 <A>"--"		{ yylval.i = DECR; RET(DECR); }
    125 <A>"+="		{ yylval.i = ADDEQ; RET(ASGNOP); }
    126 <A>"-="		{ yylval.i = SUBEQ; RET(ASGNOP); }
    127 <A>"*="		{ yylval.i = MULTEQ; RET(ASGNOP); }
    128 <A>"/="		{ yylval.i = DIVEQ; RET(ASGNOP); }
    129 <A>"%="		{ yylval.i = MODEQ; RET(ASGNOP); }
    130 <A>"^="		{ yylval.i = POWEQ; RET(ASGNOP); }
    131 <A>"**="	{ yylval.i = POWEQ; RET(ASGNOP); }
    132 <A>"="		{ yylval.i = ASSIGN; RET(ASGNOP); }
    133 <A>"**"		{ RET(POWER); }
    134 <A>"^"		{ RET(POWER); }
    135 
    136 <A>"$"{D}+	{ yylval.cp = fieldadr(atoi(yytext+1)); RET(FIELD); }
    137 <A>"$NF"	{ unputstr("(NF)"); return(INDIRECT); }
    138 <A>"$"{A}{B}*	{ int c, n;
    139 		  c = input(); unput(c);
    140 		  if (c == '(' || c == '[' || infunc && (n=isarg(yytext+1)) >= 0) {
    141 			unputstr(yytext+1);
    142 			return(INDIRECT);
    143 		  } else {
    144 			yylval.cp = setsymtab((uchar *)yytext+1,
    145 				(uchar *)"",0.0,STR|NUM,symtab);
    146 			RET(IVAR);
    147 		  }
    148 		}
    149 <A>"$"		{ RET(INDIRECT); }
    150 <A>NF		{ yylval.cp = setsymtab((uchar *)yytext, (uchar *)"", 0.0, NUM, symtab); RET(VARNF); }
    151 
    152 <A>({D}+("."?){D}*|"."{D}+)((e|E)("+"|-)?{D}+)?	{
    153 		  yylval.cp = setsymtab((uchar *)yytext, tostring((uchar *)yytext), atof(yytext), CON|NUM, symtab);
    154 		  RET(NUMBER); }
    155 
    156 <A>while	{ RET(WHILE); }
    157 <A>for		{ RET(FOR); }
    158 <A>do		{ RET(DO); }
    159 <A>if		{ RET(IF); }
    160 <A>else		{ RET(ELSE); }
    161 <A>next		{ RET(NEXT); }
    162 <A>exit		{ RET(EXIT); }
    163 <A>break	{ RET(BREAK); }
    164 <A>continue	{ RET(CONTINUE); }
    165 <A>print	{ yylval.i = PRINT; RET(PRINT); }
    166 <A>printf	{ yylval.i = PRINTF; RET(PRINTF); }
    167 <A>sprintf	{ yylval.i = SPRINTF; RET(SPRINTF); }
    168 <A>split	{ yylval.i = SPLIT; RET(SPLIT); }
    169 <A>substr	{ RET(SUBSTR); }
    170 <A>sub		{ yylval.i = SUB; RET(SUB); }
    171 <A>gsub		{ yylval.i = GSUB; RET(GSUB); }
    172 <A>index	{ RET(INDEX); }
    173 <A>match	{ RET(MATCHFCN); }
    174 <A>in		{ RET(IN); }
    175 <A>getline	{ RET(GETLINE); }
    176 <A>close	{ RET(CLOSE); }
    177 <A>delete	{ RET(DELETE); }
    178 <A>length	{ yylval.i = FLENGTH; RET(BLTIN); }
    179 <A>log		{ yylval.i = FLOG; RET(BLTIN); }
    180 <A>int		{ yylval.i = FINT; RET(BLTIN); }
    181 <A>exp		{ yylval.i = FEXP; RET(BLTIN); }
    182 <A>sqrt		{ yylval.i = FSQRT; RET(BLTIN); }
    183 <A>sin		{ yylval.i = FSIN; RET(BLTIN); }
    184 <A>cos		{ yylval.i = FCOS; RET(BLTIN); }
    185 <A>atan2	{ yylval.i = FATAN; RET(BLTIN); }
    186 <A>system	{ yylval.i = FSYSTEM; RET(BLTIN); }
    187 <A>rand		{ yylval.i = FRAND; RET(BLTIN); }
    188 <A>srand	{ yylval.i = FSRAND; RET(BLTIN); }
    189 <A>toupper	{ yylval.i = FTOUPPER; RET(BLTIN); }
    190 <A>tolower	{ yylval.i = FTOLOWER; RET(BLTIN); }
    191 
    192 <A>{A}{B}*	{ int n, c;
    193 		  c = input(); unput(c);	/* look for '(' */
    194 		  if (c != '(' && infunc && (n=isarg(yytext)) >= 0) {
    195 			yylval.i = n;
    196 			RET(ARG);
    197 		  } else {
    198 			yylval.cp = setsymtab((uchar *)yytext,
    199 				(uchar *)"",0.0,STR|NUM,symtab);
    200 			if (c == '(') {
    201 				RET(CALL);
    202 			} else {
    203 				RET(VAR);
    204 			}
    205 		  }
    206 		}
    207 <A>\"		{ BEGIN str; clen = 0; }
    208 
    209 <A>"}"		{ if (--bracecnt < 0) ERROR "extra }" SYNTAX; BEGIN sc; RET(';'); }
    210 <A>"]"		{ if (--brackcnt < 0) ERROR "extra ]" SYNTAX; RET(']'); }
    211 <A>")"		{ if (--parencnt < 0) ERROR "extra )" SYNTAX; RET(')'); }
    212 
    213 <A>.		{ if (yytext[0] == '{') bracecnt++;
    214 		  else if (yytext[0] == '[') brackcnt++;
    215 		  else if (yytext[0] == '(') parencnt++;
    216 		  RET(yylval.i = yytext[0]); /* everything else */ }
    217 
    218 <reg>\\.	{ cbuf[clen++] = '\\'; cbuf[clen++] = yytext[1]; }
    219 <reg>\n		{ ERROR "newline in regular expression %.10s...", cbuf SYNTAX; lineno++; BEGIN A; }
    220 <reg>"/"	{ BEGIN A;
    221 		  cbuf[clen] = 0;
    222 		  yylval.s = tostring(cbuf);
    223 		  unput('/');
    224 		  RET(REGEXPR); }
    225 <reg>.		{ CADD; }
    226 
    227 <str>\"		{ BEGIN A;
    228 		  cbuf[clen] = 0; s = tostring(cbuf);
    229 		  cbuf[clen] = ' '; cbuf[++clen] = 0;
    230 		  yylval.cp = setsymtab(cbuf, s, 0.0, CON|STR, symtab);
    231 		  RET(STRING); }
    232 <str>\n		{ ERROR "newline in string %.10s...", cbuf SYNTAX; lineno++; BEGIN A; }
    233 <str>"\\\""	{ cbuf[clen++] = '"'; }
    234 <str>"\\"n	{ cbuf[clen++] = '\n'; }
    235 <str>"\\"t	{ cbuf[clen++] = '\t'; }
    236 <str>"\\"f	{ cbuf[clen++] = '\f'; }
    237 <str>"\\"r	{ cbuf[clen++] = '\r'; }
    238 <str>"\\"b	{ cbuf[clen++] = '\b'; }
    239 <str>"\\"v	{ cbuf[clen++] = '\v'; }	/* these ANSIisms may not be known by */
    240 <str>"\\"a	{ cbuf[clen++] = '\007'; }	/* your compiler. hence 007 for bell */
    241 <str>"\\\\"	{ cbuf[clen++] = '\\'; }
    242 <str>"\\"({O}{O}{O}|{O}{O}|{O}) { int n;
    243 		  sscanf(yytext+1, "%o", &n); cbuf[clen++] = n; }
    244 <str>"\\"x({H}+) { int n;	/* ANSI permits any number! */
    245 		  sscanf(yytext+2, "%x", &n); cbuf[clen++] = n; }
    246 <str>"\\".	{ cbuf[clen++] = yytext[1]; }
    247 <str>.		{ CADD; }
    248 
    249 %%
    250 
    251 void
    252 startreg()
    253 {
    254 	BEGIN reg;
    255 	clen = 0;
    256 }
    257 
    258 /* input() and unput() are transcriptions of the standard lex
    259    macros for input and output with additions for error message
    260    printing.  God help us all if someone changes how lex works.
    261 */
    262 
    263 uchar	ebuf[300];
    264 uchar	*ep = ebuf;
    265 
    266 int
    267 input(void)
    268 {
    269 	register int c;
    270 	extern uchar *lexprog;
    271 
    272 	if (yysptr > yysbuf)
    273 		c = U(*--yysptr);
    274 	else if (lexprog != NULL)	/* awk '...' */
    275 		c = *lexprog++;
    276 	else				/* awk -f ... */
    277 		c = pgetc();
    278 	if (c == '\n')
    279 		yylineno++;
    280 	else if (c == EOF)
    281 		c = 0;
    282 	if (ep >= ebuf + sizeof ebuf)
    283 		ep = ebuf;
    284 	return *ep++ = c;
    285 }
    286 
    287 static void
    288 unput(int c)
    289 {
    290 	yytchar = c;
    291 	if (yytchar == '\n')
    292 		yylineno--;
    293 	*yysptr++ = yytchar;
    294 	if (--ep < ebuf)
    295 		ep = ebuf + sizeof(ebuf) - 1;
    296 }
    297 
    298 
    299 static void
    300 unputstr(char *s)
    301 {
    302 	int i;
    303 
    304 	for (i = strlen(s)-1; i >= 0; i--)
    305 		unput(s[i]);
    306 }
    307