Home | History | Annotate | Download | only in ndrgen
      1 /*
      2  * CDDL HEADER START
      3  *
      4  * The contents of this file are subject to the terms of the
      5  * Common Development and Distribution License (the "License").
      6  * You may not use this file except in compliance with the License.
      7  *
      8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
      9  * or http://www.opensolaris.org/os/licensing.
     10  * See the License for the specific language governing permissions
     11  * and limitations under the License.
     12  *
     13  * When distributing Covered Code, include this CDDL HEADER in each
     14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
     15  * If applicable, add the following below this CDDL HEADER, with the
     16  * fields enclosed by brackets "[]" replaced with your own identifying
     17  * information: Portions Copyright [yyyy] [name of copyright owner]
     18  *
     19  * CDDL HEADER END
     20  */
     21 
     22 /*
     23  * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
     24  * Use is subject to license terms.
     25  */
     26 
     27 #pragma ident	"%Z%%M%	%I%	%E% SMI"
     28 
     29 #include <errno.h>
     30 #include <stdarg.h>
     31 #include "ndrgen.h"
     32 #include "y.tab.h"
     33 
     34 /*
     35  * C-like lexical analysis.
     36  *
     37  * 1. Define a "struct node"
     38  * 2. Define a "struct symbol" that encapsulates a struct node.
     39  * 3. Define a "struct integer" that encapsulates a struct node.
     40  * 4. Set the YACC stack type in the grammar:
     41  *		%{
     42  *		#define YYSTYPE struct node *
     43  *		%}
 * 5. Define %token's in the grammar for IDENTIFIER, STRING and INTEGER.
     45  *    Using "_KW" as a suffix for keyword tokens, i.e. "struct" is
     46  *    "%token STRUCT_KW":
     47  *	// atomic values
     48  *	%token INTEGER STRING IDENTIFIER
     49  *	// keywords
     50  *	%token STRUCT_KW CASE_KW
     51  *	// operators
     52  *	%token PLUS MINUS ASSIGN ARROW
     53  *	// overloaded tokens (++ --, < > <= >=, == !=, += -= *= ...)
     54  *	%token INCOP RELOP EQUOP ASSOP
     55  * 6. It's easiest to use the yacc(1) generated token numbers for node
 *    labels.  For node labels that are not actually part of the grammar,
     57  *    use a %token with an L_ prefix:
     58  *	// node labels (can't be generated by lex)
     59  *	%token L_LT L_LTE L_GT L_GTE L_EQU L_NEQ
     60  * 7. Call set_lex_input() before parsing.
     61  */
     62 
     63 #define	SQ	'\''
     64 #define	DQ	'"'
     65 
     66 #define	isquote(c) ((c) == SQ || (c) == DQ)
     67 #define	iswhite(c) ((c) == ' ' || (c) == '\t' || (c) == '\n' || (c) == '\f')
     68 
     69 #define	is_between(c, l, u)  ((l) <= (c) && (c) <= (u))
     70 #define	is_white(c)	((c) == ' ' || c == '\r' || c == '\t' || c == '\f')
     71 #define	is_lower(c)	is_between((c), 'a', 'z')
     72 #define	is_upper(c)	is_between((c), 'A', 'Z')
     73 #define	is_alpha(c)	(is_lower(c) || is_upper(c))
     74 #define	is_digit(c)	is_between((c), '0', '9')
     75 #define	is_sstart(c)	(is_alpha(c) || (c) == '_')
     76 #define	is_sfollow(c)	(is_sstart(c) || is_digit(c))
     77 #define	is_xdigit(c)	\
     78 	(is_digit(c) || is_between((c), 'A', 'F') || is_between((c), 'a', 'f'))
     79 
/* Head of the symbol list searched/extended by sym_find()/sym_enter(). */
ndr_symbol_t		*symbol_list;
/* Head of the list of distinct integer constants kept by int_enter(). */
static ndr_integer_t	*integer_list;
/* Stream that yylex() reads from; set by set_lex_input(). */
static FILE		*lex_infp;
/* Symbol naming the current input file; updated by "# line file" lines. */
static ndr_symbol_t	*file_name;
/* Current input line number, maintained by yylex(). */
int			line_number;
/* Number of errors reported through compile_error(). */
int			n_compile_error;

/* Non-zero while the next character read is the first one on its line. */
static int		lex_at_bol;

/* In yacc(1) generated parser */
extern struct node	*yylval;
     91 
     92 /*
 * The keywtable[] and optable[] could be external to this lex
     94  * and it would all still work.
     95  */
/*
 * Reserved words recognized by yylex().  Each entry gives the spelling,
 * the token returned to the parser, and a third value that is non-zero
 * only for the BASIC_TYPE entries (1, 2 or 4 -- presumably the size of
 * the type in bytes; confirm against ndr_keyword_t in ndrgen.h).
 * The all-zero entry terminates the table for keyw_tab_init().
 */
static ndr_keyword_t keywtable[] = {
	{ "struct",	STRUCT_KW,	0 },
	{ "union",	UNION_KW,	0 },
	{ "typedef",	TYPEDEF_KW,	0 },

	{ "interface",	INTERFACE_KW,	0 },
	{ "uuid",	UUID_KW,	0 },
	{ "_no_reorder", _NO_REORDER_KW, 0 },
	{ "extern",	EXTERN_KW,	0 },
	{ "reference",	REFERENCE_KW,	0 },

	{ "align",	ALIGN_KW,	0 },
	{ "operation",	OPERATION_KW,	0 },
	{ "in",		IN_KW,		0 },
	{ "out",	OUT_KW,		0 },

	{ "string",	STRING_KW,	0 },
	{ "size_is",	SIZE_IS_KW,	0 },
	{ "length_is",	LENGTH_IS_KW,	0 },

	{ "switch_is",	SWITCH_IS_KW,	0 },
	{ "case",	CASE_KW,	0 },
	{ "default",	DEFAULT_KW,	0 },

	{ "transmit_as", TRANSMIT_AS_KW, 0 },
	{ "arg_is",	ARG_IS_KW,	0 },

	/* All basic types share one token; only the third value differs. */
	{ "char",	BASIC_TYPE,	1 },
	{ "uchar",	BASIC_TYPE,	1 },
	{ "wchar",	BASIC_TYPE,	2 },
	{ "short",	BASIC_TYPE,	2 },
	{ "ushort",	BASIC_TYPE,	2 },
	{ "long",	BASIC_TYPE,	4 },
	{ "ulong",	BASIC_TYPE,	4 },
	{0}
};
    132 
/*
 * Punctuation tokens.  They are entered into the symbol table like
 * keywords, and yylex() finds them by looking up the operator spelling
 * with sym_find().  The all-zero entry terminates the table.
 */
static ndr_keyword_t optable[] = {
	{ "{",		LC,		0 },
	{ "}",		RC,		0 },
	{ "(",		LP,		0 },
	{ ")",		RP,		0 },
	{ "[",		LB,		0 },
	{ "]",		RB,		0 },
	{ "*",		STAR,		0 },
	{ ";",		SEMI,		0 },
	{0}
};
    144 
/* Forward declarations of file-local helpers defined later in this file. */
static int getch(FILE *fp);
static ndr_integer_t *int_enter(long);
static ndr_symbol_t *sym_find(char *);
static int str_to_sv(char *, char *sv[]);
    149 
    150 /*
    151  * Enter the symbols for keyword.
    152  */
    153 static void
    154 keyw_tab_init(ndr_keyword_t kwtable[])
    155 {
    156 	int			i;
    157 	ndr_keyword_t		*kw;
    158 	ndr_symbol_t		*sym;
    159 
    160 	for (i = 0; kwtable[i].name; i++) {
    161 		kw = &kwtable[i];
    162 
    163 		sym = sym_enter(kw->name);
    164 		sym->kw = kw;
    165 	}
    166 }
    167 
    168 void
    169 set_lex_input(FILE *fp, char *name)
    170 {
    171 	keyw_tab_init(keywtable);
    172 	keyw_tab_init(optable);
    173 
    174 	lex_infp = fp;
    175 	file_name = sym_enter(name);
    176 	line_number = 1;
    177 	lex_at_bol = 1;
    178 }
    179 
    180 static int
    181 getch(FILE *fp)
    182 {
    183 	return (getc(fp));
    184 }
    185 
    186 int
    187 yylex(void)
    188 {
    189 	char		lexeme[512];
    190 	char		*p = lexeme;
    191 	FILE		*fp = lex_infp;
    192 	int		c, xc;
    193 	ndr_symbol_t	*sym;
    194 	ndr_integer_t	*intg;
    195 
    196 top:
    197 	p = lexeme;
    198 
    199 	c = getch(fp);
    200 	if (c == EOF)
    201 		return (EOF);
    202 
    203 	if (c == '\n') {
    204 		line_number++;
    205 		lex_at_bol = 1;
    206 		goto top;
    207 	}
    208 
    209 	/*
    210 	 * Handle preprocessor lines. This just notes
    211 	 * which file we're processing.
    212 	 */
    213 	if (c == '#' && lex_at_bol) {
    214 		char		*sv[10];
    215 		int		sc;
    216 
    217 		while ((c = getch(fp)) != EOF && c != '\n')
    218 			*p++ = c;
    219 
    220 		*p = 0;
    221 		/* note: no ungetc() of newline, we don't want to count it */
    222 
    223 		if (*lexeme != ' ') {
    224 			/* not a line we know */
    225 			goto top;
    226 		}
    227 
    228 		sc = str_to_sv(lexeme, sv);
    229 		if (sc < 2)
    230 			goto top;
    231 
    232 		file_name = sym_enter(sv[1]);
    233 		line_number = atoi(sv[0]);	/* for next input line */
    234 		lex_at_bol = 1;
    235 		goto top;
    236 	}
    237 
    238 	lex_at_bol = 0;
    239 
    240 	/*
    241 	 * Skip white space
    242 	 */
    243 	if (is_white(c))
    244 		goto top;
    245 
    246 	/*
    247 	 * Symbol? Might be a keyword or just an identifier
    248 	 */
    249 	if (is_sstart(c)) {
    250 		/* we got a symbol */
    251 		do {
    252 			*p++ = c;
    253 			c = getch(fp);
    254 		} while (is_sfollow(c));
    255 		(void) ungetc(c, fp);
    256 		*p = 0;
    257 
    258 		sym = sym_enter(lexeme);
    259 
    260 		yylval = &sym->s_node;
    261 
    262 		if (sym->kw) {
    263 			return (sym->kw->token);
    264 		} else {
    265 			return (IDENTIFIER);
    266 		}
    267 	}
    268 
    269 	/*
    270 	 * Integer constant?
    271 	 */
    272 	if (is_digit(c)) {
    273 		/* we got a number */
    274 		*p++ = c;
    275 		if (c == '0') {
    276 			c = getch(fp);
    277 			if (c == 'x' || c == 'X') {
    278 				/* handle hex specially */
    279 				do {
    280 					*p++ = c;
    281 					c = getch(fp);
    282 				} while (is_xdigit(c));
    283 				goto convert_icon;
    284 			} else if (c == 'b' || c == 'B' ||
    285 			    c == 'd' || c == 'D' ||
    286 			    c == 'o' || c == 'O') {
    287 				do {
    288 					*p++ = c;
    289 					c = getch(fp);
    290 				} while (is_digit(c));
    291 				goto convert_icon;
    292 			}
    293 			(void) ungetc(c, fp);
    294 		}
    295 		/* could be anything */
    296 		c = getch(fp);
    297 		while (is_digit(c)) {
    298 			*p++ = c;
    299 			c = getch(fp);
    300 		}
    301 
    302 convert_icon:
    303 		*p = 0;
    304 		(void) ungetc(c, fp);
    305 
    306 		intg = int_enter(strtol(lexeme, 0, 0));
    307 		yylval = &intg->s_node;
    308 
    309 		return (INTEGER);
    310 	}
    311 
    312 	/* Could handle strings. We don't seem to need them yet */
    313 
    314 	yylval = 0;		/* operator tokens have no value */
    315 	xc = getch(fp);		/* get look-ahead for two-char lexemes */
    316 
    317 	lexeme[0] = c;
    318 	lexeme[1] = xc;
    319 	lexeme[2] = 0;
    320 
    321 	/*
    322 	 * Look for to-end-of-line comment
    323 	 */
    324 	if (c == '/' && xc == '/') {
    325 		/* eat the comment */
    326 		while ((c = getch(fp)) != EOF && c != '\n')
    327 			;
    328 		(void) ungetc(c, fp);		/* put back newline */
    329 		goto top;
    330 	}
    331 
    332 	/*
    333 	 * Look for multi-line comment
    334 	 */
    335 	if (c == '/' && xc == '*') {
    336 		/* eat the comment */
    337 		xc = -1;
    338 		while ((c = getch(fp)) != EOF) {
    339 			if (xc == '*' && c == '/') {
    340 				/* that's it */
    341 				break;
    342 			}
    343 			xc = c;
    344 			if (c == '\n')
    345 				line_number++;
    346 		}
    347 		goto top;
    348 	}
    349 
    350 	/*
    351 	 * Use symbol table lookup for two-character and
    352 	 * one character operator tokens.
    353 	 */
    354 	sym = sym_find(lexeme);
    355 	if (sym) {
    356 		/* there better be a keyword attached */
    357 		yylval = &sym->s_node;
    358 		return (sym->kw->token);
    359 	}
    360 
    361 	/* Try a one-character form */
    362 	(void) ungetc(xc, fp);
    363 	lexeme[1] = 0;
    364 	sym = sym_find(lexeme);
    365 	if (sym) {
    366 		/* there better be a keyword attached */
    367 		yylval = &sym->s_node;
    368 		return (sym->kw->token);
    369 	}
    370 
    371 	compile_error("unrecognized character 0x%02x", c);
    372 	goto top;
    373 }
    374 
    375 static ndr_symbol_t *
    376 sym_find(char *name)
    377 {
    378 	ndr_symbol_t		**pp;
    379 	ndr_symbol_t		*p;
    380 
    381 	for (pp = &symbol_list; (p = *pp) != 0; pp = &p->next) {
    382 		if (strcmp(p->name, name) == 0)
    383 			return (p);
    384 	}
    385 
    386 	return (0);
    387 }
    388 
    389 ndr_symbol_t *
    390 sym_enter(char *name)
    391 {
    392 	ndr_symbol_t		**pp;
    393 	ndr_symbol_t		*p;
    394 
    395 	for (pp = &symbol_list; (p = *pp) != 0; pp = &p->next) {
    396 		if (strcmp(p->name, name) == 0)
    397 			return (p);
    398 	}
    399 
    400 	p = ndr_alloc(1, sizeof (ndr_symbol_t));
    401 
    402 	if ((p->name = strdup(name)) == NULL)
    403 		fatal_error("%s", strerror(ENOMEM));
    404 
    405 	p->s_node.label = IDENTIFIER;
    406 	p->s_node.n_sym = p;
    407 
    408 	*pp = p;
    409 
    410 	return (p);
    411 }
    412 
    413 static ndr_integer_t *
    414 int_enter(long value)
    415 {
    416 	ndr_integer_t		**pp;
    417 	ndr_integer_t		*p;
    418 
    419 	for (pp = &integer_list; (p = *pp) != 0; pp = &p->next) {
    420 		if (p->value == value)
    421 			return (p);
    422 	}
    423 
    424 	p = ndr_alloc(1, sizeof (ndr_integer_t));
    425 
    426 	p->value = value;
    427 	p->s_node.label = INTEGER;
    428 	p->s_node.n_int = value;
    429 
    430 	*pp = p;
    431 
    432 	return (p);
    433 }
    434 
    435 void *
    436 ndr_alloc(size_t nelem, size_t elsize)
    437 {
    438 	void *p;
    439 
    440 	if ((p = calloc(nelem, elsize)) == NULL) {
    441 		fatal_error("%s", strerror(ENOMEM));
    442 		/* NOTREACHED */
    443 	}
    444 
    445 	return (p);
    446 }
    447 
    448 /*
    449  * The input context (filename, line number) is maintained by the
    450  * lexical analysis, and we generally want such info reported for
    451  * errors in a consistent manner.
    452  */
    453 void
    454 compile_error(const char *fmt, ...)
    455 {
    456 	char	buf[NDLBUFSZ];
    457 	va_list ap;
    458 
    459 	va_start(ap, fmt);
    460 	(void) vsnprintf(buf, NDLBUFSZ, fmt, ap);
    461 	va_end(ap);
    462 
    463 	(void) fprintf(stderr, "ndrgen: compile error: %s:%d: %s\n",
    464 	    file_name->name, line_number, buf);
    465 
    466 	n_compile_error++;
    467 }
    468 
    469 void
    470 fatal_error(const char *fmt, ...)
    471 {
    472 	char	buf[NDLBUFSZ];
    473 	va_list ap;
    474 
    475 	va_start(ap, fmt);
    476 	(void) vsnprintf(buf, NDLBUFSZ, fmt, ap);
    477 	va_end(ap);
    478 
    479 	(void) fprintf(stderr, "ndrgen: fatal error: %s\n", buf);
    480 	exit(1);
    481 }
    482 
    483 /*
    484  * Setup nodes for the lexical analyzer.
    485  */
    486 struct node *
    487 n_cons(int label, ...)
    488 {
    489 	ndr_node_t		*np;
    490 	va_list ap;
    491 
    492 	np = ndr_alloc(1, sizeof (ndr_node_t));
    493 
    494 	va_start(ap, label);
    495 	np->label = label;
    496 	np->n_arg[0] = va_arg(ap, void *);
    497 	np->n_arg[1] = va_arg(ap, void *);
    498 	np->n_arg[2] = va_arg(ap, void *);
    499 	va_end(ap);
    500 
    501 	np->line_number = line_number;
    502 	np->file_name = file_name;
    503 
    504 	return (np);
    505 }
    506 
    507 /*
    508  *	list:	item
    509  *	|	list item	={ n_splice($1, $2); }
    510  *	;
    511  */
    512 void
    513 n_splice(struct node *np1, struct node *np2)
    514 {
    515 	while (np1->n_next)
    516 		np1 = np1->n_next;
    517 
    518 	np1->n_next = np2;
    519 }
    520 
    521 /*
    522  * Convert a string of words to a vector of strings.
    523  * Returns the number of words.
    524  */
    525 static int
    526 str_to_sv(char *buf, char *sv[])
    527 {
    528 	char		**pp = sv;
    529 	char		*p = buf;
    530 	char		*q = buf;
    531 	int		in_word = 0;
    532 	int		c;
    533 
    534 	for (;;) {
    535 		c = *p++;
    536 		if (c == 0)
    537 			break;
    538 
    539 		if (!in_word) {
    540 			if (iswhite(c))
    541 				continue;
    542 
    543 			*pp++ = q;
    544 			in_word = 1;
    545 		}
    546 
    547 		if (isquote(c)) {
    548 			int		qc = c;
    549 
    550 			while (((c = *p++) != 0) && (c != qc))
    551 				*q++ = c;
    552 			if (c == 0)
    553 				break;
    554 		} else if (iswhite(c)) {
    555 			/* end of word */
    556 			*q++ = 0;
    557 			in_word = 0;
    558 		} else {
    559 			/* still inside word */
    560 			*q++ = c;
    561 		}
    562 	}
    563 
    564 	if (in_word)
    565 		*q++ = 0;
    566 
    567 	*pp = (char *)0;
    568 	return (pp - sv);
    569 }
    570