Home | History | Annotate | Download | only in cscope-fast
      1 /*
      2  * CDDL HEADER START
      3  *
      4  * The contents of this file are subject to the terms of the
      5  * Common Development and Distribution License, Version 1.0 only
      6  * (the "License").  You may not use this file except in compliance
      7  * with the License.
      8  *
      9  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
     10  * or http://www.opensolaris.org/os/licensing.
     11  * See the License for the specific language governing permissions
     12  * and limitations under the License.
     13  *
     14  * When distributing Covered Code, include this CDDL HEADER in each
     15  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
     16  * If applicable, add the following below this CDDL HEADER, with the
     17  * fields enclosed by brackets "[]" replaced with your own identifying
     18  * information: Portions Copyright [yyyy] [name of copyright owner]
     19  *
     20  * CDDL HEADER END
     21  */
     22 /*	Copyright (c) 1990 AT&T	*/
     23 /*	  All Rights Reserved  	*/
     24 
     25 
     26 /*
     27  * Copyright 2004 Sun Microsystems, Inc.  All rights reserved.
     28  * Use is subject to license terms.
     29  */
     30 
     31 #pragma ident	"%Z%%M%	%I%	%E% SMI"
     32 
     33 /*
     34  *	cscope - interactive C symbol or text cross-reference
     35  *
     36  *	text searching functions
     37  */
     38 
     39 #include <fcntl.h>
     40 #include <setjmp.h>
     41 #include <stdio.h>
     42 #include <string.h>
     43 #include <memory.h>
     44 #include <ctype.h>
     45 #include <unistd.h>
     46 #include "library.h"
     47 
     48 typedef enum {
     49 	NO = 0,
     50 	YES = 1
     51 } BOOL;
     52 
     53 typedef struct re_bm {
     54 	int delta0[256];
     55 	int *delta2;
     56 	uchar_t *cmap;
     57 	uchar_t *bmpat;
     58 	int patlen;
     59 } re_bm;
     60 
     61 typedef struct Link {
     62 	uchar_t lit;
     63 	struct Node *node;
     64 	struct Link *next;
     65 } Link;
     66 
     67 typedef struct Node {
     68 	short out;
     69 	short d;
     70 	short shift1;
     71 	short shift2;
     72 	long id;
     73 	struct Node **tab;
     74 	Link *alts;
     75 	struct Node *fail;
     76 } Node;
     77 
     78 typedef struct re_cw {
     79 	int maxdepth, mindepth;
     80 	long nodeid;
     81 	int step[256];
     82 	uchar_t *cmap;
     83 	struct Node *root;
     84 } re_cw;
     85 
     86 typedef enum {
     87 	/* lit expression types */
     88 	Literal, Dot, Charclass, EOP,
     89 
     90 	/* non-lit expression types */
     91 	Cat, Alternate, Star, Plus, Quest,
     92 
     93 	/* not really expression types, just helping */
     94 	Lpar, Rpar, Backslash
     95 
     96 } Exprtype;
     97 
     98 typedef int ID;
     99 
    100 typedef struct Expr {
    101 	Exprtype type;	/* Type of expression (in grammar) */
    102 	ID id;		/* unique ID of lit expression  */
    103 	int lit;	/* Literal character or tag */
    104 	int flen;	/* Number of following lit expressions */
    105 	ID *follow;	/* Array of IDs of following lit expressions */
    106 	struct Expr *l; /* pointer to Left child (or ccl count) */
    107 	struct Expr *r; /* pointer to Right child (or ccl mask) */
    108 	struct Expr *parent; /* pointer to Parent */
    109 } Expr;
    110 
    111 typedef struct State {
    112 	struct State *tab[256];
    113 	int cnt; /* number of matched chars on full match, -1 otherwise  */
    114 	int npos;	/* number of IDs in position set for this state. */
    115 	int pos;	/* index into posbase for beginning of IDs */
    116 } State;
    117 
    118 typedef struct FID {
    119 	ID	id;	/* Lit Expression id */
    120 	int	fcount; /* Number of Lit exp matches before this one. */
    121 } FID;
    122 
    123 typedef struct Positionset {
    124 	int count;	/* Number of lit exps in position set */
    125 	ID last;	/* ID of last lit exp in position set */
    126 	FID *base;	/* array of MAXID FIDS */
    127 			/* 0 means not in position set */
    128 			/* -1 means first in position set */
    129 			/* n (>0) is ID of prev member of position set. */
    130 } Positionset;
    131 
    132 typedef struct re_re {
    133 	FID  *posbase;	/* Array of IDs from all states */
    134 	int nposalloc;	/* Allocated size of posbase */
    135 	int posnext;	/* Index into free space in posbase */
    136 	int posreset;	/* Index into end of IDs for initial state in posbase */
    137 	int maxid;	/* Number of (also maximum ID of) lit expressions */
    138 	Expr *root;	/* Pointer to root (EOP) expression */
    139 	Expr **ptr;	/* Pointer to array of ptrs to lit expressions. */
    140 	uchar_t *cmap;	/* Character mapping array */
    141 	Positionset firstpos;
    142 	Positionset tmp;
    143 	int nstates;	/* Number of current states defined */
    144 	int statelim;	/* Limit on number of states before flushing */
    145 	State *states;	/* Array of states */
    146 	State istate;	/* Initial state */
    147 } re_re;
    148 
    149 typedef struct {
    150 	uchar_t *cmap;
    151 	re_re *re_ptr;
    152 	re_bm *bm_ptr;
    153 	re_cw *cw_ptr;
    154 	BOOL fullmatch;
    155 	BOOL (*procfn)();
    156 	BOOL (*succfn)();
    157 	uchar_t *loc1;
    158 	uchar_t *loc2;
    159 	uchar_t *expression;
    160 } PATTERN;
    161 
    162 typedef enum {
    163 	BEGIN,		/* File is not yet in buffer at all */
    164 	MORE,		/* File is partly in buffer */
    165 	NO_MORE		/* File has been completely read into buffer */
    166 } FILE_STAT;
    167 
    168 typedef struct {
    169 	uchar_t	*prntbuf; /* current line of input from data file */
    170 	uchar_t	*newline; /* end of line (real or sentinel \n) */
    171 	long	ln;	/* line number */
    172 } LINE;
    173 
    174 #define	NL '\n'
    175 
    176 #define	CCL_SIZ		32
    177 #define	CCL_SET(a, c)	((a)[(c) >> 3] |= bittab[(c) & 07])
    178 #define	CCL_CLR(a, c)	((a)[(c) >> 3] &= ~bittab[(c) & 07])
    179 #define	CCL_CHK(a, c)	((a)[(c) >> 3] & bittab[(c) & 07])
    180 
    181 #define	ESIZE (BUFSIZ)
    182 #define	MAXBUFSIZE (64*BUFSIZ)
    183 
    184 #define	MAXMALLOCS	1024
    185 #define	MAXLIT	256	/* is plenty big enough */
    186 #define	LARGE	MAXBUFSIZE+ESIZE+2
    187 
    188 #define	CLEAR(r, rps)	(void) memset((char *)(rps)->base, 0, \
    189 			    (int)((r)->maxid * sizeof (FID))), \
    190 			    (rps)->count = 0, (rps)->last = -1
    191 #define	SET(rps, n, cnt) { \
    192 	if ((rps)->base[n].id == 0) {\
    193 		(rps)->count++;\
    194 		(rps)->base[n].fcount = (cnt);\
    195 		(rps)->base[n].id = (rps)->last;\
    196 		(rps)->last = (n);\
    197 	} else if ((cnt) > (rps)->base[n].fcount) {\
    198 		(rps)->base[n].fcount = (cnt);\
    199 	}}
    200 
    201 #define	START	{ _p = tmp; }
    202 #define	ADDL(c)	{ if (_p >= &tmp[MAXLIT]) _p--; *_p++ = c; }
    203 #define	FINISH	{ ADDL(0) if ((_p-tmp) > bestlen) \
    204 		    (void) memmove(best, tmp, bestlen = _p-tmp); }
    205 
    206 
    207 #define	NEW(N)	(froot?(t = froot, froot = froot->next, t->next = NULL, \
    208 		    t->node = N, t): newlink(0, N))
    209 #define	ADD(N)	if (qtail) qtail = qtail->next = NEW(N); \
    210 			else qtail = qhead = NEW(N)
    211 #define	DEL()	{ Link *_l = qhead; if ((qhead = qhead->next) == NULL) \
    212 			qtail = NULL; _l->next = froot; froot = _l; }
    213 
    214 static uchar_t	*buffer;
    215 static uchar_t	*bufend;
    216 static FILE	*output;
    217 static char	*format;
    218 static char	*file;
    219 static int	file_desc;
    220 static FILE_STAT file_stat;
    221 static PATTERN	match_pattern;
    222 static uchar_t	char_map[2][256];
    223 static int	iflag;
    224 static Exprtype	toktype;
    225 static int	toklit, parno, maxid;
    226 static uchar_t	tmp[MAXLIT], best[MAXLIT];
    227 static uchar_t	*_p;
    228 static int	bestlen;
    229 static Node	*next_node;
    230 static Link	*froot, *next_link;
    231 static jmp_buf	env;
    232 
    233 static int nmalloc;
    234 static uchar_t	*mallocs[MAXMALLOCS];
    235 
    236 static uchar_t	bittab[] = { 1, 2, 4, 8, 16, 32, 64, 128 };
    237 
    238 #ifdef	DEBUG
    239 #define		TRACE(n)	(n < debug)
    240 #define		EPRINTSIZE	50000
    241 static void spr(int c, int *p, uchar_t *buf);
    242 void epr(Expr *e, uchar_t *res);
    243 static int debug = 12;
    244 #endif
    245 
    246 static void init_file(LINE *cur_ptr);
    247 static void get_line(LINE *cur_ptr, uchar_t *s);
    248 static void get_ncount(LINE *cur_ptr, uchar_t *s);
    249 static int execute(void);
    250 static State *startstate(re_re *r);
    251 static State *stateof(re_re *r, Positionset *ps);
    252 static State *nextstate(re_re *r, State *s, int a);
    253 static State *addstate(re_re *r, Positionset *ps, int cnt);
    254 static BOOL match(Expr *e, int a);
    255 static BOOL first_lit(Positionset *fpos, Expr *e);
    256 static void eptr(re_re *r, Expr *e);
    257 static void efollow(re_re *r, Positionset *fpos, Expr *e);
    258 static void follow(Positionset *fpos, Expr *e);
    259 static void followstate(re_re *r, State *s, int a, Positionset *fpos);
    260 static Expr *eall(re_re *r, PATTERN *pat);
    261 static Expr *d0(re_re *r, PATTERN *pat);
    262 static Expr *d1(re_re *r, PATTERN *pat);
    263 static Expr *d2(re_re *r, PATTERN *pat);
    264 static Expr *d3(re_re *r, PATTERN *pat);
    265 static Expr *newexpr(Exprtype t, int lit, Expr *left, Expr *right);
    266 static void lex(re_re *r, PATTERN *pat);
    267 static int re_lit(PATTERN *pat, uchar_t **b, uchar_t **e);
    268 static void traverse(PATTERN *pat, Expr *e);
    269 static int ccl(PATTERN *pat, uchar_t *tab);
    270 static BOOL altlist(), word();
    271 static BOOL altlist(Expr *e, uchar_t *buf, re_cw *pat);
    272 static Node *newnode(re_cw *c, int d);
    273 static Link *newlink(uchar_t lit, Node *n);
    274 static void fail(Node *root);
    275 static void zeroroot(Node *root, Node *n);
    276 static void shift(re_cw *c);
    277 static void shifttab(Node *n);
    278 static void shiftprop(re_cw *c, Node *n);
    279 static void delta_2(re_bm *b);
    280 static int getstate(re_re *r, Positionset *ps);
    281 static void savestate(re_re *r);
    282 static void stateinit(re_re *r);
    283 static re_bm *re_bmcomp(uchar_t *pb, uchar_t *pe, uchar_t *cmap);
    284 static re_cw *re_cwinit(uchar_t *cmap);
    285 static re_cw *re_recw(re_re *r, uchar_t *map);
    286 static re_re *egprep(PATTERN *pat);
    287 static void re_cwadd(re_cw *c, uchar_t *s, uchar_t *e);
    288 static void re_cwcomp(re_cw *c);
    289 static void eginit(re_re *r);
    290 static BOOL re_bmexec(PATTERN *pat, uchar_t *s, uchar_t *e, uchar_t **mb,
    291     uchar_t **me);
    292 static BOOL re_cwexec(PATTERN *pat, uchar_t *rs, uchar_t *re, uchar_t **mb,
    293     uchar_t **me);
    294 static BOOL re_reexec(PATTERN *pat, uchar_t *b, uchar_t *e, uchar_t **mb,
    295     uchar_t **me);
    296 static uchar_t *egmalloc(size_t n);
    297 static void fgetfile(LINE *cur_ptr);
    298 static void dogre(PATTERN *pat);
    299 static BOOL pattern_match(PATTERN *pat, LINE *lptr);
    300 static BOOL fixloc(uchar_t **mb, uchar_t **me);
    301 static BOOL grepmatch(PATTERN *pat, uchar_t **mb, uchar_t **me);
    302 static void err(char *s);
    303 
    304 static char *message;
    305 
    306 void
    307 egrepcaseless(int i)
    308 {
    309 	iflag = i;	/* simulate "egrep -i" */
    310 }
    311 
    312 char *
    313 egrepinit(char *expression)
    314 {
    315 	static int firsttime = 1;
    316 	int i;
    317 
    318 	if (firsttime) {
    319 		firsttime = 0;
    320 		for (i = 0; i < 256; i++) {
    321 			char_map[0][i] = (uchar_t)i;
    322 			char_map[1][i] = tolower(i);
    323 		}
    324 	}
    325 	for (i = 0; i < nmalloc; i ++)
    326 		free(mallocs[i]);
    327 	nmalloc = 0;
    328 	message = NULL;
    329 
    330 	match_pattern.expression = (uchar_t *)expression;
    331 	match_pattern.cmap = char_map[iflag];
    332 	if (setjmp(env) == 0) {
    333 		dogre(&match_pattern);
    334 #ifdef	DEBUG
    335 		{
    336 		PATTERN *p = match_pattern;
    337 		if (p->procfn == re_bmexec)
    338 			if (!p->fullmatch)
    339 				if (p->succfn == re_reexec)
    340 					printf("PARTIAL BOYER_MOORE\n");
    341 				else
    342 					printf("PARTIAL B_M with GREP\n");
    343 			else
    344 				printf("FULL BOYER_MOORE\n");
    345 		else if (p->procfn == re_cwexec)
    346 			printf("C_W\n");
    347 		else
    348 			printf("GENERAL\n");
    349 		}
    350 #endif
    351 	}
    352 	return (message);
    353 }
    354 
    355 static void
    356 dogre(PATTERN *pat)
    357 {
    358 	uchar_t *lb, *le;
    359 
    360 #ifdef	DEBUG
    361 	printf("PATTERN %s\n", pat->expression);
    362 #endif
    363 	pat->re_ptr = egprep(pat);
    364 	bestlen = re_lit(pat, &lb, &le);
    365 
    366 	if (bestlen && pat->fullmatch) { /* Full Boyer Moore */
    367 #ifdef	DEBUG
    368 		printf("BESTLEN %d\n", bestlen);
    369 		{
    370 			uchar_t *p;
    371 			for (p = lb; p < le; p++) printf("%c", *p);
    372 			printf("\n");
    373 		}
    374 #endif
    375 		pat->bm_ptr = re_bmcomp(lb, le, pat->cmap);
    376 		pat->procfn = re_bmexec;
    377 		return;
    378 	}
    379 	if (bestlen > 1) {
    380 			/* Partial Boyer Moore */
    381 		pat->bm_ptr = re_bmcomp(lb, le, pat->cmap);
    382 		pat->procfn = re_bmexec;
    383 		pat->fullmatch = NO;
    384 	} else {
    385 		pat->fullmatch = YES;
    386 		if ((pat->cw_ptr = re_recw(pat->re_ptr, pat->cmap)) != NULL) {
    387 			pat->procfn = re_cwexec; /* CW */
    388 			return;
    389 		}
    390 	}
    391 	/* general egrep regular expression */
    392 	pat->succfn = re_reexec;
    393 
    394 	if (pat->fullmatch) {
    395 		pat->procfn = pat->succfn;
    396 		pat->succfn = NULL;
    397 	}
    398 }
    399 
    400 static BOOL
    401 fixloc(uchar_t **mb, uchar_t **me)
    402 {
    403 	/* Handle match to null string */
    404 
    405 	while (*me <= *mb)
    406 		(*me)++;
    407 
    408 	if (*(*me - 1) != NL)
    409 		while (**me != NL)
    410 			(*me)++;
    411 
    412 	/* Handle match to new-line only */
    413 
    414 	if (*mb == *me - 1 && **mb == NL) {
    415 		(*me)++;
    416 	}
    417 
    418 	/* Handle match including beginning or ending new-line */
    419 
    420 	if (**mb == NL)
    421 		(*mb)++;
    422 	if (*(*me - 1) == NL)
    423 		(*me)--;
    424 	return (YES);
    425 }
    426 
    427 static BOOL
    428 grepmatch(PATTERN *pat, uchar_t **mb, uchar_t **me)
    429 {
    430 	uchar_t *s, *f;
    431 
    432 	if (pat->fullmatch)
    433 		return (fixloc(mb, me));
    434 
    435 	for (f = *me - 1; *f != NL; f++) {
    436 	}
    437 	f++;
    438 	for (s = *mb; *s != NL; s--) {
    439 	}
    440 
    441 	if ((*pat->succfn)(pat, s, f, mb, me)) {
    442 		return (YES);
    443 	} else {
    444 		*mb = f;
    445 		return (NO);
    446 	}
    447 }
    448 
    449 static void
    450 eginit(re_re *r)
    451 {
    452 	unsigned int n;
    453 
    454 	r->ptr = (Expr **)egmalloc(r->maxid * sizeof (Expr *));
    455 	eptr(r, r->root);
    456 	n = r->maxid * sizeof (FID);
    457 	r->firstpos.base = (FID *)egmalloc(n);
    458 	r->tmp.base = (FID *)egmalloc(n);
    459 	CLEAR(r, &r->firstpos);
    460 	if (!first_lit(&r->firstpos, r->root->l)) {
    461 		/*
    462 		 * This expression matches the null string!!!!
    463 		 * Add EOP to beginning position set.
    464 		 */
    465 		SET(&r->firstpos, r->root->id, 0)
    466 		/* (void) printf("first of root->l == 0, b=%s\n", b); */
    467 	}
    468 	stateinit(r);
    469 	(void) addstate(r, &r->firstpos, 0);
    470 	savestate(r);
    471 }
    472 
    473 static void
    474 eptr(re_re *r, Expr *e)
    475 {
    476 	if ((e->id < 0) || (e->id >= r->maxid)) {
    477 		err("internal error");
    478 	}
    479 	r->ptr[e->id] = e;
    480 	if (e->type != Charclass) {
    481 		if (e->l) eptr(r, e->l);
    482 		if (e->r) eptr(r, e->r);
    483 	}
    484 }
    485 
    486 static BOOL
    487 re_reexec(PATTERN *pat, uchar_t *b, uchar_t *e, uchar_t **mb, uchar_t **me)
    488 {
    489 	re_re *r = pat->re_ptr;
    490 	State *s, *t;
    491 
    492 #ifdef	DEBUG
    493 	if (TRACE(10)) {
    494 		uchar_t buf[EPRINTSIZE];
    495 		epr(r->root, buf);
    496 		(void) printf("expr='%s'\n", buf);
    497 	}
    498 #endif
    499 	s = startstate(r);
    500 
    501 	for (;;) {
    502 		uchar_t c;
    503 
    504 		if (s->cnt >= 0) {
    505 #ifdef	DEBUG
    506 			if (TRACE(6))
    507 				(void) printf("match at input '%s'\n", b);
    508 #endif
    509 			*mb = b - s->cnt;
    510 			*me = b;
    511 			if (fixloc(mb, me))
    512 				return (YES);
    513 		}
    514 
    515 		if (b >= e) break;
    516 		c = pat->cmap[*b];
    517 #ifdef	DEBUG
    518 		if (TRACE(4))
    519 			(void) printf("state %d: char '%c'\n", s-r->states, *b);
    520 #endif
    521 		if ((t = s->tab[c]) != NULL) s = t;
    522 		else s = nextstate(r, s, (int)c);
    523 		b++;
    524 	}
    525 #ifdef	DEBUG
    526 	if (TRACE(3)) {
    527 		uchar_t buf[EPRINTSIZE];
    528 
    529 		epr(r->root, buf);
    530 		(void) printf("pat = %s\n", buf);
    531 	}
    532 #endif
    533 	return (NO);
    534 }
    535 
    536 static BOOL
    537 match(Expr *e, int a)
    538 {
    539 	switch (e->type) {
    540 	case Dot:	return ((BOOL)(a != NL));
    541 	case Literal:	return ((BOOL)(a == e->lit));
    542 	case Charclass:	return ((BOOL)(CCL_CHK((uchar_t *)e->r, a)));
    543 	default:	return (NO);
    544 	}
    545 }
    546 
    547 /*
    548  * generates the followset for a node in fpos
    549  */
    550 static void
    551 follow(Positionset *fpos, Expr *e)
    552 {
    553 	Expr *p;
    554 
    555 	if (e->type == EOP)
    556 		return;
    557 	else
    558 		p = e->parent;
    559 	switch (p->type) {
    560 	case EOP:
    561 		SET(fpos, p->id, 0)
    562 		break;
    563 	case Plus:
    564 	case Star:
    565 		(void) first_lit(fpos, e);
    566 		follow(fpos, p);
    567 		break;
    568 	case Quest:
    569 	case Alternate:
    570 		follow(fpos, p);
    571 		break;
    572 	case Cat:
    573 		if (e == p->r || !first_lit(fpos, p->r))
    574 			follow(fpos, p);
    575 		break;
    576 	default:
    577 		break;
    578 	}
    579 }
    580 
    581 /*
    582  * first_lit returns NO if e is nullable and in the process,
    583  * ets up fpos.
    584  */
    585 static BOOL
    586 first_lit(Positionset *fpos, Expr *e)
    587 {
    588 	BOOL k;
    589 
    590 	switch (e->type) {
    591 	case Literal:
    592 	case Dot:
    593 	case Charclass:
    594 		SET(fpos, e->id, 0)
    595 		return (YES);
    596 	case EOP:
    597 	case Star:
    598 	case Quest:
    599 		(void) first_lit(fpos, e->l);
    600 		return (NO);
    601 	case Plus:
    602 		return (first_lit(fpos, e->l));
    603 	case Cat:
    604 		return ((BOOL)(first_lit(fpos, e->l) || first_lit(fpos, e->r)));
    605 	case Alternate:
    606 		k = first_lit(fpos, e->r);
    607 		return ((BOOL)(first_lit(fpos, e->l) && k));
    608 	default:
    609 		err("internal error");
    610 	}
    611 	return (NO);
    612 }
    613 
    614 static void
    615 efollow(re_re *r, Positionset *fpos, Expr *e)
    616 {
    617 	ID i, *p;
    618 
    619 	CLEAR(r, fpos);
    620 	follow(fpos, e);
    621 	e->flen = fpos->count;
    622 	e->follow = (ID *)egmalloc(e->flen * sizeof (ID));
    623 	p = e->follow;
    624 #ifdef	DEBUG
    625 	printf("ID = %d LIT %c FLEN = %d\n", e->id, e->lit, e->flen);
    626 #endif
    627 	for (i = fpos->last; i > 0; i = fpos->base[i].id) {
    628 		*p++ = i;
    629 #ifdef	DEBUG
    630 	printf("FOLLOW ID = %d LIT %c\n", r->ptr[i]->id, r->ptr[i]->lit);
    631 #endif
    632 	}
    633 	if (p != e->follow + e->flen) {
    634 		err("internal error");
    635 	}
    636 }
    637 
    638 static State *
    639 addstate(re_re *r, Positionset *ps, int cnt)
    640 {
    641 	ID j;
    642 	FID *p, *q;
    643 	State *s;
    644 
    645 	if (cnt) {
    646 		s = r->states + getstate(r, ps);
    647 		(void) memset((char *)s->tab, 0, sizeof (s->tab));
    648 		s->cnt = r->istate.cnt;
    649 	} else {
    650 		s = &r->istate;
    651 		s->cnt = -1;
    652 	}
    653 	s->pos = r->posnext;
    654 	r->posnext += ps->count;
    655 	s->npos = ps->count;
    656 	p = r->posbase + s->pos;
    657 	for (j = ps->last; j > 0; p++, j = q->id) {
    658 		q = &ps->base[j];
    659 		p->id = j;
    660 		p->fcount = q->fcount;
    661 		if (p->id == r->root->id && s->cnt < p->fcount)
    662 			s->cnt = p->fcount;
    663 	}
    664 #ifdef	DEBUG
    665 	if (TRACE(3)) {
    666 		uchar_t buf[2000];
    667 		spr(s->npos, s->pos+r->posbase, buf);
    668 		(void) printf("new state[%d] %s%s\n", s-r->states, buf,
    669 		    s->cnt?" cnt":"");
    670 	}
    671 #endif
    672 	return (s);
    673 }
    674 
    675 static State *
    676 nextstate(re_re *r, State *s, int a)
    677 {
    678 	State *news;
    679 
    680 	CLEAR(r, &r->tmp);
    681 	followstate(r, s, a, &r->tmp);
    682 	if (s != &r->istate) followstate(r, &r->istate, a, &r->tmp);
    683 
    684 #ifdef	DEBUG
    685 	if (TRACE(5)) {
    686 		uchar_t buf[2000];
    687 		ppr(&r->tmp, buf);
    688 		(void) printf("nextstate(%d, '%c'): found %s\n", s-r->states,
    689 		    a, buf);
    690 	}
    691 #endif
    692 	if (r->tmp.count == 0)
    693 		news = &r->istate;
    694 	else if ((news = stateof(r, &r->tmp)) == NULL)
    695 		news = addstate(r, &r->tmp, 1);
    696 	s->tab[a] = news;
    697 #ifdef	DEBUG
    698 	if (TRACE(5)) {
    699 		(void) printf("nextstate(%d, '%c'): returning %ld\n",
    700 		    s-r->states, a, news);
    701 	}
    702 #endif
    703 	return (news);
    704 }
    705 
    706 static void
    707 followstate(re_re *r, State *s, int a, Positionset *fpos)
    708 {
    709 	int j;
    710 	ID *q, *eq;
    711 	Expr *e;
    712 
    713 	for (j = s->pos; j < (s->pos + s->npos); j++) {
    714 		e = r->ptr[r->posbase[j].id];
    715 		if (e->type == EOP) continue;
    716 		if (match(e, a)) {
    717 			if (e->follow == NULL) efollow(r, &r->firstpos, e);
    718 			for (q = e->follow, eq = q + e->flen; q < eq; q++) {
    719 				SET(fpos, *q, r->posbase[j].fcount + 1)
    720 #ifdef	DEBUG
    721 				printf("CHAR %c FC %c COUNT %d\n",
    722 					a,
    723 					r->ptr[*q]->lit,
    724 					r->posbase[j].fcount+1);
    725 #endif
    726 			}
    727 		}
    728 	}
    729 }
    730 
    731 static uchar_t *
    732 egmalloc(size_t n)
    733 {
    734 	uchar_t *x;
    735 
    736 	x = (uchar_t *)mymalloc(n);
    737 	mallocs[nmalloc++] = x;
    738 	if (nmalloc >= MAXMALLOCS)
    739 		nmalloc = MAXMALLOCS - 1;
    740 	return (x);
    741 }
    742 
    743 #ifdef	DEBUG
    744 void
    745 ppr(Positionse *ps, char *p)
    746 {
    747 	ID n;
    748 
    749 	if (ps->count < 1) {
    750 		(void) sprintf(p, "{}");
    751 		return;
    752 	}
    753 	*p++ = '{';
    754 	for (n = ps->last; n > 0; n = ps->base[n].id) {
    755 		(void) sprintf(p, "%d,", n);
    756 		p = strchr(p, 0);
    757 	}
    758 	p[-1] = '}';
    759 }
    760 
    761 void
    762 epr(Expr *e, uchar_t *res)
    763 {
    764 	uchar_t r1[EPRINTSIZE], r2[EPRINTSIZE];
    765 	int i;
    766 
    767 	if (e == NULL) {
    768 		(void) sprintf(res, "!0!");
    769 		return;
    770 	}
    771 	switch (e->type) {
    772 	case Dot:
    773 	case Literal:
    774 		spr(e->flen, e->follow, r1);
    775 		(void) sprintf(res, "%c%s", e->lit, r1);
    776 		break;
    777 	case Charclass:
    778 		*res++ = '[';
    779 		for (i = 0; i < 256; i++)
    780 			if (CCL_CHK((uchar_t *)e->r, i)) {
    781 				*res++ = i;
    782 			}
    783 		*res++ = ']';
    784 		*res = '\0';
    785 		break;
    786 	case Cat:
    787 		epr(e->l, r1);
    788 		epr(e->r, r2);
    789 		(void) sprintf(res, "%s%s", r1, r2);
    790 		break;
    791 	case Alternate:
    792 		epr(e->l, r1);
    793 		epr(e->r, r2);
    794 		(void) sprintf(res, "(%s|%s)", r1, r2);
    795 		break;
    796 	case Star:
    797 		epr(e->l, r1);
    798 		(void) sprintf(res, "(%s)*", r1);
    799 		break;
    800 	case Plus:
    801 		epr(e->l, r1);
    802 		(void) sprintf(res, "(%s)+", r1);
    803 		break;
    804 	case Quest:
    805 		epr(e->l, r1);
    806 		(void) sprintf(res, "(%s)?", r1);
    807 		break;
    808 	case EOP:
    809 		epr(e->l, r1);
    810 		(void) sprintf(res, "%s<EOP>", r1);
    811 		break;
    812 	default:
    813 		(void) sprintf(res, "<undef type %d>", e->type);
    814 		err(res);
    815 		break;
    816 	}
    817 }
    818 
    819 static void
    820 spr(int c, int *p, uchar_t *buf)
    821 {
    822 	if (c > 0) {
    823 		*buf++ = '{';
    824 		*buf = '\0';
    825 		while (--c > 0) {
    826 			(void) sprintf(buf, "%d,", *p++);
    827 			buf = strchr(buf, 0);
    828 		}
    829 		(void) sprintf(buf, "%d}", *p);
    830 	} else
    831 		(void) sprintf(buf, "{}");
    832 }
    833 #endif
    834 
    835 static void
    836 stateinit(re_re *r)
    837 {
    838 	/* CONSTANTCONDITION */
    839 	r->statelim = (sizeof (int) < 4 ? 32 : 128);
    840 	r->states = (State *)egmalloc(r->statelim * sizeof (State));
    841 
    842 	/* CONSTANTCONDITION */
    843 	r->nposalloc = (sizeof (int) < 4 ? 2048 : 8192);
    844 	r->posbase = (FID *)egmalloc(r->nposalloc * sizeof (FID));
    845 	r->nstates = r->posnext = 0;
    846 }
    847 
    848 static void
    849 clrstates(re_re *r)
    850 {
    851 	r->nstates = 0;		/* reclaim space for states and positions */
    852 	r->posnext = r->posreset;
    853 	(void) memset((char *)r->istate.tab, 0, sizeof (r->istate.tab));
    854 }
    855 
    856 static void
    857 savestate(re_re *r)
    858 {
    859 	r->posreset = r->posnext;	/* save for reset */
    860 }
    861 
    862 static State *
    863 startstate(re_re *r)
    864 {
    865 	return (&r->istate);
    866 }
    867 
    868 static int
    869 getstate(re_re *r, Positionset *ps)
    870 {
    871 	if (r->nstates >= r->statelim ||
    872 	    r->posnext + ps->count >= r->nposalloc) {
    873 		clrstates(r);
    874 #ifdef	DEBUG
    875 		printf("%d STATES FLUSHED\n", r->statelim);
    876 #endif
    877 	}
    878 	return (r->nstates++);
    879 }
    880 
    881 static State *
    882 stateof(re_re *r, Positionset *ps)
    883 {
    884 	State *s;
    885 	int i;
    886 	FID *p, *e;
    887 
    888 	for (i = 0, s = r->states; i < r->nstates; i++, s++) {
    889 		if (s->npos == ps->count) {
    890 			for (p = s->pos+r->posbase, e = p+s->npos; p < e; p++)
    891 				if (ps->base[p->id].id == 0 ||
    892 					ps->base[p->id].fcount != p->fcount)
    893 						goto next;
    894 			return (s);
    895 		}
    896 	next:;
    897 	}
    898 	return (NULL);
    899 }
    900 
    901 static re_re *
    902 egprep(PATTERN *pat)
    903 {
    904 	re_re *r;
    905 
    906 	r = (re_re *)egmalloc(sizeof (re_re));
    907 	(void) memset((char *)r, 0, sizeof (re_re));
    908 
    909 	pat->loc1 = pat->expression;
    910 	pat->loc2 = pat->expression + strlen((char *)pat->expression);
    911 
    912 	parno = 0;
    913 	maxid = 1;
    914 	r->cmap = pat->cmap;
    915 	lex(r, pat);
    916 	r->root = newexpr(EOP, '#', eall(r, pat), (Expr *)NULL);
    917 	r->maxid = maxid;
    918 
    919 	eginit(r);
    920 	return (r);
    921 }
    922 
    923 static Expr *
    924 newexpr(Exprtype t, int lit, Expr *left, Expr *right)
    925 {
    926 	Expr *e = (Expr *)egmalloc(sizeof (Expr));
    927 
    928 	e->type = t;
    929 	e->parent = NULL;
    930 	e->lit = lit;
    931 
    932 	if (e->lit) e->id = maxid++;
    933 	else e->id = 0;
    934 
    935 	if ((e->l = left) != NULL) {
    936 		left->parent = e;
    937 	}
    938 	if ((e->r = right) != NULL) {
    939 		right->parent = e;
    940 	}
    941 	e->follow = NULL;
    942 	e->flen = 0;
    943 	return (e);
    944 }
    945 
    946 static void
    947 lex(re_re *r, PATTERN *pat)
    948 {
    949 	if (pat->loc1 == pat->loc2) {
    950 		toktype = EOP;
    951 		toklit = -1;
    952 	} else switch (toklit = *pat->loc1++) {
    953 	case '.':	toktype = Dot; break;
    954 	case '*':	toktype = Star; break;
    955 	case '+':	toktype = Plus; break;
    956 	case '?':	toktype = Quest; break;
    957 	case '[':	toktype = Charclass; break;
    958 	case '|':	toktype = Alternate; break;
    959 	case '(':	toktype = Lpar; break;
    960 	case ')':	toktype = Rpar; break;
    961 	case '\\':	toktype = Backslash;
    962 			if (pat->loc1 == pat->loc2) {
    963 				err("syntax error - missing character "
    964 				    "after \\");
    965 			} else
    966 			    toklit = r->cmap[*pat->loc1++];
    967 			break;
    968 	case '^': case '$':	toktype = Literal; toklit = NL; break;
    969 	default:	toktype = Literal; toklit = r->cmap[toklit]; break;
    970 	}
    971 }
    972 
    973 static int
    974 ccl(PATTERN *pat, uchar_t *tab)
    975 {
    976 	int i;
    977 	int range = 0;
    978 	int lastc = -1;
    979 	int count = 0;
    980 	BOOL comp = NO;
    981 
    982 	(void) memset((char *)tab, 0, CCL_SIZ * sizeof (uchar_t));
    983 	if (*pat->loc1 == '^') {
    984 		pat->loc1++;
    985 		comp = YES;
    986 	}
    987 	if (*pat->loc1 == ']') {
    988 		uchar_t c = pat->cmap[*pat->loc1];
    989 		CCL_SET(tab, c);
    990 		lastc = *pat->loc1++;
    991 	}
    992 	/* scan for chars */
    993 	for (; (pat->loc1 < pat->loc2) && (*pat->loc1 != ']');
    994 							pat->loc1++) {
    995 		if (*pat->loc1 == '-') {
    996 			if (lastc < 0) CCL_SET(tab, pat->cmap['-']);
    997 			else range = 1;
    998 			continue;
    999 		}
   1000 		if (range) {
   1001 			for (i = *pat->loc1; i >= lastc; i--) {
   1002 				CCL_SET(tab, pat->cmap[i]);
   1003 			}
   1004 		} else {
   1005 			uchar_t c = pat->cmap[*pat->loc1];
   1006 			CCL_SET(tab, c);
   1007 		}
   1008 
   1009 		range = 0;
   1010 
   1011 		lastc = *pat->loc1;
   1012 	}
   1013 	if (range) CCL_SET(tab, pat->cmap['-']);
   1014 
   1015 	if (pat->loc1 < pat->loc2) pat->loc1++;
   1016 	else err("syntax error - missing ]");
   1017 
   1018 	if (comp) {
   1019 		CCL_SET(tab, pat->cmap[NL]);
   1020 		for (i = 0; i < CCL_SIZ; i++) tab[i] ^= 0xff;
   1021 	}
   1022 	for (i = 0; i < 256; i++) {
   1023 		if (pat->cmap[i] != i) CCL_CLR(tab, i);
   1024 		if (CCL_CHK(tab, i)) {
   1025 			lastc = i;
   1026 			count++;
   1027 		}
   1028 	}
   1029 	if (count == 1)
   1030 		*tab = (char)lastc;
   1031 	return (count);
   1032 }
   1033 
   1034 /*
   1035  * egrep patterns:
   1036  *
   1037  * Alternation:	d0:	d1 { '|' d1 }*
   1038  * Concatenation:	d1:	d2 { d2 }*
   1039  * Repetition:	d2:	d3 { '*' | '?' | '+' }
   1040  * Literal:	d3:	lit | '.' | '[]' | '(' d0 ')'
   1041  */
   1042 
   1043 static Expr *
   1044 d3(re_re *r, PATTERN *pat)
   1045 {
   1046 	Expr *e;
   1047 	uchar_t *tab;
   1048 	int count;
   1049 
   1050 	switch (toktype) {
   1051 	case Backslash:
   1052 	case Literal:
   1053 		e = newexpr(Literal, toklit, (Expr *)NULL, (Expr *)NULL);
   1054 		lex(r, pat);
   1055 		break;
   1056 	case Dot:
   1057 		e = newexpr(Dot, '.', (Expr *)NULL, (Expr *)NULL);
   1058 		lex(r, pat);