Home | History | Annotate | Download | only in cscope-fast
      1 /*
      2  * CDDL HEADER START
      3  *
      4  * The contents of this file are subject to the terms of the
      5  * Common Development and Distribution License, Version 1.0 only
      6  * (the "License").  You may not use this file except in compliance
      7  * with the License.
      8  *
      9  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
     10  * or http://www.opensolaris.org/os/licensing.
     11  * See the License for the specific language governing permissions
     12  * and limitations under the License.
     13  *
     14  * When distributing Covered Code, include this CDDL HEADER in each
     15  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
     16  * If applicable, add the following below this CDDL HEADER, with the
     17  * fields enclosed by brackets "[]" replaced with your own identifying
     18  * information: Portions Copyright [yyyy] [name of copyright owner]
     19  *
     20  * CDDL HEADER END
     21  */
     22 /*	Copyright (c) 1988 AT&T	*/
     23 /*	  All Rights Reserved  	*/
     24 
     25 
     26 /*
     27  * Copyright 2004 Sun Microsystems, Inc.  All rights reserved.
     28  * Use is subject to license terms.
     29  */
     30 
     31 #pragma ident	"%Z%%M%	%I%	%E% SMI"
     32 
     33 #include <ctype.h>
     34 #include <stdio.h>
     35 #include <sys/types.h>
     36 #include <sys/sysmacros.h>
     37 #include <sys/byteorder.h>
     38 #if SHARE
     39 #include <sys/ipc.h>
     40 #include <sys/shm.h>
     41 #define	ERR  -1
     42 #endif
     43 #include "invlib.h"
     44 #include "library.h"
     45 
     46 #define	DEBUG		0	/* debugging code and realloc messages */
     47 #define	BLOCKSIZE	2 * BUFSIZ	/* logical block size */
     48 #define	LINEMAX		1000	/* sorted posting line max size */
     49 #define	POSTINC		10000	/* posting buffer size increment */
     50 #define	SEP		' '	/* sorted posting field separator */
     51 #define	SETINC		100	/* posting set size increment */
     52 #define	STATS		0	/* print statistics */
     53 #define	SUPERINC	10000	/* super index size increment */
     54 #define	TERMMAX		512	/* term max size */
     55 #define	VERSION		1	/* inverted index format version */
     56 #define	ZIPFSIZE	200	/* zipf curve size */
     57 #define	FREAD	"r"		/* fopen for reading */
     58 #define	FREADP	"r+"		/* fopen for update */
     59 #define	FWRITE	"w"		/* fopen truncate or create for writing */
     60 #define	FWRITEP	"w+"		/* fopen truncate or create for update */
     61 
     62 extern	char	*argv0;	/* command name (must be set in main function) */
     63 
     64 int	invbreak;
     65 
     66 #if STATS
     67 int	showzipf;	/* show postings per term distribution */
     68 #endif
     69 
     70 static	POSTING	*item, *enditem, *item1 = NULL, *item2 = NULL;
     71 static	unsigned setsize1, setsize2;
     72 static	long	numitems, totterm, zerolong;
     73 static	char	*indexfile, *postingfile;
     74 static	FILE	*outfile, *fpost;
     75 static	unsigned supersize = SUPERINC, supintsize;
     76 static	int	numpost, numlogblk, amtused, nextpost,
     77 		lastinblk, numinvitems;
     78 static	POSTING	*POST, *postptr;
     79 static	unsigned long	*SUPINT, *supint, nextsupfing;
     80 static	char	*SUPFING, *supfing;
     81 static	char	thisterm[TERMMAX];
     82 static	union {
     83 	long	invblk[BLOCKSIZE / sizeof (long)];
     84 	char	chrblk[BLOCKSIZE];
     85 } logicalblk;
     86 
     87 #if DEBUG || STATS
     88 static	long	totpost;
     89 #endif
     90 
     91 #if STATS
     92 static	int	zipf[ZIPFSIZE + 1];
     93 #endif
     94 
     95 static void invcannotalloc(size_t n);
     96 static void invcannotopen(char *file);
     97 static void invcannotwrite(char *file);
     98 static int invnewterm(void);
     99 static int boolready(void);
    100 
    101 long
    102 invmake(char *invname, char *invpost, FILE *infile)
    103 {
    104 	unsigned char	*s;
    105 	long	num;
    106 	int	i;
    107 	long	fileindex;
    108 	unsigned postsize = POSTINC * sizeof (POSTING);
    109 	unsigned long	*intptr;
    110 	char	line[LINEMAX];
    111 	long	tlong;
    112 	PARAM	param;
    113 	POSTING	posting;
    114 #if STATS
    115 	int	j;
    116 	unsigned maxtermlen = 0;
    117 #endif
    118 	/* output file */
    119 	if ((outfile = vpfopen(invname, FWRITEP)) == NULL) {
    120 		invcannotopen(invname);
    121 		return (0);
    122 	}
    123 	indexfile = invname;
    124 	(void) fseek(outfile, (long)BUFSIZ, 0);
    125 
    126 	/* posting file  */
    127 	if ((fpost = vpfopen(invpost, FWRITE)) == NULL) {
    128 		invcannotopen(invpost);
    129 		return (0);
    130 	}
    131 	postingfile = invpost;
    132 	nextpost = 0;
    133 	/* get space for the postings list */
    134 	if ((POST = (POSTING *)malloc(postsize)) == NULL) {
    135 		invcannotalloc(postsize);
    136 		return (0);
    137 	}
    138 	postptr = POST;
    139 	/* get space for the superfinger (superindex) */
    140 	if ((SUPFING = malloc(supersize)) == NULL) {
    141 		invcannotalloc(supersize);
    142 		return (0);
    143 	}
    144 	supfing = SUPFING;
    145 	supintsize = supersize / 40;
    146 	/* also for the superfinger index */
    147 	if ((SUPINT = malloc(supintsize * sizeof (long))) == NULL) {
    148 		invcannotalloc(supintsize * sizeof (long));
    149 		return (0);
    150 	}
    151 	supint = SUPINT;
    152 	supint++; /* leave first term open for a count */
    153 	/* initialize using an empty term */
    154 	(void) strcpy(thisterm, "");
    155 	*supint++ = 0;
    156 	*supfing++ = ' ';
    157 	*supfing++ = '\0';
    158 	nextsupfing = 2;
    159 #if DEBUG || STATS
    160 	totpost = 0L;
    161 #endif
    162 	totterm = 0L;
    163 	numpost = 1;
    164 
    165 	/*
    166 	 * set up as though a block had come and gone, i.e., set up for
    167 	 * new block
    168 	 */
    169 	amtused = 16; /* leave no space - init 3 words + one for luck */
    170 	numinvitems = 0;
    171 	numlogblk = 0;
    172 	lastinblk = BLOCKSIZE;
    173 
    174 	/* now loop as long as more to read (till eof)  */
    175 	while (fgets(line, LINEMAX, infile) != NULL) {
    176 #if DEBUG || STATS
    177 		++totpost;
    178 #endif
    179 		s = (unsigned char *) strchr(line, SEP);
    180 		if (s == NULL)		/* where did this line come from ??? */
    181 			continue;	/* workaround: just skip it */
    182 		*s = '\0';
    183 #if STATS
    184 		if ((i = strlen(line)) > maxtermlen) {
    185 			maxtermlen = i;
    186 		}
    187 #endif
    188 #if DEBUG
    189 		(void) printf("%ld: %s ", totpost, line);
    190 		(void) fflush(stdout);
    191 #endif
    192 		if (strcmp(thisterm, line) == 0) {
    193 			if (postptr + 10 > POST + postsize / sizeof (POSTING)) {
    194 				i = postptr - POST;
    195 				postsize += POSTINC * sizeof (POSTING);
    196 				if ((POST = realloc(POST, postsize)) == NULL) {
    197 					invcannotalloc(postsize);
    198 					return (0);
    199 				}
    200 				postptr = i + POST;
    201 #if DEBUG
    202 				(void) printf("reallocated post space to %u, "
    203 				    "totpost=%ld\n", postsize, totpost);
    204 #endif
    205 			}
    206 			numpost++;
    207 		} else {
    208 			/* have a new term */
    209 			if (!invnewterm()) {
    210 				return (0);
    211 			}
    212 			(void) strcpy(thisterm, line);
    213 			numpost = 1;
    214 			postptr = POST;
    215 			fileindex = 0;
    216 		}
    217 		/* get the new posting */
    218 		num = *++s - '!';
    219 		i = 1;
    220 		do {
    221 			num = BASE * num + *++s - '!';
    222 		} while (++i < PRECISION);
    223 		posting.lineoffset = num;
    224 		while (++fileindex < nsrcoffset && num > srcoffset[fileindex]) {
    225 			;
    226 		}
    227 		posting.fileindex = --fileindex;
    228 		posting.type = *++s;
    229 		num = *++s - '!';
    230 		if (*s != '\n') {
    231 			num = *++s - '!';
    232 			while (*++s != '\n') {
    233 				num = BASE * num + *s - '!';
    234 			}
    235 			posting.fcnoffset = num;
    236 		} else {
    237 			posting.fcnoffset = 0;
    238 		}
    239 		*postptr++ = posting;
    240 #if DEBUG
    241 		(void) printf("%ld %ld %ld %ld\n", posting.fileindex,
    242 		    posting.fcnoffset, posting.lineoffset, posting.type);
    243 		(void) fflush(stdout);
    244 #endif
    245 	}
    246 	if (!invnewterm()) {
    247 		return (0);
    248 	}
    249 	/* now clean up final block  */
    250 	logicalblk.invblk[0] = numinvitems;
    251 	/* loops pointer around to start */
    252 	logicalblk.invblk[1] = 0;
    253 	logicalblk.invblk[2] = numlogblk - 1;
    254 	if (fwrite((char *)&logicalblk, BLOCKSIZE, 1, outfile) == 0) {
    255 		goto cannotwrite;
    256 	}
    257 	numlogblk++;
    258 	/* write out block to save space. what in it doesn't matter */
    259 	if (fwrite((char *)&logicalblk, BLOCKSIZE, 1, outfile) == 0) {
    260 		goto cannotwrite;
    261 	}
    262 	/* finish up the super finger */
    263 	*SUPINT = numlogblk;
    264 	/* add to the offsets the size of the offset pointers */
    265 	intptr = (SUPINT + 1);
    266 	i = (char *)supint - (char *)SUPINT;
    267 	while (intptr < supint)
    268 		*intptr++ += i;
    269 	/* write out the offsets (1 for the N at start) and the super finger */
    270 	if (fwrite((char *)SUPINT, sizeof (*SUPINT), numlogblk + 1,
    271 	    outfile) == 0 ||
    272 	    fwrite(SUPFING, 1, supfing - SUPFING, outfile) == 0) {
    273 		goto cannotwrite;
    274 	}
    275 	/* save the size for reference later */
    276 	nextsupfing = sizeof (long) + sizeof (long) * numlogblk +
    277 	    (supfing - SUPFING);
    278 	/*
    279 	 * make sure the file ends at a logical block boundary.  This is
    280 	 * necessary for invinsert to correctly create extended blocks
    281 	 */
    282 	i = nextsupfing % BLOCKSIZE;
    283 	/* write out junk to fill log blk */
    284 	if (fwrite(SUPFING, BLOCKSIZE - i, 1, outfile) == 0 ||
    285 	    fflush(outfile) == EOF) {
    286 		/* rewind doesn't check for write failure */
    287 		goto cannotwrite;
    288 	}
    289 	/* write the control area */
    290 	rewind(outfile);
    291 	param.version = VERSION;
    292 	param.filestat = 0;
    293 	param.sizeblk = BLOCKSIZE;
    294 	param.startbyte = (numlogblk + 1) * BLOCKSIZE + BUFSIZ;
    295 	param.supsize = nextsupfing;
    296 	param.cntlsize = BUFSIZ;
    297 	param.share = 0;
    298 	if (fwrite((char *)&param, sizeof (param), 1, outfile) == 0) {
    299 		goto cannotwrite;
    300 	}
    301 	for (i = 0; i < 10; i++)	/* for future use */
    302 		if (fwrite((char *)&zerolong, sizeof (zerolong),
    303 		    1, outfile) == 0) {
    304 			goto cannotwrite;
    305 		}
    306 
    307 	/* make first block loop backwards to last block */
    308 	if (fflush(outfile) == EOF) {
    309 		/* fseek doesn't check for write failure */
    310 		goto cannotwrite;
    311 	}
    312 	/* get to second word first block */
    313 	(void) fseek(outfile, (long)BUFSIZ + 8, 0);
    314 	tlong = numlogblk - 1;
    315 	if (fwrite((char *)&tlong, sizeof (tlong), 1, outfile) == 0 ||
    316 	    fclose(outfile) == EOF) {
    317 cannotwrite:
    318 		invcannotwrite(invname);
    319 		return (0);
    320 	}
    321 	if (fclose(fpost) == EOF) {
    322 		invcannotwrite(postingfile);
    323 		return (0);
    324 	}
    325 	--totterm;	/* don't count null term */
    326 #if STATS
    327 	(void) printf("logical blocks = %d, postings = %ld, terms = %ld, "
    328 	    "max term length = %d\n", numlogblk, totpost, totterm, maxtermlen);
    329 	if (showzipf) {
    330 		(void) printf(
    331 		    "\n*************   ZIPF curve  ****************\n");
    332 		for (j = ZIPFSIZE; j > 1; j--)
    333 			if (zipf[j])
    334 				break;
    335 		for (i = 1; i < j; ++i) {
    336 			(void) printf("%3d -%6d ", i, zipf[i]);
    337 			if (i % 6 == 0) (void) putchar('\n');
    338 		}
    339 		(void) printf(">%d-%6d\n", ZIPFSIZE, zipf[0]);
    340 	}
    341 #endif
    342 	/* free all malloc'd memory */
    343 	free(POST);
    344 	free(SUPFING);
    345 	free(SUPINT);
    346 	return (totterm);
    347 }
    348 
    349 /* add a term to the data base */
    350 
    351 static int
    352 invnewterm(void)
    353 {
    354 	int	backupflag, i, j, maxback, holditems, gooditems, howfar;
    355 	int	len, numwilluse, wdlen;
    356 	char	*tptr, *tptr2, *tptr3;
    357 	union {
    358 		unsigned long	packword[2];
    359 		ENTRY		e;
    360 	} iteminfo;
    361 
    362 	totterm++;
    363 #if STATS
    364 	/* keep zipfian info on the distribution */
    365 	if (numpost <= ZIPFSIZE)
    366 		zipf[numpost]++;
    367 	else
    368 		zipf[0]++;
    369 #endif
    370 	len = strlen(thisterm);
    371 	wdlen = (len + (sizeof (long) - 1)) / sizeof (long);
    372 	numwilluse = (wdlen + 3) * sizeof (long);
    373 	/* new block if at least 1 item in block */
    374 	if (numinvitems && numwilluse + amtused > BLOCKSIZE) {
    375 		/* set up new block */
    376 		if (supfing + 500 > SUPFING + supersize) {
    377 			i = supfing - SUPFING;
    378 			supersize += 20000;
    379 			if ((SUPFING = realloc(SUPFING, supersize)) == NULL) {
    380 				invcannotalloc(supersize);
    381 				return (0);
    382 			}
    383 			supfing = i + SUPFING;
    384 #if DEBUG
    385 			(void) printf("reallocated superfinger space to %d, "
    386 			    "totpost=%ld\n", supersize, totpost);
    387 #endif
    388 		}
    389 		/* check that room for the offset as well */
    390 		if ((numlogblk + 10) > supintsize) {
    391 			i = supint - SUPINT;
    392 			supintsize += SUPERINC;
    393 			if ((SUPINT = realloc((char *)SUPINT,
    394 			    supintsize * sizeof (long))) == NULL) {
    395 				invcannotalloc(supintsize * sizeof (long));
    396 				return (0);
    397 			}
    398 			supint = i + SUPINT;
    399 #if DEBUG
    400 			(void) printf("reallocated superfinger offset to %d, "
    401 			    "totpost = %ld\n", supintsize * sizeof (long),
    402 			    totpost);
    403 #endif
    404 		}
    405 		/* See if backup is efficatious  */
    406 		backupflag = 0;
    407 		maxback = strlen(thisterm) / 10;
    408 		holditems = numinvitems;
    409 		if (maxback > numinvitems)
    410 			maxback = numinvitems - 2;
    411 		howfar = 0;
    412 		while (--maxback > 0) {
    413 			howfar++;
    414 			iteminfo.packword[0] =
    415 			    logicalblk.invblk[--holditems * 2 +
    416 			    (sizeof (long) - 1)];
    417 			if ((i = iteminfo.e.size / 10) < maxback) {
    418 				maxback = i;
    419 				backupflag = howfar;
    420 				gooditems = holditems;
    421 				tptr2 = logicalblk.chrblk + iteminfo.e.offset;
    422 			}
    423 		}
    424 		/* see if backup will occur  */
    425 		if (backupflag) {
    426 			numinvitems = gooditems;
    427 		}
    428 		logicalblk.invblk[0] = numinvitems;
    429 		/* set forward pointer pointing to next */
    430 		logicalblk.invblk[1] = numlogblk + 1;
    431 		/* set back pointer to last block */
    432 		logicalblk.invblk[2] = numlogblk - 1;
    433 		if (fwrite((char *)logicalblk.chrblk, 1,
    434 		    BLOCKSIZE, outfile) == 0) {
    435 			invcannotwrite(indexfile);
    436 			return (0);
    437 		}
    438 		amtused = 16;
    439 		numlogblk++;
    440 		/* check if had to back up, if so do it */
    441 		if (backupflag) {
    442 			/* find out where the end of the new block is */
    443 			iteminfo.packword[0] =
    444 			    logicalblk.invblk[numinvitems * 2 + 1];
    445 			tptr3 = logicalblk.chrblk + iteminfo.e.offset;
    446 			/* move the index for this block */
    447 			for (i = 3; i <= (backupflag * 2 + 2); i++) {
    448 				logicalblk.invblk[i] =
    449 				    logicalblk.invblk[numinvitems * 2+i];
    450 			}
    451 			/* move the word into the super index */
    452 			iteminfo.packword[0] = logicalblk.invblk[3];
    453 			iteminfo.packword[1] = logicalblk.invblk[4];
    454 			tptr2 = logicalblk.chrblk + iteminfo.e.offset;
    455 			(void) strncpy(supfing, tptr2, (int)iteminfo.e.size);
    456 			*(supfing + iteminfo.e.size) = '\0';
    457 #if DEBUG
    458 			(void) printf("backup %d at term=%s to term=%s\n",
    459 			    backupflag, thisterm, supfing);
    460 #endif
    461 			*supint++ = nextsupfing;
    462 			nextsupfing += strlen(supfing) + 1;
    463 			supfing += strlen(supfing) + 1;
    464 			/* now fix up the logical block */
    465 			tptr = logicalblk.chrblk + lastinblk;
    466 			lastinblk = BLOCKSIZE;
    467 			tptr2 = logicalblk.chrblk + lastinblk;
    468 			j = tptr3 - tptr;
    469 			while (tptr3 > tptr)
    470 				*--tptr2 = *--tptr3;
    471 			lastinblk -= j;
    472 			amtused += (8 * backupflag + j);
    473 			for (i = 3; i < (backupflag * 2 + 2); i += 2) {
    474 				iteminfo.packword[0] = logicalblk.invblk[i];
    475 				iteminfo.e.offset += (tptr2 - tptr3);
    476 				logicalblk.invblk[i] = iteminfo.packword[0];
    477 			}
    478 			numinvitems = backupflag;
    479 		} else { /* no backup needed */
    480 			numinvitems = 0;
    481 			lastinblk = BLOCKSIZE;
    482 			/* add new term to superindex */
    483 			(void) strcpy(supfing, thisterm);
    484 			supfing += strlen(thisterm) + 1;
    485 			*supint++ = nextsupfing;
    486 			nextsupfing += strlen(thisterm) + 1;
    487 		}
    488 	}
    489 	lastinblk -= (numwilluse - 8);
    490 	iteminfo.e.offset = lastinblk;
    491 	iteminfo.e.size = (char)len;
    492 	iteminfo.e.space = 0;
    493 	iteminfo.e.post = numpost;
    494 	(void) strncpy(logicalblk.chrblk + lastinblk, thisterm, len);
    495 	amtused += numwilluse;
    496 	logicalblk.invblk[(lastinblk/sizeof (long))+wdlen] = nextpost;
    497 	if ((i = postptr - POST) > 0) {
    498 		if (fwrite((char *)POST, sizeof (POSTING), i, fpost) == 0) {
    499 			invcannotwrite(postingfile);
    500 			return (0);
    501 		}
    502 		nextpost += i * sizeof (POSTING);
    503 	}
    504 	logicalblk.invblk[3+2*numinvitems++] = iteminfo.packword[0];
    505 	logicalblk.invblk[2+2*numinvitems] = iteminfo.packword[1];
    506 	return (1);
    507 }
    508 
    509 static void
    510 swap_ints(void *p, size_t sz)
    511 {
    512 	uint32_t *s;
    513 	uint32_t *e = (uint32_t *)p + (sz / sizeof (uint32_t));
    514 
    515 	for (s = p; s < e; s++)
    516 		*s = BSWAP_32(*s);
    517 }
    518 
    519 static void
    520 write_param(INVCONTROL *invcntl)
    521 {
    522 	if (invcntl->swap)
    523 		swap_ints(&invcntl->param, sizeof (invcntl->param));
    524 
    525 	rewind(invcntl->invfile);
    526 	(void) fwrite((char *)&invcntl->param, sizeof (invcntl->param), 1,
    527 	    invcntl->invfile);
    528 
    529 	if (invcntl->swap)
    530 		swap_ints(&invcntl->param, sizeof (invcntl->param));
    531 }
    532 
    533 static void
    534 read_superfinger(INVCONTROL *invcntl)
    535 {
    536 	size_t count;
    537 
    538 	(void) fseek(invcntl->invfile, invcntl->param.startbyte, SEEK_SET);
    539 	(void) fread(invcntl->iindex, (int)invcntl->param.supsize,
    540 	    1, invcntl->invfile);
    541 
    542 	if (invcntl->swap) {
    543 		/*
    544 		 * The superfinger consists of a count, followed by
    545 		 * count offsets, followed by a string table (which
    546 		 * the offsets reference).
    547 		 *
    548 		 * We need to swap the count and the offsets.
    549 		 */
    550 		count = 1 + BSWAP_32(*(uint32_t *)invcntl->iindex);
    551 		swap_ints(invcntl->iindex, count * sizeof (unsigned long));
    552 	}
    553 }
    554 
    555 static void
    556 read_logblock(INVCONTROL *invcntl, int block)
    557 {
    558 	/* note always fetch it if the file is busy */
    559 	if ((block != invcntl->numblk) ||
    560 	    (invcntl->param.filestat >= INVBUSY)) {
    561 		(void) fseek(invcntl->invfile,
    562 		    (block * invcntl->param.sizeblk) + invcntl->param.cntlsize,
    563 		    SEEK_SET);
    564 		invcntl->numblk = block;
    565 		(void) fread(invcntl->logblk, (int)invcntl->param.sizeblk,
    566 		    1, invcntl->invfile);
    567 
    568 		if (invcntl->swap) {
    569 			size_t count;
    570 			ENTRY *ecur, *eend;
    571 			uint32_t *postptr;
    572 
    573 			/*
    574 			 * A logblock consists of a count, a next block id,
    575 			 * and a previous block id, followed by count
    576 			 * ENTRYs, followed by alternating strings and
    577 			 * offsets.
    578 			 */
    579 			swap_ints(invcntl->logblk, 3 * sizeof (unsigned long));
    580 
    581 			count = *(uint32_t *)invcntl->logblk;
    582 
    583 			ecur = (ENTRY *)((uint32_t *)invcntl->logblk + 3);
    584 			eend = ecur + count;
    585 
    586 			for (; ecur < eend; ecur++) {
    587 				ecur->offset = BSWAP_16(ecur->offset);
    588 				ecur->post = BSWAP_32(ecur->post);
    589 
    590 				/*
    591 				 * After the string is the posting offset.
    592 				 */
    593 				postptr = (uint32_t *)(invcntl->logblk +
    594 				    ecur->offset +
    595 				    P2ROUNDUP(ecur->size, sizeof (long)));
    596 
    597 				*postptr = BSWAP_32(*postptr);
    598 			}
    599 		}
    600 	}
    601 }
    602 
    603 void
    604 read_next_posting(INVCONTROL *invcntl, POSTING *posting)
    605 {
    606 	(void) fread((char *)posting, sizeof (*posting), 1, invcntl->postfile);
    607 	if (invcntl->swap) {
    608 		posting->lineoffset = BSWAP_32(posting->lineoffset);
    609 		posting->fcnoffset = BSWAP_32(posting->fcnoffset);
    610 		/*
    611 		 * fileindex is a 24-bit field, so shift it before swapping
    612 		 */
    613 		posting->fileindex = BSWAP_32(posting->fileindex << 8);
    614 	}
    615 }
    616 
    617 int
    618 invopen(INVCONTROL *invcntl, char *invname, char *invpost, int stat)
    619 {
    620 	int	read_index;
    621 
    622 	if ((invcntl->invfile = vpfopen(invname,
    623 	    ((stat == 0) ? FREAD : FREADP))) == NULL) {
    624 		invcannotopen(invname);
    625 		return (-1);
    626 	}
    627 	if (fread((char *)&invcntl->param, sizeof (invcntl->param), 1,
    628 	    invcntl->invfile) == 0) {
    629 		(void) fprintf(stderr, "%s: empty inverted file\n", argv0);
    630 		goto closeinv;
    631 	}
    632 	if (invcntl->param.version != VERSION &&
    633 	    BSWAP_32(invcntl->param.version) != VERSION) {
    634 		(void) fprintf(stderr,
    635 		    "%s: cannot read old index format; use -U option to "
    636 		    "force database to rebuild\n", argv0);
    637 		goto closeinv;
    638 	}
    639 	invcntl->swap = (invcntl->param.version != VERSION);
    640 	if (invcntl->swap)
    641 		swap_ints(&invcntl->param, sizeof (invcntl->param));
    642 
    643 	if (stat == 0 && invcntl->param.filestat == INVALONE) {
    644 		(void) fprintf(stderr, "%s: inverted file is locked\n", argv0);
    645 		goto closeinv;
    646 	}
    647 	if ((invcntl->postfile = vpfopen(invpost,
    648 	    ((stat == 0) ? FREAD : FREADP))) == NULL) {
    649 		invcannotopen(invpost);
    650 		goto closeinv;
    651 	}
    652 	/* allocate core for a logical block  */
    653 	if ((invcntl->logblk = malloc(invcntl->param.sizeblk)) == NULL) {
    654 		invcannotalloc((unsigned)invcntl->param.sizeblk);
    655 		goto closeboth;
    656 	}
    657 	/* allocate for and read in superfinger  */
    658 	read_index = 1;
    659 	invcntl->iindex = NULL;
    660 #if SHARE
    661 	if (invcntl->param.share == 1) {
    662 		key_t ftok(), shm_key;
    663 		struct shmid_ds shm_buf;
    664 		char	*shmat();
    665 		int	shm_id;
    666 
    667 		/* see if the shared segment exists */
    668 		shm_key = ftok(invname, 2);
    669 		shm_id = shmget(shm_key, 0, 0);
    670 		/*
    671 		 * Failure simply means (hopefully) that segment doesn't
    672 		 * exist
    673 		 */
    674 		if (shm_id == -1) {
    675 			/*
    676 			 * Have to give general write permission due to AMdahl
    677 			 * not having protected segments
    678 			 */
    679 			shm_id = shmget(shm_key,
    680 			    invcntl->param.supsize + sizeof (long),
    681 			    IPC_CREAT | 0666);
    682 			if (shm_id == -1)
    683 				perror("Could not create shared "
    684 				    "memory segment");
    685 		} else
    686 			read_index = 0;
    687 
    688 		if (shm_id != -1) {
    689 			invcntl->iindex = shmat(shm_id, 0,
    690 			    ((read_index) ? 0 : SHM_RDONLY));
    691 			if (invcntl->iindex == (char *)ERR) {
    692 				(void) fprintf(stderr,
    693 				    "%s: shared memory link failed\n", argv0);
    694 				invcntl->iindex = NULL;
    695 				read_index = 1;
    696 			}
    697 		}
    698 	}
    699 #endif
    700 	if (invcntl->iindex == NULL)
    701 		invcntl->iindex = malloc(invcntl->param.supsize + 16);
    702 	if (invcntl->iindex == NULL) {
    703 		invcannotalloc((unsigned)invcntl->param.supsize);
    704 		free(invcntl->logblk);
    705 		goto closeboth;
    706 	}
    707 	if (read_index) {
    708 		read_superfinger(invcntl);
    709 	}
    710 	invcntl->numblk = -1;
    711 	if (boolready() == -1) {
    712 	closeboth:
    713 		(void) fclose(invcntl->postfile);
    714 	closeinv:
    715 		(void) fclose(invcntl->invfile);
    716 		return (-1);
    717 	}
    718 	/* write back out the control block if anything changed */
    719 	invcntl->param.filestat = stat;
    720 	if (stat > invcntl->param.filestat)
    721 		write_param(invcntl);
    722 	return (1);
    723 }
    724 
    725 /* invclose must be called to wrap things up and deallocate core */
    726 void
    727 invclose(INVCONTROL *invcntl)
    728 {
    729 	/* write out the control block in case anything changed */
    730 	if (invcntl->param.filestat > 0) {
    731 		invcntl->param.filestat = 0;
    732 		write_param(invcntl);
    733 	}
    734 	(void) fclose(invcntl->invfile);
    735 	(void) fclose(invcntl->postfile);
    736 #if SHARE
    737 	if (invcntl->param.share > 0) {
    738 		shmdt(invcntl->iindex);
    739 		invcntl->iindex = NULL;
    740 	}
    741 #endif
    742 	if (invcntl->iindex != NULL)
    743 		free(invcntl->iindex);
    744 	free(invcntl->logblk);
    745 }
    746 
    747 /* invstep steps the inverted file forward one item */
    748 void
    749 invstep(INVCONTROL *invcntl)
    750 {
    751 	if (invcntl->keypnt < (*(int *)invcntl->logblk - 1)) {
    752 		invcntl->keypnt++;
    753 		return;
    754 	}
    755 
    756 	/* move forward a block else wrap */
    757 	read_logblock(invcntl, *(int *)(invcntl->logblk + sizeof (long)));
    758 
    759 	invcntl->keypnt = 0;
    760 }
    761 
    762 /* invforward moves forward one term in the inverted file  */
    763 int
    764 invforward(INVCONTROL *invcntl)
    765 {
    766 	invstep(invcntl);
    767 	/* skip things with 0 postings */
    768 	while (((ENTRY *)(invcntl->logblk + 12) + invcntl->keypnt)->post == 0) {
    769 		invstep(invcntl);
    770 	}
    771 	/* Check for having wrapped - reached start of inverted file! */
    772 	if ((invcntl->numblk == 0) && (invcntl->keypnt == 0))
    773 		return (0);
    774 	return (1);
    775 }
    776 
    777 /*  invterm gets the present term from the present logical block  */
    778 int
    779 invterm(INVCONTROL *invcntl, char *term)
    780 {
    781 	ENTRY * entryptr;
    782 
    783 	entryptr = (ENTRY *)(invcntl->logblk + 12) + invcntl->keypnt;
    784 	(void) strncpy(term, entryptr->offset + invcntl->logblk,
    785 	    (int)entryptr->size);
    786 	*(term + entryptr->size) = '\0';
    787 	return (entryptr->post);
    788 }
    789 
    790 /* invfind searches for an individual item in the inverted file */
    791 long
    792 invfind(INVCONTROL *invcntl, char *searchterm)
    793 {
    794 	int	imid, ilow, ihigh;
    795 	long	num;
    796 	int	i;
    797 	unsigned long	*intptr, *intptr2;
    798 	ENTRY *entryptr;
    799 
    800 	/* make sure it is initialized via invready  */
    801 	if (invcntl->invfile == 0)
    802 		return (-1L);
    803 
    804 	/* now search for the appropriate finger block */
    805 	intptr = (unsigned long *)invcntl->iindex;
    806 
    807 	ilow = 0;
    808 	ihigh = *intptr++ - 1;
    809 	while (ilow <= ihigh) {
    810 		imid = (ilow + ihigh) / 2;
    811 		intptr2 = intptr + imid;
    812 		i = strcmp(searchterm, (invcntl->iindex + *intptr2));
    813 		if (i < 0)
    814 			ihigh = imid - 1;
    815 		else if (i > 0)
    816 			ilow = ++imid;
    817 		else {
    818 			ilow = imid + 1;
    819 			break;
    820 		}
    821 	}
    822 	/* be careful about case where searchterm is after last in this block */
    823 	imid = (ilow) ? ilow - 1 : 0;
    824 
    825 	/* fetch the appropriate logical block if not in core  */
    826 	read_logblock(invcntl, imid);
    827 
    828 srch_ext:
    829 	/* now find the term in this block. tricky this  */
    830 	intptr = (unsigned long *)invcntl->logblk;
    831 
    832 	ilow = 0;
    833 	ihigh = *intptr - 1;
    834 	intptr += 3;
    835 	num = 0;
    836 	while (ilow <= ihigh) {
    837 		imid = (ilow + ihigh) / 2;
    838 		entryptr = (ENTRY *)intptr + imid;
    839 		i = strncmp(searchterm, (invcntl->logblk + entryptr->offset),
    840 		    (int)entryptr->size);
    841 		if (i == 0)
    842 			i = strlen(searchterm) - entryptr->size;
    843 		if (i < 0)
    844 			ihigh = imid - 1;
    845 		else if (i > 0)
    846 			ilow = ++imid;
    847 		else {
    848 			num = entryptr->post;
    849 			break;
    850 		}
    851 	}
    852 	/* be careful about case where searchterm is after last in this block */
    853 	if (imid >= *(long *)invcntl->logblk) {
    854 		invcntl->keypnt = *(long *)invcntl->logblk;
    855 		invstep(invcntl);
    856 		/* note if this happens the term could be in extended block */
    857 		if (invcntl->param.startbyte <
    858 		    invcntl->numblk * invcntl->param.sizeblk)
    859 			goto srch_ext;
    860 	} else
    861 		invcntl->keypnt = imid;
    862 	return (num);
    863 }
    864 
    865 #if DEBUG
    866 
    867 /* invdump dumps the block the term parameter is in */
    868 void
    869 invdump(INVCONTROL *invcntl, char *term)
    870 {
    871 	long	i, j, n, *longptr;
    872 	ENTRY * entryptr;
    873 	char	temp[512], *ptr;
    874 
    875 	/* dump superindex if term is "-"  */
    876 	if (*term == '-') {
    877 		j = atoi(term + 1);
    878 		longptr = (long *)invcntl->iindex;
    879 		n = *longptr++;
    880 		(void) printf("Superindex dump, num blocks=%ld\n", n);
    881 		longptr += j;
    882 		while ((longptr <= ((long *)invcntl->iindex) + n) &&
    883 		    invbreak == 0) {
    884 			(void) printf("%2ld  %6ld %s\n", j++, *longptr,
    885 			    invcntl->iindex + *longptr);
    886 			longptr++;
    887 		}
    888 		return;
    889 	} else if (*term == '#') {
    890 		j = atoi(term + 1);
    891 		/* fetch the appropriate logical block */
    892 		read_logblock(invcntl, j);
    893 	} else
    894 		i = abs((int)invfind(invcntl, term));
    895 	longptr = (long *)invcntl->logblk;
    896 	n = *longptr++;
    897 	(void) printf("Entry term to invdump=%s, postings=%ld, "
    898 	    "forward ptr=%ld, back ptr=%ld\n", term, i, *(longptr),
    899 	    *(longptr + 1));
    900 	entryptr = (ENTRY *)(invcntl->logblk + 12);
    901 	(void) printf("%ld terms in this block, block=%ld\n", n,
    902 	    invcntl->numblk);
    903 	(void) printf("\tterm\t\t\tposts\tsize\toffset\tspace\t1st word\n");
    904 	for (j = 0; j < n && invbreak == 0; j++) {
    905 		ptr = invcntl->logblk + entryptr->offset;
    906 		(void) strncpy(temp, ptr, (int)entryptr->size);
    907 		temp[entryptr->size] = '\0';
    908 		ptr += (sizeof (long) *
    909 		    (long)((entryptr->size +
    910 		    (sizeof (long) - 1)) / sizeof (long)));
    911 		(void) printf("%2ld  %-24s\t%5ld\t%3d\t%d\t%d\t%ld\n", j, temp,
    912 		    entryptr->post, entryptr->size, entryptr->offset,
    913 		    entryptr->space, *(long *)ptr);
    914 		entryptr++;
    915 	}
    916 }
    917 #endif
    918 
    919 static int
    920 boolready(void)
    921 {
    922 	numitems = 0;
    923 	if (item1 != NULL)
    924 		free(item1);
    925 	setsize1 = SETINC;
    926 	if ((item1 = (POSTING *)malloc(SETINC * sizeof (POSTING))) == NULL) {
    927 		invcannotalloc(SETINC);
    928 		return (-1);
    929 	}
    930 	if (item2 != NULL)
    931 		free(item2);
    932 	setsize2 = SETINC;
    933 	if ((item2 = (POSTING *)malloc(SETINC * sizeof (POSTING))) == NULL) {
    934 		invcannotalloc(SETINC);
    935 		return (-1);
    936 	}
    937 	item = item1;
    938 	enditem = item;
    939 	return (0);
    940 }
    941 
    942 void
    943 boolclear(void)
    944 {
    945 	numitems = 0;
    946 	item = item1;
    947 	enditem = item;
    948 }
    949 
    950 POSTING *
    951 boolfile(INVCONTROL *invcntl, long *num, int bool)
    952 {
    953 	ENTRY	*entryptr;
    954 	FILE	*file;
    955 	char	*ptr;
    956 	unsigned long	*ptr2;
    957 	POSTING	*newitem;
    958 	POSTING	posting;
    959 	unsigned u;
    960 	POSTING *newsetp, *set1p;
    961 	long	newsetc, set1c, set2c;
    962 
    963 	entryptr = (ENTRY *) (invcntl->logblk + 12) + invcntl->keypnt;
    964 	ptr = invcntl->logblk + entryptr->offset;
    965 	ptr2 = ((unsigned long *)ptr) +
    966 	    (entryptr->size + (sizeof (long) - 1)) / sizeof (long);
    967 	*num = entryptr->post;
    968 	switch (bool) {
    969 	case OR:
    970 	case NOT:
    971 		if (*num == 0) {
    972 			*num = numitems;
    973 			return (item);
    974 		}
    975 	}
    976 	/* make room for the new set */
    977 	u = 0;
    978 	switch (bool) {
    979 	case AND:
    980 	case NOT:
    981 		newsetp = set1p = item;
    982 		break;
    983 
    984 	case OR:
    985 		u = enditem - item;
    986 		/* FALLTHROUGH */
    987 	case REVERSENO