Home | History | Annotate | Download | only in os
      1 /*
      2  * CDDL HEADER START
      3  *
      4  * The contents of this file are subject to the terms of the
      5  * Common Development and Distribution License (the "License").
      6  * You may not use this file except in compliance with the License.
      7  *
      8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
      9  * or http://www.opensolaris.org/os/licensing.
     10  * See the License for the specific language governing permissions
     11  * and limitations under the License.
     12  *
     13  * When distributing Covered Code, include this CDDL HEADER in each
     14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
     15  * If applicable, add the following below this CDDL HEADER, with the
     16  * fields enclosed by brackets "[]" replaced with your own identifying
     17  * information: Portions Copyright [yyyy] [name of copyright owner]
     18  *
     19  * CDDL HEADER END
     20  */
     21 
     22 /*
     23  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
     24  * Use is subject to license terms.
     25  */
     26 
     27 #include <sys/types.h>
     28 #include <sys/proc.h>
     29 #include <sys/file.h>
     30 #include <sys/errno.h>
     31 #include <sys/param.h>
     32 #include <sys/sysmacros.h>
     33 #include <sys/cmn_err.h>
     34 #include <sys/systm.h>
     35 #include <vm/as.h>
     36 #include <vm/page.h>
     37 #include <sys/uio.h>
     38 #include <sys/kmem.h>
     39 #include <sys/debug.h>
     40 #include <sys/aio_impl.h>
     41 #include <sys/epm.h>
     42 #include <sys/fs/snode.h>
     43 #include <sys/siginfo.h>
     44 #include <sys/cpuvar.h>
     45 #include <sys/tnf_probe.h>
     46 #include <sys/conf.h>
     47 #include <sys/sdt.h>
     48 
     49 int aphysio(int (*)(), int (*)(), dev_t, int, void (*)(), struct aio_req *);
     50 void aio_done(struct buf *);
     51 void aphysio_unlock(aio_req_t *);
     52 void aio_cleanup(int);
     53 void aio_cleanup_exit(void);
     54 
     55 /*
     56  * private functions
     57  */
     58 static void aio_sigev_send(proc_t *, sigqueue_t *);
     59 static void aio_hash_delete(aio_t *, aio_req_t *);
     60 static void aio_lio_free(aio_t *, aio_lio_t *);
     61 static int aio_cleanup_cleanupq(aio_t *, aio_req_t *, int);
     62 static int aio_cleanup_notifyq(aio_t *, aio_req_t *, int);
     63 static void aio_cleanup_pollq(aio_t *, aio_req_t *, int);
     64 static void aio_cleanup_portq(aio_t *, aio_req_t *, int);
     65 
     66 /*
     67  * async version of physio() that doesn't wait synchronously
     68  * for the driver's strategy routine to complete.
     69  */
     70 
     71 int
     72 aphysio(
     73 	int (*strategy)(struct buf *),
     74 	int (*cancel)(struct buf *),
     75 	dev_t dev,
     76 	int rw,
     77 	void (*mincnt)(struct buf *),
     78 	struct aio_req *aio)
     79 {
     80 	struct uio *uio = aio->aio_uio;
     81 	aio_req_t *reqp = (aio_req_t *)aio->aio_private;
     82 	struct buf *bp = &reqp->aio_req_buf;
     83 	struct iovec *iov;
     84 	struct as *as;
     85 	char *a;
     86 	int	error;
     87 	size_t	c;
     88 	struct page **pplist;
     89 	struct dev_ops *ops = devopsp[getmajor(dev)];
     90 
     91 	if (uio->uio_loffset < 0)
     92 		return (EINVAL);
     93 #ifdef	_ILP32
     94 	/*
     95 	 * For 32-bit kernels, check against SPEC_MAXOFFSET_T which represents
     96 	 * the maximum size that can be supported by the IO subsystem.
     97 	 * XXX this code assumes a D_64BIT driver.
     98 	 */
     99 	if (uio->uio_loffset > SPEC_MAXOFFSET_T)
    100 		return (EINVAL);
    101 #endif	/* _ILP32 */
    102 
    103 	TNF_PROBE_5(aphysio_start, "kaio", /* CSTYLED */,
    104 	    tnf_opaque, bp, bp,
    105 	    tnf_device, device, dev,
    106 	    tnf_offset, blkno, btodt(uio->uio_loffset),
    107 	    tnf_size, size, uio->uio_iov->iov_len,
    108 	    tnf_bioflags, rw, rw);
    109 
    110 	if (rw == B_READ) {
    111 		CPU_STATS_ADD_K(sys, phread, 1);
    112 	} else {
    113 		CPU_STATS_ADD_K(sys, phwrite, 1);
    114 	}
    115 
    116 	iov = uio->uio_iov;
    117 	sema_init(&bp->b_sem, 0, NULL, SEMA_DEFAULT, NULL);
    118 	sema_init(&bp->b_io, 0, NULL, SEMA_DEFAULT, NULL);
    119 
    120 	bp->b_error = 0;
    121 	bp->b_flags = B_BUSY | B_PHYS | B_ASYNC | rw;
    122 	bp->b_edev = dev;
    123 	bp->b_dev = cmpdev(dev);
    124 	bp->b_lblkno = btodt(uio->uio_loffset);
    125 	bp->b_offset = uio->uio_loffset;
    126 	(void) ops->devo_getinfo(NULL, DDI_INFO_DEVT2DEVINFO,
    127 	    (void *)bp->b_edev, (void **)&bp->b_dip);
    128 
    129 	/*
    130 	 * Clustering: Clustering can set the b_iodone, b_forw and
    131 	 * b_proc fields to cluster-specifc values.
    132 	 */
    133 	if (bp->b_iodone == NULL) {
    134 		bp->b_iodone = (int (*)()) aio_done;
    135 		/* b_forw points at an aio_req_t structure */
    136 		bp->b_forw = (struct buf *)reqp;
    137 		bp->b_proc = curproc;
    138 	}
    139 
    140 	a = bp->b_un.b_addr = iov->iov_base;
    141 	c = bp->b_bcount = iov->iov_len;
    142 
    143 	(*mincnt)(bp);
    144 	if (bp->b_bcount != iov->iov_len)
    145 		return (ENOTSUP);
    146 
    147 	as = bp->b_proc->p_as;
    148 
    149 	error = as_pagelock(as, &pplist, a,
    150 	    c, rw == B_READ? S_WRITE : S_READ);
    151 	if (error != 0) {
    152 		bp->b_flags |= B_ERROR;
    153 		bp->b_error = error;
    154 		bp->b_flags &= ~(B_BUSY|B_WANTED|B_PHYS|B_SHADOW);
    155 		return (error);
    156 	}
    157 	reqp->aio_req_flags |= AIO_PAGELOCKDONE;
    158 	bp->b_shadow = pplist;
    159 	if (pplist != NULL) {
    160 		bp->b_flags |= B_SHADOW;
    161 	}
    162 
    163 	if (cancel != anocancel)
    164 		cmn_err(CE_PANIC,
    165 		    "aphysio: cancellation not supported, use anocancel");
    166 
    167 	reqp->aio_req_cancel = cancel;
    168 
    169 	DTRACE_IO1(start, struct buf *, bp);
    170 
    171 	return ((*strategy)(bp));
    172 }
    173 
    174 /*ARGSUSED*/
    175 int
    176 anocancel(struct buf *bp)
    177 {
    178 	return (ENXIO);
    179 }
    180 
    181 /*
    182  * Called from biodone().
    183  * Notify process that a pending AIO has finished.
    184  */
    185 
    186 /*
    187  * Clustering: This function is made non-static as it is used
    188  * by clustering s/w as contract private interface.
    189  */
    190 
    191 void
    192 aio_done(struct buf *bp)
    193 {
    194 	proc_t *p;
    195 	struct as *as;
    196 	aio_req_t *reqp;
    197 	aio_lio_t *head = NULL;
    198 	aio_t *aiop;
    199 	sigqueue_t *sigev = NULL;
    200 	sigqueue_t *lio_sigev = NULL;
    201 	port_kevent_t *pkevp = NULL;
    202 	port_kevent_t *lio_pkevp = NULL;
    203 	int fd;
    204 	int cleanupqflag;
    205 	int pollqflag;
    206 	int portevpend;
    207 	void (*func)();
    208 	int use_port = 0;
    209 	int reqp_flags = 0;
    210 	int send_signal = 0;
    211 
    212 	p = bp->b_proc;
    213 	as = p->p_as;
    214 	reqp = (aio_req_t *)bp->b_forw;
    215 	fd = reqp->aio_req_fd;
    216 
    217 	TNF_PROBE_5(aphysio_end, "kaio", /* CSTYLED */,
    218 	    tnf_opaque, bp, bp,
    219 	    tnf_device, device, bp->b_edev,
    220 	    tnf_offset, blkno, btodt(reqp->aio_req_uio.uio_loffset),
    221 	    tnf_size, size, reqp->aio_req_uio.uio_iov->iov_len,
    222 	    tnf_bioflags, rw, (bp->b_flags & (B_READ|B_WRITE)));
    223 
    224 	/*
    225 	 * mapout earlier so that more kmem is available when aio is
    226 	 * heavily used. bug #1262082
    227 	 */
    228 	if (bp->b_flags & B_REMAPPED)
    229 		bp_mapout(bp);
    230 
    231 	/* decrement fd's ref count by one, now that aio request is done. */
    232 	areleasef(fd, P_FINFO(p));
    233 
    234 	aiop = p->p_aio;
    235 	ASSERT(aiop != NULL);
    236 
    237 	mutex_enter(&aiop->aio_portq_mutex);
    238 	mutex_enter(&aiop->aio_mutex);
    239 	ASSERT(aiop->aio_pending > 0);
    240 	ASSERT(reqp->aio_req_flags & AIO_PENDING);
    241 	aiop->aio_pending--;
    242 	reqp->aio_req_flags &= ~AIO_PENDING;
    243 	reqp_flags = reqp->aio_req_flags;
    244 	if ((pkevp = reqp->aio_req_portkev) != NULL) {
    245 		/* Event port notification is desired for this transaction */
    246 		if (reqp->aio_req_flags & AIO_CLOSE_PORT) {
    247 			/*
    248 			 * The port is being closed and it is waiting for
    249 			 * pending asynchronous I/O transactions to complete.
    250 			 */
    251 			portevpend = --aiop->aio_portpendcnt;
    252 			aio_deq(&aiop->aio_portpending, reqp);
    253 			aio_enq(&aiop->aio_portq, reqp, 0);
    254 			mutex_exit(&aiop->aio_mutex);
    255 			mutex_exit(&aiop->aio_portq_mutex);
    256 			port_send_event(pkevp);
    257 			if (portevpend == 0)
    258 				cv_broadcast(&aiop->aio_portcv);
    259 			return;
    260 		}
    261 
    262 		if (aiop->aio_flags & AIO_CLEANUP) {
    263 			/*
    264 			 * aio_cleanup_thread() is waiting for completion of
    265 			 * transactions.
    266 			 */
    267 			mutex_enter(&as->a_contents);
    268 			aio_deq(&aiop->aio_portpending, reqp);
    269 			aio_enq(&aiop->aio_portcleanupq, reqp, 0);
    270 			cv_signal(&aiop->aio_cleanupcv);
    271 			mutex_exit(&as->a_contents);
    272 			mutex_exit(&aiop->aio_mutex);
    273 			mutex_exit(&aiop->aio_portq_mutex);
    274 			return;
    275 		}
    276 
    277 		aio_deq(&aiop->aio_portpending, reqp);
    278 		aio_enq(&aiop->aio_portq, reqp, 0);
    279 
    280 		use_port = 1;
    281 	} else {
    282 		/*
    283 		 * when the AIO_CLEANUP flag is enabled for this
    284 		 * process, or when the AIO_POLL bit is set for
    285 		 * this request, special handling is required.
    286 		 * otherwise the request is put onto the doneq.
    287 		 */
    288 		cleanupqflag = (aiop->aio_flags & AIO_CLEANUP);
    289 		pollqflag = (reqp->aio_req_flags & AIO_POLL);
    290 		if (cleanupqflag | pollqflag) {
    291 
    292 			if (cleanupqflag)
    293 				mutex_enter(&as->a_contents);
    294 
    295 			/*
    296 			 * requests with their AIO_POLL bit set are put
    297 			 * on the pollq, requests with sigevent structures
    298 			 * or with listio heads are put on the notifyq, and
    299 			 * the remaining requests don't require any special
    300 			 * cleanup handling, so they're put onto the default
    301 			 * cleanupq.
    302 			 */
    303 			if (pollqflag)
    304 				aio_enq(&aiop->aio_pollq, reqp, AIO_POLLQ);
    305 			else if (reqp->aio_req_sigqp || reqp->aio_req_lio)
    306 				aio_enq(&aiop->aio_notifyq, reqp, AIO_NOTIFYQ);
    307 			else
    308 				aio_enq(&aiop->aio_cleanupq, reqp,
    309 				    AIO_CLEANUPQ);
    310 
    311 			if (cleanupqflag) {
    312 				cv_signal(&aiop->aio_cleanupcv);
    313 				mutex_exit(&as->a_contents);
    314 				mutex_exit(&aiop->aio_mutex);
    315 				mutex_exit(&aiop->aio_portq_mutex);
    316 			} else {
    317 				ASSERT(pollqflag);
    318 				/* block aio_cleanup_exit until we're done */
    319 				aiop->aio_flags |= AIO_DONE_ACTIVE;
    320 				mutex_exit(&aiop->aio_mutex);
    321 				mutex_exit(&aiop->aio_portq_mutex);
    322 				/*
    323 				 * let the cleanup processing happen from an AST
    324 				 * set an AST on all threads in this process
    325 				 */
    326 				mutex_enter(&p->p_lock);
    327 				set_proc_ast(p);
    328 				mutex_exit(&p->p_lock);
    329 				mutex_enter(&aiop->aio_mutex);
    330 				/* wakeup anybody waiting in aiowait() */
    331 				cv_broadcast(&aiop->aio_waitcv);
    332 
    333 				/* wakeup aio_cleanup_exit if needed */
    334 				if (aiop->aio_flags & AIO_CLEANUP)
    335 					cv_signal(&aiop->aio_cleanupcv);
    336 				aiop->aio_flags &= ~AIO_DONE_ACTIVE;
    337 				mutex_exit(&aiop->aio_mutex);
    338 			}
    339 			return;
    340 		}
    341 
    342 		/*
    343 		 * save req's sigevent pointer, and check its
    344 		 * value after releasing aio_mutex lock.
    345 		 */
    346 		sigev = reqp->aio_req_sigqp;
    347 		reqp->aio_req_sigqp = NULL;
    348 
    349 		/* put request on done queue. */
    350 		aio_enq(&aiop->aio_doneq, reqp, AIO_DONEQ);
    351 	} /* portkevent */
    352 
    353 	/*
    354 	 * when list IO notification is enabled, a notification or
    355 	 * signal is sent only when all entries in the list are done.
    356 	 */
    357 	if ((head = reqp->aio_req_lio) != NULL) {
    358 		ASSERT(head->lio_refcnt > 0);
    359 		if (--head->lio_refcnt == 0) {
    360 			/*
    361 			 * save lio's sigevent pointer, and check
    362 			 * its value after releasing aio_mutex lock.
    363 			 */
    364 			lio_sigev = head->lio_sigqp;
    365 			head->lio_sigqp = NULL;
    366 			cv_signal(&head->lio_notify);
    367 			if (head->lio_port >= 0 &&
    368 			    (lio_pkevp = head->lio_portkev) != NULL)
    369 				head->lio_port = -1;
    370 		}
    371 	}
    372 
    373 	/*
    374 	 * if AIO_WAITN set then
    375 	 * send signal only when we reached the
    376 	 * required amount of IO's finished
    377 	 * or when all IO's are done
    378 	 */
    379 	if (aiop->aio_flags & AIO_WAITN) {
    380 		if (aiop->aio_waitncnt > 0)
    381 			aiop->aio_waitncnt--;
    382 		if (aiop->aio_pending == 0 ||
    383 		    aiop->aio_waitncnt == 0)
    384 			cv_broadcast(&aiop->aio_waitcv);
    385 	} else {
    386 		cv_broadcast(&aiop->aio_waitcv);
    387 	}
    388 
    389 	/*
    390 	 * No need to set this flag for pollq, portq, lio requests.
    391 	 * If this is an old Solaris aio request, and the process has
    392 	 * a SIGIO signal handler enabled, then send a SIGIO signal.
    393 	 */
    394 	if (!sigev && !use_port && head == NULL &&
    395 	    (reqp->aio_req_flags & AIO_SOLARIS) &&
    396 	    (func = PTOU(p)->u_signal[SIGIO - 1]) != SIG_DFL &&
    397 	    (func != SIG_IGN)) {
    398 		send_signal = 1;
    399 		reqp->aio_req_flags |= AIO_SIGNALLED;
    400 	}
    401 
    402 	mutex_exit(&aiop->aio_mutex);
    403 	mutex_exit(&aiop->aio_portq_mutex);
    404 
    405 	/*
    406 	 * Could the cleanup thread be waiting for AIO with locked
    407 	 * resources to finish?
    408 	 * Ideally in that case cleanup thread should block on cleanupcv,
    409 	 * but there is a window, where it could miss to see a new aio
    410 	 * request that sneaked in.
    411 	 */
    412 	mutex_enter(&as->a_contents);
    413 	if ((reqp_flags & AIO_PAGELOCKDONE) && AS_ISUNMAPWAIT(as))
    414 		cv_broadcast(&as->a_cv);
    415 	mutex_exit(&as->a_contents);
    416 
    417 	if (sigev)
    418 		aio_sigev_send(p, sigev);
    419 	else if (send_signal)
    420 		psignal(p, SIGIO);
    421 
    422 	if (pkevp)
    423 		port_send_event(pkevp);
    424 	if (lio_sigev)
    425 		aio_sigev_send(p, lio_sigev);
    426 	if (lio_pkevp)
    427 		port_send_event(lio_pkevp);
    428 }
    429 
    430 /*
    431  * send a queued signal to the specified process when
    432  * the event signal is non-NULL. A return value of 1
    433  * will indicate that a signal is queued, and 0 means that
    434  * no signal was specified, nor sent.
    435  */
    436 static void
    437 aio_sigev_send(proc_t *p, sigqueue_t *sigev)
    438 {
    439 	ASSERT(sigev != NULL);
    440 
    441 	mutex_enter(&p->p_lock);
    442 	sigaddqa(p, NULL, sigev);
    443 	mutex_exit(&p->p_lock);
    444 }
    445 
    446 /*
    447  * special case handling for zero length requests. the aio request
    448  * short circuits the normal completion path since all that's required
    449  * to complete this request is to copyout a zero to the aio request's
    450  * return value.
    451  */
    452 void
    453 aio_zerolen(aio_req_t *reqp)
    454 {
    455 
    456 	struct buf *bp = &reqp->aio_req_buf;
    457 
    458 	reqp->aio_req_flags |= AIO_ZEROLEN;
    459 
    460 	bp->b_forw = (struct buf *)reqp;
    461 	bp->b_proc = curproc;
    462 
    463 	bp->b_resid = 0;
    464 	bp->b_flags = 0;
    465 
    466 	aio_done(bp);
    467 }
    468 
    469 /*
    470  * unlock pages previously locked by as_pagelock
    471  */
    472 void
    473 aphysio_unlock(aio_req_t *reqp)
    474 {
    475 	struct buf *bp;
    476 	struct iovec *iov;
    477 	int flags;
    478 
    479 	if (reqp->aio_req_flags & AIO_PHYSIODONE)
    480 		return;
    481 
    482 	reqp->aio_req_flags |= AIO_PHYSIODONE;
    483 
    484 	if (reqp->aio_req_flags & AIO_ZEROLEN)
    485 		return;
    486 
    487 	bp = &reqp->aio_req_buf;
    488 	iov = reqp->aio_req_uio.uio_iov;
    489 	flags = (((bp->b_flags & B_READ) == B_READ) ? S_WRITE : S_READ);
    490 	if (reqp->aio_req_flags & AIO_PAGELOCKDONE) {
    491 		as_pageunlock(bp->b_proc->p_as,
    492 		    bp->b_flags & B_SHADOW ? bp->b_shadow : NULL,
    493 		    iov->iov_base, iov->iov_len, flags);
    494 		reqp->aio_req_flags &= ~AIO_PAGELOCKDONE;
    495 	}
    496 	bp->b_flags &= ~(B_BUSY|B_WANTED|B_PHYS|B_SHADOW);
    497 	bp->b_flags |= B_DONE;
    498 }
    499 
    500 /*
    501  * deletes a requests id from the hash table of outstanding io.
    502  */
    503 static void
    504 aio_hash_delete(aio_t *aiop, struct aio_req_t *reqp)
    505 {
    506 	long index;
    507 	aio_result_t *resultp = reqp->aio_req_resultp;
    508 	aio_req_t *current;
    509 	aio_req_t **nextp;
    510 
    511 	index = AIO_HASH(resultp);
    512 	nextp = (aiop->aio_hash + index);
    513 	while ((current = *nextp) != NULL) {
    514 		if (current->aio_req_resultp == resultp) {
    515 			*nextp = current->aio_hash_next;
    516 			return;
    517 		}
    518 		nextp = &current->aio_hash_next;
    519 	}
    520 }
    521 
    522 /*
    523  * Put a list head struct onto its free list.
    524  */
    525 static void
    526 aio_lio_free(aio_t *aiop, aio_lio_t *head)
    527 {
    528 	ASSERT(MUTEX_HELD(&aiop->aio_mutex));
    529 
    530 	if (head->lio_sigqp != NULL)
    531 		kmem_free(head->lio_sigqp, sizeof (sigqueue_t));
    532 	head->lio_next = aiop->aio_lio_free;
    533 	aiop->aio_lio_free = head;
    534 }
    535 
    536 /*
    537  * Put a reqp onto the freelist.
    538  */
    539 void
    540 aio_req_free(aio_t *aiop, aio_req_t *reqp)
    541 {
    542 	aio_lio_t *liop;
    543 
    544 	ASSERT(MUTEX_HELD(&aiop->aio_mutex));
    545 
    546 	if (reqp->aio_req_portkev) {
    547 		port_free_event(reqp->aio_req_portkev);
    548 		reqp->aio_req_portkev = NULL;
    549 	}
    550 
    551 	if ((liop = reqp->aio_req_lio) != NULL) {
    552 		if (--liop->lio_nent == 0)
    553 			aio_lio_free(aiop, liop);
    554 		reqp->aio_req_lio = NULL;
    555 	}
    556 	if (reqp->aio_req_sigqp != NULL) {
    557 		kmem_free(reqp->aio_req_sigqp, sizeof (sigqueue_t));
    558 		reqp->aio_req_sigqp = NULL;
    559 	}
    560 	reqp->aio_req_next = aiop->aio_free;
    561 	reqp->aio_req_prev = NULL;
    562 	aiop->aio_free = reqp;
    563 	aiop->aio_outstanding--;
    564 	if (aiop->aio_outstanding == 0)
    565 		cv_broadcast(&aiop->aio_waitcv);
    566 	aio_hash_delete(aiop, reqp);
    567 }
    568 
    569 /*
    570  * Put a reqp onto the freelist.
    571  */
    572 void
    573 aio_req_free_port(aio_t *aiop, aio_req_t *reqp)
    574 {
    575 	ASSERT(MUTEX_HELD(&aiop->aio_mutex));
    576 
    577 	reqp->aio_req_next = aiop->aio_free;
    578 	reqp->aio_req_prev = NULL;
    579 	aiop->aio_free = reqp;
    580 	aiop->aio_outstanding--;
    581 	aio_hash_delete(aiop, reqp);
    582 }
    583 
    584 
    585 /*
    586  * Verify the integrity of a queue.
    587  */
    588 #if defined(DEBUG)
    589 static void
    590 aio_verify_queue(aio_req_t *head,
    591 	aio_req_t *entry_present, aio_req_t *entry_missing)
    592 {
    593 	aio_req_t *reqp;
    594 	int found = 0;
    595 	int present = 0;
    596 
    597 	if ((reqp = head) != NULL) {
    598 		do {
    599 			ASSERT(reqp->aio_req_prev->aio_req_next == reqp);
    600 			ASSERT(reqp->aio_req_next->aio_req_prev == reqp);
    601 			if (entry_present == reqp)
    602 				found++;
    603 			if (entry_missing == reqp)
    604 				present++;
    605 		} while ((reqp = reqp->aio_req_next) != head);
    606 	}
    607 	ASSERT(entry_present == NULL || found == 1);
    608 	ASSERT(entry_missing == NULL || present == 0);
    609 }
    610 #else
    611 #define	aio_verify_queue(x, y, z)
    612 #endif
    613 
    614 /*
    615  * Put a request onto the tail of a queue.
    616  */
    617 void
    618 aio_enq(aio_req_t **qhead, aio_req_t *reqp, int qflg_new)
    619 {
    620 	aio_req_t *head;
    621 	aio_req_t *prev;
    622 
    623 	aio_verify_queue(*qhead, NULL, reqp);
    624 
    625 	if ((head = *qhead) == NULL) {
    626 		reqp->aio_req_next = reqp;
    627 		reqp->aio_req_prev = reqp;
    628 		*qhead = reqp;
    629 	} else {
    630 		reqp->aio_req_next = head;
    631 		reqp->aio_req_prev = prev = head->aio_req_prev;
    632 		prev->aio_req_next = reqp;
    633 		head->aio_req_prev = reqp;
    634 	}
    635 	reqp->aio_req_flags |= qflg_new;
    636 }
    637 
    638 /*
    639  * Remove a request from its queue.
    640  */
    641 void
    642 aio_deq(aio_req_t **qhead, aio_req_t *reqp)
    643 {
    644 	aio_verify_queue(*qhead, reqp, NULL);
    645 
    646 	if (reqp->aio_req_next == reqp) {
    647 		*qhead = NULL;
    648 	} else {
    649 		reqp->aio_req_prev->aio_req_next = reqp->aio_req_next;
    650 		reqp->aio_req_next->aio_req_prev = reqp->aio_req_prev;
    651 		if (*qhead == reqp)
    652 			*qhead = reqp->aio_req_next;
    653 	}
    654 	reqp->aio_req_next = NULL;
    655 	reqp->aio_req_prev = NULL;
    656 }
    657 
    658 /*
    659  * concatenate a specified queue with the cleanupq. the specified
    660  * queue is put onto the tail of the cleanupq. all elements on the
    661  * specified queue should have their aio_req_flags field cleared.
    662  */
    663 /*ARGSUSED*/
    664 void
    665 aio_cleanupq_concat(aio_t *aiop, aio_req_t *q2, int qflg)
    666 {
    667 	aio_req_t *cleanupqhead, *q2tail;
    668 	aio_req_t *reqp = q2;
    669 
    670 	do {
    671 		ASSERT(reqp->aio_req_flags & qflg);
    672 		reqp->aio_req_flags &= ~qflg;
    673 		reqp->aio_req_flags |= AIO_CLEANUPQ;
    674 	} while ((reqp = reqp->aio_req_next) != q2);
    675 
    676 	cleanupqhead = aiop->aio_cleanupq;
    677 	if (cleanupqhead == NULL)
    678 		aiop->aio_cleanupq = q2;
    679 	else {
    680 		cleanupqhead->aio_req_prev->aio_req_next = q2;
    681 		q2tail = q2->aio_req_prev;
    682 		q2tail->aio_req_next = cleanupqhead;
    683 		q2->aio_req_prev = cleanupqhead->aio_req_prev;
    684 		cleanupqhead->aio_req_prev = q2tail;
    685 	}
    686 }
    687 
    688 /*
    689  * cleanup aio requests that are on the per-process poll queue.
    690  */
    691 void
    692 aio_cleanup(int flag)
    693 {
    694 	aio_t *aiop = curproc->p_aio;
    695 	aio_req_t *pollqhead, *cleanupqhead, *notifyqhead;
    696 	aio_req_t *cleanupport;
    697 	aio_req_t *portq = NULL;
    698 	void (*func)();
    699 	int signalled = 0;
    700 	int qflag = 0;
    701 	int exitflg;
    702 
    703 	ASSERT(aiop != NULL);
    704 
    705 	if (flag == AIO_CLEANUP_EXIT)
    706 		exitflg = AIO_CLEANUP_EXIT;
    707 	else
    708 		exitflg = 0;
    709 
    710 	/*
    711 	 * We need to get the aio_cleanupq_mutex because we are calling
    712 	 * aio_cleanup_cleanupq()
    713 	 */
    714 	mutex_enter(&aiop->aio_cleanupq_mutex);
    715 	/*
    716 	 * take all the requests off the cleanupq, the notifyq,
    717 	 * and the pollq.
    718 	 */
    719 	mutex_enter(&aiop->aio_mutex);
    720 	if ((cleanupqhead = aiop->aio_cleanupq) != NULL) {
    721 		aiop->aio_cleanupq = NULL;
    722 		qflag++;
    723 	}
    724 	if ((notifyqhead = aiop->aio_notifyq) != NULL) {
    725 		aiop->aio_notifyq = NULL;
    726 		qflag++;
    727 	}
    728 	if ((pollqhead = aiop->aio_pollq) != NULL) {
    729 		aiop->aio_pollq = NULL;
    730 		qflag++;
    731 	}
    732 	if (flag) {
    733 		if ((portq = aiop->aio_portq) != NULL)
    734 			qflag++;
    735 
    736 		if ((cleanupport = aiop->aio_portcleanupq) != NULL) {
    737 			aiop->aio_portcleanupq = NULL;
    738 			qflag++;
    739 		}
    740 	}
    741 	mutex_exit(&aiop->aio_mutex);
    742 
    743 	/*
    744 	 * return immediately if cleanupq, pollq, and
    745 	 * notifyq are all empty. someone else must have
    746 	 * emptied them.
    747 	 */
    748 	if (!qflag) {
    749 		mutex_exit(&aiop->aio_cleanupq_mutex);
    750 		return;
    751 	}
    752 
    753 	/*
    754 	 * do cleanup for the various queues.
    755 	 */
    756 	if (cleanupqhead)
    757 		signalled = aio_cleanup_cleanupq(aiop, cleanupqhead, exitflg);
    758 	mutex_exit(&aiop->aio_cleanupq_mutex);
    759 	if (notifyqhead)
    760 		signalled = aio_cleanup_notifyq(aiop, notifyqhead, exitflg);
    761 	if (pollqhead)
    762 		aio_cleanup_pollq(aiop, pollqhead, exitflg);
    763 	if (flag && (cleanupport || portq))
    764 		aio_cleanup_portq(aiop, cleanupport, exitflg);
    765 
    766 	if (exitflg)
    767 		return;
    768 
    769 	/*
    770 	 * If we have an active aio_cleanup_thread it's possible for
    771 	 * this routine to push something on to the done queue after
    772 	 * an aiowait/aiosuspend thread has already decided to block.
    773 	 * This being the case, we need a cv_broadcast here to wake
    774 	 * these threads up. It is simpler and cleaner to do this
    775 	 * broadcast here than in the individual cleanup routines.
    776 	 */
    777 
    778 	mutex_enter(&aiop->aio_mutex);
    779 	/*
    780 	 * If there has never been an old solaris aio request
    781 	 * issued by this process, then do not send a SIGIO signal.
    782 	 */
    783 	if (!(aiop->aio_flags & AIO_SOLARIS_REQ))
    784 		signalled = 1;
    785 	cv_broadcast(&aiop->aio_waitcv);
    786 	mutex_exit(&aiop->aio_mutex);
    787 
    788 	/*
    789 	 * Only if the process wasn't already signalled,
    790 	 * determine if a SIGIO signal should be delievered.
    791 	 */
    792 	if (!signalled &&
    793 	    (func = PTOU(curproc)->u_signal[SIGIO - 1]) != SIG_DFL &&
    794 	    func != SIG_IGN)
    795 		psignal(curproc, SIGIO);
    796 }
    797 
    798 
    799 /*
    800  * Do cleanup for every element of the port cleanup queue.
    801  */
    802 static void
    803 aio_cleanup_portq(aio_t *aiop, aio_req_t *cleanupq, int exitflag)
    804 {
    805 	aio_req_t	*reqp;
    806 	aio_req_t	*next;
    807 	aio_req_t	*headp;
    808 	aio_lio_t	*liop;
    809 
    810 	/* first check the portq */
    811 	if (exitflag || ((aiop->aio_flags & AIO_CLEANUP_PORT) == 0)) {
    812 		mutex_enter(&aiop->aio_mutex);
    813 		if (aiop->aio_flags & AIO_CLEANUP)
    814 			aiop->aio_flags |= AIO_CLEANUP_PORT;
    815 		mutex_exit(&aiop->aio_mutex);
    816 
    817 		/*
    818 		 * It is not allowed to hold locks during aphysio_unlock().
    819 		 * The aio_done() interrupt function will try to acquire
    820 		 * aio_mutex and aio_portq_mutex.  Therefore we disconnect
    821 		 * the portq list from the aiop for the duration of the
    822 		 * aphysio_unlock() loop below.
    823 		 */
    824 		mutex_enter(&aiop->aio_portq_mutex);
    825 		headp = aiop->aio_portq;
    826 		aiop->aio_portq = NULL;
    827 		mutex_exit(&aiop->aio_portq_mutex);
    828 		if ((reqp = headp) != NULL) {
    829 			do {
    830 				next = reqp->aio_req_next;
    831 				aphysio_unlock(reqp);
    832 				if (exitflag) {
    833 					mutex_enter(&aiop->aio_mutex);
    834 					aio_req_free(aiop, reqp);
    835 					mutex_exit(&aiop->aio_mutex);
    836 				}
    837 			} while ((reqp = next) != headp);
    838 		}
    839 
    840 		if (headp != NULL && exitflag == 0) {
    841 			/* move unlocked requests back to the port queue */
    842 			aio_req_t *newq;
    843 
    844 			mutex_enter(&aiop->aio_portq_mutex);
    845 			if ((newq = aiop->aio_portq) != NULL) {
    846 				aio_req_t *headprev = headp->aio_req_prev;
    847 				aio_req_t *newqprev = newq->aio_req_prev;
    848 
    849 				headp->aio_req_prev = newqprev;
    850 				newq->aio_req_prev = headprev;
    851 				headprev->aio_req_next = newq;
    852 				newqprev->aio_req_next = headp;
    853 			}
    854 			aiop->aio_portq = headp;
    855 			cv_broadcast(&aiop->aio_portcv);
    856 			mutex_exit(&aiop->aio_portq_mutex);
    857 		}
    858 	}
    859 
    860 	/* now check the port cleanup queue */
    861 	if ((reqp = cleanupq) == NULL)
    862 		return;
    863 	do {
    864 		next = reqp->aio_req_next;
    865 		aphysio_unlock(reqp);
    866 		if (exitflag) {
    867 			mutex_enter(&aiop->aio_mutex);
    868 			aio_req_free(aiop, reqp);
    869 			mutex_exit(&aiop->aio_mutex);
    870 		} else {
    871 			mutex_enter(&aiop->aio_portq_mutex);
    872 			aio_enq(&aiop->aio_portq, reqp, 0);
    873 			mutex_exit(&aiop->aio_portq_mutex);
    874 			port_send_event(reqp->aio_req_portkev);
    875 			if ((liop = reqp->aio_req_lio) != NULL) {
    876 				int send_event = 0;
    877 
    878 				mutex_enter(&aiop->aio_mutex);
    879 				ASSERT(liop->lio_refcnt > 0);
    880 				if (--liop->lio_refcnt == 0) {
    881 					if (liop->lio_port >= 0 &&
    882 					    liop->lio_portkev) {
    883 						liop->lio_port = -1;
    884 						send_event = 1;
    885 					}
    886 				}
    887 				mutex_exit(&aiop->aio_mutex);
    888 				if (send_event)
    889 					port_send_event(liop->lio_portkev);
    890 			}
    891 		}
    892 	} while ((reqp = next) != cleanupq);
    893 }
    894 
    895 /*
    896  * Do cleanup for every element of the cleanupq.
    897  */
    898 static int
    899 aio_cleanup_cleanupq(aio_t *aiop, aio_req_t *qhead, int exitflg)
    900 {
    901 	aio_req_t *reqp, *next;
    902 	int signalled = 0;
    903 
    904 	ASSERT(MUTEX_HELD(&aiop->aio_cleanupq_mutex));
    905 
    906 	/*
    907 	 * Since aio_req_done() or aio_req_find() use the HASH list to find
    908 	 * the required requests, they could potentially take away elements
    909 	 * if they are already done (AIO_DONEQ is set).
    910 	 * The aio_cleanupq_mutex protects the queue for the duration of the
    911 	 * loop from aio_req_done() and aio_req_find().
    912 	 */
    913 	if ((reqp = qhead) == NULL)
    914 		return (0);
    915 	do {
    916 		ASSERT(reqp->aio_req_flags & AIO_CLEANUPQ);
    917 		ASSERT(reqp->aio_req_portkev == NULL);
    918 		next = reqp->aio_req_next;
    919 		aphysio_unlock(reqp);
    920 		mutex_enter(&aiop->aio_mutex);
    921 		if (exitflg)
    922 			aio_req_free(aiop, reqp);
    923 		else
    924 			aio_enq(&aiop->aio_doneq, reqp, AIO_DONEQ);
    925 		if (!exitflg) {
    926 			if (reqp->aio_req_flags & AIO_SIGNALLED)
    927 				signalled++;
    928 			else
    929 				reqp->aio_req_flags |= AIO_SIGNALLED;
    930 		}
    931 		mutex_exit(&aiop->aio_mutex);
    932 	} while ((reqp = next) != qhead);
    933 	return (signalled);
    934 }
    935 
    936 /*
    937  * do cleanup for every element of the notify queue.
    938  */
    939 static int
    940 aio_cleanup_notifyq(aio_t *aiop, aio_req_t *qhead, int exitflg)
    941 {
    942 	aio_req_t *reqp, *next;
    943 	aio_lio_t *liohead;
    944 	sigqueue_t *sigev, *lio_sigev = NULL;
    945 	int signalled = 0;
    946 
    947 	if ((reqp = qhead) == NULL)
    948 		return (0);
    949 	do {
    950 		ASSERT(reqp->aio_req_flags & AIO_NOTIFYQ);
    951 		next = reqp->aio_req_next;
    952 		aphysio_unlock(reqp);
    953 		if (exitflg) {
    954 			mutex_enter(&aiop->aio_mutex);
    955 			aio_req_free(aiop, reqp);
    956 			mutex_exit(&aiop->aio_mutex);
    957 		} else {
    958 			mutex_enter(&aiop->aio_mutex);
    959 			aio_enq(&aiop->aio_doneq, reqp, AIO_DONEQ);
    960 			sigev = reqp->aio_req_sigqp;
    961 			reqp->aio_req_sigqp = NULL;
    962 			if ((liohead = reqp->aio_req_lio) != NULL) {
    963 				ASSERT(liohead->lio_refcnt > 0);
    964 				if (--liohead->lio_refcnt == 0) {
    965 					cv_signal(&liohead->lio_notify);
    966 					lio_sigev = liohead->lio_sigqp;
    967 					liohead->lio_sigqp = NULL;
    968 				}
    969 			}
    970 			mutex_exit(&aiop->aio_mutex);
    971 			if (sigev) {
    972 				signalled++;
    973 				aio_sigev_send(reqp->aio_req_buf.b_proc,
    974 				    sigev);
    975 			}
    976 			if (lio_sigev) {
    977 				signalled++;
    978 				aio_sigev_send(reqp->aio_req_buf.b_proc,
    979 				    lio_sigev);
    980 			}
    981 		}
    982 	} while ((reqp = next) != qhead);
    983 
    984 	return (signalled);
    985 }
    986 
    987 /*
    988  * Do cleanup for every element of the poll queue.
    989  */
    990 static void
    991 aio_cleanup_pollq(aio_t *aiop, aio_req_t *qhead, int exitflg)
    992 {
    993 	aio_req_t *reqp, *next;
    994 
    995 	/*
    996 	 * As no other threads should be accessing the queue at this point,
    997 	 * it isn't necessary to hold aio_mutex while we traverse its elements.
    998 	 */
    999 	if ((reqp = qhead) == NULL)
   1000 		return;
   1001 	do {
   1002 		ASSERT(reqp->aio_req_flags & AIO_POLLQ);
   1003 		next = reqp->aio_req_next;
   1004 		aphysio_unlock(reqp);
   1005 		if (exitflg) {
   1006 			mutex_enter(&aiop->aio_mutex);
   1007 			aio_req_free(aiop, reqp);
   1008 			mutex_exit(&aiop->aio_mutex);
   1009 		} else {
   1010 			aio_copyout_result(reqp);
   1011 			mutex_enter(&aiop->aio_mutex);
   1012 			aio_enq(&aiop->aio_doneq, reqp, AIO_DONEQ);
   1013 			mutex_exit(&aiop->aio_mutex);
   1014 		}
   1015 	} while ((reqp = next) != qhead);
   1016 }
   1017 
   1018 /*
   1019  * called by exit(). waits for all outstanding kaio to finish
   1020  * before the kaio resources are freed.
   1021  */
   1022 void
   1023 aio_cleanup_exit(void)
   1024 {
   1025 	proc_t *p = curproc;
   1026 	aio_t *aiop = p->p_aio;
   1027 	aio_req_t *reqp, *next, *head;
   1028 	aio_lio_t *nxtlio, *liop;
   1029 
   1030 	/*
   1031 	 * wait for all outstanding kaio to complete. process
   1032 	 * is now single-threaded; no other kaio requests can
   1033 	 * happen once aio_pending is zero.
   1034 	 */
   1035 	mutex_enter(&aiop->aio_mutex);
   1036 	aiop->aio_flags |= AIO_CLEANUP;
   1037 	while ((aiop->aio_pending != 0) || (aiop->aio_flags & AIO_DONE_ACTIVE))
   1038 		cv_wait(&aiop->aio_cleanupcv, &aiop->aio_mutex);
   1039 	mutex_exit(&aiop->aio_mutex);
   1040 
   1041 	/* cleanup the cleanup-thread queues. */
   1042 	aio_cleanup(AIO_CLEANUP_EXIT);
   1043 
   1044 	/*
   1045 	 * Although this process is now single-threaded, we
   1046 	 * still need to protect ourselves against a race with
   1047 	 * aio_cleanup_dr_delete_memory().
   1048 	 */
   1049 	mutex_enter(&p->p_lock);
   1050 
   1051 	/*
   1052 	 * free up the done queue's resources.
   1053 	 */
   1054 	if ((head = aiop->aio_doneq) != NULL) {
   1055 		aiop->aio_doneq = NULL;
   1056 		reqp = head;
   1057 		do {
   1058 			next = reqp->aio_req_next;
   1059 			aphysio_unlock(reqp);
   1060 			kmem_free(reqp, sizeof (struct aio_req_t));
   1061 		} while ((reqp = next) != head);
   1062 	}
   1063 	/*
   1064 	 * release aio request freelist.
   1065 	 */
   1066 	for (reqp = aiop->aio_free; reqp != NULL; reqp = next) {
   1067 		next = reqp->aio_req_next;
   1068 		kmem_free(reqp, sizeof (struct aio_req_t));
   1069 	}
   1070 
   1071 	/*
   1072 	 * release io list head freelist.
   1073 	 */
   1074 	for (liop = aiop->aio_lio_free; liop != NULL; liop = nxtlio) {
   1075 		nxtlio = liop->lio_next;
   1076 		kmem_free(liop, sizeof (aio_lio_t));
   1077 	}
   1078 
   1079 	if (aiop->aio_iocb)
   1080 		kmem_free(aiop->aio_iocb, aiop->aio_iocbsz);
   1081 
   1082 	mutex_destroy(&aiop->aio_mutex);
   1083 	mutex_destroy(&aiop->aio_portq_mutex);
   1084 	mutex_destroy(&aiop->aio_cleanupq_mutex);
   1085 	p->p_aio = NULL;
   1086 	mutex_exit(&p->p_lock);
   1087 	kmem_free(aiop, sizeof (struct aio));
   1088 }
   1089 
   1090 /*
   1091  * copy out aio request's result to a user-level result_t buffer.
   1092  */
   1093 void
   1094 aio_copyout_result(aio_req_t *reqp)
   1095 {
   1096 	struct buf	*bp;
   1097 	struct iovec	*iov;
   1098 	void		*resultp;
   1099 	int		error;
   1100 	size_t		retval;
   1101 
   1102 	if (reqp->aio_req_flags & AIO_COPYOUTDONE)
   1103 		return;
   1104 
   1105 	reqp->aio_req_flags |= AIO_COPYOUTDONE;
   1106 
   1107 	iov = reqp->aio_req_uio.uio_iov;
   1108 	bp = &reqp->aio_req_buf;
   1109 	/* "resultp" points to user-level result_t buffer */
   1110 	resultp = (void *)reqp->aio_req_resultp;
   1111 	if (bp->b_flags & B_ERROR) {
   1112 		if (bp->b_error)
   1113 			error = bp->b_error;
   1114 		else
   1115 			error = EIO;
   1116 		retval = (size_t)-1;
   1117 	} else {
   1118 		error = 0;
   1119 		retval = iov->iov_len - bp->b_resid;
   1120 	}
   1121 #ifdef	_SYSCALL32_IMPL
   1122 	if (get_udatamodel() == DATAMODEL_NATIVE) {
   1123 		(void) sulword(&((aio_result_t *)resultp)->aio_return, retval);
   1124 		(void) suword32(&((aio_result_t *)resultp)->aio_errno, error);
   1125 	} else {
   1126 		(void) suword32(&((aio_result32_t *)resultp)->aio_return,
   1127 		    (int)retval);
   1128 		(void) suword32(&((aio_result32_t *)resultp)->aio_errno, error);
   1129 	}
   1130 #else
   1131 	(void) suword32(&((aio_result_t *)resultp)->aio_return, retval);
   1132 	(void) suword32(&((aio_result_t *)resultp)->aio_errno, error);
   1133 #endif
   1134 }
   1135 
   1136 
   1137 void
   1138 aio_copyout_result_port(struct iovec *iov, struct buf *bp, void *resultp)
   1139 {
   1140 	int errno;
   1141 	size_t retval;
   1142 
   1143 	if (bp->b_flags & B_ERROR) {
   1144 		if (bp->b_error)
   1145 			errno = bp->b_error;
   1146 		else
   1147 			errno = EIO;
   1148 		retval = (size_t)-1;
   1149 	} else {
   1150 		errno = 0;
   1151 		retval = iov->iov_len - bp->b_resid;
   1152 	}
   1153 #ifdef	_SYSCALL32_IMPL
   1154 	if (get_udatamodel() == DATAMODEL_NATIVE) {
   1155 		(void) sulword(&((aio_result_t *)resultp)->aio_return, retval);
   1156 		(void) suword32(&((aio_result_t *)resultp)->aio_errno, errno);
   1157 	} else {
   1158 		(void) suword32(&((aio_result32_t *)resultp)->aio_return,
   1159 		    (int)retval);
   1160 		(void) suword32(&((aio_result32_t *)resultp)->aio_errno, errno);
   1161 	}
   1162 #else
   1163 	(void) suword32(&((aio_result_t *)resultp)->aio_return, retval);
   1164 	(void) suword32(&((aio_result_t *)resultp)->aio_errno, errno);
   1165 #endif
   1166 }
   1167 
   1168 /*
   1169  * This function is used to remove a request from the done queue.
   1170  */
   1171 
   1172 void
   1173 aio_req_remove_portq(aio_t *aiop, aio_req_t *reqp)
   1174 {
   1175 	ASSERT(MUTEX_HELD(&aiop->aio_portq_mutex));
   1176 	while (aiop->aio_portq == NULL) {
   1177 		/*
   1178 		 * aio_portq is set to NULL when aio_cleanup_portq()
   1179 		 * is working with the event queue.
   1180 		 * The aio_cleanup_thread() uses aio_cleanup_portq()
   1181 		 * to unlock all AIO buffers with completed transactions.
   1182 		 * Wait here until aio_cleanup_portq() restores the
   1183 		 * list of completed transactions in aio_portq.
   1184 		 */
   1185 		cv_wait(&aiop->aio_portcv, &aiop->aio_portq_mutex);
   1186 	}
   1187 	aio_deq(&aiop->aio_portq, reqp);
   1188 }
   1189 
   1190 /* ARGSUSED */
   1191 void
   1192 aio_close_port(void *arg, int port, pid_t pid, int lastclose)
   1193 {
   1194 	aio_t		*aiop;
   1195 	aio_req_t 	*reqp;
   1196 	aio_req_t 	*next;
   1197 	aio_req_t	*headp;
   1198 	int		counter;
   1199 
   1200 	if (arg == NULL)
   1201 		aiop = curproc->p_aio;
   1202 	else
   1203 		aiop = (aio_t *)arg;
   1204 
   1205 	/*
   1206 	 * The PORT_SOURCE_AIO source is always associated with every new
   1207 	 * created port by default.
   1208 	 * If no asynchronous I/O transactions were associated with the port
   1209 	 * then the aiop pointer will still be set to NULL.
   1210 	 */
   1211 	if (aiop == NULL)
   1212 		return;
   1213 
   1214 	/*
   1215 	 * Within a process event ports can be used to collect events other
   1216 	 * than PORT_SOURCE_AIO events. At the same time the process can submit
   1217 	 * asynchronous I/Os transactions which are not associated with the
   1218 	 * current port.
   1219 	 * The current process oriented model of AIO uses a sigle queue for
   1220 	 * pending events. On close the pending queue (queue of asynchronous
   1221 	 * I/O transactions using event port notification) must be scanned
   1222 	 * to detect and handle pending I/Os using the current port.
   1223 	 */
   1224 	mutex_enter(&aiop->aio_portq_mutex);
   1225 	mutex_enter(&aiop->aio_mutex);
   1226 	counter = 0;
   1227 	if ((headp = aiop->aio_portpending) != NULL) {
   1228 		reqp = headp;
   1229 		do {
   1230 			if (reqp->aio_req_portkev &&
   1231 			    reqp->aio_req_port == port) {
   1232 				reqp->aio_req_flags |= AIO_CLOSE_PORT;
   1233 				counter++;
   1234 			}
   1235 		} while ((reqp = reqp->aio_req_next) != headp);
   1236 	}
   1237 	if (counter == 0) {
   1238 		/* no AIOs pending */
   1239 		mutex_exit(&aiop->aio_mutex);
   1240 		mutex_exit(&aiop->aio_portq_mutex);
   1241 		return;
   1242 	}
   1243 	aiop->aio_portpendcnt += counter;
   1244 	mutex_exit(&aiop->aio_mutex);
   1245 	while (aiop->aio_portpendcnt)
   1246 		cv_wait(&aiop->aio_portcv, &aiop->aio_portq_mutex);
   1247 
   1248 	/*
   1249 	 * all pending AIOs are completed.
   1250 	 * check port doneq
   1251 	 */
   1252 	headp = NULL;
   1253 	if ((reqp = aiop->aio_portq) != NULL) {
   1254 		do {
   1255 			next = reqp->aio_req_next;
   1256 			if (reqp->aio_req_port == port) {
   1257 				/* dequeue request and discard event */
   1258 				aio_req_remove_portq(aiop, reqp);
   1259 				port_free_event(reqp->aio_req_portkev);
   1260 				/* put request in temporary queue */
   1261 				reqp->aio_req_next = headp;
   1262 				headp = reqp;
   1263 			}
   1264 		} while ((reqp = next) != aiop->aio_portq);
   1265 	}
   1266 	mutex_exit(&aiop->aio_portq_mutex);
   1267 
   1268 	/* headp points to the list of requests to be discarded */
   1269 	for (reqp = headp; reqp != NULL; reqp = next) {
   1270 		next = reqp->aio_req_next;
   1271 		aphysio_unlock(reqp);
   1272 		mutex_enter(&aiop->aio_mutex);
   1273 		aio_req_free_port(aiop, reqp);
   1274 		mutex_exit(&aiop->aio_mutex);
   1275 	}
   1276 
   1277 	if (aiop->aio_flags & AIO_CLEANUP)
   1278 		cv_broadcast(&aiop->aio_waitcv);
   1279 }
   1280 
   1281 /*
   1282  * aio_cleanup_dr_delete_memory is used by dr's delete_memory_thread
   1283  * to kick start the aio_cleanup_thread for the give process to do the
   1284  * necessary cleanup.
   1285  * This is needed so that delete_memory_thread can obtain writer locks
   1286  * on pages that need to be relocated during a dr memory delete operation,
   1287  * otherwise a deadly embrace may occur.
   1288  */
   1289 int
   1290 aio_cleanup_dr_delete_memory(proc_t *procp)
   1291 {
   1292 	struct aio *aiop = procp->p_aio;
   1293 	struct as *as = procp->p_as;
   1294 	int ret = 0;
   1295 
   1296 	ASSERT(MUTEX_HELD(&procp->p_lock));
   1297 
   1298 	mutex_enter(&as->a_contents);
   1299 
   1300 	if (aiop != NULL) {
   1301 		aiop->aio_rqclnup = 1;
   1302 		cv_broadcast(&as->a_cv);
   1303 		ret = 1;
   1304 	}
   1305 	mutex_exit(&as->a_contents);
   1306 	return (ret);
   1307 }
   1308