Home | History | Annotate | Download | only in os
      1 /*
      2  * CDDL HEADER START
      3  *
      4  * The contents of this file are subject to the terms of the
      5  * Common Development and Distribution License (the "License").
      6  * You may not use this file except in compliance with the License.
      7  *
      8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
      9  * or http://www.opensolaris.org/os/licensing.
     10  * See the License for the specific language governing permissions
     11  * and limitations under the License.
     12  *
     13  * When distributing Covered Code, include this CDDL HEADER in each
     14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
     15  * If applicable, add the following below this CDDL HEADER, with the
     16  * fields enclosed by brackets "[]" replaced with your own identifying
     17  * information: Portions Copyright [yyyy] [name of copyright owner]
     18  *
     19  * CDDL HEADER END
     20  */
     21 /*
     22  * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
     23  * Use is subject to license terms.
     24  */
     25 
     26 #pragma ident	"@(#)evchannels.c	1.11	07/11/20 SMI"
     27 
     28 /*
     29  * This file contains the source of the general purpose event channel extension
     30  * to the sysevent framework. This implementation is made up mainly of four
     31  * layers of functionality: the event queues (evch_evq_*()), the handling of
     32  * channels (evch_ch*()), the kernel interface (sysevent_evc_*()) and the
     33  * interface for the sysevent pseudo driver (evch_usr*()).
     34  * Libsysevent.so uses the pseudo driver sysevent's ioctl to access the event
     35  * channel extensions. The driver in turn uses the evch_usr*() functions below.
     36  *
     37  * The interfaces for user land and kernel are declared in sys/sysevent.h
     38  * Internal data structures for event channels are defined in
     39  * sys/sysevent_impl.h.
     40  *
     41  * The basic data structure for an event channel is of type evch_chan_t.
     42  * All channels are maintained by a list named evch_list. The list head
     43  * is of type evch_dlist_t.
     44  */
     45 
     46 #include <sys/types.h>
     47 #include <sys/errno.h>
     48 #include <sys/stropts.h>
     49 #include <sys/debug.h>
     50 #include <sys/ddi.h>
     51 #include <sys/vmem.h>
     52 #include <sys/cmn_err.h>
     53 #include <sys/callb.h>
     54 #include <sys/sysevent.h>
     55 #include <sys/sysevent_impl.h>
     56 #include <sys/sysmacros.h>
     57 #include <sys/disp.h>
     58 #include <sys/atomic.h>
     59 #include <sys/door.h>
     60 #include <sys/zone.h>
     61 #include <sys/sdt.h>
     62 
     63 /* Back-off delay for door_ki_upcall */
     64 #define	EVCH_MIN_PAUSE	8
     65 #define	EVCH_MAX_PAUSE	128
     66 
     67 #define	GEVENT(ev)	((evch_gevent_t *)((char *)ev - \
     68 			    offsetof(evch_gevent_t, ge_payload)))
     69 
     70 #define	EVCH_EVQ_EVCOUNT(x)	((&(x)->eq_eventq)->sq_count)
     71 #define	EVCH_EVQ_HIGHWM(x)	((&(x)->eq_eventq)->sq_highwm)
     72 
/*
 * Per-zone event channel state. An instance is allocated by evch_zoneinit()
 * and released by evch_zonefree().
 */
struct evch_globals {
	evch_dlist_t evch_list;		/* list of the zone's channels */
	kmutex_t evch_list_lock;	/* held by evch_zonefree() for teardown */
};
     77 
/* Variables used by event channel routines */
/* Nonzero: evch_evq_create() starts a delivery thread for each new queue */
static int		evq_initcomplete = 0;
/* Zone key registered by evch_chinit() */
static zone_key_t	evch_zone_key;
/* Channel limit, computed in evch_chinit() */
static uint32_t		evch_channels_max;
static uint32_t		evch_bindings_max = EVCH_MAX_BINDS_PER_CHANNEL;
/* Event limit, set to evch_channels_max << 8 in evch_chinit() */
static uint32_t		evch_events_max;

/* Forward declarations; both are used by evch_zonefree() */
static void evch_evq_unsub(evch_eventq_t *, evch_evqsub_t *);
static void evch_evq_destroy(evch_eventq_t *);
     87 
     88 /*
     89  * List handling. These functions handle a doubly linked list. The list has
     90  * to be protected by the calling functions. evch_dlist_t is the list head.
 * Every node of the list has to put an evch_dlelem_t data type in its data
 * structure as its first element.
     93  *
     94  * evch_dl_init		- Initialize list head
     95  * evch_dl_fini		- Terminate list handling
     96  * evch_dl_is_init	- Returns one if list is initialized
     97  * evch_dl_add		- Add element to end of list
     98  * evch_dl_del		- Remove given element from list
     99  * evch_dl_search	- Lookup element in list
    100  * evch_dl_getnum	- Get number of elements in list
 * evch_dl_next		- Get next element of list
    102  */
    103 
    104 static void
    105 evch_dl_init(evch_dlist_t *hp)
    106 {
    107 	hp->dh_head.dl_prev = hp->dh_head.dl_next = &hp->dh_head;
    108 	hp->dh_count = 0;
    109 }
    110 
    111 /*
    112  * Assumes that list is empty.
    113  */
    114 static void
    115 evch_dl_fini(evch_dlist_t *hp)
    116 {
    117 	hp->dh_head.dl_prev = hp->dh_head.dl_next = NULL;
    118 }
    119 
    120 static int
    121 evch_dl_is_init(evch_dlist_t *hp)
    122 {
    123 	return (hp->dh_head.dl_next != NULL ? 1 : 0);
    124 }
    125 
    126 /*
    127  * Add an element at the end of the list.
    128  */
    129 static void
    130 evch_dl_add(evch_dlist_t *hp, evch_dlelem_t *el)
    131 {
    132 	evch_dlelem_t	*x = hp->dh_head.dl_prev;
    133 	evch_dlelem_t	*y = &hp->dh_head;
    134 
    135 	x->dl_next = el;
    136 	y->dl_prev = el;
    137 	el->dl_next = y;
    138 	el->dl_prev = x;
    139 	hp->dh_count++;
    140 }
    141 
    142 /*
    143  * Remove arbitrary element out of dlist.
    144  */
    145 static void
    146 evch_dl_del(evch_dlist_t *hp, evch_dlelem_t *p)
    147 {
    148 	ASSERT(hp->dh_count > 0 && p != &hp->dh_head);
    149 	p->dl_prev->dl_next = p->dl_next;
    150 	p->dl_next->dl_prev = p->dl_prev;
    151 	p->dl_prev = NULL;
    152 	p->dl_next = NULL;
    153 	hp->dh_count--;
    154 }
    155 
    156 /*
    157  * Search an element in a list. Caller provides comparison callback function.
    158  */
    159 static evch_dlelem_t *
    160 evch_dl_search(evch_dlist_t *hp, int (*cmp)(evch_dlelem_t *, char *), char *s)
    161 {
    162 	evch_dlelem_t *p;
    163 
    164 	for (p = hp->dh_head.dl_next; p != &hp->dh_head; p = p->dl_next) {
    165 		if (cmp(p, s) == 0) {
    166 			return (p);
    167 		}
    168 	}
    169 	return (NULL);
    170 }
    171 
    172 /*
    173  * Return number of elements in the list.
    174  */
    175 static int
    176 evch_dl_getnum(evch_dlist_t *hp)
    177 {
    178 	return (hp->dh_count);
    179 }
    180 
    181 /*
    182  * Find next element of a evch_dlist_t list. Find first element if el == NULL.
    183  * Returns NULL if end of list is reached.
    184  */
    185 static void *
    186 evch_dl_next(evch_dlist_t *hp, void *el)
    187 {
    188 	evch_dlelem_t *ep = (evch_dlelem_t *)el;
    189 
    190 	if (hp->dh_count == 0) {
    191 		return (NULL);
    192 	}
    193 	if (ep == NULL) {
    194 		return (hp->dh_head.dl_next);
    195 	}
    196 	if ((ep = ep->dl_next) == (evch_dlelem_t *)hp) {
    197 		return (NULL);
    198 	}
    199 	return ((void *)ep);
    200 }
    201 
    202 /*
    203  * Queue handling routines. Mutexes have to be entered previously.
    204  *
    205  * evch_q_init	- Initialize queue head
    206  * evch_q_in	- Put element into queue
    207  * evch_q_out	- Get element out of queue
    208  * evch_q_next	- Iterate over the elements of a queue
    209  */
    210 static void
    211 evch_q_init(evch_squeue_t *q)
    212 {
    213 	q->sq_head = NULL;
    214 	q->sq_tail = (evch_qelem_t *)q;
    215 	q->sq_count = 0;
    216 	q->sq_highwm = 0;
    217 }
    218 
    219 /*
    220  * Put element into the queue q
    221  */
    222 static void
    223 evch_q_in(evch_squeue_t *q, evch_qelem_t *el)
    224 {
    225 	q->sq_tail->q_next = el;
    226 	el->q_next = NULL;
    227 	q->sq_tail = el;
    228 	q->sq_count++;
    229 	if (q->sq_count > q->sq_highwm) {
    230 		q->sq_highwm = q->sq_count;
    231 	}
    232 }
    233 
    234 /*
    235  * Returns NULL if queue is empty.
    236  */
    237 static evch_qelem_t *
    238 evch_q_out(evch_squeue_t *q)
    239 {
    240 	evch_qelem_t *el;
    241 
    242 	if ((el = q->sq_head) != NULL) {
    243 		q->sq_head = el->q_next;
    244 		q->sq_count--;
    245 		if (q->sq_head == NULL) {
    246 			q->sq_tail = (evch_qelem_t *)q;
    247 		}
    248 	}
    249 	return (el);
    250 }
    251 
    252 /*
    253  * Returns element after *el or first if el == NULL. NULL is returned
    254  * if queue is empty or *el points to the last element in the queue.
    255  */
    256 static evch_qelem_t *
    257 evch_q_next(evch_squeue_t *q, evch_qelem_t *el)
    258 {
    259 	if (el == NULL)
    260 		return (q->sq_head);
    261 	return (el->q_next);
    262 }
    263 
    264 /*
    265  * Event queue handling functions. An event queue is the basic building block
    266  * of an event channel. One event queue makes up the publisher-side event queue.
    267  * Further event queues build the per-subscriber queues of an event channel.
 * Each queue is associated with an event delivery thread.
    269  * These functions support a two-step initialization. First step, when kernel
    270  * memory is ready and second when threads are ready.
    271  * Events consist of an administrating evch_gevent_t structure with the event
    272  * data appended as variable length payload.
    273  * The internal interface functions for the event queue handling are:
    274  *
    275  * evch_evq_create	- create an event queue
    276  * evch_evq_thrcreate	- create thread for an event queue.
    277  * evch_evq_destroy	- delete an event queue
    278  * evch_evq_sub		- Subscribe to event delivery from an event queue
    279  * evch_evq_unsub	- Unsubscribe
    280  * evch_evq_pub		- Post an event into an event queue
    281  * evch_evq_stop	- Put delivery thread on hold
    282  * evch_evq_continue	- Resume event delivery thread
    283  * evch_evq_status	- Return status of delivery thread, running or on hold
    284  * evch_evq_evzalloc	- Allocate an event structure
    285  * evch_evq_evfree	- Free an event structure
    286  * evch_evq_evadd_dest	- Add a destructor function to an event structure
    287  * evch_evq_evnext	- Iterate over events non-destructive
    288  */
    289 
    290 /*ARGSUSED*/
    291 static void *
    292 evch_zoneinit(zoneid_t zoneid)
    293 {
    294 	struct evch_globals *eg;
    295 
    296 	eg = kmem_zalloc(sizeof (*eg), KM_SLEEP);
    297 	evch_dl_init(&eg->evch_list);
    298 	return (eg);
    299 }
    300 
    301 /*ARGSUSED*/
    302 static void
    303 evch_zonefree(zoneid_t zoneid, void *arg)
    304 {
    305 	struct evch_globals *eg = arg;
    306 	evch_chan_t *chp;
    307 	evch_subd_t *sdp;
    308 
    309 	mutex_enter(&eg->evch_list_lock);
    310 
    311 	/*
    312 	 * Keep picking the head element off the list until there are no
    313 	 * more.
    314 	 */
    315 	while ((chp = evch_dl_next(&eg->evch_list, NULL)) != NULL) {
    316 
    317 		/*
    318 		 * Since all processes are gone, all bindings should be gone,
    319 		 * and only channels with SUB_KEEP subscribers should remain.
    320 		 */
    321 		mutex_enter(&chp->ch_mutex);
    322 		ASSERT(chp->ch_bindings == 0);
    323 		ASSERT(evch_dl_getnum(&chp->ch_subscr) != 0);
    324 
    325 		/* Forcibly unsubscribe each remaining subscription */
    326 		while ((sdp = evch_dl_next(&chp->ch_subscr, NULL)) != NULL) {
    327 			/*
    328 			 * We should only be tearing down persistent
    329 			 * subscribers at this point, since all processes
    330 			 * from this zone are gone.
    331 			 */
    332 			ASSERT(sdp->sd_active == 0);
    333 			ASSERT((sdp->sd_persist & EVCH_SUB_KEEP) != 0);
    334 			/*
    335 			 * Disconnect subscriber queue from main event queue.
    336 			 */
    337 			evch_evq_unsub(chp->ch_queue, sdp->sd_msub);
    338 
    339 			/* Destruct per subscriber queue */
    340 			evch_evq_unsub(sdp->sd_queue, sdp->sd_ssub);
    341 			evch_evq_destroy(sdp->sd_queue);
    342 			/*
    343 			 * Eliminate the subscriber data from channel list.
    344 			 */
    345 			evch_dl_del(&chp->ch_subscr, &sdp->sd_link);
    346 			kmem_free(sdp->sd_classname, sdp->sd_clnsize);
    347 			kmem_free(sdp->sd_ident, strlen(sdp->sd_ident) + 1);
    348 			kmem_free(sdp, sizeof (evch_subd_t));
    349 		}
    350 
    351 		/* Channel must now have no subscribers */
    352 		ASSERT(evch_dl_getnum(&chp->ch_subscr) == 0);
    353 
    354 		/* Just like unbind */
    355 		mutex_exit(&chp->ch_mutex);
    356 		evch_dl_del(&eg->evch_list, &chp->ch_link);
    357 		evch_evq_destroy(chp->ch_queue);
    358 		mutex_destroy(&chp->ch_mutex);
    359 		mutex_destroy(&chp->ch_pubmx);
    360 		cv_destroy(&chp->ch_pubcv);
    361 		kmem_free(chp->ch_name, chp->ch_namelen);
    362 		kmem_free(chp, sizeof (evch_chan_t));
    363 	}
    364 
    365 	mutex_exit(&eg->evch_list_lock);
    366 	/* all channels should now be gone */
    367 	ASSERT(evch_dl_getnum(&eg->evch_list) == 0);
    368 	kmem_free(eg, sizeof (*eg));
    369 }
    370 
    371 /*
    372  * Frees evch_gevent_t structure including the payload, if the reference count
    373  * drops to or below zero. Below zero happens when the event is freed
    374  * without beeing queued into a queue.
    375  */
    376 static void
    377 evch_gevent_free(evch_gevent_t *evp)
    378 {
    379 	int32_t refcnt;
    380 
    381 	refcnt = (int32_t)atomic_add_32_nv(&evp->ge_refcount, -1);
    382 	if (refcnt <= 0) {
    383 		if (evp->ge_destruct != NULL) {
    384 			evp->ge_destruct((void *)&(evp->ge_payload),
    385 			    evp->ge_dstcookie);
    386 		}
    387 		kmem_free(evp, evp->ge_size);
    388 	}
    389 }
    390 
    391 /*
    392  * Deliver is called for every subscription to the current event
    393  * It calls the registered filter function and then the registered delivery
    394  * callback routine. Returns 0 on success. The callback routine returns
    395  * EVQ_AGAIN or EVQ_SLEEP in case the event could not be delivered.
    396  */
    397 static int
    398 evch_deliver(evch_evqsub_t *sp, evch_gevent_t *ep)
    399 {
    400 	void		*uep = &ep->ge_payload;
    401 	int		res = EVQ_DELIVER;
    402 
    403 	if (sp->su_filter != NULL) {
    404 		res = sp->su_filter(uep, sp->su_fcookie);
    405 	}
    406 	if (res == EVQ_DELIVER) {
    407 		return (sp->su_callb(uep, sp->su_cbcookie));
    408 	}
    409 	return (0);
    410 }
    411 
    412 /*
    413  * Holds event delivery in case of eq_holdmode set or in case the
    414  * event queue is empty. Mutex must be held when called.
    415  * Wakes up a thread waiting for the delivery thread reaching the hold mode.
    416  */
    417 static void
    418 evch_delivery_hold(evch_eventq_t *eqp, callb_cpr_t *cpip)
    419 {
    420 	if (eqp->eq_tabortflag == 0) {
    421 		do {
    422 			if (eqp->eq_holdmode) {
    423 				cv_signal(&eqp->eq_onholdcv);
    424 			}
    425 			CALLB_CPR_SAFE_BEGIN(cpip);
    426 			cv_wait(&eqp->eq_thrsleepcv, &eqp->eq_queuemx);
    427 			CALLB_CPR_SAFE_END(cpip, &eqp->eq_queuemx);
    428 		} while (eqp->eq_holdmode);
    429 	}
    430 }
    431 
    432 /*
    433  * Event delivery thread. Enumerates all subscribers and calls evch_deliver()
    434  * for each one.
    435  */
    436 static void
    437 evch_delivery_thr(evch_eventq_t *eqp)
    438 {
    439 	evch_qelem_t	*qep;
    440 	callb_cpr_t	cprinfo;
    441 	int		res;
    442 	evch_evqsub_t	*sub;
    443 	int		deltime;
    444 	int		repeatcount;
    445 	char		thnam[32];
    446 
    447 	(void) snprintf(thnam, sizeof (thnam), "sysevent_chan-%d",
    448 	    (int)eqp->eq_thrid);
    449 	CALLB_CPR_INIT(&cprinfo, &eqp->eq_queuemx, callb_generic_cpr, thnam);
    450 	mutex_enter(&eqp->eq_queuemx);
    451 	while (eqp->eq_tabortflag == 0) {
    452 		while (eqp->eq_holdmode == 0 && eqp->eq_tabortflag == 0 &&
    453 		    (qep = evch_q_out(&eqp->eq_eventq)) != NULL) {
    454 
    455 			/* Filter and deliver event to all subscribers */
    456 			deltime = EVCH_MIN_PAUSE;
    457 			repeatcount = EVCH_MAX_TRY_DELIVERY;
    458 			eqp->eq_curevent = qep->q_objref;
    459 			sub = evch_dl_next(&eqp->eq_subscr, NULL);
    460 			while (sub != NULL) {
    461 				eqp->eq_dactive = 1;
    462 				mutex_exit(&eqp->eq_queuemx);
    463 				res = evch_deliver(sub, qep->q_objref);
    464 				mutex_enter(&eqp->eq_queuemx);
    465 				eqp->eq_dactive = 0;
    466 				cv_signal(&eqp->eq_dactivecv);
    467 				switch (res) {
    468 				case EVQ_SLEEP:
    469 					/*
    470 					 * Wait for subscriber to return.
    471 					 */
    472 					eqp->eq_holdmode = 1;
    473 					evch_delivery_hold(eqp, &cprinfo);
    474 					if (eqp->eq_tabortflag) {
    475 						break;
    476 					}
    477 					continue;
    478 				case EVQ_AGAIN:
    479 					CALLB_CPR_SAFE_BEGIN(&cprinfo);
    480 					mutex_exit(&eqp->eq_queuemx);
    481 					delay(deltime);
    482 					deltime =
    483 					    deltime > EVCH_MAX_PAUSE ?
    484 					    deltime : deltime << 1;
    485 					mutex_enter(&eqp->eq_queuemx);
    486 					CALLB_CPR_SAFE_END(&cprinfo,
    487 					    &eqp->eq_queuemx);
    488 					if (repeatcount-- > 0) {
    489 						continue;
    490 					}
    491 					break;
    492 				}
    493 				if (eqp->eq_tabortflag) {
    494 					break;
    495 				}
    496 				sub = evch_dl_next(&eqp->eq_subscr, sub);
    497 				repeatcount = EVCH_MAX_TRY_DELIVERY;
    498 			}
    499 			eqp->eq_curevent = NULL;
    500 
    501 			/* Free event data and queue element */
    502 			evch_gevent_free((evch_gevent_t *)qep->q_objref);
    503 			kmem_free(qep, qep->q_objsize);
    504 		}
    505 
    506 		/* Wait for next event or end of hold mode if set */
    507 		evch_delivery_hold(eqp, &cprinfo);
    508 	}
    509 	CALLB_CPR_EXIT(&cprinfo);	/* Does mutex_exit of eqp->eq_queuemx */
    510 	thread_exit();
    511 }
    512 
    513 /*
    514  * Create the event delivery thread for an existing event queue.
    515  */
    516 static void
    517 evch_evq_thrcreate(evch_eventq_t *eqp)
    518 {
    519 	kthread_t *thp;
    520 
    521 	thp = thread_create(NULL, 0, evch_delivery_thr, (char *)eqp, 0, &p0,
    522 	    TS_RUN, minclsyspri);
    523 	eqp->eq_thrid = thp->t_did;
    524 }
    525 
    526 /*
    527  * Create event queue.
    528  */
    529 static evch_eventq_t *
    530 evch_evq_create()
    531 {
    532 	evch_eventq_t *p;
    533 
    534 	/* Allocate and initialize event queue descriptor */
    535 	p = kmem_zalloc(sizeof (evch_eventq_t), KM_SLEEP);
    536 	mutex_init(&p->eq_queuemx, NULL, MUTEX_DEFAULT, NULL);
    537 	cv_init(&p->eq_thrsleepcv, NULL, CV_DEFAULT, NULL);
    538 	evch_q_init(&p->eq_eventq);
    539 	evch_dl_init(&p->eq_subscr);
    540 	cv_init(&p->eq_dactivecv, NULL, CV_DEFAULT, NULL);
    541 	cv_init(&p->eq_onholdcv, NULL, CV_DEFAULT, NULL);
    542 
    543 	/* Create delivery thread */
    544 	if (evq_initcomplete) {
    545 		evch_evq_thrcreate(p);
    546 	}
    547 	return (p);
    548 }
    549 
    550 /*
    551  * Destroy an event queue. All subscribers have to be unsubscribed prior to
    552  * this call.
    553  */
    554 static void
    555 evch_evq_destroy(evch_eventq_t *eqp)
    556 {
    557 	evch_qelem_t *qep;
    558 
    559 	ASSERT(evch_dl_getnum(&eqp->eq_subscr) == 0);
    560 	/* Kill delivery thread */
    561 	if (eqp->eq_thrid != NULL) {
    562 		mutex_enter(&eqp->eq_queuemx);
    563 		eqp->eq_tabortflag = 1;
    564 		eqp->eq_holdmode = 0;
    565 		cv_signal(&eqp->eq_thrsleepcv);
    566 		mutex_exit(&eqp->eq_queuemx);
    567 		thread_join(eqp->eq_thrid);
    568 	}
    569 
    570 	/* Get rid of stale events in the event queue */
    571 	while ((qep = (evch_qelem_t *)evch_q_out(&eqp->eq_eventq)) != NULL) {
    572 		evch_gevent_free((evch_gevent_t *)qep->q_objref);
    573 		kmem_free(qep, qep->q_objsize);
    574 	}
    575 
    576 	/* Wrap up event queue structure */
    577 	cv_destroy(&eqp->eq_onholdcv);
    578 	cv_destroy(&eqp->eq_dactivecv);
    579 	cv_destroy(&eqp->eq_thrsleepcv);
    580 	evch_dl_fini(&eqp->eq_subscr);
    581 	mutex_destroy(&eqp->eq_queuemx);
    582 
    583 	/* Free descriptor structure */
    584 	kmem_free(eqp, sizeof (evch_eventq_t));
    585 }
    586 
    587 /*
    588  * Subscribe to an event queue. Every subscriber provides a filter callback
    589  * routine and an event delivery callback routine.
    590  */
    591 static evch_evqsub_t *
    592 evch_evq_sub(evch_eventq_t *eqp, filter_f filter, void *fcookie,
    593     deliver_f callb, void *cbcookie)
    594 {
    595 	evch_evqsub_t *sp = kmem_zalloc(sizeof (evch_evqsub_t), KM_SLEEP);
    596 
    597 	/* Initialize subscriber structure */
    598 	sp->su_filter = filter;
    599 	sp->su_fcookie = fcookie;
    600 	sp->su_callb = callb;
    601 	sp->su_cbcookie = cbcookie;
    602 
    603 	/* Add subscription to queue */
    604 	mutex_enter(&eqp->eq_queuemx);
    605 	evch_dl_add(&eqp->eq_subscr, &sp->su_link);
    606 	mutex_exit(&eqp->eq_queuemx);
    607 	return (sp);
    608 }
    609 
    610 /*
    611  * Unsubscribe from an event queue.
    612  */
    613 static void
    614 evch_evq_unsub(evch_eventq_t *eqp, evch_evqsub_t *sp)
    615 {
    616 	mutex_enter(&eqp->eq_queuemx);
    617 
    618 	/* Wait if delivery is just in progress */
    619 	if (eqp->eq_dactive) {
    620 		cv_wait(&eqp->eq_dactivecv, &eqp->eq_queuemx);
    621 	}
    622 	evch_dl_del(&eqp->eq_subscr, &sp->su_link);
    623 	mutex_exit(&eqp->eq_queuemx);
    624 	kmem_free(sp, sizeof (evch_evqsub_t));
    625 }
    626 
    627 /*
    628  * Publish an event. Returns 0 on success and -1 if memory alloc failed.
    629  */
    630 static int
    631 evch_evq_pub(evch_eventq_t *eqp, void *ev, int flags)
    632 {
    633 	size_t size;
    634 	evch_qelem_t	*qep;
    635 	evch_gevent_t	*evp = GEVENT(ev);
    636 
    637 	size = sizeof (evch_qelem_t);
    638 	if (flags & EVCH_TRYHARD) {
    639 		qep = kmem_alloc_tryhard(size, &size, KM_NOSLEEP);
    640 	} else {
    641 		qep = kmem_alloc(size, flags & EVCH_NOSLEEP ?
    642 		    KM_NOSLEEP : KM_SLEEP);
    643 	}
    644 	if (qep == NULL) {
    645 		return (-1);
    646 	}
    647 	qep->q_objref = (void *)evp;
    648 	qep->q_objsize = size;
    649 	atomic_add_32(&evp->ge_refcount, 1);
    650 	mutex_enter(&eqp->eq_queuemx);
    651 	evch_q_in(&eqp->eq_eventq, qep);
    652 
    653 	/* Wakeup delivery thread */
    654 	cv_signal(&eqp->eq_thrsleepcv);
    655 	mutex_exit(&eqp->eq_queuemx);
    656 	return (0);
    657 }
    658 
    659 /*
    660  * Enter hold mode of an event queue. Event delivery thread stops event
    661  * handling after delivery of current event (if any).
    662  */
    663 static void
    664 evch_evq_stop(evch_eventq_t *eqp)
    665 {
    666 	mutex_enter(&eqp->eq_queuemx);
    667 	eqp->eq_holdmode = 1;
    668 	if (evq_initcomplete) {
    669 		cv_signal(&eqp->eq_thrsleepcv);
    670 		cv_wait(&eqp->eq_onholdcv, &eqp->eq_queuemx);
    671 	}
    672 	mutex_exit(&eqp->eq_queuemx);
    673 }
    674 
    675 /*
    676  * Continue event delivery.
    677  */
    678 static void
    679 evch_evq_continue(evch_eventq_t *eqp)
    680 {
    681 	mutex_enter(&eqp->eq_queuemx);
    682 	eqp->eq_holdmode = 0;
    683 	cv_signal(&eqp->eq_thrsleepcv);
    684 	mutex_exit(&eqp->eq_queuemx);
    685 }
    686 
    687 /*
    688  * Returns status of delivery thread. 0 if running and 1 if on hold.
    689  */
    690 static int
    691 evch_evq_status(evch_eventq_t *eqp)
    692 {
    693 	return (eqp->eq_holdmode);
    694 }
    695 
    696 /*
    697  * Add a destructor function to an event structure.
    698  */
    699 static void
    700 evch_evq_evadd_dest(void *ev, destr_f destructor, void *cookie)
    701 {
    702 	evch_gevent_t *evp = GEVENT(ev);
    703 
    704 	evp->ge_destruct = destructor;
    705 	evp->ge_dstcookie = cookie;
    706 }
    707 
    708 /*
    709  * Allocate evch_gevent_t structure. Return address of payload offset of
    710  * evch_gevent_t.  If EVCH_TRYHARD allocation is requested, we use
    711  * kmem_alloc_tryhard to alloc memory of at least paylsize bytes.
    712  *
    713  * If either memory allocation is unsuccessful, we return NULL.
    714  */
    715 static void *
    716 evch_evq_evzalloc(size_t paylsize, int flag)
    717 {
    718 	evch_gevent_t	*evp;
    719 	size_t		rsize, evsize, ge_size;
    720 
    721 	rsize = offsetof(evch_gevent_t, ge_payload) + paylsize;
    722 	if (flag & EVCH_TRYHARD) {
    723 		evp = kmem_alloc_tryhard(rsize, &evsize, KM_NOSLEEP);
    724 		ge_size = evsize;
    725 	} else {
    726 		evp = kmem_alloc(rsize, flag & EVCH_NOSLEEP ? KM_NOSLEEP :
    727 		    KM_SLEEP);
    728 		ge_size = rsize;
    729 	}
    730 
    731 	if (evp) {
    732 		bzero(evp, rsize);
    733 		evp->ge_size = ge_size;
    734 		return (&evp->ge_payload);
    735 	}
    736 	return (evp);
    737 }
    738 
    739 /*
    740  * Free event structure. Argument ev is address of payload offset.
    741  */
    742 static void
    743 evch_evq_evfree(void *ev)
    744 {
    745 	evch_gevent_free(GEVENT(ev));
    746 }
    747 
    748 /*
    749  * Iterate over all events in the event queue. Begin with an event
    750  * which is currently being delivered. No mutexes are grabbed and no
    751  * resources allocated so that this function can be called in panic
    752  * context too. This function has to be called with ev == NULL initially.
    753  * Actually argument ev is only a flag. Internally the member eq_nextev
    754  * is used to determine the next event. But ev allows for the convenient
    755  * use like
    756  *	ev = NULL;
    757  *	while ((ev = evch_evq_evnext(evp, ev)) != NULL) ...
    758  */
    759 static void *
    760 evch_evq_evnext(evch_eventq_t *evq, void *ev)
    761 {
    762 	if (ev == NULL) {
    763 		evq->eq_nextev = NULL;
    764 		if (evq->eq_curevent != NULL)
    765 			return (&evq->eq_curevent->ge_payload);
    766 	}
    767 	evq->eq_nextev = evch_q_next(&evq->eq_eventq, evq->eq_nextev);
    768 	if (evq->eq_nextev == NULL)
    769 		return (NULL);
    770 	return (&((evch_gevent_t *)evq->eq_nextev->q_objref)->ge_payload);
    771 }
    772 
    773 /*
    774  * Channel handling functions. First some support functions. Functions belonging
    775  * to the channel handling interface start with evch_ch. The following functions
    776  * make up the channel handling internal interfaces:
    777  *
    778  * evch_chinit		- Initialize channel handling
    779  * evch_chinitthr	- Second step init: initialize threads
    780  * evch_chbind		- Bind to a channel
    781  * evch_chunbind	- Unbind from a channel
    782  * evch_chsubscribe	- Subscribe to a sysevent class
    783  * evch_chunsubscribe	- Unsubscribe
    784  * evch_chpublish	- Publish an event
    785  * evch_chgetnames	- Get names of all channels
    786  * evch_chgetchdata	- Get data of a channel
    787  * evch_chrdevent_init  - Init event q traversal
    788  * evch_chgetnextev	- Read out events queued for a subscriber
    789  * evch_chrdevent_fini  - Finish event q traversal
    790  */
    791 
    792 /*
    793  * Compare channel name. Used for evch_dl_search to find a channel with the
    794  * name s.
    795  */
    796 static int
    797 evch_namecmp(evch_dlelem_t *ep, char *s)
    798 {
    799 	return (strcmp(((evch_chan_t *)ep)->ch_name, s));
    800 }
    801 
    802 /*
    803  * Sysevent filter callback routine. Enables event delivery only if it matches
    804  * the event class string given by parameter cookie.
    805  */
    806 static int
    807 evch_class_filter(void *ev, void *cookie)
    808 {
    809 	char *class = (char *)cookie;
    810 
    811 	if (class == NULL || strcmp(SE_CLASS_NAME(ev), class) == 0) {
    812 		return (EVQ_DELIVER);
    813 	}
    814 	return (EVQ_IGNORE);
    815 }
    816 
    817 /*
    818  * Callback routine to propagate the event into a per subscriber queue.
    819  */
    820 static int
    821 evch_subq_deliver(void *evp, void *cookie)
    822 {
    823 	evch_subd_t *p = (evch_subd_t *)cookie;
    824 
    825 	(void) evch_evq_pub(p->sd_queue, evp, EVCH_SLEEP);
    826 	return (EVQ_CONT);
    827 }
    828 
    829 /*
    830  * Call kernel callback routine for sysevent kernel delivery.
    831  */
    832 static int
    833 evch_kern_deliver(void *evp, void *cookie)
    834 {
    835 	sysevent_impl_t	*ev = (sysevent_impl_t *)evp;
    836 	evch_subd_t	*sdp = (evch_subd_t *)cookie;
    837 
    838 	return (sdp->sd_callback(ev, sdp->sd_cbcookie));
    839 }
    840 
    841 /*
    842  * Door upcall for user land sysevent delivery.
    843  */
    844 static int
    845 evch_door_deliver(void *evp, void *cookie)
    846 {
    847 	int		error;
    848 	size_t		size;
    849 	sysevent_impl_t	*ev = (sysevent_impl_t *)evp;
    850 	door_arg_t	darg;
    851 	evch_subd_t	*sdp = (evch_subd_t *)cookie;
    852 	int		nticks = EVCH_MIN_PAUSE;
    853 	uint32_t	retval;
    854 	int		retry = 20;
    855 
    856 	/* Initialize door args */
    857 	size = sizeof (sysevent_impl_t) + SE_PAYLOAD_SZ(ev);
    858 
    859 	darg.rbuf = (char *)&retval;
    860 	darg.rsize = sizeof (retval);
    861 	darg.data_ptr = (char *)ev;
    862 	darg.data_size = size;
    863 	darg.desc_ptr = NULL;
    864 	darg.desc_num = 0;
    865 
    866 	for (;;) {
    867 		if ((error = door_ki_upcall(sdp->sd_door, &darg)) == 0) {
    868 			break;
    869 		}
    870 		switch (error) {
    871 		case EAGAIN:
    872 			/* Cannot deliver event - process may be forking */
    873 			delay(nticks);
    874 			nticks <<= 1;
    875 			if (nticks > EVCH_MAX_PAUSE) {
    876 				nticks = EVCH_MAX_PAUSE;
    877 			}
    878 			if (retry-- <= 0) {
    879 				cmn_err(CE_CONT, "event delivery thread: "
    880 				    "door_ki_upcall error EAGAIN\n");
    881 				return (EVQ_CONT);
    882 			}
    883 			break;
    884 		case EINTR:
    885 		case EBADF:
    886 			/* Process died */
    887 			return (EVQ_SLEEP);
    888 		default:
    889 			cmn_err(CE_CONT,
    890 			    "event delivery thread: door_ki_upcall error %d\n",
    891 			    error);
    892 			return (EVQ_CONT);
    893 		}
    894 	}
    895 	if (retval == EAGAIN) {
    896 		return (EVQ_AGAIN);
    897 	}
    898 	return (EVQ_CONT);
    899 }
    900 
    901 /*
    902  * Callback routine for evch_dl_search() to compare subscriber id's. Used by
    903  * evch_subscribe() and evch_chrdevent_init().
    904  */
    905 static int
    906 evch_subidcmp(evch_dlelem_t *ep, char *s)
    907 {
    908 	return (strcmp(((evch_subd_t *)ep)->sd_ident, s));
    909 }
    910 
    911 /*
    912  * Callback routine for evch_dl_search() to find a subscriber with EVCH_SUB_DUMP
    913  * set (indicated by sub->sd_dump != 0). Used by evch_chrdevent_init() and
    914  * evch_subscribe(). Needs to returns 0 if subscriber with sd_dump set is
    915  * found.
    916  */
    917 /*ARGSUSED1*/
    918 static int
    919 evch_dumpflgcmp(evch_dlelem_t *ep, char *s)
    920 {
    921 	return (((evch_subd_t *)ep)->sd_dump ? 0 : 1);
    922 }
    923 
    924 /*
    925  * Event destructor function. Used to maintain the number of events per channel.
    926  */
    927 /*ARGSUSED*/
    928 static void
    929 evch_destr_event(void *ev, void *ch)
    930 {
    931 	evch_chan_t *chp = (evch_chan_t *)ch;
    932 
    933 	mutex_enter(&chp->ch_pubmx);
    934 	chp->ch_nevents--;
    935 	cv_signal(&chp->ch_pubcv);
    936 	mutex_exit(&chp->ch_pubmx);
    937 }
    938 
    939 /*
    940  * Integer square root according to Newton's iteration.
    941  */
    942 static uint32_t
    943 evch_isqrt(uint64_t n)
    944 {
    945 	uint64_t	x = n >> 1;
    946 	uint64_t	xn = x - 1;
    947 	static uint32_t	lowval[] = { 0, 1, 1, 2 };
    948 
    949 	if (n < 4) {
    950 		return (lowval[n]);
    951 	}
    952 	while (xn < x) {
    953 		x = xn;
    954 		xn = (x + n / x) / 2;
    955 	}
    956 	return ((uint32_t)xn);
    957 }
    958 
    959 /*
    960  * First step sysevent channel initialization. Called when kernel memory
    961  * allocator is initialized.
    962  */
    963 static void
    964 evch_chinit()
    965 {
    966 	size_t k;
    967 
    968 	/*
    969 	 * Calculate limits: max no of channels and max no of events per
    970 	 * channel. The smallest machine with 128 MByte will allow for
    971 	 * >= 8 channels and an upper limit of 2048 events per channel.
    972 	 * The event limit is the number of channels times 256 (hence
    973 	 * the shift factor of 8). These number where selected arbitrarily.
    974 	 */
    975 	k = kmem_maxavail() >> 20;
    976 	evch_channels_max = min(evch_isqrt(k), EVCH_MAX_CHANNELS);
    977 	evch_events_max = evch_channels_max << 8;
    978 
    979 	/*
    980 	 * Will trigger creation of the global zone's evch state.
    981 	 */
    982 	zone_key_create(&evch_zone_key, evch_zoneinit, NULL, evch_zonefree);
    983 }
    984 
    985 /*
    986  * Second step sysevent channel initialization. Called when threads are ready.
    987  */
    988 static void
    989 evch_chinitthr()
    990 {
    991 	struct evch_globals *eg;
    992 	evch_chan_t	*chp;
    993 	evch_subd_t	*sdp;
    994 
    995 	/*
    996 	 * We're early enough in boot that we know that only the global
    997 	 * zone exists; we only need to initialize its threads.
    998 	 */
    999 	eg = zone_getspecific(evch_zone_key, global_zone);
   1000 	ASSERT(eg != NULL);
   1001 
   1002 	for (chp = evch_dl_next(&eg->evch_list, NULL); chp != NULL;
   1003 	    chp = evch_dl_next(&eg->evch_list, chp)) {
   1004 		for (sdp = evch_dl_next(&chp->ch_subscr, NULL); sdp;
   1005 		    sdp = evch_dl_next(&chp->ch_subscr, sdp)) {
   1006 			evch_evq_thrcreate(sdp->sd_queue);
   1007 		}
   1008 		evch_evq_thrcreate(chp->ch_queue);
   1009 	}
   1010 	evq_initcomplete = 1;
   1011 }
   1012 
   1013 /*
   1014  * Sysevent channel bind. Create channel and allocate binding structure.
   1015  */
   1016 static int
   1017 evch_chbind(const char *chnam, evch_bind_t **scpp, uint32_t flags)
   1018 {
   1019 	struct evch_globals *eg;
   1020 	evch_bind_t	*bp;
   1021 	evch_chan_t	*p;
   1022 	char		*chn;
   1023 	size_t		namlen;
   1024 	int		rv;
   1025 
   1026 	eg = zone_getspecific(evch_zone_key, curproc->p_zone);
   1027 	ASSERT(eg != NULL);
   1028 
   1029 	/* Create channel if it does not exist */
   1030 	ASSERT(evch_dl_is_init(&eg->evch_list));
   1031 	if ((namlen = strlen(chnam) + 1) > MAX_CHNAME_LEN) {
   1032 		return (EINVAL);
   1033 	}
   1034 	mutex_enter(&eg->evch_list_lock);
   1035 	if ((p = (evch_chan_t *)evch_dl_search(&eg->evch_list, evch_namecmp,
   1036 	    (char *)chnam)) == NULL) {
   1037 		if (flags & EVCH_CREAT) {
   1038 			if (evch_dl_getnum(&eg->evch_list) >=
   1039 			    evch_channels_max) {
   1040 				mutex_exit(&eg->evch_list_lock);
   1041 				return (ENOMEM);
   1042 			}
   1043 			chn = kmem_alloc(namlen, KM_SLEEP);
   1044 			bcopy(chnam, chn, namlen);
   1045 
   1046 			/* Allocate and initialize channel descriptor */
   1047 			p = kmem_zalloc(sizeof (evch_chan_t), KM_SLEEP);
   1048 			p->ch_name = chn;
   1049 			p->ch_namelen = namlen;
   1050 			mutex_init(&p->ch_mutex, NULL, MUTEX_DEFAULT, NULL);
   1051 			p->ch_queue = evch_evq_create();
   1052 			evch_dl_init(&p->ch_subscr);
   1053 			if (evq_initcomplete) {
   1054 				p->ch_uid = crgetuid(curthread->t_cred);
   1055 				p->ch_gid = crgetgid(curthread->t_cred);
   1056 			}
   1057 			cv_init(&p->ch_pubcv, NULL, CV_DEFAULT, NULL);
   1058 			mutex_init(&p->ch_pubmx, NULL, MUTEX_DEFAULT, NULL);
   1059 			p->ch_maxev = min(EVCH_DEFAULT_EVENTS, evch_events_max);
   1060 			p->ch_maxsubscr = EVCH_MAX_SUBSCRIPTIONS;
   1061 			p->ch_maxbinds = evch_bindings_max;
   1062 			p->ch_ctime = gethrestime_sec();
   1063 			if (flags & EVCH_HOLD_PEND) {
   1064 				p->ch_holdpend = 1;
   1065 				evch_evq_stop(p->ch_queue);
   1066 			}
   1067 
   1068 			/* Put new descriptor into channel list */
   1069 			evch_dl_add(&eg->evch_list, (evch_dlelem_t *)p);
   1070 		} else {
   1071 			mutex_exit(&eg->evch_list_lock);
   1072 			return (ENOENT);
   1073 		}
   1074 	}
   1075 
   1076 	/* Check for max binds and create binding */
   1077 	mutex_enter(&p->ch_mutex);
   1078 	if (p->ch_bindings >= p->ch_maxbinds) {
   1079 		rv = ENOMEM;
   1080 		/*
   1081 		 * No need to destroy the channel because this call did not
   1082 		 * create it. Other bindings will be present if ch_maxbinds
   1083 		 * is exceeded.
   1084 		 */
   1085 		goto errorexit;
   1086 	}
   1087 	bp = kmem_alloc(sizeof (evch_bind_t), KM_SLEEP);
   1088 	bp->bd_channel = p;
   1089 	bp->bd_sublst = NULL;
   1090 	p->ch_bindings++;
   1091 	rv = 0;
   1092 	*scpp = bp;
   1093 errorexit:
   1094 	mutex_exit(&p->ch_mutex);
   1095 	mutex_exit(&eg->evch_list_lock);
   1096 	return (rv);
   1097 }
   1098 
   1099 /*
   1100  * Unbind: Free bind structure. Remove channel if last binding was freed.
   1101  */
   1102 static void
   1103 evch_chunbind(evch_bind_t *bp)
   1104 {
   1105 	struct evch_globals *eg;
   1106 	evch_chan_t *chp = bp->bd_channel;
   1107 
   1108 	eg = zone_getspecific(evch_zone_key, curproc->p_zone);
   1109 	ASSERT(eg != NULL);
   1110 
   1111 	mutex_enter(&eg->evch_list_lock);
   1112 	mutex_enter(&chp->ch_mutex);
   1113 	ASSERT(chp->ch_bindings > 0);
   1114 	chp->ch_bindings--;
   1115 	kmem_free(bp, sizeof (evch_bind_t));
   1116 	if (chp->ch_bindings == 0 && evch_dl_getnum(&chp->ch_subscr) == 0) {
   1117 		/*
   1118 		 * No more bindings or persistent subscriber, destroy channel.
   1119 		 */
   1120 		mutex_exit(&chp->ch_mutex);
   1121 		evch_dl_del(&eg->evch_list, &chp->ch_link);
   1122 		evch_evq_destroy(chp->ch_queue);
   1123 		mutex_destroy(&chp->ch_mutex);
   1124 		mutex_destroy(&chp->ch_pubmx);
   1125 		cv_destroy<