Home | History | Annotate | Download | only in os
      1 /*
      2  * CDDL HEADER START
      3  *
      4  * The contents of this file are subject to the terms of the
      5  * Common Development and Distribution License (the "License").
      6  * You may not use this file except in compliance with the License.
      7  *
      8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
      9  * or http://www.opensolaris.org/os/licensing.
     10  * See the License for the specific language governing permissions
     11  * and limitations under the License.
     12  *
     13  * When distributing Covered Code, include this CDDL HEADER in each
     14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
     15  * If applicable, add the following below this CDDL HEADER, with the
     16  * fields enclosed by brackets "[]" replaced with your own identifying
     17  * information: Portions Copyright [yyyy] [name of copyright owner]
     18  *
     19  * CDDL HEADER END
     20  */
     21 /*
     22  * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
     23  * Use is subject to license terms.
     24  */
     25 
     26 #pragma ident	"@(#)log_sysevent.c	1.20	07/01/24 SMI"
     27 
     28 #include <sys/types.h>
     29 #include <sys/errno.h>
     30 #include <sys/stropts.h>
     31 #include <sys/debug.h>
     32 #include <sys/ddi.h>
     33 #include <sys/sunddi.h>
     34 #include <sys/vmem.h>
     35 #include <sys/cmn_err.h>
     36 #include <sys/callb.h>
     37 #include <sys/sysevent.h>
     38 #include <sys/sysevent_impl.h>
     39 #include <sys/modctl.h>
     40 #include <sys/sysmacros.h>
     41 #include <sys/disp.h>
     42 #include <sys/autoconf.h>
     43 #include <sys/atomic.h>
     44 #include <sys/sdt.h>
     45 
     46 /* for doors */
     47 #include <sys/pathname.h>
     48 #include <sys/door.h>
     49 #include <sys/kmem.h>
     50 #include <sys/cpuvar.h>
     51 #include <sys/fs/snode.h>
     52 
     53 /*
     54  * log_sysevent.c - Provides the interfaces for kernel event publication
     55  *			to the sysevent event daemon (syseventd).
     56  */
     57 
     58 /*
     59  * Debug stuff
     60  */
     61 static int log_event_debug = 0;
     62 #define	LOG_DEBUG(args)  if (log_event_debug) cmn_err args
     63 #ifdef DEBUG
     64 #define	LOG_DEBUG1(args)  if (log_event_debug > 1) cmn_err args
     65 #else
     66 #define	LOG_DEBUG1(args)
     67 #endif
     68 
/*
 * Local static vars
 */
/*
 * Queue of event buffers sent to syseventd.  log_event_deliver() pushes
 * each successfully delivered buffer onto the front of this list while
 * holding eventq_sent_mutex.
 */
static log_eventq_t *log_eventq_sent = NULL;

/*
 * Count of event buffers in the queue awaiting delivery (decremented by
 * log_event_deliver() each time the head buffer moves to the sent queue).
 */
int log_eventq_cnt = 0;

/* queue of event buffers awaiting delivery to syseventd */
static log_eventq_t *log_eventq_head = NULL;
static log_eventq_t *log_eventq_tail = NULL;
/* NOTE(review): presumably the next event sequence number -- confirm */
static uint64_t kernel_event_id = 0;
/* nvlist encoding used for every nvlist_size()/nvlist_pack() call here */
static int encoding = NV_ENCODE_NATIVE;

/* log event delivery flag (values stored in log_event_delivery) */
#define	LOGEVENT_DELIVERY_OK	0	/* OK to deliver event buffers */
#define	LOGEVENT_DELIVERY_CONT	1	/* Continue to deliver event buffers */
#define	LOGEVENT_DELIVERY_HOLD	2	/* Hold delivering of event buffers */

/*
 * Tunable maximum event buffer queue size. Size depends on how many events
 * the queue must hold when syseventd is not available, for example during
 * system startup. Experience showed that more than 2000 events could be posted
 * due to correctable memory errors.
 */
int logevent_max_q_sz = 5000;


/* Delivery starts out on hold; see the LOGEVENT_DELIVERY_* values above */
static int log_event_delivery = LOGEVENT_DELIVERY_HOLD;
/* Path handed to door_ki_open() by log_event_upcall_lookup() */
static char *logevent_door_upcall_filename = NULL;
static int logevent_door_upcall_filename_size;

static door_handle_t event_door = NULL;		/* Door for upcalls */
    105 
/*
 * async thread-related variables
 *
 * eventq_head_mutex - synchronizes access to the kernel event queue
 *
 * eventq_sent_mutex - synchronizes access to the queue of events sent to
 *			userlevel
 *
 * log_event_cv - condition variable signaled when an event has arrived or
 *			userlevel ready to process event buffers
 *
 * async_thread - asynchronous event delivery thread to userlevel daemon.
 *
 * sysevent_upcall_status - status of the door upcall link
 */
static kmutex_t eventq_head_mutex;
static kmutex_t eventq_sent_mutex;
static kcondvar_t log_event_cv;
static kthread_id_t async_thread = NULL;

/*
 * Queue-full handling.  log_event_deliver() signals event_qfull_cv (under
 * event_qfull_mutex) when event_qfull_blocked is positive and either room
 * has opened up in the queue or the upcall transport has failed.
 */
static kmutex_t event_qfull_mutex;
static kcondvar_t event_qfull_cv;
static int event_qfull_blocked = 0;

/*
 * Set by log_event_deliver(): 0 after a successful or held delivery pass,
 * otherwise the error returned by the failed upcall.  Starts at -1.
 */
static int sysevent_upcall_status = -1;
static kmutex_t registered_channel_mutex;

/*
 * Indicates the syseventd daemon has begun taking events
 */
int sysevent_daemon_init = 0;

/*
 * Back-off delay when door_ki_upcall returns EAGAIN.  Typically
 * caused by the server process doing a forkall().  Since all threads
 * but the thread actually doing the forkall() need to be quiesced,
 * the fork may take some time.  The min/max pause are in units
 * of clock ticks.
 */
#define	LOG_EVENT_MIN_PAUSE	8
#define	LOG_EVENT_MAX_PAUSE	128

/*
 * State shared between log_event_pause() and its timeout handler
 * log_event_busy_timeout(): event_pause_state is 1 while a pause is
 * outstanding and is cleared (with event_pause_cv signaled) by the handler.
 */
static kmutex_t	event_pause_mutex;
static kcondvar_t event_pause_cv;
static int event_pause_state = 0;
    151 
    152 /*
    153  * log_event_upcall_lookup - Establish door connection with user event
    154  *				daemon (syseventd)
    155  */
    156 static int
    157 log_event_upcall_lookup()
    158 {
    159 	int	error;
    160 
    161 	if (event_door) {	/* Release our previous hold (if any) */
    162 		door_ki_rele(event_door);
    163 	}
    164 
    165 	event_door = NULL;
    166 
    167 	/*
    168 	 * Locate the door used for upcalls
    169 	 */
    170 	if ((error =
    171 	    door_ki_open(logevent_door_upcall_filename, &event_door)) != 0) {
    172 		return (error);
    173 	}
    174 
    175 	return (0);
    176 }
    177 
    178 
    179 /*ARGSUSED*/
    180 static void
    181 log_event_busy_timeout(void *arg)
    182 {
    183 	mutex_enter(&event_pause_mutex);
    184 	event_pause_state = 0;
    185 	cv_signal(&event_pause_cv);
    186 	mutex_exit(&event_pause_mutex);
    187 }
    188 
    189 static void
    190 log_event_pause(int nticks)
    191 {
    192 	timeout_id_t id;
    193 
    194 	/*
    195 	 * Only one use of log_event_pause at a time
    196 	 */
    197 	ASSERT(event_pause_state == 0);
    198 
    199 	event_pause_state = 1;
    200 	id = timeout(log_event_busy_timeout, NULL, nticks);
    201 	if (id != 0) {
    202 		mutex_enter(&event_pause_mutex);
    203 		while (event_pause_state)
    204 			cv_wait(&event_pause_cv, &event_pause_mutex);
    205 		mutex_exit(&event_pause_mutex);
    206 	}
    207 	event_pause_state = 0;
    208 }
    209 
    210 
    211 /*
    212  * log_event_upcall - Perform the upcall to syseventd for event buffer delivery.
    213  * 			Check for rebinding errors
    214  * 			This buffer is reused to by the syseventd door_return
    215  *			to hold the result code
    216  */
    217 static int
    218 log_event_upcall(log_event_upcall_arg_t *arg)
    219 {
    220 	int error;
    221 	size_t size;
    222 	sysevent_t *ev;
    223 	door_arg_t darg, save_arg;
    224 	int retry;
    225 	int neagain = 0;
    226 	int neintr = 0;
    227 	int nticks = LOG_EVENT_MIN_PAUSE;
    228 
    229 	/* Initialize door args */
    230 	ev = (sysevent_t *)&arg->buf;
    231 	size = sizeof (log_event_upcall_arg_t) + SE_PAYLOAD_SZ(ev);
    232 
    233 	darg.rbuf = (char *)arg;
    234 	darg.data_ptr = (char *)arg;
    235 	darg.rsize = size;
    236 	darg.data_size = size;
    237 	darg.desc_ptr = NULL;
    238 	darg.desc_num = 0;
    239 
    240 	if ((event_door == NULL) &&
    241 	    ((error = log_event_upcall_lookup()) != 0)) {
    242 		LOG_DEBUG((CE_CONT,
    243 		    "log_event_upcall: event_door error (%d)\n", error));
    244 
    245 		return (error);
    246 	}
    247 
    248 	LOG_DEBUG1((CE_CONT, "log_event_upcall: 0x%llx\n",
    249 	    (longlong_t)SE_SEQ((sysevent_t *)&arg->buf)));
    250 
    251 	save_arg = darg;
    252 	for (retry = 0; ; retry++) {
    253 		if ((error = door_ki_upcall(event_door, &darg)) == 0) {
    254 			break;
    255 		}
    256 		switch (error) {
    257 		case EINTR:
    258 			neintr++;
    259 			log_event_pause(2);
    260 			darg = save_arg;
    261 			break;
    262 		case EAGAIN:
    263 			/* cannot deliver upcall - process may be forking */
    264 			neagain++;
    265 			log_event_pause(nticks);
    266 			nticks <<= 1;
    267 			if (nticks > LOG_EVENT_MAX_PAUSE)
    268 				nticks = LOG_EVENT_MAX_PAUSE;
    269 			darg = save_arg;
    270 			break;
    271 		case EBADF:
    272 			LOG_DEBUG((CE_CONT, "log_event_upcall: rebinding\n"));
    273 			/* Server may have died. Try rebinding */
    274 			if ((error = log_event_upcall_lookup()) != 0) {
    275 				LOG_DEBUG((CE_CONT,
    276 				    "log_event_upcall: lookup error %d\n",
    277 				    error));
    278 				return (EBADF);
    279 			}
    280 			if (retry > 4) {
    281 				LOG_DEBUG((CE_CONT,
    282 					"log_event_upcall: ebadf\n"));
    283 				return (EBADF);
    284 			}
    285 			LOG_DEBUG((CE_CONT, "log_event_upcall: "
    286 				"retrying upcall after lookup\n"));
    287 			darg = save_arg;
    288 			break;
    289 		default:
    290 			cmn_err(CE_CONT,
    291 			    "log_event_upcall: door_ki_upcall error %d\n",
    292 			    error);
    293 			return (error);
    294 		}
    295 	}
    296 
    297 	if (neagain > 0 || neintr > 0) {
    298 		LOG_DEBUG((CE_CONT, "upcall: eagain=%d eintr=%d nticks=%d\n",
    299 			neagain, neintr, nticks));
    300 	}
    301 
    302 	LOG_DEBUG1((CE_CONT, "log_event_upcall:\n\t"
    303 		"error=%d rptr1=%p rptr2=%p dptr2=%p ret1=%x ret2=%x\n",
    304 		error, (void *)arg, (void *)darg.rbuf,
    305 		(void *)darg.data_ptr,
    306 		*((int *)(darg.rbuf)), *((int *)(darg.data_ptr))));
    307 
    308 	if (!error) {
    309 		/*
    310 		 * upcall was successfully executed. Check return code.
    311 		 */
    312 		error = *((int *)(darg.rbuf));
    313 	}
    314 
    315 	return (error);
    316 }
    317 
    318 /*
    319  * log_event_deliver - event delivery thread
    320  *			Deliver all events on the event queue to syseventd.
    321  *			If the daemon can not process events, stop event
    322  *			delivery and wait for an indication from the
    323  *			daemon to resume delivery.
    324  *
    325  *			Once all event buffers have been delivered, wait
    326  *			until there are more to deliver.
    327  */
    328 static void
    329 log_event_deliver()
    330 {
    331 	log_eventq_t *q;
    332 	int upcall_err;
    333 	callb_cpr_t cprinfo;
    334 
    335 	CALLB_CPR_INIT(&cprinfo, &eventq_head_mutex, callb_generic_cpr,
    336 				"logevent");
    337 
    338 	/*
    339 	 * eventq_head_mutex is exited (released) when there are no more
    340 	 * events to process from the eventq in cv_wait().
    341 	 */
    342 	mutex_enter(&eventq_head_mutex);
    343 
    344 	for (;;) {
    345 		LOG_DEBUG1((CE_CONT, "log_event_deliver: head = %p\n",
    346 		    (void *)log_eventq_head));
    347 
    348 		upcall_err = 0;
    349 		q = log_eventq_head;
    350 
    351 		while (q) {
    352 			log_eventq_t *next;
    353 
    354 			/*
    355 			 * Release event queue lock during upcall to
    356 			 * syseventd
    357 			 */
    358 			if (log_event_delivery == LOGEVENT_DELIVERY_HOLD) {
    359 				upcall_err = EAGAIN;
    360 				break;
    361 			}
    362 
    363 			mutex_exit(&eventq_head_mutex);
    364 			if ((upcall_err = log_event_upcall(&q->arg)) != 0) {
    365 				mutex_enter(&eventq_head_mutex);
    366 				break;
    367 			}
    368 
    369 			/*
    370 			 * We may be able to add entries to
    371 			 * the queue now.
    372 			 */
    373 			if (event_qfull_blocked > 0 &&
    374 			    log_eventq_cnt < logevent_max_q_sz) {
    375 				mutex_enter(&event_qfull_mutex);
    376 				if (event_qfull_blocked > 0) {
    377 					cv_signal(&event_qfull_cv);
    378 				}
    379 				mutex_exit(&event_qfull_mutex);
    380 			}
    381 
    382 			mutex_enter(&eventq_head_mutex);
    383 
    384 			/*
    385 			 * Daemon restart can cause entries to be moved from
    386 			 * the sent queue and put back on the event queue.
    387 			 * If this has occurred, replay event queue
    388 			 * processing from the new queue head.
    389 			 */
    390 			if (q != log_eventq_head) {
    391 				q = log_eventq_head;
    392 				LOG_DEBUG((CE_CONT, "log_event_deliver: "
    393 				    "door upcall/daemon restart race\n"));
    394 			} else {
    395 				/*
    396 				 * Move the event to the sent queue when a
    397 				 * successful delivery has been made.
    398 				 */
    399 				mutex_enter(&eventq_sent_mutex);
    400 				next = q->next;
    401 				q->next = log_eventq_sent;
    402 				log_eventq_sent = q;
    403 				q = next;
    404 				log_eventq_head = q;
    405 				log_eventq_cnt--;
    406 				if (q == NULL) {
    407 					ASSERT(log_eventq_cnt == 0);
    408 					log_eventq_tail = NULL;
    409 				}
    410 				mutex_exit(&eventq_sent_mutex);
    411 			}
    412 		}
    413 
    414 		switch (upcall_err) {
    415 		case 0:
    416 			/*
    417 			 * Success. The queue is empty.
    418 			 */
    419 			sysevent_upcall_status = 0;
    420 			break;
    421 		case EAGAIN:
    422 			/*
    423 			 * Delivery is on hold (but functional).
    424 			 */
    425 			sysevent_upcall_status = 0;
    426 			/*
    427 			 * If the user has already signaled for delivery
    428 			 * resumption, continue.  Otherwise, we wait until
    429 			 * we are signaled to continue.
    430 			 */
    431 			if (log_event_delivery == LOGEVENT_DELIVERY_CONT) {
    432 				log_event_delivery = LOGEVENT_DELIVERY_OK;
    433 				continue;
    434 			} else {
    435 				log_event_delivery = LOGEVENT_DELIVERY_HOLD;
    436 			}
    437 
    438 			LOG_DEBUG1((CE_CONT, "log_event_deliver: EAGAIN\n"));
    439 			break;
    440 		default:
    441 			LOG_DEBUG((CE_CONT, "log_event_deliver: "
    442 				"upcall err %d\n", upcall_err));
    443 			sysevent_upcall_status = upcall_err;
    444 			/*
    445 			 * Signal everyone waiting that transport is down
    446 			 */
    447 			if (event_qfull_blocked > 0) {
    448 				mutex_enter(&event_qfull_mutex);
    449 				if (event_qfull_blocked > 0) {
    450 					cv_broadcast(&event_qfull_cv);
    451 				}
    452 				mutex_exit(&event_qfull_mutex);
    453 			}
    454 			break;
    455 		}
    456 
    457 		CALLB_CPR_SAFE_BEGIN(&cprinfo);
    458 		cv_wait(&log_event_cv, &eventq_head_mutex);
    459 		CALLB_CPR_SAFE_END(&cprinfo, &eventq_head_mutex);
    460 	}
    461 	/* NOTREACHED */
    462 }
    463 
    464 /*
    465  * log_event_init - Allocate and initialize log_event data structures.
    466  */
    467 void
    468 log_event_init()
    469 {
    470 	mutex_init(&eventq_head_mutex, NULL, MUTEX_DEFAULT, NULL);
    471 	mutex_init(&eventq_sent_mutex, NULL, MUTEX_DEFAULT, NULL);
    472 	cv_init(&log_event_cv, NULL, CV_DEFAULT, NULL);
    473 
    474 	mutex_init(&event_qfull_mutex, NULL, MUTEX_DEFAULT, NULL);
    475 	cv_init(&event_qfull_cv, NULL, CV_DEFAULT, NULL);
    476 
    477 	mutex_init(&event_pause_mutex, NULL, MUTEX_DEFAULT, NULL);
    478 	cv_init(&event_pause_cv, NULL, CV_DEFAULT, NULL);
    479 
    480 	mutex_init(&registered_channel_mutex, NULL, MUTEX_DEFAULT, NULL);
    481 	sysevent_evc_init();
    482 }
    483 
    484 /*
    485  * The following routines are used by kernel event publishers to
    486  * allocate, append and free event buffers
    487  */
    488 /*
    489  * sysevent_alloc - Allocate new eventq struct.  This element contains
    490  *			an event buffer that will be used in a subsequent
    491  *			call to log_sysevent.
    492  */
    493 sysevent_t *
    494 sysevent_alloc(char *class, char *subclass, char *pub, int flag)
    495 {
    496 	int payload_sz;
    497 	int class_sz, subclass_sz, pub_sz;
    498 	int aligned_class_sz, aligned_subclass_sz, aligned_pub_sz;
    499 	sysevent_t *ev;
    500 	log_eventq_t *q;
    501 
    502 	ASSERT(class != NULL);
    503 	ASSERT(subclass != NULL);
    504 	ASSERT(pub != NULL);
    505 
    506 	/*
    507 	 * Calculate and reserve space for the class, subclass and
    508 	 * publisher strings in the event buffer
    509 	 */
    510 	class_sz = strlen(class) + 1;
    511 	subclass_sz = strlen(subclass) + 1;
    512 	pub_sz = strlen(pub) + 1;
    513 
    514 	ASSERT((class_sz <= MAX_CLASS_LEN) && (subclass_sz
    515 	    <= MAX_SUBCLASS_LEN) && (pub_sz <= MAX_PUB_LEN));
    516 
    517 	/* String sizes must be 64-bit aligned in the event buffer */
    518 	aligned_class_sz = SE_ALIGN(class_sz);
    519 	aligned_subclass_sz = SE_ALIGN(subclass_sz);
    520 	aligned_pub_sz = SE_ALIGN(pub_sz);
    521 
    522 	payload_sz = (aligned_class_sz - sizeof (uint64_t)) +
    523 		(aligned_subclass_sz - sizeof (uint64_t)) +
    524 		(aligned_pub_sz - sizeof (uint64_t)) - sizeof (uint64_t);
    525 
    526 	/*
    527 	 * Allocate event buffer plus additional sysevent queue
    528 	 * and payload overhead.
    529 	 */
    530 	q = kmem_zalloc(sizeof (log_eventq_t) + payload_sz, flag);
    531 	if (q == NULL) {
    532 		return (NULL);
    533 	}
    534 
    535 	/* Initialize the event buffer data */
    536 	ev = (sysevent_t *)&q->arg.buf;
    537 	SE_VERSION(ev) = SYS_EVENT_VERSION;
    538 	bcopy(class, SE_CLASS_NAME(ev), class_sz);
    539 
    540 	SE_SUBCLASS_OFF(ev) = SE_ALIGN(offsetof(sysevent_impl_t, se_class_name))
    541 		+ aligned_class_sz;
    542 	bcopy(subclass, SE_SUBCLASS_NAME(ev), subclass_sz);
    543 
    544 	SE_PUB_OFF(ev) = SE_SUBCLASS_OFF(ev) + aligned_subclass_sz;
    545 	bcopy(pub, SE_PUB_NAME(ev), pub_sz);
    546 
    547 	SE_ATTR_PTR(ev) = UINT64_C(0);
    548 	SE_PAYLOAD_SZ(ev) = payload_sz;
    549 
    550 	return (ev);
    551 }
    552 
    553 /*
    554  * sysevent_free - Free event buffer and any attribute data.
    555  */
    556 void
    557 sysevent_free(sysevent_t *ev)
    558 {
    559 	log_eventq_t *q;
    560 	nvlist_t *nvl;
    561 
    562 	ASSERT(ev != NULL);
    563 	q = (log_eventq_t *)((caddr_t)ev - offsetof(log_eventq_t, arg.buf));
    564 	nvl = (nvlist_t *)(uintptr_t)SE_ATTR_PTR(ev);
    565 
    566 	if (nvl != NULL) {
    567 		size_t size = 0;
    568 		(void) nvlist_size(nvl, &size, encoding);
    569 		SE_PAYLOAD_SZ(ev) -= size;
    570 		nvlist_free(nvl);
    571 	}
    572 	kmem_free(q, sizeof (log_eventq_t) + SE_PAYLOAD_SZ(ev));
    573 }
    574 
    575 /*
    576  * free_packed_event - Free packed event buffer
    577  */
    578 static void
    579 free_packed_event(sysevent_t *ev)
    580 {
    581 	log_eventq_t *q;
    582 
    583 	ASSERT(ev != NULL);
    584 	q = (log_eventq_t *)((caddr_t)ev - offsetof(log_eventq_t, arg.buf));
    585 
    586 	kmem_free(q, sizeof (log_eventq_t) + SE_PAYLOAD_SZ(ev));
    587 }
    588 
    589 /*
    590  * sysevent_add_attr - Add new attribute element to an event attribute list
    591  *			If attribute list is NULL, start a new list.
    592  */
    593 int
    594 sysevent_add_attr(sysevent_attr_list_t **ev_attr_list, char *name,
    595 	sysevent_value_t *se_value, int flag)
    596 {
    597 	int error;
    598 	nvlist_t **nvlp = (nvlist_t **)ev_attr_list;
    599 
    600 	if (nvlp == NULL || se_value == NULL) {
    601 		return (SE_EINVAL);
    602 	}
    603 
    604 	/*
    605 	 * attr_sz is composed of the value data size + the name data size +
    606 	 * any header data.  64-bit aligned.
    607 	 */
    608 	if (strlen(name) >= MAX_ATTR_NAME) {
    609 		return (SE_EINVAL);
    610 	}
    611 
    612 	/*
    613 	 * Allocate nvlist
    614 	 */
    615 	if ((*nvlp == NULL) &&
    616 	    (nvlist_alloc(nvlp, NV_UNIQUE_NAME_TYPE, flag) != 0))
    617 		return (SE_ENOMEM);
    618 
    619 	/* add the attribute */
    620 	switch (se_value->value_type) {
    621 	case SE_DATA_TYPE_BYTE:
    622 		error = nvlist_add_byte(*ev_attr_list, name,
    623 		    se_value->value.sv_byte);
    624 		break;
    625 	case SE_DATA_TYPE_INT16:
    626 		error = nvlist_add_int16(*ev_attr_list, name,
    627 		    se_value->value.sv_int16);
    628 		break;
    629 	case SE_DATA_TYPE_UINT16:
    630 		error = nvlist_add_uint16(*ev_attr_list, name,
    631 		    se_value->value.sv_uint16);
    632 		break;
    633 	case SE_DATA_TYPE_INT32:
    634 		error = nvlist_add_int32(*ev_attr_list, name,
    635 		    se_value->value.sv_int32);
    636 		break;
    637 	case SE_DATA_TYPE_UINT32:
    638 		error = nvlist_add_uint32(*ev_attr_list, name,
    639 		    se_value->value.sv_uint32);
    640 		break;
    641 	case SE_DATA_TYPE_INT64:
    642 		error = nvlist_add_int64(*ev_attr_list, name,
    643 		    se_value->value.sv_int64);
    644 		break;
    645 	case SE_DATA_TYPE_UINT64:
    646 		error = nvlist_add_uint64(*ev_attr_list, name,
    647 		    se_value->value.sv_uint64);
    648 		break;
    649 	case SE_DATA_TYPE_STRING:
    650 		if (strlen((char *)se_value->value.sv_string) >= MAX_STRING_SZ)
    651 			return (SE_EINVAL);
    652 		error = nvlist_add_string(*ev_attr_list, name,
    653 		    se_value->value.sv_string);
    654 		break;
    655 	case SE_DATA_TYPE_BYTES:
    656 		if (se_value->value.sv_bytes.size > MAX_BYTE_ARRAY)
    657 			return (SE_EINVAL);
    658 		error = nvlist_add_byte_array(*ev_attr_list, name,
    659 		    se_value->value.sv_bytes.data,
    660 		    se_value->value.sv_bytes.size);
    661 		break;
    662 	case SE_DATA_TYPE_TIME:
    663 		error = nvlist_add_hrtime(*ev_attr_list, name,
    664 		    se_value->value.sv_time);
    665 		break;
    666 	default:
    667 		return (SE_EINVAL);
    668 	}
    669 
    670 	return (error ? SE_ENOMEM : 0);
    671 }
    672 
    673 /*
    674  * sysevent_free_attr - Free an attribute list not associated with an
    675  *			event buffer.
    676  */
    677 void
    678 sysevent_free_attr(sysevent_attr_list_t *ev_attr_list)
    679 {
    680 	nvlist_free((nvlist_t *)ev_attr_list);
    681 }
    682 
    683 /*
    684  * sysevent_attach_attributes - Attach an attribute list to an event buffer.
    685  *
    686  *	This data will be re-packed into contiguous memory when the event
    687  *	buffer is posted to log_sysevent.
    688  */
    689 int
    690 sysevent_attach_attributes(sysevent_t *ev, sysevent_attr_list_t *ev_attr_list)
    691 {
    692 	size_t size = 0;
    693 
    694 	if (SE_ATTR_PTR(ev) != UINT64_C(0)) {
    695 		return (SE_EINVAL);
    696 	}
    697 
    698 	SE_ATTR_PTR(ev) = (uintptr_t)ev_attr_list;
    699 	(void) nvlist_size((nvlist_t *)ev_attr_list, &size, encoding);
    700 	SE_PAYLOAD_SZ(ev) += size;
    701 	SE_FLAG(ev) = 0;
    702 
    703 	return (0);
    704 }
    705 
    706 /*
    707  * sysevent_detach_attributes - Detach but don't free attribute list from the
    708  *				event buffer.
    709  */
    710 void
    711 sysevent_detach_attributes(sysevent_t *ev)
    712 {
    713 	size_t size = 0;
    714 	nvlist_t *nvl;
    715 
    716 	if ((nvl = (nvlist_t *)(uintptr_t)SE_ATTR_PTR(ev)) == NULL) {
    717 		return;
    718 	}
    719 
    720 	SE_ATTR_PTR(ev) = UINT64_C(0);
    721 	(void) nvlist_size(nvl, &size, encoding);
    722 	SE_PAYLOAD_SZ(ev) -= size;
    723 	ASSERT(SE_PAYLOAD_SZ(ev) >= 0);
    724 }
    725 
    726 /*
    727  * sysevent_attr_name - Get name of attribute
    728  */
    729 char *
    730 sysevent_attr_name(sysevent_attr_t *attr)
    731 {
    732 	if (attr == NULL) {
    733 		return (NULL);
    734 	}
    735 
    736 	return (nvpair_name(attr));
    737 }
    738 
    739 /*
    740  * sysevent_attr_type - Get type of attribute
    741  */
    742 int
    743 sysevent_attr_type(sysevent_attr_t *attr)
    744 {
    745 	/*
    746 	 * The SE_DATA_TYPE_* are typedef'ed to be the
    747 	 * same value as DATA_TYPE_*
    748 	 */
    749 	return (nvpair_type((nvpair_t *)attr));
    750 }
    751 
    752 /*
    753  * Repack event buffer into contiguous memory
    754  */
    755 static sysevent_t *
    756 se_repack(sysevent_t *ev, int flag)
    757 {
    758 	size_t copy_len;
    759 	caddr_t attr;
    760 	size_t size;
    761 	uint64_t attr_offset;
    762 	sysevent_t *copy;
    763 	log_eventq_t *qcopy;
    764 	sysevent_attr_list_t *nvl;
    765 
    766 	copy_len = sizeof (log_eventq_t) + SE_PAYLOAD_SZ(ev);
    767 	qcopy = kmem_zalloc(copy_len, flag);
    768 	if (qcopy == NULL) {
    769 		return (NULL);
    770 	}
    771 	copy = (sysevent_t *)&qcopy->arg.buf;
    772 
    773 	/*
    774 	 * Copy event header, class, subclass and publisher names
    775 	 * Set the attribute offset (in number of bytes) to contiguous
    776 	 * memory after the header.
    777 	 */
    778 
    779 	attr_offset = SE_ATTR_OFF(ev);
    780 
    781 	ASSERT((caddr_t)copy + attr_offset <= (caddr_t)copy + copy_len);
    782 
    783 	bcopy(ev, copy, attr_offset);
    784 
    785 	/* Check if attribute list exists */
    786 	if ((nvl = (nvlist_t *)(uintptr_t)SE_ATTR_PTR(ev)) == NULL) {
    787 		return (copy);
    788 	}
    789 
    790 	/*
    791 	 * Copy attribute data to contiguous memory
    792 	 */
    793 	attr = (char *)copy + attr_offset;
    794 	(void) nvlist_size(nvl, &size, encoding);
    795 	if (nvlist_pack(nvl, &attr, &size, encoding, flag) != 0) {
    796 		kmem_free(qcopy, copy_len);
    797 		return (NULL);
    798 	}
    799 	SE_ATTR_PTR(copy) = UINT64_C(0);
    800 	SE_FLAG(copy) = SE_PACKED_BUF;
    801 
    802 	return (copy);
    803 }
    804 
    805 /*
    806  * The sysevent registration provides a persistent and reliable database
    807  * for channel information for sysevent channel publishers and
    808  * subscribers.
    809  *
    810  * A channel is created and maintained by the kernel upon the first
    811  * SE_OPEN_REGISTRATION operation to log_sysevent_register().  Channel
    812  * event subscription information is updated as publishers or subscribers
    813  * perform subsequent operations (SE_BIND_REGISTRATION, SE_REGISTER,
    814  * SE_UNREGISTER and SE_UNBIND_REGISTRATION).
    815  *
    816  * For consistency, id's are assigned for every publisher or subscriber
    817  * bound to a particular channel.  The id's are used to constrain resources
    818  * and perform subscription lookup.
    819  *
    820  * Associated with each channel is a hashed list of the current subscriptions
    821  * based upon event class and subclasses.  A subscription contains a class name,
    822  * list of possible subclasses and an array of subscriber ids.  Subscriptions
    823  * are updated for every SE_REGISTER or SE_UNREGISTER operation.
    824  *
    825  * Channels are closed once the last subscriber or publisher performs a
    826  * SE_CLOSE_REGISTRATION operation.  All resources associated with the named
    827  * channel are freed upon last close.
    828  *
    829  * Locking:
    830  *	Every operation to log_sysevent() is protected by a single lock,
    831  *	registered_channel_mutex.  It is expected that the granularity of
    832  *	a single lock is sufficient given the frequency that updates will
    833  *	occur.
    834  *
    835  *	If this locking strategy proves to be too contentious, a per-hash
    836  *	or per-channel locking strategy may be implemented.
    837  */
    838 
    839 
/*
 * CHANN_HASH maps a channel name to its bucket index in
 * registered_channels[] by reducing hash_func() modulo CHAN_HASH_SZ.
 */
#define	CHANN_HASH(channel_name)	(hash_func(channel_name) \
					% CHAN_HASH_SZ)

/* Hash table of channel descriptors; each bucket is chained via scd_next */
sysevent_channel_descriptor_t *registered_channels[CHAN_HASH_SZ];
/* Incremented by open_channel() each time a new descriptor is added */
static int channel_cnt;
/* Forward declaration; the definition is not part of this section */
static void remove_all_class(sysevent_channel_descriptor_t *chan,
	uint32_t sub_id);
    847 
    848 static uint32_t
    849 hash_func(const char *s)
    850 {
    851 	uint32_t result = 0;
    852 	uint_t g;
    853 
    854 	while (*s != '\0') {
    855 		result <<= 4;
    856 		result += (uint32_t)*s++;
    857 		g = result & 0xf0000000;
    858 		if (g != 0) {
    859 			result ^= g >> 24;
    860 			result ^= g;
    861 		}
    862 	}
    863 
    864 	return (result);
    865 }
    866 
    867 static sysevent_channel_descriptor_t *
    868 get_channel(char *channel_name)
    869 {
    870 	int hash_index;
    871 	sysevent_channel_descriptor_t *chan_list;
    872 
    873 	if (channel_name == NULL)
    874 		return (NULL);
    875 
    876 	/* Find channel descriptor */
    877 	hash_index = CHANN_HASH(channel_name);
    878 	chan_list = registered_channels[hash_index];
    879 	while (chan_list != NULL) {
    880 		if (strcmp(chan_list->scd_channel_name, channel_name) == 0) {
    881 			break;
    882 		} else {
    883 			chan_list = chan_list->scd_next;
    884 		}
    885 	}
    886 
    887 	return (chan_list);
    888 }
    889 
    890 static class_lst_t *
    891 create_channel_registration(sysevent_channel_descriptor_t *chan,
    892     char *event_class, int index)
    893 {
    894 	size_t class_len;
    895 	class_lst_t *c_list;
    896 
    897 	class_len = strlen(event_class) + 1;
    898 	c_list = kmem_zalloc(sizeof (class_lst_t), KM_SLEEP);
    899 	c_list->cl_name = kmem_zalloc(class_len, KM_SLEEP);
    900 	bcopy(event_class, c_list->cl_name, class_len);
    901 
    902 	c_list->cl_subclass_list =
    903 	    kmem_zalloc(sizeof (subclass_lst_t), KM_SLEEP);
    904 	c_list->cl_subclass_list->sl_name =
    905 	    kmem_zalloc(sizeof (EC_SUB_ALL), KM_SLEEP);
    906 	bcopy(EC_SUB_ALL, c_list->cl_subclass_list->sl_name,
    907 	    sizeof (EC_SUB_ALL));
    908 
    909 	c_list->cl_next = chan->scd_class_list_tbl[index];
    910 	chan->scd_class_list_tbl[index] = c_list;
    911 
    912 	return (c_list);
    913 }
    914 
    915 static void
    916 free_channel_registration(sysevent_channel_descriptor_t *chan)
    917 {
    918 	int i;
    919 	class_lst_t *clist, *next_clist;
    920 	subclass_lst_t *sclist, *next_sc;
    921 
    922 	for (i = 0; i <= CLASS_HASH_SZ; ++i) {
    923 
    924 		clist = chan->scd_class_list_tbl[i];
    925 		while (clist != NULL) {
    926 			sclist = clist->cl_subclass_list;
    927 			while (sclist != NULL) {
    928 				kmem_free(sclist->sl_name,
    929 				    strlen(sclist->sl_name) + 1);
    930 				next_sc = sclist->sl_next;
    931 				kmem_free(sclist, sizeof (subclass_lst_t));
    932 				sclist = next_sc;
    933 			}
    934 			kmem_free(clist->cl_name,
    935 			    strlen(clist->cl_name) + 1);
    936 			next_clist = clist->cl_next;
    937 			kmem_free(clist, sizeof (class_lst_t));
    938 			clist = next_clist;
    939 		}
    940 	}
    941 	chan->scd_class_list_tbl[0] = NULL;
    942 }
    943 
    944 static int
    945 open_channel(char *channel_name)
    946 {
    947 	int hash_index;
    948 	sysevent_channel_descriptor_t *chan, *chan_list;
    949 
    950 
    951 	if (channel_cnt > MAX_CHAN) {
    952 		return (-1);
    953 	}
    954 
    955 	/* Find channel descriptor */
    956 	hash_index = CHANN_HASH(channel_name);
    957 	chan_list = registered_channels[hash_index];
    958 	while (chan_list != NULL) {
    959 		if (strcmp(chan_list->scd_channel_name, channel_name) == 0) {
    960 			chan_list->scd_ref_cnt++;
    961 			kmem_free(channel_name, strlen(channel_name) + 1);
    962 			return (0);
    963 		} else {
    964 			chan_list = chan_list->scd_next;
    965 		}
    966 	}
    967 
    968 
    969 	/* New channel descriptor */
    970 	chan = kmem_zalloc(sizeof (sysevent_channel_descriptor_t), KM_SLEEP);
    971 	chan->scd_channel_name = channel_name;
    972 
    973 	/*
    974 	 * Create subscriber ids in the range [1, MAX_SUBSCRIBERS).
    975 	 * Subscriber id 0 is never allocated, but is used as a reserved id
    976 	 * by libsysevent
    977 	 */
    978 	if ((chan->scd_subscriber_cache = vmem_create(channel_name, (void *)1,
    979 	    MAX_SUBSCRIBERS + 1, 1, NULL, NULL, NULL, 0,
    980 	    VM_NOSLEEP | VMC_IDENTIFIER)) == NULL) {
    981 		kmem_free(chan, sizeof (sysevent_channel_descriptor_t));
    982 		return (-1);
    983 	}
    984 	if ((chan->scd_publisher_cache = vmem_create(channel_name, (void *)1,
    985 	    MAX_PUBLISHERS + 1, 1, NULL, NULL, NULL, 0,
    986 	    VM_NOSLEEP | VMC_IDENTIFIER)) == NULL) {
    987 		vmem_destroy(chan->scd_subscriber_cache);
    988 		kmem_free(chan, sizeof (sysevent_channel_descriptor_t));
    989 		return (-1);
    990 	}
    991 
    992 	chan->scd_ref_cnt = 1;
    993 
    994 	(void) create_channel_registration(chan, EC_ALL, 0);
    995 
    996 	if (registered_channels[hash_index] != NULL)
    997 		chan->scd_next = registered_channels[hash_index];
    998 
    999 	registered_channels[hash_index] = chan;
   1000 
   1001 	++channel_cnt;
   1002 
   1003 	return (0);
   1004 }
   1005 
   1006 static void
   1007 close_channel(char *channel_name)
   1008 {
   1009 	int hash_index;
   1010 	sysevent_channel_descriptor_t *chan, *prev_chan;
   1011 
   1012 	/* Find channel descriptor */
   1013 	hash_index = CHANN_HASH(channel_name);
   1014 	prev_chan = chan = registered_channels[hash_index];
   1015 
   1016 	while (chan != NULL) {
   1017 		if (strcmp(chan->scd_channel_name, channel_name) == 0) {
   1018 			break;
   1019 		} else {
   1020 			prev_chan = chan;
   1021 			chan = chan->scd_next;
   1022 		}
   1023 	}
   1024 
   1025 	if (chan == NULL)
   1026 		return;
   1027 
   1028 	chan->scd_ref_cnt--;
   1029 	if (chan->scd_ref_cnt > 0)
   1030 		return;
   1031 
   1032 	free_channel_registration(chan);
   1033 	vmem_destroy(chan->scd_subscriber_cache);
   1034 	vmem_destroy(chan->scd_publisher_cache);
   1035 	kmem_free(chan->scd_channel_name,
   1036 	    strlen(chan->scd_channel_name) + 1);
   1037 	if (registered_channels[hash_index] == chan)
   1038 		registered_channels[hash_index] = chan->scd_next;
   1039 	else
   1040 		prev_chan->scd_next = chan->scd_next;
   1041 	kmem_free(chan, sizeof (sysevent_channel_descriptor_t));
   1042 	--channel_cnt;
   1043 }
   1044 
   1045 static id_t
   1046 bind_common(sysevent_channel_descriptor_t *chan, int type)
   1047 {
   1048 	id_t id;
   1049 
   1050 	if (type == SUBSCRIBER) {
   1051 		id = (id_t)(uintptr_t)vmem_alloc(chan->scd_subscriber_cache, 1,
   1052 		    VM_NOSLEEP | VM_NEXTFIT);
   1053 		if (id <= 0 || id > MAX_SUBSCRIBERS)
   1054 			return (0);
   1055 		chan->scd_subscriber_ids[id] = 1;
   1056 	} else {
   1057 		id = (id_t)(uintptr_t)vmem_alloc(chan->scd_publisher_cache, 1,
   1058 		    VM_NOSLEEP | VM_NEXTFIT);
   1059 		if (id <= 0 || id > MAX_PUBLISHERS)
   1060 			return (0);
   1061 		chan->scd_publisher_ids[id] = 1;
   1062 	}
   1063 
   1064 	return (id);
   1065 }
   1066 
   1067 static int
   1068 unbind_common(sysevent_channel_descriptor_t *chan, int type, id_t id)
   1069 {
   1070 	if (type == SUBSCRIBER) {
   1071 		if (id <= 0 || id > MAX_SUBSCRIBERS)
   1072 			return (0);
   1073 		if (chan->scd_subscriber_ids[id] == 0)
   1074 			return (0);
   1075 		(void) remove_all_class(chan, id);
   1076 		chan->scd_subscriber_ids[id] = 0;
   1077 		vmem_free(chan->scd_subscriber_cache, (void *)(uintptr_t)id, 1);
   1078 	} else {
   1079 		if (id <= 0 || id > MAX_PUBLISHERS)
   1080 			return (0);
   1081 		if (chan->scd_publisher_ids[id] == 0)
   1082 			return (0);
   1083 		chan->scd_publisher_ids[id] = 0;
   1084 		vmem_free(chan->scd_publisher_cache, (void *)(uintptr_t)id, 1);
   1085 	}
   1086 
   1087 	return (1);
   1088 }
   1089 
   1090 static void
   1091 release_id(sysevent_channel_descriptor_t *chan, int type, id_t id)
   1092 {
   1093 	if (unbind_common(chan, type, id))
   1094 		close_channel(chan->scd_channel_name);
   1095 }
   1096 
   1097 static subclass_lst_t *
   1098 find_subclass(class_lst_t *c_list, char *subclass)
   1099 {
   1100 	subclass_lst_t *sc_list;
   1101 
   1102 	if (c_list == NULL)
   1103 		return (NULL);
   1104 
   1105 	sc_list = c_list->cl_subclass_list;
   1106 
   1107 	while (sc_list != NULL) {
   1108 		if (strcmp(sc_list->sl_name, subclass) == 0) {
   1109 			return (sc_list);
   1110 		}
   1111 		sc_list = sc_list->sl_next;
   1112 	}
   1113 
   1114 	return (NULL);
   1115 }
   1116 
   1117 static void
   1118 insert_subclass(class_lst_t *c_list, char **subclass_names,
   1119 	int subclass_num, uint32_t sub_id)
   1120 {
   1121 	int i, subclass_sz;
   1122 	subclass_lst_t *sc_list;
   1123 
   1124 	for (i = 0; i < subclass_num; ++i) {
   1125 		if ((sc_list = find_subclass(c_list, subclass_names[i]))
   1126 		    != NULL) {
   1127 			sc_list->sl_num[sub_id] = 1;
   1128 		} else {
   1129 
   1130 			sc_list = kmem_zalloc(sizeof (subclass_lst_t),
   1131 			    KM_SLEEP);
   1132 			subclass_sz = strlen(subclass_names[i]) + 1;
   1133 			sc_list->sl_name = kmem_zalloc(