Home | History | Annotate | Download | only in startd
      1 /*
      2  * CDDL HEADER START
      3  *
      4  * The contents of this file are subject to the terms of the
      5  * Common Development and Distribution License (the "License").
      6  * You may not use this file except in compliance with the License.
      7  *
      8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
      9  * or http://www.opensolaris.org/os/licensing.
     10  * See the License for the specific language governing permissions
     11  * and limitations under the License.
     12  *
     13  * When distributing Covered Code, include this CDDL HEADER in each
     14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
     15  * If applicable, add the following below this CDDL HEADER, with the
     16  * fields enclosed by brackets "[]" replaced with your own identifying
     17  * information: Portions Copyright [yyyy] [name of copyright owner]
     18  *
     19  * CDDL HEADER END
     20  */
     21 /*
     22  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
     23  * Use is subject to license terms.
     24  */
     25 
     26 /*
     27  * restarter.c - service manipulation
     28  *
     29  * This component manages services whose restarter is svc.startd, the standard
     30  * restarter.  It translates restarter protocol events from the graph engine
     31  * into actions on processes, as a delegated restarter would do.
     32  *
     33  * The master restarter manages a number of always-running threads:
     34  *   - restarter event thread: events from the graph engine
     35  *   - timeout thread: thread to fire queued timeouts
     36  *   - contract thread: thread to handle contract events
     37  *   - wait thread: thread to handle wait-based services
     38  *
     39  * The other threads are created as-needed:
     40  *   - per-instance method threads
     41  *   - per-instance event processing threads
     42  *
     43  * The interaction of all threads must result in the following conditions
     44  * being satisfied (on a per-instance basis):
     45  *   - restarter events must be processed in order
     46  *   - method execution must be serialized
     47  *   - instance delete must be held until outstanding methods are complete
     48  *   - contract events shouldn't be processed while a method is running
     49  *   - timeouts should fire even when a method is running
     50  *
     51  * Service instances are represented by restarter_inst_t's and are kept in the
     52  * instance_list list.
     53  *
     54  * Service States
     55  *   The current state of a service instance is kept in
     56  *   restarter_inst_t->ri_i.i_state.  If transition to a new state could take
     57  *   some time, then before we effect the transition we set
     58  *   restarter_inst_t->ri_i.i_next_state to the target state, and afterwards we
     59  *   rotate i_next_state to i_state and set i_next_state to
     60  *   RESTARTER_STATE_NONE.  So usually i_next_state is _NONE when ri_lock is not
     61  *   held.  The exception is when we launch methods, which are done with
     62  *   a separate thread.  To keep any other threads from grabbing ri_lock before
     63  *   method_thread() does, we set ri_method_thread to the thread id of the
     64  *   method thread, and when it is nonzero any thread with a different thread id
     65  *   waits on ri_method_cv.
     66  *
     67  * Method execution is serialized by blocking on ri_method_cv in
     68  * inst_lookup_by_id() and waiting for a 0 value of ri_method_thread.  This
     69  * also prevents the instance structure from being deleted until all
     70  * outstanding operations such as method_thread() have finished.
     71  *
     72  * Lock ordering:
     73  *
     74  * dgraph_lock [can be held when taking:]
     75  *   utmpx_lock
     76  *   dictionary->dict_lock
     77  *   st->st_load_lock
     78  *   wait_info_lock
     79  *   ru->restarter_update_lock
     80  *     restarter_queue->rpeq_lock
     81  *   instance_list.ril_lock
     82  *     inst->ri_lock
     83  *   st->st_configd_live_lock
     84  *
     85  * instance_list.ril_lock
     86  *   graph_queue->gpeq_lock
     87  *   gu->gu_lock
     88  *   st->st_configd_live_lock
     89  *   dictionary->dict_lock
     90  *   inst->ri_lock
     91  *     graph_queue->gpeq_lock
     92  *     gu->gu_lock
     93  *     tu->tu_lock
     94  *     tq->tq_lock
     95  *     inst->ri_queue_lock
     96  *       wait_info_lock
     97  *       bp->cb_lock
     98  *     utmpx_lock
     99  *
    100  * single_user_thread_lock
    101  *   wait_info_lock
    102  *   utmpx_lock
    103  *
    104  * gu_freeze_lock
    105  *
    106  * logbuf_mutex nests inside pretty much everything.
    107  */
    108 
    109 #include <sys/contract/process.h>
    110 #include <sys/ctfs.h>
    111 #include <sys/stat.h>
    112 #include <sys/time.h>
    113 #include <sys/types.h>
    114 #include <sys/uio.h>
    115 #include <sys/wait.h>
    116 #include <assert.h>
    117 #include <errno.h>
    118 #include <fcntl.h>
    119 #include <libcontract.h>
    120 #include <libcontract_priv.h>
    121 #include <libintl.h>
    122 #include <librestart.h>
    123 #include <librestart_priv.h>
    124 #include <libuutil.h>
    125 #include <limits.h>
    126 #include <poll.h>
    127 #include <port.h>
    128 #include <pthread.h>
    129 #include <stdarg.h>
    130 #include <stdio.h>
    131 #include <strings.h>
    132 #include <unistd.h>
    133 
    134 #include "startd.h"
    135 #include "protocol.h"
    136 
/* uu_list pool used to initialize the list node of each restarter_inst_t. */
static uu_list_pool_t *restarter_instance_pool;
/*
 * The set of instances managed by this restarter.  Every access to
 * ril_instance_list in this file is made with ril_lock held.
 */
static restarter_instance_list_t instance_list;

/* uu_list pool from which each instance's ri_queue list is created. */
static uu_list_pool_t *restarter_queue_pool;
    141 
    142 /*ARGSUSED*/
    143 static int
    144 restarter_instance_compare(const void *lc_arg, const void *rc_arg,
    145     void *private)
    146 {
    147 	int lc_id = ((const restarter_inst_t *)lc_arg)->ri_id;
    148 	int rc_id = *(int *)rc_arg;
    149 
    150 	if (lc_id > rc_id)
    151 		return (1);
    152 	if (lc_id < rc_id)
    153 		return (-1);
    154 	return (0);
    155 }
    156 
    157 static restarter_inst_t *
    158 inst_lookup_by_name(const char *name)
    159 {
    160 	int id;
    161 
    162 	id = dict_lookup_byname(name);
    163 	if (id == -1)
    164 		return (NULL);
    165 
    166 	return (inst_lookup_by_id(id));
    167 }
    168 
    169 restarter_inst_t *
    170 inst_lookup_by_id(int id)
    171 {
    172 	restarter_inst_t *inst;
    173 
    174 	MUTEX_LOCK(&instance_list.ril_lock);
    175 	inst = uu_list_find(instance_list.ril_instance_list, &id, NULL, NULL);
    176 	if (inst != NULL)
    177 		MUTEX_LOCK(&inst->ri_lock);
    178 	MUTEX_UNLOCK(&instance_list.ril_lock);
    179 
    180 	if (inst != NULL) {
    181 		while (inst->ri_method_thread != 0 &&
    182 		    !pthread_equal(inst->ri_method_thread, pthread_self())) {
    183 			++inst->ri_method_waiters;
    184 			(void) pthread_cond_wait(&inst->ri_method_cv,
    185 			    &inst->ri_lock);
    186 			assert(inst->ri_method_waiters > 0);
    187 			--inst->ri_method_waiters;
    188 		}
    189 	}
    190 
    191 	return (inst);
    192 }
    193 
    194 static restarter_inst_t *
    195 inst_lookup_queue(const char *name)
    196 {
    197 	int id;
    198 	restarter_inst_t *inst;
    199 
    200 	id = dict_lookup_byname(name);
    201 	if (id == -1)
    202 		return (NULL);
    203 
    204 	MUTEX_LOCK(&instance_list.ril_lock);
    205 	inst = uu_list_find(instance_list.ril_instance_list, &id, NULL, NULL);
    206 	if (inst != NULL)
    207 		MUTEX_LOCK(&inst->ri_queue_lock);
    208 	MUTEX_UNLOCK(&instance_list.ril_lock);
    209 
    210 	return (inst);
    211 }
    212 
    213 const char *
    214 service_style(int flags)
    215 {
    216 	switch (flags & RINST_STYLE_MASK) {
    217 	case RINST_CONTRACT:	return ("contract");
    218 	case RINST_TRANSIENT:	return ("transient");
    219 	case RINST_WAIT:	return ("wait");
    220 
    221 	default:
    222 #ifndef NDEBUG
    223 		uu_warn("%s:%d: Bad flags 0x%x.\n", __FILE__, __LINE__, flags);
    224 #endif
    225 		abort();
    226 		/* NOTREACHED */
    227 	}
    228 }
    229 
    230 /*
    231  * Fails with ECONNABORTED or ECANCELED.
    232  */
    233 static int
    234 check_contract(restarter_inst_t *inst, boolean_t primary,
    235     scf_instance_t *scf_inst)
    236 {
    237 	ctid_t *ctidp;
    238 	int fd, r;
    239 
    240 	ctidp = primary ? &inst->ri_i.i_primary_ctid :
    241 	    &inst->ri_i.i_transient_ctid;
    242 
    243 	assert(*ctidp >= 1);
    244 
    245 	fd = contract_open(*ctidp, NULL, "status", O_RDONLY);
    246 	if (fd >= 0) {
    247 		r = close(fd);
    248 		assert(r == 0);
    249 		return (0);
    250 	}
    251 
    252 	r = restarter_remove_contract(scf_inst, *ctidp, primary ?
    253 	    RESTARTER_CONTRACT_PRIMARY : RESTARTER_CONTRACT_TRANSIENT);
    254 	switch (r) {
    255 	case 0:
    256 	case ECONNABORTED:
    257 	case ECANCELED:
    258 		*ctidp = 0;
    259 		return (r);
    260 
    261 	case ENOMEM:
    262 		uu_die("Out of memory\n");
    263 		/* NOTREACHED */
    264 
    265 	case EPERM:
    266 		uu_die("Insufficient privilege.\n");
    267 		/* NOTREACHED */
    268 
    269 	case EACCES:
    270 		uu_die("Repository backend access denied.\n");
    271 		/* NOTREACHED */
    272 
    273 	case EROFS:
    274 		log_error(LOG_INFO, "Could not remove unusable contract id %ld "
    275 		    "for %s from repository.\n", *ctidp, inst->ri_i.i_fmri);
    276 		return (0);
    277 
    278 	case EINVAL:
    279 	case EBADF:
    280 	default:
    281 		assert(0);
    282 		abort();
    283 		/* NOTREACHED */
    284 	}
    285 }
    286 
    287 static int stop_instance(scf_handle_t *, restarter_inst_t *, stop_cause_t);
    288 
    289 /*
    290  * int restarter_insert_inst(scf_handle_t *, char *)
    291  *   If the inst is already in the restarter list, return its id.  If the inst
    292  *   is not in the restarter list, initialize a restarter_inst_t, initialize its
    293  *   states, insert it into the list, and return 0.
    294  *
    295  *   Fails with
    296  *     ENOENT - name is not in the repository
    297  */
    298 static int
    299 restarter_insert_inst(scf_handle_t *h, const char *name)
    300 {
    301 	int id, r;
    302 	restarter_inst_t *inst;
    303 	uu_list_index_t idx;
    304 	scf_service_t *scf_svc;
    305 	scf_instance_t *scf_inst;
    306 	scf_snapshot_t *snap = NULL;
    307 	scf_propertygroup_t *pg;
    308 	char *svc_name, *inst_name;
    309 	char logfilebuf[PATH_MAX];
    310 	char *c;
    311 	boolean_t do_commit_states;
    312 	restarter_instance_state_t state, next_state;
    313 	protocol_states_t *ps;
    314 	pid_t start_pid;
    315 
    316 	MUTEX_LOCK(&instance_list.ril_lock);
    317 
    318 	/*
    319 	 * We don't use inst_lookup_by_name() here because we want the lookup
    320 	 * & insert to be atomic.
    321 	 */
    322 	id = dict_lookup_byname(name);
    323 	if (id != -1) {
    324 		inst = uu_list_find(instance_list.ril_instance_list, &id, NULL,
    325 		    &idx);
    326 		if (inst != NULL) {
    327 			MUTEX_UNLOCK(&instance_list.ril_lock);
    328 			return (0);
    329 		}
    330 	}
    331 
    332 	/* Allocate an instance */
    333 	inst = startd_zalloc(sizeof (restarter_inst_t));
    334 	inst->ri_utmpx_prefix = startd_alloc(max_scf_value_size);
    335 	inst->ri_utmpx_prefix[0] = '\0';
    336 
    337 	inst->ri_i.i_fmri = startd_alloc(strlen(name) + 1);
    338 	(void) strcpy((char *)inst->ri_i.i_fmri, name);
    339 
    340 	inst->ri_queue = startd_list_create(restarter_queue_pool, inst, 0);
    341 
    342 	/*
    343 	 * id shouldn't be -1 since we use the same dictionary as graph.c, but
    344 	 * just in case.
    345 	 */
    346 	inst->ri_id = (id != -1 ? id : dict_insert(name));
    347 
    348 	special_online_hooks_get(name, &inst->ri_pre_online_hook,
    349 	    &inst->ri_post_online_hook, &inst->ri_post_offline_hook);
    350 
    351 	scf_svc = safe_scf_service_create(h);
    352 	scf_inst = safe_scf_instance_create(h);
    353 	pg = safe_scf_pg_create(h);
    354 	svc_name = startd_alloc(max_scf_name_size);
    355 	inst_name = startd_alloc(max_scf_name_size);
    356 
    357 rep_retry:
    358 	if (snap != NULL)
    359 		scf_snapshot_destroy(snap);
    360 	if (inst->ri_logstem != NULL)
    361 		startd_free(inst->ri_logstem, PATH_MAX);
    362 	if (inst->ri_common_name != NULL)
    363 		startd_free(inst->ri_common_name, max_scf_value_size);
    364 	if (inst->ri_C_common_name != NULL)
    365 		startd_free(inst->ri_C_common_name, max_scf_value_size);
    366 	snap = NULL;
    367 	inst->ri_logstem = NULL;
    368 	inst->ri_common_name = NULL;
    369 	inst->ri_C_common_name = NULL;
    370 
    371 	if (scf_handle_decode_fmri(h, name, NULL, scf_svc, scf_inst, NULL,
    372 	    NULL, SCF_DECODE_FMRI_EXACT) != 0) {
    373 		switch (scf_error()) {
    374 		case SCF_ERROR_CONNECTION_BROKEN:
    375 			libscf_handle_rebind(h);
    376 			goto rep_retry;
    377 
    378 		case SCF_ERROR_NOT_FOUND:
    379 			goto deleted;
    380 		}
    381 
    382 		uu_die("Can't decode FMRI %s: %s\n", name,
    383 		    scf_strerror(scf_error()));
    384 	}
    385 
    386 	/*
    387 	 * If there's no running snapshot, then we execute using the editing
    388 	 * snapshot.  Pending snapshots will be taken later.
    389 	 */
    390 	snap = libscf_get_running_snapshot(scf_inst);
    391 
    392 	if ((scf_service_get_name(scf_svc, svc_name, max_scf_name_size) < 0) ||
    393 	    (scf_instance_get_name(scf_inst, inst_name, max_scf_name_size) <
    394 	    0)) {
    395 		switch (scf_error()) {
    396 		case SCF_ERROR_NOT_SET:
    397 			break;
    398 
    399 		case SCF_ERROR_CONNECTION_BROKEN:
    400 			libscf_handle_rebind(h);
    401 			goto rep_retry;
    402 
    403 		default:
    404 			assert(0);
    405 			abort();
    406 		}
    407 
    408 		goto deleted;
    409 	}
    410 
    411 	(void) snprintf(logfilebuf, PATH_MAX, "%s:%s", svc_name, inst_name);
    412 	for (c = logfilebuf; *c != '\0'; c++)
    413 		if (*c == '/')
    414 			*c = '-';
    415 
    416 	inst->ri_logstem = startd_alloc(PATH_MAX);
    417 	(void) snprintf(inst->ri_logstem, PATH_MAX, "%s%s", logfilebuf,
    418 	    LOG_SUFFIX);
    419 
    420 	/*
    421 	 * If the restarter group is missing, use uninit/none.  Otherwise,
    422 	 * we're probably being restarted & don't want to mess up the states
    423 	 * that are there.
    424 	 */
    425 	state = RESTARTER_STATE_UNINIT;
    426 	next_state = RESTARTER_STATE_NONE;
    427 
    428 	r = scf_instance_get_pg(scf_inst, SCF_PG_RESTARTER, pg);
    429 	if (r != 0) {
    430 		switch (scf_error()) {
    431 		case SCF_ERROR_CONNECTION_BROKEN:
    432 			libscf_handle_rebind(h);
    433 			goto rep_retry;
    434 
    435 		case SCF_ERROR_NOT_SET:
    436 			goto deleted;
    437 
    438 		case SCF_ERROR_NOT_FOUND:
    439 			/*
    440 			 * This shouldn't happen since the graph engine should
    441 			 * have initialized the state to uninitialized/none if
    442 			 * there was no restarter pg.  In case somebody
    443 			 * deleted it, though....
    444 			 */
    445 			do_commit_states = B_TRUE;
    446 			break;
    447 
    448 		default:
    449 			assert(0);
    450 			abort();
    451 		}
    452 	} else {
    453 		r = libscf_read_states(pg, &state, &next_state);
    454 		if (r != 0) {
    455 			do_commit_states = B_TRUE;
    456 		} else {
    457 			if (next_state != RESTARTER_STATE_NONE) {
    458 				/*
    459 				 * Force next_state to _NONE since we
    460 				 * don't look for method processes.
    461 				 */
    462 				next_state = RESTARTER_STATE_NONE;
    463 				do_commit_states = B_TRUE;
    464 			} else {
    465 				/*
    466 				 * Inform the restarter of our state without
    467 				 * changing the STIME in the repository.
    468 				 */
    469 				ps = startd_alloc(sizeof (*ps));
    470 				inst->ri_i.i_state = ps->ps_state = state;
    471 				inst->ri_i.i_next_state = ps->ps_state_next =
    472 				    next_state;
    473 
    474 				graph_protocol_send_event(inst->ri_i.i_fmri,
    475 				    GRAPH_UPDATE_STATE_CHANGE, ps);
    476 
    477 				do_commit_states = B_FALSE;
    478 			}
    479 		}
    480 	}
    481 
    482 	switch (libscf_get_startd_properties(scf_inst, snap, &inst->ri_flags,
    483 	    &inst->ri_utmpx_prefix)) {
    484 	case 0:
    485 		break;
    486 
    487 	case ECONNABORTED:
    488 		libscf_handle_rebind(h);
    489 		goto rep_retry;
    490 
    491 	case ECANCELED:
    492 		goto deleted;
    493 
    494 	case ENOENT:
    495 		/*
    496 		 * This is odd, because the graph engine should have required
    497 		 * the general property group.  So we'll just use default
    498 		 * flags in anticipation of the graph engine sending us
    499 		 * REMOVE_INSTANCE when it finds out that the general property
    500 		 * group has been deleted.
    501 		 */
    502 		inst->ri_flags = RINST_CONTRACT;
    503 		break;
    504 
    505 	default:
    506 		assert(0);
    507 		abort();
    508 	}
    509 
    510 	switch (libscf_get_template_values(scf_inst, snap,
    511 	    &inst->ri_common_name, &inst->ri_C_common_name)) {
    512 	case 0:
    513 		break;
    514 
    515 	case ECONNABORTED:
    516 		libscf_handle_rebind(h);
    517 		goto rep_retry;
    518 
    519 	case ECANCELED:
    520 		goto deleted;
    521 
    522 	case ECHILD:
    523 	case ENOENT:
    524 		break;
    525 
    526 	default:
    527 		assert(0);
    528 		abort();
    529 	}
    530 
    531 	switch (libscf_read_method_ids(h, scf_inst, inst->ri_i.i_fmri,
    532 	    &inst->ri_i.i_primary_ctid, &inst->ri_i.i_transient_ctid,
    533 	    &start_pid)) {
    534 	case 0:
    535 		break;
    536 
    537 	case ECONNABORTED:
    538 		libscf_handle_rebind(h);
    539 		goto rep_retry;
    540 
    541 	case ECANCELED:
    542 		goto deleted;
    543 
    544 	default:
    545 		assert(0);
    546 		abort();
    547 	}
    548 
    549 	if (inst->ri_i.i_primary_ctid >= 1) {
    550 		contract_hash_store(inst->ri_i.i_primary_ctid, inst->ri_id);
    551 
    552 		switch (check_contract(inst, B_TRUE, scf_inst)) {
    553 		case 0:
    554 			break;
    555 
    556 		case ECONNABORTED:
    557 			libscf_handle_rebind(h);
    558 			goto rep_retry;
    559 
    560 		case ECANCELED:
    561 			goto deleted;
    562 
    563 		default:
    564 			assert(0);
    565 			abort();
    566 		}
    567 	}
    568 
    569 	if (inst->ri_i.i_transient_ctid >= 1) {
    570 		switch (check_contract(inst, B_FALSE, scf_inst)) {
    571 		case 0:
    572 			break;
    573 
    574 		case ECONNABORTED:
    575 			libscf_handle_rebind(h);
    576 			goto rep_retry;
    577 
    578 		case ECANCELED:
    579 			goto deleted;
    580 
    581 		default:
    582 			assert(0);
    583 			abort();
    584 		}
    585 	}
    586 
    587 	/* No more failures we live through, so add it to the list. */
    588 	(void) pthread_mutex_init(&inst->ri_lock, &mutex_attrs);
    589 	(void) pthread_mutex_init(&inst->ri_queue_lock, &mutex_attrs);
    590 	MUTEX_LOCK(&inst->ri_lock);
    591 	MUTEX_LOCK(&inst->ri_queue_lock);
    592 
    593 	(void) pthread_cond_init(&inst->ri_method_cv, NULL);
    594 
    595 	uu_list_node_init(inst, &inst->ri_link, restarter_instance_pool);
    596 	uu_list_insert(instance_list.ril_instance_list, inst, idx);
    597 	MUTEX_UNLOCK(&instance_list.ril_lock);
    598 
    599 	if (start_pid != -1 &&
    600 	    (inst->ri_flags & RINST_STYLE_MASK) == RINST_WAIT) {
    601 		int ret;
    602 		ret = wait_register(start_pid, inst->ri_i.i_fmri, 0, 1);
    603 		if (ret == -1) {
    604 			/*
    605 			 * Implication:  if we can't reregister the
    606 			 * instance, we will start another one.  Two
    607 			 * instances may or may not result in a resource
    608 			 * conflict.
    609 			 */
    610 			log_error(LOG_WARNING,
    611 			    "%s: couldn't reregister %ld for wait\n",
    612 			    inst->ri_i.i_fmri, start_pid);
    613 		} else if (ret == 1) {
    614 			/*
    615 			 * Leading PID has exited.
    616 			 */
    617 			(void) stop_instance(h, inst, RSTOP_EXIT);
    618 		}
    619 	}
    620 
    621 
    622 	scf_pg_destroy(pg);
    623 
    624 	if (do_commit_states)
    625 		(void) restarter_instance_update_states(h, inst, state,
    626 		    next_state, RERR_NONE, NULL);
    627 
    628 	log_framework(LOG_DEBUG, "%s is a %s-style service\n", name,
    629 	    service_style(inst->ri_flags));
    630 
    631 	MUTEX_UNLOCK(&inst->ri_queue_lock);
    632 	MUTEX_UNLOCK(&inst->ri_lock);
    633 
    634 	startd_free(svc_name, max_scf_name_size);
    635 	startd_free(inst_name, max_scf_name_size);
    636 	scf_snapshot_destroy(snap);
    637 	scf_instance_destroy(scf_inst);
    638 	scf_service_destroy(scf_svc);
    639 
    640 	log_framework(LOG_DEBUG, "%s: inserted instance into restarter list\n",
    641 	    name);
    642 
    643 	return (0);
    644 
    645 deleted:
    646 	MUTEX_UNLOCK(&instance_list.ril_lock);
    647 	startd_free(inst_name, max_scf_name_size);
    648 	startd_free(svc_name, max_scf_name_size);
    649 	if (snap != NULL)
    650 		scf_snapshot_destroy(snap);
    651 	scf_pg_destroy(pg);
    652 	scf_instance_destroy(scf_inst);
    653 	scf_service_destroy(scf_svc);
    654 	startd_free((void *)inst->ri_i.i_fmri, strlen(inst->ri_i.i_fmri) + 1);
    655 	uu_list_destroy(inst->ri_queue);
    656 	if (inst->ri_logstem != NULL)
    657 		startd_free(inst->ri_logstem, PATH_MAX);
    658 	if (inst->ri_common_name != NULL)
    659 		startd_free(inst->ri_common_name, max_scf_value_size);
    660 	if (inst->ri_C_common_name != NULL)
    661 		startd_free(inst->ri_C_common_name, max_scf_value_size);
    662 	startd_free(inst->ri_utmpx_prefix, max_scf_value_size);
    663 	startd_free(inst, sizeof (restarter_inst_t));
    664 	return (ENOENT);
    665 }
    666 
    667 static void
    668 restarter_delete_inst(restarter_inst_t *ri)
    669 {
    670 	int id;
    671 	restarter_inst_t *rip;
    672 	void *cookie = NULL;
    673 	restarter_instance_qentry_t *e;
    674 
    675 	assert(PTHREAD_MUTEX_HELD(&ri->ri_lock));
    676 
    677 	/*
    678 	 * Must drop the instance lock so we can pick up the instance_list
    679 	 * lock & remove the instance.
    680 	 */
    681 	id = ri->ri_id;
    682 	MUTEX_UNLOCK(&ri->ri_lock);
    683 
    684 	MUTEX_LOCK(&instance_list.ril_lock);
    685 
    686 	rip = uu_list_find(instance_list.ril_instance_list, &id, NULL, NULL);
    687 	if (rip == NULL) {
    688 		MUTEX_UNLOCK(&instance_list.ril_lock);
    689 		return;
    690 	}
    691 
    692 	assert(ri == rip);
    693 
    694 	uu_list_remove(instance_list.ril_instance_list, ri);
    695 
    696 	log_framework(LOG_DEBUG, "%s: deleted instance from restarter list\n",
    697 	    ri->ri_i.i_fmri);
    698 
    699 	MUTEX_UNLOCK(&instance_list.ril_lock);
    700 
    701 	/*
    702 	 * We can lock the instance without holding the instance_list lock
    703 	 * since we removed the instance from the list.
    704 	 */
    705 	MUTEX_LOCK(&ri->ri_lock);
    706 	MUTEX_LOCK(&ri->ri_queue_lock);
    707 
    708 	if (ri->ri_i.i_primary_ctid >= 1)
    709 		contract_hash_remove(ri->ri_i.i_primary_ctid);
    710 
    711 	while (ri->ri_method_thread != 0 || ri->ri_method_waiters > 0)
    712 		(void) pthread_cond_wait(&ri->ri_method_cv, &ri->ri_lock);
    713 
    714 	while ((e = uu_list_teardown(ri->ri_queue, &cookie)) != NULL)
    715 		startd_free(e, sizeof (*e));
    716 	uu_list_destroy(ri->ri_queue);
    717 
    718 	startd_free((void *)ri->ri_i.i_fmri, strlen(ri->ri_i.i_fmri) + 1);
    719 	startd_free(ri->ri_logstem, PATH_MAX);
    720 	if (ri->ri_common_name != NULL)
    721 		startd_free(ri->ri_common_name, max_scf_value_size);
    722 	if (ri->ri_C_common_name != NULL)
    723 		startd_free(ri->ri_C_common_name, max_scf_value_size);
    724 	startd_free(ri->ri_utmpx_prefix, max_scf_value_size);
    725 	(void) pthread_mutex_destroy(&ri->ri_lock);
    726 	(void) pthread_mutex_destroy(&ri->ri_queue_lock);
    727 	startd_free(ri, sizeof (restarter_inst_t));
    728 }
    729 
    730 /*
    731  * instance_is_wait_style()
    732  *
    733  *   Returns 1 if the given instance is a "wait-style" service instance.
    734  */
    735 int
    736 instance_is_wait_style(restarter_inst_t *inst)
    737 {
    738 	assert(PTHREAD_MUTEX_HELD(&inst->ri_lock));
    739 	return ((inst->ri_flags & RINST_STYLE_MASK) == RINST_WAIT);
    740 }
    741 
    742 /*
    743  * instance_is_transient_style()
    744  *
    745  *   Returns 1 if the given instance is a transient service instance.
    746  */
    747 int
    748 instance_is_transient_style(restarter_inst_t *inst)
    749 {
    750 	assert(PTHREAD_MUTEX_HELD(&inst->ri_lock));
    751 	return ((inst->ri_flags & RINST_STYLE_MASK) == RINST_TRANSIENT);
    752 }
    753 
    754 /*
    755  * instance_in_transition()
    756  * Returns 1 if instance is in transition, 0 if not
    757  */
    758 int
    759 instance_in_transition(restarter_inst_t *inst)
    760 {
    761 	assert(PTHREAD_MUTEX_HELD(&inst->ri_lock));
    762 	if (inst->ri_i.i_next_state == RESTARTER_STATE_NONE)
    763 		return (0);
    764 	return (1);
    765 }
    766 
    767 /*
    768  * returns 1 if instance is already started, 0 if not
    769  */
    770 static int
    771 instance_started(restarter_inst_t *inst)
    772 {
    773 	int ret;
    774 
    775 	assert(PTHREAD_MUTEX_HELD(&inst->ri_lock));
    776 
    777 	if (inst->ri_i.i_state == RESTARTER_STATE_ONLINE ||
    778 	    inst->ri_i.i_state == RESTARTER_STATE_DEGRADED)
    779 		ret = 1;
    780 	else
    781 		ret = 0;
    782 
    783 	return (ret);
    784 }
    785 
    786 /*
    787  * Returns
    788  *   0 - success
    789  *   ECONNRESET - success, but h was rebound
    790  */
    791 int
    792 restarter_instance_update_states(scf_handle_t *h, restarter_inst_t *ri,
    793     restarter_instance_state_t new_state,
    794     restarter_instance_state_t new_state_next, restarter_error_t err, char *aux)
    795 {
    796 	protocol_states_t *states;
    797 	int e;
    798 	uint_t retry_count = 0, msecs = ALLOC_DELAY;
    799 	boolean_t rebound = B_FALSE;
    800 	int prev_state_online;
    801 	int state_online;
    802 
    803 	assert(PTHREAD_MUTEX_HELD(&ri->ri_lock));
    804 
    805 	prev_state_online = instance_started(ri);
    806 
    807 retry:
    808 	e = _restarter_commit_states(h, &ri->ri_i, new_state, new_state_next,
    809 	    aux);
    810 	switch (e) {
    811 	case 0:
    812 		break;
    813 
    814 	case ENOMEM:
    815 		++retry_count;
    816 		if (retry_count < ALLOC_RETRY) {
    817 			(void) poll(NULL, 0, msecs);
    818 			msecs *= ALLOC_DELAY_MULT;
    819 			goto retry;
    820 		}
    821 
    822 		/* Like startd_alloc(). */
    823 		uu_die("Insufficient memory.\n");
    824 		/* NOTREACHED */
    825 
    826 	case ECONNABORTED:
    827 		libscf_handle_rebind(h);
    828 		rebound = B_TRUE;
    829 		goto retry;
    830 
    831 	case EPERM:
    832 	case EACCES:
    833 	case EROFS:
    834 		log_error(LOG_NOTICE, "Could not commit state change for %s "
    835 		    "to repository: %s.\n", ri->ri_i.i_fmri, strerror(e));
    836 		/* FALLTHROUGH */
    837 
    838 	case ENOENT:
    839 		ri->ri_i.i_state = new_state;
    840 		ri->ri_i.i_next_state = new_state_next;
    841 		break;
    842 
    843 	case EINVAL:
    844 	default:
    845 		bad_error("_restarter_commit_states", e);
    846 	}
    847 
    848 	states = startd_alloc(sizeof (protocol_states_t));
    849 	states->ps_state = new_state;
    850 	states->ps_state_next = new_state_next;
    851 	states->ps_err = err;
    852 	graph_protocol_send_event(ri->ri_i.i_fmri, GRAPH_UPDATE_STATE_CHANGE,
    853 	    (void *)states);
    854 
    855 	state_online = instance_started(ri);
    856 
    857 	if (prev_state_online && !state_online)
    858 		ri->ri_post_offline_hook();
    859 	else if (!prev_state_online && state_online)
    860 		ri->ri_post_online_hook();
    861 
    862 	return (rebound ? ECONNRESET : 0);
    863 }
    864 
    865 void
    866 restarter_mark_pending_snapshot(const char *fmri, uint_t flag)
    867 {
    868 	restarter_inst_t *inst;
    869 
    870 	assert(flag == RINST_RETAKE_RUNNING || flag == RINST_RETAKE_START);
    871 
    872 	inst = inst_lookup_by_name(fmri);
    873 	if (inst == NULL)
    874 		return;
    875 
    876 	inst->ri_flags |= flag;
    877 
    878 	MUTEX_UNLOCK(&inst->ri_lock);
    879 }
    880 
    881 static void
    882 restarter_take_pending_snapshots(scf_handle_t *h)
    883 {
    884 	restarter_inst_t *inst;
    885 	int r;
    886 
    887 	MUTEX_LOCK(&instance_list.ril_lock);
    888 
    889 	for (inst = uu_list_first(instance_list.ril_instance_list);
    890 	    inst != NULL;
    891 	    inst = uu_list_next(instance_list.ril_instance_list, inst)) {
    892 		const char *fmri;
    893 		scf_instance_t *sinst = NULL;
    894 
    895 		MUTEX_LOCK(&inst->ri_lock);
    896 
    897 		/*
    898 		 * This is where we'd check inst->ri_method_thread and if it
    899 		 * were nonzero we'd wait in anticipation of another thread
    900 		 * executing a method for inst.  Doing so with the instance_list
    901 		 * locked, though, leads to deadlock.  Since taking a snapshot
    902 		 * during that window won't hurt anything, we'll just continue.
    903 		 */
    904 
    905 		fmri = inst->ri_i.i_fmri;
    906 
    907 		if (inst->ri_flags & RINST_RETAKE_RUNNING) {
    908 			scf_snapshot_t *rsnap;
    909 
    910 			(void) libscf_fmri_get_instance(h, fmri, &sinst);
    911 
    912 			rsnap = libscf_get_or_make_running_snapshot(sinst,
    913 			    fmri, B_FALSE);
    914 
    915 			scf_instance_destroy(sinst);
    916 
    917 			if (rsnap != NULL)
    918 				inst->ri_flags &= ~RINST_RETAKE_RUNNING;
    919 
    920 			scf_snapshot_destroy(rsnap);
    921 		}
    922 
    923 		if (inst->ri_flags & RINST_RETAKE_START) {
    924 			switch (r = libscf_snapshots_poststart(h, fmri,
    925 			    B_FALSE)) {
    926 			case 0:
    927 			case ENOENT:
    928 				inst->ri_flags &= ~RINST_RETAKE_START;
    929 				break;
    930 
    931 			case ECONNABORTED:
    932 				break;
    933 
    934 			case EACCES:
    935 			default:
    936 				bad_error("libscf_snapshots_poststart", r);
    937 			}
    938 		}
    939 
    940 		MUTEX_UNLOCK(&inst->ri_lock);
    941 	}
    942 
    943 	MUTEX_UNLOCK(&instance_list.ril_lock);
    944 }
    945 
    946 /* ARGSUSED */
    947 void *
    948 restarter_post_fsminimal_thread(void *unused)
    949 {
    950 	scf_handle_t *h;
    951 	int r;
    952 
    953 	h = libscf_handle_create_bound_loop();
    954 
    955 	for (;;) {
    956 		r = libscf_create_self(h);
    957 		if (r == 0)
    958 			break;
    959 
    960 		assert(r == ECONNABORTED);
    961 		libscf_handle_rebind(h);
    962 	}
    963 
    964 	restarter_take_pending_snapshots(h);
    965 
    966 	(void) scf_handle_unbind(h);
    967 	scf_handle_destroy(h);
    968 
    969 	return (NULL);
    970 }
    971 
    972 /*
    973  * int stop_instance()
    974  *
    975  *   Stop the instance identified by the instance given as the second argument,
    976  *   for the cause stated.
    977  *
    978  *   Returns
    979  *     0 - success
    980  *     -1 - inst is in transition
    981  */
    982 static int
    983 stop_instance(scf_handle_t *local_handle, restarter_inst_t *inst,
    984     stop_cause_t cause)
    985 {
    986 	fork_info_t *info;
    987 	const char *cp;
    988 	int err;
    989 	restarter_error_t re;
    990 
    991 	assert(PTHREAD_MUTEX_HELD(&inst->ri_lock));
    992 	assert(inst->ri_method_thread == 0);
    993 
    994 	switch (cause) {
    995 	case RSTOP_EXIT:
    996 		re = RERR_RESTART;
    997 		cp = "all processes in service exited";
    998 		break;
    999 	case RSTOP_CORE:
   1000 		re = RERR_FAULT;
   1001 		cp = "process dumped core";
   1002 		break;
   1003 	case RSTOP_SIGNAL:
   1004 		re = RERR_FAULT;
   1005 		cp = "process received fatal signal from outside the service";
   1006 		break;
   1007 	case RSTOP_HWERR:
   1008 		re = RERR_FAULT;
   1009 		cp = "process killed due to uncorrectable hardware error";
   1010 		break;
   1011 	case RSTOP_DEPENDENCY:
   1012 		re = RERR_RESTART;
   1013 		cp = "dependency activity requires stop";
   1014 		break;
   1015 	case RSTOP_DISABLE:
   1016 		re = RERR_RESTART;
   1017 		cp = "service disabled";
   1018 		break;
   1019 	case RSTOP_RESTART:
   1020 		re = RERR_RESTART;
   1021 		cp = "service restarting";
   1022 		break;
   1023 	default:
   1024 #ifndef NDEBUG
   1025 		(void) fprintf(stderr, "Unknown cause %d at %s:%d.\n",
   1026 		    cause, __FILE__, __LINE__);
   1027 #endif
   1028 		abort();
   1029 	}
   1030 
   1031 	/* Services in the disabled and maintenance state are ignored */
   1032 	if (inst->ri_i.i_state == RESTARTER_STATE_MAINT ||
   1033 	    inst->ri_i.i_state == RESTARTER_STATE_DISABLED) {
   1034 		log_framework(LOG_DEBUG,
   1035 		    "%s: stop_instance -> is maint/disabled\n",
   1036 		    inst->ri_i.i_fmri);
   1037 		return (0);
   1038 	}
   1039 
   1040 	/* Already stopped instances are left alone */
   1041 	if (instance_started(inst) == 0) {
   1042 		log_framework(LOG_DEBUG, "Restarter: %s is already stopped.\n",
   1043 		    inst->ri_i.i_fmri);
   1044 		return (0);
   1045 	}
   1046 
   1047 	if (instance_in_transition(inst)) {
   1048 		/* requeue event by returning -1 */
   1049 		log_framework(LOG_DEBUG,
   1050 		    "Restarter: Not stopping %s, in transition.\n",
   1051 		    inst->ri_i.i_fmri);
   1052 		return (-1);
   1053 	}
   1054 
   1055 	log_instance(inst, B_TRUE, "Stopping because %s.", cp);
   1056 
   1057 	log_framework(re == RERR_FAULT ? LOG_INFO : LOG_DEBUG,
   1058 	    "%s: Instance stopping because %s.\n", inst->ri_i.i_fmri, cp);
   1059 
   1060 	if (instance_is_wait_style(inst) && cause == RSTOP_EXIT) {
   1061 		/*
   1062 		 * No need to stop instance, as child has exited; remove
   1063 		 * contract and move the instance to the offline state.
   1064 		 */
   1065 		switch (err = restarter_instance_update_states(local_handle,
   1066 		    inst, inst->ri_i.i_state, RESTARTER_STATE_OFFLINE, re,
   1067 		    NULL)) {
   1068 		case 0:
   1069 		case ECONNRESET:
   1070 			break;
   1071 
   1072 		default:
   1073 			bad_error("restarter_instance_update_states", err);
   1074 		}
   1075 
   1076 		(void) update_fault_count(inst, FAULT_COUNT_RESET);
   1077 
   1078 		if (inst->ri_i.i_primary_ctid != 0) {
   1079 			inst->ri_m_inst =
   1080 			    safe_scf_instance_create(local_handle);
   1081 			inst->ri_mi_deleted = B_FALSE;
   1082 
   1083 			libscf_reget_instance(inst);
   1084 			method_remove_contract(inst, B_TRUE, B_TRUE);
   1085 
   1086 			scf_instance_destroy(inst->ri_m_inst);
   1087 			inst->ri_m_inst = NULL;
   1088 		}
   1089 
   1090 		switch (err = restarter_instance_update_states(local_handle,
   1091 		    inst, inst->ri_i.i_next_state, RESTARTER_STATE_NONE, re,
   1092 		    NULL)) {
   1093 		case 0:
   1094 		case ECONNRESET:
   1095 			break;
   1096 
   1097 		default:
   1098 			bad_error("restarter_instance_update_states", err);
   1099 		}
   1100 
   1101 		return (0);
   1102 	} else if (instance_is_wait_style(inst) && re == RERR_RESTART) {
   1103 		/*
   1104 		 * Stopping a wait service through means other than the pid
   1105 		 * exiting should keep wait_thread() from restarting the
   1106 		 * service, by removing it from the wait list.
   1107 		 * We cannot remove it right now otherwise the process will
   1108 		 * end up <defunct> so mark it to be ignored.
   1109 		 */
   1110 		wait_ignore_by_fmri(inst->ri_i.i_fmri);
   1111 	}
   1112 
   1113 	switch (err = restarter_instance_update_states(local_handle, inst,
   1114 	    inst->ri_i.i_state, inst->ri_i.i_enabled ? RESTARTER_STATE_OFFLINE :
   1115 	    RESTARTER_STATE_DISABLED, RERR_NONE, NULL)) {
   1116 	case 0:
   1117 	case ECONNRESET:
   1118 		break;
   1119 
   1120 	default:
   1121 		bad_error("restarter_instance_update_states", err);
   1122 	}
   1123 
   1124 	info = startd_zalloc(sizeof (fork_info_t));
   1125 
   1126 	info->sf_id = inst->ri_id;
   1127 	info->sf_method_type = METHOD_STOP;
   1128 	info->sf_event_type = re;
   1129 	inst->ri_method_thread = startd_thread_create(method_thread, info);
   1130 
   1131 	return (0);
   1132 }
   1133 
   1134 /*
   1135  * Returns
   1136  *   ENOENT - fmri is not in instance_list
   1137  *   0 - success
   1138  *   ECONNRESET - success, though handle was rebound
   1139  *   -1 - instance is in transition
   1140  */
   1141 int
   1142 stop_instance_fmri(scf_handle_t *h, const char *fmri, uint_t flags)
   1143 {
   1144 	restarter_inst_t *rip;
   1145 	int r;
   1146 
   1147 	rip = inst_lookup_by_name(fmri);
   1148 	if (rip == NULL)
   1149 		return (ENOENT);
   1150 
   1151 	r = stop_instance(h, rip, flags);
   1152 
   1153 	MUTEX_UNLOCK(&rip->ri_lock);
   1154 
   1155 	return (r);
   1156 }
   1157 
   1158 static void
   1159 unmaintain_instance(scf_handle_t *h, restarter_inst_t *rip,
   1160     unmaint_cause_t cause)
   1161 {
   1162 	ctid_t ctid;
   1163 	scf_instance_t *inst;
   1164 	int r;
   1165 	uint_t tries = 0, msecs = ALLOC_DELAY;
   1166 	const char *cp;
   1167 
   1168 	assert(PTHREAD_MUTEX_HELD(&rip->ri_lock));
   1169 
   1170 	if (rip->ri_i.i_state != RESTARTER_STATE_MAINT) {
   1171 		log_error(LOG_DEBUG, "Restarter: "
   1172 		    "Ignoring maintenance off command because %s is not in the "
   1173 		    "maintenance state.\n", rip->ri_i.i_fmri);
   1174 		return;
   1175 	}
   1176 
   1177 	switch (cause) {
   1178 	case RUNMAINT_CLEAR:
   1179 		cp = "clear requested";
   1180 		break;
   1181 	case RUNMAINT_DISABLE:
   1182 		cp = "disable requested";
   1183 		break;
   1184 	default:
   1185 #ifndef NDEBUG
   1186 		(void) fprintf(stderr, "Uncaught case for %d at %s:%d.\n",
   1187 		    cause, __FILE__, __LINE__);
   1188 #endif
   1189 		abort();
   1190 	}
   1191 
   1192 	log_instance(rip, B_TRUE, "Leaving maintenance because %s.",
   1193 	    cp);
   1194 	log_framework(LOG_DEBUG, "%s: Instance leaving maintenance because "
   1195 	    "%s.\n", rip->ri_i.i_fmri, cp);
   1196 
   1197 	(void) restarter_instance_update_states(h, rip, RESTARTER_STATE_UNINIT,
   1198 	    RESTARTER_STATE_NONE, RERR_RESTART, "none");
   1199 
   1200 	/*
   1201 	 * If we did ADMIN_MAINT_ON_IMMEDIATE, then there might still be
   1202 	 * a primary contract.
   1203 	 */
   1204 	if (rip->ri_i.i_primary_ctid == 0)
   1205 		return;
   1206 
   1207 	ctid = rip->ri_i.i_primary_ctid;
   1208 	contract_abandon(ctid);
   1209 	rip->ri_i.i_primary_ctid = 0;
   1210 
   1211 rep_retry:
   1212 	switch (r = libscf_fmri_get_instance(h, rip->ri_i.i_fmri, &inst)) {
   1213 	case 0:
   1214 		break;
   1215 
   1216 	case ECONNABORTED:
   1217 		libscf_handle_rebind(h);
   1218 		goto rep_retry;
   1219 
   1220 	case ENOENT:
   1221 		/* Must have been deleted. */
   1222 		return;
   1223 
   1224 	case EINVAL:
   1225 	case ENOTSUP:
   1226 	default:
   1227 		bad_error("libscf_handle_rebind", r);
   1228 	}
   1229 
   1230 again:
   1231 	r = restarter_remove_contract(inst, ctid, RESTARTER_CONTRACT_PRIMARY);
   1232 	switch (r) {
   1233 	case 0:
   1234 		break;
   1235 
   1236 	case ENOMEM:
   1237 		++tries;
   1238 		if (tries < ALLOC_RETRY) {
   1239 			(void) poll(NULL, 0, msecs);
   1240 			msecs *= ALLOC_DELAY_MULT;
   1241 			goto again;
   1242 		}
   1243 
   1244 		uu_die("Insufficient memory.\n");
   1245 		/* NOTREACHED */
   1246 
   1247 	case ECONNABORTED:
   1248 		scf_instance_destroy(inst);
   1249 		libscf_handle_rebind(h);
   1250 		goto rep_retry;
   1251 
   1252 	case ECANCELED:
   1253 		break;
   1254 
   1255 	case EPERM:
   1256 	case EACCES:
   1257 	case EROFS:
   1258 		log_error(LOG_INFO,
   1259 		    "Could not remove contract id %lu for %s (%s).\n", ctid,
   1260 		    rip->ri_i.i_fmri, strerror(r));
   1261 		break;
   1262 
   1263 	case EINVAL:
   1264 	case EBADF:
   1265 	default:
   1266 		bad_error("restarter_remove_contract", r);
   1267 	}
   1268 
   1269 	scf_instance_destroy(inst);
   1270 }
   1271 
   1272 /*
   1273  * enable_inst()
   1274  *   Set inst->ri_i.i_enabled.  Expects 'e' to be _ENABLE, _DISABLE, or
   1275  *   _ADMIN_DISABLE.  If the event is _ENABLE and inst is uninitialized or
   1276  *   disabled, move it to offline.  If the event is _DISABLE or
   1277  *   _ADMIN_DISABLE, make sure inst will move to disabled.
   1278  *
   1279  *   Returns
   1280  *     0 - success
   1281  *     ECONNRESET - h was rebound
   1282  */
   1283 static int
   1284 enable_inst(scf_handle_t *h, restarter_inst_t *inst, restarter_event_type_t e)
   1285 {
   1286 	restarter_instance_state_t state;
   1287 	int r;
   1288 
   1289 	assert(PTHREAD_MUTEX_HELD(&inst->ri_lock));
   1290 	assert(e == RESTARTER_EVENT_TYPE_ADMIN_DISABLE ||
   1291 	    e == RESTARTER_EVENT_TYPE_DISABLE ||
   1292 	    e == RESTARTER_EVENT_TYPE_ENABLE);
   1293 	assert(instance_in_transition(inst) == 0);
   1294 
   1295 	state = inst->ri_i.i_state;
   1296 
   1297 	if (e == RESTARTER_EVENT_TYPE_ENABLE) {
   1298 		inst->ri_i.i_enabled = 1;
   1299 
   1300 		if (state == RESTARTER_STATE_UNINIT ||
   1301 		    state == RESTARTER_STATE_DISABLED) {
   1302 			/*
   1303 			 * B_FALSE: Don't log an error if the log_instance()
   1304 			 * fails because it will fail on the miniroot before
   1305 			 * install-discovery runs.
   1306 			 */
   1307 			log_instance(inst, B_FALSE, "Enabled.");
   1308 			log_framework(LOG_DEBUG, "%s: Instance enabled.\n",
   1309 			    inst->ri_i.i_fmri);
   1310 			(void) restarter_instance_update_states(h, inst,
   1311 			    RESTARTER_STATE_OFFLINE, RESTARTER_STATE_NONE,
   1312 			    RERR_NONE, NULL);
   1313 		} else {
   1314 			log_framework(LOG_DEBUG, "Restarter: "
   1315 			    "Not changing state of %s for enable command.\n",
   1316 			    inst->ri_i.i_fmri);
   1317 		}
   1318 	} else {
   1319 		inst->ri_i.i_enabled = 0;
   1320 
   1321 		switch (state) {
   1322 		case RESTARTER_STATE_ONLINE:
   1323 		case RESTARTER_STATE_DEGRADED:
   1324 			r = stop_instance(h, inst, RSTOP_DISABLE);
   1325 			return (r == ECONNRESET ? 0 : r);
   1326 
   1327 		case RESTARTER_STATE_OFFLINE:
   1328 		case RESTARTER_STATE_UNINIT:
   1329 			if (inst->ri_i.i_primary_ctid != 0) {
   1330 				inst->ri_m_inst = safe_scf_instance_create(h);
   1331 				inst->ri_mi_deleted = B_FALSE;
   1332 
   1333 				libscf_reget_instance(inst);
   1334 				method_remove_contract(inst, B_TRUE, B_TRUE);
   1335 
   1336 				scf_instance_destroy(inst->ri_m_inst);
   1337 			}
   1338 			/* B_FALSE: See log_instance(..., "Enabled."); above */
   1339 			log_instance(inst, B_FALSE, "Disabled.");
   1340 			log_framework(LOG_DEBUG, "%s: Instance disabled.\n",
   1341 			    inst->ri_i.i_fmri);
   1342 			(void) restarter_instance_update_states(h, inst,
   1343 			    RESTARTER_STATE_DISABLED, RESTARTER_STATE_NONE,
   1344 			    RERR_RESTART, NULL);
   1345 			return (0);
   1346 
   1347 		case RESTARTER_STATE_DISABLED:
   1348 			break;
   1349 
   1350 		case RESTARTER_STATE_MAINT:
   1351 			/*
   1352 			 * We only want to pull the instance out of maintenance
   1353 			 * if the disable is on adminstrative request.  The
   1354 			 * graph engine sends _DISABLE events whenever a
   1355 			 * service isn't in the disabled state, and we don't
   1356 			 * want to pull the service out of maintenance if,
   1357 			 * for example, it is there due to a dependency cycle.
   1358 			 */
   1359 			if (e == RESTARTER_EVENT_TYPE_ADMIN_DISABLE)
   1360 				unmaintain_instance(h, inst, RUNMAINT_DISABLE);
   1361 			break;
   1362 
   1363 		default:
   1364 #ifndef NDEBUG
   1365 			(void) fprintf(stderr, "Restarter instance %s has "
   1366 			    "unknown state %d.\n", inst->ri_i.i_fmri, state);
   1367 #endif
   1368 			abort();
   1369 		}
   1370 	}
   1371 
   1372 	return (0);
   1373 }
   1374 
   1375 static void
   1376 start_instance(scf_handle_t *local_handle, restarter_inst_t *inst)
   1377 {
   1378 	fork_info_t *info;
   1379 
   1380 	assert(PTHREAD_MUTEX_HELD(&inst->ri_lock));
   1381 	assert(instance_in_transition(inst) == 0);
   1382 	assert(inst->ri_method_thread == 0);
   1383 
   1384 	log_framework(LOG_DEBUG, "%s: trying to start instance\n",
   1385 	    inst->ri_i.i_fmri);
   1386 
   1387 	/* Services in the disabled and maintenance state are ignored */
   1388 	if (inst->ri_i.i_state == RESTARTER_STATE_MAINT ||
   1389 	    inst->ri_i.i_state == RESTARTER_STATE_DISABLED ||
   1390 	    inst->ri_i.i_enabled == 0) {
   1391 		log_framework(LOG_DEBUG,
   1392 		    "%s: start_instance -> is maint/disabled\n",
   1393 		    inst->ri_i.i_fmri);
   1394 		return;
   1395 	}
   1396 
   1397 	/* Already started instances are left alone */
   1398 	if (instance_started(inst) == 1) {
   1399 		log_framework(LOG_DEBUG,
   1400 		    "%s: start_instance -> is already started\n",
   1401 		    inst->ri_i.i_fmri);
   1402 		return;
   1403 	}
   1404 
   1405 	log_framework(LOG_DEBUG, "%s: starting instance.\n", inst->ri_i.i_fmri);
   1406 
   1407 	(void) restarter_instance_update_states(local_handle, inst,
   1408 	    inst->ri_i.i_state, RESTARTER_STATE_ONLINE, RERR_NONE, "none");
   1409 
   1410 	info = startd_zalloc(sizeof (fork_info_t));
   1411 
   1412 	info->sf_id = inst->ri_id;
   1413 	info->sf_method_type = METHOD_START;
   1414 	info->sf_event_type = RERR_NONE;
   1415 	inst->ri_method_thread = startd_thread_create(method_thread, info);
   1416 }
   1417 
   1418 static int
   1419 event_from_tty(scf_handle_t *h, restarter_inst_t *rip)
   1420 {
   1421 	scf_instance_t *inst;
   1422 	int ret = 0;
   1423 
   1424 	if (libscf_fmri_get_instance(h, rip->ri_i.i_fmri, &inst))
   1425 		return (-1);
   1426 
   1427 	ret = restarter_inst_ractions_from_tty(inst);
   1428 
   1429 	scf_instance_destroy(inst);
   1430 	return (ret);
   1431 }
   1432 
   1433 static void
   1434 maintain_instance(scf_handle_t *h, restarter_inst_t *rip, int immediate,
   1435     const char *aux)
   1436 {
   1437 	fork_info_t *info;
   1438 	scf_instance_t *scf_inst = NULL;
   1439 
   1440 	assert(PTHREAD_MUTEX_HELD(&rip->ri_lock));
   1441 	assert(aux != NULL);
   1442 	assert(rip->ri_method_thread == 0);
   1443 
   1444 	log_instance(rip, B_TRUE, "Stopping for maintenance due to %s.", aux);
   1445 	log_framework(LOG_DEBUG, "%s: stopping for maintenance due to %s.\n",
   1446 	    rip->ri_i.i_fmri, aux);
   1447 
   1448 	/* Services in the maintenance state are ignored */
   1449 	if (rip->ri_i.i_state == RESTARTER_STATE_MAINT) {
   1450 		log_framework(LOG_DEBUG,
   1451 		    "%s: maintain_instance -> is already in maintenance\n",
   1452 		    rip->ri_i.i_fmri);
   1453 		return;
   1454 	}
   1455 
   1456 	/*
   1457 	 * If aux state is "service_request" and
   1458 	 * restarter_actions/auxiliary_fmri property is set with a valid fmri,
   1459 	 * copy the fmri to restarter/auxiliary_fmri so svcs -x can use.
   1460 	 */
   1461 	if (strcmp(aux, "service_request") == 0 && libscf_fmri_get_instance(h,
   1462 	    rip->ri_i.i_fmri, &scf_inst) == 0) {
   1463 		if (restarter_inst_validate_ractions_aux_fmri(scf_inst) == 0) {
   1464 			if (restarter_inst_set_aux_fmri(scf_inst))
   1465 				log_framework(LOG_DEBUG, "%s: "
   1466 				    "restarter_inst_set_aux_fmri failed: ",
   1467 				    rip->ri_i.i_fmri);
   1468 		} else {
   1469 			log_framework(LOG_DEBUG, "%s: "
   1470 			    "restarter_inst_validate_ractions_aux_fmri "
   1471 			    "failed: ", rip->ri_i.i_fmri);
   1472 
   1473 			if (restarter_inst_reset_aux_fmri(scf_inst))
   1474 				log_framework(LOG_DEBUG, "%s: "
   1475 				    "restarter_inst_reset_aux_fmri failed: ",
   1476 				    rip->ri_i.i_fmri);
   1477 		}
   1478 		scf_instance_destroy(scf_inst);
   1479 	}
   1480 
   1481 	if (immediate || !instance_started(rip)) {
   1482 		if (rip->ri_i.i_primary_ctid != 0) {
   1483 			rip->ri_m_inst = safe_scf_instance_create(h);
   1484 			rip->ri_mi_deleted = B_FALSE;
   1485 
   1486 			libscf_reget_instance(rip);
   1487 			method_remove_contract(rip, B_TRUE, B_TRUE);
   1488 
   1489 			scf_instance_destroy(rip->ri_m_inst);
   1490 		}
   1491 
   1492 		(void) restarter_instance_update_states(h, rip,
   1493 		    RESTARTER_STATE_MAINT, RESTARTER_STATE_NONE, RERR_RESTART,
   1494 		    (char *)aux);
   1495 		return;
   1496 	}
   1497 
   1498 	(void) restarter_instance_update_states(h, rip, rip->ri_i.i_state,
   1499 	    RESTARTER_STATE_MAINT, RERR_NONE, (char *)aux);
   1500 
   1501 	log_transition(rip, MAINT_REQUESTED);
   1502 
   1503 	info = startd_zalloc(sizeof (*info));
   1504 	info->sf_id = rip->ri_id;
   1505 	info->sf_method_type = METHOD_STOP;
   1506 	info->sf_event_type = RERR_RESTART;
   1507 	rip->ri_method_thread = startd_thread_create(method_thread, info);
   1508 }
   1509 
   1510 static void
   1511 refresh_instance(scf_handle_t *h, restarter_inst_t *rip)
   1512 {
   1513 	scf_instance_t *inst;
   1514 	scf_snapshot_t *snap;
   1515 	fork_info_t *info;
   1516 	int r;
   1517 
   1518 	assert(PTHREAD_MUTEX_HELD(&rip->ri_lock));
   1519 
   1520 	log_instance(rip, B_TRUE, "Rereading configuration.");
   1521 	log_framework(LOG_DEBUG, "%s: rereading configuration.\n",
   1522 	    rip->ri_i.i_fmri);
   1523 
   1524 rep_retry:
   1525 	r = libscf_fmri_get_instance(h, rip->ri_i.i_fmri, &inst);
   1526 	switch (r) {
   1527 	case 0:
   1528 		break;
   1529 
   1530 	case ECONNABORTED:
   1531 		libscf_handle_rebind(h);
   1532 		goto rep_retry;
   1533 
   1534 	case ENOENT:
   1535 		/* Must have been deleted. */
   1536 		return;
   1537 
   1538 	case EINVAL:
   1539 	case ENOTSUP:
   1540 	default:
   1541 		bad_error("libscf_fmri_get_instance", r);
   1542 	}
   1543 
   1544 	snap = libscf_get_running_snapshot(inst);
   1545 
   1546 	r = libscf_get_startd_properties(inst, snap, &rip->ri_flags,
   1547 	    &rip->ri_utmpx_prefix);
   1548 	switch (r) {
   1549 	case 0:
   1550 		log_framework(LOG_DEBUG, "%s is a %s-style service\n",
   1551 		    rip->ri_i.i_fmri, service_style(rip->ri_flags));
   1552 		break;
   1553 
   1554 	case ECONNABORTED:
   1555 		scf_instance_destroy(inst);
   1556 		scf_snapshot_destroy(snap);
   1557 		libscf_handle_rebind(h);
   1558 		goto rep_retry;
   1559 
   1560 	case ECANCELED:
   1561 	case ENOENT:
   1562 		/* Succeed in anticipation of REMOVE_INSTANCE. */
   1563 		break;
   1564 
   1565 	default:
   1566 		bad_error("libscf_get_startd_properties", r);
   1567 	}
   1568 
   1569 	if (instance_started(rip)) {
   1570 		/* Refresh does not change the state. */
   1571 		(void) restarter_instance_update_states(h, rip,
   1572 		    rip->ri_i.i_state, rip->ri_i.i_state, RERR_NONE, NULL);
   1573 
   1574 		info = startd_zalloc(sizeof (*info));
   1575 		info->sf_id = rip->ri_id;
   1576 		info->sf_method_type = METHOD_REFRESH;
   1577 		info->sf_event_type = RERR_REFRESH;
   1578 
   1579 		assert(rip->ri_method_thread == 0);
   1580 		rip->ri_method_thread =
   1581 		    startd_thread_create(method_thread, info);
   1582 	}
   1583 
   1584 	scf_snapshot_destroy(snap);
   1585 	scf_instance_destroy(inst);
   1586 }
   1587 
   1588 const char *event_names[] = { "INVALID", "ADD_INSTANCE", "REMOVE_INSTANCE",
   1589 	"ENABLE", "DISABLE", "ADMIN_DEGRADED", "ADMIN_REFRESH",
   1590 	"ADMIN_RESTART", "ADMIN_MAINT_OFF", "ADMIN_MAINT_ON",
   1591 	"ADMIN_MAINT_ON_IMMEDIATE", "STOP", "START", "DEPENDENCY_CYCLE",
   1592 	"INVALID_DEPENDENCY", "ADMIN_DISABLE"
   1593 };
   1594 
   1595 /*
   1596  * void *restarter_process_events()
   1597  *
   1598  *   Called in a separate thread to process the events on an instance's
   1599  *   queue.  Empties the queue completely, and tries to keep the thread
   1600  *   around for a little while after the queue is empty to save on
   1601  *   startup costs.
   1602  */
   1603 static void *
   1604 restarter_process_events(void *arg)
   1605 {
   1606 	scf_handle_t *h;
   1607 	restarter_instance_qentry_t *event;
   1608 	restarter_inst_t *rip;
   1609 	char *fmri = (char *)arg;
   1610 	struct timespec to;
   1611 
   1612 	assert(fmri != NULL);
   1613 
   1614 	h = libscf_handle_create_bound_loop();
   1615 
   1616 	/* grab the queue lock */
   1617 	rip = inst_lookup_queue(fmri);
   1618 	if (rip == NULL)
   1619 		goto out;
   1620 
   1621 again:
   1622 
   1623 	while ((event = uu_list_first(rip->ri_queue)) != NULL) {
   1624 		restarter_inst_t *inst;
   1625 
   1626 		/* drop the queue lock */
   1627 		MUTEX_UNLOCK(&rip->ri_queue_lock);
   1628 
   1629 		/*
   1630 		 * Grab the inst lock -- this waits until any outstanding
   1631 		 * method finishes running.
   1632 		 */
   1633 		inst = inst_lookup_by_name(fmri);
   1634 		if (inst == NULL) {
   1635 			/* Getting deleted in the middle isn't an error. */
   1636 			goto cont;
   1637 		}
   1638 
   1639 		assert(instance_in_transition(inst) == 0);
   1640 
   1641 		/* process the event */
   1642 		switch (event->riq_type) {
   1643 		case RESTARTER_EVENT_TYPE_ENABLE:
   1644 		case RESTARTER_EVENT_TYPE_DISABLE:
   1645 		case RESTARTER_EVENT_TYPE_ADMIN_DISABLE:
   1646 			(void) enable_inst(h, inst, event->riq_type);
   1647 			break;
   1648 
   1649 		case RESTARTER_EVENT_TYPE_REMOVE_INSTANCE:
   1650 			restarter_delete_inst(inst);
   1651 			inst = NULL;
   1652 			goto cont;
   1653 
   1654 		case RESTARTER_EVENT_TYPE_STOP:
   1655 			(void) stop_instance(h, inst, RSTOP_DEPENDENCY);
   1656 			break;
   1657 
   1658 		case RESTARTER_EVENT_TYPE_START:
   1659 			start_instance(h, inst);
   1660 			break;
   1661 
   1662 		case RESTARTER_EVENT_TYPE_DEPENDENCY_CYCLE:
   1663 			maintain_instance(h, inst, 0, "dependency_cycle");
   1664 			break;
   1665 
   1666 		case RESTARTER_EVENT_TYPE_INVALID_DEPENDENCY:
   1667 			maintain_instance(h, inst, 0, "invalid_dependency");
   1668 			break;
   1669 
   1670 		case RESTARTER_EVENT_TYPE_ADMIN_MAINT_ON:
   1671 			if (event_from_tty(h, inst) == 0)
   1672 				maintain_instance(h, inst, 0,
   1673 				    "service_request");
   1674 			else
   1675 				maintain_instance(h, inst, 0,
   1676 				    "administrative_request");
   1677 			break;
   1678 
   1679 		case RESTARTER_EVENT_TYPE_ADMIN_MAINT_ON_IMMEDIATE:
   1680 			if (event_from_tty(h, inst) == 0)
   1681 				maintain_instance(h, inst, 1,
   1682 				    "service_request");
   1683 			else
   1684 				maintain_instance(h, inst, 1,
   1685 				    "administrative_request");
   1686 			break;
   1687 
   1688 		case RESTARTER_EVENT_TYPE_ADMIN_MAINT_OFF:
   1689 			unmaintain_instance(h, inst, RUNMAINT_CLEAR);
   1690 			break;
   1691 
   1692 		case RESTARTER_EVENT_TYPE_ADMIN_REFRESH:
   1693 			refresh_instance(h, inst);
   1694 			break;
   1695 
   1696 		case RESTARTER_EVENT_TYPE_ADMIN_DEGRADED:
   1697 			log_framework(LOG_WARNING, "Restarter: "
   1698 			    "%s command (for %s) unimplemented.\n",
   1699 			    event_names[event->riq_type], inst->ri_i.i_fmri);
   1700 			break;
   1701 
   1702 		case RESTARTER_EVENT_TYPE_ADMIN_RESTART:
   1703 			if (!instance_started(inst)) {
   1704 				log_framework(LOG_DEBUG, "Restarter: "
   1705 				    "Not restarting %s; not running.\n",
   1706 				    inst->ri_i.i_fmri);
   1707 			} else {
   1708 				/*
   1709 				 * Stop the instance.  If it can be restarted,
   1710 				 * the graph engine will send a new event.
   1711 				 */
   1712 				(void) stop_instance(h, inst, RSTOP_RESTART);
   1713 			}
   1714 			break;
   1715 
   1716 		case RESTARTER_EVENT_TYPE_ADD_INSTANCE:
   1717 		default:
   1718 #ifndef NDEBUG
   1719 			uu_warn("%s:%d: Bad restarter event %d.  "
   1720 			    "Aborting.\n", __FILE__, __LINE__, event->riq_type);
   1721 #endif
   1722 			abort();
   1723 		}
   1724 
   1725 		assert(inst != NULL);
   1726 		MUTEX_UNLOCK(&inst->ri_lock);
   1727 
   1728 cont:
   1729 		/* grab the queue lock */
   1730 		rip = inst_lookup_queue(fmri);
   1731 		if (rip == NULL)
   1732 			goto out;
   1733 
   1734 		/* delete the event */
   1735 		uu_list_remove(rip->ri_queue, event);
   1736 		startd_free(event, sizeof (restarter_instance_qentry_t));
   1737 	}
   1738 
   1739 	assert(rip != NULL);
   1740 
   1741 	/*
   1742 	 * Try to preserve the thread for a little while for future use.
   1743 	 */
   1744 	to.tv_sec = 3;
   1745 	to.tv_nsec = 0;
   1746 	(void) pthread_cond_reltimedwait_np(&rip->ri_queue_cv,
   1747 	    &rip->ri_queue_lock, &to);
   1748 
   1749 	if (uu_list_first(rip->ri_queue) != NULL)
   1750 		goto again;
   1751 
   1752 	rip->ri_queue_thread = 0;
   1753 	MUTEX_UNLOCK(&rip->ri_queue_lock);
   1754 out:
   1755 	(void) scf_handle_unbind(h);
   1756 	scf_handle_destroy(h);
   1757 	free(fmri);
   1758 	return (NULL);
   1759 }
   1760 
   1761 static int
   1762 is_admin_event(restarter_event_type_t t) {
   1763 
   1764 	switch (t) {
   1765 	case RESTARTER_EVENT_TYPE_ADMIN_MAINT_ON:
   1766 	case RESTARTER_EVENT_TYPE_ADMIN_MAINT_ON_IMMEDIATE:
   1767 	case RESTARTER_EVENT_TYPE_ADMIN_MAINT_OFF:
   1768 	case RESTARTER_EVENT_TYPE_ADMIN_REFRESH:
   1769 	case RESTARTER_EVENT_TYPE_ADMIN_DEGRADED:
   1770 	case RESTARTER_EVENT_TYPE_ADMIN_RESTART:
   1771 		return (1);
   1772 	default:
   1773 		return (0);
   1774 	}
   1775 }
   1776 
   1777 static void
   1778 restarter_queue_event(restarter_inst_t *ri, restarter_protocol_event_t *e)
   1779 {
   1780 	restarter_instance_qentry_t *qe;
   1781 	int r;
   1782 
   1783 	assert(PTHREAD_MUTEX_HELD(&ri->ri_queue_lock));
   1784 	assert(!PTHREAD_MUTEX_HELD(&ri->ri_lock));
   1785 
   1786 	qe = startd_zalloc(sizeof (restarter_instance_qentry_t));
   1787 	qe->riq_type = e->rpe_type;
   1788 
   1789 	uu_list_node_init(qe, &qe->riq_link, restarter_queue_pool);
   1790 	r = uu_list_insert_before(ri->ri_queue, NULL, qe);
   1791 	assert(r == 0);
   1792 }
   1793 
   1794 /*
   1795  * void *restarter_event_thread()
   1796  *
   1797  *  Handle incoming graph events by placing them on a per-instance
   1798  *  queue.  We can't lock the main part of the instance structure, so
   1799  *  just modify the seprarately locked event queue portion.
   1800  */
   1801 /*ARGSUSED*/
   1802 static void *
   1803 restarter_event_thread(void *unused)
   1804 {
   1805 	scf_handle_t *h;
   1806 
   1807 	/*
   1808 	 * This is a new thread, and thus, gets its own handle
   1809 	 * to the repository.
   1810 	 */
   1811 	h = libscf_handle_create_bound_loop();
   1812 
   1813 	MUTEX_LOCK(&ru->restarter_update_lock);
   1814 
   1815 	/*CONSTCOND*/
   1816 	while (1) {
   1817 		restarter_protocol_event_t *e;
   1818 
   1819 		while (ru->restarter_update_wakeup == 0)
   1820 			(void) pthread_cond_wait(&ru->restarter_update_cv,
   1821 			    &ru->restarter_update_lock);
   1822 
   1823 		ru->restarter_update_wakeup = 0;
   1824 
   1825 		while ((e = restarter_event_dequeue()) != NULL) {
   1826 			restarter_inst_t *rip;
   1827 			char *fmri;
   1828 
   1829 			MUTEX_UNLOCK(&ru->restarter_update_lock);
   1830 
   1831 			/*
   1832 			 * ADD_INSTANCE is special: there's likely no
   1833 			 * instance structure yet, so we need to handle the
   1834 			 * addition synchronously.
   1835 			 */
   1836 			switch (e->rpe_type) {
   1837 			case RESTARTER_EVENT_TYPE_ADD_INSTANCE:
   1838 				if (restarter_insert_inst(h, e->rpe_inst) != 0)
   1839 					log_error(LOG_INFO, "Restarter: "
   1840 					    "Could not add %s.\n", e->rpe_inst);
   1841 
   1842 				MUTEX_LOCK(&st->st_load_lock);
   1843 				if (--st->st_load_instances == 0)
   1844 					(void) pthread_cond_broadcast(
   1845 					    &st->st_load_cv);
   1846 				MUTEX_UNLOCK(&st->st_load_lock);
   1847 
   1848 				goto nolookup;
   1849 			}
   1850 
   1851 			/*
   1852 			 * Lookup the instance, locking only the event queue.
   1853 			 * Can't grab ri_lock here because it might be held
   1854 			 * by a long-running method.
   1855 			 */
   1856 			rip = inst_lookup_queue(e->rpe_inst);
   1857 			if (rip == NULL) {
   1858 				log_error(LOG_INFO, "Restarter: "
   1859 				    "Ignoring %s command for unknown service "
   1860 				    "%s.\n", event_names[e->rpe_type],
   1861 				    e->rpe_inst);
   1862 				goto nolookup;
   1863 			}
   1864 
   1865 			/* Keep ADMIN events from filling up the queue. */
   1866 			if (is_admin_event(e->rpe_type) &&
   1867 			    uu_list_numnodes(rip->ri_queue) >
   1868 			    RINST_QUEUE_THRESHOLD) {
   1869 				MUTEX_UNLOCK(&rip->ri_queue_lock);
   1870 				log_instance(rip, B_TRUE, "Instance event "
   1871 				    "queue overflow.  Dropping administrative "
   1872 				    "request.");
   1873 				log_framework(LOG_DEBUG, "%s: Instance event "
   1874 				    "queue overflow.  Dropping administrative "
   1875 				    "request.\n", rip->ri_i.i_fmri);
   1876 				goto nolookup;
   1877 			}
   1878 
   1879 			/* Now add the event to the instance queue. */
   1880 			restarter_queue_event(rip, e);
   1881 
   1882 			if (rip->ri_queue_thread == 0) {
   1883 				/*
   1884 				 * Start a thread if one isn't already
   1885 				 * running.
   1886 				 */
   1887 				fmri = safe_strdup(e->rpe_inst);
   1888 				rip->ri_queue_thread =  startd_thread_create(
   1889 				    restarter_process_events, (void *)fmri);
   1890 			} else {
   1891 				/*
   1892 				 * Signal the existing thread that there's
   1893 				 * a new event.
   1894 				 */
   1895 				(void) pthread_cond_broadcast(
   1896 				    &rip->ri_queue_cv);
   1897 			}
   1898 
   1899 			MUTEX_UNLOCK(&rip->ri_queue_lock);
   1900 nolookup:
   1901 			restarter_event_release(e);
   1902 
   1903 			MUTEX_LOCK(&ru->restarter_update_lock);
   1904 		}
   1905 	}
   1906 
   1907 	/*
   1908 	 * Unreachable for now -- there's currently no graceful cleanup
   1909 	 * called on exit().
   1910 	 */
   1911 	(void) scf_handle_unbind(h);
   1912 	scf_handle_destroy(h);
   1913 	return (NULL);
   1914 }
   1915 
   1916 static restarter_inst_t *
   1917 contract_to_inst(ctid_t ctid)
   1918 {
   1919 	restarter_inst_t *inst;
   1920 	int id;
   1921 
   1922 	id = lookup_inst_by_contract(ctid);
   1923 	if (id == -1)
   1924 		return (NULL);
   1925 
   1926 	inst = inst_lookup_by_id(id);
   1927 	if (inst != NULL) {
   1928 		/*
   1929 		 * Since ri_lock isn't held by the contract id lookup, this
   1930 		 * instance may have been restarted and now be in a new
   1931 		 * contract, making the old contract no longer valid for this
   1932 		 * instance.
   1933 		 */
   1934 		if (ctid != inst->ri_i.i_primary_ctid) {
   1935 			MUTEX_UNLOCK(&inst->ri_lock);
   1936 			inst = NULL;
   1937 		}
   1938 	}
   1939 	return (inst);
   1940 }
   1941 
   1942 /*
   1943  * void contract_action()
   1944  *   Take action on contract events.
   1945  */
   1946 static void
   1947 contract_action(scf_handle_t *h, restarter_inst_t *inst, ctid_t id,
   1948     uint32_t type)
   1949 {
   1950 	const char *fmri = inst->ri_i.i_fmri;
   1951 
   1952 	assert(PTHREAD_MUTEX_HELD(&inst->ri_lock));
   1953 
   1954 	/*
   1955 	 * If startd has stopped this contract, there is no need to
   1956 	 * stop it again.
   1957 	 */
   1958 	if (inst->ri_i.i_primary_ctid > 0 &&
   1959 	    inst->ri_i.i_primary_ctid_stopped)
   1960 		return;
   1961 
   1962 	if ((type & (CT_PR_EV_EMPTY | CT_PR_EV_CORE | CT_PR_EV_SIGNAL
   1963 	    | CT_PR_EV_HWERR)) == 0) {
   1964 		/*
   1965 		 * There shouldn't be other events, since that's not how we set
   1966 		 * the terms. Thus, just log an error and drive on.
   1967 		 */
   1968 		log_framework(LOG_NOTICE,
   1969 		    "%s: contract %ld received unexpected critical event "
   1970 		    "(%d)\n", fmri, id, type);
   1971 		return;
   1972 	}
   1973 
   1974 	assert(instance_in_transition(inst) == 0);
   1975 
   1976 	if (instance_is_wait_style(inst)) {
   1977 		/*
   1978 		 * We ignore all events; if they impact the
   1979 		 * process we're monitoring, then the
   1980 		 * wait_thread will stop the instance.
   1981 		 */
   1982 		log_framework(LOG_DEBUG,
   1983 		    "%s: ignoring contract event on wait-style service\n",
   1984 		    fmri);
   1985 	} else {
   1986 		/*
   1987 		 * A CT_PR_EV_EMPTY event is an RSTOP_EXIT request.
   1988 		 */
   1989 		switch (type) {
   1990 		case CT_PR_EV_EMPTY:
   1991 			(void) stop_instance(h, inst, RSTOP_EXIT);
   1992 			break;
   1993 		case CT_PR_EV_CORE:
   1994 			(void) stop_instance(h, inst, RSTOP_CORE);
   1995 			break;
   1996 		case CT_PR_EV_SIGNAL:
   1997 			(void) stop_instance(h, inst, RSTOP_SIGNAL);
   1998 			break;
   1999 		case CT_PR_EV_HWERR:
   2000 			(void) stop_instance(h, inst, RSTOP_HWERR);
   2001 			break;
   2002 		}
   2003 	}
   2004 }
   2005 
/*
 * void *restarter_contracts_event_thread(void *)
 *   Listens to the process contract bundle for critical events, taking action
 *   on events from contracts we know we are responsible for.
 *
 *   The thread first blocks until the graph load has completed, then binds
 *   its own repository handle, opens the process contract bundle and loops
 *   forever: read a critical event, fetch the contract's cookie, map the
 *   contract to an instance, act on the event via contract_action(), and
 *   acknowledge the event.  The argument is unused and the function never
 *   returns in practice.
 */
/*ARGSUSED*/
static void *
restarter_contracts_event_thread(void *unused)
{
	int fd, err;
	scf_handle_t *local_handle;

	/*
	 * Await graph load completion.  That is, stop here, until we've scanned
	 * the repository for contract - instance associations.
	 */
	MUTEX_LOCK(&st->st_load_lock);
	while (!(st->st_load_complete && st->st_load_instances == 0))
		(void) pthread_cond_wait(&st->st_load_cv, &st->st_load_lock);
	MUTEX_UNLOCK(&st->st_load_lock);

	/*
	 * This is a new thread, and thus, gets its own handle
	 * to the repository.
	 */
	if ((local_handle = libscf_handle_create_bound(SCF_VERSION)) == NULL)
		uu_die("Unable to bind a new repository handle: %s\n",
		    scf_strerror(scf_error()));

	/* The bundle descriptor stays open for the life of the thread. */
	fd = open64(CTFS_ROOT "/process/pbundle", O_RDONLY);
	if (fd == -1)
		uu_die("process bundle open failed");

	/*
	 * Make sure we get all events (including those generated by configd
	 * before this thread was started).
	 */
	err = ct_event_reset(fd);
	assert(err == 0);

	for (;;) {
		int efd, sfd;
		ct_evthdl_t ev;
		uint32_t type;
		ctevid_t evid;
		ct_stathdl_t status;
		ctid_t ctid;
		restarter_inst_t *inst;
		uint64_t cookie;

		/*
		 * A non-zero return is an error number; no event handle is
		 * freed on this path, we simply try the read again.
		 */
		if (err = ct_event_read_critical(fd, &ev)) {
			log_error(LOG_WARNING,
			    "Error reading next contract event: %s",
			    strerror(err));
			continue;
		}

		evid = ct_event_get_evid(ev);
		ctid = ct_event_get_ctid(ev);
		type = ct_event_get_type(ev);

		/*
		 * Fetch cookie.  If the contract's status file can't be
		 * opened or read, the event is dropped without being
		 * acknowledged.
		 */
		if ((sfd = contract_open(ctid, "process", "status", O_RDONLY))
		    < 0) {
			ct_event_free(ev);
			continue;
		}

		if (err = ct_status_read(sfd, CTD_COMMON, &status)) {
			log_framework(LOG_WARNING, "Could not get status for "
			    "contract %ld: %s\n", ctid, strerror(err));

			startd_close(sfd);
			ct_event_free(ev);
			continue;
		}

		cookie = ct_status_get_cookie(status);

		log_framework(LOG_DEBUG, "Received event %d for ctid %ld "
		    "cookie %lld\n", type, ctid, cookie);

		/* Only the cookie was needed from the status. */
		ct_status_free(status);

		startd_close(sfd);

		/*
		 * svc.configd(1M) restart handling performed by the
		 * fork_configd_thread.  We don't acknowledge, as that thread
		 * will do so.
		 */
		if (cookie == CONFIGD_COOKIE) {
			ct_event_free(ev);
			continue;
		}

		/*
		 * contract_to_inst() is only attempted here when
		 * storing_contract is non-zero; a successful lookup leaves
		 * inst set for the code below.
		 */
		inst = NULL;
		if (storing_contract != 0 &&
		    (inst = contract_to_inst(ctid)) == NULL) {
			/*
			 * This can happen for two reasons:
			 * - method_run() has not yet stored the
			 *    contract into the internal hash table.
			 * - we receive an EMPTY event for an abandoned
			 *    contract.
			 * If there is any contract in the process of
			 * being stored into the hash table then re-read
			 * the event later.
			 */
			log_framework(LOG_DEBUG,
			    "Reset event %d for unknown "
			    "contract id %ld\n", type, ctid);

			/* don't go too fast */
			(void) poll(NULL, 0, 100);

			/*
			 * Reset the bundle's event queue so that this
			 * (unacknowledged) event is read again.
			 */
			(void) ct_event_reset(fd);
			ct_event_free(ev);
			continue;
		}

		/*
		 * Do not call contract_to_inst() again if first
		 * call succeeded.
		 */
		if (inst == NULL)
			inst = contract_to_inst(ctid);
		if (inst == NULL) {
			/*
			 * This can happen if we receive an EMPTY
			 * event for an abandoned contract.
			 */
			log_framework(LOG_DEBUG,
			    "Received event %d for unknown contract id "
			    "%ld\n", type, ctid);
		} else {
			log_framework(LOG_DEBUG,
			    "Received event %d for contract id "
			    "%ld (%s)\n", type, ctid,
			    inst->ri_i.i_fmri);

			/* contract_action() requires inst->ri_lock held. */
			contract_action(local_handle, inst, ctid, type);

			MUTEX_UNLOCK(&inst->ri_lock);
		}

		/*
		 * Acknowledge the event whether or not an instance was
		 * found.  Failure to open the ctl file, or to ack, is
		 * ignored.
		 */
		efd = contract_open(ct_event_get_ctid(ev), "process", "ctl",
		    O_WRONLY);
		if (efd != -1) {
			(void) ct_ctl_ack(efd, evid);
			startd_close(efd);
		}

		ct_event_free(ev);

	}

	/*NOTREACHED*/
	return (NULL);
}
   2166 
/*
 * Timeout queue, processed by restarter_timeouts_event_thread().
 *
 * timeouts is allocated by timeout_init(); its tq_list is created sorted
 * using timeout_compare() and is manipulated under tq_lock.  timeout_pool
 * is the uu_list pool from which the entries' list nodes are initialized.
 */
timeout_queue_t *timeouts;
static uu_list_pool_t *timeout_pool;

/*
 * Wakeup state shared between timeout_insert() and the timeouts thread.
 * timeout_insert() sets tu_wakeup and broadcasts tu_cv after queueing an
 * entry; the timeouts thread waits on tu_cv for tu_wakeup to become
 * non-zero when the queue is empty, then clears it.
 */
typedef struct timeout_update {
	pthread_mutex_t		tu_lock;	/* held around tu_wakeup use */
	pthread_cond_t		tu_cv;		/* broadcast on new timeout */
	int			tu_wakeup;	/* non-zero: new timeout */
} timeout_update_t;

/* The single timeout_update_t instance, allocated by timeout_init(). */
timeout_update_t *tu;
   2180 
/*
 * NULL-terminated list of instance FMRIs for which is_timeout_ovr() reports
 * a timeout override ("Using infinite timeout").
 */
static const char *timeout_ovr_svcs[] = {
	"svc:/system/manifest-import:default",
	"svc:/network/initial:default",
	"svc:/network/service:default",
	"svc:/system/rmtmpfiles:default",
	"svc:/network/loopback:default",
	"svc:/network/physical:default",
	"svc:/system/device/local:default",
	"svc:/system/metainit:default",
	"svc:/system/filesystem/usr:default",
	"svc:/system/filesystem/minimal:default",
	"svc:/system/filesystem/local:default",
	NULL
};
   2195 
   2196 int
   2197 is_timeout_ovr(restarter_inst_t *inst)
   2198 {
   2199 	int i;
   2200 
   2201 	for (i = 0; timeout_ovr_svcs[i] != NULL; ++i) {
   2202 		if (strcmp(inst->ri_i.i_fmri, timeout_ovr_svcs[i]) == 0) {
   2203 			log_instance(inst, B_TRUE, "Timeout override by "
   2204 			    "svc.startd.  Using infinite timeout.");
   2205 			return (1);
   2206 		}
   2207 	}
   2208 
   2209 	return (0);
   2210 }
   2211 
   2212 /*ARGSUSED*/
   2213 static int
   2214 timeout_compare(const void *lc_arg, const void *rc_arg, void *private)
   2215 {
   2216 	hrtime_t t1 = ((const timeout_entry_t *)lc_arg)->te_timeout;
   2217 	hrtime_t t2 = ((const timeout_entry_t *)rc_arg)->te_timeout;
   2218 
   2219 	if (t1 > t2)
   2220 		return (1);
   2221 	else if (t1 < t2)
   2222 		return (-1);
   2223 	return (0);
   2224 }
   2225 
   2226 void
   2227 timeout_init()
   2228 {
   2229 	timeouts = startd_zalloc(sizeof (timeout_queue_t));
   2230 
   2231 	(void) pthread_mutex_init(&timeouts->tq_lock, &mutex_attrs);
   2232 
   2233 	timeout_pool = startd_list_pool_create("timeouts",
   2234 	    sizeof (timeout_entry_t), offsetof(timeout_entry_t, te_link),
   2235 	    timeout_compare, UU_LIST_POOL_DEBUG);
   2236 	assert(timeout_pool != NULL);
   2237 
   2238 	timeouts->tq_list = startd_list_create(timeout_pool,
   2239 	    timeouts, UU_LIST_SORTED);
   2240 	assert(timeouts->tq_list != NULL);
   2241 
   2242 	tu = startd_zalloc(sizeof (timeout_update_t));
   2243 	(void) pthread_cond_init(&tu->tu_cv, NULL);
   2244 	(void) pthread_mutex_init(&tu->tu_lock, &mutex_attrs);
   2245 }
   2246 
   2247 void
   2248 timeout_insert(restarter_inst_t *inst, ctid_t cid, uint64_t timeout_sec)
   2249 {
   2250 	hrtime_t now, timeout;
   2251 	timeout_entry_t *entry;
   2252 	uu_list_index_t idx;
   2253 
   2254 	assert(PTHREAD_MUTEX_HELD(&inst->ri_lock));
   2255 
   2256 	now = gethrtime();
   2257 
   2258 	/*
   2259 	 * If we overflow LLONG_MAX, we're never timing out anyways, so
   2260 	 * just return.
   2261 	 */
   2262 	if (timeout_sec >= (LLONG_MAX - now) / 1000000000LL) {
   2263 		log_instance(inst, B_TRUE, "timeout_seconds too large, "
   2264 		    "treating as infinite.");
   2265 		return;
   2266 	}
   2267 
   2268 	/* hrtime is in nanoseconds. Convert timeout_sec. */
   2269 	timeout = now + (timeout_sec * 1000000000LL);
   2270 
   2271 	entry = startd_alloc(sizeof (timeout_entry_t));
   2272 	entry->te_timeout = timeout;
   2273 	entry->te_ctid = cid;
   2274 	entry->te_fmri = safe_strdup(inst->ri_i.i_fmri);
   2275 	entry->te_logstem = safe_strdup(inst->ri_logstem);
   2276 	entry->te_fired = 0;
   2277 	/* Insert the calculated timeout time onto the queue. */
   2278 	MUTEX_LOCK(&timeouts->tq_lock);
   2279 	(void) uu_list_find(timeouts->tq_list, entry, NULL, &idx);
   2280 	uu_list_node_init(entry, &entry->te_link, timeout_pool);
   2281 	uu_list_insert(timeouts->tq_list, entry, idx);
   2282 	MUTEX_UNLOCK(&timeouts->tq_lock);
   2283 
   2284 	assert(inst->ri_timeout == NULL);
   2285 	inst->ri_timeout = entry;
   2286 
   2287 	MUTEX_LOCK(&tu->tu_lock);
   2288 	tu->tu_wakeup = 1;
   2289 	(void) pthread_cond_broadcast(&tu->tu_cv);
   2290 	MUTEX_UNLOCK(&tu->tu_lock);
   2291 }
   2292 
   2293 
   2294 void
   2295 timeout_remove(restarter_inst_t *inst, ctid_t cid)
   2296 {
   2297 	assert(PTHREAD_MUTEX_HELD(&inst->ri_lock));
   2298 
   2299 	if (inst->ri_timeout == NULL)
   2300 		return;
   2301 
   2302 	assert(inst->ri_timeout->te_ctid == cid);
   2303 
   2304 	MUTEX_LOCK(&timeouts->tq_lock);
   2305 	uu_list_remove(timeouts->tq_list, inst->ri_timeout);
   2306 	MUTEX_UNLOCK(&timeouts->tq_lock);
   2307 
   2308 	free(inst->ri_timeout->te_fmri);
   2309 	free(inst->ri_timeout->te_logstem);
   2310 	startd_free(inst->ri_timeout, sizeof (timeout_entry_t));
   2311 	inst->ri_timeout = NULL;
   2312 }
   2313 
   2314 static int
   2315 timeout_now()
   2316 {
   2317 	timeout_entry_t *e;
   2318 	hrtime_t now;
   2319 	int ret;
   2320 
   2321 	now = gethrtime();
   2322 
   2323 	/*
   2324 	 * Walk through the (sorted) timeouts list.  While the timeout
   2325 	 * at the head of the list is <= the current time, kill the
   2326 	 * method.
   2327 	 */
   2328 	MUTEX_LOCK(&timeouts->tq_lock);
   2329 
   2330 	for (e = uu_list_first(timeouts->tq_list);
   2331 	    e != NULL && e->te_timeout <= now;
   2332 	    e = uu_list_next(timeouts->tq_list, e)) {
   2333 		log_framework(LOG_WARNING, "%s: Method or service exit timed "
   2334 		    "out.  Killing contract %ld.\n", e->te_fmri, e->te_ctid);
   2335 		log_instance_fmri(e->te_fmri, e->te_logstem, B_TRUE,
   2336 		    "Method or service exit timed out.  Killing contract %ld.",
   2337 		    e->te_ctid);
   2338 		e->te_fired = 1;
   2339 		(void) contract_kill(e->te_ctid, SIGKILL, e->te_fmri);
   2340 	}
   2341 
   2342 	if (uu_list_numnodes(timeouts->tq_list) > 0)
   2343 		ret = 0;
   2344 	else
   2345 		ret = -1;
   2346 
   2347 	MUTEX_UNLOCK(&timeouts->tq_lock);
   2348 
   2349 	return (ret);
   2350 }
   2351 
   2352 /*
   2353  * void *restarter_timeouts_event_thread(void *)
   2354  *   Responsible for monitoring the method timeouts.  This thread must
   2355  *   be started before any methods are called.
   2356  */
   2357 /*ARGSUSED*/
   2358 static void *
   2359 restarter_timeouts_event_thread(void *unused)
   2360 {
   2361 	/*
   2362 	 * Timeouts are entered on a priority queue, which is processed by
   2363 	 * this thread.  As timeouts are specified in seconds, we'll do
   2364 	 * the necessary processing every second, as long as the queue
   2365 	 * is not empty.
   2366 	 */
   2367 
   2368 	/*CONSTCOND*/
   2369 	while (1) {
   2370 		/*
   2371 		 * As long as the timeout list isn't empty, process it
   2372 		 * every second.
   2373 		 */
   2374 		if (timeout_now() == 0) {
   2375 			(void) sleep(1);
   2376 			continue;
   2377 		}
   2378 
   2379 		/* The list is empty, wait until we have more timeouts. */
   2380 		MUTEX_LOCK(&tu->tu_lock);
   2381 
   2382 		while (tu->tu_wakeup == 0)
   2383 			(void) pthread_cond_wait(&tu->tu_cv, &tu->tu_lock);
   2384 
   2385 		tu->tu_wakeup = 0;
   2386 		MUTEX_UNLOCK(&tu->tu_lock);
   2387 	}
   2388 
   2389 	return (NULL);
   2390 }
   2391 
   2392 void
   2393 restarter_start()
   2394 {
   2395 	(void) startd_thread_create(restarter_timeouts_event_thread, NULL);
   2396 	(void) startd_thread_create(restarter_event_thread, NULL);
   2397 	(void) startd_thread_create(restarter_contracts_event_thread, NULL);
   2398 	(void) startd_thread_create(wait_thread, NULL);
   2399 }
   2400 
   2401 
   2402 void
   2403 restarter_init()
   2404 {
   2405 	restarter_instance_pool = startd_list_pool_create("restarter_instances",
   2406 	    sizeof (restarter_inst_t), offsetof(restarter_inst_t,
   2407 	    ri_link), restarter_instance_compare, UU_LIST_POOL_DEBUG);
   2408 	(void) memset(&instance_list, 0, sizeof (instance_list));
   2409 
   2410 	(void) pthread_mutex_init(&instance_list.ril_lock, &mutex_attrs);
   2411 	instance_list.ril_instance_list = startd_list_create(
   2412 	    restarter_instance_pool, &instance_list, UU_LIST_SORTED);
   2413 
   2414 	restarter_queue_pool = startd_list_pool_create(
   2415 	    "restarter_instance_queue", sizeof (restarter_instance_qentry_t),
   2416 	    offsetof(restarter_instance_qentry_t,  riq_link), NULL,
   2417 	    UU_LIST_POOL_DEBUG);
   2418 
   2419 	contract_list_pool = startd_list_pool_create(
   2420 	    "contract_list", sizeof (contract_entry_t),
   2421 	    offsetof(contract_entry_t,  ce_link), NULL,
   2422 	    UU_LIST_POOL_DEBUG);
   2423 	contract_hash_init();
   2424 
   2425 	log_framework(LOG_DEBUG, "Initialized restarter\n");
   2426 }
   2427