Home | History | Annotate | Download | only in os
      1 /*
      2  * CDDL HEADER START
      3  *
      4  * The contents of this file are subject to the terms of the
      5  * Common Development and Distribution License (the "License").
      6  * You may not use this file except in compliance with the License.
      7  *
      8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
      9  * or http://www.opensolaris.org/os/licensing.
     10  * See the License for the specific language governing permissions
     11  * and limitations under the License.
     12  *
     13  * When distributing Covered Code, include this CDDL HEADER in each
     14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
     15  * If applicable, add the following below this CDDL HEADER, with the
     16  * fields enclosed by brackets "[]" replaced with your own identifying
     17  * information: Portions Copyright [yyyy] [name of copyright owner]
     18  *
     19  * CDDL HEADER END
     20  */
     21 /*
     22  * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
     23  * Use is subject to license terms.
     24  */
     25 
     26 #pragma ident	"@(#)contract.c	1.4	07/08/09 SMI"
     27 
     28 /*
     29  * Contracts
     30  * ---------
     31  *
     32  * Contracts are a primitive which enrich the relationships between
     33  * processes and system resources.  The primary purpose of contracts is
     34  * to provide a means for the system to negotiate the departure from a
     35  * binding relationship (e.g. pages locked in memory or a thread bound
     36  * to processor), but they can also be used as a purely asynchronous
     37  * error reporting mechanism as they are with process contracts.
     38  *
     39  * More information on how one interfaces with contracts and what
     40  * contracts can do for you can be found in:
     41  *   PSARC 2003/193 Solaris Contracts
     42  *   PSARC 2004/460 Contracts addendum
     43  *
     44  * This file contains the core contracts framework.  By itself it is
     45  * useless: it depends on the contracts filesystem (ctfs) to provide an
     46  * interface to user processes and individual contract types to
     47  * implement the process/resource relationships.
     48  *
     49  * Data structure overview
     50  * -----------------------
     51  *
     52  * A contract is represented by a contract_t, which itself points to an
     53  * encapsulating contract-type specific contract object.  A contract_t
     54  * contains the contract's static identity (including its terms), its
     55  * linkage to various bookkeeping structures, the contract-specific
     56  * event queue, and a reference count.
     57  *
     58  * A contract template is represented by a ct_template_t, which, like a
     59  * contract, points to an encapsulating contract-type specific template
     60  * object.  A ct_template_t contains the template's terms.
     61  *
     62  * An event queue is represented by a ct_equeue_t, and consists of a
     63  * list of events, a list of listeners, and a list of listeners who are
     64  * waiting for new events (affectionately referred to as "tail
     65  * listeners").  There are three queue types, defined by ct_listnum_t
     66  * (an enum).  An event may be on one of each type of queue
     67  * simultaneously; the list linkage used by a queue is determined by
     68  * its type.
     69  *
     70  * An event is represented by a ct_kevent_t, which contains mostly
     71  * static event data (e.g. id, payload).  It also has an array of
     72  * ct_member_t structures, each of which contains a list_node_t and
     73  * represents the event's linkage in a specific event queue.
     74  *
     75  * Each open of an event endpoint results in the creation of a new
     76  * listener, represented by a ct_listener_t.  In addition to linkage
     77  * into the aforementioned lists in the event_queue, a ct_listener_t
     78  * contains a pointer to the ct_kevent_t it is currently positioned at
     79  * as well as a set of status flags and other administrative data.
     80  *
     81  * Each process has a list of contracts it owns, p_ct_held; a pointer
     82  * to the process contract it is a member of, p_ct_process; the linkage
     83  * for that membership, p_ct_member; and an array of event queue
     84  * structures representing the process bundle queues.
     85  *
     86  * Each LWP has an array of its active templates, lwp_ct_active; and
     87  * the most recently created contracts, lwp_ct_latest.
     88  *
     89  * A process contract has a list of member processes and a list of
     90  * inherited contracts.
     91  *
     92  * There is a system-wide list of all contracts, as well as per-type
     93  * lists of contracts.
     94  *
     95  * Lock ordering overview
     96  * ----------------------
     97  *
     98  * Locks at the top are taken first:
     99  *
    100  *                   ct_evtlock
    101  *                   regent ct_lock
    102  *                   member ct_lock
    103  *                   pidlock
    104  *                   p_lock
    105  *    contract ctq_lock         contract_lock
    106  *    pbundle ctq_lock
    107  *    cte_lock
    108  *                   ct_reflock
    109  *
    110  * contract_lock and ctq_lock/cte_lock are not currently taken at the
    111  * same time.
    112  *
    113  * Reference counting and locking
    114  * ------------------------------
    115  *
    116  * A contract has a reference count, protected by ct_reflock.
    117  * (ct_reflock is also used in a couple other places where atomic
    118  * access to a variable is needed in an innermost context).  A process
    119  * maintains a hold on each contract it owns.  A process contract has a
    120  * hold on each contract it has inherited.  Each event has a hold on
    121  * the contract which generated it.  Process contract templates have
    122  * holds on the contracts referred to by their transfer terms.  CTFS
    123  * contract directory nodes have holds on contracts.  Lastly, various
    124  * code paths may temporarily take holds on contracts to prevent them
    125  * from disappearing while other processing is going on.  It is
    126  * important to note that the global contract lists do not hold
    127  * references on contracts; a contract is removed from these structures
    128  * atomically with the release of its last reference.
    129  *
    130  * At a given point in time, a contract can either be owned by a
    131  * process, inherited by a regent process contract, or orphaned.  A
    132  * contract_t's  owner and regent pointers, ct_owner and ct_regent, are
    133  * protected by its ct_lock.  The linkage in the holder's (holder =
    134  * owner or regent) list of contracts, ct_ctlist, is protected by
    135  * whatever lock protects the holder's data structure.  In order for
    136  * these two directions to remain consistent, changing the holder of a
    137  * contract requires that both locks be held.
    138  *
    139  * Events also have reference counts.  There is one hold on an event
    140  * per queue it is present on, in addition to those needed for the
    141  * usual sundry reasons.  Individual listeners are associated with
    142  * specific queues, and increase a queue-specific reference count
    143  * stored in the ct_member_t structure.
    144  *
    145  * The dynamic contents of an event (reference count and flags) are
    146  * protected by its cte_lock, while the contents of the embedded
    147  * ct_member_t structures are protected by the locks of the queues they
    148  * are linked into.  A ct_listener_t's contents are also protected by
    149  * its event queue's ctq_lock.
    150  *
    151  * Resource controls
    152  * -----------------
    153  *
    154  * Control:      project.max-contracts (rc_project_contract)
    155  * Description:  Maximum number of contracts allowed a project.
    156  *
    157  *   When a contract is created, the project's allocation is tested and
    158  *   (assuming success) increased.  When the last reference to a
    159  *   contract is released, the creating project's allocation is
    160  *   decreased.
    161  */
    162 
    163 #include <sys/mutex.h>
    164 #include <sys/debug.h>
    165 #include <sys/types.h>
    166 #include <sys/param.h>
    167 #include <sys/kmem.h>
    168 #include <sys/thread.h>
    169 #include <sys/id_space.h>
    170 #include <sys/avl.h>
    171 #include <sys/list.h>
    172 #include <sys/sysmacros.h>
    173 #include <sys/proc.h>
    174 #include <sys/contract_impl.h>
    175 #include <sys/contract/process_impl.h>
    176 #include <sys/dditypes.h>
    177 #include <sys/contract/device_impl.h>
    178 #include <sys/systm.h>
    179 #include <sys/atomic.h>
    180 #include <sys/cmn_err.h>
    181 #include <sys/model.h>
    182 #include <sys/policy.h>
    183 #include <sys/zone.h>
    184 #include <sys/task.h>
    185 #include <sys/ddi.h>
    186 #include <sys/sunddi.h>
    187 
    188 extern rctl_hndl_t rc_project_contract;
    189 
    190 static id_space_t	*contract_ids;
    191 static avl_tree_t	contract_avl;
    192 static kmutex_t		contract_lock;
    193 
    194 int			ct_ntypes = CTT_MAXTYPE;
    195 static ct_type_t	*ct_types_static[CTT_MAXTYPE];
    196 ct_type_t		**ct_types = ct_types_static;
    197 int			ct_debug;
    198 
    199 static void cte_queue_create(ct_equeue_t *, ct_listnum_t, int, int);
    200 static void cte_queue_destroy(ct_equeue_t *);
    201 static void cte_queue_drain(ct_equeue_t *, int);
    202 static void cte_trim(ct_equeue_t *, contract_t *);
    203 static void cte_copy(ct_equeue_t *, ct_equeue_t *);
    204 
    205 /*
    206  * contract_compar
    207  *
    208  * A contract comparator which sorts on contract ID.
    209  */
    210 int
    211 contract_compar(const void *x, const void *y)
    212 {
    213 	const contract_t *ct1 = x;
    214 	const contract_t *ct2 = y;
    215 
    216 	if (ct1->ct_id < ct2->ct_id)
    217 		return (-1);
    218 	if (ct1->ct_id > ct2->ct_id)
    219 		return (1);
    220 	return (0);
    221 }
    222 
    223 /*
    224  * contract_init
    225  *
    226  * Initializes the contract subsystem, the specific contract types, and
    227  * process 0.
    228  */
    229 void
    230 contract_init(void)
    231 {
    232 	/*
    233 	 * Initialize contract subsystem.
    234 	 */
    235 	contract_ids = id_space_create("contracts", 1, INT_MAX);
    236 	avl_create(&contract_avl, contract_compar, sizeof (contract_t),
    237 	    offsetof(contract_t, ct_ctavl));
    238 	mutex_init(&contract_lock, NULL, MUTEX_DEFAULT, NULL);
    239 
    240 	/*
    241 	 * Initialize contract types.
    242 	 */
    243 	contract_process_init();
    244 	contract_device_init();
    245 
    246 	/*
    247 	 * Initialize p0/lwp0 contract state.
    248 	 */
    249 	avl_create(&p0.p_ct_held, contract_compar, sizeof (contract_t),
    250 	    offsetof(contract_t, ct_ctlist));
    251 }
    252 
    253 /*
    254  * contract_dtor
    255  *
    256  * Performs basic destruction of the common portions of a contract.
    257  * Called from the failure path of contract_ctor and from
    258  * contract_rele.
    259  */
    260 static void
    261 contract_dtor(contract_t *ct)
    262 {
    263 	cte_queue_destroy(&ct->ct_events);
    264 	list_destroy(&ct->ct_vnodes);
    265 	mutex_destroy(&ct->ct_reflock);
    266 	mutex_destroy(&ct->ct_lock);
    267 	mutex_destroy(&ct->ct_evtlock);
    268 }
    269 
    270 /*
    271  * contract_ctor
    272  *
    273  * Called by a contract type to initialize a contract.  Fails if the
    274  * max-contract resource control would have been exceeded.  After a
    275  * successful call to contract_ctor, the contract is unlocked and
    276  * visible in all namespaces; any type-specific initialization should
    277  * be completed before calling contract_ctor.  Returns 0 on success.
    278  *
    279  * Because not all callers can tolerate failure, a 0 value for canfail
    280  * instructs contract_ctor to ignore the project.max-contracts resource
    281  * control.  Obviously, this "out" should only be employed by callers
    282  * who are sufficiently constrained in other ways (e.g. newproc).
    283  */
    284 int
    285 contract_ctor(contract_t *ct, ct_type_t *type, ct_template_t *tmpl, void *data,
    286     ctflags_t flags, proc_t *author, int canfail)
    287 {
    288 	avl_index_t where;
    289 	klwp_t *curlwp = ttolwp(curthread);
    290 
    291 	ASSERT(author == curproc);
    292 
    293 	mutex_init(&ct->ct_lock, NULL, MUTEX_DEFAULT, NULL);
    294 	mutex_init(&ct->ct_reflock, NULL, MUTEX_DEFAULT, NULL);
    295 	mutex_init(&ct->ct_evtlock, NULL, MUTEX_DEFAULT, NULL);
    296 	ct->ct_id = id_alloc(contract_ids);
    297 
    298 	cte_queue_create(&ct->ct_events, CTEL_CONTRACT, 20, 0);
    299 	list_create(&ct->ct_vnodes, sizeof (contract_vnode_t),
    300 	    offsetof(contract_vnode_t, ctv_node));
    301 
    302 	/*
    303 	 * Instance data
    304 	 */
    305 	ct->ct_ref = 2;		/* one for the holder, one for "latest" */
    306 	ct->ct_cuid = crgetuid(CRED());
    307 	ct->ct_type = type;
    308 	ct->ct_data = data;
    309 	gethrestime(&ct->ct_ctime);
    310 	ct->ct_state = CTS_OWNED;
    311 	ct->ct_flags = flags;
    312 	ct->ct_regent = author->p_ct_process ?
    313 	    &author->p_ct_process->conp_contract : NULL;
    314 	ct->ct_ev_info = tmpl->ctmpl_ev_info;
    315 	ct->ct_ev_crit = tmpl->ctmpl_ev_crit;
    316 	ct->ct_cookie = tmpl->ctmpl_cookie;
    317 	ct->ct_owner = author;
    318 	ct->ct_ntime.ctm_total = -1;
    319 	ct->ct_qtime.ctm_total = -1;
    320 	ct->ct_nevent = NULL;
    321 
    322 	/*
    323 	 * Test project.max-contracts.
    324 	 */
    325 	mutex_enter(&author->p_lock);
    326 	mutex_enter(&contract_lock);
    327 	if (canfail && rctl_test(rc_project_contract,
    328 	    author->p_task->tk_proj->kpj_rctls, author, 1,
    329 	    RCA_SAFE) & RCT_DENY) {
    330 		id_free(contract_ids, ct->ct_id);
    331 		mutex_exit(&contract_lock);
    332 		mutex_exit(&author->p_lock);
    333 		ct->ct_events.ctq_flags |= CTQ_DEAD;
    334 		contract_dtor(ct);
    335 		return (1);
    336 	}
    337 	ct->ct_proj = author->p_task->tk_proj;
    338 	ct->ct_proj->kpj_data.kpd_contract++;
    339 	(void) project_hold(ct->ct_proj);
    340 	mutex_exit(&contract_lock);
    341 
    342 	/*
    343 	 * Insert into holder's avl of contracts.
    344 	 * We use an avl not because order is important, but because
    345 	 * readdir of /proc/contracts requires we be able to use a
    346 	 * scalar as an index into the process's list of contracts
    347 	 */
    348 	ct->ct_zoneid = author->p_zone->zone_id;
    349 	ct->ct_czuniqid = ct->ct_mzuniqid = author->p_zone->zone_uniqid;
    350 	VERIFY(avl_find(&author->p_ct_held, ct, &where) == NULL);
    351 	avl_insert(&author->p_ct_held, ct, where);
    352 	mutex_exit(&author->p_lock);
    353 
    354 	/*
    355 	 * Insert into global contract AVL
    356 	 */
    357 	mutex_enter(&contract_lock);
    358 	VERIFY(avl_find(&contract_avl, ct, &where) == NULL);
    359 	avl_insert(&contract_avl, ct, where);
    360 	mutex_exit(&contract_lock);
    361 
    362 	/*
    363 	 * Insert into type AVL
    364 	 */
    365 	mutex_enter(&type->ct_type_lock);
    366 	VERIFY(avl_find(&type->ct_type_avl, ct, &where) == NULL);
    367 	avl_insert(&type->ct_type_avl, ct, where);
    368 	type->ct_type_timestruc = ct->ct_ctime;
    369 	mutex_exit(&type->ct_type_lock);
    370 
    371 	if (curlwp->lwp_ct_latest[type->ct_type_index])
    372 		contract_rele(curlwp->lwp_ct_latest[type->ct_type_index]);
    373 	curlwp->lwp_ct_latest[type->ct_type_index] = ct;
    374 
    375 	return (0);
    376 }
    377 
    378 /*
    379  * contract_rele
    380  *
    381  * Releases a reference to a contract.  If the caller had the last
    382  * reference, the contract is removed from all namespaces, its
    383  * allocation against the max-contracts resource control is released,
    384  * and the contract type's free entry point is invoked for any
    385  * type-specific deconstruction and to (presumably) free the object.
    386  */
    387 void
    388 contract_rele(contract_t *ct)
    389 {
    390 	uint64_t nref;
    391 
    392 	mutex_enter(&ct->ct_reflock);
    393 	ASSERT(ct->ct_ref > 0);
    394 	nref = --ct->ct_ref;
    395 	mutex_exit(&ct->ct_reflock);
    396 	if (nref == 0) {
    397 		/*
    398 		 * ct_owner is cleared when it drops its reference.
    399 		 */
    400 		ASSERT(ct->ct_owner == NULL);
    401 		ASSERT(ct->ct_evcnt == 0);
    402 
    403 		/*
    404 		 * Remove from global contract AVL
    405 		 */
    406 		mutex_enter(&contract_lock);
    407 		avl_remove(&contract_avl, ct);
    408 		mutex_exit(&contract_lock);
    409 
    410 		/*
    411 		 * Remove from type AVL
    412 		 */
    413 		mutex_enter(&ct->ct_type->ct_type_lock);
    414 		avl_remove(&ct->ct_type->ct_type_avl, ct);
    415 		mutex_exit(&ct->ct_type->ct_type_lock);
    416 
    417 		/*
    418 		 * Release the contract's ID
    419 		 */
    420 		id_free(contract_ids, ct->ct_id);
    421 
    422 		/*
    423 		 * Release project hold
    424 		 */
    425 		mutex_enter(&contract_lock);
    426 		ct->ct_proj->kpj_data.kpd_contract--;
    427 		project_rele(ct->ct_proj);
    428 		mutex_exit(&contract_lock);
    429 
    430 		/*
    431 		 * Free the contract
    432 		 */
    433 		contract_dtor(ct);
    434 		ct->ct_type->ct_type_ops->contop_free(ct);
    435 	}
    436 }
    437 
    438 /*
    439  * contract_hold
    440  *
    441  * Adds a reference to a contract
    442  */
    443 void
    444 contract_hold(contract_t *ct)
    445 {
    446 	mutex_enter(&ct->ct_reflock);
    447 	ASSERT(ct->ct_ref < UINT64_MAX);
    448 	ct->ct_ref++;
    449 	mutex_exit(&ct->ct_reflock);
    450 }
    451 
    452 /*
    453  * contract_getzuniqid
    454  *
    455  * Get a contract's zone unique ID.  Needed because 64-bit reads and
    456  * writes aren't atomic on x86.  Since there are contexts where we are
    457  * unable to take ct_lock, we instead use ct_reflock; in actuality any
    458  * lock would do.
    459  */
    460 uint64_t
    461 contract_getzuniqid(contract_t *ct)
    462 {
    463 	uint64_t zuniqid;
    464 
    465 	mutex_enter(&ct->ct_reflock);
    466 	zuniqid = ct->ct_mzuniqid;
    467 	mutex_exit(&ct->ct_reflock);
    468 
    469 	return (zuniqid);
    470 }
    471 
    472 /*
    473  * contract_setzuniqid
    474  *
    475  * Sets a contract's zone unique ID.   See contract_getzuniqid.
    476  */
    477 void
    478 contract_setzuniqid(contract_t *ct, uint64_t zuniqid)
    479 {
    480 	mutex_enter(&ct->ct_reflock);
    481 	ct->ct_mzuniqid = zuniqid;
    482 	mutex_exit(&ct->ct_reflock);
    483 }
    484 
    485 /*
    486  * contract_abandon
    487  *
    488  * Abandons the specified contract.  If "explicit" is clear, the
    489  * contract was implicitly abandoned (by process exit) and should be
    490  * inherited if its terms allow it and its owner was a member of a
    491  * regent contract.  Otherwise, the contract type's abandon entry point
    492  * is invoked to either destroy or orphan the contract.
    493  */
    494 int
    495 contract_abandon(contract_t *ct, proc_t *p, int explicit)
    496 {
    497 	ct_equeue_t *q = NULL;
    498 	contract_t *parent = &p->p_ct_process->conp_contract;
    499 	int inherit = 0;
    500 
    501 	ASSERT(p == curproc);
    502 
    503 	mutex_enter(&ct->ct_lock);
    504 
    505 	/*
    506 	 * Multiple contract locks are taken contract -> subcontract.
    507 	 * Check if the contract will be inherited so we can acquire
    508 	 * all the necessary locks before making sensitive changes.
    509 	 */
    510 	if (!explicit && (ct->ct_flags & CTF_INHERIT) &&
    511 	    contract_process_accept(parent)) {
    512 		mutex_exit(&ct->ct_lock);
    513 		mutex_enter(&parent->ct_lock);
    514 		mutex_enter(&ct->ct_lock);
    515 		inherit = 1;
    516 	}
    517 
    518 	if (ct->ct_owner != p) {
    519 		mutex_exit(&ct->ct_lock);
    520 		if (inherit)
    521 			mutex_exit(&parent->ct_lock);
    522 		return (EINVAL);
    523 	}
    524 
    525 	mutex_enter(&p->p_lock);
    526 	if (explicit)
    527 		avl_remove(&p->p_ct_held, ct);
    528 	ct->ct_owner = NULL;
    529 	mutex_exit(&p->p_lock);
    530 
    531 	/*
    532 	 * Since we can't call cte_trim with the contract lock held,
    533 	 * we grab the queue pointer here.
    534 	 */
    535 	if (p->p_ct_equeue)
    536 		q = p->p_ct_equeue[ct->ct_type->ct_type_index];
    537 
    538 	/*
    539 	 * contop_abandon may destroy the contract so we rely on it to
    540 	 * drop ct_lock.  We retain a reference on the contract so that
    541 	 * the cte_trim which follows functions properly.  Even though
    542 	 * cte_trim doesn't dereference the contract pointer, it is
    543 	 * still necessary to retain a reference to the contract so
    544 	 * that we don't trim events which are sent by a subsequently
    545 	 * allocated contract infortuitously located at the same address.
    546 	 */
    547 	contract_hold(ct);
    548 
    549 	if (inherit) {
    550 		ct->ct_state = CTS_INHERITED;
    551 		ASSERT(ct->ct_regent == parent);
    552 		contract_process_take(parent, ct);
    553 
    554 		/*
    555 		 * We are handing off the process's reference to the
    556 		 * parent contract.  For this reason, the order in
    557 		 * which we drop the contract locks is also important.
    558 		 */
    559 		mutex_exit(&ct->ct_lock);
    560 		mutex_exit(&parent->ct_lock);
    561 	} else {
    562 		ct->ct_regent = NULL;
    563 		ct->ct_type->ct_type_ops->contop_abandon(ct);
    564 	}
    565 
    566 	/*
    567 	 * ct_lock has been dropped; we can safely trim the event
    568 	 * queue now.
    569 	 */
    570 	if (q) {
    571 		mutex_enter(&q->ctq_lock);
    572 		cte_trim(q, ct);
    573 		mutex_exit(&q->ctq_lock);
    574 	}
    575 
    576 	contract_rele(ct);
    577 
    578 	return (0);
    579 }
    580 
    581 int
    582 contract_newct(contract_t *ct)
    583 {
    584 	return (ct->ct_type->ct_type_ops->contop_newct(ct));
    585 }
    586 
    587 /*
    588  * contract_adopt
    589  *
    590  * Adopts a contract.  After a successful call to this routine, the
    591  * previously inherited contract will belong to the calling process,
    592  * and its events will have been appended to its new owner's process
    593  * bundle queue.
    594  */
    595 int
    596 contract_adopt(contract_t *ct, proc_t *p)
    597 {
    598 	avl_index_t where;
    599 	ct_equeue_t *q;
    600 	contract_t *parent;
    601 
    602 	ASSERT(p == curproc);
    603 
    604 	/*
    605 	 * Ensure the process has an event queue.  Checked by ASSERTs
    606 	 * below.
    607 	 */
    608 	(void) contract_type_pbundle(ct->ct_type, p);
    609 
    610 	mutex_enter(&ct->ct_lock);
    611 	parent = ct->ct_regent;
    612 	if (ct->ct_state != CTS_INHERITED ||
    613 	    &p->p_ct_process->conp_contract != parent ||
    614 	    p->p_zone->zone_uniqid != ct->ct_czuniqid) {
    615 		mutex_exit(&ct->ct_lock);
    616 		return (EINVAL);
    617 	}
    618 
    619 	/*
    620 	 * Multiple contract locks are taken contract -> subcontract.
    621 	 */
    622 	mutex_exit(&ct->ct_lock);
    623 	mutex_enter(&parent->ct_lock);
    624 	mutex_enter(&ct->ct_lock);
    625 
    626 	/*
    627 	 * It is possible that the contract was adopted by someone else
    628 	 * while its lock was dropped.  It isn't possible for the
    629 	 * contract to have been inherited by a different regent
    630 	 * contract.
    631 	 */
    632 	if (ct->ct_state != CTS_INHERITED) {
    633 		mutex_exit(&parent->ct_lock);
    634 		mutex_exit(&ct->ct_lock);
    635 		return (EBUSY);
    636 	}
    637 	ASSERT(ct->ct_regent == parent);
    638 
    639 	ct->ct_state = CTS_OWNED;
    640 
    641 	contract_process_adopt(ct, p);
    642 
    643 	mutex_enter(&p->p_lock);
    644 	ct->ct_owner = p;
    645 	VERIFY(avl_find(&p->p_ct_held, ct, &where) == NULL);
    646 	avl_insert(&p->p_ct_held, ct, where);
    647 	mutex_exit(&p->p_lock);
    648 
    649 	ASSERT(ct->ct_owner->p_ct_equeue);
    650 	ASSERT(ct->ct_owner->p_ct_equeue[ct->ct_type->ct_type_index]);
    651 	q = ct->ct_owner->p_ct_equeue[ct->ct_type->ct_type_index];
    652 	cte_copy(&ct->ct_events, q);
    653 	mutex_exit(&ct->ct_lock);
    654 
    655 	return (0);
    656 }
    657 
    658 /*
    659  * contract_ack
    660  *
    661  * Acknowledges receipt of a critical event.
    662  */
    663 int
    664 contract_ack(contract_t *ct, uint64_t evid, int ack)
    665 {
    666 	ct_kevent_t *ev;
    667 	list_t *queue = &ct->ct_events.ctq_events;
    668 	int error = ESRCH;
    669 	int nego = 0;
    670 	uint_t evtype;
    671 
    672 	ASSERT(ack == CT_ACK || ack == CT_NACK);
    673 
    674 	mutex_enter(&ct->ct_lock);
    675 	mutex_enter(&ct->ct_events.ctq_lock);
    676 	/*
    677 	 * We are probably ACKing something near the head of the queue.
    678 	 */
    679 	for (ev = list_head(queue); ev; ev = list_next(queue, ev)) {
    680 		if (ev->cte_id == evid) {
    681 			if (ev->cte_flags & CTE_NEG)
    682 				nego = 1;
    683 			else if (ack == CT_NACK)
    684 				break;
    685 			if ((ev->cte_flags & (CTE_INFO | CTE_ACK)) == 0) {
    686 				ev->cte_flags |= CTE_ACK;
    687 				ct->ct_evcnt--;
    688 				evtype = ev->cte_type;
    689 				error = 0;
    690 			}
    691 			break;
    692 		}
    693 	}
    694 	mutex_exit(&ct->ct_events.ctq_lock);
    695 	mutex_exit(&ct->ct_lock);
    696 
    697 	/*
    698 	 * Not all critical events are negotiation events, however
    699 	 * every negotiation event is a critical event. NEGEND events
    700 	 * are critical events but are not negotiation events
    701 	 */
    702 	if (error || !nego)
    703 		return (error);
    704 
    705 	if (ack == CT_ACK)
    706 		error = ct->ct_type->ct_type_ops->contop_ack(ct, evtype, evid);
    707 	else
    708 		error = ct->ct_type->ct_type_ops->contop_nack(ct, evtype, evid);
    709 
    710 	return (error);
    711 }
    712 
    713 /*ARGSUSED*/
    714 int
    715 contract_ack_inval(contract_t *ct, uint_t evtype, uint64_t evid)
    716 {
    717 	cmn_err(CE_PANIC, "contract_ack_inval: unsupported call: ctid: %u",
    718 	    ct->ct_id);
    719 	return (ENOSYS);
    720 }
    721 
    722 /*ARGSUSED*/
    723 int
    724 contract_qack_inval(contract_t *ct, uint_t evtype, uint64_t evid)
    725 {
    726 	cmn_err(CE_PANIC, "contract_ack_inval: unsupported call: ctid: %u",
    727 	    ct->ct_id);
    728 	return (ENOSYS);
    729 }
    730 
    731 /*ARGSUSED*/
    732 int
    733 contract_qack_notsup(contract_t *ct, uint_t evtype, uint64_t evid)
    734 {
    735 	return (ERANGE);
    736 }
    737 
    738 /*
    739  * contract_qack
    740  *
    741  * Asks that negotiations be extended by another time quantum
    742  */
    743 int
    744 contract_qack(contract_t *ct, uint64_t evid)
    745 {
    746 	ct_kevent_t *ev;
    747 	list_t *queue = &ct->ct_events.ctq_events;
    748 	int nego = 0;
    749 	uint_t evtype;
    750 
    751 	mutex_enter(&ct->ct_lock);
    752 	mutex_enter(&ct->ct_events.ctq_lock);
    753 
    754 	for (ev = list_head(queue); ev; ev = list_next(queue, ev)) {
    755 		if (ev->cte_id == evid) {
    756 			if ((ev->cte_flags & (CTE_NEG | CTE_ACK)) == CTE_NEG) {
    757 				evtype = ev->cte_type;
    758 				nego = 1;
    759 			}
    760 			break;
    761 		}
    762 	}
    763 	mutex_exit(&ct->ct_events.ctq_lock);
    764 	mutex_exit(&ct->ct_lock);
    765 
    766 	/*
    767 	 * Only a negotiated event (which is by definition also a critical
    768 	 * event) which has not yet been acknowledged can provide
    769 	 * time quanta to a negotiating owner process.
    770 	 */
    771 	if (!nego)
    772 		return (ESRCH);
    773 
    774 	return (ct->ct_type->ct_type_ops->contop_qack(ct, evtype, evid));
    775 }
    776 
    777 /*
    778  * contract_orphan
    779  *
    780  * Icky-poo.  This is a process-contract special, used to ACK all
    781  * critical messages when a contract is orphaned.
    782  */
    783 void
    784 contract_orphan(contract_t *ct)
    785 {
    786 	ct_kevent_t *ev;
    787 	list_t *queue = &ct->ct_events.ctq_events;
    788 
    789 	ASSERT(MUTEX_HELD(&ct->ct_lock));
    790 	ASSERT(ct->ct_state != CTS_ORPHAN);
    791 
    792 	mutex_enter(&ct->ct_events.ctq_lock);
    793 	ct->ct_state = CTS_ORPHAN;
    794 	for (ev = list_head(queue); ev; ev = list_next(queue, ev)) {
    795 		if ((ev->cte_flags & (CTE_INFO | CTE_ACK)) == 0) {
    796 			ev->cte_flags |= CTE_ACK;
    797 			ct->ct_evcnt--;
    798 		}
    799 	}
    800 	mutex_exit(&ct->ct_events.ctq_lock);
    801 
    802 	ASSERT(ct->ct_evcnt == 0);
    803 }
    804 
    805 /*
    806  * contract_destroy
    807  *
    808  * Explicit contract destruction.  Called when contract is empty.
    809  * The contract will actually stick around until all of its events are
    810  * removed from the bundle and and process bundle queues, and all fds
    811  * which refer to it are closed.  See contract_dtor if you are looking
    812  * for what destroys the contract structure.
    813  */
    814 void
    815 contract_destroy(contract_t *ct)
    816 {
    817 	ASSERT(MUTEX_HELD(&ct->ct_lock));
    818 	ASSERT(ct->ct_state != CTS_DEAD);
    819 	ASSERT(ct->ct_owner == NULL);
    820 
    821 	ct->ct_state = CTS_DEAD;
    822 	cte_queue_drain(&ct->ct_events, 1);
    823 	mutex_exit(&ct->ct_lock);
    824 	mutex_enter(&ct->ct_type->ct_type_events.ctq_lock);
    825 	cte_trim(&ct->ct_type->ct_type_events, ct);
    826 	mutex_exit(&ct->ct_type->ct_type_events.ctq_lock);
    827 	mutex_enter(&ct->ct_lock);
    828 	ct->ct_type->ct_type_ops->contop_destroy(ct);
    829 	mutex_exit(&ct->ct_lock);
    830 	contract_rele(ct);
    831 }
    832 
    833 /*
    834  * contract_vnode_get
    835  *
    836  * Obtains the contract directory vnode for this contract, if there is
    837  * one.  The caller must VN_RELE the vnode when they are through using
    838  * it.
    839  */
    840 vnode_t *
    841 contract_vnode_get(contract_t *ct, vfs_t *vfsp)
    842 {
    843 	contract_vnode_t *ctv;
    844 	vnode_t *vp = NULL;
    845 
    846 	mutex_enter(&ct->ct_lock);
    847 	for (ctv = list_head(&ct->ct_vnodes); ctv != NULL;
    848 	    ctv = list_next(&ct->ct_vnodes, ctv))
    849 		if (ctv->ctv_vnode->v_vfsp == vfsp) {
    850 			vp = ctv->ctv_vnode;
    851 			VN_HOLD(vp);
    852 			break;
    853 		}
    854 	mutex_exit(&ct->ct_lock);
    855 	return (vp);
    856 }
    857 
    858 /*
    859  * contract_vnode_set
    860  *
    861  * Sets the contract directory vnode for this contract.  We don't hold
    862  * a reference on the vnode because we don't want to prevent it from
    863  * being freed.  The vnode's inactive entry point will take care of
    864  * notifying us when it should be removed.
    865  */
    866 void
    867 contract_vnode_set(contract_t *ct, contract_vnode_t *ctv, vnode_t *vnode)
    868 {
    869 	mutex_enter(&ct->ct_lock);
    870 	ctv->ctv_vnode = vnode;
    871 	list_insert_head(&ct->ct_vnodes, ctv);
    872 	mutex_exit(&ct->ct_lock);
    873 }
    874 
    875 /*
    876  * contract_vnode_clear
    877  *
    878  * Removes this vnode as the contract directory vnode for this
    879  * contract.  Called from a contract directory's inactive entry point,
    880  * this may return 0 indicating that the vnode gained another reference
    881  * because of a simultaneous call to contract_vnode_get.
    882  */
    883 int
    884 contract_vnode_clear(contract_t *ct, contract_vnode_t *ctv)
    885 {
    886 	vnode_t *vp = ctv->ctv_vnode;
    887 	int result;
    888 
    889 	mutex_enter(&ct->ct_lock);
    890 	mutex_enter(&vp->v_lock);
    891 	if (vp->v_count == 1) {
    892 		list_remove(&ct->ct_vnodes, ctv);
    893 		result = 1;
    894 	} else {
    895 		vp->v_count--;
    896 		result = 0;
    897 	}
    898 	mutex_exit(&vp->v_lock);
    899 	mutex_exit(&ct->ct_lock);
    900 
    901 	return (result);
    902 }
    903 
    904 /*
    905  * contract_exit
    906  *
    907  * Abandons all contracts held by process p, and drains process p's
    908  * bundle queues.  Called on process exit.
    909  */
    910 void
    911 contract_exit(proc_t *p)
    912 {
    913 	contract_t *ct;
    914 	void *cookie = NULL;
    915 	int i;
    916 
    917 	ASSERT(p == curproc);
    918 
    919 	/*
    920 	 * Abandon held contracts.  contract_abandon knows enough not
    921 	 * to remove the contract from the list a second time.  We are
    922 	 * exiting, so no locks are needed here.  But because
    923 	 * contract_abandon will take p_lock, we need to make sure we
    924 	 * aren't holding it.
    925 	 */
    926 	ASSERT(MUTEX_NOT_HELD(&p->p_lock));
    927 	while ((ct = avl_destroy_nodes(&p->p_ct_held, &cookie)) != NULL)
    928 		VERIFY(contract_abandon(ct, p, 0) == 0);
    929 
    930 	/*
    931 	 * Drain pbundles.  Because a process bundle queue could have
    932 	 * been passed to another process, they may not be freed right
    933 	 * away.
    934 	 */
    935 	if (p->p_ct_equeue) {
    936 		for (i = 0; i < CTT_MAXTYPE; i++)
    937 			if (p->p_ct_equeue[i])
    938 				cte_queue_drain(p->p_ct_equeue[i], 0);
    939 		kmem_free(p->p_ct_equeue, CTT_MAXTYPE * sizeof (ct_equeue_t *));
    940 	}
    941 }
    942 
    943 static int
    944 get_time_left(struct ct_time *t)
    945 {
    946 	clock_t ticks_elapsed;
    947 	int secs_elapsed;
    948 
    949 	if (t->ctm_total == -1)
    950 		return (-1);
    951 
    952 	ticks_elapsed = ddi_get_lbolt() - t->ctm_start;
    953 	secs_elapsed = t->ctm_total - (drv_hztousec(ticks_elapsed)/MICROSEC);
    954 	return (secs_elapsed > 0 ? secs_elapsed : 0);
    955 }
    956 
    957 /*
    958  * contract_status_common
    959  *
    960  * Populates a ct_status structure.  Used by contract types in their
    961  * status entry points and ctfs when only common information is
    962  * requested.
    963  */
    964 void
    965 contract_status_common(contract_t *ct, zone_t *zone, void *status,
    966     model_t model)
    967 {
    968 	STRUCT_HANDLE(ct_status, lstatus);
    969 
    970 	STRUCT_SET_HANDLE(lstatus, model, status);
    971 	ASSERT(MUTEX_HELD(&ct->ct_lock));
    972 	if (zone->zone_uniqid == GLOBAL_ZONEUNIQID ||
    973 	    zone->zone_uniqid == ct->ct_czuniqid) {
    974 		zone_t *czone;
    975 		zoneid_t zoneid = -1;
    976 
    977 		/*
    978 		 * Contracts don't have holds on the zones they were
    979 		 * created by.  If the contract's zone no longer
    980 		 * exists, we say its zoneid is -1.
    981 		 */
    982 		if (zone->zone_uniqid == ct->ct_czuniqid ||
    983 		    ct->ct_czuniqid == GLOBAL_ZONEUNIQID) {
    984 			zoneid = ct->ct_zoneid;
    985 		} else if ((czone = zone_find_by_id(ct->ct_zoneid)) != NULL) {
    986 			if (czone->zone_uniqid == ct->ct_mzuniqid)
    987 				zoneid = ct->ct_zoneid;
    988 			zone_rele(czone);
    989 		}
    990 
    991 		STRUCT_FSET(lstatus, ctst_zoneid, zoneid);
    992 		STRUCT_FSET(lstatus, ctst_holder,
    993 		    (ct->ct_state == CTS_OWNED) ? ct->ct_owner->p_pid :
    994 		    (ct->ct_state == CTS_INHERITED) ? ct->ct_regent->ct_id : 0);
    995 		STRUCT_FSET(lstatus, ctst_state, ct->ct_state);
    996 	} else {
    997 		/*
    998 		 * We are looking at a contract which was created by a
    999 		 * process outside of our zone.  We provide fake zone,
   1000 		 * holder, and state information.
   1001 		 */
   1002 
   1003 		STRUCT_FSET(lstatus, ctst_zoneid, zone->zone_id);
   1004 		/*
   1005 		 * Since "zone" can't disappear until the calling ctfs
   1006 		 * is unmounted, zone_zsched must be valid.
   1007 		 */
   1008 		STRUCT_FSET(lstatus, ctst_holder, (ct->ct_state < CTS_ORPHAN) ?
   1009 		    zone->zone_zsched->p_pid : 0);
   1010 		STRUCT_FSET(lstatus, ctst_state, (ct->ct_state < CTS_ORPHAN) ?
   1011 		    CTS_OWNED : ct->ct_state);
   1012 	}
   1013 	STRUCT_FSET(lstatus, ctst_nevents, ct->ct_evcnt);
   1014 	STRUCT_FSET(lstatus, ctst_ntime, get_time_left(&ct->ct_ntime));
   1015 	STRUCT_FSET(lstatus, ctst_qtime, get_time_left(&ct->ct_qtime));
   1016 	STRUCT_FSET(lstatus, ctst_nevid,
   1017 	    ct->ct_nevent ? ct->ct_nevent->cte_id : 0);
   1018 	STRUCT_FSET(lstatus, ctst_critical, ct->ct_ev_crit);
   1019 	STRUCT_FSET(lstatus, ctst_informative, ct->ct_ev_info);
   1020 	STRUCT_FSET(lstatus, ctst_cookie, ct->ct_cookie);
   1021 	STRUCT_FSET(lstatus, ctst_type, ct->ct_type->ct_type_index);
   1022 	STRUCT_FSET(lstatus, ctst_id, ct->ct_id);
   1023 }
   1024 
   1025 /*
   1026  * contract_checkcred
   1027  *
   1028  * Determines if the specified contract is owned by a process with the
   1029  * same effective uid as the specified credential.  The caller must
   1030  * ensure that the uid spaces are the same.  Returns 1 on success.
   1031  */
   1032 static int
   1033 contract_checkcred(contract_t *ct, const cred_t *cr)
   1034 {
   1035 	proc_t *p;
   1036 	int fail = 1;
   1037 
   1038 	mutex_enter(&ct->ct_lock);
   1039 	if ((p = ct->ct_owner) != NULL) {
   1040 		mutex_enter(&p->p_crlock);
   1041 		fail = crgetuid(cr) != crgetuid(p->p_cred);
   1042 		mutex_exit(&p->p_crlock);
   1043 	}
   1044 	mutex_exit(&ct->ct_lock);
   1045 
   1046 	return (!fail);
   1047 }
   1048 
   1049 /*
   1050  * contract_owned
   1051  *
   1052  * Determines if the specified credential can view an event generated
   1053  * by the specified contract.  If locked is set, the contract's ct_lock
   1054  * is held and the caller will need to do additional work to determine
   1055  * if they truly can see the event.  Returns 1 on success.
   1056  */
   1057 int
   1058 contract_owned(contract_t *ct, const cred_t *cr, int locked)
   1059 {
   1060 	int owner, cmatch, zmatch;
   1061 	uint64_t zuniqid, mzuniqid;
   1062 	uid_t euid;
   1063 
   1064 	ASSERT(locked || MUTEX_NOT_HELD(&ct->ct_lock));
   1065 
   1066 	zuniqid = curproc->p_zone->zone_uniqid;
   1067 	mzuniqid = contract_getzuniqid(ct);
   1068 	euid = crgetuid(cr);
   1069 
   1070 	/*
   1071 	 * owner: we own the contract
   1072 	 * cmatch: we are in the creator's (and holder's) zone and our
   1073 	 *   uid matches the creator's or holder's
   1074 	 * zmatch: we are in the effective zone of a contract created
   1075 	 *   in the global zone, and our uid matches that of the
   1076 	 *   virtualized holder's (zsched/kcred)
   1077 	 */
   1078 	owner = (ct->ct_owner == curproc);
   1079 	cmatch = (zuniqid == ct->ct_czuniqid) &&
   1080 	    ((ct->ct_cuid == euid) || (!locked && contract_checkcred(ct, cr)));
   1081 	zmatch = (ct->ct_czuniqid != mzuniqid) && (zuniqid == mzuniqid) &&
   1082 	    (crgetuid(kcred) == euid);
   1083 
   1084 	return (owner || cmatch || zmatch);
   1085 }
   1086 
   1087 
   1088 /*
   1089  * contract_type_init
   1090  *
   1091  * Called by contract types to register themselves with the contracts
   1092  * framework.
   1093  */
   1094 ct_type_t *
   1095 contract_type_init(ct_typeid_t type, const char *name, contops_t *ops,
   1096     ct_f_default_t *dfault)
   1097 {
   1098 	ct_type_t *result;
   1099 
   1100 	ASSERT(type < CTT_MAXTYPE);
   1101 
   1102 	result = kmem_alloc(sizeof (ct_type_t), KM_SLEEP);
   1103 
   1104 	mutex_init(&result->ct_type_lock, NULL, MUTEX_DEFAULT, NULL);
   1105 	avl_create(&result->ct_type_avl, contract_compar, sizeof (contract_t),
   1106 	    offsetof(contract_t, ct_cttavl));
   1107 	cte_queue_create(&result->ct_type_events, CTEL_BUNDLE, 20, 0);
   1108 	result->ct