Home | History | Annotate | Download | only in disp
      1 /*
      2  * CDDL HEADER START
      3  *
      4  * The contents of this file are subject to the terms of the
      5  * Common Development and Distribution License (the "License").
      6  * You may not use this file except in compliance with the License.
      7  *
      8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
      9  * or http://www.opensolaris.org/os/licensing.
     10  * See the License for the specific language governing permissions
     11  * and limitations under the License.
     12  *
     13  * When distributing Covered Code, include this CDDL HEADER in each
     14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
     15  * If applicable, add the following below this CDDL HEADER, with the
     16  * fields enclosed by brackets "[]" replaced with your own identifying
     17  * information: Portions Copyright [yyyy] [name of copyright owner]
     18  *
     19  * CDDL HEADER END
     20  */
     21 
     22 /*
     23  * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
     24  * Use is subject to license terms.
     25  */
     26 
     27 #pragma ident	"%Z%%M%	%I%	%E% SMI"
     28 
     29 #include <sys/types.h>
     30 #include <sys/param.h>
     31 #include <sys/sysmacros.h>
     32 #include <sys/signal.h>
     33 #include <sys/stack.h>
     34 #include <sys/pcb.h>
     35 #include <sys/user.h>
     36 #include <sys/systm.h>
     37 #include <sys/sysinfo.h>
     38 #include <sys/errno.h>
     39 #include <sys/cmn_err.h>
     40 #include <sys/cred.h>
     41 #include <sys/resource.h>
     42 #include <sys/task.h>
     43 #include <sys/project.h>
     44 #include <sys/proc.h>
     45 #include <sys/debug.h>
     46 #include <sys/disp.h>
     47 #include <sys/class.h>
     48 #include <vm/seg_kmem.h>
     49 #include <vm/seg_kp.h>
     50 #include <sys/machlock.h>
     51 #include <sys/kmem.h>
     52 #include <sys/varargs.h>
     53 #include <sys/turnstile.h>
     54 #include <sys/poll.h>
     55 #include <sys/vtrace.h>
     56 #include <sys/callb.h>
     57 #include <c2/audit.h>
     58 #include <sys/tnf.h>
     59 #include <sys/sobject.h>
     60 #include <sys/cpupart.h>
     61 #include <sys/pset.h>
     62 #include <sys/door.h>
     63 #include <sys/spl.h>
     64 #include <sys/copyops.h>
     65 #include <sys/rctl.h>
     66 #include <sys/brand.h>
     67 #include <sys/pool.h>
     68 #include <sys/zone.h>
     69 #include <sys/tsol/label.h>
     70 #include <sys/tsol/tndb.h>
     71 #include <sys/cpc_impl.h>
     72 #include <sys/sdt.h>
     73 #include <sys/reboot.h>
     74 #include <sys/kdi.h>
     75 #include <sys/schedctl.h>
     76 #include <sys/waitq.h>
     77 #include <sys/cpucaps.h>
     78 #include <sys/kiconv.h>
     79 
     80 struct kmem_cache *thread_cache;	/* cache of free threads */
     81 struct kmem_cache *lwp_cache;		/* cache of free lwps */
     82 struct kmem_cache *turnstile_cache;	/* cache of free turnstiles */
     83 
     84 /*
     85  * allthreads is only for use by kmem_readers.  All kernel loops can use
     86  * the current thread as a start/end point.
     87  */
     88 static kthread_t *allthreads = &t0;	/* circular list of all threads */
     89 
     90 static kcondvar_t reaper_cv;		/* synchronization var */
     91 kthread_t	*thread_deathrow;	/* circular list of reapable threads */
     92 kthread_t	*lwp_deathrow;		/* circular list of reapable threads */
     93 kmutex_t	reaplock;		/* protects lwp and thread deathrows */
     94 int	thread_reapcnt = 0;		/* number of threads on deathrow */
     95 int	lwp_reapcnt = 0;		/* number of lwps on deathrow */
     96 int	reaplimit = 16;			/* delay reaping until reaplimit */
     97 
     98 thread_free_lock_t	*thread_free_lock;
     99 					/* protects tick thread from reaper */
    100 
    101 extern int nthread;
    102 
    103 id_t	syscid;				/* system scheduling class ID */
    104 void	*segkp_thread;			/* cookie for segkp pool */
    105 
    106 int lwp_cache_sz = 32;
    107 int t_cache_sz = 8;
    108 static kt_did_t next_t_id = 1;
    109 
    110 /* Default mode for thread binding to CPUs and processor sets */
    111 int default_binding_mode = TB_ALLHARD;
    112 
    113 /*
    114  * Min/Max stack sizes for stack size parameters
    115  */
    116 #define	MAX_STKSIZE	(32 * DEFAULTSTKSZ)
    117 #define	MIN_STKSIZE	DEFAULTSTKSZ
    118 
    119 /*
    120  * default_stksize overrides lwp_default_stksize if it is set.
    121  */
    122 int	default_stksize;
    123 int	lwp_default_stksize;
    124 
    125 static zone_key_t zone_thread_key;
    126 
    127 /*
    128  * forward declarations for internal thread specific data (tsd)
    129  */
    130 static void *tsd_realloc(void *, size_t, size_t);
    131 
    132 void thread_reaper(void);
    133 
    134 /*ARGSUSED*/
    135 static int
    136 turnstile_constructor(void *buf, void *cdrarg, int kmflags)
    137 {
    138 	bzero(buf, sizeof (turnstile_t));
    139 	return (0);
    140 }
    141 
    142 /*ARGSUSED*/
    143 static void
    144 turnstile_destructor(void *buf, void *cdrarg)
    145 {
    146 	turnstile_t *ts = buf;
    147 
    148 	ASSERT(ts->ts_free == NULL);
    149 	ASSERT(ts->ts_waiters == 0);
    150 	ASSERT(ts->ts_inheritor == NULL);
    151 	ASSERT(ts->ts_sleepq[0].sq_first == NULL);
    152 	ASSERT(ts->ts_sleepq[1].sq_first == NULL);
    153 }
    154 
    155 void
    156 thread_init(void)
    157 {
    158 	kthread_t *tp;
    159 	extern char sys_name[];
    160 	extern void idle();
    161 	struct cpu *cpu = CPU;
    162 	int i;
    163 	kmutex_t *lp;
    164 
    165 	mutex_init(&reaplock, NULL, MUTEX_SPIN, (void *)ipltospl(DISP_LEVEL));
    166 	thread_free_lock =
    167 	    kmem_alloc(sizeof (thread_free_lock_t) * THREAD_FREE_NUM, KM_SLEEP);
    168 	for (i = 0; i < THREAD_FREE_NUM; i++) {
    169 		lp = &thread_free_lock[i].tf_lock;
    170 		mutex_init(lp, NULL, MUTEX_DEFAULT, NULL);
    171 	}
    172 
    173 #if defined(__i386) || defined(__amd64)
    174 	thread_cache = kmem_cache_create("thread_cache", sizeof (kthread_t),
    175 	    PTR24_ALIGN, NULL, NULL, NULL, NULL, NULL, 0);
    176 
    177 	/*
    178 	 * "struct _klwp" includes a "struct pcb", which includes a
    179 	 * "struct fpu", which needs to be 16-byte aligned on amd64
    180 	 * (and even on i386 for fxsave/fxrstor).
    181 	 */
    182 	lwp_cache = kmem_cache_create("lwp_cache", sizeof (klwp_t),
    183 	    16, NULL, NULL, NULL, NULL, NULL, 0);
    184 #else
    185 	/*
    186 	 * Allocate thread structures from static_arena.  This prevents
    187 	 * issues where a thread tries to relocate its own thread
    188 	 * structure and touches it after the mapping has been suspended.
    189 	 */
    190 	thread_cache = kmem_cache_create("thread_cache", sizeof (kthread_t),
    191 	    PTR24_ALIGN, NULL, NULL, NULL, NULL, static_arena, 0);
    192 
    193 	lwp_stk_cache_init();
    194 
    195 	lwp_cache = kmem_cache_create("lwp_cache", sizeof (klwp_t),
    196 	    0, NULL, NULL, NULL, NULL, NULL, 0);
    197 #endif
    198 
    199 	turnstile_cache = kmem_cache_create("turnstile_cache",
    200 	    sizeof (turnstile_t), 0,
    201 	    turnstile_constructor, turnstile_destructor, NULL, NULL, NULL, 0);
    202 
    203 	label_init();
    204 	cred_init();
    205 
    206 	/*
    207 	 * Initialize various resource management facilities.
    208 	 */
    209 	rctl_init();
    210 	cpucaps_init();
    211 	/*
    212 	 * Zone_init() should be called before project_init() so that project ID
    213 	 * for the first project is initialized correctly.
    214 	 */
    215 	zone_init();
    216 	project_init();
    217 	brand_init();
    218 	kiconv_init();
    219 	task_init();
    220 	tcache_init();
    221 	pool_init();
    222 
    223 	curthread->t_ts = kmem_cache_alloc(turnstile_cache, KM_SLEEP);
    224 
    225 	/*
    226 	 * Originally, we had two parameters to set default stack
    227 	 * size: one for lwp's (lwp_default_stksize), and one for
    228 	 * kernel-only threads (DEFAULTSTKSZ, a.k.a. _defaultstksz).
    229 	 * Now we have a third parameter that overrides both if it is
    230 	 * set to a legal stack size, called default_stksize.
    231 	 */
    232 
    233 	if (default_stksize == 0) {
    234 		default_stksize = DEFAULTSTKSZ;
    235 	} else if (default_stksize % PAGESIZE != 0 ||
    236 	    default_stksize > MAX_STKSIZE ||
    237 	    default_stksize < MIN_STKSIZE) {
    238 		cmn_err(CE_WARN, "Illegal stack size. Using %d",
    239 		    (int)DEFAULTSTKSZ);
    240 		default_stksize = DEFAULTSTKSZ;
    241 	} else {
    242 		lwp_default_stksize = default_stksize;
    243 	}
    244 
    245 	if (lwp_default_stksize == 0) {
    246 		lwp_default_stksize = default_stksize;
    247 	} else if (lwp_default_stksize % PAGESIZE != 0 ||
    248 	    lwp_default_stksize > MAX_STKSIZE ||
    249 	    lwp_default_stksize < MIN_STKSIZE) {
    250 		cmn_err(CE_WARN, "Illegal stack size. Using %d",
    251 		    default_stksize);
    252 		lwp_default_stksize = default_stksize;
    253 	}
    254 
    255 	segkp_lwp = segkp_cache_init(segkp, lwp_cache_sz,
    256 	    lwp_default_stksize,
    257 	    (KPD_NOWAIT | KPD_HASREDZONE | KPD_LOCKED));
    258 
    259 	segkp_thread = segkp_cache_init(segkp, t_cache_sz,
    260 	    default_stksize, KPD_HASREDZONE | KPD_LOCKED | KPD_NO_ANON);
    261 
    262 	(void) getcid(sys_name, &syscid);
    263 	curthread->t_cid = syscid;	/* current thread is t0 */
    264 
    265 	/*
    266 	 * Set up the first CPU's idle thread.
    267 	 * It runs whenever the CPU has nothing worthwhile to do.
    268 	 */
    269 	tp = thread_create(NULL, 0, idle, NULL, 0, &p0, TS_STOPPED, -1);
    270 	cpu->cpu_idle_thread = tp;
    271 	tp->t_preempt = 1;
    272 	tp->t_disp_queue = cpu->cpu_disp;
    273 	ASSERT(tp->t_disp_queue != NULL);
    274 	tp->t_bound_cpu = cpu;
    275 	tp->t_affinitycnt = 1;
    276 
    277 	/*
    278 	 * Registering a thread in the callback table is usually
    279 	 * done in the initialization code of the thread. In this
    280 	 * case, we do it right after thread creation to avoid
    281 	 * blocking idle thread while registering itself. It also
    282 	 * avoids the possibility of reregistration in case a CPU
    283 	 * restarts its idle thread.
    284 	 */
    285 	CALLB_CPR_INIT_SAFE(tp, "idle");
    286 
    287 	/*
    288 	 * Create the thread_reaper daemon. From this point on, exited
    289 	 * threads will get reaped.
    290 	 */
    291 	(void) thread_create(NULL, 0, (void (*)())thread_reaper,
    292 	    NULL, 0, &p0, TS_RUN, minclsyspri);
    293 
    294 	/*
    295 	 * Finish initializing the kernel memory allocator now that
    296 	 * thread_create() is available.
    297 	 */
    298 	kmem_thread_init();
    299 
    300 	if (boothowto & RB_DEBUG)
    301 		kdi_dvec_thravail();
    302 }
    303 
    304 /*
    305  * Create a thread.
    306  *
    307  * thread_create() blocks for memory if necessary.  It never fails.
    308  *
    309  * If stk is NULL, the thread is created at the base of the stack
    310  * and cannot be swapped.
    311  */
    312 kthread_t *
    313 thread_create(
    314 	caddr_t	stk,
    315 	size_t	stksize,
    316 	void	(*proc)(),
    317 	void	*arg,
    318 	size_t	len,
    319 	proc_t	 *pp,
    320 	int	state,
    321 	pri_t	pri)
    322 {
    323 	kthread_t *t;
    324 	extern struct classfuncs sys_classfuncs;
    325 	turnstile_t *ts;
    326 
    327 	/*
    328 	 * Every thread keeps a turnstile around in case it needs to block.
    329 	 * The only reason the turnstile is not simply part of the thread
    330 	 * structure is that we may have to break the association whenever
    331 	 * more than one thread blocks on a given synchronization object.
    332 	 * From a memory-management standpoint, turnstiles are like the
    333 	 * "attached mblks" that hang off dblks in the streams allocator.
    334 	 */
    335 	ts = kmem_cache_alloc(turnstile_cache, KM_SLEEP);
    336 
    337 	if (stk == NULL) {
    338 		/*
    339 		 * alloc both thread and stack in segkp chunk
    340 		 */
    341 
    342 		if (stksize < default_stksize)
    343 			stksize = default_stksize;
    344 
    345 		if (stksize == default_stksize) {
    346 			stk = (caddr_t)segkp_cache_get(segkp_thread);
    347 		} else {
    348 			stksize = roundup(stksize, PAGESIZE);
    349 			stk = (caddr_t)segkp_get(segkp, stksize,
    350 			    (KPD_HASREDZONE | KPD_NO_ANON | KPD_LOCKED));
    351 		}
    352 
    353 		ASSERT(stk != NULL);
    354 
    355 		/*
    356 		 * The machine-dependent mutex code may require that
    357 		 * thread pointers (since they may be used for mutex owner
    358 		 * fields) have certain alignment requirements.
    359 		 * PTR24_ALIGN is the size of the alignment quanta.
    360 		 * XXX - assumes stack grows toward low addresses.
    361 		 */
    362 		if (stksize <= sizeof (kthread_t) + PTR24_ALIGN)
    363 			cmn_err(CE_PANIC, "thread_create: proposed stack size"
    364 			    " too small to hold thread.");
    365 #ifdef STACK_GROWTH_DOWN
    366 		stksize -= SA(sizeof (kthread_t) + PTR24_ALIGN - 1);
    367 		stksize &= -PTR24_ALIGN;	/* make thread aligned */
    368 		t = (kthread_t *)(stk + stksize);
    369 		bzero(t, sizeof (kthread_t));
    370 		if (audit_active)
    371 			audit_thread_create(t);
    372 		t->t_stk = stk + stksize;
    373 		t->t_stkbase = stk;
    374 #else	/* stack grows to larger addresses */
    375 		stksize -= SA(sizeof (kthread_t));
    376 		t = (kthread_t *)(stk);
    377 		bzero(t, sizeof (kthread_t));
    378 		t->t_stk = stk + sizeof (kthread_t);
    379 		t->t_stkbase = stk + stksize + sizeof (kthread_t);
    380 #endif	/* STACK_GROWTH_DOWN */
    381 		t->t_flag |= T_TALLOCSTK;
    382 		t->t_swap = stk;
    383 	} else {
    384 		t = kmem_cache_alloc(thread_cache, KM_SLEEP);
    385 		bzero(t, sizeof (kthread_t));
    386 		ASSERT(((uintptr_t)t & (PTR24_ALIGN - 1)) == 0);
    387 		if (audit_active)
    388 			audit_thread_create(t);
    389 		/*
    390 		 * Initialize t_stk to the kernel stack pointer to use
    391 		 * upon entry to the kernel
    392 		 */
    393 #ifdef STACK_GROWTH_DOWN
    394 		t->t_stk = stk + stksize;
    395 		t->t_stkbase = stk;
    396 #else
    397 		t->t_stk = stk;			/* 3b2-like */
    398 		t->t_stkbase = stk + stksize;
    399 #endif /* STACK_GROWTH_DOWN */
    400 	}
    401 
    402 	/* set default stack flag */
    403 	if (stksize == lwp_default_stksize)
    404 		t->t_flag |= T_DFLTSTK;
    405 
    406 	t->t_ts = ts;
    407 
    408 	/*
    409 	 * p_cred could be NULL if it thread_create is called before cred_init
    410 	 * is called in main.
    411 	 */
    412 	mutex_enter(&pp->p_crlock);
    413 	if (pp->p_cred)
    414 		crhold(t->t_cred = pp->p_cred);
    415 	mutex_exit(&pp->p_crlock);
    416 	t->t_start = gethrestime_sec();
    417 	t->t_startpc = proc;
    418 	t->t_procp = pp;
    419 	t->t_clfuncs = &sys_classfuncs.thread;
    420 	t->t_cid = syscid;
    421 	t->t_pri = pri;
    422 	t->t_stime = lbolt;
    423 	t->t_schedflag = TS_LOAD | TS_DONT_SWAP;
    424 	t->t_bind_cpu = PBIND_NONE;
    425 	t->t_bindflag = (uchar_t)default_binding_mode;
    426 	t->t_bind_pset = PS_NONE;
    427 	t->t_plockp = &pp->p_lock;
    428 	t->t_copyops = NULL;
    429 	t->t_taskq = NULL;
    430 	t->t_anttime = 0;
    431 	t->t_hatdepth = 0;
    432 
    433 	t->t_dtrace_vtime = 1;	/* assure vtimestamp is always non-zero */
    434 
    435 	CPU_STATS_ADDQ(CPU, sys, nthreads, 1);
    436 #ifndef NPROBE
    437 	/* Kernel probe */
    438 	tnf_thread_create(t);
    439 #endif /* NPROBE */
    440 	LOCK_INIT_CLEAR(&t->t_lock);
    441 
    442 	/*
    443 	 * Callers who give us a NULL proc must do their own
    444 	 * stack initialization.  e.g. lwp_create()
    445 	 */
    446 	if (proc != NULL) {
    447 		t->t_stk = thread_stk_init(t->t_stk);
    448 		thread_load(t, proc, arg, len);
    449 	}
    450 
    451 	/*
    452 	 * Put a hold on project0. If this thread is actually in a
    453 	 * different project, then t_proj will be changed later in
    454 	 * lwp_create().  All kernel-only threads must be in project 0.
    455 	 */
    456 	t->t_proj = project_hold(proj0p);
    457 
    458 	lgrp_affinity_init(&t->t_lgrp_affinity);
    459 
    460 	mutex_enter(&pidlock);
    461 	nthread++;
    462 	t->t_did = next_t_id++;
    463 	t->t_prev = curthread->t_prev;
    464 	t->t_next = curthread;
    465 
    466 	/*
    467 	 * Add the thread to the list of all threads, and initialize
    468 	 * its t_cpu pointer.  We need to block preemption since
    469 	 * cpu_offline walks the thread list looking for threads
    470 	 * with t_cpu pointing to the CPU being offlined.  We want
    471 	 * to make sure that the list is consistent and that if t_cpu
    472 	 * is set, the thread is on the list.
    473 	 */
    474 	kpreempt_disable();
    475 	curthread->t_prev->t_next = t;
    476 	curthread->t_prev = t;
    477 
    478 	/*
    479 	 * Threads should never have a NULL t_cpu pointer so assign it
    480 	 * here.  If the thread is being created with state TS_RUN a
    481 	 * better CPU may be chosen when it is placed on the run queue.
    482 	 *
    483 	 * We need to keep kernel preemption disabled when setting all
    484 	 * three fields to keep them in sync.  Also, always create in
    485 	 * the default partition since that's where kernel threads go
    486 	 * (if this isn't a kernel thread, t_cpupart will be changed
    487 	 * in lwp_create before setting the thread runnable).
    488 	 */
    489 	t->t_cpupart = &cp_default;
    490 
    491 	/*
    492 	 * For now, affiliate this thread with the root lgroup.
    493 	 * Since the kernel does not (presently) allocate its memory
    494 	 * in a locality aware fashion, the root is an appropriate home.
    495 	 * If this thread is later associated with an lwp, it will have
    496 	 * it's lgroup re-assigned at that time.
    497 	 */
    498 	lgrp_move_thread(t, &cp_default.cp_lgrploads[LGRP_ROOTID], 1);
    499 
    500 	/*
    501 	 * Inherit the current cpu.  If this cpu isn't part of the chosen
    502 	 * lgroup, a new cpu will be chosen by cpu_choose when the thread
    503 	 * is ready to run.
    504 	 */
    505 	if (CPU->cpu_part == &cp_default)
    506 		t->t_cpu = CPU;
    507 	else
    508 		t->t_cpu = disp_lowpri_cpu(cp_default.cp_cpulist, t->t_lpl,
    509 		    t->t_pri, NULL);
    510 
    511 	t->t_disp_queue = t->t_cpu->cpu_disp;
    512 	kpreempt_enable();
    513 
    514 	/*
    515 	 * Initialize thread state and the dispatcher lock pointer.
    516 	 * Need to hold onto pidlock to block allthreads walkers until
    517 	 * the state is set.
    518 	 */
    519 	switch (state) {
    520 	case TS_RUN:
    521 		curthread->t_oldspl = splhigh();	/* get dispatcher spl */
    522 		THREAD_SET_STATE(t, TS_STOPPED, &transition_lock);
    523 		CL_SETRUN(t);
    524 		thread_unlock(t);
    525 		break;
    526 
    527 	case TS_ONPROC:
    528 		THREAD_ONPROC(t, t->t_cpu);
    529 		break;
    530 
    531 	case TS_FREE:
    532 		/*
    533 		 * Free state will be used for intr threads.
    534 		 * The interrupt routine must set the thread dispatcher
    535 		 * lock pointer (t_lockp) if starting on a CPU
    536 		 * other than the current one.
    537 		 */
    538 		THREAD_FREEINTR(t, CPU);
    539 		break;
    540 
    541 	case TS_STOPPED:
    542 		THREAD_SET_STATE(t, TS_STOPPED, &stop_lock);
    543 		break;
    544 
    545 	default:			/* TS_SLEEP, TS_ZOMB or TS_TRANS */
    546 		cmn_err(CE_PANIC, "thread_create: invalid state %d", state);
    547 	}
    548 	mutex_exit(&pidlock);
    549 	return (t);
    550 }
    551 
    552 /*
    553  * Move thread to project0 and take care of project reference counters.
    554  */
    555 void
    556 thread_rele(kthread_t *t)
    557 {
    558 	kproject_t *kpj;
    559 
    560 	thread_lock(t);
    561 
    562 	ASSERT(t == curthread || t->t_state == TS_FREE || t->t_procp == &p0);
    563 	kpj = ttoproj(t);
    564 	t->t_proj = proj0p;
    565 
    566 	thread_unlock(t);
    567 
    568 	if (kpj != proj0p) {
    569 		project_rele(kpj);
    570 		(void) project_hold(proj0p);
    571 	}
    572 }
    573 
    574 void
    575 thread_exit(void)
    576 {
    577 	kthread_t *t = curthread;
    578 
    579 	if ((t->t_proc_flag & TP_ZTHREAD) != 0)
    580 		cmn_err(CE_PANIC, "thread_exit: zthread_exit() not called");
    581 
    582 	tsd_exit();		/* Clean up this thread's TSD */
    583 
    584 	kcpc_passivate();	/* clean up performance counter state */
    585 
    586 	/*
    587 	 * No kernel thread should have called poll() without arranging
    588 	 * calling pollcleanup() here.
    589 	 */
    590 	ASSERT(t->t_pollstate == NULL);
    591 	ASSERT(t->t_schedctl == NULL);
    592 	if (t->t_door)
    593 		door_slam();	/* in case thread did an upcall */
    594 
    595 #ifndef NPROBE
    596 	/* Kernel probe */
    597 	if (t->t_tnf_tpdp)
    598 		tnf_thread_exit();
    599 #endif /* NPROBE */
    600 
    601 	thread_rele(t);
    602 	t->t_preempt++;
    603 
    604 	/*
    605 	 * remove thread from the all threads list so that
    606 	 * death-row can use the same pointers.
    607 	 */
    608 	mutex_enter(&pidlock);
    609 	t->t_next->t_prev = t->t_prev;
    610 	t->t_prev->t_next = t->t_next;
    611 	ASSERT(allthreads != t);	/* t0 never exits */
    612 	cv_broadcast(&t->t_joincv);	/* wake up anyone in thread_join */
    613 	mutex_exit(&pidlock);
    614 
    615 	if (t->t_ctx != NULL)
    616 		exitctx(t);
    617 	if (t->t_procp->p_pctx != NULL)
    618 		exitpctx(t->t_procp);
    619 
    620 	t->t_state = TS_ZOMB;	/* set zombie thread */
    621 
    622 	swtch_from_zombie();	/* give up the CPU */
    623 	/* NOTREACHED */
    624 }
    625 
    626 /*
    627  * Check to see if the specified thread is active (defined as being on
    628  * the thread list).  This is certainly a slow way to do this; if there's
    629  * ever a reason to speed it up, we could maintain a hash table of active
    630  * threads indexed by their t_did.
    631  */
    632 static kthread_t *
    633 did_to_thread(kt_did_t tid)
    634 {
    635 	kthread_t *t;
    636 
    637 	ASSERT(MUTEX_HELD(&pidlock));
    638 	for (t = curthread->t_next; t != curthread; t = t->t_next) {
    639 		if (t->t_did == tid)
    640 			break;
    641 	}
    642 	if (t->t_did == tid)
    643 		return (t);
    644 	else
    645 		return (NULL);
    646 }
    647 
    648 /*
    649  * Wait for specified thread to exit.  Returns immediately if the thread
    650  * could not be found, meaning that it has either already exited or never
    651  * existed.
    652  */
    653 void
    654 thread_join(kt_did_t tid)
    655 {
    656 	kthread_t *t;
    657 
    658 	ASSERT(tid != curthread->t_did);
    659 	ASSERT(tid != t0.t_did);
    660 
    661 	mutex_enter(&pidlock);
    662 	/*
    663 	 * Make sure we check that the thread is on the thread list
    664 	 * before blocking on it; otherwise we could end up blocking on
    665 	 * a cv that's already been freed.  In other words, don't cache
    666 	 * the thread pointer across calls to cv_wait.
    667 	 *
    668 	 * The choice of loop invariant means that whenever a thread
    669 	 * is taken off the allthreads list, a cv_broadcast must be
    670 	 * performed on that thread's t_joincv to wake up any waiters.
    671 	 * The broadcast doesn't have to happen right away, but it
    672 	 * shouldn't be postponed indefinitely (e.g., by doing it in
    673 	 * thread_free which may only be executed when the deathrow
    674 	 * queue is processed.
    675 	 */
    676 	while (t = did_to_thread(tid))
    677 		cv_wait(&t->t_joincv, &pidlock);
    678 	mutex_exit(&pidlock);
    679 }
    680 
    681 void
    682 thread_free_prevent(kthread_t *t)
    683 {
    684 	kmutex_t *lp;
    685 
    686 	lp = &thread_free_lock[THREAD_FREE_HASH(t)].tf_lock;
    687 	mutex_enter(lp);
    688 }
    689 
    690 void
    691 thread_free_allow(kthread_t *t)
    692 {
    693 	kmutex_t *lp;
    694 
    695 	lp = &thread_free_lock[THREAD_FREE_HASH(t)].tf_lock;
    696 	mutex_exit(lp);
    697 }
    698 
    699 static void
    700 thread_free_barrier(kthread_t *t)
    701 {
    702 	kmutex_t *lp;
    703 
    704 	lp = &thread_free_lock[THREAD_FREE_HASH(t)].tf_lock;
    705 	mutex_enter(lp);
    706 	mutex_exit(lp);
    707 }
    708 
    709 void
    710 thread_free(kthread_t *t)
    711 {
    712 	ASSERT(t != &t0 && t->t_state == TS_FREE);
    713 	ASSERT(t->t_door == NULL);
    714 	ASSERT(t->t_schedctl == NULL);
    715 	ASSERT(t->t_pollstate == NULL);
    716 
    717 	t->t_pri = 0;
    718 	t->t_pc = 0;
    719 	t->t_sp = 0;
    720 	t->t_wchan0 = NULL;
    721 	t->t_wchan = NULL;
    722 	if (t->t_cred != NULL) {
    723 		crfree(t->t_cred);
    724 		t->t_cred = 0;
    725 	}
    726 	if (t->t_pdmsg) {
    727 		kmem_free(t->t_pdmsg, strlen(t->t_pdmsg) + 1);
    728 		t->t_pdmsg = NULL;
    729 	}
    730 	if (audit_active)
    731 		audit_thread_free(t);
    732 #ifndef NPROBE
    733 	if (t->t_tnf_tpdp)
    734 		tnf_thread_free(t);
    735 #endif /* NPROBE */
    736 	if (t->t_cldata) {
    737 		CL_EXITCLASS(t->t_cid, (caddr_t *)t->t_cldata);
    738 	}
    739 	if (t->t_rprof != NULL) {
    740 		kmem_free(t->t_rprof, sizeof (*t->t_rprof));
    741 		t->t_rprof = NULL;
    742 	}
    743 	t->t_lockp = NULL;	/* nothing should try to lock this thread now */
    744 	if (t->t_lwp)
    745 		lwp_freeregs(t->t_lwp, 0);
    746 	if (t->t_ctx)
    747 		freectx(t, 0);
    748 	t->t_stk = NULL;
    749 	if (t->t_lwp)
    750 		lwp_stk_fini(t->t_lwp);
    751 	lock_clear(&t->t_lock);
    752 
    753 	if (t->t_ts->ts_waiters > 0)
    754 		panic("thread_free: turnstile still active");
    755 
    756 	kmem_cache_free(turnstile_cache, t->t_ts);
    757 
    758 	free_afd(&t->t_activefd);
    759 
    760 	/*
    761 	 * Barrier for the tick accounting code.  The tick accounting code
    762 	 * holds this lock to keep the thread from going away while it's
    763 	 * looking at it.
    764 	 */
    765 	thread_free_barrier(t);
    766 
    767 	ASSERT(ttoproj(t) == proj0p);
    768 	project_rele(ttoproj(t));
    769 
    770 	lgrp_affinity_free(&t->t_lgrp_affinity);
    771 
    772 	/*
    773 	 * Free thread struct and its stack.
    774 	 */
    775 	if (t->t_flag & T_TALLOCSTK) {
    776 		/* thread struct is embedded in stack */
    777 		segkp_release(segkp, t->t_swap);
    778 		mutex_enter(&pidlock);
    779 		nthread--;
    780 		mutex_exit(&pidlock);
    781 	} else {
    782 		if (t->t_swap) {
    783 			segkp_release(segkp, t->t_swap);
    784 			t->t_swap = NULL;
    785 		}
    786 		if (t->t_lwp) {
    787 			kmem_cache_free(lwp_cache, t->t_lwp);
    788 			t->t_lwp = NULL;
    789 		}
    790 		mutex_enter(&pidlock);
    791 		nthread--;
    792 		mutex_exit(&pidlock);
    793 		kmem_cache_free(thread_cache, t);
    794 	}
    795 }
    796 
    797 /*
    798  * Removes threads associated with the given zone from a deathrow queue.
    799  * tp is a pointer to the head of the deathrow queue, and countp is a
    800  * pointer to the current deathrow count.  Returns a linked list of
    801  * threads removed from the list.
    802  */
    803 static kthread_t *
    804 thread_zone_cleanup(kthread_t **tp, int *countp, zoneid_t zoneid)
    805 {
    806 	kthread_t *tmp, *list = NULL;
    807 	cred_t *cr;
    808 
    809 	ASSERT(MUTEX_HELD(&reaplock));
    810 	while (*tp != NULL) {
    811 		if ((cr = (*tp)->t_cred) != NULL && crgetzoneid(cr) == zoneid) {
    812 			tmp = *tp;
    813 			*tp = tmp->t_forw;
    814 			tmp->t_forw = list;
    815 			list = tmp;
    816 			(*countp)--;
    817 		} else {
    818 			tp = &(*tp)->t_forw;
    819 		}
    820 	}
    821 	return (list);
    822 }
    823 
    824 static void
    825 thread_reap_list(kthread_t *t)
    826 {
    827 	kthread_t *next;
    828 
    829 	while (t != NULL) {
    830 		next = t->t_forw;
    831 		thread_free(t);
    832 		t = next;
    833 	}
    834 }
    835 
    836 /* ARGSUSED */
    837 static void
    838 thread_zone_destroy(zoneid_t zoneid, void *unused)
    839 {
    840 	kthread_t *t, *l;
    841 
    842 	mutex_enter(&reaplock);
    843 	/*
    844 	 * Pull threads and lwps associated with zone off deathrow lists.
    845 	 */
    846 	t = thread_zone_cleanup(&thread_deathrow, &thread_reapcnt, zoneid);
    847 	l = thread_zone_cleanup(&lwp_deathrow, &lwp_reapcnt, zoneid);
    848 	mutex_exit(&reaplock);
    849 
    850 	/*
    851 	 * Guard against race condition in mutex_owner_running:
    852 	 * 	thread=owner(mutex)
    853 	 * 	<interrupt>
    854 	 * 				thread exits mutex
    855 	 * 				thread exits
    856 	 * 				thread reaped
    857 	 * 				thread struct freed
    858 	 * cpu = thread->t_cpu <- BAD POINTER DEREFERENCE.
    859 	 * A cross call to all cpus will cause the interrupt handler
    860 	 * to reset the PC if it is in mutex_owner_running, refreshing
    861 	 * stale thread pointers.
    862 	 */
    863 	mutex_sync();   /* sync with mutex code */
    864 
    865 	/*
    866 	 * Reap threads
    867 	 */
    868 	thread_reap_list(t);
    869 
    870 	/*
    871 	 * Reap lwps
    872 	 */
    873 	thread_reap_list(l);
    874 }
    875 
    876 /*
    877  * cleanup zombie threads that are on deathrow.
    878  */
    879 void
    880 thread_reaper()
    881 {
    882 	kthread_t *t, *l;
    883 	callb_cpr_t cprinfo;
    884 
    885 	/*
    886 	 * Register callback to clean up threads when zone is destroyed.
    887 	 */
    888 	zone_key_create(&zone_thread_key, NULL, NULL, thread_zone_destroy);
    889 
    890 	CALLB_CPR_INIT(&cprinfo, &reaplock, callb_generic_cpr, "t_reaper");
    891 	for (;;) {
    892 		mutex_enter(&reaplock);
    893 		while (thread_deathrow == NULL && lwp_deathrow == NULL) {
    894 			CALLB_CPR_SAFE_BEGIN(&cprinfo);
    895 			cv_wait(&reaper_cv, &reaplock);
    896 			CALLB_CPR_SAFE_END(&cprinfo, &reaplock);
    897 		}
    898 		/*
    899 		 * mutex_sync() needs to be called when reaping, but
    900 		 * not too often.  We limit reaping rate to once
    901 		 * per second.  Reaplimit is max rate at which threads can
    902 		 * be freed. Does not impact thread destruction/creation.
    903 		 */
    904 		t = thread_deathrow;
    905 		l = lwp_deathrow;
    906 		thread_deathrow = NULL;
    907 		lwp_deathrow = NULL;
    908 		thread_reapcnt = 0;
    909 		lwp_reapcnt = 0;
    910 		mutex_exit(&reaplock);
    911 
    912 		/*
    913 		 * Guard against race condition in mutex_owner_running:
    914 		 * 	thread=owner(mutex)
    915 		 * 	<interrupt>
    916 		 * 				thread exits mutex
    917 		 * 				thread exits
    918 		 * 				thread reaped
    919 		 * 				thread struct freed
    920 		 * cpu = thread->t_cpu <- BAD POINTER DEREFERENCE.
    921 		 * A cross call to all cpus will cause the interrupt handler
    922 		 * to reset the PC if it is in mutex_owner_running, refreshing
    923 		 * stale thread pointers.
    924 		 */
    925 		mutex_sync();   /* sync with mutex code */
    926 		/*
    927 		 * Reap threads
    928 		 */
    929 		thread_reap_list(t);
    930 
    931 		/*
    932 		 * Reap lwps
    933 		 */
    934 		thread_reap_list(l);
    935 		delay(hz);
    936 	}
    937 }
    938 
    939 /*
    940  * This is called by lwpcreate, etc.() to put a lwp_deathrow thread onto
    941  * thread_deathrow. The thread's state is changed already TS_FREE to indicate
    942  * that is reapable. The thread already holds the reaplock, and was already
    943  * freed.
    944  */
    945 void
    946 reapq_move_lq_to_tq(kthread_t *t)
    947 {
    948 	ASSERT(t->t_state == TS_FREE);
    949 	ASSERT(MUTEX_HELD(&reaplock));
    950 	t->t_forw = thread_deathrow;
    951 	thread_deathrow = t;
    952 	thread_reapcnt++;
    953 	if (lwp_reapcnt + thread_reapcnt > reaplimit)
    954 		cv_signal(&reaper_cv);  /* wake the reaper */
    955 }
    956 
    957 /*
    958  * This is called by resume() to put a zombie thread onto deathrow.
    959  * The thread's state is changed to TS_FREE to indicate that is reapable.
    960  * This is called from the idle thread so it must not block - just spin.
    961  */
    962 void
    963 reapq_add(kthread_t *t)
    964 {
    965 	mutex_enter(&reaplock);
    966 
    967 	/*
    968 	 * lwp_deathrow contains only threads with lwp linkage
    969 	 * that are of the default stacksize. Anything else goes
    970 	 * on thread_deathrow.
    971 	 */
    972 	if (ttolwp(t) && (t->t_flag & T_DFLTSTK)) {
    973 		t->t_forw = lwp_deathrow;
    974 		lwp_deathrow = t;
    975 		lwp_reapcnt++;
    976 	} else {
    977 		t->t_forw = thread_deathrow;
    978 		thread_deathrow = t;
    979 		thread_reapcnt++;
    980 	}
    981 	if (lwp_reapcnt + thread_reapcnt > reaplimit)
    982 		cv_signal(&reaper_cv);	/* wake the reaper */
    983 	t->t_state = TS_FREE;
    984 	lock_clear(&t->t_lock);
    985 
    986 	/*
    987 	 * Before we return, we need to grab and drop the thread lock for
    988 	 * the dead thread.  At this point, the current thread is the idle
    989 	 * thread, and the dead thread's CPU lock points to the current
    990 	 * CPU -- and we must grab and drop the lock to synchronize with
    991 	 * a racing thread walking a blocking chain that the zombie thread
    992 	 * was recently in.  By this point, that blocking chain is (by
    993 	 * definition) stale:  the dead thread is not holding any locks, and
    994 	 * is therefore not in any blocking chains -- but if we do not regrab
    995 	 * our lock before freeing the dead thread's data structures, the
    996 	 * thread walking the (stale) blocking chain will die on memory
    997 	 * corruption when it attempts to drop the dead thread's lock.  We
    998 	 * only need do this once because there is no way for the dead thread
    999 	 * to ever again be on a blocking chain:  once we have grabbed and
   1000 	 * dropped the thread lock, we are guaranteed that anyone that could
   1001 	 * have seen this thread in a blocking chain can no longer see it.
   1002 	 */
   1003 	thread_lock(t);
   1004 	thread_unlock(t);
   1005 
   1006 	mutex_exit(&reaplock);
   1007 }
   1008 
   1009 /*
   1010  * Install thread context ops for the current thread.
   1011  */
   1012 void
   1013 installctx(
   1014 	kthread_t *t,
   1015 	void	*arg,
   1016 	void	(*save)(void *),
   1017 	void	(*restore)(void *),
   1018 	void	(*fork)(void *, void *),
   1019 	void	(*lwp_create)(void *, void *),
   1020 	void	(*exit)(void *),
   1021 	void	(*free)(void *, int))
   1022 {
   1023 	struct ctxop *ctx;
   1024 
   1025 	ctx = kmem_alloc(sizeof (struct ctxop), KM_SLEEP);
   1026 	ctx->save_op = save;
   1027 	ctx->restore_op = restore;
   1028 	ctx->fork_op = fork;
   1029 	ctx->lwp_create_op = lwp_create;
   1030 	ctx->exit_op = exit;
   1031 	ctx->free_op = free;
   1032 	ctx->arg = arg;
   1033 	ctx->next = t->t_ctx;
   1034 	t->t_ctx = ctx;
   1035 }
   1036 
   1037 /*
   1038  * Remove the thread context ops from a thread.
   1039  */
   1040 int
   1041 removectx(
   1042 	kthread_t *t,
   1043 	void	*arg,
   1044 	void	(*save)(void *),
   1045 	void	(*restore)(void *),
   1046 	void	(*fork)(void *, void *),
   1047 	void	(*lwp_create)(void *, void *),
   1048 	void	(*exit)(void *),
   1049 	void	(*free)(void *, int))
   1050 {
   1051 	struct ctxop *ctx, *prev_ctx;
   1052 
   1053 	/*
   1054 	 * The incoming kthread_t (which is the thread for which the
   1055 	 * context ops will be removed) should be one of the following:
   1056 	 *
   1057 	 * a) the current thread,
   1058 	 *
   1059 	 * b) a thread of a process that's being forked (SIDL),
   1060 	 *
   1061 	 * c) a thread that belongs to the same process as the current
   1062 	 *    thread and for which the current thread is the agent thread,
   1063 	 *
   1064 	 * d) a thread that is TS_STOPPED which is indicative of it
   1065 	 *    being (if curthread is not an agent) a thread being created
   1066 	 *    as part of an lwp creation.
   1067 	 */
   1068 	ASSERT(t == curthread || ttoproc(t)->p_stat == SIDL ||
   1069 	    ttoproc(t)->p_agenttp == curthread || t->t_state == TS_STOPPED);
   1070 
   1071 	/*
   1072 	 * Serialize modifications to t->t_ctx to prevent the agent thread
   1073 	 * and the target thread from racing with each other during lwp exit.
   1074 	 */
   1075 	mutex_enter(&t->t_ctx_lock);
   1076 	prev_ctx = NULL;
   1077 	for (ctx = t->t_ctx; ctx != NULL; ctx = ctx->next) {
   1078 		if (ctx->save_op == save && ctx->restore_op == restore &&
   1079 		    ctx->fork_op == fork && ctx->lwp_create_op == lwp_create &&
   1080 		    ctx->exit_op == exit && ctx->free_op == free &&
   1081 		    ctx->arg == arg) {
   1082 			if (prev_ctx)
   1083 				prev_ctx->next = ctx->next;
   1084 			else
   1085 				t->t_ctx = ctx->next;
   1086 			mutex_exit(&t->t_ctx_lock);
   1087 			if (ctx->free_op != NULL)
   1088 				(ctx->free_op)(ctx->arg, 0);
   1089 			kmem_free(ctx, sizeof (struct ctxop));
   1090 			return (1);
   1091 		}
   1092 		prev_ctx = ctx;
   1093 	}
   1094 	mutex_exit(&t->t_ctx_lock);
   1095 
   1096 	return (0);
   1097 }
   1098 
   1099 void
   1100 savectx(kthread_t *t)
   1101 {
   1102 	struct ctxop *ctx;
   1103 
   1104 	ASSERT(t == curthread);
   1105 	for (ctx = t->t_ctx; ctx != 0; ctx = ctx->next)
   1106 		if (ctx->save_op != NULL)
   1107 			(ctx->save_op)(ctx->arg);
   1108 }
   1109 
   1110 void
   1111 restorectx(kthread_t *t)
   1112 {
   1113 	struct ctxop *ctx;
   1114 
   1115 	ASSERT(t == curthread);
   1116 	for (ctx = t->t_ctx; ctx != 0; ctx = ctx->next)
   1117 		if (ctx->restore_op != NULL)
   1118 			(ctx->restore_op)(ctx->arg);
   1119 }
   1120 
   1121 void
   1122 forkctx(kthread_t *t, kthread_t *ct)
   1123 {
   1124 	struct ctxop *ctx;
   1125 
   1126 	for (ctx = t->t_ctx; ctx != NULL; ctx = ctx->next)
   1127 		if (ctx->fork_op != NULL)
   1128 			(ctx->fork_op)(t, ct);
   1129 }
   1130 
   1131 /*
   1132  * Note that this operator is only invoked via the _lwp_create
   1133  * system call.  The system may have other reasons to create lwps
   1134  * e.g. the agent lwp or the doors unreferenced lwp.
   1135  */
   1136 void
   1137 lwp_createctx(kthread_t *t, kthread_t *ct)
   1138 {
   1139 	struct ctxop *ctx;
   1140 
   1141 	for (ctx = t->t_ctx; ctx != NULL; ctx = ctx->next)
   1142 		if (ctx->lwp_create_op != NULL)
   1143 			(ctx->lwp_create_op)(t, ct);
   1144 }
   1145 
   1146 /*
   1147  * exitctx is called from thread_exit() and lwp_exit() to perform any actions
   1148  * needed when the thread/LWP leaves the processor for the last time. This
   1149  * routine is not intended to deal with freeing memory; freectx() is used for
   1150  * that purpose during thread_free(). This routine is provided to allow for
   1151  * clean-up that can't wait until thread_free().
   1152  */
   1153 void
   1154 exitctx(kthread_t *t)
   1155 {
   1156 	struct ctxop *ctx;
   1157 
   1158 	for (ctx = t->t_ctx; ctx != NULL; ctx = ctx->next)
   1159 		if (ctx->exit_op != NULL)
   1160 			(ctx->exit_op)(t);
   1161 }
   1162 
   1163 /*
   1164  * freectx is called from thread_free() and exec() to get
   1165  * rid of old thread context ops.
   1166  */
   1167 void
   1168 freectx(kthread_t *t, int isexec)
   1169 {
   1170 	struct ctxop *ctx;
   1171 
   1172 	w