Home | History | Annotate | Download | only in disp
      1 /*
      2  * CDDL HEADER START
      3  *
      4  * The contents of this file are subject to the terms of the
      5  * Common Development and Distribution License (the "License").
      6  * You may not use this file except in compliance with the License.
      7  *
      8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
      9  * or http://www.opensolaris.org/os/licensing.
     10  * See the License for the specific language governing permissions
     11  * and limitations under the License.
     12  *
     13  * When distributing Covered Code, include this CDDL HEADER in each
     14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
     15  * If applicable, add the following below this CDDL HEADER, with the
     16  * fields enclosed by brackets "[]" replaced with your own identifying
     17  * information: Portions Copyright [yyyy] [name of copyright owner]
     18  *
     19  * CDDL HEADER END
     20  */
     21 /*
     22  * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
     23  * Use is subject to license terms.
     24  */
     25 
     26 #pragma ident	"@(#)thread.c	1.185	08/01/03 SMI"
     27 
     28 #include <sys/types.h>
     29 #include <sys/param.h>
     30 #include <sys/sysmacros.h>
     31 #include <sys/signal.h>
     32 #include <sys/stack.h>
     33 #include <sys/pcb.h>
     34 #include <sys/user.h>
     35 #include <sys/systm.h>
     36 #include <sys/sysinfo.h>
     37 #include <sys/errno.h>
     38 #include <sys/cmn_err.h>
     39 #include <sys/cred.h>
     40 #include <sys/resource.h>
     41 #include <sys/task.h>
     42 #include <sys/project.h>
     43 #include <sys/proc.h>
     44 #include <sys/debug.h>
     45 #include <sys/disp.h>
     46 #include <sys/class.h>
     47 #include <vm/seg_kmem.h>
     48 #include <vm/seg_kp.h>
     49 #include <sys/machlock.h>
     50 #include <sys/kmem.h>
     51 #include <sys/varargs.h>
     52 #include <sys/turnstile.h>
     53 #include <sys/poll.h>
     54 #include <sys/vtrace.h>
     55 #include <sys/callb.h>
     56 #include <c2/audit.h>
     57 #include <sys/tnf.h>
     58 #include <sys/sobject.h>
     59 #include <sys/cpupart.h>
     60 #include <sys/pset.h>
     61 #include <sys/door.h>
     62 #include <sys/spl.h>
     63 #include <sys/copyops.h>
     64 #include <sys/rctl.h>
     65 #include <sys/brand.h>
     66 #include <sys/pool.h>
     67 #include <sys/zone.h>
     68 #include <sys/tsol/label.h>
     69 #include <sys/tsol/tndb.h>
     70 #include <sys/cpc_impl.h>
     71 #include <sys/sdt.h>
     72 #include <sys/reboot.h>
     73 #include <sys/kdi.h>
     74 #include <sys/waitq.h>
     75 #include <sys/cpucaps.h>
     76 #include <sys/kiconv.h>
     77 
/*
 * kmem caches for the per-thread structures.  All three are created in
 * thread_init().
 */
struct kmem_cache *thread_cache;	/* cache of free threads */
struct kmem_cache *lwp_cache;		/* cache of free lwps */
struct kmem_cache *turnstile_cache;	/* cache of free turnstiles */

/*
 * allthreads is only for use by kmem_readers.  All kernel loops can use
 * the current thread as a start/end point.
 */
static kthread_t *allthreads = &t0;	/* circular list of all threads */

/*
 * Deathrow bookkeeping.  reapq_add() pushes exited threads onto one of
 * the two lists below (linked through t_forw, terminated by NULL) and
 * thread_reaper() drains them.  reaplock protects the lists and counts.
 */
static kcondvar_t reaper_cv;		/* reaper waits here for work */
kthread_t	*thread_deathrow;	/* list of reapable threads */
kthread_t	*lwp_deathrow;		/* list of reapable lwp threads */
kmutex_t	reaplock;		/* protects lwp and thread deathrows */
int	thread_reapcnt = 0;		/* number of threads on deathrow */
int	lwp_reapcnt = 0;		/* number of lwps on deathrow */
int	reaplimit = 16;			/* delay reaping until reaplimit */

/*
 * Array of THREAD_FREE_NUM locks, indexed by THREAD_FREE_HASH(t).
 * Allocated and initialized in thread_init().
 */
thread_free_lock_t	*thread_free_lock;
					/* protects tick thread from reaper */

extern int nthread;

id_t	syscid;				/* system scheduling class ID */
void	*segkp_thread;			/* cookie for segkp pool */

/*
 * Sizes handed to segkp_cache_init() in thread_init() for the lwp stack
 * cache and the kernel thread stack cache respectively.
 */
int lwp_cache_sz = 32;
int t_cache_sz = 8;
/* Next t_did to hand out; advanced under pidlock in thread_create(). */
static kt_did_t next_t_id = 1;

/*
 * Min/Max stack sizes for stack size parameters
 */
#define	MAX_STKSIZE	(32 * DEFAULTSTKSZ)
#define	MIN_STKSIZE	DEFAULTSTKSZ

/*
 * default_stksize overrides lwp_default_stksize if it is set.
 * Both are validated (page multiple, within MIN/MAX_STKSIZE) in
 * thread_init(); a value of zero means "not set".
 */
int	default_stksize;
int	lwp_default_stksize;

/* Zone key whose destroy callback is thread_zone_destroy(). */
static zone_key_t zone_thread_key;

/*
 * forward declarations for internal thread specific data (tsd)
 */
static void *tsd_realloc(void *, size_t, size_t);

/* Body of the reaper daemon thread created in thread_init(). */
void thread_reaper(void);
    128 
    129 /*ARGSUSED*/
    130 static int
    131 turnstile_constructor(void *buf, void *cdrarg, int kmflags)
    132 {
    133 	bzero(buf, sizeof (turnstile_t));
    134 	return (0);
    135 }
    136 
    137 /*ARGSUSED*/
    138 static void
    139 turnstile_destructor(void *buf, void *cdrarg)
    140 {
    141 	turnstile_t *ts = buf;
    142 
    143 	ASSERT(ts->ts_free == NULL);
    144 	ASSERT(ts->ts_waiters == 0);
    145 	ASSERT(ts->ts_inheritor == NULL);
    146 	ASSERT(ts->ts_sleepq[0].sq_first == NULL);
    147 	ASSERT(ts->ts_sleepq[1].sq_first == NULL);
    148 }
    149 
/*
 * One-time initialization of the thread subsystem.
 *
 * Sets up the reaper and thread-free locks, creates the thread, lwp and
 * turnstile kmem caches, initializes the resource management
 * facilities, validates the stack size tunables, creates the segkp
 * stack caches, and finally creates the first CPU's idle thread and the
 * thread_reaper daemon.  The calling thread is t0.
 */
void
thread_init(void)
{
	kthread_t *tp;
	extern char sys_name[];
	extern void idle();
	struct cpu *cpu = CPU;
	int i;
	kmutex_t *lp;

	/*
	 * reaplock is a spin lock: reapq_add() takes it from the idle
	 * thread, which must not block.
	 */
	mutex_init(&reaplock, NULL, MUTEX_SPIN, (void *)ipltospl(DISP_LEVEL));
	thread_free_lock =
	    kmem_alloc(sizeof (thread_free_lock_t) * THREAD_FREE_NUM, KM_SLEEP);
	for (i = 0; i < THREAD_FREE_NUM; i++) {
		lp = &thread_free_lock[i].tf_lock;
		mutex_init(lp, NULL, MUTEX_DEFAULT, NULL);
	}

#if defined(__i386) || defined(__amd64)
	thread_cache = kmem_cache_create("thread_cache", sizeof (kthread_t),
	    PTR24_ALIGN, NULL, NULL, NULL, NULL, NULL, 0);

	/*
	 * "struct _klwp" includes a "struct pcb", which includes a
	 * "struct fpu", which needs to be 16-byte aligned on amd64
	 * (and even on i386 for fxsave/fxrstor).
	 */
	lwp_cache = kmem_cache_create("lwp_cache", sizeof (klwp_t),
	    16, NULL, NULL, NULL, NULL, NULL, 0);
#else
	/*
	 * Allocate thread structures from static_arena.  This prevents
	 * issues where a thread tries to relocate its own thread
	 * structure and touches it after the mapping has been suspended.
	 */
	thread_cache = kmem_cache_create("thread_cache", sizeof (kthread_t),
	    PTR24_ALIGN, NULL, NULL, NULL, NULL, static_arena, 0);

	lwp_stk_cache_init();

	lwp_cache = kmem_cache_create("lwp_cache", sizeof (klwp_t),
	    0, NULL, NULL, NULL, NULL, NULL, 0);
#endif

	turnstile_cache = kmem_cache_create("turnstile_cache",
	    sizeof (turnstile_t), 0,
	    turnstile_constructor, turnstile_destructor, NULL, NULL, NULL, 0);

	label_init();
	cred_init();

	/*
	 * Initialize various resource management facilities.
	 */
	rctl_init();
	cpucaps_init();
	/*
	 * Zone_init() should be called before project_init() so that project ID
	 * for the first project is initialized correctly.
	 */
	zone_init();
	project_init();
	brand_init();
	kiconv_init();
	task_init();
	tcache_init();
	pool_init();

	/*
	 * The current thread (t0) was not made by thread_create(), so give
	 * it its turnstile here.
	 */
	curthread->t_ts = kmem_cache_alloc(turnstile_cache, KM_SLEEP);

	/*
	 * Originally, we had two parameters to set default stack
	 * size: one for lwp's (lwp_default_stksize), and one for
	 * kernel-only threads (DEFAULTSTKSZ, a.k.a. _defaultstksz).
	 * Now we have a third parameter that overrides both if it is
	 * set to a legal stack size, called default_stksize.
	 */

	/*
	 * A legal value is a multiple of PAGESIZE within
	 * [MIN_STKSIZE, MAX_STKSIZE]; zero means "not set".
	 */
	if (default_stksize == 0) {
		default_stksize = DEFAULTSTKSZ;
	} else if (default_stksize % PAGESIZE != 0 ||
	    default_stksize > MAX_STKSIZE ||
	    default_stksize < MIN_STKSIZE) {
		cmn_err(CE_WARN, "Illegal stack size. Using %d",
		    (int)DEFAULTSTKSZ);
		default_stksize = DEFAULTSTKSZ;
	} else {
		/* a legal default_stksize overrides the lwp value too */
		lwp_default_stksize = default_stksize;
	}

	/* Same validation for the lwp value; fall back to default_stksize. */
	if (lwp_default_stksize == 0) {
		lwp_default_stksize = default_stksize;
	} else if (lwp_default_stksize % PAGESIZE != 0 ||
	    lwp_default_stksize > MAX_STKSIZE ||
	    lwp_default_stksize < MIN_STKSIZE) {
		cmn_err(CE_WARN, "Illegal stack size. Using %d",
		    default_stksize);
		lwp_default_stksize = default_stksize;
	}

	/* segkp caches of pre-sized stacks for lwps and kernel threads */
	segkp_lwp = segkp_cache_init(segkp, lwp_cache_sz,
	    lwp_default_stksize,
	    (KPD_NOWAIT | KPD_HASREDZONE | KPD_LOCKED));

	segkp_thread = segkp_cache_init(segkp, t_cache_sz,
	    default_stksize, KPD_HASREDZONE | KPD_LOCKED | KPD_NO_ANON);

	(void) getcid(sys_name, &syscid);
	curthread->t_cid = syscid;	/* current thread is t0 */

	/*
	 * Set up the first CPU's idle thread.
	 * It runs whenever the CPU has nothing worthwhile to do.
	 */
	tp = thread_create(NULL, 0, idle, NULL, 0, &p0, TS_STOPPED, -1);
	cpu->cpu_idle_thread = tp;
	tp->t_preempt = 1;
	tp->t_disp_queue = cpu->cpu_disp;
	ASSERT(tp->t_disp_queue != NULL);
	tp->t_bound_cpu = cpu;
	tp->t_affinitycnt = 1;

	/*
	 * Registering a thread in the callback table is usually
	 * done in the initialization code of the thread. In this
	 * case, we do it right after thread creation to avoid
	 * blocking idle thread while registering itself. It also
	 * avoids the possibility of reregistration in case a CPU
	 * restarts its idle thread.
	 */
	CALLB_CPR_INIT_SAFE(tp, "idle");

	/*
	 * Create the thread_reaper daemon. From this point on, exited
	 * threads will get reaped.
	 */
	(void) thread_create(NULL, 0, (void (*)())thread_reaper,
	    NULL, 0, &p0, TS_RUN, minclsyspri);

	/*
	 * Finish initializing the kernel memory allocator now that
	 * thread_create() is available.
	 */
	kmem_thread_init();

	/* Only when booted with the debugger flag (RB_DEBUG) set. */
	if (boothowto & RB_DEBUG)
		kdi_dvec_thravail();
}
    298 
    299 /*
    300  * Create a thread.
    301  *
    302  * thread_create() blocks for memory if necessary.  It never fails.
    303  *
    304  * If stk is NULL, the thread is created at the base of the stack
    305  * and cannot be swapped.
    306  */
    307 kthread_t *
    308 thread_create(
    309 	caddr_t	stk,
    310 	size_t	stksize,
    311 	void	(*proc)(),
    312 	void	*arg,
    313 	size_t	len,
    314 	proc_t	 *pp,
    315 	int	state,
    316 	pri_t	pri)
    317 {
    318 	kthread_t *t;
    319 	extern struct classfuncs sys_classfuncs;
    320 	turnstile_t *ts;
    321 
    322 	/*
    323 	 * Every thread keeps a turnstile around in case it needs to block.
    324 	 * The only reason the turnstile is not simply part of the thread
    325 	 * structure is that we may have to break the association whenever
    326 	 * more than one thread blocks on a given synchronization object.
    327 	 * From a memory-management standpoint, turnstiles are like the
    328 	 * "attached mblks" that hang off dblks in the streams allocator.
    329 	 */
    330 	ts = kmem_cache_alloc(turnstile_cache, KM_SLEEP);
    331 
    332 	if (stk == NULL) {
    333 		/*
    334 		 * alloc both thread and stack in segkp chunk
    335 		 */
    336 
    337 		if (stksize < default_stksize)
    338 			stksize = default_stksize;
    339 
    340 		if (stksize == default_stksize) {
    341 			stk = (caddr_t)segkp_cache_get(segkp_thread);
    342 		} else {
    343 			stksize = roundup(stksize, PAGESIZE);
    344 			stk = (caddr_t)segkp_get(segkp, stksize,
    345 			    (KPD_HASREDZONE | KPD_NO_ANON | KPD_LOCKED));
    346 		}
    347 
    348 		ASSERT(stk != NULL);
    349 
    350 		/*
    351 		 * The machine-dependent mutex code may require that
    352 		 * thread pointers (since they may be used for mutex owner
    353 		 * fields) have certain alignment requirements.
    354 		 * PTR24_ALIGN is the size of the alignment quanta.
    355 		 * XXX - assumes stack grows toward low addresses.
    356 		 */
    357 		if (stksize <= sizeof (kthread_t) + PTR24_ALIGN)
    358 			cmn_err(CE_PANIC, "thread_create: proposed stack size"
    359 			    " too small to hold thread.");
    360 #ifdef STACK_GROWTH_DOWN
    361 		stksize -= SA(sizeof (kthread_t) + PTR24_ALIGN - 1);
    362 		stksize &= -PTR24_ALIGN;	/* make thread aligned */
    363 		t = (kthread_t *)(stk + stksize);
    364 		bzero(t, sizeof (kthread_t));
    365 		if (audit_active)
    366 			audit_thread_create(t);
    367 		t->t_stk = stk + stksize;
    368 		t->t_stkbase = stk;
    369 #else	/* stack grows to larger addresses */
    370 		stksize -= SA(sizeof (kthread_t));
    371 		t = (kthread_t *)(stk);
    372 		bzero(t, sizeof (kthread_t));
    373 		t->t_stk = stk + sizeof (kthread_t);
    374 		t->t_stkbase = stk + stksize + sizeof (kthread_t);
    375 #endif	/* STACK_GROWTH_DOWN */
    376 		t->t_flag |= T_TALLOCSTK;
    377 		t->t_swap = stk;
    378 	} else {
    379 		t = kmem_cache_alloc(thread_cache, KM_SLEEP);
    380 		bzero(t, sizeof (kthread_t));
    381 		ASSERT(((uintptr_t)t & (PTR24_ALIGN - 1)) == 0);
    382 		if (audit_active)
    383 			audit_thread_create(t);
    384 		/*
    385 		 * Initialize t_stk to the kernel stack pointer to use
    386 		 * upon entry to the kernel
    387 		 */
    388 #ifdef STACK_GROWTH_DOWN
    389 		t->t_stk = stk + stksize;
    390 		t->t_stkbase = stk;
    391 #else
    392 		t->t_stk = stk;			/* 3b2-like */
    393 		t->t_stkbase = stk + stksize;
    394 #endif /* STACK_GROWTH_DOWN */
    395 	}
    396 
    397 	/* set default stack flag */
    398 	if (stksize == lwp_default_stksize)
    399 		t->t_flag |= T_DFLTSTK;
    400 
    401 	t->t_ts = ts;
    402 
    403 	/*
    404 	 * p_cred could be NULL if it thread_create is called before cred_init
    405 	 * is called in main.
    406 	 */
    407 	mutex_enter(&pp->p_crlock);
    408 	if (pp->p_cred)
    409 		crhold(t->t_cred = pp->p_cred);
    410 	mutex_exit(&pp->p_crlock);
    411 	t->t_start = gethrestime_sec();
    412 	t->t_startpc = proc;
    413 	t->t_procp = pp;
    414 	t->t_clfuncs = &sys_classfuncs.thread;
    415 	t->t_cid = syscid;
    416 	t->t_pri = pri;
    417 	t->t_stime = lbolt;
    418 	t->t_schedflag = TS_LOAD | TS_DONT_SWAP;
    419 	t->t_bind_cpu = PBIND_NONE;
    420 	t->t_bind_pset = PS_NONE;
    421 	t->t_plockp = &pp->p_lock;
    422 	t->t_copyops = NULL;
    423 	t->t_taskq = NULL;
    424 	t->t_anttime = 0;
    425 	t->t_hatdepth = 0;
    426 
    427 	t->t_dtrace_vtime = 1;	/* assure vtimestamp is always non-zero */
    428 
    429 	CPU_STATS_ADDQ(CPU, sys, nthreads, 1);
    430 #ifndef NPROBE
    431 	/* Kernel probe */
    432 	tnf_thread_create(t);
    433 #endif /* NPROBE */
    434 	LOCK_INIT_CLEAR(&t->t_lock);
    435 
    436 	/*
    437 	 * Callers who give us a NULL proc must do their own
    438 	 * stack initialization.  e.g. lwp_create()
    439 	 */
    440 	if (proc != NULL) {
    441 		t->t_stk = thread_stk_init(t->t_stk);
    442 		thread_load(t, proc, arg, len);
    443 	}
    444 
    445 	/*
    446 	 * Put a hold on project0. If this thread is actually in a
    447 	 * different project, then t_proj will be changed later in
    448 	 * lwp_create().  All kernel-only threads must be in project 0.
    449 	 */
    450 	t->t_proj = project_hold(proj0p);
    451 
    452 	lgrp_affinity_init(&t->t_lgrp_affinity);
    453 
    454 	mutex_enter(&pidlock);
    455 	nthread++;
    456 	t->t_did = next_t_id++;
    457 	t->t_prev = curthread->t_prev;
    458 	t->t_next = curthread;
    459 
    460 	/*
    461 	 * Add the thread to the list of all threads, and initialize
    462 	 * its t_cpu pointer.  We need to block preemption since
    463 	 * cpu_offline walks the thread list looking for threads
    464 	 * with t_cpu pointing to the CPU being offlined.  We want
    465 	 * to make sure that the list is consistent and that if t_cpu
    466 	 * is set, the thread is on the list.
    467 	 */
    468 	kpreempt_disable();
    469 	curthread->t_prev->t_next = t;
    470 	curthread->t_prev = t;
    471 
    472 	/*
    473 	 * Threads should never have a NULL t_cpu pointer so assign it
    474 	 * here.  If the thread is being created with state TS_RUN a
    475 	 * better CPU may be chosen when it is placed on the run queue.
    476 	 *
    477 	 * We need to keep kernel preemption disabled when setting all
    478 	 * three fields to keep them in sync.  Also, always create in
    479 	 * the default partition since that's where kernel threads go
    480 	 * (if this isn't a kernel thread, t_cpupart will be changed
    481 	 * in lwp_create before setting the thread runnable).
    482 	 */
    483 	t->t_cpupart = &cp_default;
    484 
    485 	/*
    486 	 * For now, affiliate this thread with the root lgroup.
    487 	 * Since the kernel does not (presently) allocate its memory
    488 	 * in a locality aware fashion, the root is an appropriate home.
    489 	 * If this thread is later associated with an lwp, it will have
    490 	 * it's lgroup re-assigned at that time.
    491 	 */
    492 	lgrp_move_thread(t, &cp_default.cp_lgrploads[LGRP_ROOTID], 1);
    493 
    494 	/*
    495 	 * Inherit the current cpu.  If this cpu isn't part of the chosen
    496 	 * lgroup, a new cpu will be chosen by cpu_choose when the thread
    497 	 * is ready to run.
    498 	 */
    499 	if (CPU->cpu_part == &cp_default)
    500 		t->t_cpu = CPU;
    501 	else
    502 		t->t_cpu = disp_lowpri_cpu(cp_default.cp_cpulist, t->t_lpl,
    503 		    t->t_pri, NULL);
    504 
    505 	t->t_disp_queue = t->t_cpu->cpu_disp;
    506 	kpreempt_enable();
    507 
    508 	/*
    509 	 * Initialize thread state and the dispatcher lock pointer.
    510 	 * Need to hold onto pidlock to block allthreads walkers until
    511 	 * the state is set.
    512 	 */
    513 	switch (state) {
    514 	case TS_RUN:
    515 		curthread->t_oldspl = splhigh();	/* get dispatcher spl */
    516 		THREAD_SET_STATE(t, TS_STOPPED, &transition_lock);
    517 		CL_SETRUN(t);
    518 		thread_unlock(t);
    519 		break;
    520 
    521 	case TS_ONPROC:
    522 		THREAD_ONPROC(t, t->t_cpu);
    523 		break;
    524 
    525 	case TS_FREE:
    526 		/*
    527 		 * Free state will be used for intr threads.
    528 		 * The interrupt routine must set the thread dispatcher
    529 		 * lock pointer (t_lockp) if starting on a CPU
    530 		 * other than the current one.
    531 		 */
    532 		THREAD_FREEINTR(t, CPU);
    533 		break;
    534 
    535 	case TS_STOPPED:
    536 		THREAD_SET_STATE(t, TS_STOPPED, &stop_lock);
    537 		break;
    538 
    539 	default:			/* TS_SLEEP, TS_ZOMB or TS_TRANS */
    540 		cmn_err(CE_PANIC, "thread_create: invalid state %d", state);
    541 	}
    542 	mutex_exit(&pidlock);
    543 	return (t);
    544 }
    545 
    546 /*
    547  * Move thread to project0 and take care of project reference counters.
    548  */
    549 void
    550 thread_rele(kthread_t *t)
    551 {
    552 	kproject_t *kpj;
    553 
    554 	thread_lock(t);
    555 
    556 	ASSERT(t == curthread || t->t_state == TS_FREE || t->t_procp == &p0);
    557 	kpj = ttoproj(t);
    558 	t->t_proj = proj0p;
    559 
    560 	thread_unlock(t);
    561 
    562 	if (kpj != proj0p) {
    563 		project_rele(kpj);
    564 		(void) project_hold(proj0p);
    565 	}
    566 }
    567 
/*
 * Terminate the calling thread.  Does not return.
 *
 * Cleans up per-thread state (TSD, performance counters, doors, tnf
 * probe data, context ops), moves the thread to project0, unlinks it
 * from the list of all threads, wakes anyone waiting in thread_join(),
 * marks the thread TS_ZOMB and switches away for the last time.
 * Threads flagged TP_ZTHREAD must not come through here; doing so
 * panics.
 */
void
thread_exit(void)
{
	kthread_t *t = curthread;

	if ((t->t_proc_flag & TP_ZTHREAD) != 0)
		cmn_err(CE_PANIC, "thread_exit: zthread_exit() not called");

	tsd_exit();		/* Clean up this thread's TSD */

	kcpc_passivate();	/* clean up performance counter state */

	/*
	 * No kernel thread should have called poll() without arranging
	 * calling pollcleanup() here.
	 */
	ASSERT(t->t_pollstate == NULL);
	ASSERT(t->t_schedctl == NULL);
	if (t->t_door)
		door_slam();	/* in case thread did an upcall */

#ifndef NPROBE
	/* Kernel probe */
	if (t->t_tnf_tpdp)
		tnf_thread_exit();
#endif /* NPROBE */

	thread_rele(t);
	/*
	 * NOTE(review): presumably this keeps the exiting thread from
	 * being preempted from here on -- confirm against
	 * kpreempt_disable().
	 */
	t->t_preempt++;

	/*
	 * remove thread from the all threads list so that
	 * death-row can use the same pointers.
	 */
	mutex_enter(&pidlock);
	t->t_next->t_prev = t->t_prev;
	t->t_prev->t_next = t->t_next;
	ASSERT(allthreads != t);	/* t0 never exits */
	cv_broadcast(&t->t_joincv);	/* wake up anyone in thread_join */
	mutex_exit(&pidlock);

	/* run the exit hooks of any thread and process context ops */
	if (t->t_ctx != NULL)
		exitctx(t);
	if (t->t_procp->p_pctx != NULL)
		exitpctx(t->t_procp);

	t->t_state = TS_ZOMB;	/* set zombie thread */

	swtch_from_zombie();	/* give up the CPU */
	/* NOTREACHED */
}
    619 
    620 /*
    621  * Check to see if the specified thread is active (defined as being on
    622  * the thread list).  This is certainly a slow way to do this; if there's
    623  * ever a reason to speed it up, we could maintain a hash table of active
    624  * threads indexed by their t_did.
    625  */
    626 static kthread_t *
    627 did_to_thread(kt_did_t tid)
    628 {
    629 	kthread_t *t;
    630 
    631 	ASSERT(MUTEX_HELD(&pidlock));
    632 	for (t = curthread->t_next; t != curthread; t = t->t_next) {
    633 		if (t->t_did == tid)
    634 			break;
    635 	}
    636 	if (t->t_did == tid)
    637 		return (t);
    638 	else
    639 		return (NULL);
    640 }
    641 
    642 /*
    643  * Wait for specified thread to exit.  Returns immediately if the thread
    644  * could not be found, meaning that it has either already exited or never
    645  * existed.
    646  */
    647 void
    648 thread_join(kt_did_t tid)
    649 {
    650 	kthread_t *t;
    651 
    652 	ASSERT(tid != curthread->t_did);
    653 	ASSERT(tid != t0.t_did);
    654 
    655 	mutex_enter(&pidlock);
    656 	/*
    657 	 * Make sure we check that the thread is on the thread list
    658 	 * before blocking on it; otherwise we could end up blocking on
    659 	 * a cv that's already been freed.  In other words, don't cache
    660 	 * the thread pointer across calls to cv_wait.
    661 	 *
    662 	 * The choice of loop invariant means that whenever a thread
    663 	 * is taken off the allthreads list, a cv_broadcast must be
    664 	 * performed on that thread's t_joincv to wake up any waiters.
    665 	 * The broadcast doesn't have to happen right away, but it
    666 	 * shouldn't be postponed indefinitely (e.g., by doing it in
    667 	 * thread_free which may only be executed when the deathrow
    668 	 * queue is processed.
    669 	 */
    670 	while (t = did_to_thread(tid))
    671 		cv_wait(&t->t_joincv, &pidlock);
    672 	mutex_exit(&pidlock);
    673 }
    674 
    675 void
    676 thread_free_prevent(kthread_t *t)
    677 {
    678 	kmutex_t *lp;
    679 
    680 	lp = &thread_free_lock[THREAD_FREE_HASH(t)].tf_lock;
    681 	mutex_enter(lp);
    682 }
    683 
    684 void
    685 thread_free_allow(kthread_t *t)
    686 {
    687 	kmutex_t *lp;
    688 
    689 	lp = &thread_free_lock[THREAD_FREE_HASH(t)].tf_lock;
    690 	mutex_exit(lp);
    691 }
    692 
    693 static void
    694 thread_free_barrier(kthread_t *t)
    695 {
    696 	kmutex_t *lp;
    697 
    698 	lp = &thread_free_lock[THREAD_FREE_HASH(t)].tf_lock;
    699 	mutex_enter(lp);
    700 	mutex_exit(lp);
    701 }
    702 
/*
 * Release everything still attached to thread t and then free the
 * thread itself.  t must be in state TS_FREE and must not be t0; its
 * door, schedctl and poll state must already be gone.
 *
 * How the thread structure is freed depends on how thread_create()
 * allocated it: if T_TALLOCSTK is set the structure lives inside the
 * segkp stack chunk and goes away with it, otherwise it is returned to
 * thread_cache.
 */
void
thread_free(kthread_t *t)
{
	ASSERT(t != &t0 && t->t_state == TS_FREE);
	ASSERT(t->t_door == NULL);
	ASSERT(t->t_schedctl == NULL);
	ASSERT(t->t_pollstate == NULL);

	t->t_pri = 0;
	t->t_pc = 0;
	t->t_sp = 0;
	t->t_wchan0 = NULL;
	t->t_wchan = NULL;
	/* drop the credential hold taken in thread_create() */
	if (t->t_cred != NULL) {
		crfree(t->t_cred);
		t->t_cred = 0;
	}
	/* t_pdmsg is a NUL-terminated string; free it with its terminator */
	if (t->t_pdmsg) {
		kmem_free(t->t_pdmsg, strlen(t->t_pdmsg) + 1);
		t->t_pdmsg = NULL;
	}
	if (audit_active)
		audit_thread_free(t);
#ifndef NPROBE
	if (t->t_tnf_tpdp)
		tnf_thread_free(t);
#endif /* NPROBE */
	if (t->t_cldata) {
		CL_EXITCLASS(t->t_cid, (caddr_t *)t->t_cldata);
	}
	if (t->t_rprof != NULL) {
		kmem_free(t->t_rprof, sizeof (*t->t_rprof));
		t->t_rprof = NULL;
	}
	t->t_lockp = NULL;	/* nothing should try to lock this thread now */
	if (t->t_lwp)
		lwp_freeregs(t->t_lwp, 0);
	if (t->t_ctx)
		freectx(t, 0);
	t->t_stk = NULL;
	if (t->t_lwp)
		lwp_stk_fini(t->t_lwp);
	lock_clear(&t->t_lock);

	/* the thread's turnstile must be idle before it is returned */
	if (t->t_ts->ts_waiters > 0)
		panic("thread_free: turnstile still active");

	kmem_cache_free(turnstile_cache, t->t_ts);

	free_afd(&t->t_activefd);

	/*
	 * Barrier for the tick accounting code.  The tick accounting code
	 * holds this lock to keep the thread from going away while it's
	 * looking at it.
	 */
	thread_free_barrier(t);

	/* drop the project0 hold (see thread_create() and thread_rele()) */
	ASSERT(ttoproj(t) == proj0p);
	project_rele(ttoproj(t));

	lgrp_affinity_free(&t->t_lgrp_affinity);

	/*
	 * Free thread struct and its stack.
	 */
	if (t->t_flag & T_TALLOCSTK) {
		/* thread struct is embedded in stack */
		segkp_release(segkp, t->t_swap);
		/* t must not be touched past this point */
		mutex_enter(&pidlock);
		nthread--;
		mutex_exit(&pidlock);
	} else {
		if (t->t_swap) {
			segkp_release(segkp, t->t_swap);
			t->t_swap = NULL;
		}
		if (t->t_lwp) {
			kmem_cache_free(lwp_cache, t->t_lwp);
			t->t_lwp = NULL;
		}
		mutex_enter(&pidlock);
		nthread--;
		mutex_exit(&pidlock);
		kmem_cache_free(thread_cache, t);
	}
}
    790 
    791 /*
    792  * Removes threads associated with the given zone from a deathrow queue.
    793  * tp is a pointer to the head of the deathrow queue, and countp is a
    794  * pointer to the current deathrow count.  Returns a linked list of
    795  * threads removed from the list.
    796  */
    797 static kthread_t *
    798 thread_zone_cleanup(kthread_t **tp, int *countp, zoneid_t zoneid)
    799 {
    800 	kthread_t *tmp, *list = NULL;
    801 	cred_t *cr;
    802 
    803 	ASSERT(MUTEX_HELD(&reaplock));
    804 	while (*tp != NULL) {
    805 		if ((cr = (*tp)->t_cred) != NULL && crgetzoneid(cr) == zoneid) {
    806 			tmp = *tp;
    807 			*tp = tmp->t_forw;
    808 			tmp->t_forw = list;
    809 			list = tmp;
    810 			(*countp)--;
    811 		} else {
    812 			tp = &(*tp)->t_forw;
    813 		}
    814 	}
    815 	return (list);
    816 }
    817 
    818 static void
    819 thread_reap_list(kthread_t *t)
    820 {
    821 	kthread_t *next;
    822 
    823 	while (t != NULL) {
    824 		next = t->t_forw;
    825 		thread_free(t);
    826 		t = next;
    827 	}
    828 }
    829 
    830 /* ARGSUSED */
    831 static void
    832 thread_zone_destroy(zoneid_t zoneid, void *unused)
    833 {
    834 	kthread_t *t, *l;
    835 
    836 	mutex_enter(&reaplock);
    837 	/*
    838 	 * Pull threads and lwps associated with zone off deathrow lists.
    839 	 */
    840 	t = thread_zone_cleanup(&thread_deathrow, &thread_reapcnt, zoneid);
    841 	l = thread_zone_cleanup(&lwp_deathrow, &lwp_reapcnt, zoneid);
    842 	mutex_exit(&reaplock);
    843 
    844 	/*
    845 	 * Reap threads
    846 	 */
    847 	thread_reap_list(t);
    848 
    849 	/*
    850 	 * Reap lwps
    851 	 */
    852 	thread_reap_list(l);
    853 }
    854 
    855 /*
    856  * cleanup zombie threads that are on deathrow.
    857  */
    858 void
    859 thread_reaper()
    860 {
    861 	kthread_t *t, *l;
    862 	callb_cpr_t cprinfo;
    863 
    864 	/*
    865 	 * Register callback to clean up threads when zone is destroyed.
    866 	 */
    867 	zone_key_create(&zone_thread_key, NULL, NULL, thread_zone_destroy);
    868 
    869 	CALLB_CPR_INIT(&cprinfo, &reaplock, callb_generic_cpr, "t_reaper");
    870 	for (;;) {
    871 		mutex_enter(&reaplock);
    872 		while (thread_deathrow == NULL && lwp_deathrow == NULL) {
    873 			CALLB_CPR_SAFE_BEGIN(&cprinfo);
    874 			cv_wait(&reaper_cv, &reaplock);
    875 			CALLB_CPR_SAFE_END(&cprinfo, &reaplock);
    876 		}
    877 		t = thread_deathrow;
    878 		l = lwp_deathrow;
    879 		thread_deathrow = NULL;
    880 		lwp_deathrow = NULL;
    881 		thread_reapcnt = 0;
    882 		lwp_reapcnt = 0;
    883 		mutex_exit(&reaplock);
    884 
    885 		/*
    886 		 * Reap threads
    887 		 */
    888 		thread_reap_list(t);
    889 
    890 		/*
    891 		 * Reap lwps
    892 		 */
    893 		thread_reap_list(l);
    894 	}
    895 }
    896 
/*
 * This is called by resume() to put a zombie thread onto deathrow.
 * The thread's state is changed to TS_FREE to indicate that it is
 * reapable.  This is called from the idle thread so it must not block
 * (just spin).
 *
 * The reaper is only woken once the combined number of entries on both
 * deathrow lists exceeds reaplimit.
 */
void
reapq_add(kthread_t *t)
{
	mutex_enter(&reaplock);

	/*
	 * lwp_deathrow contains only threads with lwp linkage
	 * that are of the default stacksize. Anything else goes
	 * on thread_deathrow.
	 */
	if (ttolwp(t) && (t->t_flag & T_DFLTSTK)) {
		/* push onto the head of the lwp list */
		t->t_forw = lwp_deathrow;
		lwp_deathrow = t;
		lwp_reapcnt++;
	} else {
		/* push onto the head of the thread list */
		t->t_forw = thread_deathrow;
		thread_deathrow = t;
		thread_reapcnt++;
	}
	if (lwp_reapcnt + thread_reapcnt > reaplimit)
		cv_signal(&reaper_cv);	/* wake the reaper */
	t->t_state = TS_FREE;
	lock_clear(&t->t_lock);

	/*
	 * Before we return, we need to grab and drop the thread lock for
	 * the dead thread.  At this point, the current thread is the idle
	 * thread, and the dead thread's CPU lock points to the current
	 * CPU -- and we must grab and drop the lock to synchronize with
	 * a racing thread walking a blocking chain that the zombie thread
	 * was recently in.  By this point, that blocking chain is (by
	 * definition) stale:  the dead thread is not holding any locks, and
	 * is therefore not in any blocking chains -- but if we do not regrab
	 * our lock before freeing the dead thread's data structures, the
	 * thread walking the (stale) blocking chain will die on memory
	 * corruption when it attempts to drop the dead thread's lock.  We
	 * only need do this once because there is no way for the dead thread
	 * to ever again be on a blocking chain:  once we have grabbed and
	 * dropped the thread lock, we are guaranteed that anyone that could
	 * have seen this thread in a blocking chain can no longer see it.
	 */
	thread_lock(t);
	thread_unlock(t);

	mutex_exit(&reaplock);
}
    948 
    949 /*
     950  * Install thread context ops on the specified thread.
    951  */
    952 void
    953 installctx(
    954 	kthread_t *t,
    955 	void	*arg,
    956 	void	(*save)(void *),
    957 	void	(*restore)(void *),
    958 	void	(*fork)(void *, void *),
    959 	void	(*lwp_create)(void *, void *),
    960 	void	(*exit)(void *),
    961 	void	(*free)(void *, int))
    962 {
    963 	struct ctxop *ctx;
    964 
    965 	ctx = kmem_alloc(sizeof (struct ctxop), KM_SLEEP);
    966 	ctx->save_op = save;
    967 	ctx->restore_op = restore;
    968 	ctx->fork_op = fork;
    969 	ctx->lwp_create_op = lwp_create;
    970 	ctx->exit_op = exit;
    971 	ctx->free_op = free;
    972 	ctx->arg = arg;
    973 	ctx->next = t->t_ctx;
    974 	t->t_ctx = ctx;
    975 }
    976 
    977 /*
    978  * Remove the thread context ops from a thread.
    979  */
    980 int
    981 removectx(
    982 	kthread_t *t,
    983 	void	*arg,
    984 	void	(*save)(void *),
    985 	void	(*restore)(void *),
    986 	void	(*fork)(void *, void *),
    987 	void	(*lwp_create)(void *, void *),
    988 	void	(*exit)(void *),
    989 	void	(*free)(void *, int))
    990 {
    991 	struct ctxop *ctx, *prev_ctx;
    992 
    993 	/*
    994 	 * The incoming kthread_t (which is the thread for which the
    995 	 * context ops will be removed) should be one of the following:
    996 	 *
    997 	 * a) the current thread,
    998 	 *
    999 	 * b) a thread of a process that's being forked (SIDL),
   1000 	 *
   1001 	 * c) a thread that belongs to the same process as the current
   1002 	 *    thread and for which the current thread is the agent thread,
   1003 	 *
   1004 	 * d) a thread that is TS_STOPPED which is indicative of it
   1005 	 *    being (if curthread is not an agent) a thread being created
   1006 	 *    as part of an lwp creation.
   1007 	 */
   1008 	ASSERT(t == curthread || ttoproc(t)->p_stat == SIDL ||
   1009 	    ttoproc(t)->p_agenttp == curthread || t->t_state == TS_STOPPED);
   1010 
   1011 	/*
   1012 	 * Serialize modifications to t->t_ctx to prevent the agent thread
   1013 	 * and the target thread from racing with each other during lwp exit.
   1014 	 */
   1015 	mutex_enter(&t->t_ctx_lock);
   1016 	prev_ctx = NULL;
   1017 	for (ctx = t->t_ctx; ctx != NULL; ctx = ctx->next) {
   1018 		if (ctx->save_op == save && ctx->restore_op == restore &&
   1019 		    ctx->fork_op == fork && ctx->lwp_create_op == lwp_create &&
   1020 		    ctx->exit_op == exit && ctx->free_op == free &&
   1021 		    ctx->arg == arg) {
   1022 			if (prev_ctx)
   1023 				prev_ctx->next = ctx->next;
   1024 			else
   1025 				t->t_ctx = ctx->next;
   1026 			mutex_exit(&t->t_ctx_lock);
   1027 			if (ctx->free_op != NULL)
   1028 				(ctx->free_op)(ctx->arg, 0);
   1029 			kmem_free(ctx, sizeof (struct ctxop));
   1030 			return (1);
   1031 		}
   1032 		prev_ctx = ctx;
   1033 	}
   1034 	mutex_exit(&t->t_ctx_lock);
   1035 
   1036 	return (0);
   1037 }
   1038 
   1039 void
   1040 savectx(kthread_t *t)
   1041 {
   1042 	struct ctxop *ctx;
   1043 
   1044 	ASSERT(t == curthread);
   1045 	for (ctx = t->t_ctx; ctx != 0; ctx = ctx->next)
   1046 		if (ctx->save_op != NULL)
   1047 			(ctx->save_op)(ctx->arg);
   1048 }
   1049 
   1050 void
   1051 restorectx(kthread_t *t)
   1052 {
   1053 	struct ctxop *ctx;
   1054 
   1055 	ASSERT(t == curthread);
   1056 	for (ctx = t->t_ctx; ctx != 0; ctx = ctx->next)
   1057 		if (ctx->restore_op != NULL)
   1058 			(ctx->restore_op)(ctx->arg);
   1059 }
   1060 
   1061 void
   1062 forkctx(kthread_t *t, kthread_t *ct)
   1063 {
   1064 	struct ctxop *ctx;
   1065 
   1066 	for (ctx = t->t_ctx; ctx != NULL; ctx = ctx->next)
   1067 		if (ctx->fork_op != NULL)
   1068 			(ctx->fork_op)(t, ct);
   1069 }
   1070 
   1071 /*
   1072  * Note that this operator is only invoked via the _lwp_create
   1073  * system call.  The system may have other reasons to create lwps
   1074  * e.g. the agent lwp or the doors unreferenced lwp.
   1075  */
   1076 void
   1077 lwp_createctx(kthread_t *t, kthread_t *ct)
   1078 {
   1079 	struct ctxop *ctx;
   1080 
   1081 	for (ctx = t->t_ctx; ctx != NULL; ctx = ctx->next)
   1082 		if (ctx->lwp_create_op != NULL)
   1083 			(ctx->lwp_create_op)(t, ct);
   1084 }
   1085 
   1086 /*
   1087  * exitctx is called from thread_exit() and lwp_exit() to perform any actions
   1088  * needed when the thread/LWP leaves the processor for the last time. This
   1089  * routine is not intended to deal with freeing memory; freectx() is used for
   1090  * that purpose during thread_free(). This routine is provided to allow for
   1091  * clean-up that can't wait until thread_free().
   1092  */
   1093 void
   1094 exitctx(kthread_t *t)
   1095 {
   1096 	struct ctxop *ctx;
   1097 
   1098 	for (ctx = t->t_ctx; ctx != NULL; ctx = ctx->next)
   1099 		if (ctx->exit_op != NULL)
   1100 			(ctx->exit_op)(t);
   1101 }
   1102 
   1103 /*
   1104  * freectx is called from thread_free() and exec() to get
   1105  * rid of old thread context ops.
   1106  */
   1107 void
   1108 freectx(kthread_t *t, int isexec)
   1109 {
   1110 	struct ctxop *ctx;
   1111 
   1112 	while ((ctx = t->t_ctx) != NULL) {
   1113 		t->t_ctx = ctx->next;
   1114 		if (ctx->free_op != NULL)
   1115 			(ctx->free_op)(ctx->arg, isexec);
   1116 		kmem_free(ctx, sizeof (struct ctxop));
   1117 	}
   1118 }
   1119 
   1120 /*
   1121  * Set the thread running; arrange for it to be swapped in if necessary.
   1122  */
   1123 void
   1124 setrun_locked(kthread_t *t)
   1125 {
   1126 	ASSERT(THREAD_LOCK_HELD(t));
   1127 	if (t->t_state == TS_SLEEP) {
   1128 		/*
   1129 		 * Take off sleep queue.
   1130 		 */
   1131 		SOBJ_UNSLEEP(t->t_sobj_ops, t);
   1132 	} else if (t->t_state & (TS_RUN | TS_ONPROC)) {
   1133 		/*
   1134 		 * Already on dispatcher queue.
   1135 		 */
   1136 		return;
   1137 	} else if (t->t_state == TS_WAIT) {
   1138 		waitq_setrun(t);
   1139 	} else if (t->t_state == TS_STOPPED) {
   1140 		/*
   1141 		 * All of the sending of SIGCONT (TC_XSTART) and /proc
   1142 		 * (TC_PSTART) and lwp_continue() (TC_CSTART) must have
   1143 		 * requested that the thread be run.
   1144 		 * Just calling setrun() is not sufficient to set a stopped
   1145 		 * thread running.  TP_TXSTART is always set if the thread
   1146 		 * is not stopped by a jobcontrol stop signal.
   1147 		 * TP_TPSTART is always set if /proc is not controlling it.
   1148 		 * TP_TCSTART is always set if lwp_suspend() didn't stop it.
   1149 		 * The thread won't be stopped unless one of these
   1150 		 * three mechanisms did it.
   1151 		 *
   1152 		 * These flags must be set before calling setrun_locked(t).
   1153 		 * They can't be passed as arguments because the streams
   1154 		 * code calls setrun() indirectly and the mechanism for
   1155 		 * doing so admits only one argument.  Note that the
   1156 		 * thread must be locked in order to change t_schedflags.
   1157 		 */
   1158 		if ((t->t_schedflag & TS_ALLSTART) != TS_ALLSTART)
   1159 			return;
   1160 		/*
   1161 		 * Process is no longer stopped (a thread is running).
   1162 		 */
   1163 		t->t_whystop = 0;
   1164 		t->t_whatstop = 0;
   1165 		/*
   1166 		 * Strictly speaking, we do not have to clear these
   1167 		 * flags here; they are cleared on entry to stop().
   1168 		 * However, they are confusing when doing kernel
   1169 		 * debugging or when they are revealed by ps(1).
   1170 		 */
   1171 		t->t_schedflag &= ~TS_ALLSTART;
   1172 		THREAD_TRANSITION(t);	/* drop stopped-thread lock */
   1173 		ASSERT(t->t_lockp == &transition_lock);
   1174 		ASSERT(t->t_wchan0 == NULL && t->t_wchan == NULL);
   1175 		/*
   1176 		 * Let the class put the process on the dispatcher queue.
   1177 		 */
   1178 		CL_SETRUN(t);
   1179 	}
   1180 }
   1181 
   1182 void
   1183 setrun(kthread_t *t)
   1184 {
   1185 	thread_lock(t);
   1186 	setrun_locked(t);
   1187 	thread_unlock(t);
   1188 }
   1189 
   1190 /*
   1191  * Unpin an interrupted thread.
   1192  *	When an interrupt occurs, the interrupt is handled on the stack
   1193  *	of an interrupt thread, taken from a pool linked to the CPU structure.
   1194  *
   1195  *	When swtch() is switching away from an interrupt thread because it
   1196  *	blocked or was preempted, this routine is called to complete the
   1197  *	saving of the interrupted thread state, and returns the interrupted
   1198  *	thread pointer so it may be resumed.
   1199  *
   1200  *	Called by swtch() only at high spl.
   1201  */
   1202 kthread_t *
   1203 thread_unpin()
   1204 {
   1205 	kthread_t	*t = curthread;	/* current thread */
   1206 	kthread_t	*itp;		/* interrupted thread */
   1207 	int		i;		/* interrupt level */
   1208 	extern int	intr_passivate();
   1209 
   1210 	ASSERT(t->t_intr != NULL);
   1211 
   1212 	itp = t->t_intr;		/* interrupted thread */
   1213 	t->t_intr = NULL;		/* clear interrupt ptr */
   1214 
   1215 	/*
   1216 	 * Get state from interrupt thread for the one
   1217 	 * it interrupted.
   1218 	 */
   1219 
   1220 	i = intr_passivate(t, itp);
   1221 
   1222 	TRACE_5(TR_FAC_INTR, TR_INTR_PASSIVATE,
   1223 	    "intr_passivate:level %d curthread %p (%T) ithread %p (%T)",
   1224 	    i, t, t, itp, itp);
   1225 
   1226 	/*
   1227 	 * Dissociate the current thread from the interrupted thread's LWP.
   1228 	 */
   1229 	t->t_lwp = NULL;
   1230 
   1231 	/*
   1232 	 * Interrupt handlers above the level that spinlocks block must
   1233 	 * not block.
   1234 	 */
   1235 #if DEBUG
   1236 	if (i < 0 || i > LOCK_LEVEL)
   1237 		cmn_err(CE_PANIC, "thread_unpin: ipl out of range %x", i);
   1238 #endif
   1239 
   1240 	/*
   1241 	 * Compute the CPU's base interrupt level based on the active
   1242 	 * interrupts.
   1243 	 */
   1244 	ASSERT(CPU->cpu_intr_actv & (1 << i));
   1245 	set_base_spl();
   1246 
   1247 	return (itp);
   1248 }
   1249 
   1250 /*
   1251  * Create and initialize an interrupt thread.
    1252  *	Returns nothing; the thread is linked onto cp's interrupt pool.
   1253  *	Called at spl7() or better.
   1254  */
   1255 void
   1256 thread_create_intr(struct cpu *cp)
   1257 {
   1258 	kthread_t *tp;
   1259 
   1260 	tp = thread_create(NULL, 0,
   1261 	    (void (*)())thread_create_intr, NULL, 0, &p0, TS_ONPROC, 0);
   1262 
   1263 	/*
   1264 	 * Set the thread in the TS_FREE state.  The state will change
   1265 	 * to TS_ONPROC only while the interrupt is active.  Think of these
   1266 	 * as being on a private free list for the CPU.  Being TS_FREE keeps
   1267 	 * inactive interrupt threads out of debugger thread lists.
   1268 	 *
   1269 	 * We cannot call thread_create with TS_FREE because of the current
   1270 	 * checks there for ONPROC.  Fix this when thread_create takes flags.
   1271 	 */
   1272 	THREAD_FREEINTR(tp, cp);
   1273 
   1274 	/*
   1275 	 * Nobody should ever reference the credentials of an interrupt
   1276 	 * thread so make it NULL to catch any such references.
   1277 	 */
   1278 	tp->t_cred = NULL;
   1279 	tp->t_flag |= T_INTR_THREAD;
   1280 	tp->t_cpu = cp;
   1281 	tp->t_bound_cpu = cp;
   1282 	tp->t_disp_queue = cp->cpu_disp;
   1283 	tp->t_affinitycnt = 1;
   1284 	tp->t_preempt = 1;
   1285 
   1286 	/*
   1287 	 * Don't make a user-requested binding on this thread so that
   1288 	 * the processor can be offlined.
   1289 	 */
   1290 	tp->t_bind_cpu = PBIND_NONE;	/* no USER-requested binding */
   1291 	tp->t_bind_pset = PS_NONE;
   1292 
   1293 #if defined(__i386) || defined(__amd64)
   1294 	tp->t_stk -= STACK_ALIGN;
   1295 	*(tp->t_stk) = 0;		/* terminate intr thread stack */
   1296 #endif
   1297 
   1298 	/*
   1299 	 * Link onto CPU's interrupt pool.
   1300 	 */
   1301 	tp->t_link = cp->cpu_intr_thread;
   1302 	cp->cpu_intr_thread = tp;
   1303 }
   1304 
   1305 /*
   1306  * TSD -- THREAD SPECIFIC DATA
   1307  */
    1308 static kmutex_t		tsd_mutex;	 /* guards tsd_nkeys, tsd_destructor[] and tsd_list */
    1309 static uint_t		tsd_nkeys;	 /* number of slots in tsd_destructor[] */
    1310 /* per-key destructor funcs, indexed by (key - 1); a NULL slot is an unused key */
    1311 static void 		(**tsd_destructor)(void *);
    1312 /* head of the doubly-linked (ts_next/ts_prev) list of tsd_thread's */
    1313 static struct tsd_thread	*tsd_list;
   1314 
   1315 /*
   1316  * Default destructor
   1317  *	Needed because NULL destructor means that the key is unused
   1318  */
   1319 /* ARGSUSED */
   1320 void
   1321 tsd_defaultdestructor(void *value)
   1322 {}
   1323 
   1324 /*
   1325  * Create a key (index into per thread array)
   1326  *	Locks out tsd_create, tsd_destroy, and tsd_exit
   1327  *	May allocate memory with lock held
   1328  */
   1329 void
   1330 tsd_create(uint_t *keyp, void (*destructor)(void *))
   1331 {
   1332 	int	i;
   1333 	uint_t	nkeys;
   1334 
   1335 	/*
   1336 	 * if key is allocated, do nothing
   1337 	 */
   1338 	mutex_enter(&tsd_mutex);
   1339 	if (*keyp) {
   1340 		mutex_exit(&tsd_mutex);
   1341 		return;
   1342 	}
   1343 	/*
   1344 	 * find an unused key
   1345 	 */
   1346 	if (destructor == NULL)
   1347 		destructor = tsd_defaultdestructor;
   1348 
   1349 	for (i = 0; i < tsd_nkeys; ++i)
   1350 		if (tsd_destructor[i] == NULL)
   1351 			break;
   1352 
   1353 	/*
   1354 	 * if no unused keys, increase the size of the destructor array
   1355 	 */
   1356 	if (i == tsd_nkeys) {
   1357 		if ((nkeys = (tsd_nkeys << 1)) == 0)
   1358 			nkeys = 1;
   1359 		tsd_destructor =
   1360 		    (void (**)(void *))tsd_realloc((void *)tsd_destructor,
   1361 		    (size_t)(tsd_nkeys * sizeof (void (*)(void *))),
   1362 		    (size_t)(nkeys * sizeof (void (*)(void *))));
   1363 		tsd_nkeys = nkeys;
   1364 	}
   1365 
   1366 	/*
   1367 	 * allocate the next available unused key
   1368 	 */
   1369 	tsd_destructor[i] = destructor;
   1370 	*keyp = i + 1;
   1371 	mutex_exit(&tsd_mutex);
   1372 }
   1373 
   1374 /*
   1375  * Destroy a key -- this is for unloadable modules
   1376  *
   1377  * Assumes that the caller is preventing tsd_set and tsd_get
   1378  * Locks out tsd_create, tsd_destroy, and tsd_exit
   1379  * May free memory with lock held
   1380  */
   1381 void
   1382 tsd_destroy(uint_t *keyp)
   1383 {
   1384 	uint_t key;
   1385 	struct tsd_thread *tsd;
   1386 
   1387 	/*
   1388 	 * protect the key namespace and our destructor lists
   1389 	 */
   1390 	mutex_enter(&tsd_mutex);
   1391 	key = *keyp;
   1392 	*keyp = 0;
   1393 
   1394 	ASSERT(key <= tsd_nkeys);
   1395 
   1396 	/*
   1397 	 * if the key is valid
   1398 	 */
   1399 	if (key != 0) {
   1400 		uint_t k = key - 1;
   1401 		/*
   1402 		 * for every thread with TSD, call key's destructor
   1403 		 */
   1404 		for (tsd = tsd_list; tsd; tsd = tsd->ts_next) {
   1405 			/*
   1406 			 * no TSD for key in this thread
   1407 			 */
   1408 			if (key > tsd->ts_nkeys)
   1409 				continue;
   1410 			/*
   1411 			 * call destructor for key
   1412 			 */
   1413 			if (tsd->ts_value[k] && tsd_destructor[k])
   1414 				(*tsd_destructor[k])(tsd->ts_value[k]);
   1415 			/*
   1416 			 * reset value for key
   1417 			 */
   1418 			tsd->ts_value[k] = NULL;
   1419 		}
   1420 		/*
   1421 		 * actually free the key (NULL destructor == unused)
   1422 		 */
   1423 		tsd_destructor[k] = NULL;
   1424 	}
   1425 
   1426 	mutex_exit(&tsd_mutex);
   1427 }
   1428 
   1429 /*
   1430  * Quickly return the per thread value that was stored with the specified key
   1431  * Assumes the caller is protecting key from tsd_create and tsd_destroy
   1432  */
   1433 void *
   1434 tsd_get(uint_t key)
   1435 {
   1436 	return (tsd_agent_get(curthread, key));
   1437 }
   1438 
   1439 /*
   1440  * Set a per thread value indexed with the specified key
   1441  */
   1442 int
   1443 tsd_set(uint_t key, void *value)
   1444 {
   1445 	return (tsd_agent_set(curthread, key, value));
   1446 }
   1447 
   1448 /*
   1449  * Like tsd_get(), except that the agent lwp can get the tsd of
   1450  * another thread in the same process (the agent thread only runs when the
   1451  * process is completely stopped by /proc), or syslwp is creating a new lwp.
   1452  */
   1453 void *
   1454 tsd_agent_get(kthread_t *t, uint_t key)
   1455 {
   1456 	struct tsd_thread *tsd = t->t_tsd;
   1457 
   1458 	ASSERT(t == curthread ||
   1459 	    ttoproc(t)->p_agenttp == curthread || t->t_state == TS_STOPPED);
   1460 
   1461 	if (key && tsd != NULL && key <= tsd->ts_nkeys)
   1462 		return (tsd->ts_value[key - 1]);
   1463 	return (NULL);
   1464 }
   1465 
   1466 /*
   1467  * Like tsd_set(), except that the agent lwp can set the tsd of
   1468  * another thread in the same process, or syslwp can set the tsd
   1469  * of a thread it's in the middle of creating.
   1470  *
   1471  * Assumes the caller is protecting key from tsd_create and tsd_destroy
   1472  * May lock out tsd_destroy (and tsd_create), may allocate memory with
   1473  * lock held
   1474  */
   1475 int
   1476 tsd_agent_set(kthread_t *t, uint_t key, void *value)
   1477 {
   1478 	struct tsd_thread *tsd = t->t_tsd;
   1479 
   1480 	ASSERT(t == curthread ||
   1481 	    ttoproc(t)->p_agenttp == curthread || t->t_state == TS_STOPPED);
   1482 
   1483 	if (key == 0)
   1484 		return (EINVAL);
   1485 	if (tsd == NULL)
   1486 		tsd = t->t_tsd = kmem_zalloc(sizeof (*tsd), KM_SLEEP);
   1487 	if (key <= tsd->ts_nkeys) {
   1488 		tsd->ts_value[key - 1] = value;
   1489 		return (0);
   1490 	}
   1491 
   1492 	ASSERT(key <= tsd_nkeys);
   1493 
   1494 	/*
   1495 	 * lock out tsd_destroy()
   1496 	 */
   1497 	mutex_enter(&tsd_mutex);
   1498 	if (tsd->ts_nkeys == 0) {
   1499 		/*
   1500 		 * Link onto list of threads with TSD
   1501 		 */
   1502 		if ((tsd->ts_next = tsd_list) != NULL)
   1503 			tsd_list->ts_prev = tsd;
   1504 		tsd_list = tsd;
   1505 	}
   1506 
   1507 	/*
   1508 	 * Allocate thread local storage and set the value for key
   1509 	 */
   1510 	tsd->ts_value = tsd_realloc(tsd->ts_value,
   1511 	    tsd->ts_nkeys * sizeof (void *),
   1512 	    key * sizeof (void *));
   1513 	tsd->ts_nkeys = key;
   1514 	tsd->ts_value[key - 1] = value;
   1515 	mutex_exit(&tsd_mutex);
   1516 
   1517 	return (0);
   1518 }
   1519 
   1520 
   1521 /*
   1522  * Return the per thread value that was stored with the specified key
   1523  *	If necessary, create the key and the value
   1524  *	Assumes the caller is protecting *keyp from tsd_destroy
   1525  */
   1526 void *
   1527 tsd_getcreate(uint_t *keyp, void (*destroy)(void *), void *(*allocate)(void))
   1528 {
   1529 	void *value;
   1530 	uint_t key = *keyp;
   1531 	struct tsd_thread *tsd = curthread->t_tsd;
   1532 
   1533 	if (tsd == NULL)
   1534 		tsd = curthread->t_tsd = kmem_zalloc(sizeof (*tsd), KM_SLEEP);
   1535 	if (key && key <= tsd->ts_nkeys && (value = tsd->ts_value[key - 1]))
   1536 		return (value);
   1537 	if (key == 0)
   1538 		tsd_create(keyp, destroy);
   1539 	(void) tsd_set(*keyp, value = (*allocate)());
   1540 
   1541 	return (value);
   1542 }
   1543 
   1544 /*
   1545  * Called from thread_exit() to run the destructor function for each tsd
   1546  *	Locks out tsd_create and tsd_destroy
   1547  *	Assumes that the destructor *DOES NOT* use tsd
   1548  */
   1549 void
   1550 tsd_exit(void)
   1551 {
   1552 	int i;
   1553 	struct tsd_thread *tsd = curthread->t_tsd;
   1554 
   1555 	if (tsd == NULL)
   1556 		return;
   1557 
   1558 	if (tsd->ts_nkeys == 0) {
   1559 		kmem_free(tsd, sizeof (*tsd));
   1560 		curthread->t_tsd = NULL;
   1561 		return;
   1562 	}
   1563 
   1564 	/*
   1565 	 * lock out tsd_create and tsd_destroy, call
   1566 	 * the destructor, and mark the value as destroyed.
   1567 	 */
   1568 	mutex_enter(&tsd_mutex);
   1569 
   1570 	for (i = 0; i < tsd->ts_nkeys; i++) {
   1571 		if (tsd->ts_value[i] && tsd_destructor[i])
   1572 			(*tsd_destructor[i])(tsd->ts_value[i]);
   1573 		tsd->ts_value[i] = NULL;
   1574 	}
   1575 
   1576 	/*
   1577 	 * remove from linked list of threads with TSD
   1578 	 */
   1579 	if (tsd->ts_next)
   1580 		tsd->ts_next->ts_prev = tsd->ts_prev;
   1581 	if (tsd->ts_prev)
   1582 		tsd->ts_prev->ts_next = tsd->ts_next;
   1583 	if (tsd_list == tsd)
   1584 		tsd_list = tsd->ts_next;
   1585 
   1586 	mutex_exit(&tsd_mutex);
   1587 
   1588 	/*
   1589 	 * free up the TSD
   1590 	 */
   1591 	kmem_free(tsd->ts_value, tsd->ts_nkeys * sizeof (void *));
   1592 	kmem_free(tsd, sizeof (struct tsd_thread));
   1593 	curthread->t_tsd = NULL;
   1594 }
   1595 
   1596 /*
   1597  * realloc
   1598  */
   1599 static void *
   1600 tsd_realloc(void *old, size_t osize, size_t nsize)
   1601 {
   1602 	void *new;
   1603 
   1604 	new = kmem_zalloc(nsize, KM_SLEEP);
   1605 	if (old) {
   1606 		bcopy(old, new, osize);
   1607 		kmem_free(old, osize);
   1608 	}
   1609 	return (new);
   1610 }
   1611 
   1612 /*
   1613  * Check to see if an interrupt thread might be active at a given ipl.
   1614  * If so return true.
   1615  * We must be conservative--it is ok to give a false yes, but a false no
   1616  * will cause disaster.  (But if the situation changes after we check it is
   1617  * ok--the caller is trying to ensure that an interrupt routine has been
   1618  * exited).
   1619  * This is used when trying to remove an interrupt handler from an autovector
   1620  * list in avintr.c.
   1621  */
   1622 int
   1623 intr_active(struct cpu *cp, int level)
   1624 {
   1625 	if (level <= LOCK_LEVEL)
   1626 		return (cp->cpu_thread != cp->cpu_dispthread);
   1627 	else
   1628 		return (CPU_ON_INTR(cp));
   1629 }
   1630 
   1631 /*
   1632  * Return non-zero if an interrupt is being serviced.
   1633  */
   1634 int
   1635 servicing_interrupt()
   1636 {
   1637 	int onintr = 0;
   1638 
   1639 	/* Are we an interrupt thread */
   1640 	if (curthread->t_flag & T_INTR_THREAD)
   1641 		return (1);
   1642 	/* Are we servicing a high level interrupt? */
   1643 	if (CPU_ON_INTR(CPU)) {
   1644 		kpreempt_disable();
   1645 		onintr = CPU_ON_INTR(CPU);
   1646 		kpreempt_enable();
   1647 	}
   1648 	return (onintr);
   1649 }
   1650 
   1651 
   1652 /*
   1653  * Change the dispatch priority of a thread in the system.
   1654  * Used when raising or lowering a thread's priority.
   1655  * (E.g., priority inheritance)
   1656  *
   1657  * Since threads are queued according to their priority, we
    1658  * must check the thread's state to determine whether it
   1659  * is on a queue somewhere. If it is, we've got to:
   1660  *
   1661  *	o Dequeue the thread.
   1662  *	o Change its effective priority.
   1663  *	o Enqueue the thread.
   1664  *
   1665  * Assumptions: The thread whose priority we wish to change
   1666  * must be locked before we call thread_change_(e)pri().
    1667  * The thread_change_(e)pri() function doesn't drop the thread
   1668  * lock--that must be done by its caller.
   1669  */
   1670 void
   1671 thread_change_epri(kthread_t *t, pri_t disp_pri)
   1672 {
   1673 	uint_t	state;
   1674 
   1675 	ASSERT(THREAD_LOCK_HELD(t));
   1676 
   1677 	/*
   1678 	 * If the inherited priority hasn't actually changed,
   1679 	 * just return.
   1680 	 */
   1681 	if (t->t_epri == disp_pri)
   1682 		return;
   1683 
   1684 	state = t->t_state;
   1685 
   1686 	/*
   1687 	 * If it's not on a queue, change the priority with
   1688 	 * impunity.
   1689 	 */
   1690 	if ((state & (TS_SLEEP | TS_RUN | TS_WAIT)) == 0) {
   1691 		t->t_epri = disp_pri;
   1692 
   1693 		if (state == TS_ONPROC) {
   1694 			cpu_t *cp = t->t_disp_queue->disp_cpu;
   1695 
   1696 			if (t == cp->cpu_dispthread)
   1697 				cp->cpu_dispatch_pri = DISP_PRIO(t);
   1698 		}
   1699 		return;
   1700 	}
   1701 
   1702 	/*
   1703 	 * It's either on a sleep queue or a run queue.
   1704 	 */
   1705 	if (state == TS_SLEEP) {
   1706 		/*
   1707 		 * Take the thread out of its sleep queue.
   1708 		 * Change the inherited priority.
   1709 		 * Re-enqueue the thread.
   1710 		 * Each synchronization object exports a function
   1711 		 * to do this in an appropriate manner.
   1712 		 */
   1713 		SOBJ_CHANGE_EPRI(t->t_sobj_ops, t, disp_pri);
   1714 	} else if (state == TS_WAIT) {
   1715 		/*
   1716 		 * Re-enqueue a thread on the wait queue if its
   1717 		 * effective priority needs to change.
   1718 		 */
   1719 		if (disp_pri != t->t_epri)
   1720 			waitq_change_pri(t, disp_pri);
   1721 	} else {
   1722 		/*
   1723 		 * The thread is on a run queue.
   1724 		 * Note: setbackdq() may not put the thread
   1725 		 * back on the same run queue where it originally
   1726 		 * resided.
   1727 		 */
   1728 		(void) dispdeq(t);
   1729 		t->t_epri = disp_pri;
   1730 		setbackdq(t);
   1731 	}
   1732 }	/* end of thread_change_epri */
   1733 
   1734 /*
   1735  * Function: Change the t_pri field of a thread.
   1736  * Side Effects: Adjust the thread ordering on a run queue
   1737  *		 or sleep queue, if necessary.
   1738  * Returns: 1 if the thread was on a run queue, else 0.
   1739  */
   1740 int
   1741 thread_change_pri(kthread_t *t, pri_t disp_pri, int front)
   1742 {
   1743 	uint_t	state;
   1744 	int	on_rq = 0;
   1745 
   1746 	ASSERT(THREAD_LOCK_HELD(t));
   1747 
   1748 	state = t->t_state;
   1749 	THREAD_WILLCHANGE_PRI(t, disp_pri);
   1750 
   1751 	/*
   1752 	 * If it's not on a queue, change the priority with
   1753 	 * impunity.
   1754 	 */
   1755 	if ((state & (TS_SLEEP | TS_RUN | TS_WAIT)) == 0) {
   1756 		t->t_pri = disp_pri;
   1757 
   1758 		if (state == TS_ONPROC) {
   1759 			cpu_t *cp = t->t_disp_queue->disp_cpu;
   1760 
   1761 			if (t == cp->cpu_dispthread)
   1762 				cp->cpu_dispatch_pri = DISP_PRIO(t);
   1763 		}
   1764 		return (0);
   1765 	}
   1766 
   1767 	/*
   1768 	 * It's either on a sleep queue or a run queue.
   1769 	 */
   1770 	if (state == TS_SLEEP) {
   1771 		/*
   1772 		 * If the priority has changed, take the thread out of
   1773 		 * its sleep queue and change the priority.
   1774 		 * Re-enqueue the thread.
   1775 		 * Each synchronization object exports a function
   1776 		 * to do this in an appropriate manner.
   1777 		 */
   1778 		if (disp_pri != t->t_pri)
   1779 			SOBJ_CHANGE_PRI(t->t_sobj_ops, t, disp_pri);
   1780 	} else if (state == TS_WAIT) {
   1781 		/*
   1782 		 * Re-enqueue a thread on the wait queue if its
   1783 		 * priority needs to change.
   1784 		 */
   1785 		if (disp_pri != t->t_pri)
   1786 			waitq_change_pri(t, disp_pri);
   1787 	} else {
   1788 		/*
   1789 		 * The thread is on a run queue.
   1790 		 * Note: setbackdq() may not put the thread
   1791 		 * back on the same run queue where it originally
   1792 		 * resided.
   1793 		 *
   1794 		 * We still requeue the thread even if the priority
   1795 		 * is unchanged to preserve round-robin (and other)
   1796 		 * effects between threads of the same priority.
   1797 		 */
   1798 		on_rq = dispdeq(t);
   1799 		ASSERT(on_rq);
   1800 		t->t_pri = disp_pri;
   1801 		if (front) {
   1802 			setfrontdq(t);
   1803 		} else {
   1804 			setbackdq(t);
   1805 		}
   1806 	}
   1807 	return (on_rq);
   1808 }
   1809