Home | History | Annotate | Download | only in os
      1 /*
      2  * CDDL HEADER START
      3  *
      4  * The contents of this file are subject to the terms of the
      5  * Common Development and Distribution License (the "License").
      6  * You may not use this file except in compliance with the License.
      7  *
      8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
      9  * or http://www.opensolaris.org/os/licensing.
     10  * See the License for the specific language governing permissions
     11  * and limitations under the License.
     12  *
     13  * When distributing Covered Code, include this CDDL HEADER in each
     14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
     15  * If applicable, add the following below this CDDL HEADER, with the
     16  * fields enclosed by brackets "[]" replaced with your own identifying
     17  * information: Portions Copyright [yyyy] [name of copyright owner]
     18  *
     19  * CDDL HEADER END
     20  */
     21 /*
     22  * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
     23  * Use is subject to license terms.
     24  */
     25 
     26 #pragma ident	"@(#)cpu.c	1.190	08/01/03 SMI"
     27 
     28 /*
     29  * Architecture-independent CPU control functions.
     30  */
     31 
     32 #include <sys/types.h>
     33 #include <sys/param.h>
     34 #include <sys/var.h>
     35 #include <sys/thread.h>
     36 #include <sys/cpuvar.h>
     37 #include <sys/kstat.h>
     38 #include <sys/uadmin.h>
     39 #include <sys/systm.h>
     40 #include <sys/errno.h>
     41 #include <sys/cmn_err.h>
     42 #include <sys/procset.h>
     43 #include <sys/processor.h>
     44 #include <sys/debug.h>
     45 #include <sys/cpupart.h>
     46 #include <sys/lgrp.h>
     47 #include <sys/pset.h>
     48 #include <sys/pghw.h>
     49 #include <sys/kmem.h>
     50 #include <sys/kmem_impl.h>	/* to set per-cpu kmem_cache offset */
     51 #include <sys/atomic.h>
     52 #include <sys/callb.h>
     53 #include <sys/vtrace.h>
     54 #include <sys/cyclic.h>
     55 #include <sys/bitmap.h>
     56 #include <sys/nvpair.h>
     57 #include <sys/pool_pset.h>
     58 #include <sys/msacct.h>
     59 #include <sys/time.h>
     60 #include <sys/archsystm.h>
     61 #if defined(__x86)
     62 #include <sys/x86_archext.h>
     63 #endif
     64 
     65 extern int	mp_cpu_start(cpu_t *);	/* mp_* routines: extern, defined outside this file */
     66 extern int	mp_cpu_stop(cpu_t *);
     67 extern int	mp_cpu_poweron(cpu_t *);
     68 extern int	mp_cpu_poweroff(cpu_t *);
     69 extern int	mp_cpu_configure(int);
     70 extern int	mp_cpu_unconfigure(int);
     71 extern void	mp_cpu_faulted_enter(cpu_t *);
     72 extern void	mp_cpu_faulted_exit(cpu_t *);
     73 
     74 extern int cmp_cpu_to_chip(processorid_t cpuid);
     75 #ifdef __sparcv9
     76 extern char *cpu_fru_fmri(cpu_t *cp);	/* only declared for __sparcv9 builds */
     77 #endif
     78 
     79 static void cpu_add_active_internal(cpu_t *cp);	/* file-local forward declarations */
     80 static void cpu_remove_active(cpu_t *cp);
     81 static void cpu_info_kstat_create(cpu_t *cp);
     82 static void cpu_info_kstat_destroy(cpu_t *cp);
     83 static void cpu_stats_kstat_create(cpu_t *cp);
     84 static void cpu_stats_kstat_destroy(cpu_t *cp);
     85 
     86 static int cpu_sys_stats_ks_update(kstat_t *ksp, int rw);	/* *_ks_update: take a kstat_t and an rw flag */
     87 static int cpu_vm_stats_ks_update(kstat_t *ksp, int rw);
     88 static int cpu_stat_ks_update(kstat_t *ksp, int rw);
     89 static int cpu_state_change_hooks(int, cpu_setup_t, cpu_setup_t);
     90 
     91 /*
     92  * cpu_lock protects ncpus, ncpus_online, cpu_flag, cpu_list, cpu_active,
     93  * and dispatch queue reallocations.  The lock ordering with respect to
     94  * related locks is:
     95  *
     96  *	cpu_lock --> thread_free_lock  --->  p_lock  --->  thread_lock()
     97  *
     98  * Warning:  Certain sections of code do not use the cpu_lock when
     99  * traversing the cpu_list (e.g. mutex_vector_enter(), clock()).  Since
    100  * all cpus are paused during modifications to this list, a solution
    101  * to protect the list is to either disable kernel preemption while
    102  * walking the list, *or* recheck the cpu_next pointer at each
    103  * iteration in the loop.  Note that in no cases can any cached
    104  * copies of the cpu pointers be kept as they may become invalid.
    105  */
    106 kmutex_t	cpu_lock;		/* see lock-ordering comment above */
    107 cpu_t		*cpu_list;		/* list of all CPUs */
    108 cpu_t		*clock_cpu_list;	/* used by clock to walk CPUs */
    109 cpu_t		*cpu_active;		/* list of active CPUs */
    110 static cpuset_t	cpu_available;		/* set of available CPUs */
    111 cpuset_t	cpu_seqid_inuse;	/* which cpu_seqids are in use */
    112 
    113 /*
    114  * max_ncpus keeps the max cpus the system can have. Initially
    115  * it's NCPU, but since most archs scan the devtree for cpus
    116  * fairly early on during boot, the real max can be known before
    117  * ncpus is set (useful for early NCPU based allocations).
    118  */
    119 int max_ncpus = NCPU;
    120 /*
    121  * platforms that set max_ncpus to maximum number of cpus that can be
    122  * dynamically added will set boot_max_ncpus to the number of cpus found
    123  * at device tree scan time during boot.
    124  */
    125 int boot_max_ncpus = -1;	/* -1 until a platform sets it */
    126 /*
    127  * Maximum possible CPU id.  This can never be >= NCPU since NCPU is
    128  * used to size arrays that are indexed by CPU id.
    129  */
    130 processorid_t max_cpuid = NCPU - 1;
    131 
    132 int ncpus = 1;			/* protected by cpu_lock */
    133 int ncpus_online = 1;		/* protected by cpu_lock */
    134 
    135 /*
    136  * CPU that we're trying to offline.  Protected by cpu_lock.
    137  */
    138 cpu_t *cpu_inmotion;
    139 
    140 /*
    141  * Can be raised to suppress further weakbinding, which are instead
    142  * satisfied by disabling preemption.  Must be raised/lowered under cpu_lock,
    143  * while individual thread weakbinding synchronisation is done under thread
    144  * lock.
    145  */
    146 int weakbindingbarrier;
    147 
    148 /*
    149  * Variables used in pause_cpus().
         *
         * safe_list[] is indexed by the index passed to cpu_pause() and holds
         * that pause thread's PAUSE_* state (PAUSE_READY, PAUSE_DIE and
         * PAUSE_DEAD in cpu_pause(); reset to PAUSE_IDLE by start_cpus()).
    150  */
    151 static volatile char safe_list[NCPU];
    152 
    153 static struct _cpu_pause_info {
    154 	int		cp_spl;		/* spl saved in pause_cpus() */
    155 	volatile int	cp_go;		/* Go signal sent after all ready */
    156 	int		cp_count;	/* # of CPUs to pause */
    157 	ksema_t		cp_sem;		/* synch pause_cpus & cpu_pause */
    158 	kthread_id_t	cp_paused;	/* asserted non-NULL, then cleared, by start_cpus() */
    159 } cpu_pause_info;
    160 
    161 static kmutex_t pause_free_mutex;	/* held by cpu_pause() while it stores PAUSE_DEAD */
    162 static kcondvar_t pause_free_cv;	/* broadcast by cpu_pause() after storing PAUSE_DEAD */
    163 
    164 void *(*cpu_pause_func)(void *) = NULL;	/* if non-NULL, cpu_pause() calls it with its index */
    165 
    166 
    167 static struct cpu_sys_stats_ks_data {	/* one kstat_named_t per "sys" statistic */
    168 	kstat_named_t cpu_ticks_idle;
    169 	kstat_named_t cpu_ticks_user;
    170 	kstat_named_t cpu_ticks_kernel;
    171 	kstat_named_t cpu_ticks_wait;
    172 	kstat_named_t cpu_nsec_idle;
    173 	kstat_named_t cpu_nsec_user;
    174 	kstat_named_t cpu_nsec_kernel;
    175 	kstat_named_t cpu_nsec_intr;
    176 	kstat_named_t cpu_load_intr;
    177 	kstat_named_t wait_ticks_io;
    178 	kstat_named_t bread;
    179 	kstat_named_t bwrite;
    180 	kstat_named_t lread;
    181 	kstat_named_t lwrite;
    182 	kstat_named_t phread;
    183 	kstat_named_t phwrite;
    184 	kstat_named_t pswitch;
    185 	kstat_named_t trap;
    186 	kstat_named_t intr;
    187 	kstat_named_t syscall;
    188 	kstat_named_t sysread;
    189 	kstat_named_t syswrite;
    190 	kstat_named_t sysfork;
    191 	kstat_named_t sysvfork;
    192 	kstat_named_t sysexec;
    193 	kstat_named_t readch;
    194 	kstat_named_t writech;
    195 	kstat_named_t rcvint;
    196 	kstat_named_t xmtint;
    197 	kstat_named_t mdmint;
    198 	kstat_named_t rawch;
    199 	kstat_named_t canch;
    200 	kstat_named_t outch;
    201 	kstat_named_t msg;
    202 	kstat_named_t sema;
    203 	kstat_named_t namei;
    204 	kstat_named_t ufsiget;
    205 	kstat_named_t ufsdirblk;
    206 	kstat_named_t ufsipage;
    207 	kstat_named_t ufsinopage;
    208 	kstat_named_t procovf;
    209 	kstat_named_t intrthread;
    210 	kstat_named_t intrblk;
    211 	kstat_named_t intrunpin;
    212 	kstat_named_t idlethread;
    213 	kstat_named_t inv_swtch;
    214 	kstat_named_t nthreads;
    215 	kstat_named_t cpumigrate;
    216 	kstat_named_t xcalls;
    217 	kstat_named_t mutex_adenters;
    218 	kstat_named_t rw_rdfails;
    219 	kstat_named_t rw_wrfails;
    220 	kstat_named_t modload;
    221 	kstat_named_t modunload;
    222 	kstat_named_t bawrite;
    223 	kstat_named_t iowait;
    224 } cpu_sys_stats_ks_data_template = {	/* { name, type } per field, in field order */
    225 	{ "cpu_ticks_idle", 	KSTAT_DATA_UINT64 },
    226 	{ "cpu_ticks_user", 	KSTAT_DATA_UINT64 },
    227 	{ "cpu_ticks_kernel", 	KSTAT_DATA_UINT64 },
    228 	{ "cpu_ticks_wait", 	KSTAT_DATA_UINT64 },
    229 	{ "cpu_nsec_idle",	KSTAT_DATA_UINT64 },
    230 	{ "cpu_nsec_user",	KSTAT_DATA_UINT64 },
    231 	{ "cpu_nsec_kernel",	KSTAT_DATA_UINT64 },
    232 	{ "cpu_nsec_intr",	KSTAT_DATA_UINT64 },
    233 	{ "cpu_load_intr",	KSTAT_DATA_UINT64 },
    234 	{ "wait_ticks_io", 	KSTAT_DATA_UINT64 },
    235 	{ "bread", 		KSTAT_DATA_UINT64 },
    236 	{ "bwrite", 		KSTAT_DATA_UINT64 },
    237 	{ "lread", 		KSTAT_DATA_UINT64 },
    238 	{ "lwrite", 		KSTAT_DATA_UINT64 },
    239 	{ "phread", 		KSTAT_DATA_UINT64 },
    240 	{ "phwrite", 		KSTAT_DATA_UINT64 },
    241 	{ "pswitch", 		KSTAT_DATA_UINT64 },
    242 	{ "trap", 		KSTAT_DATA_UINT64 },
    243 	{ "intr", 		KSTAT_DATA_UINT64 },
    244 	{ "syscall", 		KSTAT_DATA_UINT64 },
    245 	{ "sysread", 		KSTAT_DATA_UINT64 },
    246 	{ "syswrite", 		KSTAT_DATA_UINT64 },
    247 	{ "sysfork", 		KSTAT_DATA_UINT64 },
    248 	{ "sysvfork", 		KSTAT_DATA_UINT64 },
    249 	{ "sysexec", 		KSTAT_DATA_UINT64 },
    250 	{ "readch", 		KSTAT_DATA_UINT64 },
    251 	{ "writech", 		KSTAT_DATA_UINT64 },
    252 	{ "rcvint", 		KSTAT_DATA_UINT64 },
    253 	{ "xmtint", 		KSTAT_DATA_UINT64 },
    254 	{ "mdmint", 		KSTAT_DATA_UINT64 },
    255 	{ "rawch", 		KSTAT_DATA_UINT64 },
    256 	{ "canch", 		KSTAT_DATA_UINT64 },
    257 	{ "outch", 		KSTAT_DATA_UINT64 },
    258 	{ "msg", 		KSTAT_DATA_UINT64 },
    259 	{ "sema", 		KSTAT_DATA_UINT64 },
    260 	{ "namei", 		KSTAT_DATA_UINT64 },
    261 	{ "ufsiget", 		KSTAT_DATA_UINT64 },
    262 	{ "ufsdirblk", 		KSTAT_DATA_UINT64 },
    263 	{ "ufsipage", 		KSTAT_DATA_UINT64 },
    264 	{ "ufsinopage", 	KSTAT_DATA_UINT64 },
    265 	{ "procovf", 		KSTAT_DATA_UINT64 },
    266 	{ "intrthread", 	KSTAT_DATA_UINT64 },
    267 	{ "intrblk", 		KSTAT_DATA_UINT64 },
    268 	{ "intrunpin",		KSTAT_DATA_UINT64 },
    269 	{ "idlethread", 	KSTAT_DATA_UINT64 },
    270 	{ "inv_swtch", 		KSTAT_DATA_UINT64 },
    271 	{ "nthreads", 		KSTAT_DATA_UINT64 },
    272 	{ "cpumigrate", 	KSTAT_DATA_UINT64 },
    273 	{ "xcalls", 		KSTAT_DATA_UINT64 },
    274 	{ "mutex_adenters", 	KSTAT_DATA_UINT64 },
    275 	{ "rw_rdfails", 	KSTAT_DATA_UINT64 },
    276 	{ "rw_wrfails", 	KSTAT_DATA_UINT64 },
    277 	{ "modload", 		KSTAT_DATA_UINT64 },
    278 	{ "modunload", 		KSTAT_DATA_UINT64 },
    279 	{ "bawrite", 		KSTAT_DATA_UINT64 },
    280 	{ "iowait",		KSTAT_DATA_UINT64 },
    281 };
    282 
    283 static struct cpu_vm_stats_ks_data {	/* one kstat_named_t per "vm" statistic */
    284 	kstat_named_t pgrec;
    285 	kstat_named_t pgfrec;
    286 	kstat_named_t pgin;
    287 	kstat_named_t pgpgin;
    288 	kstat_named_t pgout;
    289 	kstat_named_t pgpgout;
    290 	kstat_named_t swapin;
    291 	kstat_named_t pgswapin;
    292 	kstat_named_t swapout;
    293 	kstat_named_t pgswapout;
    294 	kstat_named_t zfod;
    295 	kstat_named_t dfree;
    296 	kstat_named_t scan;
    297 	kstat_named_t rev;
    298 	kstat_named_t hat_fault;
    299 	kstat_named_t as_fault;
    300 	kstat_named_t maj_fault;
    301 	kstat_named_t cow_fault;
    302 	kstat_named_t prot_fault;
    303 	kstat_named_t softlock;
    304 	kstat_named_t kernel_asflt;
    305 	kstat_named_t pgrrun;
    306 	kstat_named_t execpgin;
    307 	kstat_named_t execpgout;
    308 	kstat_named_t execfree;
    309 	kstat_named_t anonpgin;
    310 	kstat_named_t anonpgout;
    311 	kstat_named_t anonfree;
    312 	kstat_named_t fspgin;
    313 	kstat_named_t fspgout;
    314 	kstat_named_t fsfree;
    315 } cpu_vm_stats_ks_data_template = {	/* { name, type } per field, in field order */
    316 	{ "pgrec",		KSTAT_DATA_UINT64 },
    317 	{ "pgfrec",		KSTAT_DATA_UINT64 },
    318 	{ "pgin",		KSTAT_DATA_UINT64 },
    319 	{ "pgpgin",		KSTAT_DATA_UINT64 },
    320 	{ "pgout",		KSTAT_DATA_UINT64 },
    321 	{ "pgpgout",		KSTAT_DATA_UINT64 },
    322 	{ "swapin",		KSTAT_DATA_UINT64 },
    323 	{ "pgswapin",		KSTAT_DATA_UINT64 },
    324 	{ "swapout",		KSTAT_DATA_UINT64 },
    325 	{ "pgswapout",		KSTAT_DATA_UINT64 },
    326 	{ "zfod",		KSTAT_DATA_UINT64 },
    327 	{ "dfree",		KSTAT_DATA_UINT64 },
    328 	{ "scan",		KSTAT_DATA_UINT64 },
    329 	{ "rev",		KSTAT_DATA_UINT64 },
    330 	{ "hat_fault",		KSTAT_DATA_UINT64 },
    331 	{ "as_fault",		KSTAT_DATA_UINT64 },
    332 	{ "maj_fault",		KSTAT_DATA_UINT64 },
    333 	{ "cow_fault",		KSTAT_DATA_UINT64 },
    334 	{ "prot_fault",		KSTAT_DATA_UINT64 },
    335 	{ "softlock",		KSTAT_DATA_UINT64 },
    336 	{ "kernel_asflt",	KSTAT_DATA_UINT64 },
    337 	{ "pgrrun",		KSTAT_DATA_UINT64 },
    338 	{ "execpgin",		KSTAT_DATA_UINT64 },
    339 	{ "execpgout",		KSTAT_DATA_UINT64 },
    340 	{ "execfree",		KSTAT_DATA_UINT64 },
    341 	{ "anonpgin",		KSTAT_DATA_UINT64 },
    342 	{ "anonpgout",		KSTAT_DATA_UINT64 },
    343 	{ "anonfree",		KSTAT_DATA_UINT64 },
    344 	{ "fspgin",		KSTAT_DATA_UINT64 },
    345 	{ "fspgout",		KSTAT_DATA_UINT64 },
    346 	{ "fsfree",		KSTAT_DATA_UINT64 },
    347 };
    348 
    349 /*
    350  * Force the specified thread to migrate to the appropriate processor.
    351  * Called with thread lock held, returns with it dropped.
         *
         * If tp is curthread it is transitioned, handed to CL_SETRUN(), the
         * thread lock is dropped with thread_unlock_nopreempt() and the caller
         * switches away via swtch().  For any other thread: one that is
         * TS_ONPROC is asked to give up its cpu with cpu_surrender(); one that
         * is TS_RUN is removed from its dispatch queue and re-queued with
         * setbackdq(); other states are left alone.  The thread lock is then
         * dropped with thread_unlock().
    352  */
    353 static void
    354 force_thread_migrate(kthread_id_t tp)
    355 {
    356 	ASSERT(THREAD_LOCK_HELD(tp));
    357 	if (tp == curthread) {
    358 		THREAD_TRANSITION(tp);
    359 		CL_SETRUN(tp);
    360 		thread_unlock_nopreempt(tp);
    361 		swtch();
    362 	} else {
    363 		if (tp->t_state == TS_ONPROC) {
    364 			cpu_surrender(tp);
    365 		} else if (tp->t_state == TS_RUN) {
    366 			(void) dispdeq(tp);	/* return value deliberately ignored */
    367 			setbackdq(tp);
    368 		}
    369 		thread_unlock(tp);
    370 	}
    371 }
    372 
    373 /*
    374  * Set affinity for a specified CPU.
    375  * A reference count is incremented and the affinity is held until the
    376  * reference count is decremented to zero by thread_affinity_clear().
    377  * This is so regions of code requiring affinity can be nested.
    378  * Caller needs to ensure that cpu_id remains valid, which can be
    379  * done by holding cpu_lock across this call, unless the caller
    380  * specifies CPU_CURRENT in which case the cpu_lock will be acquired
    381  * by thread_affinity_set and CPU->cpu_id will be the target CPU.
         *
         *	t	thread to bind
         *	cpu_id	index into cpu[], or CPU_CURRENT
         *
         * Panics if t already holds an affinity (t_affinitycnt > 0) to a
         * different cpu.
         *
         * NOTE(review): the cpu_id range and cpu[cpu_id] != NULL checks are
         * ASSERT()s only; presumably they vanish in non-DEBUG builds - confirm.
    382  */
    383 void
    384 thread_affinity_set(kthread_id_t t, int cpu_id)
    385 {
    386 	cpu_t		*cp;
    387 	int		c;	/* caller's original cpu_id; records whether we took cpu_lock */
    388 
    389 	ASSERT(!(t == curthread && t->t_weakbound_cpu != NULL));
    390 
    391 	if ((c = cpu_id) == CPU_CURRENT) {
    392 		mutex_enter(&cpu_lock);
    393 		cpu_id = CPU->cpu_id;
    394 	}
    395 	/*
    396 	 * We should be asserting that cpu_lock is held here, but
    397 	 * the NCA code doesn't acquire it.  The following assert
    398 	 * should be uncommented when the NCA code is fixed.
    399 	 *
    400 	 * ASSERT(MUTEX_HELD(&cpu_lock));
    401 	 */
    402 	ASSERT((cpu_id >= 0) && (cpu_id < NCPU));
    403 	cp = cpu[cpu_id];
    404 	ASSERT(cp != NULL);		/* user must provide a good cpu_id */
    405 	/*
    406 	 * If there is already a hard affinity requested, and this affinity
    407 	 * conflicts with that, panic.
    408 	 */
    409 	thread_lock(t);
    410 	if (t->t_affinitycnt > 0 && t->t_bound_cpu != cp) {
    411 		panic("affinity_set: setting %p but already bound to %p",
    412 		    (void *)cp, (void *)t->t_bound_cpu);
    413 	}
    414 	t->t_affinitycnt++;
    415 	t->t_bound_cpu = cp;
    416 
    417 	/*
    418 	 * Make sure we're running on the right CPU.
    419 	 */
    420 	if (cp != t->t_cpu || t != curthread) {
    421 		force_thread_migrate(t);	/* drops thread lock */
    422 	} else {
    423 		thread_unlock(t);
    424 	}
    425 
    426 	if (c == CPU_CURRENT)	/* release only the cpu_lock we took above */
    427 		mutex_exit(&cpu_lock);
    428 }
    429 
    430 /*
    431  *	Wrapper for backward compatibility.
         *	Applies thread_affinity_set() to curthread with the given cpu_id.
    432  */
    433 void
    434 affinity_set(int cpu_id)
    435 {
    436 	thread_affinity_set(curthread, cpu_id);
    437 }
    438 
    439 /*
    440  * Decrement the affinity reservation count and if it becomes zero,
    441  * clear the CPU affinity for thread t, or set it to the user's
    442  * software binding request.
         *
         * Takes the thread lock on t.  It is released either here or, on the
         * two migration paths, by force_thread_migrate().
         *
         * NOTE(review): t_affinitycnt is decremented without first checking
         * that it is positive; callers presumably pair every clear with an
         * earlier thread_affinity_set() - confirm.
    443  */
    444 void
    445 thread_affinity_clear(kthread_id_t t)
    446 {
    447 	register processorid_t binding;
    448 
    449 	thread_lock(t);
    450 	if (--t->t_affinitycnt == 0) {
    451 		if ((binding = t->t_bind_cpu) == PBIND_NONE) {
    452 			/*
    453 			 * Adjust disp_max_unbound_pri if necessary.
    454 			 */
    455 			disp_adjust_unbound_pri(t);
    456 			t->t_bound_cpu = NULL;
    457 			if (t->t_cpu->cpu_part != t->t_cpupart) {
    458 				force_thread_migrate(t);
    459 				return;		/* force_thread_migrate dropped the lock */
    460 			}
    461 		} else {
    462 			t->t_bound_cpu = cpu[binding];
    463 			/*
    464 			 * Make sure the thread is running on the bound CPU.
    465 			 */
    466 			if (t->t_cpu != t->t_bound_cpu) {
    467 				force_thread_migrate(t);
    468 				return;		/* already dropped lock */
    469 			}
    470 		}
    471 	}
    472 	thread_unlock(t);
    473 }
    474 
    475 /*
    476  * Wrapper for backward compatibility.
         * Applies thread_affinity_clear() to curthread.
    477  */
    478 void
    479 affinity_clear(void)
    480 {
    481 	thread_affinity_clear(curthread);
    482 }
    483 
    484 /*
    485  * Weak cpu affinity.  Bind to the "current" cpu for short periods
    486  * of time during which the thread must not block (but may be preempted).
    487  * Use this instead of kpreempt_disable() when it is only "no migration"
    488  * rather than "no preemption" semantics that are required - disabling
    489  * preemption holds higher priority threads off of cpu and if the
    490  * operation that is protected is more than momentary this is not good
    491  * for realtime etc.
    492  *
    493  * Weakly bound threads will not prevent a cpu from being offlined -
    494  * we'll only run them on the cpu to which they are weakly bound but
    495  * (because they do not block) we'll always be able to move them on to
    496  * another cpu at offline time if we give them just a short moment to
    497  * run during which they will unbind.  To give a cpu a chance of offlining,
    498  * however, we require a barrier to weak bindings that may be raised for a
    499  * given cpu (offline/move code may set this and then wait a short time for
    500  * existing weak bindings to drop); the cpu_inmotion pointer is that barrier.
    501  *
    502  * There are few restrictions on the calling context of thread_nomigrate.
    503  * The caller must not hold the thread lock.  Calls may be nested.
    504  *
    505  * After weakbinding a thread must not perform actions that may block.
    506  * In particular it must not call thread_affinity_set; calling that when
    507  * already weakbound is nonsensical anyway.
    508  *
    509  * If curthread is prevented from migrating for other reasons
    510  * (kernel preemption disabled; high pil; strongly bound; interrupt thread)
    511  * then the weak binding will succeed even if this cpu is the target of an
    512  * offline/move request.
         *
         * The function returns in one of three states:
         *	. interrupt context or pil >= DISP_LEVEL: nothing recorded,
         *	  preemption re-enabled
         *	. t_nomigrate decremented (negative count): preemption is left
         *	  disabled in place of a weak binding
         *	. t_nomigrate incremented and t_weakbound_cpu set to the current
         *	  cpu: preemption re-enabled
    513  */
    514 void
    515 thread_nomigrate(void)
    516 {
    517 	cpu_t *cp;
    518 	kthread_id_t t = curthread;
    519 
    520 again:
    521 	kpreempt_disable();
    522 	cp = CPU;
    523 
    524 	/*
    525 	 * A highlevel interrupt must not modify t_nomigrate or
    526 	 * t_weakbound_cpu of the thread it has interrupted.  A lowlevel
    527 	 * interrupt thread cannot migrate and we can avoid the
    528 	 * thread_lock call below by short-circuiting here.  In either
    529 	 * case we can just return since no migration is possible and
    530 	 * the condition will persist (ie, when we test for these again
    531 	 * in thread_allowmigrate they can't have changed).   Migration
    532 	 * is also impossible if we're at or above DISP_LEVEL pil.
    533 	 */
    534 	if (CPU_ON_INTR(cp) || t->t_flag & T_INTR_THREAD ||
    535 	    getpil() >= DISP_LEVEL) {
    536 		kpreempt_enable();
    537 		return;
    538 	}
    539 
    540 	/*
    541 	 * We must be consistent with existing weak bindings.  Since we
    542 	 * may be interrupted between the increment of t_nomigrate and
    543 	 * the store to t_weakbound_cpu below we cannot assume that
    544 	 * t_weakbound_cpu will be set if t_nomigrate is.  Note that we
    545 	 * cannot assert t_weakbound_cpu == t_bind_cpu since that is not
    546 	 * always the case.
    547 	 */
    548 	if (t->t_nomigrate && t->t_weakbound_cpu && t->t_weakbound_cpu != cp) {
    549 		if (!panicstr)	/* skip the panic if a panic is already in progress */
    550 			panic("thread_nomigrate: binding to %p but already "
    551 			    "bound to %p", (void *)cp,
    552 			    (void *)t->t_weakbound_cpu);
    553 	}
    554 
    555 	/*
    556 	 * At this point we have preemption disabled and we don't yet hold
    557 	 * the thread lock.  So it's possible that somebody else could
    558 	 * set t_bind_cpu here and not be able to force us across to the
    559 	 * new cpu (since we have preemption disabled).
    560 	 */
    561 	thread_lock(curthread);
    562 
    563 	/*
    564 	 * If further weak bindings are being (temporarily) suppressed then
    565 	 * we'll settle for disabling kernel preemption (which assures
    566 	 * no migration provided the thread does not block which it is
    567 	 * not allowed to if using thread_nomigrate).  We must remember
    568 	 * this disposition so we can take appropriate action in
    569 	 * thread_allowmigrate.  If this is a nested call and the
    570 	 * thread is already weakbound then fall through as normal.
    571 	 * We remember the decision to settle for kpreempt_disable through
    572 	 * negative nesting counting in t_nomigrate.  Once a thread has had one
    573 	 * weakbinding request satisfied in this way any further (nested)
    574 	 * requests will continue to be satisfied in the same way,
    575 	 * even if weak bindings have recommenced.
         	 *
         	 * Note the grouping below: && binds tighter than ||, so the test is
         	 * (t_nomigrate < 0) || (weakbindingbarrier && t_nomigrate == 0).
    576 	 */
    577 	if (t->t_nomigrate < 0 || weakbindingbarrier && t->t_nomigrate == 0) {
    578 		--t->t_nomigrate;
    579 		thread_unlock(curthread);
    580 		return;		/* with kpreempt_disable still active */
    581 	}
    582 
    583 	/*
    584 	 * We hold thread_lock so t_bind_cpu cannot change.  We could,
    585 	 * however, be running on a different cpu to which we are t_bound_cpu
    586 	 * to (as explained above).  If we grant the weak binding request
    587 	 * in that case then the dispatcher must favour our weak binding
    588 	 * over our strong (in which case, just as when preemption is
    589 	 * disabled, we can continue to run on a cpu other than the one to
    590 	 * which we are strongbound; the difference in this case is that
    591 	 * this thread can be preempted and so can appear on the dispatch
    592 	 * queues of a cpu other than the one it is strongbound to).
    593 	 *
    594 	 * If the cpu we are running on does not appear to be a current
    595 	 * offline target (we check cpu_inmotion to determine this - since
    596 	 * we don't hold cpu_lock we may not see a recent store to that,
    597 	 * so it's possible that we at times can grant a weak binding to a
    598 	 * cpu that is an offline target, but that one request will not
    599 	 * prevent the offline from succeeding) then we will always grant
    600 	 * the weak binding request.  This includes the case above where
    601 	 * we grant a weakbinding not commensurate with our strong binding.
    602 	 *
    603 	 * If our cpu does appear to be an offline target then we're inclined
    604 	 * not to grant the weakbinding request just yet - we'd prefer to
    605 	 * migrate to another cpu and grant the request there.  The
    606 	 * exceptions are those cases where going through preemption code
    607 	 * will not result in us changing cpu:
    608 	 *
    609 	 *	. interrupts have already bypassed this case (see above)
    610 	 *	. we are already weakbound to this cpu (dispatcher code will
    611 	 *	  always return us to the weakbound cpu)
    612 	 *	. preemption was disabled even before we disabled it above
    613 	 *	. we are strongbound to this cpu (if we're strongbound to
    614 	 *	another and not yet running there the trip through the
    615 	 *	dispatcher will move us to the strongbound cpu and we
    616 	 *	will grant the weak binding there)
    617 	 */
    618 	if (cp != cpu_inmotion || t->t_nomigrate > 0 || t->t_preempt > 1 ||
    619 	    t->t_bound_cpu == cp) {
    620 		/*
    621 		 * Don't be tempted to store to t_weakbound_cpu only on
    622 		 * the first nested bind request - if we're interrupted
    623 		 * after the increment of t_nomigrate and before the
    624 		 * store to t_weakbound_cpu and the interrupt calls
    625 		 * thread_nomigrate then the assertion in thread_allowmigrate
    626 		 * would fail.
    627 		 */
    628 		t->t_nomigrate++;
    629 		t->t_weakbound_cpu = cp;
    630 		membar_producer();
    631 		thread_unlock(curthread);
    632 		/*
    633 		 * Now that we have dropped the thread_lock another thread
    634 		 * can set our t_weakbound_cpu, and will try to migrate us
    635 		 * to the strongbound cpu (which will not be prevented by
    636 		 * preemption being disabled since we're about to enable
    637 		 * preemption).  We have granted the weakbinding to the current
    638 		 * cpu, so again we are in the position that it is possible
    639 		 * that our weak and strong bindings differ.  Again this
    640 		 * is catered for by dispatcher code which will favour our
    641 		 * weak binding.
    642 		 */
    643 		kpreempt_enable();
    644 	} else {
    645 		/*
    646 		 * Move to another cpu before granting the request by
    647 		 * forcing this thread through preemption code.  When we
    648 		 * get to set{front,back}dq called from CL_PREEMPT()
    649 		 * cpu_choose() will be used to select a cpu to queue
    650 		 * us on - that will see cpu_inmotion and take
    651 		 * steps to avoid returning us to this cpu.
    652 		 */
    653 		cp->cpu_kprunrun = 1;
    654 		thread_unlock(curthread);
    655 		kpreempt_enable();	/* will call preempt() */
    656 		goto again;	/* retry from the top after the trip through preempt() */
    657 	}
    658 }
    659 
    660 void
    661 thread_allowmigrate(void)
    662 {
        	/*
        	 * Undo one level of thread_nomigrate() for curthread.
        	 *
        	 * Returns without touching any state when on an interrupt, in an
        	 * interrupt thread, or at pil >= DISP_LEVEL (the same conditions
        	 * under which thread_nomigrate() returns early).  A negative
        	 * t_nomigrate is moved one step back toward zero and one level of
        	 * preemption disabling is released.  Otherwise t_nomigrate is
        	 * decremented and, when it reaches zero, t_weakbound_cpu is
        	 * cleared.
        	 */
    663 	kthread_id_t t = curthread;
    664 
    665 	ASSERT(t->t_weakbound_cpu == CPU ||
    666 	    (t->t_nomigrate < 0 && t->t_preempt > 0) ||
    667 	    CPU_ON_INTR(CPU) || t->t_flag & T_INTR_THREAD ||
    668 	    getpil() >= DISP_LEVEL);
    669 
    670 	if (CPU_ON_INTR(CPU) || (t->t_flag & T_INTR_THREAD) ||
    671 	    getpil() >= DISP_LEVEL)
    672 		return;
    673 
    674 	if (t->t_nomigrate < 0) {
    675 		/*
    676 		 * This thread was granted "weak binding" in the
    677 		 * stronger form of kernel preemption disabling.
    678 		 * Undo a level of nesting for both t_nomigrate
    679 		 * and t_preempt.
    680 		 */
    681 		++t->t_nomigrate;
    682 		kpreempt_enable();
    683 	} else if (--t->t_nomigrate == 0) {
    684 		/*
    685 		 * Time to drop the weak binding.  We need to cater
    686 		 * for the case where we're weakbound to a different
    687 		 * cpu than that to which we're strongbound (a very
    688 		 * temporary arrangement that must only persist until
    689 		 * weak binding drops).  We don't acquire thread_lock
    690 		 * here so even as this code executes t_bound_cpu
    691 		 * may be changing.  So we disable preemption and
    692 		 * a) in the case that t_bound_cpu changes while we
    693 		 * have preemption disabled kprunrun will be set
    694 		 * asynchronously, and b) if before disabling
    695 		 * preemption we were already on a different cpu to
    696 		 * our t_bound_cpu then we set kprunrun ourselves
    697 		 * to force a trip through the dispatcher when
    698 		 * preemption is enabled.
    699 		 */
    700 		kpreempt_disable();
    701 		if (t->t_bound_cpu &&
    702 		    t->t_weakbound_cpu != t->t_bound_cpu)
    703 			CPU->cpu_kprunrun = 1;
    704 		t->t_weakbound_cpu = NULL;
    705 		membar_producer();
    706 		kpreempt_enable();
    707 	}
    708 }
    709 
    710 /*
    711  * weakbinding_stop can be used to temporarily cause weakbindings made
    712  * with thread_nomigrate to be satisfied through the stronger action of
    713  * kpreempt_disable.  weakbinding_start recommences normal weakbinding.
         *
         * Both routines require the caller to hold cpu_lock.
    714  */
    715 
    716 void
    717 weakbinding_stop(void)
    718 {
    719 	ASSERT(MUTEX_HELD(&cpu_lock));
    720 	weakbindingbarrier = 1;
    721 	membar_producer();	/* make visible before subsequent thread_lock */
    722 }
    723 
    724 void
    725 weakbinding_start(void)
    726 {
        	/*
        	 * Lower the barrier raised by weakbinding_stop(); the caller must
        	 * hold cpu_lock.  thread_nomigrate() reads weakbindingbarrier while
        	 * holding its thread lock.
        	 *
        	 * NOTE(review): unlike weakbinding_stop() there is no
        	 * membar_producer() after the store - presumably a late-visible
        	 * clear is harmless; confirm.
        	 */
    727 	ASSERT(MUTEX_HELD(&cpu_lock));
    728 	weakbindingbarrier = 0;
    729 }
    730 
    731 /*
    732  * This routine is called to place the CPUs in a safe place so that
    733  * one of them can be taken off line or placed on line.  What we are
    734  * trying to do here is prevent a thread from traversing the list
    735  * of active CPUs while we are changing it or from getting placed on
    736  * the run queue of a CPU that has just gone off line.  We do this by
    737  * creating a thread with the highest possible prio for each CPU and
    738  * having it call this routine.  The advantage of this method is that
    739  * we can eliminate all checks for CPU_ACTIVE in the disp routines.
    740  * This makes disp faster at the expense of making p_online() slower
    741  * which is a good trade off.
         *
         * index selects this thread's slot in safe_list[].  Each pass of the
         * loop marks the slot PAUSE_READY, posts cp_sem, spins until cp_go is
         * set, raises spl, calls mach_cpu_pause() on the slot, then restores
         * spl and calls swtch().  The loop ends once the slot reads
         * PAUSE_DIE; the slot is then set to PAUSE_DEAD and pause_free_cv is
         * broadcast, both under pause_free_mutex.
    742  */
    743 static void
    744 cpu_pause(int index)
    745 {
    746 	int s;
    747 	struct _cpu_pause_info *cpi = &cpu_pause_info;
    748 	volatile char *safe = &safe_list[index];
    749 	long    lindex = index;		/* widened so it can be cast to void * below */
    750 
    751 	ASSERT((curthread->t_bound_cpu != NULL) || (*safe == PAUSE_DIE));
    752 
    753 	while (*safe != PAUSE_DIE) {
    754 		*safe = PAUSE_READY;
    755 		membar_enter();		/* make sure stores are flushed */
    756 		sema_v(&cpi->cp_sem);	/* signal requesting thread */
    757 
    758 		/*
    759 		 * Wait here until all pause threads are running.  That
    760 		 * indicates that it's safe to do the spl.  Until
    761 		 * cpu_pause_info.cp_go is set, we don't want to spl
    762 		 * because that might block clock interrupts needed
    763 		 * to preempt threads on other CPUs.
    764 		 */
    765 		while (cpi->cp_go == 0)	/* busy-wait; cp_go is declared volatile */
    766 			;
    767 		/*
    768 		 * Even though we are at the highest disp prio, we need
    769 		 * to block out all interrupts below LOCK_LEVEL so that
    770 		 * an intr doesn't come in, wake up a thread, and call
    771 		 * setbackdq/setfrontdq.
    772 		 */
    773 		s = splhigh();
    774 		/*
    775 		 * if cpu_pause_func() has been set then call it using
    776 		 * index as the argument, currently only used by
    777 		 * cpr_suspend_cpus().  This function is used as the
    778 		 * code to execute on the "paused" cpu's when a machine
    779 		 * comes out of a sleep state and CPU's were powered off.
    780 		 * (could also be used for hotplugging CPU's).
    781 		 */
    782 		if (cpu_pause_func != NULL)
    783 			(*cpu_pause_func)((void *)lindex);	/* return value unused */
    784 
    785 		mach_cpu_pause(safe);
    786 
    787 		splx(s);
    788 		/*
    789 		 * Waiting is at an end. Switch out of cpu_pause
    790 		 * loop and resume useful work.
    791 		 */
    792 		swtch();
    793 	}
    794 
    795 	mutex_enter(&pause_free_mutex);
    796 	*safe = PAUSE_DEAD;
    797 	cv_broadcast(&pause_free_cv);
    798 	mutex_exit(&pause_free_mutex);
    799 }
    800 
    801 /*
    802  * Allow the cpus to start running again.
    803  */
    804 void
    805 start_cpus()
    806 {
    807 	int i;
    808 
    809 	ASSERT(MUTEX_HELD(&cpu_lock));
    810 	ASSERT(cpu_pause_info.cp_paused);
    811 	cpu_pause_info.cp_paused = NULL;
    812 	for (i = 0; i < NCPU; i++)
    813 		safe_list[i] = PAUSE_IDLE;
    814 	membar_enter();			/* make sure stores are flushed */
    815 	affinity_clear();
    816 	splx(cpu_pause_info.cp_spl);
    817 	kpreempt_enable();
    818 }
    819 
    820 /*
    821  * Allocate a pause thread for a CPU.
    822  */
    823 static void
    824 cpu_pause_alloc(cpu_t *cp)
    825 {
    826 	kthread_id_t	t;
    827 	long		cpun = cp->cpu_id;
    828 
    829 	/*
    830 	 * Note, v.v_nglobpris will not change value as long as I hold
    831 	 * cpu_lock.
    832 	 */
    833 	t = thread_create(NULL, 0, cpu_pause, (void *)cpun,
    834 	    0, &p0, TS_STOPPED, v.v_nglobpris - 1);
    835 	thread_lock(t);
    836 	t->t_bound_cpu = cp;
    837 	t->t_disp_queue = cp->cpu_disp;
    838 	t->t_affinitycnt = 1;
    839 	t->t_preempt = 1;
    840 	thread_unlock(t);
    841 	cp->cpu_pause_thread = t;
    842 	/*
    843 	 * Registering a thread in the callback table is usually done
    844 	 * in the initialization code of the thread.  In this
    845 	 * case, we do it right after thread creation because the
    846 	 * thread itself may never run, and we need to register the
    847 	 * fact that it is safe for cpr suspend.
    848 	 */
    849 	CALLB_CPR_INIT_SAFE(t, "cpu_pause");
    850 }
    851 
    852 /*
    853  * Free a pause thread for a CPU.
    854  */
    855 static void
    856 cpu_pause_free(cpu_t *cp)
    857 {
    858 	kthread_id_t	t;
    859 	int		cpun = cp->cpu_id;
    860 
    861 	ASSERT(MUTEX_HELD(&cpu_lock));
    862 	/*
    863 	 * We have to get the thread and tell him to die.
    864 	 */
    865 	if ((t = cp->cpu_pause_thread) == NULL) {
    866 		ASSERT(safe_list[cpun] == PAUSE_IDLE);
    867 		return;
    868 	}
    869 	thread_lock(t);
    870 	t->t_cpu = CPU;		/* disp gets upset if last cpu is quiesced. */
    871 	t->t_bound_cpu = NULL;	/* Must un-bind; cpu may not be running. */
    872 	t->t_pri = v.v_nglobpris - 1;
    873 	ASSERT(safe_list[cpun] == PAUSE_IDLE);
    874 	safe_list[cpun] = PAUSE_DIE;
    875 	THREAD_TRANSITION(t);
    876 	setbackdq(t);
    877 	thread_unlock_nopreempt(t);
    878 
    879 	/*
    880 	 * If we don't wait for the thread to actually die, it may try to
    881 	 * run on the wrong cpu as part of an actual call to pause_cpus().
    882 	 */
    883 	mutex_enter(&pause_free_mutex);
    884 	while (safe_list[cpun] != PAUSE_DEAD) {
    885 		cv_wait(&pause_free_cv, &pause_free_mutex);
    886 	}
    887 	mutex_exit(&pause_free_mutex);
    888 	safe_list[cpun] = PAUSE_IDLE;
    889 
    890 	cp->cpu_pause_thread = NULL;
    891 }
    892 
    893 /*
    894  * Initialize basic structures for pausing CPUs.
    895  */
    896 void
    897 cpu_pause_init()
    898 {
    899 	sema_init(&cpu_pause_info.cp_sem, 0, NULL, SEMA_DEFAULT, NULL);
    900 	/*
    901 	 * Create initial CPU pause thread.
    902 	 */
    903 	cpu_pause_alloc(CPU);
    904 }
    905 
    906 /*
    907  * Start the threads used to pause another CPU.
    908  */
    909 static int
    910 cpu_pause_start(processorid_t cpu_id)
    911 {
    912 	int	i;
    913 	int	cpu_count = 0;
    914 
    915 	for (i = 0; i < NCPU; i++) {
    916 		cpu_t		*cp;
    917 		kthread_id_t	t;
    918 
    919 		cp = cpu[i];
    920 		if (!CPU_IN_SET(cpu_available, i) || (i == cpu_id)) {
    921 			safe_list[i] = PAUSE_WAIT;
    922 			continue;
    923 		}
    924 
    925 		/*
    926 		 * Skip CPU if it is quiesced or not yet started.
    927 		 */
    928 		if ((cp->cpu_flags & (CPU_QUIESCED | CPU_READY)) != CPU_READY) {
    929 			safe_list[i] = PAUSE_WAIT;
    930 			continue;
    931 		}
    932 
    933 		/*
    934 		 * Start this CPU's pause thread.
    935 		 */
    936 		t = cp->cpu_pause_thread;
    937 		thread_lock(t);
    938 		/*
    939 		 * Reset the priority, since nglobpris may have
    940 		 * changed since the thread was created, if someone
    941 		 * has loaded the RT (or some other) scheduling
    942 		 * class.
    943 		 */
    944 		t->t_pri = v.v_nglobpris - 1;
    945 		THREAD_TRANSITION(t);
    946 		setbackdq(t);
    947 		thread_unlock_nopreempt(t);
    948 		++cpu_count;
    949 	}
    950 	return (cpu_count);
    951 }
    952 
    953 
    954 /*
    955  * Pause all of the CPUs except the one we are on by creating a high
    956  * priority thread bound to those CPUs.
    957  *
    958  * Note that one must be extremely careful regarding code
    959  * executed while CPUs are paused.  Since a CPU may be paused
    960  * while a thread scheduling on that CPU is holding an adaptive
    961  * lock, code executed with CPUs paused must not acquire adaptive
    962  * (or low-level spin) locks.  Also, such code must not block,
    963  * since the thread that is supposed to initiate the wakeup may
    964  * never run.
    965  *
    966  * With a few exceptions, the restrictions on code executed with CPUs
    967  * paused match those for code executed at high-level interrupt
    968  * context.
    969  */
    970 void
    971 pause_cpus(cpu_t *off_cp)
    972 {
    973 	processorid_t	cpu_id;
    974 	int		i;
    975 	struct _cpu_pause_info	*cpi = &cpu_pause_info;
    976 
    977 	ASSERT(MUTEX_HELD(&cpu_lock));
    978 	ASSERT(cpi->cp_paused == NULL);
    979 	cpi->cp_count = 0;
    980 	cpi->cp_go = 0;
    981 	for (i = 0; i < NCPU; i++)
    982 		safe_list[i] = PAUSE_IDLE;
    983 	kpreempt_disable();
    984 
    985 	/*
    986 	 * If running on the cpu that is going offline, get off it.
    987 	 * This is so that it won't be necessary to rechoose a CPU
    988 	 * when done.
    989 	 */
    990 	if (CPU == off_cp)
    991 		cpu_id = off_cp->cpu_next_part->cpu_id;
    992 	else
    993 		cpu_id = CPU->cpu_id;
    994 	affinity_set(cpu_id);
    995 
    996 	/*
    997 	 * Start the pause threads and record how many were started
    998 	 */
    999 	cpi->cp_count = cpu_pause_start(cpu_id);
   1000 
   1001 	/*
   1002 	 * Now wait for all CPUs to be running the pause thread.
   1003 	 */
   1004 	while (cpi->cp_count > 0) {
   1005 		/*
   1006 		 * Spin reading the count without grabbing the disp
   1007 		 * lock to make sure we don't prevent the pause
   1008 		 * threads from getting the lock.
   1009 		 */
   1010 		while (sema_held(&cpi->cp_sem))
   1011 			;
   1012 		if (sema_tryp(&cpi->cp_sem))
   1013 			--cpi->cp_count;
   1014 	}
   1015 	cpi->cp_go = 1;			/* all have reached cpu_pause */
   1016 
   1017 	/*
   1018 	 * Now wait for all CPUs to spl. (Transition from PAUSE_READY
   1019 	 * to PAUSE_WAIT.)
   1020 	 */
   1021 	for (i = 0; i < NCPU; i++) {
   1022 		while (safe_list[i] != PAUSE_WAIT)
   1023 			;
   1024 	}
   1025 	cpi->cp_spl = splhigh();	/* block dispatcher on this CPU */
   1026 	cpi->cp_paused = curthread;
   1027 }
   1028 
   1029 /*
   1030  * Check whether the current thread has CPUs paused
   1031  */
   1032 int
   1033 cpus_paused(void)
   1034 {
   1035 	if (cpu_pause_info.cp_paused != NULL) {
   1036 		ASSERT(cpu_pause_info.cp_paused == curthread);
   1037 		return (1);
   1038 	}
   1039 	return (0);
   1040 }
   1041 
   1042 static cpu_t *
   1043 cpu_get_all(processorid_t cpun)
   1044 {
   1045 	ASSERT(MUTEX_HELD(&cpu_lock));
   1046 
   1047 	if (cpun >= NCPU || cpun < 0 || !CPU_IN_SET(cpu_available, cpun))
   1048 		return (NULL);
   1049 	return (cpu[cpun]);
   1050 }
   1051 
   1052 /*
   1053  * Check whether cpun is a valid processor id and whether it should be
   1054  * visible from the current zone. If it is, return a pointer to the
   1055  * associated CPU structure.
   1056  */
   1057 cpu_t *
   1058 cpu_get(processorid_t cpun)
   1059 {
   1060 	cpu_t *c;
   1061 
   1062 	ASSERT(MUTEX_HELD(&cpu_lock));
   1063 	c = cpu_get_all(cpun);
   1064 	if (c != NULL && !INGLOBALZONE(curproc) && pool_pset_enabled() &&
   1065 	    zone_pset_get(curproc->p_zone) != cpupart_query_cpu(c))
   1066 		return (NULL);
   1067 	return (c);
   1068 }
   1069 
   1070 /*
   1071  * The following functions should be used to check CPU states in the kernel.
   1072  * They should be invoked with cpu_lock held.  Kernel subsystems interested
   1073  * in CPU states should *not* use cpu_get_state() and various P_ONLINE/etc
   1074  * states.  Those are for user-land (and system call) use only.
   1075  */
   1076 
   1077 /*
   1078  * Determine whether the CPU is online and handling interrupts.
   1079  */
   1080 int
   1081 cpu_is_online(cpu_t *cpu)
   1082 {
   1083 	ASSERT(MUTEX_HELD(&cpu_lock));
   1084 	return (cpu_flagged_online(cpu->cpu_flags));
   1085 }
   1086 
   1087 /*
   1088  * Determine whether the CPU is offline (this includes spare and faulted).
   1089  */
   1090 int
   1091 cpu_is_offline(cpu_t *cpu)
   1092 {
   1093 	ASSERT(MUTEX_HELD(&cpu_lock));
   1094 	return (cpu_flagged_offline(cpu->cpu_flags));
   1095 }
   1096 
   1097 /*
   1098  * Determine whether the CPU is powered off.
   1099  */
   1100 int
   1101 cpu_is_poweredoff(cpu_t *cpu)
   1102 {
   1103 	ASSERT(MUTEX_HELD(&cpu_lock));
   1104 	return (cpu_flagged_poweredoff(cpu->cpu_flags));
   1105 }
   1106 
   1107 /*
   1108  * Determine whether the CPU is handling interrupts.
   1109  */
   1110 int
   1111 cpu_is_nointr(cpu_t *cpu)
   1112 {
   1113 	ASSERT(MUTEX_HELD(&cpu_lock));
   1114 	return (cpu_flagged_nointr(cpu->cpu_flags));
   1115 }
   1116 
   1117 /*
   1118  * Determine whether the CPU is active (scheduling threads).
   1119  */
   1120 int
   1121 cpu_is_active(cpu_t *cpu)
   1122 {
   1123 	ASSERT(MUTEX_HELD(&cpu_lock));
   1124 	return (cpu_flagged_active(cpu->cpu_flags));
   1125 }
   1126 
   1127 /*
   1128  * Same as above, but these require cpu_flags instead of cpu_t pointers.
   1129  */
   1130 int
   1131 cpu_flagged_online(cpu_flag_t cpu_flags)
   1132 {
   1133 	return (cpu_flagged_active(cpu_flags) &&
   1134 	    (cpu_flags & CPU_ENABLE));
   1135 }
   1136 
   1137 int
   1138 cpu_flagged_offline(cpu_flag_t cpu_flags)
   1139 {
   1140 	return (((cpu_flags & CPU_POWEROFF) == 0) &&
   1141 	    ((cpu_flags & (CPU_READY | CPU_OFFLINE)) != CPU_READY));
   1142 }
   1143 
   1144 int
   1145 cpu_flagged_poweredoff(cpu_flag_t cpu_flags)
   1146 {
   1147 	return ((cpu_flags & CPU_POWEROFF) == CPU_POWEROFF);
   1148 }
   1149 
   1150 int
   1151 cpu_flagged_nointr(cpu_flag_t cpu_flags)
   1152 {
   1153 	return (cpu_flagged_active(cpu_flags) &&
   1154 	    (cpu_flags & CPU_ENABLE) == 0);
   1155 }
   1156 
   1157 int
   1158 cpu_flagged_active(cpu_flag_t cpu_flags)
   1159 {
   1160 	return (((cpu_flags & (CPU_POWEROFF | CPU_FAULTED | CPU_SPARE)) == 0) &&
   1161 	    ((cpu_flags & (CPU_READY | CPU_OFFLINE)) == CPU_READY));
   1162 }
   1163 
   1164 /*
   1165  * Bring the indicated CPU online.
   1166  */
   1167 int
   1168 cpu_online(cpu_t *cp)
   1169 {
   1170 	int	error = 0;
   1171 
   1172 	/*
   1173 	 * Handle on-line request.
   1174 	 *	This code must put the new CPU on the active list before
   1175 	 *	starting it because it will not be paused, and will start
   1176 	 * 	using the active list immediately.  The real start occurs
   1177 	 *	when the CPU_QUIESCED flag is turned off.
   1178 	 */
   1179 
   1180 	ASSERT(MUTEX_HELD(&cpu_lock));
   1181 
   1182 	/*
   1183 	 * Put all the cpus into a known safe place.
   1184 	 * No mutexes can be entered while CPUs are paused.
   1185 	 */
   1186 	error = mp_cpu_start(cp);	/* arch-dep hook */
   1187 	if (error == 0) {
   1188 		pg_cpupart_in(cp, cp->cpu_part);
   1189 		pause_cpus(NULL);
   1190 		cpu_add_active_internal(cp);
   1191 		if (cp->cpu_flags & CPU_FAULTED) {
   1192 			cp->cpu_flags &= ~CPU_FAULTED;
   1193 			mp_cpu_faulted_exit(cp);
   1194 		}
   1195 		cp->cpu_flags &= ~(CPU_QUIESCED | CPU_OFFLINE | CPU_FROZEN |
   1196 		    CPU_SPARE);
   1197 		start_cpus();
   1198 		cpu_stats_kstat_create(cp);
   1199 		cpu_create_intrstat(cp);
   1200 		lgrp_kstat_create(cp);
   1201 		cpu_state_change_notify(cp->cpu_id, CPU_ON);
   1202 		cpu_intr_enable(cp);	/* arch-dep hook */
   1203 		cpu_set_state(cp);
   1204 		cyclic_online(cp);
   1205 		poke_cpu(cp->cpu_id);
   1206 	}