Home | History | Annotate | Download | only in disp
      1 /*
      2  * CDDL HEADER START
      3  *
      4  * The contents of this file are subject to the terms of the
      5  * Common Development and Distribution License (the "License").
      6  * You may not use this file except in compliance with the License.
      7  *
      8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
      9  * or http://www.opensolaris.org/os/licensing.
     10  * See the License for the specific language governing permissions
     11  * and limitations under the License.
     12  *
     13  * When distributing Covered Code, include this CDDL HEADER in each
     14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
     15  * If applicable, add the following below this CDDL HEADER, with the
     16  * fields enclosed by brackets "[]" replaced with your own identifying
     17  * information: Portions Copyright [yyyy] [name of copyright owner]
     18  *
     19  * CDDL HEADER END
     20  */
     21 /*
     22  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
     23  * Use is subject to license terms.
     24  */
     25 
     26 #include <sys/systm.h>
     27 #include <sys/types.h>
     28 #include <sys/param.h>
     29 #include <sys/thread.h>
     30 #include <sys/cpuvar.h>
     31 #include <sys/cpupart.h>
     32 #include <sys/cmn_err.h>
     33 #include <sys/disp.h>
     34 #include <sys/group.h>
     35 #include <sys/bitset.h>
     36 #include <sys/lgrp.h>
     37 #include <sys/cmt.h>
     38 
     39 /*
     40  * CMT dispatcher policies
     41  *
     42  * This file implements CMT dispatching policies using Processor Groups.
     43  *
     44  * The scheduler/dispatcher leverages knowledge of the performance
     45  * relevant CMT sharing relationships existing between CPUs to implement
     46  * load balancing, and coalescence thread placement policies.
     47  *
     48  * Load balancing policy seeks to improve performance by minimizing
     49  * contention over shared processor resources / facilities. Coalescence
     50  * policies improve resource utilization and ultimately power efficiency.
     51  *
     52  * On NUMA systems, the dispatcher will generally perform load balancing and
     53  * coalescence within (and not across) lgroups. This is because there isn't
     54  * much sense in trying to correct an imbalance by sending a thread outside
     55  * of its home, if it would attempt to return home a short while later.
     56  * The dispatcher will implement CMT policy across lgroups however, if
     57  * it can do so with a thread homed to the root lgroup, since root homed
     58  * threads have no lgroup affinity.
     59  */
     60 
     61 /*
     62  * Return non-zero if, given the policy, we should migrate from running
     63  * somewhere "here" to somewhere "there".
     64  */
     65 static int
     66 cmt_should_migrate(pg_cmt_t *here, pg_cmt_t *there, pg_cmt_policy_t policy,
     67     int self)
     68 {
     69 	uint32_t here_util, there_util;
     70 
     71 	here_util = here->cmt_utilization;
     72 	there_util = there->cmt_utilization;
     73 
     74 	/*
     75 	 * This assumes that curthread's utilization is "1"
     76 	 */
     77 	if (self && bitset_in_set(&here->cmt_cpus_actv_set, CPU->cpu_seqid))
     78 		here_util--;	/* Ignore curthread's effect */
     79 
     80 	/*
     81 	 * Load balancing and coalescence are conflicting policies
     82 	 */
     83 	ASSERT((policy & (CMT_BALANCE|CMT_COALESCE)) !=
     84 	    (CMT_BALANCE|CMT_COALESCE));
     85 
     86 	if (policy & CMT_BALANCE) {
     87 		/*
     88 		 * Balance utilization
     89 		 *
     90 		 * If the target is comparatively underutilized
     91 		 * (either in an absolute sense, or scaled by capacity),
     92 		 * then choose to balance.
     93 		 */
     94 		if ((here_util > there_util) ||
     95 		    (here_util == there_util &&
     96 		    (CMT_CAPACITY(there) > CMT_CAPACITY(here)))) {
     97 			return (1);
     98 		}
     99 	} else if (policy & CMT_COALESCE) {
    100 		/*
    101 		 * Attempt to drive group utilization up to capacity
    102 		 */
    103 		if (there_util > here_util &&
    104 		    there_util < CMT_CAPACITY(there))
    105 			return (1);
    106 	}
    107 	return (0);
    108 }
    109 
    110 /*
    111  * Perform multi-level CMT load balancing of running threads.
    112  *
    113  * tp is the thread being enqueued.
    114  * cp is a hint CPU, against which CMT load balancing will be performed.
    115  *
    116  * Returns cp, or a CPU better than cp with respect to balancing
    117  * running thread load.
    118  */
    119 cpu_t *
    120 cmt_balance(kthread_t *tp, cpu_t *cp)
    121 {
    122 	int		hint, i, cpu, nsiblings;
    123 	int		self = 0;
    124 	group_t		*cmt_pgs, *siblings;
    125 	pg_cmt_t	*pg, *pg_tmp, *tpg = NULL;
    126 	int		level = 0;
    127 	cpu_t		*newcp;
    128 	extern cmt_lgrp_t *cmt_root;
    129 
    130 	ASSERT(THREAD_LOCK_HELD(tp));
    131 
    132 	cmt_pgs = &cp->cpu_pg->cmt_pgs;
    133 
    134 	if (GROUP_SIZE(cmt_pgs) == 0)
    135 		return (cp);	/* nothing to do */
    136 
    137 	if (tp == curthread)
    138 		self = 1;
    139 
    140 	/*
    141 	 * Balance across siblings in the CPUs CMT lineage
    142 	 * If the thread is homed to the root lgroup, perform
    143 	 * top level balancing against other top level PGs
    144 	 * in the system. Otherwise, start with the default
    145 	 * top level siblings group, which is within the leaf lgroup
    146 	 */
    147 	pg = GROUP_ACCESS(cmt_pgs, level);
    148 	if (tp->t_lpl->lpl_lgrpid == LGRP_ROOTID)
    149 		siblings = &cmt_root->cl_pgs;
    150 	else
    151 		siblings = pg->cmt_siblings;
    152 
    153 	/*
    154 	 * Traverse down the lineage until we find a level that needs
    155 	 * balancing, or we get to the end.
    156 	 */
    157 	for (;;) {
    158 		nsiblings = GROUP_SIZE(siblings);	/* self inclusive */
    159 		if (nsiblings == 1)
    160 			goto next_level;
    161 
    162 		hint = CPU_PSEUDO_RANDOM() % nsiblings;
    163 
    164 		/*
    165 		 * Find a balancing candidate from among our siblings
    166 		 * "hint" is a hint for where to start looking
    167 		 */
    168 		i = hint;
    169 		do {
    170 			ASSERT(i < nsiblings);
    171 			pg_tmp = GROUP_ACCESS(siblings, i);
    172 
    173 			/*
    174 			 * The candidate must not be us, and must
    175 			 * have some CPU resources in the thread's
    176 			 * partition
    177 			 */
    178 			if (pg_tmp != pg &&
    179 			    bitset_in_set(&tp->t_cpupart->cp_cmt_pgs,
    180 			    ((pg_t *)pg_tmp)->pg_id)) {
    181 				tpg = pg_tmp;
    182 				break;
    183 			}
    184 
    185 			if (++i >= nsiblings)
    186 				i = 0;
    187 		} while (i != hint);
    188 
    189 		if (!tpg)
    190 			goto next_level; /* no candidates at this level */
    191 
    192 		/*
    193 		 * Decide if we should migrate from the current PG to a
    194 		 * target PG given a policy
    195 		 */
    196 		if (cmt_should_migrate(pg, tpg, pg->cmt_policy, self))
    197 			break;
    198 		tpg = NULL;
    199 
    200 next_level:
    201 		if (++level == GROUP_SIZE(cmt_pgs))
    202 			break;
    203 
    204 		pg = GROUP_ACCESS(cmt_pgs, level);
    205 		siblings = pg->cmt_siblings;
    206 	}
    207 
    208 	if (tpg) {
    209 		uint_t	tgt_size = GROUP_SIZE(&tpg->cmt_cpus_actv);
    210 
    211 		/*
    212 		 * Select an idle CPU from the target
    213 		 */
    214 		hint = CPU_PSEUDO_RANDOM() % tgt_size;
    215 		cpu = hint;
    216 		do {
    217 			newcp = GROUP_ACCESS(&tpg->cmt_cpus_actv, cpu);
    218 			if (newcp->cpu_part == tp->t_cpupart &&
    219 			    newcp->cpu_dispatch_pri == -1) {
    220 				cp = newcp;
    221 				break;
    222 			}
    223 			if (++cpu == tgt_size)
    224 				cpu = 0;
    225 		} while (cpu != hint);
    226 	}
    227 
    228 	return (cp);
    229 }
    230