1 3792 akolb /* 2 3792 akolb * CDDL HEADER START 3 3792 akolb * 4 3792 akolb * The contents of this file are subject to the terms of the 5 3792 akolb * Common Development and Distribution License (the "License"). 6 3792 akolb * You may not use this file except in compliance with the License. 7 3792 akolb * 8 3792 akolb * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 3792 akolb * or http://www.opensolaris.org/os/licensing. 10 3792 akolb * See the License for the specific language governing permissions 11 3792 akolb * and limitations under the License. 12 3792 akolb * 13 3792 akolb * When distributing Covered Code, include this CDDL HEADER in each 14 3792 akolb * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 3792 akolb * If applicable, add the following below this CDDL HEADER, with the 16 3792 akolb * fields enclosed by brackets "[]" replaced with your own identifying 17 3792 akolb * information: Portions Copyright [yyyy] [name of copyright owner] 18 3792 akolb * 19 3792 akolb * CDDL HEADER END 20 3792 akolb */ 21 3792 akolb 22 3792 akolb /* 23 11066 rafael * Copyright 2009 Sun Microsystems, Inc. All rights reserved. 24 3792 akolb * Use is subject to license terms. 25 3792 akolb */ 26 3792 akolb 27 3792 akolb #include <sys/disp.h> 28 3792 akolb #include <sys/param.h> 29 3792 akolb #include <sys/systm.h> 30 3792 akolb #include <sys/sysmacros.h> 31 3792 akolb #include <sys/atomic.h> 32 3792 akolb #include <sys/cpucaps_impl.h> 33 3792 akolb #include <sys/dtrace.h> 34 3792 akolb #include <sys/sdt.h> 35 3792 akolb #include <sys/debug.h> 36 3792 akolb #include <sys/rctl.h> 37 3792 akolb #include <sys/errno.h> 38 3792 akolb 39 3792 akolb /* 40 3792 akolb * CPU Caps implementation 41 3792 akolb * ======================= 42 3792 akolb * 43 3792 akolb * A CPU cap can be set on any project or any zone. Zone CPU cap limits the CPU 44 3792 akolb * usage for all projects running inside the zone. 
If the zone CPU cap is set 45 3792 akolb * below the project CPU cap, the latter will have no effect. 46 3792 akolb * 47 3792 akolb * When CPU usage of projects and/or zones reaches specified caps, threads in 48 3792 akolb * them do not get scheduled and instead are placed on wait queues associated 49 3792 akolb * with a cap. Such threads will start running again only when CPU usage drops 50 3792 akolb * below the cap level. Each zone and each project has its own wait queue. 51 3792 akolb * 52 3792 akolb * When CPU cap is set, the kernel continuously keeps track of CPU time used by 53 3792 akolb * capped zones and/or projects over a short time interval and calculates their 54 3792 akolb * current CPU usage as a percentage. When the accumulated usage reaches the CPU 55 3792 akolb * cap, LWPs running in the user-land (when they are not holding any critical 56 3792 akolb * kernel locks) are placed on special wait queues until their project's or 57 3792 akolb * zone's CPU usage drops below the cap. 58 3792 akolb * 59 3792 akolb * The system maintains a list of all capped projects and all capped zones. On 60 3792 akolb * every clock tick every active thread belonging to a capped project adds its 61 3792 akolb * CPU usage to its project. Usage from all projects belonging to a capped zone 62 3792 akolb * is aggregated to get the zone usage. 63 3792 akolb * 64 3792 akolb * When the current CPU usage is above the cap, a project or zone is considered 65 3792 akolb * over-capped. Every user thread caught running in an over-capped project or 66 3792 akolb * zone is marked by setting TS_PROJWAITQ flag in thread's t_schedflag field and 67 3792 akolb * is requested to surrender its CPU. This causes scheduling class specific 68 3792 akolb * CL_PREEMPT() callback to be invoked. The callback function places threads 69 3792 akolb * marked as TS_PROJWAITQ on a wait queue and calls switch(). 
70 3792 akolb * 71 3792 akolb * Threads are only placed on wait queues after trapping from user-land 72 3792 akolb * (they could be holding some user locks, but no kernel locks) and while 73 3792 akolb * returning from the trap back to the user-land when no kernel locks are held. 74 3792 akolb * Putting threads on wait queues in random places while running in the 75 3792 akolb * kernel might lead to all kinds of locking problems. 76 3792 akolb * 77 3792 akolb * Accounting 78 3792 akolb * ========== 79 3792 akolb * 80 3792 akolb * Accounting of CPU usage is based on per-thread micro-state accounting data. 81 3792 akolb * On every clock tick clock() adds new on-CPU time for every thread found on 82 3792 akolb * CPU. Scheduling classes also add new on-CPU time for any thread leaving CPU. 83 3792 akolb * New times means time since it was last accounted for. On-CPU times greater 84 3792 akolb * than 1 tick are truncated to 1 tick. 85 3792 akolb * 86 3792 akolb * Project CPU usage is aggregated from all threads within the project. 87 3792 akolb * Zone CPU usage is the sum of usages for all projects within the zone. Zone 88 3792 akolb * CPU usage is calculated on every clock tick by walking list of projects and 89 3792 akolb * adding their usage together. 90 3792 akolb * 91 3792 akolb * Decay 92 3792 akolb * ===== 93 3792 akolb * 94 3792 akolb * CPU usage is decayed by the caps_update() routine which is called once per 95 3792 akolb * every clock tick. It walks lists of project caps and decays their usages by 96 3792 akolb * one per cent. If CPU usage drops below cap levels, threads on the wait queue 97 3792 akolb * are made runnable again, one thread per clock tick. 
98 3792 akolb * 99 3792 akolb * Interfaces 100 3792 akolb * ========== 101 3792 akolb * 102 3792 akolb * The CPU Caps facility provides the following interfaces to the rest of the 103 3792 akolb * system: 104 3792 akolb * 105 3792 akolb * cpucaps_project_add(kproject_t *) 106 3792 akolb * 107 3792 akolb * Notifies the framework of a new project. It should be put on the 108 3792 akolb * capped_projects list if its zone has a cap. 109 3792 akolb * 110 3792 akolb * cpucaps_project_remove(kproject_t *) 111 3792 akolb * 112 3792 akolb * Remove the association between the specified project and its cap. 113 3792 akolb * Called right before the project is destroyed. 114 3792 akolb * 115 3792 akolb * cpucaps_project_set(kproject_t *, rctl_qty_t) 116 3792 akolb * 117 3792 akolb * Set project cap of the specified project to the specified value. Setting the 118 3792 akolb * value to NOCAP is equivalent to removing the cap. 119 3792 akolb * 120 3792 akolb * cpucaps_zone_set(zone_t *, rctl_qty_t) 121 3792 akolb * 122 3792 akolb * Set zone cap of the specified zone to the specified value. Setting the value 123 3792 akolb * to NOCAP is equivalent to removing the cap. 124 3792 akolb * 125 3792 akolb * cpucaps_zone_remove(zone_t *) 126 3792 akolb * 127 3792 akolb * Remove the association between the zone and its cap. 128 3792 akolb * 129 3792 akolb * cpucaps_charge(kthread_id_t, caps_sc_t *, cpucaps_charge_t) 130 3792 akolb * 131 3792 akolb * Charges specified thread's project the amount of on-CPU time that it used. 132 3792 akolb * If the third argument is CPUCAPS_CHARGE_ONLY returns False. 133 3792 akolb * Otherwise returns True if project or zone should be penalized because its 134 3792 akolb * project or zone is exceeding its cap. Also sets TS_PROJWAITQ or TS_ZONEWAITQ 135 3792 akolb * bits in t_schedflag in this case. 136 3792 akolb * 137 3792 akolb * CPUCAPS_ENFORCE(kthread_id_t *) 138 3792 akolb * 139 3792 akolb * Enforces CPU caps for a specified thread. 
Places LWPs running in LWP_USER 140 3792 akolb * state on project or zone wait queues, as requested by TS_PROJWAITQ or 141 3792 akolb * TS_ZONEWAITQ bits in t_schedflag. Returns True if the thread was placed on a 142 3792 akolb * wait queue or False otherwise. 143 3792 akolb * 144 3792 akolb * cpucaps_sc_init(caps_sc_t *) 145 3792 akolb * 146 3792 akolb * Initializes the scheduling-class specific CPU Caps data for a thread. 147 3792 akolb * 148 3792 akolb * LOCKS 149 3792 akolb * ===== 150 3792 akolb * 151 3792 akolb * all the individual caps structures and their lists are protected by a global 152 3792 akolb * caps_lock mutex. The lock is grabbed either by clock() or by events modifying 153 3792 akolb * caps, so it is usually uncontended. We avoid all blocking memory allocations 154 3792 akolb * while holding caps_lock to prevent clock() from blocking. 155 3792 akolb * 156 3792 akolb * Thread state is protected by the thread lock. It protects the association 157 3792 akolb * between a thread and its project and, as a consequence, to its zone. The 158 3792 akolb * association can not break while thread lock is held, so the project or zone 159 3792 akolb * cap are not going to disappear while thread lock is held. 160 3792 akolb * 161 3792 akolb * Cap usage field is protected by high-pil spin-lock cap_usagelock. It is 162 3792 akolb * grabbed by scheduling classes already holding thread lock at high PIL and by 163 3792 akolb * clock thread performing usage decay. We should do as little work as possible 164 3792 akolb * while holding the lock since it may be very hot. All threads in the project 165 3792 akolb * contend for the same cache line doing cap usage updates. 166 3792 akolb */ 167 3792 akolb 168 3792 akolb /* 169 3792 akolb * caps_lock protects list of capped projects and zones, changes in the cap 170 3792 akolb * state and changes of the global cpucaps_enabled flag. 
171 3792 akolb * 172 3792 akolb * Changing zone caps also sets cpucaps_busy to avoid races when a zone cap is 173 3792 akolb * modified in parallel. This can be per-zone cap flag, but we don't keep any 174 3792 akolb * cap state for now. 175 3792 akolb */ 176 3792 akolb static kmutex_t caps_lock; /* lock to protect: */ 177 3792 akolb static list_t capped_zones; /* - list of zones with caps */ 178 3792 akolb static list_t capped_projects; /* - list of projects with caps */ 179 3792 akolb boolean_t cpucaps_enabled; /* - are there any caps defined? */ 180 3792 akolb boolean_t cpucaps_busy; /* - is framework busy? */ 181 3792 akolb 182 3792 akolb /* 183 3792 akolb * The accounting is based on the number of nanoseconds threads spend running 184 3792 akolb * during a tick which is kept in the cap_tick_cost variable. 185 3792 akolb */ 186 3792 akolb static hrtime_t cap_tick_cost; 187 3792 akolb 188 3792 akolb /* 189 3792 akolb * How much of the usage value is decayed every clock tick 190 3792 akolb * Decay one per cent of value per tick 191 3792 akolb */ 192 3792 akolb #define CAP_DECAY_FACTOR 100 193 3792 akolb 194 3792 akolb /* 195 3792 akolb * Scale the value and round it to the closest integer value 196 3792 akolb */ 197 3792 akolb #define ROUND_SCALE(x, y) (((x) + (y) / 2) / (y)) 198 3792 akolb 199 3792 akolb static void caps_update(); 200 3792 akolb 201 3792 akolb /* 202 3792 akolb * CAP kstats. 
203 3792 akolb */ 204 3792 akolb struct cap_kstat { 205 3792 akolb kstat_named_t cap_value; 206 3792 akolb kstat_named_t cap_usage; 207 3792 akolb kstat_named_t cap_nwait; 208 3792 akolb kstat_named_t cap_below; 209 3792 akolb kstat_named_t cap_above; 210 3792 akolb kstat_named_t cap_maxusage; 211 3792 akolb kstat_named_t cap_zonename; 212 3792 akolb } cap_kstat = { 213 3792 akolb { "value", KSTAT_DATA_UINT64 }, 214 3792 akolb { "usage", KSTAT_DATA_UINT64 }, 215 3792 akolb { "nwait", KSTAT_DATA_UINT64 }, 216 3792 akolb { "below_sec", KSTAT_DATA_UINT64 }, 217 3792 akolb { "above_sec", KSTAT_DATA_UINT64 }, 218 3792 akolb { "maxusage", KSTAT_DATA_UINT64 }, 219 3792 akolb { "zonename", KSTAT_DATA_STRING }, 220 3792 akolb }; 221 3792 akolb 222 3792 akolb 223 3792 akolb static kmutex_t cap_kstat_lock; 224 3792 akolb static int cap_kstat_update(kstat_t *, int); 225 3792 akolb 226 3792 akolb /* 227 3792 akolb * Initialize CPU caps infrastructure. 228 3792 akolb * - Initialize lists of capped zones and capped projects 229 3792 akolb * - Set cpucaps_clock_callout to NULL 230 3792 akolb */ 231 3792 akolb void 232 3792 akolb cpucaps_init() 233 3792 akolb { 234 3792 akolb /* 235 3792 akolb * Initialize global variables 236 3792 akolb */ 237 3792 akolb cap_tick_cost = TICK_TO_NSEC((hrtime_t)1); 238 3792 akolb 239 3792 akolb list_create(&capped_zones, sizeof (cpucap_t), 240 3792 akolb offsetof(cpucap_t, cap_link)); 241 3792 akolb list_create(&capped_projects, sizeof (cpucap_t), 242 3792 akolb offsetof(cpucap_t, cap_link)); 243 3792 akolb 244 3792 akolb cpucaps_enabled = B_FALSE; 245 3792 akolb cpucaps_busy = B_FALSE; 246 3792 akolb cpucaps_clock_callout = NULL; 247 3792 akolb } 248 3792 akolb 249 3792 akolb /* 250 3792 akolb * Initialize scheduling-class specific CPU Caps data. 
251 3792 akolb */ 252 3792 akolb void 253 3792 akolb cpucaps_sc_init(caps_sc_t *csc) 254 3792 akolb { 255 3792 akolb csc->csc_cputime = 0; 256 3792 akolb } 257 3792 akolb 258 3792 akolb /* 259 3792 akolb * Allocate and initialize cpucap structure 260 3792 akolb */ 261 3792 akolb static cpucap_t * 262 3792 akolb cap_alloc(void) 263 3792 akolb { 264 3792 akolb cpucap_t *cap = kmem_zalloc(sizeof (cpucap_t), KM_SLEEP); 265 3792 akolb 266 3792 akolb DISP_LOCK_INIT(&cap->cap_usagelock); 267 3792 akolb waitq_init(&cap->cap_waitq); 268 3792 akolb 269 3792 akolb return (cap); 270 3792 akolb } 271 3792 akolb 272 3792 akolb /* 273 3792 akolb * Free cpucap structure 274 3792 akolb */ 275 3792 akolb static void 276 3792 akolb cap_free(cpucap_t *cap) 277 3792 akolb { 278 3792 akolb if (cap == NULL) 279 3792 akolb return; 280 3792 akolb 281 3792 akolb /* 282 3792 akolb * This cap should not be active 283 3792 akolb */ 284 3792 akolb ASSERT(!list_link_active(&cap->cap_link)); 285 3792 akolb ASSERT(cap->cap_value == 0); 286 3792 akolb ASSERT(!DISP_LOCK_HELD(&cap->cap_usagelock)); 287 3792 akolb 288 3792 akolb waitq_fini(&cap->cap_waitq); 289 3792 akolb DISP_LOCK_DESTROY(&cap->cap_usagelock); 290 3792 akolb 291 3792 akolb kmem_free(cap, sizeof (cpucap_t)); 292 3792 akolb } 293 3792 akolb 294 3792 akolb /* 295 3792 akolb * Activate cap - insert into active list and unblock its 296 3792 akolb * wait queue. Should be called with caps_lock held. 297 3792 akolb * The cap_value field is set to the value supplied. 
298 3792 akolb */ 299 3792 akolb static void 300 3792 akolb cap_enable(list_t *l, cpucap_t *cap, hrtime_t value) 301 3792 akolb { 302 3792 akolb ASSERT(MUTEX_HELD(&caps_lock)); 303 3792 akolb 304 3792 akolb /* 305 3792 akolb * Cap can not be already enabled 306 3792 akolb */ 307 3792 akolb ASSERT(!CAP_ENABLED(cap)); 308 3792 akolb ASSERT(!list_link_active(&cap->cap_link)); 309 3792 akolb 310 3792 akolb list_insert_tail(l, cap); 311 3792 akolb cap->cap_below = cap->cap_above = 0; 312 3792 akolb cap->cap_maxusage = 0; 313 3792 akolb cap->cap_usage = 0; 314 3792 akolb cap->cap_value = value; 315 3792 akolb waitq_unblock(&cap->cap_waitq); 316 3792 akolb if (CPUCAPS_OFF()) { 317 3792 akolb cpucaps_enabled = B_TRUE; 318 3792 akolb cpucaps_clock_callout = caps_update; 319 3792 akolb } 320 3792 akolb } 321 3792 akolb 322 3792 akolb /* 323 3792 akolb * Deactivate cap 324 3792 akolb * - Block its wait queue. This prevents any new threads from being 325 3792 akolb * enqueued there and moves all enqueued threads to the run queue. 326 3792 akolb * - Remove cap from list l. 327 3792 akolb * - Disable CPU caps globally if there are no capped projects or zones 328 3792 akolb * 329 3792 akolb * Should be called with caps_lock held. 
330 3792 akolb */ 331 3792 akolb static void 332 3792 akolb cap_disable(list_t *l, cpucap_t *cap) 333 3792 akolb { 334 3792 akolb ASSERT(MUTEX_HELD(&caps_lock)); 335 3792 akolb /* 336 3792 akolb * Cap should be currently active 337 3792 akolb */ 338 3792 akolb ASSERT(CPUCAPS_ON()); 339 3792 akolb ASSERT(list_link_active(&cap->cap_link)); 340 3792 akolb ASSERT(CAP_ENABLED(cap)); 341 3792 akolb 342 3792 akolb waitq_block(&cap->cap_waitq); 343 3792 akolb list_remove(l, cap); 344 3792 akolb if (list_is_empty(&capped_projects) && list_is_empty(&capped_zones)) { 345 3792 akolb cpucaps_enabled = B_FALSE; 346 3792 akolb cpucaps_clock_callout = NULL; 347 3792 akolb } 348 3792 akolb cap->cap_value = 0; 349 3792 akolb cap->cap_project = NULL; 350 3792 akolb cap->cap_zone = NULL; 351 3792 akolb if (cap->cap_kstat != NULL) { 352 3792 akolb kstat_delete(cap->cap_kstat); 353 3792 akolb cap->cap_kstat = NULL; 354 3792 akolb } 355 3792 akolb 356 3792 akolb } 357 3792 akolb 358 3792 akolb /* 359 3792 akolb * Enable cap for a project kpj 360 3792 akolb * It is safe to enable already enabled project cap. 361 3792 akolb * Should be called with caps_lock held. 
362 3792 akolb */ 363 3792 akolb static void 364 3792 akolb cap_project_enable(kproject_t *kpj, hrtime_t value) 365 3792 akolb { 366 3792 akolb cpucap_t *cap = kpj->kpj_cpucap; 367 3792 akolb 368 3792 akolb ASSERT(MUTEX_HELD(&caps_lock)); 369 3792 akolb ASSERT(cap != NULL); 370 3792 akolb 371 3792 akolb if (CAP_DISABLED(cap)) { 372 3792 akolb ASSERT(cap->cap_kstat == NULL); 373 3792 akolb cap_enable(&capped_projects, cap, value); 374 3792 akolb cap->cap_project = kpj; 375 3792 akolb cap->cap_zone = kpj->kpj_zone; 376 3792 akolb 377 3792 akolb /* 378 3792 akolb * Create cap kstats 379 3792 akolb */ 380 3792 akolb if ((cap->cap_kstat = rctl_kstat_create_project(kpj, "cpucaps", 381 3792 akolb KSTAT_TYPE_NAMED, 382 3792 akolb sizeof (cap_kstat) / sizeof (kstat_named_t), 383 3792 akolb KSTAT_FLAG_VIRTUAL)) != NULL) { 384 11066 rafael cap->cap_kstat->ks_data_size += 385 11066 rafael strlen(cap->cap_zone->zone_name) + 1; 386 11066 rafael cap->cap_kstat->ks_lock = &cap_kstat_lock; 387 11066 rafael cap->cap_kstat->ks_data = &cap_kstat; 388 11066 rafael cap->cap_kstat->ks_update = cap_kstat_update; 389 11066 rafael cap->cap_kstat->ks_private = cap; 390 11066 rafael kstat_install(cap->cap_kstat); 391 3792 akolb } 392 3792 akolb } 393 3792 akolb } 394 3792 akolb 395 3792 akolb /* 396 3792 akolb * Disable project cap. 397 3792 akolb * It is safe to disable already disabled project cap. 398 3792 akolb * Should be called with caps_lock held. 
399 3792 akolb */ 400 3792 akolb static void 401 3792 akolb cap_project_disable(kproject_t *kpj) 402 3792 akolb { 403 3792 akolb cpucap_t *cap = kpj->kpj_cpucap; 404 3792 akolb 405 3792 akolb ASSERT(MUTEX_HELD(&caps_lock)); 406 3792 akolb ASSERT(cap != NULL); 407 3792 akolb ASSERT(cap->cap_project == kpj); 408 3792 akolb 409 3792 akolb if (CAP_ENABLED(cap)) 410 3792 akolb cap_disable(&capped_projects, cap); 411 3792 akolb } 412 3792 akolb 413 3792 akolb /* 414 3792 akolb * Enable cap for a zone 415 3792 akolb * It is safe to enable already enabled zone cap. 416 3792 akolb * Should be called with caps_lock held. 417 3792 akolb */ 418 3792 akolb static void 419 3792 akolb cap_zone_enable(zone_t *zone, hrtime_t value) 420 3792 akolb { 421 3792 akolb cpucap_t *cap = zone->zone_cpucap; 422 3792 akolb 423 3792 akolb ASSERT(MUTEX_HELD(&caps_lock)); 424 3792 akolb ASSERT(cap != NULL); 425 3792 akolb 426 3792 akolb if (CAP_DISABLED(cap)) { 427 3792 akolb ASSERT(cap->cap_kstat == NULL); 428 3792 akolb cap_enable(&capped_zones, cap, value); 429 3792 akolb cap->cap_zone = zone; 430 3792 akolb 431 3792 akolb /* 432 3792 akolb * Create cap kstats 433 3792 akolb */ 434 3792 akolb if ((cap->cap_kstat = rctl_kstat_create_zone(zone, "cpucaps", 435 3792 akolb KSTAT_TYPE_NAMED, 436 3792 akolb sizeof (cap_kstat) / sizeof (kstat_named_t), 437 3792 akolb KSTAT_FLAG_VIRTUAL)) != NULL) { 438 11066 rafael cap->cap_kstat->ks_data_size += 439 11066 rafael strlen(cap->cap_zone->zone_name) + 1; 440 11066 rafael cap->cap_kstat->ks_lock = &cap_kstat_lock; 441 11066 rafael cap->cap_kstat->ks_data = &cap_kstat; 442 11066 rafael cap->cap_kstat->ks_update = cap_kstat_update; 443 11066 rafael cap->cap_kstat->ks_private = cap; 444 11066 rafael kstat_install(cap->cap_kstat); 445 3792 akolb } 446 3792 akolb } 447 3792 akolb } 448 3792 akolb 449 3792 akolb /* 450 3792 akolb * Disable zone cap. 451 3792 akolb * It is safe to disable already disabled zone cap. 
452 3792 akolb * Should be called with caps_lock held. 453 3792 akolb */ 454 3792 akolb static void 455 3792 akolb cap_zone_disable(zone_t *zone) 456 3792 akolb { 457 3792 akolb cpucap_t *cap = zone->zone_cpucap; 458 3792 akolb 459 3792 akolb ASSERT(MUTEX_HELD(&caps_lock)); 460 3792 akolb ASSERT(cap != NULL); 461 3792 akolb ASSERT(cap->cap_zone == zone); 462 3792 akolb 463 3792 akolb if (CAP_ENABLED(cap)) 464 3792 akolb cap_disable(&capped_zones, cap); 465 3792 akolb } 466 3792 akolb 467 3792 akolb /* 468 3792 akolb * Apply specified callback to all caps contained in the list `l'. 469 3792 akolb */ 470 3792 akolb static void 471 11066 rafael cap_walk(list_t *l, void (*cb)(cpucap_t *, int64_t)) 472 3792 akolb { 473 11066 rafael static uint64_t cpucap_walk_gen; 474 3792 akolb cpucap_t *cap; 475 3792 akolb 476 3792 akolb ASSERT(MUTEX_HELD(&caps_lock)); 477 3792 akolb 478 3792 akolb for (cap = list_head(l); cap != NULL; cap = list_next(l, cap)) { 479 11066 rafael (*cb)(cap, cpucap_walk_gen); 480 3792 akolb } 481 11066 rafael 482 11066 rafael atomic_inc_64(&cpucap_walk_gen); 483 3792 akolb } 484 3792 akolb 485 3792 akolb /* 486 3792 akolb * If cap limit is not reached, make one thread from wait queue runnable. 487 3792 akolb * The waitq_isempty check is performed without the waitq lock. If a new thread 488 3792 akolb * is placed on the waitq right after the check, it will be picked up during the 489 3792 akolb * next invocation of cap_poke_waitq(). 
490 3792 akolb */ 491 11066 rafael /* ARGSUSED */ 492 3792 akolb static void 493 11066 rafael cap_poke_waitq(cpucap_t *cap, int64_t gen) 494 3792 akolb { 495 3792 akolb ASSERT(MUTEX_HELD(&caps_lock)); 496 3792 akolb 497 3792 akolb if (cap->cap_usage >= cap->cap_value) { 498 3792 akolb cap->cap_above++; 499 3792 akolb } else { 500 3792 akolb waitq_t *wq = &cap->cap_waitq; 501 3792 akolb 502 3792 akolb cap->cap_below++; 503 3792 akolb 504 3792 akolb if (!waitq_isempty(wq)) 505 3792 akolb waitq_runone(wq); 506 3792 akolb } 507 3792 akolb } 508 3792 akolb 509 3792 akolb /* 510 3792 akolb * The callback function called for every cap on capped_projects list. 511 3792 akolb * Decay cap usage by CAP_DECAY_FACTOR 512 3792 akolb * Add this cap project usage to its zone usage. 513 3792 akolb * Kick off a thread from the cap waitq if cap is not reached. 514 3792 akolb */ 515 3792 akolb static void 516 11066 rafael cap_project_usage_walker(cpucap_t *cap, int64_t gen) 517 3792 akolb { 518 3792 akolb zone_t *zone = cap->cap_zone; 519 3792 akolb hrtime_t cap_usage = cap->cap_usage; 520 3792 akolb 521 3792 akolb ASSERT(MUTEX_HELD(&caps_lock)); 522 3792 akolb ASSERT(cap->cap_project->kpj_cpucap == cap); 523 3792 akolb ASSERT(zone == cap->cap_project->kpj_zone); 524 3792 akolb ASSERT(CAP_ENABLED(cap)); 525 3792 akolb 526 3792 akolb /* 527 3792 akolb * Set or clear the CAP_REACHED flag based on the current usage. 528 3792 akolb * Only projects having their own caps are ever marked as CAP_REACHED. 529 3792 akolb */ 530 11066 rafael cap_poke_waitq(cap, 0); 531 3792 akolb 532 3792 akolb /* 533 3792 akolb * Add project's CPU usage to our zone's CPU usage. 534 3792 akolb */ 535 3792 akolb if (ZONE_IS_CAPPED(zone)) { 536 3792 akolb cpucap_t *zcap = zone->zone_cpucap; 537 3792 akolb 538 3792 akolb ASSERT(zcap->cap_zone == zone); 539 3792 akolb 540 3792 akolb /* 541 3792 akolb * If we haven't reset this zone's usage during this clock tick 542 11066 rafael * yet, then do it now. 
The cap_gen field is used to check 543 3792 akolb * whether this is the first zone's project we see during this 544 3792 akolb * tick or a subsequent one. 545 3792 akolb */ 546 11066 rafael if (zcap->cap_gen != gen) { 547 3792 akolb if (zcap->cap_usage > zcap->cap_maxusage) 548 3792 akolb zcap->cap_maxusage = zcap->cap_usage; 549 3792 akolb zcap->cap_usage = 0; 550 11066 rafael zcap->cap_gen = gen; 551 3792 akolb } 552 3792 akolb DTRACE_PROBE2(cpucaps__zusage, cpucap_t *, zcap, 553 3792 akolb hrtime_t, cap_usage); 554 3792 akolb zcap->cap_usage += cap_usage; 555 3792 akolb /* Check for overflows */ 556 3792 akolb if (zcap->cap_usage < 0) 557 3792 akolb zcap->cap_usage = MAX_USAGE - 1; 558 3792 akolb } 559 3792 akolb 560 3792 akolb /* 561 3792 akolb * Decay project usage. 562 3792 akolb */ 563 3792 akolb disp_lock_enter(&cap->cap_usagelock); 564 3792 akolb cap->cap_usage -= ROUND_SCALE(cap_usage, CAP_DECAY_FACTOR); 565 3792 akolb disp_lock_exit(&cap->cap_usagelock); 566 3792 akolb } 567 3792 akolb 568 3792 akolb /* 569 3792 akolb * On every clock tick walk the list of project caps and update the CPU usage. 570 3792 akolb * Also walk the list of zone caps checking whether any threads should 571 3792 akolb * transition from wait queue to run queue. 572 3792 akolb * 573 3792 akolb * This function gets called by the clock thread directly when there are any 574 3792 akolb * defined caps. The only lock that it grabs is caps_lock. Nothing else grabs 575 3792 akolb * caps_lock for long periods of time, so there should be almost no contention 576 3792 akolb * for it. 
577 3792 akolb */ 578 3792 akolb static void 579 3792 akolb caps_update() 580 3792 akolb { 581 3792 akolb mutex_enter(&caps_lock); 582 3792 akolb cap_walk(&capped_projects, cap_project_usage_walker); 583 3792 akolb cap_walk(&capped_zones, cap_poke_waitq); 584 3792 akolb mutex_exit(&caps_lock); 585 3792 akolb } 586 3792 akolb 587 3792 akolb /* 588 3792 akolb * The function is called for each project in a zone when the zone cap is 589 3792 akolb * modified. It enables project caps if zone cap is enabled and disables if the 590 3792 akolb * zone cap is disabled and project doesn't have its own cap. 591 3792 akolb * 592 3792 akolb * For each project that does not have cpucap structure allocated it allocates a 593 3792 akolb * new structure and assigns to kpj->cpu_cap. The allocation is performed 594 3792 akolb * without holding caps_lock to avoid using KM_SLEEP allocation with caps_lock 595 3792 akolb * held. 596 3792 akolb */ 597 3792 akolb static int 598 3792 akolb cap_project_zone_modify_walker(kproject_t *kpj, void *arg) 599 3792 akolb { 600 3792 akolb cpucap_t *project_cap = NULL; 601 3792 akolb cpucap_t *zone_cap = (cpucap_t *)arg; 602 3792 akolb 603 3792 akolb ASSERT(zone_cap != NULL); 604 3792 akolb 605 3792 akolb if (kpj->kpj_cpucap == NULL) { 606 3792 akolb /* 607 3792 akolb * This is the first time any cap was established for this 608 3792 akolb * project. Allocate a new cpucap structure for it. 609 3792 akolb */ 610 3792 akolb project_cap = cap_alloc(); 611 3792 akolb } 612 3792 akolb 613 3792 akolb mutex_enter(&caps_lock); 614 3792 akolb 615 3792 akolb /* 616 3792 akolb * Double-check that kpj_cpucap is still NULL - now with caps_lock held 617 3792 akolb * and assign the newly allocated cpucap structure to it. 
618 3792 akolb */ 619 3792 akolb if (kpj->kpj_cpucap == NULL) { 620 3792 akolb kpj->kpj_cpucap = project_cap; 621 3792 akolb } else if (project_cap != NULL) { 622 3792 akolb cap_free(project_cap); 623 3792 akolb } 624 3792 akolb 625 3792 akolb project_cap = kpj->kpj_cpucap; 626 3792 akolb 627 3792 akolb if (CAP_DISABLED(zone_cap)) { 628 3792 akolb /* 629 3792 akolb * Remove all projects in this zone without caps 630 3792 akolb * from the capped_projects list. 631 3792 akolb */ 632 3792 akolb if (project_cap->cap_value == MAX_USAGE) { 633 3792 akolb cap_project_disable(kpj); 634 3792 akolb } 635 3792 akolb } else if (CAP_DISABLED(project_cap)) { 636 3792 akolb /* 637 3792 akolb * Add the project to capped_projects list. 638 3792 akolb */ 639 3792 akolb ASSERT(project_cap->cap_value == 0); 640 3792 akolb cap_project_enable(kpj, MAX_USAGE); 641 3792 akolb } 642 3792 akolb mutex_exit(&caps_lock); 643 3792 akolb 644 3792 akolb return (0); 645 3792 akolb } 646 3792 akolb 647 3792 akolb /* 648 3792 akolb * Set zone cap to cap_val 649 3792 akolb * If cap_val is equal to NOCAP, disable zone cap. 650 3792 akolb * 651 3792 akolb * If this is the first time a cap is set on a zone, allocate cpucap structure 652 3792 akolb * without holding caps_lock to avoid KM_SLEEP allocation with caps_lock held. 653 3792 akolb */ 654 3792 akolb int 655 3792 akolb cpucaps_zone_set(zone_t *zone, rctl_qty_t cap_val) 656 3792 akolb { 657 3792 akolb cpucap_t *cap = NULL; 658 3792 akolb hrtime_t value; 659 3792 akolb 660 3792 akolb if (cap_val == 0) 661 3792 akolb return (EINVAL); 662 3792 akolb 663 3792 akolb ASSERT(cap_val <= MAXCAP); 664 3792 akolb if (cap_val > MAXCAP) 665 3792 akolb cap_val = MAXCAP; 666 3792 akolb 667 3792 akolb /* 668 3792 akolb * Nothing to do if trying to disable a cap on a zone when caps are off 669 3792 akolb * or a zone which does not have a cap yet. 
670 3792 akolb */ 671 3792 akolb if ((CPUCAPS_OFF() || !ZONE_IS_CAPPED(zone)) && (cap_val == NOCAP)) 672 3792 akolb return (0); 673 3792 akolb 674 3792 akolb if (zone->zone_cpucap == NULL) 675 3792 akolb cap = cap_alloc(); 676 3792 akolb 677 3792 akolb mutex_enter(&caps_lock); 678 3792 akolb 679 3792 akolb if (cpucaps_busy) { 680 3792 akolb mutex_exit(&caps_lock); 681 3792 akolb return (EBUSY); 682 3792 akolb } 683 3792 akolb 684 3792 akolb /* 685 3792 akolb * Double-check whether zone->zone_cpucap is NULL, now with caps_lock 686 3792 akolb * held. If it is still NULL, assign a newly allocated cpucap to it. 687 3792 akolb */ 688 3792 akolb if (zone->zone_cpucap == NULL) { 689 3792 akolb zone->zone_cpucap = cap; 690 3792 akolb } else if (cap != NULL) { 691 3792 akolb cap_free(cap); 692 3792 akolb } 693 3792 akolb 694 3792 akolb cap = zone->zone_cpucap; 695 3792 akolb value = cap_val * cap_tick_cost; 696 3792 akolb if (value < 0) 697 3792 akolb value = MAX_USAGE; 698 3792 akolb 699 3792 akolb /* Nothing to do if the value is staying the same */ 700 3792 akolb if (value == cap->cap_value) { 701 3792 akolb mutex_exit(&caps_lock); 702 3792 akolb return (0); 703 3792 akolb } 704 3792 akolb 705 3792 akolb /* 706 3792 akolb * Clear cap statistics since the cap value itself changes. 707 3792 akolb */ 708 3792 akolb cap->cap_above = cap->cap_below = 0; 709 3792 akolb 710 3792 akolb 711 3792 akolb if (cap_val == NOCAP) { 712 3792 akolb if (CAP_ENABLED(cap)) { 713 3792 akolb /* 714 3792 akolb * Remove cap for the zone 715 3792 akolb */ 716 3792 akolb cap_zone_disable(zone); 717 3792 akolb cpucaps_busy = B_TRUE; 718 3792 akolb mutex_exit(&caps_lock); 719 3792 akolb /* 720 3792 akolb * Disable caps for all project belonging to this zone 721 3792 akolb * unless they have their own cap. 
722 3792 akolb */ 723 3792 akolb (void) project_walk_all(zone->zone_id, 724 3792 akolb cap_project_zone_modify_walker, cap); 725 3792 akolb 726 3792 akolb mutex_enter(&caps_lock); 727 3792 akolb cpucaps_busy = B_FALSE; 728 3792 akolb } 729 3792 akolb } else if (CAP_DISABLED(cap)) { 730 3792 akolb /* 731 3792 akolb * Set a cap on a zone which previously was not capped. 732 3792 akolb */ 733 3792 akolb cap_zone_enable(zone, value); 734 3792 akolb cpucaps_busy = B_TRUE; 735 3792 akolb mutex_exit(&caps_lock); 736 3792 akolb 737 3792 akolb /* 738 3792 akolb * Enable cap for all projects belonging to this zone. 739 3792 akolb */ 740 3792 akolb (void) project_walk_all(zone->zone_id, 741 3792 akolb cap_project_zone_modify_walker, cap); 742 3792 akolb 743 3792 akolb mutex_enter(&caps_lock); 744 3792 akolb cpucaps_busy = B_FALSE; 745 3792 akolb } else { 746 3792 akolb /* 747 3792 akolb * No state transitions, just change the value 748 3792 akolb */ 749 3792 akolb cap->cap_value = value; 750 3792 akolb } 751 3792 akolb 752 3792 akolb ASSERT(MUTEX_HELD(&caps_lock)); 753 3792 akolb ASSERT(!cpucaps_busy); 754 3792 akolb mutex_exit(&caps_lock); 755 3792 akolb 756 3792 akolb return (0); 757 3792 akolb } 758 3792 akolb 759 3792 akolb /* 760 3792 akolb * The project is going away so disable its cap. 761 3792 akolb */ 762 3792 akolb void 763 3792 akolb cpucaps_project_remove(kproject_t *kpj) 764 3792 akolb { 765 3792 akolb mutex_enter(&caps_lock); 766 3792 akolb if (PROJECT_IS_CAPPED(kpj)) 767 3792 akolb cap_project_disable(kpj); 768 3792 akolb if (kpj->kpj_cpucap != NULL) { 769 3792 akolb cap_free(kpj->kpj_cpucap); 770 3792 akolb kpj->kpj_cpucap = NULL; 771 3792 akolb } 772 3792 akolb mutex_exit(&caps_lock); 773 3792 akolb } 774 3792 akolb 775 3792 akolb /* 776 3792 akolb * The zone is going away, so disable its cap. 
777 3792 akolb */ 778 3792 akolb void 779 3792 akolb cpucaps_zone_remove(zone_t *zone) 780 3792 akolb { 781 3792 akolb mutex_enter(&caps_lock); 782 3792 akolb while (ZONE_IS_CAPPED(zone)) { 783 3792 akolb mutex_exit(&caps_lock); 784 3792 akolb (void) cpucaps_zone_set(zone, NOCAP); 785 3792 akolb mutex_enter(&caps_lock); 786 3792 akolb } 787 3792 akolb if (zone->zone_cpucap != NULL) { 788 3792 akolb cap_free(zone->zone_cpucap); 789 3792 akolb zone->zone_cpucap = NULL; 790 3792 akolb } 791 3792 akolb mutex_exit(&caps_lock); 792 3792 akolb } 793 3792 akolb 794 3792 akolb /* 795 3792 akolb * New project was created. It should be put on the capped_projects list if 796 3792 akolb * its zone has a cap. 797 3792 akolb */ 798 3792 akolb void 799 3792 akolb cpucaps_project_add(kproject_t *kpj) 800 3792 akolb { 801 3792 akolb cpucap_t *cap = NULL; 802 3792 akolb 803 3792 akolb if (CPUCAPS_OFF() || !ZONE_IS_CAPPED(kpj->kpj_zone)) 804 3792 akolb return; 805 3792 akolb 806 3792 akolb /* 807 3792 akolb * This project was never capped before, so allocate its cap structure. 808 3792 akolb */ 809 3792 akolb if (kpj->kpj_cpucap == NULL) 810 3792 akolb cap = cap_alloc(); 811 3792 akolb 812 3792 akolb mutex_enter(&caps_lock); 813 3792 akolb /* 814 3792 akolb * Double-check with caps_lock held 815 3792 akolb */ 816 3792 akolb if (kpj->kpj_cpucap == NULL) { 817 3792 akolb kpj->kpj_cpucap = cap; 818 3792 akolb } else if (cap != NULL) { 819 3792 akolb cap_free(cap); 820 3792 akolb } 821 3792 akolb 822 3792 akolb if (ZONE_IS_CAPPED(kpj->kpj_zone)) 823 3792 akolb cap_project_enable(kpj, MAX_USAGE); 824 3792 akolb 825 3792 akolb mutex_exit(&caps_lock); 826 3792 akolb } 827 3792 akolb 828 3792 akolb /* 829 3792 akolb * Set project cap to cap_val 830 3792 akolb * If cap_val is equal to NOCAP, disable project cap. 
831 3792 akolb * 832 3792 akolb * If this is the first time a cap is set on a project, allocate cpucap 833 3792 akolb * structure without holding caps_lock to avoid KM_SLEEP allocation with 834 3792 akolb * caps_lock held. 835 3792 akolb */ 836 3792 akolb int 837 3792 akolb cpucaps_project_set(kproject_t *kpj, rctl_qty_t cap_val) 838 3792 akolb { 839 3792 akolb cpucap_t *cap = NULL; 840 3792 akolb hrtime_t value; 841 3792 akolb 842 3792 akolb if (cap_val == 0) 843 3792 akolb return (EINVAL); 844 3792 akolb 845 3792 akolb ASSERT(cap_val <= MAXCAP); 846 3792 akolb if (cap_val > MAXCAP) 847 3792 akolb cap_val = MAXCAP; 848 3792 akolb 849 3792 akolb /* 850 3792 akolb * Nothing to do if trying to disable project cap and caps are not 851 3792 akolb * enabled or if trying to disable cap on a project that does not have 852 3792 akolb * cap enabled. 853 3792 akolb */ 854 3792 akolb if ((cap_val == NOCAP) && (CPUCAPS_OFF() || !PROJECT_IS_CAPPED(kpj))) 855 3792 akolb return (0); 856 3792 akolb 857 3792 akolb if (kpj->kpj_cpucap == NULL) { 858 3792 akolb /* 859 3792 akolb * This project was never capped before, so allocate its cap 860 3792 akolb * structure. 861 3792 akolb */ 862 3792 akolb cap = cap_alloc(); 863 3792 akolb } 864 3792 akolb 865 3792 akolb mutex_enter(&caps_lock); 866 3792 akolb 867 3792 akolb /* 868 3792 akolb * Double-check with caps_lock held. 869 3792 akolb */ 870 3792 akolb if (kpj->kpj_cpucap == NULL) { 871 3792 akolb kpj->kpj_cpucap = cap; 872 3792 akolb } else if (cap != NULL) { 873 3792 akolb cap_free(cap); 874 3792 akolb } 875 3792 akolb 876 3792 akolb /* 877 3792 akolb * Get the actual pointer to the project cap. 
878 3792 akolb */ 879 3792 akolb cap = kpj->kpj_cpucap; 880 3792 akolb value = cap_val * cap_tick_cost; 881 3792 akolb if (value < 0) 882 3792 akolb value = MAX_USAGE; 883 3792 akolb 884 3792 akolb /* 885 3792 akolb * Nothing to do if the value is not changing 886 3792 akolb */ 887 3792 akolb if (value == cap->cap_value) { 888 3792 akolb mutex_exit(&caps_lock); 889 3792 akolb return (0); 890 3792 akolb } 891 3792 akolb 892 3792 akolb /* 893 3792 akolb * Clear cap statistics since the cap value itself changes. 894 3792 akolb */ 895 3792 akolb cap->cap_above = cap->cap_below = 0; 896 3792 akolb cap->cap_maxusage = 0; 897 3792 akolb 898 3792 akolb if (cap_val != NOCAP) { 899 3792 akolb /* 900 3792 akolb * Enable this cap if it is not already enabled. 901 3792 akolb */ 902 3792 akolb if (CAP_DISABLED(cap)) 903 3792 akolb cap_project_enable(kpj, value); 904 3792 akolb else 905 3792 akolb cap->cap_value = value; 906 3792 akolb } else if (CAP_ENABLED(cap)) { 907 3792 akolb /* 908 3792 akolb * User requested to drop a cap on the project. If it is part of 909 3792 akolb * capped zone, keep the cap and set the value to MAX_USAGE, 910 3792 akolb * otherwise disable the cap. 911 3792 akolb */ 912 3792 akolb if (ZONE_IS_CAPPED(kpj->kpj_zone)) { 913 3792 akolb cap->cap_value = MAX_USAGE; 914 3792 akolb } else { 915 3792 akolb cap_project_disable(kpj); 916 3792 akolb } 917 3792 akolb } 918 3792 akolb mutex_exit(&caps_lock); 919 3792 akolb 920 3792 akolb return (0); 921 3792 akolb } 922 3792 akolb 923 3792 akolb /* 924 3792 akolb * Get cap usage. 925 3792 akolb */ 926 3792 akolb static rctl_qty_t 927 3792 akolb cap_get(cpucap_t *cap) 928 3792 akolb { 929 3792 akolb return (cap != NULL ? (rctl_qty_t)(cap->cap_usage / cap_tick_cost) : 0); 930 3792 akolb } 931 3792 akolb 932 3792 akolb /* 933 3792 akolb * Get current project usage. 
934 3792 akolb */ 935 3792 akolb rctl_qty_t 936 3792 akolb cpucaps_project_get(kproject_t *kpj) 937 3792 akolb { 938 3792 akolb return (cap_get(kpj->kpj_cpucap)); 939 3792 akolb } 940 3792 akolb 941 3792 akolb /* 942 3792 akolb * Get current zone usage. 943 3792 akolb */ 944 3792 akolb rctl_qty_t 945 3792 akolb cpucaps_zone_get(zone_t *zone) 946 3792 akolb { 947 3792 akolb return (cap_get(zone->zone_cpucap)); 948 3792 akolb } 949 3792 akolb 950 3792 akolb /* 951 3792 akolb * Charge project of thread t the time thread t spent on CPU since previously 952 3792 akolb * adjusted. 953 3792 akolb * 954 3792 akolb * Record the current on-CPU time in the csc structure. 955 3792 akolb * 956 3792 akolb * Do not adjust for more than one tick worth of time. 957 3792 akolb * 958 4939 akolb * It is possible that the project cap is being disabled while this routine is 959 4939 akolb * executed. This should not cause any issues since the association between the 960 4939 akolb * thread and its project is protected by thread lock. 
961 3792 akolb */ 962 3792 akolb static void 963 3792 akolb caps_charge_adjust(kthread_id_t t, caps_sc_t *csc) 964 3792 akolb { 965 3792 akolb kproject_t *kpj = ttoproj(t); 966 3792 akolb hrtime_t new_usage; 967 3792 akolb hrtime_t usage_delta; 968 3792 akolb 969 3792 akolb ASSERT(THREAD_LOCK_HELD(t)); 970 4939 akolb ASSERT(kpj->kpj_cpucap != NULL); 971 3792 akolb 972 3792 akolb /* Get on-CPU time since birth of a thread */ 973 3792 akolb new_usage = mstate_thread_onproc_time(t); 974 3792 akolb 975 3792 akolb /* Time spent on CPU since last checked */ 976 3792 akolb usage_delta = new_usage - csc->csc_cputime; 977 3792 akolb 978 3792 akolb /* Save the accumulated on-CPU time */ 979 3792 akolb csc->csc_cputime = new_usage; 980 3792 akolb 981 3792 akolb /* Charge at most one tick worth of on-CPU time */ 982 3792 akolb if (usage_delta > cap_tick_cost) 983 3792 akolb usage_delta = cap_tick_cost; 984 3792 akolb 985 3792 akolb /* Add usage_delta to the project usage value. */ 986 3792 akolb if (usage_delta > 0) { 987 3792 akolb cpucap_t *cap = kpj->kpj_cpucap; 988 3792 akolb 989 3792 akolb DTRACE_PROBE2(cpucaps__project__charge, 990 3792 akolb kthread_id_t, t, hrtime_t, usage_delta); 991 3792 akolb 992 3792 akolb disp_lock_enter_high(&cap->cap_usagelock); 993 3792 akolb cap->cap_usage += usage_delta; 994 3792 akolb 995 3792 akolb /* Check for overflows */ 996 3792 akolb if (cap->cap_usage < 0) 997 3792 akolb cap->cap_usage = MAX_USAGE - 1; 998 3792 akolb 999 3792 akolb disp_lock_exit_high(&cap->cap_usagelock); 1000 3792 akolb 1001 3792 akolb /* 1002 3792 akolb * cap_maxusage is only kept for observability. Move it outside 1003 3792 akolb * the lock to reduce the time spent while holding the lock. 
1004 3792 akolb */ 1005 3792 akolb if (cap->cap_usage > cap->cap_maxusage) 1006 3792 akolb cap->cap_maxusage = cap->cap_usage; 1007 3792 akolb } 1008 3792 akolb } 1009 3792 akolb 1010 3792 akolb /* 1011 3792 akolb * Charge thread's project and return True if project or zone should be 1012 3792 akolb * penalized because its project or zone is exceeding its cap. Also sets 1013 3792 akolb * TS_PROJWAITQ or TS_ZONEWAITQ in this case. 1014 4939 akolb * 1015 4939 akolb * It is possible that the project cap is being disabled while this routine is 1016 4939 akolb * executed. This should not cause any issues since the association between the 1017 4939 akolb * thread and its project is protected by thread lock. It will still set 1018 4939 akolb * TS_PROJECTWAITQ/TS_ZONEWAITQ in this case but cpucaps_enforce will not place 1019 4939 akolb * anything on the blocked wait queue. 1020 4939 akolb * 1021 3792 akolb */ 1022 3792 akolb boolean_t 1023 3792 akolb cpucaps_charge(kthread_id_t t, caps_sc_t *csc, cpucaps_charge_t charge_type) 1024 3792 akolb { 1025 3792 akolb kproject_t *kpj = ttoproj(t); 1026 3792 akolb klwp_t *lwp = t->t_lwp; 1027 3792 akolb zone_t *zone; 1028 3792 akolb cpucap_t *project_cap; 1029 3792 akolb boolean_t rc = B_FALSE; 1030 3792 akolb 1031 3792 akolb ASSERT(THREAD_LOCK_HELD(t)); 1032 3792 akolb 1033 3792 akolb /* Nothing to do for projects that are not capped. */ 1034 3792 akolb if (lwp == NULL || !PROJECT_IS_CAPPED(kpj)) 1035 3792 akolb return (B_FALSE); 1036 3792 akolb 1037 3792 akolb caps_charge_adjust(t, csc); 1038 3792 akolb 1039 3792 akolb /* 1040 3792 akolb * The caller only requested to charge the project usage, no enforcement 1041 3792 akolb * part. 
1042 3792 akolb */ 1043 3792 akolb if (charge_type == CPUCAPS_CHARGE_ONLY) 1044 3792 akolb return (B_FALSE); 1045 3792 akolb 1046 3792 akolb project_cap = kpj->kpj_cpucap; 1047 3792 akolb 1048 3792 akolb if (project_cap->cap_usage >= project_cap->cap_value) { 1049 3792 akolb t->t_schedflag |= TS_PROJWAITQ; 1050 3792 akolb rc = B_TRUE; 1051 3792 akolb } else if (t->t_schedflag & TS_PROJWAITQ) { 1052 3792 akolb t->t_schedflag &= ~TS_PROJWAITQ; 1053 3792 akolb } 1054 3792 akolb 1055 3792 akolb zone = ttozone(t); 1056 3792 akolb if (!ZONE_IS_CAPPED(zone)) { 1057 3792 akolb if (t->t_schedflag & TS_ZONEWAITQ) 1058 3792 akolb t->t_schedflag &= ~TS_ZONEWAITQ; 1059 3792 akolb } else { 1060 3792 akolb cpucap_t *zone_cap = zone->zone_cpucap; 1061 3792 akolb 1062 3792 akolb if (zone_cap->cap_usage >= zone_cap->cap_value) { 1063 3792 akolb t->t_schedflag |= TS_ZONEWAITQ; 1064 3792 akolb rc = B_TRUE; 1065 3792 akolb } else if (t->t_schedflag & TS_ZONEWAITQ) { 1066 3792 akolb t->t_schedflag &= ~TS_ZONEWAITQ; 1067 3792 akolb } 1068 3792 akolb } 1069 3792 akolb 1070 3792 akolb 1071 3792 akolb return (rc); 1072 3792 akolb } 1073 3792 akolb 1074 3792 akolb /* 1075 3792 akolb * Enforce CPU caps. If got preempted in the user-land, we know that thread does 1076 3792 akolb * not hold any kernel locks, so enqueue ourselves on the waitq, if needed. 1077 3792 akolb * 1078 3792 akolb * CPU Caps are only enforced for user threads. 1079 3792 akolb * 1080 3792 akolb * Threads flagged with TS_PROJWAITQ are placed on their project wait queues and 1081 3792 akolb * threads marked with TS_ZONEWAITQ are placed on their zone wait queue. 1082 3792 akolb * 1083 3792 akolb * It is possible that by the time we enter cpucaps_enforce() the cap is already 1084 3792 akolb * disabled. In this case waitq_enqueue() fails and doesn't enqueue anything. We 1085 3792 akolb * still clear TS_PROJWAITQ/TS_ZONEWAITQ flags in this case since they no longer 1086 3792 akolb * apply. 
1087 3792 akolb */ 1088 3792 akolb boolean_t 1089 3792 akolb cpucaps_enforce(kthread_t *t) 1090 3792 akolb { 1091 3792 akolb klwp_t *lwp = t->t_lwp; 1092 3792 akolb 1093 3792 akolb ASSERT(THREAD_LOCK_HELD(t)); 1094 3792 akolb 1095 3792 akolb if (lwp != NULL && lwp->lwp_state == LWP_USER) { 1096 3792 akolb if (t->t_schedflag & TS_PROJWAITQ) { 1097 3792 akolb ASSERT(ttoproj(t)->kpj_cpucap != NULL); 1098 3792 akolb t->t_schedflag &= ~TS_ANYWAITQ; 1099 3792 akolb if (waitq_enqueue(&(ttoproj(t)->kpj_cpucap->cap_waitq), 1100 11066 rafael t)) { 1101 3792 akolb return (B_TRUE); 1102 3792 akolb } 1103 3792 akolb } 1104 3792 akolb if (t->t_schedflag & TS_ZONEWAITQ) { 1105 3792 akolb ASSERT(ttozone(t)->zone_cpucap != NULL); 1106 3792 akolb t->t_schedflag &= ~TS_ZONEWAITQ; 1107 3792 akolb if (waitq_enqueue(&(ttozone(t)->zone_cpucap->cap_waitq), 1108 11066 rafael t)) { 1109 3792 akolb return (B_TRUE); 1110 3792 akolb } 1111 3792 akolb } 1112 3792 akolb } 1113 3792 akolb 1114 3792 akolb /* 1115 3792 akolb * The thread is not enqueued on the wait queue. 1116 3792 akolb */ 1117 3792 akolb return (B_FALSE); 1118 3792 akolb } 1119 3792 akolb 1120 3792 akolb /* 1121 3792 akolb * Convert internal cap statistics into values exported by cap kstat. 
1122 3792 akolb */ 1123 3792 akolb static int 1124 3792 akolb cap_kstat_update(kstat_t *ksp, int rw) 1125 3792 akolb { 1126 3792 akolb struct cap_kstat *capsp = &cap_kstat; 1127 3792 akolb cpucap_t *cap = ksp->ks_private; 1128 3792 akolb clock_t tick_sec = SEC_TO_TICK(1); 1129 3792 akolb char *zonename = cap->cap_zone->zone_name; 1130 3792 akolb 1131 3792 akolb if (rw == KSTAT_WRITE) 1132 3792 akolb return (EACCES); 1133 3792 akolb 1134 3792 akolb capsp->cap_value.value.ui64 = 1135 3792 akolb ROUND_SCALE(cap->cap_value, cap_tick_cost); 1136 3792 akolb capsp->cap_usage.value.ui64 = 1137 3792 akolb ROUND_SCALE(cap->cap_usage, cap_tick_cost); 1138 3792 akolb capsp->cap_maxusage.value.ui64 = 1139 3792 akolb ROUND_SCALE(cap->cap_maxusage, cap_tick_cost); 1140 3792 akolb capsp->cap_nwait.value.ui64 = cap->cap_waitq.wq_count; 1141 3792 akolb capsp->cap_below.value.ui64 = ROUND_SCALE(cap->cap_below, tick_sec); 1142 3792 akolb capsp->cap_above.value.ui64 = ROUND_SCALE(cap->cap_above, tick_sec); 1143 3792 akolb kstat_named_setstr(&capsp->cap_zonename, zonename); 1144 3792 akolb 1145 3792 akolb return (0); 1146 3792 akolb } 1147