1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 22 /* 23 * Copyright 2008 Sun Microsystems, Inc. All rights reserved. 24 * Use is subject to license terms. 25 */ 26 27 #pragma ident "%Z%%M% %I% %E% SMI" 28 29 #include "lint.h" 30 #include "thr_uberdata.h" 31 #include <sys/rtpriocntl.h> 32 #include <sys/sdt.h> 33 #include <atomic.h> 34 35 #if defined(THREAD_DEBUG) 36 #define INCR32(x) (((x) != UINT32_MAX)? (x)++ : 0) 37 #define INCR(x) ((x)++) 38 #define DECR(x) ((x)--) 39 #define MAXINCR(m, x) ((m < ++x)? (m = x) : 0) 40 #else 41 #define INCR32(x) 42 #define INCR(x) 43 #define DECR(x) 44 #define MAXINCR(m, x) 45 #endif 46 47 /* 48 * This mutex is initialized to be held by lwp#1. 49 * It is used to block a thread that has returned from a mutex_lock() 50 * of a LOCK_PRIO_INHERIT mutex with an unrecoverable error. 51 */ 52 mutex_t stall_mutex = DEFAULTMUTEX; 53 54 static int shared_mutex_held(mutex_t *); 55 static int mutex_queuelock_adaptive(mutex_t *); 56 static void mutex_wakeup_all(mutex_t *); 57 58 /* 59 * Lock statistics support functions. 60 */ 61 void 62 record_begin_hold(tdb_mutex_stats_t *msp) 63 { 64 tdb_incr(msp->mutex_lock); 65 msp->mutex_begin_hold = gethrtime(); 66 } 67 68 hrtime_t 69 record_hold_time(tdb_mutex_stats_t *msp) 70 { 71 hrtime_t now = gethrtime(); 72 73 if (msp->mutex_begin_hold) 74 msp->mutex_hold_time += now - msp->mutex_begin_hold; 75 msp->mutex_begin_hold = 0; 76 return (now); 77 } 78 79 /* 80 * Called once at library initialization. 81 */ 82 void 83 mutex_setup(void) 84 { 85 if (set_lock_byte(&stall_mutex.mutex_lockw)) 86 thr_panic("mutex_setup() cannot acquire stall_mutex"); 87 stall_mutex.mutex_owner = (uintptr_t)curthread; 88 } 89 90 /* 91 * The default spin count of 1000 is experimentally determined. 92 * On sun4u machines with any number of processors it could be raised 93 * to 10,000 but that (experimentally) makes almost no difference. 94 * The environment variable: 95 * _THREAD_ADAPTIVE_SPIN=count 96 * can be used to override and set the count in the range [0 .. 1,000,000]. 97 */ 98 int thread_adaptive_spin = 1000; 99 uint_t thread_max_spinners = 100; 100 int thread_queue_verify = 0; 101 static int ncpus; 102 103 /* 104 * Distinguish spinning for queue locks from spinning for regular locks. 105 * We try harder to acquire queue locks by spinning. 106 * The environment variable: 107 * _THREAD_QUEUE_SPIN=count 108 * can be used to override and set the count in the range [0 .. 1,000,000]. 109 */ 110 int thread_queue_spin = 10000; 111 112 #define ALL_ATTRIBUTES \ 113 (LOCK_RECURSIVE | LOCK_ERRORCHECK | \ 114 LOCK_PRIO_INHERIT | LOCK_PRIO_PROTECT | \ 115 LOCK_ROBUST) 116 117 /* 118 * 'type' can be one of USYNC_THREAD, USYNC_PROCESS, or USYNC_PROCESS_ROBUST, 119 * augmented by zero or more the flags: 120 * LOCK_RECURSIVE 121 * LOCK_ERRORCHECK 122 * LOCK_PRIO_INHERIT 123 * LOCK_PRIO_PROTECT 124 * LOCK_ROBUST 125 */ 126 #pragma weak _mutex_init = mutex_init 127 /* ARGSUSED2 */ 128 int 129 mutex_init(mutex_t *mp, int type, void *arg) 130 { 131 int basetype = (type & ~ALL_ATTRIBUTES); 132 const pcclass_t *pccp; 133 int error = 0; 134 int ceil; 135 136 if (basetype == USYNC_PROCESS_ROBUST) { 137 /* 138 * USYNC_PROCESS_ROBUST is a deprecated historical type. 139 * We change it into (USYNC_PROCESS | LOCK_ROBUST) but 140 * retain the USYNC_PROCESS_ROBUST flag so we can return 141 * ELOCKUNMAPPED when necessary (only USYNC_PROCESS_ROBUST 142 * mutexes will ever draw ELOCKUNMAPPED). 143 */ 144 type |= (USYNC_PROCESS | LOCK_ROBUST); 145 basetype = USYNC_PROCESS; 146 } 147 148 if (type & LOCK_PRIO_PROTECT) 149 pccp = get_info_by_policy(SCHED_FIFO); 150 if ((basetype != USYNC_THREAD && basetype != USYNC_PROCESS) || 151 (type & (LOCK_PRIO_INHERIT | LOCK_PRIO_PROTECT)) 152 == (LOCK_PRIO_INHERIT | LOCK_PRIO_PROTECT) || 153 ((type & LOCK_PRIO_PROTECT) && 154 ((ceil = *(int *)arg) < pccp->pcc_primin || 155 ceil > pccp->pcc_primax))) { 156 error = EINVAL; 157 } else if (type & LOCK_ROBUST) { 158 /* 159 * Callers of mutex_init() with the LOCK_ROBUST attribute 160 * are required to pass an initially all-zero mutex. 161 * Multiple calls to mutex_init() are allowed; all but 162 * the first return EBUSY. A call to mutex_init() is 163 * allowed to make an inconsistent robust lock consistent 164 * (for historical usage, even though the proper interface 165 * for this is mutex_consistent()). Note that we use 166 * atomic_or_16() to set the LOCK_INITED flag so as 167 * not to disturb surrounding bits (LOCK_OWNERDEAD, etc). 168 */ 169 if (!(mp->mutex_flag & LOCK_INITED)) { 170 mp->mutex_type = (uint8_t)type; 171 atomic_or_16(&mp->mutex_flag, LOCK_INITED); 172 mp->mutex_magic = MUTEX_MAGIC; 173 } else if (type != mp->mutex_type || 174 ((type & LOCK_PRIO_PROTECT) && mp->mutex_ceiling != ceil)) { 175 error = EINVAL; 176 } else if (mutex_consistent(mp) != 0) { 177 error = EBUSY; 178 } 179 /* register a process robust mutex with the kernel */ 180 if (basetype == USYNC_PROCESS) 181 register_lock(mp); 182 } else { 183 (void) memset(mp, 0, sizeof (*mp)); 184 mp->mutex_type = (uint8_t)type; 185 mp->mutex_flag = LOCK_INITED; 186 mp->mutex_magic = MUTEX_MAGIC; 187 } 188 189 if (error == 0 && (type & LOCK_PRIO_PROTECT)) { 190 mp->mutex_ceiling = ceil; 191 } 192 193 return (error); 194 } 195 196 /* 197 * Delete mp from list of ceiling mutexes owned by curthread. 198 * Return 1 if the head of the chain was updated. 199 */ 200 int 201 _ceil_mylist_del(mutex_t *mp) 202 { 203 ulwp_t *self = curthread; 204 mxchain_t **mcpp; 205 mxchain_t *mcp; 206 207 for (mcpp = &self->ul_mxchain; 208 (mcp = *mcpp) != NULL; 209 mcpp = &mcp->mxchain_next) { 210 if (mcp->mxchain_mx == mp) { 211 *mcpp = mcp->mxchain_next; 212 lfree(mcp, sizeof (*mcp)); 213 return (mcpp == &self->ul_mxchain); 214 } 215 } 216 return (0); 217 } 218 219 /* 220 * Add mp to the list of ceiling mutexes owned by curthread. 221 * Return ENOMEM if no memory could be allocated. 222 */ 223 int 224 _ceil_mylist_add(mutex_t *mp) 225 { 226 ulwp_t *self = curthread; 227 mxchain_t *mcp; 228 229 if ((mcp = lmalloc(sizeof (*mcp))) == NULL) 230 return (ENOMEM); 231 mcp->mxchain_mx = mp; 232 mcp->mxchain_next = self->ul_mxchain; 233 self->ul_mxchain = mcp; 234 return (0); 235 } 236 237 /* 238 * Helper function for _ceil_prio_inherit() and _ceil_prio_waive(), below. 239 */ 240 static void 241 set_rt_priority(ulwp_t *self, int prio) 242 { 243 pcparms_t pcparm; 244 245 pcparm.pc_cid = self->ul_rtclassid; 246 ((rtparms_t *)pcparm.pc_clparms)->rt_tqnsecs = RT_NOCHANGE; 247 ((rtparms_t *)pcparm.pc_clparms)->rt_pri = prio; 248 (void) priocntl(P_LWPID, self->ul_lwpid, PC_SETPARMS, &pcparm); 249 } 250 251 /* 252 * Inherit priority from ceiling. 253 * This changes the effective priority, not the assigned priority. 254 */ 255 void 256 _ceil_prio_inherit(int prio) 257 { 258 ulwp_t *self = curthread; 259 260 self->ul_epri = prio; 261 set_rt_priority(self, prio); 262 } 263 264 /* 265 * Waive inherited ceiling priority. Inherit from head of owned ceiling locks 266 * if holding at least one ceiling lock. If no ceiling locks are held at this 267 * point, disinherit completely, reverting back to assigned priority. 268 */ 269 void 270 _ceil_prio_waive(void) 271 { 272 ulwp_t *self = curthread; 273 mxchain_t *mcp = self->ul_mxchain; 274 int prio; 275 276 if (mcp == NULL) { 277 prio = self->ul_pri; 278 self->ul_epri = 0; 279 } else { 280 prio = mcp->mxchain_mx->mutex_ceiling; 281 self->ul_epri = prio; 282 } 283 set_rt_priority(self, prio); 284 } 285 286 /* 287 * Clear the lock byte. Retain the waiters byte and the spinners byte. 288 * Return the old value of the lock word. 289 */ 290 static uint32_t 291 clear_lockbyte(volatile uint32_t *lockword) 292 { 293 uint32_t old; 294 uint32_t new; 295 296 do { 297 old = *lockword; 298 new = old & ~LOCKMASK; 299 } while (atomic_cas_32(lockword, old, new) != old); 300 301 return (old); 302 } 303 304 /* 305 * Same as clear_lockbyte(), but operates on mutex_lockword64. 306 * The mutex_ownerpid field is cleared along with the lock byte. 307 */ 308 static uint64_t 309 clear_lockbyte64(volatile uint64_t *lockword64) 310 { 311 uint64_t old; 312 uint64_t new; 313 314 do { 315 old = *lockword64; 316 new = old & ~LOCKMASK64; 317 } while (atomic_cas_64(lockword64, old, new) != old); 318 319 return (old); 320 } 321 322 /* 323 * Similar to set_lock_byte(), which only tries to set the lock byte. 324 * Here, we attempt to set the lock byte AND the mutex_ownerpid, 325 * keeping the remaining bytes constant. 326 */ 327 static int 328 set_lock_byte64(volatile uint64_t *lockword64, pid_t ownerpid) 329 { 330 uint64_t old; 331 uint64_t new; 332 333 old = *lockword64 & ~LOCKMASK64; 334 new = old | ((uint64_t)(uint_t)ownerpid << PIDSHIFT) | LOCKBYTE64; 335 if (atomic_cas_64(lockword64, old, new) == old) 336 return (LOCKCLEAR); 337 338 return (LOCKSET); 339 } 340 341 /* 342 * Increment the spinners count in the mutex lock word. 343 * Return 0 on success. Return -1 if the count would overflow. 344 */ 345 static int 346 spinners_incr(volatile uint32_t *lockword, uint8_t max_spinners) 347 { 348 uint32_t old; 349 uint32_t new; 350 351 do { 352 old = *lockword; 353 if (((old & SPINNERMASK) >> SPINNERSHIFT) >= max_spinners) 354 return (-1); 355 new = old + (1 << SPINNERSHIFT); 356 } while (atomic_cas_32(lockword, old, new) != old); 357 358 return (0); 359 } 360 361 /* 362 * Decrement the spinners count in the mutex lock word. 363 * Return the new value of the lock word. 364 */ 365 static uint32_t 366 spinners_decr(volatile uint32_t *lockword) 367 { 368 uint32_t old; 369 uint32_t new; 370 371 do { 372 new = old = *lockword; 373 if (new & SPINNERMASK) 374 new -= (1 << SPINNERSHIFT); 375 } while (atomic_cas_32(lockword, old, new) != old); 376 377 return (new); 378 } 379 380 /* 381 * Non-preemptive spin locks. Used by queue_lock(). 382 * No lock statistics are gathered for these locks. 383 * No DTrace probes are provided for these locks. 384 */ 385 void 386 spin_lock_set(mutex_t *mp) 387 { 388 ulwp_t *self = curthread; 389 390 no_preempt(self); 391 if (set_lock_byte(&mp->mutex_lockw) == 0) { 392 mp->mutex_owner = (uintptr_t)self; 393 return; 394 } 395 /* 396 * Spin for a while, attempting to acquire the lock. 397 */ 398 INCR32(self->ul_spin_lock_spin); 399 if (mutex_queuelock_adaptive(mp) == 0 || 400 set_lock_byte(&mp->mutex_lockw) == 0) { 401 mp->mutex_owner = (uintptr_t)self; 402 return; 403 } 404 /* 405 * Try harder if we were previously at a no premption level. 406 */ 407 if (self->ul_preempt > 1) { 408 INCR32(self->ul_spin_lock_spin2); 409 if (mutex_queuelock_adaptive(mp) == 0 || 410 set_lock_byte(&mp->mutex_lockw) == 0) { 411 mp->mutex_owner = (uintptr_t)self; 412 return; 413 } 414 } 415 /* 416 * Give up and block in the kernel for the mutex. 417 */ 418 INCR32(self->ul_spin_lock_sleep); 419 (void) ___lwp_mutex_timedlock(mp, NULL); 420 mp->mutex_owner = (uintptr_t)self; 421 } 422 423 void 424 spin_lock_clear(mutex_t *mp) 425 { 426 ulwp_t *self = curthread; 427 428 mp->mutex_owner = 0; 429 if (atomic_swap_32(&mp->mutex_lockword, 0) & WAITERMASK) { 430 (void) ___lwp_mutex_wakeup(mp, 0); 431 INCR32(self->ul_spin_lock_wakeup); 432 } 433 preempt(self); 434 } 435 436 /* 437 * Allocate the sleep queue hash table. 438 */ 439 void 440 queue_alloc(void) 441 { 442 ulwp_t *self = curthread; 443 uberdata_t *udp = self->ul_uberdata; 444 queue_head_t *qp; 445 void *data; 446 int i; 447 448 /* 449 * No locks are needed; we call here only when single-threaded. 450 */ 451 ASSERT(self == udp->ulwp_one); 452 ASSERT(!udp->uberflags.uf_mt); 453 if ((data = mmap(NULL, 2 * QHASHSIZE * sizeof (queue_head_t), 454 PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANON, -1, (off_t)0)) 455 == MAP_FAILED) 456 thr_panic("cannot allocate thread queue_head table"); 457 udp->queue_head = qp = (queue_head_t *)data; 458 for (i = 0; i < 2 * QHASHSIZE; qp++, i++) { 459 qp->qh_type = (i < QHASHSIZE)? MX : CV; 460 qp->qh_lock.mutex_flag = LOCK_INITED; 461 qp->qh_lock.mutex_magic = MUTEX_MAGIC; 462 qp->qh_hlist = &qp->qh_def_root; 463 #if defined(THREAD_DEBUG) 464 qp->qh_hlen = 1; 465 qp->qh_hmax = 1; 466 #endif 467 } 468 } 469 470 #if defined(THREAD_DEBUG) 471 472 /* 473 * Debugging: verify correctness of a sleep queue. 474 */ 475 void 476 QVERIFY(queue_head_t *qp) 477 { 478 ulwp_t *self = curthread; 479 uberdata_t *udp = self->ul_uberdata; 480 queue_root_t *qrp; 481 ulwp_t *ulwp; 482 ulwp_t *prev; 483 uint_t index; 484 uint32_t cnt; 485 char qtype; 486 void *wchan; 487 488 ASSERT(qp >= udp->queue_head && (qp - udp->queue_head) < 2 * QHASHSIZE); 489 ASSERT(MUTEX_OWNED(&qp->qh_lock, self)); 490 for (cnt = 0, qrp = qp->qh_hlist; qrp != NULL; qrp = qrp->qr_next) { 491 cnt++; 492 ASSERT((qrp->qr_head != NULL && qrp->qr_tail != NULL) || 493 (qrp->qr_head == NULL && qrp->qr_tail == NULL)); 494 } 495 ASSERT(qp->qh_hlen == cnt && qp->qh_hmax >= cnt); 496 qtype = ((qp - udp->queue_head) < QHASHSIZE)? MX : CV; 497 ASSERT(qp->qh_type == qtype); 498 if (!thread_queue_verify) 499 return; 500 /* real expensive stuff, only for _THREAD_QUEUE_VERIFY */ 501 for (cnt = 0, qrp = qp->qh_hlist; qrp != NULL; qrp = qrp->qr_next) { 502 for (prev = NULL, ulwp = qrp->qr_head; ulwp != NULL; 503 prev = ulwp, ulwp = ulwp->ul_link) { 504 cnt++; 505 if (ulwp->ul_writer) 506 ASSERT(prev == NULL || prev->ul_writer); 507 ASSERT(ulwp->ul_qtype == qtype); 508 ASSERT(ulwp->ul_wchan != NULL); 509 ASSERT(ulwp->ul_sleepq == qp); 510 wchan = ulwp->ul_wchan; 511 ASSERT(qrp->qr_wchan == wchan); 512 index = QUEUE_HASH(wchan, qtype); 513 ASSERT(&udp->queue_head[index] == qp); 514 } 515 ASSERT(qrp->qr_tail == prev); 516 } 517 ASSERT(qp->qh_qlen == cnt); 518 } 519 520 #else /* THREAD_DEBUG */ 521 522 #define QVERIFY(qp) 523 524 #endif /* THREAD_DEBUG */ 525 526 /* 527 * Acquire a queue head. 528 */ 529 queue_head_t * 530 queue_lock(void *wchan, int qtype) 531 { 532 uberdata_t *udp = curthread->ul_uberdata; 533 queue_head_t *qp; 534 queue_root_t *qrp; 535 536 ASSERT(qtype == MX || qtype == CV); 537 538 /* 539 * It is possible that we could be called while still single-threaded. 540 * If so, we call queue_alloc() to allocate the queue_head[] array. 541 */ 542 if ((qp = udp->queue_head) == NULL) { 543 queue_alloc(); 544 qp = udp->queue_head; 545 } 546 qp += QUEUE_HASH(wchan, qtype); 547 spin_lock_set(&qp->qh_lock); 548 for (qrp = qp->qh_hlist; qrp != NULL; qrp = qrp->qr_next) 549 if (qrp->qr_wchan == wchan) 550 break; 551 if (qrp == NULL && qp->qh_def_root.qr_head == NULL) { 552 /* the default queue root is available; use it */ 553 qrp = &qp->qh_def_root; 554 qrp->qr_wchan = wchan; 555 ASSERT(qrp->qr_next == NULL); 556 ASSERT(qrp->qr_tail == NULL && 557 qrp->qr_rtcount == 0 && qrp->qr_qlen == 0); 558 } 559 qp->qh_wchan = wchan; /* valid until queue_unlock() is called */ 560 qp->qh_root = qrp; /* valid until queue_unlock() is called */ 561 INCR32(qp->qh_lockcount); 562 QVERIFY(qp); 563 return (qp); 564 } 565 566 /* 567 * Release a queue head. 568 */ 569 void 570 queue_unlock(queue_head_t *qp) 571 { 572 QVERIFY(qp); 573 spin_lock_clear(&qp->qh_lock); 574 } 575 576 /* 577 * For rwlock queueing, we must queue writers ahead of readers of the 578 * same priority. We do this by making writers appear to have a half 579 * point higher priority for purposes of priority comparisons below. 580 */ 581 #define CMP_PRIO(ulwp) ((real_priority(ulwp) << 1) + (ulwp)->ul_writer) 582 583 void 584 enqueue(queue_head_t *qp, ulwp_t *ulwp, int force_fifo) 585 { 586 queue_root_t *qrp; 587 ulwp_t **ulwpp; 588 ulwp_t *next; 589 int pri = CMP_PRIO(ulwp); 590 591 ASSERT(MUTEX_OWNED(&qp->qh_lock, curthread)); 592 ASSERT(ulwp->ul_sleepq != qp); 593 594 if ((qrp = qp->qh_root) == NULL) { 595 /* use the thread's queue root for the linkage */ 596 qrp = &ulwp->ul_queue_root; 597 qrp->qr_next = qp->qh_hlist; 598 qrp->qr_prev = NULL; 599 qrp->qr_head = NULL; 600 qrp->qr_tail = NULL; 601 qrp->qr_wchan = qp->qh_wchan; 602 qrp->qr_rtcount = 0; 603 qrp->qr_qlen = 0; 604 qrp->qr_qmax = 0; 605 qp->qh_hlist->qr_prev = qrp; 606 qp->qh_hlist = qrp; 607 qp->qh_root = qrp; 608 MAXINCR(qp->qh_hmax, qp->qh_hlen); 609 } 610 611 /* 612 * LIFO queue ordering is unfair and can lead to starvation, 613 * but it gives better performance for heavily contended locks. 614 * We use thread_queue_fifo (range is 0..8) to determine 615 * the frequency of FIFO vs LIFO queuing: 616 * 0 : every 256th time (almost always LIFO) 617 * 1 : every 128th time 618 * 2 : every 64th time 619 * 3 : every 32nd time 620 * 4 : every 16th time (the default value, mostly LIFO) 621 * 5 : every 8th time 622 * 6 : every 4th time 623 * 7 : every 2nd time 624 * 8 : every time (never LIFO, always FIFO) 625 * Note that there is always some degree of FIFO ordering. 626 * This breaks live lock conditions that occur in applications 627 * that are written assuming (incorrectly) that threads acquire 628 * locks fairly, that is, in roughly round-robin order. 629 * In any event, the queue is maintained in kernel priority order. 630 * 631 * If force_fifo is non-zero, fifo queueing is forced. 632 * SUSV3 requires this for semaphores. 633 */ 634 if (qrp->qr_head == NULL) { 635 /* 636 * The queue is empty. LIFO/FIFO doesn't matter. 637 */ 638 ASSERT(qrp->qr_tail == NULL); 639 ulwpp = &qrp->qr_head; 640 } else if (force_fifo | 641 (((++qp->qh_qcnt << curthread->ul_queue_fifo) & 0xff) == 0)) { 642 /* 643 * Enqueue after the last thread whose priority is greater 644 * than or equal to the priority of the thread being queued. 645 * Attempt first to go directly onto the tail of the queue. 646 */ 647 if (pri <= CMP_PRIO(qrp->qr_tail)) 648 ulwpp = &qrp->qr_tail->ul_link; 649 else { 650 for (ulwpp = &qrp->qr_head; (next = *ulwpp) != NULL; 651 ulwpp = &next->ul_link) 652 if (pri > CMP_PRIO(next)) 653 break; 654 } 655 } else { 656 /* 657 * Enqueue before the first thread whose priority is less 658 * than or equal to the priority of the thread being queued. 659 * Hopefully we can go directly onto the head of the queue. 660 */ 661 for (ulwpp = &qrp->qr_head; (next = *ulwpp) != NULL; 662 ulwpp = &next->ul_link) 663 if (pri >= CMP_PRIO(next)) 664 break; 665 } 666 if ((ulwp->ul_link = *ulwpp) == NULL) 667 qrp->qr_tail = ulwp; 668 *ulwpp = ulwp; 669 670 ulwp->ul_sleepq = qp; 671 ulwp->ul_wchan = qp->qh_wchan; 672 ulwp->ul_qtype = qp->qh_type; 673 if ((ulwp->ul_schedctl != NULL && 674 ulwp->ul_schedctl->sc_cid == ulwp->ul_rtclassid) | 675 ulwp->ul_pilocks) { 676 ulwp->ul_rtqueued = 1; 677 qrp->qr_rtcount++; 678 } 679 MAXINCR(qrp->qr_qmax, qrp->qr_qlen); 680 MAXINCR(qp->qh_qmax, qp->qh_qlen); 681 } 682 683 /* 684 * Helper function for queue_slot() and queue_slot_rt(). 685 * Try to find a non-suspended thread on the queue. 686 */ 687 static ulwp_t ** 688 queue_slot_runnable(ulwp_t **ulwpp, ulwp_t **prevp, int rt) 689 { 690 ulwp_t *ulwp; 691 ulwp_t **foundpp = NULL; 692 int priority = -1; 693 ulwp_t *prev; 694 int tpri; 695 696 for (prev = NULL; 697 (ulwp = *ulwpp) != NULL; 698 prev = ulwp, ulwpp = &ulwp->ul_link) { 699 if (ulwp->ul_stop) /* skip suspended threads */ 700 continue; 701 tpri = rt? CMP_PRIO(ulwp) : 0; 702 if (tpri > priority) { 703 foundpp = ulwpp; 704 *prevp = prev; 705 priority = tpri; 706 if (!rt) 707 break; 708 } 709 } 710 return (foundpp); 711 } 712 713 /* 714 * For real-time, we search the entire queue because the dispatch 715 * (kernel) priorities may have changed since enqueueing. 716 */ 717 static ulwp_t ** 718 queue_slot_rt(ulwp_t **ulwpp_org, ulwp_t **prevp) 719 { 720 ulwp_t **ulwpp = ulwpp_org; 721 ulwp_t *ulwp = *ulwpp; 722 ulwp_t **foundpp = ulwpp; 723 int priority = CMP_PRIO(ulwp); 724 ulwp_t *prev; 725 int tpri; 726 727 for (prev = ulwp, ulwpp = &ulwp->ul_link; 728 (ulwp = *ulwpp) != NULL; 729 prev = ulwp, ulwpp = &ulwp->ul_link) { 730 tpri = CMP_PRIO(ulwp); 731 if (tpri > priority) { 732 foundpp = ulwpp; 733 *prevp = prev; 734 priority = tpri; 735 } 736 } 737 ulwp = *foundpp; 738 739 /* 740 * Try not to return a suspended thread. 741 * This mimics the old libthread's behavior. 742 */ 743 if (ulwp->ul_stop && 744 (ulwpp = queue_slot_runnable(ulwpp_org, prevp, 1)) != NULL) { 745 foundpp = ulwpp; 746 ulwp = *foundpp; 747 } 748 ulwp->ul_rt = 1; 749 return (foundpp); 750 } 751 752 ulwp_t ** 753 queue_slot(queue_head_t *qp, ulwp_t **prevp, int *more) 754 { 755 queue_root_t *qrp; 756 ulwp_t **ulwpp; 757 ulwp_t *ulwp; 758 int rt; 759 760 ASSERT(MUTEX_OWNED(&qp->qh_lock, curthread)); 761 762 if ((qrp = qp->qh_root) == NULL || (ulwp = qrp->qr_head) == NULL) { 763 *more = 0; 764 return (NULL); /* no lwps on the queue */ 765 } 766 rt = (qrp->qr_rtcount != 0); 767 *prevp = NULL; 768 if (ulwp->ul_link == NULL) { /* only one lwp on the queue */ 769 *more = 0; 770 ulwp->ul_rt = rt; 771 return (&qrp->qr_head); 772 } 773 *more = 1; 774 775 if (rt) /* real-time queue */ 776 return (queue_slot_rt(&qrp->qr_head, prevp)); 777 /* 778 * Try not to return a suspended thread. 779 * This mimics the old libthread's behavior. 780 */ 781 if (ulwp->ul_stop && 782 (ulwpp = queue_slot_runnable(&qrp->qr_head, prevp, 0)) != NULL) { 783 ulwp = *ulwpp; 784 ulwp->ul_rt = 0; 785 return (ulwpp); 786 } 787 /* 788 * The common case; just pick the first thread on the queue. 789 */ 790 ulwp->ul_rt = 0; 791 return (&qrp->qr_head); 792 } 793 794 /* 795 * Common code for unlinking an lwp from a user-level sleep queue. 796 */ 797 void 798 queue_unlink(queue_head_t *qp, ulwp_t **ulwpp, ulwp_t *prev) 799 { 800 queue_root_t *qrp = qp->qh_root; 801 queue_root_t *nqrp; 802 ulwp_t *ulwp = *ulwpp; 803 ulwp_t *next; 804 805 ASSERT(MUTEX_OWNED(&qp->qh_lock, curthread)); 806 ASSERT(qp->qh_wchan != NULL && ulwp->ul_wchan == qp->qh_wchan); 807 808 DECR(qp->qh_qlen); 809 DECR(qrp->qr_qlen); 810 if (ulwp->ul_rtqueued) { 811 ulwp->ul_rtqueued = 0; 812 qrp->qr_rtcount--; 813 } 814 next = ulwp->ul_link; 815 *ulwpp = next; 816 ulwp->ul_link = NULL; 817 if (qrp->qr_tail == ulwp) 818 qrp->qr_tail = prev; 819 if (qrp == &ulwp->ul_queue_root) { 820 /* 821 * We can't continue to use the unlinked thread's 822 * queue root for the linkage. 823 */ 824 queue_root_t *qr_next = qrp->qr_next; 825 queue_root_t *qr_prev = qrp->qr_prev; 826 827 if (qrp->qr_tail) { 828 /* switch to using the last thread's queue root */ 829 ASSERT(qrp->qr_qlen != 0); 830 nqrp = &qrp->qr_tail->ul_queue_root; 831 *nqrp = *qrp; 832 if (qr_next) 833 qr_next->qr_prev = nqrp; 834 if (qr_prev) 835 qr_prev->qr_next = nqrp; 836 else 837 qp->qh_hlist = nqrp; 838 qp->qh_root = nqrp; 839 } else { 840 /* empty queue root; just delete from the hash list */ 841 ASSERT(qrp->qr_qlen == 0); 842 if (qr_next) 843 qr_next->qr_prev = qr_prev; 844 if (qr_prev) 845 qr_prev->qr_next = qr_next; 846 else 847 qp->qh_hlist = qr_next; 848 qp->qh_root = NULL; 849 DECR(qp->qh_hlen); 850 } 851 } 852 } 853 854 ulwp_t * 855 dequeue(queue_head_t *qp, int *more) 856 { 857 ulwp_t **ulwpp; 858 ulwp_t *ulwp; 859 ulwp_t *prev; 860 861 if ((ulwpp = queue_slot(qp, &prev, more)) == NULL) 862 return (NULL); 863 ulwp = *ulwpp; 864 queue_unlink(qp, ulwpp, prev); 865 ulwp->ul_sleepq = NULL; 866 ulwp->ul_wchan = NULL; 867 return (ulwp); 868 } 869 870 /* 871 * Return a pointer to the highest priority thread sleeping on wchan. 872 */ 873 ulwp_t * 874 queue_waiter(queue_head_t *qp) 875 { 876 ulwp_t **ulwpp; 877 ulwp_t *prev; 878 int more; 879 880 if ((ulwpp = queue_slot(qp, &prev, &more)) == NULL) 881 return (NULL); 882 return (*ulwpp); 883 } 884 885 int 886 dequeue_self(queue_head_t *qp) 887 { 888 ulwp_t *self = curthread; 889 queue_root_t *qrp; 890 ulwp_t **ulwpp; 891 ulwp_t *ulwp; 892 ulwp_t *prev; 893 int found = 0; 894 895 ASSERT(MUTEX_OWNED(&qp->qh_lock, self)); 896 897 /* find self on the sleep queue */ 898 if ((qrp = qp->qh_root) != NULL) { 899 for (prev = NULL, ulwpp = &qrp->qr_head; 900 (ulwp = *ulwpp) != NULL; 901 prev = ulwp, ulwpp = &ulwp->ul_link) { 902 if (ulwp == self) { 903 queue_unlink(qp, ulwpp, prev); 904 self->ul_cvmutex = NULL; 905 self->ul_sleepq = NULL; 906 self->ul_wchan = NULL; 907 found = 1; 908 break; 909 } 910 } 911 } 912 913 if (!found) 914 thr_panic("dequeue_self(): curthread not found on queue"); 915 916 return ((qrp = qp->qh_root) != NULL && qrp->qr_head != NULL); 917 } 918 919 /* 920 * Called from call_user_handler() and _thrp_suspend() to take 921 * ourself off of our sleep queue so we can grab locks. 922 */ 923 void 924 unsleep_self(void) 925 { 926 ulwp_t *self = curthread; 927 queue_head_t *qp; 928 929 /* 930 * Calling enter_critical()/exit_critical() here would lead 931 * to recursion. Just manipulate self->ul_critical directly. 932 */ 933 self->ul_critical++; 934 while (self->ul_sleepq != NULL) { 935 qp = queue_lock(self->ul_wchan, self->ul_qtype); 936 /* 937 * We may have been moved from a CV queue to a 938 * mutex queue while we were attempting queue_lock(). 939 * If so, just loop around and try again. 940 * dequeue_self() clears self->ul_sleepq. 941 */ 942 if (qp == self->ul_sleepq) 943 (void) dequeue_self(qp); 944 queue_unlock(qp); 945 } 946 self->ul_writer = 0; 947 self->ul_critical--; 948 } 949 950 /* 951 * Common code for calling the the ___lwp_mutex_timedlock() system call. 952 * Returns with mutex_owner and mutex_ownerpid set correctly. 953 */ 954 static int 955 mutex_lock_kernel(mutex_t *mp, timespec_t *tsp, tdb_mutex_stats_t *msp) 956 { 957 ulwp_t *self = curthread; 958 uberdata_t *udp = self->ul_uberdata; 959 int mtype = mp->mutex_type; 960 hrtime_t begin_sleep; 961 int acquired; 962 int error; 963 964 self->ul_sp = stkptr(); 965 self->ul_wchan = mp; 966 if (__td_event_report(self, TD_SLEEP, udp)) { 967 self->ul_td_evbuf.eventnum = TD_SLEEP; 968 self->ul_td_evbuf.eventdata = mp; 969 tdb_event(TD_SLEEP, udp); 970 } 971 if (msp) { 972 tdb_incr(msp->mutex_sleep); 973 begin_sleep = gethrtime(); 974 } 975 976 DTRACE_PROBE1(plockstat, mutex__block, mp); 977 978 for (;;) { 979 /* 980 * A return value of EOWNERDEAD or ELOCKUNMAPPED 981 * means we successfully acquired the lock. 982 */ 983 if ((error = ___lwp_mutex_timedlock(mp, tsp)) != 0 && 984 error != EOWNERDEAD && error != ELOCKUNMAPPED) { 985 acquired = 0; 986 break; 987 } 988 989 if (mtype & USYNC_PROCESS) { 990 /* 991 * Defend against forkall(). We may be the child, 992 * in which case we don't actually own the mutex. 993 */ 994 enter_critical(self); 995 if (mp->mutex_ownerpid == udp->pid) { 996 mp->mutex_owner = (uintptr_t)self; 997 exit_critical(self); 998 acquired = 1; 999 break; 1000 } 1001 exit_critical(self); 1002 } else { 1003 mp->mutex_owner = (uintptr_t)self; 1004 acquired = 1; 1005 break; 1006 } 1007 } 1008 if (msp) 1009 msp->mutex_sleep_time += gethrtime() - begin_sleep; 1010 self->ul_wchan = NULL; 1011 self->ul_sp = 0; 1012 1013 if (acquired) { 1014 DTRACE_PROBE2(plockstat, mutex__blocked, mp, 1); 1015 DTRACE_PROBE3(plockstat, mutex__acquire, mp, 0, 0); 1016 } else { 1017 DTRACE_PROBE2(plockstat, mutex__blocked, mp, 0); 1018 DTRACE_PROBE2(plockstat, mutex__error, mp, error); 1019 } 1020 1021 return (error); 1022 } 1023 1024 /* 1025 * Common code for calling the ___lwp_mutex_trylock() system call. 1026 * Returns with mutex_owner and mutex_ownerpid set correctly. 1027 */ 1028 int 1029 mutex_trylock_kernel(mutex_t *mp) 1030 { 1031 ulwp_t *self = curthread; 1032 uberdata_t *udp = self->ul_uberdata; 1033 int mtype = mp->mutex_type; 1034 int error; 1035 int acquired; 1036 1037 for (;;) { 1038 /* 1039 * A return value of EOWNERDEAD or ELOCKUNMAPPED 1040 * means we successfully acquired the lock. 1041 */ 1042 if ((error = ___lwp_mutex_trylock(mp)) != 0 && 1043 error != EOWNERDEAD && error != ELOCKUNMAPPED) { 1044 acquired = 0; 1045 break; 1046 } 1047 1048 if (mtype & USYNC_PROCESS) { 1049 /* 1050 * Defend against forkall(). We may be the child, 1051 * in which case we don't actually own the mutex. 1052 */ 1053 enter_critical(self); 1054 if (mp->mutex_ownerpid == udp->pid) { 1055 mp->mutex_owner = (uintptr_t)self; 1056 exit_critical(self); 1057 acquired = 1; 1058 break; 1059 } 1060 exit_critical(self); 1061 } else { 1062 mp->mutex_owner = (uintptr_t)self; 1063 acquired = 1; 1064 break; 1065 } 1066 } 1067 1068 if (acquired) { 1069 DTRACE_PROBE3(plockstat, mutex__acquire, mp, 0, 0); 1070 } else if (error != EBUSY) { 1071 DTRACE_PROBE2(plockstat, mutex__error, mp, error); 1072 } 1073 1074 return (error); 1075 } 1076 1077 volatile sc_shared_t * 1078 setup_schedctl(void) 1079 { 1080 ulwp_t *self = curthread; 1081 volatile