Home | History | Annotate | Download | only in threads
      1      0    stevel /*
      2      0    stevel  * CDDL HEADER START
      3      0    stevel  *
      4      0    stevel  * The contents of this file are subject to the terms of the
      5   1893       raf  * Common Development and Distribution License (the "License").
      6   1893       raf  * You may not use this file except in compliance with the License.
      7      0    stevel  *
      8      0    stevel  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
      9      0    stevel  * or http://www.opensolaris.org/os/licensing.
     10      0    stevel  * See the License for the specific language governing permissions
     11      0    stevel  * and limitations under the License.
     12      0    stevel  *
     13      0    stevel  * When distributing Covered Code, include this CDDL HEADER in each
     14      0    stevel  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
     15      0    stevel  * If applicable, add the following below this CDDL HEADER, with the
     16      0    stevel  * fields enclosed by brackets "[]" replaced with your own identifying
     17      0    stevel  * information: Portions Copyright [yyyy] [name of copyright owner]
     18      0    stevel  *
     19      0    stevel  * CDDL HEADER END
     20      0    stevel  */
     21   1219       raf 
     22      0    stevel /*
     23   9170     Roger  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
     24      0    stevel  * Use is subject to license terms.
     25      0    stevel  */
     26      0    stevel 
     27      0    stevel #include "lint.h"
     28      0    stevel #include "thr_uberdata.h"
     29   6247       raf #include <sys/rtpriocntl.h>
     30   6057       raf #include <sys/sdt.h>
     31   6057       raf #include <atomic.h>
     32   6247       raf 
     33   6247       raf #if defined(THREAD_DEBUG)
     34   6247       raf #define	INCR32(x)	(((x) != UINT32_MAX)? (x)++ : 0)
     35   6247       raf #define	INCR(x)		((x)++)
     36   6247       raf #define	DECR(x)		((x)--)
     37   6247       raf #define	MAXINCR(m, x)	((m < ++x)? (m = x) : 0)
     38   6247       raf #else
     39   6247       raf #define	INCR32(x)
     40   6247       raf #define	INCR(x)
     41   6247       raf #define	DECR(x)
     42   6247       raf #define	MAXINCR(m, x)
     43   6247       raf #endif
     44      0    stevel 
     45      0    stevel /*
     46      0    stevel  * This mutex is initialized to be held by lwp#1.
     47      0    stevel  * It is used to block a thread that has returned from a mutex_lock()
     48   4574       raf  * of a LOCK_PRIO_INHERIT mutex with an unrecoverable error.
     49      0    stevel  */
     50      0    stevel mutex_t	stall_mutex = DEFAULTMUTEX;
     51      0    stevel 
     52      0    stevel static int shared_mutex_held(mutex_t *);
     53   4574       raf static int mutex_queuelock_adaptive(mutex_t *);
     54   4574       raf static void mutex_wakeup_all(mutex_t *);
     55      0    stevel 
     56      0    stevel /*
     57      0    stevel  * Lock statistics support functions.
     58      0    stevel  */
     59      0    stevel void
     60      0    stevel record_begin_hold(tdb_mutex_stats_t *msp)
     61      0    stevel {
     62      0    stevel 	tdb_incr(msp->mutex_lock);
     63      0    stevel 	msp->mutex_begin_hold = gethrtime();
     64      0    stevel }
     65      0    stevel 
     66      0    stevel hrtime_t
     67      0    stevel record_hold_time(tdb_mutex_stats_t *msp)
     68      0    stevel {
     69      0    stevel 	hrtime_t now = gethrtime();
     70      0    stevel 
     71      0    stevel 	if (msp->mutex_begin_hold)
     72      0    stevel 		msp->mutex_hold_time += now - msp->mutex_begin_hold;
     73      0    stevel 	msp->mutex_begin_hold = 0;
     74      0    stevel 	return (now);
     75      0    stevel }
     76      0    stevel 
     77      0    stevel /*
     78      0    stevel  * Called once at library initialization.
     79      0    stevel  */
     80      0    stevel void
     81      0    stevel mutex_setup(void)
     82      0    stevel {
     83      0    stevel 	if (set_lock_byte(&stall_mutex.mutex_lockw))
     84      0    stevel 		thr_panic("mutex_setup() cannot acquire stall_mutex");
     85      0    stevel 	stall_mutex.mutex_owner = (uintptr_t)curthread;
     86      0    stevel }
     87      0    stevel 
     88      0    stevel /*
     89   5629       raf  * The default spin count of 1000 is experimentally determined.
     90   5629       raf  * On sun4u machines with any number of processors it could be raised
     91      0    stevel  * to 10,000 but that (experimentally) makes almost no difference.
     92   5629       raf  * The environment variable:
     93      0    stevel  *	_THREAD_ADAPTIVE_SPIN=count
     94   5629       raf  * can be used to override and set the count in the range [0 .. 1,000,000].
     95      0    stevel  */
     96      0    stevel int	thread_adaptive_spin = 1000;
     97      0    stevel uint_t	thread_max_spinners = 100;
     98      0    stevel int	thread_queue_verify = 0;
     99      0    stevel static	int	ncpus;
    100      0    stevel 
    101      0    stevel /*
    102      0    stevel  * Distinguish spinning for queue locks from spinning for regular locks.
    103   5629       raf  * We try harder to acquire queue locks by spinning.
    104      0    stevel  * The environment variable:
    105      0    stevel  *	_THREAD_QUEUE_SPIN=count
    106      0    stevel  * can be used to override and set the count in the range [0 .. 1,000,000].
    107      0    stevel  */
    108   5629       raf int	thread_queue_spin = 10000;
    109      0    stevel 
    110   4574       raf #define	ALL_ATTRIBUTES				\
    111   4574       raf 	(LOCK_RECURSIVE | LOCK_ERRORCHECK |	\
    112   4574       raf 	LOCK_PRIO_INHERIT | LOCK_PRIO_PROTECT |	\
    113   4574       raf 	LOCK_ROBUST)
    114      0    stevel 
    115      0    stevel /*
    116   4574       raf  * 'type' can be one of USYNC_THREAD, USYNC_PROCESS, or USYNC_PROCESS_ROBUST,
    117   4574       raf  * augmented by zero or more the flags:
    118   4574       raf  *	LOCK_RECURSIVE
    119   4574       raf  *	LOCK_ERRORCHECK
    120   4574       raf  *	LOCK_PRIO_INHERIT
    121   4574       raf  *	LOCK_PRIO_PROTECT
    122   4574       raf  *	LOCK_ROBUST
    123      0    stevel  */
    124   6812       raf #pragma weak _mutex_init = mutex_init
    125      0    stevel /* ARGSUSED2 */
    126      0    stevel int
    127   6812       raf mutex_init(mutex_t *mp, int type, void *arg)
    128      0    stevel {
    129   4574       raf 	int basetype = (type & ~ALL_ATTRIBUTES);
    130   6247       raf 	const pcclass_t *pccp;
    131   4574       raf 	int error = 0;
    132   6247       raf 	int ceil;
    133      0    stevel 
    134   4574       raf 	if (basetype == USYNC_PROCESS_ROBUST) {
    135   4574       raf 		/*
    136   4574       raf 		 * USYNC_PROCESS_ROBUST is a deprecated historical type.
    137   4574       raf 		 * We change it into (USYNC_PROCESS | LOCK_ROBUST) but
    138   4574       raf 		 * retain the USYNC_PROCESS_ROBUST flag so we can return
    139   4574       raf 		 * ELOCKUNMAPPED when necessary (only USYNC_PROCESS_ROBUST
    140   4574       raf 		 * mutexes will ever draw ELOCKUNMAPPED).
    141   4574       raf 		 */
    142   4574       raf 		type |= (USYNC_PROCESS | LOCK_ROBUST);
    143   4574       raf 		basetype = USYNC_PROCESS;
    144   4574       raf 	}
    145   4574       raf 
    146   6247       raf 	if (type & LOCK_PRIO_PROTECT)
    147   6247       raf 		pccp = get_info_by_policy(SCHED_FIFO);
    148   6247       raf 	if ((basetype != USYNC_THREAD && basetype != USYNC_PROCESS) ||
    149   4574       raf 	    (type & (LOCK_PRIO_INHERIT | LOCK_PRIO_PROTECT))
    150   6247       raf 	    == (LOCK_PRIO_INHERIT | LOCK_PRIO_PROTECT) ||
    151   6247       raf 	    ((type & LOCK_PRIO_PROTECT) &&
    152   6247       raf 	    ((ceil = *(int *)arg) < pccp->pcc_primin ||
    153   6247       raf 	    ceil > pccp->pcc_primax))) {
    154   4574       raf 		error = EINVAL;
    155   4574       raf 	} else if (type & LOCK_ROBUST) {
    156   4574       raf 		/*
    157   4574       raf 		 * Callers of mutex_init() with the LOCK_ROBUST attribute
    158   4574       raf 		 * are required to pass an initially all-zero mutex.
    159   4574       raf 		 * Multiple calls to mutex_init() are allowed; all but
    160   4574       raf 		 * the first return EBUSY.  A call to mutex_init() is
    161   4574       raf 		 * allowed to make an inconsistent robust lock consistent
    162   4574       raf 		 * (for historical usage, even though the proper interface
    163   4574       raf 		 * for this is mutex_consistent()).  Note that we use
    164   4574       raf 		 * atomic_or_16() to set the LOCK_INITED flag so as
    165   4574       raf 		 * not to disturb surrounding bits (LOCK_OWNERDEAD, etc).
    166   4574       raf 		 */
    167   4574       raf 		if (!(mp->mutex_flag & LOCK_INITED)) {
    168   4574       raf 			mp->mutex_type = (uint8_t)type;
    169   6812       raf 			atomic_or_16(&mp->mutex_flag, LOCK_INITED);
    170   4574       raf 			mp->mutex_magic = MUTEX_MAGIC;
    171   4574       raf 		} else if (type != mp->mutex_type ||
    172   6247       raf 		    ((type & LOCK_PRIO_PROTECT) && mp->mutex_ceiling != ceil)) {
    173   4574       raf 			error = EINVAL;
    174   6812       raf 		} else if (mutex_consistent(mp) != 0) {
    175   4574       raf 			error = EBUSY;
    176   4574       raf 		}
    177   4574       raf 		/* register a process robust mutex with the kernel */
    178   4574       raf 		if (basetype == USYNC_PROCESS)
    179   4574       raf 			register_lock(mp);
    180   4574       raf 	} else {
    181   6515       raf 		(void) memset(mp, 0, sizeof (*mp));
    182      0    stevel 		mp->mutex_type = (uint8_t)type;
    183      0    stevel 		mp->mutex_flag = LOCK_INITED;
    184   4574       raf 		mp->mutex_magic = MUTEX_MAGIC;
    185      0    stevel 	}
    186   4574       raf 
    187   6247       raf 	if (error == 0 && (type & LOCK_PRIO_PROTECT)) {
    188   6247       raf 		mp->mutex_ceiling = ceil;
    189   6247       raf 	}
    190   4574       raf 
    191   7255       raf 	/*
    192   7255       raf 	 * This should be at the beginning of the function,
    193   7255       raf 	 * but for the sake of old broken applications that
    194   7255       raf 	 * do not have proper alignment for their mutexes
    195   7255       raf 	 * (and don't check the return code from mutex_init),
    196   7255       raf 	 * we put it here, after initializing the mutex regardless.
    197   7255       raf 	 */
    198   7255       raf 	if (error == 0 &&
    199   7255       raf 	    ((uintptr_t)mp & (_LONG_LONG_ALIGNMENT - 1)) &&
    200   7255       raf 	    curthread->ul_misaligned == 0)
    201   7255       raf 		error = EINVAL;
    202   7255       raf 
    203      0    stevel 	return (error);
    204      0    stevel }
    205      0    stevel 
    206      0    stevel /*
    207   6247       raf  * Delete mp from list of ceiling mutexes owned by curthread.
    208      0    stevel  * Return 1 if the head of the chain was updated.
    209      0    stevel  */
    210      0    stevel int
    211      0    stevel _ceil_mylist_del(mutex_t *mp)
    212      0    stevel {
    213      0    stevel 	ulwp_t *self = curthread;
    214      0    stevel 	mxchain_t **mcpp;
    215      0    stevel 	mxchain_t *mcp;
    216      0    stevel 
    217   6247       raf 	for (mcpp = &self->ul_mxchain;
    218   6247       raf 	    (mcp = *mcpp) != NULL;
    219   6247       raf 	    mcpp = &mcp->mxchain_next) {
    220   6247       raf 		if (mcp->mxchain_mx == mp) {
    221   6247       raf 			*mcpp = mcp->mxchain_next;
    222   6247       raf 			lfree(mcp, sizeof (*mcp));
    223   6247       raf 			return (mcpp == &self->ul_mxchain);
    224   6247       raf 		}
    225   6247       raf 	}
    226   6247       raf 	return (0);
    227      0    stevel }
    228      0    stevel 
    229      0    stevel /*
    230   6247       raf  * Add mp to the list of ceiling mutexes owned by curthread.
    231      0    stevel  * Return ENOMEM if no memory could be allocated.
    232      0    stevel  */
    233      0    stevel int
    234      0    stevel _ceil_mylist_add(mutex_t *mp)
    235      0    stevel {
    236      0    stevel 	ulwp_t *self = curthread;
    237      0    stevel 	mxchain_t *mcp;
    238      0    stevel 
    239      0    stevel 	if ((mcp = lmalloc(sizeof (*mcp))) == NULL)
    240      0    stevel 		return (ENOMEM);
    241      0    stevel 	mcp->mxchain_mx = mp;
    242      0    stevel 	mcp->mxchain_next = self->ul_mxchain;
    243      0    stevel 	self->ul_mxchain = mcp;
    244      0    stevel 	return (0);
    245      0    stevel }
    246      0    stevel 
    247      0    stevel /*
    248   6247       raf  * Helper function for _ceil_prio_inherit() and _ceil_prio_waive(), below.
    249   6247       raf  */
    250   6247       raf static void
    251   6247       raf set_rt_priority(ulwp_t *self, int prio)
    252   6247       raf {
    253   6247       raf 	pcparms_t pcparm;
    254   6247       raf 
    255   6247       raf 	pcparm.pc_cid = self->ul_rtclassid;
    256   6247       raf 	((rtparms_t *)pcparm.pc_clparms)->rt_tqnsecs = RT_NOCHANGE;
    257   6247       raf 	((rtparms_t *)pcparm.pc_clparms)->rt_pri = prio;
    258   6515       raf 	(void) priocntl(P_LWPID, self->ul_lwpid, PC_SETPARMS, &pcparm);
    259   6247       raf }
    260   6247       raf 
    261   6247       raf /*
    262   6247       raf  * Inherit priority from ceiling.
    263   6247       raf  * This changes the effective priority, not the assigned priority.
    264      0    stevel  */
    265      0    stevel void
    266   6247       raf _ceil_prio_inherit(int prio)
    267      0    stevel {
    268      0    stevel 	ulwp_t *self = curthread;
    269      0    stevel 
    270   6247       raf 	self->ul_epri = prio;
    271   6247       raf 	set_rt_priority(self, prio);
    272      0    stevel }
    273      0    stevel 
    274      0    stevel /*
    275      0    stevel  * Waive inherited ceiling priority.  Inherit from head of owned ceiling locks
    276      0    stevel  * if holding at least one ceiling lock.  If no ceiling locks are held at this
    277      0    stevel  * point, disinherit completely, reverting back to assigned priority.
    278      0    stevel  */
    279      0    stevel void
    280      0    stevel _ceil_prio_waive(void)
    281      0    stevel {
    282      0    stevel 	ulwp_t *self = curthread;
    283   6247       raf 	mxchain_t *mcp = self->ul_mxchain;
    284   6247       raf 	int prio;
    285      0    stevel 
    286   6247       raf 	if (mcp == NULL) {
    287   6247       raf 		prio = self->ul_pri;
    288   6247       raf 		self->ul_epri = 0;
    289      0    stevel 	} else {
    290   6247       raf 		prio = mcp->mxchain_mx->mutex_ceiling;
    291   6247       raf 		self->ul_epri = prio;
    292      0    stevel 	}
    293   6247       raf 	set_rt_priority(self, prio);
    294      0    stevel }
    295      0    stevel 
    296      0    stevel /*
    297   5629       raf  * Clear the lock byte.  Retain the waiters byte and the spinners byte.
    298   5629       raf  * Return the old value of the lock word.
    299   5629       raf  */
    300   5629       raf static uint32_t
    301   5629       raf clear_lockbyte(volatile uint32_t *lockword)
    302   5629       raf {
    303   5629       raf 	uint32_t old;
    304   5629       raf 	uint32_t new;
    305   5629       raf 
    306   5629       raf 	do {
    307   5629       raf 		old = *lockword;
    308   5629       raf 		new = old & ~LOCKMASK;
    309   5629       raf 	} while (atomic_cas_32(lockword, old, new) != old);
    310   5629       raf 
    311   5629       raf 	return (old);
    312   6057       raf }
    313   6057       raf 
    314   6057       raf /*
    315   6057       raf  * Same as clear_lockbyte(), but operates on mutex_lockword64.
    316   6057       raf  * The mutex_ownerpid field is cleared along with the lock byte.
    317   6057       raf  */
    318   6057       raf static uint64_t
    319   6057       raf clear_lockbyte64(volatile uint64_t *lockword64)
    320   6057       raf {
    321   6057       raf 	uint64_t old;
    322   6057       raf 	uint64_t new;
    323   6057       raf 
    324   6057       raf 	do {
    325   6057       raf 		old = *lockword64;
    326   6057       raf 		new = old & ~LOCKMASK64;
    327   6057       raf 	} while (atomic_cas_64(lockword64, old, new) != old);
    328   6057       raf 
    329   6057       raf 	return (old);
    330   6057       raf }
    331   6057       raf 
    332   6057       raf /*
    333   6057       raf  * Similar to set_lock_byte(), which only tries to set the lock byte.
    334   7255       raf  * Here, we attempt to set the lock byte AND the mutex_ownerpid, keeping
    335   7255       raf  * the remaining bytes constant.  This atomic operation is required for the
    336   7255       raf  * correctness of process-shared robust locks, otherwise there would be
    337   7255       raf  * a window or vulnerability in which the lock byte had been set but the
    338   7255       raf  * mutex_ownerpid had not yet been set.  If the process were to die in
    339   7255       raf  * this window of vulnerability (due to some other thread calling exit()
    340   7255       raf  * or the process receiving a fatal signal), the mutex would be left locked
    341   7255       raf  * but without a process-ID to determine which process was holding the lock.
    342   7255       raf  * The kernel would then be unable to mark the robust mutex as LOCK_OWNERDEAD
    343   7255       raf  * when the process died.  For all other cases of process-shared locks, this
    344   7255       raf  * operation is just a convenience, for the sake of common code.
    345   7255       raf  *
    346   7255       raf  * This operation requires process-shared robust locks to be properly
    347   7255       raf  * aligned on an 8-byte boundary, at least on sparc machines, lest the
    348   7255       raf  * operation incur an alignment fault.  This is automatic when locks
    349   7255       raf  * are declared properly using the mutex_t or pthread_mutex_t data types
    350   7255       raf  * and the application does not allocate dynamic memory on less than an
    351   7255       raf  * 8-byte boundary.  See the 'horrible hack' comments below for cases
    352   7255       raf  * dealing with such broken applications.
    353   6057       raf  */
    354   6057       raf static int
    355   6057       raf set_lock_byte64(volatile uint64_t *lockword64, pid_t ownerpid)
    356   6057       raf {
    357   6057       raf 	uint64_t old;
    358   6057       raf 	uint64_t new;
    359   6057       raf 
    360   6057       raf 	old = *lockword64 & ~LOCKMASK64;
    361   6057       raf 	new = old | ((uint64_t)(uint_t)ownerpid << PIDSHIFT) | LOCKBYTE64;
    362   6057       raf 	if (atomic_cas_64(lockword64, old, new) == old)
    363   6057       raf 		return (LOCKCLEAR);
    364   6057       raf 
    365   6057       raf 	return (LOCKSET);
    366   5629       raf }
    367   5629       raf 
    368   5629       raf /*
    369   5629       raf  * Increment the spinners count in the mutex lock word.
    370   5629       raf  * Return 0 on success.  Return -1 if the count would overflow.
    371   5629       raf  */
    372   5629       raf static int
    373   5629       raf spinners_incr(volatile uint32_t *lockword, uint8_t max_spinners)
    374   5629       raf {
    375   5629       raf 	uint32_t old;
    376   5629       raf 	uint32_t new;
    377   5629       raf 
    378   5629       raf 	do {
    379   5629       raf 		old = *lockword;
    380   5629       raf 		if (((old & SPINNERMASK) >> SPINNERSHIFT) >= max_spinners)
    381   5629       raf 			return (-1);
    382   5629       raf 		new = old + (1 << SPINNERSHIFT);
    383   5629       raf 	} while (atomic_cas_32(lockword, old, new) != old);
    384   5629       raf 
    385   5629       raf 	return (0);
    386   5629       raf }
    387   5629       raf 
    388   5629       raf /*
    389   5629       raf  * Decrement the spinners count in the mutex lock word.
    390   5629       raf  * Return the new value of the lock word.
    391   5629       raf  */
    392   5629       raf static uint32_t
    393   5629       raf spinners_decr(volatile uint32_t *lockword)
    394   5629       raf {
    395   5629       raf 	uint32_t old;
    396   5629       raf 	uint32_t new;
    397   5629       raf 
    398   5629       raf 	do {
    399   5629       raf 		new = old = *lockword;
    400   5629       raf 		if (new & SPINNERMASK)
    401   5629       raf 			new -= (1 << SPINNERSHIFT);
    402   5629       raf 	} while (atomic_cas_32(lockword, old, new) != old);
    403   5629       raf 
    404   5629       raf 	return (new);
    405   5629       raf }
    406   5629       raf 
    407   5629       raf /*
    408      0    stevel  * Non-preemptive spin locks.  Used by queue_lock().
    409      0    stevel  * No lock statistics are gathered for these locks.
    410   5629       raf  * No DTrace probes are provided for these locks.
    411      0    stevel  */
    412      0    stevel void
    413      0    stevel spin_lock_set(mutex_t *mp)
    414      0    stevel {
    415      0    stevel 	ulwp_t *self = curthread;
    416      0    stevel 
    417      0    stevel 	no_preempt(self);
    418      0    stevel 	if (set_lock_byte(&mp->mutex_lockw) == 0) {
    419      0    stevel 		mp->mutex_owner = (uintptr_t)self;
    420      0    stevel 		return;
    421      0    stevel 	}
    422      0    stevel 	/*
    423      0    stevel 	 * Spin for a while, attempting to acquire the lock.
    424      0    stevel 	 */
    425   6247       raf 	INCR32(self->ul_spin_lock_spin);
    426      0    stevel 	if (mutex_queuelock_adaptive(mp) == 0 ||
    427      0    stevel 	    set_lock_byte(&mp->mutex_lockw) == 0) {
    428      0    stevel 		mp->mutex_owner = (uintptr_t)self;
    429      0    stevel 		return;
    430      0    stevel 	}
    431      0    stevel 	/*
    432      0    stevel 	 * Try harder if we were previously at a no premption level.
    433      0    stevel 	 */
    434      0    stevel 	if (self->ul_preempt > 1) {
    435   6247       raf 		INCR32(self->ul_spin_lock_spin2);
    436      0    stevel 		if (mutex_queuelock_adaptive(mp) == 0 ||
    437      0    stevel 		    set_lock_byte(&mp->mutex_lockw) == 0) {
    438      0    stevel 			mp->mutex_owner = (uintptr_t)self;
    439      0    stevel 			return;
    440      0    stevel 		}
    441      0    stevel 	}
    442      0    stevel 	/*
    443      0    stevel 	 * Give up and block in the kernel for the mutex.
    444      0    stevel 	 */
    445   6247       raf 	INCR32(self->ul_spin_lock_sleep);
    446  10887     Roger 	(void) ___lwp_mutex_timedlock(mp, NULL, self);
    447      0    stevel }
    448      0    stevel 
    449      0    stevel void
    450      0    stevel spin_lock_clear(mutex_t *mp)
    451      0    stevel {
    452      0    stevel 	ulwp_t *self = curthread;
    453      0    stevel 
    454      0    stevel 	mp->mutex_owner = 0;
    455   4570       raf 	if (atomic_swap_32(&mp->mutex_lockword, 0) & WAITERMASK) {
    456   4574       raf 		(void) ___lwp_mutex_wakeup(mp, 0);
    457   6247       raf 		INCR32(self->ul_spin_lock_wakeup);
    458      0    stevel 	}
    459      0    stevel 	preempt(self);
    460      0    stevel }
    461      0    stevel 
    462      0    stevel /*
    463      0    stevel  * Allocate the sleep queue hash table.
    464      0    stevel  */
    465      0    stevel void
    466      0    stevel queue_alloc(void)
    467      0    stevel {
    468      0    stevel 	ulwp_t *self = curthread;
    469      0    stevel 	uberdata_t *udp = self->ul_uberdata;
    470   6247       raf 	queue_head_t *qp;
    471      0    stevel 	void *data;
    472      0    stevel 	int i;
    473      0    stevel 
    474      0    stevel 	/*
    475      0    stevel 	 * No locks are needed; we call here only when single-threaded.
    476      0    stevel 	 */
    477      0    stevel 	ASSERT(self == udp->ulwp_one);
    478      0    stevel 	ASSERT(!udp->uberflags.uf_mt);
    479   6515       raf 	if ((data = mmap(NULL, 2 * QHASHSIZE * sizeof (queue_head_t),
    480      0    stevel 	    PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANON, -1, (off_t)0))
    481      0    stevel 	    == MAP_FAILED)
    482      0    stevel 		thr_panic("cannot allocate thread queue_head table");
    483   6247       raf 	udp->queue_head = qp = (queue_head_t *)data;
    484   6247       raf 	for (i = 0; i < 2 * QHASHSIZE; qp++, i++) {
    485   6247       raf 		qp->qh_type = (i < QHASHSIZE)? MX : CV;
    486   6247       raf 		qp->qh_lock.mutex_flag = LOCK_INITED;
    487   6247       raf 		qp->qh_lock.mutex_magic = MUTEX_MAGIC;
    488   6247       raf 		qp->qh_hlist = &qp->qh_def_root;
    489   6247       raf #if defined(THREAD_DEBUG)
    490   6247       raf 		qp->qh_hlen = 1;
    491   6247       raf 		qp->qh_hmax = 1;
    492   6247       raf #endif
    493   4574       raf 	}
    494      0    stevel }
    495      0    stevel 
    496      0    stevel #if defined(THREAD_DEBUG)
    497      0    stevel 
    498      0    stevel /*
    499      0    stevel  * Debugging: verify correctness of a sleep queue.
    500      0    stevel  */
    501      0    stevel void
    502      0    stevel QVERIFY(queue_head_t *qp)
    503      0    stevel {
    504      0    stevel 	ulwp_t *self = curthread;
    505      0    stevel 	uberdata_t *udp = self->ul_uberdata;
    506   6247       raf 	queue_root_t *qrp;
    507      0    stevel 	ulwp_t *ulwp;
    508      0    stevel 	ulwp_t *prev;
    509      0    stevel 	uint_t index;
    510   6247       raf 	uint32_t cnt;
    511      0    stevel 	char qtype;
    512      0    stevel 	void *wchan;
    513      0    stevel 
    514      0    stevel 	ASSERT(qp >= udp->queue_head && (qp - udp->queue_head) < 2 * QHASHSIZE);
    515      0    stevel 	ASSERT(MUTEX_OWNED(&qp->qh_lock, self));
    516   6247       raf 	for (cnt = 0, qrp = qp->qh_hlist; qrp != NULL; qrp = qrp->qr_next) {
    517   6247       raf 		cnt++;
    518   6247       raf 		ASSERT((qrp->qr_head != NULL && qrp->qr_tail != NULL) ||
    519   6247       raf 		    (qrp->qr_head == NULL && qrp->qr_tail == NULL));
    520   6247       raf 	}
    521   6247       raf 	ASSERT(qp->qh_hlen == cnt && qp->qh_hmax >= cnt);
    522   6247       raf 	qtype = ((qp - udp->queue_head) < QHASHSIZE)? MX : CV;
    523   6247       raf 	ASSERT(qp->qh_type == qtype);
    524      0    stevel 	if (!thread_queue_verify)
    525      0    stevel 		return;
    526      0    stevel 	/* real expensive stuff, only for _THREAD_QUEUE_VERIFY */
    527   6247       raf 	for (cnt = 0, qrp = qp->qh_hlist; qrp != NULL; qrp = qrp->qr_next) {
    528   6247       raf 		for (prev = NULL, ulwp = qrp->qr_head; ulwp != NULL;
    529   6247       raf 		    prev = ulwp, ulwp = ulwp->ul_link) {
    530   6247       raf 			cnt++;
    531   6247       raf 			if (ulwp->ul_writer)
    532   6247       raf 				ASSERT(prev == NULL || prev->ul_writer);
    533   6247       raf 			ASSERT(ulwp->ul_qtype == qtype);
    534   6247       raf 			ASSERT(ulwp->ul_wchan != NULL);
    535   6247       raf 			ASSERT(ulwp->ul_sleepq == qp);
    536   6247       raf 			wchan = ulwp->ul_wchan;
    537   6247       raf 			ASSERT(qrp->qr_wchan == wchan);
    538   6247       raf 			index = QUEUE_HASH(wchan, qtype);
    539   6247       raf 			ASSERT(&udp->queue_head[index] == qp);
    540   6247       raf 		}
    541   6247       raf 		ASSERT(qrp->qr_tail == prev);
    542      0    stevel 	}
    543      0    stevel 	ASSERT(qp->qh_qlen == cnt);
    544      0    stevel }
    545      0    stevel 
    546      0    stevel #else	/* THREAD_DEBUG */
    547      0    stevel 
    548      0    stevel #define	QVERIFY(qp)
    549      0    stevel 
    550      0    stevel #endif	/* THREAD_DEBUG */
    551      0    stevel 
    552      0    stevel /*
    553      0    stevel  * Acquire a queue head.
    554      0    stevel  */
    555      0    stevel queue_head_t *
    556      0    stevel queue_lock(void *wchan, int qtype)
    557      0    stevel {
    558      0    stevel 	uberdata_t *udp = curthread->ul_uberdata;
    559      0    stevel 	queue_head_t *qp;
    560   6247       raf 	queue_root_t *qrp;
    561      0    stevel 
    562      0    stevel 	ASSERT(qtype == MX || qtype == CV);
    563      0    stevel 
    564      0    stevel 	/*
    565      0    stevel 	 * It is possible that we could be called while still single-threaded.
    566      0    stevel 	 * If so, we call queue_alloc() to allocate the queue_head[] array.
    567      0    stevel 	 */
    568      0    stevel 	if ((qp = udp->queue_head) == NULL) {
    569      0    stevel 		queue_alloc();
    570      0    stevel 		qp = udp->queue_head;
    571      0    stevel 	}
    572      0    stevel 	qp += QUEUE_HASH(wchan, qtype);
    573      0    stevel 	spin_lock_set(&qp->qh_lock);
    574   6247       raf 	for (qrp = qp->qh_hlist; qrp != NULL; qrp = qrp->qr_next)
    575   6247       raf 		if (qrp->qr_wchan == wchan)
    576   6247       raf 			break;
    577   6247       raf 	if (qrp == NULL && qp->qh_def_root.qr_head == NULL) {
    578   6247       raf 		/* the default queue root is available; use it */
    579   6247       raf 		qrp = &qp->qh_def_root;
    580   6247       raf 		qrp->qr_wchan = wchan;
    581   6247       raf 		ASSERT(qrp->qr_next == NULL);
    582   6247       raf 		ASSERT(qrp->qr_tail == NULL &&
    583   6247       raf 		    qrp->qr_rtcount == 0 && qrp->qr_qlen == 0);
    584   6247       raf 	}
    585   6247       raf 	qp->qh_wchan = wchan;	/* valid until queue_unlock() is called */
    586   6247       raf 	qp->qh_root = qrp;	/* valid until queue_unlock() is called */
    587   6247       raf 	INCR32(qp->qh_lockcount);
    588      0    stevel 	QVERIFY(qp);
    589      0    stevel 	return (qp);
    590      0    stevel }
    591      0    stevel 
    592      0    stevel /*
    593      0    stevel  * Release a queue head.
    594      0    stevel  */
    595      0    stevel void
    596      0    stevel queue_unlock(queue_head_t *qp)
    597      0    stevel {
    598      0    stevel 	QVERIFY(qp);
    599      0    stevel 	spin_lock_clear(&qp->qh_lock);
    600      0    stevel }
    601      0    stevel 
    602      0    stevel /*
    603      0    stevel  * For rwlock queueing, we must queue writers ahead of readers of the
    604      0    stevel  * same priority.  We do this by making writers appear to have a half
    605      0    stevel  * point higher priority for purposes of priority comparisons below.
    606      0    stevel  */
    607      0    stevel #define	CMP_PRIO(ulwp)	((real_priority(ulwp) << 1) + (ulwp)->ul_writer)
    608      0    stevel 
    609      0    stevel void
    610   6247       raf enqueue(queue_head_t *qp, ulwp_t *ulwp, int force_fifo)
    611      0    stevel {
    612   6247       raf 	queue_root_t *qrp;
    613      0    stevel 	ulwp_t **ulwpp;
    614      0    stevel 	ulwp_t *next;
    615      0    stevel 	int pri = CMP_PRIO(ulwp);
    616      0    stevel 
    617      0    stevel 	ASSERT(MUTEX_OWNED(&qp->qh_lock, curthread));
    618      0    stevel 	ASSERT(ulwp->ul_sleepq != qp);
    619   6247       raf 
    620   6247       raf 	if ((qrp = qp->qh_root) == NULL) {
    621   6247       raf 		/* use the thread's queue root for the linkage */
    622   6247       raf 		qrp = &ulwp->ul_queue_root;
    623   6247       raf 		qrp->qr_next = qp->qh_hlist;
    624   6247       raf 		qrp->qr_prev = NULL;
    625   6247       raf 		qrp->qr_head = NULL;
    626   6247       raf 		qrp->qr_tail = NULL;
    627   6247       raf 		qrp->qr_wchan = qp->qh_wchan;
    628   6247       raf 		qrp->qr_rtcount = 0;
    629   6247       raf 		qrp->qr_qlen = 0;
    630   6247       raf 		qrp->qr_qmax = 0;
    631   6247       raf 		qp->qh_hlist->qr_prev = qrp;
    632   6247       raf 		qp->qh_hlist = qrp;
    633   6247       raf 		qp->qh_root = qrp;
    634   6247       raf 		MAXINCR(qp->qh_hmax, qp->qh_hlen);
    635   6247       raf 	}
    636      0    stevel 
    637      0    stevel 	/*
    638      0    stevel 	 * LIFO queue ordering is unfair and can lead to starvation,
    639      0    stevel 	 * but it gives better performance for heavily contended locks.
    640      0    stevel 	 * We use thread_queue_fifo (range is 0..8) to determine
    641      0    stevel 	 * the frequency of FIFO vs LIFO queuing:
    642      0    stevel 	 *	0 : every 256th time	(almost always LIFO)
    643      0    stevel 	 *	1 : every 128th time
    644      0    stevel 	 *	2 : every 64th  time
    645      0    stevel 	 *	3 : every 32nd  time
    646      0    stevel 	 *	4 : every 16th  time	(the default value, mostly LIFO)
    647      0    stevel 	 *	5 : every 8th   time
    648      0    stevel 	 *	6 : every 4th   time
    649      0    stevel 	 *	7 : every 2nd   time
    650      0    stevel 	 *	8 : every time		(never LIFO, always FIFO)
    651      0    stevel 	 * Note that there is always some degree of FIFO ordering.
    652      0    stevel 	 * This breaks live lock conditions that occur in applications
    653      0    stevel 	 * that are written assuming (incorrectly) that threads acquire
    654      0    stevel 	 * locks fairly, that is, in roughly round-robin order.
    655   6247       raf 	 * In any event, the queue is maintained in kernel priority order.
    656      0    stevel 	 *
    657   6247       raf 	 * If force_fifo is non-zero, fifo queueing is forced.
    658      0    stevel 	 * SUSV3 requires this for semaphores.
    659      0    stevel 	 */
    660   6247       raf 	if (qrp->qr_head == NULL) {
    661      0    stevel 		/*
    662      0    stevel 		 * The queue is empty.  LIFO/FIFO doesn't matter.
    663      0    stevel 		 */
    664   6247       raf 		ASSERT(qrp->qr_tail == NULL);
    665   6247       raf 		ulwpp = &qrp->qr_head;
    666   6247       raf 	} else if (force_fifo |
    667   6247       raf 	    (((++qp->qh_qcnt << curthread->ul_queue_fifo) & 0xff) == 0)) {
    668      0    stevel 		/*
    669      0    stevel 		 * Enqueue after the last thread whose priority is greater
    670      0    stevel 		 * than or equal to the priority of the thread being queued.
    671      0    stevel 		 * Attempt first to go directly onto the tail of the queue.
    672      0    stevel 		 */
    673   6247       raf 		if (pri <= CMP_PRIO(qrp->qr_tail))
    674   6247       raf 			ulwpp = &qrp->qr_tail->ul_link;
    675      0    stevel 		else {
    676   6247       raf 			for (ulwpp = &qrp->qr_head; (next = *ulwpp) != NULL;
    677      0    stevel 			    ulwpp = &next->ul_link)
    678      0    stevel 				if (pri > CMP_PRIO(next))
    679      0    stevel 					break;
    680      0    stevel 		}
    681      0    stevel 	} else {
    682      0    stevel 		/*
    683      0    stevel 		 * Enqueue before the first thread whose priority is less
    684      0    stevel 		 * than or equal to the priority of the thread being queued.
    685      0    stevel 		 * Hopefully we can go directly onto the head of the queue.
    686      0    stevel 		 */
    687   6247       raf 		for (ulwpp = &qrp->qr_head; (next = *ulwpp) != NULL;
    688      0    stevel 		    ulwpp = &next->ul_link)
    689      0    stevel 			if (pri >= CMP_PRIO(next))
    690      0    stevel 				break;
    691      0    stevel 	}
    692      0    stevel 	if ((ulwp->ul_link = *ulwpp) == NULL)
    693   6247       raf 		qrp->qr_tail = ulwp;
    694      0    stevel 	*ulwpp = ulwp;
    695      0    stevel 
    696      0    stevel 	ulwp->ul_sleepq = qp;
    697   6247       raf 	ulwp->ul_wchan = qp->qh_wchan;
    698   6247       raf 	ulwp->ul_qtype = qp->qh_type;
    699   6247       raf 	if ((ulwp->ul_schedctl != NULL &&
    700   6247       raf 	    ulwp->ul_schedctl->sc_cid == ulwp->ul_rtclassid) |
    701   6247       raf 	    ulwp->ul_pilocks) {
    702   6247       raf 		ulwp->ul_rtqueued = 1;
    703   6247       raf 		qrp->qr_rtcount++;
    704   6247       raf 	}
    705   6247       raf 	MAXINCR(qrp->qr_qmax, qrp->qr_qlen);
    706   6247       raf 	MAXINCR(qp->qh_qmax, qp->qh_qlen);
    707      0    stevel }
    708      0    stevel 
    709      0    stevel /*
    710   6247       raf  * Helper function for queue_slot() and queue_slot_rt().
    711   6247       raf  * Try to find a non-suspended thread on the queue.
    712      0    stevel  */
    713      0    stevel static ulwp_t **
    714   6247       raf queue_slot_runnable(ulwp_t **ulwpp, ulwp_t **prevp, int rt)
    715   6247       raf {
    716   6247       raf 	ulwp_t *ulwp;
    717   6247       raf 	ulwp_t **foundpp = NULL;
    718   6247       raf 	int priority = -1;
    719   6247       raf 	ulwp_t *prev;
    720   6247       raf 	int tpri;
    721   6247       raf 
    722   6247       raf 	for (prev = NULL;
    723   6247       raf 	    (ulwp = *ulwpp) != NULL;
    724   6247       raf 	    prev = ulwp, ulwpp = &ulwp->ul_link) {
    725   6247       raf 		if (ulwp->ul_stop)	/* skip suspended threads */
    726   6247       raf 			continue;
    727   6247       raf 		tpri = rt? CMP_PRIO(ulwp) : 0;
    728   6247       raf 		if (tpri > priority) {
    729   6247       raf 			foundpp = ulwpp;
    730   6247       raf 			*prevp = prev;
    731   6247       raf 			priority = tpri;
    732   6247       raf 			if (!rt)
    733   6247       raf 				break;
    734   6247       raf 		}
    735   6247       raf 	}
    736   6247       raf 	return (foundpp);
    737   6247       raf }
    738   6247       raf 
    739   6247       raf /*
    740   6247       raf  * For real-time, we search the entire queue because the dispatch
    741   6247       raf  * (kernel) priorities may have changed since enqueueing.
    742   6247       raf  */
    743   6247       raf static ulwp_t **
    744   6247       raf queue_slot_rt(ulwp_t **ulwpp_org, ulwp_t **prevp)
    745   6247       raf {
    746   6247       raf 	ulwp_t **ulwpp = ulwpp_org;
    747   6247       raf 	ulwp_t *ulwp = *ulwpp;
    748   6247       raf 	ulwp_t **foundpp = ulwpp;
    749   6247       raf 	int priority = CMP_PRIO(ulwp);
    750   6247       raf 	ulwp_t *prev;
    751   6247       raf 	int tpri;
    752   6247       raf 
    753   6247       raf 	for (prev = ulwp, ulwpp = &ulwp->ul_link;
    754   6247       raf 	    (ulwp = *ulwpp) != NULL;
    755   6247       raf 	    prev = ulwp, ulwpp = &ulwp->ul_link) {
    756   6247       raf 		tpri = CMP_PRIO(ulwp);
    757   6247       raf 		if (tpri > priority) {
    758   6247       raf 			foundpp = ulwpp;
    759   6247       raf 			*prevp = prev;
    760   6247       raf 			priority = tpri;
    761   6247       raf 		}
    762   6247       raf 	}
    763   6247       raf 	ulwp = *foundpp;
    764   6247       raf 
    765   6247       raf 	/*
    766   6247       raf 	 * Try not to return a suspended thread.
    767   6247       raf 	 * This mimics the old libthread's behavior.
    768   6247       raf 	 */
    769   6247       raf 	if (ulwp->ul_stop &&
    770   6247       raf 	    (ulwpp = queue_slot_runnable(ulwpp_org, prevp, 1)) != NULL) {
    771   6247       raf 		foundpp = ulwpp;
    772   6247       raf 		ulwp = *foundpp;
    773   6247       raf 	}
    774   6247       raf 	ulwp->ul_rt = 1;
    775   6247       raf 	return (foundpp);
    776   6247       raf }
    777   6247       raf 
    778   6247       raf ulwp_t **
    779   6247       raf queue_slot(queue_head_t *qp, ulwp_t **prevp, int *more)
    780   6247       raf {
    781   6247       raf 	queue_root_t *qrp;
    782   6247       raf 	ulwp_t **ulwpp;
    783   6247       raf 	ulwp_t *ulwp;
    784   6247       raf 	int rt;
    785   6247       raf 
    786   6247       raf 	ASSERT(MUTEX_OWNED(&qp->qh_lock, curthread));
    787   6247       raf 
    788   6247       raf 	if ((qrp = qp->qh_root) == NULL || (ulwp = qrp->qr_head) == NULL) {
    789   6247       raf 		*more = 0;
    790   6247       raf 		return (NULL);		/* no lwps on the queue */
    791   6247       raf 	}
    792   6247       raf 	rt = (qrp->qr_rtcount != 0);
    793   6247       raf 	*prevp = NULL;
    794   6247       raf 	if (ulwp->ul_link == NULL) {	/* only one lwp on the queue */
    795   6247       raf 		*more = 0;
    796   6247       raf 		ulwp->ul_rt = rt;
    797   6247       raf 		return (&qrp->qr_head);
    798   6247       raf 	}
    799   6247       raf 	*more = 1;
    800   6247       raf 
    801   6247       raf 	if (rt)		/* real-time queue */
    802   6247       raf 		return (queue_slot_rt(&qrp->qr_head, prevp));
    803   6247       raf 	/*
    804   6247       raf 	 * Try not to return a suspended thread.
    805   6247       raf 	 * This mimics the old libthread's behavior.
    806   6247       raf 	 */
    807   6247       raf 	if (ulwp->ul_stop &&
    808   6247       raf 	    (ulwpp = queue_slot_runnable(&qrp->qr_head, prevp, 0)) != NULL) {
    809   6247       raf 		ulwp = *ulwpp;
    810   6247       raf 		ulwp->ul_rt = 0;
    811   6247       raf 		return (ulwpp);
    812   6247       raf 	}
    813   6247       raf 	/*
    814   6247       raf 	 * The common case; just pick the first thread on the queue.
    815   6247       raf 	 */
    816   6247       raf 	ulwp->ul_rt = 0;
    817   6247       raf 	return (&qrp->qr_head);
    818   6247       raf }
    819   6247       raf 
    820   6247       raf /*
    821   6247       raf  * Common code for unlinking an lwp from a user-level sleep queue.
    822   6247       raf  */
    823   6247       raf void
    824   6247       raf queue_unlink(queue_head_t *qp, ulwp_t **ulwpp, ulwp_t *prev)
    825   6247       raf {
    826   6247       raf 	queue_root_t *qrp = qp->qh_root;
    827   6247       raf 	queue_root_t *nqrp;
    828   6247       raf 	ulwp_t *ulwp = *ulwpp;
    829   6247       raf 	ulwp_t *next;
    830   6247       raf 
    831   6247       raf 	ASSERT(MUTEX_OWNED(&qp->qh_lock, curthread));
    832   6247       raf 	ASSERT(qp->qh_wchan != NULL && ulwp->ul_wchan == qp->qh_wchan);
    833   6247       raf 
    834   6247       raf 	DECR(qp->qh_qlen);
    835   6247       raf 	DECR(qrp->qr_qlen);
    836   6247       raf 	if (ulwp->ul_rtqueued) {
    837   6247       raf 		ulwp->ul_rtqueued = 0;
    838   6247       raf 		qrp->qr_rtcount--;
    839   6247       raf 	}
    840   6247       raf 	next = ulwp->ul_link;
    841   6247       raf 	*ulwpp = next;
    842   6247       raf 	ulwp->ul_link = NULL;
    843   6247       raf 	if (qrp->qr_tail == ulwp)
    844   6247       raf 		qrp->qr_tail = prev;
    845   6247       raf 	if (qrp == &ulwp->ul_queue_root) {
    846   6247       raf 		/*
    847   6247       raf 		 * We can't continue to use the unlinked thread's
    848   6247       raf 		 * queue root for the linkage.
    849   6247       raf 		 */
    850   6247       raf 		queue_root_t *qr_next = qrp->qr_next;
    851   6247       raf 		queue_root_t *qr_prev = qrp->qr_prev;
    852   6247       raf 
    853   6247       raf 		if (qrp->qr_tail) {
    854   6247       raf 			/* switch to using the last thread's queue root */
    855   6247       raf 			ASSERT(qrp->qr_qlen != 0);
    856   6247       raf 			nqrp = &qrp->qr_tail->ul_queue_root;
    857   6247       raf 			*nqrp = *qrp;
    858   6247       raf 			if (qr_next)
    859   6247       raf 				qr_next->qr_prev = nqrp;
    860   6247       raf 			if (qr_prev)
    861   6247       raf 				qr_prev->qr_next = nqrp;
    862   6247       raf 			else
    863   6247       raf 				qp->qh_hlist = nqrp;
    864   6247       raf 			qp->qh_root = nqrp;
    865   6247       raf 		} else {
    866   6247       raf 			/* empty queue root; just delete from the hash list */
    867   6247       raf 			ASSERT(qrp->qr_qlen == 0);
    868   6247       raf 			if (qr_next)
    869   6247       raf 				qr_next->qr_prev = qr_prev;
    870   6247       raf 			if (qr_prev)
    871   6247       raf 				qr_prev->qr_next = qr_next;
    872   6247       raf 			else
    873   6247       raf 				qp->qh_hlist = qr_next;
    874   6247       raf 			qp->qh_root = NULL;
    875   6247       raf 			DECR(qp->qh_hlen);
    876   6247       raf 		}
    877   6247       raf 	}
    878   6247       raf }
    879   6247       raf 
    880   6247       raf ulwp_t *
    881   6247       raf dequeue(queue_head_t *qp, int *more)
    882      0    stevel {
    883      0    stevel 	ulwp_t **ulwpp;
    884      0    stevel 	ulwp_t *ulwp;
    885   6247       raf 	ulwp_t *prev;
    886      0    stevel 
    887   6247       raf 	if ((ulwpp = queue_slot(qp, &prev, more)) == NULL)
    888      0    stevel 		return (NULL);
    889      0    stevel 	ulwp = *ulwpp;
    890   6247       raf 	queue_unlink(qp, ulwpp, prev);
    891      0    stevel 	ulwp->ul_sleepq = NULL;
    892      0    stevel 	ulwp->ul_wchan = NULL;
    893      0    stevel 	return (ulwp);
    894      0    stevel }
    895      0    stevel 
    896      0    stevel /*
    897      0    stevel  * Return a pointer to the highest priority thread sleeping on wchan.
    898      0    stevel  */
    899      0    stevel ulwp_t *
    900   6247       raf queue_waiter(queue_head_t *qp)
    901      0    stevel {
    902      0    stevel 	ulwp_t **ulwpp;
    903   6247       raf 	ulwp_t *prev;
    904   6247       raf 	int more;
    905      0    stevel 
    906   6247       raf 	if ((ulwpp = queue_slot(qp, &prev, &more)) == NULL)
    907      0    stevel 		return (NULL);
    908      0    stevel 	return (*ulwpp);
    909      0    stevel }
    910      0    stevel 
    911   6247       raf int
    912   6247       raf dequeue_self(queue_head_t *qp)
    913      0    stevel {
    914      0    stevel 	ulwp_t *self = curthread;
    915   6247       raf 	queue_root_t *qrp;
    916      0    stevel 	ulwp_t **ulwpp;
    917      0    stevel 	ulwp_t *ulwp;
    918   6247       raf 	ulwp_t *prev;
    919      0    stevel 	int found = 0;
    920      0    stevel 
    921      0    stevel 	ASSERT(MUTEX_OWNED(&qp->qh_lock, self));
    922      0    stevel 
    923      0    stevel 	/* find self on the sleep queue */
    924   6247       raf 	if ((qrp = qp->qh_root) != NULL) {
    925   6247       raf 		for (prev = NULL, ulwpp = &qrp->qr_head;
    926   6247       raf 		    (ulwp = *ulwpp) != NULL;
    927   6247       raf 		    prev = ulwp, ulwpp = &ulwp->ul_link) {
    928   6247       raf 			if (ulwp == self) {
    929   6247       raf 				queue_unlink(qp, ulwpp, prev);
    930   6247       raf 				self->ul_cvmutex = NULL;
    931   6247       raf 				self->ul_sleepq = NULL;
    932   6247       raf 				self->ul_wchan = NULL;
    933   6247       raf 				found = 1;
    934   6247       raf 				break;
    935   6247       raf 			}
    936      0    stevel 		}
    937      0    stevel 	}
    938      0    stevel 
    939      0    stevel 	if (!found)
    940      0    stevel 		thr_panic("dequeue_self(): curthread not found on queue");
    941      0    stevel 
    942   6247       raf 	return ((qrp = qp->qh_root) != NULL && qrp->qr_head != NULL);
    943      0    stevel }
    944      0    stevel 
    945      0    stevel /*
    946      0    stevel  * Called from call_user_handler() and _thrp_suspend() to take
    947      0    stevel  * ourself off of our sleep queue so we can grab locks.
    948      0    stevel  */
    949      0    stevel void
    950      0    stevel unsleep_self(void)
    951      0    stevel {
    952      0    stevel 	ulwp_t *self = curthread;
    953      0    stevel 	queue_head_t *qp;
    954      0    stevel 
    955      0    stevel 	/*
    956      0    stevel 	 * Calling enter_critical()/exit_critical() here would lead
    957      0    stevel 	 * to recursion.  Just manipulate self->ul_critical directly.
    958      0    stevel 	 */
    959      0    stevel 	self->ul_critical++;
    960      0    stevel 	while (self->ul_sleepq != NULL) {
    961      0    stevel 		qp = queue_lock(self->ul_wchan, self->ul_qtype);
    962      0    stevel 		/*
    963      0    stevel 		 * We may have been moved from a CV queue to a
    964      0    stevel 		 * mutex queue while we were attempting queue_lock().
    965      0    stevel 		 * If so, just loop around and try again.
    966      0    stevel 		 * dequeue_self() clears self->ul_sleepq.
    967      0    stevel 		 */
    968   6247       raf 		if (qp == self->ul_sleepq)
    969   6247       raf 			(void) dequeue_self(qp);
    970      0    stevel 		queue_unlock(qp);
    971      0    stevel 	}
    972   6247       raf 	self->ul_writer = 0;
    973      0    stevel 	self->ul_critical--;
    974      0    stevel }
    975      0    stevel 
    976      0    stevel /*
    977      0    stevel  * Common code for calling the the ___lwp_mutex_timedlock() system call.
    978      0    stevel  * Returns with mutex_owner and mutex_ownerpid set correctly.
    979      0    stevel  */
    980   4574       raf static int
    981      0    stevel mutex_lock_kernel(mutex_t *mp, timespec_t *tsp, tdb_mutex_stats_t *msp)
    982      0    stevel {
    983      0    stevel 	ulwp_t *self = curthread;
    984      0    stevel 	uberdata_t *udp = self->ul_uberdata;
    985   4574       raf 	int mtype = mp->mutex_type;
    986      0    stevel 	hrtime_t begin_sleep;
    987   4574       raf 	int acquired;
    988      0    stevel 	int error;
    989      0    stevel 
    990      0    stevel 	self->ul_sp = stkptr();
    991      0    stevel 	self->ul_wchan = mp;
    992      0    stevel 	if (__td_event_report(self, TD_SLEEP, udp)) {
    993      0    stevel 		self->ul_td_evbuf.eventnum = TD_SLEEP;
    994      0    stevel 		self->ul_td_evbuf.eventdata = mp;
    995      0    stevel 		tdb_event(TD_SLEEP, udp);
    996      0    stevel 	}
    997      0    stevel 	if (msp) {
    998      0    stevel 		tdb_incr(msp->mutex_sleep);
    999      0    stevel 		begin_sleep = gethrtime();
   1000      0    stevel 	}
   1001      0    stevel 
   1002      0    stevel 	DTRACE_PROBE1(plockstat, mutex__block, mp);
   1003      0    stevel 
   1004      0    stevel 	for (;;) {
   1005   4574       raf 		/*
   1006   4574       raf 		 * A return value of EOWNERDEAD or ELOCKUNMAPPED
   1007   4574       raf 		 * means we successfully acquired the lock.
   1008   4574       raf 		 */
   1009  10887     Roger 		if ((error = ___lwp_mutex_timedlock(mp, tsp, self)) != 0 &&
   1010   4574       raf 		    error != EOWNERDEAD && error != ELOCKUNMAPPED) {
   1011   4574       raf 			acquired = 0;
   1012      0    stevel 			break;
   1013      0    stevel 		}
   1014      0    stevel 
   1015   4574       raf 		if (mtype & USYNC_PROCESS) {
   1016      0    stevel 			/*
   1017      0    stevel 			 * Defend against forkall().  We may be the child,
   1018      0    stevel 			 * in which case we don't actually own the mutex.
   1019      0    stevel 			 */
   1020      0    stevel 			enter_critical(self);
   1021      0    stevel 			if (mp->mutex_ownerpid == udp->pid) {
   1022      0    stevel 				exit_critical(self);
   1023   4574       raf 				acquired = 1;
   1024      0    stevel 				break;
   1025      0    stevel 			}
   1026      0    stevel 			exit_critical(self);
   1027      0    stevel 		} else {
   1028   4574       raf 			acquired = 1;
   1029      0    stevel 			break;
   1030      0    stevel 		}
   1031      0    stevel 	}
   1032   7907     Roger 
   1033      0    stevel 	if (msp)
   1034      0    stevel 		msp->mutex_sleep_time += gethrtime() - begin_sleep;
   1035      0    stevel 	self->ul_wchan = NULL;
   1036      0    stevel 	self->ul_sp = 0;
   1037   4574       raf 
   1038   4574       raf 	if (acquired) {
   1039  10887     Roger 		ASSERT(mp->mutex_owner == (uintptr_t)self);
   1040   4574       raf 		DTRACE_PROBE2(plockstat, mutex__blocked, mp, 1);
   1041   4574       raf 		DTRACE_PROBE3(plockstat, mutex__acquire, mp, 0, 0);
   1042   4574       raf 	} else {
   1043   4574       raf 		DTRACE_PROBE2(plockstat, mutex__blocked, mp, 0);
   1044   4574       raf 		DTRACE_PROBE2(plockstat, mutex__error, mp, error);
   1045   4574       raf 	}
   1046      0    stevel 
   1047      0    stevel 	return (error);
   1048      0    stevel }
   1049      0    stevel 
   1050      0    stevel /*
   1051      0    stevel  * Common code for calling the ___lwp_mutex_trylock() system call.
   1052      0    stevel  * Returns with mutex_owner and mutex_ownerpid set correctly.
   1053      0    stevel  */
   1054      0    stevel int
   1055      0    stevel mutex_trylock_kernel(mutex_t *mp)
   1056      0    stevel {
   1057      0    stevel 	ulwp_t *self = curthread;
   1058      0    stevel 	uberdata_t *udp = self->ul_uberdata;
   1059   4574       raf 	int mtype = mp->mutex_type;
   1060      0    stevel 	int error;
   1061   4574       raf 	int acquired;
   1062      0    stevel 
   1063      0    stevel 	for (;;) {
   1064   4574       raf 		/*
   1065   4574       raf 		 * A return value of EOWNERDEAD or ELOCKUNMAPPED
   1066   4574       raf 		 * means we successfully acquired the lock.
   1067   4574       raf 		 */
   1068  10887     Roger 		if ((error = ___lwp_mutex_trylock(mp, self)) != 0 &&
   1069   4574       raf 		    error != EOWNERDEAD && error != ELOCKUNMAPPED) {
   1070   4574       raf 			acquired = 0;
   1071      0    stevel 			break;
   1072      0    stevel 		}
   1073      0    stevel 
   1074   4574       raf 		if (mtype & USYNC_PROCESS) {
   1075      0    stevel 			/*
   1076      0    stevel 			 * Defend against forkall().  We may be the child,
   1077      0    stevel 			 * in which case we don't actually own the mutex.
   1078      0    stevel 			 */
   1079      0    stevel 			enter_critical(self);
   1080      0    stevel 			if (mp->mutex_ownerpid == udp->pid) {
   1081      0    stevel 				exit_critical(self);
   1082   4574       raf 				acquired = 1;
   1083      0    stevel 				break;
   1084      0    stevel 			}
   1085      0    stevel 			exit_critical(self);
   1086      0    stevel 		} else {
   1087   4574       raf 			acquired = 1;
   1088      0    stevel 			break;
   1089      0    stevel 		}
   1090   4574       raf 	}
   1091   4574       raf 
   1092   4574       raf 	if (acquired) {
   1093  10887     Roger 		ASSERT(mp->mutex_owner == (uintptr_t)self);
   1094   4574       raf 		DTRACE_PROBE3(plockstat, mutex__acquire, mp, 0, 0);
   1095   4574       raf 	} else if (error != EBUSY) {
   1096   4574       raf 		DTRACE_PROBE2(plockstat, mutex__error, mp, error);
   1097      0    stevel 	}
   1098      0    stevel 
   1099      0    stevel 	return (error);
   1100      0    stevel }
   1101      0    stevel 
   1102      0    stevel volatile sc_shared_t *
   1103      0    stevel setup_schedctl(void)
   1104      0    stevel {
   1105      0    stevel 	ulwp_t *self = curthread;
   1106      0    stevel 	volatile sc_shared_t *scp;
   1107      0    stevel 	sc_shared_t *tmp;
   1108      0    stevel 
   1109      0    stevel 	if ((scp = self->ul_schedctl) == NULL && /* no shared state yet */
   1110      0    stevel 	    !self->ul_vfork &&			/* not a child of vfork() */
   1111      0    stevel 	    !self->ul_schedctl_called) {	/* haven't been called before */
   1112      0    stevel 		enter_critical(self);
   1113      0    stevel 		self->ul_schedctl_called = &self->ul_uberdata->uberflags;
   1114      0    stevel 		if ((tmp = __schedctl()) != (sc_shared_t *)(-1))
   1115      0    stevel 			self->ul_schedctl = scp = tmp;
   1116      0    stevel 		exit_critical(self);
   1117      0    stevel 	}
   1118      0    stevel 	/*
   1119      0    stevel 	 * Unless the call to setup_schedctl() is surrounded
   1120      0    stevel 	 * by enter_critical()/exit_critical(), the address
   1121      0    stevel 	 * we are returning could be invalid due to a forkall()
   1122      0    stevel 	 * having occurred in another thread.
   1123      0    stevel 	 */
   1124      0    stevel 	return (scp);
   1125      0    stevel }
   1126      0    stevel 
   1127      0    stevel /*
   1128      0    stevel  * Interfaces from libsched, incorporated into libc.
   1129      0    stevel  * libsched.so.1 is now a filter library onto libc.
   1130      0    stevel  */
   1131   6812       raf #pragma weak schedctl_lookup = schedctl_init
   1132      0    stevel schedctl_t *
   1133   6812       raf schedctl_init(void)
   1134      0    stevel {
   1135      0    stevel 	volatile sc_shared_t *scp = setup_schedctl();
   1136      0    stevel 	return ((scp == NULL)? NULL : (schedctl_t *)&scp->sc_preemptctl);
   1137      0    stevel }
   1138      0    stevel 
/*
 * Does nothing; the body is intentionally empty.
 * NOTE(review): presumably retained only so that the libsched
 * interface remains complete -- confirm.
 */
void
schedctl_exit(void)
{
}
   1143      0    stevel 
   1144      0    stevel /*
   1145      0    stevel  * Contract private interface for java.
   1146      0    stevel  * Set up the schedctl data if it doesn't exist yet.
   1147      0    stevel  * Return a pointer to the pointer to the schedctl data.
   1148      0    stevel  */
   1149      0    stevel volatile sc_shared_t *volatile *
   1150      0    stevel _thr_schedctl(void)
   1151      0    stevel {
   1152      0    stevel 	ulwp_t *self = curthread;
   1153      0    stevel 	volatile sc_shared_t *volatile *ptr;
   1154      0    stevel 
   1155      0    stevel 	if (self->ul_vfork)
   1156      0    stevel 		return (NULL);
   1157      0    stevel 	if (*(ptr = &self->ul_schedctl) == NULL)
   1158      0    stevel 		(void) setup_schedctl();
   1159      0    stevel 	return (ptr);
   1160      0    stevel }
   1161      0    stevel 
   1162      0    stevel /*
   1163      0    stevel  * Block signals and attempt to block preemption.
   1164      0    stevel  * no_preempt()/preempt() must be used in pairs but can be nested.
   1165      0    stevel  */
   1166      0    stevel void
   1167      0    stevel no_preempt(ulwp_t *self)
   1168      0    stevel {
   1169      0    stevel 	volatile sc_shared_t *scp;
   1170      0    stevel 
   1171      0    stevel 	if (self->ul_preempt++ == 0) {
   1172      0    stevel 		enter_critical(self);
   1173      0    stevel 		if ((scp = self->ul_schedctl) != NULL ||
   1174      0    stevel 		    (scp = setup_schedctl()) != NULL) {
   1175      0    stevel 			/*
   1176      0    stevel 			 * Save the pre-existing preempt value.
   1177      0    stevel 			 */
   1178      0    stevel 			self->ul_savpreempt = scp->sc_preemptctl.sc_nopreempt;
   1179      0    stevel 			scp->sc_preemptctl.sc_nopreempt = 1;
   1180      0    stevel 		}
   1181      0    stevel 	}
   1182      0    stevel }
   1183      0    stevel 
   1184      0    stevel /*
   1185      0    stevel  * Undo the effects of no_preempt().
   1186      0    stevel  */
   1187      0    stevel void
   1188      0    stevel preempt(ulwp_t *self)
   1189      0    stevel {
   1190      0    stevel 	volatile sc_shared_t *scp;
   1191      0    stevel 
   1192      0    stevel 	ASSERT(self->ul_preempt > 0);
   1193      0    stevel 	if (--self->ul_preempt == 0) {
   1194      0    stevel 		if ((scp = self->ul_schedctl) != NULL) {
   1195      0    stevel 			/*
   1196      0    stevel 			 * Restore the pre-existing preempt value.
   1197      0    stevel 			 */
   1198      0    stevel 			scp->sc_preemptctl.sc_nopreempt = self->ul_savpreempt;
   1199      0    stevel 			if (scp->sc_preemptctl.sc_yield &&
   1200      0    stevel 			    scp->sc_preemptctl.sc_nopreempt == 0) {
   1201   6515       raf 				yield();
   1202      0    stevel 				if (scp->sc_preemptctl.sc_yield) {
   1203      0    stevel 					/*
   1204      0    stevel 					 * Shouldn't happen.  This is either
   1205      0    stevel 					 * a race condition or the thread
   1206      0    stevel 					 * just entered the real-time class.
   1207      0    stevel 					 */
   1208   6515       raf 					yield();
   1209      0    stevel 					scp->sc_preemptctl.sc_yield = 0;
   1210      0    stevel 				}
   1211      0    stevel 			}
   1212      0    stevel 		}
   1213      0    stevel 		exit_critical(self);
   1214      0    stevel 	}
   1215      0    stevel }
   1216      0    stevel 
   1217      0    stevel /*
   1218      0    stevel  * If a call to preempt() would cause the current thread to yield or to
   1219      0    stevel  * take deferred actions in exit_critical(), then unpark the specified
   1220      0    stevel  * lwp so it can run while we delay.  Return the original lwpid if the
   1221      0    stevel  * unpark was not performed, else return zero.  The tests are a repeat
   1222      0    stevel  * of some of the tests in preempt(), above.  This is a statistical
   1223      0    stevel  * optimization solely for cond_sleep_queue(), below.
   1224      0    stevel  */
   1225      0    stevel static lwpid_t
   1226      0    stevel preempt_unpark(ulwp_t *self, lwpid_t lwpid)
   1227      0    stevel {
   1228      0    stevel 	volatile sc_shared_t *scp = self->ul_schedctl;
   1229      0    stevel 
   1230      0    stevel 	ASSERT(self->ul_preempt == 1 && self->ul_critical > 0);
   1231      0    stevel 	if ((scp != NULL && scp->sc_preemptctl.sc_yield) ||
   1232      0    stevel 	    (self->ul_curplease && self->ul_critical == 1)) {
   1233      0    stevel 		(void) __lwp_unpark(lwpid);
   1234      0    stevel 		lwpid = 0;
   1235      0    stevel 	}
   1236      0    stevel 	return (lwpid);
   1237      0    stevel }
   1238      0    stevel 
   1239      0    stevel /*
   1240   4613       raf  * Spin for a while (if 'tryhard' is true), trying to grab the lock.
   1241      0    stevel  * If this fails, return EBUSY and let the caller deal with it.
   1242      0    stevel  * If this succeeds, return 0 with mutex_owner set to curthread.
   1243      0    stevel  */
   1244   4574       raf static int
   1245   4613       raf mutex_trylock_adaptive(mutex_t *mp, int tryhard)
   1246      0    stevel {
   1247      0    stevel 	ulwp_t *self = curthread;
   1248   4574       raf 	int error = EBUSY;
   1249      0    stevel 	ulwp_t *ulwp;
   1250      0    stevel 	volatile sc_shared_t *scp;
   1251   5629       raf 	volatile uint8_t *lockp = (volatile uint8_t *)&mp->mutex_lockw;
   1252   5629       raf 	volatile uint64_t *ownerp = (volatile uint64_t *)&mp->mutex_owner;
   1253   5629       raf 	uint32_t new_lockword;
   1254   5629       raf 	int count = 0;
   1255   5629       raf 	int max_count;
   1256   5629       raf 	uint8_t max_spinners;
   1257      0    stevel 
   1258   4574       raf 	ASSERT(!(mp->mutex_type & USYNC_PROCESS));
   1259      0    stevel 
   1260   7907     Roger 	if (MUTEX_OWNED(mp, self))
   1261      0    stevel 		return (EBUSY);
   1262   7907     Roger 
   1263   7907     Roger 	enter_critical(self);
   1264   4574       raf 
   1265   4574       raf 	/* short-cut, not definitive (see below) */
   1266   4574       raf 	if (mp->mutex_flag & LOCK_NOTRECOVERABLE) {
   1267   4574       raf 		ASSERT(mp->mutex_type & LOCK_ROBUST);
   1268   5629       raf 		error = ENOTRECOVERABLE;
   1269   5629       raf 		goto done;
   1270   4574       raf 	}
   1271   4574       raf 
   1272   5629       raf 	/*
   1273   5629       raf 	 * Make one attempt to acquire the lock before
   1274   5629       raf 	 * incurring the overhead of the spin loop.
   1275   5629       raf 	 */
   1276   5629       raf 	if (set_lock_byte(lockp) == 0) {
   1277   5629       raf 		*ownerp = (uintptr_t)self;
   1278   5629       raf 		error = 0;
   1279   5629       raf 		goto done;
   1280   5629       raf 	}
   1281   5629       raf 	if (!tryhard)
   1282   5629       raf 		goto done;
   1283   5629       raf 	if (ncpus == 0)
   1284   5629       raf 		ncpus = (int)_sysconf(_SC_NPROCESSORS_ONLN);
   1285   5629       raf 	if ((max_spinners = self->ul_max_spinners) >= ncpus)
   1286   5629       raf 		max_spinners = ncpus - 1;
   1287   5629       raf 	max_count = (max_spinners != 0)? self->ul_adaptive_spin : 0;
   1288   5629       raf 	if (max_count == 0)
   1289   5629       raf 		goto done;
   1290   4574       raf 
   1291      0    stevel 	/*
   1292      0    stevel 	 * This spin loop is unfair to lwps that have already dropped into
   1293      0    stevel 	 * the kernel to sleep.  They will starve on a highly-contended mutex.
   1294      0    stevel 	 * This is just too bad.  The adaptive spin algorithm is intended
   1295      0    stevel 	 * to allow programs with highly-contended locks (that is, broken
   1296      0    stevel 	 * programs) to execute with reasonable speed despite their contention.
   1297      0    stevel 	 * Being fair would reduce the speed of such programs and well-written
   1298      0    stevel 	 * programs will not suffer in any case.
   1299      0    stevel 	 */
   1300   7907     Roger 	if (spinners_incr(&mp->mutex_lockword, max_spinners) == -1)
   1301   5629       raf 		goto done;
   1302   5629       raf 	DTRACE_PROBE1(plockstat, mutex__spin, mp);
   1303   5629       raf 	for (count = 1; ; count++) {
   1304      0    stevel 		if (*lockp == 0 && set_lock_byte(lockp) == 0) {
   1305      0    stevel 			*ownerp = (uintptr_t)self;
   1306   4574       raf 			error = 0;
   1307   4574       raf 			break;
   1308      0    stevel 		}
   1309   5629       raf 		if (count == max_count)
   1310   5629       raf 			break;
   1311      0    stevel 		SMT_PAUSE();
   1312      0    stevel 		/*
   1313      0    stevel 		 * Stop spinning if the mutex owner is not running on
   1314      0    stevel 		 * a processor; it will not drop the lock any time soon
   1315      0    stevel 		 * and we would just be wasting time to keep spinning.
   1316      0    stevel 		 *
   1317      0    stevel 		 * Note that we are looking at another thread (ulwp_t)
   1318      0    stevel 		 * without ensuring that the other thread does not exit.
   1319      0    stevel 		 * The scheme relies on ulwp_t structures never being
   1320      0    stevel 		 * deallocated by the library (the library employs a free
   1321      0    stevel 		 * list of ulwp_t structs that are reused when new threads
   1322      0    stevel 		 * are created) and on schedctl shared memory never being
   1323      0    stevel 		 * deallocated once created via __schedctl().
   1324      0    stevel 		 *
   1325      0    stevel 		 * Thus, the worst that can happen when the spinning thread
   1326      0    stevel 		 * looks at the owner's schedctl data is that it is looking
   1327      0    stevel 		 * at some other thread's schedctl data.  This almost never
   1328      0    stevel 		 * happens and is benign when it does.
   1329      0    stevel 		 */
   1330      0    stevel 		if ((ulwp = (ulwp_t *)(uintptr_t)*ownerp) != NULL &&
   1331      0    stevel 		    ((scp = ulwp->ul_schedctl) == NULL ||
   1332      0    stevel 		    scp->sc_state != SC_ONPROC))
   1333      0    stevel 			break;
   1334      0    stevel 	}
   1335   5629       raf 	new_lockword = spinners_decr(&mp->mutex_lockword);
   1336   5629       raf 	if (error && (new_lockword & (LOCKMASK | SPINNERMASK)) == 0) {
   1337   5629       raf 		/*
   1338   5629       raf 		 * We haven't yet acquired the lock, the lock
   1339   5629       raf 		 * is free, and there are no other spinners.
   1340   5629       raf 		 * Make one final attempt to acquire the lock.
   1341   5629       raf 		 *
   1342   5629       raf 		 * This isn't strictly necessary since mutex_lock_queue()
   1343   5629       raf 		 * (the next action this thread will take if it doesn't
   1344   5629       raf 		 * acquire the lock here) makes one attempt to acquire
   1345   5629       raf 		 * the lock before putting the thread to sleep.
   1346   5629       raf 		 *
   1347   5629       raf 		 * If the next action for this thread (on failure here)
   1348   5629       raf 		 * were not to call mutex_lock_queue(), this would be
   1349   5629       raf 		 * necessary for correctness, to avoid ending up with an
   1350   5629       raf 		 * unheld mutex with waiters but no one to wake them up.
   1351   5629       raf 		 */
   1352   5629       raf 		if (set_lock_byte(lockp) == 0) {
   1353   5629       raf 			*ownerp = (uintptr_t)self;
   1354   5629       raf 			error = 0;
   1355   5629       raf 		}
   1356   5629       raf 		count++;
   1357   5629       raf 	}
   1358      0    stevel 
   1359   5629       raf done:
   1360   4574       raf 	if (error == 0 && (mp->mutex_flag & LOCK_NOTRECOVERABLE)) {
   1361   4574       raf 		ASSERT(mp->mutex_type & LOCK_ROBUST);
   1362   4574       raf 		/*
   1363   6057       raf 		 * We shouldn't own the mutex.
   1364   6057       raf 		 * Just clear the lock; everyone has already been waked up.
   1365   4574       raf 		 */
   1366   7907     Roger 		*ownerp = 0;
   1367   6057       raf 		(void) clear_lockbyte(&mp->mutex_lockword);
   1368   4574       raf 		error = ENOTRECOVERABLE;
   1369   4574       raf 	}
   1370   7907     Roger 
   1371   7907     Roger 	exit_critical(self);
   1372      0    stevel 
   1373   4574       raf 	if (error) {
   1374   5629       raf 		if (count) {
   1375   9397  Jonathan 			DTRACE_PROBE3(plockstat, mutex__spun, mp, 0, count);
   1376   5629       raf 		}
   1377   4574       raf 		if (error != EBUSY) {
   1378   4574       raf 			DTRACE_PROBE2(plockstat, mutex__error, mp, error);
   1379   4574       raf 		}
   1380   4574       raf 	} else {
   1381   5629       raf 		if (count) {
   1382   9397  Jonathan 			DTRACE_PROBE3(plockstat, mutex__spun, mp, 1, count);
   1383   5629       raf 		}
   1384   4574       raf 		DTRACE_PROBE3(plockstat, mutex__acquire, mp, 0, count);
   1385   4574       raf 		if (mp->mutex_flag & LOCK_OWNERDEAD) {
   1386   4574       raf 			ASSERT(mp->mutex_type & LOCK_ROBUST);
   1387   4574       raf 			error = EOWNERDEAD;
   1388   4574       raf 		}
   1389   4574       raf 	}
   1390   4574       raf 
   1391   4574       raf 	return (error);
   1392      0    stevel }
   1393      0    stevel 
   1394      0    stevel /*
   1395      0    stevel  * Same as mutex_trylock_adaptive(), except specifically for queue locks.
   1396      0    stevel  * The owner field is not set here; the caller (spin_lock_set()) sets it.
   1397      0    stevel  */
   1398   4574       raf static int
   1399      0    stevel mutex_queuelock_adaptive(mutex_t *mp)
   1400      0    stevel {
   1401      0    stevel 	ulwp_t *ulwp;
   1402      0    stevel 	volatile sc_shared_t *scp;
   1403      0    stevel 	volatile uint8_t *lockp;
   1404      0    stevel 	volatile uint64_t *ownerp;
   1405      0    stevel 	int count = curthread->ul_queue_spin;
   1406      0    stevel 
   1407      0    stevel 	ASSERT(mp->mutex_type == USYNC_THREAD);
   1408      0    stevel 
   1409      0    stevel 	if (count == 0)
   1410      0    stevel 		return (EBUSY);
   1411      0    stevel 
   1412      0    stevel 	lockp = (volatile uint8_t *)&mp->mutex_lockw;
   1413      0    stevel 	ownerp = (volatile uint64_t *)&mp->mutex_owner;
   1414      0    stevel 	while (--count >= 0) {
   1415      0    stevel 		if (*lockp == 0 && set_lock_byte(lockp) == 0)
   1416      0    stevel 			return (0);
   1417      0    stevel 		SMT_PAUSE();
   1418      0    stevel 		if ((ulwp = (ulwp_t *)(uintptr_t)*ownerp) != NULL &&
   1419      0    stevel 		    ((scp = ulwp->ul_schedctl) == NULL ||
   1420      0    stevel 		    scp->sc_state != SC_ONPROC))
   1421      0    stevel 			break;
   1422      0    stevel 	}
   1423      0    stevel 
   1424      0    stevel 	return (EBUSY);
   1425      0    stevel }
   1426      0    stevel 
   1427      0    stevel /*
   1428      0    stevel  * Like mutex_trylock_adaptive(), but for process-shared mutexes.
   1429   4613       raf  * Spin for a while (if 'tryhard' is true), trying to grab the lock.
   1430      0    stevel  * If this fails, return EBUSY and let the caller deal with it.
   1431      0    stevel  * If this succeeds, return 0 with mutex_owner set to curthread
   1432      0    stevel  * and mutex_ownerpid set to the current pid.
   1433      0    stevel  */
   1434   4574       raf static int
   1435   4613       raf mutex_trylock_process(mutex_t *mp, int tryhard)
   1436      0    stevel {
   1437      0    stevel 	ulwp_t *self = curthread;
   1438   5629       raf 	uberdata_t *udp = self->ul_uberdata;
   1439   4574       raf 	int error = EBUSY;
   1440   6057       raf 	volatile uint64_t *lockp = (volatile uint64_t *)&mp->mutex_lockword64;
   1441   5629       raf 	uint32_t new_lockword;
   1442   5629       raf 	int count = 0;
   1443   5629       raf 	int max_count;
   1444   5629       raf 	uint8_t max_spinners;
   1445      0    stevel 
   1446   7255       raf #if defined(__sparc) && !defined(_LP64)
   1447   7255       raf 	/* horrible hack, necessary only on 32-bit sparc */
   1448   7255       raf 	int fix_alignment_problem =
   1449   7255       raf 	    (((uintptr_t)mp & (_LONG_LONG_ALIGNMENT - 1)) &&
   1450   7255       raf 	    self->ul_misaligned && !(mp->mutex_type & LOCK_ROBUST));
   1451   7255       raf #endif
   1452   7255       raf 
   1453   4574       raf 	ASSERT(mp->mutex_type & USYNC_PROCESS);
   1454      0    stevel 
   1455   4574       raf 	if (shared_mutex_held(mp))
   1456      0    stevel 		return (EBUSY);
   1457      0    stevel 
   1458   7907     Roger 	enter_critical(self);
   1459   7907     Roger 
   1460   4574       raf 	/* short-cut, not definitive (see below) */
   1461   4574       raf 	if (mp->mutex_flag & LOCK_NOTRECOVERABLE) {
   1462   4574       raf 		ASSERT(mp->mutex_type & LOCK_ROBUST);
   1463   5629       raf 		error = ENOTRECOVERABLE;
   1464   5629       raf 		goto done;
   1465   4574       raf 	}
   1466   4574       raf 
   1467   5629       raf 	/*
   1468   5629       raf 	 * Make one attempt to acquire the lock before
   1469   5629       raf 	 * incurring the overhead of the spin loop.
   1470   5629       raf 	 */
   1471   7255       raf #if defined(__sparc) && !defined(_LP64)
   1472   7255       raf 	/* horrible hack, necessary only on 32-bit sparc */
   1473   7255       raf 	if (fix_alignment_problem) {
   1474   7255       raf 		if (set_lock_byte(&mp->mutex_lockw) == 0) {
   1475   7255       raf 			mp->mutex_ownerpid = udp->pid;
   1476   7255       raf 			mp->mutex_owner = (uintptr_t)self;
   1477   7255       raf 			error = 0;
   1478   7255       raf 			goto done;
   1479   7255       raf 		}
   1480   7255       raf 	} else
   1481   7255       raf #endif
   1482   6057       raf 	if (set_lock_byte64(lockp, udp->pid) == 0) {
   1483   5629       raf 		mp->mutex_owner = (uintptr_t)self;
   1484   6057       raf 		/* mp->mutex_ownerpid was set by set_lock_byte64() */
   1485   5629       raf 		error = 0;
   1486   5629       raf 		goto done;
   1487   5629       raf 	}
   1488   5629       raf 	if (!tryhard)
   1489   5629       raf 		goto done;
   1490   4574       raf 	if (ncpus == 0)
   1491   4574       raf 		ncpus = (int)_sysconf(_SC_NPROCESSORS_ONLN);
   1492   5629       raf 	if ((max_spinners = self->ul_max_spinners) >= ncpus)
   1493   5629       raf 		max_spinners = ncpus - 1;
   1494   5629       raf 	max_count = (max_spinners != 0)? self->ul_adaptive_spin : 0;
   1495   5629       raf 	if (max_count == 0)
   1496   5629       raf 		goto done;
   1497   4574       raf 
   1498      0    stevel 	/*
   1499      0    stevel 	 * This is a process-shared mutex.
   1500      0    stevel 	 * We cannot know if the owner is running on a processor.
   1501      0    stevel 	 * We just spin and hope that it is on a processor.
   1502      0    stevel 	 */
   1503   7907     Roger 	if (spinners_incr(&mp->mutex_lockword, max_spinners) == -1)
   1504   5629       raf 		goto done;
   1505   5629       raf 	DTRACE_PROBE1(plockstat, mutex__spin, mp);
   1506   5629       raf 	for (count = 1; ; count++) {
   1507   7255       raf #if defined(__sparc) && !defined(_LP64)
   1508   7255       raf 		/* horrible hack, necessary only on 32-bit sparc */
   1509   7255       raf 		if (fix_alignment_problem) {
   1510   7255       raf 			if ((*lockp & LOCKMASK64) == 0 &&
   1511   7255       raf 			    set_lock_byte(&mp->mutex_lockw) == 0) {
   1512   7255       raf 				mp->mutex_ownerpid = udp->pid;
   1513   7255       raf 				mp->mutex_owner = (uintptr_t)self;
   1514   7255       raf 				error = 0;
   1515   7255       raf 				break;
   1516   7255       raf 			}
   1517   7255       raf 		} else
   1518   7255       raf #endif
   1519   6057       raf 		if ((*lockp & LOCKMASK64) == 0 &&
   1520   6057       raf 		    set_lock_byte64(lockp, udp->pid) == 0) {
   1521   4574       raf 			mp->mutex_owner = (uintptr_t)self;
   1522   6057       raf 			/* mp->mutex_ownerpid was set by set_lock_byte64() */
   1523   4574       raf 			error = 0;
   1524   4574       raf 			break;
   1525      0    stevel 		}
   1526   5629       raf 		if (count == max_count)
   1527   5629       raf 			break;
   1528   4574       raf 		SMT_PAUSE();
   1529   5629       raf 	}
   1530   5629       raf 	new_lockword = spinners_decr(&mp->mutex_lockword);
   1531   5629       raf 	if (error && (new_lockword & (LOCKMASK | SPINNERMASK)) == 0) {
   1532   5629       raf 		/*
   1533   5629       raf 		 * We haven't yet acquired the lock, the lock
   1534   5629       raf 		 * is free, and there are no other spinners.
   1535   5629       raf 		 * Make one final attempt to acquire the lock.
   1536   5629       raf 		 *
   1537   5629       raf 		 * This isn't strictly necessary since mutex_lock_kernel()
   1538   5629       raf 		 * (the next action this thread will take if it doesn't
   1539   5629       raf 		 * acquire the lock here) makes one attempt to acquire
   1540   5629       raf 		 * the lock before putting the thread to sleep.
   1541   5629       raf 		 *
   1542   5629       raf 		 * If the next action for this thread (on failure here)
   1543   5629       raf 		 * were not to call mutex_lock_kernel(), this would be
   1544   5629       raf 		 * necessary for correctness, to avoid ending up with an
   1545   5629       raf 		 * unheld mutex with waiters but no one to wake them up.
   1546   5629       raf 		 */
   1547   7255       raf #if defined(__sparc) && !defined(_LP64)
   1548   7255       raf 		/* horrible hack, necessary only on 32-bit sparc */
   1549   7255       raf 		if (fix_alignment_problem) {
   1550   7255       raf 			if (set_lock_byte(&mp->mutex_lockw) == 0) {
   1551   7255       raf 				mp->mutex_ownerpid = udp->pid;
   1552   7255       raf 				mp->mutex_owner = (uintptr_t)self;
   1553   7255       raf 				error = 0;
   1554   7255       raf 			}
   1555   7255       raf 		} else
   1556   7255       raf #endif
   1557   6057       raf 		if (set_lock_byte64(lockp, udp->pid) == 0) {
   1558   5629       raf 			mp->mutex_owner = (uintptr_t)self;
   1559   6057       raf 			/* mp->mutex_ownerpid was set by set_lock_byte64() */
   1560   5629       raf 			error = 0;
   1561   5629       raf 		}
   1562   5629       raf 		count++;
   1563   4574       raf 	}
   1564   4574       raf 
   1565   5629       raf done:
   1566   4574       raf 	if (error == 0 && (mp->mutex_flag & LOCK_NOTRECOVERABLE)) {
   1567   4574       raf 		ASSERT(mp->mutex_type & LOCK_ROBUST);
   1568      0    stevel 		/*
   1569   6057       raf 		 * We shouldn't own the mutex.
   1570   6057       raf 		 * Just clear the lock; everyone has already been waked up.
   1571      0    stevel 		 */
   1572   4574       raf 		mp->mutex_owner = 0;
   1573   6057       raf 		/* mp->mutex_ownerpid is cleared by clear_lockbyte64() */
   1574   6057       raf 		(void) clear_lockbyte64(&mp->mutex_lockword64);
   1575   4574       raf 		error = ENOTRECOVERABLE;
   1576      0    stevel 	}
   1577   7907     Roger 
   1578   7907     Roger 	exit_critical(self);
   1579      0    stevel 
   1580   4574       raf 	if (error) {
   1581   5629       raf 		if (count) {
   1582   9397  Jonathan 			DTRACE_PROBE3(plockstat, mutex__spun, mp, 0, count);
   1583   5629       raf 		}
   1584   4574       raf 		if (error != EBUSY) {
   1585   4574       raf 			DTRACE_PROBE2(plockstat, mutex__error, mp, error);
   1586   4574       raf 		}
   1587   4574       raf 	} else {
   1588   5629       raf 		if (count) {
   1589   9397  Jonathan 			DTRACE_PROBE3(plockstat, mutex__spun, mp, 1, count);
   1590   5629       raf 		}
   1591   4574       raf 		DTRACE_PROBE3(plockstat, mutex__acquire, mp, 0, count);
   1592   4574       raf 		if (mp->mutex_flag & (LOCK_OWNERDEAD | LOCK_UNMAPPED)) {
   1593   4574       raf 			ASSERT(mp->mutex_type & LOCK_ROBUST);
   1594   4574       raf 			if (mp->mutex_flag & LOCK_OWNERDEAD)
   1595   4574       raf 				error = EOWNERDEAD;
   1596   4574       raf 			else if (mp->mutex_type & USYNC_PROCESS_ROBUST)
   1597   4574       raf 				error = ELOCKUNMAPPED;
   1598   4574       raf 			else
   1599   4574       raf 				error = EOWNERDEAD;
   1600   4574       raf 		}
   1601   4574       raf 	}
   1602   4574       raf 
   1603   4574       raf 	return (error);
   1604      0    stevel }
   1605      0    stevel 
   1606      0    stevel /*
   1607      0    stevel  * Mutex wakeup code for releasing a USYNC_THREAD mutex.
   1608      0    stevel  * Returns the lwpid of the thread that was dequeued, if any.
   1609      0    stevel  * The caller of mutex_wakeup() must call __lwp_unpark(lwpid)
   1610      0    stevel  * to wake up the specified lwp.
   1611      0    stevel  */
   1612   4574       raf static lwpid_t
   1613      0    stevel mutex_wakeup(mutex_t *mp)
   1614      0    stevel {
   1615      0    stevel 	lwpid_t lwpid = 0;
   1616   6247       raf 	int more;
   1617      0    stevel 	queue_head_t *qp;
   1618      0    stevel 	ulwp_t *ulwp;
   1619      0    stevel 
   1620      0    stevel 	/*
   1621      0    stevel 	 * Dequeue a waiter from the sleep queue.  Don't touch the mutex
   1622      0    stevel 	 * waiters bit if no one was found on the queue because the mutex
   1623      0    stevel 	 * might have been deallocated or reallocated for another purpose.
   1624      0    stevel 	 */
   1625      0    stevel 	qp = queue_lock(mp, MX);
   1626   6247       raf 	if ((ulwp = dequeue(qp, &more)) != NULL) {
   1627      0    stevel 		lwpid = ulwp->ul_lwpid;
   1628   6247       raf 		mp->mutex_waiters = more;
   1629      0    stevel 	}
   1630      0    stevel 	queue_unlock(qp);
   1631      0    stevel 	return (lwpid);
   1632      0    stevel }
   1633      0    stevel 
   1634      0    stevel /*
   1635   4574       raf  * Mutex wakeup code for releasing all waiters on a USYNC_THREAD mutex.
   1636   4574       raf  */
   1637   4574       raf static void
   1638   4574       raf mutex_wakeup_all(mutex_t *mp)
   1639   4574       raf {
   1640   4574       raf 	queue_head_t *qp;
   1641   6247       raf 	queue_root_t *qrp;
   1642   4574       raf 	int nlwpid = 0;
   1643   4574       raf 	int maxlwps = MAXLWPS;
   1644   4574       raf 	ulwp_t *ulwp;
   1645   4574       raf 	lwpid_t buffer[MAXLWPS];
   1646   4574       raf 	lwpid_t *lwpid = buffer;
   1647   4574       raf 
   1648   4574       raf 	/*
   1649   4574       raf 	 * Walk the list of waiters and prepare to wake up all of them.
   1650   4574       raf 	 * The waiters flag has already been cleared from the mutex.
   1651   4574       raf 	 *
   1652   4574       raf 	 * We keep track of lwpids that are to be unparked in lwpid[].
   1653   4574       raf 	 * __lwp_unpark_all() is called to unpark all of them after
   1654   4574       raf 	 * they have been removed from the sleep queue and the sleep
   1655   4574       raf 	 * queue lock has been dropped.  If we run out of space in our
   1656   4574       raf 	 * on-stack buffer, we need to allocate more but we can't call
   1657   4574       raf 	 * lmalloc() because we are holding a queue lock when the overflow
   1658   4574       raf 	 * occurs and lmalloc() acquires a lock.  We can't use alloca()
   1659   4574       raf 	 * either because the application may have allocated a small
   1660   4574       raf 	 * stack and we don't want to overrun the stack.  So we call
   1661   4574       raf 	 * alloc_lwpids() to allocate a bigger buffer using the mmap()
   1662   4574       raf 	 * system call directly since that path acquires no locks.
   1663   4574       raf 	 */
   1664   4574       raf 	qp = queue_lock(mp, MX);
   1665   6247       raf 	for (;;) {
   1666   6247       raf 		if ((qrp = qp->qh_root) == NULL ||
   1667   6247       raf 		    (ulwp = qrp->qr_head) == NULL)
   1668   6247       raf 			break;
   1669   6247       raf 		ASSERT(ulwp->ul_wchan == mp);
   1670   6247       raf 		queue_unlink(qp, &qrp->qr_head, NULL);
   1671   6247       raf 		ulwp->ul_sleepq = NULL;
   1672   6247       raf 		ulwp->ul_wchan = NULL;
   1673   6247       raf 		if (nlwpid == maxlwps)
   1674   6247       raf 			lwpid = alloc_lwpids(lwpid, &nlwpid, &maxlwps);
   1675   6247       raf 		lwpid[nlwpid++] = ulwp->ul_lwpid;
   1676   4574       raf 	}
   1677   4574       raf 
   1678   4574       raf 	if (nlwpid == 0) {
   1679   4574       raf 		queue_unlock(qp);
   1680   4574       raf 	} else {
   1681   5629       raf 		mp->mutex_waiters = 0;
   1682   4574       raf 		no_preempt(curthread);
   1683   4574       raf 		queue_unlock(qp);
   1684   4574       raf 		if (nlwpid == 1)
   1685   4574       raf 			(void) __lwp_unpark(lwpid[0]);
   1686   4574       raf 		else
   1687   4574       raf 			(void) __lwp_unpark_all(lwpid, nlwpid);
   1688   4574       raf 		preempt(curthread);
   1689   4574       raf 	}
   1690   4574       raf 
   1691   4574       raf 	if (lwpid != buffer)
   1692   6515       raf 		(void) munmap((caddr_t)lwpid, maxlwps * sizeof (lwpid_t));
   1693   4574       raf }
   1694   4574       raf 
   1695   4574       raf /*
   1696   5629       raf  * Release a process-private mutex.
   1697   5629       raf  * As an optimization, if there are waiters but there are also spinners
   1698   5629       raf  * attempting to acquire the mutex, then don't bother waking up a waiter;
   1699   5629       raf  * one of the spinners will acquire the mutex soon and it would be a waste
   1700   5629       raf  * of resources to wake up some thread just to have it spin for a while
   1701   5629       raf  * and then possibly go back to sleep.  See mutex_trylock_adaptive().
   1702      0    stevel  */
   1703   4574       raf static lwpid_t
   1704   4574       raf mutex_unlock_queue(mutex_t *mp, int release_all)
   1705      0    stevel {
   1706   7907     Roger 	ulwp_t *self = curthread;
   1707   5629       raf 	lwpid_t lwpid = 0;
   1708   5629       raf 	uint32_t old_lockword;
   1709      0    stevel 
   1710   6057       raf 	DTRACE_PROBE2(plockstat, mutex__release, mp, 0);
   1711   7907     Roger 	sigoff(self);
   1712   5629       raf 	mp->mutex_owner = 0;
   1713   5629       raf 	old_lockword = clear_lockbyte(&mp->mutex_lockword);
   1714   5629       raf 	if ((old_lockword & WAITERMASK) &&
   1715   5629       raf 	    (release_all || (old_lockword & SPINNERMASK) == 0)) {
   1716      0    stevel 		no_preempt(self);	/* ensure a prompt wakeup */
   1717   5629       raf 		if (release_all)
   1718   5629       raf 			mutex_wakeup_all(mp);
   1719   5629       raf 		else
   1720   5629       raf 			lwpid = mutex_wakeup(mp);
   1721   5629       raf 		if (lwpid == 0)
   1722   5629       raf 			preempt(self);
   1723   4574       raf 	}
   1724   7907     Roger 	sigon(self);
   1725      0    stevel 	return (lwpid);
   1726      0    stevel }
   1727      0    stevel 
   1728      0    stevel /*
   1729      0    stevel  * Like mutex_unlock_queue(), but for process-shared mutexes.
   1730      0    stevel  */
   1731   4574       raf static void
   1732   4574       raf mutex_unlock_process(mutex_t *mp, int release_all)
   1733      0    stevel {
   1734   7255       raf 	ulwp_t *self = curthread;
   1735   6057       raf 	uint64_t old_lockword64;
   1736      0    stevel 
   1737   6057       raf 	DTRACE_PROBE2(plockstat, mutex__release, mp, 0);
   1738   7907     Roger 	sigoff(self);
   1739      0    stevel 	mp->mutex_owner = 0;
   1740   7255       raf #if defined(__sparc) && !defined(_LP64)
   1741   7255       raf 	/* horrible hack, necessary only on 32-bit sparc */
   1742   7255       raf 	if (((uintptr_t)mp & (_LONG_LONG_ALIGNMENT - 1)) &&
   1743   7255       raf 	    self->ul_misaligned && !(mp->mutex_type & LOCK_ROBUST)) {
   1744   7255       raf 		uint32_t old_lockword;
   1745   7255       raf 		mp->mutex_ownerpid = 0;
   1746   7255       raf 		old_lockword = clear_lockbyte(&mp->mutex_lockword);
   1747   7255       raf 		if ((old_lockword & WAITERMASK) &&
   1748   7255       raf 		    (release_all || (old_lockword & SPINNERMASK) == 0)) {
   1749   7255       raf 			no_preempt(self);	/* ensure a prompt wakeup */
   1750   7255       raf 			(void) ___lwp_mutex_wakeup(mp, release_all);
   1751   7255       raf 			preempt(self);
   1752   7255       raf 		}
   1753   7907     Roger 		sigon(self);
   1754   7255       raf 		return;
   1755   7255       raf 	}
   1756   7255       raf #endif
   1757   6057       raf 	/* mp->mutex_ownerpid is cleared by clear_lockbyte64() */
   1758   6057       raf 	old_lockword64 = clear_lockbyte64(&mp->mutex_lockword64);
   1759   6057       raf 	if ((old_lockword64 & WAITERMASK64) &&
   1760   6057       raf 	    (release_all || (old_lockword64 & SPINNERMASK64) == 0)) {
   1761   5629       raf 		no_preempt(self);	/* ensure a prompt wakeup */
   1762   5629       raf 		(void) ___lwp_mutex_wakeup(mp, release_all);
   1763   5629       raf 		preempt(self);
   1764      0    stevel 	}
   1765   7907     Roger 	sigon(self);
   1766      0    stevel }
   1767      0    stevel 
   1768      0    stevel void
   1769      0    stevel stall(void)
   1770      0    stevel {
   1771      0    stevel 	for (;;)
   1772      0    stevel 		(void) mutex_lock_kernel(&stall_mutex, NULL, NULL);
   1773      0    stevel }
   1774      0    stevel 
   1775      0    stevel /*
   1776      0    stevel  * Acquire a USYNC_THREAD mutex via user-level sleep queues.
   1777      0    stevel  * We failed set_lock_byte(&mp->mutex_lockw) before coming here.
   1778   4574       raf  * If successful, returns with mutex_owner set correctly.
   1779      0    stevel  */
   1780      0    stevel int
   1781      0    stevel mutex_lock_queue(ulwp_t *self, tdb_mutex_stats_t *msp, mutex_t *mp,
   1782      0    stevel 	timespec_t *tsp)
   1783      0    stevel {
   1784      0    stevel 	uberdata_t *udp = curthread->ul_uberdata;
   1785      0    stevel 	queue_head_t *qp;
   1786      0    stevel 	hrtime_t begin_sleep;
   1787      0    stevel 	int error = 0;
   1788      0    stevel 
   1789      0    stevel 	self->ul_sp = stkptr();
   1790      0    stevel 	if (__td_event_report(self, TD_SLEEP, udp)) {
   1791      0    stevel 		self->ul_wchan = mp;
   1792      0    stevel 		self->ul_td_evbuf.eventnum = TD_SLEEP;
   1793      0    stevel 		self->ul_td_evbuf.eventdata = mp;
   1794      0    stevel 		tdb_event(TD_SLEEP, udp);
   1795      0    stevel 	}
   1796      0    stevel 	if (msp) {
   1797      0    stevel 		tdb_incr(msp->mutex_sleep);
   1798      0    stevel 		begin_sleep = gethrtime();
   1799      0    stevel 	}
   1800      0    stevel 
   1801      0    stevel 	DTRACE_PROBE1(plockstat, mutex__block, mp);
   1802      0    stevel 
   1803      0    stevel 	/*
   1804      0    stevel 	 * Put ourself on the sleep queue, and while we are
   1805      0    stevel 	 * unable to grab the lock, go park in the kernel.
   1806      0    stevel 	 * Take ourself off the sleep queue after we acquire the lock.
   1807      0    stevel 	 * The waiter bit can be set/cleared only while holding the queue lock.
   1808      0    stevel 	 */
   1809      0    stevel 	qp = queue_lock(mp, MX);
   1810   6247       raf 	enqueue(qp, self, 0);
   1811      0    stevel 	mp->mutex_waiters = 1;
   1812      0    stevel 	for (;;) {
   1813      0    stevel 		if (set_lock_byte(&mp->mutex_lockw) == 0) {
   1814      0    stevel 			mp->mutex_owner = (uintptr_t)self;
   1815   6247       raf 			mp->mutex_waiters = dequeue_self(qp);
   1816      0    stevel 			break;
   1817      0    stevel 		}
   1818      0    stevel 		set_parking_flag(self, 1);
   1819      0    stevel 		queue_unlock(qp);
   1820      0    stevel 		/*
   1821      0    stevel 		 * __lwp_park() will return the residual time in tsp
   1822      0    stevel 		 * if we are unparked before the timeout expires.
   1823      0    stevel 		 */
   1824   5629       raf 		error = __lwp_park(tsp, 0);
   1825      0    stevel 		set_parking_flag(self, 0);
   1826      0    stevel 		/*
   1827      0    stevel 		 * We could have taken a signal or suspended ourself.
   1828      0    stevel 		 * If we did, then we removed ourself from the queue.
   1829      0    stevel 		 * Someone else may have removed us from the queue
   1830      0    stevel 		 * as a consequence of mutex_unlock().  We may have
   1831      0    stevel 		 * gotten a timeout from __lwp_park().  Or we may still
   1832      0    stevel 		 * be on the queue and this is just a spurious wakeup.
   1833      0    stevel 		 */
   1834      0    stevel 		qp = queue_lock(mp, MX);
   1835      0    stevel 		if (self->ul_sleepq == NULL) {
   1836   5629       raf 			if (error) {
   1837   6247       raf 				mp->mutex_waiters = queue_waiter(qp)? 1 : 0;
   1838   5629       raf 				if (error != EINTR)
   1839   5629       raf 					break;
   1840   5629       raf 				error = 0;
   1841   5629       raf 			}
   1842      0    stevel 			if (set_lock_byte(&mp->mutex_lockw) == 0) {
   1843      0    stevel 				mp->mutex_owner = (uintptr_t)self;
   1844      0    stevel 				break;
   1845      0    stevel 			}
   1846   6247       raf 			enqueue(qp, self, 0);
   1847      0    stevel 			mp->mutex_waiters = 1;
   1848      0    stevel 		}
   1849      0    stevel 		ASSERT(self->ul_sleepq == qp &&
   1850      0    stevel 		    self->ul_qtype == MX &&
   1851      0    stevel 		    self->ul_wchan == mp);
   1852      0    stevel 		if (error) {
   1853   5629       raf 			if (error != EINTR) {
   1854   6247       raf 				mp->mutex_waiters = dequeue_self(qp);
   1855   5629       raf 				break;
   1856   5629       raf 			}
   1857   5629       raf 			error = 0;
   1858      0    stevel 		}
   1859      0    stevel 	}
   1860      0    stevel 	ASSERT(self->ul_sleepq == NULL && self->ul_link == NULL &&
   1861      0    stevel 	    self->ul_wchan == NULL);
   1862      0    stevel 	self->ul_sp = 0;
   1863      0    stevel 
   1864      0    stevel 	ASSERT(error == 0 || error == EINVAL || error == ETIME);
   1865   4574       raf 
   1866   4574       raf 	if (error == 0 && (mp->mutex_flag & LOCK_NOTRECOVERABLE)) {
   1867   4574       raf 		ASSERT(mp->mutex_type & LOCK_ROBUST);
   1868   4574       raf 		/*
   1869   6057       raf 		 * We shouldn't own the mutex.
   1870   6057       raf 		 * Just clear the lock; everyone has already been waked up.
   1871   4574       raf 		 */
   1872   4574       raf 		mp->mutex_owner = 0;
   1873   6057       raf 		(void) clear_lockbyte(&mp->mutex_lockword);
   1874   4574       raf 		error = ENOTRECOVERABLE;
   1875   4574       raf 	}
   1876   7907     Roger 
   1877   7907     Roger 	queue_unlock(qp);
   1878   7907     Roger 
   1879   7907     Roger 	if (msp)
   1880   7907     Roger 		msp->mutex_sleep_time += gethrtime() - begin_sleep;
   1881   4574       raf 
   1882   4574       raf 	if (error) {
   1883   4574       raf 		DTRACE_PROBE2(plockstat, mutex__blocked, mp, 0);
   1884   4574       raf 		DTRACE_PROBE2(plockstat, mutex__error, mp, error);
   1885   4574       raf 	} else {
   1886   4574       raf 		DTRACE_PROBE2(plockstat, mutex__blocked, mp, 1);
   1887   4574       raf 		DTRACE_PROBE3(plockstat, mutex__acquire, mp, 0, 0);
   1888   4574       raf 		if (mp->mutex_flag & LOCK_OWNERDEAD) {
   1889   4574       raf 			ASSERT(mp->mutex_type & LOCK_ROBUST);
   1890   4574       raf 			error = EOWNERDEAD;
   1891   4574       raf 		}
   1892   4574       raf 	}
   1893   4574       raf 
   1894      0    stevel 	return (error);
   1895   4574       raf }
   1896   4574       raf 
   1897   4574       raf static int
   1898   4574       raf mutex_recursion(mutex_t *mp, int mtype, int try)
   1899   4574       raf {
   1900   6812       raf 	ASSERT(mutex_held(mp));
   1901   4574       raf 	ASSERT(mtype & (LOCK_RECURSIVE|LOCK_ERRORCHECK));
   1902   4574       raf 	ASSERT(try == MUTEX_TRY || try == MUTEX_LOCK);
   1903   4574       raf 
   1904   4574       raf 	if (mtype & LOCK_RECURSIVE) {
   1905   4574       raf 		if (mp->mutex_rcount == RECURSION_MAX) {
   1906   4574       raf 			DTRACE_PROBE2(plockstat, mutex__error, mp, EAGAIN);
   1907   4574       raf 			return (EAGAIN);
   1908   4574       raf 		}
   1909   4574       raf 		mp->mutex_rcount++;
   1910   4574       raf 		DTRACE_PROBE3(plockstat, mutex__acquire, mp, 1, 0);
   1911   4574       raf 		return (0);
   1912   4574       raf 	}
   1913   4574       raf 	if (try == MUTEX_LOCK) {
   1914   4574       raf 		DTRACE_PROBE2(plockstat, mutex__error, mp, EDEADLK);
   1915   4574       raf 		return (EDEADLK);
   1916   4574       raf 	}
   1917   4574       raf 	return (EBUSY);
   1918   4574       raf }
   1919   4574       raf 
   1920   4574       raf /*
   1921   4574       raf  * Register this USYNC_PROCESS|LOCK_ROBUST mutex with the kernel so
   1922   4574       raf  * it can apply LOCK_OWNERDEAD|LOCK_UNMAPPED if it becomes necessary.
   1923   4574       raf  * We use tdb_hash_lock here and in the synch object tracking code in
   1924   4574       raf  * the tdb_agent.c file.  There is no conflict between these two usages.
   1925   4574       raf  */
   1926   4574       raf void
   1927   4574       raf register_lock(mutex_t *mp)
   1928   4574       raf {
   1929   4574       raf 	uberdata_t *udp = curthread->ul_uberdata;
   1930   4574       raf 	uint_t hash = LOCK_HASH(mp);
   1931   4574       raf 	robust_t *rlp;
   1932   9170     Roger 	robust_t *invalid;
   1933   4574       raf 	robust_t **rlpp;
   1934   4574       raf 	robust_t **table;
   1935   4574       raf 
   1936   4574       raf 	if ((table = udp->robustlocks) == NULL) {
   1937   4574       raf 		lmutex_lock(&udp->tdb_hash_lock);
   1938   4574       raf 		if ((table = udp->robustlocks) == NULL) {
   1939   4574       raf 			table = lmalloc(LOCKHASHSZ * sizeof (robust_t *));
   1940   6812       raf 			membar_producer();
   1941   4574       raf 			udp->robustlocks = table;
   1942   4574       raf 		}
   1943   4574       raf 		lmutex_unlock(&udp->tdb_hash_lock);
   1944   4574       raf 	}
   1945   6812       raf 	membar_consumer();
   1946   4574       raf 
   1947   4574       raf 	/*
   1948   4574       raf 	 * First search the registered table with no locks held.
   1949   4574       raf 	 * This is safe because the table never shrinks
   1950   4574       raf 	 * and we can only get a false negative.
   1951   4574       raf 	 */
   1952   4574       raf 	for (rlp = table[hash]; rlp != NULL; rlp = rlp->robust_next) {
   1953   4574       raf 		if (rlp->robust_lock == mp)	/* already registered */
   1954   4574       raf 			return;
   1955   4574       raf 	}
   1956   4574       raf 
   1957   4574       raf 	/*
   1958   4574       raf 	 * The lock was not found.
   1959   4574       raf 	 * Repeat the operation with tdb_hash_lock held.
   1960   4574       raf 	 */
   1961   4574       raf 	lmutex_lock(&udp->tdb_hash_lock);
   1962   4574       raf 
   1963   9170     Roger 	invalid = NULL;
   1964   4574       raf 	for (rlpp = &table[hash];
   1965   4574       raf 	    (rlp = *rlpp) != NULL;
   1966   4574       raf 	    rlpp = &rlp->robust_next) {
   1967   4574       raf 		if (rlp->robust_lock == mp) {	/* already registered */
   1968   4574       raf 			lmutex_unlock(&udp->tdb_hash_lock);
   1969   4574       raf 			return;
   1970   4574       raf 		}
   1971   9170     Roger 		/* remember the first invalid entry, if any */
   1972   9170     Roger 		if (rlp->robust_lock == INVALID_ADDR && invalid == NULL)
   1973   9170     Roger 			invalid = rlp;
   1974   4574       raf 	}
   1975   4574       raf 
   1976   4574       raf 	/*
   1977   4574       raf 	 * The lock has never been registered.
   1978   9170     Roger 	 * Add it to the table and register it now.
   1979   9170     Roger 	 */
   1980   9264     Roger 	if ((rlp = invalid) != NULL) {
   1981   9170     Roger 		/*
   1982   9170     Roger 		 * Reuse the invalid entry we found above.
   1983   9170     Roger 		 * The linkages are still correct.
   1984   9170     Roger 		 */
   1985   9264     Roger 		rlp->robust_lock = mp;
   1986   9170     Roger 		membar_producer();
   1987   9170     Roger 	} else {
   1988   9170     Roger 		/*
   1989   9170     Roger 		 * Allocate a new entry and add it to
   1990   9170     Roger 		 * the hash table and to the global list.
   1991   9170     Roger 		 */
   1992   9170     Roger 		rlp = lmalloc(sizeof (*rlp));
   1993   9170     Roger 		rlp->robust_lock = mp;
   1994   9170     Roger 		rlp->robust_next = NULL;
   1995   9170     Roger 		rlp->robust_list = udp->robustlist;
   1996   9170     Roger 		udp->robustlist = rlp;
   1997   9170     Roger 		membar_producer();
   1998   9170     Roger 		*rlpp = rlp;
   1999   9170     Roger 	}
   2000   9170     Roger 
   2001   9170     Roger 	lmutex_unlock(&udp->tdb_hash_lock);
   2002   9170     Roger 
   2003   9264     Roger 	(void) ___lwp_mutex_register(mp, &rlp->robust_lock);
   2004   4574       raf }
   2005   4574       raf 
   2006   4574       raf /*
   2007   4574       raf  * This is called in the child of fork()/forkall() to start over
   2008   4574       raf  * with a clean slate.  (Each process must register its own locks.)
   2009   4574       raf  * No locks are needed because all other threads are suspended or gone.
   2010   4574       raf  */
   2011   4574       raf void
   2012   9264     Roger unregister_locks(void)
   2013   9170     Roger {
   2014   9170     Roger 	uberdata_t *udp = curthread->ul_uberdata;
   2015   4574       raf 	robust_t **table;
   2016   4574       raf 	robust_t *rlp;
   2017   4574       raf 	robust_t *next;
   2018   4574       raf 
   2019   9170     Roger 	/*
   2020   9170     Roger 	 * Do this first, before calling lfree().
   2021   9170     Roger 	 */
   2022   9170     Roger 	table = udp->robustlocks;
   2023   9170     Roger 	udp->robustlocks = NULL;
   2024   9170     Roger 	rlp = udp->robustlist;
   2025   9170     Roger 	udp->robustlist = NULL;
   2026   9170     Roger 
   2027   9170     Roger 	/*
   2028   9264     Roger 	 * Do this by traversing the global list, not the hash table.
   2029   9170     Roger 	 */
   2030   9170     Roger 	while (rlp != NULL) {
   2031   9170     Roger 		next = rlp->robust_list;
   2032   9170     Roger 		lfree(rlp, sizeof (*rlp));
   2033   9170     Roger 		rlp = next;
   2034   9170     Roger 	}
   2035   9170     Roger 	if (table != NULL)
   2036   4574       raf 		lfree(table, LOCKHASHSZ * sizeof (robust_t *));
   2037      0    stevel }
   2038      0    stevel 
   2039      0    stevel /*
   2040      0    stevel  * Returns with mutex_owner set correctly.
   2041      0    stevel  */
   2042   6247       raf int
   2043      0    stevel mutex_lock_internal(mutex_t *mp, timespec_t *tsp, int try)
   2044      0    stevel {
   2045      0    stevel 	ulwp_t *self = curthread;
   2046      0    stevel 	uberdata_t *udp = self->ul_uberdata;
   2047      0    stevel 	int mtype = mp->mutex_type;
   2048      0    stevel 	tdb_mutex_stats_t *msp = MUTEX_STATS(mp, udp);
   2049      0    stevel 	int error = 0;
   2050   6247       raf 	int noceil = try & MUTEX_NOCEIL;
   2051   4574       raf 	uint8_t ceil;
   2052   4574       raf 	int myprio;
   2053      0    stevel 
   2054   6247       raf 	try &= ~MUTEX_NOCEIL;
   2055      0    stevel 	ASSERT(try == MUTEX_TRY || try == MUTEX_LOCK);
   2056      0    stevel 
   2057      0    stevel 	if (!self->ul_schedctl_called)
   2058      0    stevel 		(void) setup_schedctl();
   2059      0    stevel 
   2060      0    stevel 	if (msp && try == MUTEX_TRY)
   2061      0    stevel 		tdb_incr(msp->mutex_try);
   2062      0    stevel 
   2063   6812       raf 	if ((mtype & (LOCK_RECURSIVE|LOCK_ERRORCHECK)) && mutex_held(mp))
   2064   4574       raf 		return (mutex_recursion(mp, mtype, try));
   2065      0    stevel 
   2066      0    stevel 	if (self->ul_error_detection && try == MUTEX_LOCK &&
   2067   6812       raf 	    tsp == NULL && mutex_held(mp))
   2068      0    stevel 		lock_error(mp, "mutex_lock", NULL, NULL);
   2069      0    stevel 
   2070   6247       raf 	if ((mtype & LOCK_PRIO_PROTECT) && noceil == 0) {
   2071   6247       raf 		update_sched(self);
   2072   6247       raf 		if (self->ul_cid != self->ul_rtclassid) {
   2073   6247       raf 			DTRACE_PROBE2(plockstat, mutex__error, mp, EPERM);
   2074   6247       raf 			return (EPERM);
   2075   6247       raf 		}
   2076   4574       raf 		ceil = mp->mutex_ceiling;
   2077   6247       raf 		myprio = self->ul_epri? self->ul_epri : self->ul_pri;
   2078   4574       raf 		if (myprio > ceil) {
   2079   4574       raf 			DTRACE_PROBE2(plockstat, mutex__error, mp, EINVAL);
   2080   4574       raf 			return (EINVAL);
   2081   4574       raf 		}
   2082   4574       raf 		if ((error = _ceil_mylist_add(mp)) != 0) {
   2083   4574       raf 			DTRACE_PROBE2(plockstat, mutex__error, mp, error);
   2084   4574       raf 			return (error);
   2085   4574       raf 		}
   2086   4574       raf 		if (myprio < ceil)
   2087   4574       raf 			_ceil_prio_inherit(ceil);
   2088   4574       raf 	}
   2089      0    stevel 
   2090   4574       raf 	if ((mtype & (USYNC_PROCESS | LOCK_ROBUST))
   2091   4574       raf 	    == (USYNC_PROCESS | LOCK_ROBUST))
   2092   4574       raf 		register_lock(mp);
   2093      0    stevel 
   2094   4574       raf 	if (mtype & LOCK_PRIO_INHERIT) {
   2095   4574       raf 		/* go straight to the kernel */
   2096   4574       raf 		if (try == MUTEX_TRY)
   2097   4574       raf 			error = mutex_trylock_kernel(mp);
   2098   4574       raf 		else	/* MUTEX_LOCK */
   2099   4574       raf 			error = mutex_lock_kernel(mp, tsp, msp);
   2100   4574       raf 		/*
   2101   4574       raf 		 * The kernel never sets or clears the lock byte
   2102   4574       raf 		 * for LOCK_PRIO_INHERIT mutexes.
   2103   4574       raf 		 * Set it here for consistency.
   2104   4574       raf 		 */
   2105   4574       raf 		switch (error) {
   2106   4574       raf 		case 0:
   2107   6247       raf 			self->ul_pilocks++;
   2108   4574       raf 			mp->mutex_lockw = LOCKSET;
   2109   4574       raf 			break;
   2110   4574       raf 		case EOWNERDEAD:
   2111   4574       raf 		case ELOCKUNMAPPED:
   2112   6247       raf 			self->ul_pilocks++;
   2113   4574       raf 			mp->mutex_lockw = LOCKSET;
   2114   4574       raf 			/* FALLTHROUGH */
   2115   4574       raf 		case ENOTRECOVERABLE:
   2116   4574       raf 			ASSERT(mtype & LOCK_ROBUST);
   2117   4574       raf 			break;
   2118   4574       raf 		case EDEADLK:
   2119   7376     Roger 			if (try == MUTEX_TRY) {
   2120   7376     Roger 				error = EBUSY;
   2121   7376     Roger 			} else if (tsp != NULL) {	/* simulate a timeout */
   2122   7376     Roger 				/*
   2123   7376     Roger 				 * Note: mutex_timedlock() never returns EINTR.
   2124   7376     Roger 				 */
   2125   7376     Roger 				timespec_t ts = *tsp;
   2126   7376     Roger 				timespec_t rts;
   2127   7376     Roger 
   2128   7376     Roger 				while (__nanosleep(&ts, &rts) == EINTR)
   2129   7376     Roger 					ts = rts;
   2130   7376     Roger 				error = ETIME;
   2131   7376     Roger 			} else {		/* simulate a deadlock */
   2132   4574       raf 				stall();
   2133   7376     Roger 			}
   2134   4574       raf 			break;
   2135      0    stevel 		}
   2136      0    stevel 	} else if (mtype & USYNC_PROCESS) {
   2137   4613       raf 		error = mutex_trylock_process(mp, try == MUTEX_LOCK);
   2138   4574       raf 		if (error == EBUSY && try == MUTEX_LOCK)
   2139      0    stevel 			error = mutex_lock_kernel(mp, tsp, msp);
   2140   5629       raf 	} else {	/* USYNC_THREAD */
   2141   4613       raf 		error = mutex_trylock_adaptive(mp, try == MUTEX_LOCK);
   2142   4574       raf 		if (error == EBUSY && try == MUTEX_LOCK)
   2143   4574       raf 			error = mutex_lock_queue(self, msp, mp, tsp);
   2144      0    stevel 	}
   2145      0    stevel 
   2146      0    stevel 	switch (error) {
   2147   4574       raf 	case 0:
   2148      0    stevel 	case EOWNERDEAD:
   2149      0    stevel 	case ELOCKUNMAPPED:
   2150   4574       raf 		if (mtype & LOCK_ROBUST)
   2151   4574       raf 			remember_lock(mp);
   2152      0    stevel 		if (msp)
   2153      0    stevel 			record_begin_hold(msp);
   2154      0    stevel 		break;
   2155      0    stevel 	default:
   2156   6247       raf 		if ((mtype & LOCK_PRIO_PROTECT) && noceil == 0) {
   2157   4574       raf 			(void) _ceil_mylist_del(mp);
   2158   4574       raf 			if (myprio < ceil)
   2159   4574       raf 				_ceil_prio_waive();
   2160   4574       raf 		}
   2161      0    stevel 		if (try == MUTEX_TRY) {
   2162      0    stevel 			if (msp)
   2163      0    stevel 				tdb_incr(msp->mutex_try_fail);
   2164      0    stevel 			if (__td_event_report(self, TD_LOCK_TRY, udp)) {
   2165      0    stevel 				self->ul_td_evbuf.eventnum = TD_LOCK_TRY;
   2166      0    stevel 				tdb_event(TD_LOCK_TRY, udp);
   2167      0    stevel 			}
   2168      0    stevel 		}
   2169      0    stevel 		break;
   2170      0    stevel 	}
   2171      0    stevel 
   2172      0    stevel 	return (error);
   2173      0    stevel }
   2174      0    stevel 
   2175      0    stevel int
   2176      0    stevel fast_process_lock(mutex_t *mp, timespec_t *tsp, int mtype, int try)
   2177      0    stevel {
   2178      0    stevel 	ulwp_t *self = curthread;
   2179      0    stevel 	uberdata_t *udp = self->ul_uberdata;
   2180      0    stevel 
   2181      0    stevel 	/*
   2182      0    stevel 	 * We know that USYNC_PROCESS is set in mtype and that
   2183      0    stevel 	 * zero, one, or both of the flags LOCK_RECURSIVE and
   2184      0    stevel 	 * LOCK_ERRORCHECK are set, and that no other flags are set.
   2185      0    stevel 	 */
   2186   4574       raf 	ASSERT((mtype & ~(USYNC_PROCESS|LOCK_RECURSIVE|LOCK_ERRORCHECK)) == 0);
   2187      0    stevel 	enter_critical(self);
   2188   7255       raf #if defined(__sparc) && !defined(_LP64)
   2189   7255       raf 	/* horrible hack, necessary only on 32-bit sparc */
   2190   7255       raf 	if (((uintptr_t)mp & (_LONG_LONG_ALIGNMENT - 1)) &&
   2191   7255       raf 	    self->ul_misaligned) {
   2192   7255       raf 		if (set_lock_byte(&mp->mutex_lockw) == 0) {
   2193   7255       raf 			mp->mutex_ownerpid = udp->pid;
   2194   7255       raf 			mp->mutex_owner = (uintptr_t)self;
   2195   7255       raf 			exit_critical(self);
   2196   7255       raf 			DTRACE_PROBE3(plockstat, mutex__acquire, mp, 0, 0);
   2197   7255       raf 			return (0);
   2198   7255       raf 		}
   2199   7255       raf 	} else
   2200   7255       raf #endif
   2201   6057       raf 	if (set_lock_byte64(&mp->mutex_lockword64, udp->pid) == 0) {
   2202      0    stevel 		mp->mutex_owner = (uintptr_t)self;
   2203   6057       raf 		/* mp->mutex_ownerpid was set by set_lock_byte64() */
   2204      0    stevel 		exit_critical(self);
   2205      0    stevel 		DTRACE_PROBE3(plockstat, mutex__acquire, mp, 0, 0);
   2206      0    stevel 		return (0);
   2207      0    stevel 	}
   2208      0    stevel 	exit_critical(self);
   2209      0    stevel 
   2210   4574       raf 	if ((mtype & (LOCK_RECURSIVE|LOCK_ERRORCHECK)) && shared_mutex_held(mp))
   2211   4574       raf 		return (mutex_recursion(mp, mtype, try));
   2212      0    stevel 
   2213   4613       raf 	if (try == MUTEX_LOCK) {
   2214   4613       raf 		if (mutex_trylock_process(mp, 1) == 0)
   2215   4613       raf 			return (0);
   2216      0    stevel 		return (mutex_lock_kernel(mp, tsp, NULL));
   2217   4613       raf 	}
   2218      0    stevel 
   2219      0    stevel 	if (__td_event_report(self, TD_LOCK_TRY, udp)) {
   2220      0    stevel 		self->ul_td_evbuf.eventnum = TD_LOCK_TRY;
   2221      0    stevel 		tdb_event(TD_LOCK_TRY, udp);
   2222      0    stevel 	}
   2223      0    stevel 	return (EBUSY);
   2224      0    stevel }
   2225      0    stevel 
   2226      0    stevel static int
   2227      0    stevel mutex_lock_impl(mutex_t *mp, timespec_t *tsp)
   2228      0    stevel {
   2229      0    stevel 	ulwp_t *self = curthread;
   2230   6247       raf 	int mtype = mp->mutex_type;
   2231      0    stevel 	uberflags_t *gflags;
   2232   7255       raf 
   2233   7255       raf 	if (((uintptr_t)mp & (_LONG_LONG_ALIGNMENT - 1)) &&
   2234   7255       raf 	    self->ul_error_detection && self->ul_misaligned == 0)
   2235   7255       raf 		lock_error(mp, "mutex_lock", NULL, "mutex is misaligned");
   2236      0    stevel 
   2237      0    stevel 	/*
   2238      0    stevel 	 * Optimize the case of USYNC_THREAD, including
   2239      0    stevel 	 * the LOCK_RECURSIVE and LOCK_ERRORCHECK cases,
   2240      0    stevel 	 * no error detection, no lock statistics,
   2241      0    stevel 	 * and the process has only a single thread.
   2242      0    stevel 	 * (Most likely a traditional single-threaded application.)
   2243      0    stevel 	 */
   2244   6247       raf 	if (((mtype & ~(LOCK_RECURSIVE|LOCK_ERRORCHECK)) |
   2245   6247       raf 	    self->ul_uberdata->uberflags.uf_all) == 0) {
   2246      0    stevel 		/*
   2247      0    stevel 		 * Only one thread exists so we don't need an atomic operation.
   2248   7907     Roger 		 * We do, however, need to protect against signals.
   2249      0    stevel 		 */
   2250      0    stevel 		if (mp->mutex_lockw == 0) {
   2251   7907     Roger 			sigoff(self);
   2252      0    stevel 			mp->mutex_lockw = LOCKSET;
   2253      0    stevel 			mp->mutex_owner = (uintptr_t)self;
   2254   7907     Roger 			sigon(self);
   2255      0    stevel 			DTRACE_PROBE3(plockstat, mutex__acquire, mp, 0, 0);
   2256      0    stevel 			return (0);
   2257      0    stevel 		}
   2258   4574       raf 		if (mtype && MUTEX_OWNER(mp) == self)
   2259   4574       raf 			return (mutex_recursion(mp, mtype, MUTEX_LOCK));
   2260      0    stevel 		/*
   2261      0    stevel 		 * We have reached a deadlock, probably because the
   2262      0    stevel 		 * process is executing non-async-signal-safe code in
   2263      0    stevel 		 * a signal handler and is attempting to acquire a lock
   2264      0    stevel 		 * that it already owns.  This is not surprising, given
   2265      0    stevel 		 * bad programming practices over the years that has
   2266      0    stevel 		 * resulted in applications calling printf() and such
   2267      0    stevel 		 * in their signal handlers.  Unless the user has told
   2268      0    stevel 		 * us that the signal handlers are safe by setting:
   2269      0    stevel 		 *	export _THREAD_ASYNC_SAFE=1
   2270      0    stevel 		 * we return EDEADLK rather than actually deadlocking.
   2271      0    stevel 		 */
   2272      0    stevel 		if (tsp == NULL &&
   2273      0    stevel 		    MUTEX_OWNER(mp) == self && !self->ul_async_safe) {
   2274      0    stevel 			DTRACE_PROBE2(plockstat, mutex__error, mp, EDEADLK);
   2275      0    stevel 			return (EDEADLK);
   2276      0    stevel 		}
   2277      0    stevel 	}
   2278      0    stevel 
   2279      0    stevel 	/*
   2280      0    stevel 	 * Optimize the common cases of USYNC_THREAD or USYNC_PROCESS,
   2281      0    stevel 	 * no error detection, and no lock statistics.
   2282      0    stevel 	 * Include LOCK_RECURSIVE and LOCK_ERRORCHECK cases.
   2283      0    stevel 	 */
   2284      0    stevel 	if ((gflags = self->ul_schedctl_called) != NULL &&
   2285      0    stevel 	    (gflags->uf_trs_ted |
   2286      0    stevel 	    (mtype & ~(USYNC_PROCESS|LOCK_RECURSIVE|LOCK_ERRORCHECK))) == 0) {
   2287      0    stevel 		if (mtype & USYNC_PROCESS)
   2288      0    stevel 			return (fast_process_lock(mp, tsp, mtype, MUTEX_LOCK));
   2289   7907     Roger 		sigoff(self);
   2290      0    stevel 		if (set_lock_byte(&mp->mutex_lockw) == 0) {
   2291      0    stevel 			mp->mutex_owner = (uintptr_t)self;
   2292   7907     Roger 			sigon(self);
   2293      0    stevel 			DTRACE_PROBE3(plockstat, mutex__acquire, mp, 0, 0);
   2294      0    stevel 			return (0);
   2295      0    stevel 		}
   2296   7907     Roger 		sigon(self);
   2297   4574       raf 		if (mtype && MUTEX_OWNER(mp) == self)
   2298   4574       raf 			return (mutex_recursion(mp, mtype, MUTEX_LOCK));
   2299   4613       raf 		if (mutex_trylock_adaptive(mp, 1) != 0)
   2300   4574       raf 			return (mutex_lock_queue(self, NULL, mp, tsp));
   2301   4574       raf 		return (0);
   2302      0    stevel 	}
   2303      0    stevel 
   2304      0    stevel 	/* else do it the long way */
   2305      0    stevel 	return (mutex_lock_internal(mp, tsp, MUTEX_LOCK));
   2306      0    stevel }
   2307      0    stevel 
   2308   6812       raf #pragma weak pthread_mutex_lock = mutex_lock
   2309   6812       raf #pragma weak _mutex_lock = mutex_lock
   2310      0    stevel int
   2311   6812       raf mutex_lock(mutex_t *mp)
   2312      0    stevel {
   2313      0    stevel 	ASSERT(!curthread->ul_critical || curthread->ul_bindflags);
   2314      0    stevel 	return (mutex_lock_impl(mp, NULL));
   2315      0    stevel }
   2316      0    stevel 
   2317      0    stevel int
   2318   6812       raf pthread_mutex_timedlock(pthread_mutex_t *_RESTRICT_KYWD mp,
   2319   6812       raf 	const struct timespec *_RESTRICT_KYWD abstime)
   2320      0    stevel {
   2321      0    stevel 	timespec_t tslocal;
   2322      0    stevel 	int error;
   2323      0    stevel 
   2324      0    stevel 	ASSERT(!curthread->ul_critical || curthread->ul_bindflags);
   2325      0    stevel 	abstime_to_reltime(CLOCK_REALTIME, abstime, &tslocal);
   2326   6812       raf 	error = mutex_lock_impl((mutex_t *)mp, &tslocal);
   2327      0    stevel 	if (error == ETIME)
   2328      0    stevel 		error = ETIMEDOUT;
   2329      0    stevel 	return (error);
   2330      0    stevel }
   2331      0    stevel 
   2332      0    stevel int
   2333   6812       raf pthread_mutex_reltimedlock_np(pthread_mutex_t *_RESTRICT_KYWD mp,
   2334   6812       raf 	const struct timespec *_RESTRICT_KYWD reltime)
   2335      0    stevel {
   2336      0    stevel 	timespec_t tslocal;
   2337      0    stevel 	int error;
   2338      0    stevel 
   2339      0    stevel 	ASSERT(!curthread->ul_critical || curthread->ul_bindflags);
   2340      0    stevel 	tslocal = *reltime;
   2341   6812       raf 	error = mutex_lock_impl((mutex_t *)mp, &tslocal);
   2342      0    stevel 	if (error == ETIME)
   2343      0    stevel 		error = ETIMEDOUT;
   2344      0    stevel 	return (error);
   2345      0    stevel }
   2346      0    stevel 
   2347   6812       raf #pragma weak pthread_mutex_trylock = mutex_trylock
   2348      0    stevel int
   2349   6812       raf mutex_trylock(mutex_t *mp)
   2350      0    stevel {
   2351      0    stevel 	ulwp_t *self = curthread;
   2352      0    stevel 	uberdata_t *udp = self->ul_uberdata;
   2353   6247       raf 	int mtype = mp->mutex_type;
   2354      0    stevel 	uberflags_t *gflags;
   2355      0    stevel 
   2356      0    stevel 	ASSERT(!curthread->ul_critical || curthread->ul_bindflags);
   2357   6247       raf 
   2358      0    stevel 	/*
   2359      0    stevel 	 * Optimize the case of USYNC_THREAD, including
   2360      0    stevel 	 * the LOCK_RECURSIVE and LOCK_ERRORCHECK cases,
   2361      0    stevel 	 * no error detection, no lock statistics,
   2362      0    stevel 	 * and the process has only a single thread.
   2363      0    stevel 	 * (Most likely a traditional single-threaded application.)
   2364      0    stevel 	 */
   2365   6247       raf 	if (((mtype & ~(LOCK_RECURSIVE|LOCK_ERRORCHECK)) |
   2366      0    stevel 	    udp->uberflags.uf_all) == 0) {
   2367      0    stevel 		/*
   2368      0    stevel 		 * Only one thread exists so we don't need an atomic operation.
   2369   7907     Roger 		 * We do, however, need to protect against signals.
   2370      0    stevel 		 */
   2371      0    stevel 		if (mp->mutex_lockw == 0) {
   2372   7907     Roger 			sigoff(self);
   2373      0    stevel 			mp->mutex_lockw = LOCKSET;
   2374      0    stevel 			mp->mutex_owner = (uintptr_t)self;
   2375   7907     Roger 			sigon(self);
   2376      0    stevel 			DTRACE_PROBE3(plockstat, mutex__acquire, mp, 0, 0);
   2377      0    stevel 			return (0);
   2378      0    stevel 		}
   2379   4574       raf 		if (mtype && MUTEX_OWNER(mp) == self)
   2380   4574       raf 			return (mutex_recursion(mp, mtype, MUTEX_TRY));
   2381      0    stevel 		return (EBUSY);
   2382      0    stevel 	}
   2383      0    stevel 
   2384      0    stevel 	/*
   2385      0    stevel 	 * Optimize the common cases of USYNC_THREAD or USYNC_PROCESS,
   2386      0    stevel 	 * no error detection, and no lock statistics.
   2387      0    stevel 	 * Include LOCK_RECURSIVE and LOCK_ERRORCHECK cases.
   2388      0    stevel 	 */
   2389      0    stevel 	if ((gflags = self->ul_schedctl_called) != NULL &&
   2390      0    stevel 	    (gflags->uf_trs_ted |
   2391      0    stevel 	    (mtype & ~(USYNC_PROCESS|LOCK_RECURSIVE|LOCK_ERRORCHECK))) == 0) {
   2392      0    stevel 		if (mtype & USYNC_PROCESS)
   2393      0    stevel 			return (fast_process_lock(mp, NULL, mtype, MUTEX_TRY));
   2394   7907     Roger 		sigoff(self);
   2395      0    stevel 		if (set_lock_byte(&mp->mutex_lockw) == 0) {
   2396      0    stevel 			mp->mutex_owner = (uintptr_t)self;
   2397   7907     Roger 			sigon(self);
   2398      0    stevel 			DTRACE_PROBE3(plockstat, mutex__acquire, mp, 0, 0);
   2399      0    stevel 			return (0);
   2400      0    stevel 		}
   2401   7907     Roger 		sigon(self);
   2402   4574       raf 		if (mtype && MUTEX_OWNER(mp) == self)
   2403   4574       raf 			return (mutex_recursion(mp, mtype, MUTEX_TRY));
   2404   4613       raf 		if (__td_event_report(self, TD_LOCK_TRY, udp)) {
   2405   4613       raf 			self->ul_td_evbuf.eventnum = TD_LOCK_TRY;
   2406   4613       raf 			tdb_event(TD_LOCK_TRY, udp);
   2407      0    stevel 		}
   2408   4613       raf 		return (EBUSY);
   2409      0    stevel 	}
   2410      0    stevel 
   2411      0    stevel 	/* else do it the long way */
   2412      0    stevel 	return (mutex_lock_internal(mp, NULL, MUTEX_TRY));
   2413      0    stevel }
   2414      0    stevel 
   2415      0    stevel int
   2416   4574       raf mutex_unlock_internal(mutex_t *mp, int retain_robust_flags)
   2417      0    stevel {
   2418      0    stevel 	ulwp_t *self = curthread;
   2419      0    stevel 	uberdata_t *udp = self->ul_uberdata;
   2420      0    stevel 	int mtype = mp->mutex_type;
   2421      0    stevel 	tdb_mutex_stats_t *msp;
   2422   4574       raf 	int error = 0;
   2423   4574       raf 	int release_all;
   2424      0    stevel 	lwpid_t lwpid;
   2425      0    stevel 
   2426   8036     Roger 	if ((mtype & (LOCK_ERRORCHECK | LOCK_ROBUST)) &&
   2427   8036     Roger 	    !mutex_held(mp))
   2428      0    stevel 		return (EPERM);
   2429      0    stevel 
   2430   6812       raf 	if (self->ul_error_detection && !mutex_held(mp))
   2431      0    stevel 		lock_error(mp, "mutex_unlock", NULL, NULL);
   2432      0    stevel 
   2433      0    stevel 	if ((mtype & LOCK_RECURSIVE) && mp->mutex_rcount != 0) {
   2434      0    stevel 		mp->mutex_rcount--;
   2435      0    stevel 		DTRACE_PROBE2(plockstat, mutex__release, mp, 1);
   2436      0    stevel 		return (0);
   2437      0    stevel 	}
   2438      0    stevel 
   2439      0    stevel 	if ((msp = MUTEX_STATS(mp, udp)) != NULL)
   2440      0    stevel 		(void) record_hold_time(msp);
   2441      0    stevel 
   2442   4574       raf 	if (!retain_robust_flags && !(mtype & LOCK_PRIO_INHERIT) &&
   2443   4574       raf 	    (mp->mutex_flag & (LOCK_OWNERDEAD | LOCK_UNMAPPED))) {
   2444   8036     Roger 		ASSERT(mtype & LOCK_ROBUST);
   2445   4574       raf 		mp->mutex_flag &= ~(LOCK_OWNERDEAD | LOCK_UNMAPPED);
   2446   4574       raf 		mp->mutex_flag |= LOCK_NOTRECOVERABLE;
   2447   4574       raf 	}
   2448   4574       raf 	release_all = ((mp->mutex_flag & LOCK_NOTRECOVERABLE) != 0);
   2449   4574       raf 
   2450   4574       raf 	if (mtype & LOCK_PRIO_INHERIT) {
   2451      0    stevel 		no_preempt(self);
   2452      0    stevel 		mp->mutex_owner = 0;
   2453   6057       raf 		/* mp->mutex_ownerpid is cleared by ___lwp_mutex_unlock() */
   2454      0    stevel 		DTRACE_PROBE2(plockstat, mutex__release, mp, 0);
   2455   4574       raf 		mp->mutex_lockw = LOCKCLEAR;
   2456   6247       raf 		self->ul_pilocks--;
   2457   4574       raf 		error = ___lwp_mutex_unlock(mp);
   2458      0    stevel 		preempt(self);
   2459      0    stevel 	} else if (mtype & USYNC_PROCESS) {
   2460   5629       raf 		mutex_unlock_process(mp, release_all);
   2461      0    stevel 	} else {	/* USYNC_THREAD */
   2462   4574       raf 		if ((lwpid = mutex_unlock_queue(mp, release_all)) != 0) {
   2463      0    stevel 			(void) __lwp_unpark(lwpid);
   2464      0    stevel 			preempt(self);
   2465      0    stevel 		}
   2466      0    stevel 	}
   2467   4574       raf 
   2468   4574       raf 	if (mtype & LOCK_ROBUST)
   2469   4574       raf 		forget_lock(mp);
   2470   4574       raf 
   2471   4574       raf 	if ((mtype & LOCK_PRIO_PROTECT) && _ceil_mylist_del(mp))
   2472   4574       raf 		_ceil_prio_waive();
   2473      0    stevel 
   2474      0    stevel 	return (error);
   2475      0    stevel }
   2476      0    stevel 
   2477   6812       raf #pragma weak pthread_mutex_unlock = mutex_unlock
   2478   6812       raf #pragma weak _mutex_unlock = mutex_unlock
   2479      0    stevel int
   2480   6812       raf mutex_unlock(mutex_t *mp)
   2481      0    stevel {
   2482      0    stevel 	ulwp_t *self = curthread;
   2483   6247       raf 	int mtype = mp->mutex_type;
   2484      0    stevel 	uberflags_t *gflags;
   2485      0    stevel 	lwpid_t lwpid;
   2486      0    stevel 	short el;
   2487      0    stevel 
   2488      0    stevel 	/*
   2489      0    stevel 	 * Optimize the case of USYNC_THREAD, including
   2490      0    stevel 	 * the LOCK_RECURSIVE and LOCK_ERRORCHECK cases,
   2491      0    stevel 	 * no error detection, no lock statistics,
   2492      0    stevel 	 * and the process has only a single thread.
   2493      0    stevel 	 * (Most likely a traditional single-threaded application.)
   2494      0    stevel 	 */
   2495   6247       raf 	if (((mtype & ~(LOCK_RECURSIVE|LOCK_ERRORCHECK)) |
   2496   6247       raf 	    self->ul_uberdata->uberflags.uf_all) == 0) {
   2497      0    stevel 		if (mtype) {
   2498      0    stevel 			/*
   2499      0    stevel 			 * At this point we know that one or both of the
   2500      0    stevel 			 * flags LOCK_RECURSIVE or LOCK_ERRORCHECK is set.
   2501      0    stevel 			 */
   2502      0    stevel 			if ((mtype & LOCK_ERRORCHECK) && !MUTEX_OWNED(mp, self))
   2503      0    stevel 				return (EPERM);
   2504      0    stevel 			if ((mtype & LOCK_RECURSIVE) && mp->mutex_rcount != 0) {
   2505      0    stevel 				mp->mutex_rcount--;
   2506      0    stevel 				DTRACE_PROBE2(plockstat, mutex__release, mp, 1);
   2507      0    stevel 				return (0);
   2508      0    stevel 			}
   2509      0    stevel 		}
   2510      0    stevel 		/*
   2511      0    stevel 		 * Only one thread exists so we don't need an atomic operation.
   2512      0    stevel 		 * Also, there can be no waiters.
   2513      0    stevel 		 */
   2514   7907     Roger 		sigoff(self);
   2515      0    stevel 		mp->mutex_owner = 0;
   2516      0    stevel 		mp->mutex_lockword = 0;
   2517   7907     Roger 		sigon(self);
   2518      0    stevel 		DTRACE_PROBE2(plockstat, mutex__release, mp, 0);
   2519      0    stevel 		return (0);
   2520      0    stevel 	}
   2521      0    stevel 
   2522      0    stevel 	/*
   2523      0    stevel 	 * Optimize the common cases of USYNC_THREAD or USYNC_PROCESS,
   2524      0    stevel 	 * no error detection, and no lock statistics.
   2525      0    stevel 	 * Include LOCK_RECURSIVE and LOCK_ERRORCHECK cases.
   2526      0    stevel 	 */
   2527      0    stevel 	if ((gflags = self->ul_schedctl_called) != NULL) {
   2528      0    stevel 		if (((el = gflags->uf_trs_ted) | mtype) == 0) {
   2529      0    stevel fast_unlock:
   2530   5629       raf 			if ((lwpid = mutex_unlock_queue(mp, 0)) != 0) {
   2531      0    stevel 				(void) __lwp_unpark(lwpid);
   2532      0    stevel 				preempt(self);
   2533      0    stevel 			}
   2534      0    stevel 			return (0);
   2535      0    stevel 		}
   2536      0    stevel 		if (el)		/* error detection or lock statistics */
   2537      0    stevel 			goto slow_unlock;
   2538      0    stevel 		if ((mtype & ~(LOCK_RECURSIVE|LOCK_ERRORCHECK)) == 0) {
   2539      0    stevel 			/*
   2540      0    stevel 			 * At this point we know that one or both of the
   2541      0    stevel 			 * flags LOCK_RECURSIVE or LOCK_ERRORCHECK is set.
   2542      0    stevel 			 */
   2543      0    stevel 			if ((mtype & LOCK_ERRORCHECK) && !MUTEX_OWNED(mp, self))
   2544      0    stevel 				return (EPERM);
   2545      0    stevel 			if ((mtype & LOCK_RECURSIVE) && mp->mutex_rcount != 0) {
   2546      0    stevel 				mp->mutex_rcount--;
   2547      0    stevel 				DTRACE_PROBE2(plockstat, mutex__release, mp, 1);
   2548      0    stevel 				return (0);
   2549      0    stevel 			}
   2550      0    stevel 			goto fast_unlock;
   2551      0    stevel 		}
   2552      0    stevel 		if ((mtype &
   2553      0    stevel 		    ~(USYNC_PROCESS|LOCK_RECURSIVE|LOCK_ERRORCHECK)) == 0) {
   2554      0    stevel 			/*
   2555      0    stevel 			 * At this point we know that zero, one, or both of the
   2556      0    stevel 			 * flags LOCK_RECURSIVE or LOCK_ERRORCHECK is set and
   2557      0    stevel 			 * that the USYNC_PROCESS flag is set.
   2558      0    stevel 			 */
   2559      0    stevel 			if ((mtype & LOCK_ERRORCHECK) && !shared_mutex_held(mp))
   2560      0    stevel 				return (EPERM);
   2561      0    stevel 			if ((mtype & LOCK_RECURSIVE) && mp->mutex_rcount != 0) {
   2562      0    stevel 				mp->mutex_rcount--;
   2563      0    stevel 				DTRACE_PROBE2(plockstat, mutex__release, mp, 1);
   2564      0    stevel 				return (0);
   2565      0    stevel 			}
   2566   5629       raf 			mutex_unlock_process(mp, 0);
   2567      0    stevel 			return (0);
   2568      0    stevel 		}
   2569      0    stevel 	}
   2570      0    stevel 
   2571      0    stevel 	/* else do it the long way */
   2572      0    stevel slow_unlock:
   2573   4574       raf 	return (mutex_unlock_internal(mp, 0));
   2574      0    stevel }
   2575      0    stevel 
   2576      0    stevel /*
   2577      0    stevel  * Internally to the library, almost all mutex lock/unlock actions
   2578      0    stevel  * go through these lmutex_ functions, to protect critical regions.
   2579   6812       raf  * We replicate a bit of code from mutex_lock() and mutex_unlock()
   2580      0    stevel  * to make these functions faster since we know that the mutex type
   2581      0    stevel  * of all internal locks is USYNC_THREAD.  We also know that internal
   2582      0    stevel  * locking can never fail, so we panic if it does.
   2583      0    stevel  */
   2584      0    stevel void
   2585      0    stevel lmutex_lock(mutex_t *mp)
   2586      0    stevel {
   2587      0    stevel 	ulwp_t *self = curthread;
   2588      0    stevel 	uberdata_t *udp = self->ul_uberdata;
   2589      0    stevel 
   2590      0    stevel 	ASSERT(mp->mutex_type == USYNC_THREAD);
   2591      0    stevel 
   2592      0    stevel 	enter_critical(self);
   2593      0    stevel 	/*
   2594      0    stevel 	 * Optimize the case of no lock statistics and only a single thread.
   2595      0    stevel 	 * (Most likely a traditional single-threaded application.)
   2596      0    stevel 	 */
   2597      0    stevel 	if (udp->uberflags.uf_all == 0) {
   2598      0    stevel 		/*
   2599      0    stevel 		 * Only one thread exists; the mutex must be free.
   2600      0    stevel 		 */
   2601      0    stevel 		ASSERT(mp->mutex_lockw == 0);
   2602      0    stevel 		mp->mutex_lockw = LOCKSET;
   2603      0    stevel 		mp->mutex_owner = (uintptr_t)self;
   2604      0    stevel 		DTRACE_PROBE3(plockstat, mutex__acquire, mp, 0, 0);
   2605      0    stevel 	} else {
   2606      0    stevel 		tdb_mutex_stats_t *msp = MUTEX_STATS(mp, udp);
   2607      0    stevel 
   2608      0    stevel 		if (!self->ul_schedctl_called)
   2609      0    stevel 			(void) setup_schedctl();
   2610      0    stevel 
   2611      0    stevel 		if (set_lock_byte(&mp->mutex_lockw) == 0) {
   2612      0    stevel 			mp->mutex_owner = (uintptr_t)self;
   2613      0    stevel 			DTRACE_PROBE3(plockstat, mutex__acquire, mp, 0, 0);
   2614   4613       raf 		} else if (mutex_trylock_adaptive(mp, 1) != 0) {
   2615      0    stevel 			(void) mutex_lock_queue(self, msp, mp, NULL);
   2616      0    stevel 		}
   2617      0    stevel 
   2618      0    stevel 		if (msp)
   2619      0    stevel 			record_begin_hold(msp);
   2620      0    stevel 	}
   2621      0    stevel }
   2622      0    stevel 
   2623      0    stevel void
   2624      0    stevel lmutex_unlock(mutex_t *mp)
   2625      0    stevel {
   2626      0    stevel 	ulwp_t *self = curthread;
   2627      0    stevel 	uberdata_t *udp = self->ul_uberdata;
   2628      0    stevel 
   2629      0    stevel 	ASSERT(mp->mutex_type == USYNC_THREAD);
   2630      0    stevel 
   2631      0    stevel 	/*
   2632      0    stevel 	 * Optimize the case of no lock statistics and only a single thread.
   2633      0    stevel 	 * (Most likely a traditional single-threaded application.)
   2634      0    stevel 	 */
   2635      0    stevel 	if (udp->uberflags.uf_all == 0) {
   2636      0    stevel 		/*
   2637      0    stevel 		 * Only one thread exists so there can be no waiters.
   2638      0    stevel 		 */
   2639      0    stevel 		mp->mutex_owner = 0;
   2640      0    stevel 		mp->mutex_lockword = 0;
   2641      0    stevel 		DTRACE_PROBE2(plockstat, mutex__release, mp, 0);
   2642      0    stevel 	} else {
   2643      0    stevel 		tdb_mutex_stats_t *msp = MUTEX_STATS(mp, udp);
   2644      0    stevel 		lwpid_t lwpid;
   2645      0    stevel 
   2646      0    stevel 		if (msp)
   2647      0    stevel 			(void) record_hold_time(msp);
   2648   4574       raf 		if ((lwpid = mutex_unlock_queue(mp, 0)) != 0) {
   2649      0    stevel 			(void) __lwp_unpark(lwpid);
   2650      0    stevel 			preempt(self);
   2651      0    stevel 		}
   2652      0    stevel 	}
   2653      0    stevel 	exit_critical(self);
   2654      0    stevel }
   2655      0    stevel 
   2656   2248       raf /*
   2657   2248       raf  * For specialized code in libc, like the asynchronous i/o code,
   2658   2248       raf  * the following sig_*() locking primitives are used in order
   2659   2248       raf  * to make the code asynchronous signal safe.  Signals are
   2660   2248       raf  * deferred while locks acquired by these functions are held.
   2661   2248       raf  */
   2662   2248       raf void
   2663   2248       raf sig_mutex_lock(mutex_t *mp)
   2664   2248       raf {
   2665  10637     Roger 	ulwp_t *self = curthread;
   2666  10637     Roger 
   2667  10637     Roger 	sigoff(self);
   2668   6515       raf 	(void) mutex_lock(mp);
   2669   2248       raf }
   2670   2248       raf 
   2671   2248       raf void
   2672   2248       raf sig_mutex_unlock(mutex_t *mp)
   2673   2248       raf {
   2674  10637     Roger 	ulwp_t *self = curthread;
   2675  10637     Roger 
   2676   6515       raf 	(void) mutex_unlock(mp);
   2677  10637     Roger 	sigon(self);
   2678   2248       raf }
   2679   2248       raf 
   2680   2248       raf int
   2681   2248       raf sig_mutex_trylock(mutex_t *mp)
   2682   2248       raf {
   2683  10637     Roger 	ulwp_t *self = curthread;
   2684   2248       raf 	int error;
   2685   2248       raf 
   2686  10637     Roger 	sigoff(self);
   2687   6515       raf 	if ((error = mutex_trylock(mp)) != 0)
   2688  10637     Roger 		sigon(self);
   2689   2248       raf 	return (error);
   2690   2248       raf }
   2691   2248       raf 
   2692   2248       raf /*
   2693   2248       raf  * sig_cond_wait() is a cancellation point.
   2694   2248       raf  */
   2695   2248       raf int
   2696   2248       raf sig_cond_wait(cond_t *cv, mutex_t *mp)
   2697   2248       raf {
   2698   2248       raf 	int error;
   2699   2248       raf 
   2700   2248       raf 	ASSERT(curthread->ul_sigdefer != 0);
   2701   6515       raf 	pthread_testcancel();
   2702   5891       raf 	error = __cond_wait(cv, mp);
   2703   2248       raf 	if (error == EINTR && curthread->ul_cursig) {
   2704   2248       raf 		sig_mutex_unlock(mp);
   2705   2248       raf 		/* take the deferred signal here */
   2706   2248       raf 		sig_mutex_lock(mp);
   2707   2248       raf 	}
   2708   6515       raf 	pthread_testcancel();
   2709   2248       raf 	return (error);
   2710   2248       raf }
   2711   2248       raf 
   2712   2248       raf /*
   2713   2248       raf  * sig_cond_reltimedwait() is a cancellation point.
   2714   2248       raf  */
   2715   2248       raf int
   2716   2248       raf sig_cond_reltimedwait(cond_t *cv, mutex_t *mp, const timespec_t *ts)
   2717   2248       raf {
   2718   2248       raf 	int error;
   2719   2248       raf 
   2720   2248       raf 	ASSERT(curthread->ul_sigdefer != 0);
   2721   6515       raf 	pthread_testcancel();
   2722   5891       raf 	error = __cond_reltimedwait(cv, mp, ts);
   2723   2248       raf 	if (error == EINTR && curthread->ul_cursig) {
   2724   2248       raf 		sig_mutex_unlock(mp);
   2725   2248       raf 		/* take the deferred signal here */
   2726   2248       raf 		sig_mutex_lock(mp);
   2727   2248       raf 	}
   2728   6515       raf 	pthread_testcancel();
   2729   2248       raf 	return (error);
   2730   5891       raf }
   2731   5891       raf 
   2732   5891       raf /*
   2733   5891       raf  * For specialized code in libc, like the stdio code.
   2734   5891       raf  * the following cancel_safe_*() locking primitives are used in
   2735   5891       raf  * order to make the code cancellation-safe.  Cancellation is
   2736   5891       raf  * deferred while locks acquired by these functions are held.
   2737   5891       raf  */
   2738   5891       raf void
   2739   5891       raf cancel_safe_mutex_lock(mutex_t *mp)
   2740   5891       raf {
   2741   6515       raf 	(void) mutex_lock(mp);
   2742   5891       raf 	curthread->ul_libc_locks++;
   2743   5891       raf }
   2744   5891       raf 
   2745   5891       raf int
   2746   5891       raf cancel_safe_mutex_trylock(mutex_t *mp)
   2747   5891       raf {
   2748   5891       raf 	int error;
   2749   5891       raf 
   2750   6515       raf 	if ((error = mutex_trylock(mp)) == 0)
   2751   5891       raf 		curthread->ul_libc_locks++;
   2752   5891       raf 	return (error);
   2753   5891       raf }
   2754   5891       raf 
   2755   5891       raf void
   2756   5891       raf cancel_safe_mutex_unlock(mutex_t *mp)
   2757   5891       raf {
   2758   5891       raf 	ulwp_t *self = curthread;
   2759   5891       raf 
   2760   5891       raf 	ASSERT(self->ul_libc_locks != 0);
   2761   5891       raf 
   2762   6515       raf 	(void) mutex_unlock(mp);
   2763   5891       raf 
   2764   5891       raf 	/*
   2765   5891       raf 	 * Decrement the count of locks held by cancel_safe_mutex_lock().
   2766   5891       raf 	 * If we are then in a position to terminate cleanly and
   2767   5891       raf 	 * if there is a pending cancellation and cancellation
   2768   5891       raf 	 * is not disabled and we received EINTR from a recent
   2769   5891       raf 	 * system call then perform the cancellation action now.
   2770   5891       raf 	 */
   2771   5891       raf 	if (--self->ul_libc_locks == 0 &&
   2772   5891       raf 	    !(self->ul_vfork | self->ul_nocancel |
   2773   5891       raf 	    self->ul_critical | self->ul_sigdefer) &&
   2774   5891       raf 	    cancel_active())
   2775   6812       raf 		pthread_exit(PTHREAD_CANCELED);
   2776   2248       raf }
   2777   2248       raf 
   2778      0    stevel static int
   2779      0    stevel shared_mutex_held(mutex_t *mparg)
   2780      0    stevel {
   2781      0    stevel 	/*
   2782   4574       raf 	 * The 'volatile' is necessary to make sure the compiler doesn't
   2783   4574       raf 	 * reorder the tests of the various components of the mutex.
   2784   4574       raf 	 * They must be tested in this order:
   2785   4574       raf 	 *	mutex_lockw
   2786   4574       raf 	 *	mutex_owner
   2787   4574       raf 	 *	mutex_ownerpid
   2788   4574       raf 	 * This relies on the fact that everywhere mutex_lockw is cleared,
   2789   4574       raf 	 * mutex_owner and mutex_ownerpid are cleared before mutex_lockw
   2790   4574       raf 	 * is cleared, and that everywhere mutex_lockw is set, mutex_owner
   2791   4574       raf 	 * and mutex_ownerpid are set after mutex_lockw is set, and that
   2792   4574       raf 	 * mutex_lockw is set or cleared with a memory barrier.
   2793      0    stevel 	 */
   2794      0    stevel 	volatile mutex_t *mp = (volatile mutex_t *)mparg;
   2795      0    stevel 	ulwp_t *self = curthread;
   2796      0    stevel 	uberdata_t *udp = self->ul_uberdata;
   2797      0    stevel 
   2798   4574       raf 	return (MUTEX_OWNED(mp, self) && mp->mutex_ownerpid == udp->pid);
   2799      0    stevel }
   2800      0    stevel 
   2801   6812       raf #pragma weak _mutex_held = mutex_held
   2802      0    stevel int
   2803   6812       raf mutex_held(mutex_t *mparg)
   2804      0    stevel {
   2805   4574       raf 	volatile mutex_t *mp = (volatile mutex_t *)mparg;
   2806   4574       raf 
   2807   4574       raf 	if (mparg->mutex_type & USYNC_PROCESS)
   2808   4574       raf 		return (shared_mutex_held(mparg));
   2809      0    stevel 	return (MUTEX_OWNED(mp, curthread));
   2810      0    stevel }
   2811      0    stevel 
   2812   6812       raf #pragma weak pthread_mutex_destroy = mutex_destroy
   2813   6812       raf #pragma weak _mutex_destroy = mutex_destroy
   2814      0    stevel int
   2815   6812       raf mutex_destroy(mutex_t *mp)
   2816      0    stevel {
   2817   4574       raf 	if (mp->mutex_type & USYNC_PROCESS)
   2818   4574       raf 		forget_lock(mp);
   2819   6515       raf 	(void) memset(mp, 0, sizeof (*mp));
   2820      0    stevel 	tdb_sync_obj_deregister(mp);
   2821      0    stevel 	return (0);
   2822   4574       raf }
   2823   4574       raf 
   2824   6812       raf #pragma weak pthread_mutex_consistent_np = mutex_consistent
   2825   8036     Roger #pragma weak pthread_mutex_consistent = mutex_consistent
   2826   4574       raf int
   2827   6812       raf mutex_consistent(mutex_t *mp)
   2828   4574       raf {
   2829   4574       raf 	/*
   2830   4574       raf 	 * Do this only for an inconsistent, initialized robust lock
   2831   4574       raf 	 * that we hold.  For all other cases, return EINVAL.
   2832   4574       raf 	 */
   2833   6812       raf 	if (mutex_held(mp) &&
   2834   4574       raf 	    (mp->mutex_type & LOCK_ROBUST) &&
   2835   4574       raf 	    (mp->mutex_flag & LOCK_INITED) &&
   2836   4574       raf 	    (mp->mutex_flag & (LOCK_OWNERDEAD | LOCK_UNMAPPED))) {
   2837   4574       raf 		mp->mutex_flag &= ~(LOCK_OWNERDEAD | LOCK_UNMAPPED);
   2838   4574       raf 		mp->mutex_rcount = 0;
   2839   4574       raf 		return (0);
   2840   4574       raf 	}
   2841   4574       raf 	return (EINVAL);
   2842      0    stevel }
   2843      0    stevel 
   2844      0    stevel /*
   2845      0    stevel  * Spin locks are separate from ordinary mutexes,
   2846      0    stevel  * but we use the same data structure for them.
   2847      0    stevel  */
   2848      0    stevel 
   2849      0    stevel int
   2850   6812       raf pthread_spin_init(pthread_spinlock_t *lock, int pshared)
   2851      0    stevel {
   2852      0    stevel 	mutex_t *mp = (mutex_t *)lock;
   2853      0    stevel 
   2854   6515       raf 	(void) memset(mp, 0, sizeof (*mp));
   2855      0    stevel 	if (pshared == PTHREAD_PROCESS_SHARED)
   2856      0    stevel 		mp->mutex_type = USYNC_PROCESS;
   2857      0    stevel 	else
   2858      0    stevel 		mp->mutex_type = USYNC_THREAD;
   2859      0    stevel 	mp->mutex_flag = LOCK_INITED;
   2860      0    stevel 	mp->mutex_magic = MUTEX_MAGIC;
   2861   7255       raf 
   2862   7255       raf 	/*
   2863   7255       raf 	 * This should be at the beginning of the function,
   2864   7255       raf 	 * but for the sake of old broken applications that
   2865   7255       raf 	 * do not have proper alignment for their mutexes
   2866   7255       raf 	 * (and don't check the return code from pthread_spin_init),
   2867   7255       raf 	 * we put it here, after initializing the mutex regardless.
   2868   7255       raf 	 */
   2869   7255       raf 	if (((uintptr_t)mp & (_LONG_LONG_ALIGNMENT - 1)) &&
   2870   7255       raf 	    curthread->ul_misaligned == 0)
   2871   7255       raf 		return (EINVAL);
   2872   7255       raf 
   2873      0    stevel 	return (0);
   2874      0    stevel }
   2875      0    stevel 
   2876      0    stevel int
   2877   6812       raf pthread_spin_destroy(pthread_spinlock_t *lock)
   2878      0    stevel {
   2879   6515       raf 	(void) memset(lock, 0, sizeof (*lock));
   2880      0    stevel 	return (0);
   2881      0    stevel }
   2882      0    stevel 
   2883      0    stevel int
   2884   6812       raf pthread_spin_trylock(pthread_spinlock_t *lock)
   2885      0    stevel {
   2886      0    stevel 	mutex_t *mp = (mutex_t *)lock;
   2887      0    stevel 	ulwp_t *self = curthread;
   2888      0    stevel 	int error = 0;
   2889      0    stevel 
   2890      0    stevel 	no_preempt(self);
   2891      0    stevel 	if (set_lock_byte(&mp->mutex_lockw) != 0)
   2892      0    stevel 		error = EBUSY;
   2893      0    stevel 	else {
   2894      0    stevel 		mp->mutex_owner = (uintptr_t)self;
   2895      0    stevel 		if (mp->mutex_type == USYNC_PROCESS)
   2896      0    stevel 			mp->mutex_ownerpid = self->ul_uberdata->pid;
   2897      0    stevel 		DTRACE_PROBE3(plockstat, mutex__acquire, mp, 0, 0);
   2898      0    stevel 	}
   2899      0    stevel 	preempt(self);
   2900      0    stevel 	return (error);
   2901      0    stevel }
   2902      0    stevel 
   2903      0    stevel int
   2904   6812       raf pthread_spin_lock(pthread_spinlock_t *lock)
   2905      0    stevel {
   2906   4574       raf 	mutex_t *mp = (mutex_t *)lock;
   2907   4574       raf 	ulwp_t *self = curthread;
   2908   4574       raf 	volatile uint8_t *lockp = (volatile uint8_t *)&mp->mutex_lockw;
   2909   4574       raf 	int count = 0;
   2910      0    stevel 
   2911   4574       raf 	ASSERT(!self->ul_critical || self->ul_bindflags);
   2912   4574       raf 
   2913   4574       raf 	DTRACE_PROBE1(plockstat, mutex__spin, mp);
   2914   4574       raf 
   2915      0    stevel 	/*
   2916      0    stevel 	 * We don't care whether the owner is running on a processor.
   2917      0    stevel 	 * We just spin because that's what this interface requires.
   2918      0    stevel 	 */
   2919      0    stevel 	for (;;) {
   2920      0    stevel 		if (*lockp == 0) {	/* lock byte appears to be clear */
   2921   4574       raf 			no_preempt(self);
   2922   4574       raf 			if (set_lock_byte(lockp) == 0)
   2923   4574       raf 				break;
   2924   4574       raf 			preempt(self);
   2925      0    stevel 		}
   2926   5629       raf 		if (count < INT_MAX)
   2927   5629       raf 			count++;
   2928      0    stevel 		SMT_PAUSE();
   2929      0    stevel 	}
   2930   4574       raf 	mp->mutex_owner = (uintptr_t)self;
   2931   4574       raf 	if (mp->mutex_type == USYNC_PROCESS)
   2932   4574       raf 		mp->mutex_ownerpid = self->ul_uberdata->pid;
   2933   4574       raf 	preempt(self);
   2934   5629       raf 	if (count) {
   2935   9397  Jonathan 		DTRACE_PROBE3(plockstat, mutex__spun, mp, 1, count);
   2936   5629       raf 	}
   2937   4574       raf 	DTRACE_PROBE3(plockstat, mutex__acquire, mp, 0, count);
   2938   4574       raf 	return (0);
   2939      0    stevel }
   2940      0    stevel 
   2941      0    stevel int
   2942   6812       raf pthread_spin_unlock(pthread_spinlock_t *lock)
   2943      0    stevel {
   2944      0    stevel 	mutex_t *mp = (mutex_t *)lock;
   2945      0    stevel 	ulwp_t *self = curthread;
   2946      0    stevel 
   2947      0    stevel 	no_preempt(self);
   2948      0    stevel 	mp->mutex_owner = 0;
   2949      0    stevel 	mp->mutex_ownerpid = 0;
   2950      0    stevel 	DTRACE_PROBE2(plockstat, mutex__release, mp, 0);
   2951   4570       raf 	(void) atomic_swap_32(&mp->mutex_lockword, 0);
   2952      0    stevel 	preempt(self);
   2953      0    stevel 	return (0);
   2954   4574       raf }
   2955   4574       raf 
   2956   5629       raf #define	INITIAL_LOCKS	8	/* initial size of ul_heldlocks.array */
   2957   4574       raf 
   2958   4574       raf /*
   2959   4574       raf  * Find/allocate an entry for 'lock' in our array of held locks.
   2960   4574       raf  */
   2961   4574       raf static mutex_t **
   2962   4574       raf find_lock_entry(mutex_t *lock)
   2963   4574       raf {
   2964   4574       raf 	ulwp_t *self = curthread;
   2965   4574       raf 	mutex_t **remembered = NULL;
   2966   4574       raf 	mutex_t **lockptr;
   2967   4574       raf 	uint_t nlocks;
   2968   4574       raf 
   2969   4574       raf 	if ((nlocks = self->ul_heldlockcnt) != 0)
   2970   4574       raf 		lockptr = self->ul_heldlocks.array;
   2971   4574       raf 	else {
   2972   4574       raf 		nlocks = 1;
   2973   4574       raf 		lockptr = &self->ul_heldlocks.single;
   2974   4574       raf 	}
   2975   4574       raf 
   2976   4574       raf 	for (; nlocks; nlocks--, lockptr++) {
   2977   4574       raf 		if (*lockptr == lock)
   2978   4574       raf 			return (lockptr);
   2979   4574       raf 		if (*lockptr == NULL && remembered == NULL)
   2980   4574       raf 			remembered = lockptr;
   2981   4574       raf 	}
   2982   4574       raf 	if (remembered != NULL) {
   2983   4574       raf 		*remembered = lock;
   2984   4574       raf 		return (remembered);
   2985   4574       raf 	}
   2986   4574       raf 
   2987   4574       raf 	/*
   2988   4574       raf 	 * No entry available.  Allocate more space, converting
   2989   4574       raf 	 * the single entry into an array of entries if necessary.
   2990   4574       raf 	 */
   2991   4574       raf 	if ((nlocks = self->ul_heldlockcnt) == 0) {
   2992   4574       raf 		/*
   2993   4574       raf 		 * Initial allocation of the array.
   2994   4574       raf 		 * Convert the single entry into an array.
   2995   4574       raf 		 */
   2996   4574       raf 		self->ul_heldlockcnt = nlocks = INITIAL_LOCKS;
   2997   4574       raf 		lockptr = lmalloc(nlocks * sizeof (mutex_t *));
   2998   4574       raf 		/*
   2999   4574       raf 		 * The single entry becomes the first entry in the array.
   3000   4574       raf 		 */
   3001   4574       raf 		*lockptr = self->ul_heldlocks.single;
   3002   4574       raf 		self->ul_heldlocks.array = lockptr;
   3003   4574       raf 		/*
   3004   4574       raf 		 * Return the next available entry in the array.
   3005   4574       raf 		 */
   3006   4574       raf 		*++lockptr = lock;
   3007   4574       raf 		return (lockptr);
   3008   4574       raf 	}
   3009   4574       raf 	/*
   3010   4574       raf 	 * Reallocate the array, double the size each time.
   3011   4574       raf 	 */
   3012   4574       raf 	lockptr = lmalloc(nlocks * 2 * sizeof (mutex_t *));
   3013   6515       raf 	(void) memcpy(lockptr, self->ul_heldlocks.array,
   3014   4574       raf 	    nlocks * sizeof (mutex_t *));
   3015   4574       raf 	lfree(self->ul_heldlocks.array, nlocks * sizeof (mutex_t *));
   3016   4574       raf 	self->ul_heldlocks.array = lockptr;
   3017   4574       raf 	self->ul_heldlockcnt *= 2;
   3018   4574       raf 	/*
   3019   4574       raf 	 * Return the next available entry in the newly allocated array.
   3020   4574       raf 	 */
   3021   4574       raf 	*(lockptr += nlocks) = lock;
   3022   4574       raf 	return (lockptr);
   3023   4574       raf }
   3024   4574       raf 
   3025   4574       raf /*
   3026   4574       raf  * Insert 'lock' into our list of held locks.
   3027   4574       raf  * Currently only used for LOCK_ROBUST mutexes.
   3028   4574       raf  */
   3029   4574       raf void
   3030   4574       raf remember_lock(mutex_t *lock)
   3031   4574       raf {
   3032   4574       raf 	(void) find_lock_entry(lock);
   3033   4574       raf }
   3034   4574       raf 
   3035   4574       raf /*
   3036   4574       raf  * Remove 'lock' from our list of held locks.
   3037   4574       raf  * Currently only used for LOCK_ROBUST mutexes.
   3038   4574       raf  */
   3039   4574       raf void
   3040   4574       raf forget_lock(mutex_t *lock)
   3041   4574       raf {
   3042   4574       raf 	*find_lock_entry(lock) = NULL;
   3043   4574       raf }
   3044   4574       raf 
   3045   4574       raf /*
   3046   4574       raf  * Free the array of held locks.
   3047   4574       raf  */
   3048   4574       raf void
   3049   4574       raf heldlock_free(ulwp_t *ulwp)
   3050   4574       raf {
   3051   4574       raf 	uint_t nlocks;
   3052   4574       raf 
   3053   4574       raf 	if ((nlocks = ulwp->ul_heldlockcnt) != 0)
   3054   4574       raf 		lfree(ulwp->ul_heldlocks.array, nlocks * sizeof (mutex_t *));
   3055   4574       raf 	ulwp->ul_heldlockcnt = 0;
   3056   4574       raf 	ulwp->ul_heldlocks.array = NULL;
   3057   4574       raf }
   3058   4574       raf 
   3059   4574       raf /*
   3060   4574       raf  * Mark all held LOCK_ROBUST mutexes LOCK_OWNERDEAD.
   3061   4574       raf  * Called from _thrp_exit() to deal with abandoned locks.
   3062   4574       raf  */
   3063   4574       raf void
   3064   4574       raf heldlock_exit(void)
   3065   4574       raf {
   3066   4574       raf 	ulwp_t *self = curthread;
   3067   4574       raf 	mutex_t **lockptr;
   3068   4574       raf 	uint_t nlocks;
   3069   4574       raf 	mutex_t *mp;
   3070   4574       raf 
   3071   4574       raf 	if ((nlocks = self->ul_heldlockcnt) != 0)
   3072   4574       raf 		lockptr = self->ul_heldlocks.array;
   3073   4574       raf 	else {
   3074   4574       raf 		nlocks = 1;
   3075   4574       raf 		lockptr = &self->ul_heldlocks.single;
   3076   4574       raf 	}
   3077   4574       raf 
   3078   4574       raf 	for (; nlocks; nlocks--, lockptr++) {
   3079   4574       raf 		/*
   3080   4574       raf 		 * The kernel takes care of transitioning held
   3081   4574       raf 		 * LOCK_PRIO_INHERIT mutexes to LOCK_OWNERDEAD.
   3082   4574       raf 		 * We avoid that case here.
   3083   4574       raf 		 */
   3084   4574       raf 		if ((mp = *lockptr) != NULL &&
   3085   6812       raf 		    mutex_held(mp) &&
   3086   4574       raf 		    (mp->mutex_type & (LOCK_ROBUST | LOCK_PRIO_INHERIT)) ==
   3087   4574       raf 		    LOCK_ROBUST) {
   3088   4574       raf 			mp->mutex_rcount = 0;
   3089   4574       raf 			if (!(mp->mutex_flag & LOCK_UNMAPPED))
   3090   4574       raf 				mp->mutex_flag |= LOCK_OWNERDEAD;
   3091   4574       raf 			(void) mutex_unlock_internal(mp, 1);
   3092   4574       raf 		}
   3093   4574       raf 	}
   3094   4574       raf 
   3095   4574       raf 	heldlock_free(self);
   3096      0    stevel }
   3097      0