OpenGrok

Cross Reference: lock_prim.s
xref: /onnv/onnv-gate/usr/src/uts/intel/ia32/ml/lock_prim.s
Home | History | Annotate | Line # | Download | only in ml
      1 /*
      2  * CDDL HEADER START
      3  *
      4  * The contents of this file are subject to the terms of the
      5  * Common Development and Distribution License (the "License").
      6  * You may not use this file except in compliance with the License.
      7  *
      8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
      9  * or http://www.opensolaris.org/os/licensing.
     10  * See the License for the specific language governing permissions
     11  * and limitations under the License.
     12  *
     13  * When distributing Covered Code, include this CDDL HEADER in each
     14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
     15  * If applicable, add the following below this CDDL HEADER, with the
     16  * fields enclosed by brackets "[]" replaced with your own identifying
     17  * information: Portions Copyright [yyyy] [name of copyright owner]
     18  *
     19  * CDDL HEADER END
     20  */
     21 /*
     22  * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
     23  * Use is subject to license terms.
     24  */
     25 
     26 #pragma ident	"%Z%%M%	%I%	%E% SMI"
     27 
     28 #if defined(lint) || defined(__lint)
     29 #include <sys/types.h>
     30 #include <sys/thread.h>
     31 #include <sys/cpuvar.h>
     32 #include <vm/page.h>
     33 #else	/* __lint */
     34 #include "assym.h"
     35 #endif	/* __lint */
     36 
     37 #include <sys/mutex_impl.h>
     38 #include <sys/asm_linkage.h>
     39 #include <sys/asm_misc.h>
     40 #include <sys/regset.h>
     41 #include <sys/rwlock_impl.h>
     42 #include <sys/lockstat.h>
     43 
     44 /*
     45  * lock_try(lp), ulock_try(lp)
     46  *	- returns non-zero on success.
     47  *	- doesn't block interrupts so don't use this to spin on a lock.
     48  *
     49  * ulock_try() is for a lock in the user address space.
     50  */
     51 
     52 #if defined(lint) || defined(__lint)
     53 
     54 /* ARGSUSED */
     55 int
     56 lock_try(lock_t *lp)
     57 { return (0); }		/* lint-only stub; the real routine is the assembly in the !lint branch */
     58 
     59 /* ARGSUSED */
     60 int
     61 lock_spin_try(lock_t *lp)
     62 { return (0); }		/* lint-only stub; the real routine is the assembly in the !lint branch */
     63 
     64 /* ARGSUSED */
     65 int
     66 ulock_try(lock_t *lp)
     67 { return (0); }		/* lint-only stub; the real routine is the assembly in the !lint branch */
     68 
     69 #else	/* __lint */
     70 	.globl	kernelbase
     71 
     72 #if defined(__amd64)
     73 
     74 	ENTRY(lock_try)			/* int lock_try(lock_t *lp); %rdi = lp */
     75 	movb	$-1, %dl		/* dl = 0xff, the value stored to take the lock */
     76 	movzbq	%dl, %rax		/* rax = 0xff, zero-extended */
     77 	xchgb	%dl, (%rdi)		/* atomically store 0xff; dl = previous lock byte */
     78 	xorb	%dl, %al		/* al = 0xff ^ previous byte: 0 if it already was 0xff */
     79 .lock_try_lockstat_patch_point:
     80 	ret				/* code below is reached only if this ret is patched out */
     81 	testb	%al, %al		/* did we get the lock? */
     82 	jnz	0f			/* yes: go report the acquire event */
     83 	ret				/* no: return 0 without reporting anything */
     84 0:
     85 	movq	%gs:CPU_THREAD, %rdx	/* rdx = thread addr */
     86 	movq	%rdi, %rsi		/* rsi = lock addr */
     87 	movl	$LS_LOCK_TRY_ACQUIRE, %edi /* edi = event */
     88 	jmp	lockstat_wrapper	/* tail-jump; the wrapper returns to our caller */
     89 	SET_SIZE(lock_try)
     90 
     91 	ENTRY(lock_spin_try)		/* int lock_spin_try(lock_t *lp); same as lock_try minus the lockstat patch point */
     92 	movb	$-1, %dl		/* dl = 0xff, the value stored to take the lock */
     93 	movzbq	%dl, %rax		/* rax = 0xff, zero-extended */
     94 	xchgb	%dl, (%rdi)		/* atomically store 0xff; dl = previous lock byte */
     95 	xorb	%dl, %al		/* al = 0xff ^ previous byte: 0 if it already was 0xff */
     96 	ret
     97 	SET_SIZE(lock_spin_try)
     98 
     99 	ENTRY(ulock_try)		/* int ulock_try(lock_t *lp); %rdi = lock address */
    100 #ifdef DEBUG
    101 	movq	kernelbase(%rip), %rax	/* rax = kernelbase */
    102 	cmpq	%rax, %rdi		/* test uaddr < kernelbase */
    103 	jb	ulock_pass		/*	uaddr < kernelbase, proceed */
    104 
    105 	movq	%rdi, %r12		/* preserve lock ptr for debugging */
    106 	leaq	.ulock_panic_msg(%rip), %rdi	/* rdi = panic message */
    107 	pushq	%rbp			/* align stack properly */
    108 	movq	%rsp, %rbp
    109 	xorl	%eax, %eax		/* clear for varargs */
    110 	call	panic			/* no code follows for a return from panic */
    111 
    112 #endif /* DEBUG */
    113 
    114 ulock_pass:
    115 	movl	$1, %eax		/* al = 1, the value stored to take the lock */
    116 	xchgb	%al, (%rdi)		/* atomically store 1; al = previous lock byte */
    117 	xorb	$1, %al			/* al = previous ^ 1: 1 if it was 0, 0 if it was 1 */
    118 	ret
    119 	SET_SIZE(ulock_try)
    120 
    121 #else
    122 
    123 	ENTRY(lock_try)			/* int lock_try(lock_t *lp); lp is at 4(%esp) */
    124 	movl	$1,%edx			/* dl = 1 is stored to take the lock; NOTE(review): lock_spin_try stores 0xff - confirm intended */
    125 	movl	4(%esp),%ecx		/* ecx = lock addr */
    126 	xorl	%eax,%eax		/* eax = 0 so setz below yields a clean 0/1 */
    127 	xchgb	%dl, (%ecx)		/* using dl will avoid partial */
    128 	testb	%dl,%dl			/* stalls on P6 ? */
    129 	setz	%al			/* al = 1 if the previous lock byte was 0 */
    130 .lock_try_lockstat_patch_point:
    131 	ret				/* code below is reached only if this ret is patched out */
    132 	movl	%gs:CPU_THREAD, %edx	/* edx = thread addr */
    133 	testl	%eax, %eax		/* did we get the lock? */
    134 	jz	0f			/* no: return 0 without reporting anything */
    135 	movl	$LS_LOCK_TRY_ACQUIRE, %eax	/* eax = event; ecx still holds the lock addr */
    136 	jmp	lockstat_wrapper	/* tail-jump; the wrapper returns to our caller */
    137 0:
    138 	ret
    139 	SET_SIZE(lock_try)
    140 
    141 	ENTRY(lock_spin_try)		/* int lock_spin_try(lock_t *lp); lp is at 4(%esp); no lockstat patch point */
    142 	movl	$-1,%edx		/* dl = 0xff, the value stored to take the lock */
    143 	movl	4(%esp),%ecx		/* ecx = lock addr */
    144 	xorl	%eax,%eax		/* eax = 0 so setz below yields a clean 0/1 */
    145 	xchgb	%dl, (%ecx)		/* using dl will avoid partial */
    146 	testb	%dl,%dl			/* stalls on P6 ? */
    147 	setz	%al			/* al = 1 if the previous lock byte was 0 */
    148 	ret
    149 	SET_SIZE(lock_spin_try)
    150 
    151 	ENTRY(ulock_try)		/* int ulock_try(lock_t *lp); lock address is at 4(%esp) */
    152 #ifdef DEBUG
    153 	movl	kernelbase, %eax	/* eax = kernelbase */
    154 	cmpl	%eax, 4(%esp)		/* test uaddr < kernelbase */
    155 	jb	ulock_pass		/* uaddr < kernelbase, proceed */
    156 
    157 	pushl	$.ulock_panic_msg	/* argument for panic */
    158 	call	panic			/* no code follows for a return from panic */
    159 
    160 #endif /* DEBUG */
    161 
    162 ulock_pass:
    163 	movl	$1,%eax			/* al = 1, the value stored to take the lock */
    164 	movl	4(%esp),%ecx		/* ecx = lock addr */
    165 	xchgb	%al, (%ecx)		/* atomically store 1; al = previous lock byte */
    166 	xorb	$1, %al			/* al = previous ^ 1: 1 if it was 0, 0 if it was 1 */
    167 	ret
    168 	SET_SIZE(ulock_try)
    169 
    170 #endif	/* !__amd64 */
    171 
    172 #ifdef DEBUG			/* message is only referenced by the DEBUG checks in ulock_try */
    173 	.data
    174 .ulock_panic_msg:
    175 	.string "ulock_try: Argument is above kernelbase"	/* passed to panic() */
    176 	.text			/* switch back to the text section for the code that follows */
    177 #endif	/* DEBUG */
    178 
    179 #endif	/* __lint */
    180 
    181 /*
    182  * lock_clear(lp)
    183  *	- unlock lock without changing interrupt priority level.
    184  */
    185 
    186 #if defined(lint) || defined(__lint)
    187 
    188 /* ARGSUSED */
    189 void
    190 lock_clear(lock_t *lp)
    191 {}			/* lint-only stub; the real routine is the assembly in the !lint branch */
    192 
    193 /* ARGSUSED */
    194 void
    195 ulock_clear(lock_t *lp)
    196 {}			/* lint-only stub; the real routine is the assembly in the !lint branch */
    197 
    198 #else	/* __lint */
    199 
    200 #if defined(__amd64)
    201 
    202 	ENTRY(lock_clear)		/* void lock_clear(lock_t *lp); %rdi = lp */
    203 	movb	$0, (%rdi)		/* release: write 0 to the lock byte */
    204 .lock_clear_lockstat_patch_point:
    205 	ret				/* code below is reached only if this ret is patched out */
    206 	movq	%rdi, %rsi			/* rsi = lock addr */
    207 	movq	%gs:CPU_THREAD, %rdx		/* rdx = thread addr */
    208 	movl	$LS_LOCK_CLEAR_RELEASE, %edi	/* edi = event */
    209 	jmp	lockstat_wrapper	/* tail-jump; the wrapper returns to our caller */
    210 	SET_SIZE(lock_clear)
    211 
    212 	ENTRY(ulock_clear)		/* void ulock_clear(lock_t *lp); %rdi = lock address */
    213 #ifdef DEBUG
    214 	movq	kernelbase(%rip), %rcx	/* rcx = kernelbase */
    215 	cmpq	%rcx, %rdi		/* test uaddr < kernelbase */
    216 	jb	ulock_clr		/*	 uaddr < kernelbase, proceed */
    217 
    218 	leaq	.ulock_clear_msg(%rip), %rdi	/* rdi = panic message */
    219 	pushq	%rbp			/* align stack properly */
    220 	movq	%rsp, %rbp
    221 	xorl	%eax, %eax		/* clear for varargs */
    222 	call	panic			/* no code follows for a return from panic */
    223 #endif
    224 
    225 ulock_clr:
    226 	movb	$0, (%rdi)		/* release: write 0 to the lock byte */
    227 	ret
    228 	SET_SIZE(ulock_clear)
    229 
    230 #else
    231 
    232 	ENTRY(lock_clear)		/* void lock_clear(lock_t *lp); lp is at 4(%esp) */
    233 	movl	4(%esp), %eax		/* eax = lock addr */
    234 	movb	$0, (%eax)		/* release: write 0 to the lock byte */
    235 .lock_clear_lockstat_patch_point:
    236 	ret				/* code below is reached only if this ret is patched out */
    237 	movl	%gs:CPU_THREAD, %edx		/* edx = thread addr */
    238 	movl	%eax, %ecx			/* ecx = lock pointer */
    239 	movl	$LS_LOCK_CLEAR_RELEASE, %eax	/* eax = event */
    240 	jmp	lockstat_wrapper	/* tail-jump; the wrapper returns to our caller */
    241 	SET_SIZE(lock_clear)
    242 
    243 	ENTRY(ulock_clear)		/* void ulock_clear(lock_t *lp); lock address is at 4(%esp) */
    244 #ifdef DEBUG
    245 	movl	kernelbase, %ecx	/* ecx = kernelbase */
    246 	cmpl	%ecx, 4(%esp)		/* test uaddr < kernelbase */
    247 	jb	ulock_clr		/* uaddr < kernelbase, proceed */
    248 
    249 	pushl	$.ulock_clear_msg	/* argument for panic */
    250 	call	panic			/* no code follows for a return from panic */
    251 #endif
    252 
    253 ulock_clr:
    254 	movl	4(%esp),%eax		/* eax = lock addr */
    255 	xorl	%ecx,%ecx		/* cl = 0 */
    256 	movb	%cl, (%eax)		/* release: write 0 to the lock byte */
    257 	ret
    258 	SET_SIZE(ulock_clear)
    259 
    260 #endif	/* !__amd64 */
    261 
    262 #ifdef DEBUG			/* message is only referenced by the DEBUG checks in ulock_clear */
    263 	.data
    264 .ulock_clear_msg:
    265 	.string "ulock_clear: Argument is above kernelbase"	/* passed to panic() */
    266 	.text			/* switch back to the text section for the code that follows */
    267 #endif	/* DEBUG */
    268 
    269 
    270 #endif	/* __lint */
    271 
    272 /*
    273  * lock_set_spl(lock_t *lp, int new_pil, u_short *old_pil)
    274  * Raises pil to new_pil, acquires lp, stores old pil in *old_pil.
    275  */
    276 
    277 #if defined(lint) || defined(__lint)
    278 
    279 /* ARGSUSED */
    280 void
    281 lock_set_spl(lock_t *lp, int new_pil, u_short *old_pil)
    282 {}			/* lint-only stub; the real routine is the assembly in the !lint branch */
    283 
    284 #else	/* __lint */
    285 
    286 #if defined(__amd64)
    287 
    288 	ENTRY(lock_set_spl)		/* %rdi = lp, %esi = new_pil, %rdx = old_pil */
    289 	pushq	%rbp			/* build a frame so the args survive the splr call */
    290 	movq	%rsp, %rbp
    291 	subq	$32, %rsp		/* scratch space for the three arguments */
    292 	movl	%esi, 8(%rsp)		/* save priority level */
    293 	movq	%rdx, 16(%rsp)		/* save old pil ptr */
    294 	movq	%rdi, 24(%rsp)		/* save lock pointer */
    295 	movl	%esi, %edi		/* pass priority level */
    296 	call	splr			/* raise priority level */
    297 	movq	24(%rsp), %rdi		/* rdi = lock addr */
    298 	movb	$-1, %dl		/* dl = 0xff, the value stored to take the lock */
    299 	xchgb	%dl, (%rdi)		/* try to set lock */
    300 	testb	%dl, %dl		/* did we get the lock? ... */
    301 	jnz	.lss_miss		/* ... no, go to C for the hard case */
    302 	movq	16(%rsp), %rdx		/* rdx = old pil addr */
    303 	movw	%ax, (%rdx)		/* store old pil (ax still holds splr's return value) */
    304 	leave				/* unwind stack */
    305 .lock_set_spl_lockstat_patch_point:
    306 	ret				/* code below is reached only if this ret is patched out */
    307 	movq	%rdi, %rsi		/* rsi = lock addr */
    308 	movq	%gs:CPU_THREAD, %rdx	/* rdx = thread addr */
    309 	movl	$LS_LOCK_SET_SPL_ACQUIRE, %edi	/* edi = event */
    310 	jmp	lockstat_wrapper	/* tail-jump; the wrapper returns to our caller */
    311 .lss_miss:				/* lock was held: rdi still = lock addr */
    312 	movl	8(%rsp), %esi		/* new_pil */
    313 	movq	16(%rsp), %rdx		/* old_pil_addr */
    314 	movl	%eax, %ecx		/* original pil */
    315 	leave				/* unwind stack */
    316 	jmp	lock_set_spl_spin	/* tail-jump with (lp, new_pil, old_pil, original pil) */
    317 	SET_SIZE(lock_set_spl)
    318 
    319 #else
    320 
    321 	ENTRY(lock_set_spl)		/* stack args: 4(%esp) = lp, 8(%esp) = new_pil, 12(%esp) = old_pil */
    322 	movl	8(%esp), %eax		/* get priority level */
    323 	pushl	%eax			/* argument for splr */
    324 	call	splr			/* raise priority level */
    325 	movl 	8(%esp), %ecx		/* ecx = lock addr */
    326 	movl	$-1, %edx		/* dl = 0xff, the value stored to take the lock */
    327 	addl	$4, %esp		/* pop the splr argument */
    328 	xchgb	%dl, (%ecx)		/* try to set lock */
    329 	testb	%dl, %dl		/* did we get the lock? ... */
    330 	movl	12(%esp), %edx		/* edx = old pil addr (ZF unaffected) */
    331 	jnz	.lss_miss		/* ... no, go to C for the hard case */
    332 	movw	%ax, (%edx)		/* store old pil */
    333 .lock_set_spl_lockstat_patch_point:
    334 	ret				/* code below is reached only if this ret is patched out */
    335 	movl	%gs:CPU_THREAD, %edx	/* edx = thread addr*/
    336 	movl	$LS_LOCK_SET_SPL_ACQUIRE, %eax	/* eax = event; ecx still = lock addr */
    337 	jmp	lockstat_wrapper	/* tail-jump; the wrapper returns to our caller */
    338 .lss_miss:
    339 	pushl	%eax			/* original pil */
    340 	pushl	%edx			/* old_pil addr */
    341 	pushl	16(%esp)		/* new_pil */
    342 	pushl	%ecx			/* lock addr */
    343 	call	lock_set_spl_spin
    344 	addl	$16, %esp		/* pop the four arguments */
    345 	ret
    346 	SET_SIZE(lock_set_spl)
    347 
    348 #endif	/* !__amd64 */
    349 
    350 #endif	/* __lint */
    351 
    352 /*
    353  * void
    354  * lock_init(lp)
    355  */
    356 
    357 #if defined(__lint)
    358 
    359 /* ARGSUSED */
    360 void
    361 lock_init(lock_t *lp)
    362 {}			/* lint-only stub; the real routine is the assembly in the !lint branch */
    363 
    364 #else	/* __lint */
    365 
    366 #if defined(__amd64)
    367 
    368 	ENTRY(lock_init)		/* void lock_init(lock_t *lp); %rdi = lp */
    369 	movb	$0, (%rdi)		/* initialize the lock byte to 0 (not held) */
    370 	ret
    371 	SET_SIZE(lock_init)
    372 
    373 #else
    374 
    375 	ENTRY(lock_init)		/* void lock_init(lock_t *lp); lp is at 4(%esp) */
    376 	movl	4(%esp), %eax		/* eax = lock addr */
    377 	movb	$0, (%eax)		/* initialize the lock byte to 0 (not held) */
    378 	ret
    379 	SET_SIZE(lock_init)
    380 
    381 #endif	/* !__amd64 */
    382 
    383 #endif	/* __lint */
    384 
    385 /*
    386  * void
    387  * lock_set(lp)
    388  */
    389 
    390 #if defined(lint) || defined(__lint)
    391 
    392 /* ARGSUSED */
    393 void
    394 lock_set(lock_t *lp)
    395 {}			/* lint-only stub; the real routine is the assembly in the !lint branch */
    396 
    397 #else	/* __lint */
    398 
    399 #if defined(__amd64)
    400 
    401 	ENTRY(lock_set)			/* void lock_set(lock_t *lp); %rdi = lp */
    402 	movb	$-1, %dl		/* dl = 0xff, the value stored to take the lock */
    403 	xchgb	%dl, (%rdi)		/* try to set lock */
    404 	testb	%dl, %dl		/* did we get it? */
    405 	jnz	lock_set_spin		/* no, go to C for the hard case */
    406 .lock_set_lockstat_patch_point:
    407 	ret				/* code below is reached only if this ret is patched out */
    408 	movq	%rdi, %rsi		/* rsi = lock addr */
    409 	movq	%gs:CPU_THREAD, %rdx	/* rdx = thread addr */
    410 	movl	$LS_LOCK_SET_ACQUIRE, %edi	/* edi = event */
    411 	jmp	lockstat_wrapper	/* tail-jump; the wrapper returns to our caller */
    412 	SET_SIZE(lock_set)
    413 
    414 #else
    415 
    416 	ENTRY(lock_set)			/* void lock_set(lock_t *lp); lp is at 4(%esp) */
    417 	movl	4(%esp), %ecx		/* ecx = lock addr */
    418 	movl	$-1, %edx		/* dl = 0xff, the value stored to take the lock */
    419 	xchgb	%dl, (%ecx)		/* try to set lock */
    420 	testb	%dl, %dl		/* did we get it? */
    421 	jnz	lock_set_spin		/* no, go to C for the hard case */
    422 .lock_set_lockstat_patch_point:
    423 	ret				/* code below is reached only if this ret is patched out */
    424 	movl	%gs:CPU_THREAD, %edx	/* edx = thread addr */
    425 	movl	$LS_LOCK_SET_ACQUIRE, %eax	/* eax = event; ecx still = lock addr */
    426 	jmp	lockstat_wrapper	/* tail-jump; the wrapper returns to our caller */
    427 	SET_SIZE(lock_set)
    428 
    429 #endif	/* !__amd64 */
    430 
    431 #endif	/* __lint */
    432 
    433 /*
    434  * lock_clear_splx(lp, s)
    435  */
    436 
    437 #if defined(lint) || defined(__lint)
    438 
    439 /* ARGSUSED */
    440 void
    441 lock_clear_splx(lock_t *lp, int s)
    442 {}			/* lint-only stub; the real routine is the assembly in the !lint branch */
    443 
    444 #else	/* __lint */
    445 
    446 #if defined(__amd64)
    447 
    448 	ENTRY(lock_clear_splx)		/* %rdi = lp, %esi = s (level handed to splx) */
    449 	movb	$0, (%rdi)		/* clear lock */
    450 .lock_clear_splx_lockstat_patch_point:
    451 	jmp	0f			/* jumps to the next instruction as assembled; LOCK_CLEAR_SPLX_LOCKSTAT_PATCH_POINT addresses the byte after its opcode */
    452 0:
    453 	movl	%esi, %edi		/* arg for splx */
    454 	jmp	splx			/* let splx do its thing */
    455 .lock_clear_splx_lockstat:		/* not reachable by fall-through; target used in LOCK_CLEAR_SPLX_LOCKSTAT_PATCH_VAL */
    456 	pushq	%rbp			/* align stack properly */
    457 	movq	%rsp, %rbp
    458 	subq	$16, %rsp		/* space to save args across splx */
    459 	movq	%rdi, 8(%rsp)		/* save lock ptr across splx call */
    460 	movl	%esi, %edi		/* arg for splx */
    461 	call	splx			/* lower the priority */
    462 	movq	8(%rsp), %rsi		/* rsi = lock ptr */
    463 	leave				/* unwind stack */
    464 	movq	%gs:CPU_THREAD, %rdx	/* rdx = thread addr */
    465 	movl	$LS_LOCK_CLEAR_SPLX_RELEASE, %edi	/* edi = event */
    466 	jmp	lockstat_wrapper	/* tail-jump; the wrapper returns to our caller */
    467 	SET_SIZE(lock_clear_splx)
    468 
    469 #else
    470 
    471 	ENTRY(lock_clear_splx)		/* stack args: 4(%esp) = lp, 8(%esp) = s */
    472 	movl	4(%esp), %eax		/* eax = lock addr */
    473 	movb	$0, (%eax)		/* clear lock */
    474 .lock_clear_splx_lockstat_patch_point:
    475 	jmp	0f			/* jumps to the next instruction as assembled; LOCK_CLEAR_SPLX_LOCKSTAT_PATCH_POINT addresses the byte after its opcode */
    476 0:
    477 	movl	8(%esp), %edx		/* edx = desired pil */
    478 	movl	%edx, 4(%esp)		/* set spl arg up for splx */
    479 	jmp	splx			/* let splx do its thing */
    480 .lock_clear_splx_lockstat:		/* not reachable by fall-through; target used in LOCK_CLEAR_SPLX_LOCKSTAT_PATCH_VAL */
    481 	movl	8(%esp), %edx		/* edx = desired pil */
    482 	pushl	%ebp			/* set up stack frame */
    483 	movl	%esp, %ebp
    484 	pushl	%edx			/* argument for splx */
    485 	call	splx
    486 	leave				/* unwind stack */
    487 	movl	4(%esp), %ecx		/* ecx = lock pointer */
    488 	movl	%gs:CPU_THREAD, %edx	/* edx = thread addr */
    489 	movl	$LS_LOCK_CLEAR_SPLX_RELEASE, %eax	/* eax = event */
    490 	jmp	lockstat_wrapper	/* tail-jump; the wrapper returns to our caller */
    491 	SET_SIZE(lock_clear_splx)
    492 
    493 #endif	/* !__amd64 */
    494 
    495 #if defined(__GNUC_AS__)	/* this branch groups the expressions with (), the #else branch with [] */
    496 #define	LOCK_CLEAR_SPLX_LOCKSTAT_PATCH_VAL	\
    497 	(.lock_clear_splx_lockstat - .lock_clear_splx_lockstat_patch_point - 2)	/* presumably the jmp displacement that reaches .lock_clear_splx_lockstat, assuming a 2-byte jmp - TODO confirm */
    498 
    499 #define LOCK_CLEAR_SPLX_LOCKSTAT_PATCH_POINT	\
    500 	(.lock_clear_splx_lockstat_patch_point + 1)	/* address of the byte following the first byte of the jmp */
    501 #else
    502 #define	LOCK_CLEAR_SPLX_LOCKSTAT_PATCH_VAL	\
    503 	[.lock_clear_splx_lockstat - .lock_clear_splx_lockstat_patch_point - 2]	/* same value as the () form above */
    504 
    505 #define LOCK_CLEAR_SPLX_LOCKSTAT_PATCH_POINT	\
    506 	[.lock_clear_splx_lockstat_patch_point + 1]	/* same address as the () form above */
    507 #endif
    508 
    509 #endif	/* __lint */
    510 
    511 /*
    512  * mutex_enter() and mutex_exit().
    513  *
    514  * These routines handle the simple cases of mutex_enter() (adaptive
    515  * lock, not held) and mutex_exit() (adaptive lock, held, no waiters).
    516  * If anything complicated is going on we punt to mutex_vector_enter().
    517  *
    518  * mutex_tryenter() is similar to mutex_enter() but returns zero if
    519  * the lock cannot be acquired, nonzero on success.
    520  *
    521  * If mutex_exit() gets preempted in the window between checking waiters
    522  * and clearing the lock, we can miss wakeups.  Disabling preemption
    523  * in the mutex code is prohibitively expensive, so instead we detect
    524  * mutex preemption by examining the trapped PC in the interrupt path.
    525  * If we interrupt a thread in mutex_exit() that has not yet cleared
    526  * the lock, cmnint() resets its PC back to the beginning of
    527  * mutex_exit() so it will check again for waiters when it resumes.
    528  *
    529  * The lockstat code below is activated when the lockstat driver
    530  * calls lockstat_hot_patch() to hot-patch the kernel mutex code.
    531  * Note that we don't need to test lockstat_event_mask here -- we won't
    532  * patch this code in unless we're gathering ADAPTIVE_HOLD lockstats.
    533  */
    534 #if defined(lint) || defined(__lint)
    535 
    536 /* ARGSUSED */
    537 void
    538 mutex_enter(kmutex_t *lp)
    539 {}			/* lint-only stub; the real routine is the assembly in the !lint branch */
    540 
    541 /* ARGSUSED */
    542 int
    543 mutex_tryenter(kmutex_t *lp)
    544 { return (0); }		/* lint-only stub; the real routine is the assembly in the !lint branch */
    545 
    546 /* ARGSUSED */
    547 int
    548 mutex_adaptive_tryenter(mutex_impl_t *lp)
    549 { return (0); }		/* lint-only stub; the real routine is the assembly in the !lint branch */
    550 
    551 /* ARGSUSED */
    552 void
    553 mutex_exit(kmutex_t *lp)
    554 {}			/* lint-only stub; the real routine is the assembly in the !lint branch */
    555 
    556 #else
    557 
    558 #if defined(__amd64)
    559 
    560 	ENTRY_NP(mutex_enter)			/* void mutex_enter(kmutex_t *lp); %rdi = lp */
    561 	movq	%gs:CPU_THREAD, %rdx		/* rdx = thread ptr */
    562 	xorl	%eax, %eax			/* rax = 0 (unheld adaptive) */
    563 	lock
    564 	cmpxchgq %rdx, (%rdi)			/* if *lp == 0 then *lp = thread ptr (ZF set) */
    565 	jnz	mutex_vector_enter		/* lock word was not 0: take the slow path, rdi unchanged */
    566 .mutex_enter_lockstat_patch_point:
    567 #if defined(OPTERON_WORKAROUND_6323525)
    568 .mutex_enter_6323525_patch_point:
    569 	ret					/* nop space for lfence */
    570 	nop
    571 	nop
    572 .mutex_enter_lockstat_6323525_patch_point:	/* new patch point if lfence */
    573 	nop
    574 #else	/* OPTERON_WORKAROUND_6323525 */
    575 	ret					/* code below is reached only if this ret is patched out */
    576 #endif	/* OPTERON_WORKAROUND_6323525 */
    577 	movq	%rdi, %rsi			/* rsi = lock ptr */
    578 	movl	$LS_MUTEX_ENTER_ACQUIRE, %edi	/* edi = event; falls through into lockstat_wrapper */
    579 /*
    580  * expects %rdx=thread, %rsi=lock, %edi=lockstat event
    581  */
    582 	ALTENTRY(lockstat_wrapper)
    583 	incb	T_LOCKSTAT(%rdx)		/* curthread->t_lockstat++ */
    584 	leaq	lockstat_probemap(%rip), %rax	/* rax = base of lockstat_probemap */
    585 	movl	(%rax, %rdi, DTRACE_IDSIZE), %eax	/* eax = lockstat_probemap[event] */
    586 	testl	%eax, %eax			/* check for non-zero probe */
    587 	jz	1f				/* zero entry: skip the probe call */
    588 	pushq	%rbp				/* align stack properly */
    589 	movq	%rsp, %rbp
    590 	movl	%eax, %edi			/* first arg = probe id; rsi (lock) is untouched */
    591 	call	*lockstat_probe			/* indirect call through the lockstat_probe pointer */
    592 	leave					/* unwind stack */
    593 1:
    594 	movq	%gs:CPU_THREAD, %rdx		/* reload thread ptr */
    595 	decb	T_LOCKSTAT(%rdx)		/* curthread->t_lockstat-- */
    596 	movl	$1, %eax			/* return success if tryenter */
    597 	ret
    598 	SET_SIZE(lockstat_wrapper)
    599 	SET_SIZE(mutex_enter)
    600 
    601 /*
    602  * expects %rcx=thread, %rdx=arg, %rsi=lock, %edi=lockstat event
    603  */
    604 	ENTRY(lockstat_wrapper_arg)		/* like lockstat_wrapper, but leaves %rdx free to carry an extra argument */
    605 	incb	T_LOCKSTAT(%rcx)		/* curthread->t_lockstat++ */
    606 	leaq	lockstat_probemap(%rip), %rax	/* rax = base of lockstat_probemap */
    607 	movl	(%rax, %rdi, DTRACE_IDSIZE), %eax	/* eax = lockstat_probemap[event] */
    608 	testl	%eax, %eax			/* check for non-zero probe */
    609 	jz	1f				/* zero entry: skip the probe call */
    610 	pushq	%rbp				/* align stack properly */
    611 	movq	%rsp, %rbp
    612 	movl	%eax, %edi			/* first arg = probe id; rsi (lock) and rdx (arg) are untouched */
    613 	call	*lockstat_probe			/* indirect call through the lockstat_probe pointer */
    614 	leave					/* unwind stack */
    615 1:
    616 	movq	%gs:CPU_THREAD, %rdx		/* reload thread ptr */
    617 	decb	T_LOCKSTAT(%rdx)		/* curthread->t_lockstat-- */
    618 	movl	$1, %eax			/* return success if tryenter */
    619 	ret
    620 	SET_SIZE(lockstat_wrapper_arg)
    621 
    622 
    623 	ENTRY(mutex_tryenter)			/* int mutex_tryenter(kmutex_t *lp); %rdi = lp */
    624 	movq	%gs:CPU_THREAD, %rdx		/* rdx = thread ptr */
    625 	xorl	%eax, %eax			/* rax = 0 (unheld adaptive) */
    626 	lock
    627 	cmpxchgq %rdx, (%rdi)			/* if *lp == 0 then *lp = thread ptr (ZF set) */
    628 	jnz	mutex_vector_tryenter		/* lock word was not 0: take the slow path, rdi unchanged */
    629 	not	%eax				/* return success (nonzero) */
    630 #if defined(OPTERON_WORKAROUND_6323525)
    631 .mutex_tryenter_lockstat_patch_point:
    632 .mutex_tryenter_6323525_patch_point:
    633 	ret					/* nop space for lfence */
    634 	nop
    635 	nop
    636 .mutex_tryenter_lockstat_6323525_patch_point:	/* new patch point if lfence */
    637 	nop
    638 #else	/* OPTERON_WORKAROUND_6323525 */
    639 .mutex_tryenter_lockstat_patch_point:
    640 	ret					/* code below is reached only if this ret is patched out */
    641 #endif	/* OPTERON_WORKAROUND_6323525 */
    642 	movq	%rdi, %rsi			/* rsi = lock ptr */
    643 	movl	$LS_MUTEX_ENTER_ACQUIRE, %edi	/* edi = event (same event as mutex_enter) */
    644 	jmp	lockstat_wrapper		/* wrapper returns 1 in eax to our caller */
    645 	SET_SIZE(mutex_tryenter)
    646 
    647 	ENTRY(mutex_adaptive_tryenter)		/* %rdi = lp; one cmpxchg attempt, no slow path, no lockstat point */
    648 	movq	%gs:CPU_THREAD, %rdx		/* rdx = thread ptr */
    649 	xorl	%eax, %eax			/* rax = 0 (unheld adaptive) */
    650 	lock
    651 	cmpxchgq %rdx, (%rdi)			/* if *lp == 0 then *lp = thread ptr (ZF set) */
    652 	jnz	0f				/* lock word was not 0: report failure */
    653 	not	%eax				/* return success (nonzero) */
    654 #if defined(OPTERON_WORKAROUND_6323525)
    655 .mutex_atryenter_6323525_patch_point:
    656 	ret					/* nop space for lfence */
    657 	nop
    658 	nop
    659 	nop
    660 #else	/* OPTERON_WORKAROUND_6323525 */
    661 	ret
    662 #endif	/* OPTERON_WORKAROUND_6323525 */
    663 0:
    664 	xorl	%eax, %eax			/* return failure */
    665 	ret
    666 	SET_SIZE(mutex_adaptive_tryenter)
    667 
    668 	.globl	mutex_owner_running_critical_start
    669 
    670 	ENTRY(mutex_owner_running)
    671 mutex_owner_running_critical_start:
    672 	movq	(%rdi), %r11		/* get owner field */
    673 	andq	$MUTEX_THREAD, %r11	/* remove waiters bit */
    674 	cmpq	$0, %r11		/* if free, skip */
    675 	je	1f			/* go return 0 */
    676 	movq	T_CPU(%r11), %r8	/* get owner->t_cpu */
    677 	movq	CPU_THREAD(%r8), %r9	/* get t_cpu->cpu_thread */
    678 .mutex_owner_running_critical_end:
    679 	cmpq	%r11, %r9	/* owner == running thread? */
    680 	je	2f		/* yes, go return cpu */
    681 1:
    682 	xorq	%rax, %rax	/* return 0 */
    683 	ret
    684 2:
    685 	movq	%r8, %rax		/* return cpu */
    686 	ret
    687 	SET_SIZE(mutex_owner_running)
    688 
    689 	.globl	mutex_owner_running_critical_size
    690 	.type	mutex_owner_running_critical_size, @object
    691 	.align	CPTRSIZE
    692 mutex_owner_running_critical_size:
    693 	.quad	.mutex_owner_running_critical_end - mutex_owner_running_critical_start
    694 	SET_SIZE(mutex_owner_running_critical_size)
    695 
    696 	.globl	mutex_exit_critical_start
    697 
    698 	ENTRY(mutex_exit)			/* void mutex_exit(kmutex_t *lp); %rdi = lp */
    699 mutex_exit_critical_start:		/* If interrupted, restart here */
    700 	movq	%gs:CPU_THREAD, %rdx		/* rdx = thread ptr */
    701 	cmpq	%rdx, (%rdi)			/* is the lock word exactly our thread ptr? */
    702 	jne	mutex_vector_exit		/* wrong type or wrong owner */
    703 	movq	$0, (%rdi)			/* clear owner AND lock */
    704 .mutex_exit_critical_end:
    705 .mutex_exit_lockstat_patch_point:
    706 	ret					/* code below is reached only if this ret is patched out */
    707 	movq	%rdi, %rsi			/* rsi = lock ptr; rdx still = thread ptr */
    708 	movl	$LS_MUTEX_EXIT_RELEASE, %edi	/* edi = event */
    709 	jmp	lockstat_wrapper		/* tail-jump; the wrapper returns to our caller */
    710 	SET_SIZE(mutex_exit)
    711 
    712 	.globl	mutex_exit_critical_size
    713 	.type	mutex_exit_critical_size, @object
    714 	.align	CPTRSIZE
    715 mutex_exit_critical_size:		/* byte length of [critical_start, critical_end), computed from the labels */
    716 	.quad	.mutex_exit_critical_end - mutex_exit_critical_start
    717 	SET_SIZE(mutex_exit_critical_size)
    718 
    719 #else
    720 
    721 	ENTRY_NP(mutex_enter)			/* void mutex_enter(kmutex_t *lp); lp is at 4(%esp) */
    722 	movl	%gs:CPU_THREAD, %edx		/* edx = thread ptr */
    723 	movl	4(%esp), %ecx			/* ecx = lock ptr */
    724 	xorl	%eax, %eax			/* eax = 0 (unheld adaptive) */
    725 	lock
    726 	cmpxchgl %edx, (%ecx)			/* if *lp == 0 then *lp = thread ptr (ZF set) */
    727 	jnz	mutex_vector_enter		/* lock word was not 0: take the slow path, stack args intact */
    728 #if defined(OPTERON_WORKAROUND_6323525)
    729 .mutex_enter_lockstat_patch_point:
    730 .mutex_enter_6323525_patch_point:
    731 	ret					/* nop space for lfence */
    732 	nop
    733 	nop
    734 .mutex_enter_lockstat_6323525_patch_point:	/* new patch point if lfence */
    735 	nop
    736 #else	/* OPTERON_WORKAROUND_6323525 */
    737 .mutex_enter_lockstat_patch_point:
    738 	ret					/* code below is reached only if this ret is patched out */
    739 #endif	/* OPTERON_WORKAROUND_6323525 */
    740 	movl	$LS_MUTEX_ENTER_ACQUIRE, %eax	/* eax = event; falls through into lockstat_wrapper */
    741 	ALTENTRY(lockstat_wrapper)	/* expects edx=thread, ecx=lock, */
    742 					/*   eax=lockstat event */
    743 	pushl	%ebp				/* buy a frame */
    744 	movl	%esp, %ebp
    745 	incb	T_LOCKSTAT(%edx)		/* curthread->t_lockstat++ */
    746 	pushl	%edx				/* save thread pointer	 */
    747 	movl	$lockstat_probemap, %edx	/* edx = base of lockstat_probemap */
    748 	movl	(%edx, %eax, DTRACE_IDSIZE), %eax	/* eax = lockstat_probemap[event] */
    749 	testl	%eax, %eax			/* check for non-zero probe */
    750 	jz	1f				/* zero entry: skip the probe call */
    751 	pushl	%ecx				/* push lock */
    752 	pushl	%eax				/* push probe ID */
    753 	call	*lockstat_probe			/* indirect call through the lockstat_probe pointer */
    754 	addl	$8, %esp			/* pop the two arguments */
    755 1:
    756 	popl	%edx				/* restore thread pointer */
    757 	decb	T_LOCKSTAT(%edx)		/* curthread->t_lockstat-- */
    758 	movl	$1, %eax			/* return success if tryenter */
    759 	popl	%ebp				/* pop off frame */
    760 	ret
    761 	SET_SIZE(lockstat_wrapper)
    762 	SET_SIZE(mutex_enter)
    763 
    764 	ENTRY(lockstat_wrapper_arg)	/* expects edx=thread, ecx=lock, */
    765 					/* eax=lockstat event, pushed arg */
    766 	incb	T_LOCKSTAT(%edx)		/* curthread->t_lockstat++ */
    767 	pushl	%edx				/* save thread pointer	 */
    768 	movl	$lockstat_probemap, %edx	/* edx = base of lockstat_probemap */
    769 	movl	(%edx, %eax, DTRACE_IDSIZE), %eax	/* eax = lockstat_probemap[event] */
    770 	testl	%eax, %eax			/* check for non-zero probe */
    771 	jz	1f				/* zero entry: skip the probe call */
    772 	pushl	%ebp				/* save %ebp */
    773 	pushl	8(%esp)				/* push arg1 */
    774 	movl	%ebp, 12(%esp)			/* fake up the stack frame */
    775 	movl	%esp, %ebp			/* fake up base pointer */
    776 	addl	$12, %ebp			/* adjust faked base pointer */
    777 	pushl	%ecx				/* push lock */
    778 	pushl	%eax				/* push probe ID */
    779 	call	*lockstat_probe			/* called with (probe ID, lock, arg1) on the stack */
    780 	addl	$12, %esp			/* adjust for arguments */
    781 	popl	%ebp				/* pop frame */
    782 1:
    783 	popl	%edx				/* restore thread pointer */
    784 	decb	T_LOCKSTAT(%edx)		/* curthread->t_lockstat-- */
    785 	movl	$1, %eax			/* return success if tryenter */
    786 	addl	$4, %esp			/* pop argument */
    787 	ret
    788 	SET_SIZE(lockstat_wrapper_arg)
    789 
    790 
    791 	ENTRY(mutex_tryenter)			/* int mutex_tryenter(kmutex_t *lp); lp is at 4(%esp) */
    792 	movl	%gs:CPU_THREAD, %edx		/* edx = thread ptr */
    793 	movl	4(%esp), %ecx			/* ecx = lock ptr */
    794 	xorl	%eax, %eax			/* eax = 0 (unheld adaptive) */
    795 	lock
    796 	cmpxchgl %edx, (%ecx)			/* if *lp == 0 then *lp = thread ptr (ZF set) */
    797 	jnz	mutex_vector_tryenter		/* lock word was not 0: take the slow path, stack args intact */
    798 	movl	%ecx, %eax			/* return value = lock ptr, used as the nonzero success value */
    799 #if defined(OPTERON_WORKAROUND_6323525)
    800 .mutex_tryenter_lockstat_patch_point:
    801 .mutex_tryenter_6323525_patch_point:
    802 	ret					/* nop space for lfence */
    803 	nop
    804 	nop
    805 .mutex_tryenter_lockstat_6323525_patch_point:	/* new patch point if lfence */
    806 	nop
    807 #else	/* OPTERON_WORKAROUND_6323525 */
    808 .mutex_tryenter_lockstat_patch_point:
    809 	ret					/* code below is reached only if this ret is patched out */
    810 #endif	/* OPTERON_WORKAROUND_6323525 */
    811 	movl	$LS_MUTEX_ENTER_ACQUIRE, %eax	/* eax = event (same event as mutex_enter) */
    812 	jmp	lockstat_wrapper		/* wrapper returns 1 in eax to our caller */
    813 	SET_SIZE(mutex_tryenter)
    814 
    815 	ENTRY(mutex_adaptive_tryenter)		/* lp is at 4(%esp); one cmpxchg attempt, no slow path, no lockstat point */
    816 	movl	%gs:CPU_THREAD, %edx		/* edx = thread ptr */
    817 	movl	4(%esp), %ecx			/* ecx = lock ptr */
    818 	xorl	%eax, %eax			/* eax = 0 (unheld adaptive) */
    819 	lock
    820 	cmpxchgl %edx, (%ecx)			/* if *lp == 0 then *lp = thread ptr (ZF set) */
    821 	jnz	0f				/* lock word was not 0: report failure */
    822 	movl	%ecx, %eax			/* return value = lock ptr, used as the nonzero success value */
    823 #if defined(OPTERON_WORKAROUND_6323525)
    824 .mutex_atryenter_6323525_patch_point:
    825 	ret					/* nop space for lfence */
    826 	nop
    827 	nop
    828 	nop
    829 #else	/* OPTERON_WORKAROUND_6323525 */
    830 	ret
    831 #endif	/* OPTERON_WORKAROUND_6323525 */
    832 0:
    833 	xorl	%eax, %eax			/* return failure */
    834 	ret
    835 	SET_SIZE(mutex_adaptive_tryenter)
    836 
    837 	.globl	mutex_owner_running_critical_start
    838 
    839 	ENTRY(mutex_owner_running)
    840 mutex_owner_running_critical_start:
    841 	movl	4(%esp), %eax		/* get owner field */
    842 	movl	(%eax), %eax
    843 	andl	$MUTEX_THREAD, %eax	/* remove waiters bit */
    844 	cmpl	$0, %eax		/* if free, skip */
    845 	je	1f			/* go return 0 */
    846 	movl	T_CPU(%eax), %ecx	/* get owner->t_cpu */
    847 	movl	CPU_THREAD(%ecx), %edx	/* get t_cpu->cpu_thread */
    848 .mutex_owner_running_critical_end:
    849 	cmpl	%eax, %edx	/* owner == running thread? */
    850 	je	2f		/* yes, go return cpu */
    851 1:
    852 	xorl	%eax, %eax	/* return 0 */
    853 	ret
    854 2:
    855 	movl	%ecx, %eax	/* return cpu */
    856 	ret
    857 
    858 	SET_SIZE(mutex_owner_running)
    859 
    860 	.globl	mutex_owner_running_critical_size
    861 	.type	mutex_owner_running_critical_size, @object
    862 	.align	CPTRSIZE
    863 mutex_owner_running_critical_size:
    864 	.long	.mutex_owner_running_critical_end - mutex_owner_running_critical_start
    865 	SET_SIZE(mutex_owner_running_critical_size)
    866 
    867 	.globl	mutex_exit_critical_start
    868 
    869 	ENTRY(mutex_exit)			/* void mutex_exit(kmutex_t *lp); lp is at 4(%esp) */
    870 mutex_exit_critical_start:		/* If interrupted, restart here */
    871 	movl	%gs:CPU_THREAD, %edx		/* edx = thread ptr */
    872 	movl	4(%esp), %ecx			/* ecx = lock ptr */
    873 	cmpl	%edx, (%ecx)			/* is the lock word exactly our thread ptr? */
    874 	jne	mutex_vector_exit		/* wrong type or wrong owner */
    875 	movl	$0, (%ecx)			/* clear owner AND lock */
    876 .mutex_exit_critical_end:
    877 .mutex_exit_lockstat_patch_point:
    878 	ret					/* code below is reached only if this ret is patched out */
    879 	movl	$LS_MUTEX_EXIT_RELEASE, %eax	/* eax = event; edx = thread, ecx = lock still set */
    880 	jmp	lockstat_wrapper		/* tail-jump; the wrapper returns to our caller */
    881 	SET_SIZE(mutex_exit)
    882 
    883 	.globl	mutex_exit_critical_size
    884 	.type	mutex_exit_critical_size, @object
    885 	.align	CPTRSIZE
    886 mutex_exit_critical_size:		/* byte length of [critical_start, critical_end), computed from the labels */
    887 	.long	.mutex_exit_critical_end - mutex_exit_critical_start
    888 	SET_SIZE(mutex_exit_critical_size)
    889 
    890 #endif	/* !__amd64 */
    891 
    892 #endif	/* __lint */
    893 
    894 /*
    895  * rw_enter() and rw_exit().
    896  *
    897  * These routines handle the simple cases of rw_enter (write-locking an unheld
    898  * lock or read-locking a lock that's neither write-locked nor write-wanted)
    899  * and rw_exit (no waiters or not the last reader).  If anything complicated
    900  * is going on we punt to rw_enter_sleep() and rw_exit_wakeup(), respectively.
    901  */
    902 #if defined(lint) || defined(__lint)
    903 
    904 /* ARGSUSED */
    905 void
    906 rw_enter(krwlock_t *lp, krw_t rw)
    907 {}			/* lint-only stub; the real routine is the assembly in the !lint branch */
    908 
    909 /* ARGSUSED */
    910 void
    911 rw_exit(krwlock_t *lp)
    912 {}			/* lint-only stub; the real routine is the assembly in the !lint branch */
    913 
    914 #else	/* __lint */
    915 
    916 #if defined(__amd64)
    917 
    918 	ENTRY(rw_enter)				/* void rw_enter(krwlock_t *lp, krw_t rw); %rdi = lp, %esi = rw */
    919 	movq	%gs:CPU_THREAD, %rdx		/* rdx = thread ptr */
    920 	cmpl	$RW_WRITER, %esi		/* writer request? */
    921 	je	.rw_write_enter
    922 	incl	T_KPRI_REQ(%rdx)		/* THREAD_KPRI_REQUEST() */
    923 	movq	(%rdi), %rax			/* rax = old rw_wwwh value */
    924 	testl	$RW_WRITE_LOCKED|RW_WRITE_WANTED, %eax	/* write-locked or write-wanted? */
    925 	jnz	rw_enter_sleep			/* yes: take the slow path */
    926 	leaq	RW_READ_LOCK(%rax), %rdx	/* rdx = new rw_wwwh value */
    927 	lock
    928 	cmpxchgq %rdx, (%rdi)			/* try to grab read lock */
    929 	jnz	rw_enter_sleep			/* lock word changed under us: slow path */
    930 .rw_read_enter_lockstat_patch_point:
    931 	ret					/* code below is reached only if this ret is patched out */
    932 	movq	%gs:CPU_THREAD, %rcx		/* rcx = thread ptr */
    933 	movq	%rdi, %rsi			/* rsi = lock ptr */
    934 	movl	$LS_RW_ENTER_ACQUIRE, %edi	/* edi = event */
    935 	movl	$RW_READER, %edx		/* edx = extra argument: lock type */
    936 	jmp	lockstat_wrapper_arg
    937 .rw_write_enter:
    938 	orq	$RW_WRITE_LOCKED, %rdx		/* rdx = write-locked value */
    939 	xorl	%eax, %eax			/* rax = unheld value */
    940 	lock
    941 	cmpxchgq %rdx, (%rdi)			/* try to grab write lock */
    942 	jnz	rw_enter_sleep			/* lock word was not 0: slow path */
    943 
    944 #if defined(OPTERON_WORKAROUND_6323525)
    945 .rw_write_enter_lockstat_patch_point:
    946 .rw_write_enter_6323525_patch_point:
    947 	ret
    948 	nop
    949 	nop
    950 .rw_write_enter_lockstat_6323525_patch_point:
    951 	nop
    952 #else	/* OPTERON_WORKAROUND_6323525 */
    953 .rw_write_enter_lockstat_patch_point:
    954 	ret					/* code below is reached only if this ret is patched out */
    955 #endif	/* OPTERON_WORKAROUND_6323525 */
    956 
    957 	movq	%gs:CPU_THREAD, %rcx		/* rcx = thread ptr */
    958 	movq	%rdi, %rsi			/* rsi = lock ptr */
    959 	movl	$LS_RW_ENTER_ACQUIRE, %edi	/* edi = event */
    960 	movl	$RW_WRITER, %edx		/* edx = extra argument: lock type */
    961 	jmp	lockstat_wrapper_arg
    962 	SET_SIZE(rw_enter)
    963 
    964 	ENTRY(rw_exit)				/* void rw_exit(krwlock_t *lp); %rdi = lp */
    965 	movq	(%rdi), %rax			/* rax = old rw_wwwh value */
    966 	cmpl	$RW_READ_LOCK, %eax		/* single-reader, no waiters? */
    967 	jne	.rw_not_single_reader
    968 	xorl	%edx, %edx			/* rdx = new value (unheld) */
    969 .rw_read_exit:
    970 	lock
    971 	cmpxchgq %rdx, (%rdi)			/* try to drop read lock */
    972 	jnz	rw_exit_wakeup			/* lock word changed under us: slow path */
    973 	movq	%gs:CPU_THREAD, %rcx		/* rcx = thread ptr */
    974 	decl	T_KPRI_REQ(%rcx)		/* THREAD_KPRI_RELEASE() */
    975 .rw_read_exit_lockstat_patch_point:
    976 	ret					/* code below is reached only if this ret is patched out */
    977 	movq	%rdi, %rsi			/* rsi = lock ptr */
    978 	movl	$LS_RW_EXIT_RELEASE, %edi	/* edi = event */
    979 	movl	$RW_READER, %edx		/* edx = extra argument: lock type */
    980 	jmp	lockstat_wrapper_arg
    981 .rw_not_single_reader:
    982 	testl	$RW_WRITE_LOCKED, %eax	/* write-locked? */
    983 	jnz	.rw_write_exit
    984 	leaq	-RW_READ_LOCK(%rax), %rdx	/* rdx = new value */
    985 	cmpl	$RW_READ_LOCK, %edx		/* does at least one reader remain? */
    986 	jge	.rw_read_exit		/* not last reader, safe to drop */
    987 	jmp	rw_exit_wakeup			/* last reader with waiters */
    988 .rw_write_exit:
    989 	movq	%gs:CPU_THREAD, %rax		/* rax = thread ptr */
    990 	xorl	%edx, %edx			/* rdx = new value (unheld) */
    991 	orq	$RW_WRITE_LOCKED, %rax		/* rax = expected value: write-locked by this thread */
    992 	lock
    993 	cmpxchgq %rdx, (%rdi)			/* try to drop write lock */
    994 	jnz	rw_exit_wakeup			/* lock word differs from expected: slow path */
    995 .rw_write_exit_lockstat_patch_point:
    996 	ret					/* code below is reached only if this ret is patched out */
    997 	movq	%gs:CPU_THREAD, %rcx		/* rcx = thread ptr */
    998 	movq	%rdi, %rsi			/* rsi = lock ptr */
    999 	movl	$LS_RW_EXIT_RELEASE, %edi	/* edi = event */
   1000 	movl	$RW_WRITER, %edx		/* edx = extra argument: lock type */
   1001 	jmp	lockstat_wrapper_arg
   1002 	SET_SIZE(rw_exit)
   1003 
   1004 #else
   1005 
   1006 	ENTRY(rw_enter)
        	/*
        	 * rw_enter(lp, rw), i386 version: 4(%esp) = lock pointer,
        	 * 8(%esp) = requested lock type.
        	 *
        	 * Both the reader and the writer fast path try a single lock
        	 * cmpxchg on the lock word and branch to rw_enter_sleep when the
        	 * lock cannot be taken immediately.  The `ret' instructions sit on
        	 * lockstat patch points; when lockstat_hot_patch() turns one into
        	 * a nop, execution falls through into the lockstat_wrapper_arg
        	 * tail jump behind it.
        	 */
   1007 	movl	%gs:CPU_THREAD, %edx		/* edx = thread ptr */
   1008 	movl	4(%esp), %ecx			/* ecx = lock ptr */
   1009 	cmpl	$RW_WRITER, 8(%esp)		/* writer lock requested? */
   1010 	je	.rw_write_enter
   1011 	incl	T_KPRI_REQ(%edx)		/* THREAD_KPRI_REQUEST() */
   1012 	movl	(%ecx), %eax			/* eax = old rw_wwwh value */
   1013 	testl	$RW_WRITE_LOCKED|RW_WRITE_WANTED, %eax
   1014 	jnz	rw_enter_sleep			/* writer holds or wants it */
   1015 	leal	RW_READ_LOCK(%eax), %edx	/* edx = new rw_wwwh value */
   1016 	lock
   1017 	cmpxchgl %edx, (%ecx)			/* try to grab read lock */
   1018 	jnz	rw_enter_sleep			/* lock word changed under us */
   1019 .rw_read_enter_lockstat_patch_point:
   1020 	ret
        	/* reached only while the patch point above holds a nop */
   1021 	movl	%gs:CPU_THREAD, %edx		/* edx = thread ptr */
   1022 	movl	$LS_RW_ENTER_ACQUIRE, %eax
   1023 	pushl	$RW_READER
   1024 	jmp	lockstat_wrapper_arg
   1025 .rw_write_enter:
   1026 	orl	$RW_WRITE_LOCKED, %edx		/* edx = write-locked value */
   1027 	xorl	%eax, %eax			/* eax = unheld value */
   1028 	lock
   1029 	cmpxchgl %edx, (%ecx)			/* try to grab write lock */
   1030 	jnz	rw_enter_sleep			/* lock word was not zero */
   1031 
   1032 #if defined(OPTERON_WORKAROUND_6323525)
        	/*
        	 * ret/nop/nop/nop reserves 4 bytes.  patch_workaround_6323525()
        	 * may overwrite them with lfence/ret; after that the `ret' lives
        	 * at .rw_write_enter_lockstat_6323525_patch_point, which is the
        	 * byte lockstat_hot_patch() then toggles instead.
        	 */
   1033 .rw_write_enter_lockstat_patch_point:
   1034 .rw_write_enter_6323525_patch_point:
   1035 	ret
   1036 	nop
   1037 	nop
   1038 .rw_write_enter_lockstat_6323525_patch_point:
   1039 	nop
   1040 #else	/* OPTERON_WORKAROUND_6323525 */
   1041 .rw_write_enter_lockstat_patch_point:
   1042 	ret
   1043 #endif	/* OPTERON_WORKAROUND_6323525 */
   1044 
        	/* reached only while the active patch point holds a nop */
   1045 	movl	%gs:CPU_THREAD, %edx		/* edx = thread ptr */
   1046 	movl	$LS_RW_ENTER_ACQUIRE, %eax
   1047 	pushl	$RW_WRITER
   1048 	jmp	lockstat_wrapper_arg
   1049 	SET_SIZE(rw_enter)
   1050 
   1051 	ENTRY(rw_exit)
        	/*
        	 * rw_exit(lp), i386 version: 4(%esp) = lock pointer.
        	 *
        	 * The fast paths release the lock with one lock cmpxchg on the lock
        	 * word; any unexpected lock-word state is handed to rw_exit_wakeup.
        	 * The `ret' instructions sit on lockstat patch points; when
        	 * lockstat_hot_patch() turns one into a nop, execution falls
        	 * through into the lockstat_wrapper_arg tail jump behind it.
        	 */
   1052 	movl	4(%esp), %ecx			/* ecx = lock ptr */
   1053 	movl	(%ecx), %eax			/* eax = old rw_wwwh value */
   1054 	cmpl	$RW_READ_LOCK, %eax		/* single-reader, no waiters? */
   1055 	jne	.rw_not_single_reader
   1056 	xorl	%edx, %edx			/* edx = new value (unheld) */
   1057 .rw_read_exit:
        	/* eax = expected old value, edx = value to install */
   1058 	lock
   1059 	cmpxchgl %edx, (%ecx)			/* try to drop read lock */
   1060 	jnz	rw_exit_wakeup			/* lock word changed under us */
   1061 	movl	%gs:CPU_THREAD, %edx		/* edx = thread ptr */
   1062 	decl	T_KPRI_REQ(%edx)		/* THREAD_KPRI_RELEASE() */
   1063 .rw_read_exit_lockstat_patch_point:
   1064 	ret
        	/* reached only while the patch point above holds a nop */
   1065 	movl	$LS_RW_EXIT_RELEASE, %eax
   1066 	pushl	$RW_READER
   1067 	jmp	lockstat_wrapper_arg
   1068 .rw_not_single_reader:
   1069 	testl	$RW_WRITE_LOCKED, %eax	/* held by a writer? */
   1070 	jnz	.rw_write_exit
   1071 	leal	-RW_READ_LOCK(%eax), %edx	/* edx = new value */
   1072 	cmpl	$RW_READ_LOCK, %edx
   1073 	jge	.rw_read_exit		/* not last reader, safe to drop */
   1074 	jmp	rw_exit_wakeup			/* last reader with waiters */
   1075 .rw_write_exit:
   1076 	movl	%gs:CPU_THREAD, %eax		/* eax = thread ptr */
   1077 	xorl	%edx, %edx			/* edx = new value (unheld) */
   1078 	orl	$RW_WRITE_LOCKED, %eax		/* eax = write-locked value */
   1079 	lock
   1080 	cmpxchgl %edx, (%ecx)			/* try to drop write lock */
   1081 	jnz	rw_exit_wakeup			/* word != owner|WRITE_LOCKED */
   1082 .rw_write_exit_lockstat_patch_point:
   1083 	ret
        	/* reached only while the patch point above holds a nop */
   1084 	movl	%gs:CPU_THREAD, %edx		/* edx = thread ptr */
   1085 	movl	$LS_RW_EXIT_RELEASE, %eax
   1086 	pushl	$RW_WRITER
   1087 	jmp	lockstat_wrapper_arg
   1088 	SET_SIZE(rw_exit)
   1089 
   1090 #endif	/* !__amd64 */
   1091 
   1092 #endif	/* __lint */
   1093 
   1094 #if defined(OPTERON_WORKAROUND_6323525)
   1095 #if defined(lint) || defined(__lint)
   1096 
   1097 int	workaround_6323525_patched;
   1098 
        /* Empty C stand-in for the assembly routine; compiled for lint only. */
   1099 void
   1100 patch_workaround_6323525(void)
   1101 {}
   1102 
   1103 #else	/* lint */
   1104 
   1105 /*
   1106  * If it is necessary to patch the lock enter routines with the lfence
   1107  * workaround, workaround_6323525_patched is set to a non-zero value so that
   1108  * the lockstat_hot_patch routine can patch to the new location of the 'ret'
   1109  * instruction.
   1110  */
   1111 	DGDEF3(workaround_6323525_patched, 4, 4)
   1112 	.long	0			/* starts out as "not patched" */
   1113 
   1114 #if defined(__amd64)
   1115 
        /*
         * HOT_MUTEX_PATCH(srcaddr, dstaddr, size)
         *
         * Copy `size' bytes of instruction text from srcaddr to dstaddr, one
         * byte per hot_patch_kernel_text() call, starting with the last byte
         * and ending with the first.
         *
         * Register use: %rbx = bytes still to copy, %r12 = source cursor,
         * %r13 = destination cursor.  The macro does not save these itself.
         * %rdi, %rsi and %rdx carry the three call arguments (destination
         * address, byte value, length 1).
         */
   1116 #define HOT_MUTEX_PATCH(srcaddr, dstaddr, size)	\
   1117 	movq	$size, %rbx;			\
   1118 	movq	$dstaddr, %r13;			\
   1119 	addq	%rbx, %r13;			\
   1120 	movq	$srcaddr, %r12;			\
   1121 	addq	%rbx, %r12;			\
   1122 0:						\
   1123 	decq	%r13;				\
   1124 	decq	%r12;				\
   1125 	movzbl	(%r12), %esi;			\
   1126 	movq	$1, %rdx;			\
   1127 	movq	%r13, %rdi;			\
   1128 	call	hot_patch_kernel_text;		\
   1129 	decq	%rbx;				\
   1130 	testq	%rbx, %rbx;			\
   1131 	jg	0b;
   1132 
   1133 /*
   1134  * patch_workaround_6323525: provide workaround for 6323525
   1135  *
   1136  * The workaround is to place a fencing instruction (lfence) between the
   1137  * mutex operation and the subsequent read-modify-write instruction.
   1138  *
   1139  * This routine hot patches the lfence instruction on top of the space
   1140  * reserved by nops in the lock enter routines.
   1141  */
   1142 	ENTRY_NP(patch_workaround_6323525)
   1143 	pushq	%rbp
   1144 	movq	%rsp, %rbp
   1145 	pushq	%r12
   1146 	pushq	%r13
   1147 	pushq	%rbx
   1148 
   1149 	/*
   1150 	 * lockstat_hot_patch() to use the alternate lockstat workaround
   1151 	 * 6323525 patch points (points past the lfence instruction to the
   1152 	 * new ret) when workaround_6323525_patched is set.
   1153 	 */
   1154 	movl	$1, workaround_6323525_patched
   1155 
   1156 	/*
   1157 	 * patch ret/nop/nop/nop to lfence/ret at the end of the lock enter
   1158 	 * routines. The 4 bytes are patched in reverse order so that the
   1159 	 * the existing ret is overwritten last. This provides lock enter
   1160 	 * sanity during the intermediate patching stages.
   1161 	 */
   1162 	HOT_MUTEX_PATCH(_lfence_insn, .mutex_enter_6323525_patch_point, 4)
   1163 	HOT_MUTEX_PATCH(_lfence_insn, .mutex_tryenter_6323525_patch_point, 4)
   1164 	HOT_MUTEX_PATCH(_lfence_insn, .mutex_atryenter_6323525_patch_point, 4)
   1165 	HOT_MUTEX_PATCH(_lfence_insn, .rw_write_enter_6323525_patch_point, 4)
   1166 
   1167 	popq	%rbx
   1168 	popq	%r13
   1169 	popq	%r12
   1170 	movq	%rbp, %rsp
   1171 	popq	%rbp
   1172 	ret
   1173 _lfence_insn:
   1174 	lfence
   1175 	ret
   1176 	SET_SIZE(patch_workaround_6323525)
   1177 
   1178 
   1179 #else	/* __amd64 */
   1180 
        /*
         * HOT_MUTEX_PATCH(srcaddr, dstaddr, size), i386 version
         *
         * Copy `size' bytes of instruction text from srcaddr to dstaddr, one
         * byte per hot_patch_kernel_text() call, starting with the last byte
         * and ending with the first.
         *
         * Register use: %ebx = bytes still to copy, %esi = source cursor,
         * %edi = destination cursor.  The macro does not save these itself.
         * The three call arguments (destination address, byte value,
         * length 1) are pushed on the stack and popped again with
         * addl $12, %esp after the call.
         */
   1181 #define HOT_MUTEX_PATCH(srcaddr, dstaddr, size)	\
   1182 	movl	$size, %ebx;			\
   1183 	movl	$srcaddr, %esi;			\
   1184 	addl	%ebx, %esi;			\
   1185 	movl	$dstaddr, %edi;			\
   1186 	addl	%ebx, %edi;			\
   1187 0:      					\
   1188 	decl	%esi;				\
   1189 	decl	%edi;				\
   1190 	pushl	$1;				\
   1191 	movzbl	(%esi), %eax;			\
   1192 	pushl	%eax;				\
   1193 	pushl	%edi;				\
   1194 	call	hot_patch_kernel_text;		\
   1195 	addl	$12, %esp;			\
   1196 	decl	%ebx;				\
   1197 	testl	%ebx, %ebx;			\
   1198 	jg	0b;
   1199 
   1200 
   1201 	/*
        	 * patch_workaround_6323525, i386 version.
        	 *
        	 * Sets workaround_6323525_patched and then overwrites the 4-byte
        	 * ret/nop/nop/nop sequence at each *_6323525_patch_point with the
        	 * lfence/ret template at _lfence_insn.  HOT_MUTEX_PATCH copies the
        	 * bytes last-to-first, so the existing ret is replaced last.
        	 */
   1202 	ENTRY_NP(patch_workaround_6323525)
   1203 	pushl	%ebp
   1204 	movl	%esp, %ebp
   1205 	pushl	%ebx			/* %ebx/%esi/%edi are used by */
   1206 	pushl	%esi			/* HOT_MUTEX_PATCH, so save them */
   1207 	pushl	%edi
   1208 
   1209 	movl	$1, workaround_6323525_patched
   1210 
   1211 	HOT_MUTEX_PATCH(_lfence_insn, .mutex_enter_6323525_patch_point, 4)
   1212 	HOT_MUTEX_PATCH(_lfence_insn, .mutex_tryenter_6323525_patch_point, 4)
   1213 	HOT_MUTEX_PATCH(_lfence_insn, .mutex_atryenter_6323525_patch_point, 4)
   1214 	HOT_MUTEX_PATCH(_lfence_insn, .rw_write_enter_6323525_patch_point, 4)
   1215 
   1216 	popl	%edi
   1217 	popl	%esi
   1218 	popl	%ebx
   1219 	movl	%ebp, %esp
   1220 	popl	%ebp
   1221 	ret
        	/*
        	 * 4-byte template copied over the patch points: a 3-byte lfence,
        	 * emitted as raw bytes, followed by ret.  The ret above keeps it
        	 * from being reached by fall-through.
        	 */
   1222 _lfence_insn:
   1223 	.byte	0xf, 0xae, 0xe8		/ [lfence instruction]
   1224 	ret
   1225 	SET_SIZE(patch_workaround_6323525)
   1226 
   1227 #endif	/* !__amd64 */
   1228 #endif	/* !lint */
   1229 #endif	/* OPTERON_WORKAROUND_6323525 */
   1230 
   1231 
   1232 #if defined(lint) || defined(__lint)
   1233 
   1234 void
        /* Empty C stand-in for the assembly routine; compiled for lint only. */
   1235 lockstat_hot_patch(void)
   1236 {}
   1237 
   1238 #else
   1239 
   1240 #if defined(__amd64)
   1241 
        /*
         * HOT_PATCH(addr, event, active_instr, normal_instr, len), amd64 version
         *
         * Read the lockstat_probemap entry for `event' (at byte offset
         * event * DTRACE_IDSIZE).  If it is non-zero, write active_instr at
         * addr; otherwise write normal_instr.  The write is done by
         * hot_patch_kernel_text(addr, instr, len).
         *
         * Clobbers %rax, %rdx, %rsi, %rdi and whatever the callee clobbers.
         * Uses local label 9.
         */
   1242 #define	HOT_PATCH(addr, event, active_instr, normal_instr, len)	\
   1243 	movq	$normal_instr, %rsi;		\
   1244 	movq	$active_instr, %rdi;		\
   1245 	leaq	lockstat_probemap(%rip), %rax;	\
   1246 	movl 	_MUL(event, DTRACE_IDSIZE)(%rax), %eax;	\
   1247 	testl	%eax, %eax;			\
   1248 	jz	9f;				\
   1249 	movq	%rdi, %rsi;			\
   1250 9:						\
   1251 	movq	$len, %rdx;			\
   1252 	movq	$addr, %rdi;			\
   1253 	call	hot_patch_kernel_text
   1254 
   1255 #else
   1256 
   1257 #define	HOT_PATCH(addr, event, active_instr, normal_instr, len)	\
   1258 	movl	$normal_instr, %ecx;		\
   1259 	movl	$active_instr, %edx;		\
   1260 	movl	$lockstat_probemap, %eax;	\
   1261 	movl	_MUL(event, DTRACE_IDSIZE)(%eax), %eax;	\
   1262 	testl	%eax, %eax;			\
   1263 	jz	. + 4;				\
   1264 	movl	%edx, %ecx;			\
   1265 	pushl	$len;				\
   1266 	pushl	%ecx;				\
   1267 	pushl	$addr;				\
   1268 	call	hot_patch_kernel_text;		\
   1269 	addl	$12, %esp;
   1270 
   1271 #endif	/* !__amd64 */
   1272 
   1273 	ENTRY(lockstat_hot_patch)
        	/*
        	 * lockstat_hot_patch(): for every lockstat patch point in the lock
        	 * primitives, consult lockstat_probemap for the associated event
        	 * and write either the "probe enabled" byte (NOP_INSTR, so the
        	 * primitive falls through into its lockstat tail) or the "probe
        	 * disabled" byte (RET_INSTR).  All work is done by the HOT_PATCH
        	 * macro, one byte per patch point.
        	 */
   1274 #if defined(__amd64)
   1275 	pushq	%rbp			/* align stack properly */
   1276 	movq	%rsp, %rbp
   1277 #endif	/* __amd64 */
   1278 
   1279 #if defined(OPTERON_WORKAROUND_6323525)
        	/*
        	 * Once patch_workaround_6323525() has run, the ret of the enter
        	 * routines sits behind the lfence, so the alternate *_6323525
        	 * lockstat patch points must be used for them.
        	 */
   1280 	cmpl	$0, workaround_6323525_patched
   1281 	je	1f			/* not patched: original points */
   1282 	HOT_PATCH(.mutex_enter_lockstat_6323525_patch_point,
   1283 		LS_MUTEX_ENTER_ACQUIRE, NOP_INSTR, RET_INSTR, 1)
   1284 	HOT_PATCH(.mutex_tryenter_lockstat_6323525_patch_point,
   1285 		LS_MUTEX_ENTER_ACQUIRE, NOP_INSTR, RET_INSTR, 1)
   1286 	HOT_PATCH(.rw_write_enter_lockstat_6323525_patch_point,
   1287 		LS_RW_ENTER_ACQUIRE, NOP_INSTR, RET_INSTR, 1)
   1288 	jmp	2f
   1289 1:
   1290 	HOT_PATCH(.mutex_enter_lockstat_patch_point,
   1291 		LS_MUTEX_ENTER_ACQUIRE, NOP_INSTR, RET_INSTR, 1)
   1292 	HOT_PATCH(.mutex_tryenter_lockstat_patch_point,
   1293 		LS_MUTEX_ENTER_ACQUIRE, NOP_INSTR, RET_INSTR, 1)
   1294 	HOT_PATCH(.rw_write_enter_lockstat_patch_point,
   1295 		LS_RW_ENTER_ACQUIRE, NOP_INSTR, RET_INSTR, 1)
   1296 2:
   1297 #else	/* OPTERON_WORKAROUND_6323525 */
   1298 	HOT_PATCH(.mutex_enter_lockstat_patch_point,
   1299 		LS_MUTEX_ENTER_ACQUIRE, NOP_INSTR, RET_INSTR, 1)
   1300 	HOT_PATCH(.mutex_tryenter_lockstat_patch_point,
   1301 		LS_MUTEX_ENTER_ACQUIRE, NOP_INSTR, RET_INSTR, 1)
   1302 	HOT_PATCH(.rw_write_enter_lockstat_patch_point,
   1303 		LS_RW_ENTER_ACQUIRE, NOP_INSTR, RET_INSTR, 1)
   1304 #endif	/* !OPTERON_WORKAROUND_6323525 */
        	/* patch points that are the same with or without the workaround */
   1305 	HOT_PATCH(.mutex_exit_lockstat_patch_point,
   1306 		LS_MUTEX_EXIT_RELEASE, NOP_INSTR, RET_INSTR, 1)
   1307 	HOT_PATCH(.rw_read_enter_lockstat_patch_point,
   1308 		LS_RW_ENTER_ACQUIRE, NOP_INSTR, RET_INSTR, 1)
   1309 	HOT_PATCH(.rw_write_exit_lockstat_patch_point,
   1310 		LS_RW_EXIT_RELEASE, NOP_INSTR, RET_INSTR, 1)
   1311 	HOT_PATCH(.rw_read_exit_lockstat_patch_point,
   1312 		LS_RW_EXIT_RELEASE, NOP_INSTR, RET_INSTR, 1)
   1313 	HOT_PATCH(.lock_set_lockstat_patch_point,
   1314 		LS_LOCK_SET_ACQUIRE, NOP_INSTR, RET_INSTR, 1)
   1315 	HOT_PATCH(.lock_try_lockstat_patch_point,
   1316 		LS_LOCK_TRY_ACQUIRE, NOP_INSTR, RET_INSTR, 1)
   1317 	HOT_PATCH(.lock_clear_lockstat_patch_point,
   1318 		LS_LOCK_CLEAR_RELEASE, NOP_INSTR, RET_INSTR, 1)
   1319 	HOT_PATCH(.lock_set_spl_lockstat_patch_point,
   1320 		LS_LOCK_SET_SPL_ACQUIRE, NOP_INSTR, RET_INSTR, 1)
   1321 
        	/*
        	 * This site is not a ret/nop toggle: the enabled value is
        	 * LOCK_CLEAR_SPLX_LOCKSTAT_PATCH_VAL and the disabled value is 0.
        	 */
   1322 	HOT_PATCH(LOCK_CLEAR_SPLX_LOCKSTAT_PATCH_POINT,
   1323 		LS_LOCK_CLEAR_SPLX_RELEASE,
   1324 		LOCK_CLEAR_SPLX_LOCKSTAT_PATCH_VAL, 0, 1);
   1325 #if defined(__amd64)
   1326 	leave			/* unwind stack */
   1327 #endif	/* __amd64 */
   1328 	ret
   1329 	SET_SIZE(lockstat_hot_patch)
   1330 
   1331 #endif	/* __lint */
   1332 
   1333 #if defined(lint) || defined(__lint)
   1334 
   1335 /* XX64 membar_*() should be inlines */
   1336 
   1337 void
        /*
         * Empty C stand-ins for the membar_*() assembly routines; compiled
         * for lint only.
         */
   1338 membar_sync(void)
   1339 {}
   1340 
   1341 void
   1342 membar_enter(void)
   1343 {}
   1344 
   1345 void
   1346 membar_exit(void)
   1347 {}
   1348 
   1349 void
   1350 membar_producer(void)
   1351 {}
   1352 
   1353 void
   1354 membar_consumer(void)
   1355 {}
   1356 
   1357 #else	/* __lint */
   1358 
   1359 #if defined(__amd64)
   1360 
   1361 	ENTRY(membar_enter)
   1362 	ALTENTRY(membar_exit)
   1363 	ALTENTRY(membar_sync)
        	/*
        	 * amd64: membar_enter, membar_exit and membar_sync are three names
        	 * for the same code, a single mfence followed by ret.
        	 */
   1364 	mfence			/* lighter weight than lock; xorq $0,(%rsp) */
   1365 	ret
   1366 	SET_SIZE(membar_sync)
   1367 	SET_SIZE(membar_exit)
   1368 	SET_SIZE(membar_enter)
   1369 
   1370 	ENTRY(membar_producer)
        	/* amd64: store barrier, implemented as a single sfence */
   1371 	sfence
   1372 	ret
   1373 	SET_SIZE(membar_producer)
   1374 
   1375 	ENTRY(membar_consumer)
        	/* amd64: load barrier, implemented as a single lfence */
   1376 	lfence
   1377 	ret
   1378 	SET_SIZE(membar_consumer)
   1379 
   1380 #else
   1381 
   1382 	ENTRY(membar_enter)
   1383 	ALTENTRY(membar_exit)
   1384 	ALTENTRY(membar_sync)
        	/*
        	 * i386: membar_enter, membar_exit and membar_sync are three names
        	 * for the same code.  The barrier is a locked read-modify-write
        	 * that xors 0 into the word at the top of the stack, leaving the
        	 * stored value unchanged.
        	 */
   1385 	lock
   1386 	xorl	$0, (%esp)
   1387 	ret
   1388 	SET_SIZE(membar_sync)
   1389 	SET_SIZE(membar_exit)
   1390 	SET_SIZE(membar_enter)
   1391 
   1392 /*
   1393  * On machines that support sfence and lfence, these
   1394  * memory barriers can be more precisely implemented
   1395  * without causing the whole world to stop
        *
        * NOTE(review): the global _patch_sfence_ret / _patch_lfence_ret labels
        * below presumably mark the bytes that get rewritten to sfence/lfence
        * plus ret on such machines; the patching code is not in this part of
        * the file - confirm against the caller.
   1396  */
   1397 	ENTRY(membar_producer)
   1398 	.globl	_patch_sfence_ret
   1399 _patch_sfence_ret:			/* c.f. membar #StoreStore */
   1400 	lock				/* default: locked no-op rmw */
   1401 	xorl	$0, (%esp)
   1402 	ret
   1403 	SET_SIZE(membar_producer)
   1404 
   1405 	ENTRY(membar_consumer)
        	/*
        	 * i386 load barrier.  As assembled it is a locked xor of 0 into the
        	 * word at the top of the stack.  NOTE(review): the global
        	 * _patch_lfence_ret label presumably lets other code rewrite these
        	 * bytes on CPUs with lfence - confirm against the patching code.
        	 */
   1406 	.globl	_patch_lfence_ret
   1407 _patch_lfence_ret:			/* c.f. membar #LoadLoad */
   1408 	lock
   1409 	xorl	$0, (%esp)
   1410 	ret
   1411 	SET_SIZE(membar_consumer)
   1412 
   1413 #endif	/* !__amd64 */
   1414 
   1415 #endif	/* __lint */
   1416 
   1417 /*
   1418  * thread_onproc()
   1419  * Set thread in onproc state for the specified CPU.
   1420  * Also set the thread lock pointer to the CPU's onproc lock.
   1421  * Since the new lock isn't held, the store ordering is important.
   1422  * If not done in assembler, the compiler could reorder the stores.
   1423  */
   1424 #if defined(lint) || defined(__lint)
   1425 
   1426 void
        /*
         * C rendering of thread_onproc() for lint only.  The real routine is
         * written in assembly so that the two stores stay in this order.
         */
   1427 thread_onproc(kthread_id_t t, cpu_t *cp)
   1428 {
   1429 	t->t_state = TS_ONPROC;			/* state first ... */
   1430 	t->t_lockp = &cp->cpu_thread_lock;	/* ... then the lock pointer */
   1431 }
   1432 
   1433 #else	/* __lint */
   1434 
   1435 #if defined(__amd64)
   1436 
   1437 	ENTRY(thread_onproc)
        	/*
        	 * amd64: %rdi = thread pointer, %rsi = cpu pointer.  The state
        	 * store is issued before the lock-pointer store; do not reorder.
        	 */
   1438 	addq	$CPU_THREAD_LOCK, %rsi	/* pointer to disp_lock while running */
   1439 	movl	$ONPROC_THREAD, T_STATE(%rdi)	/* set state to TS_ONPROC */
   1440 	movq	%rsi, T_LOCKP(%rdi)	/* store new lock pointer */
   1441 	ret
   1442 	SET_SIZE(thread_onproc)
   1443 
   1444 #else
   1445 
   1446 	ENTRY(thread_onproc)
        	/*
        	 * i386: 4(%esp) = thread pointer, 8(%esp) = cpu pointer.  The state
        	 * store is issued before the lock-pointer store; do not reorder.
        	 */
   1447 	movl	4(%esp), %eax		/* eax = thread ptr */
   1448 	movl	8(%esp), %ecx		/* ecx = cpu ptr */
   1449 	addl	$CPU_THREAD_LOCK, %ecx	/* pointer to disp_lock while running */
   1450 	movl	$ONPROC_THREAD, T_STATE(%eax)	/* set state to TS_ONPROC */
   1451 	movl	%ecx, T_LOCKP(%eax)	/* store new lock pointer */
   1452 	ret
   1453 	SET_SIZE(thread_onproc)
   1454 
   1455 #endif	/* !__amd64 */
   1456 
   1457 #endif	/* __lint */
   1458 
   1459 /*
   1460  * mutex_delay_default(void)
   1461  * Spins for approx a few hundred processor cycles and returns to caller.
   1462  */
   1463 
   1464 #if defined(lint) || defined(__lint)
   1465 
   1466 void
        /* Empty C stand-in for the assembly routine; compiled for lint only. */
   1467 mutex_delay_default(void)
   1468 {}
   1469 
   1470 #else	/* __lint */
   1471 
   1472 #if defined(__amd64)
   1473 
   1474 	ENTRY(mutex_delay_default)
        	/*
        	 * amd64: busy-wait by counting %r11 down from 92.  The loop body
        	 * touches no memory and %r11 is the only register written.
        	 */
   1475 	movq	$92,%r11		/* r11 = iteration count */
   1476 0:	decq	%r11
   1477 	jg	0b			/* loop while the count is > 0 */
   1478 	ret
   1479 	SET_SIZE(mutex_delay_default)
   1480 
   1481 #else
   1482 
   1483 	ENTRY(mutex_delay_default)
        	/*
        	 * i386: busy-wait by counting %ebx down from 93.  %ebx is saved and
        	 * restored around the loop, and a frame is set up so that `leave'
        	 * can undo the stack realignment.
        	 */
   1484 	push	%ebp
   1485 	movl	%esp,%ebp
   1486 	andl	$-16,%esp		/* round %esp down to 16 bytes */
   1487 	push	%ebx			/* preserve caller's %ebx */
   1488 	movl	$93,%ebx		/* ebx = iteration count */
   1489 0:	decl	%ebx
   1490 	jg	0b			/* loop while the count is > 0 */
   1491 	pop	%ebx
   1492 	leave				/* restore %esp and %ebp */
   1493 	ret
   1494 	SET_SIZE(mutex_delay_default)
   1495 
   1496 #endif	/* !__amd64 */
   1497 #endif	/* __lint */
   1498