Home | History | Annotate | Download | only in os
      1 /*
      2  * CDDL HEADER START
      3  *
      4  * The contents of this file are subject to the terms of the
      5  * Common Development and Distribution License (the "License").
      6  * You may not use this file except in compliance with the License.
      7  *
      8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
      9  * or http://www.opensolaris.org/os/licensing.
     10  * See the License for the specific language governing permissions
     11  * and limitations under the License.
     12  *
     13  * When distributing Covered Code, include this CDDL HEADER in each
     14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
     15  * If applicable, add the following below this CDDL HEADER, with the
     16  * fields enclosed by brackets "[]" replaced with your own identifying
     17  * information: Portions Copyright [yyyy] [name of copyright owner]
     18  *
     19  * CDDL HEADER END
     20  */
     21 /*
     22  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
     23  * Use is subject to license terms.
     24  */
     25 
     26 /* common code with bug fixes from original version in trap.c */
     27 
     28 #include <sys/param.h>
     29 #include <sys/types.h>
     30 #include <sys/systm.h>
     31 #include <sys/archsystm.h>
     32 #include <sys/vmsystm.h>
     33 #include <sys/fpu/fpusystm.h>
     34 #include <sys/fpu/fpu_simulator.h>
     35 #include <sys/inline.h>
     36 #include <sys/debug.h>
     37 #include <sys/privregs.h>
     38 #include <sys/machpcb.h>
     39 #include <sys/simulate.h>
     40 #include <sys/proc.h>
     41 #include <sys/cmn_err.h>
     42 #include <sys/stack.h>
     43 #include <sys/watchpoint.h>
     44 #include <sys/trap.h>
     45 #include <sys/machtrap.h>
     46 #include <sys/mman.h>
     47 #include <sys/asi.h>
     48 #include <sys/copyops.h>
     49 #include <vm/as.h>
     50 #include <vm/page.h>
     51 #include <sys/model.h>
     52 #include <vm/seg_vn.h>
     53 #include <sys/byteorder.h>
     54 #include <sys/time.h>
     55 
     56 #define	IS_IBIT_SET(x)	(x & 0x2000)
     57 #define	IS_VIS1(op, op3)(op == 2 && op3 == 0x36)
     58 #define	IS_FLOAT_QUAD_OP(op, op3)(op == 2 && (op3 == 0x34 ||	\
     59 		op3 == 0x35))
     60 #define	IS_PARTIAL_OR_SHORT_FLOAT_LD_ST(op, op3, asi)		\
     61 		(op == 3 && (op3 == IOP_V8_LDDFA ||		\
     62 		op3 == IOP_V8_STDFA) &&	asi > ASI_SNFL)
     63 
     64 static int aligndebug = 0;
     65 
     66 /*
     67  * For the sake of those who must be compatible with unaligned
     68  * architectures, users can link their programs to use a
     69  * corrective trap handler that will fix unaligned references
     70  * a special trap #6 (T_FIX_ALIGN) enables this 'feature'.
     71  * Returns 1 for success, 0 for failure.
     72  */
     73 
     74 int
     75 do_unaligned(struct regs *rp, caddr_t *badaddr)
     76 {
     77 	uint_t	inst, op3, asi = 0;
     78 	uint_t	rd, rs1, rs2;
     79 	int	sz, nf = 0, ltlend = 0;
     80 	int	floatflg;
     81 	int	fsrflg;
     82 	int	immflg;
     83 	int	lddstdflg;
     84 	caddr_t	addr;
     85 	uint64_t val;
     86 	union {
     87 		uint64_t	l[2];
     88 		uint32_t	i[4];
     89 		uint16_t	s[8];
     90 		uint8_t		c[16];
     91 	} data;
     92 
     93 	ASSERT(USERMODE(rp->r_tstate));
     94 	inst = fetch_user_instr((caddr_t)rp->r_pc);
     95 
     96 	op3 = (inst >> 19) & 0x3f;
     97 	rd = (inst >> 25) & 0x1f;
     98 	rs1 = (inst >> 14) & 0x1f;
     99 	rs2 = inst & 0x1f;
    100 	floatflg = (inst >> 24) & 1;
    101 	immflg = (inst >> 13) & 1;
    102 	lddstdflg = fsrflg = 0;
    103 
    104 	/* if not load or store do nothing */
    105 	if ((inst >> 30) != 3)
    106 		return (0);
    107 
    108 	/* if ldstub or swap, do nothing */
    109 	if ((inst & 0xc1680000) == 0xc0680000)
    110 		return (0);
    111 
    112 	/* if cas/casx, do nothing */
    113 	if ((inst & 0xc1e00000) == 0xc1e00000)
    114 		return (0);
    115 
    116 	if (floatflg) {
    117 		switch ((inst >> 19) & 3) {	/* map size bits to a number */
    118 		case 0: sz = 4;
    119 			break;			/* ldf{a}/stf{a} */
    120 		case 1: fsrflg = 1;
    121 			if (rd == 0)
    122 				sz = 4;		/* ldfsr/stfsr */
    123 			else  if (rd == 1)
    124 				sz = 8;		/* ldxfsr/stxfsr */
    125 			else
    126 				return (SIMU_ILLEGAL);
    127 			break;
    128 		case 2: sz = 16;
    129 			break;		/* ldqf{a}/stqf{a} */
    130 		case 3: sz = 8;
    131 			break;		/* lddf{a}/stdf{a} */
    132 		}
    133 		/*
    134 		 * Fix to access extra double register encoding plus
    135 		 * compensate to access the correct fpu_dreg.
    136 		 */
    137 		if ((sz > 4) && (fsrflg == 0)) {
    138 			if ((rd & 1) == 1)
    139 				rd = (rd & 0x1e) | 0x20;
    140 			rd = rd >> 1;
    141 			if ((sz == 16) && ((rd & 0x1) != 0))
    142 				return (SIMU_ILLEGAL);
    143 		}
    144 	} else {
    145 		int sz_bits = (inst >> 19) & 0xf;
    146 		switch (sz_bits) {		/* map size bits to a number */
    147 		case 0:				/* lduw{a} */
    148 		case 4:				/* stw{a} */
    149 		case 8:				/* ldsw{a} */
    150 		case 0xf:			/* swap */
    151 			sz = 4; break;
    152 		case 1:				/* ldub{a} */
    153 		case 5:				/* stb{a} */
    154 		case 9:				/* ldsb{a} */
    155 		case 0xd:			/* ldstub */
    156 			sz = 1; break;
    157 		case 2:				/* lduh{a} */
    158 		case 6:				/* sth{a} */
    159 		case 0xa:			/* ldsh{a} */
    160 			sz = 2; break;
    161 		case 3:				/* ldd{a} */
    162 		case 7:				/* std{a} */
    163 			lddstdflg = 1;
    164 			sz = 8; break;
    165 		case 0xb:			/* ldx{a} */
    166 		case 0xe:			/* stx{a} */
    167 			sz = 8; break;
    168 		}
    169 	}
    170 
    171 
    172 	/* only support primary and secondary asi's */
    173 	if ((op3 >> 4) & 1) {
    174 		if (immflg) {
    175 			asi = (uint_t)(rp->r_tstate >> TSTATE_ASI_SHIFT) &
    176 			    TSTATE_ASI_MASK;
    177 		} else {
    178 			asi = (inst >> 5) & 0xff;
    179 		}
    180 		switch (asi) {
    181 		case ASI_P:
    182 		case ASI_S:
    183 			break;
    184 		case ASI_PNF:
    185 		case ASI_SNF:
    186 			nf = 1;
    187 			break;
    188 		case ASI_PL:
    189 		case ASI_SL:
    190 			ltlend = 1;
    191 			break;
    192 		case ASI_PNFL:
    193 		case ASI_SNFL:
    194 			ltlend = 1;
    195 			nf = 1;
    196 			break;
    197 		default:
    198 			return (0);
    199 		}
    200 		/*
    201 		 * Non-faulting stores generate a data_access_exception trap,
    202 		 * according to the Spitfire manual, which should be signaled
    203 		 * as an illegal instruction trap, because it can't be fixed.
    204 		 */
    205 		if ((nf) && ((op3 == IOP_V8_STQFA) || (op3 == IOP_V8_STDFA)))
    206 			return (SIMU_ILLEGAL);
    207 	}
    208 
    209 	if (aligndebug) {
    210 		printf("unaligned access at %p, instruction: 0x%x\n",
    211 		    (void *)rp->r_pc, inst);
    212 		printf("type %s", (((inst >> 21) & 1) ? "st" : "ld"));
    213 		if (((inst >> 21) & 1) == 0)
    214 			printf(" %s", (((inst >> 22) & 1) ?
    215 			    "signed" : "unsigned"));
    216 		printf(" asi 0x%x size %d immflg %d\n", asi, sz, immflg);
    217 		printf("rd = %d, op3 = 0x%x, rs1 = %d, rs2 = %d, imm13=0x%x\n",
    218 		    rd, op3, rs1, rs2, (inst & 0x1fff));
    219 	}
    220 
    221 	(void) flush_user_windows_to_stack(NULL);
    222 	if (getreg(rp, rs1, &val, badaddr))
    223 		return (SIMU_FAULT);
    224 	addr = (caddr_t)val;		/* convert to 32/64 bit address */
    225 	if (aligndebug)
    226 		printf("addr 1 = %p\n", (void *)addr);
    227 
    228 	/* check immediate bit and use immediate field or reg (rs2) */
    229 	if (immflg) {
    230 		int imm;
    231 		imm  = inst & 0x1fff;		/* mask out immediate field */
    232 		imm <<= 19;			/* sign extend it */
    233 		imm >>= 19;
    234 		addr += imm;			/* compute address */
    235 	} else {
    236 		if (getreg(rp, rs2, &val, badaddr))
    237 			return (SIMU_FAULT);
    238 		addr += val;
    239 	}
    240 
    241 	/*
    242 	 * If this is a 32-bit program, chop the address accordingly.  The
    243 	 * intermediate uintptr_t casts prevent warnings under a certain
    244 	 * compiler, and the temporary 32 bit storage is intended to force
    245 	 * proper code generation and break up what would otherwise be a
    246 	 * quadruple cast.
    247 	 */
    248 	if (curproc->p_model == DATAMODEL_ILP32) {
    249 		caddr32_t addr32 = (caddr32_t)(uintptr_t)addr;
    250 		addr = (caddr_t)(uintptr_t)addr32;
    251 	}
    252 
    253 	if (aligndebug)
    254 		printf("addr 2 = %p\n", (void *)addr);
    255 
    256 	if (addr >= curproc->p_as->a_userlimit) {
    257 		*badaddr = addr;
    258 		goto badret;
    259 	}
    260 
    261 	/* a single bit differentiates ld and st */
    262 	if ((inst >> 21) & 1) {			/* store */
    263 		if (floatflg) {
    264 			klwp_id_t lwp = ttolwp(curthread);
    265 			kfpu_t *fp = lwptofpu(lwp);
    266 			/* Ensure fp has been enabled */
    267 			if (fpu_exists) {
    268 				if (!(_fp_read_fprs() & FPRS_FEF))
    269 					fp_enable();
    270 			} else {
    271 				if (!fp->fpu_en)
    272 					fp_enable();
    273 			}
    274 			/* if fpu_exists read fpu reg */
    275 			if (fpu_exists) {
    276 				if (fsrflg) {
    277 					_fp_read_pfsr(&data.l[0]);
    278 				} else {
    279 					if (sz == 4) {
    280 						data.i[0] = 0;
    281 						_fp_read_pfreg(
    282 						    (unsigned *)&data.i[1], rd);
    283 					}
    284 					if (sz >= 8)
    285 						_fp_read_pdreg(
    286 						    &data.l[0], rd);
    287 					if (sz == 16)
    288 						_fp_read_pdreg(
    289 						    &data.l[1], rd+1);
    290 				}
    291 			} else {
    292 				if (fsrflg) {
    293 					/* Clear reserved bits, set version=7 */
    294 					fp->fpu_fsr &= ~0x30301000;
    295 					fp->fpu_fsr |= 0xE0000;
    296 					data.l[0] = fp->fpu_fsr;
    297 				} else {
    298 					if (sz == 4) {
    299 						data.i[0] = 0;
    300 						data.i[1] =
    301 						    (unsigned)fp->
    302 						    fpu_fr.fpu_regs[rd];
    303 					}
    304 					if (sz >= 8)
    305 						data.l[0] =
    306 						    fp->fpu_fr.fpu_dregs[rd];
    307 					if (sz == 16)
    308 						data.l[1] =
    309 						    fp->fpu_fr.fpu_dregs[rd+1];
    310 				}
    311 			}
    312 		} else {
    313 			if (lddstdflg) {		/* combine the data */
    314 				if (getreg(rp, rd, &data.l[0], badaddr))
    315 					return (SIMU_FAULT);
    316 				if (getreg(rp, rd+1, &data.l[1], badaddr))
    317 					return (SIMU_FAULT);
    318 				if (ltlend) {
    319 					/*
    320 					 * For STD, each 32-bit word is byte-
    321 					 * swapped individually.  For
    322 					 * simplicity we don't want to do that
    323 					 * below, so we swap the words now to
    324 					 * get the desired result in the end.
    325 					 */
    326 					data.i[0] = data.i[3];
    327 				} else {
    328 					data.i[0] = data.i[1];
    329 					data.i[1] = data.i[3];
    330 				}
    331 			} else {
    332 				if (getreg(rp, rd, &data.l[0], badaddr))
    333 					return (SIMU_FAULT);
    334 			}
    335 		}
    336 
    337 		if (aligndebug) {
    338 			if (sz == 16) {
    339 				printf("data %x %x %x %x\n",
    340 				    data.i[0], data.i[1], data.i[2], data.c[3]);
    341 			} else {
    342 				printf("data %x %x %x %x %x %x %x %x\n",
    343 				    data.c[0], data.c[1], data.c[2], data.c[3],
    344 				    data.c[4], data.c[5], data.c[6], data.c[7]);
    345 			}
    346 		}
    347 
    348 		if (ltlend) {
    349 			if (sz == 1) {
    350 				if (xcopyout_little(&data.c[7], addr,
    351 				    (size_t)sz) != 0)
    352 					goto badret;
    353 			} else if (sz == 2) {
    354 				if (xcopyout_little(&data.s[3], addr,
    355 				    (size_t)sz) != 0)
    356 					goto badret;
    357 			} else if (sz == 4) {
    358 				if (xcopyout_little(&data.i[1], addr,
    359 				    (size_t)sz) != 0)
    360 					goto badret;
    361 			} else {
    362 				if (xcopyout_little(&data.l[0], addr,
    363 				    (size_t)sz) != 0)
    364 					goto badret;
    365 			}
    366 		} else {
    367 			if (sz == 1) {
    368 				if (copyout(&data.c[7], addr, (size_t)sz) == -1)
    369 					goto badret;
    370 			} else if (sz == 2) {
    371 				if (copyout(&data.s[3], addr, (size_t)sz) == -1)
    372 					goto badret;
    373 			} else if (sz == 4) {
    374 				if (copyout(&data.i[1], addr, (size_t)sz) == -1)
    375 					goto badret;
    376 			} else {
    377 				if (copyout(&data.l[0], addr, (size_t)sz) == -1)
    378 					goto badret;
    379 			}
    380 		}
    381 	} else {				/* load */
    382 		if (sz == 1) {
    383 			if (ltlend) {
    384 				if (xcopyin_little(addr, &data.c[7],
    385 				    (size_t)sz) != 0) {
    386 					if (nf)
    387 						data.c[7] = 0;
    388 					else
    389 						goto badret;
    390 				}
    391 			} else {
    392 				if (copyin(addr, &data.c[7],
    393 				    (size_t)sz) == -1) {
    394 					if (nf)
    395 						data.c[7] = 0;
    396 					else
    397 						goto badret;
    398 				}
    399 			}
    400 			/* if signed and the sign bit is set extend it */
    401 			if (((inst >> 22) & 1) && ((data.c[7] >> 7) & 1)) {
    402 				data.i[0] = (uint_t)-1;	/* extend sign bit */
    403 				data.s[2] = (ushort_t)-1;
    404 				data.c[6] = (uchar_t)-1;
    405 			} else {
    406 				data.i[0] = 0;	/* clear upper 32+24 bits */
    407 				data.s[2] = 0;
    408 				data.c[6] = 0;
    409 			}
    410 		} else if (sz == 2) {
    411 			if (ltlend) {
    412 				if (xcopyin_little(addr, &data.s[3],
    413 				    (size_t)sz) != 0) {
    414 					if (nf)
    415 						data.s[3] = 0;
    416 					else
    417 						goto badret;
    418 				}
    419 			} else {
    420 				if (copyin(addr, &data.s[3],
    421 				    (size_t)sz) == -1) {
    422 					if (nf)
    423 						data.s[3] = 0;
    424 					else
    425 						goto badret;
    426 				}
    427 			}
    428 			/* if signed and the sign bit is set extend it */
    429 			if (((inst >> 22) & 1) && ((data.s[3] >> 15) & 1)) {
    430 				data.i[0] = (uint_t)-1;	/* extend sign bit */
    431 				data.s[2] = (ushort_t)-1;
    432 			} else {
    433 				data.i[0] = 0;	/* clear upper 32+16 bits */
    434 				data.s[2] = 0;
    435 			}
    436 		} else if (sz == 4) {
    437 			if (ltlend) {
    438 				if (xcopyin_little(addr, &data.i[1],
    439 				    (size_t)sz) != 0) {
    440 					if (!nf)
    441 						goto badret;
    442 					data.i[1] = 0;
    443 				}
    444 			} else {
    445 				if (copyin(addr, &data.i[1],
    446 				    (size_t)sz) == -1) {
    447 					if (!nf)
    448 						goto badret;
    449 					data.i[1] = 0;
    450 				}
    451 			}
    452 			/* if signed and the sign bit is set extend it */
    453 			if (((inst >> 22) & 1) && ((data.i[1] >> 31) & 1)) {
    454 				data.i[0] = (uint_t)-1;	/* extend sign bit */
    455 			} else {
    456 				data.i[0] = 0;	/* clear upper 32 bits */
    457 			}
    458 		} else {
    459 			if (ltlend) {
    460 				if (xcopyin_little(addr, &data.l[0],
    461 				    (size_t)sz) != 0) {
    462 					if (!nf)
    463 						goto badret;
    464 					data.l[0] = 0;
    465 				}
    466 			} else {
    467 				if (copyin(addr, &data.l[0],
    468 				    (size_t)sz) == -1) {
    469 					if (!nf)
    470 						goto badret;
    471 					data.l[0] = 0;
    472 				}
    473 			}
    474 		}
    475 
    476 		if (aligndebug) {
    477 			if (sz == 16) {
    478 				printf("data %x %x %x %x\n",
    479 				    data.i[0], data.i[1], data.i[2], data.c[3]);
    480 			} else {
    481 				printf("data %x %x %x %x %x %x %x %x\n",
    482 				    data.c[0], data.c[1], data.c[2], data.c[3],
    483 				    data.c[4], data.c[5], data.c[6], data.c[7]);
    484 			}
    485 		}
    486 
    487 		if (floatflg) {		/* if fpu_exists write fpu reg */
    488 			klwp_id_t lwp = ttolwp(curthread);
    489 			kfpu_t *fp = lwptofpu(lwp);
    490 			/* Ensure fp has been enabled */
    491 			if (fpu_exists) {
    492 				if (!(_fp_read_fprs() & FPRS_FEF))
    493 					fp_enable();
    494 			} else {
    495 				if (!fp->fpu_en)
    496 					fp_enable();
    497 			}
    498 			/* if fpu_exists read fpu reg */
    499 			if (fpu_exists) {
    500 				if (fsrflg) {
    501 					_fp_write_pfsr(&data.l[0]);
    502 				} else {
    503 					if (sz == 4)
    504 						_fp_write_pfreg(
    505 						    (unsigned *)&data.i[1], rd);
    506 					if (sz >= 8)
    507 						_fp_write_pdreg(
    508 						    &data.l[0], rd);
    509 					if (sz == 16)
    510 						_fp_write_pdreg(
    511 						    &data.l[1], rd+1);
    512 				}
    513 			} else {
    514 				if (fsrflg) {
    515 					fp->fpu_fsr = data.l[0];
    516 				} else {
    517 					if (sz == 4)
    518 						fp->fpu_fr.fpu_regs[rd] =
    519 						    (unsigned)data.i[1];
    520 					if (sz >= 8)
    521 						fp->fpu_fr.fpu_dregs[rd] =
    522 						    data.l[0];
    523 					if (sz == 16)
    524 						fp->fpu_fr.fpu_dregs[rd+1] =
    525 						    data.l[1];
    526 				}
    527 			}
    528 		} else {
    529 			if (lddstdflg) {		/* split the data */
    530 				if (ltlend) {
    531 					/*
    532 					 * For LDD, each 32-bit word is byte-
    533 					 * swapped individually.  We didn't
    534 					 * do that above, but this will give
    535 					 * us the desired result.
    536 					 */
    537 					data.i[3] = data.i[0];
    538 				} else {
    539 					data.i[3] = data.i[1];
    540 					data.i[1] = data.i[0];
    541 				}
    542 				data.i[0] = 0;
    543 				data.i[2] = 0;
    544 				if (putreg(&data.l[0], rp, rd, badaddr) == -1)
    545 					goto badret;
    546 				if (putreg(&data.l[1], rp, rd+1, badaddr) == -1)
    547 					goto badret;
    548 			} else {
    549 				if (putreg(&data.l[0], rp, rd, badaddr) == -1)
    550 					goto badret;
    551 			}
    552 		}
    553 	}
    554 	return (SIMU_SUCCESS);
    555 badret:
    556 	return (SIMU_FAULT);
    557 }
    558 
    559 
    560 int
    561 simulate_lddstd(struct regs *rp, caddr_t *badaddr)
    562 {
    563 	uint_t	inst, op3, asi = 0;
    564 	uint_t	rd, rs1, rs2;
    565 	int	nf = 0, ltlend = 0, usermode;
    566 	int	immflg;
    567 	uint64_t reven;
    568 	uint64_t rodd;
    569 	caddr_t	addr;
    570 	uint64_t val;
    571 	uint64_t data;
    572 
    573 	usermode = USERMODE(rp->r_tstate);
    574 
    575 	if (usermode)
    576 		inst = fetch_user_instr((caddr_t)rp->r_pc);
    577 	else
    578 		inst = *(uint_t *)rp->r_pc;
    579 
    580 	op3 = (inst >> 19) & 0x3f;
    581 	rd = (inst >> 25) & 0x1f;
    582 	rs1 = (inst >> 14) & 0x1f;
    583 	rs2 = inst & 0x1f;
    584 	immflg = (inst >> 13) & 1;
    585 
    586 	if (USERMODE(rp->r_tstate))
    587 		(void) flush_user_windows_to_stack(NULL);
    588 	else
    589 		flush_windows();
    590 
    591 	if ((op3 >> 4) & 1) {		/* is this LDDA/STDA? */
    592 		if (immflg) {
    593 			asi = (uint_t)(rp->r_tstate >> TSTATE_ASI_SHIFT) &
    594 			    TSTATE_ASI_MASK;
    595 		} else {
    596 			asi = (inst >> 5) & 0xff;
    597 		}
    598 		switch (asi) {
    599 		case ASI_P:
    600 		case ASI_S:
    601 			break;
    602 		case ASI_PNF:
    603 		case ASI_SNF:
    604 			nf = 1;
    605 			break;
    606 		case ASI_PL:
    607 		case ASI_SL:
    608 			ltlend = 1;
    609 			break;
    610 		case ASI_PNFL:
    611 		case ASI_SNFL:
    612 			ltlend = 1;
    613 			nf = 1;
    614 			break;
    615 		case ASI_AIUP:
    616 		case ASI_AIUS:
    617 			usermode = 1;
    618 			break;
    619 		case ASI_AIUPL:
    620 		case ASI_AIUSL:
    621 			usermode = 1;
    622 			ltlend = 1;
    623 			break;
    624 		default:
    625 			return (SIMU_ILLEGAL);
    626 		}
    627 	}
    628 
    629 	if (getreg(rp, rs1, &val, badaddr))
    630 		return (SIMU_FAULT);
    631 	addr = (caddr_t)val;		/* convert to 32/64 bit address */
    632 
    633 	/* check immediate bit and use immediate field or reg (rs2) */
    634 	if (immflg) {
    635 		int imm;
    636 		imm  = inst & 0x1fff;		/* mask out immediate field */
    637 		imm <<= 19;			/* sign extend it */
    638 		imm >>= 19;
    639 		addr += imm;			/* compute address */
    640 	} else {
    641 		if (getreg(rp, rs2, &val, badaddr))
    642 			return (SIMU_FAULT);
    643 		addr += val;
    644 	}
    645 
    646 	/*
    647 	 * T_UNIMP_LDD and T_UNIMP_STD are higher priority than
    648 	 * T_ALIGNMENT.  So we have to make sure that the address is
    649 	 * kosher before trying to use it, because the hardware hasn't
    650 	 * checked it for us yet.
    651 	 */
    652 	if (((uintptr_t)addr & 0x7) != 0) {
    653 		if (curproc->p_fixalignment)
    654 			return (do_unaligned(rp, badaddr));
    655 		else
    656 			return (SIMU_UNALIGN);
    657 	}
    658 
    659 	/*
    660 	 * If this is a 32-bit program, chop the address accordingly.  The
    661 	 * intermediate uintptr_t casts prevent warnings under a certain
    662 	 * compiler, and the temporary 32 bit storage is intended to force
    663 	 * proper code generation and break up what would otherwise be a
    664 	 * quadruple cast.
    665 	 */
    666 	if (curproc->p_model == DATAMODEL_ILP32 && usermode) {
    667 		caddr32_t addr32 = (caddr32_t)(uintptr_t)addr;
    668 		addr = (caddr_t)(uintptr_t)addr32;
    669 	}
    670 
    671 	if ((inst >> 21) & 1) {			/* store */
    672 		if (getreg(rp, rd, &reven, badaddr))
    673 			return (SIMU_FAULT);
    674 		if (getreg(rp, rd+1, &rodd, badaddr))
    675 			return (SIMU_FAULT);
    676 		if (ltlend) {
    677 			reven = BSWAP_32(reven);
    678 			rodd  = BSWAP_32(rodd);
    679 		}
    680 		data = (reven << 32) | rodd;
    681 		if (usermode) {
    682 			if (suword64_nowatch(addr, data) == -1)
    683 				return (SIMU_FAULT);
    684 		} else {
    685 			*(uint64_t *)addr = data;
    686 		}
    687 	} else {				/* load */
    688 		if (usermode) {
    689 			if (fuword64_nowatch(addr, &data)) {
    690 				if (nf)
    691 					data = 0;
    692 				else
    693 					return (SIMU_FAULT);
    694 			}
    695 		} else
    696 			data = *(uint64_t *)addr;
    697 
    698 		reven = (data >> 32);
    699 		rodd  = (uint64_t)(uint32_t)data;
    700 		if (ltlend) {
    701 			reven = BSWAP_32(reven);
    702 			rodd  = BSWAP_32(rodd);
    703 		}
    704 
    705 		if (putreg(&reven, rp, rd, badaddr) == -1)
    706 			return (SIMU_FAULT);
    707 		if (putreg(&rodd, rp, rd+1, badaddr) == -1)
    708 			return (SIMU_FAULT);
    709 	}
    710 	return (SIMU_SUCCESS);
    711 }
    712 
    713 
    714 /*
    715  * simulate popc
    716  */
    717 static int
    718 simulate_popc(struct regs *rp, caddr_t *badaddr, uint_t inst)
    719 {
    720 	uint_t	rd, rs2, rs1;
    721 	uint_t	immflg;
    722 	uint64_t val, cnt = 0;
    723 
    724 	rd = (inst >> 25) & 0x1f;
    725 	rs1 = (inst >> 14) & 0x1f;
    726 	rs2 = inst & 0x1f;
    727 	immflg = (inst >> 13) & 1;
    728 
    729 	if (rs1 > 0)
    730 		return (SIMU_ILLEGAL);
    731 
    732 	(void) flush_user_windows_to_stack(NULL);
    733 
    734 	/* check immediate bit and use immediate field or reg (rs2) */
    735 	if (immflg) {
    736 		int64_t imm;
    737 		imm  = inst & 0x1fff;		/* mask out immediate field */
    738 		imm <<= 51;			/* sign extend it */
    739 		imm >>= 51;
    740 		if (imm != 0) {
    741 			for (cnt = 0; imm != 0; imm &= imm-1)
    742 				cnt++;
    743 		}
    744 	} else {
    745 		if (getreg(rp, rs2, &val, badaddr))
    746 			return (SIMU_FAULT);
    747 		if (val != 0) {
    748 			for (cnt = 0; val != 0; val &= val-1)
    749 				cnt++;
    750 		}
    751 	}
    752 
    753 	if (putreg(&cnt, rp, rd, badaddr) == -1)
    754 		return (SIMU_FAULT);
    755 
    756 	return (SIMU_SUCCESS);
    757 }
    758 
    759 /*
    760  * simulate mulscc
    761  */
    762 static int
    763 simulate_mulscc(struct regs *rp, caddr_t *badaddr, uint_t inst)
    764 {
    765 	uint32_t	s1, s2;
    766 	uint32_t	c, d, v;
    767 	uint_t		rd, rs1;
    768 	int64_t		d64;
    769 	uint64_t	ud64;
    770 	uint64_t	drs1;
    771 
    772 	(void) flush_user_windows_to_stack(NULL);
    773 
    774 	if ((inst >> 13) & 1) {		/* immediate */
    775 		d64 = inst & 0x1fff;
    776 		d64 <<= 51;		/* sign extend it */
    777 		d64 >>= 51;
    778 	} else {
    779 		uint_t		rs2;
    780 		uint64_t	drs2;
    781 
    782 		if (inst & 0x1fe0) {
    783 			return (SIMU_ILLEGAL);
    784 		}
    785 		rs2 = inst & 0x1f;
    786 		if (getreg(rp, rs2, &drs2, badaddr)) {
    787 			return (SIMU_FAULT);
    788 		}
    789 		d64 = (int64_t)drs2;
    790 	}
    791 
    792 	rs1 = (inst >> 14) & 0x1f;
    793 	if (getreg(rp, rs1, &drs1, badaddr)) {
    794 		return (SIMU_FAULT);
    795 	}
    796 	/* icc.n xor icc.v */
    797 	s1 = ((rp->r_tstate & TSTATE_IN) >> (TSTATE_CCR_SHIFT + 3)) ^
    798 	    ((rp->r_tstate & TSTATE_IV) >> (TSTATE_CCR_SHIFT + 1));
    799 	s1 = (s1 << 31) | (((uint32_t)drs1) >> 1);
    800 
    801 	if (rp->r_y & 1) {
    802 		s2 = (uint32_t)d64;
    803 	} else {
    804 		s2 = 0;
    805 	}
    806 	d = s1 + s2;
    807 
    808 	ud64 = (uint64_t)d;
    809 
    810 	/* set the icc flags */
    811 	v = (s1 & s2 & ~d) | (~s1 & ~s2 & d);
    812 	c = (s1 & s2) | (~d & (s1 | s2));
    813 	rp->r_tstate &= ~TSTATE_ICC;
    814 	rp->r_tstate |= (uint64_t)((c >> 31) & 1) << (TSTATE_CCR_SHIFT + 0);
    815 	rp->r_tstate |= (uint64_t)((v >> 31) & 1) << (TSTATE_CCR_SHIFT + 1);
    816 	rp->r_tstate |= (uint64_t)(d ? 0 : 1) << (TSTATE_CCR_SHIFT + 2);
    817 	rp->r_tstate |= (uint64_t)((d >> 31) & 1) << (TSTATE_CCR_SHIFT + 3);
    818 
    819 	if (rp->r_tstate & TSTATE_IC) {
    820 		ud64 |= (1ULL << 32);
    821 	}
    822 
    823 	/* set the xcc flags */
    824 	rp->r_tstate &= ~TSTATE_XCC;
    825 	if (ud64 == 0) {
    826 		rp->r_tstate |= TSTATE_XZ;
    827 	}
    828 
    829 	rd = (inst >> 25) & 0x1f;
    830 	if (putreg(&ud64, rp, rd, badaddr)) {
    831 		return (SIMU_FAULT);
    832 	}
    833 
    834 	d64 = (drs1 << 32) | (uint32_t)rp->r_y;
    835 	d64 >>= 1;
    836 	rp->r_y = (uint32_t)d64;
    837 
    838 	return (SIMU_SUCCESS);
    839 }
    840 
    841 /*
    842  * simulate unimplemented instructions (popc, ldqf{a}, stqf{a})
    843  */
    844 int
    845 simulate_unimp(struct regs *rp, caddr_t *badaddr)
    846 {
    847 	uint_t	inst, optype, op3, asi;
    848 	uint_t	rs1, rd;
    849 	uint_t	ignor, i;
    850 	machpcb_t *mpcb = lwptompcb(ttolwp(curthread));
    851 	int	nomatch = 0;
    852 	caddr_t	addr = (caddr_t)rp->r_pc;
    853 	struct as *as;
    854 	caddr_t	ka;
    855 	pfn_t	pfnum;
    856 	page_t *pp;
    857 	proc_t *p = ttoproc(curthread);
    858 	struct seg *mapseg;
    859 	struct segvn_data *svd;
    860 
    861 	ASSERT(USERMODE(rp->r_tstate));
    862 	inst = fetch_user_instr(addr);
    863 	if (inst == (uint_t)-1) {
    864 		mpcb->mpcb_illexcaddr = addr;
    865 		mpcb->mpcb_illexcinsn = (uint32_t)-1;
    866 		return (SIMU_ILLEGAL);
    867 	}
    868 
    869 	/*
    870 	 * When fixing dirty v8 instructions there's a race if two processors
    871 	 * are executing the dirty executable at the same time.  If one
    872 	 * cleans the instruction as the other is executing it the second
    873 	 * processor will see a clean instruction when it comes through this
    874 	 * code and will return SIMU_ILLEGAL.  To work around the race
    875 	 * this code will keep track of the last illegal instruction seen
    876 	 * by each lwp and will only take action if the illegal instruction
    877 	 * is repeatable.
    878 	 */
    879 	if (addr != mpcb->mpcb_illexcaddr ||
    880 	    inst != mpcb->mpcb_illexcinsn)
    881 		nomatch = 1;
    882 	mpcb->mpcb_illexcaddr = addr;
    883 	mpcb->mpcb_illexcinsn = inst;
    884 
    885 	/* instruction fields */
    886 	i = (inst >> 13) & 0x1;
    887 	rd = (inst >> 25) & 0x1f;
    888 	optype = (inst >> 30) & 0x3;
    889 	op3 = (inst >> 19) & 0x3f;
    890 	ignor = (inst >> 5) & 0xff;
    891 	if (IS_IBIT_SET(inst)) {
    892 		asi = (uint32_t)((rp->r_tstate >> TSTATE_ASI_SHIFT) &
    893 		    TSTATE_ASI_MASK);
    894 	} else {
    895 		asi = ignor;
    896 	}
    897 
    898 	if (IS_VIS1(optype, op3) ||
    899 	    IS_PARTIAL_OR_SHORT_FLOAT_LD_ST(optype, op3, asi) ||
    900 	    IS_FLOAT_QUAD_OP(optype, op3)) {
    901 		klwp_t *lwp = ttolwp(curthread);
    902 		kfpu_t *fp = lwptofpu(lwp);
    903 		if (fpu_exists) {
    904 			if (!(_fp_read_fprs() & FPRS_FEF))
    905 				fp_enable();
    906 			_fp_read_pfsr(&fp->fpu_fsr);
    907 		} else {
    908 			if (!fp->fpu_en)
    909 				fp_enable();
    910 		}
    911 		fp_precise(rp);
    912 		return (SIMU_RETRY);
    913 	}
    914 
    915 	if (optype == 2 && op3 == IOP_V8_POPC) {
    916 		return (simulate_popc(rp, badaddr, inst));
    917 	} else if (optype == 3 && op3 == IOP_V8_POPC) {
    918 		return (SIMU_ILLEGAL);
    919 	} else if (optype == OP_V8_ARITH && op3 == IOP_V8_MULScc) {
    920 		return (simulate_mulscc(rp, badaddr, inst));
    921 	}
    922 
    923 	if (optype == OP_V8_LDSTR) {
    924 		if (op3 == IOP_V8_LDQF || op3 == IOP_V8_LDQFA ||
    925 		    op3 == IOP_V8_STQF || op3 == IOP_V8_STQFA)
    926 			return (do_unaligned(rp, badaddr));
    927 	}
    928 
    929 	/* This is a new instruction so illexccnt should also be set. */
    930 	if (nomatch) {
    931 		mpcb->mpcb_illexccnt = 0;
    932 		return (SIMU_RETRY);
    933 	}
    934 
    935 	/*
    936 	 * In order to keep us from entering into an infinite loop while
    937 	 * attempting to clean up faulty instructions, we will return
    938 	 * SIMU_ILLEGAL once we've cleaned up the instruction as much
    939 	 * as we can, and still end up here.
    940 	 */
    941 	if (mpcb->mpcb_illexccnt >= 3)
    942 		return (SIMU_ILLEGAL);
    943 
    944 	mpcb->mpcb_illexccnt += 1;
    945 
    946 	/*
    947 	 * The rest of the code handles v8 binaries with instructions
    948 	 * that have dirty (non-zero) bits in reserved or 'ignored'
    949 	 * fields; these will cause core dumps on v9 machines.
    950 	 *
    951 	 * We only clean dirty instructions in 32-bit programs (ie, v8)
    952 	 * running on SPARCv9 processors.  True v9 programs are forced
    953 	 * to use the instruction set as intended.
    954 	 */
    955 	if (lwp_getdatamodel(curthread->t_lwp) != DATAMODEL_ILP32)
    956 		return (SIMU_ILLEGAL);
    957 	switch (optype) {
    958 	case OP_V8_BRANCH:
    959 	case OP_V8_CALL:
    960 		return (SIMU_ILLEGAL);	/* these don't have ignored fields */
    961 		/*NOTREACHED*/
    962 	case OP_V8_ARITH:
    963 		switch (op3) {
    964 		case IOP_V8_RETT:
    965 			if (rd == 0 && !(i == 0 && ignor))
    966 				return (SIMU_ILLEGAL);
    967 			if (rd)
    968 				inst &= ~(0x1f << 25);
    969 			if (i == 0 && ignor)
    970 				inst &= ~(0xff << 5);
    971 			break;
    972 		case IOP_V8_TCC:
    973 			if (i == 0 && ignor != 0) {
    974 				inst &= ~(0xff << 5);
    975 			} else if (i == 1 && (((inst >> 7) & 0x3f) != 0)) {
    976 				inst &= ~(0x3f << 7);
    977 			} else {
    978 				return (SIMU_ILLEGAL);
    979 			}
    980 			break;
    981 		case IOP_V8_JMPL:
    982 		case IOP_V8_RESTORE:
    983 		case IOP_V8_SAVE:
    984 			if ((op3 == IOP_V8_RETT && rd) ||
    985 			    (i == 0 && ignor)) {
    986 				inst &= ~(0xff << 5);
    987 			} else {
    988 				return (SIMU_ILLEGAL);
    989 			}
    990 			break;
    991 		case IOP_V8_FCMP:
    992 			if (rd == 0)
    993 				return (SIMU_ILLEGAL);
    994 			inst &= ~(0x1f << 25);
    995 			break;
    996 		case IOP_V8_RDASR:
    997 			rs1 = ((inst >> 14) & 0x1f);
    998 			if (rs1 == 1 || (rs1 >= 7 && rs1 <= 14)) {
    999 				/*
   1000 				 * The instruction specifies an invalid
   1001 				 * state register - better bail out than
   1002 				 * "fix" it when we're not sure what was
   1003 				 * intended.
   1004 				 */
   1005 				return (SIMU_ILLEGAL);
   1006 			}
   1007 				/*
   1008 				 * Note: this case includes the 'stbar'
   1009 				 * instruction (rs1 == 15 && i == 0).
   1010 				 */
   1011 				if ((ignor = (inst & 0x3fff)) != 0)
   1012 					inst &= ~(0x3fff);
   1013 			break;
   1014 		case IOP_V8_SRA:
   1015 		case IOP_V8_SRL:
   1016 		case IOP_V8_SLL:
   1017 			if (ignor == 0)
   1018 				return (SIMU_ILLEGAL);
   1019 			inst &= ~(0xff << 5);
   1020 			break;
   1021 		case IOP_V8_ADD:
   1022 		case IOP_V8_AND:
   1023 		case IOP_V8_OR:
   1024 		case IOP_V8_XOR:
   1025 		case IOP_V8_SUB:
   1026 		case IOP_V8_ANDN:
   1027 		case IOP_V8_ORN:
   1028 		case IOP_V8_XNOR:
   1029 		case IOP_V8_ADDC:
   1030 		case IOP_V8_UMUL:
   1031 		case IOP_V8_SMUL:
   1032 		case IOP_V8_SUBC:
   1033 		case IOP_V8_UDIV:
   1034 		case IOP_V8_SDIV:
   1035 		case IOP_V8_ADDcc:
   1036 		case IOP_V8_ANDcc:
   1037 		case IOP_V8_ORcc:
   1038 		case IOP_V8_XORcc:
   1039 		case IOP_V8_SUBcc:
   1040 		case IOP_V8_ANDNcc:
   1041 		case IOP_V8_ORNcc:
   1042 		case IOP_V8_XNORcc:
   1043 		case IOP_V8_ADDCcc:
   1044 		case IOP_V8_UMULcc:
   1045 		case IOP_V8_SMULcc:
   1046 		case IOP_V8_SUBCcc:
   1047 		case IOP_V8_UDIVcc:
   1048 		case IOP_V8_SDIVcc:
   1049 		case IOP_V8_TADDcc:
   1050 		case IOP_V8_TSUBcc:
   1051 		case IOP_V8_TADDccTV:
   1052 		case IOP_V8_TSUBccTV:
   1053 		case IOP_V8_MULScc:
   1054 		case IOP_V8_WRASR:
   1055 		case IOP_V8_FLUSH:
   1056 			if (i != 0 || ignor == 0)
   1057 				return (SIMU_ILLEGAL);
   1058 			inst &= ~(0xff << 5);
   1059 			break;
   1060 		default:
   1061 			return (SIMU_ILLEGAL);
   1062 		}
   1063 		break;
   1064 	case OP_V8_LDSTR:
   1065 		switch (op3) {
   1066 		case IOP_V8_STFSR:
   1067 		case IOP_V8_LDFSR:
   1068 			if (rd == 0 && !(i == 0 && ignor))
   1069 				return (SIMU_ILLEGAL);
   1070 			if (rd)
   1071 				inst &= ~(0x1f << 25);
   1072 			if (i == 0 && ignor)
   1073 				inst &= ~(0xff << 5);
   1074 			break;
   1075 		default:
   1076 			if (optype == OP_V8_LDSTR && !IS_LDST_ALT(op3) &&
   1077 			    i == 0 && ignor)
   1078 				inst &= ~(0xff << 5);
   1079 			else
   1080 				return (SIMU_ILLEGAL);
   1081 			break;
   1082 		}
   1083 		break;
   1084 	default:
   1085 		return (SIMU_ILLEGAL);
   1086 	}
   1087 
   1088 	as = p->p_as;
   1089 
   1090 	AS_LOCK_ENTER(as, &as->a_lock, RW_READER);
   1091 	mapseg = as_findseg(as, (caddr_t)rp->r_pc, 0);
   1092 	ASSERT(mapseg != NULL);
   1093 	svd = (struct segvn_data *)mapseg->s_data;
   1094 
   1095 	/*
   1096 	 * We only create COW page for MAP_PRIVATE mappings.
   1097 	 */
   1098 	SEGVN_LOCK_ENTER(as, &svd->lock, RW_READER);
   1099 	if ((svd->type & MAP_TYPE) & MAP_SHARED) {
   1100 		SEGVN_LOCK_EXIT(as, &svd->lock);
   1101 		AS_LOCK_EXIT(as, &as->a_lock);
   1102 		return (SIMU_ILLEGAL);
   1103 	}
   1104 	SEGVN_LOCK_EXIT(as, &svd->lock);
   1105 	AS_LOCK_EXIT(as, &as->a_lock);
   1106 
   1107 	/*
   1108 	 * A "flush" instruction using the user PC's vaddr will not work
   1109 	 * here, at least on Spitfire. Instead we create a temporary kernel
   1110 	 * mapping to the user's text page, then modify and flush that.
   1111 	 * Break COW by locking user page.
   1112 	 */
   1113 	if (as_fault(as->a_hat, as, (caddr_t)(rp->r_pc & PAGEMASK), PAGESIZE,
   1114 	    F_SOFTLOCK, S_READ))
   1115 		return (SIMU_FAULT);
   1116 
   1117 	AS_LOCK_ENTER(as, &as->a_lock, RW_READER);
   1118 	pfnum = hat_getpfnum(as->a_hat, (caddr_t)rp->r_pc);
   1119 	AS_LOCK_EXIT(as, &as->a_lock);
   1120 	if (pf_is_memory(pfnum)) {
   1121 		pp = page_numtopp_nolock(pfnum);
   1122 		ASSERT(pp == NULL || PAGE_LOCKED(pp));
   1123 	} else {
   1124 		(void) as_fault(as->a_hat, as, (caddr_t)(rp->r_pc & PAGEMASK),
   1125 		    PAGESIZE, F_SOFTUNLOCK, S_READ);
   1126 		return (SIMU_FAULT);
   1127 	}
   1128 
   1129 	AS_LOCK_ENTER(as, &as->a_lock, RW_READER);
   1130 	ka = ppmapin(pp, PROT_READ|PROT_WRITE, (caddr_t)rp->r_pc);
   1131 	*(uint_t *)(ka + (uintptr_t)(rp->r_pc % PAGESIZE)) = inst;
   1132 	doflush(ka + (uintptr_t)(rp->r_pc % PAGESIZE));
   1133 	ppmapout(ka);
   1134 	AS_LOCK_EXIT(as, &as->a_lock);
   1135 
   1136 	(void) as_fault(as->a_hat, as, (caddr_t)(rp->r_pc & PAGEMASK),
   1137 	    PAGESIZE, F_SOFTUNLOCK, S_READ);
   1138 	return (SIMU_RETRY);
   1139 }
   1140 
   1141 /*
   1142  * Simulate a "rd %tick" or "rd %stick" (%asr24) instruction.
   1143  */
   1144 int
   1145 simulate_rdtick(struct regs *rp)
   1146 {
   1147 	uint_t	inst, op, op3, rd, rs1, i;
   1148 	caddr_t badaddr;
   1149 
   1150 	inst = fetch_user_instr((caddr_t)rp->r_pc);
   1151 	op   = (inst >> 30) & 0x3;
   1152 	rd   = (inst >> 25) & 0x1F;
   1153 	op3  = (inst >> 19) & 0x3F;
   1154 	i    = (inst >> 13) & 0x1;
   1155 
   1156 	/*
   1157 	 * Make sure this is either a %tick read (rs1 == 0x4) or
   1158 	 * a %stick read (rs1 == 0x18) instruction.
   1159 	 */
   1160 	if (op == 2 && op3 == 0x28 && i == 0) {
   1161 		rs1 = (inst >> 14) & 0x1F;
   1162 
   1163 		if (rs1 == 0x4) {
   1164 			uint64_t tick;
   1165 			(void) flush_user_windows_to_stack(NULL);
   1166 			tick = gettick_counter();
   1167 			if (putreg(&tick, rp, rd, &badaddr) == 0)
   1168 				return (SIMU_SUCCESS);
   1169 		} else if (rs1 == 0x18) {
   1170 			uint64_t stick;
   1171 			(void) flush_user_windows_to_stack(NULL);
   1172 			stick = gethrtime_unscaled();
   1173 			if (putreg(&stick, rp, rd, &badaddr) == 0)
   1174 				return (SIMU_SUCCESS);
   1175 		}
   1176 	}
   1177 
   1178 	return (SIMU_FAULT);
   1179 }
   1180 
   1181 /*
   1182  * Get the value of a register for instruction simulation
   1183  * by using the regs or window structure pointers.
   1184  * Return 0 for success, and -1 for failure.  If there is a failure,
   1185  * save the faulting address using badaddr pointer.
   1186  * We have 64 bit globals and outs, and 32 or 64 bit ins and locals.
   1187  * Don't truncate globals/outs for 32 bit programs, for v8+ support.
   1188  */
   1189 int
   1190 getreg(struct regs *rp, uint_t reg, uint64_t *val, caddr_t *badaddr)
   1191 {
   1192 	uint64_t *rgs, *sp;
   1193 	int rv = 0;
   1194 
   1195 	rgs = (uint64_t *)&rp->r_ps;		/* globals and outs */
   1196 	sp = (uint64_t *)rp->r_sp;		/* ins and locals */
   1197 	if (reg == 0) {
   1198 		*val = 0;
   1199 	} else if (reg < 16) {
   1200 		*val = rgs[reg];
   1201 	} else if (IS_V9STACK(sp)) {
   1202 		uint64_t *rw = (uint64_t *)((uintptr_t)sp + V9BIAS64);
   1203 		uint64_t *addr = (uint64_t *)&rw[reg - 16];
   1204 		uint64_t res;
   1205 
   1206 		if (USERMODE(rp->r_tstate)) {
   1207 			if (fuword64_nowatch(addr, &res) == -1) {
   1208 				*badaddr = (caddr_t)addr;
   1209 				rv = -1;
   1210 			}
   1211 		} else {
   1212 			res = *addr;
   1213 		}
   1214 		*val = res;
   1215 	} else {
   1216 		caddr32_t sp32 = (caddr32_t)(uintptr_t)sp;
   1217 		uint32_t *rw = (uint32_t *)(uintptr_t)sp32;
   1218 		uint32_t *addr = (uint32_t *)&rw[reg - 16];
   1219 		uint32_t res;
   1220 
   1221 		if (USERMODE(rp->r_tstate)) {
   1222 			if (fuword32_nowatch(addr, &res) == -1) {
   1223 				*badaddr = (caddr_t)addr;
   1224 				rv = -1;
   1225 			}
   1226 		} else {
   1227 			res = *addr;
   1228 		}
   1229 		*val = (uint64_t)res;
   1230 	}
   1231 	return (rv);
   1232 }
   1233 
   1234 /*
   1235  * Set the value of a register after instruction simulation
   1236  * by using the regs or window structure pointers.
   1237  * Return 0 for succes -1 failure.
   1238  * save the faulting address using badaddr pointer.
   1239  * We have 64 bit globals and outs, and 32 or 64 bit ins and locals.
   1240  * Don't truncate globals/outs for 32 bit programs, for v8+ support.
   1241  */
   1242 int
   1243 putreg(uint64_t	*data, struct regs *rp, uint_t reg, caddr_t *badaddr)
   1244 {
   1245 	uint64_t *rgs, *sp;
   1246 	int rv = 0;
   1247 
   1248 	rgs = (uint64_t *)&rp->r_ps;		/* globals and outs */
   1249 	sp = (uint64_t *)rp->r_sp;		/* ins and locals */
   1250 	if (reg == 0) {
   1251 		return (0);
   1252 	} else if (reg < 16) {
   1253 		rgs[reg] = *data;
   1254 	} else if (IS_V9STACK(sp)) {
   1255 		uint64_t *rw = (uint64_t *)((uintptr_t)sp + V9BIAS64);
   1256 		uint64_t *addr = (uint64_t *)&rw[reg - 16];
   1257 		uint64_t res;
   1258 
   1259 		if (USERMODE(rp->r_tstate)) {
   1260 			struct machpcb *mpcb = lwptompcb(curthread->t_lwp);
   1261 
   1262 			res = *data;
   1263 			if (suword64_nowatch(addr, res) != 0) {
   1264 				*badaddr = (caddr_t)addr;
   1265 				rv = -1;
   1266 			}
   1267 			/*
   1268 			 * We have changed a local or in register;
   1269 			 * nuke the watchpoint return windows.
   1270 			 */
   1271 			mpcb->mpcb_rsp[0] = NULL;
   1272 			mpcb->mpcb_rsp[1] = NULL;
   1273 		} else {
   1274 			res = *data;
   1275 			*addr = res;
   1276 		}
   1277 	} else {
   1278 		caddr32_t sp32 = (caddr32_t)(uintptr_t)sp;
   1279 		uint32_t *rw = (uint32_t *)(uintptr_t)sp32;
   1280 		uint32_t *addr = (uint32_t *)&rw[reg - 16];
   1281 		uint32_t res;
   1282 
   1283 		if (USERMODE(rp->r_tstate)) {
   1284 			struct machpcb *mpcb = lwptompcb(curthread->t_lwp);
   1285 
   1286 			res = (uint_t)*data;
   1287 			if (suword32_nowatch(addr, res) != 0) {
   1288 				*badaddr = (caddr_t)addr;
   1289 				rv = -1;
   1290 			}
   1291 			/*
   1292 			 * We have changed a local or in register;
   1293 			 * nuke the watchpoint return windows.
   1294 			 */
   1295 			mpcb->mpcb_rsp[0] = NULL;
   1296 			mpcb->mpcb_rsp[1] = NULL;
   1297 
   1298 		} else {
   1299 			res = (uint_t)*data;
   1300 			*addr = res;
   1301 		}
   1302 	}
   1303 	return (rv);
   1304 }
   1305 
   1306 /*
   1307  * Calculate a memory reference address from instruction
   1308  * operands, used to return the address of a fault, instead
   1309  * of the instruction when an error occurs.  This is code that is
   1310  * common with most of the routines that simulate instructions.
   1311  */
   1312 int
   1313 calc_memaddr(struct regs *rp, caddr_t *badaddr)
   1314 {
   1315 	uint_t	inst;
   1316 	uint_t	rd, rs1, rs2;
   1317 	int	sz;
   1318 	int	immflg;
   1319 	int	floatflg;
   1320 	caddr_t  addr;
   1321 	uint64_t val;
   1322 
   1323 	if (USERMODE(rp->r_tstate))
   1324 		inst = fetch_user_instr((caddr_t)rp->r_pc);
   1325 	else
   1326 		inst = *(uint_t *)rp->r_pc;
   1327 
   1328 	rd = (inst >> 25) & 0x1f;
   1329 	rs1 = (inst >> 14) & 0x1f;
   1330 	rs2 = inst & 0x1f;
   1331 	floatflg = (inst >> 24) & 1;
   1332 	immflg = (inst >> 13) & 1;
   1333 
   1334 	if (floatflg) {
   1335 		switch ((inst >> 19) & 3) {	/* map size bits to a number */
   1336 		case 0: sz = 4; break;		/* ldf/stf */
   1337 		case 1: return (0);		/* ld[x]fsr/st[x]fsr */
   1338 		case 2: sz = 16; break;		/* ldqf/stqf */
   1339 		case 3: sz = 8; break;		/* lddf/stdf */
   1340 		}
   1341 		/*
   1342 		 * Fix to access extra double register encoding plus
   1343 		 * compensate to access the correct fpu_dreg.
   1344 		 */
   1345 		if (sz > 4) {
   1346 			if ((rd & 1) == 1)
   1347 				rd = (rd & 0x1e) | 0x20;
   1348 			rd = rd >> 1;
   1349 		}
   1350 	} else {
   1351 		switch ((inst >> 19) & 0xf) {	/* map size bits to a number */
   1352 		case 0:				/* lduw */
   1353 		case 4:				/* stw */
   1354 		case 8:				/* ldsw */
   1355 		case 0xf:			/* swap */
   1356 			sz = 4; break;
   1357 		case 1:				/* ldub */
   1358 		case 5:				/* stb */
   1359 		case 9:				/* ldsb */
   1360 		case 0xd:			/* ldstub */
   1361 			sz = 1; break;
   1362 		case 2:				/* lduh */
   1363 		case 6:				/* sth */
   1364 		case 0xa:			/* ldsh */
   1365 			sz = 2; break;
   1366 		case 3:				/* ldd */
   1367 		case 7:				/* std */
   1368 		case 0xb:			/* ldx */
   1369 		case 0xe:			/* stx */
   1370 			sz = 8; break;
   1371 		}
   1372 	}
   1373 
   1374 	if (USERMODE(rp->r_tstate))
   1375 		(void) flush_user_windows_to_stack(NULL);
   1376 	else
   1377 		flush_windows();
   1378 
   1379 	if (getreg(rp, rs1, &val, badaddr))
   1380 		return (SIMU_FAULT);
   1381 	addr = (caddr_t)val;
   1382 
   1383 	/* check immediate bit and use immediate field or reg (rs2) */
   1384 	if (immflg) {
   1385 		int imm;
   1386 		imm = inst & 0x1fff;		/* mask out immediate field */
   1387 		imm <<= 19;			/* sign extend it */
   1388 		imm >>= 19;
   1389 		addr += imm;			/* compute address */
   1390 	} else {
   1391 		if (getreg(rp, rs2, &val, badaddr))
   1392 			return (SIMU_FAULT);
   1393 		addr += val;
   1394 	}
   1395 
   1396 	/*
   1397 	 * If this is a 32-bit program, chop the address accordingly.  The
   1398 	 * intermediate uintptr_t casts prevent warnings under a certain
   1399 	 * compiler, and the temporary 32 bit storage is intended to force
   1400 	 * proper code generation and break up what would otherwise be a
   1401 	 * quadruple cast.
   1402 	 */
   1403 	if (curproc->p_model == DATAMODEL_ILP32 && USERMODE(rp->r_tstate)) {
   1404 		caddr32_t addr32 = (caddr32_t)(uintptr_t)addr;
   1405 		addr = (caddr_t)(uintptr_t)addr32;
   1406 	}
   1407 
   1408 	*badaddr = addr;
   1409 	return ((uintptr_t)addr & (sz - 1) ? SIMU_UNALIGN : SIMU_SUCCESS);
   1410 }
   1411 
   1412 /*
   1413  * Return the size of a load or store instruction (1, 2, 4, 8, 16, 64).
   1414  * Also compute the precise address by instruction disassembly.
   1415  * (v9 page faults only provide the page address via the hardware.)
   1416  * Return 0 on failure (not a load or store instruction).
   1417  */
   1418 int
   1419 instr_size(struct regs *rp, caddr_t *addrp, enum seg_rw rdwr)
   1420 {
   1421 	uint_t	inst, op3, asi;
   1422 	uint_t	rd, rs1, rs2;
   1423 	int	sz = 0;
   1424 	int	immflg;
   1425 	int	floatflg;
   1426 	caddr_t	addr;
   1427 	caddr_t badaddr;
   1428 	uint64_t val;
   1429 
   1430 	if (rdwr == S_EXEC) {
   1431 		*addrp = (caddr_t)rp->r_pc;
   1432 		return (4);
   1433 	}
   1434 
   1435 	/*
   1436 	 * Fetch the instruction from user-level.
   1437 	 * We would like to assert this:
   1438 	 *   ASSERT(USERMODE(rp->r_tstate));
   1439 	 * but we can't because we can reach this point from a
   1440 	 * register window underflow/overflow and the v9 wbuf
   1441 	 * traps call trap() with T_USER even though r_tstate
   1442 	 * indicates a system trap, not a user trap.
   1443 	 */
   1444 	inst = fetch_user_instr((caddr_t)rp->r_pc);
   1445 
   1446 	op3 = (inst >> 19) & 0x3f;
   1447 	rd = (inst >> 25) & 0x1f;
   1448 	rs1 = (inst >> 14) & 0x1f;
   1449 	rs2 = inst & 0x1f;
   1450 	floatflg = (inst >> 24) & 1;
   1451 	immflg = (inst >> 13) & 1;
   1452 
   1453 	/* if not load or store do nothing.  can't happen? */
   1454 	if ((inst >> 30) != 3)
   1455 		return (0);
   1456 
   1457 	if (immflg)
   1458 		asi = (uint_t)((rp->r_tstate >> TSTATE_ASI_SHIFT) &
   1459 		    TSTATE_ASI_MASK);
   1460 	else
   1461 		asi = (inst >> 5) & 0xff;
   1462 
   1463 	if (floatflg) {
   1464 		/* check for ld/st alternate and highest defined V9 asi */
   1465 		if ((op3 & 0x30) == 0x30 && asi > ASI_SNFL) {
   1466 			sz = extended_asi_size(asi);
   1467 		} else {
   1468 			switch (op3 & 3) {
   1469 			case 0:
   1470 				sz = 4;			/* ldf/stf/cas */
   1471 				break;
   1472 			case 1:
   1473 				if (rd == 0)
   1474 					sz = 4;		/* ldfsr/stfsr */
   1475 				else
   1476 					sz = 8;		/* ldxfsr/stxfsr */
   1477 				break;
   1478 			case 2:
   1479 				if (op3 == 0x3e)
   1480 					sz = 8;		/* casx */
   1481 				else
   1482 					sz = 16;	/* ldqf/stqf */
   1483 				break;
   1484 			case 3:
   1485 				sz = 8;			/* lddf/stdf */
   1486 				break;
   1487 			}
   1488 		}
   1489 	} else {
   1490 		switch (op3 & 0xf) {		/* map size bits to a number */
   1491 		case 0:				/* lduw */
   1492 		case 4:				/* stw */
   1493 		case 8:				/* ldsw */
   1494 		case 0xf:			/* swap */
   1495 			sz = 4; break;
   1496 		case 1:				/* ldub */
   1497 		case 5:				/* stb */
   1498 		case 9:				/* ldsb */
   1499 		case 0xd:			/* ldstub */
   1500 			sz = 1; break;
   1501 		case 2:				/* lduh */
   1502 		case 6:				/* sth */
   1503 		case 0xa:			/* ldsh */
   1504 			sz = 2; break;
   1505 		case 3:				/* ldd */
   1506 		case 7:				/* std */
   1507 		case 0xb:			/* ldx */
   1508 		case 0xe:			/* stx */
   1509 			sz = 8; break;
   1510 		}
   1511 	}
   1512 
   1513 	if (sz == 0)	/* can't happen? */
   1514 		return (0);
   1515 	(void) flush_user_windows_to_stack(NULL);
   1516 
   1517 	if (getreg(rp, rs1, &val, &badaddr))
   1518 		return (0);
   1519 	addr = (caddr_t)val;
   1520 
   1521 	/* cas/casx don't use rs2 / simm13 to compute the address */
   1522 	if ((op3 & 0x3d) != 0x3c) {
   1523 		/* check immediate bit and use immediate field or reg (rs2) */
   1524 		if (immflg) {
   1525 			int imm;
   1526 			imm  = inst & 0x1fff;	/* mask out immediate field */
   1527 			imm <<= 19;		/* sign extend it */
   1528 			imm >>= 19;
   1529 			addr += imm;		/* compute address */
   1530 		} else {
   1531 			/*
   1532 			 * asi's in the 0xCx range are partial store
   1533 			 * instructions.  For these, rs2 is a mask, not part of
   1534 			 * the address.
   1535 			 */
   1536 			if (!(floatflg && (asi & 0xf0) == 0xc0)) {
   1537 				if (getreg(rp, rs2, &val, &badaddr))
   1538 					return (0);
   1539 				addr += val;
   1540 			}
   1541 		}
   1542 	}
   1543 
   1544 	/*
   1545 	 * If this is a 32-bit program, chop the address accordingly.  The
   1546 	 * intermediate uintptr_t casts prevent warnings under a certain
   1547 	 * compiler, and the temporary 32 bit storage is intended to force
   1548 	 * proper code generation and break up what would otherwise be a
   1549 	 * quadruple cast.
   1550 	 */
   1551 	if (curproc->p_model == DATAMODEL_ILP32) {
   1552 		caddr32_t addr32 = (caddr32_t)(uintptr_t)addr;
   1553 		addr = (caddr_t)(uintptr_t)addr32;
   1554 	}
   1555 
   1556 	*addrp = addr;
   1557 	ASSERT(sz != 0);
   1558 	return (sz);
   1559 }
   1560 
   1561 /*
   1562  * Fetch an instruction from user-level.
   1563  * Deal with watchpoints, if they are in effect.
   1564  */
   1565 int32_t
   1566 fetch_user_instr(caddr_t vaddr)
   1567 {
   1568 	proc_t *p = curproc;
   1569 	int32_t instr;
   1570 
   1571 	/*
   1572 	 * If this is a 32-bit program, chop the address accordingly.  The
   1573 	 * intermediate uintptr_t casts prevent warnings under a certain
   1574 	 * compiler, and the temporary 32 bit storage is intended to force
   1575 	 * proper code generation and break up what would otherwise be a
   1576 	 * quadruple cast.
   1577 	 */
   1578 	if (p->p_model == DATAMODEL_ILP32) {
   1579 		caddr32_t vaddr32 = (caddr32_t)(uintptr_t)vaddr;
   1580 		vaddr = (caddr_t)(uintptr_t)vaddr32;
   1581 	}
   1582 
   1583 	if (fuword32_nowatch(vaddr, (uint32_t *)&instr) == -1)
   1584 		instr = -1;
   1585 
   1586 	return (instr);
   1587 }
   1588