Home | History | Annotate | Download | only in os
      1 /*
      2  * CDDL HEADER START
      3  *
      4  * The contents of this file are subject to the terms of the
      5  * Common Development and Distribution License (the "License").
      6  * You may not use this file except in compliance with the License.
      7  *
      8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
      9  * or http://www.opensolaris.org/os/licensing.
     10  * See the License for the specific language governing permissions
     11  * and limitations under the License.
     12  *
     13  * When distributing Covered Code, include this CDDL HEADER in each
     14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
     15  * If applicable, add the following below this CDDL HEADER, with the
     16  * fields enclosed by brackets "[]" replaced with your own identifying
     17  * information: Portions Copyright [yyyy] [name of copyright owner]
     18  *
     19  * CDDL HEADER END
     20  */
     21 /*
     22  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
     23  * Use is subject to license terms.
     24  */
     25 
     26 /* common code with bug fixes from original version in trap.c */
     27 
     28 #include <sys/param.h>
     29 #include <sys/types.h>
     30 #include <sys/systm.h>
     31 #include <sys/archsystm.h>
     32 #include <sys/vmsystm.h>
     33 #include <sys/fpu/fpusystm.h>
     34 #include <sys/fpu/fpu_simulator.h>
     35 #include <sys/inline.h>
     36 #include <sys/debug.h>
     37 #include <sys/privregs.h>
     38 #include <sys/machpcb.h>
     39 #include <sys/simulate.h>
     40 #include <sys/proc.h>
     41 #include <sys/cmn_err.h>
     42 #include <sys/stack.h>
     43 #include <sys/watchpoint.h>
     44 #include <sys/trap.h>
     45 #include <sys/machtrap.h>
     46 #include <sys/mman.h>
     47 #include <sys/asi.h>
     48 #include <sys/copyops.h>
     49 #include <vm/as.h>
     50 #include <vm/page.h>
     51 #include <sys/model.h>
     52 #include <vm/seg_vn.h>
     53 #include <sys/byteorder.h>
     54 
     /*
 * Instruction-field predicates used when decoding a 32-bit instruction
 * word.  Every argument is parenthesized so that an expression argument
 * (for example "inst & mask") is compared as a whole.  Note that
 * IS_FLOAT_QUAD_OP and IS_PARTIAL_OR_SHORT_FLOAT_LD_ST evaluate op3
 * more than once, so do not pass expressions with side effects.
 */

/* non-zero when bit 13 (0x2000) of the instruction word is set */
#define	IS_IBIT_SET(x)	((x) & 0x2000)

/* op field 2 with op3 0x36 */
#define	IS_VIS1(op, op3)	((op) == 2 && (op3) == 0x36)

/* op field 2 with op3 0x34 or 0x35 */
#define	IS_FLOAT_QUAD_OP(op, op3)	((op) == 2 && ((op3) == 0x34 ||	\
		(op3) == 0x35))

/* op field 3, op3 LDDFA or STDFA, and an ASI numerically above ASI_SNFL */
#define	IS_PARTIAL_OR_SHORT_FLOAT_LD_ST(op, op3, asi)		\
		((op) == 3 && ((op3) == IOP_V8_LDDFA ||		\
		(op3) == IOP_V8_STDFA) && (asi) > ASI_SNFL)

/* set non-zero to make do_unaligned() print what it decodes and copies */
static int aligndebug = 0;
     64 
     65 /*
     66  * For the sake of those who must be compatible with unaligned
     67  * architectures, users can link their programs to use a
     68  * corrective trap handler that will fix unaligned references
     69  * a special trap #6 (T_FIX_ALIGN) enables this 'feature'.
     70  * Returns 1 for success, 0 for failure.
     71  */
     72 
     73 int
     74 do_unaligned(struct regs *rp, caddr_t *badaddr)
     75 {
     76 	uint_t	inst, op3, asi = 0;
     77 	uint_t	rd, rs1, rs2;
     78 	int	sz, nf = 0, ltlend = 0;
     79 	int	floatflg;
     80 	int	fsrflg;
     81 	int	immflg;
     82 	int	lddstdflg;
     83 	caddr_t	addr;
     84 	uint64_t val;
     85 	union {
     86 		uint64_t	l[2];
     87 		uint32_t	i[4];
     88 		uint16_t	s[8];
     89 		uint8_t		c[16];
     90 	} data;
     91 
     92 	ASSERT(USERMODE(rp->r_tstate));
     93 	inst = fetch_user_instr((caddr_t)rp->r_pc);
     94 
     95 	op3 = (inst >> 19) & 0x3f;
     96 	rd = (inst >> 25) & 0x1f;
     97 	rs1 = (inst >> 14) & 0x1f;
     98 	rs2 = inst & 0x1f;
     99 	floatflg = (inst >> 24) & 1;
    100 	immflg = (inst >> 13) & 1;
    101 	lddstdflg = fsrflg = 0;
    102 
    103 	/* if not load or store do nothing */
    104 	if ((inst >> 30) != 3)
    105 		return (0);
    106 
    107 	/* if ldstub or swap, do nothing */
    108 	if ((inst & 0xc1680000) == 0xc0680000)
    109 		return (0);
    110 
    111 	/* if cas/casx, do nothing */
    112 	if ((inst & 0xc1e00000) == 0xc1e00000)
    113 		return (0);
    114 
    115 	if (floatflg) {
    116 		switch ((inst >> 19) & 3) {	/* map size bits to a number */
    117 		case 0: sz = 4;
    118 			break;			/* ldf{a}/stf{a} */
    119 		case 1: fsrflg = 1;
    120 			if (rd == 0)
    121 				sz = 4;		/* ldfsr/stfsr */
    122 			else  if (rd == 1)
    123 				sz = 8;		/* ldxfsr/stxfsr */
    124 			else
    125 				return (SIMU_ILLEGAL);
    126 			break;
    127 		case 2: sz = 16;
    128 			break;		/* ldqf{a}/stqf{a} */
    129 		case 3: sz = 8;
    130 			break;		/* lddf{a}/stdf{a} */
    131 		}
    132 		/*
    133 		 * Fix to access extra double register encoding plus
    134 		 * compensate to access the correct fpu_dreg.
    135 		 */
    136 		if ((sz > 4) && (fsrflg == 0)) {
    137 			if ((rd & 1) == 1)
    138 				rd = (rd & 0x1e) | 0x20;
    139 			rd = rd >> 1;
    140 			if ((sz == 16) && ((rd & 0x1) != 0))
    141 				return (SIMU_ILLEGAL);
    142 		}
    143 	} else {
    144 		int sz_bits = (inst >> 19) & 0xf;
    145 		switch (sz_bits) {		/* map size bits to a number */
    146 		case 0:				/* lduw{a} */
    147 		case 4:				/* stw{a} */
    148 		case 8:				/* ldsw{a} */
    149 		case 0xf:			/* swap */
    150 			sz = 4; break;
    151 		case 1:				/* ldub{a} */
    152 		case 5:				/* stb{a} */
    153 		case 9:				/* ldsb{a} */
    154 		case 0xd:			/* ldstub */
    155 			sz = 1; break;
    156 		case 2:				/* lduh{a} */
    157 		case 6:				/* sth{a} */
    158 		case 0xa:			/* ldsh{a} */
    159 			sz = 2; break;
    160 		case 3:				/* ldd{a} */
    161 		case 7:				/* std{a} */
    162 			lddstdflg = 1;
    163 			sz = 8; break;
    164 		case 0xb:			/* ldx{a} */
    165 		case 0xe:			/* stx{a} */
    166 			sz = 8; break;
    167 		}
    168 	}
    169 
    170 
    171 	/* only support primary and secondary asi's */
    172 	if ((op3 >> 4) & 1) {
    173 		if (immflg) {
    174 			asi = (uint_t)(rp->r_tstate >> TSTATE_ASI_SHIFT) &
    175 			    TSTATE_ASI_MASK;
    176 		} else {
    177 			asi = (inst >> 5) & 0xff;
    178 		}
    179 		switch (asi) {
    180 		case ASI_P:
    181 		case ASI_S:
    182 			break;
    183 		case ASI_PNF:
    184 		case ASI_SNF:
    185 			nf = 1;
    186 			break;
    187 		case ASI_PL:
    188 		case ASI_SL:
    189 			ltlend = 1;
    190 			break;
    191 		case ASI_PNFL:
    192 		case ASI_SNFL:
    193 			ltlend = 1;
    194 			nf = 1;
    195 			break;
    196 		default:
    197 			return (0);
    198 		}
    199 		/*
    200 		 * Non-faulting stores generate a data_access_exception trap,
    201 		 * according to the Spitfire manual, which should be signaled
    202 		 * as an illegal instruction trap, because it can't be fixed.
    203 		 */
    204 		if ((nf) && ((op3 == IOP_V8_STQFA) || (op3 == IOP_V8_STDFA)))
    205 			return (SIMU_ILLEGAL);
    206 	}
    207 
    208 	if (aligndebug) {
    209 		printf("unaligned access at %p, instruction: 0x%x\n",
    210 		    (void *)rp->r_pc, inst);
    211 		printf("type %s", (((inst >> 21) & 1) ? "st" : "ld"));
    212 		if (((inst >> 21) & 1) == 0)
    213 			printf(" %s", (((inst >> 22) & 1) ?
    214 			    "signed" : "unsigned"));
    215 		printf(" asi 0x%x size %d immflg %d\n", asi, sz, immflg);
    216 		printf("rd = %d, op3 = 0x%x, rs1 = %d, rs2 = %d, imm13=0x%x\n",
    217 		    rd, op3, rs1, rs2, (inst & 0x1fff));
    218 	}
    219 
    220 	(void) flush_user_windows_to_stack(NULL);
    221 	if (getreg(rp, rs1, &val, badaddr))
    222 		return (SIMU_FAULT);
    223 	addr = (caddr_t)val;		/* convert to 32/64 bit address */
    224 	if (aligndebug)
    225 		printf("addr 1 = %p\n", (void *)addr);
    226 
    227 	/* check immediate bit and use immediate field or reg (rs2) */
    228 	if (immflg) {
    229 		int imm;
    230 		imm  = inst & 0x1fff;		/* mask out immediate field */
    231 		imm <<= 19;			/* sign extend it */
    232 		imm >>= 19;
    233 		addr += imm;			/* compute address */
    234 	} else {
    235 		if (getreg(rp, rs2, &val, badaddr))
    236 			return (SIMU_FAULT);
    237 		addr += val;
    238 	}
    239 
    240 	/*
    241 	 * If this is a 32-bit program, chop the address accordingly.  The
    242 	 * intermediate uintptr_t casts prevent warnings under a certain
    243 	 * compiler, and the temporary 32 bit storage is intended to force
    244 	 * proper code generation and break up what would otherwise be a
    245 	 * quadruple cast.
    246 	 */
    247 	if (curproc->p_model == DATAMODEL_ILP32) {
    248 		caddr32_t addr32 = (caddr32_t)(uintptr_t)addr;
    249 		addr = (caddr_t)(uintptr_t)addr32;
    250 	}
    251 
    252 	if (aligndebug)
    253 		printf("addr 2 = %p\n", (void *)addr);
    254 
    255 	if (addr >= curproc->p_as->a_userlimit) {
    256 		*badaddr = addr;
    257 		goto badret;
    258 	}
    259 
    260 	/* a single bit differentiates ld and st */
    261 	if ((inst >> 21) & 1) {			/* store */
    262 		if (floatflg) {
    263 			klwp_id_t lwp = ttolwp(curthread);
    264 			kfpu_t *fp = lwptofpu(lwp);
    265 			/* Ensure fp has been enabled */
    266 			if (fpu_exists) {
    267 				if (!(_fp_read_fprs() & FPRS_FEF))
    268 					fp_enable();
    269 			} else {
    270 				if (!fp->fpu_en)
    271 					fp_enable();
    272 			}
    273 			/* if fpu_exists read fpu reg */
    274 			if (fpu_exists) {
    275 				if (fsrflg) {
    276 					_fp_read_pfsr(&data.l[0]);
    277 				} else {
    278 					if (sz == 4) {
    279 						data.i[0] = 0;
    280 						_fp_read_pfreg(
    281 						    (unsigned *)&data.i[1], rd);
    282 					}
    283 					if (sz >= 8)
    284 						_fp_read_pdreg(
    285 						    &data.l[0], rd);
    286 					if (sz == 16)
    287 						_fp_read_pdreg(
    288 						    &data.l[1], rd+1);
    289 				}
    290 			} else {
    291 				if (fsrflg) {
    292 					/* Clear reserved bits, set version=7 */
    293 					fp->fpu_fsr &= ~0x30301000;
    294 					fp->fpu_fsr |= 0xE0000;
    295 					data.l[0] = fp->fpu_fsr;
    296 				} else {
    297 					if (sz == 4) {
    298 						data.i[0] = 0;
    299 						data.i[1] =
    300 						    (unsigned)fp->
    301 						    fpu_fr.fpu_regs[rd];
    302 					}
    303 					if (sz >= 8)
    304 						data.l[0] =
    305 						    fp->fpu_fr.fpu_dregs[rd];
    306 					if (sz == 16)
    307 						data.l[1] =
    308 						    fp->fpu_fr.fpu_dregs[rd+1];
    309 				}
    310 			}
    311 		} else {
    312 			if (lddstdflg) {		/* combine the data */
    313 				if (getreg(rp, rd, &data.l[0], badaddr))
    314 					return (SIMU_FAULT);
    315 				if (getreg(rp, rd+1, &data.l[1], badaddr))
    316 					return (SIMU_FAULT);
    317 				if (ltlend) {
    318 					/*
    319 					 * For STD, each 32-bit word is byte-
    320 					 * swapped individually.  For
    321 					 * simplicity we don't want to do that
    322 					 * below, so we swap the words now to
    323 					 * get the desired result in the end.
    324 					 */
    325 					data.i[0] = data.i[3];
    326 				} else {
    327 					data.i[0] = data.i[1];
    328 					data.i[1] = data.i[3];
    329 				}
    330 			} else {
    331 				if (getreg(rp, rd, &data.l[0], badaddr))
    332 					return (SIMU_FAULT);
    333 			}
    334 		}
    335 
    336 		if (aligndebug) {
    337 			if (sz == 16) {
    338 				printf("data %x %x %x %x\n",
    339 				    data.i[0], data.i[1], data.i[2], data.c[3]);
    340 			} else {
    341 				printf("data %x %x %x %x %x %x %x %x\n",
    342 				    data.c[0], data.c[1], data.c[2], data.c[3],
    343 				    data.c[4], data.c[5], data.c[6], data.c[7]);
    344 			}
    345 		}
    346 
    347 		if (ltlend) {
    348 			if (sz == 1) {
    349 				if (xcopyout_little(&data.c[7], addr,
    350 				    (size_t)sz) != 0)
    351 					goto badret;
    352 			} else if (sz == 2) {
    353 				if (xcopyout_little(&data.s[3], addr,
    354 				    (size_t)sz) != 0)
    355 					goto badret;
    356 			} else if (sz == 4) {
    357 				if (xcopyout_little(&data.i[1], addr,
    358 				    (size_t)sz) != 0)
    359 					goto badret;
    360 			} else {
    361 				if (xcopyout_little(&data.l[0], addr,
    362 				    (size_t)sz) != 0)
    363 					goto badret;
    364 			}
    365 		} else {
    366 			if (sz == 1) {
    367 				if (copyout(&data.c[7], addr, (size_t)sz) == -1)
    368 					goto badret;
    369 			} else if (sz == 2) {
    370 				if (copyout(&data.s[3], addr, (size_t)sz) == -1)
    371 					goto badret;
    372 			} else if (sz == 4) {
    373 				if (copyout(&data.i[1], addr, (size_t)sz) == -1)
    374 					goto badret;
    375 			} else {
    376 				if (copyout(&data.l[0], addr, (size_t)sz) == -1)
    377 					goto badret;
    378 			}
    379 		}
    380 	} else {				/* load */
    381 		if (sz == 1) {
    382 			if (ltlend) {
    383 				if (xcopyin_little(addr, &data.c[7],
    384 				    (size_t)sz) != 0) {
    385 					if (nf)
    386 						data.c[7] = 0;
    387 					else
    388 						goto badret;
    389 				}
    390 			} else {
    391 				if (copyin(addr, &data.c[7],
    392 				    (size_t)sz) == -1) {
    393 					if (nf)
    394 						data.c[7] = 0;
    395 					else
    396 						goto badret;
    397 				}
    398 			}
    399 			/* if signed and the sign bit is set extend it */
    400 			if (((inst >> 22) & 1) && ((data.c[7] >> 7) & 1)) {
    401 				data.i[0] = (uint_t)-1;	/* extend sign bit */
    402 				data.s[2] = (ushort_t)-1;
    403 				data.c[6] = (uchar_t)-1;
    404 			} else {
    405 				data.i[0] = 0;	/* clear upper 32+24 bits */
    406 				data.s[2] = 0;
    407 				data.c[6] = 0;
    408 			}
    409 		} else if (sz == 2) {
    410 			if (ltlend) {
    411 				if (xcopyin_little(addr, &data.s[3],
    412 				    (size_t)sz) != 0) {
    413 					if (nf)
    414 						data.s[3] = 0;
    415 					else
    416 						goto badret;
    417 				}
    418 			} else {
    419 				if (copyin(addr, &data.s[3],
    420 				    (size_t)sz) == -1) {
    421 					if (nf)
    422 						data.s[3] = 0;
    423 					else
    424 						goto badret;
    425 				}
    426 			}
    427 			/* if signed and the sign bit is set extend it */
    428 			if (((inst >> 22) & 1) && ((data.s[3] >> 15) & 1)) {
    429 				data.i[0] = (uint_t)-1;	/* extend sign bit */
    430 				data.s[2] = (ushort_t)-1;
    431 			} else {
    432 				data.i[0] = 0;	/* clear upper 32+16 bits */
    433 				data.s[2] = 0;
    434 			}
    435 		} else if (sz == 4) {
    436 			if (ltlend) {
    437 				if (xcopyin_little(addr, &data.i[1],
    438 				    (size_t)sz) != 0) {
    439 					if (!nf)
    440 						goto badret;
    441 					data.i[1] = 0;
    442 				}
    443 			} else {
    444 				if (copyin(addr, &data.i[1],
    445 				    (size_t)sz) == -1) {
    446 					if (!nf)
    447 						goto badret;
    448 					data.i[1] = 0;
    449 				}
    450 			}
    451 			/* if signed and the sign bit is set extend it */
    452 			if (((inst >> 22) & 1) && ((data.i[1] >> 31) & 1)) {
    453 				data.i[0] = (uint_t)-1;	/* extend sign bit */
    454 			} else {
    455 				data.i[0] = 0;	/* clear upper 32 bits */
    456 			}
    457 		} else {
    458 			if (ltlend) {
    459 				if (xcopyin_little(addr, &data.l[0],
    460 				    (size_t)sz) != 0) {
    461 					if (!nf)
    462 						goto badret;
    463 					data.l[0] = 0;
    464 				}
    465 			} else {
    466 				if (copyin(addr, &data.l[0],
    467 				    (size_t)sz) == -1) {
    468 					if (!nf)
    469 						goto badret;
    470 					data.l[0] = 0;
    471 				}
    472 			}
    473 		}
    474 
    475 		if (aligndebug) {
    476 			if (sz == 16) {
    477 				printf("data %x %x %x %x\n",
    478 				    data.i[0], data.i[1], data.i[2], data.c[3]);
    479 			} else {
    480 				printf("data %x %x %x %x %x %x %x %x\n",
    481 				    data.c[0], data.c[1], data.c[2], data.c[3],
    482 				    data.c[4], data.c[5], data.c[6], data.c[7]);
    483 			}
    484 		}
    485 
    486 		if (floatflg) {		/* if fpu_exists write fpu reg */
    487 			klwp_id_t lwp = ttolwp(curthread);
    488 			kfpu_t *fp = lwptofpu(lwp);
    489 			/* Ensure fp has been enabled */
    490 			if (fpu_exists) {
    491 				if (!(_fp_read_fprs() & FPRS_FEF))
    492 					fp_enable();
    493 			} else {
    494 				if (!fp->fpu_en)
    495 					fp_enable();
    496 			}
    497 			/* if fpu_exists read fpu reg */
    498 			if (fpu_exists) {
    499 				if (fsrflg) {
    500 					_fp_write_pfsr(&data.l[0]);
    501 				} else {
    502 					if (sz == 4)
    503 						_fp_write_pfreg(
    504 						    (unsigned *)&data.i[1], rd);
    505 					if (sz >= 8)
    506 						_fp_write_pdreg(
    507 						    &data.l[0], rd);
    508 					if (sz == 16)
    509 						_fp_write_pdreg(
    510 						    &data.l[1], rd+1);
    511 				}
    512 			} else {
    513 				if (fsrflg) {
    514 					fp->fpu_fsr = data.l[0];
    515 				} else {
    516 					if (sz == 4)
    517 						fp->fpu_fr.fpu_regs[rd] =
    518 						    (unsigned)data.i[1];
    519 					if (sz >= 8)
    520 						fp->fpu_fr.fpu_dregs[rd] =
    521 						    data.l[0];
    522 					if (sz == 16)
    523 						fp->fpu_fr.fpu_dregs[rd+1] =
    524 						    data.l[1];
    525 				}
    526 			}
    527 		} else {
    528 			if (lddstdflg) {		/* split the data */
    529 				if (ltlend) {
    530 					/*
    531 					 * For LDD, each 32-bit word is byte-
    532 					 * swapped individually.  We didn't
    533 					 * do that above, but this will give
    534 					 * us the desired result.
    535 					 */
    536 					data.i[3] = data.i[0];
    537 				} else {
    538 					data.i[3] = data.i[1];
    539 					data.i[1] = data.i[0];
    540 				}
    541 				data.i[0] = 0;
    542 				data.i[2] = 0;
    543 				if (putreg(&data.l[0], rp, rd, badaddr) == -1)
    544 					goto badret;
    545 				if (putreg(&data.l[1], rp, rd+1, badaddr) == -1)
    546 					goto badret;
    547 			} else {
    548 				if (putreg(&data.l[0], rp, rd, badaddr) == -1)
    549 					goto badret;
    550 			}
    551 		}
    552 	}
    553 	return (SIMU_SUCCESS);
    554 badret:
    555 	return (SIMU_FAULT);
    556 }
    557 
    558 
    559 int
    560 simulate_lddstd(struct regs *rp, caddr_t *badaddr)
    561 {
    562 	uint_t	inst, op3, asi = 0;
    563 	uint_t	rd, rs1, rs2;
    564 	int	nf = 0, ltlend = 0, usermode;
    565 	int	immflg;
    566 	uint64_t reven;
    567 	uint64_t rodd;
    568 	caddr_t	addr;
    569 	uint64_t val;
    570 	uint64_t data;
    571 
    572 	usermode = USERMODE(rp->r_tstate);
    573 
    574 	if (usermode)
    575 		inst = fetch_user_instr((caddr_t)rp->r_pc);
    576 	else
    577 		inst = *(uint_t *)rp->r_pc;
    578 
    579 	op3 = (inst >> 19) & 0x3f;
    580 	rd = (inst >> 25) & 0x1f;
    581 	rs1 = (inst >> 14) & 0x1f;
    582 	rs2 = inst & 0x1f;
    583 	immflg = (inst >> 13) & 1;
    584 
    585 	if (USERMODE(rp->r_tstate))
    586 		(void) flush_user_windows_to_stack(NULL);
    587 	else
    588 		flush_windows();
    589 
    590 	if ((op3 >> 4) & 1) {		/* is this LDDA/STDA? */
    591 		if (immflg) {
    592 			asi = (uint_t)(rp->r_tstate >> TSTATE_ASI_SHIFT) &
    593 			    TSTATE_ASI_MASK;
    594 		} else {
    595 			asi = (inst >> 5) & 0xff;
    596 		}
    597 		switch (asi) {
    598 		case ASI_P:
    599 		case ASI_S:
    600 			break;
    601 		case ASI_PNF:
    602 		case ASI_SNF:
    603 			nf = 1;
    604 			break;
    605 		case ASI_PL:
    606 		case ASI_SL:
    607 			ltlend = 1;
    608 			break;
    609 		case ASI_PNFL:
    610 		case ASI_SNFL:
    611 			ltlend = 1;
    612 			nf = 1;
    613 			break;
    614 		case ASI_AIUP:
    615 		case ASI_AIUS:
    616 			usermode = 1;
    617 			break;
    618 		case ASI_AIUPL:
    619 		case ASI_AIUSL:
    620 			usermode = 1;
    621 			ltlend = 1;
    622 			break;
    623 		default:
    624 			return (SIMU_ILLEGAL);
    625 		}
    626 	}
    627 
    628 	if (getreg(rp, rs1, &val, badaddr))
    629 		return (SIMU_FAULT);
    630 	addr = (caddr_t)val;		/* convert to 32/64 bit address */
    631 
    632 	/* check immediate bit and use immediate field or reg (rs2) */
    633 	if (immflg) {
    634 		int imm;
    635 		imm  = inst & 0x1fff;		/* mask out immediate field */
    636 		imm <<= 19;			/* sign extend it */
    637 		imm >>= 19;
    638 		addr += imm;			/* compute address */
    639 	} else {
    640 		if (getreg(rp, rs2, &val, badaddr))
    641 			return (SIMU_FAULT);
    642 		addr += val;
    643 	}
    644 
    645 	/*
    646 	 * T_UNIMP_LDD and T_UNIMP_STD are higher priority than
    647 	 * T_ALIGNMENT.  So we have to make sure that the address is
    648 	 * kosher before trying to use it, because the hardware hasn't
    649 	 * checked it for us yet.
    650 	 */
    651 	if (((uintptr_t)addr & 0x7) != 0) {
    652 		if (curproc->p_fixalignment)
    653 			return (do_unaligned(rp, badaddr));
    654 		else
    655 			return (SIMU_UNALIGN);
    656 	}
    657 
    658 	/*
    659 	 * If this is a 32-bit program, chop the address accordingly.  The
    660 	 * intermediate uintptr_t casts prevent warnings under a certain
    661 	 * compiler, and the temporary 32 bit storage is intended to force
    662 	 * proper code generation and break up what would otherwise be a
    663 	 * quadruple cast.
    664 	 */
    665 	if (curproc->p_model == DATAMODEL_ILP32 && usermode) {
    666 		caddr32_t addr32 = (caddr32_t)(uintptr_t)addr;
    667 		addr = (caddr_t)(uintptr_t)addr32;
    668 	}
    669 
    670 	if ((inst >> 21) & 1) {			/* store */
    671 		if (getreg(rp, rd, &reven, badaddr))
    672 			return (SIMU_FAULT);
    673 		if (getreg(rp, rd+1, &rodd, badaddr))
    674 			return (SIMU_FAULT);
    675 		if (ltlend) {
    676 			reven = BSWAP_32(reven);
    677 			rodd  = BSWAP_32(rodd);
    678 		}
    679 		data = (reven << 32) | rodd;
    680 		if (usermode) {
    681 			if (suword64_nowatch(addr, data) == -1)
    682 				return (SIMU_FAULT);
    683 		} else {
    684 			*(uint64_t *)addr = data;
    685 		}
    686 	} else {				/* load */
    687 		if (usermode) {
    688 			if (fuword64_nowatch(addr, &data)) {
    689 				if (nf)
    690 					data = 0;
    691 				else
    692 					return (SIMU_FAULT);
    693 			}
    694 		} else
    695 			data = *(uint64_t *)addr;
    696 
    697 		reven = (data >> 32);
    698 		rodd  = (uint64_t)(uint32_t)data;
    699 		if (ltlend) {
    700 			reven = BSWAP_32(reven);
    701 			rodd  = BSWAP_32(rodd);
    702 		}
    703 
    704 		if (putreg(&reven, rp, rd, badaddr) == -1)
    705 			return (SIMU_FAULT);
    706 		if (putreg(&rodd, rp, rd+1, badaddr) == -1)
    707 			return (SIMU_FAULT);
    708 	}
    709 	return (SIMU_SUCCESS);
    710 }
    711 
    712 
    713 /*
    714  * simulate popc
    715  */
    716 static int
    717 simulate_popc(struct regs *rp, caddr_t *badaddr, uint_t inst)
    718 {
    719 	uint_t	rd, rs2, rs1;
    720 	uint_t	immflg;
    721 	uint64_t val, cnt = 0;
    722 
    723 	rd = (inst >> 25) & 0x1f;
    724 	rs1 = (inst >> 14) & 0x1f;
    725 	rs2 = inst & 0x1f;
    726 	immflg = (inst >> 13) & 1;
    727 
    728 	if (rs1 > 0)
    729 		return (SIMU_ILLEGAL);
    730 
    731 	(void) flush_user_windows_to_stack(NULL);
    732 
    733 	/* check immediate bit and use immediate field or reg (rs2) */
    734 	if (immflg) {
    735 		int64_t imm;
    736 		imm  = inst & 0x1fff;		/* mask out immediate field */
    737 		imm <<= 51;			/* sign extend it */
    738 		imm >>= 51;
    739 		if (imm != 0) {
    740 			for (cnt = 0; imm != 0; imm &= imm-1)
    741 				cnt++;
    742 		}
    743 	} else {
    744 		if (getreg(rp, rs2, &val, badaddr))
    745 			return (SIMU_FAULT);
    746 		if (val != 0) {
    747 			for (cnt = 0; val != 0; val &= val-1)
    748 				cnt++;
    749 		}
    750 	}
    751 
    752 	if (putreg(&cnt, rp, rd, badaddr) == -1)
    753 		return (SIMU_FAULT);
    754 
    755 	return (SIMU_SUCCESS);
    756 }
    757 
    758 /*
    759  * simulate mulscc
    760  */
    761 static int
    762 simulate_mulscc(struct regs *rp, caddr_t *badaddr, uint_t inst)
    763 {
    764 	uint32_t	s1, s2;
    765 	uint32_t	c, d, v;
    766 	uint_t		rd, rs1;
    767 	int64_t		d64;
    768 	uint64_t	ud64;
    769 	uint64_t	drs1;
    770 
    771 	(void) flush_user_windows_to_stack(NULL);
    772 
    773 	if ((inst >> 13) & 1) {		/* immediate */
    774 		d64 = inst & 0x1fff;
    775 		d64 <<= 51;		/* sign extend it */
    776 		d64 >>= 51;
    777 	} else {
    778 		uint_t		rs2;
    779 		uint64_t	drs2;
    780 
    781 		if (inst & 0x1fe0) {
    782 			return (SIMU_ILLEGAL);
    783 		}
    784 		rs2 = inst & 0x1f;
    785 		if (getreg(rp, rs2, &drs2, badaddr)) {
    786 			return (SIMU_FAULT);
    787 		}
    788 		d64 = (int64_t)drs2;
    789 	}
    790 
    791 	rs1 = (inst >> 14) & 0x1f;
    792 	if (getreg(rp, rs1, &drs1, badaddr)) {
    793 		return (SIMU_FAULT);
    794 	}
    795 	/* icc.n xor icc.v */
    796 	s1 = ((rp->r_tstate & TSTATE_IN) >> (TSTATE_CCR_SHIFT + 3)) ^
    797 	    ((rp->r_tstate & TSTATE_IV) >> (TSTATE_CCR_SHIFT + 1));
    798 	s1 = (s1 << 31) | (((uint32_t)drs1) >> 1);
    799 
    800 	if (rp->r_y & 1) {
    801 		s2 = (uint32_t)d64;
    802 	} else {
    803 		s2 = 0;
    804 	}
    805 	d = s1 + s2;
    806 
    807 	ud64 = (uint64_t)d;
    808 
    809 	/* set the icc flags */
    810 	v = (s1 & s2 & ~d) | (~s1 & ~s2 & d);
    811 	c = (s1 & s2) | (~d & (s1 | s2));
    812 	rp->r_tstate &= ~TSTATE_ICC;
    813 	rp->r_tstate |= (uint64_t)((c >> 31) & 1) << (TSTATE_CCR_SHIFT + 0);
    814 	rp->r_tstate |= (uint64_t)((v >> 31) & 1) << (TSTATE_CCR_SHIFT + 1);
    815 	rp->r_tstate |= (uint64_t)(d ? 0 : 1) << (TSTATE_CCR_SHIFT + 2);
    816 	rp->r_tstate |= (uint64_t)((d >> 31) & 1) << (TSTATE_CCR_SHIFT + 3);
    817 
    818 	if (rp->r_tstate & TSTATE_IC) {
    819 		ud64 |= (1ULL << 32);
    820 	}
    821 
    822 	/* set the xcc flags */
    823 	rp->r_tstate &= ~TSTATE_XCC;
    824 	if (ud64 == 0) {
    825 		rp->r_tstate |= TSTATE_XZ;
    826 	}
    827 
    828 	rd = (inst >> 25) & 0x1f;
    829 	if (putreg(&ud64, rp, rd, badaddr)) {
    830 		return (SIMU_FAULT);
    831 	}
    832 
    833 	d64 = (drs1 << 32) | (uint32_t)rp->r_y;
    834 	d64 >>= 1;
    835 	rp->r_y = (uint32_t)d64;
    836 
    837 	return (SIMU_SUCCESS);
    838 }
    839 
    840 /*
    841  * simulate unimplemented instructions (popc, ldqf{a}, stqf{a})
    842  */
    843 int
    844 simulate_unimp(struct regs *rp, caddr_t *badaddr)
    845 {
    846 	uint_t	inst, optype, op3, asi;
    847 	uint_t	rs1, rd;
    848 	uint_t	ignor, i;
    849 	machpcb_t *mpcb = lwptompcb(ttolwp(curthread));
    850 	int	nomatch = 0;
    851 	caddr_t	addr = (caddr_t)rp->r_pc;
    852 	struct as *as;
    853 	caddr_t	ka;
    854 	pfn_t	pfnum;
    855 	page_t *pp;
    856 	proc_t *p = ttoproc(curthread);
    857 	struct seg *mapseg;
    858 	struct segvn_data *svd;
    859 
    860 	ASSERT(USERMODE(rp->r_tstate));
    861 	inst = fetch_user_instr(addr);
    862 	if (inst == (uint_t)-1) {
    863 		mpcb->mpcb_illexcaddr = addr;
    864 		mpcb->mpcb_illexcinsn = (uint32_t)-1;
    865 		return (SIMU_ILLEGAL);
    866 	}
    867 
    868 	/*
    869 	 * When fixing dirty v8 instructions there's a race if two processors
    870 	 * are executing the dirty executable at the same time.  If one
    871 	 * cleans the instruction as the other is executing it the second
    872 	 * processor will see a clean instruction when it comes through this
    873 	 * code and will return SIMU_ILLEGAL.  To work around the race
    874 	 * this code will keep track of the last illegal instruction seen
    875 	 * by each lwp and will only take action if the illegal instruction
    876 	 * is repeatable.
    877 	 */
    878 	if (addr != mpcb->mpcb_illexcaddr ||
    879 	    inst != mpcb->mpcb_illexcinsn)
    880 		nomatch = 1;
    881 	mpcb->mpcb_illexcaddr = addr;
    882 	mpcb->mpcb_illexcinsn = inst;
    883 
    884 	/* instruction fields */
    885 	i = (inst >> 13) & 0x1;
    886 	rd = (inst >> 25) & 0x1f;
    887 	optype = (inst >> 30) & 0x3;
    888 	op3 = (inst >> 19) & 0x3f;
    889 	ignor = (inst >> 5) & 0xff;
    890 	if (IS_IBIT_SET(inst)) {
    891 		asi = (uint32_t)((rp->r_tstate >> TSTATE_ASI_SHIFT) &
    892 		    TSTATE_ASI_MASK);
    893 	} else {
    894 		asi = ignor;
    895 	}
    896 
    897 	if (IS_VIS1(optype, op3) ||
    898 	    IS_PARTIAL_OR_SHORT_FLOAT_LD_ST(optype, op3, asi) ||
    899 	    IS_FLOAT_QUAD_OP(optype, op3)) {
    900 		klwp_t *lwp = ttolwp(curthread);
    901 		kfpu_t *fp = lwptofpu(lwp);
    902 		if (fpu_exists) {
    903 			if (!(_fp_read_fprs() & FPRS_FEF))
    904 				fp_enable();
    905 			_fp_read_pfsr(&fp->fpu_fsr);
    906 		} else {
    907 			if (!fp->fpu_en)
    908 				fp_enable();
    909 		}
    910 		fp_precise(rp);
    911 		return (SIMU_RETRY);
    912 	}
    913 
    914 	if (optype == 2 && op3 == IOP_V8_POPC) {
    915 		return (simulate_popc(rp, badaddr, inst));
    916 	} else if (optype == 3 && op3 == IOP_V8_POPC) {
    917 		return (SIMU_ILLEGAL);
    918 	} else if (optype == OP_V8_ARITH && op3 == IOP_V8_MULScc) {
    919 		return (simulate_mulscc(rp, badaddr, inst));
    920 	}
    921 
    922 	if (optype == OP_V8_LDSTR) {
    923 		if (op3 == IOP_V8_LDQF || op3 == IOP_V8_LDQFA ||
    924 		    op3 == IOP_V8_STQF || op3 == IOP_V8_STQFA)
    925 			return (do_unaligned(rp, badaddr));
    926 	}
    927 
    928 	/* This is a new instruction so illexccnt should also be set. */
    929 	if (nomatch) {
    930 		mpcb->mpcb_illexccnt = 0;
    931 		return (SIMU_RETRY);
    932 	}
    933 
    934 	/*
    935 	 * In order to keep us from entering into an infinite loop while
    936 	 * attempting to clean up faulty instructions, we will return
    937 	 * SIMU_ILLEGAL once we've cleaned up the instruction as much
    938 	 * as we can, and still end up here.
    939 	 */
    940 	if (mpcb->mpcb_illexccnt >= 3)
    941 		return (SIMU_ILLEGAL);
    942 
    943 	mpcb->mpcb_illexccnt += 1;
    944 
    945 	/*
    946 	 * The rest of the code handles v8 binaries with instructions
    947 	 * that have dirty (non-zero) bits in reserved or 'ignored'
    948 	 * fields; these will cause core dumps on v9 machines.
    949 	 *
    950 	 * We only clean dirty instructions in 32-bit programs (ie, v8)
    951 	 * running on SPARCv9 processors.  True v9 programs are forced
    952 	 * to use the instruction set as intended.
    953 	 */
    954 	if (lwp_getdatamodel(curthread->t_lwp) != DATAMODEL_ILP32)
    955 		return (SIMU_ILLEGAL);
    956 	switch (optype) {
    957 	case OP_V8_BRANCH:
    958 	case OP_V8_CALL:
    959 		return (SIMU_ILLEGAL);	/* these don't have ignored fields */
    960 		/*NOTREACHED*/
    961 	case OP_V8_ARITH:
    962 		switch (op3) {
    963 		case IOP_V8_RETT:
    964 			if (rd == 0 && !(i == 0 && ignor))
    965 				return (SIMU_ILLEGAL);
    966 			if (rd)
    967 				inst &= ~(0x1f << 25);
    968 			if (i == 0 && ignor)
    969 				inst &= ~(0xff << 5);
    970 			break;
    971 		case IOP_V8_TCC:
    972 			if (i == 0 && ignor != 0) {
    973 				inst &= ~(0xff << 5);
    974 			} else if (i == 1 && (((inst >> 7) & 0x3f) != 0)) {
    975 				inst &= ~(0x3f << 7);
    976 			} else {
    977 				return (SIMU_ILLEGAL);
    978 			}
    979 			break;
    980 		case IOP_V8_JMPL:
    981 		case IOP_V8_RESTORE:
    982 		case IOP_V8_SAVE:
    983 			if ((op3 == IOP_V8_RETT && rd) ||
    984 			    (i == 0 && ignor)) {
    985 				inst &= ~(0xff << 5);
    986 			} else {
    987 				return (SIMU_ILLEGAL);
    988 			}
    989 			break;
    990 		case IOP_V8_FCMP:
    991 			if (rd == 0)
    992 				return (SIMU_ILLEGAL);
    993 			inst &= ~(0x1f << 25);
    994 			break;
    995 		case IOP_V8_RDASR:
    996 			rs1 = ((inst >> 14) & 0x1f);
    997 			if (rs1 == 1 || (rs1 >= 7 && rs1 <= 14)) {
    998 				/*
    999 				 * The instruction specifies an invalid
   1000 				 * state register - better bail out than
   1001 				 * "fix" it when we're not sure what was
   1002 				 * intended.
   1003 				 */
   1004 				return (SIMU_ILLEGAL);
   1005 			}
   1006 				/*
   1007 				 * Note: this case includes the 'stbar'
   1008 				 * instruction (rs1 == 15 && i == 0).
   1009 				 */
   1010 				if ((ignor = (inst & 0x3fff)) != 0)
   1011 					inst &= ~(0x3fff);
   1012 			break;
   1013 		case IOP_V8_SRA:
   1014 		case IOP_V8_SRL:
   1015 		case IOP_V8_SLL:
   1016 			if (ignor == 0)
   1017 				return (SIMU_ILLEGAL);
   1018 			inst &= ~(0xff << 5);
   1019 			break;
   1020 		case IOP_V8_ADD:
   1021 		case IOP_V8_AND:
   1022 		case IOP_V8_OR:
   1023 		case IOP_V8_XOR:
   1024 		case IOP_V8_SUB:
   1025 		case IOP_V8_ANDN:
   1026 		case IOP_V8_ORN:
   1027 		case IOP_V8_XNOR:
   1028 		case IOP_V8_ADDC:
   1029 		case IOP_V8_UMUL:
   1030 		case IOP_V8_SMUL:
   1031 		case IOP_V8_SUBC:
   1032 		case IOP_V8_UDIV:
   1033 		case IOP_V8_SDIV:
   1034 		case IOP_V8_ADDcc:
   1035 		case IOP_V8_ANDcc:
   1036 		case IOP_V8_ORcc:
   1037 		case IOP_V8_XORcc:
   1038 		case IOP_V8_SUBcc:
   1039 		case IOP_V8_ANDNcc:
   1040 		case IOP_V8_ORNcc:
   1041 		case IOP_V8_XNORcc:
   1042 		case IOP_V8_ADDCcc:
   1043 		case IOP_V8_UMULcc:
   1044 		case IOP_V8_SMULcc:
   1045 		case IOP_V8_SUBCcc:
   1046 		case IOP_V8_UDIVcc:
   1047 		case IOP_V8_SDIVcc:
   1048 		case IOP_V8_TADDcc:
   1049 		case IOP_V8_TSUBcc:
   1050 		case IOP_V8_TADDccTV:
   1051 		case IOP_V8_TSUBccTV:
   1052 		case IOP_V8_MULScc:
   1053 		case IOP_V8_WRASR:
   1054 		case IOP_V8_FLUSH:
   1055 			if (i != 0 || ignor == 0)
   1056 				return (SIMU_ILLEGAL);
   1057 			inst &= ~(0xff << 5);
   1058 			break;
   1059 		default:
   1060 			return (SIMU_ILLEGAL);
   1061 		}
   1062 		break;
   1063 	case OP_V8_LDSTR:
   1064 		switch (op3) {
   1065 		case IOP_V8_STFSR:
   1066 		case IOP_V8_LDFSR:
   1067 			if (rd == 0 && !(i == 0 && ignor))
   1068 				return (SIMU_ILLEGAL);
   1069 			if (rd)
   1070 				inst &= ~(0x1f << 25);
   1071 			if (i == 0 && ignor)
   1072 				inst &= ~(0xff << 5);
   1073 			break;
   1074 		default:
   1075 			if (optype == OP_V8_LDSTR && !IS_LDST_ALT(op3) &&
   1076 			    i == 0 && ignor)
   1077 				inst &= ~(0xff << 5);
   1078 			else
   1079 				return (SIMU_ILLEGAL);
   1080 			break;
   1081 		}
   1082 		break;
   1083 	default:
   1084 		return (SIMU_ILLEGAL);
   1085 	}
   1086 
   1087 	as = p->p_as;
   1088 
   1089 	AS_LOCK_ENTER(as, &as->a_lock, RW_READER);
   1090 	mapseg = as_findseg(as, (caddr_t)rp->r_pc, 0);
   1091 	ASSERT(mapseg != NULL);
   1092 	svd = (struct segvn_data *)mapseg->s_data;
   1093 
   1094 	/*
   1095 	 * We only create COW page for MAP_PRIVATE mappings.
   1096 	 */
   1097 	SEGVN_LOCK_ENTER(as, &svd->lock, RW_READER);
   1098 	if ((svd->type & MAP_TYPE) & MAP_SHARED) {
   1099 		SEGVN_LOCK_EXIT(as, &svd->lock);
   1100 		AS_LOCK_EXIT(as, &as->a_lock);
   1101 		return (SIMU_ILLEGAL);
   1102 	}
   1103 	SEGVN_LOCK_EXIT(as, &svd->lock);
   1104 	AS_LOCK_EXIT(as, &as->a_lock);
   1105 
   1106 	/*
   1107 	 * A "flush" instruction using the user PC's vaddr will not work
   1108 	 * here, at least on Spitfire. Instead we create a temporary kernel
   1109 	 * mapping to the user's text page, then modify and flush that.
   1110 	 * Break COW by locking user page.
   1111 	 */
   1112 	if (as_fault(as->a_hat, as, (caddr_t)(rp->r_pc & PAGEMASK), PAGESIZE,
   1113 	    F_SOFTLOCK, S_READ))
   1114 		return (SIMU_FAULT);
   1115 
   1116 	AS_LOCK_ENTER(as, &as->a_lock, RW_READER);
   1117 	pfnum = hat_getpfnum(as->a_hat, (caddr_t)rp->r_pc);
   1118 	AS_LOCK_EXIT(as, &as->a_lock);
   1119 	if (pf_is_memory(pfnum)) {
   1120 		pp = page_numtopp_nolock(pfnum);
   1121 		ASSERT(pp == NULL || PAGE_LOCKED(pp));
   1122 	} else {
   1123 		(void) as_fault(as->a_hat, as, (caddr_t)(rp->r_pc & PAGEMASK),
   1124 		    PAGESIZE, F_SOFTUNLOCK, S_READ);
   1125 		return (SIMU_FAULT);
   1126 	}
   1127 
   1128 	AS_LOCK_ENTER(as, &as->a_lock, RW_READER);
   1129 	ka = ppmapin(pp, PROT_READ|PROT_WRITE, (caddr_t)rp->r_pc);
   1130 	*(uint_t *)(ka + (uintptr_t)(rp->r_pc % PAGESIZE)) = inst;
   1131 	doflush(ka + (uintptr_t)(rp->r_pc % PAGESIZE));
   1132 	ppmapout(ka);
   1133 	AS_LOCK_EXIT(as, &as->a_lock);
   1134 
   1135 	(void) as_fault(as->a_hat, as, (caddr_t)(rp->r_pc & PAGEMASK),
   1136 	    PAGESIZE, F_SOFTUNLOCK, S_READ);
   1137 	return (SIMU_RETRY);
   1138 }
   1139 
   1140 /*
   1141  * Get the value of a register for instruction simulation
   1142  * by using the regs or window structure pointers.
   1143  * Return 0 for success, and -1 for failure.  If there is a failure,
   1144  * save the faulting address using badaddr pointer.
   1145  * We have 64 bit globals and outs, and 32 or 64 bit ins and locals.
   1146  * Don't truncate globals/outs for 32 bit programs, for v8+ support.
   1147  */
   1148 int
   1149 getreg(struct regs *rp, uint_t reg, uint64_t *val, caddr_t *badaddr)
   1150 {
   1151 	uint64_t *rgs, *sp;
   1152 	int rv = 0;
   1153 
   1154 	rgs = (uint64_t *)&rp->r_ps;		/* globals and outs */
   1155 	sp = (uint64_t *)rp->r_sp;		/* ins and locals */
   1156 	if (reg == 0) {
   1157 		*val = 0;
   1158 	} else if (reg < 16) {
   1159 		*val = rgs[reg];
   1160 	} else if (IS_V9STACK(sp)) {
   1161 		uint64_t *rw = (uint64_t *)((uintptr_t)sp + V9BIAS64);
   1162 		uint64_t *addr = (uint64_t *)&rw[reg - 16];
   1163 		uint64_t res;
   1164 
   1165 		if (USERMODE(rp->r_tstate)) {
   1166 			if (fuword64_nowatch(addr, &res) == -1) {
   1167 				*badaddr = (caddr_t)addr;
   1168 				rv = -1;
   1169 			}
   1170 		} else {
   1171 			res = *addr;
   1172 		}
   1173 		*val = res;
   1174 	} else {
   1175 		caddr32_t sp32 = (caddr32_t)(uintptr_t)sp;
   1176 		uint32_t *rw = (uint32_t *)(uintptr_t)sp32;
   1177 		uint32_t *addr = (uint32_t *)&rw[reg - 16];
   1178 		uint32_t res;
   1179 
   1180 		if (USERMODE(rp->r_tstate)) {
   1181 			if (fuword32_nowatch(addr, &res) == -1) {
   1182 				*badaddr = (caddr_t)addr;
   1183 				rv = -1;
   1184 			}
   1185 		} else {
   1186 			res = *addr;
   1187 		}
   1188 		*val = (uint64_t)res;
   1189 	}
   1190 	return (rv);
   1191 }
   1192 
   1193 /*
   1194  * Set the value of a register after instruction simulation
   1195  * by using the regs or window structure pointers.
   1196  * Return 0 for succes -1 failure.
   1197  * save the faulting address using badaddr pointer.
   1198  * We have 64 bit globals and outs, and 32 or 64 bit ins and locals.
   1199  * Don't truncate globals/outs for 32 bit programs, for v8+ support.
   1200  */
   1201 int
   1202 putreg(uint64_t	*data, struct regs *rp, uint_t reg, caddr_t *badaddr)
   1203 {
   1204 	uint64_t *rgs, *sp;
   1205 	int rv = 0;
   1206 
   1207 	rgs = (uint64_t *)&rp->r_ps;		/* globals and outs */
   1208 	sp = (uint64_t *)rp->r_sp;		/* ins and locals */
   1209 	if (reg == 0) {
   1210 		return (0);
   1211 	} else if (reg < 16) {
   1212 		rgs[reg] = *data;
   1213 	} else if (IS_V9STACK(sp)) {
   1214 		uint64_t *rw = (uint64_t *)((uintptr_t)sp + V9BIAS64);
   1215 		uint64_t *addr = (uint64_t *)&rw[reg - 16];
   1216 		uint64_t res;
   1217 
   1218 		if (USERMODE(rp->r_tstate)) {
   1219 			struct machpcb *mpcb = lwptompcb(curthread->t_lwp);
   1220 
   1221 			res = *data;
   1222 			if (suword64_nowatch(addr, res) != 0) {
   1223 				*badaddr = (caddr_t)addr;
   1224 				rv = -1;
   1225 			}
   1226 			/*
   1227 			 * We have changed a local or in register;
   1228 			 * nuke the watchpoint return windows.
   1229 			 */
   1230 			mpcb->mpcb_rsp[0] = NULL;
   1231 			mpcb->mpcb_rsp[1] = NULL;
   1232 		} else {
   1233 			res = *data;
   1234 			*addr = res;
   1235 		}
   1236 	} else {
   1237 		caddr32_t sp32 = (caddr32_t)(uintptr_t)sp;
   1238 		uint32_t *rw = (uint32_t *)(uintptr_t)sp32;
   1239 		uint32_t *addr = (uint32_t *)&rw[reg - 16];
   1240 		uint32_t res;
   1241 
   1242 		if (USERMODE(rp->r_tstate)) {
   1243 			struct machpcb *mpcb = lwptompcb(curthread->t_lwp);
   1244 
   1245 			res = (uint_t)*data;
   1246 			if (suword32_nowatch(addr, res) != 0) {
   1247 				*badaddr = (caddr_t)addr;
   1248 				rv = -1;
   1249 			}
   1250 			/*
   1251 			 * We have changed a local or in register;
   1252 			 * nuke the watchpoint return windows.
   1253 			 */
   1254 			mpcb->mpcb_rsp[0] = NULL;
   1255 			mpcb->mpcb_rsp[1] = NULL;
   1256 
   1257 		} else {
   1258 			res = (uint_t)*data;
   1259 			*addr = res;
   1260 		}
   1261 	}
   1262 	return (rv);
   1263 }
   1264 
   1265 /*
   1266  * Calculate a memory reference address from instruction
   1267  * operands, used to return the address of a fault, instead
   1268  * of the instruction when an error occurs.  This is code that is
   1269  * common with most of the routines that simulate instructions.
   1270  */
   1271 int
   1272 calc_memaddr(struct regs *rp, caddr_t *badaddr)
   1273 {
   1274 	uint_t	inst;
   1275 	uint_t	rd, rs1, rs2;
   1276 	int	sz;
   1277 	int	immflg;
   1278 	int	floatflg;
   1279 	caddr_t  addr;
   1280 	uint64_t val;
   1281 
   1282 	if (USERMODE(rp->r_tstate))
   1283 		inst = fetch_user_instr((caddr_t)rp->r_pc);
   1284 	else
   1285 		inst = *(uint_t *)rp->r_pc;
   1286 
   1287 	rd = (inst >> 25) & 0x1f;
   1288 	rs1 = (inst >> 14) & 0x1f;
   1289 	rs2 = inst & 0x1f;
   1290 	floatflg = (inst >> 24) & 1;
   1291 	immflg = (inst >> 13) & 1;
   1292 
   1293 	if (floatflg) {
   1294 		switch ((inst >> 19) & 3) {	/* map size bits to a number */
   1295 		case 0: sz = 4; break;		/* ldf/stf */
   1296 		case 1: return (0);		/* ld[x]fsr/st[x]fsr */
   1297 		case 2: sz = 16; break;		/* ldqf/stqf */
   1298 		case 3: sz = 8; break;		/* lddf/stdf */
   1299 		}
   1300 		/*
   1301 		 * Fix to access extra double register encoding plus
   1302 		 * compensate to access the correct fpu_dreg.
   1303 		 */
   1304 		if (sz > 4) {
   1305 			if ((rd & 1) == 1)
   1306 				rd = (rd & 0x1e) | 0x20;
   1307 			rd = rd >> 1;
   1308 		}
   1309 	} else {
   1310 		switch ((inst >> 19) & 0xf) {	/* map size bits to a number */
   1311 		case 0:				/* lduw */
   1312 		case 4:				/* stw */
   1313 		case 8:				/* ldsw */
   1314 		case 0xf:			/* swap */
   1315 			sz = 4; break;
   1316 		case 1:				/* ldub */
   1317 		case 5:				/* stb */
   1318 		case 9:				/* ldsb */
   1319 		case 0xd:			/* ldstub */
   1320 			sz = 1; break;
   1321 		case 2:				/* lduh */
   1322 		case 6:				/* sth */
   1323 		case 0xa:			/* ldsh */
   1324 			sz = 2; break;
   1325 		case 3:				/* ldd */
   1326 		case 7:				/* std */
   1327 		case 0xb:			/* ldx */
   1328 		case 0xe:			/* stx */
   1329 			sz = 8; break;
   1330 		}
   1331 	}
   1332 
   1333 	if (USERMODE(rp->r_tstate))
   1334 		(void) flush_user_windows_to_stack(NULL);
   1335 	else
   1336 		flush_windows();
   1337 
   1338 	if (getreg(rp, rs1, &val, badaddr))
   1339 		return (SIMU_FAULT);
   1340 	addr = (caddr_t)val;
   1341 
   1342 	/* check immediate bit and use immediate field or reg (rs2) */
   1343 	if (immflg) {
   1344 		int imm;
   1345 		imm = inst & 0x1fff;		/* mask out immediate field */
   1346 		imm <<= 19;			/* sign extend it */
   1347 		imm >>= 19;
   1348 		addr += imm;			/* compute address */
   1349 	} else {
   1350 		if (getreg(rp, rs2, &val, badaddr))
   1351 			return (SIMU_FAULT);
   1352 		addr += val;
   1353 	}
   1354 
   1355 	/*
   1356 	 * If this is a 32-bit program, chop the address accordingly.  The
   1357 	 * intermediate uintptr_t casts prevent warnings under a certain
   1358 	 * compiler, and the temporary 32 bit storage is intended to force
   1359 	 * proper code generation and break up what would otherwise be a
   1360 	 * quadruple cast.
   1361 	 */
   1362 	if (curproc->p_model == DATAMODEL_ILP32 && USERMODE(rp->r_tstate)) {
   1363 		caddr32_t addr32 = (caddr32_t)(uintptr_t)addr;
   1364 		addr = (caddr_t)(uintptr_t)addr32;
   1365 	}
   1366 
   1367 	*badaddr = addr;
   1368 	return ((uintptr_t)addr & (sz - 1) ? SIMU_UNALIGN : SIMU_SUCCESS);
   1369 }
   1370 
   1371 /*
   1372  * Return the size of a load or store instruction (1, 2, 4, 8, 16, 64).
   1373  * Also compute the precise address by instruction disassembly.
   1374  * (v9 page faults only provide the page address via the hardware.)
   1375  * Return 0 on failure (not a load or store instruction).
   1376  */
   1377 int
   1378 instr_size(struct regs *rp, caddr_t *addrp, enum seg_rw rdwr)
   1379 {
   1380 	uint_t	inst, op3, asi;
   1381 	uint_t	rd, rs1, rs2;
   1382 	int	sz = 0;
   1383 	int	immflg;
   1384 	int	floatflg;
   1385 	caddr_t	addr;
   1386 	caddr_t badaddr;
   1387 	uint64_t val;
   1388 
   1389 	if (rdwr == S_EXEC) {
   1390 		*addrp = (caddr_t)rp->r_pc;
   1391 		return (4);
   1392 	}
   1393 
   1394 	/*
   1395 	 * Fetch the instruction from user-level.
   1396 	 * We would like to assert this:
   1397 	 *   ASSERT(USERMODE(rp->r_tstate));
   1398 	 * but we can't because we can reach this point from a
   1399 	 * register window underflow/overflow and the v9 wbuf
   1400 	 * traps call trap() with T_USER even though r_tstate
   1401 	 * indicates a system trap, not a user trap.
   1402 	 */
   1403 	inst = fetch_user_instr((caddr_t)rp->r_pc);
   1404 
   1405 	op3 = (inst >> 19) & 0x3f;
   1406 	rd = (inst >> 25) & 0x1f;
   1407 	rs1 = (inst >> 14) & 0x1f;
   1408 	rs2 = inst & 0x1f;
   1409 	floatflg = (inst >> 24) & 1;
   1410 	immflg = (inst >> 13) & 1;
   1411 
   1412 	/* if not load or store do nothing.  can't happen? */
   1413 	if ((inst >> 30) != 3)
   1414 		return (0);
   1415 
   1416 	if (immflg)
   1417 		asi = (uint_t)((rp->r_tstate >> TSTATE_ASI_SHIFT) &
   1418 		    TSTATE_ASI_MASK);
   1419 	else
   1420 		asi = (inst >> 5) & 0xff;
   1421 
   1422 	if (floatflg) {
   1423 		/* check for ld/st alternate and highest defined V9 asi */
   1424 		if ((op3 & 0x30) == 0x30 && asi > ASI_SNFL) {
   1425 			sz = extended_asi_size(asi);
   1426 		} else {
   1427 			switch (op3 & 3) {
   1428 			case 0:
   1429 				sz = 4;			/* ldf/stf/cas */
   1430 				break;
   1431 			case 1:
   1432 				if (rd == 0)
   1433 					sz = 4;		/* ldfsr/stfsr */
   1434 				else
   1435 					sz = 8;		/* ldxfsr/stxfsr */
   1436 				break;
   1437 			case 2:
   1438 				if (op3 == 0x3e)
   1439 					sz = 8;		/* casx */
   1440 				else
   1441 					sz = 16;	/* ldqf/stqf */
   1442 				break;
   1443 			case 3:
   1444 				sz = 8;			/* lddf/stdf */
   1445 				break;
   1446 			}
   1447 		}
   1448 	} else {
   1449 		switch (op3 & 0xf) {		/* map size bits to a number */
   1450 		case 0:				/* lduw */
   1451 		case 4:				/* stw */
   1452 		case 8:				/* ldsw */
   1453 		case 0xf:			/* swap */
   1454 			sz = 4; break;
   1455 		case 1:				/* ldub */
   1456 		case 5:				/* stb */
   1457 		case 9:				/* ldsb */
   1458 		case 0xd:			/* ldstub */
   1459 			sz = 1; break;
   1460 		case 2:				/* lduh */
   1461 		case 6:				/* sth */
   1462 		case 0xa:			/* ldsh */
   1463 			sz = 2; break;
   1464 		case 3:				/* ldd */
   1465 		case 7:				/* std */
   1466 		case 0xb:			/* ldx */
   1467 		case 0xe:			/* stx */
   1468 			sz = 8; break;
   1469 		}
   1470 	}
   1471 
   1472 	if (sz == 0)	/* can't happen? */
   1473 		return (0);
   1474 	(void) flush_user_windows_to_stack(NULL);
   1475 
   1476 	if (getreg(rp, rs1, &val, &badaddr))
   1477 		return (0);
   1478 	addr = (caddr_t)val;
   1479 
   1480 	/* cas/casx don't use rs2 / simm13 to compute the address */
   1481 	if ((op3 & 0x3d) != 0x3c) {
   1482 		/* check immediate bit and use immediate field or reg (rs2) */
   1483 		if (immflg) {
   1484 			int imm;
   1485 			imm  = inst & 0x1fff;	/* mask out immediate field */
   1486 			imm <<= 19;		/* sign extend it */
   1487 			imm >>= 19;
   1488 			addr += imm;		/* compute address */
   1489 		} else {
   1490 			/*
   1491 			 * asi's in the 0xCx range are partial store
   1492 			 * instructions.  For these, rs2 is a mask, not part of
   1493 			 * the address.
   1494 			 */
   1495 			if (!(floatflg && (asi & 0xf0) == 0xc0)) {
   1496 				if (getreg(rp, rs2, &val, &badaddr))
   1497 					return (0);
   1498 				addr += val;
   1499 			}
   1500 		}
   1501 	}
   1502 
   1503 	/*
   1504 	 * If this is a 32-bit program, chop the address accordingly.  The
   1505 	 * intermediate uintptr_t casts prevent warnings under a certain
   1506 	 * compiler, and the temporary 32 bit storage is intended to force
   1507 	 * proper code generation and break up what would otherwise be a
   1508 	 * quadruple cast.
   1509 	 */
   1510 	if (curproc->p_model == DATAMODEL_ILP32) {
   1511 		caddr32_t addr32 = (caddr32_t)(uintptr_t)addr;
   1512 		addr = (caddr_t)(uintptr_t)addr32;
   1513 	}
   1514 
   1515 	*addrp = addr;
   1516 	ASSERT(sz != 0);
   1517 	return (sz);
   1518 }
   1519 
   1520 /*
   1521  * Fetch an instruction from user-level.
   1522  * Deal with watchpoints, if they are in effect.
   1523  */
   1524 int32_t
   1525 fetch_user_instr(caddr_t vaddr)
   1526 {
   1527 	proc_t *p = curproc;
   1528 	int32_t instr;
   1529 
   1530 	/*
   1531 	 * If this is a 32-bit program, chop the address accordingly.  The
   1532 	 * intermediate uintptr_t casts prevent warnings under a certain
   1533 	 * compiler, and the temporary 32 bit storage is intended to force
   1534 	 * proper code generation and break up what would otherwise be a
   1535 	 * quadruple cast.
   1536 	 */
   1537 	if (p->p_model == DATAMODEL_ILP32) {
   1538 		caddr32_t vaddr32 = (caddr32_t)(uintptr_t)vaddr;
   1539 		vaddr = (caddr_t)(uintptr_t)vaddr32;
   1540 	}
   1541 
   1542 	if (fuword32_nowatch(vaddr, (uint32_t *)&instr) == -1)
   1543 		instr = -1;
   1544 
   1545 	return (instr);
   1546 }
   1547