Home | History | Annotate | Download | only in os
      1     0   stevel /*
      2     0   stevel  * CDDL HEADER START
      3     0   stevel  *
      4     0   stevel  * The contents of this file are subject to the terms of the
      5  2179      ahl  * Common Development and Distribution License (the "License").
      6  2179      ahl  * You may not use this file except in compliance with the License.
      7     0   stevel  *
      8     0   stevel  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
      9     0   stevel  * or http://www.opensolaris.org/os/licensing.
     10     0   stevel  * See the License for the specific language governing permissions
     11     0   stevel  * and limitations under the License.
     12     0   stevel  *
     13     0   stevel  * When distributing Covered Code, include this CDDL HEADER in each
     14     0   stevel  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
     15     0   stevel  * If applicable, add the following below this CDDL HEADER, with the
     16     0   stevel  * fields enclosed by brackets "[]" replaced with your own identifying
     17     0   stevel  * information: Portions Copyright [yyyy] [name of copyright owner]
     18     0   stevel  *
     19     0   stevel  * CDDL HEADER END
     20     0   stevel  */
     21  2179      ahl 
     22     0   stevel /*
     23  9489      Joe  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
     24     0   stevel  * Use is subject to license terms.
     25     0   stevel  */
     26     0   stevel 
     27     0   stevel #include <sys/dtrace.h>
     28     0   stevel #include <sys/fasttrap.h>
     29     0   stevel #include <sys/x_call.h>
     30     0   stevel #include <sys/cmn_err.h>
     31     0   stevel #include <sys/trap.h>
     32     0   stevel #include <sys/psw.h>
     33     0   stevel #include <sys/privregs.h>
     34     0   stevel #include <sys/machsystm.h>
     35     0   stevel #include <vm/seg_kmem.h>
     36     0   stevel 
     37     0   stevel typedef struct dtrace_invop_hdlr {
     38     0   stevel 	int (*dtih_func)(uintptr_t, uintptr_t *, uintptr_t);
     39     0   stevel 	struct dtrace_invop_hdlr *dtih_next;
     40     0   stevel } dtrace_invop_hdlr_t;
     41     0   stevel 
     42     0   stevel dtrace_invop_hdlr_t *dtrace_invop_hdlr;
     43     0   stevel 
     44     0   stevel int
     45     0   stevel dtrace_invop(uintptr_t addr, uintptr_t *stack, uintptr_t eax)
     46     0   stevel {
     47     0   stevel 	dtrace_invop_hdlr_t *hdlr;
     48     0   stevel 	int rval;
     49     0   stevel 
     50     0   stevel 	for (hdlr = dtrace_invop_hdlr; hdlr != NULL; hdlr = hdlr->dtih_next) {
     51     0   stevel 		if ((rval = hdlr->dtih_func(addr, stack, eax)) != 0)
     52     0   stevel 			return (rval);
     53     0   stevel 	}
     54     0   stevel 
     55     0   stevel 	return (0);
     56     0   stevel }
     57     0   stevel 
     58     0   stevel void
     59     0   stevel dtrace_invop_add(int (*func)(uintptr_t, uintptr_t *, uintptr_t))
     60     0   stevel {
     61     0   stevel 	dtrace_invop_hdlr_t *hdlr;
     62     0   stevel 
     63     0   stevel 	hdlr = kmem_alloc(sizeof (dtrace_invop_hdlr_t), KM_SLEEP);
     64     0   stevel 	hdlr->dtih_func = func;
     65     0   stevel 	hdlr->dtih_next = dtrace_invop_hdlr;
     66     0   stevel 	dtrace_invop_hdlr = hdlr;
     67     0   stevel }
     68     0   stevel 
     69     0   stevel void
     70     0   stevel dtrace_invop_remove(int (*func)(uintptr_t, uintptr_t *, uintptr_t))
     71     0   stevel {
     72     0   stevel 	dtrace_invop_hdlr_t *hdlr = dtrace_invop_hdlr, *prev = NULL;
     73     0   stevel 
     74     0   stevel 	for (;;) {
     75     0   stevel 		if (hdlr == NULL)
     76     0   stevel 			panic("attempt to remove non-existent invop handler");
     77     0   stevel 
     78     0   stevel 		if (hdlr->dtih_func == func)
     79     0   stevel 			break;
     80     0   stevel 
     81     0   stevel 		prev = hdlr;
     82     0   stevel 		hdlr = hdlr->dtih_next;
     83     0   stevel 	}
     84     0   stevel 
     85     0   stevel 	if (prev == NULL) {
     86     0   stevel 		ASSERT(dtrace_invop_hdlr == hdlr);
     87     0   stevel 		dtrace_invop_hdlr = hdlr->dtih_next;
     88     0   stevel 	} else {
     89     0   stevel 		ASSERT(dtrace_invop_hdlr != hdlr);
     90     0   stevel 		prev->dtih_next = hdlr->dtih_next;
     91     0   stevel 	}
     92     0   stevel 
     93     0   stevel 	kmem_free(hdlr, sizeof (dtrace_invop_hdlr_t));
     94     0   stevel }
     95     0   stevel 
     96     0   stevel int
     97     0   stevel dtrace_getipl(void)
     98     0   stevel {
     99     0   stevel 	return (CPU->cpu_pri);
    100     0   stevel }
    101     0   stevel 
    102     0   stevel /*ARGSUSED*/
    103     0   stevel void
    104     0   stevel dtrace_toxic_ranges(void (*func)(uintptr_t base, uintptr_t limit))
    105     0   stevel {
    106     0   stevel #ifdef __amd64
    107     0   stevel 	extern uintptr_t toxic_addr;
    108     0   stevel 	extern size_t toxic_size;
    109     0   stevel 
    110     0   stevel 	(*func)(0, _userlimit);
    111     0   stevel 
    112     0   stevel 	if (hole_end > hole_start)
    113     0   stevel 		(*func)(hole_start, hole_end);
    114     0   stevel 	(*func)(toxic_addr, toxic_addr + toxic_size);
    115     0   stevel #else
    116     0   stevel 	extern void *device_arena_contains(void *, size_t, size_t *);
    117     0   stevel 	caddr_t	vaddr;
    118     0   stevel 	size_t	len;
    119     0   stevel 
    120     0   stevel 	for (vaddr = (caddr_t)kernelbase; vaddr < (caddr_t)KERNEL_TEXT;
    121     0   stevel 	    vaddr += len) {
    122     0   stevel 		len = (caddr_t)KERNEL_TEXT - vaddr;
    123     0   stevel 		vaddr = device_arena_contains(vaddr, len, &len);
    124     0   stevel 		if (vaddr == NULL)
    125  5084  johnlev 			break;
    126     0   stevel 		(*func)((uintptr_t)vaddr, (uintptr_t)vaddr + len);
    127     0   stevel 	}
    128     0   stevel #endif
    129     0   stevel 	(*func)(0, _userlimit);
    130     0   stevel }
    131     0   stevel 
    132     0   stevel static int
    133     0   stevel dtrace_xcall_func(dtrace_xcall_t func, void *arg)
    134     0   stevel {
    135     0   stevel 	(*func)(arg);
    136     0   stevel 
    137     0   stevel 	return (0);
    138     0   stevel }
    139     0   stevel 
    140     0   stevel /*ARGSUSED*/
    141     0   stevel void
    142     0   stevel dtrace_xcall(processorid_t cpu, dtrace_xcall_t func, void *arg)
    143     0   stevel {
    144     0   stevel 	cpuset_t set;
    145     0   stevel 
    146     0   stevel 	CPUSET_ZERO(set);
    147     0   stevel 
    148     0   stevel 	if (cpu == DTRACE_CPUALL) {
    149     0   stevel 		CPUSET_ALL(set);
    150     0   stevel 	} else {
    151     0   stevel 		CPUSET_ADD(set, cpu);
    152     0   stevel 	}
    153     0   stevel 
    154     0   stevel 	kpreempt_disable();
    155  9489      Joe 	xc_sync((xc_arg_t)func, (xc_arg_t)arg, 0, CPUSET2BV(set),
    156  5084  johnlev 	    (xc_func_t)dtrace_xcall_func);
    157     0   stevel 	kpreempt_enable();
    158     0   stevel }
    159     0   stevel 
    160     0   stevel void
    161     0   stevel dtrace_sync_func(void)
    162     0   stevel {}
    163     0   stevel 
    164     0   stevel void
    165     0   stevel dtrace_sync(void)
    166     0   stevel {
    167     0   stevel 	dtrace_xcall(DTRACE_CPUALL, (dtrace_xcall_t)dtrace_sync_func, NULL);
    168     0   stevel }
    169     0   stevel 
    170     0   stevel int (*dtrace_pid_probe_ptr)(struct regs *);
    171     0   stevel int (*dtrace_return_probe_ptr)(struct regs *);
    172     0   stevel 
    173     0   stevel void
    174     0   stevel dtrace_user_probe(struct regs *rp, caddr_t addr, processorid_t cpuid)
    175     0   stevel {
    176     0   stevel 	krwlock_t *rwp;
    177     0   stevel 	proc_t *p = curproc;
    178     0   stevel 	extern void trap(struct regs *, caddr_t, processorid_t);
    179     0   stevel 
    180     0   stevel 	if (USERMODE(rp->r_cs) || (rp->r_ps & PS_VM)) {
    181     0   stevel 		if (curthread->t_cred != p->p_cred) {
    182     0   stevel 			cred_t *oldcred = curthread->t_cred;
    183     0   stevel 			/*
    184     0   stevel 			 * DTrace accesses t_cred in probe context.  t_cred
    185     0   stevel 			 * must always be either NULL, or point to a valid,
    186     0   stevel 			 * allocated cred structure.
    187     0   stevel 			 */
    188     0   stevel 			curthread->t_cred = crgetcred();
    189     0   stevel 			crfree(oldcred);
    190     0   stevel 		}
    191     0   stevel 	}
    192     0   stevel 
    193     0   stevel 	if (rp->r_trapno == T_DTRACE_RET) {
    194     0   stevel 		uint8_t step = curthread->t_dtrace_step;
    195     0   stevel 		uint8_t ret = curthread->t_dtrace_ret;
    196     0   stevel 		uintptr_t npc = curthread->t_dtrace_npc;
    197     0   stevel 
    198     0   stevel 		if (curthread->t_dtrace_ast) {
    199     0   stevel 			aston(curthread);
    200     0   stevel 			curthread->t_sig_check = 1;
    201     0   stevel 		}
    202     0   stevel 
    203     0   stevel 		/*
    204     0   stevel 		 * Clear all user tracing flags.
    205     0   stevel 		 */
    206     0   stevel 		curthread->t_dtrace_ft = 0;
    207     0   stevel 
    208     0   stevel 		/*
    209     0   stevel 		 * If we weren't expecting to take a return probe trap, kill
    210     0   stevel 		 * the process as though it had just executed an unassigned
    211     0   stevel 		 * trap instruction.
    212     0   stevel 		 */
    213     0   stevel 		if (step == 0) {
    214     0   stevel 			tsignal(curthread, SIGILL);
    215     0   stevel 			return;
    216     0   stevel 		}
    217     0   stevel 
    218     0   stevel 		/*
    219     0   stevel 		 * If we hit this trap unrelated to a return probe, we're
    220     0   stevel 		 * just here to reset the AST flag since we deferred a signal
    221     0   stevel 		 * until after we logically single-stepped the instruction we
    222     0   stevel 		 * copied out.
    223     0   stevel 		 */
    224     0   stevel 		if (ret == 0) {
    225     0   stevel 			rp->r_pc = npc;
    226     0   stevel 			return;
    227     0   stevel 		}
    228     0   stevel 
    229     0   stevel 		/*
    230     0   stevel 		 * We need to wait until after we've called the
    231     0   stevel 		 * dtrace_return_probe_ptr function pointer to set %pc.
    232     0   stevel 		 */
    233     0   stevel 		rwp = &CPU->cpu_ft_lock;
    234     0   stevel 		rw_enter(rwp, RW_READER);
    235     0   stevel 		if (dtrace_return_probe_ptr != NULL)
    236     0   stevel 			(void) (*dtrace_return_probe_ptr)(rp);
    237     0   stevel 		rw_exit(rwp);
    238     0   stevel 		rp->r_pc = npc;
    239     0   stevel 
    240     0   stevel 	} else if (rp->r_trapno == T_BPTFLT) {
    241  3939    sethg 		uint8_t instr, instr2;
    242  3939    sethg 		caddr_t linearpc;
    243     0   stevel 		rwp = &CPU->cpu_ft_lock;
    244     0   stevel 
    245     0   stevel 		/*
    246     0   stevel 		 * The DTrace fasttrap provider uses the breakpoint trap
    247     0   stevel 		 * (int 3). We let DTrace take the first crack at handling
    248     0   stevel 		 * this trap; if it's not a probe that DTrace knowns about,
    249     0   stevel 		 * we call into the trap() routine to handle it like a
    250     0   stevel 		 * breakpoint placed by a conventional debugger.
    251     0   stevel 		 */
    252     0   stevel 		rw_enter(rwp, RW_READER);
    253     0   stevel 		if (dtrace_pid_probe_ptr != NULL &&
    254     0   stevel 		    (*dtrace_pid_probe_ptr)(rp) == 0) {
    255     0   stevel 			rw_exit(rwp);
    256     0   stevel 			return;
    257     0   stevel 		}
    258     0   stevel 		rw_exit(rwp);
    259     0   stevel 
    260  3939    sethg 		if (dtrace_linear_pc(rp, p, &linearpc) != 0) {
    261  3939    sethg 			trap(rp, addr, cpuid);
    262  3939    sethg 			return;
    263  3939    sethg 		}
    264  3939    sethg 
    265     0   stevel 		/*
    266     0   stevel 		 * If the instruction that caused the breakpoint trap doesn't
    267     0   stevel 		 * look like an int 3 anymore, it may be that this tracepoint
    268     0   stevel 		 * was removed just after the user thread executed it. In
    269     0   stevel 		 * that case, return to user land to retry the instuction.
    270  3939    sethg 		 * Note that we assume the length of the instruction to retry
    271  3939    sethg 		 * is 1 byte because that's the length of FASTTRAP_INSTR.
    272  3939    sethg 		 * We check for r_pc > 0 and > 2 so that we don't have to
    273  3939    sethg 		 * deal with segment wraparound.
    274     0   stevel 		 */
    275  3939    sethg 		if (rp->r_pc > 0 && fuword8(linearpc - 1, &instr) == 0 &&
    276  3939    sethg 		    instr != FASTTRAP_INSTR &&
    277  3939    sethg 		    (instr != 3 || (rp->r_pc >= 2 &&
    278  3939    sethg 		    (fuword8(linearpc - 2, &instr2) != 0 || instr2 != 0xCD)))) {
    279     0   stevel 			rp->r_pc--;
    280     0   stevel 			return;
    281     0   stevel 		}
    282     0   stevel 
    283     0   stevel 		trap(rp, addr, cpuid);
    284     0   stevel 
    285     0   stevel 	} else {
    286     0   stevel 		trap(rp, addr, cpuid);
    287     0   stevel 	}
    288     0   stevel }
    289     0   stevel 
    290     0   stevel void
    291     0   stevel dtrace_safe_synchronous_signal(void)
    292     0   stevel {
    293     0   stevel 	kthread_t *t = curthread;
    294     0   stevel 	struct regs *rp = lwptoregs(ttolwp(t));
    295     0   stevel 	size_t isz = t->t_dtrace_npc - t->t_dtrace_pc;
    296     0   stevel 
    297     0   stevel 	ASSERT(t->t_dtrace_on);
    298     0   stevel 
    299     0   stevel 	/*
    300     0   stevel 	 * If we're not in the range of scratch addresses, we're not actually
    301     0   stevel 	 * tracing user instructions so turn off the flags. If the instruction
    302     0   stevel 	 * we copied out caused a synchonous trap, reset the pc back to its
    303     0   stevel 	 * original value and turn off the flags.
    304     0   stevel 	 */
    305     0   stevel 	if (rp->r_pc < t->t_dtrace_scrpc ||
    306     0   stevel 	    rp->r_pc > t->t_dtrace_astpc + isz) {
    307     0   stevel 		t->t_dtrace_ft = 0;
    308     0   stevel 	} else if (rp->r_pc == t->t_dtrace_scrpc ||
    309     0   stevel 	    rp->r_pc == t->t_dtrace_astpc) {
    310     0   stevel 		rp->r_pc = t->t_dtrace_pc;
    311     0   stevel 		t->t_dtrace_ft = 0;
    312     0   stevel 	}
    313     0   stevel }
    314     0   stevel 
    315     0   stevel int
    316     0   stevel dtrace_safe_defer_signal(void)
    317     0   stevel {
    318     0   stevel 	kthread_t *t = curthread;
    319     0   stevel 	struct regs *rp = lwptoregs(ttolwp(t));
    320     0   stevel 	size_t isz = t->t_dtrace_npc - t->t_dtrace_pc;
    321     0   stevel 
    322     0   stevel 	ASSERT(t->t_dtrace_on);
    323     0   stevel 
    324     0   stevel 	/*
    325     0   stevel 	 * If we're not in the range of scratch addresses, we're not actually
    326     0   stevel 	 * tracing user instructions so turn off the flags.
    327     0   stevel 	 */
    328     0   stevel 	if (rp->r_pc < t->t_dtrace_scrpc ||
    329     0   stevel 	    rp->r_pc > t->t_dtrace_astpc + isz) {
    330     0   stevel 		t->t_dtrace_ft = 0;
    331     0   stevel 		return (0);
    332     0   stevel 	}
    333     0   stevel 
    334     0   stevel 	/*
    335     0   stevel 	 * If we've executed the original instruction, but haven't performed
    336     0   stevel 	 * the jmp back to t->t_dtrace_npc or the clean up of any registers
    337     0   stevel 	 * used to emulate %rip-relative instructions in 64-bit mode, do that
    338     0   stevel 	 * here and take the signal right away. We detect this condition by
    339     0   stevel 	 * seeing if the program counter is the range [scrpc + isz, astpc).
    340     0   stevel 	 */
    341     0   stevel 	if (t->t_dtrace_astpc - rp->r_pc <
    342     0   stevel 	    t->t_dtrace_astpc - t->t_dtrace_scrpc - isz) {
    343     0   stevel #ifdef __amd64
    344     0   stevel 		/*
    345     0   stevel 		 * If there is a scratch register and we're on the
    346     0   stevel 		 * instruction immediately after the modified instruction,
    347     0   stevel 		 * restore the value of that scratch register.
    348     0   stevel 		 */
    349     0   stevel 		if (t->t_dtrace_reg != 0 &&
    350     0   stevel 		    rp->r_pc == t->t_dtrace_scrpc + isz) {
    351     0   stevel 			switch (t->t_dtrace_reg) {
    352     0   stevel 			case REG_RAX:
    353     0   stevel 				rp->r_rax = t->t_dtrace_regv;
    354     0   stevel 				break;
    355     0   stevel 			case REG_RCX:
    356     0   stevel 				rp->r_rcx = t->t_dtrace_regv;
    357     0   stevel 				break;
    358     0   stevel 			case REG_R8:
    359     0   stevel 				rp->r_r8 = t->t_dtrace_regv;
    360     0   stevel 				break;
    361     0   stevel 			case REG_R9:
    362     0   stevel 				rp->r_r9 = t->t_dtrace_regv;
    363     0   stevel 				break;
    364     0   stevel 			}
    365     0   stevel 		}
    366     0   stevel #endif
    367     0   stevel 		rp->r_pc = t->t_dtrace_npc;
    368     0   stevel 		t->t_dtrace_ft = 0;
    369     0   stevel 		return (0);
    370     0   stevel 	}
    371     0   stevel 
    372     0   stevel 	/*
    373     0   stevel 	 * Otherwise, make sure we'll return to the kernel after executing
    374     0   stevel 	 * the copied out instruction and defer the signal.
    375     0   stevel 	 */
    376     0   stevel 	if (!t->t_dtrace_step) {
    377     0   stevel 		ASSERT(rp->r_pc < t->t_dtrace_astpc);
    378     0   stevel 		rp->r_pc += t->t_dtrace_astpc - t->t_dtrace_scrpc;
    379     0   stevel 		t->t_dtrace_step = 1;
    380     0   stevel 	}
    381     0   stevel 
    382     0   stevel 	t->t_dtrace_ast = 1;
    383     0   stevel 
    384     0   stevel 	return (1);
    385     0   stevel }
    386  3446      mrj 
    387  3446      mrj /*
    388  3446      mrj  * Additional artificial frames for the machine type. For i86pc, we're already
    389  5084  johnlev  * accounted for, so return 0. On the hypervisor, we have an additional frame
    390  5084  johnlev  * (xen_callback_handler).
    391  3446      mrj  */
    392  3446      mrj int
    393  3446      mrj dtrace_mach_aframes(void)
    394  3446      mrj {
    395  5084  johnlev #ifdef __xpv
    396  5084  johnlev 	return (1);
    397  5084  johnlev #else
    398  3446      mrj 	return (0);
    399  5084  johnlev #endif
    400  3446      mrj }
    401