Home | History | Annotate | Download | only in os
      1 /*
      2  * CDDL HEADER START
      3  *
      4  * The contents of this file are subject to the terms of the
      5  * Common Development and Distribution License (the "License").
      6  * You may not use this file except in compliance with the License.
      7  *
      8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
      9  * or http://www.opensolaris.org/os/licensing.
     10  * See the License for the specific language governing permissions
     11  * and limitations under the License.
     12  *
     13  * When distributing Covered Code, include this CDDL HEADER in each
     14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
     15  * If applicable, add the following below this CDDL HEADER, with the
     16  * fields enclosed by brackets "[]" replaced with your own identifying
     17  * information: Portions Copyright [yyyy] [name of copyright owner]
     18  *
     19  * CDDL HEADER END
     20  */
     21 
     22 /*
     23  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
     24  * Use is subject to license terms.
     25  */
     26 
     27 /*
     28  * Copyright (c) 1992 Terrence R. Lambert.
     29  * Copyright (c) 1990 The Regents of the University of California.
     30  * All rights reserved.
     31  *
     32  * This code is derived from software contributed to Berkeley by
     33  * William Jolitz.
     34  *
     35  * Redistribution and use in source and binary forms, with or without
     36  * modification, are permitted provided that the following conditions
     37  * are met:
     38  * 1. Redistributions of source code must retain the above copyright
     39  *    notice, this list of conditions and the following disclaimer.
     40  * 2. Redistributions in binary form must reproduce the above copyright
     41  *    notice, this list of conditions and the following disclaimer in the
     42  *    documentation and/or other materials provided with the distribution.
     43  * 3. All advertising materials mentioning features or use of this software
     44  *    must display the following acknowledgement:
     45  *	This product includes software developed by the University of
     46  *	California, Berkeley and its contributors.
     47  * 4. Neither the name of the University nor the names of its contributors
     48  *    may be used to endorse or promote products derived from this software
     49  *    without specific prior written permission.
     50  *
     51  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
     52  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
     53  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
     54  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
     55  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
     56  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
     57  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
     58  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
     59  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
     60  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
     61  * SUCH DAMAGE.
     62  *
     63  *	from: @(#)machdep.c	7.4 (Berkeley) 6/3/91
     64  */
     65 
     66 #include <sys/types.h>
     67 #include <sys/sysmacros.h>
     68 #include <sys/tss.h>
     69 #include <sys/segments.h>
     70 #include <sys/trap.h>
     71 #include <sys/cpuvar.h>
     72 #include <sys/bootconf.h>
     73 #include <sys/x86_archext.h>
     74 #include <sys/controlregs.h>
     75 #include <sys/archsystm.h>
     76 #include <sys/machsystm.h>
     77 #include <sys/kobj.h>
     78 #include <sys/cmn_err.h>
     79 #include <sys/reboot.h>
     80 #include <sys/kdi.h>
     81 #include <sys/mach_mmu.h>
     82 #include <sys/systm.h>
     83 
     84 #ifdef __xpv
     85 #include <sys/hypervisor.h>
     86 #include <vm/as.h>
     87 #endif
     88 
     89 #include <sys/promif.h>
     90 #include <sys/bootinfo.h>
     91 #include <vm/kboot_mmu.h>
     92 #include <vm/hat_pte.h>
     93 
     94 /*
     95  * cpu0 and default tables and structures.
     96  */
     97 user_desc_t	*gdt0;
     98 #if !defined(__xpv)
     99 desctbr_t	gdt0_default_r;
    100 #endif
    101 
    102 gate_desc_t	*idt0; 		/* interrupt descriptor table */
    103 #if defined(__i386)
    104 desctbr_t	idt0_default_r;		/* describes idt0 in IDTR format */
    105 #endif
    106 
    107 struct tss	*ktss0;			/* kernel task state structure */
    108 
    109 #if defined(__i386)
    110 struct tss	*dftss0;		/* #DF double-fault exception */
    111 #endif	/* __i386 */
    112 
    113 user_desc_t	zero_udesc;		/* base zero user desc native procs */
    114 user_desc_t	null_udesc;		/* null user descriptor */
    115 system_desc_t	null_sdesc;		/* null system descriptor */
    116 
    117 #if defined(__amd64)
    118 user_desc_t	zero_u32desc;		/* 32-bit compatibility procs */
    119 #endif	/* __amd64 */
    120 
    121 #if defined(__amd64)
    122 user_desc_t	ucs_on;
    123 user_desc_t	ucs_off;
    124 user_desc_t	ucs32_on;
    125 user_desc_t	ucs32_off;
    126 #endif	/* __amd64 */
    127 
    128 #pragma	align	16(dblfault_stack0)
    129 char		dblfault_stack0[DEFAULTSTKSZ];
    130 
    131 extern void	fast_null(void);
    132 extern hrtime_t	get_hrtime(void);
    133 extern hrtime_t	gethrvtime(void);
    134 extern hrtime_t	get_hrestime(void);
    135 extern uint64_t	getlgrp(void);
    136 
    137 void (*(fasttable[]))(void) = {
    138 	fast_null,			/* T_FNULL routine */
    139 	fast_null,			/* T_FGETFP routine (initially null) */
    140 	fast_null,			/* T_FSETFP routine (initially null) */
    141 	(void (*)())get_hrtime,		/* T_GETHRTIME */
    142 	(void (*)())gethrvtime,		/* T_GETHRVTIME */
    143 	(void (*)())get_hrestime,	/* T_GETHRESTIME */
    144 	(void (*)())getlgrp		/* T_GETLGRP */
    145 };
    146 
    147 /*
    148  * Structure containing pre-computed descriptors to allow us to temporarily
    149  * interpose on a standard handler.
    150  */
    151 struct interposing_handler {
    152 	int ih_inum;
    153 	gate_desc_t ih_interp_desc;
    154 	gate_desc_t ih_default_desc;
    155 };
    156 
    157 /*
    158  * The brand infrastructure interposes on two handlers, and we use one as a
    159  * NULL signpost.
    160  */
    161 static struct interposing_handler brand_tbl[3];
    162 
    163 /*
    164  * software prototypes for default local descriptor table
    165  */
    166 
    167 /*
    168  * Routines for loading segment descriptors in the format the hardware
    169  * can understand.
    170  */
    171 
    172 #if defined(__amd64)
    173 
    174 /*
    175  * In long mode we have the new L or long mode attribute bit
    176  * for code segments. Only the conforming bit in type is used along
    177  * with descriptor priority and present bits. Default operand size must
    178  * be zero when in long mode. In 32-bit compatibility mode all fields
    179  * are treated as in legacy mode. For data segments while in long mode
    180  * only the present bit is loaded.
    181  */
    182 void
    183 set_usegd(user_desc_t *dp, uint_t lmode, void *base, size_t size,
    184     uint_t type, uint_t dpl, uint_t gran, uint_t defopsz)
    185 {
    186 	ASSERT(lmode == SDP_SHORT || lmode == SDP_LONG);
    187 
    188 	/*
    189 	 * 64-bit long mode.
    190 	 */
    191 	if (lmode == SDP_LONG)
    192 		dp->usd_def32 = 0;		/* 32-bit operands only */
    193 	else
    194 		/*
    195 		 * 32-bit compatibility mode.
    196 		 */
    197 		dp->usd_def32 = defopsz;	/* 0 = 16, 1 = 32-bit ops */
    198 
    199 	dp->usd_long = lmode;	/* 64-bit mode */
    200 	dp->usd_type = type;
    201 	dp->usd_dpl = dpl;
    202 	dp->usd_p = 1;
    203 	dp->usd_gran = gran;		/* 0 = bytes, 1 = pages */
    204 
    205 	dp->usd_lobase = (uintptr_t)base;
    206 	dp->usd_midbase = (uintptr_t)base >> 16;
    207 	dp->usd_hibase = (uintptr_t)base >> (16 + 8);
    208 	dp->usd_lolimit = size;
    209 	dp->usd_hilimit = (uintptr_t)size >> 16;
    210 }
    211 
    212 #elif defined(__i386)
    213 
    214 /*
    215  * Install user segment descriptor for code and data.
    216  */
    217 void
    218 set_usegd(user_desc_t *dp, void *base, size_t size, uint_t type,
    219     uint_t dpl, uint_t gran, uint_t defopsz)
    220 {
    221 	dp->usd_lolimit = size;
    222 	dp->usd_hilimit = (uintptr_t)size >> 16;
    223 
    224 	dp->usd_lobase = (uintptr_t)base;
    225 	dp->usd_midbase = (uintptr_t)base >> 16;
    226 	dp->usd_hibase = (uintptr_t)base >> (16 + 8);
    227 
    228 	dp->usd_type = type;
    229 	dp->usd_dpl = dpl;
    230 	dp->usd_p = 1;
    231 	dp->usd_def32 = defopsz;	/* 0 = 16, 1 = 32 bit operands */
    232 	dp->usd_gran = gran;		/* 0 = bytes, 1 = pages */
    233 }
    234 
    235 #endif	/* __i386 */
    236 
    237 /*
    238  * Install system segment descriptor for LDT and TSS segments.
    239  */
    240 
    241 #if defined(__amd64)
    242 
    243 void
    244 set_syssegd(system_desc_t *dp, void *base, size_t size, uint_t type,
    245     uint_t dpl)
    246 {
    247 	dp->ssd_lolimit = size;
    248 	dp->ssd_hilimit = (uintptr_t)size >> 16;
    249 
    250 	dp->ssd_lobase = (uintptr_t)base;
    251 	dp->ssd_midbase = (uintptr_t)base >> 16;
    252 	dp->ssd_hibase = (uintptr_t)base >> (16 + 8);
    253 	dp->ssd_hi64base = (uintptr_t)base >> (16 + 8 + 8);
    254 
    255 	dp->ssd_type = type;
    256 	dp->ssd_zero1 = 0;	/* must be zero */
    257 	dp->ssd_zero2 = 0;
    258 	dp->ssd_dpl = dpl;
    259 	dp->ssd_p = 1;
    260 	dp->ssd_gran = 0;	/* force byte units */
    261 }
    262 
    263 void *
    264 get_ssd_base(system_desc_t *dp)
    265 {
    266 	uintptr_t	base;
    267 
    268 	base = (uintptr_t)dp->ssd_lobase |
    269 	    (uintptr_t)dp->ssd_midbase << 16 |
    270 	    (uintptr_t)dp->ssd_hibase << (16 + 8) |
    271 	    (uintptr_t)dp->ssd_hi64base << (16 + 8 + 8);
    272 	return ((void *)base);
    273 }
    274 
    275 #elif defined(__i386)
    276 
    277 void
    278 set_syssegd(system_desc_t *dp, void *base, size_t size, uint_t type,
    279     uint_t dpl)
    280 {
    281 	dp->ssd_lolimit = size;
    282 	dp->ssd_hilimit = (uintptr_t)size >> 16;
    283 
    284 	dp->ssd_lobase = (uintptr_t)base;
    285 	dp->ssd_midbase = (uintptr_t)base >> 16;
    286 	dp->ssd_hibase = (uintptr_t)base >> (16 + 8);
    287 
    288 	dp->ssd_type = type;
    289 	dp->ssd_zero = 0;	/* must be zero */
    290 	dp->ssd_dpl = dpl;
    291 	dp->ssd_p = 1;
    292 	dp->ssd_gran = 0;	/* force byte units */
    293 }
    294 
    295 void *
    296 get_ssd_base(system_desc_t *dp)
    297 {
    298 	uintptr_t	base;
    299 
    300 	base = (uintptr_t)dp->ssd_lobase |
    301 	    (uintptr_t)dp->ssd_midbase << 16 |
    302 	    (uintptr_t)dp->ssd_hibase << (16 + 8);
    303 	return ((void *)base);
    304 }
    305 
    306 #endif	/* __i386 */
    307 
    308 /*
    309  * Install gate segment descriptor for interrupt, trap, call and task gates.
    310  */
    311 
    312 #if defined(__amd64)
    313 
    314 /*ARGSUSED*/
    315 void
    316 set_gatesegd(gate_desc_t *dp, void (*func)(void), selector_t sel,
    317     uint_t type, uint_t dpl, uint_t vector)
    318 {
    319 	dp->sgd_looffset = (uintptr_t)func;
    320 	dp->sgd_hioffset = (uintptr_t)func >> 16;
    321 	dp->sgd_hi64offset = (uintptr_t)func >> (16 + 16);
    322 
    323 	dp->sgd_selector =  (uint16_t)sel;
    324 
    325 	/*
    326 	 * For 64 bit native we use the IST stack mechanism
    327 	 * for double faults. All other traps use the CPL = 0
    328 	 * (tss_rsp0) stack.
    329 	 */
    330 #if !defined(__xpv)
    331 	if (vector == T_DBLFLT)
    332 		dp->sgd_ist = 1;
    333 	else
    334 #endif
    335 		dp->sgd_ist = 0;
    336 
    337 	dp->sgd_type = type;
    338 	dp->sgd_dpl = dpl;
    339 	dp->sgd_p = 1;
    340 }
    341 
    342 #elif defined(__i386)
    343 
    344 /*ARGSUSED*/
    345 void
    346 set_gatesegd(gate_desc_t *dp, void (*func)(void), selector_t sel,
    347     uint_t type, uint_t dpl, uint_t unused)
    348 {
    349 	dp->sgd_looffset = (uintptr_t)func;
    350 	dp->sgd_hioffset = (uintptr_t)func >> 16;
    351 
    352 	dp->sgd_selector =  (uint16_t)sel;
    353 	dp->sgd_stkcpy = 0;	/* always zero bytes */
    354 	dp->sgd_type = type;
    355 	dp->sgd_dpl = dpl;
    356 	dp->sgd_p = 1;
    357 }
    358 
    359 #endif	/* __i386 */
    360 
    361 /*
    362  * Updates a single user descriptor in the the GDT of the current cpu.
    363  * Caller is responsible for preventing cpu migration.
    364  */
    365 
    366 void
    367 gdt_update_usegd(uint_t sidx, user_desc_t *udp)
    368 {
    369 #if defined(__xpv)
    370 
    371 	uint64_t dpa = CPU->cpu_m.mcpu_gdtpa + sizeof (*udp) * sidx;
    372 
    373 	if (HYPERVISOR_update_descriptor(pa_to_ma(dpa), *(uint64_t *)udp))
    374 		panic("gdt_update_usegd: HYPERVISOR_update_descriptor");
    375 
    376 #else	/* __xpv */
    377 
    378 	CPU->cpu_gdt[sidx] = *udp;
    379 
    380 #endif	/* __xpv */
    381 }
    382 
    383 /*
    384  * Writes single descriptor pointed to by udp into a processes
    385  * LDT entry pointed to by ldp.
    386  */
    387 int
    388 ldt_update_segd(user_desc_t *ldp, user_desc_t *udp)
    389 {
    390 #if defined(__xpv)
    391 
    392 	uint64_t dpa;
    393 
    394 	dpa = mmu_ptob(hat_getpfnum(kas.a_hat, (caddr_t)ldp)) |
    395 	    ((uintptr_t)ldp & PAGEOFFSET);
    396 
    397 	/*
    398 	 * The hypervisor is a little more restrictive about what it
    399 	 * supports in the LDT.
    400 	 */
    401 	if (HYPERVISOR_update_descriptor(pa_to_ma(dpa), *(uint64_t *)udp) != 0)
    402 		return (EINVAL);
    403 
    404 #else	/* __xpv */
    405 
    406 	*ldp = *udp;
    407 
    408 #endif	/* __xpv */
    409 	return (0);
    410 }
    411 
    412 #if defined(__xpv)
    413 
    414 /*
    415  * Converts hw format gate descriptor into pseudo-IDT format for the hypervisor.
    416  * Returns true if a valid entry was written.
    417  */
    418 int
    419 xen_idt_to_trap_info(uint_t vec, gate_desc_t *sgd, void *ti_arg)
    420 {
    421 	trap_info_t *ti = ti_arg;	/* XXPV	Aargh - segments.h comment */
    422 
    423 	/*
    424 	 * skip holes in the IDT
    425 	 */
    426 	if (GATESEG_GETOFFSET(sgd) == 0)
    427 		return (0);
    428 
    429 	ASSERT(sgd->sgd_type == SDT_SYSIGT);
    430 	ti->vector = vec;
    431 	TI_SET_DPL(ti, sgd->sgd_dpl);
    432 
    433 	/*
    434 	 * Is this an interrupt gate?
    435 	 */
    436 	if (sgd->sgd_type == SDT_SYSIGT) {
    437 		/* LINTED */
    438 		TI_SET_IF(ti, 1);
    439 	}
    440 	ti->cs = sgd->sgd_selector;
    441 #if defined(__amd64)
    442 	ti->cs |= SEL_KPL;	/* force into ring 3. see KCS_SEL  */
    443 #endif
    444 	ti->address = GATESEG_GETOFFSET(sgd);
    445 	return (1);
    446 }
    447 
    448 /*
    449  * Convert a single hw format gate descriptor and write it into our virtual IDT.
    450  */
    451 void
    452 xen_idt_write(gate_desc_t *sgd, uint_t vec)
    453 {
    454 	trap_info_t trapinfo[2];
    455 
    456 	bzero(trapinfo, sizeof (trapinfo));
    457 	if (xen_idt_to_trap_info(vec, sgd, &trapinfo[0]) == 0)
    458 		return;
    459 	if (xen_set_trap_table(trapinfo) != 0)
    460 		panic("xen_idt_write: xen_set_trap_table() failed");
    461 }
    462 
    463 #endif	/* __xpv */
    464 
    465 #if defined(__amd64)
    466 
    467 /*
    468  * Build kernel GDT.
    469  */
    470 
    471 static void
    472 init_gdt_common(user_desc_t *gdt)
    473 {
    474 	int i;
    475 
    476 	/*
    477 	 * 64-bit kernel code segment.
    478 	 */
    479 	set_usegd(&gdt[GDT_KCODE], SDP_LONG, NULL, 0, SDT_MEMERA, SEL_KPL,
    480 	    SDP_PAGES, SDP_OP32);
    481 
    482 	/*
    483 	 * 64-bit kernel data segment. The limit attribute is ignored in 64-bit
    484 	 * mode, but we set it here to 0xFFFF so that we can use the SYSRET
    485 	 * instruction to return from system calls back to 32-bit applications.
    486 	 * SYSRET doesn't update the base, limit, or attributes of %ss or %ds
    487 	 * descriptors. We therefore must ensure that the kernel uses something,
    488 	 * though it will be ignored by hardware, that is compatible with 32-bit
    489 	 * apps. For the same reason we must set the default op size of this
    490 	 * descriptor to 32-bit operands.
    491 	 */
    492 	set_usegd(&gdt[GDT_KDATA], SDP_LONG, NULL, -1, SDT_MEMRWA,
    493 	    SEL_KPL, SDP_PAGES, SDP_OP32);
    494 	gdt[GDT_KDATA].usd_def32 = 1;
    495 
    496 	/*
    497 	 * 64-bit user code segment.
    498 	 */
    499 	set_usegd(&gdt[GDT_UCODE], SDP_LONG, NULL, 0, SDT_MEMERA, SEL_UPL,
    500 	    SDP_PAGES, SDP_OP32);
    501 
    502 	/*
    503 	 * 32-bit user code segment.
    504 	 */
    505 	set_usegd(&gdt[GDT_U32CODE], SDP_SHORT, NULL, -1, SDT_MEMERA,
    506 	    SEL_UPL, SDP_PAGES, SDP_OP32);
    507 
    508 	/*
    509 	 * See gdt_ucode32() and gdt_ucode_native().
    510 	 */
    511 	ucs_on = ucs_off = gdt[GDT_UCODE];
    512 	ucs_off.usd_p = 0;	/* forces #np fault */
    513 
    514 	ucs32_on = ucs32_off = gdt[GDT_U32CODE];
    515 	ucs32_off.usd_p = 0;	/* forces #np fault */
    516 
    517 	/*
    518 	 * 32 and 64 bit data segments can actually share the same descriptor.
    519 	 * In long mode only the present bit is checked but all other fields
    520 	 * are loaded. But in compatibility mode all fields are interpreted
    521 	 * as in legacy mode so they must be set correctly for a 32-bit data
    522 	 * segment.
    523 	 */
    524 	set_usegd(&gdt[GDT_UDATA], SDP_SHORT, NULL, -1, SDT_MEMRWA, SEL_UPL,
    525 	    SDP_PAGES, SDP_OP32);
    526 
    527 #if !defined(__xpv)
    528 
    529 	/*
    530 	 * The 64-bit kernel has no default LDT. By default, the LDT descriptor
    531 	 * in the GDT is 0.
    532 	 */
    533 
    534 	/*
    535 	 * Kernel TSS
    536 	 */
    537 	set_syssegd((system_desc_t *)&gdt[GDT_KTSS], ktss0,
    538 	    sizeof (*ktss0) - 1, SDT_SYSTSS, SEL_KPL);
    539 
    540 #endif	/* !__xpv */
    541 
    542 	/*
    543 	 * Initialize fs and gs descriptors for 32 bit processes.
    544 	 * Only attributes and limits are initialized, the effective
    545 	 * base address is programmed via fsbase/gsbase.
    546 	 */
    547 	set_usegd(&gdt[GDT_LWPFS], SDP_SHORT, NULL, -1, SDT_MEMRWA,
    548 	    SEL_UPL, SDP_PAGES, SDP_OP32);
    549 	set_usegd(&gdt[GDT_LWPGS], SDP_SHORT, NULL, -1, SDT_MEMRWA,
    550 	    SEL_UPL, SDP_PAGES, SDP_OP32);
    551 
    552 	/*
    553 	 * Initialize the descriptors set aside for brand usage.
    554 	 * Only attributes and limits are initialized.
    555 	 */
    556 	for (i = GDT_BRANDMIN; i <= GDT_BRANDMAX; i++)
    557 		set_usegd(&gdt0[i], SDP_SHORT, NULL, -1, SDT_MEMRWA,
    558 		    SEL_UPL, SDP_PAGES, SDP_OP32);
    559 
    560 	/*
    561 	 * Initialize convenient zero base user descriptors for clearing
    562 	 * lwp private %fs and %gs descriptors in GDT. See setregs() for
    563 	 * an example.
    564 	 */
    565 	set_usegd(&zero_udesc, SDP_LONG, 0, 0, SDT_MEMRWA, SEL_UPL,
    566 	    SDP_BYTES, SDP_OP32);
    567 	set_usegd(&zero_u32desc, SDP_SHORT, 0, -1, SDT_MEMRWA, SEL_UPL,
    568 	    SDP_PAGES, SDP_OP32);
    569 }
    570 
    571 #if defined(__xpv)
    572 
    573 static user_desc_t *
    574 init_gdt(void)
    575 {
    576 	uint64_t gdtpa;
    577 	ulong_t ma[1];		/* XXPV should be a memory_t */
    578 	ulong_t addr;
    579 
    580 #if !defined(__lint)
    581 	/*
    582 	 * Our gdt is never larger than a single page.
    583 	 */
    584 	ASSERT((sizeof (*gdt0) * NGDT) <= PAGESIZE);
    585 #endif
    586 	gdt0 = (user_desc_t *)BOP_ALLOC(bootops, (caddr_t)GDT_VA,
    587 	    PAGESIZE, PAGESIZE);
    588 	bzero(gdt0, PAGESIZE);
    589 
    590 	init_gdt_common(gdt0);
    591 
    592 	/*
    593 	 * XXX Since we never invoke kmdb until after the kernel takes
    594 	 * over the descriptor tables why not have it use the kernel's
    595 	 * selectors?
    596 	 */
    597 	if (boothowto & RB_DEBUG) {
    598 		set_usegd(&gdt0[GDT_B32DATA], SDP_LONG, NULL, -1, SDT_MEMRWA,
    599 		    SEL_KPL, SDP_PAGES, SDP_OP32);
    600 		set_usegd(&gdt0[GDT_B64CODE], SDP_LONG, NULL, -1, SDT_MEMERA,
    601 		    SEL_KPL, SDP_PAGES, SDP_OP32);
    602 	}
    603 
    604 	/*
    605 	 * Clear write permission for page containing the gdt and install it.
    606 	 */
    607 	gdtpa = pfn_to_pa(va_to_pfn(gdt0));
    608 	ma[0] = (ulong_t)(pa_to_ma(gdtpa) >> PAGESHIFT);
    609 	kbm_read_only((uintptr_t)gdt0, gdtpa);
    610 	xen_set_gdt(ma, NGDT);
    611 
    612 	/*
    613 	 * Reload the segment registers to use the new GDT.
    614 	 * On 64-bit, fixup KCS_SEL to be in ring 3.
    615 	 * See KCS_SEL in segments.h.
    616 	 */
    617 	load_segment_registers((KCS_SEL | SEL_KPL), KFS_SEL, KGS_SEL, KDS_SEL);
    618 
    619 	/*
    620 	 *  setup %gs for kernel
    621 	 */
    622 	xen_set_segment_base(SEGBASE_GS_KERNEL, (ulong_t)&cpus[0]);
    623 
    624 	/*
    625 	 * XX64 We should never dereference off "other gsbase" or
    626 	 * "fsbase".  So, we should arrange to point FSBASE and
    627 	 * KGSBASE somewhere truly awful e.g. point it at the last
    628 	 * valid address below the hole so that any attempts to index
    629 	 * off them cause an exception.
    630 	 *
    631 	 * For now, point it at 8G -- at least it should be unmapped
    632 	 * until some 64-bit processes run.
    633 	 */
    634 	addr = 0x200000000ul;
    635 	xen_set_segment_base(SEGBASE_FS, addr);
    636 	xen_set_segment_base(SEGBASE_GS_USER, addr);
    637 	xen_set_segment_base(SEGBASE_GS_USER_SEL, 0);
    638 
    639 	return (gdt0);
    640 }
    641 
    642 #else	/* __xpv */
    643 
    644 static user_desc_t *
    645 init_gdt(void)
    646 {
    647 	desctbr_t	r_bgdt, r_gdt;
    648 	user_desc_t	*bgdt;
    649 
    650 #if !defined(__lint)
    651 	/*
    652 	 * Our gdt is never larger than a single page.
    653 	 */
    654 	ASSERT((sizeof (*gdt0) * NGDT) <= PAGESIZE);
    655 #endif
    656 	gdt0 = (user_desc_t *)BOP_ALLOC(bootops, (caddr_t)GDT_VA,
    657 	    PAGESIZE, PAGESIZE);
    658 	bzero(gdt0, PAGESIZE);
    659 
    660 	init_gdt_common(gdt0);
    661 
    662 	/*
    663 	 * Copy in from boot's gdt to our gdt.
    664 	 * Entry 0 is the null descriptor by definition.
    665 	 */
    666 	rd_gdtr(&r_bgdt);
    667 	bgdt = (user_desc_t *)r_bgdt.dtr_base;
    668 	if (bgdt == NULL)
    669 		panic("null boot gdt");
    670 
    671 	gdt0[GDT_B32DATA] = bgdt[GDT_B32DATA];
    672 	gdt0[GDT_B32CODE] = bgdt[GDT_B32CODE];
    673 	gdt0[GDT_B16CODE] = bgdt[GDT_B16CODE];
    674 	gdt0[GDT_B16DATA] = bgdt[GDT_B16DATA];
    675 	gdt0[GDT_B64CODE] = bgdt[GDT_B64CODE];
    676 
    677 	/*
    678 	 * Install our new GDT
    679 	 */
    680 	r_gdt.dtr_limit = (sizeof (*gdt0) * NGDT) - 1;
    681 	r_gdt.dtr_base = (uintptr_t)gdt0;
    682 	wr_gdtr(&r_gdt);
    683 
    684 	/*
    685 	 * Reload the segment registers to use the new GDT
    686 	 */
    687 	load_segment_registers(KCS_SEL, KFS_SEL, KGS_SEL, KDS_SEL);
    688 
    689 	/*
    690 	 *  setup %gs for kernel
    691 	 */
    692 	wrmsr(MSR_AMD_GSBASE, (uint64_t)&cpus[0]);
    693 
    694 	/*
    695 	 * XX64 We should never dereference off "other gsbase" or
    696 	 * "fsbase".  So, we should arrange to point FSBASE and
    697 	 * KGSBASE somewhere truly awful e.g. point it at the last
    698 	 * valid address below the hole so that any attempts to index
    699 	 * off them cause an exception.
    700 	 *
    701 	 * For now, point it at 8G -- at least it should be unmapped
    702 	 * until some 64-bit processes run.
    703 	 */
    704 	wrmsr(MSR_AMD_FSBASE, 0x200000000ul);
    705 	wrmsr(MSR_AMD_KGSBASE, 0x200000000ul);
    706 	return (gdt0);
    707 }
    708 
    709 #endif	/* __xpv */
    710 
    711 #elif defined(__i386)
    712 
    713 static void
    714 init_gdt_common(user_desc_t *gdt)
    715 {
    716 	int i;
    717 
    718 	/*
    719 	 * Text and data for both kernel and user span entire 32 bit
    720 	 * address space.
    721 	 */
    722 
    723 	/*
    724 	 * kernel code segment.
    725 	 */
    726 	set_usegd(&gdt[GDT_KCODE], NULL, -1, SDT_MEMERA, SEL_KPL, SDP_PAGES,
    727 	    SDP_OP32);
    728 
    729 	/*
    730 	 * kernel data segment.
    731 	 */
    732 	set_usegd(&gdt[GDT_KDATA], NULL, -1, SDT_MEMRWA, SEL_KPL, SDP_PAGES,
    733 	    SDP_OP32);
    734 
    735 	/*
    736 	 * user code segment.
    737 	 */
    738 	set_usegd(&gdt[GDT_UCODE], NULL, -1, SDT_MEMERA, SEL_UPL, SDP_PAGES,
    739 	    SDP_OP32);
    740 
    741 	/*
    742 	 * user data segment.
    743 	 */
    744 	set_usegd(&gdt[GDT_UDATA], NULL, -1, SDT_MEMRWA, SEL_UPL, SDP_PAGES,
    745 	    SDP_OP32);
    746 
    747 #if !defined(__xpv)
    748 
    749 	/*
    750 	 * TSS for T_DBLFLT (double fault) handler
    751 	 */
    752 	set_syssegd((system_desc_t *)&gdt[GDT_DBFLT], dftss0,
    753 	    sizeof (*dftss0) - 1, SDT_SYSTSS, SEL_KPL);
    754 
    755 	/*
    756 	 * TSS for kernel
    757 	 */
    758 	set_syssegd((system_desc_t *)&gdt[GDT_KTSS], ktss0,
    759 	    sizeof (*ktss0) - 1, SDT_SYSTSS, SEL_KPL);
    760 
    761 #endif	/* !__xpv */
    762 
    763 	/*
    764 	 * %gs selector for kernel
    765 	 */
    766 	set_usegd(&gdt[GDT_GS], &cpus[0], sizeof (struct cpu) -1, SDT_MEMRWA,
    767 	    SEL_KPL, SDP_BYTES, SDP_OP32);
    768 
    769 	/*
    770 	 * Initialize lwp private descriptors.
    771 	 * Only attributes and limits are initialized, the effective
    772 	 * base address is programmed via fsbase/gsbase.
    773 	 */
    774 	set_usegd(&gdt[GDT_LWPFS], NULL, (size_t)-1, SDT_MEMRWA, SEL_UPL,
    775 	    SDP_PAGES, SDP_OP32);
    776 	set_usegd(&gdt[GDT_LWPGS], NULL, (size_t)-1, SDT_MEMRWA, SEL_UPL,
    777 	    SDP_PAGES, SDP_OP32);
    778 
    779 	/*
    780 	 * Initialize the descriptors set aside for brand usage.
    781 	 * Only attributes and limits are initialized.
    782 	 */
    783 	for (i = GDT_BRANDMIN; i <= GDT_BRANDMAX; i++)
    784 		set_usegd(&gdt0[i], NULL, (size_t)-1, SDT_MEMRWA, SEL_UPL,
    785 		    SDP_PAGES, SDP_OP32);
    786 	/*
    787 	 * Initialize convenient zero base user descriptor for clearing
    788 	 * lwp  private %fs and %gs descriptors in GDT. See setregs() for
    789 	 * an example.
    790 	 */
    791 	set_usegd(&zero_udesc, NULL, -1, SDT_MEMRWA, SEL_UPL,
    792 	    SDP_BYTES, SDP_OP32);
    793 }
    794 
    795 #if defined(__xpv)
    796 
    797 static user_desc_t *
    798 init_gdt(void)
    799 {
    800 	uint64_t gdtpa;
    801 	ulong_t ma[1];		/* XXPV should be a memory_t */
    802 
    803 #if !defined(__lint)
    804 	/*
    805 	 * Our gdt is never larger than a single page.
    806 	 */
    807 	ASSERT((sizeof (*gdt0) * NGDT) <= PAGESIZE);
    808 #endif
    809 	gdt0 = (user_desc_t *)BOP_ALLOC(bootops, (caddr_t)GDT_VA,
    810 	    PAGESIZE, PAGESIZE);
    811 	bzero(gdt0, PAGESIZE);
    812 
    813 	init_gdt_common(gdt0);
    814 	gdtpa = pfn_to_pa(va_to_pfn(gdt0));
    815 
    816 	/*
    817 	 * XXX Since we never invoke kmdb until after the kernel takes
    818 	 * over the descriptor tables why not have it use the kernel's
    819 	 * selectors?
    820 	 */
    821 	if (boothowto & RB_DEBUG) {
    822 		set_usegd(&gdt0[GDT_B32DATA], NULL, -1, SDT_MEMRWA, SEL_KPL,
    823 		    SDP_PAGES, SDP_OP32);
    824 		set_usegd(&gdt0[GDT_B32CODE], NULL, -1, SDT_MEMERA, SEL_KPL,
    825 		    SDP_PAGES, SDP_OP32);
    826 	}
    827 
    828 	/*
    829 	 * Clear write permission for page containing the gdt and install it.
    830 	 */
    831 	ma[0] = (ulong_t)(pa_to_ma(gdtpa) >> PAGESHIFT);
    832 	kbm_read_only((uintptr_t)gdt0, gdtpa);
    833 	xen_set_gdt(ma, NGDT);
    834 
    835 	/*
    836 	 * Reload the segment registers to use the new GDT
    837 	 */
    838 	load_segment_registers(
    839 	    KCS_SEL, KDS_SEL, KDS_SEL, KFS_SEL, KGS_SEL, KDS_SEL);
    840 
    841 	return (gdt0);
    842 }
    843 
    844 #else	/* __xpv */
    845 
    846 static user_desc_t *
    847 init_gdt(void)
    848 {
    849 	desctbr_t	r_bgdt, r_gdt;
    850 	user_desc_t	*bgdt;
    851 
    852 #if !defined(__lint)
    853 	/*
    854 	 * Our gdt is never larger than a single page.
    855 	 */
    856 	ASSERT((sizeof (*gdt0) * NGDT) <= PAGESIZE);
    857 #endif
    858 	/*
    859 	 * XXX this allocation belongs in our caller, not here.
    860 	 */
    861 	gdt0 = (user_desc_t *)BOP_ALLOC(bootops, (caddr_t)GDT_VA,
    862 	    PAGESIZE, PAGESIZE);
    863 	bzero(gdt0, PAGESIZE);
    864 
    865 	init_gdt_common(gdt0);
    866 
    867 	/*
    868 	 * Copy in from boot's gdt to our gdt entries.
    869 	 * Entry 0 is null descriptor by definition.
    870 	 */
    871 	rd_gdtr(&r_bgdt);
    872 	bgdt = (user_desc_t *)r_bgdt.dtr_base;
    873 	if (bgdt == NULL)
    874 		panic("null boot gdt");
    875 
    876 	gdt0[GDT_B32DATA] = bgdt[GDT_B32DATA];
    877 	gdt0[GDT_B32CODE] = bgdt[GDT_B32CODE];
    878 	gdt0[GDT_B16CODE] = bgdt[GDT_B16CODE];
    879 	gdt0[GDT_B16DATA] = bgdt[GDT_B16DATA];
    880 
    881 	/*
    882 	 * Install our new GDT
    883 	 */
    884 	r_gdt.dtr_limit = (sizeof (*gdt0) * NGDT) - 1;
    885 	r_gdt.dtr_base = (uintptr_t)gdt0;
    886 	wr_gdtr(&r_gdt);
    887 
    888 	/*
    889 	 * Reload the segment registers to use the new GDT
    890 	 */
    891 	load_segment_registers(
    892 	    KCS_SEL, KDS_SEL, KDS_SEL, KFS_SEL, KGS_SEL, KDS_SEL);
    893 
    894 	return (gdt0);
    895 }
    896 
    897 #endif	/* __xpv */
    898 #endif	/* __i386 */
    899 
    900 /*
    901  * Build kernel IDT.
    902  *
    903  * Note that for amd64 we pretty much require every gate to be an interrupt
    904  * gate which blocks interrupts atomically on entry; that's because of our
    905  * dependency on using 'swapgs' every time we come into the kernel to find
    906  * the cpu structure. If we get interrupted just before doing that, %cs could
    907  * be in kernel mode (so that the trap prolog doesn't do a swapgs), but
    908  * %gsbase is really still pointing at something in userland. Bad things will
    909  * ensue. We also use interrupt gates for i386 as well even though this is not
    910  * required for some traps.
    911  *
    912  * Perhaps they should have invented a trap gate that does an atomic swapgs?
    913  */
    914 static void
    915 init_idt_common(gate_desc_t *idt)
    916 {
    917 	set_gatesegd(&idt[T_ZERODIV], &div0trap, KCS_SEL, SDT_SYSIGT, TRP_KPL,
    918 	    0);
    919 	set_gatesegd(&idt[T_SGLSTP], &dbgtrap, KCS_SEL, SDT_SYSIGT, TRP_KPL,
    920 	    0);
    921 	set_gatesegd(&idt[T_NMIFLT], &nmiint, KCS_SEL, SDT_SYSIGT, TRP_KPL,
    922 	    0);
    923 	set_gatesegd(&idt[T_BPTFLT], &brktrap, KCS_SEL, SDT_SYSIGT, TRP_UPL,
    924 	    0);
    925 	set_gatesegd(&idt[T_OVFLW], &ovflotrap, KCS_SEL, SDT_SYSIGT, TRP_UPL,
    926 	    0);
    927 	set_gatesegd(&idt[T_BOUNDFLT], &boundstrap, KCS_SEL, SDT_SYSIGT,
    928 	    TRP_KPL, 0);
    929 	set_gatesegd(&idt[T_ILLINST], &invoptrap, KCS_SEL, SDT_SYSIGT, TRP_KPL,
    930 	    0);
    931 	set_gatesegd(&idt[T_NOEXTFLT], &ndptrap,  KCS_SEL, SDT_SYSIGT, TRP_KPL,
    932 	    0);
    933 
    934 	/*
    935 	 * double fault handler.
    936 	 *
    937 	 * Note that on the hypervisor a guest does not receive #df faults.
    938 	 * Instead a failsafe event is injected into the guest if its selectors
    939 	 * and/or stack is in a broken state. See xen_failsafe_callback.
    940 	 */
    941 #if !defined(__xpv)
    942 #if defined(__amd64)
    943 
    944 	set_gatesegd(&idt[T_DBLFLT], &syserrtrap, KCS_SEL, SDT_SYSIGT, TRP_KPL,
    945 	    T_DBLFLT);
    946 
    947 #elif defined(__i386)
    948 
    949 	/*
    950 	 * task gate required.
    951 	 */
    952 	set_gatesegd(&idt[T_DBLFLT], NULL, DFTSS_SEL, SDT_SYSTASKGT, TRP_KPL,
    953 	    0);
    954 
    955 #endif	/* __i386 */
    956 #endif	/* !__xpv */
    957 
    958 	/*
    959 	 * T_EXTOVRFLT coprocessor-segment-overrun not supported.
    960 	 */
    961 
    962 	set_gatesegd(&idt[T_TSSFLT], &invtsstrap, KCS_SEL, SDT_SYSIGT, TRP_KPL,
    963 	    0);
    964 	set_gatesegd(&idt[T_SEGFLT], &segnptrap, KCS_SEL, SDT_SYSIGT, TRP_KPL,
    965 	    0);
    966 	set_gatesegd(&idt[T_STKFLT], &stktrap, KCS_SEL, SDT_SYSIGT, TRP_KPL, 0);
    967 	set_gatesegd(&idt[T_GPFLT], &gptrap, KCS_SEL, SDT_SYSIGT, TRP_KPL, 0);
    968 	set_gatesegd(&idt[T_PGFLT], &pftrap, KCS_SEL, SDT_SYSIGT, TRP_KPL, 0);
    969 	set_gatesegd(&idt[T_EXTERRFLT], &ndperr, KCS_SEL, SDT_SYSIGT, TRP_KPL,
    970 	    0);
    971 	set_gatesegd(&idt[T_ALIGNMENT], &achktrap, KCS_SEL, SDT_SYSIGT,
    972 	    TRP_KPL, 0);
    973 	set_gatesegd(&idt[T_MCE], &mcetrap, KCS_SEL, SDT_SYSIGT, TRP_KPL, 0);
    974 	set_gatesegd(&idt[T_SIMDFPE], &xmtrap, KCS_SEL, SDT_SYSIGT, TRP_KPL, 0);
    975 
    976 	/*
    977 	 * install "int80" handler at, well, 0x80.
    978 	 */
    979 	set_gatesegd(&idt0[T_INT80], &sys_int80, KCS_SEL, SDT_SYSIGT, TRP_UPL,
    980 	    0);
    981 
    982 	/*
    983 	 * install fast trap handler at 210.
    984 	 */
    985 	set_gatesegd(&idt[T_FASTTRAP], &fasttrap, KCS_SEL, SDT_SYSIGT, TRP_UPL,
    986 	    0);
    987 
    988 	/*
    989 	 * System call handler.
    990 	 */
    991 #if defined(__amd64)
    992 	set_gatesegd(&idt[T_SYSCALLINT], &sys_syscall_int, KCS_SEL, SDT_SYSIGT,
    993 	    TRP_UPL, 0);
    994 
    995 #elif defined(__i386)
    996 	set_gatesegd(&idt[T_SYSCALLINT], &sys_call, KCS_SEL, SDT_SYSIGT,
    997 	    TRP_UPL, 0);
    998 #endif	/* __i386 */
    999 
   1000 	/*
   1001 	 * Install the DTrace interrupt handler for the pid provider.
   1002 	 */
   1003 	set_gatesegd(&idt[T_DTRACE_RET], &dtrace_ret, KCS_SEL,
   1004 	    SDT_SYSIGT, TRP_UPL, 0);
   1005 
   1006 	/*
   1007 	 * Prepare interposing descriptors for the branded "int80"
   1008 	 * and syscall handlers and cache copies of the default
   1009 	 * descriptors.
   1010 	 */
   1011 	brand_tbl[0].ih_inum = T_INT80;
   1012 	brand_tbl[0].ih_default_desc = idt0[T_INT80];
   1013 	set_gatesegd(&(brand_tbl[0].ih_interp_desc), &brand_sys_int80, KCS_SEL,
   1014 	    SDT_SYSIGT, TRP_UPL, 0);
   1015 
   1016 	brand_tbl[1].ih_inum = T_SYSCALLINT;
   1017 	brand_tbl[1].ih_default_desc = idt0[T_SYSCALLINT];
   1018 
   1019 #if defined(__amd64)
   1020 	set_gatesegd(&(brand_tbl[1].ih_interp_desc), &brand_sys_syscall_int,
   1021 	    KCS_SEL, SDT_SYSIGT, TRP_UPL, 0);
   1022 #elif defined(__i386)
   1023 	set_gatesegd(&(brand_tbl[1].ih_interp_desc), &brand_sys_call,
   1024 	    KCS_SEL, SDT_SYSIGT, TRP_UPL, 0);
   1025 #endif	/* __i386 */
   1026 
   1027 	brand_tbl[2].ih_inum = 0;
   1028 }
   1029 
   1030 #if defined(__xpv)
   1031 
   1032 static void
   1033 init_idt(gate_desc_t *idt)
   1034 {
   1035 	init_idt_common(idt);
   1036 }
   1037 
   1038 #else	/* __xpv */
   1039 
   1040 static void
   1041 init_idt(gate_desc_t *idt)
   1042 {
   1043 	char	ivctname[80];
   1044 	void	(*ivctptr)(void);
   1045 	int	i;
   1046 
   1047 	/*
   1048 	 * Initialize entire table with 'reserved' trap and then overwrite
   1049 	 * specific entries. T_EXTOVRFLT (9) is unsupported and reserved
   1050 	 * since it can only be generated on a 386 processor. 15 is also
   1051 	 * unsupported and reserved.
   1052 	 */
   1053 	for (i = 0; i < NIDT; i++)
   1054 		set_gatesegd(&idt[i], &resvtrap, KCS_SEL, SDT_SYSIGT, TRP_KPL,
   1055 		    0);
   1056 
   1057 	/*
   1058 	 * 20-31 reserved
   1059 	 */
   1060 	for (i = 20; i < 32; i++)
   1061 		set_gatesegd(&idt[i], &invaltrap, KCS_SEL, SDT_SYSIGT, TRP_KPL,
   1062 		    0);
   1063 
   1064 	/*
   1065 	 * interrupts 32 - 255
   1066 	 */
   1067 	for (i = 32; i < 256; i++) {
   1068 		(void) snprintf(ivctname, sizeof (ivctname), "ivct%d", i);
   1069 		ivctptr = (void (*)(void))kobj_getsymvalue(ivctname, 0);
   1070 		if (ivctptr == NULL)
   1071 			panic("kobj_getsymvalue(%s) failed", ivctname);
   1072 
   1073 		set_gatesegd(&idt[i], ivctptr, KCS_SEL, SDT_SYSIGT, TRP_KPL, 0);
   1074 	}
   1075 
   1076 	/*
   1077 	 * Now install the common ones. Note that it will overlay some
   1078 	 * entries installed above like T_SYSCALLINT, T_FASTTRAP etc.
   1079 	 */
   1080 	init_idt_common(idt);
   1081 }
   1082 
   1083 #endif	/* __xpv */
   1084 
   1085 /*
   1086  * The kernel does not deal with LDTs unless a user explicitly creates
   1087  * one. Under normal circumstances, the LDTR contains 0. Any process attempting
   1088  * to reference the LDT will therefore cause a #gp. System calls made via the
   1089  * obsolete lcall mechanism are emulated by the #gp fault handler.
   1090  */
   1091 static void
   1092 init_ldt(void)
   1093 {
   1094 #if defined(__xpv)
   1095 	xen_set_ldt(NULL, 0);
   1096 #else
   1097 	wr_ldtr(0);
   1098 #endif
   1099 }
   1100 
   1101 #if !defined(__xpv)
   1102 #if defined(__amd64)
   1103 
   1104 static void
   1105 init_tss(void)
   1106 {
   1107 	/*
   1108 	 * tss_rsp0 is dynamically filled in by resume() on each context switch.
   1109 	 * All exceptions but #DF will run on the thread stack.
   1110 	 * Set up the double fault stack here.
   1111 	 */
   1112 	ktss0->tss_ist1 =
   1113 	    (uint64_t)&dblfault_stack0[sizeof (dblfault_stack0)];
   1114 
   1115 	/*
   1116 	 * Set I/O bit map offset equal to size of TSS segment limit
   1117 	 * for no I/O permission map. This will force all user I/O
   1118 	 * instructions to generate #gp fault.
   1119 	 */
   1120 	ktss0->tss_bitmapbase = sizeof (*ktss0);
   1121 
   1122 	/*
   1123 	 * Point %tr to descriptor for ktss0 in gdt.
   1124 	 */
   1125 	wr_tsr(KTSS_SEL);
   1126 }
   1127 
   1128 #elif defined(__i386)
   1129 
   1130 static void
   1131 init_tss(void)
   1132 {
   1133 	/*
   1134 	 * ktss0->tss_esp dynamically filled in by resume() on each
   1135 	 * context switch.
   1136 	 */
   1137 	ktss0->tss_ss0	= KDS_SEL;
   1138 	ktss0->tss_eip	= (uint32_t)_start;
   1139 	ktss0->tss_ds	= ktss0->tss_es = ktss0->tss_ss = KDS_SEL;
   1140 	ktss0->tss_cs	= KCS_SEL;
   1141 	ktss0->tss_fs	= KFS_SEL;
   1142 	ktss0->tss_gs	= KGS_SEL;
   1143 	ktss0->tss_ldt	= ULDT_SEL;
   1144 
   1145 	/*
   1146 	 * Initialize double fault tss.
   1147 	 */
   1148 	dftss0->tss_esp0 = (uint32_t)&dblfault_stack0[sizeof (dblfault_stack0)];
   1149 	dftss0->tss_ss0	= KDS_SEL;
   1150 
   1151 	/*
   1152 	 * tss_cr3 will get initialized in hat_kern_setup() once our page
   1153 	 * tables have been setup.
   1154 	 */
   1155 	dftss0->tss_eip	= (uint32_t)syserrtrap;
   1156 	dftss0->tss_esp	= (uint32_t)&dblfault_stack0[sizeof (dblfault_stack0)];
   1157 	dftss0->tss_cs	= KCS_SEL;
   1158 	dftss0->tss_ds	= KDS_SEL;
   1159 	dftss0->tss_es	= KDS_SEL;
   1160 	dftss0->tss_ss	= KDS_SEL;
   1161 	dftss0->tss_fs	= KFS_SEL;
   1162 	dftss0->tss_gs	= KGS_SEL;
   1163 
   1164 	/*
   1165 	 * Set I/O bit map offset equal to size of TSS segment limit
   1166 	 * for no I/O permission map. This will force all user I/O
   1167 	 * instructions to generate #gp fault.
   1168 	 */
   1169 	ktss0->tss_bitmapbase = sizeof (*ktss0);
   1170 
   1171 	/*
   1172 	 * Point %tr to descriptor for ktss0 in gdt.
   1173 	 */
   1174 	wr_tsr(KTSS_SEL);
   1175 }
   1176 
   1177 #endif	/* __i386 */
   1178 #endif	/* !__xpv */
   1179 
   1180 #if defined(__xpv)
   1181 
   1182 void
   1183 init_desctbls(void)
   1184 {
   1185 	uint_t vec;
   1186 	user_desc_t *gdt;
   1187 
   1188 	/*
   1189 	 * Setup and install our GDT.
   1190 	 */
   1191 	gdt = init_gdt();
   1192 
   1193 	/*
   1194 	 * Store static pa of gdt to speed up pa_to_ma() translations
   1195 	 * on lwp context switches.
   1196 	 */
   1197 	ASSERT(IS_P2ALIGNED((uintptr_t)gdt, PAGESIZE));
   1198 	CPU->cpu_gdt = gdt;
   1199 	CPU->cpu_m.mcpu_gdtpa = pfn_to_pa(va_to_pfn(gdt));
   1200 
   1201 	/*
   1202 	 * Setup and install our IDT.
   1203 	 */
   1204 #if !defined(__lint)
   1205 	ASSERT(NIDT * sizeof (*idt0) <= PAGESIZE);
   1206 #endif
   1207 	idt0 = (gate_desc_t *)BOP_ALLOC(bootops, (caddr_t)IDT_VA,
   1208 	    PAGESIZE, PAGESIZE);
   1209 	bzero(idt0, PAGESIZE);
   1210 	init_idt(idt0);
   1211 	for (vec = 0; vec < NIDT; vec++)
   1212 		xen_idt_write(&idt0[vec], vec);
   1213 
   1214 	CPU->cpu_idt = idt0;
   1215 
   1216 	/*
   1217 	 * set default kernel stack
   1218 	 */
   1219 	xen_stack_switch(KDS_SEL,
   1220 	    (ulong_t)&dblfault_stack0[sizeof (dblfault_stack0)]);
   1221 
   1222 	xen_init_callbacks();
   1223 
   1224 	init_ldt();
   1225 }
   1226 
   1227 #else	/* __xpv */
   1228 
   1229 void
   1230 init_desctbls(void)
   1231 {
   1232 	user_desc_t *gdt;
   1233 	desctbr_t idtr;
   1234 
   1235 	/*
   1236 	 * Allocate IDT and TSS structures on unique pages for better
   1237 	 * performance in virtual machines.
   1238 	 */
   1239 #if !defined(__lint)
   1240 	ASSERT(NIDT * sizeof (*idt0) <= PAGESIZE);
   1241 #endif
   1242 	idt0 = (gate_desc_t *)BOP_ALLOC(bootops, (caddr_t)IDT_VA,
   1243 	    PAGESIZE, PAGESIZE);
   1244 	bzero(idt0, PAGESIZE);
   1245 #if !defined(__lint)
   1246 	ASSERT(sizeof (*ktss0) <= PAGESIZE);
   1247 #endif
   1248 	ktss0 = (struct tss *)BOP_ALLOC(bootops, (caddr_t)KTSS_VA,
   1249 	    PAGESIZE, PAGESIZE);
   1250 	bzero(ktss0, PAGESIZE);
   1251 
   1252 #if defined(__i386)
   1253 #if !defined(__lint)
   1254 	ASSERT(sizeof (*dftss0) <= PAGESIZE);
   1255 #endif
   1256 	dftss0 = (struct tss *)BOP_ALLOC(bootops, (caddr_t)DFTSS_VA,
   1257 	    PAGESIZE, PAGESIZE);
   1258 	bzero(dftss0, PAGESIZE);
   1259 #endif
   1260 
   1261 	/*
   1262 	 * Setup and install our GDT.
   1263 	 */
   1264 	gdt = init_gdt();
   1265 	ASSERT(IS_P2ALIGNED((uintptr_t)gdt, PAGESIZE));
   1266 	CPU->cpu_gdt = gdt;
   1267 
   1268 	/*
   1269 	 * Setup and install our IDT.
   1270 	 */
   1271 	init_idt(idt0);
   1272 
   1273 	idtr.dtr_base = (uintptr_t)idt0;
   1274 	idtr.dtr_limit = (NIDT * sizeof (*idt0)) - 1;
   1275 	wr_idtr(&idtr);
   1276 	CPU->cpu_idt = idt0;
   1277 
   1278 #if defined(__i386)
   1279 	/*
   1280 	 * We maintain a description of idt0 in convenient IDTR format
   1281 	 * for #pf's on some older pentium processors. See pentium_pftrap().
   1282 	 */
   1283 	idt0_default_r = idtr;
   1284 #endif	/* __i386 */
   1285 
   1286 	init_tss();
   1287 	CPU->cpu_tss = ktss0;
   1288 	init_ldt();
   1289 }
   1290 
   1291 #endif	/* __xpv */
   1292 
   1293 /*
   1294  * In the early kernel, we need to set up a simple GDT to run on.
   1295  *
   1296  * XXPV	Can dboot use this too?  See dboot_gdt.s
   1297  */
   1298 void
   1299 init_boot_gdt(user_desc_t *bgdt)
   1300 {
   1301 #if defined(__amd64)
   1302 	set_usegd(&bgdt[GDT_B32DATA], SDP_LONG, NULL, -1, SDT_MEMRWA, SEL_KPL,
   1303 	    SDP_PAGES, SDP_OP32);
   1304 	set_usegd(&bgdt[GDT_B64CODE], SDP_LONG, NULL, -1, SDT_MEMERA, SEL_KPL,
   1305 	    SDP_PAGES, SDP_OP32);
   1306 #elif defined(__i386)
   1307 	set_usegd(&bgdt[GDT_B32DATA], NULL, -1, SDT_MEMRWA, SEL_KPL,
   1308 	    SDP_PAGES, SDP_OP32);
   1309 	set_usegd(&bgdt[GDT_B32CODE], NULL, -1, SDT_MEMERA, SEL_KPL,
   1310 	    SDP_PAGES, SDP_OP32);
   1311 #endif	/* __i386 */
   1312 }
   1313 
   1314 /*
   1315  * Enable interpositioning on the system call path by rewriting the
   1316  * sys{call|enter} MSRs and the syscall-related entries in the IDT to use
   1317  * the branded entry points.
   1318  */
   1319 void
   1320 brand_interpositioning_enable(void)
   1321 {
   1322 	gate_desc_t	*idt = CPU->cpu_idt;
   1323 	int 		i;
   1324 
   1325 	ASSERT(curthread->t_preempt != 0 || getpil() >= DISP_LEVEL);
   1326 
   1327 	for (i = 0; brand_tbl[i].ih_inum; i++) {
   1328 		idt[brand_tbl[i].ih_inum] = brand_tbl[i].ih_interp_desc;
   1329 #if defined(__xpv)
   1330 		xen_idt_write(&idt[brand_tbl[i].ih_inum],
   1331 		    brand_tbl[i].ih_inum);
   1332 #endif
   1333 	}
   1334 
   1335 #if defined(__amd64)
   1336 #if defined(__xpv)
   1337 
   1338 	/*
   1339 	 * Currently the hypervisor only supports 64-bit syscalls via
   1340 	 * syscall instruction. The 32-bit syscalls are handled by
   1341 	 * interrupt gate above.
   1342 	 */
   1343 	xen_set_callback(brand_sys_syscall, CALLBACKTYPE_syscall,
   1344 	    CALLBACKF_mask_events);
   1345 
   1346 #else
   1347 
   1348 	if (x86_feature & X86_ASYSC) {
   1349 		wrmsr(MSR_AMD_LSTAR, (uintptr_t)brand_sys_syscall);
   1350 		wrmsr(MSR_AMD_CSTAR, (uintptr_t)brand_sys_syscall32);
   1351 	}
   1352 
   1353 #endif
   1354 #endif	/* __amd64 */
   1355 
   1356 	if (x86_feature & X86_SEP)
   1357 		wrmsr(MSR_INTC_SEP_EIP, (uintptr_t)brand_sys_sysenter);
   1358 }
   1359 
   1360 /*
   1361  * Disable interpositioning on the system call path by rewriting the
   1362  * sys{call|enter} MSRs and the syscall-related entries in the IDT to use
   1363  * the standard entry points, which bypass the interpositioning hooks.
   1364  */
   1365 void
   1366 brand_interpositioning_disable(void)
   1367 {
   1368 	gate_desc_t	*idt = CPU->cpu_idt;
   1369 	int i;
   1370 
   1371 	ASSERT(curthread->t_preempt != 0 || getpil() >= DISP_LEVEL);
   1372 
   1373 	for (i = 0; brand_tbl[i].ih_inum; i++) {
   1374 		idt[brand_tbl[i].ih_inum] = brand_tbl[i].ih_default_desc;
   1375 #if defined(__xpv)
   1376 		xen_idt_write(&idt[brand_tbl[i].ih_inum],
   1377 		    brand_tbl[i].ih_inum);
   1378 #endif
   1379 	}
   1380 
   1381 #if defined(__amd64)
   1382 #if defined(__xpv)
   1383 
   1384 	/*
   1385 	 * See comment above in brand_interpositioning_enable.
   1386 	 */
   1387 	xen_set_callback(sys_syscall, CALLBACKTYPE_syscall,
   1388 	    CALLBACKF_mask_events);
   1389 
   1390 #else
   1391 
   1392 	if (x86_feature & X86_ASYSC) {
   1393 		wrmsr(MSR_AMD_LSTAR, (uintptr_t)sys_syscall);
   1394 		wrmsr(MSR_AMD_CSTAR, (uintptr_t)sys_syscall32);
   1395 	}
   1396 
   1397 #endif
   1398 #endif	/* __amd64 */
   1399 
   1400 	if (x86_feature & X86_SEP)
   1401 		wrmsr(MSR_INTC_SEP_EIP, (uintptr_t)sys_sysenter);
   1402 }
   1403