/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
 * Use is subject to license terms.
 */

/*	Copyright (c) 1990, 1991 UNIX System Laboratories, Inc.	*/
/*	Copyright (c) 1984, 1986, 1987, 1988, 1989, 1990 AT&T	*/
/*	  All Rights Reserved					*/

/*	Copyright (c) 1987, 1988 Microsoft Corporation		*/
/*	  All Rights Reserved					*/


#include <sys/asm_linkage.h>
#include <sys/asm_misc.h>
#include <sys/regset.h>
#include <sys/privregs.h>
#include <sys/psw.h>
#include <sys/reboot.h>
#include <sys/x86_archext.h>
#include <sys/machparam.h>

#if defined(__lint)

#include <sys/types.h>
#include <sys/thread.h>
#include <sys/systm.h>
#include <sys/lgrp.h>
#include <sys/regset.h>
#include <sys/link.h>
#include <sys/bootconf.h>
#include <sys/bootsvcs.h>

#else	/* __lint */

#include <sys/segments.h>
#include <sys/pcb.h>
#include <sys/trap.h>
#include <sys/ftrace.h>
#include <sys/traptrace.h>
#include <sys/clock.h>
#include <sys/cmn_err.h>
#include <sys/pit.h>
#include <sys/panic.h>

#if defined(__xpv)
#include <sys/hypervisor.h>
#endif

#include "assym.h"

/*
 * Our assumptions:
 *	- We are running in protected-paged mode.
 *	- Interrupts are disabled.
 *	- The GDT and IDT are the callers; we need our copies.
 *	- The kernel's text, initialized data and bss are mapped.
 *
 * Our actions:
 *	- Save arguments
 *	- Initialize our stack pointer to the thread 0 stack (t0stack)
 *	  and leave room for a phony "struct regs".
 *	- Our GDT and IDT need to get munged.
 *	- Since we are using the boot's GDT descriptors, we need
 *	  to copy them into our GDT before we switch to ours.
 *	- We start using our GDT by loading correct values in the
 *	  selector registers (cs=KCS_SEL, ds=es=ss=KDS_SEL, fs=KFS_SEL,
 *	  gs=KGS_SEL).
 *	- The default LDT entry for syscall is set.
 *	- We load the default LDT into the hardware LDT register.
 *	- We load the default TSS into the hardware task register.
 *	- Check for cpu type, i.e. 486 vs. P5 vs. P6 etc.
 *	- mlsetup(%esp) gets called.
 *	- We change our appearance to look like the real thread 0.
 *	  (NOTE: making ourselves to be a real thread may be a noop)
 *	- main() gets called.  (NOTE: main() never returns).
 *
 * NOW, the real code!
 */
	/*
	 * The very first thing in the kernel's text segment must be a jump
	 * to the os/fakebop.c startup code.
	 */
	.text
	jmp	_start

	/*
	 * Globals:
	 */
	.globl	_locore_start
	.globl	mlsetup
	.globl	main
	.globl	panic
	.globl	t0stack
	.globl	t0
	.globl	sysp
	.globl	edata

	/*
	 * call back into boot - sysp (bootsvcs.h) and bootops (bootconf.h)
	 */
	.globl	bootops
	.globl	bootopsp

	/*
	 * NOTE: t0stack should be the first thing in the data section so that
	 * if it ever overflows, it will fault on the last kernel text page.
	 */
	.data
	.comm	t0stack, DEFAULTSTKSZ, 32
	.comm	t0, 4094, 32

#endif	/* __lint */


#if defined(__amd64)

#if defined(__lint)

/* ARGSUSED */
void
_locore_start(struct boot_syscalls *sysp, ulong_t rsi, struct bootops *bop)
{}

#else	/* __lint */

	/*
	 * kobj_init() vectors us back to here with (note) a slightly different
	 * set of arguments than _start is given (see lint prototypes above).
	 *
	 * XXX	Make this less vile, please.
	 *
	 * Summary of what the amd64 entry point below does, in order:
	 *	- zeroes the word at edata and points %rsp at the top of
	 *	  t0stack, leaving room for a struct regs
	 *	- records the boot services vector (sysp) and bootops
	 *	- stores the incoming argument registers and flags into that
	 *	  struct regs
	 *	- adjusts %cr0 (skipped when built for __xpv)
	 *	- marks X86_CPUID in x86_feature
	 *	- calls mlsetup(rp) and then main(); panics if main() returns
	 */
	ENTRY_NP(_locore_start)

	/*
	 * %rdi = boot services (should die someday)
	 * %rdx = bootops
	 * end
	 */

	leaq	edata(%rip), %rbp	/* reference edata for ksyms */
	movq	$0, (%rbp)		/* limit stack back trace */

	/*
	 * Initialize our stack pointer to the thread 0 stack (t0stack)
	 * and leave room for a "struct regs" for lwp0.  Note that the
	 * stack doesn't actually align to a 16-byte boundary until just
	 * before we call mlsetup because we want to use %rsp to point at
	 * our regs structure.
	 */
	leaq	t0stack(%rip), %rsp
	addq	$_CONST(DEFAULTSTKSZ - REGSIZE), %rsp
#if (REGSIZE & 15) == 0
	/*
	 * Offset by 8 so that the single pushq %rbp done just before the
	 * call to mlsetup lands %rsp on a 16-byte boundary.
	 */
	subq	$8, %rsp
#endif
	/*
	 * Save call back for special x86 boot services vector
	 */
	movq	%rdi, sysp(%rip)

	movq	%rdx, bootops(%rip)		/* save bootops */
	movq	$bootops, bootopsp(%rip)	/* bootopsp = &bootops */

	/*
	 * Save arguments and flags, if only for debugging ..
	 */
	movq	%rdi, REGOFF_RDI(%rsp)
	movq	%rsi, REGOFF_RSI(%rsp)
	movq	%rdx, REGOFF_RDX(%rsp)
	movq	%rcx, REGOFF_RCX(%rsp)
	movq	%r8, REGOFF_R8(%rsp)
	movq	%r9, REGOFF_R9(%rsp)
	pushf
	popq	%r11			/* %r11 = current flags */
	movq	%r11, REGOFF_RFL(%rsp)

#if !defined(__xpv)
	/*
	 * Enable write protect and alignment check faults.
	 * The CR0_WT and CR0_CE bits are cleared at the same time.
	 */
	movq	%cr0, %rax
	orq	$_CONST(CR0_WP|CR0_AM), %rax
	andq	$_BITNOT(CR0_WT|CR0_CE), %rax
	movq	%rax, %cr0
#endif	/* !__xpv */

	/*
	 * (We just assert this works by virtue of being here)
	 */
	orl	$X86_CPUID, x86_feature(%rip)

	/*
	 * mlsetup() gets called with a struct regs as argument, while
	 * main takes no args and should never return.
	 */
	xorl	%ebp, %ebp		/* zero frame pointer ends the chain */
	movq	%rsp, %rdi		/* arg0 = pointer to struct regs */
	pushq	%rbp
	/* (stack pointer now aligned on 16-byte boundary right here) */
	movq	%rsp, %rbp
	call	mlsetup
	call	main
	/* NOTREACHED */
	leaq	__return_from_main(%rip), %rdi	/* panic format string */
	xorl	%eax, %eax		/* %al = 0; presumably panic() is */
					/* variadic with no vector args */
	call	panic
	SET_SIZE(_locore_start)

#endif	/* __lint */
#endif	/* __amd64 */

#if !defined(__lint)

	/*
	 * Messages passed to panic() by the _locore_start entry points.
	 */
__return_from_main:
	.string	"main() returned"
__unsupported_cpu:
	.string	"486 style cpu detected - no longer supported!"
239 240 #endif /* !__lint */ 241 242 #if !defined(__amd64) 243 244 #if defined(__lint) 245 246 /* ARGSUSED */ 247 void 248 _locore_start(struct boot_syscalls *sysp, struct bootops *bop) 249 {} 250 251 #else /* __lint */ 252 253 /* 254 * kobj_init() vectors us back to here with (note) a slightly different 255 * set of arguments than _start is given (see lint prototypes above). 256 * 257 * XXX Make this less vile, please. 258 */ 259 ENTRY_NP(_locore_start) 260 261 /* 262 * %ecx = boot services (should die someday) 263 * %ebx = bootops 264 */ 265 mov $edata, %ebp / edata needs to be defined for ksyms 266 movl $0, (%ebp) / limit stack back trace 267 268 /* 269 * Initialize our stack pointer to the thread 0 stack (t0stack) 270 * and leave room for a phony "struct regs". 271 */ 272 movl $t0stack + DEFAULTSTKSZ - REGSIZE, %esp 273 274 /* 275 * Save call back for special x86 boot services vector 276 */ 277 mov %ecx, sysp / save call back for boot services 278 279 mov %ebx, bootops / save bootops 280 movl $bootops, bootopsp 281 282 283 /* 284 * Save all registers and flags 285 */ 286 pushal 287 pushfl 288 289 #if !defined(__xpv) 290 /* 291 * Override bios settings and enable write protect and 292 * alignment check faults. 293 */ 294 movl %cr0, %eax 295 296 /* 297 * enable WP for detecting faults, and enable alignment checking. 298 */ 299 orl $_CONST(CR0_WP|CR0_AM), %eax 300 andl $_BITNOT(CR0_WT|CR0_CE), %eax 301 movl %eax, %cr0 / set the cr0 register correctly and 302 / override the BIOS setup 303 304 /* 305 * If bit 21 of eflags can be flipped, then cpuid is present 306 * and enabled. 
307 */ 308 pushfl 309 popl %ecx 310 movl %ecx, %eax 311 xorl $PS_ID, %eax / try complemented bit 312 pushl %eax 313 popfl 314 pushfl 315 popl %eax 316 cmpl %eax, %ecx 317 jne have_cpuid 318 319 /* 320 * cpuid may be disabled on Cyrix, try to detect Cyrix by the 5/2 test 321 * div does not modify the cc flags on Cyrix, even though this may 322 * also be true for other vendors, this is generally true only for 323 * newer models from those vendors that support and do not disable 324 * cpuid (usually because cpuid cannot be disabled) 325 */ 326 327 /* 328 * clear cc flags 329 */ 330 xorb %ah, %ah 331 sahf 332 333 /* 334 * perform 5/2 test 335 */ 336 movw $5, %ax 337 movb $2, %bl 338 divb %bl 339 340 lahf 341 cmpb $2, %ah 342 jne cpu_486 343 344 /* 345 * div did not modify the cc flags, chances are the vendor is Cyrix 346 * assume the vendor is Cyrix and use the CCR's to enable cpuid 347 */ 348 .set CYRIX_CRI, 0x22 / CR Index Register 349 .set CYRIX_CRD, 0x23 / CR Data Register 350 351 .set CYRIX_CCR3, 0xc3 / Config Control Reg 3 352 .set CYRIX_CCR4, 0xe8 / Config Control Reg 4 353 .set CYRIX_DIR0, 0xfe / Device Identification Reg 0 354 .set CYRIX_DIR1, 0xff / Device Identification Reg 1 355 356 /* 357 * even if the cpu vendor is Cyrix and the motherboard/chipset 358 * vendor decided to ignore lines A1-A4 for I/O addresses, I/O port 359 * 0x21 corresponds with 0x23 and since 0x22 is still untouched, 360 * the reads and writes of 0x21 are guaranteed to be off-chip of 361 * the cpu 362 */ 363 364 /* 365 * enable read of ISR at I/O port 0x20 366 */ 367 movb $0xb, %al 368 outb $MCMD_PORT 369 370 /* 371 * read IMR and store in %bl 372 */ 373 inb $MIMR_PORT 374 movb %al, %bl 375 376 /* 377 * mask out all interrupts so that ISR will not change 378 */ 379 movb $0xff, %al 380 outb $MIMR_PORT 381 382 /* 383 * reads of I/O port 0x22 on Cyrix are always directed off-chip 384 * make use of I/O pull-up to test for an unknown device on 0x22 385 */ 386 inb $CYRIX_CRI 387 cmpb $0xff, 
%al 388 je port_22_free 389 390 /* 391 * motherboard/chipset vendor may be ignoring line A1 of I/O address 392 */ 393 movb %al, %cl 394 395 /* 396 * if the ISR and the value read from 0x22 do not match then we have 397 * detected some unknown device, probably a chipset, at 0x22 398 */ 399 inb $MCMD_PORT 400 cmpb %al, %cl 401 jne restore_IMR 402 403 port_22_free: 404 /* 405 * now test to see if some unknown device is using I/O port 0x23 406 * 407 * read the external I/O port at 0x23 408 */ 409 inb $CYRIX_CRD 410 411 /* 412 * Test for pull-up at 0x23 or if I/O address line A1 is being ignored. 413 * IMR is 0xff so both tests are performed simultaneously. 414 */ 415 cmpb $0xff, %al 416 jne restore_IMR 417 418 /* 419 * We are a Cyrix part. In case we are some model of Cx486 or a Cx586, 420 * record the type and fix it later if not. 421 */ 422 movl $X86_VENDOR_Cyrix, x86_vendor 423 movl $X86_TYPE_CYRIX_486, x86_type 424 425 /* 426 * Try to read CCR3. All Cyrix cpu's which support cpuid have CCR3. 427 * 428 * load CCR3 index into CCR index register 429 */ 430 431 movb $CYRIX_CCR3, %al 432 outb $CYRIX_CRI 433 434 /* 435 * If we are not a Cyrix cpu, then we have performed an external I/O 436 * cycle. If the CCR index was not valid for this Cyrix model, we may 437 * have performed an external I/O cycle as well. In these cases and 438 * if the motherboard/chipset vendor ignores I/O address line A1, 439 * then the PIC will have IRQ3 set at the lowest priority as a side 440 * effect of the above outb. We are reasonalbly confident that there 441 * is not an unknown device on I/O port 0x22, so there should have been 442 * no unpredictable side-effect of the above outb. 443 */ 444 445 /* 446 * read CCR3 447 */ 448 inb $CYRIX_CRD 449 450 /* 451 * If we are not a Cyrix cpu the inb above produced an external I/O 452 * cycle. If we are a Cyrix model that does not support CCR3 wex 453 * produced an external I/O cycle. 
In all known Cyrix models 6x86 and 454 * above, bit 3 of CCR3 is reserved and cannot be set to 1. In all 455 * Cyrix models prior to the 6x86 that supported CCR3, bits 4-7 are 456 * reserved as well. It is highly unlikely that CCR3 contains the value 457 * 0xff. We test to see if I/O port 0x23 is pull-up or the IMR and 458 * deduce we are not a Cyrix with support for cpuid if so. 459 */ 460 cmpb $0xff, %al 461 je restore_PIC 462 463 /* 464 * There exist 486 ISA Cyrix chips that support CCR3 but do not support 465 * DIR0 and DIR1. If we try to read DIR0, we may generate external I/O 466 * cycles, the exact behavior is model specific and undocumented. 467 * Unfortunately these external I/O cycles may confuse some PIC's beyond 468 * recovery. Fortunatetly we can use the following undocumented trick: 469 * if bit 4 of CCR3 can be toggled, then DIR0 and DIR1 are supported. 470 * Pleasantly MAPEN contains bit 4 of CCR3, so this trick is guaranteed 471 * to work on all Cyrix cpu's which support cpuid. 472 */ 473 movb %al, %dl 474 xorb $0x10, %dl 475 movb %al, %cl 476 477 /* 478 * write back CRR3 with toggled bit 4 to CCR3 479 */ 480 movb $CYRIX_CCR3, %al 481 outb $CYRIX_CRI 482 483 movb %dl, %al 484 outb $CYRIX_CRD 485 486 /* 487 * read CCR3 488 */ 489 movb $CYRIX_CCR3, %al 490 outb $CYRIX_CRI 491 inb $CYRIX_CRD 492 movb %al, %dl 493 494 /* 495 * restore CCR3 496 */ 497 movb $CYRIX_CCR3, %al 498 outb $CYRIX_CRI 499 500 movb %cl, %al 501 outb $CYRIX_CRD 502 503 /* 504 * if bit 4 was not toggled DIR0 and DIR1 are not supported in which 505 * case we do not have cpuid anyway 506 */ 507 andb $0x10, %al 508 andb $0x10, %dl 509 cmpb %al, %dl 510 je restore_PIC 511 512 /* 513 * read DIR0 514 */ 515 movb $CYRIX_DIR0, %al 516 outb $CYRIX_CRI 517 inb $CYRIX_CRD 518 519 /* 520 * test for pull-up 521 */ 522 cmpb $0xff, %al 523 je restore_PIC 524 525 /* 526 * Values of 0x20-0x27 in DIR0 are currently reserved by Cyrix for 527 * future use. 
If Cyrix ever produces a cpu that supports cpuid with 528 * these ids, the following test will have to change. For now we remain 529 * pessimistic since the formats of the CRR's may be different then. 530 * 531 * test for at least a 6x86, to see if we support both MAPEN and CPUID 532 */ 533 cmpb $0x30, %al 534 jb restore_IMR 535 536 /* 537 * enable MAPEN 538 */ 539 movb $CYRIX_CCR3, %al 540 outb $CYRIX_CRI 541 542 andb $0xf, %cl 543 movb %cl, %al 544 orb $0x10, %al 545 outb $CYRIX_CRD 546 547 /* 548 * select CCR4 549 */ 550 movb $CYRIX_CCR4, %al 551 outb $CYRIX_CRI 552 553 /* 554 * read CCR4 555 */ 556 inb $CYRIX_CRD 557 558 /* 559 * enable cpuid 560 */ 561 orb $0x80, %al 562 movb %al, %dl 563 564 /* 565 * select CCR4 566 */ 567 movb $CYRIX_CCR4, %al 568 outb $CYRIX_CRI 569 570 /* 571 * write CCR4 572 */ 573 movb %dl, %al 574 outb $CYRIX_CRD 575 576 /* 577 * select CCR3 578 */ 579 movb $CYRIX_CCR3, %al 580 outb $CYRIX_CRI 581 582 /* 583 * disable MAPEN and write CCR3 584 */ 585 movb %cl, %al 586 outb $CYRIX_CRD 587 588 /* 589 * restore IMR 590 */ 591 movb %bl, %al 592 outb $MIMR_PORT 593 594 /* 595 * test to see if cpuid available 596 */ 597 pushfl 598 popl %ecx 599 movl %ecx, %eax 600 xorl $PS_ID, %eax / try complemented bit 601 pushl %eax 602 popfl 603 pushfl 604 popl %eax 605 cmpl %eax, %ecx 606 jne have_cpuid 607 jmp cpu_486 608 609 restore_PIC: 610 /* 611 * In case the motherboard/chipset vendor is ignoring line A1 of the 612 * I/O address, we set the PIC priorities to sane values. 
613 */ 614 movb $0xc7, %al / irq 7 lowest priority 615 outb $MCMD_PORT 616 617 restore_IMR: 618 movb %bl, %al 619 outb $MIMR_PORT 620 jmp cpu_486 621 622 have_cpuid: 623 /* 624 * cpuid instruction present 625 */ 626 orl $X86_CPUID, x86_feature 627 movl $0, %eax 628 cpuid 629 630 movl %ebx, cpu_vendor 631 movl %edx, cpu_vendor+4 632 movl %ecx, cpu_vendor+8 633 634 /* 635 * early cyrix cpus are somewhat strange and need to be 636 * probed in curious ways to determine their identity 637 */ 638 639 leal cpu_vendor, %esi 640 leal CyrixInstead, %edi 641 movl $12, %ecx 642 repz 643 cmpsb 644 je vendor_is_cyrix 645 646 / let mlsetup()/cpuid_pass1() handle everything else in C 647 648 jmp cpu_done 649 650 is486: 651 /* 652 * test to see if a useful cpuid 653 */ 654 testl %eax, %eax 655 jz isa486 656 657 movl $1, %eax 658 cpuid 659 660 movl %eax, %ebx 661 andl $0xF00, %ebx 662 cmpl $0x400, %ebx 663 je isa486 664 665 rep; ret /* use 2 byte return instruction */ 666 /* AMD Software Optimization Guide - Section 6.2 */ 667 isa486: 668 /* 669 * lose the return address 670 */ 671 popl %eax 672 jmp cpu_486 673 674 vendor_is_cyrix: 675 call is486 676 677 /* 678 * Processor signature and feature flags for Cyrix are insane. 679 * BIOS can play with semi-documented registers, so cpuid must be used 680 * cautiously. Since we are Cyrix that has cpuid, we have DIR0 and DIR1 681 * Keep the family in %ebx and feature flags in %edx until not needed 682 */ 683 684 /* 685 * read DIR0 686 */ 687 movb $CYRIX_DIR0, %al 688 outb $CYRIX_CRI 689 inb $CYRIX_CRD 690 691 /* 692 * First we handle the cases where we are a 6x86 or 6x86L. 693 * The 6x86 is basically a 486, the only reliable bit in the 694 * feature flags is for FPU. The 6x86L is better, unfortunately 695 * there is no really good way to distinguish between these two 696 * cpu's. We are pessimistic and when in doubt assume 6x86. 697 */ 698 699 cmpb $0x40, %al 700 jae maybeGX 701 702 /* 703 * We are an M1, either a 6x86 or 6x86L. 
704 */ 705 cmpb $0x30, %al 706 je maybe6x86L 707 cmpb $0x31, %al 708 je maybe6x86L 709 cmpb $0x34, %al 710 je maybe6x86L 711 cmpb $0x35, %al 712 je maybe6x86L 713 714 /* 715 * although it is possible that we are a 6x86L, the cpu and 716 * documentation are so buggy, we just do not care. 717 */ 718 jmp likely6x86 719 720 maybe6x86L: 721 /* 722 * read DIR1 723 */ 724 movb $CYRIX_DIR1, %al 725 outb $CYRIX_CRI 726 inb $CYRIX_CRD 727 cmpb $0x22, %al 728 jb likely6x86 729 730 /* 731 * We are a 6x86L, or at least a 6x86 with honest cpuid feature flags 732 */ 733 movl $X86_TYPE_CYRIX_6x86L, x86_type 734 jmp coma_bug 735 736 likely6x86: 737 /* 738 * We are likely a 6x86, or a 6x86L without a way of knowing 739 * 740 * The 6x86 has NO Pentium or Pentium Pro compatible features even 741 * though it claims to be a Pentium Pro compatible! 742 * 743 * The 6x86 core used in the 6x86 may have most of the Pentium system 744 * registers and largely conform to the Pentium System Programming 745 * Reference. Documentation on these parts is long gone. Treat it as 746 * a crippled Pentium and hope for the best. 747 */ 748 749 movl $X86_TYPE_CYRIX_6x86, x86_type 750 jmp coma_bug 751 752 maybeGX: 753 /* 754 * Now we check whether we are a MediaGX or GXm. We have particular 755 * reason for concern here. Even though most of the GXm's 756 * report having TSC in the cpuid feature flags, the TSC may be 757 * horribly broken. What is worse, is that MediaGX's are basically 758 * 486's while the good GXm's are more like Pentium Pro's! 
759 */ 760 761 cmpb $0x50, %al 762 jae maybeM2 763 764 /* 765 * We are either a MediaGX (sometimes called a Gx86) or GXm 766 */ 767 768 cmpb $41, %al 769 je maybeMediaGX 770 771 cmpb $44, %al 772 jb maybeGXm 773 774 cmpb $47, %al 775 jbe maybeMediaGX 776 777 /* 778 * We do not honestly know what we are, so assume a MediaGX 779 */ 780 jmp media_gx 781 782 maybeGXm: 783 /* 784 * It is still possible we are either a MediaGX or GXm, trust cpuid 785 * family should be 5 on a GXm 786 */ 787 cmpl $0x500, %ebx 788 je GXm 789 790 /* 791 * BIOS/Cyrix might set family to 6 on a GXm 792 */ 793 cmpl $0x600, %ebx 794 jne media_gx 795 796 GXm: 797 movl $X86_TYPE_CYRIX_GXm, x86_type 798 jmp cpu_done 799 800 maybeMediaGX: 801 /* 802 * read DIR1 803 */ 804 movb $CYRIX_DIR1, %al 805 outb $CYRIX_CRI 806 inb $CYRIX_CRD 807 808 cmpb $0x30, %al 809 jae maybeGXm 810 811 /* 812 * we are a MediaGX for which we do not trust cpuid 813 */ 814 media_gx: 815 movl $X86_TYPE_CYRIX_MediaGX, x86_type 816 jmp cpu_486 817 818 maybeM2: 819 /* 820 * Now we check whether we are a 6x86MX or MII. These cpu's are 821 * virtually identical, but we care because for the 6x86MX, we 822 * must work around the coma bug. Also for 6x86MX prior to revision 823 * 1.4, the TSC may have serious bugs. 824 */ 825 826 cmpb $0x60, %al 827 jae maybeM3 828 829 /* 830 * family should be 6, but BIOS/Cyrix might set it to 5 831 */ 832 cmpl $0x600, %ebx 833 ja cpu_486 834 835 /* 836 * read DIR1 837 */ 838 movb $CYRIX_DIR1, %al 839 outb $CYRIX_CRI 840 inb $CYRIX_CRD 841 842 cmpb $0x8, %al 843 jb cyrix6x86MX 844 cmpb $0x80, %al 845 jb MII 846 847 cyrix6x86MX: 848 /* 849 * It is altogether unclear how the revision stamped on the cpu 850 * maps to the values in DIR0 and DIR1. Just assume TSC is broken. 
851 */ 852 movl $X86_TYPE_CYRIX_6x86MX, x86_type 853 jmp coma_bug 854 855 MII: 856 movl $X86_TYPE_CYRIX_MII, x86_type 857 likeMII: 858 jmp cpu_done 859 860 maybeM3: 861 /* 862 * We are some chip that we cannot identify yet, an MIII perhaps. 863 * We will be optimistic and hope that the chip is much like an MII, 864 * and that cpuid is sane. Cyrix seemed to have gotten it right in 865 * time for the MII, we can only hope it stayed that way. 866 * Maybe the BIOS or Cyrix is trying to hint at something 867 */ 868 cmpl $0x500, %ebx 869 je GXm 870 871 cmpb $0x80, %al 872 jae likelyM3 873 874 /* 875 * Just test for the features Cyrix is known for 876 */ 877 878 jmp MII 879 880 likelyM3: 881 /* 882 * DIR0 with values from 0x80 to 0x8f indicates a VIA Cyrix III, aka 883 * the Cyrix MIII. There may be parts later that use the same ranges 884 * for DIR0 with special values in DIR1, maybe the VIA CIII, but for 885 * now we will call anything with a DIR0 of 0x80 or higher an MIII. 886 * The MIII is supposed to support large pages, but we will believe 887 * it when we see it. For now we just enable and test for MII features. 888 */ 889 movl $X86_TYPE_VIA_CYRIX_III, x86_type 890 jmp likeMII 891 892 coma_bug: 893 894 /* 895 * With NO_LOCK set to 0 in CCR1, the usual state that BIOS enforces, some 896 * bus cycles are issued with LOCK# asserted. With NO_LOCK set to 1, all bus 897 * cycles except page table accesses and interrupt ACK cycles do not assert 898 * LOCK#. xchgl is an instruction that asserts LOCK# if NO_LOCK is set to 0. 899 * Due to a bug in the cpu core involving over-optimization of branch 900 * prediction, register renaming, and execution of instructions down both the 901 * X and Y pipes for the xchgl instruction, short loops can be written that 902 * never de-assert LOCK# from one invocation of the loop to the next, ad 903 * infinitum. The undesirable effect of this situation is that interrupts are 904 * not serviced. 
The ideal workaround to this bug would be to set NO_LOCK to 905 * 1. Unfortunately bus cycles that would otherwise have asserted LOCK# no 906 * longer do, unless they are page table accesses or interrupt ACK cycles. 907 * With LOCK# not asserted, these bus cycles are now cached. This can cause 908 * undesirable behaviour if the ARR's are not configured correctly. Solaris 909 * does not configure the ARR's, nor does it provide any useful mechanism for 910 * doing so, thus the ideal workaround is not viable. Fortunately, the only 911 * known exploits for this bug involve the xchgl instruction specifically. 912 * There is a group of undocumented registers on Cyrix 6x86, 6x86L, and 913 * 6x86MX cpu's which can be used to specify one instruction as a serializing 914 * instruction. With the xchgl instruction serialized, LOCK# is still 915 * asserted, but it is the sole instruction for which LOCK# is asserted. 916 * There is now some added penalty for the xchgl instruction, but the usual 917 * bus locking is preserved. This ingenious workaround was discovered by 918 * disassembling a binary provided by Cyrix as a workaround for this bug on 919 * Windows, but its not documented anywhere by Cyrix, nor is the bug actually 920 * mentioned in any public errata! The only concern for this workaround is 921 * that there may be similar undiscovered bugs with other instructions that 922 * assert LOCK# that may be leveraged to similar ends. The fact that Cyrix 923 * fixed this bug sometime late in 1997 and no other exploits other than 924 * xchgl have been discovered is good indication that this workaround is 925 * reasonable. 
926 */ 927 928 .set CYRIX_DBR0, 0x30 / Debug Register 0 929 .set CYRIX_DBR1, 0x31 / Debug Register 1 930 .set CYRIX_DBR2, 0x32 / Debug Register 2 931 .set CYRIX_DBR3, 0x33 / Debug Register 3 932 .set CYRIX_DOR, 0x3c / Debug Opcode Register 933 934 /* 935 * What is known about DBR1, DBR2, DBR3, and DOR is that for normal 936 * cpu execution DBR1, DBR2, and DBR3 are set to 0. To obtain opcode 937 * serialization, DBR1, DBR2, and DBR3 are loaded with 0xb8, 0x7f, 938 * and 0xff. Then, DOR is loaded with the one byte opcode. 939 */ 940 941 /* 942 * select CCR3 943 */ 944 movb $CYRIX_CCR3, %al 945 outb $CYRIX_CRI 946 947 /* 948 * read CCR3 and mask out MAPEN 949 */ 950 inb $CYRIX_CRD 951 andb $0xf, %al 952 953 /* 954 * save masked CCR3 in %ah 955 */ 956 movb %al, %ah 957 958 /* 959 * select CCR3 960 */ 961 movb $CYRIX_CCR3, %al 962 outb $CYRIX_CRI 963 964 /* 965 * enable MAPEN 966 */ 967 movb %ah, %al 968 orb $0x10, %al 969 outb $CYRIX_CRD 970 971 /* 972 * read DBR0 973 */ 974 movb $CYRIX_DBR0, %al 975 outb $CYRIX_CRI 976 inb $CYRIX_CRD 977 978 /* 979 * disable MATCH and save in %bh 980 */ 981 orb $0x80, %al 982 movb %al, %bh 983 984 /* 985 * write DBR0 986 */ 987 movb $CYRIX_DBR0, %al 988 outb $CYRIX_CRI 989 movb %bh, %al 990 outb $CYRIX_CRD 991 992 /* 993 * write DBR1 994 */ 995 movb $CYRIX_DBR1, %al 996 outb $CYRIX_CRI 997 movb $0xf8, %al 998 outb $CYRIX_CRD 999 1000 /* 1001 * write DBR2 1002 */ 1003 movb $CYRIX_DBR2, %al 1004 outb $CYRIX_CRI 1005 movb $0x7f, %al 1006 outb $CYRIX_CRD 1007 1008 /* 1009 * write DBR3 1010 */ 1011 movb $CYRIX_DBR3, %al 1012 outb $CYRIX_CRI 1013 xorb %al, %al 1014 outb $CYRIX_CRD 1015 1016 /* 1017 * write DOR 1018 */ 1019 movb $CYRIX_DOR, %al 1020 outb $CYRIX_CRI 1021 movb $0x87, %al 1022 outb $CYRIX_CRD 1023 1024 /* 1025 * enable MATCH 1026 */ 1027 movb $CYRIX_DBR0, %al 1028 outb $CYRIX_CRI 1029 movb %bh, %al 1030 andb $0x7f, %al 1031 outb $CYRIX_CRD 1032 1033 /* 1034 * disable MAPEN 1035 */ 1036 movb $0xc3, %al 1037 outb $CYRIX_CRI 1038 
movb %ah, %al 1039 outb $CYRIX_CRD 1040 1041 jmp cpu_done 1042 1043 cpu_done: 1044 1045 popfl /* Restore original FLAGS */ 1046 popal /* Restore all registers */ 1047 1048 #endif /* !__xpv */ 1049 1050 /* 1051 * mlsetup(%esp) gets called. 1052 */ 1053 pushl %esp 1054 call mlsetup 1055 addl $4, %esp 1056 1057 /* 1058 * We change our appearance to look like the real thread 0. 1059 * (NOTE: making ourselves to be a real thread may be a noop) 1060 * main() gets called. (NOTE: main() never returns). 1061 */ 1062 call main 1063 /* NOTREACHED */ 1064 pushl $__return_from_main 1065 call panic 1066 1067 /* NOTREACHED */ 1068 cpu_486: 1069 pushl $__unsupported_cpu 1070 call panic 1071 SET_SIZE(_locore_start) 1072 1073 #endif /* __lint */ 1074 #endif /* !__amd64 */ 1075 1076 1077 /* 1078 * For stack layout, see privregs.h 1079 * When cmntrap gets called, the error code and trap number have been pushed. 1080 * When cmntrap_pushed gets called, the entire struct regs has been pushed. 1081 */ 1082 1083 #if defined(__lint) 1084 1085 /* ARGSUSED */ 1086 void 1087 cmntrap() 1088 {} 1089 1090 #else /* __lint */ 1091 1092 .globl trap /* C handler called below */ 1093 1094 #if defined(__amd64) 1095 1096 ENTRY_NP2(cmntrap, _cmntrap) 1097 1098 INTR_PUSH 1099 1100 ALTENTRY(cmntrap_pushed) 1101 1102 movq %rsp, %rbp 1103 1104 /* 1105 * - if this is a #pf i.e. T_PGFLT, %r15 is live 1106 * and contains the faulting address i.e. a copy of %cr2 1107 * 1108 * - if this is a #db i.e. T_SGLSTP, %r15 is live 1109 * and contains the value of %db6 1110 */ 1111 1112 TRACE_PTR(%rdi, %rbx, %ebx, %rcx, $TT_TRAP) /* Uses labels 8 and 9 */ 1113 TRACE_REGS(%rdi, %rsp, %rbx, %rcx) /* Uses label 9 */ 1114 TRACE_STAMP(%rdi) /* Clobbers %eax, %edx, uses 9 */ 1115 1116 /* 1117 * We must first check if DTrace has set its NOFAULT bit. 
This 1118 * regrettably must happen before the trap stack is recorded, because 1119 * this requires a call to getpcstack() and may induce recursion if an 1120 * fbt::getpcstack: enabling is inducing the bad load. 1121 */ 1122 movl %gs:CPU_ID, %eax 1123 shlq $CPU_CORE_SHIFT, %rax 1124 leaq cpu_core(%rip), %r8 1125 addq %r8, %rax 1126 movw CPUC_DTRACE_FLAGS(%rax), %cx 1127 testw $CPU_DTRACE_NOFAULT, %cx 1128 jnz .dtrace_induced 1129 1130 TRACE_STACK(%rdi) 1131 1132 movq %rbp, %rdi 1133 movq %r15, %rsi 1134 movl %gs:CPU_ID, %edx 1135 1136 /* 1137 * We know that this isn't a DTrace non-faulting load; we can now safely 1138 * reenable interrupts. (In the case of pagefaults, we enter through an 1139 * interrupt gate.) 1140 */ 1141 ENABLE_INTR_FLAGS 1142 1143 call trap /* trap(rp, addr, cpuid) handles all traps */ 1144 jmp _sys_rtt 1145 1146 .dtrace_induced: 1147 cmpw $KCS_SEL, REGOFF_CS(%rbp) /* test CS for user-mode trap */ 1148 jne 2f /* if from user, panic */ 1149 1150 cmpl $T_PGFLT, REGOFF_TRAPNO(%rbp) 1151 je 0f 1152 1153 cmpl $T_GPFLT, REGOFF_TRAPNO(%rbp) 1154 jne 3f /* if not PF or GP, panic */ 1155 1156 /* 1157 * If we've taken a GPF, we don't (unfortunately) have the address that 1158 * induced the fault. So instead of setting the fault to BADADDR, 1159 * we'll set the fault to ILLOP. 
 */
	orw	$CPU_DTRACE_ILLOP, %cx
	movw	%cx, CPUC_DTRACE_FLAGS(%rax)
	jmp	1f
0:
	orw	$CPU_DTRACE_BADADDR, %cx
	movw	%cx, CPUC_DTRACE_FLAGS(%rax)	/* set fault to bad addr */
	movq	%r15, CPUC_DTRACE_ILLVAL(%rax)
					    /* fault addr is illegal value */
1:
	movq	REGOFF_RIP(%rbp), %rdi
	movq	%rdi, %r12
	call	dtrace_instr_size
	addq	%rax, %r12
	movq	%r12, REGOFF_RIP(%rbp)
	INTR_POP
	IRET
	/*NOTREACHED*/
2:
	leaq	dtrace_badflags(%rip), %rdi
	xorl	%eax, %eax
	call	panic
3:
	leaq	dtrace_badtrap(%rip), %rdi
	xorl	%eax, %eax
	call	panic
	SET_SIZE(cmntrap)
	SET_SIZE(_cmntrap)

#elif defined(__i386)

	/*
	 * cmntrap (i386): common trap entry.
	 *
	 * Saves the register state with INTR_PUSH and then takes one of
	 * two paths:
	 *  - normal: calls trap(rp, addr, cpuid) and leaves via _sys_rtt;
	 *  - CPU_DTRACE_NOFAULT set in this CPU's DTrace flags: records the
	 *    fault in those flags, advances the saved %eip past the
	 *    faulting instruction and returns directly with IRET.
	 */
	ENTRY_NP2(cmntrap, _cmntrap)

	INTR_PUSH

	ALTENTRY(cmntrap_pushed)

	movl	%esp, %ebp

	/*
	 * - if this is a #pf i.e. T_PGFLT, %esi is live
	 *   and contains the faulting address i.e. a copy of %cr2
	 *
	 * - if this is a #db i.e. T_SGLSTP, %esi is live
	 *   and contains the value of %db6
	 */

	TRACE_PTR(%edi, %ebx, %ebx, %ecx, $TT_TRAP) /* Uses labels 8 and 9 */
	TRACE_REGS(%edi, %esp, %ebx, %ecx)	/* Uses label 9 */
	TRACE_STAMP(%edi)		/* Clobbers %eax, %edx, uses 9 */

	/*
	 * We must first check if DTrace has set its NOFAULT bit.  This
	 * regrettably must happen before the trap stack is recorded, because
	 * this requires a call to getpcstack() and may induce recursion if an
	 * fbt::getpcstack: enabling is inducing the bad load.
	 */
	/* %eax = cpu_core + (cpu id << CPU_CORE_SHIFT); %cx = its flags */
	movl	%gs:CPU_ID, %eax
	shll	$CPU_CORE_SHIFT, %eax
	addl	$cpu_core, %eax
	movw	CPUC_DTRACE_FLAGS(%eax), %cx
	testw	$CPU_DTRACE_NOFAULT, %cx
	jnz	.dtrace_induced

	TRACE_STACK(%edi)

	/* arguments for trap(rp, addr, cpuid), pushed last-to-first */
	pushl	%gs:CPU_ID
	pushl	%esi			/* fault address for PGFLTs */
	pushl	%ebp			/* &regs */

	/*
	 * We know that this isn't a DTrace non-faulting load; we can now safely
	 * reenable interrupts.  (In the case of pagefaults, we enter through an
	 * interrupt gate.)
	 */
	ENABLE_INTR_FLAGS

	call	trap		/* trap(rp, addr, cpuid) handles all traps */
	addl	$12, %esp	/* get argument off stack */
	jmp	_sys_rtt

	/*
	 * DTrace no-fault path.  On entry %eax still points at this CPU's
	 * cpu_core entry and %cx holds its DTrace flags (loaded above).
	 */
.dtrace_induced:
	cmpw	$KCS_SEL, REGOFF_CS(%ebp)	/* test CS for user-mode trap */
	jne	2f				/* if from user, panic */

	cmpl	$T_PGFLT, REGOFF_TRAPNO(%ebp)
	je	0f

	cmpl	$T_GPFLT, REGOFF_TRAPNO(%ebp)
	jne	3f				/* if not PF or GP, panic */

	/*
	 * If we've taken a GPF, we don't (unfortunately) have the address that
	 * induced the fault.  So instead of setting the fault to BADADDR,
	 * we'll set the fault to ILLOP.
	 */
	orw	$CPU_DTRACE_ILLOP, %cx
	movw	%cx, CPUC_DTRACE_FLAGS(%eax)
	jmp	1f
0:
	orw	$CPU_DTRACE_BADADDR, %cx
	movw	%cx, CPUC_DTRACE_FLAGS(%eax)	/* set fault to bad addr */
	movl	%esi, CPUC_DTRACE_ILLVAL(%eax)
					    /* fault addr is illegal value */
1:
	/* saved %eip += dtrace_instr_size(saved %eip): skip the instruction */
	pushl	REGOFF_EIP(%ebp)
	call	dtrace_instr_size
	addl	$4, %esp
	movl	REGOFF_EIP(%ebp), %ecx
	addl	%eax, %ecx
	movl	%ecx, REGOFF_EIP(%ebp)
	INTR_POP_KERNEL
	IRET
	/*NOTREACHED*/
2:
	pushl	$dtrace_badflags
	call	panic
3:
	pushl	$dtrace_badtrap
	call	panic
	SET_SIZE(cmntrap)
	SET_SIZE(_cmntrap)

#endif	/* __i386 */

/*
 * Declare a uintptr_t which has the size of _cmntrap to enable stack
 * traceback code to know when a regs structure is on the stack.
 */
	.globl	_cmntrap_size
	.align	CLONGSIZE
_cmntrap_size:
	.NWORD	. - _cmntrap
	.type	_cmntrap_size, @object

/* panic messages used by the DTrace no-fault path of cmntrap */
dtrace_badflags:
	.string "bad DTrace flags"

dtrace_badtrap:
	.string "bad DTrace trap"

#endif	/* __lint */

#if defined(__lint)

/* ARGSUSED */
void
cmninttrap()
{}

#if !defined(__xpv)
void
bop_trap_handler(void)
{}
#endif

#else	/* __lint */

	.globl	trap		/* C handler called below */

#if defined(__amd64)

	/*
	 * cmninttrap (amd64): saves state with INTR_PUSH, applies
	 * INTGATE_INIT_KERNEL_FLAGS, calls trap(rp, 0, cpuid) and leaves
	 * through _sys_rtt.
	 */
	ENTRY_NP(cmninttrap)

	INTR_PUSH
	INTGATE_INIT_KERNEL_FLAGS

	TRACE_PTR(%rdi, %rbx, %ebx, %rcx, $TT_TRAP) /* Uses labels 8 and 9 */
	TRACE_REGS(%rdi, %rsp, %rbx, %rcx)	/* Uses label 9 */
	TRACE_STAMP(%rdi)		/* Clobbers %eax, %edx, uses 9 */

	movq	%rsp, %rbp

	movl	%gs:CPU_ID, %edx	/* arg 3: cpuid */
	xorl	%esi, %esi		/* arg 2: addr = 0 */
	movq	%rsp, %rdi		/* arg 1: rp */
	call	trap		/* trap(rp, addr, cpuid) handles all traps */
	jmp	_sys_rtt
	SET_SIZE(cmninttrap)

#if !defined(__xpv)
	/*
	 * Handle traps early in boot. Just revectors into C quickly as
	 * these are always fatal errors.
	 *
	 * Adjust %rsp to get same stack layout as in 32bit mode for bop_trap().
	 */
	ENTRY(bop_trap_handler)
	movq	%rsp, %rdi		/* arg 1: stack pointer at entry */
	sub	$8, %rsp
	call	bop_trap
	SET_SIZE(bop_trap_handler)
#endif

#elif defined(__i386)

	/*
	 * cmninttrap (i386): same flow as the amd64 variant, with the
	 * trap(rp, 0, cpuid) arguments passed on the stack.
	 */
	ENTRY_NP(cmninttrap)

	INTR_PUSH
	INTGATE_INIT_KERNEL_FLAGS

	TRACE_PTR(%edi, %ebx, %ebx, %ecx, $TT_TRAP) /* Uses labels 8 and 9 */
	TRACE_REGS(%edi, %esp, %ebx, %ecx)	/* Uses label 9 */
	TRACE_STAMP(%edi)		/* Clobbers %eax, %edx, uses 9 */

	movl	%esp, %ebp

	TRACE_STACK(%edi)

	pushl	%gs:CPU_ID		/* cpuid */
	pushl	$0			/* addr = 0 */
	pushl	%ebp			/* rp */
	call	trap		/* trap(rp, addr, cpuid) handles all traps */
	addl	$12, %esp
	jmp	_sys_rtt
	SET_SIZE(cmninttrap)

#if !defined(__xpv)
	/*
	 * Handle traps early in boot.
Just revectors into C quickly as
	 * these are always fatal errors.
	 */
	ENTRY(bop_trap_handler)
	movl	%esp, %eax
	pushl	%eax			/* arg: stack pointer at entry */
	call	bop_trap
	SET_SIZE(bop_trap_handler)
#endif

#endif	/* __i386 */

#endif	/* __lint */

#if defined(__lint)

/* ARGSUSED */
void
dtrace_trap()
{}

#else	/* __lint */

	.globl	dtrace_user_probe

#if defined(__amd64)

	/*
	 * dtrace_trap (amd64): saves state with INTR_PUSH, fetches the
	 * fault address (%cr2, or the vcpu_info copy under __xpv), enables
	 * interrupts and calls dtrace_user_probe(rp, addr, cpuid) before
	 * leaving through _sys_rtt.
	 */
	ENTRY_NP(dtrace_trap)

	INTR_PUSH

	TRACE_PTR(%rdi, %rbx, %ebx, %rcx, $TT_TRAP) /* Uses labels 8 and 9 */
	TRACE_REGS(%rdi, %rsp, %rbx, %rcx)	/* Uses label 9 */
	TRACE_STAMP(%rdi)		/* Clobbers %eax, %edx, uses 9 */

	movq	%rsp, %rbp

	movl	%gs:CPU_ID, %edx	/* arg 3: cpuid */
#if defined(__xpv)
	movq	%gs:CPU_VCPU_INFO, %rsi
	movq	VCPU_INFO_ARCH_CR2(%rsi), %rsi
#else
	movq	%cr2, %rsi		/* arg 2: addr */
#endif
	movq	%rsp, %rdi		/* arg 1: rp */

	ENABLE_INTR_FLAGS

	call	dtrace_user_probe /* dtrace_user_probe(rp, addr, cpuid) */
	jmp	_sys_rtt

	SET_SIZE(dtrace_trap)

#elif defined(__i386)

	/*
	 * dtrace_trap (i386): same flow as the amd64 variant, with the
	 * dtrace_user_probe(rp, addr, cpuid) arguments passed on the stack.
	 */
	ENTRY_NP(dtrace_trap)

	INTR_PUSH

	TRACE_PTR(%edi, %ebx, %ebx, %ecx, $TT_TRAP) /* Uses labels 8 and 9 */
	TRACE_REGS(%edi, %esp, %ebx, %ecx)	/* Uses label 9 */
	TRACE_STAMP(%edi)		/* Clobbers %eax, %edx, uses 9 */

	movl	%esp, %ebp

	pushl	%gs:CPU_ID		/* cpuid */
#if defined(__xpv)
	movl	%gs:CPU_VCPU_INFO, %eax
	movl	VCPU_INFO_ARCH_CR2(%eax), %eax
#else
	movl	%cr2, %eax
#endif
	pushl	%eax			/* addr */
	pushl	%ebp			/* rp */

	ENABLE_INTR_FLAGS

	call	dtrace_user_probe /* dtrace_user_probe(rp, addr, cpuid) */
	addl	$12, %esp		/* get argument off stack */

	jmp	_sys_rtt
	SET_SIZE(dtrace_trap)

#endif	/* __i386 */

#endif	/* __lint */

/*
 * Return from _sys_trap routine.
 */

#if defined(__lint)

void
lwp_rtt_initial(void)
{}

void
lwp_rtt(void)
{}

void
_sys_rtt(void)
{}

#else	/* __lint */

#if defined(__amd64)

	/*
	 * lwp_rtt_initial: switch to curthread's t_stack, call the
	 * proc start probe, then join the common lwp_rtt path at _lwp_rtt.
	 */
	ENTRY_NP(lwp_rtt_initial)
	movq	%gs:CPU_THREAD, %r15
	movq	T_STACK(%r15), %rsp	/* switch to the thread stack */
	movq	%rsp, %rbp
	call	__dtrace_probe___proc_start
	jmp	_lwp_rtt

	/*
	 * lwp_rtt: switch to curthread's t_stack, run the lwp start
	 * probe, dtrace_systrace_rtt and post_syscall, then fall into
	 * _sys_rtt to return from the trap frame at the top of the stack.
	 */
	ENTRY_NP(lwp_rtt)

	/*
	 * r14	lwp
	 * rdx	lwp->lwp_procp
	 * r15	curthread
	 */

	movq	%gs:CPU_THREAD, %r15
	movq	T_STACK(%r15), %rsp	/* switch to the thread stack */
	movq	%rsp, %rbp
_lwp_rtt:
	call	__dtrace_probe___proc_lwp__start
	movq	%gs:CPU_LWP, %r14
	movq	LWP_PROCP(%r14), %rdx

	/*
	 * XX64	Is the stack misaligned correctly at this point?
	 *	If not, we need to do a push before calling anything ..
	 */

#if defined(DEBUG)
	/*
	 * If we were to run lwp_savectx at this point -without-
	 * pcb_rupdate being set to 1, we'd end up sampling the hardware
	 * state left by the previous running lwp, rather than setting
	 * the values requested by the lwp creator.  Bad.
	 */
	testb	$0x1, PCB_RUPDATE(%r14)
	jne	1f			/* bit set: skip the panic */
	leaq	_no_pending_updates(%rip), %rdi
	movl	$__LINE__, %esi
	movq	%r14, %rdx
	xorl	%eax, %eax
	/*
	 * NOTE(review): the format string is stored inline right after
	 * the call, so this relies on panic() not returning.
	 */
	call	panic
_no_pending_updates:
	.string	"locore.s:%d lwp_rtt(lwp %p) but pcb_rupdate != 1"
1:
#endif

	/*
	 * If agent lwp, clear %fs and %gs
	 */
	cmpq	%r15, P_AGENTTP(%rdx)	/* curthread == p_agenttp? */
	jne	1f
	xorl	%ecx, %ecx
	movq	%rcx, REGOFF_FS(%rsp)
	movq	%rcx, REGOFF_GS(%rsp)
	movw	%cx, LWP_PCB_FS(%r14)
	movw	%cx, LWP_PCB_GS(%r14)
1:
	call	dtrace_systrace_rtt
	/* the saved %rax/%rdx in the frame become rval1/rval2 */
	movq	REGOFF_RDX(%rsp), %rsi
	movq	REGOFF_RAX(%rsp), %rdi
	call	post_syscall		/* post_syscall(rval1, rval2) */

	/*
	 * set up to take fault on first use of fp
	 */
	STTS(%rdi)

	/*
	 * XXX - may want a fast path that avoids sys_rtt_common in the
	 * most common case.
	 */
	ALTENTRY(_sys_rtt)
	CLI(%rax)			/* disable interrupts */
	ALTENTRY(_sys_rtt_ints_disabled)
	movq	%rsp, %rdi		/* pass rp to sys_rtt_common */
	call	sys_rtt_common		/* do common sys_rtt tasks */
	testq	%rax, %rax		/* returning to userland? */
	jz	sr_sup

	/*
	 * Return to user
	 */
	ASSERT_UPCALL_MASK_IS_SET
	cmpw	$UCS_SEL, REGOFF_CS(%rsp) /* test for native (64-bit) lwp? */
	je	sys_rtt_syscall

	/*
	 * Return to 32-bit userland
	 */
	ALTENTRY(sys_rtt_syscall32)
	USER32_POP
	IRET
	/*NOTREACHED*/

	ALTENTRY(sys_rtt_syscall)
	/*
	 * Return to 64-bit userland
	 */
	USER_POP
	ALTENTRY(nopop_sys_rtt_syscall)
	IRET
	/*NOTREACHED*/
	SET_SIZE(nopop_sys_rtt_syscall)

	/*
	 * Return to supervisor
	 * NOTE: to make the check in trap() that tests if we are executing
	 * segment register fixup/restore code work properly, sr_sup MUST be
	 * after _sys_rtt .
1599 */ 1600 ALTENTRY(sr_sup) 1601 /* 1602 * Restore regs before doing iretq to kernel mode 1603 */ 1604 INTR_POP 1605 IRET 1606 .globl _sys_rtt_end 1607 _sys_rtt_end: 1608 /*NOTREACHED*/ 1609 SET_SIZE(sr_sup) 1610 SET_SIZE(_sys_rtt_end) 1611 SET_SIZE(lwp_rtt) 1612 SET_SIZE(lwp_rtt_initial) 1613 SET_SIZE(_sys_rtt_ints_disabled) 1614 SET_SIZE(_sys_rtt) 1615 SET_SIZE(sys_rtt_syscall) 1616 SET_SIZE(sys_rtt_syscall32) 1617 1618 #elif defined(__i386) 1619 1620 ENTRY_NP(lwp_rtt_initial) 1621 movl %gs:CPU_THREAD, %eax 1622 movl T_STACK(%eax), %esp /* switch to the thread stack */ 1623 movl %esp, %ebp 1624 call __dtrace_probe___proc_start 1625 jmp _lwp_rtt 1626 1627 ENTRY_NP(lwp_rtt) 1628 movl %gs:CPU_THREAD, %eax 1629 movl T_STACK(%eax), %esp /* switch to the thread stack */ 1630 movl %esp, %ebp 1631 _lwp_rtt: 1632 call __dtrace_probe___proc_lwp__start 1633 1634 /* 1635 * If agent lwp, clear %fs and %gs. 1636 */ 1637 movl %gs:CPU_LWP, %eax 1638 movl LWP_PROCP(%eax), %edx 1639 1640 cmpl %eax, P_AGENTTP(%edx) 1641 jne 1f 1642 movl $0, REGOFF_FS(%esp) 1643 movl $0, REGOFF_GS(%esp) 1644 1: 1645 call dtrace_systrace_rtt 1646 movl REGOFF_EDX(%esp), %edx 1647 movl REGOFF_EAX(%esp), %eax 1648 pushl %edx 1649 pushl %eax 1650 call post_syscall /* post_syscall(rval1, rval2) */ 1651 addl $8, %esp 1652 1653 /* 1654 * set up to take fault on first use of fp 1655 */ 1656 STTS(%eax) 1657 1658 /* 1659 * XXX - may want a fast path that avoids sys_rtt_common in the 1660 * most common case. 1661 */ 1662 ALTENTRY(_sys_rtt) 1663 CLI(%eax) /* disable interrupts */ 1664 ALTENTRY(_sys_rtt_ints_disabled) 1665 pushl %esp /* pass rp to sys_rtt_common */ 1666 call sys_rtt_common 1667 addl $4, %esp /* pop arg */ 1668 testl %eax, %eax /* test for return to user mode */ 1669 jz sr_sup 1670 1671 /* 1672 * Return to User. 
1673 */ 1674 ALTENTRY(sys_rtt_syscall) 1675 INTR_POP_USER 1676 1677 /* 1678 * There can be no instructions between this label and IRET or 1679 * we could end up breaking linux brand support. See label usage 1680 * in lx_brand_int80_callback for an example. 1681 */ 1682 ALTENTRY(nopop_sys_rtt_syscall) 1683 IRET 1684 /*NOTREACHED*/ 1685 SET_SIZE(nopop_sys_rtt_syscall) 1686 1687 ALTENTRY(_sys_rtt_end) 1688 1689 /* 1690 * Return to supervisor 1691 */ 1692 ALTENTRY(sr_sup) 1693 1694 /* 1695 * Restore regs before doing iret to kernel mode 1696 */ 1697 INTR_POP_KERNEL 1698 IRET 1699 /*NOTREACHED*/ 1700 1701 SET_SIZE(sr_sup) 1702 SET_SIZE(_sys_rtt_end) 1703 SET_SIZE(lwp_rtt) 1704 SET_SIZE(lwp_rtt_initial) 1705 SET_SIZE(_sys_rtt_ints_disabled) 1706 SET_SIZE(_sys_rtt) 1707 SET_SIZE(sys_rtt_syscall) 1708 1709 #endif /* __i386 */ 1710 1711 #endif /* __lint */ 1712 1713 #if defined(__lint) 1714 1715 /* 1716 * So why do we have to deal with all this crud in the world of ia32? 1717 * 1718 * Basically there are four classes of ia32 implementations, those that do not 1719 * have a TSC, those that have a marginal TSC that is broken to the extent 1720 * that it is useless, those that have a marginal TSC that is not quite so 1721 * horribly broken and can be used with some care, and those that have a 1722 * reliable TSC. This crud has to be here in order to sift through all the 1723 * variants. 1724 */ 1725 1726 /*ARGSUSED*/ 1727 uint64_t 1728 freq_tsc(uint32_t *pit_counter) 1729 { 1730 return (0); 1731 } 1732 1733 #else /* __lint */ 1734 1735 #if defined(__amd64) 1736 1737 /* 1738 * XX64 quick and dirty port from the i386 version. Since we 1739 * believe the amd64 tsc is more reliable, could this code be 1740 * simpler? 1741 */ 1742 ENTRY_NP(freq_tsc) 1743 pushq %rbp 1744 movq %rsp, %rbp 1745 movq %rdi, %r9 /* save pit_counter */ 1746 pushq %rbx 1747 1748 / We have a TSC, but we have no way in general to know how reliable it is. 
1749 / Usually a marginal TSC behaves appropriately unless not enough time 1750 / elapses between reads. A reliable TSC can be read as often and as rapidly 1751 / as desired. The simplistic approach of reading the TSC counter and 1752 / correlating to the PIT counter cannot be naively followed. Instead estimates 1753 / have to be taken to successively refine a guess at the speed of the cpu 1754 / and then the TSC and PIT counter are correlated. In practice very rarely 1755 / is more than one quick loop required for an estimate. Measures have to be 1756 / taken to prevent the PIT counter from wrapping beyond its resolution and for 1757 / measuring the clock rate of very fast processors. 1758 / 1759 / The following constant can be tuned. It should be such that the loop does 1760 / not take too many nor too few PIT counts to execute. If this value is too 1761 / large, then on slow machines the loop will take a long time, or the PIT 1762 / counter may even wrap. If this value is too small, then on fast machines 1763 / the PIT counter may count so few ticks that the resolution of the PIT 1764 / itself causes a bad guess. Because this code is used in machines with 1765 / marginal TSC's and/or IO, if this value is too small on those, it may 1766 / cause the calculated cpu frequency to vary slightly from boot to boot. 1767 / 1768 / In all cases even if this constant is set inappropriately, the algorithm 1769 / will still work and the caller should be able to handle variances in the 1770 / calculation of cpu frequency, but the calculation will be inefficient and 1771 / take a disproportionate amount of time relative to a well selected value. 1772 / As the slowest supported cpu becomes faster, this constant should be 1773 / carefully increased. 

	movl	$0x8000, %ecx

	/ to make sure the instruction cache has been warmed
	/ (the carry flag carries the "warmed" state into the loop; it is
	/ clear on the first pass and set with stc before every retry)
	clc

	jmp	freq_tsc_loop

/ The following block of code up to and including the latching of the PIT
/ counter after freq_tsc_perf_loop is very critical and very carefully
/ written, it should only be modified with great care.  freq_tsc_loop to
/ freq_tsc_perf_loop fits exactly in 16 bytes as do the instructions in
/ freq_tsc_perf_loop up to the unlatching of the PIT counter.

	.align	32
freq_tsc_loop:
	/ save the loop count in %ebx
	movl	%ecx, %ebx

	/ initialize the PIT counter and start a count down
	movb	$PIT_LOADMODE, %al
	outb	$PITCTL_PORT
	movb	$0xff, %al
	outb	$PITCTR0_PORT
	outb	$PITCTR0_PORT

	/ read the TSC and store the TS in %edi:%esi
	rdtsc
	movl	%eax, %esi

	/ NOTE(review): the loop body only re-stores the same start
	/ timestamp; presumably it exists as the timed workload - confirm
freq_tsc_perf_loop:
	movl	%edx, %edi
	movl	%eax, %esi
	movl	%edx, %edi
	loop	freq_tsc_perf_loop

	/ read the TSC and store the LSW in %ecx
	rdtsc
	movl	%eax, %ecx

	/ latch the PIT counter and status
	movb	$_CONST(PIT_READBACK|PIT_READBACKC0), %al
	outb	$PITCTL_PORT

	/ remember if the icache has been warmed
	setc	%ah

	/ read the PIT status
	inb	$PITCTR0_PORT
	shll	$8, %eax

	/ read PIT count
	inb	$PITCTR0_PORT
	shll	$8, %eax
	inb	$PITCTR0_PORT
	bswap	%eax

	/ %eax is now, from most to least significant byte:
	/ second count byte read, first count byte read, PIT status,
	/ icache-warmed flag

	/ check to see if the PIT count was loaded into the CE
	btw	$_CONST(PITSTAT_NULLCNT+8), %ax
	jc	freq_tsc_increase_count

	/ check to see if PIT counter wrapped
	btw	$_CONST(PITSTAT_OUTPUT+8), %ax
	jnc	freq_tsc_pit_did_not_wrap

	/ halve count
	shrl	$1, %ebx
	movl	%ebx, %ecx

	/ the instruction cache has been warmed
	stc

	jmp	freq_tsc_loop

freq_tsc_increase_count:
	/ double the loop count; give up if it no longer fits in 32 bits
	shll	$1, %ebx
	jc	freq_tsc_too_fast

	movl	%ebx, %ecx

	/ the instruction cache has been warmed
	stc

	jmp	freq_tsc_loop

freq_tsc_pit_did_not_wrap:
	/ bring the 16-bit PIT count into %ax; the warmed flag lands in
	/ bit 16 of %eax
	roll	$16, %eax

	/ notw does not modify the flags, so jb still tests the cmpw:
	/ taken when the latched count is below 0x2000.  %ax becomes
	/ 0xffff - count.
	cmpw	$0x2000, %ax
	notw	%ax
	jb	freq_tsc_sufficient_duration

freq_tsc_calculate:
	/ in mode 0, the PIT loads the count into the CE on the first CLK pulse,
	/ then on the second CLK pulse the CE is decremented, therefore mode 0
	/ is really a (count + 1) counter, ugh
	xorl	%esi, %esi
	movw	%ax, %si
	incl	%esi

	movl	$0xf000, %eax
	mull	%ebx

	/ tuck away (target_pit_count * loop_count)
	movl	%edx, %ecx
	movl	%eax, %ebx

	/ %edx:%eax = pit_count * 2^32 (pit_count * 0xffffffff + pit_count)
	movl	%esi, %eax
	movl	$0xffffffff, %edx
	mull	%edx

	addl	%esi, %eax
	adcl	$0, %edx

	/ only divide when pit_count * 2^32 > target_pit_count * loop_count,
	/ i.e. when the quotient of the divl below fits in 32 bits
	cmpl	%ecx, %edx
	ja	freq_tsc_div_safe
	jb	freq_tsc_too_fast

	cmpl	%ebx, %eax
	jbe	freq_tsc_too_fast

freq_tsc_div_safe:
	/ new loop count = (target_pit_count * loop_count) / pit_count
	movl	%ecx, %edx
	movl	%ebx, %eax

	movl	%esi, %ecx
	divl	%ecx

	movl	%eax, %ecx

	/ the instruction cache has been warmed
	stc

	jmp	freq_tsc_loop

freq_tsc_sufficient_duration:
	/ test to see if the icache has been warmed
	btl	$16, %eax
	jnc	freq_tsc_calculate

	/ recall mode 0 is a (count + 1) counter
	andl	$0xffff, %eax
	incl	%eax

	/ save the number of PIT counts
	movl	%eax, (%r9)

	/ calculate the number of TS's that elapsed
	/ (%edx:%ecx is the end timestamp, %edi:%esi the start timestamp)
	movl	%ecx, %eax
	subl	%esi, %eax
	sbbl	%edi, %edx

	jmp	freq_tsc_end

freq_tsc_too_fast:
	/ return 0 as a 64 bit quantity
	xorl	%eax, %eax
	xorl	%edx, %edx

freq_tsc_end:
	/ combine %edx:%eax into the 64-bit return value in %rax
	shlq	$32, %rdx
	orq	%rdx, %rax

	popq	%rbx
	leaveq
	ret
	SET_SIZE(freq_tsc)

#elif defined(__i386)

	ENTRY_NP(freq_tsc)
	pushl	%ebp
	movl	%esp, %ebp
	pushl	%edi
	pushl	%esi
	pushl	%ebx

/ We have a TSC, but we have no way in general to know how reliable it is.
1952 / Usually a marginal TSC behaves appropriately unless not enough time 1953 / elapses between reads. A reliable TSC can be read as often and as rapidly 1954 / as desired. The simplistic approach of reading the TSC counter and 1955 / correlating to the PIT counter cannot be naively followed. Instead estimates 1956 / have to be taken to successively refine a guess at the speed of the cpu 1957 / and then the TSC and PIT counter are correlated. In practice very rarely 1958 / is more than one quick loop required for an estimate. Measures have to be 1959 / taken to prevent the PIT counter from wrapping beyond its resolution and for 1960 / measuring the clock rate of very fast processors. 1961 / 1962 / The following constant can be tuned. It should be such that the loop does 1963 / not take too many nor too few PIT counts to execute. If this value is too 1964 / large, then on slow machines the loop will take a long time, or the PIT 1965 / counter may even wrap. If this value is too small, then on fast machines 1966 / the PIT counter may count so few ticks that the resolution of the PIT 1967 / itself causes a bad guess. Because this code is used in machines with 1968 / marginal TSC's and/or IO, if this value is too small on those, it may 1969 / cause the calculated cpu frequency to vary slightly from boot to boot. 1970 / 1971 / In all cases even if this constant is set inappropriately, the algorithm 1972 / will still work and the caller should be able to handle variances in the 1973 / calculation of cpu frequency, but the calculation will be inefficient and 1974 / take a disproportionate amount of time relative to a well selected value. 1975 / As the slowest supported cpu becomes faster, this constant should be 1976 / carefully increased. 

	movl	$0x8000, %ecx

	/ to make sure the instruction cache has been warmed
	/ (the carry flag carries the "warmed" state into the loop; it is
	/ clear on the first pass and set with stc before every retry)
	clc

	jmp	freq_tsc_loop

/ The following block of code up to and including the latching of the PIT
/ counter after freq_tsc_perf_loop is very critical and very carefully
/ written, it should only be modified with great care.  freq_tsc_loop to
/ freq_tsc_perf_loop fits exactly in 16 bytes as do the instructions in
/ freq_tsc_perf_loop up to the unlatching of the PIT counter.

	.align	32
freq_tsc_loop:
	/ save the loop count in %ebx
	movl	%ecx, %ebx

	/ initialize the PIT counter and start a count down
	movb	$PIT_LOADMODE, %al
	outb	$PITCTL_PORT
	movb	$0xff, %al
	outb	$PITCTR0_PORT
	outb	$PITCTR0_PORT

	/ read the TSC and store the TS in %edi:%esi
	rdtsc
	movl	%eax, %esi

	/ NOTE(review): the loop body only re-stores the same start
	/ timestamp; presumably it exists as the timed workload - confirm
freq_tsc_perf_loop:
	movl	%edx, %edi
	movl	%eax, %esi
	movl	%edx, %edi
	loop	freq_tsc_perf_loop

	/ read the TSC and store the LSW in %ecx
	rdtsc
	movl	%eax, %ecx

	/ latch the PIT counter and status
	movb	$_CONST(PIT_READBACK|PIT_READBACKC0), %al
	outb	$PITCTL_PORT

	/ remember if the icache has been warmed
	setc	%ah

	/ read the PIT status
	inb	$PITCTR0_PORT
	shll	$8, %eax

	/ read PIT count
	inb	$PITCTR0_PORT
	shll	$8, %eax
	inb	$PITCTR0_PORT
	bswap	%eax

	/ %eax is now, from most to least significant byte:
	/ second count byte read, first count byte read, PIT status,
	/ icache-warmed flag

	/ check to see if the PIT count was loaded into the CE
	btw	$_CONST(PITSTAT_NULLCNT+8), %ax
	jc	freq_tsc_increase_count

	/ check to see if PIT counter wrapped
	btw	$_CONST(PITSTAT_OUTPUT+8), %ax
	jnc	freq_tsc_pit_did_not_wrap

	/ halve count
	shrl	$1, %ebx
	movl	%ebx, %ecx

	/ the instruction cache has been warmed
	stc

	jmp	freq_tsc_loop

freq_tsc_increase_count:
	/ double the loop count; give up if it no longer fits in 32 bits
	shll	$1, %ebx
	jc	freq_tsc_too_fast

	movl	%ebx, %ecx

	/ the instruction cache has been warmed
	stc

	jmp	freq_tsc_loop

freq_tsc_pit_did_not_wrap:
	/ bring the 16-bit PIT count into %ax; the warmed flag lands in
	/ bit 16 of %eax
	roll	$16, %eax

	/ notw does not modify the flags, so jb still tests the cmpw:
	/ taken when the latched count is below 0x2000.  %ax becomes
	/ 0xffff - count.
	cmpw	$0x2000, %ax
	notw	%ax
	jb	freq_tsc_sufficient_duration

freq_tsc_calculate:
	/ in mode 0, the PIT loads the count into the CE on the first CLK pulse,
	/ then on the second CLK pulse the CE is decremented, therefore mode 0
	/ is really a (count + 1) counter, ugh
	xorl	%esi, %esi
	movw	%ax, %si
	incl	%esi

	movl	$0xf000, %eax
	mull	%ebx

	/ tuck away (target_pit_count * loop_count)
	movl	%edx, %ecx
	movl	%eax, %ebx

	/ %edx:%eax = pit_count * 2^32 (pit_count * 0xffffffff + pit_count)
	movl	%esi, %eax
	movl	$0xffffffff, %edx
	mull	%edx

	addl	%esi, %eax
	adcl	$0, %edx

	/ only divide when pit_count * 2^32 > target_pit_count * loop_count,
	/ i.e. when the quotient of the divl below fits in 32 bits
	cmpl	%ecx, %edx
	ja	freq_tsc_div_safe
	jb	freq_tsc_too_fast

	cmpl	%ebx, %eax
	jbe	freq_tsc_too_fast

freq_tsc_div_safe:
	/ new loop count = (target_pit_count * loop_count) / pit_count
	movl	%ecx, %edx
	movl	%ebx, %eax

	movl	%esi, %ecx
	divl	%ecx

	movl	%eax, %ecx

	/ the instruction cache has been warmed
	stc

	jmp	freq_tsc_loop

freq_tsc_sufficient_duration:
	/ test to see if the icache has been warmed
	btl	$16, %eax
	jnc	freq_tsc_calculate

	/ recall mode 0 is a (count + 1) counter
	andl	$0xffff, %eax
	incl	%eax

	/ save the number of PIT counts
	/ (8(%ebp) is the first stack argument, pit_counter)
	movl	8(%ebp), %ebx
	movl	%eax, (%ebx)

	/ calculate the number of TS's that elapsed
	/ (%edx:%ecx is the end timestamp, %edi:%esi the start timestamp)
	movl	%ecx, %eax
	subl	%esi, %eax
	sbbl	%edi, %edx

	jmp	freq_tsc_end

freq_tsc_too_fast:
	/ return 0 as a 64 bit quantity
	xorl	%eax, %eax
	xorl	%edx, %edx

freq_tsc_end:
	/ the 64-bit result is returned in %edx:%eax
	popl	%ebx
	popl	%esi
	popl	%edi
	popl	%ebp
	ret
	SET_SIZE(freq_tsc)

#endif	/* __i386 */
#endif	/* __lint */

#if !defined(__amd64)
#if defined(__lint)

/*
 * We do not have a TSC so we use a block of instructions with well known
 * timings.
2154 */ 2155 2156 /*ARGSUSED*/ 2157 uint64_t 2158 freq_notsc(uint32_t *pit_counter) 2159 { 2160 return (0); 2161 } 2162 2163 #else /* __lint */ 2164 ENTRY_NP(freq_notsc) 2165 pushl %ebp 2166 movl %esp, %ebp 2167 pushl %edi 2168 pushl %esi 2169 pushl %ebx 2170 2171 / initial count for the idivl loop 2172 movl $0x1000, %ecx 2173 2174 / load the divisor 2175 movl $1, %ebx 2176 2177 jmp freq_notsc_loop 2178 2179 .align 16 2180 freq_notsc_loop: 2181 / set high 32 bits of dividend to zero 2182 xorl %edx, %edx 2183 2184 / save the loop count in %edi 2185 movl %ecx, %edi 2186 2187 / initialize the PIT counter and start a count down 2188 movb $PIT_LOADMODE, %al 2189 outb $PITCTL_PORT 2190 movb $0xff, %al 2191 outb $PITCTR0_PORT 2192 outb $PITCTR0_PORT 2193 2194 / set low 32 bits of dividend to zero 2195 xorl %eax, %eax 2196 2197 / It is vital that the arguments to idivl be set appropriately because on some 2198 / cpu's this instruction takes more or less clock ticks depending on its 2199 / arguments. 
2200 freq_notsc_perf_loop: 2201 idivl %ebx 2202 idivl %ebx 2203 idivl %ebx 2204 idivl %ebx 2205 idivl %ebx 2206 loop freq_notsc_perf_loop 2207 2208 / latch the PIT counter and status 2209 movb $_CONST(PIT_READBACK|PIT_READBACKC0), %al 2210 outb $PITCTL_PORT 2211 2212 / read the PIT status 2213 inb $PITCTR0_PORT 2214 shll $8, %eax 2215 2216 / read PIT count 2217 inb $PITCTR0_PORT 2218 shll $8, %eax 2219 inb $PITCTR0_PORT 2220 bswap %eax 2221 2222 / check to see if the PIT count was loaded into the CE 2223 btw $_CONST(PITSTAT_NULLCNT+8), %ax 2224 jc freq_notsc_increase_count 2225 2226 / check to see if PIT counter wrapped 2227 btw $_CONST(PITSTAT_OUTPUT+8), %ax 2228 jnc freq_notsc_pit_did_not_wrap 2229 2230 / halve count 2231 shrl $1, %edi 2232 movl %edi, %ecx 2233 2234 jmp freq_notsc_loop 2235 2236 freq_notsc_increase_count: 2237 shll $1, %edi 2238 jc freq_notsc_too_fast 2239 2240 movl %edi, %ecx 2241 2242 jmp freq_notsc_loop 2243 2244 freq_notsc_pit_did_not_wrap: 2245 shrl $16, %eax 2246 2247 cmpw $0x2000, %ax 2248 notw %ax 2249 jb freq_notsc_sufficient_duration 2250 2251 freq_notsc_calculate: 2252 / in mode 0, the PIT loads the count into the CE on the first CLK pulse, 2253 / then on the second CLK pulse the CE is decremented, therefore mode 0 2254 / is really a (count + 1) counter, ugh 2255 xorl %esi, %esi 2256 movw %ax, %si 2257 incl %esi 2258 2259 movl %edi, %eax 2260 movl $0xf000, %ecx 2261 mull %ecx 2262 2263 / tuck away (target_pit_count * loop_count) 2264 movl %edx, %edi 2265 movl %eax, %ecx 2266 2267 movl %esi, %eax 2268 movl $0xffffffff, %edx 2269 mull %edx 2270 2271 addl %esi, %eax 2272 adcl $0, %edx 2273 2274 cmpl %edi, %edx 2275 ja freq_notsc_div_safe 2276 jb freq_notsc_too_fast 2277 2278 cmpl %ecx, %eax 2279 jbe freq_notsc_too_fast 2280 2281 freq_notsc_div_safe: 2282 movl %edi, %edx 2283 movl %ecx, %eax 2284 2285 movl %esi, %ecx 2286 divl %ecx 2287 2288 movl %eax, %ecx 2289 2290 jmp freq_notsc_loop 2291 2292 freq_notsc_sufficient_duration: 2293 / 
recall mode 0 is a (count + 1) counter 2294 incl %eax 2295 2296 / save the number of PIT counts 2297 movl 8(%ebp), %ebx 2298 movl %eax, (%ebx) 2299 2300 / calculate the number of cpu clock ticks that elapsed 2301 cmpl $X86_VENDOR_Cyrix, x86_vendor 2302 jz freq_notsc_notcyrix 2303 2304 / freq_notsc_perf_loop takes 86 clock cycles on Cyrix 6x86 cores 2305 movl $86, %eax 2306 jmp freq_notsc_calculate_tsc 2307 2308 freq_notsc_notcyrix: 2309 / freq_notsc_perf_loop takes 237 clock cycles on Intel Pentiums 2310 movl $237, %eax 2311 2312 freq_notsc_calculate_tsc: 2313 mull %edi 2314 2315 jmp freq_notsc_end 2316 2317 freq_notsc_too_fast: 2318 / return 0 as a 64 bit quantity 2319 xorl %eax, %eax 2320 xorl %edx, %edx 2321 2322 freq_notsc_end: 2323 popl %ebx 2324 popl %esi 2325 popl %edi 2326 popl %ebp 2327 2328 ret 2329 SET_SIZE(freq_notsc) 2330 2331 #endif /* __lint */ 2332 #endif /* !__amd64 */ 2333 2334 #if !defined(__lint) 2335 .data 2336 #if !defined(__amd64) 2337 .align 4 2338 cpu_vendor: 2339 .long 0, 0, 0 /* Vendor ID string returned */ 2340 2341 .globl CyrixInstead 2342 2343 .globl x86_feature 2344 .globl x86_type 2345 .globl x86_vendor 2346 #endif 2347 2348 #endif /* __lint */ 2349