1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 22 /* 23 * Copyright 2008 Sun Microsystems, Inc. All rights reserved. 24 * Use is subject to license terms. 25 */ 26 27 #include <sys/param.h> 28 #include <sys/thread.h> 29 #include <sys/cpuvar.h> 30 #include <sys/inttypes.h> 31 #include <sys/cmn_err.h> 32 #include <sys/time.h> 33 #include <sys/ksynch.h> 34 #include <sys/systm.h> 35 #include <sys/kcpc.h> 36 #include <sys/cpc_impl.h> 37 #include <sys/cpc_pcbe.h> 38 #include <sys/atomic.h> 39 #include <sys/sunddi.h> 40 #include <sys/modctl.h> 41 #include <sys/sdt.h> 42 #if defined(__x86) 43 #include <asm/clock.h> 44 #endif 45 46 kmutex_t kcpc_ctx_llock[CPC_HASH_BUCKETS]; /* protects ctx_list */ 47 kcpc_ctx_t *kcpc_ctx_list[CPC_HASH_BUCKETS]; /* head of list */ 48 49 50 krwlock_t kcpc_cpuctx_lock; /* lock for 'kcpc_cpuctx' below */ 51 int kcpc_cpuctx; /* number of cpu-specific contexts */ 52 53 int kcpc_counts_include_idle = 1; /* Project Private /etc/system variable */ 54 55 /* 56 * These are set when a PCBE module is loaded. 57 */ 58 uint_t cpc_ncounters = 0; 59 pcbe_ops_t *pcbe_ops = NULL; 60 61 /* 62 * Statistics on (mis)behavior 63 */ 64 static uint32_t kcpc_intrctx_count; /* # overflows in an interrupt handler */ 65 static uint32_t kcpc_nullctx_count; /* # overflows in a thread with no ctx */ 66 67 /* 68 * Is misbehaviour (overflow in a thread with no context) fatal? 69 */ 70 #ifdef DEBUG 71 static int kcpc_nullctx_panic = 1; 72 #else 73 static int kcpc_nullctx_panic = 0; 74 #endif 75 76 static void kcpc_lwp_create(kthread_t *t, kthread_t *ct); 77 static void kcpc_restore(kcpc_ctx_t *ctx); 78 static void kcpc_save(kcpc_ctx_t *ctx); 79 static void kcpc_free(kcpc_ctx_t *ctx, int isexec); 80 static int kcpc_configure_reqs(kcpc_ctx_t *ctx, kcpc_set_t *set, int *subcode); 81 static void kcpc_free_configs(kcpc_set_t *set); 82 static kcpc_ctx_t *kcpc_ctx_alloc(void); 83 static void kcpc_ctx_clone(kcpc_ctx_t *ctx, kcpc_ctx_t *cctx); 84 static void kcpc_ctx_free(kcpc_ctx_t *ctx); 85 static int kcpc_assign_reqs(kcpc_set_t *set, kcpc_ctx_t *ctx); 86 static int kcpc_tryassign(kcpc_set_t *set, int starting_req, int *scratch); 87 static kcpc_set_t *kcpc_dup_set(kcpc_set_t *set); 88 89 void 90 kcpc_register_pcbe(pcbe_ops_t *ops) 91 { 92 pcbe_ops = ops; 93 cpc_ncounters = pcbe_ops->pcbe_ncounters(); 94 } 95 96 int 97 kcpc_bind_cpu(kcpc_set_t *set, processorid_t cpuid, int *subcode) 98 { 99 cpu_t *cp; 100 kcpc_ctx_t *ctx; 101 int error; 102 103 ctx = kcpc_ctx_alloc(); 104 105 if (kcpc_assign_reqs(set, ctx) != 0) { 106 kcpc_ctx_free(ctx); 107 *subcode = CPC_RESOURCE_UNAVAIL; 108 return (EINVAL); 109 } 110 111 ctx->kc_cpuid = cpuid; 112 ctx->kc_thread = curthread; 113 114 set->ks_data = kmem_zalloc(set->ks_nreqs * sizeof (uint64_t), KM_SLEEP); 115 116 if ((error = kcpc_configure_reqs(ctx, set, subcode)) != 0) { 117 kmem_free(set->ks_data, set->ks_nreqs * sizeof (uint64_t)); 118 kcpc_ctx_free(ctx); 119 return (error); 120 } 121 122 set->ks_ctx = ctx; 123 ctx->kc_set = set; 124 125 /* 126 * We must hold cpu_lock to prevent DR, offlining, or unbinding while 127 * we are manipulating the cpu_t and programming the hardware, else the 128 * the cpu_t could go away while we're looking at it. 129 */ 130 mutex_enter(&cpu_lock); 131 cp = cpu_get(cpuid); 132 133 if (cp == NULL) 134 /* 135 * The CPU could have been DRd out while we were getting set up. 136 */ 137 goto unbound; 138 139 mutex_enter(&cp->cpu_cpc_ctxlock); 140 141 if (cp->cpu_cpc_ctx != NULL) { 142 /* 143 * If this CPU already has a bound set, return an error. 144 */ 145 mutex_exit(&cp->cpu_cpc_ctxlock); 146 goto unbound; 147 } 148 149 if (curthread->t_bind_cpu != cpuid) { 150 mutex_exit(&cp->cpu_cpc_ctxlock); 151 goto unbound; 152 } 153 cp->cpu_cpc_ctx = ctx; 154 155 /* 156 * Kernel preemption must be disabled while fiddling with the hardware 157 * registers to prevent partial updates. 158 */ 159 kpreempt_disable(); 160 ctx->kc_rawtick = KCPC_GET_TICK(); 161 pcbe_ops->pcbe_program(ctx); 162 kpreempt_enable(); 163 164 mutex_exit(&cp->cpu_cpc_ctxlock); 165 mutex_exit(&cpu_lock); 166 167 mutex_enter(&set->ks_lock); 168 set->ks_state |= KCPC_SET_BOUND; 169 cv_signal(&set->ks_condv); 170 mutex_exit(&set->ks_lock); 171 172 return (0); 173 174 unbound: 175 mutex_exit(&cpu_lock); 176 set->ks_ctx = NULL; 177 kmem_free(set->ks_data, set->ks_nreqs * sizeof (uint64_t)); 178 kcpc_ctx_free(ctx); 179 return (EAGAIN); 180 } 181 182 int 183 kcpc_bind_thread(kcpc_set_t *set, kthread_t *t, int *subcode) 184 { 185 kcpc_ctx_t *ctx; 186 int error; 187 188 /* 189 * Only one set is allowed per context, so ensure there is no 190 * existing context. 191 */ 192 193 if (t->t_cpc_ctx != NULL) 194 return (EEXIST); 195 196 ctx = kcpc_ctx_alloc(); 197 198 /* 199 * The context must begin life frozen until it has been properly 200 * programmed onto the hardware. This prevents the context ops from 201 * worrying about it until we're ready. 202 */ 203 ctx->kc_flags |= KCPC_CTX_FREEZE; 204 ctx->kc_hrtime = gethrtime(); 205 206 if (kcpc_assign_reqs(set, ctx) != 0) { 207 kcpc_ctx_free(ctx); 208 *subcode = CPC_RESOURCE_UNAVAIL; 209 return (EINVAL); 210 } 211 212 ctx->kc_cpuid = -1; 213 if (set->ks_flags & CPC_BIND_LWP_INHERIT) 214 ctx->kc_flags |= KCPC_CTX_LWPINHERIT; 215 ctx->kc_thread = t; 216 t->t_cpc_ctx = ctx; 217 /* 218 * Permit threads to look at their own hardware counters from userland. 219 */ 220 ctx->kc_flags |= KCPC_CTX_NONPRIV; 221 222 /* 223 * Create the data store for this set. 224 */ 225 set->ks_data = kmem_alloc(set->ks_nreqs * sizeof (uint64_t), KM_SLEEP); 226 227 if ((error = kcpc_configure_reqs(ctx, set, subcode)) != 0) { 228 kmem_free(set->ks_data, set->ks_nreqs * sizeof (uint64_t)); 229 kcpc_ctx_free(ctx); 230 t->t_cpc_ctx = NULL; 231 return (error); 232 } 233 234 set->ks_ctx = ctx; 235 ctx->kc_set = set; 236 237 /* 238 * Add a device context to the subject thread. 239 */ 240 installctx(t, ctx, kcpc_save, kcpc_restore, NULL, 241 kcpc_lwp_create, NULL, kcpc_free); 242 243 /* 244 * Ask the backend to program the hardware. 245 */ 246 if (t == curthread) { 247 kpreempt_disable(); 248 ctx->kc_rawtick = KCPC_GET_TICK(); 249 atomic_and_uint(&ctx->kc_flags, ~KCPC_CTX_FREEZE); 250 pcbe_ops->pcbe_program(ctx); 251 kpreempt_enable(); 252 } else 253 /* 254 * Since we are the agent LWP, we know the victim LWP is stopped 255 * until we're done here; no need to worry about preemption or 256 * migration here. We still use an atomic op to clear the flag 257 * to ensure the flags are always self-consistent; they can 258 * still be accessed from, for instance, another CPU doing a 259 * kcpc_invalidate_all(). 260 */ 261 atomic_and_uint(&ctx->kc_flags, ~KCPC_CTX_FREEZE); 262 263 mutex_enter(&set->ks_lock); 264 set->ks_state |= KCPC_SET_BOUND; 265 cv_signal(&set->ks_condv); 266 mutex_exit(&set->ks_lock); 267 268 return (0); 269 } 270 271 /* 272 * Walk through each request in the set and ask the PCBE to configure a 273 * corresponding counter. 274 */ 275 static int 276 kcpc_configure_reqs(kcpc_ctx_t *ctx, kcpc_set_t *set, int *subcode) 277 { 278 int i; 279 int ret; 280 kcpc_request_t *rp; 281 282 for (i = 0; i < set->ks_nreqs; i++) { 283 int n; 284 rp = &set->ks_req[i]; 285 286 n = rp->kr_picnum; 287 288 ASSERT(n >= 0 && n < cpc_ncounters); 289 290 ASSERT(ctx->kc_pics[n].kp_req == NULL); 291 292 if (rp->kr_flags & CPC_OVF_NOTIFY_EMT) { 293 if ((pcbe_ops->pcbe_caps & CPC_CAP_OVERFLOW_INTERRUPT) 294 == 0) { 295 *subcode = -1; 296 return (ENOTSUP); 297 } 298 /* 299 * If any of the counters have requested overflow 300 * notification, we flag the context as being one that 301 * cares about overflow. 302 */ 303 ctx->kc_flags |= KCPC_CTX_SIGOVF; 304 } 305 306 rp->kr_config = NULL; 307 if ((ret = pcbe_ops->pcbe_configure(n, rp->kr_event, 308 rp->kr_preset, rp->kr_flags, rp->kr_nattrs, rp->kr_attr, 309 &(rp->kr_config), (void *)ctx)) != 0) { 310 kcpc_free_configs(set); 311 *subcode = ret; 312 switch (ret) { 313 case CPC_ATTR_REQUIRES_PRIVILEGE: 314 case CPC_HV_NO_ACCESS: 315 return (EACCES); 316 default: 317 return (EINVAL); 318 } 319 } 320 321 ctx->kc_pics[n].kp_req = rp; 322 rp->kr_picp = &ctx->kc_pics[n]; 323 rp->kr_data = set->ks_data + rp->kr_index; 324 *rp->kr_data = rp->kr_preset; 325 } 326 327 return (0); 328 } 329 330 static void 331 kcpc_free_configs(kcpc_set_t *set) 332 { 333 int i; 334 335 for (i = 0; i < set->ks_nreqs; i++) 336 if (set->ks_req[i].kr_config != NULL) 337 pcbe_ops->pcbe_free(set->ks_req[i].kr_config); 338 } 339 340 /* 341 * buf points to a user address and the data should be copied out to that 342 * address in the current process. 343 */ 344 int 345 kcpc_sample(kcpc_set_t *set, uint64_t *buf, hrtime_t *hrtime, uint64_t *tick) 346 { 347 kcpc_ctx_t *ctx = set->ks_ctx; 348 uint64_t curtick = KCPC_GET_TICK(); 349 350 mutex_enter(&set->ks_lock); 351 if ((set->ks_state & KCPC_SET_BOUND) == 0) { 352 mutex_exit(&set->ks_lock); 353 return (EINVAL); 354 } 355 mutex_exit(&set->ks_lock); 356 357 if (ctx->kc_flags & KCPC_CTX_INVALID) 358 return (EAGAIN); 359 360 if ((ctx->kc_flags & KCPC_CTX_FREEZE) == 0) { 361 /* 362 * Kernel preemption must be disabled while reading the 363 * hardware regs, and if this is a CPU-bound context, while 364 * checking the CPU binding of the current thread. 365 */ 366 kpreempt_disable(); 367 368 if (ctx->kc_cpuid != -1) { 369 if (curthread->t_bind_cpu != ctx->kc_cpuid) { 370 kpreempt_enable(); 371 return (EAGAIN); 372 } 373 } 374 375 if (ctx->kc_thread == curthread) { 376 ctx->kc_hrtime = gethrtime(); 377 pcbe_ops->pcbe_sample(ctx); 378 ctx->kc_vtick += curtick - ctx->kc_rawtick; 379 ctx->kc_rawtick = curtick; 380 } 381 382 kpreempt_enable(); 383 384 /* 385 * The config may have been invalidated by 386 * the pcbe_sample op. 387 */ 388 if (ctx->kc_flags & KCPC_CTX_INVALID) 389 return (EAGAIN); 390 } 391 392 if (copyout(set->ks_data, buf, 393 set->ks_nreqs * sizeof (uint64_t)) == -1) 394 return (EFAULT); 395 if (copyout(&ctx->kc_hrtime, hrtime, sizeof (uint64_t)) == -1) 396 return (EFAULT); 397 if (copyout(&ctx->kc_vtick, tick, sizeof (uint64_t)) == -1) 398 return (EFAULT); 399 400 return (0); 401 } 402 403 /* 404 * Stop the counters on the CPU this context is bound to. 405 */ 406 static void 407 kcpc_stop_hw(kcpc_ctx_t *ctx) 408 { 409 cpu_t *cp; 410 411 ASSERT((ctx->kc_flags & (KCPC_CTX_INVALID | KCPC_CTX_INVALID_STOPPED)) 412 == KCPC_CTX_INVALID); 413 414 kpreempt_disable(); 415 416 cp = cpu_get(ctx->kc_cpuid); 417 ASSERT(cp != NULL); 418 419 if (cp == CPU) { 420 pcbe_ops->pcbe_allstop(); 421 atomic_or_uint(&ctx->kc_flags, 422 KCPC_CTX_INVALID_STOPPED); 423 } else 424 kcpc_remote_stop(cp); 425 kpreempt_enable(); 426 } 427 428 int 429 kcpc_unbind(kcpc_set_t *set) 430 { 431 kcpc_ctx_t *ctx; 432 kthread_t *t; 433 434 /* 435 * We could be racing with the process's agent thread as it 436 * binds the set; we must wait for the set to finish binding 437 * before attempting to tear it down. 438 */ 439 mutex_enter(&set->ks_lock); 440 while ((set->ks_state & KCPC_SET_BOUND) == 0) 441 cv_wait(&set->ks_condv, &set->ks_lock); 442 mutex_exit(&set->ks_lock); 443 444 ctx = set->ks_ctx; 445 446 /* 447 * Use kc_lock to synchronize with kcpc_restore(). 448 */ 449 mutex_enter(&ctx->kc_lock); 450 ctx->kc_flags |= KCPC_CTX_INVALID; 451 mutex_exit(&ctx->kc_lock); 452 453 if (ctx->kc_cpuid == -1) { 454 t = ctx->kc_thread; 455 /* 456 * The context is thread-bound and therefore has a device 457 * context. It will be freed via removectx() calling 458 * freectx() calling kcpc_free(). 459 */ 460 if (t == curthread && 461 (ctx->kc_flags & KCPC_CTX_INVALID_STOPPED) == 0) { 462 kpreempt_disable(); 463 pcbe_ops->pcbe_allstop(); 464 atomic_or_uint(&ctx->kc_flags, 465 KCPC_CTX_INVALID_STOPPED); 466 kpreempt_enable(); 467 } 468 #ifdef DEBUG 469 if (removectx(t, ctx, kcpc_save, kcpc_restore, NULL, 470 kcpc_lwp_create, NULL, kcpc_free) == 0) 471 panic("kcpc_unbind: context %p not preset on thread %p", 472 (void *)ctx, (void *)t); 473 #else 474 (void) removectx(t, ctx, kcpc_save, kcpc_restore, NULL, 475 kcpc_lwp_create, NULL, kcpc_free); 476 #endif /* DEBUG */ 477 t->t_cpc_set = NULL; 478 t->t_cpc_ctx = NULL; 479 } else { 480 /* 481 * If we are unbinding a CPU-bound set from a remote CPU, the 482 * native CPU's idle thread could be in the midst of programming 483 * this context onto the CPU. We grab the context's lock here to 484 * ensure that the idle thread is done with it. When we release 485 * the lock, the CPU no longer has a context and the idle thread 486 * will move on. 487 * 488 * cpu_lock must be held to prevent the CPU from being DR'd out 489 * while we disassociate the context from the cpu_t. 490 */ 491 cpu_t *cp; 492 mutex_enter(&cpu_lock); 493 cp = cpu_get(ctx->kc_cpuid); 494 if (cp != NULL) { 495 /* 496 * The CPU may have been DR'd out of the system. 497 */ 498 mutex_enter(&cp->cpu_cpc_ctxlock); 499 if ((ctx->kc_flags & KCPC_CTX_INVALID_STOPPED) == 0) 500 kcpc_stop_hw(ctx); 501 ASSERT(ctx->kc_flags & KCPC_CTX_INVALID_STOPPED); 502 cp->cpu_cpc_ctx = NULL; 503 mutex_exit(&cp->cpu_cpc_ctxlock); 504 } 505 mutex_exit(&cpu_lock); 506 if (ctx->kc_thread == curthread) { 507 kcpc_free(ctx, 0); 508 curthread->t_cpc_set = NULL; 509 } 510 } 511 512 return (0); 513 } 514 515 int 516 kcpc_preset(kcpc_set_t *set, int index, uint64_t preset) 517 { 518 int i; 519 520 ASSERT(set != NULL); 521 ASSERT(set->ks_state & KCPC_SET_BOUND); 522 ASSERT(set->ks_ctx->kc_thread == curthread); 523 ASSERT(set->ks_ctx->kc_cpuid == -1); 524 525 if (index < 0 || index >= set->ks_nreqs) 526 return (EINVAL); 527 528 for (i = 0; i < set->ks_nreqs; i++) 529 if (set->ks_req[i].kr_index == index) 530 break; 531 ASSERT(i != set->ks_nreqs); 532 533 set->ks_req[i].kr_preset = preset; 534 return (0); 535 } 536 537 int 538 kcpc_restart(kcpc_set_t *set) 539 { 540 kcpc_ctx_t *ctx = set->ks_ctx; 541 int i; 542 543 ASSERT(set->ks_state & KCPC_SET_BOUND); 544 ASSERT(ctx->kc_thread == curthread); 545 ASSERT(ctx->kc_cpuid == -1); 546 547 kpreempt_disable(); 548 549 /* 550 * If the user is doing this on a running set, make sure the counters 551 * are stopped first. 552 */ 553 if ((ctx->kc_flags & KCPC_CTX_FREEZE) == 0) 554 pcbe_ops->pcbe_allstop(); 555 556 for (i = 0; i < set->ks_nreqs; i++) { 557 *(set->ks_req[i].kr_data) = set->ks_req[i].kr_preset; 558 pcbe_ops->pcbe_configure(0, NULL, set->ks_req[i].kr_preset, 559 0, 0, NULL, &set->ks_req[i].kr_config, NULL); 560 } 561 562 /* 563 * Ask the backend to program the hardware. 564 */ 565 ctx->kc_rawtick = KCPC_GET_TICK(); 566 atomic_and_uint(&ctx->kc_flags, ~KCPC_CTX_FREEZE); 567 pcbe_ops->pcbe_program(ctx); 568 kpreempt_enable(); 569 570 return (0); 571 } 572 573 /* 574 * Caller must hold kcpc_cpuctx_lock. 575 */ 576 int 577 kcpc_enable(kthread_t *t, int cmd, int enable) 578 { 579 kcpc_ctx_t *ctx = t->t_cpc_ctx; 580 kcpc_set_t *set = t->t_cpc_set; 581 kcpc_set_t *newset; 582 int i; 583 int flag; 584 int err; 585 586 ASSERT(RW_READ_HELD(&kcpc_cpuctx_lock)); 587 588 if (ctx == NULL) { 589 /* 590 * This thread has a set but no context; it must be a 591 * CPU-bound set. 592 */ 593 ASSERT(t->t_cpc_set != NULL); 594 ASSERT(t->t_cpc_set->ks_ctx->kc_cpuid != -1); 595 return (EINVAL); 596 } else if (ctx->kc_flags & KCPC_CTX_INVALID) 597 return (EAGAIN); 598 599 if (cmd == CPC_ENABLE) { 600 if ((ctx->kc_flags & KCPC_CTX_FREEZE) == 0) 601 return (EINVAL); 602 kpreempt_disable(); 603 atomic_and_uint(&ctx->kc_flags, ~KCPC_CTX_FREEZE); 604 kcpc_restore(ctx); 605 kpreempt_enable(); 606 } else if (cmd == CPC_DISABLE) { 607 if (ctx->kc_flags & KCPC_CTX_FREEZE) 608 return (EINVAL); 609 kpreempt_disable(); 610 kcpc_save(ctx); 611 atomic_or_uint(&ctx->kc_flags, KCPC_CTX_FREEZE); 612 kpreempt_enable(); 613 } else if (cmd == CPC_USR_EVENTS || cmd == CPC_SYS_EVENTS) { 614 /* 615 * Strategy for usr/sys: stop counters and update set's presets 616 * with current counter values, unbind, update requests with 617 * new config, then re-bind. 618 */ 619 flag = (cmd == CPC_USR_EVENTS) ? 620 CPC_COUNT_USER: CPC_COUNT_SYSTEM; 621 622 kpreempt_disable(); 623 atomic_or_uint(&ctx->kc_flags, 624 KCPC_CTX_INVALID | KCPC_CTX_INVALID_STOPPED); 625 pcbe_ops->pcbe_allstop(); 626 kpreempt_enable(); 627 for (i = 0; i < set->ks_nreqs; i++) { 628 set->ks_req[i].kr_preset = *(set->ks_req[i].kr_data); 629 if (enable) 630 set->ks_req[i].kr_flags |= flag; 631 else 632 set->ks_req[i].kr_flags &= ~flag; 633 } 634 newset = kcpc_dup_set(set); 635 if (kcpc_unbind(set) != 0) 636 return (EINVAL); 637 t->t_cpc_set = newset; 638 if (kcpc_bind_thread(newset, t, &err) != 0) { 639 t->t_cpc_set = NULL; 640 kcpc_free_set(newset); 641 return (EINVAL); 642 } 643 } else 644 return (EINVAL); 645 646 return (0); 647 } 648 649 /* 650 * Provide PCBEs with a way of obtaining the configs of every counter which will 651 * be programmed together. 652 * 653 * If current is NULL, provide the first config. 654 * 655 * If data != NULL, caller wants to know where the data store associated with 656 * the config we return is located. 657 */ 658 void * 659 kcpc_next_config(void *token, void *current, uint64_t **data) 660 { 661 int i; 662 kcpc_pic_t *pic; 663 kcpc_ctx_t *ctx = (kcpc_ctx_t *)token; 664 665 if (current == NULL) { 666 /* 667 * Client would like the first config, which may not be in 668 * counter 0; we need to search through the counters for the 669 * first config. 670 */ 671 for (i = 0; i < cpc_ncounters; i++) 672 if (ctx->kc_pics[i].kp_req != NULL) 673 break; 674 /* 675 * There are no counters configured for the given context. 676 */ 677 if (i == cpc_ncounters) 678 return (NULL); 679 } else { 680 /* 681 * There surely is a faster way to do this. 682 */ 683 for (i = 0; i < cpc_ncounters; i++) { 684 pic = &ctx->kc_pics[i]; 685 686 if (pic->kp_req != NULL && 687 current == pic->kp_req->kr_config) 688 break; 689 } 690 691 /* 692 * We found the current config at picnum i. Now search for the 693 * next configured PIC. 694 */ 695 for (i++; i < cpc_ncounters; i++) { 696 pic = &ctx->kc_pics[i]; 697 if (pic->kp_req != NULL) 698 break; 699 } 700 701 if (i == cpc_ncounters) 702 return (NULL); 703 } 704 705 if (data != NULL) { 706 *data = ctx->kc_pics[i].kp_req->kr_data; 707 } 708 709 return (ctx->kc_pics[i].kp_req->kr_config); 710 } 711 712 713 static kcpc_ctx_t * 714 kcpc_ctx_alloc(void) 715 { 716 kcpc_ctx_t *ctx; 717 long hash; 718 719 ctx = (kcpc_ctx_t *)kmem_zalloc(sizeof (kcpc_ctx_t), KM_SLEEP); 720 721 hash = CPC_HASH_CTX(ctx); 722 mutex_enter(&kcpc_ctx_llock[hash]); 723 ctx->kc_next = kcpc_ctx_list[hash]; 724 kcpc_ctx_list[hash] = ctx; 725 mutex_exit(&kcpc_ctx_llock[hash]); 726 727 ctx->kc_pics = (kcpc_pic_t *)kmem_zalloc(sizeof (kcpc_pic_t) * 728 cpc_ncounters, KM_SLEEP); 729 730 ctx->kc_cpuid = -1; 731 732 return (ctx); 733 } 734 735 /* 736 * Copy set from ctx to the child context, cctx, if it has CPC_BIND_LWP_INHERIT 737 * in the flags. 738 */ 739 static void 740 kcpc_ctx_clone(kcpc_ctx_t *ctx, kcpc_ctx_t *cctx) 741 { 742 kcpc_set_t *ks = ctx->kc_set, *cks; 743 int i, j; 744 int code; 745 746 ASSERT(ks != NULL); 747 748 if ((ks->ks_flags & CPC_BIND_LWP_INHERIT) == 0) 749 return; 750 751 cks = kmem_zalloc(sizeof (*cks), KM_SLEEP); 752 cks->ks_state &= ~KCPC_SET_BOUND; 753 cctx->kc_set = cks; 754 cks->ks_flags = ks->ks_flags; 755 cks->ks_nreqs = ks->ks_nreqs; 756 cks->ks_req = kmem_alloc(cks->ks_nreqs * 757 sizeof (kcpc_request_t), KM_SLEEP); 758 cks->ks_data = kmem_alloc(cks->ks_nreqs * sizeof (uint64_t), 759 KM_SLEEP); 760 cks->ks_ctx = cctx; 761 762 for (i = 0; i < cks->ks_nreqs; i++) { 763 cks->ks_req[i].kr_index = ks->ks_req[i].kr_index; 764 cks->ks_req[i].kr_picnum = ks->ks_req[i].kr_picnum; 765 (void) strncpy(cks->ks_req[i].kr_event, 766 ks->ks_req[i].kr_event, CPC_MAX_EVENT_LEN); 767 cks->ks_req[i].kr_preset = ks->ks_req[i].kr_preset; 768 cks->ks_req[i].kr_flags = ks->ks_req[i].kr_flags; 769 cks->ks_req[i].kr_nattrs = ks->ks_req[i].kr_nattrs; 770 if (ks->ks_req[i].kr_nattrs > 0) { 771 cks->ks_req[i].kr_attr = 772 kmem_alloc(ks->ks_req[i].kr_nattrs * 773 sizeof (kcpc_attr_t), KM_SLEEP); 774 } 775 for (j = 0; j < ks->ks_req[i].kr_nattrs; j++) { 776 (void) strncpy(cks->ks_req[i].kr_attr[j].ka_name, 777 ks->ks_req[i].kr_attr[j].ka_name, 778 CPC_MAX_ATTR_LEN); 779 cks->ks_req[i].kr_attr[j].ka_val = 780 ks->ks_req[i].kr_attr[j].ka_val; 781 } 782 } 783 if (kcpc_configure_reqs(cctx, cks, &code) != 0) 784 kcpc_invalidate_config(cctx); 785 786 mutex_enter(&cks->ks_lock); 787 cks->ks_state |= KCPC_SET_BOUND; 788 cv_signal(&cks->ks_condv); 789 mutex_exit(&cks->ks_lock); 790 } 791 792 793 static void 794 kcpc_ctx_free(kcpc_ctx_t *ctx) 795 { 796 kcpc_ctx_t **loc; 797 long hash = CPC_HASH_CTX(ctx); 798 799 mutex_enter(&kcpc_ctx_llock[hash]); 800 loc = &kcpc_ctx_list[hash]; 801 ASSERT(*loc != NULL); 802 while (*loc != ctx) 803 loc = &(*loc)->kc_next; 804 *loc = ctx->kc_next; 805 mutex_exit(&kcpc_ctx_llock[hash]); 806 807 kmem_free(ctx->kc_pics, cpc_ncounters * sizeof (kcpc_pic_t)); 808 cv_destroy(&ctx->kc_condv); 809 mutex_destroy(&ctx->kc_lock); 810 kmem_free(ctx, sizeof (*ctx)); 811 } 812 813 /* 814 * Generic interrupt handler used on hardware that generates 815 * overflow interrupts. 816 * 817 * Note: executed at high-level interrupt context! 818 */ 819 /*ARGSUSED*/ 820 kcpc_ctx_t * 821 kcpc_overflow_intr(caddr_t arg, uint64_t bitmap) 822 { 823 kcpc_ctx_t *ctx; 824 kthread_t *t = curthread; 825 int i; 826 827 /* 828 * On both x86 and UltraSPARC, we may deliver the high-level 829 * interrupt in kernel mode, just after we've started to run an 830 * interrupt thread. (That's because the hardware helpfully 831 * delivers the overflow interrupt some random number of cycles 832 * after the instruction that caused the overflow by which time 833 * we're in some part of the kernel, not necessarily running on 834 * the right thread). 835 * 836 * Check for this case here -- find the pinned thread 837 * that was running when the interrupt went off. 838 */ 839 if (t->t_flag & T_INTR_THREAD) { 840 klwp_t *lwp; 841 842 atomic_add_32(&kcpc_intrctx_count, 1); 843 844 /* 845 * Note that t_lwp is always set to point at the underlying 846 * thread, thus this will work in the presence of nested 847 * interrupts. 848 */ 849 ctx = NULL; 850 if ((lwp = t->t_lwp) != NULL) { 851 t = lwptot(lwp); 852 ctx = t->t_cpc_ctx; 853 } 854 } else 855 ctx = t->t_cpc_ctx; 856 857 if (ctx == NULL) { 858 /* 859 * This can easily happen if we're using the counters in 860 * "shared" mode, for example, and an overflow interrupt 861 * occurs while we are running cpustat. In that case, the 862 * bound thread that has the context that belongs to this 863 * CPU is almost certainly sleeping (if it was running on 864 * the CPU we'd have found it above), and the actual 865 * interrupted thread has no knowledge of performance counters! 866 */ 867 ctx = curthread->t_cpu->cpu_cpc_ctx; 868 if (ctx != NULL) { 869 /* 870 * Return the bound context for this CPU to 871 * the interrupt handler so that it can synchronously 872 * sample the hardware counters and restart them. 873 */ 874 return (ctx); 875 } 876 877 /* 878 * As long as the overflow interrupt really is delivered early 879 * enough after trapping into the kernel to avoid switching 880 * threads, we must always be able to find the cpc context, 881 * or something went terribly wrong i.e. we ended up 882 * running a passivated interrupt thread, a kernel 883 * thread or we interrupted idle, all of which are Very Bad. 884 */ 885 if (kcpc_nullctx_panic) 886 panic("null cpc context, thread %p", (void *)t); 887 atomic_add_32(&kcpc_nullctx_count, 1); 888 } else if ((ctx->kc_flags & KCPC_CTX_INVALID) == 0) { 889 /* 890 * Schedule an ast to sample the counters, which will 891 * propagate any overflow into the virtualized performance 892 * counter(s), and may deliver a signal. 893 */ 894 ttolwp(t)->lwp_pcb.pcb_flags |= CPC_OVERFLOW; 895 /* 896 * If a counter has overflowed which was counting on behalf of 897 * a request which specified CPC_OVF_NOTIFY_EMT, send the 898 * process a signal. 899 */ 900 for (i = 0; i < cpc_ncounters; i++) { 901 if (ctx->kc_pics[i].kp_req != NULL && 902 bitmap & (1 << i) && 903 ctx->kc_pics[i].kp_req->kr_flags & 904 CPC_OVF_NOTIFY_EMT) { 905 /* 906 * A signal has been requested for this PIC, so 907 * so freeze the context. The interrupt handler 908 * has already stopped the counter hardware. 909 */ 910 atomic_or_uint(&ctx->kc_flags, KCPC_CTX_FREEZE); 911 atomic_or_uint(&ctx->kc_pics[i].kp_flags, 912 KCPC_PIC_OVERFLOWED); 913 } 914 } 915 aston(t); 916 } 917 return (NULL); 918 } 919 920 /* 921 * The current thread context had an overflow interrupt; we're 922 * executing here in high-level interrupt context. 923 */ 924 /*ARGSUSED*/ 925 uint_t 926 kcpc_hw_overflow_intr(caddr_t arg1, caddr_t arg2) 927 { 928 kcpc_ctx_t *ctx; 929 uint64_t bitmap; 930 931 if (pcbe_ops == NULL || 932 (bitmap = pcbe_ops->pcbe_overflow_bitmap()) == 0) 933 return (DDI_INTR_UNCLAIMED); 934 935 /* 936 * Prevent any further interrupts. 937 */ 938 pcbe_ops->pcbe_allstop(); 939 940 /* 941 * Invoke the "generic" handler. 942 * 943 * If the interrupt has occurred in the context of an lwp owning 944 * the counters, then the handler posts an AST to the lwp to 945 * trigger the actual sampling, and optionally deliver a signal or 946 * restart the counters, on the way out of the kernel using 947 * kcpc_hw_overflow_ast() (see below). 948 * 949 * On the other hand, if the handler returns the context to us 950 * directly, then it means that there are no other threads in 951 * the middle of updating it, no AST has been posted, and so we 952 * should sample the counters here, and restart them with no 953 * further fuss. 954 */ 955 if ((ctx = kcpc_overflow_intr(arg1, bitmap)) != NULL) { 956 uint64_t curtick = KCPC_GET_TICK(); 957 958 ctx->kc_hrtime = gethrtime_waitfree(); 959 ctx->kc_vtick += curtick - ctx->kc_rawtick; 960 ctx->kc_rawtick = curtick; 961 pcbe_ops->pcbe_sample(ctx); 962 pcbe_ops->pcbe_program(ctx); 963 } 964 965 return (DDI_INTR_CLAIMED); 966 } 967 968 /* 969 * Called from trap() when processing the ast posted by the high-level 970 * interrupt handler. 971 */ 972 int 973 kcpc_overflow_ast() 974 { 975 kcpc_ctx_t *ctx = curthread->t_cpc_ctx; 976 int i; 977 int found = 0; 978 uint64_t curtick = KCPC_GET_TICK(); 979 980 ASSERT(ctx != NULL); /* Beware of interrupt skid. */ 981 982 /* 983 * An overflow happened: sample the context to ensure that 984 * the overflow is propagated into the upper bits of the 985 * virtualized 64-bit counter(s). 986 */ 987 kpreempt_disable(); 988 ctx->kc_hrtime = gethrtime_waitfree(); 989 pcbe_ops->pcbe_sample(ctx); 990 kpreempt_enable(); 991 992 ctx->kc_vtick += curtick - ctx->kc_rawtick; 993 994 /* 995 * The interrupt handler has marked any pics with KCPC_PIC_OVERFLOWED 996 * if that pic generated an overflow and if the request it was counting 997 * on behalf of had CPC_OVERFLOW_REQUEST specified. We go through all 998 * pics in the context and clear the KCPC_PIC_OVERFLOWED flags. If we 999 * found any overflowed pics, keep the context frozen and return true 1000 * (thus causing a signal to be sent). 1001 */ 1002 for (i = 0; i < cpc_ncounters; i++) { 1003 if (ctx->kc_pics[i].kp_flags & KCPC_PIC_OVERFLOWED) { 1004 atomic_and_uint(&ctx->kc_pics[i].kp_flags, 1005 ~KCPC_PIC_OVERFLOWED); 1006 found = 1; 1007 } 1008 } 1009 if (found) 1010 return (1); 1011 1012 /* 1013 * Otherwise, re-enable the counters and continue life as before. 1014 */ 1015 kpreempt_disable(); 1016 atomic_and_uint(&ctx->kc_flags, ~KCPC_CTX_FREEZE); 1017 pcbe_ops->pcbe_program(ctx); 1018 kpreempt_enable(); 1019 return (0); 1020 } 1021 1022 /* 1023 * Called when switching away from current thread. 1024 */ 1025 static void 1026 kcpc_save(kcpc_ctx_t *ctx) 1027 { 1028 if (ctx->kc_flags & KCPC_CTX_INVALID) { 1029 if (ctx->kc_flags & KCPC_CTX_INVALID_STOPPED) 1030 return; 1031 /* 1032 * This context has been invalidated but the counters have not 1033 * been stopped. Stop them here and mark the context stopped. 1034 */ 1035 pcbe_ops->pcbe_allstop(); 1036 atomic_or_uint(&ctx->kc_flags, KCPC_CTX_INVALID_STOPPED); 1037 return; 1038 } 1039 1040 pcbe_ops->pcbe_allstop(); 1041 if (ctx->kc_flags & KCPC_CTX_FREEZE) 1042 return; 1043 1044 /* 1045 * Need to sample for all reqs into each req's current mpic. 1046 */ 1047 ctx->kc_hrtime = gethrtime(); 1048 ctx->kc_vtick += KCPC_GET_TICK() - ctx->kc_rawtick; 1049 pcbe_ops->pcbe_sample(ctx); 1050 } 1051 1052 static void 1053 kcpc_restore(kcpc_ctx_t *ctx) 1054 { 1055 mutex_enter(&ctx->kc_lock); 1056 if ((ctx->kc_flags & (KCPC_CTX_INVALID | KCPC_CTX_INVALID_STOPPED)) == 1057 KCPC_CTX_INVALID) 1058 /* 1059 * The context is invalidated but has not been marked stopped. 1060 * We mark it as such here because we will not start the 1061 * counters during this context switch. 1062 */ 1063 ctx->kc_flags |= KCPC_CTX_INVALID_STOPPED; 1064 1065 1066 if (ctx->kc_flags & (KCPC_CTX_INVALID | KCPC_CTX_FREEZE)) { 1067 mutex_exit(&ctx->kc_lock); 1068 return; 1069 } 1070 1071 /* 1072 * Set kc_flags to show that a kcpc_restore() is in progress to avoid 1073 * ctx & set related memory objects being freed without us knowing. 1074 * This can happen if an agent thread is executing a kcpc_unbind(), 1075 * with this thread as the target, whilst we're concurrently doing a 1076 * restorectx() during, for example, a proc_exit(). Effectively, by 1077 * doing this, we're asking kcpc_free() to cv_wait() until 1078 * kcpc_restore() has completed. 1079 */ 1080 ctx->kc_flags |= KCPC_CTX_RESTORE; 1081 mutex_exit(&ctx->kc_lock); 1082 1083 /* 1084 * While programming the hardware, the counters should be stopped. We 1085 * don't do an explicit pcbe_allstop() here because they should have 1086 * been stopped already by the last consumer. 1087 */ 1088 ctx->kc_rawtick = KCPC_GET_TICK(); 1089 pcbe_ops->pcbe_program(ctx); 1090 1091 /* 1092 * Wake the agent thread if it's waiting in kcpc_free(). 1093 */ 1094 mutex_enter(&ctx->kc_lock); 1095 ctx->kc_flags &= ~KCPC_CTX_RESTORE; 1096 cv_signal(&ctx->kc_condv); 1097 mutex_exit(&ctx->kc_lock); 1098 } 1099 1100 /* 1101 * If kcpc_counts_include_idle is set to 0 by the sys admin, we add the the 1102 * following context operators to the idle thread on each CPU. They stop the 1103 * counters when the idle thread is switched on, and they start them again when 1104 * it is switched off. 1105 */ 1106 1107 /*ARGSUSED*/ 1108 void 1109 kcpc_idle_save(struct cpu *cp) 1110 { 1111 /* 1112 * The idle thread shouldn't be run anywhere else. 1113 */ 1114 ASSERT(CPU == cp); 1115 1116 /* 1117 * We must hold the CPU's context lock to ensure the context isn't freed 1118 * while we're looking at it. 1119 */ 1120 mutex_enter(&cp->cpu_cpc_ctxlock); 1121 1122 if ((cp->cpu_cpc_ctx == NULL) || 1123 (cp-&