1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 22 /* 23 * Copyright 2006 Sun Microsystems, Inc. All rights reserved. 24 * Use is subject to license terms. 25 */ 26 27 #pragma ident "@(#)lgrpsys.c 1.12 06/10/24 SMI" 28 29 /* 30 * lgroup system calls 31 */ 32 33 #include <sys/types.h> 34 #include <sys/errno.h> 35 #include <sys/sunddi.h> 36 #include <sys/systm.h> 37 #include <sys/mman.h> 38 #include <sys/cpupart.h> 39 #include <sys/lgrp.h> 40 #include <sys/lgrp_user.h> 41 #include <sys/promif.h> /* for prom_printf() */ 42 #include <sys/sysmacros.h> 43 44 #include <vm/as.h> 45 46 47 /* definitions for mi_validity */ 48 #define VALID_ADDR 1 49 #define VALID_REQ 2 50 51 /* 52 * run through the given number of addresses and requests and return the 53 * corresponding memory information for each address 54 */ 55 static int 56 meminfo(int addr_count, struct meminfo *mip) 57 { 58 size_t in_size, out_size, req_size, val_size; 59 struct as *as; 60 struct hat *hat; 61 int i, j, out_idx, info_count; 62 lgrp_t *lgrp; 63 pfn_t pfn; 64 ssize_t pgsz; 65 int *req_array, *val_array; 66 uint64_t *in_array, *out_array; 67 uint64_t addr, paddr; 68 uintptr_t vaddr; 69 int ret = 0; 70 struct meminfo minfo; 71 #if defined(_SYSCALL32_IMPL) 72 struct meminfo32 minfo32; 73 #endif 74 75 /* 76 * Make sure that there is at least one address to translate and 77 * limit how many virtual addresses the kernel can do per call 78 */ 79 if (addr_count < 1) 80 return (set_errno(EINVAL)); 81 else if (addr_count > MAX_MEMINFO_CNT) 82 addr_count = MAX_MEMINFO_CNT; 83 84 if (get_udatamodel() == DATAMODEL_NATIVE) { 85 if (copyin(mip, &minfo, sizeof (struct meminfo))) 86 return (set_errno(EFAULT)); 87 } 88 #if defined(_SYSCALL32_IMPL) 89 else { 90 bzero(&minfo, sizeof (minfo)); 91 if (copyin(mip, &minfo32, sizeof (struct meminfo32))) 92 return (set_errno(EFAULT)); 93 minfo.mi_inaddr = (const uint64_t *)(uintptr_t) 94 minfo32.mi_inaddr; 95 minfo.mi_info_req = (const uint_t *)(uintptr_t) 96 minfo32.mi_info_req; 97 minfo.mi_info_count = minfo32.mi_info_count; 98 minfo.mi_outdata = (uint64_t *)(uintptr_t) 99 minfo32.mi_outdata; 100 minfo.mi_validity = (uint_t *)(uintptr_t) 101 minfo32.mi_validity; 102 } 103 #endif 104 /* 105 * all the input parameters have been copied in:- 106 * addr_count - number of input addresses 107 * minfo.mi_inaddr - array of input addresses 108 * minfo.mi_info_req - array of types of information requested 109 * minfo.mi_info_count - no. of pieces of info requested for each addr 110 * minfo.mi_outdata - array into which the results are placed 111 * minfo.mi_validity - array containing bitwise result codes; 0th bit 112 * evaluates validity of corresponding input 113 * address, 1st bit validity of response to first 114 * member of info_req, etc. 115 */ 116 117 /* make sure mi_info_count is within limit */ 118 info_count = minfo.mi_info_count; 119 if (info_count < 1 || info_count > MAX_MEMINFO_REQ) 120 return (set_errno(EINVAL)); 121 122 /* 123 * allocate buffer in_array for the input addresses and copy them in 124 */ 125 in_size = sizeof (uint64_t) * addr_count; 126 in_array = kmem_alloc(in_size, KM_SLEEP); 127 if (copyin(minfo.mi_inaddr, in_array, in_size)) { 128 kmem_free(in_array, in_size); 129 return (set_errno(EFAULT)); 130 } 131 132 /* 133 * allocate buffer req_array for the input info_reqs and copy them in 134 */ 135 req_size = sizeof (uint_t) * info_count; 136 req_array = kmem_alloc(req_size, KM_SLEEP); 137 if (copyin(minfo.mi_info_req, req_array, req_size)) { 138 kmem_free(req_array, req_size); 139 kmem_free(in_array, in_size); 140 return (set_errno(EFAULT)); 141 } 142 143 /* 144 * allocate buffer out_array which holds the results and will have 145 * to be copied out later 146 */ 147 out_size = sizeof (uint64_t) * addr_count * info_count; 148 out_array = kmem_alloc(out_size, KM_SLEEP); 149 150 /* 151 * allocate buffer val_array which holds the validity bits and will 152 * have to be copied out later 153 */ 154 val_size = sizeof (uint_t) * addr_count; 155 val_array = kmem_alloc(val_size, KM_SLEEP); 156 157 if ((req_array[0] & MEMINFO_MASK) == MEMINFO_PLGRP) { 158 /* find the corresponding lgroup for each physical address */ 159 for (i = 0; i < addr_count; i++) { 160 paddr = in_array[i]; 161 pfn = btop(paddr); 162 lgrp = lgrp_pfn_to_lgrp(pfn); 163 if (lgrp) { 164 out_array[i] = lgrp->lgrp_id; 165 val_array[i] = VALID_ADDR | VALID_REQ; 166 } else { 167 out_array[i] = NULL; 168 val_array[i] = 0; 169 } 170 } 171 } else { 172 /* get the corresponding memory info for each virtual address */ 173 as = curproc->p_as; 174 175 AS_LOCK_ENTER(as, &as->a_lock, RW_READER); 176 hat = as->a_hat; 177 for (i = out_idx = 0; i < addr_count; i++, out_idx += 178 info_count) { 179 addr = in_array[i]; 180 vaddr = (uintptr_t)(addr & ~PAGEOFFSET); 181 if (!as_segat(as, (caddr_t)vaddr)) { 182 val_array[i] = 0; 183 continue; 184 } 185 val_array[i] = VALID_ADDR; 186 pfn = hat_getpfnum(hat, (caddr_t)vaddr); 187 if (pfn != PFN_INVALID) { 188 paddr = (uint64_t)((pfn << PAGESHIFT) | 189 (addr & PAGEOFFSET)); 190 for (j = 0; j < info_count; j++) { 191 switch (req_array[j] & MEMINFO_MASK) { 192 case MEMINFO_VPHYSICAL: 193 /* 194 * return the physical address 195 * corresponding to the input 196 * virtual address 197 */ 198 out_array[out_idx + j] = paddr; 199 val_array[i] |= VALID_REQ << j; 200 break; 201 case MEMINFO_VLGRP: 202 /* 203 * return the lgroup of physical 204 * page corresponding to the 205 * input virtual address 206 */ 207 lgrp = lgrp_pfn_to_lgrp(pfn); 208 if (lgrp) { 209 out_array[out_idx + j] = 210 lgrp->lgrp_id; 211 val_array[i] |= 212 VALID_REQ << j; 213 } 214 break; 215 case MEMINFO_VPAGESIZE: 216 /* 217 * return the size of physical 218 * page corresponding to the 219 * input virtual address 220 */ 221 pgsz = hat_getpagesize(hat, 222 (caddr_t)vaddr); 223 if (pgsz != -1) { 224 out_array[out_idx + j] = 225 pgsz; 226 val_array[i] |= 227 VALID_REQ << j; 228 } 229 break; 230 case MEMINFO_VREPLCNT: 231 /* 232 * for future use:- 233 * return the no. replicated 234 * physical pages corresponding 235 * to the input virtual address, 236 * so it is always 0 at the 237 * moment 238 */ 239 out_array[out_idx + j] = 0; 240 val_array[i] |= VALID_REQ << j; 241 break; 242 case MEMINFO_VREPL: 243 /* 244 * for future use:- 245 * return the nth physical 246 * replica of the specified 247 * virtual address 248 */ 249 break; 250 case MEMINFO_VREPL_LGRP: 251 /* 252 * for future use:- 253 * return the lgroup of nth 254 * physical replica of the 255 * specified virtual address 256 */ 257 break; 258 case MEMINFO_PLGRP: 259 /* 260 * this is for physical address 261 * only, shouldn't mix with 262 * virtual address 263 */ 264 break; 265 default: 266 break; 267 } 268 } 269 } 270 } 271 AS_LOCK_EXIT(as, &as->a_lock); 272 } 273 274 /* copy out the results and validity bits and free the buffers */ 275 if ((copyout(out_array, minfo.mi_outdata, out_size) != 0) || 276 (copyout(val_array, minfo.mi_validity, val_size) != 0)) 277 ret = set_errno(EFAULT); 278 279 kmem_free(in_array, in_size); 280 kmem_free(out_array, out_size); 281 kmem_free(req_array, req_size); 282 kmem_free(val_array, val_size); 283 284 return (ret); 285 } 286 287 288 /* 289 * Initialize lgroup affinities for thread 290 */ 291 void 292 lgrp_affinity_init(lgrp_affinity_t **bufaddr) 293 { 294 if (bufaddr) 295 *bufaddr = NULL; 296 } 297 298 299 /* 300 * Free lgroup affinities for thread and set to NULL 301 * just in case thread gets recycled 302 */ 303 void 304 lgrp_affinity_free(lgrp_affinity_t **bufaddr) 305 { 306 if (bufaddr && *bufaddr) { 307 kmem_free(*bufaddr, nlgrpsmax * sizeof (lgrp_affinity_t)); 308 *bufaddr = NULL; 309 } 310 } 311 312 313 #define P_ANY -2 /* cookie specifying any ID */ 314 315 316 /* 317 * Find LWP with given ID in specified process and get its affinity for 318 * specified lgroup 319 */ 320 lgrp_affinity_t 321 lgrp_affinity_get_thread(proc_t *p, id_t lwpid, lgrp_id_t lgrp) 322 { 323 lgrp_affinity_t aff; 324 int found; 325 kthread_t *t; 326 327 ASSERT(MUTEX_HELD(&p->p_lock)); 328 329 aff = LGRP_AFF_NONE; 330 found = 0; 331 t = p->p_tlist; 332 /* 333 * The process may be executing in proc_exit() and its p->p_list may be 334 * already NULL. 335 */ 336 if (t == NULL) 337 return (set_errno(ESRCH)); 338 339 do { 340 if (t->t_tid == lwpid || lwpid == P_ANY) { 341 thread_lock(t); 342 /* 343 * Check to see whether caller has permission to set 344 * affinity for LWP 345 */ 346 if (t->t_cid == 0 || !hasprocperm(t->t_cred, CRED())) { 347 thread_unlock(t); 348 return (set_errno(EPERM)); 349 } 350 351 if (t->t_lgrp_affinity) 352 aff = t->t_lgrp_affinity[lgrp]; 353 thread_unlock(t); 354 found = 1; 355 break; 356 } 357 } while ((t = t->t_forw) != p->p_tlist); 358 if (!found) 359 aff = set_errno(ESRCH); 360 361 return (aff); 362 } 363 364 365 /* 366 * Get lgroup affinity for given LWP 367 */ 368 lgrp_affinity_t 369 lgrp_affinity_get(lgrp_affinity_args_t *ap) 370 { 371 lgrp_affinity_t aff; 372 lgrp_affinity_args_t args; 373 id_t id; 374 idtype_t idtype; 375 lgrp_id_t lgrp; 376 proc_t *p; 377 kthread_t *t; 378 379 /* 380 * Copyin arguments 381 */ 382 if (copyin(ap, &args, sizeof (lgrp_affinity_args_t)) != 0) 383 return (set_errno(EFAULT)); 384 385 id = args.id; 386 idtype = args.idtype; 387 lgrp = args.lgrp; 388 389 /* 390 * Check for invalid lgroup 391 */ 392 if (lgrp < 0 || lgrp == LGRP_NONE) 393 return (set_errno(EINVAL)); 394 395 /* 396 * Check for existing lgroup 397 */ 398 if (lgrp > lgrp_alloc_max) 399 return (set_errno(ESRCH)); 400 401 /* 402 * Get lgroup affinity for given LWP or process 403 */ 404 switch (idtype) { 405 406 case P_LWPID: 407 /* 408 * LWP in current process 409 */ 410 p = curproc; 411 mutex_enter(&p->p_lock); 412 if (id != P_MYID) /* different thread */ 413 aff = lgrp_affinity_get_thread(p, id, lgrp); 414 else { /* current thread */ 415 aff = LGRP_AFF_NONE; 416 t = curthread; 417 thread_lock(t); 418 if (t->t_lgrp_affinity) 419 aff = t->t_lgrp_affinity[lgrp]; 420 thread_unlock(t); 421 } 422 mutex_exit(&p->p_lock); 423 break; 424 425 case P_PID: 426 /* 427 * Process 428 */ 429 mutex_enter(&pidlock); 430 431 if (id == P_MYID) 432 p = curproc; 433 else { 434 p = prfind(id); 435 if (p == NULL) { 436 mutex_exit(&pidlock); 437 return (set_errno(ESRCH)); 438 } 439 } 440 441 mutex_enter(&p->p_lock); 442 aff = lgrp_affinity_get_thread(p, P_ANY, lgrp); 443 mutex_exit(&p->p_lock); 444 445 mutex_exit(&pidlock); 446 break; 447 448 default: 449 aff = set_errno(EINVAL); 450 break; 451 } 452 453 return (aff); 454 } 455 456 457 /* 458 * Find lgroup for which this thread has most affinity in specified partition 459 * starting from home lgroup unless specified starting lgroup is preferred 460 */ 461 lpl_t * 462 lgrp_affinity_best(kthread_t *t, struct cpupart *cpupart, lgrp_id_t start, 463 boolean_t prefer_start) 464 { 465 lgrp_affinity_t *affs; 466 lgrp_affinity_t best_aff; 467 lpl_t *best_lpl; 468 lgrp_id_t finish; 469 lgrp_id_t home; 470 lgrp_id_t lgrpid; 471 lpl_t *lpl; 472 473 ASSERT(t != NULL); 474 ASSERT((MUTEX_HELD(&cpu_lock) || curthread->t_preempt > 0) || 475 (MUTEX_HELD(&ttoproc(t)->p_lock) && THREAD_LOCK_HELD(t))); 476 ASSERT(cpupart != NULL); 477 478 if (t->t_lgrp_affinity == NULL) 479 return (NULL); 480 481 affs = t->t_lgrp_affinity; 482 483 /* 484 * Thread bound to CPU 485 */ 486 if (t->t_bind_cpu != PBIND_NONE) { 487 cpu_t *cp; 488 489 /* 490 * Find which lpl has most affinity among leaf lpl directly 491 * containing CPU and its ancestor lpls 492 */ 493 cp = cpu[t->t_bind_cpu]; 494 495 best_lpl = lpl = cp->cpu_lpl; 496 best_aff = affs[best_lpl->lpl_lgrpid]; 497 while (lpl->lpl_parent != NULL) { 498 lpl = lpl->lpl_parent; 499 lgrpid = lpl->lpl_lgrpid; 500 if (affs[lgrpid] > best_aff) { 501 best_lpl = lpl; 502 best_aff = affs[lgrpid]; 503 } 504 } 505 return (best_lpl); 506 } 507 508 /* 509 * Start searching from home lgroup unless given starting lgroup is 510 * preferred or home lgroup isn't in given pset. Use root lgroup as 511 * starting point if both home and starting lgroups aren't in given 512 * pset. 513 */ 514 ASSERT(start >= 0 && start <= lgrp_alloc_max); 515 home = t->t_lpl->lpl_lgrpid; 516 if (!prefer_start && LGRP_CPUS_IN_PART(home, cpupart)) 517 lgrpid = home; 518 else if (start != LGRP_NONE && LGRP_CPUS_IN_PART(start, cpupart)) 519 lgrpid = start; 520 else 521 lgrpid = LGRP_ROOTID; 522 523 best_lpl = &cpupart->cp_lgrploads[lgrpid]; 524 best_aff = affs[lgrpid]; 525 finish = lgrpid; 526 do { 527 /* 528 * Skip any lgroups that don't have CPU resources 529 * in this processor set. 530 */ 531 if (!LGRP_CPUS_IN_PART(lgrpid, cpupart)) { 532 if (++lgrpid > lgrp_alloc_max) 533 lgrpid = 0; /* wrap the search */ 534 continue; 535 } 536 537 /* 538 * Find lgroup with most affinity 539 */ 540 lpl = &cpupart->cp_lgrploads[lgrpid]; 541 if (affs[lgrpid] > best_aff) { 542 best_aff = affs[lgrpid]; 543 best_lpl = lpl; 544 } 545 546 if (++lgrpid > lgrp_alloc_max) 547 lgrpid = 0; /* wrap the search */ 548 549 } while (lgrpid != finish); 550 551 /* 552 * No lgroup (in this pset) with any affinity 553 */ 554 if (best_aff == LGRP_AFF_NONE) 555 return (NULL); 556 557 lgrpid = best_lpl->lpl_lgrpid; 558 ASSERT(LGRP_CPUS_IN_PART(lgrpid, cpupart) && best_lpl->lpl_ncpu > 0); 559 560 return (best_lpl); 561 } 562 563 564 /* 565 * Set thread's affinity for given lgroup 566 */ 567 int 568 lgrp_affinity_set_thread(kthread_t *t, lgrp_id_t lgrp, lgrp_affinity_t aff, 569 lgrp_affinity_t **aff_buf) 570 { 571 lgrp_affinity_t *affs; 572 lgrp_id_t best; 573 lpl_t *best_lpl; 574 lgrp_id_t home; 575 int retval; 576 577 ASSERT(t != NULL); 578 ASSERT(MUTEX_HELD(&ttoproc(t)->p_lock)); 579 580 retval = 0; 581 582 thread_lock(t); 583 584 /* 585 * Check to see whether caller has permission to set affinity for 586 * thread 587 */ 588 if (t->t_cid == 0 || !hasprocperm(t->t_cred, CRED())) { 589 thread_unlock(t); 590 return (set_errno(EPERM)); 591 } 592 593 if (t->t_lgrp_affinity == NULL) { 594 if (aff == LGRP_AFF_NONE) { 595 thread_unlock(t); 596 return (0); 597 } 598 ASSERT(aff_buf != NULL && *aff_buf != NULL); 599 t->t_lgrp_affinity = *aff_buf; 600 *aff_buf = NULL; 601 } 602 603 affs = t->t_lgrp_affinity; 604 affs[lgrp] = aff; 605 606 /* 607 * Find lgroup for which thread has most affinity, 608 * starting with lgroup for which affinity being set 609 */ 610 best_lpl = lgrp_affinity_best(t, t->t_cpupart, lgrp, B_TRUE); 611 612 /* 613 * Rehome if found lgroup with more affinity than home or lgroup for 614 * which affinity is being set has same affinity as home 615 */ 616 home = t->t_lpl->lpl_lgrpid; 617 if (best_lpl != NULL && best_lpl != t->t_lpl) { 618 best = best_lpl->lpl_lgrpid; 619 if (affs[best] > affs[home] || (affs[best] == affs[home] && 620 best == lgrp)) 621 lgrp_move_thread(t, best_lpl, 1); 622 } 623 624 thread_unlock(t); 625 626 return (retval); 627 } 628 629 630 /* 631 * Set process' affinity for specified lgroup 632 */ 633 int 634 lgrp_affinity_set_proc(proc_t *p, lgrp_id_t lgrp, lgrp_affinity_t aff, 635 lgrp_affinity_t **aff_buf_array) 636 { 637 lgrp_affinity_t *buf; 638 int err = 0; 639 int i; 640 int retval; 641 kthread_t *t; 642 643 ASSERT(MUTEX_HELD(&pidlock) && MUTEX_HELD(&p->p_lock)); 644 ASSERT(aff_buf_array != NULL); 645 646 i = 0; 647 t = p->p_tlist; 648 if (t != NULL) { 649 do { 650 /* 651 * Set lgroup affinity for thread 652 */ 653 buf = aff_buf_array[i]; 654 retval = lgrp_affinity_set_thread(t, lgrp, aff, &buf); 655 656 if (err == 0 && retval != 0) 657 err = retval; 658 659 /* 660 * Advance pointer to next buffer 661 */ 662 if (buf == NULL) { 663 ASSERT(i < p->p_lwpcnt); 664 aff_buf_array[i] = NULL; 665 i++; 666 } 667 668 } while ((t = t->t_forw) != p->p_tlist); 669 } 670 return (err); 671 } 672 673 674 /* 675 * Set LWP's or process' affinity for specified lgroup 676 * 677 * When setting affinities, pidlock, process p_lock, and thread_lock() 678 * need to be held in that order to protect target thread's pset, process, 679 * process contents, and thread contents. thread_lock() does splhigh(), 680 * so it ends up having similiar effect as kpreempt_disable(), so it will 681 * protect calls to lgrp_move_thread() and lgrp_choose() from pset changes. 682 */ 683 int 684 lgrp_affinity_set(lgrp_affinity_args_t *ap) 685 { 686 lgrp_affinity_t aff; 687 lgrp_affinity_t *aff_buf; 688 lgrp_affinity_args_t args; 689 id_t id; 690 idtype_t idtype; 691 lgrp_id_t lgrp; 692 int nthreads; 693 proc_t *p; 694 int retval; 695 696 /* 697 * Copyin arguments 698 */ 699 if (copyin(ap, &args, sizeof (lgrp_affinity_args_t)) != 0) 700 return (set_errno(EFAULT)); 701 702 idtype = args.idtype; 703 id = args.id; 704 lgrp = args.lgrp; 705 aff = args.aff; 706 707 /* 708 * Check for invalid lgroup 709 */ 710 if (lgrp < 0 || lgrp == LGRP_NONE) 711 return (set_errno(EINVAL)); 712 713 /* 714 * Check for existing lgroup 715 */ 716 if (lgrp > lgrp_alloc_max) 717 return (set_errno(ESRCH)); 718 719 /* 720 * Check for legal affinity 721 */ 722 if (aff != LGRP_AFF_NONE && aff != LGRP_AFF_WEAK && 723 aff != LGRP_AFF_STRONG) 724 return (set_errno(EINVAL)); 725 726 /* 727 * Must be process or LWP ID 728 */ 729 if (idtype != P_LWPID && idtype != P_PID) 730 return (set_errno(EINVAL)); 731 732 /* 733 * Set given LWP's or process' affinity for specified lgroup 734 */ 735 switch (idtype) { 736 737 case P_LWPID: 738 /* 739 * Allocate memory for thread's lgroup affinities 740 * ahead of time w/o holding locks 741 */ 742 aff_buf = kmem_zalloc(nlgrpsmax * sizeof (lgrp_affinity_t), 743 KM_SLEEP); 744 745 p = curproc; 746 747 /* 748 * Set affinity for thread 749 */ 750 mutex_enter(&p->p_lock); 751 if (id == P_MYID) { /* current thread */ 752 retval = lgrp_affinity_set_thread(curthread, lgrp, aff, 753 &aff_buf); 754 } else if (p->p_tlist == NULL) { 755 retval = set_errno(ESRCH); 756 } else { /* other thread */ 757 int found = 0; 758 kthread_t *t; 759 760 t = p->p_tlist; 761 do { 762 if (t->t_tid == id) { 763 retval = lgrp_affinity_set_thread(t, 764 lgrp, aff, &aff_buf); 765 found = 1; 766 break; 767 } 768 } while ((t = t->t_forw) != p->p_tlist); 769 if (!found) 770 retval = set_errno(ESRCH); 771 } 772 mutex_exit(&p->p_lock); 773 774 /* 775 * Free memory for lgroup affinities, 776 * since thread didn't need it 777 */ 778 if (aff_buf) 779 kmem_free(aff_buf, 780 nlgrpsmax * sizeof (lgrp_affinity_t)); 781 782 break; 783 784 case P_PID: 785 786 do { 787 lgrp_affinity_t **aff_buf_array; 788 int i; 789 size_t size; 790 791 /* 792 * Get process 793 */ 794 mutex_enter(&pidlock); 795 796 if (id == P_MYID) 797 p = curproc; 798 else 799 p = prfind(id); 800 801 if (p == NULL) { 802 mutex_exit(&pidlock); 803 return (set_errno(ESRCH)); 804 } 805 806 /* 807 * Get number of threads in process 808 * 809 * NOTE: Only care about user processes, 810 * so p_lwpcnt should be number of threads. 811 */ 812 mutex_enter(&p->p_lock); 813 nthreads = p->p_lwpcnt; 814 mutex_exit(&p->p_lock); 815 816 mutex_exit(&pidlock); 817 818 if (nthreads < 1) 819 return (set_errno(ESRCH)); 820 821 /* 822 * Preallocate memory for lgroup affinities for 823 * each thread in process now to avoid holding 824 * any locks. Allocate an array to hold a buffer 825 * for each thread. 826 */ 827 aff_buf_array = kmem_zalloc(nthreads * 828 sizeof (lgrp_affinity_t *), KM_SLEEP); 829 830 size = nlgrpsmax * sizeof (lgrp_affinity_t); 831 for (i = 0; i < nthreads; i++) 832 aff_buf_array[i] = kmem_zalloc(size, KM_SLEEP); 833 834 mutex_enter(&pidlock); 835 836 /* 837 * Get process again since dropped locks to allocate 838 * memory (except current process) 839 */ 840 if (id != P_MYID) 841 p = prfind(id); 842 843 /* 844 * Process went away after we dropped locks and before 845 * reacquiring them, so drop locks, free memory, and 846 * return. 847 */ 848 if (p == NULL) { 849 mutex_exit(&pidlock); 850 for (i = 0; i < nthreads; i++) 851 kmem_free(aff_buf_array[i], size); 852 kmem_free(aff_buf_array, 853 nthreads * sizeof (lgrp_affinity_t *)); 854 return (set_errno(ESRCH)); 855 } 856 857 mutex_enter(&p->p_lock); 858 859 /* 860 * See whether number of threads is same 861 * If not, drop locks, free memory, and try again 862 */ 863 if (nthreads != p->p_lwpcnt) { 864 mutex_exit(&p->p_lock); 865 mutex_exit(&pidlock); 866 for (i = 0; i < nthreads; i++) 867 kmem_free(aff_buf_array[i], size); 868 kmem_free(aff_buf_array, 869 nthreads * sizeof (lgrp_affinity_t *)); 870 continue; 871 } 872 873 /* 874 * Set lgroup affinity for threads in process 875 */ 876 retval = lgrp_affinity_set_proc(p, lgrp, aff, 877 aff_buf_array); 878 879 mutex_exit(&p->p_lock); 880 mutex_exit(&pidlock); 881 882 /* 883 * Free any leftover memory, since some threads may 884 * have already allocated memory and set lgroup 885 * affinities before 886 */ 887 for (i = 0; i < nthreads; i++) 888 if (aff_buf_array[i] != NULL) 889 kmem_free(aff_buf_array[i], size); 890 kmem_free(aff_buf_array, 891 nthreads * sizeof (lgrp_affinity_t *)); 892 893 break; 894 895 } while (nthreads != p->p_lwpcnt); 896 897 break; 898 899 default: 900 retval = set_errno(EINVAL); 901 break; 902 } 903 904 return (retval); 905 } 906 907 908 /* 909 * Return the latest generation number for the lgroup hierarchy 910 * with the given view 911 */ 912 lgrp_gen_t 913 lgrp_generation(lgrp_view_t view) 914 { 915 cpupart_t *cpupart; 916 uint_t gen; 917 918 kpreempt_disable(); 919 920 /* 921 * Determine generation number for given view 922 */ 923 if (view == LGRP_VIEW_OS) 924 /* 925 * Return generation number of lgroup hierarchy for OS view 926 */ 927 gen = lgrp_gen; 928 else { 929 /* 930 * For caller's view, use generation numbers for lgroup 931 * hierarchy and caller's pset 932 * NOTE: Caller needs to check for change in pset ID 933 */ 934 cpupart = curthread->t_cpupart; 935 ASSERT(cpupart); 936 gen = lgrp_gen + cpupart->cp_gen; 937 } 938 939 kpreempt_enable(); 940 941 return (gen); 942 } 943 944 945 lgrp_id_t 946 lgrp_home_thread(kthread_t *t) 947 { 948 lgrp_id_t home; 949 950 ASSERT(t != NULL); 951 ASSERT(MUTEX_HELD(&ttoproc(t)->p_lock)); 952 953 thread_lock(t); 954 955 /* 956 * Check to see whether caller has permission to set affinity for 957 * thread 958 */ 959 if (t->t_cid == 0 || !hasprocperm(t->t_cred, CRED())) { 960 thread_unlock(t); 961 return (set_errno(EPERM)); 962 } 963 964 home = lgrp_home_id(t); 965 966 thread_unlock(t); 967 return (home); 968 } 969 970 971 /* 972 * Get home lgroup of given process or thread 973 */ 974 lgrp_id_t 975 lgrp_home_get(idtype_t idtype, id_t id) 976 { 977 proc_t *p; 978 lgrp_id_t retval; 979 kthread_t *t; 980 981 /* 982 * Get home lgroup of given LWP or process 983 */ 984 switch (idtype) { 985 986 case P_LWPID: 987 p = curproc; 988 989 /* 990 * Set affinity for thread 991 */ 992 mutex_enter(&p->p_lock); 993 if (id == P_MYID) { /* current thread */ 994 retval = lgrp_home_thread(curthread); 995 } else if (p->p_tlist == NULL) { 996 retval = set_errno(ESRCH); 997 } else { /* other thread */ 998 int found = 0; 999 1000 t = p->p_tlist; 1001 do { 1002 if (t->t_tid == id) { 1003 retval = lgrp_home_thread(t); 1004 found = 1; 1005 break; 1006 } 1007 } while ((t = t->t_forw) != p->p_tlist); 1008 if (!found) 1009 retval = set_errno(ESRCH); 1010 } 1011 mutex_exit(&p->p_lock); 1012 break; 1013 1014 case P_PID: 1015 /* 1016 * Get process 1017 */ 1018 mutex_enter(&pidlock); 1019 1020 if (id == P_MYID) 1021 p = curproc; 1022 else 1023 p = prfind(id); 1024 1025 if (p == NULL) { 1026 mutex_exit(&pidlock); 1027 return (set_errno(ESRCH)); 1028 } 1029 1030 mutex_enter(&p->p_lock); 1031 t = p->p_tlist; 1032 if (t == NULL) 1033 retval = set_errno(ESRCH); 1034 else 1035 retval = lgrp_home_thread(t); 1036 mutex_exit(&p->p_lock); 1037 1038 mutex_exit(&pidlock); 1039 1040 break; 1041 1042 default: 1043 retval = set_errno(EINVAL); 1044 break; 1045 } 1046 1047 return (retval); 1048 } 1049 1050 1051 /* 1052 * Return latency between "from" and "to" lgroups 1053 * 1054 * This latency number can only be used for relative comparison 1055 * between lgroups on the running system, cannot be used across platforms, 1056 * and may not reflect the actual latency. It is platform and implementation 1057 * specific, so platform gets to decide its value. It would be nice if the 1058 * number was at least proportional to make comparisons more meaningful though. 1059 */ 1060 int 1061 lgrp_latency(lgrp_id_t from, lgrp_id_t to) 1062 { 1063 lgrp_t *from_lgrp; 1064 int i; 1065 int latency; 1066 int latency_max; 1067 lgrp_t *to_lgrp; 1068 1069 ASSERT(MUTEX_HELD(&cpu_lock)); 1070 1071 if (from < 0 || to < 0) 1072 return (set_errno(EINVAL)); 1073 1074 if (from > lgrp_alloc_max || to > lgrp_alloc_max) 1075 return (set_errno(ESRCH)); 1076 1077 from_lgrp = lgrp_table[from]; 1078 to_lgrp = lgrp_table[to]; 1079 1080 if (!LGRP_EXISTS(from_lgrp) || !LGRP_EXISTS(to_lgrp)) { 1081 return (set_errno(ESRCH)); 1082 } 1083 1084 /* 1085 * Get latency for same lgroup 1086 */ 1087 if (from == to) { 1088 latency = from_lgrp->lgrp_latency; 1089 return (latency); 1090 } 1091 1092 /* 1093 * Get latency between leaf lgroups 1094 */ 1095 if (from_lgrp->lgrp_childcnt == 0 && to_lgrp->lgrp_childcnt == 0) 1096 return (lgrp_plat_latency(from_lgrp->lgrp_plathand, 1097 to_lgrp->lgrp_plathand)); 1098 1099 /* 1100 * Determine max latency between resources in two lgroups 1101 */ 1102 latency_max = 0; 1103 for (i = 0; i <= lgrp_alloc_max; i++) { 1104 lgrp_t *from_rsrc; 1105 int j; 1106 lgrp_t *to_rsrc; 1107 1108 from_rsrc = lgrp_table[i]; 1109 if (!LGRP_EXISTS(from_rsrc) || 1110 !klgrpset_ismember(from_lgrp->lgrp_set[LGRP_RSRC_CPU], i)) 1111 continue; 1112 1113 for (j = 0; j <= lgrp_alloc_max; j++) { 1114 to_rsrc = lgrp_table[j]; 1115 if (!LGRP_EXISTS(to_rsrc) || 1116 klgrpset_ismember(to_lgrp->lgrp_set[LGRP_RSRC_MEM], 1117 j) == 0) 1118 continue; 1119 latency = lgrp_plat_latency(from_rsrc->lgrp_plathand, 1120 to_rsrc->lgrp_plathand); 1121 if (latency > latency_max) 1122 latency_max = latency; 1123 } 1124 } 1125 return (latency_max); 1126 } 1127 1128 1129 /* 1130 * Return lgroup interface version number 1131 * 0 - none 1132 * 1 - original 1133 * 2 - lgrp_latency_cookie() and lgrp_resources() added 1134 */ 1135 int 1136 lgrp_version(int version) 1137 { 1138 /* 1139 * Return LGRP_VER_NONE when requested version isn't supported 1140 */ 1141 if (version < LGRP_VER_NONE || version > LGRP_VER_CURRENT) 1142 return (LGRP_VER_NONE); 1143 1144 /* 1145 * Return current version when LGRP_VER_NONE passed in 1146 */ 1147 if (version == LGRP_VER_NONE) 1148 return (LGRP_VER_CURRENT); 1149 1150 /* 1151 * Otherwise, return supported version. 1152 */ 1153 return (version); 1154 } 1155 1156 1157 /* 1158 * Snapshot of lgroup hieararchy 1159 * 1160 * One snapshot is kept and is based on the kernel's native data model, so 1161 * a 32-bit snapshot is kept for the 32-bit kernel and a 64-bit one for the 1162 * 64-bit kernel. If a 32-bit user wants a snapshot from the 64-bit kernel, 1163 * the kernel generates a 32-bit snapshot from the data in its 64-bit snapshot. 1164 * 1165 * The format is defined by lgroup snapshot header and the layout of 1166 * the snapshot in memory is as follows: 1167 * 1) lgroup snapshot header 1168 * - specifies format of snapshot 1169 * - defined by lgrp_snapshot_header_t 1170 * 2) lgroup info array 1171 * - contains information about each lgroup 1172 * - one element for each lgroup 1173 * - each element is defined by lgrp_info_t 1174 * 3) lgroup CPU ID array 1175 * - contains list (array) of CPU IDs for each lgroup 1176 * - lgrp_info_t points into array and specifies how many CPUs belong to 1177 * given lgroup 1178 * 4) lgroup parents array 1179 * - contains lgroup bitmask of parents for each lgroup 1180 * - bitmask is an array of unsigned longs and its size depends on nlgrpsmax 1181 * 5) lgroup children array 1182 * - contains lgroup bitmask of children for each lgroup 1183 * - bitmask is an array of unsigned longs and its size depends on nlgrpsmax 1184 * 6) lgroup resources array 1185 * - contains lgroup bitmask of resources for each lgroup 1186 * - bitmask is an array of unsigned longs and its size depends on nlgrpsmax 1187 * 7) lgroup latency table 1188 * - contains latency from each lgroup to each of other lgroups 1189 * 1190 * NOTE: Must use nlgrpsmax for per lgroup data structures because lgroups 1191 * may be sparsely allocated. 1192 */ 1193 lgrp_snapshot_header_t *lgrp_snap = NULL; /* lgroup snapshot */ 1194 static kmutex_t lgrp_snap_lock; /* snapshot lock */ 1195 1196 1197 /* 1198 * Take a snapshot of lgroup hierarchy and return size of buffer 1199 * needed to hold snapshot 1200 */ 1201 static int 1202 lgrp_snapshot(void) 1203 { 1204 size_t bitmask_size; 1205 size_t bitmasks_size; 1206 size_t bufsize; 1207 int cpu_index; 1208 size_t cpuids_size; 1209 int i; 1210 int j; 1211 size_t info_size; 1212 size_t lats_size; 1213 ulong_t *lgrp_children; 1214 processorid_t *lgrp_cpuids; 1215 lgrp_info_t *lgrp_info; 1216 int **lgrp_lats; 1217 ulong_t *lgrp_parents; 1218 ulong_t *lgrp_rsets; 1219 ulong_t *lgrpset; 1220 int snap_ncpus; 1221 int snap_nlgrps; 1222 int snap_nlgrpsmax; 1223 size_t snap_hdr_size; 1224 #ifdef _SYSCALL32_IMPL 1225 model_t model = DATAMODEL_NATIVE; 1226 1227 /* 1228 * Have up-to-date snapshot, so check to see whether caller is 32-bit 1229 * program and need to return size of 32-bit snapshot now. 1230 */ 1231 model = get_udatamodel(); 1232 if (model == DATAMODEL_ILP32 && lgrp_snap && 1233 lgrp_snap->ss_gen == lgrp_gen) { 1234 1235 snap_nlgrpsmax = lgrp_snap->ss_nlgrps_max; 1236 1237 /* 1238 * Calculate size of buffer needed for 32-bit snapshot, 1239 * rounding up size of each object to allow for alignment 1240 * of next object in buffer. 1241 */ 1242 snap_hdr_size = P2ROUNDUP(sizeof (lgrp_snapshot_header32_t), 1243 sizeof (caddr32_t)); 1244 info_size = 1245 P2ROUNDUP(snap_nlgrpsmax * sizeof (lgrp_info32_t), 1246 sizeof (processorid_t)); 1247 cpuids_size = 1248 P2ROUNDUP(lgrp_snap->ss_ncpus * sizeof (processorid_t), 1249 sizeof (ulong_t)); 1250 1251 /* 1252 * lgroup bitmasks needed for parents, children, and resources 1253 * for each lgroup and pset lgroup set 1254 */ 1255 bitmask_size = BT_SIZEOFMAP(snap_nlgrpsmax); 1256 bitmasks_size = (((2 + LGRP_RSRC_COUNT) * 1257 snap_nlgrpsmax) + 1) * bitmask_size; 1258 1259 /* 1260 * Size of latency table and buffer 1261 */ 1262 lats_size = snap_nlgrpsmax * sizeof (caddr32_t) + 1263 snap_nlgrpsmax * snap_nlgrpsmax * sizeof (int); 1264 1265 bufsize =