1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2007 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 26 /* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */ 27 /* All Rights Reserved */ 28 29 /* 30 * University Copyright- Copyright (c) 1982, 1986, 1988 31 * The Regents of the University of California 32 * All Rights Reserved 33 * 34 * University Acknowledgment- Portions of this document are derived from 35 * software developed by the University of California, Berkeley, and its 36 * contributors. 37 */ 38 39 #pragma ident "@(#)vm_as.c 1.175 07/12/10 SMI" 40 41 /* 42 * VM - address spaces. 43 */ 44 45 #include <sys/types.h> 46 #include <sys/t_lock.h> 47 #include <sys/param.h> 48 #include <sys/errno.h> 49 #include <sys/systm.h> 50 #include <sys/mman.h> 51 #include <sys/sysmacros.h> 52 #include <sys/cpuvar.h> 53 #include <sys/sysinfo.h> 54 #include <sys/kmem.h> 55 #include <sys/vnode.h> 56 #include <sys/vmsystm.h> 57 #include <sys/cmn_err.h> 58 #include <sys/debug.h> 59 #include <sys/tnf_probe.h> 60 #include <sys/vtrace.h> 61 62 #include <vm/hat.h> 63 #include <vm/xhat.h> 64 #include <vm/as.h> 65 #include <vm/seg.h> 66 #include <vm/seg_vn.h> 67 #include <vm/seg_dev.h> 68 #include <vm/seg_kmem.h> 69 #include <vm/seg_map.h> 70 #include <vm/seg_spt.h> 71 #include <vm/page.h> 72 73 clock_t deadlk_wait = 1; /* number of ticks to wait before retrying */ 74 75 static struct kmem_cache *as_cache; 76 77 static void as_setwatchprot(struct as *, caddr_t, size_t, uint_t); 78 static void as_clearwatchprot(struct as *, caddr_t, size_t); 79 int as_map_locked(struct as *, caddr_t, size_t, int ((*)()), void *); 80 81 82 /* 83 * Verifying the segment lists is very time-consuming; it may not be 84 * desirable always to define VERIFY_SEGLIST when DEBUG is set. 85 */ 86 #ifdef DEBUG 87 #define VERIFY_SEGLIST 88 int do_as_verify = 0; 89 #endif 90 91 /* 92 * Allocate a new callback data structure entry and fill in the events of 93 * interest, the address range of interest, and the callback argument. 94 * Link the entry on the as->a_callbacks list. A callback entry for the 95 * entire address space may be specified with vaddr = 0 and size = -1. 96 * 97 * CALLERS RESPONSIBILITY: If not calling from within the process context for 98 * the specified as, the caller must guarantee persistence of the specified as 99 * for the duration of this function (eg. pages being locked within the as 100 * will guarantee persistence). 101 */ 102 int 103 as_add_callback(struct as *as, void (*cb_func)(), void *arg, uint_t events, 104 caddr_t vaddr, size_t size, int sleepflag) 105 { 106 struct as_callback *current_head, *cb; 107 caddr_t saddr; 108 size_t rsize; 109 110 /* callback function and an event are mandatory */ 111 if ((cb_func == NULL) || ((events & AS_ALL_EVENT) == 0)) 112 return (EINVAL); 113 114 /* Adding a callback after as_free has been called is not allowed */ 115 if (as == &kas) 116 return (ENOMEM); 117 118 /* 119 * vaddr = 0 and size = -1 is used to indicate that the callback range 120 * is the entire address space so no rounding is done in that case. 121 */ 122 if (size != -1) { 123 saddr = (caddr_t)((uintptr_t)vaddr & (uintptr_t)PAGEMASK); 124 rsize = (((size_t)(vaddr + size) + PAGEOFFSET) & PAGEMASK) - 125 (size_t)saddr; 126 /* check for wraparound */ 127 if (saddr + rsize < saddr) 128 return (ENOMEM); 129 } else { 130 if (vaddr != 0) 131 return (EINVAL); 132 saddr = vaddr; 133 rsize = size; 134 } 135 136 /* Allocate and initialize a callback entry */ 137 cb = kmem_zalloc(sizeof (struct as_callback), sleepflag); 138 if (cb == NULL) 139 return (EAGAIN); 140 141 cb->ascb_func = cb_func; 142 cb->ascb_arg = arg; 143 cb->ascb_events = events; 144 cb->ascb_saddr = saddr; 145 cb->ascb_len = rsize; 146 147 /* Add the entry to the list */ 148 mutex_enter(&as->a_contents); 149 current_head = as->a_callbacks; 150 as->a_callbacks = cb; 151 cb->ascb_next = current_head; 152 153 /* 154 * The call to this function may lose in a race with 155 * a pertinent event - eg. a thread does long term memory locking 156 * but before the callback is added another thread executes as_unmap. 157 * A broadcast here resolves that. 158 */ 159 if ((cb->ascb_events & AS_UNMAPWAIT_EVENT) && AS_ISUNMAPWAIT(as)) { 160 AS_CLRUNMAPWAIT(as); 161 cv_broadcast(&as->a_cv); 162 } 163 164 mutex_exit(&as->a_contents); 165 return (0); 166 } 167 168 /* 169 * Search the callback list for an entry which pertains to arg. 170 * 171 * This is called from within the client upon completion of the callback. 172 * RETURN VALUES: 173 * AS_CALLBACK_DELETED (callback entry found and deleted) 174 * AS_CALLBACK_NOTFOUND (no callback entry found - this is ok) 175 * AS_CALLBACK_DELETE_DEFERRED (callback is in process, delete of this 176 * entry will be made in as_do_callbacks) 177 * 178 * If as_delete_callback encounters a matching entry with AS_CALLBACK_CALLED 179 * set, it indicates that as_do_callbacks is processing this entry. The 180 * AS_ALL_EVENT events are cleared in the entry, and a broadcast is made 181 * to unblock as_do_callbacks, in case it is blocked. 182 * 183 * CALLERS RESPONSIBILITY: If not calling from within the process context for 184 * the specified as, the caller must guarantee persistence of the specified as 185 * for the duration of this function (eg. pages being locked within the as 186 * will guarantee persistence). 187 */ 188 uint_t 189 as_delete_callback(struct as *as, void *arg) 190 { 191 struct as_callback **prevcb = &as->a_callbacks; 192 struct as_callback *cb; 193 uint_t rc = AS_CALLBACK_NOTFOUND; 194 195 mutex_enter(&as->a_contents); 196 for (cb = as->a_callbacks; cb; prevcb = &cb->ascb_next, cb = *prevcb) { 197 if (cb->ascb_arg != arg) 198 continue; 199 200 /* 201 * If the events indicate AS_CALLBACK_CALLED, just clear 202 * AS_ALL_EVENT in the events field and wakeup the thread 203 * that may be waiting in as_do_callbacks. as_do_callbacks 204 * will take care of removing this entry from the list. In 205 * that case, return AS_CALLBACK_DELETE_DEFERRED. Otherwise 206 * (AS_CALLBACK_CALLED not set), just remove it from the 207 * list, return the memory and return AS_CALLBACK_DELETED. 208 */ 209 if ((cb->ascb_events & AS_CALLBACK_CALLED) != 0) { 210 /* leave AS_CALLBACK_CALLED */ 211 cb->ascb_events &= ~AS_ALL_EVENT; 212 rc = AS_CALLBACK_DELETE_DEFERRED; 213 cv_broadcast(&as->a_cv); 214 } else { 215 *prevcb = cb->ascb_next; 216 kmem_free(cb, sizeof (struct as_callback)); 217 rc = AS_CALLBACK_DELETED; 218 } 219 break; 220 } 221 mutex_exit(&as->a_contents); 222 return (rc); 223 } 224 225 /* 226 * Searches the as callback list for a matching entry. 227 * Returns a pointer to the first matching callback, or NULL if 228 * nothing is found. 229 * This function never sleeps so it is ok to call it with more 230 * locks held but the (required) a_contents mutex. 231 * 232 * See also comment on as_do_callbacks below. 233 */ 234 static struct as_callback * 235 as_find_callback(struct as *as, uint_t events, caddr_t event_addr, 236 size_t event_len) 237 { 238 struct as_callback *cb; 239 240 ASSERT(MUTEX_HELD(&as->a_contents)); 241 for (cb = as->a_callbacks; cb != NULL; cb = cb->ascb_next) { 242 /* 243 * If the callback has not already been called, then 244 * check if events or address range pertains. An event_len 245 * of zero means do an unconditional callback. 246 */ 247 if (((cb->ascb_events & AS_CALLBACK_CALLED) != 0) || 248 ((event_len != 0) && (((cb->ascb_events & events) == 0) || 249 (event_addr + event_len < cb->ascb_saddr) || 250 (event_addr > (cb->ascb_saddr + cb->ascb_len))))) { 251 continue; 252 } 253 break; 254 } 255 return (cb); 256 } 257 258 /* 259 * Executes a given callback and removes it from the callback list for 260 * this address space. 261 * This function may sleep so the caller must drop all locks except 262 * a_contents before calling this func. 263 * 264 * See also comments on as_do_callbacks below. 265 */ 266 static void 267 as_execute_callback(struct as *as, struct as_callback *cb, 268 uint_t events) 269 { 270 struct as_callback **prevcb; 271 void *cb_arg; 272 273 ASSERT(MUTEX_HELD(&as->a_contents) && (cb->ascb_events & events)); 274 cb->ascb_events |= AS_CALLBACK_CALLED; 275 mutex_exit(&as->a_contents); 276 (*cb->ascb_func)(as, cb->ascb_arg, events); 277 mutex_enter(&as->a_contents); 278 /* 279 * the callback function is required to delete the callback 280 * when the callback function determines it is OK for 281 * this thread to continue. as_delete_callback will clear 282 * the AS_ALL_EVENT in the events field when it is deleted. 283 * If the callback function called as_delete_callback, 284 * events will already be cleared and there will be no blocking. 285 */ 286 while ((cb->ascb_events & events) != 0) { 287 cv_wait(&as->a_cv, &as->a_contents); 288 } 289 /* 290 * This entry needs to be taken off the list. Normally, the 291 * callback func itself does that, but unfortunately the list 292 * may have changed while the callback was running because the 293 * a_contents mutex was dropped and someone else other than the 294 * callback func itself could have called as_delete_callback, 295 * so we have to search to find this entry again. The entry 296 * must have AS_CALLBACK_CALLED, and have the same 'arg'. 297 */ 298 cb_arg = cb->ascb_arg; 299 prevcb = &as->a_callbacks; 300 for (cb = as->a_callbacks; cb != NULL; 301 prevcb = &cb->ascb_next, cb = *prevcb) { 302 if (((cb->ascb_events & AS_CALLBACK_CALLED) == 0) || 303 (cb_arg != cb->ascb_arg)) { 304 continue; 305 } 306 *prevcb = cb->ascb_next; 307 kmem_free(cb, sizeof (struct as_callback)); 308 break; 309 } 310 } 311 312 /* 313 * Check the callback list for a matching event and intersection of 314 * address range. If there is a match invoke the callback. Skip an entry if: 315 * - a callback is already in progress for this entry (AS_CALLBACK_CALLED) 316 * - not event of interest 317 * - not address range of interest 318 * 319 * An event_len of zero indicates a request for an unconditional callback 320 * (regardless of event), only the AS_CALLBACK_CALLED is checked. The 321 * a_contents lock must be dropped before a callback, so only one callback 322 * can be done before returning. Return -1 (true) if a callback was 323 * executed and removed from the list, else return 0 (false). 324 * 325 * The logically separate parts, i.e. finding a matching callback and 326 * executing a given callback have been separated into two functions 327 * so that they can be called with different sets of locks held beyond 328 * the always-required a_contents. as_find_callback does not sleep so 329 * it is ok to call it if more locks than a_contents (i.e. the a_lock 330 * rwlock) are held. as_execute_callback on the other hand may sleep 331 * so all locks beyond a_contents must be dropped by the caller if one 332 * does not want to end comatose. 333 */ 334 static int 335 as_do_callbacks(struct as *as, uint_t events, caddr_t event_addr, 336 size_t event_len) 337 { 338 struct as_callback *cb; 339 340 if ((cb = as_find_callback(as, events, event_addr, event_len))) { 341 as_execute_callback(as, cb, events); 342 return (-1); 343 } 344 return (0); 345 } 346 347 /* 348 * Search for the segment containing addr. If a segment containing addr 349 * exists, that segment is returned. If no such segment exists, and 350 * the list spans addresses greater than addr, then the first segment 351 * whose base is greater than addr is returned; otherwise, NULL is 352 * returned unless tail is true, in which case the last element of the 353 * list is returned. 354 * 355 * a_seglast is used to cache the last found segment for repeated 356 * searches to the same addr (which happens frequently). 357 */ 358 struct seg * 359 as_findseg(struct as *as, caddr_t addr, int tail) 360 { 361 struct seg *seg = as->a_seglast; 362 avl_index_t where; 363 364 ASSERT(AS_LOCK_HELD(as, &as->a_lock)); 365 366 if (seg != NULL && 367 seg->s_base <= addr && 368 addr < seg->s_base + seg->s_size) 369 return (seg); 370 371 seg = avl_find(&as->a_segtree, &addr, &where); 372 if (seg != NULL) 373 return (as->a_seglast = seg); 374 375 seg = avl_nearest(&as->a_segtree, where, AVL_AFTER); 376 if (seg == NULL && tail) 377 seg = avl_last(&as->a_segtree); 378 return (as->a_seglast = seg); 379 } 380 381 #ifdef VERIFY_SEGLIST 382 /* 383 * verify that the linked list is coherent 384 */ 385 static void 386 as_verify(struct as *as) 387 { 388 struct seg *seg, *seglast, *p, *n; 389 uint_t nsegs = 0; 390 391 if (do_as_verify == 0) 392 return; 393 394 seglast = as->a_seglast; 395 396 for (seg = AS_SEGFIRST(as); seg != NULL; seg = AS_SEGNEXT(as, seg)) { 397 ASSERT(seg->s_as == as); 398 p = AS_SEGPREV(as, seg); 399 n = AS_SEGNEXT(as, seg); 400 ASSERT(p == NULL || p->s_as == as); 401 ASSERT(p == NULL || p->s_base < seg->s_base); 402 ASSERT(n == NULL || n->s_base > seg->s_base); 403 ASSERT(n != NULL || seg == avl_last(&as->a_segtree)); 404 if (seg == seglast) 405 seglast = NULL; 406 nsegs++; 407 } 408 ASSERT(seglast == NULL); 409 ASSERT(avl_numnodes(&as->a_segtree) == nsegs); 410 } 411 #endif /* VERIFY_SEGLIST */ 412 413 /* 414 * Add a new segment to the address space. The avl_find() 415 * may be expensive so we attempt to use last segment accessed 416 * in as_gap() as an insertion point. 417 */ 418 int 419 as_addseg(struct as *as, struct seg *newseg) 420 { 421 struct seg *seg; 422 caddr_t addr; 423 caddr_t eaddr; 424 avl_index_t where; 425 426 ASSERT(AS_WRITE_HELD(as, &as->a_lock)); 427 428 as->a_updatedir = 1; /* inform /proc */ 429 gethrestime(&as->a_updatetime); 430 431 if (as->a_lastgaphl != NULL) { 432 struct seg *hseg = NULL; 433 struct seg *lseg = NULL; 434 435 if (as->a_lastgaphl->s_base > newseg->s_base) { 436 hseg = as->a_lastgaphl; 437 lseg = AVL_PREV(&as->a_segtree, hseg); 438 } else { 439 lseg = as->a_lastgaphl; 440 hseg = AVL_NEXT(&as->a_segtree, lseg); 441 } 442 443 if (hseg && lseg && lseg->s_base < newseg->s_base && 444 hseg->s_base > newseg->s_base) { 445 avl_insert_here(&as->a_segtree, newseg, lseg, 446 AVL_AFTER); 447 as->a_lastgaphl = NULL; 448 as->a_seglast = newseg; 449 return (0); 450 } 451 as->a_lastgaphl = NULL; 452 } 453 454 addr = newseg->s_base; 455 eaddr = addr + newseg->s_size; 456 again: 457 458 seg = avl_find(&as->a_segtree, &addr, &where); 459 460 if (seg == NULL) 461 seg = avl_nearest(&as->a_segtree, where, AVL_AFTER); 462 463 if (seg == NULL) 464 seg = avl_last(&as->a_segtree); 465 466 if (seg != NULL) { 467 caddr_t base = seg->s_base; 468 469 /* 470 * If top of seg is below the requested address, then 471 * the insertion point is at the end of the linked list, 472 * and seg points to the tail of the list. Otherwise, 473 * the insertion point is immediately before seg. 474 */ 475 if (base + seg->s_size > addr) { 476 if (addr >= base || eaddr > base) { 477 #ifdef __sparc 478 extern struct seg_ops segnf_ops; 479 480 /* 481 * no-fault segs must disappear if overlaid. 482 * XXX need new segment type so 483 * we don't have to check s_ops 484 */ 485 if (seg->s_ops == &segnf_ops) { 486 seg_unmap(seg); 487 goto again; 488 } 489 #endif 490 return (-1); /* overlapping segment */ 491 } 492 } 493 } 494 as->a_seglast = newseg; 495 avl_insert(&as->a_segtree, newseg, where); 496 497 #ifdef VERIFY_SEGLIST 498 as_verify(as); 499 #endif 500 return (0); 501 } 502 503 struct seg * 504 as_removeseg(struct as *as, struct seg *seg) 505 { 506 avl_tree_t *t; 507 508 ASSERT(AS_WRITE_HELD(as, &as->a_lock)); 509 510 as->a_updatedir = 1; /* inform /proc */ 511 gethrestime(&as->a_updatetime); 512 513 if (seg == NULL) 514 return (NULL); 515 516 t = &as->a_segtree; 517 if (as->a_seglast == seg) 518 as->a_seglast = NULL; 519 as->a_lastgaphl = NULL; 520 521 /* 522 * if this segment is at an address higher than 523 * a_lastgap, set a_lastgap to the next segment (NULL if last segment) 524 */ 525 if (as->a_lastgap && 526 (seg == as->a_lastgap || seg->s_base > as->a_lastgap->s_base)) 527 as->a_lastgap = AVL_NEXT(t, seg); 528 529 /* 530 * remove the segment from the seg tree 531 */ 532 avl_remove(t, seg); 533 534 #ifdef VERIFY_SEGLIST 535 as_verify(as); 536 #endif 537 return (seg); 538 } 539 540 /* 541 * Find a segment containing addr. 542 */ 543 struct seg * 544 as_segat(struct as *as, caddr_t addr) 545 { 546 struct seg *seg = as->a_seglast; 547 548 ASSERT(AS_LOCK_HELD(as, &as->a_lock)); 549 550 if (seg != NULL && seg->s_base <= addr && 551 addr < seg->s_base + seg->s_size) 552 return (seg); 553 554 seg = avl_find(&as->a_segtree, &addr, NULL); 555 return (seg); 556 } 557 558 /* 559 * Serialize all searches for holes in an address space to 560 * prevent two or more threads from allocating the same virtual 561 * address range. The address space must not be "read/write" 562 * locked by the caller since we may block. 563 */ 564 void 565 as_rangelock(struct as *as) 566 { 567 mutex_enter(&as->a_contents); 568 while (AS_ISCLAIMGAP(as)) 569 cv_wait(&as->a_cv, &as->a_contents); 570 AS_SETCLAIMGAP(as); 571 mutex_exit(&as->a_contents); 572 } 573 574 /* 575 * Release hold on a_state & AS_CLAIMGAP and signal any other blocked threads. 576 */ 577 void 578 as_rangeunlock(struct as *as) 579 { 580 mutex_enter(&as->a_contents); 581 AS_CLRCLAIMGAP(as); 582 cv_signal(&as->a_cv); 583 mutex_exit(&as->a_contents); 584 } 585 586 /* 587 * compar segments (or just an address) by segment address range 588 */ 589 static int 590 as_segcompar(const void *x, const void *y) 591 { 592 struct seg *a = (struct seg *)x; 593 struct seg *b = (struct seg *)y; 594 595 if (a->s_base < b->s_base) 596 return (-1); 597 if (a->s_base >= b->s_base + b->s_size) 598 return (1); 599 return (0); 600 } 601 602 603 void 604 as_avlinit(struct as *as) 605 { 606 avl_create(&as->a_segtree, as_segcompar, sizeof (struct seg), 607 offsetof(struct seg, s_tree)); 608 avl_create(&as->a_wpage, wp_compare, sizeof (struct watched_page), 609 offsetof(struct watched_page, wp_link)); 610 } 611 612 /*ARGSUSED*/ 613 static int 614 as_constructor(void *buf, void *cdrarg, int kmflags) 615 { 616 struct as *as = buf; 617 618 mutex_init(&as->a_contents, NULL, MUTEX_DEFAULT, NULL); 619 cv_init(&as->a_cv, NULL, CV_DEFAULT, NULL); 620 rw_init(&as->a_lock, NULL, RW_DEFAULT, NULL); 621 as_avlinit(as); 622 return (0); 623 } 624 625 /*ARGSUSED1*/ 626 static void 627 as_destructor(void *buf, void *cdrarg) 628 { 629 struct as *as = buf; 630 631 avl_destroy(&as->a_segtree); 632 mutex_destroy(&as->a_contents); 633 cv_destroy(&as->a_cv); 634 rw_destroy(&as->a_lock); 635 } 636 637 void 638 as_init(void) 639 { 640 as_cache = kmem_cache_create("as_cache", sizeof (struct as), 0, 641 as_constructor, as_destructor, NULL, NULL, NULL, 0); 642 } 643 644 /* 645 * Allocate and initialize an address space data structure. 646 * We call hat_alloc to allow any machine dependent 647 * information in the hat structure to be initialized. 648 */ 649 struct as * 650 as_alloc(void) 651 { 652 struct as *as; 653 654 as = kmem_cache_alloc(as_cache, KM_SLEEP); 655 656 as->a_flags = 0; 657 as->a_vbits = 0; 658 as->a_hrm = NULL; 659 as->a_seglast = NULL; 660 as->a_size = 0; 661 as->a_updatedir = 0; 662 gethrestime(&as->a_updatetime); 663 as->a_objectdir = NULL; 664 as->a_sizedir = 0; 665 as->a_userlimit = (caddr_t)USERLIMIT; 666 as->a_lastgap = NULL; 667 as->a_lastgaphl = NULL; 668 as->a_callbacks = NULL; 669 670 AS_LOCK_ENTER(as, &as->a_lock, RW_WRITER); 671 as->a_hat = hat_alloc(as); /* create hat for default system mmu */ 672 AS_LOCK_EXIT(as, &as->a_lock); 673 674 as->a_xhat = NULL; 675 676 return (as); 677 } 678 679 /* 680 * Free an address space data structure. 681 * Need to free the hat first and then 682 * all the segments on this as and finally 683 * the space for the as struct itself. 684 */ 685 void 686 as_free(struct as *as) 687 { 688 struct hat *hat = as->a_hat; 689 struct seg *seg, *next; 690 int called = 0; 691 692 top: 693 /* 694 * Invoke ALL callbacks. as_do_callbacks will do one callback 695 * per call, and not return (-1) until the callback has completed. 696 * When as_do_callbacks returns zero, all callbacks have completed. 697 */ 698 mutex_enter(&as->a_contents); 699 while (as->a_callbacks && as_do_callbacks(as, AS_ALL_EVENT, 0, 0)) 700 ; 701 702 /* This will prevent new XHATs from attaching to as */ 703 if (!called) 704 AS_SETBUSY(as); 705 mutex_exit(&as->a_contents); 706 AS_LOCK_ENTER(as, &as->a_lock, RW_WRITER); 707 708 if (!called) { 709 called = 1; 710 hat_free_start(hat); 711 if (as->a_xhat != NULL) 712 xhat_free_start_all(as); 713 } 714 for (seg = AS_SEGFIRST(as); seg != NULL; seg = next) { 715 int err; 716 717 next = AS_SEGNEXT(as, seg); 718 err = SEGOP_UNMAP(seg, seg->s_base, seg->s_size); 719 if (err == EAGAIN) { 720 mutex_enter(&as->a_contents); 721 if (as->a_callbacks) { 722 AS_LOCK_EXIT(as, &as->a_lock); 723 } else { 724 /* 725 * Memory is currently locked. Wait for a 726 * cv_signal that it has been unlocked, then 727 * try the operation again. 728 */ 729 if (AS_ISUNMAPWAIT(as) == 0) 730 cv_broadcast(&as->a_cv); 731 AS_SETUNMAPWAIT(as); 732 AS_LOCK_EXIT(as, &as->a_lock); 733 while (AS_ISUNMAPWAIT(as)) 734 cv_wait(&as->a_cv, &as->a_contents); 735 } 736 mutex_exit(&as->a_contents); 737 goto top; 738 } else { 739 /* 740 * We do not expect any other error return at this 741 * time. This is similar to an ASSERT in seg_unmap() 742 */ 743 ASSERT(err == 0); 744 } 745 } 746 hat_free_end(hat); 747 if (as->a_xhat != NULL) 748 xhat_free_end_all(as); 749 AS_LOCK_EXIT(as, &as->a_lock); 750 751 /* /proc stuff */ 752 ASSERT(avl_numnodes(&as->a_wpage) == 0); 753 if (as->a_objectdir) { 754 kmem_free(as->a_objectdir, as->a_sizedir * sizeof (vnode_t *)); 755 as->a_objectdir = NULL; 756 as->a_sizedir = 0; 757 } 758 759 /* 760 * Free the struct as back to kmem. Assert it has no segments. 761 */ 762 ASSERT(avl_numnodes(&as->a_segtree) == 0); 763 kmem_cache_free(as_cache, as); 764 } 765 766 int 767 as_dup(struct as *as, struct as **outas) 768 { 769 struct as *newas; 770 struct seg *seg, *newseg; 771 int error; 772 773 AS_LOCK_ENTER(as, &as->a_lock, RW_WRITER); 774 as_clearwatch(as); 775 newas = as_alloc(); 776 newas->a_userlimit = as->a_userlimit; 777 AS_LOCK_ENTER(newas, &newas->a_lock, RW_WRITER); 778 779 /* This will prevent new XHATs from attaching */ 780 mutex_enter(&as->a_contents); 781 AS_SETBUSY(as); 782 mutex_exit(&as->a_contents); 783 mutex_enter(&newas->a_contents); 784 AS_SETBUSY(newas); 785 mutex_exit(&newas->a_contents); 786 787 (void) hat_dup(as->a_hat, newas->a_hat, NULL, 0, HAT_DUP_SRD); 788 789 for (seg = AS_SEGFIRST(as); seg != NULL; seg = AS_SEGNEXT(as, seg)) { 790 791 if (seg->s_flags & S_PURGE) 792 continue; 793 794 newseg = seg_alloc(newas, seg->s_base, seg->s_size); 795 if (newseg == NULL) { 796 AS_LOCK_EXIT(newas, &newas->a_lock); 797 as_setwatch(as); 798 mutex_enter(&as->a_contents); 799 AS_CLRBUSY(as); 800 mutex_exit(&as->a_contents); 801 AS_LOCK_EXIT(as, &as->a_lock); 802 as_free(newas); 803 return (-1); 804 } 805 if ((error = SEGOP_DUP(seg, newseg)) != 0) { 806 /* 807 * We call seg_free() on the new seg 808 * because the segment is not set up 809 * completely; i.e. it has no ops. 810 */ 811 as_setwatch(as); 812 mutex_enter(&as->a_contents); 813 AS_CLRBUSY(as); 814 mutex_exit(&as->a_contents); 815 AS_LOCK_EXIT(as, &as->a_lock); 816 seg_free(newseg); 817 AS_LOCK_EXIT(newas, &newas->a_lock); 818 as_free(newas); 819 return (error); 820 } 821 newas->a_size += seg->s_size; 822 } 823 824 error = hat_dup(as->a_hat, newas->a_hat, NULL, 0, HAT_DUP_ALL); 825 if (as->a_xhat != NULL) 826 error |= xhat_dup_all(as, newas, NULL, 0, HAT_DUP_ALL); 827 828 mutex_enter(&newas->a_contents); 829 AS_CLRBUSY(newas); 830 mutex_exit(&newas->a_contents); 831 AS_LOCK_EXIT(newas, &newas->a_lock); 832 833 as_setwatch(as); 834 mutex_enter(&as->a_contents); 835 AS_CLRBUSY(as); 836 mutex_exit(&as->a_contents); 837 AS_LOCK_EXIT(as, &as->a_lock); 838 if (error != 0) { 839 as_free(newas); 840 return (error); 841 } 842 *outas = newas; 843 return (0); 844 } 845 846 /* 847 * Handle a ``fault'' at addr for size bytes. 848 */ 849 faultcode_t 850 as_fault(struct hat *hat, struct as *as, caddr_t addr, size_t size, 851 enum fault_type type, enum seg_rw rw) 852 { 853 struct seg *seg; 854 caddr_t raddr; /* rounded down addr */ 855 size_t rsize; /* rounded up size */ 856 size_t ssize; 857 faultcode_t res = 0; 858 caddr_t addrsav; 859 struct seg *segsav; 860 int as_lock_held; 861 klwp_t *lwp = ttolwp(curthread); 862 int is_xhat = 0; 863 int holding_wpage = 0; 864 extern struct seg_ops segdev_ops; 865 866 867 868 if (as->a_hat != hat) { 869 /* This must be an XHAT then */ 870 is_xhat = 1; 871 872 if ((type != F_INVAL) || (as == &kas)) 873 return (FC_NOSUPPORT); 874 } 875 876 retry: 877 if (!is_xhat) { 878 /* 879 * Indicate that the lwp is not to be stopped while waiting 880 * for a pagefault. This is to avoid deadlock while debugging 881 * a process via /proc over NFS (in particular). 882 */ 883 if (lwp != NULL) 884 lwp->lwp_nostop++; 885 886 /* 887 * same length must be used when we softlock and softunlock. 888 * We don't support softunlocking lengths less than 889 * the original length when there is largepage support. 890 * See seg_dev.c for more comments. 891 */ 892 switch (type) { 893 894 case F_SOFTLOCK: 895 CPU_STATS_ADD_K(vm, softlock, 1); 896 break; 897 898 case F_SOFTUNLOCK: 899 break; 900 901 case F_PROT: 902 CPU_STATS_ADD_K(vm, prot_fault, 1); 903 break; 904 905 case F_INVAL: 906 CPU_STATS_ENTER_K(); 907 CPU_STATS_ADDQ(CPU, vm, as_fault, 1); 908 if (as == &kas) 909 CPU_STATS_ADDQ(CPU, vm, kernel_asflt, 1); 910 CPU_STATS_EXIT_K(); 911 break; 912 } 913 } 914 915 /* Kernel probe */ 916 TNF_PROBE_3(address_fault, "vm pagefault", /* CSTYLED */, 917 tnf_opaque, address, addr, 918 tnf_fault_type, fault_type, type, 919 tnf_seg_access, access, rw); 920 921 raddr = (caddr_t)((uintptr_t)addr & (uintptr_t)PAGEMASK); 922 rsize = (((size_t)(addr + size) + PAGEOFFSET) & PAGEMASK) - 923 (size_t)raddr; 924 925 /* 926 * XXX -- Don't grab the as lock for segkmap. We should grab it for 927 * correctness, but then we could be stuck holding this lock for 928 * a LONG time if the fault needs to be resolved on a slow 929 * filesystem, and then no-one will be able to exec new commands, 930 * as exec'ing requires the write lock on the as. 931 */ 932 if (as == &kas && segkmap && segkmap->s_base <= raddr && 933 raddr + size < segkmap->s_base + segkmap->s_size) { 934 /* 935 * if (as==&kas), this can't be XHAT: we've already returned 936 * FC_NOSUPPORT. 937 */ 938 seg = segkmap; 939 as_lock_held = 0; 940 } else { 941 AS_LOCK_ENTER(as, &as->a_lock, RW_READER); 942 if (is_xhat && avl_numnodes(&as->a_wpage) != 0) { 943 /* 944 * Grab and hold the writers' lock on the as 945 * if the fault is to a watched page. 946 * This will keep CPUs from "peeking" at the 947 * address range while we're temporarily boosting 948 * the permissions for the XHAT device to 949 * resolve the fault in the segment layer. 950 * 951 * We could check whether faulted address 952 * is within a watched page and only then grab 953 * the writer lock, but this is simpler. 954 */ 955 AS_LOCK_EXIT(as, &as->a_lock); 956 AS_LOCK_ENTER(as, &as->a_lock, RW_WRITER); 957 } 958 959 seg = as_segat(as, raddr); 960 if (seg == NULL) { 961 AS_LOCK_EXIT(as, &as->a_lock); 962 if ((lwp != NULL) && (!is_xhat)) 963 lwp->lwp_nostop--; 964 return (FC_NOMAP); 965 } 966 967 as_lock_held = 1; 968 } 969 970 addrsav = raddr; 971 segsav = seg; 972 973 for (; rsize != 0; rsize -= ssize, raddr += ssize) { 974 if (raddr >= seg->s_base + seg->s_size) { 975 seg = AS_SEGNEXT(as, seg); 976 if (seg == NULL || raddr != seg->s_base) { 977 res = FC_NOMAP; 978 break; 979 } 980 } 981 if (raddr + rsize > seg->s_base + seg->s_size) 982 ssize = seg->s_base + seg->s_size - raddr; 983 else 984 ssize = rsize; 985 986 if (!is_xhat || (seg->s_ops != &segdev_ops)) { 987 988 if (is_xhat && avl_numnodes(&as->a_wpage) != 0 && 989 pr_is_watchpage_as(raddr, rw, as)) { 990 /* 991 * Handle watch pages. If we're faulting on a 992 * watched page from an X-hat, we have to 993 * restore the original permissions while we 994 * handle the fault. 995 */ 996 as_clearwatch(as); 997 holding_wpage = 1; 998 } 999 1000 res = SEGOP_FAULT(hat, seg, raddr, ssize, type, rw); 1001 1002 /* Restore watchpoints */ 1003 if (holding_wpage) { 1004 as_setwatch(as); 1005 holding_wpage = 0; 1006 } 1007 1008 if (res != 0) 1009 break; 1010 } else { 1011 /* XHAT does not support seg_dev */ 1012 res = FC_NOSUPPORT; 1013 break; 1014 } 1015 } 1016 1017 /* 1018 * If we were SOFTLOCKing and encountered a failure, 1019 * we must SOFTUNLOCK the range we already did. (Maybe we 1020 * should just panic if we are SOFTLOCKing or even SOFTUNLOCKing 1021 * right here...) 1022 */ 1023 if (res != 0 && type == F_SOFTLOCK) { 1024 for (seg = segsav; addrsav < raddr; addrsav += ssize) { 1025 if (addrsav >= seg->s_base + seg->s_size) 1026 seg = AS_SEGNEXT(as, seg); 1027 ASSERT(seg != NULL); 1028 /* 1029 * Now call the fault routine again to perform the 1030 * unlock using S_OTHER instead of the rw variable 1031 * since we never got a chance to touch the pages. 1032 */ 1033 if (raddr > seg->s_base + seg->s_size) 1034 ssize = seg->s_base + seg->s_size - addrsav; 1035 else 1036 ssize = raddr - addrsav; 1037 (void) SEGOP_FAULT(hat, seg, addrsav, ssize, 1038 F_SOFTUNLOCK, S_OTHER); 1039 } 1040 } 1041 if (as_lock_held) 1042 AS_LOCK_EXIT(as, &as->a_lock); 1043 if ((lwp != NULL) && (!is_xhat)) 1044 lwp->lwp_nostop--; 1045 1046 /* 1047 * If the lower levels returned EDEADLK for a fault, 1048 * It means that we should retry the fault. Let's wait 1049 * a bit also to let the deadlock causing condition clear. 1050 * This is part of a gross hack to work around a design flaw 1051 * in the ufs/sds logging code and should go away when the 1052 * logging code is re-designed to fix the problem. See bug 1053 * 4125102 for details of the problem. 1054 */ 1055 if (FC_ERRNO(res) == EDEADLK) { 1056 delay(deadlk_wait); 1057 res = 0; 1058 goto retry; 1059 } 1060 return (res); 1061 } 1062 1063 1064 1065 /* 1066 * Asynchronous ``fault'' at addr for size bytes. 1067 */ 1068 faultcode_t 1069 as_faulta(struct as *as, caddr_t addr, size_t size) 1070 { 1071 struct seg *seg; 1072 caddr_t raddr; /* rounded down addr */ 1073 size_t rsize; /* rounded up size */ 1074 faultcode_t res = 0; 1075 klwp_t *lwp = ttolwp(curthread); 1076 1077 retry: 1078 /* 1079 * Indicate that the lwp is not to be stopped while waiting 1080 * for a pagefault. This is to a