Home | History | Annotate | Download | only in vm
      1 /*
      2  * CDDL HEADER START
      3  *
      4  * The contents of this file are subject to the terms of the
      5  * Common Development and Distribution License (the "License").
      6  * You may not use this file except in compliance with the License.
      7  *
      8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
      9  * or http://www.opensolaris.org/os/licensing.
     10  * See the License for the specific language governing permissions
     11  * and limitations under the License.
     12  *
     13  * When distributing Covered Code, include this CDDL HEADER in each
     14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
     15  * If applicable, add the following below this CDDL HEADER, with the
     16  * fields enclosed by brackets "[]" replaced with your own identifying
     17  * information: Portions Copyright [yyyy] [name of copyright owner]
     18  *
     19  * CDDL HEADER END
     20  */
     21 
     22 /*
     23  * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
     24  * Use is subject to license terms.
     25  */
     26 
     27 /*	Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T	*/
     28 /*	  All Rights Reserved  	*/
     29 
     30 /*
     31  * University Copyright- Copyright (c) 1982, 1986, 1988
     32  * The Regents of the University of California
     33  * All Rights Reserved
     34  *
     35  * University Acknowledgment- Portions of this document are derived from
     36  * software developed by the University of California, Berkeley, and its
     37  * contributors.
     38  */
     39 
     40 #pragma ident	"@(#)seg_dev.c	1.140	07/12/10 SMI"
     41 
     42 /*
     43  * VM - segment of a mapped device.
     44  *
     45  * This segment driver is used when mapping character special devices.
     46  */
     47 
     48 #include <sys/types.h>
     49 #include <sys/t_lock.h>
     50 #include <sys/sysmacros.h>
     51 #include <sys/vtrace.h>
     52 #include <sys/systm.h>
     53 #include <sys/vmsystm.h>
     54 #include <sys/mman.h>
     55 #include <sys/errno.h>
     56 #include <sys/kmem.h>
     57 #include <sys/cmn_err.h>
     58 #include <sys/vnode.h>
     59 #include <sys/proc.h>
     60 #include <sys/conf.h>
     61 #include <sys/debug.h>
     62 #include <sys/ddidevmap.h>
     63 #include <sys/ddi_implfuncs.h>
     64 #include <sys/lgrp.h>
     65 
     66 #include <vm/page.h>
     67 #include <vm/hat.h>
     68 #include <vm/as.h>
     69 #include <vm/seg.h>
     70 #include <vm/seg_dev.h>
     71 #include <vm/seg_kp.h>
     72 #include <vm/seg_kmem.h>
     73 #include <vm/vpage.h>
     74 
     75 #include <sys/sunddi.h>
     76 #include <sys/esunddi.h>
     77 #include <sys/fs/snode.h>
     78 
     79 
     80 #if DEBUG
     81 int segdev_debug;
     82 #define	DEBUGF(level, args) { if (segdev_debug >= (level)) cmn_err args; }
     83 #else
     84 #define	DEBUGF(level, args)
     85 #endif
     86 
     87 /* Default timeout for devmap context management */
     88 #define	CTX_TIMEOUT_VALUE 0
     89 
     90 #define	HOLD_DHP_LOCK(dhp)  if (dhp->dh_flags & DEVMAP_ALLOW_REMAP) \
     91 			{ mutex_enter(&dhp->dh_lock); }
     92 
     93 #define	RELE_DHP_LOCK(dhp) if (dhp->dh_flags & DEVMAP_ALLOW_REMAP) \
     94 			{ mutex_exit(&dhp->dh_lock); }
     95 
     96 #define	round_down_p2(a, s)	((a) & ~((s) - 1))
     97 #define	round_up_p2(a, s)	(((a) + (s) - 1) & ~((s) - 1))
     98 
     99 /*
    100  * VA_PA_ALIGNED checks to see if both VA and PA are on pgsize boundary
    101  * VA_PA_PGSIZE_ALIGNED check to see if VA is aligned with PA w.r.t. pgsize
    102  */
    103 #define	VA_PA_ALIGNED(uvaddr, paddr, pgsize)		\
    104 	(((uvaddr | paddr) & (pgsize - 1)) == 0)
    105 #define	VA_PA_PGSIZE_ALIGNED(uvaddr, paddr, pgsize)	\
    106 	(((uvaddr ^ paddr) & (pgsize - 1)) == 0)
    107 
    108 #define	vpgtob(n)	((n) * sizeof (struct vpage))	/* For brevity */
    109 
    110 #define	VTOCVP(vp)	(VTOS(vp)->s_commonvp)	/* we "know" it's an snode */
    111 
    112 static struct devmap_ctx *devmapctx_list = NULL;
    113 static struct devmap_softlock *devmap_slist = NULL;
    114 
    115 /*
    116  * mutex, vnode and page for the page of zeros we use for the trash mappings.
    117  * One trash page is allocated on the first ddi_umem_setup call that uses it
    118  * XXX Eventually, we may want to combine this with what segnf does when all
    119  * hat layers implement HAT_NOFAULT.
    120  *
    121  * The trash page is used when the backing store for a userland mapping is
    122  * removed but the application semantics do not take kindly to a SIGBUS.
    123  * In that scenario, the applications pages are mapped to some dummy page
    124  * which returns garbage on read and writes go into a common place.
    125  * (Perfect for NO_FAULT semantics)
    126  * The device driver is responsible to communicating to the app with some
    127  * other mechanism that such remapping has happened and the app should take
    128  * corrective action.
    129  * We can also use an anonymous memory page as there is no requirement to
    130  * keep the page locked, however this complicates the fault code. RFE.
    131  */
    132 static struct vnode trashvp;
    133 static struct page *trashpp;
    134 
    135 /* Non-pageable kernel memory is allocated from the umem_np_arena. */
    136 static vmem_t *umem_np_arena;
    137 
    138 /* Set the cookie to a value we know will never be a valid umem_cookie */
    139 #define	DEVMAP_DEVMEM_COOKIE	((ddi_umem_cookie_t)0x1)
    140 
    141 /*
    142  * Macros to check if type of devmap handle
    143  */
    144 #define	cookie_is_devmem(c)	\
    145 	((c) == (struct ddi_umem_cookie *)DEVMAP_DEVMEM_COOKIE)
    146 
    147 #define	cookie_is_pmem(c)	\
    148 	((c) == (struct ddi_umem_cookie *)DEVMAP_PMEM_COOKIE)
    149 
    150 #define	cookie_is_kpmem(c)	(!cookie_is_devmem(c) && !cookie_is_pmem(c) &&\
    151 	((c)->type == KMEM_PAGEABLE))
    152 
    153 #define	dhp_is_devmem(dhp)	\
    154 	(cookie_is_devmem((struct ddi_umem_cookie *)((dhp)->dh_cookie)))
    155 
    156 #define	dhp_is_pmem(dhp)	\
    157 	(cookie_is_pmem((struct ddi_umem_cookie *)((dhp)->dh_cookie)))
    158 
    159 #define	dhp_is_kpmem(dhp)	\
    160 	(cookie_is_kpmem((struct ddi_umem_cookie *)((dhp)->dh_cookie)))
    161 
    162 /*
    163  * Private seg op routines.
    164  */
    165 static int	segdev_dup(struct seg *, struct seg *);
    166 static int	segdev_unmap(struct seg *, caddr_t, size_t);
    167 static void	segdev_free(struct seg *);
    168 static faultcode_t segdev_fault(struct hat *, struct seg *, caddr_t, size_t,
    169 		    enum fault_type, enum seg_rw);
    170 static faultcode_t segdev_faulta(struct seg *, caddr_t);
    171 static int	segdev_setprot(struct seg *, caddr_t, size_t, uint_t);
    172 static int	segdev_checkprot(struct seg *, caddr_t, size_t, uint_t);
    173 static void	segdev_badop(void);
    174 static int	segdev_sync(struct seg *, caddr_t, size_t, int, uint_t);
    175 static size_t	segdev_incore(struct seg *, caddr_t, size_t, char *);
    176 static int	segdev_lockop(struct seg *, caddr_t, size_t, int, int,
    177 		    ulong_t *, size_t);
    178 static int	segdev_getprot(struct seg *, caddr_t, size_t, uint_t *);
    179 static u_offset_t	segdev_getoffset(struct seg *, caddr_t);
    180 static int	segdev_gettype(struct seg *, caddr_t);
    181 static int	segdev_getvp(struct seg *, caddr_t, struct vnode **);
    182 static int	segdev_advise(struct seg *, caddr_t, size_t, uint_t);
    183 static void	segdev_dump(struct seg *);
    184 static int	segdev_pagelock(struct seg *, caddr_t, size_t,
    185 		    struct page ***, enum lock_type, enum seg_rw);
    186 static int	segdev_setpagesize(struct seg *, caddr_t, size_t, uint_t);
    187 static int	segdev_getmemid(struct seg *, caddr_t, memid_t *);
    188 static lgrp_mem_policy_info_t	*segdev_getpolicy(struct seg *, caddr_t);
    189 static int	segdev_capable(struct seg *, segcapability_t);
    190 
    191 /*
    192  * XXX	this struct is used by rootnex_map_fault to identify
    193  *	the segment it has been passed. So if you make it
    194  *	"static" you'll need to fix rootnex_map_fault.
    195  */
    196 struct seg_ops segdev_ops = {
    197 	segdev_dup,
    198 	segdev_unmap,
    199 	segdev_free,
    200 	segdev_fault,
    201 	segdev_faulta,
    202 	segdev_setprot,
    203 	segdev_checkprot,
    204 	(int (*)())segdev_badop,	/* kluster */
    205 	(size_t (*)(struct seg *))NULL,	/* swapout */
    206 	segdev_sync,			/* sync */
    207 	segdev_incore,
    208 	segdev_lockop,			/* lockop */
    209 	segdev_getprot,
    210 	segdev_getoffset,
    211 	segdev_gettype,
    212 	segdev_getvp,
    213 	segdev_advise,
    214 	segdev_dump,
    215 	segdev_pagelock,
    216 	segdev_setpagesize,
    217 	segdev_getmemid,
    218 	segdev_getpolicy,
    219 	segdev_capable,
    220 };
    221 
    222 /*
    223  * Private segdev support routines
    224  */
    225 static struct segdev_data *sdp_alloc(void);
    226 
    227 static void segdev_softunlock(struct hat *, struct seg *, caddr_t,
    228     size_t, enum seg_rw);
    229 
    230 static faultcode_t segdev_faultpage(struct hat *, struct seg *, caddr_t,
    231     struct vpage *, enum fault_type, enum seg_rw, devmap_handle_t *);
    232 
    233 static faultcode_t segdev_faultpages(struct hat *, struct seg *, caddr_t,
    234     size_t, enum fault_type, enum seg_rw, devmap_handle_t *);
    235 
    236 static struct devmap_ctx *devmap_ctxinit(dev_t, ulong_t);
    237 static struct devmap_softlock *devmap_softlock_init(dev_t, ulong_t);
    238 static void devmap_softlock_rele(devmap_handle_t *);
    239 static void devmap_ctx_rele(devmap_handle_t *);
    240 
    241 static void devmap_ctxto(void *);
    242 
    243 static devmap_handle_t *devmap_find_handle(devmap_handle_t *dhp_head,
    244     caddr_t addr);
    245 
    246 static ulong_t devmap_roundup(devmap_handle_t *dhp, ulong_t offset, size_t len,
    247     ulong_t *opfn, ulong_t *pagesize);
    248 
    249 static void free_devmap_handle(devmap_handle_t *dhp);
    250 
    251 static int devmap_handle_dup(devmap_handle_t *dhp, devmap_handle_t **new_dhp,
    252     struct seg *newseg);
    253 
    254 static devmap_handle_t *devmap_handle_unmap(devmap_handle_t *dhp);
    255 
    256 static void devmap_handle_unmap_head(devmap_handle_t *dhp, size_t len);
    257 
    258 static void devmap_handle_unmap_tail(devmap_handle_t *dhp, caddr_t addr);
    259 
    260 static int devmap_device(devmap_handle_t *dhp, struct as *as, caddr_t *addr,
    261     offset_t off, size_t len, uint_t flags);
    262 
    263 static void devmap_get_large_pgsize(devmap_handle_t *dhp, size_t len,
    264     caddr_t addr, size_t *llen, caddr_t *laddr);
    265 
    266 static void devmap_handle_reduce_len(devmap_handle_t *dhp, size_t len);
    267 
    268 static void *devmap_alloc_pages(vmem_t *vmp, size_t size, int vmflag);
    269 static void devmap_free_pages(vmem_t *vmp, void *inaddr, size_t size);
    270 
    271 static void *devmap_umem_alloc_np(size_t size, size_t flags);
    272 static void devmap_umem_free_np(void *addr, size_t size);
    273 
    274 /*
    275  * routines to lock and unlock underlying segkp segment for
    276  * KMEM_PAGEABLE type cookies.
    277  */
    278 static faultcode_t  acquire_kpmem_lock(struct ddi_umem_cookie *, size_t);
    279 static void release_kpmem_lock(struct ddi_umem_cookie *, size_t);
    280 
    281 /*
    282  * Routines to synchronize F_SOFTLOCK and F_INVAL faults for
    283  * drivers with devmap_access callbacks
    284  */
    285 static int devmap_softlock_enter(struct devmap_softlock *, size_t,
    286 	enum fault_type);
    287 static void devmap_softlock_exit(struct devmap_softlock *, size_t,
    288 	enum fault_type);
    289 
    290 static kmutex_t devmapctx_lock;
    291 
    292 static kmutex_t devmap_slock;
    293 
    294 /*
    295  * Initialize the thread callbacks and thread private data.
    296  */
    297 static struct devmap_ctx *
    298 devmap_ctxinit(dev_t dev, ulong_t id)
    299 {
    300 	struct devmap_ctx	*devctx;
    301 	struct devmap_ctx	*tmp;
    302 	dev_info_t		*dip;
    303 
    304 	tmp =  kmem_zalloc(sizeof (struct devmap_ctx), KM_SLEEP);
    305 
    306 	mutex_enter(&devmapctx_lock);
    307 
    308 	dip = e_ddi_hold_devi_by_dev(dev, 0);
    309 	ASSERT(dip != NULL);
    310 	ddi_release_devi(dip);
    311 
    312 	for (devctx = devmapctx_list; devctx != NULL; devctx = devctx->next)
    313 		if ((devctx->dip == dip) && (devctx->id == id))
    314 			break;
    315 
    316 	if (devctx == NULL) {
    317 		devctx = tmp;
    318 		devctx->dip = dip;
    319 		devctx->id = id;
    320 		mutex_init(&devctx->lock, NULL, MUTEX_DEFAULT, NULL);
    321 		cv_init(&devctx->cv, NULL, CV_DEFAULT, NULL);
    322 		devctx->next = devmapctx_list;
    323 		devmapctx_list = devctx;
    324 	} else
    325 		kmem_free(tmp, sizeof (struct devmap_ctx));
    326 
    327 	mutex_enter(&devctx->lock);
    328 	devctx->refcnt++;
    329 	mutex_exit(&devctx->lock);
    330 	mutex_exit(&devmapctx_lock);
    331 
    332 	return (devctx);
    333 }
    334 
    335 /*
    336  * Timeout callback called if a CPU has not given up the device context
    337  * within dhp->dh_timeout_length ticks
    338  */
    339 static void
    340 devmap_ctxto(void *data)
    341 {
    342 	struct devmap_ctx *devctx = data;
    343 
    344 	TRACE_1(TR_FAC_DEVMAP, TR_DEVMAP_CTXTO,
    345 	    "devmap_ctxto:timeout expired, devctx=%p", (void *)devctx);
    346 	mutex_enter(&devctx->lock);
    347 	/*
    348 	 * Set oncpu = 0 so the next mapping trying to get the device context
    349 	 * can.
    350 	 */
    351 	devctx->oncpu = 0;
    352 	devctx->timeout = 0;
    353 	cv_signal(&devctx->cv);
    354 	mutex_exit(&devctx->lock);
    355 }
    356 
    357 /*
    358  * Create a device segment.
    359  */
    360 int
    361 segdev_create(struct seg *seg, void *argsp)
    362 {
    363 	struct segdev_data *sdp;
    364 	struct segdev_crargs *a = (struct segdev_crargs *)argsp;
    365 	devmap_handle_t *dhp = (devmap_handle_t *)a->devmap_data;
    366 	int error;
    367 
    368 	/*
    369 	 * Since the address space is "write" locked, we
    370 	 * don't need the segment lock to protect "segdev" data.
    371 	 */
    372 	ASSERT(seg->s_as && AS_WRITE_HELD(seg->s_as, &seg->s_as->a_lock));
    373 
    374 	hat_map(seg->s_as->a_hat, seg->s_base, seg->s_size, HAT_MAP);
    375 
    376 	sdp = sdp_alloc();
    377 
    378 	sdp->mapfunc = a->mapfunc;
    379 	sdp->offset = a->offset;
    380 	sdp->prot = a->prot;
    381 	sdp->maxprot = a->maxprot;
    382 	sdp->type = a->type;
    383 	sdp->pageprot = 0;
    384 	sdp->softlockcnt = 0;
    385 	sdp->vpage = NULL;
    386 
    387 	if (sdp->mapfunc == NULL)
    388 		sdp->devmap_data = dhp;
    389 	else
    390 		sdp->devmap_data = dhp = NULL;
    391 
    392 	sdp->hat_flags = a->hat_flags;
    393 	sdp->hat_attr = a->hat_attr;
    394 
    395 	/*
    396 	 * Currently, hat_flags supports only HAT_LOAD_NOCONSIST
    397 	 */
    398 	ASSERT(!(sdp->hat_flags & ~HAT_LOAD_NOCONSIST));
    399 
    400 	/*
    401 	 * Hold shadow vnode -- segdev only deals with
    402 	 * character (VCHR) devices. We use the common
    403 	 * vp to hang pages on.
    404 	 */
    405 	sdp->vp = specfind(a->dev, VCHR);
    406 	ASSERT(sdp->vp != NULL);
    407 
    408 	seg->s_ops = &segdev_ops;
    409 	seg->s_data = sdp;
    410 
    411 	while (dhp != NULL) {
    412 		dhp->dh_seg = seg;
    413 		dhp = dhp->dh_next;
    414 	}
    415 
    416 	/*
    417 	 * Inform the vnode of the new mapping.
    418 	 */
    419 	/*
    420 	 * It is ok to use pass sdp->maxprot to ADDMAP rather than to use
    421 	 * dhp specific maxprot because spec_addmap does not use maxprot.
    422 	 */
    423 	error = VOP_ADDMAP(VTOCVP(sdp->vp), sdp->offset,
    424 	    seg->s_as, seg->s_base, seg->s_size,
    425 	    sdp->prot, sdp->maxprot, sdp->type, CRED(), NULL);
    426 
    427 	if (error != 0) {
    428 		sdp->devmap_data = NULL;
    429 		hat_unload(seg->s_as->a_hat, seg->s_base, seg->s_size,
    430 		    HAT_UNLOAD_UNMAP);
    431 	}
    432 
    433 	return (error);
    434 }
    435 
    436 static struct segdev_data *
    437 sdp_alloc(void)
    438 {
    439 	struct segdev_data *sdp;
    440 
    441 	sdp = kmem_zalloc(sizeof (struct segdev_data), KM_SLEEP);
    442 	rw_init(&sdp->lock, NULL, RW_DEFAULT, NULL);
    443 
    444 	return (sdp);
    445 }
    446 
    447 /*
    448  * Duplicate seg and return new segment in newseg.
    449  */
    450 static int
    451 segdev_dup(struct seg *seg, struct seg *newseg)
    452 {
    453 	struct segdev_data *sdp = (struct segdev_data *)seg->s_data;
    454 	struct segdev_data *newsdp;
    455 	devmap_handle_t *dhp = (devmap_handle_t *)sdp->devmap_data;
    456 	size_t npages;
    457 	int ret;
    458 
    459 	TRACE_2(TR_FAC_DEVMAP, TR_DEVMAP_DUP,
    460 	    "segdev_dup:start dhp=%p, seg=%p", (void *)dhp, (void *)seg);
    461 
    462 	DEBUGF(3, (CE_CONT, "segdev_dup: dhp %p seg %p\n",
    463 	    (void *)dhp, (void *)seg));
    464 
    465 	/*
    466 	 * Since the address space is "write" locked, we
    467 	 * don't need the segment lock to protect "segdev" data.
    468 	 */
    469 	ASSERT(seg->s_as && AS_WRITE_HELD(seg->s_as, &seg->s_as->a_lock));
    470 
    471 	newsdp = sdp_alloc();
    472 
    473 	newseg->s_ops = seg->s_ops;
    474 	newseg->s_data = (void *)newsdp;
    475 
    476 	VN_HOLD(sdp->vp);
    477 	newsdp->vp 	= sdp->vp;
    478 	newsdp->mapfunc = sdp->mapfunc;
    479 	newsdp->offset	= sdp->offset;
    480 	newsdp->pageprot = sdp->pageprot;
    481 	newsdp->prot	= sdp->prot;
    482 	newsdp->maxprot = sdp->maxprot;
    483 	newsdp->type = sdp->type;
    484 	newsdp->hat_attr = sdp->hat_attr;
    485 	newsdp->hat_flags = sdp->hat_flags;
    486 	newsdp->softlockcnt = 0;
    487 
    488 	/*
    489 	 * Initialize per page data if the segment we are
    490 	 * dup'ing has per page information.
    491 	 */
    492 	npages = seg_pages(newseg);
    493 
    494 	if (sdp->vpage != NULL) {
    495 		size_t nbytes = vpgtob(npages);
    496 
    497 		newsdp->vpage = kmem_zalloc(nbytes, KM_SLEEP);
    498 		bcopy(sdp->vpage, newsdp->vpage, nbytes);
    499 	} else
    500 		newsdp->vpage = NULL;
    501 
    502 	/*
    503 	 * duplicate devmap handles
    504 	 */
    505 	if (dhp != NULL) {
    506 		ret = devmap_handle_dup(dhp,
    507 		    (devmap_handle_t **)&newsdp->devmap_data, newseg);
    508 		if (ret != 0) {
    509 			TRACE_3(TR_FAC_DEVMAP, TR_DEVMAP_DUP_CK1,
    510 			    "segdev_dup:ret1 ret=%x, dhp=%p seg=%p",
    511 			    ret, (void *)dhp, (void *)seg);
    512 			DEBUGF(1, (CE_CONT,
    513 			    "segdev_dup: ret %x dhp %p seg %p\n",
    514 			    ret, (void *)dhp, (void *)seg));
    515 			return (ret);
    516 		}
    517 	}
    518 
    519 	/*
    520 	 * Inform the common vnode of the new mapping.
    521 	 */
    522 	return (VOP_ADDMAP(VTOCVP(newsdp->vp),
    523 	    newsdp->offset, newseg->s_as,
    524 	    newseg->s_base, newseg->s_size, newsdp->prot,
    525 	    newsdp->maxprot, sdp->type, CRED(), NULL));
    526 }
    527 
    528 /*
    529  * duplicate devmap handles
    530  */
    531 static int
    532 devmap_handle_dup(devmap_handle_t *dhp, devmap_handle_t **new_dhp,
    533     struct seg *newseg)
    534 {
    535 	devmap_handle_t *newdhp_save = NULL;
    536 	devmap_handle_t *newdhp = NULL;
    537 	struct devmap_callback_ctl *callbackops;
    538 
    539 	while (dhp != NULL) {
    540 		newdhp = kmem_alloc(sizeof (devmap_handle_t), KM_SLEEP);
    541 
    542 		/* Need to lock the original dhp while copying if REMAP */
    543 		HOLD_DHP_LOCK(dhp);
    544 		bcopy(dhp, newdhp, sizeof (devmap_handle_t));
    545 		RELE_DHP_LOCK(dhp);
    546 		newdhp->dh_seg = newseg;
    547 		newdhp->dh_next = NULL;
    548 		if (newdhp_save != NULL)
    549 			newdhp_save->dh_next = newdhp;
    550 		else
    551 			*new_dhp = newdhp;
    552 		newdhp_save = newdhp;
    553 
    554 		callbackops = &newdhp->dh_callbackops;
    555 
    556 		if (dhp->dh_softlock != NULL)
    557 			newdhp->dh_softlock = devmap_softlock_init(
    558 			    newdhp->dh_dev,
    559 			    (ulong_t)callbackops->devmap_access);
    560 		if (dhp->dh_ctx != NULL)
    561 			newdhp->dh_ctx = devmap_ctxinit(newdhp->dh_dev,
    562 			    (ulong_t)callbackops->devmap_access);
    563 
    564 		/*
    565 		 * Initialize dh_lock if we want to do remap.
    566 		 */
    567 		if (newdhp->dh_flags & DEVMAP_ALLOW_REMAP) {
    568 			mutex_init(&newdhp->dh_lock, NULL, MUTEX_DEFAULT, NULL);
    569 			newdhp->dh_flags |= DEVMAP_LOCK_INITED;
    570 		}
    571 
    572 		if (callbackops->devmap_dup != NULL) {
    573 			int ret;
    574 
    575 			/*
    576 			 * Call the dup callback so that the driver can
    577 			 * duplicate its private data.
    578 			 */
    579 			ret = (*callbackops->devmap_dup)(dhp, dhp->dh_pvtp,
    580 			    (devmap_cookie_t *)newdhp, &newdhp->dh_pvtp);
    581 
    582 			if (ret != 0) {
    583 				/*
    584 				 * We want to free up this segment as the driver
    585 				 * has indicated that we can't dup it.  But we
    586 				 * don't want to call the drivers, devmap_unmap,
    587 				 * callback function as the driver does not
    588 				 * think this segment exists. The caller of
    589 				 * devmap_dup will call seg_free on newseg
    590 				 * as it was the caller that allocated the
    591 				 * segment.
    592 				 */
    593 				DEBUGF(1, (CE_CONT, "devmap_handle_dup ERROR: "
    594 				    "newdhp %p dhp %p\n", (void *)newdhp,
    595 				    (void *)dhp));
    596 				callbackops->devmap_unmap = NULL;
    597 				return (ret);
    598 			}
    599 		}
    600 
    601 		dhp = dhp->dh_next;
    602 	}
    603 
    604 	return (0);
    605 }
    606 
    607 /*
 * Unmap the range [addr, addr + len) from a device segment, splitting the
 * segment when the range lies in its middle.
 *
 * The address space must be write locked by the caller (asserted below).
 * Returns EAGAIN, before anything has been changed, while softlockcnt is
 * non-zero; returns 0 otherwise.  A range that is not page aligned or is
 * not contained in the segment causes a panic.
 *
 * After the translations have been unloaded and the common vnode has been
 * told through VOP_DELMAP(), one of four cases applies:
 *   - whole segment:	the segment is released with seg_free().
 *   - head of segment:	s_base and the segment offset move up by len;
 *			devmap_handle_unmap_head() deals with the handles.
 *   - tail of segment:	s_size shrinks by len;
 *			devmap_handle_unmap_tail() deals with the handles.
 *   - middle:		seg keeps the low part, a new segment (nseg) is
 *			allocated for the high part, and the vpage array
 *			and the devmap handle list are divided between them.
    609  */
    610 /*ARGSUSED*/
    611 static int
    612 segdev_unmap(struct seg *seg, caddr_t addr, size_t len)
    613 {
    614 	register struct segdev_data *sdp = (struct segdev_data *)seg->s_data;
    615 	register struct segdev_data *nsdp;
    616 	register struct seg *nseg;
    617 	register size_t	opages;		/* old segment size in pages */
    618 	register size_t	npages;		/* new segment size in pages */
    619 	register size_t	dpages;		/* pages being deleted (unmapped) */
    620 	register size_t	nbytes;
    621 	devmap_handle_t *dhp = (devmap_handle_t *)sdp->devmap_data;
    622 	devmap_handle_t *dhpp;
    623 	devmap_handle_t *newdhp;
    624 	struct devmap_callback_ctl *callbackops;
    625 	caddr_t nbase;
    626 	offset_t off;
    627 	ulong_t nsize;
    628 	size_t mlen, sz;
    629 
    630 	TRACE_4(TR_FAC_DEVMAP, TR_DEVMAP_UNMAP,
    631 	    "segdev_unmap:start dhp=%p, seg=%p addr=%p len=%lx",
    632 	    (void *)dhp, (void *)seg, (void *)addr, len);
    633 
    634 	DEBUGF(3, (CE_CONT, "segdev_unmap: dhp %p seg %p addr %p len %lx\n",
    635 	    (void *)dhp, (void *)seg, (void *)addr, len));
    636 
    637 	/*
    638 	 * Since the address space is "write" locked, we
    639 	 * don't need the segment lock to protect "segdev" data.
    640 	 */
    641 	ASSERT(seg->s_as && AS_WRITE_HELD(seg->s_as, &seg->s_as->a_lock));
    642 
    643 	if ((sz = sdp->softlockcnt) > 0) {
    644 		/*
    645 		 * Fail the unmap if pages are SOFTLOCKed through this mapping.
    646 		 * softlockcnt is protected from change by the as write lock.
    647 		 */
    648 		TRACE_1(TR_FAC_DEVMAP, TR_DEVMAP_UNMAP_CK1,
    649 		    "segdev_unmap:error softlockcnt = %ld", sz);
    650 		DEBUGF(1, (CE_CONT, "segdev_unmap: softlockcnt %ld\n", sz));
    651 		return (EAGAIN);
    652 	}
    653 
    654 	/*
    655 	 * Check for bad sizes
    656 	 */
    657 	if (addr < seg->s_base || addr + len > seg->s_base + seg->s_size ||
    658 	    (len & PAGEOFFSET) || ((uintptr_t)addr & PAGEOFFSET))
    659 		panic("segdev_unmap");
    660 
    661 	if (dhp != NULL) {
    662 		devmap_handle_t *tdhp;
    663 		/*
    664 		 * If large page size was used in hat_devload(),
    665 		 * the same page size must be used in hat_unload().
    666 		 */
		/*
		 * dhpp is the handle that contains addr; tdhp scans from
		 * there to the end of the list for any handle flagged
		 * DEVMAP_FLAG_LARGE.
		 */
    667 		dhpp = tdhp = devmap_find_handle(dhp, addr);
    668 		while (tdhp != NULL) {
    669 			if (tdhp->dh_flags & DEVMAP_FLAG_LARGE) {
    670 				break;
    671 			}
    672 			tdhp = tdhp->dh_next;
    673 		}
    674 		if (tdhp != NULL) {	/* found a dhp using large pages */
    675 			size_t slen = len;
    676 			size_t mlen;
    677 			size_t soff;
    678 
			/*
			 * soff is the offset of addr within the first handle
			 * and mlen the part of the remaining range that each
			 * handle accounts for.  Note that every handle
			 * visited is unloaded over its whole extent
			 * (dh_uvaddr, dh_len), not just over mlen bytes.
			 * This inner mlen shadows the function-level mlen.
			 */
    679 			soff = (ulong_t)(addr - dhpp->dh_uvaddr);
    680 			while (slen != 0) {
    681 				mlen = MIN(slen, (dhpp->dh_len - soff));
    682 				hat_unload(seg->s_as->a_hat, dhpp->dh_uvaddr,
    683 				    dhpp->dh_len, HAT_UNLOAD_UNMAP);
    684 				dhpp = dhpp->dh_next;
    685 				ASSERT(slen >= mlen);
    686 				slen -= mlen;
    687 				soff = 0;
    688 			}
    689 		} else
    690 			hat_unload(seg->s_as->a_hat, addr, len,
    691 			    HAT_UNLOAD_UNMAP);
    692 	} else {
    693 		/*
    694 		 * Unload any hardware translations in the range
    695 		 * to be taken out.
    696 		 */
    697 		hat_unload(seg->s_as->a_hat, addr, len, HAT_UNLOAD_UNMAP);
    698 	}
    699 
    700 	/*
    701 	 * get the user offset which will used in the driver callbacks
    702 	 */
    703 	off = sdp->offset + (offset_t)(addr - seg->s_base);
    704 
    705 	/*
    706 	 * Inform the vnode of the unmapping.
    707 	 */
    708 	ASSERT(sdp->vp != NULL);
	/* the result of VOP_DELMAP is deliberately discarded */
    709 	(void) VOP_DELMAP(VTOCVP(sdp->vp), off, seg->s_as, addr, len,
    710 	    sdp->prot, sdp->maxprot, sdp->type, CRED(), NULL);
    711 
    712 	/*
    713 	 * Check for entire segment
    714 	 */
    715 	if (addr == seg->s_base && len == seg->s_size) {
    716 		seg_free(seg);
    717 		return (0);
    718 	}
    719 
	/*
	 * Partial unmap from here on.  opages and npages size the old and
	 * the new vpage array; npages is recomputed in the middle case.
	 */
    720 	opages = seg_pages(seg);
    721 	dpages = btop(len);
    722 	npages = opages - dpages;
    723 
    724 	/*
    725 	 * Check for beginning of segment
    726 	 */
    727 	if (addr == seg->s_base) {
    728 		if (sdp->vpage != NULL) {
    729 			register struct vpage *ovpage;
    730 
    731 			ovpage = sdp->vpage;	/* keep pointer to vpage */
    732 
			/* keep the entries that follow the deleted pages */
    733 			nbytes = vpgtob(npages);
    734 			sdp->vpage = kmem_alloc(nbytes, KM_SLEEP);
    735 			bcopy(&ovpage[dpages], sdp->vpage, nbytes);
    736 
    737 			/* free up old vpage */
    738 			kmem_free(ovpage, vpgtob(opages));
    739 		}
    740 
    741 		/*
    742 		 * free devmap handles from the beginning of the mapping.
    743 		 */
    744 		if (dhp != NULL)
    745 			devmap_handle_unmap_head(dhp, len);
    746 
    747 		sdp->offset += (offset_t)len;
    748 
    749 		seg->s_base += len;
    750 		seg->s_size -= len;
    751 
    752 		return (0);
    753 	}
    754 
    755 	/*
    756 	 * Check for end of segment
    757 	 */
    758 	if (addr + len == seg->s_base + seg->s_size) {
    759 		if (sdp->vpage != NULL) {
    760 			register struct vpage *ovpage;
    761 
    762 			ovpage = sdp->vpage;	/* keep pointer to vpage */
    763 
			/* keep the leading entries, drop the trailing ones */
    764 			nbytes = vpgtob(npages);
    765 			sdp->vpage = kmem_alloc(nbytes, KM_SLEEP);
    766 			bcopy(ovpage, sdp->vpage, nbytes);
    767 
    768 			/* free up old vpage */
    769 			kmem_free(ovpage, vpgtob(opages));
    770 		}
    771 		seg->s_size -= len;
    772 
    773 		/*
    774 		 * free devmap handles from addr to the end of the mapping.
    775 		 */
    776 		if (dhp != NULL)
    777 			devmap_handle_unmap_tail(dhp, addr);
    778 
    779 		return (0);
    780 	}
    781 
    782 	/*
    783 	 * The section to go is in the middle of the segment,
    784 	 * have to make it into two segments.  nseg is made for
    785 	 * the high end while seg is cut down at the low end.
    786 	 */
    787 	nbase = addr + len;				/* new seg base */
    788 	nsize = (seg->s_base + seg->s_size) - nbase;	/* new seg size */
    789 	seg->s_size = addr - seg->s_base;		/* shrink old seg */
    790 	nseg = seg_alloc(seg->s_as, nbase, nsize);
    791 	if (nseg == NULL)
    792 		panic("segdev_unmap seg_alloc");
    793 
    794 	TRACE_2(TR_FAC_DEVMAP, TR_DEVMAP_UNMAP_CK2,
    795 	    "segdev_unmap: seg=%p nseg=%p", (void *)seg, (void *)nseg);
    796 	DEBUGF(3, (CE_CONT, "segdev_unmap: segdev_dup seg %p nseg %p\n",
    797 	    (void *)seg, (void *)nseg));
    798 	nsdp = sdp_alloc();
    799 
    800 	nseg->s_ops = seg->s_ops;
    801 	nseg->s_data = (void *)nsdp;
    802 
	/* nseg references the same vnode as seg, so take another hold */
    803 	VN_HOLD(sdp->vp);
    804 	nsdp->mapfunc = sdp->mapfunc;
    805 	nsdp->offset = sdp->offset + (offset_t)(nseg->s_base - seg->s_base);
    806 	nsdp->vp 	= sdp->vp;
    807 	nsdp->pageprot = sdp->pageprot;
    808 	nsdp->prot	= sdp->prot;
    809 	nsdp->maxprot = sdp->maxprot;
    810 	nsdp->type = sdp->type;
    811 	nsdp->hat_attr = sdp->hat_attr;
    812 	nsdp->hat_flags = sdp->hat_flags;
    813 	nsdp->softlockcnt = 0;
    814 
    815 	/*
    816 	 * Initialize per page data if the segment we are
    817 	 * dup'ing has per page information.
    818 	 */
    819 	if (sdp->vpage != NULL) {
    820 		/* need to split vpage into two arrays */
    821 		register size_t nnbytes;
    822 		register size_t nnpages;
    823 		register struct vpage *ovpage;
    824 
    825 		ovpage = sdp->vpage;		/* keep pointer to vpage */
    826 
    827 		npages = seg_pages(seg);	/* seg has shrunk */
    828 		nbytes = vpgtob(npages);
    829 		nnpages = seg_pages(nseg);
    830 		nnbytes = vpgtob(nnpages);
    831 
    832 		sdp->vpage = kmem_alloc(nbytes, KM_SLEEP);
    833 		bcopy(ovpage, sdp->vpage, nbytes);
    834 
		/* nseg's entries start after seg's pages and the hole */
    835 		nsdp->vpage = kmem_alloc(nnbytes, KM_SLEEP);
    836 		bcopy(&ovpage[npages + dpages], nsdp->vpage, nnbytes);
    837 
    838 		/* free up old vpage */
    839 		kmem_free(ovpage, vpgtob(opages));
    840 	} else
    841 		nsdp->vpage = NULL;
    842 
    843 	/*
    844 	 * unmap dhps.
    845 	 */
    846 	if (dhp == NULL) {
    847 		nsdp->devmap_data = NULL;
    848 		return (0);
    849 	}
	/*
	 * Walk the handle list once.  Handles below the hole stay with seg,
	 * handles above it are re-homed to nseg, and handles overlapping the
	 * hole are trimmed, split or released.  The branch order matters:
	 * each test relies on the earlier ones having failed.
	 */
    850 	while (dhp != NULL) {
    851 		callbackops = &dhp->dh_callbackops;
    852 		TRACE_2(TR_FAC_DEVMAP, TR_DEVMAP_UNMAP_CK3,
    853 		    "segdev_unmap: dhp=%p addr=%p", dhp, addr);
    854 		DEBUGF(3, (CE_CONT, "unmap: dhp %p addr %p uvaddr %p len %lx\n",
    855 		    (void *)dhp, (void *)addr,
    856 		    (void *)dhp->dh_uvaddr, dhp->dh_len));
    857 
    858 		if (addr == (dhp->dh_uvaddr + dhp->dh_len)) {
			/*
			 * dhp ends exactly where the hole begins: it is the
			 * last handle of seg, so cut the list after it.
			 */
    859 			dhpp = dhp->dh_next;
    860 			dhp->dh_next = NULL;
    861 			dhp = dhpp;
    862 		} else if (addr > (dhp->dh_uvaddr + dhp->dh_len)) {
			/* dhp lies wholly below the hole; it stays with seg */
    863 			dhp = dhp->dh_next;
    864 		} else if (addr > dhp->dh_uvaddr &&
    865 		    (addr + len) < (dhp->dh_uvaddr + dhp->dh_len)) {
    866 			/*
    867 			 * <addr, addr+len> is enclosed by dhp.
    868 			 * create a newdhp that begins at addr+len and
    869 			 * ends at dhp->dh_uvaddr+dhp->dh_len.
    870 			 */
    871 			newdhp = kmem_alloc(sizeof (devmap_handle_t), KM_SLEEP);
    872 			HOLD_DHP_LOCK(dhp);
    873 			bcopy(dhp, newdhp, sizeof (devmap_handle_t));
    874 			RELE_DHP_LOCK(dhp);
    875 			newdhp->dh_seg = nseg;
    876 			newdhp->dh_next = dhp->dh_next;
    877 			if (dhp->dh_softlock != NULL)
    878 				newdhp->dh_softlock = devmap_softlock_init(
    879 				    newdhp->dh_dev,
    880 				    (ulong_t)callbackops->devmap_access);
    881 			if (dhp->dh_ctx != NULL)
    882 				newdhp->dh_ctx = devmap_ctxinit(newdhp->dh_dev,
    883 				    (ulong_t)callbackops->devmap_access);
    884 			if (newdhp->dh_flags & DEVMAP_LOCK_INITED) {
    885 				mutex_init(&newdhp->dh_lock,
    886 				    NULL, MUTEX_DEFAULT, NULL);
    887 			}
			/*
			 * The driver is handed both halves: dhp for the part
			 * below the hole and newdhp for the part above it.
			 */
    888 			if (callbackops->devmap_unmap != NULL)
    889 				(*callbackops->devmap_unmap)(dhp, dhp->dh_pvtp,
    890 				    off, len, dhp, &dhp->dh_pvtp,
    891 				    newdhp, &newdhp->dh_pvtp);
			/*
			 * newdhp is a copy of dhp, so its front is advanced
			 * past both the kept low part and the hole; dhp is
			 * then shortened to end at addr and detached.
			 */
    892 			mlen = len + (addr - dhp->dh_uvaddr);
    893 			devmap_handle_reduce_len(newdhp, mlen);
    894 			nsdp->devmap_data = newdhp;
    895 			/* XX Changing len should recalculate LARGE flag */
    896 			dhp->dh_len = addr - dhp->dh_uvaddr;
    897 			dhpp = dhp->dh_next;
    898 			dhp->dh_next = NULL;
    899 			dhp = dhpp;
    900 		} else if ((addr > dhp->dh_uvaddr) &&
    901 		    ((addr + len) >= (dhp->dh_uvaddr + dhp->dh_len))) {
			/* mlen: bytes of dhp that fall inside the hole */
    902 			mlen = dhp->dh_len + dhp->dh_uvaddr - addr;
    903 			/*
    904 			 * <addr, addr+len> spans over dhps.
    905 			 */
    906 			if (callbackops->devmap_unmap != NULL)
    907 				(*callbackops->devmap_unmap)(dhp, dhp->dh_pvtp,
    908 				    off, mlen, (devmap_cookie_t *)dhp,
    909 				    &dhp->dh_pvtp, NULL, NULL);
    910 			/* XX Changing len should recalculate LARGE flag */
			/*
			 * Trim dhp's tail, end seg's list here and let the
			 * following handle start nseg's list; later
			 * iterations may still adjust or replace that head.
			 */
    911 			dhp->dh_len = addr - dhp->dh_uvaddr;
    912 			dhpp = dhp->dh_next;
    913 			dhp->dh_next = NULL;
    914 			dhp = dhpp;
    915 			nsdp->devmap_data = dhp;
    916 		} else if ((addr + len) >= (dhp->dh_uvaddr + dhp->dh_len)) {
    917 			/*
    918 			 * dhp is enclosed by <addr, addr+len>.
    919 			 */
    920 			dhp->dh_seg = nseg;
    921 			nsdp->devmap_data = dhp;
			/*
			 * devmap_handle_unmap() returns the handle to
			 * continue with, which also becomes nseg's list head.
			 */
    922 			dhp = devmap_handle_unmap(dhp);
    923 			nsdp->devmap_data = dhp; /* XX redundant? */
    924 		} else if (((addr + len) > dhp->dh_uvaddr) &&
    925 		    ((addr + len) < (dhp->dh_uvaddr + dhp->dh_len))) {
			/*
			 * The hole ends inside dhp (and begins at or before
			 * its start): drop the first mlen bytes of dhp and
			 * make it the first handle of nseg.
			 */
    926 			mlen = addr + len - dhp->dh_uvaddr;
    927 			if (callbackops->devmap_unmap != NULL)
    928 				(*callbackops->devmap_unmap)(dhp, dhp->dh_pvtp,
    929 				    dhp->dh_uoff, mlen, NULL,
    930 				    NULL, dhp, &dhp->dh_pvtp);
    931 			devmap_handle_reduce_len(dhp, mlen);
    932 			nsdp->devmap_data = dhp;
    933 			dhp->dh_seg = nseg;
    934 			dhp = dhp->dh_next;
    935 		} else {
			/* dhp lies wholly above the hole; re-home it to nseg */
    936 			dhp->dh_seg = nseg;
    937 			dhp = dhp->dh_next;
    938 		}
    939 	}
    940 	return (0);
    941 }
    942 
    943 /*
    944  * Utility function that reduces the length of a devmap handle during unmap.
    945  * Note that it is only used for unmapping the front portion of the handle,
    946  * i.e., we are bumping the offset/pfn etc. up by len.
    947  * Do not use if reducing length at the tail.
    948  */
    949 static void
    950 devmap_handle_reduce_len(devmap_handle_t *dhp, size_t len)
    951 {