Home | History | Annotate | Download | only in io
      1 /*
      2  * CDDL HEADER START
      3  *
      4  * The contents of this file are subject to the terms of the
      5  * Common Development and Distribution License (the "License").
      6  * You may not use this file except in compliance with the License.
      7  *
      8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
      9  * or http://www.opensolaris.org/os/licensing.
     10  * See the License for the specific language governing permissions
     11  * and limitations under the License.
     12  *
     13  * When distributing Covered Code, include this CDDL HEADER in each
     14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
     15  * If applicable, add the following below this CDDL HEADER, with the
     16  * fields enclosed by brackets "[]" replaced with your own identifying
     17  * information: Portions Copyright [yyyy] [name of copyright owner]
     18  *
     19  * CDDL HEADER END
     20  */
     21 
     22 /*
     23  * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
     24  * Use is subject to license terms.
     25  */
     26 
     27 /*
     28  * driver for accessing kernel devinfo tree.
     29  */
     30 #include <sys/types.h>
     31 #include <sys/pathname.h>
     32 #include <sys/debug.h>
     33 #include <sys/autoconf.h>
     34 #include <sys/vmsystm.h>
     35 #include <sys/conf.h>
     36 #include <sys/file.h>
     37 #include <sys/kmem.h>
     38 #include <sys/modctl.h>
     39 #include <sys/stat.h>
     40 #include <sys/ddi.h>
     41 #include <sys/sunddi.h>
     42 #include <sys/sunldi_impl.h>
     43 #include <sys/sunndi.h>
     44 #include <sys/esunddi.h>
     45 #include <sys/sunmdi.h>
     46 #include <sys/ddi_impldefs.h>
     47 #include <sys/ndi_impldefs.h>
     48 #include <sys/mdi_impldefs.h>
     49 #include <sys/devinfo_impl.h>
     50 #include <sys/thread.h>
     51 #include <sys/modhash.h>
     52 #include <sys/bitmap.h>
     53 #include <util/qsort.h>
     54 #include <sys/disp.h>
     55 #include <sys/kobj.h>
     56 #include <sys/crc32.h>
     57 
     58 
     59 #ifdef DEBUG
     60 static int di_debug;
     61 #define	dcmn_err(args) if (di_debug >= 1) cmn_err args
     62 #define	dcmn_err2(args) if (di_debug >= 2) cmn_err args
     63 #define	dcmn_err3(args) if (di_debug >= 3) cmn_err args
     64 #else
     65 #define	dcmn_err(args) /* nothing */
     66 #define	dcmn_err2(args) /* nothing */
     67 #define	dcmn_err3(args) /* nothing */
     68 #endif
     69 
/*
 * We partition the space of devinfo minor nodes equally between the full and
 * unprivileged versions of the driver.  The even-numbered minor nodes are the
 * full version, while the odd-numbered ones are the read-only version.
 *
 * di_max_opens is the number of slots in the di_states[] table; it bounds
 * the number of simultaneous opens across both versions.
 */
static int di_max_opens = 32;

static int di_prop_dyn = 1;		/* enable dynamic property support */

/*
 * DI_FULL_PARENT and DI_READONLY_PARENT are the minor numbers of the two
 * minor nodes created at attach time.  DI_NODE_SPECIES is the number of such
 * node kinds: di_open() uses it both as the stride when searching di_states[]
 * and as the offset added to the slot index to form the per-open minor.
 * DI_UNPRIVILEGED_NODE() is true for odd (read-only) values.
 */
#define	DI_FULL_PARENT		0
#define	DI_READONLY_PARENT	1
#define	DI_NODE_SPECIES		2
#define	DI_UNPRIVILEGED_NODE(x)	(((x) % 2) != 0)

/*
 * Values passed to di_setstate() to track the progress of a snapshot
 * request on one open instance.
 */
#define	IOC_IDLE	0	/* snapshot ioctl states */
#define	IOC_SNAP	1	/* snapshot in progress */
#define	IOC_DONE	2	/* snapshot done, but not copied out */
#define	IOC_COPY	3	/* copyout in progress */
     88 
     89 /*
     90  * Keep max alignment so we can move snapshot to different platforms.
     91  *
     92  * NOTE: Most callers should rely on the di_checkmem return value
     93  * being aligned, and reestablish *off_p with aligned value, instead
     94  * of trying to align size of their allocations: this approach will
     95  * minimize memory use.
     96  */
     97 #define	DI_ALIGN(addr)	((addr + 7l) & ~7l)
     98 
/*
 * To avoid wasting memory, make a linked list of memory chunks.
 * Size of each chunk is buf_size.
 *
 * Chunks are appended by di_allocmem() and released by di_freemem(); the
 * head of the list is kept in struct di_state.
 */
struct di_mem {
	struct di_mem	*next;		/* link to next chunk */
	char		*buf;		/* contiguous kernel memory */
	size_t		buf_size;	/* size of buf in bytes */
	devmap_cookie_t	cook;		/* cookie from ddi_umem_alloc */
};
    109 
/*
 * This is a stack for walking the tree without using recursion.
 * When the devinfo tree height is above some small size, one
 * gets watchdog resets on sun4m.
 *
 * The three arrays are parallel: entry i holds the node at depth i, the
 * offset pointer associated with it, and the value filled in by
 * ndi_devi_enter() that must be handed back to ndi_devi_exit().
 */
struct di_stack {
	void		*offset[MAX_TREE_DEPTH];
	struct dev_info *dip[MAX_TREE_DEPTH];
	int		circ[MAX_TREE_DEPTH];
	int		depth;	/* depth of current node to be copied */
};

/*
 * Stack accessors.  TOP_* refer to the most recently pushed entry and
 * PARENT_OFFSET to the entry below it; none of them check depth, so the
 * caller must know the stack is deep enough.
 *
 * PUSH_STACK enters the node with ndi_devi_enter() before recording it and
 * POP_STACK leaves it with ndi_devi_exit() before dropping the entry, so a
 * node stays entered for as long as it is on the stack.  PUSH_STACK does
 * not check depth against MAX_TREE_DEPTH.
 *
 * NOTE: PUSH_STACK and POP_STACK expand to a brace-enclosed block rather
 * than a single statement; do not use them as the unbraced body of an
 * if/else.
 */
#define	TOP_OFFSET(stack)	\
	((di_off_t *)(stack)->offset[(stack)->depth - 1])
#define	TOP_NODE(stack)		\
	((stack)->dip[(stack)->depth - 1])
#define	PARENT_OFFSET(stack)	\
	((di_off_t *)(stack)->offset[(stack)->depth - 2])
#define	EMPTY_STACK(stack)	((stack)->depth == 0)
#define	POP_STACK(stack)	{ \
	ndi_devi_exit((dev_info_t *)TOP_NODE(stack), \
		(stack)->circ[(stack)->depth - 1]); \
	((stack)->depth--); \
}
#define	PUSH_STACK(stack, node, off_p)	{ \
	ASSERT(node != NULL); \
	ndi_devi_enter((dev_info_t *)node, &(stack)->circ[(stack)->depth]); \
	(stack)->dip[(stack)->depth] = (node); \
	(stack)->offset[(stack)->depth] = (void *)(off_p); \
	((stack)->depth)++; \
}

/*
 * The snapshot header (struct di_all) located at offset 0 of the memlist
 * belonging to state "s".
 */
#define	DI_ALL_PTR(s)	DI_ALL(di_mem_addr((s), 0))
    143 
    144 /*
    145  * With devfs, the device tree has no global locks. The device tree is
    146  * dynamic and dips may come and go if they are not locked locally. Under
    147  * these conditions, pointers are no longer reliable as unique IDs.
    148  * Specifically, these pointers cannot be used as keys for hash tables
    149  * as the same devinfo structure may be freed in one part of the tree only
    150  * to be allocated as the structure for a different device in another
    151  * part of the tree. This can happen if DR and the snapshot are
    152  * happening concurrently.
    153  * The following data structures act as keys for devinfo nodes and
    154  * pathinfo nodes.
    155  */
    156 
/* Discriminator for struct di_key: devinfo-node key or pathinfo-node key. */
enum di_ktype {
	DI_DKEY = 1,
	DI_PKEY = 2
};

/*
 * Key for a devinfo node.  The pointer is combined with further attributes
 * of the node because, as explained above, a pointer alone is not a
 * reliable identity.
 */
struct di_dkey {
	dev_info_t	*dk_dip;
	major_t		dk_major;
	int		dk_inst;
	pnode_t		dk_nodeid;
};

/*
 * Key for a pathinfo node.
 * NOTE(review): presumably pk_client and pk_phci are the two endpoints of
 * the path and pk_path_addr its address string -- confirm against the code
 * that builds these keys.
 */
struct di_pkey {
	mdi_pathinfo_t	*pk_pip;
	char		*pk_path_addr;
	dev_info_t	*pk_client;
	dev_info_t	*pk_phci;
};

/*
 * Tagged union of the two key kinds; k_type presumably tells which member
 * of k_u is in use (DI_DKEY -> dkey, DI_PKEY -> pkey).
 */
struct di_key {
	enum di_ktype	k_type;
	union {
		struct di_dkey dkey;
		struct di_pkey pkey;
	} k_u;
};
    183 
    184 
/* Forward declaration: i_link and i_lnode refer to each other. */
struct i_lnode;

/*
 * In-kernel bookkeeping for one link between two link endpoints (i_lnodes).
 * Each i_link sits on two singly linked lists: the outgoing list of its
 * source i_lnode and the incoming list of its target i_lnode.
 */
typedef struct i_link {
	/*
	 * If a di_link struct representing this i_link struct makes it
	 * into the snapshot, then self will point to the offset of
	 * the di_link struct in the snapshot
	 */
	di_off_t	self;

	int		spec_type;	/* block or char access type */
	struct i_lnode	*src_lnode;	/* src i_lnode */
	struct i_lnode	*tgt_lnode;	/* tgt i_lnode */
	struct i_link	*src_link_next;	/* next src i_link with same i_lnode */
	struct i_link	*tgt_link_next;	/* next tgt i_link with same i_lnode */
} i_link_t;

/*
 * In-kernel bookkeeping for one link endpoint.
 */
typedef struct i_lnode {
	/*
	 * If a di_lnode struct representing this i_lnode struct makes it
	 * into the snapshot, then self will point to the offset of
	 * the di_lnode struct in the snapshot
	 */
	di_off_t	self;

	/*
	 * used for hashing and comparing i_lnodes
	 */
	int		modid;

	/*
	 * public information describing a link endpoint
	 */
	struct di_node	*di_node;	/* di_node in snapshot */
	dev_t		devt;		/* devt */

	/*
	 * i_link ptr to links coming into this i_lnode node
	 * (this i_lnode is the target of these i_links)
	 */
	i_link_t	*link_in;

	/*
	 * i_link ptr to links going out of this i_lnode node
	 * (this i_lnode is the source of these i_links)
	 */
	i_link_t	*link_out;
} i_lnode_t;
    233 
/*
 * Soft state associated with each instance of driver open.
 *
 * di_states is a table of di_max_opens pointers allocated at attach time.
 * Slot m belongs to the open whose minor number is m + DI_NODE_SPECIES;
 * a NULL slot is free.
 */
static struct di_state {
	di_off_t	mem_size;	/* total # bytes in memlist */
	struct di_mem	*memlist;	/* head of memlist */
	uint_t		command;	/* command from ioctl */
	int		di_iocstate;	/* snapshot ioctl state	*/
	mod_hash_t	*reg_dip_hash;	/* presumably dips seen so far */
	mod_hash_t	*reg_pip_hash;	/* presumably pips seen so far */
	int		lnode_count;
	int		link_count;

	mod_hash_t	*lnode_hash;
	mod_hash_t	*link_hash;
} **di_states;

/*
 * Held while a slot in di_states[] is claimed (di_open) or released
 * (di_close).
 */
static kmutex_t di_lock;	/* serialize instance assignment */
    252 
/*
 * Message levels accepted by di_cache_print().  CACHE_DEBUG() below tests
 * the current level against DI_QUIET, which is why that value is pinned
 * to 0.
 */
typedef enum {
	DI_QUIET = 0,	/* DI_QUIET must always be 0 */
	DI_ERR,
	DI_INFO,
	DI_TRACE,
	DI_TRACE1,
	DI_TRACE2
} di_cache_debug_t;

static uint_t	di_chunk = 32;		/* I/O chunk size in pages */

/*
 * Lock helpers for a struct di_cache.  They take the structure itself, not
 * a pointer to it (callers holding a pointer pass *cache).
 */
#define	DI_CACHE_LOCK(c)	(mutex_enter(&(c).cache_lock))
#define	DI_CACHE_UNLOCK(c)	(mutex_exit(&(c).cache_lock))
#define	DI_CACHE_LOCKED(c)	(mutex_owned(&(c).cache_lock))

/*
 * Check that whole device tree is being configured as a pre-condition for
 * cleaning up /etc/devices files.
 *
 * True only when the request has both DINFOSUBTREE and DINFOFORCE set and
 * the snapshot's root path is exactly "/".
 */
#define	DEVICES_FILES_CLEANABLE(st)	\
	(((st)->command & DINFOSUBTREE) && ((st)->command & DINFOFORCE) && \
	strcmp(DI_ALL_PTR(st)->root_path, "/") == 0)

/*
 * Emit a cache debug message through di_cache_print() unless the debug
 * level (di_cache_debug, declared outside this excerpt) is DI_QUIET.
 * The argument is a parenthesized di_cache_print() argument list:
 *
 *	CACHE_DEBUG((DI_ERR, "message %d", val));
 */
#define	CACHE_DEBUG(args)	\
	{ if (di_cache_debug != DI_QUIET) di_cache_print args; }
    278 
/*
 * Argument bundle pairing a snapshot offset with the snapshot state.
 * NOTE(review): by its name this is presumably handed to the pHCI walk
 * callback (build_phci_list); confirm at the call site.
 */
typedef struct phci_walk_arg {
	di_off_t	off;
	struct di_state	*st;
} phci_walk_arg_t;
    283 
/* Driver entry points, referenced from di_cb_ops and di_ops. */
static int di_open(dev_t *, int, int, cred_t *);
static int di_ioctl(dev_t, int, intptr_t, int, cred_t *, int *);
static int di_close(dev_t, int, int, cred_t *);
static int di_info(dev_info_t *, ddi_info_cmd_t, void *, void **);
static int di_attach(dev_info_t *, ddi_attach_cmd_t);
static int di_detach(dev_info_t *, ddi_detach_cmd_t);

/* Snapshot construction and snapshot memory management. */
static di_off_t di_copyformat(di_off_t, struct di_state *, intptr_t, int);
static di_off_t di_snapshot_and_clean(struct di_state *);
static di_off_t di_copydevnm(di_off_t *, struct di_state *);
static di_off_t di_copytree(struct dev_info *, di_off_t *, struct di_state *);
static di_off_t di_copynode(struct dev_info *, struct di_stack *,
    struct di_state *);
static di_off_t di_getmdata(struct ddi_minor_data *, di_off_t *, di_off_t,
    struct di_state *);
static di_off_t di_getppdata(struct dev_info *, di_off_t *, struct di_state *);
static di_off_t di_getdpdata(struct dev_info *, di_off_t *, struct di_state *);
static di_off_t di_getprop(int, struct ddi_prop **, di_off_t *,
    struct di_state *, struct dev_info *);
static void di_allocmem(struct di_state *, size_t);
static void di_freemem(struct di_state *);
static void di_copymem(struct di_state *st, caddr_t buf, size_t bufsiz);
static di_off_t di_checkmem(struct di_state *, di_off_t, size_t);
static void *di_mem_addr(struct di_state *, di_off_t);
static int di_setstate(struct di_state *, int);
static void di_register_dip(struct di_state *, dev_info_t *, di_off_t);
static void di_register_pip(struct di_state *, mdi_pathinfo_t *, di_off_t);
static di_off_t di_getpath_data(dev_info_t *, di_off_t *, di_off_t,
    struct di_state *, int);
static di_off_t di_getlink_data(di_off_t, struct di_state *);
static int di_dip_find(struct di_state *st, dev_info_t *node, di_off_t *off_p);

/* Snapshot cache support and vHCI/pHCI list construction. */
static int cache_args_valid(struct di_state *st, int *error);
static int snapshot_is_cacheable(struct di_state *st);
static int di_cache_lookup(struct di_state *st);
static int di_cache_update(struct di_state *st);
static void di_cache_print(di_cache_debug_t msglevel, char *fmt, ...);
static int build_vhci_list(dev_info_t *vh_devinfo, void *arg);
static int build_phci_list(dev_info_t *ph_devinfo, void *arg);

/* Symbols defined outside this file. */
extern int modrootloaded;
extern void mdi_walk_vhcis(int (*)(dev_info_t *, void *), void *);
extern void mdi_vhci_walk_phcis(dev_info_t *,
	int (*)(dev_info_t *, void *), void *);
    328 
    329 
/*
 * Character/block entry points.  Only open, close and ioctl are
 * implemented by this driver; the remaining slots are filled with the
 * nodev/nochpoll placeholders.
 */
static struct cb_ops di_cb_ops = {
	di_open,		/* open */
	di_close,		/* close */
	nodev,			/* strategy */
	nodev,			/* print */
	nodev,			/* dump */
	nodev,			/* read */
	nodev,			/* write */
	di_ioctl,		/* ioctl */
	nodev,			/* devmap */
	nodev,			/* mmap */
	nodev,			/* segmap */
	nochpoll,		/* poll */
	ddi_prop_op,		/* prop_op */
	NULL,			/* streamtab  */
	D_NEW | D_MP		/* Driver compatibility flag */
};

/*
 * Device operations: getinfo, attach and detach are provided here; there
 * are no bus operations.
 */
static struct dev_ops di_ops = {
	DEVO_REV,		/* devo_rev, */
	0,			/* refcnt  */
	di_info,		/* info */
	nulldev,		/* identify */
	nulldev,		/* probe */
	di_attach,		/* attach */
	di_detach,		/* detach */
	nodev,			/* reset */
	&di_cb_ops,		/* driver operations */
	NULL			/* bus operations */
};

/*
 * Module linkage information for the kernel.
 */
static struct modldrv modldrv = {
	&mod_driverops,
	"DEVINFO Driver",
	&di_ops
};

/* Passed to mod_install(), mod_info() and mod_remove(). */
static struct modlinkage modlinkage = {
	MODREV_1,
	&modldrv,
	NULL
};
    375 
    376 int
    377 _init(void)
    378 {
    379 	int	error;
    380 
    381 	mutex_init(&di_lock, NULL, MUTEX_DRIVER, NULL);
    382 
    383 	error = mod_install(&modlinkage);
    384 	if (error != 0) {
    385 		mutex_destroy(&di_lock);
    386 		return (error);
    387 	}
    388 
    389 	return (0);
    390 }
    391 
    392 int
    393 _info(struct modinfo *modinfop)
    394 {
    395 	return (mod_info(&modlinkage, modinfop));
    396 }
    397 
    398 int
    399 _fini(void)
    400 {
    401 	int	error;
    402 
    403 	error = mod_remove(&modlinkage);
    404 	if (error != 0) {
    405 		return (error);
    406 	}
    407 
    408 	mutex_destroy(&di_lock);
    409 	return (0);
    410 }
    411 
    412 static dev_info_t *di_dip;
    413 
    414 /*ARGSUSED*/
    415 static int
    416 di_info(dev_info_t *dip, ddi_info_cmd_t infocmd, void *arg, void **result)
    417 {
    418 	int	error = DDI_FAILURE;
    419 
    420 	switch (infocmd) {
    421 	case DDI_INFO_DEVT2DEVINFO:
    422 		*result = (void *)di_dip;
    423 		error = DDI_SUCCESS;
    424 		break;
    425 	case DDI_INFO_DEVT2INSTANCE:
    426 		/*
    427 		 * All dev_t's map to the same, single instance.
    428 		 */
    429 		*result = (void *)0;
    430 		error = DDI_SUCCESS;
    431 		break;
    432 	default:
    433 		break;
    434 	}
    435 
    436 	return (error);
    437 }
    438 
    439 static int
    440 di_attach(dev_info_t *dip, ddi_attach_cmd_t cmd)
    441 {
    442 	int	error = DDI_FAILURE;
    443 
    444 	switch (cmd) {
    445 	case DDI_ATTACH:
    446 		di_states = kmem_zalloc(
    447 		    di_max_opens * sizeof (struct di_state *), KM_SLEEP);
    448 
    449 		if (ddi_create_minor_node(dip, "devinfo", S_IFCHR,
    450 		    DI_FULL_PARENT, DDI_PSEUDO, NULL) == DDI_FAILURE ||
    451 		    ddi_create_minor_node(dip, "devinfo,ro", S_IFCHR,
    452 		    DI_READONLY_PARENT, DDI_PSEUDO, NULL) == DDI_FAILURE) {
    453 			kmem_free(di_states,
    454 			    di_max_opens * sizeof (struct di_state *));
    455 			ddi_remove_minor_node(dip, NULL);
    456 			error = DDI_FAILURE;
    457 		} else {
    458 			di_dip = dip;
    459 			ddi_report_dev(dip);
    460 
    461 			error = DDI_SUCCESS;
    462 		}
    463 		break;
    464 	default:
    465 		error = DDI_FAILURE;
    466 		break;
    467 	}
    468 
    469 	return (error);
    470 }
    471 
    472 static int
    473 di_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
    474 {
    475 	int	error = DDI_FAILURE;
    476 
    477 	switch (cmd) {
    478 	case DDI_DETACH:
    479 		ddi_remove_minor_node(dip, NULL);
    480 		di_dip = NULL;
    481 		kmem_free(di_states, di_max_opens * sizeof (struct di_state *));
    482 
    483 		error = DDI_SUCCESS;
    484 		break;
    485 	default:
    486 		error = DDI_FAILURE;
    487 		break;
    488 	}
    489 
    490 	return (error);
    491 }
    492 
    493 /*
    494  * Allow multiple opens by tweaking the dev_t such that it looks like each
    495  * open is getting a different minor device.  Each minor gets a separate
    496  * entry in the di_states[] table.  Based on the original minor number, we
    497  * discriminate opens of the full and read-only nodes.  If all of the instances
    498  * of the selected minor node are currently open, we return EAGAIN.
    499  */
    500 /*ARGSUSED*/
    501 static int
    502 di_open(dev_t *devp, int flag, int otyp, cred_t *credp)
    503 {
    504 	int	m;
    505 	minor_t	minor_parent = getminor(*devp);
    506 
    507 	if (minor_parent != DI_FULL_PARENT &&
    508 	    minor_parent != DI_READONLY_PARENT)
    509 		return (ENXIO);
    510 
    511 	mutex_enter(&di_lock);
    512 
    513 	for (m = minor_parent; m < di_max_opens; m += DI_NODE_SPECIES) {
    514 		if (di_states[m] != NULL)
    515 			continue;
    516 
    517 		di_states[m] = kmem_zalloc(sizeof (struct di_state), KM_SLEEP);
    518 		break;	/* It's ours. */
    519 	}
    520 
    521 	if (m >= di_max_opens) {
    522 		/*
    523 		 * maximum open instance for device reached
    524 		 */
    525 		mutex_exit(&di_lock);
    526 		dcmn_err((CE_WARN, "devinfo: maximum devinfo open reached"));
    527 		return (EAGAIN);
    528 	}
    529 	mutex_exit(&di_lock);
    530 
    531 	ASSERT(m < di_max_opens);
    532 	*devp = makedevice(getmajor(*devp), (minor_t)(m + DI_NODE_SPECIES));
    533 
    534 	dcmn_err((CE_CONT, "di_open: thread = %p, assigned minor = %d\n",
    535 	    (void *)curthread, m + DI_NODE_SPECIES));
    536 
    537 	return (0);
    538 }
    539 
    540 /*ARGSUSED*/
    541 static int
    542 di_close(dev_t dev, int flag, int otype, cred_t *cred_p)
    543 {
    544 	struct di_state	*st;
    545 	int		m = (int)getminor(dev) - DI_NODE_SPECIES;
    546 
    547 	if (m < 0) {
    548 		cmn_err(CE_WARN, "closing non-existent devinfo minor %d",
    549 		    m + DI_NODE_SPECIES);
    550 		return (ENXIO);
    551 	}
    552 
    553 	st = di_states[m];
    554 	ASSERT(m < di_max_opens && st != NULL);
    555 
    556 	di_freemem(st);
    557 	kmem_free(st, sizeof (struct di_state));
    558 
    559 	/*
    560 	 * empty slot in state table
    561 	 */
    562 	mutex_enter(&di_lock);
    563 	di_states[m] = NULL;
    564 	dcmn_err((CE_CONT, "di_close: thread = %p, assigned minor = %d\n",
    565 	    (void *)curthread, m + DI_NODE_SPECIES));
    566 	mutex_exit(&di_lock);
    567 
    568 	return (0);
    569 }
    570 
    571 
    572 /*ARGSUSED*/
    573 static int
    574 di_ioctl(dev_t dev, int cmd, intptr_t arg, int mode, cred_t *credp, int *rvalp)
    575 {
    576 	int		rv, error;
    577 	di_off_t	off;
    578 	struct di_all	*all;
    579 	struct di_state	*st;
    580 	int		m = (int)getminor(dev) - DI_NODE_SPECIES;
    581 	major_t		i;
    582 	char		*drv_name;
    583 	size_t		map_size, size;
    584 	struct di_mem	*dcp;
    585 	int		ndi_flags;
    586 
    587 	if (m < 0 || m >= di_max_opens) {
    588 		return (ENXIO);
    589 	}
    590 
    591 	st = di_states[m];
    592 	ASSERT(st != NULL);
    593 
    594 	dcmn_err2((CE_CONT, "di_ioctl: mode = %x, cmd = %x\n", mode, cmd));
    595 
    596 	switch (cmd) {
    597 	case DINFOIDENT:
    598 		/*
    599 		 * This is called from di_init to verify that the driver
    600 		 * opened is indeed devinfo. The purpose is to guard against
    601 		 * sending ioctl to an unknown driver in case of an
    602 		 * unresolved major number conflict during bfu.
    603 		 */
    604 		*rvalp = DI_MAGIC;
    605 		return (0);
    606 
    607 	case DINFOLODRV:
    608 		/*
    609 		 * Hold an installed driver and return the result
    610 		 */
    611 		if (DI_UNPRIVILEGED_NODE(m)) {
    612 			/*
    613 			 * Only the fully enabled instances may issue
    614 			 * DINFOLDDRV.
    615 			 */
    616 			return (EACCES);
    617 		}
    618 
    619 		drv_name = kmem_alloc(MAXNAMELEN, KM_SLEEP);
    620 		if (ddi_copyin((void *)arg, drv_name, MAXNAMELEN, mode) != 0) {
    621 			kmem_free(drv_name, MAXNAMELEN);
    622 			return (EFAULT);
    623 		}
    624 
    625 		/*
    626 		 * Some 3rd party driver's _init() walks the device tree,
    627 		 * so we load the driver module before configuring driver.
    628 		 */
    629 		i = ddi_name_to_major(drv_name);
    630 		if (ddi_hold_driver(i) == NULL) {
    631 			kmem_free(drv_name, MAXNAMELEN);
    632 			return (ENXIO);
    633 		}
    634 
    635 		ndi_flags = NDI_DEVI_PERSIST | NDI_CONFIG | NDI_NO_EVENT;
    636 
    637 		/*
    638 		 * i_ddi_load_drvconf() below will trigger a reprobe
    639 		 * via reset_nexus_flags(). NDI_DRV_CONF_REPROBE isn't
    640 		 * needed here.
    641 		 */
    642 		modunload_disable();
    643 		(void) i_ddi_load_drvconf(i);
    644 		(void) ndi_devi_config_driver(ddi_root_node(), ndi_flags, i);
    645 		kmem_free(drv_name, MAXNAMELEN);
    646 		ddi_rele_driver(i);
    647 		rv = i_ddi_devs_attached(i);
    648 		modunload_enable();
    649 
    650 		i_ddi_di_cache_invalidate(KM_SLEEP);
    651 
    652 		return ((rv == DDI_SUCCESS)? 0 : ENXIO);
    653 
    654 	case DINFOUSRLD:
    655 		/*
    656 		 * The case for copying snapshot to userland
    657 		 */
    658 		if (di_setstate(st, IOC_COPY) == -1)
    659 			return (EBUSY);
    660 
    661 		map_size = DI_ALL_PTR(st)->map_size;
    662 		if (map_size == 0) {
    663 			(void) di_setstate(st, IOC_DONE);
    664 			return (EFAULT);
    665 		}
    666 
    667 		/*
    668 		 * copyout the snapshot
    669 		 */
    670 		map_size = (map_size + PAGEOFFSET) & PAGEMASK;
    671 
    672 		/*
    673 		 * Return the map size, so caller may do a sanity
    674 		 * check against the return value of snapshot ioctl()
    675 		 */
    676 		*rvalp = (int)map_size;
    677 
    678 		/*
    679 		 * Copy one chunk at a time
    680 		 */
    681 		off = 0;
    682 		dcp = st->memlist;
    683 		while (map_size) {
    684 			size = dcp->buf_size;
    685 			if (map_size <= size) {
    686 				size = map_size;
    687 			}
    688 
    689 			if (ddi_copyout(di_mem_addr(st, off),
    690 			    (void *)(arg + off), size, mode) != 0) {
    691 				(void) di_setstate(st, IOC_DONE);
    692 				return (EFAULT);
    693 			}
    694 
    695 			map_size -= size;
    696 			off += size;
    697 			dcp = dcp->next;
    698 		}
    699 
    700 		di_freemem(st);
    701 		(void) di_setstate(st, IOC_IDLE);
    702 		return (0);
    703 
    704 	default:
    705 		if ((cmd & ~DIIOC_MASK) != DIIOC) {
    706 			/*
    707 			 * Invalid ioctl command
    708 			 */
    709 			return (ENOTTY);
    710 		}
    711 		/*
    712 		 * take a snapshot
    713 		 */
    714 		st->command = cmd & DIIOC_MASK;
    715 		/*FALLTHROUGH*/
    716 	}
    717 
    718 	/*
    719 	 * Obtain enough memory to hold header + rootpath.  We prevent kernel
    720 	 * memory exhaustion by freeing any previously allocated snapshot and
    721 	 * refusing the operation; otherwise we would be allowing ioctl(),
    722 	 * ioctl(), ioctl(), ..., panic.
    723 	 */
    724 	if (di_setstate(st, IOC_SNAP) == -1)
    725 		return (EBUSY);
    726 
    727 	/*
    728 	 * Initial memlist always holds di_all and the root_path - and
    729 	 * is at least a page and size.
    730 	 */
    731 	size = sizeof (struct di_all) +
    732 	    sizeof (((struct dinfo_io *)(NULL))->root_path);
    733 	if (size < PAGESIZE)
    734 		size = PAGESIZE;
    735 	off = di_checkmem(st, 0, size);
    736 	all = DI_ALL_PTR(st);
    737 	off += sizeof (struct di_all);		/* real length of di_all */
    738 
    739 	all->devcnt = devcnt;
    740 	all->command = st->command;
    741 	all->version = DI_SNAPSHOT_VERSION;
    742 	all->top_vhci_devinfo = 0;		/* filled by build_vhci_list. */
    743 
    744 	/*
    745 	 * Note the endianness in case we need to transport snapshot
    746 	 * over the network.
    747 	 */
    748 #if defined(_LITTLE_ENDIAN)
    749 	all->endianness = DI_LITTLE_ENDIAN;
    750 #else
    751 	all->endianness = DI_BIG_ENDIAN;
    752 #endif
    753 
    754 	/* Copyin ioctl args, store in the snapshot. */
    755 	if (copyinstr((void *)arg, all->root_path,
    756 	    sizeof (((struct dinfo_io *)(NULL))->root_path), &size) != 0) {
    757 		di_freemem(st);
    758 		(void) di_setstate(st, IOC_IDLE);
    759 		return (EFAULT);
    760 	}
    761 	off += size;				/* real length of root_path */
    762 
    763 	if ((st->command & DINFOCLEANUP) && !DEVICES_FILES_CLEANABLE(st)) {
    764 		di_freemem(st);
    765 		(void) di_setstate(st, IOC_IDLE);
    766 		return (EINVAL);
    767 	}
    768 
    769 	error = 0;
    770 	if ((st->command & DINFOCACHE) && !cache_args_valid(st, &error)) {
    771 		di_freemem(st);
    772 		(void) di_setstate(st, IOC_IDLE);
    773 		return (error);
    774 	}
    775 
    776 	/*
    777 	 * Only the fully enabled version may force load drivers or read
    778 	 * the parent private data from a driver.
    779 	 */
    780 	if ((st->command & (DINFOPRIVDATA | DINFOFORCE)) != 0 &&
    781 	    DI_UNPRIVILEGED_NODE(m)) {
    782 		di_freemem(st);
    783 		(void) di_setstate(st, IOC_IDLE);
    784 		return (EACCES);
    785 	}
    786 
    787 	/* Do we need private data? */
    788 	if (st->command & DINFOPRIVDATA) {
    789 		arg += sizeof (((struct dinfo_io *)(NULL))->root_path);
    790 
    791 #ifdef _MULTI_DATAMODEL
    792 		switch (ddi_model_convert_from(mode & FMODELS)) {
    793 		case DDI_MODEL_ILP32: {
    794 			/*
    795 			 * Cannot copy private data from 64-bit kernel
    796 			 * to 32-bit app
    797 			 */
    798 			di_freemem(st);
    799 			(void) di_setstate(st, IOC_IDLE);
    800 			return (EINVAL);
    801 		}
    802 		case DDI_MODEL_NONE:
    803 			if ((off = di_copyformat(off, st, arg, mode)) == 0) {
    804 				di_freemem(st);
    805 				(void) di_setstate(st, IOC_IDLE);
    806 				return (EFAULT);
    807 			}
    808 			break;
    809 		}
    810 #else /* !_MULTI_DATAMODEL */
    811 		if ((off = di_copyformat(off, st, arg, mode)) == 0) {
    812 			di_freemem(st);
    813 			(void) di_setstate(st, IOC_IDLE);
    814 			return (EFAULT);
    815 		}
    816 #endif /* _MULTI_DATAMODEL */
    817 	}
    818 
    819 	all->top_devinfo = DI_ALIGN(off);
    820 
    821 	/*
    822 	 * For cache lookups we reallocate memory from scratch,
    823 	 * so the value of "all" is no longer valid.
    824 	 */
    825 	all = NULL;
    826 
    827 	if (st->command & DINFOCACHE) {
    828 		*rvalp = di_cache_lookup(st);
    829 	} else if (snapshot_is_cacheable(st)) {
    830 		DI_CACHE_LOCK(di_cache);
    831 		*rvalp = di_cache_update(st);
    832 		DI_CACHE_UNLOCK(di_cache);
    833 	} else
    834 		*rvalp = di_snapshot_and_clean(st);
    835 
    836 	if (*rvalp) {
    837 		DI_ALL_PTR(st)->map_size = *rvalp;
    838 		(void) di_setstate(st, IOC_DONE);
    839 	} else {
    840 		di_freemem(st);
    841 		(void) di_setstate(st, IOC_IDLE);
    842 	}
    843 
    844 	return (0);
    845 }
    846 
    847 /*
    848  * Get a chunk of memory >= size, for the snapshot
    849  */
    850 static void
    851 di_allocmem(struct di_state *st, size_t size)
    852 {
    853 	struct di_mem	*mem = kmem_zalloc(sizeof (struct di_mem), KM_SLEEP);
    854 
    855 	/*
    856 	 * Round up size to nearest power of 2. If it is less
    857 	 * than st->mem_size, set it to st->mem_size (i.e.,
    858 	 * the mem_size is doubled every time) to reduce the
    859 	 * number of memory allocations.
    860 	 */
    861 	size_t tmp = 1;
    862 	while (tmp < size) {
    863 		tmp <<= 1;
    864 	}
    865 	size = (tmp > st->mem_size) ? tmp : st->mem_size;
    866 
    867 	mem->buf = ddi_umem_alloc(size, DDI_UMEM_SLEEP, &mem->cook);
    868 	mem->buf_size = size;
    869 
    870 	dcmn_err2((CE_CONT, "di_allocmem: mem_size=%x\n", st->mem_size));
    871 
    872 	if (st->mem_size == 0) {	/* first chunk */
    873 		st->memlist = mem;
    874 	} else {
    875 		/*
    876 		 * locate end of linked list and add a chunk at the end
    877 		 */
    878 		struct di_mem *dcp = st->memlist;
    879 		while (dcp->next != NULL) {
    880 			dcp = dcp->next;
    881 		}
    882 
    883 		dcp->next = mem;
    884 	}
    885 
    886 	st->mem_size += size;
    887 }
    888 
    889 /*
    890  * Copy upto bufsiz bytes of the memlist to buf
    891  */
    892 static void
    893 di_copymem(struct di_state *st, caddr_t buf, size_t bufsiz)
    894 {
    895 	struct di_mem	*dcp;
    896 	size_t		copysz;
    897 
    898 	if (st->mem_size == 0) {
    899 		ASSERT(st->memlist == NULL);
    900 		return;
    901 	}
    902 
    903 	copysz = 0;
    904 	for (dcp = st->memlist; dcp; dcp = dcp->next) {
    905 
    906 		ASSERT(bufsiz > 0);
    907 
    908 		if (bufsiz <= dcp->buf_size)
    909 			copysz = bufsiz;
    910 		else
    911 			copysz = dcp->buf_size;
    912 
    913 		bcopy(dcp->buf, buf, copysz);
    914 
    915 		buf += copysz;
    916 		bufsiz -= copysz;
    917 
    918 		if (bufsiz == 0)
    919 			break;
    920 	}
    921 }
    922 
    923 /*
    924  * Free all memory for the snapshot
    925  */
    926 static void
    927 di_freemem(struct di_state *st)
    928 {
    929 	struct di_mem	*dcp, *tmp;
    930 
    931 	dcmn_err2((CE_CONT, "di_freemem\n"));
    932 
    933 	if (st->mem_size) {
    934 		dcp = st->memlist;
    935 		while (dcp) {	/* traverse the linked list */
    936 			tmp = dcp;
    937 			dcp = dcp->next;
    938 			ddi_umem_free(tmp->cook);
    939 			kmem_free(tmp, sizeof (struct di_mem));
    940 		}
    941 		st->mem_size = 0;
    942 		st->memlist = NULL;
    943 	}
    944 
    945 	ASSERT(st->mem_size == 0);
    946 	ASSERT(st->memlist == NULL);
    947 }
    948 
    949 /*
    950  * Copies cached data to the di_state structure.
    951  * Returns:
    952  *	- size of data copied, on SUCCESS
    953  *	- 0 on failure
    954  */
    955 static int
    956 di_cache2mem(struct di_cache *cache, struct di_state *st)
    957 {
    958 	caddr_t	pa;
    959 
    960 	ASSERT(st->mem_size == 0);
    961 	ASSERT(st->memlist == NULL);
    962 	ASSERT(!servicing_interrupt());
    963 	ASSERT(DI_CACHE_LOCKED(*cache));
    964 
    965 	if (cache->cache_size == 0) {
    966 		ASSERT(cache->cache_data == NULL);
    967 		CACHE_DEBUG((DI_ERR, "Empty cache. Skipping copy"));
    968 		return (0);
    969 	}
    970 
    971 	ASSERT(cache->cache_data);
    972 
    973 	di_allocmem(st, cache->cache_size);
    974 
    975 	pa = di_mem_addr(st, 0);
    976 
    977 	ASSERT(pa);
    978 
    979 	/*
    980 	 * Verify that di_allocmem() allocates contiguous memory,
    981 	 * so that it is safe to do straight bcopy()
    982 	 */
    983 	ASSERT(st->memlist != NULL);
    984 	ASSERT(st->memlist->next == NULL);
    985 	bcopy(cache->cache_data, pa, cache->cache_size);
    986 
    987 	return (cache->cache_size);
    988 }
    989 
    990 /*
    991  * Copies a snapshot from di_state to the cache
    992  * Returns:
    993  *	- 0 on failure
    994  *	- size of copied data on success
    995  */
    996 static size_t
    997 di_mem2cache(struct di_state *st, struct di_cache *cache)
    998 {
    999 	size_t	map_size;
   1000 
   1001 	ASSERT(cache->cache_size == 0);
   1002 	ASSERT(cache->cache_data == NULL);
   1003 	ASSERT(!servicing_interrupt());
   1004 	ASSERT(DI_CACHE_LOCKED(*cache));
   1005 
   1006 	if (st->mem_size == 0) {
   1007 		ASSERT(st->memlist == NULL);
   1008 		CACHE_DEBUG((DI_ERR, "Empty memlist. Skipping copy"));
   1009 		return (0);
   1010 	}
   1011 
   1012 	ASSERT(st->memlist);
   1013 
   1014 	/*
   1015 	 * The size of the memory list may be much larger than the
   1016 	 * size of valid data (map_size). Cache only the valid data
   1017 	 */
   1018 	map_size = DI_ALL_PTR(st)->map_size;
   1019 	if (map_size == 0 || map_size < sizeof (struct di_all) ||
   1020 	    map_size > st->mem_size) {
   1021 		CACHE_DEBUG((DI_ERR, "cannot cache: bad size: 0x%x", map_size));
   1022 		return (0);
   1023 	}
   1024 
   1025 	cache->cache_data = kmem_alloc(map_size, KM_SLEEP);
   1026 	cache->cache_size = map_size;
   1027 	di_copymem(st, cache->cache_data, cache->cache_size);
   1028 
   1029 	return (map_size);
   1030 }
   1031 
   1032 /*
   1033  * Make sure there is at least "size" bytes memory left before
   1034  * going on. Otherwise, start on a new chunk.
   1035  */
   1036 static di_off_t
   1037 di_checkmem(struct di_state *st, di_off_t off, size_t size)
   1038 {
   1039 	dcmn_err3((CE_CONT, "di_checkmem: off=%x size=%x\n",
   1040 	    off, (int)size));
   1041 
   1042 	/*
   1043 	 * di_checkmem() shouldn't be called with a size of zero.
   1044 	 * But in case it is, we want to make sure we return a valid
   1045 	 * offset within the memlist and not an offset that points us
   1046 	 * at the end of the memlist.
   1047 	 */
   1048 	if (size == 0) {
   1049 		dcmn_err((CE_WARN, "di_checkmem: invalid zero size used"));
   1050 		size = 1;
   1051 	}
   1052 
   1053 	off = DI_ALIGN(off);
   1054 	if ((st->mem_size - off) < size) {
   1055 		off = st->mem_size;
   1056 		di_allocmem(st, size);
   1057 	}
   1058 
   1059 	/* verify that return value is aligned */
   1060 	ASSERT(off == DI_ALIGN(off));
   1061 	return (off);
   1062 }
   1063 
   1064 /*
   1065  * Copy the private data format from ioctl arg.
   1066  * On success, the ending offset is returned. On error 0 is returned.
   1067  */
   1068 static di_off_t
   1069 di_copyformat(di_off_t off, struct di_state *st, intptr_t arg, int mode)
   1070 {
   1071 	di_off_t		size;
   1072 	struct di_priv_data	*priv;
   1073 	struct di_all		*all = DI_ALL_PTR(st);
   1074 
   1075 	dcmn_err2((CE_CONT, "di_copyformat: off=%x, arg=%p mode=%x\n",
   1076 	    off, (void *)arg, mode));
   1077 
   1078 	/*
   1079 	 * Copyin data and check version.
   1080 	 * We only handle private data version 0.
   1081 	 */
   1082 	priv = kmem_alloc(sizeof (struct di_priv_data), KM_SLEEP);
   1083 	if ((ddi_copyin((void *)arg, priv, sizeof (struct di_priv_data),
   1084 	    mode) != 0) || (priv->version != DI_PRIVDATA_VERSION_0)) {
   1085 		kmem_free(priv, sizeof (struct di_priv_data));
   1086 		return (0);
   1087 	}
   1088 
   1089 	/*
   1090 	 * Save di_priv_data copied from userland in snapshot.
   1091 	 */
   1092 	all->pd_version = priv->version;
   1093 	all->n_ppdata = priv->n_parent;
   1094 	all->n_dpdata = priv->n_driver;
   1095 
   1096 	/*
   1097 	 * copyin private data format, modify offset accordingly
   1098 	 */
   1099 	if (all->n_ppdata) {	/* parent private data format */
   1100 		/*