1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 22 /* 23 * Copyright 2008 Sun Microsystems, Inc. All rights reserved. 24 * Use is subject to license terms. 25 */ 26 27 /* 28 * driver for accessing kernel devinfo tree. 29 */ 30 #include <sys/types.h> 31 #include <sys/pathname.h> 32 #include <sys/debug.h> 33 #include <sys/autoconf.h> 34 #include <sys/vmsystm.h> 35 #include <sys/conf.h> 36 #include <sys/file.h> 37 #include <sys/kmem.h> 38 #include <sys/modctl.h> 39 #include <sys/stat.h> 40 #include <sys/ddi.h> 41 #include <sys/sunddi.h> 42 #include <sys/sunldi_impl.h> 43 #include <sys/sunndi.h> 44 #include <sys/esunddi.h> 45 #include <sys/sunmdi.h> 46 #include <sys/ddi_impldefs.h> 47 #include <sys/ndi_impldefs.h> 48 #include <sys/mdi_impldefs.h> 49 #include <sys/devinfo_impl.h> 50 #include <sys/thread.h> 51 #include <sys/modhash.h> 52 #include <sys/bitmap.h> 53 #include <util/qsort.h> 54 #include <sys/disp.h> 55 #include <sys/kobj.h> 56 #include <sys/crc32.h> 57 58 59 #ifdef DEBUG 60 static int di_debug; 61 #define dcmn_err(args) if (di_debug >= 1) cmn_err args 62 #define dcmn_err2(args) if (di_debug >= 2) cmn_err args 63 #define dcmn_err3(args) if (di_debug >= 3) cmn_err args 64 #else 65 #define dcmn_err(args) /* nothing */ 66 #define dcmn_err2(args) /* nothing */ 67 #define dcmn_err3(args) /* nothing */ 68 #endif 69 70 /* 71 * We partition the space of devinfo minor nodes equally between the full and 72 * unprivileged versions of the driver. The even-numbered minor nodes are the 73 * full version, while the odd-numbered ones are the read-only version. 74 */ 75 static int di_max_opens = 32; 76 77 static int di_prop_dyn = 1; /* enable dynamic property support */ 78 79 #define DI_FULL_PARENT 0 80 #define DI_READONLY_PARENT 1 81 #define DI_NODE_SPECIES 2 82 #define DI_UNPRIVILEGED_NODE(x) (((x) % 2) != 0) 83 84 #define IOC_IDLE 0 /* snapshot ioctl states */ 85 #define IOC_SNAP 1 /* snapshot in progress */ 86 #define IOC_DONE 2 /* snapshot done, but not copied out */ 87 #define IOC_COPY 3 /* copyout in progress */ 88 89 /* 90 * Keep max alignment so we can move snapshot to different platforms. 91 * 92 * NOTE: Most callers should rely on the di_checkmem return value 93 * being aligned, and reestablish *off_p with aligned value, instead 94 * of trying to align size of their allocations: this approach will 95 * minimize memory use. 96 */ 97 #define DI_ALIGN(addr) ((addr + 7l) & ~7l) 98 99 /* 100 * To avoid wasting memory, make a linked list of memory chunks. 101 * Size of each chunk is buf_size. 102 */ 103 struct di_mem { 104 struct di_mem *next; /* link to next chunk */ 105 char *buf; /* contiguous kernel memory */ 106 size_t buf_size; /* size of buf in bytes */ 107 devmap_cookie_t cook; /* cookie from ddi_umem_alloc */ 108 }; 109 110 /* 111 * This is a stack for walking the tree without using recursion. 112 * When the devinfo tree height is above some small size, one 113 * gets watchdog resets on sun4m. 114 */ 115 struct di_stack { 116 void *offset[MAX_TREE_DEPTH]; 117 struct dev_info *dip[MAX_TREE_DEPTH]; 118 int circ[MAX_TREE_DEPTH]; 119 int depth; /* depth of current node to be copied */ 120 }; 121 122 #define TOP_OFFSET(stack) \ 123 ((di_off_t *)(stack)->offset[(stack)->depth - 1]) 124 #define TOP_NODE(stack) \ 125 ((stack)->dip[(stack)->depth - 1]) 126 #define PARENT_OFFSET(stack) \ 127 ((di_off_t *)(stack)->offset[(stack)->depth - 2]) 128 #define EMPTY_STACK(stack) ((stack)->depth == 0) 129 #define POP_STACK(stack) { \ 130 ndi_devi_exit((dev_info_t *)TOP_NODE(stack), \ 131 (stack)->circ[(stack)->depth - 1]); \ 132 ((stack)->depth--); \ 133 } 134 #define PUSH_STACK(stack, node, off_p) { \ 135 ASSERT(node != NULL); \ 136 ndi_devi_enter((dev_info_t *)node, &(stack)->circ[(stack)->depth]); \ 137 (stack)->dip[(stack)->depth] = (node); \ 138 (stack)->offset[(stack)->depth] = (void *)(off_p); \ 139 ((stack)->depth)++; \ 140 } 141 142 #define DI_ALL_PTR(s) DI_ALL(di_mem_addr((s), 0)) 143 144 /* 145 * With devfs, the device tree has no global locks. The device tree is 146 * dynamic and dips may come and go if they are not locked locally. Under 147 * these conditions, pointers are no longer reliable as unique IDs. 148 * Specifically, these pointers cannot be used as keys for hash tables 149 * as the same devinfo structure may be freed in one part of the tree only 150 * to be allocated as the structure for a different device in another 151 * part of the tree. This can happen if DR and the snapshot are 152 * happening concurrently. 153 * The following data structures act as keys for devinfo nodes and 154 * pathinfo nodes. 155 */ 156 157 enum di_ktype { 158 DI_DKEY = 1, 159 DI_PKEY = 2 160 }; 161 162 struct di_dkey { 163 dev_info_t *dk_dip; 164 major_t dk_major; 165 int dk_inst; 166 pnode_t dk_nodeid; 167 }; 168 169 struct di_pkey { 170 mdi_pathinfo_t *pk_pip; 171 char *pk_path_addr; 172 dev_info_t *pk_client; 173 dev_info_t *pk_phci; 174 }; 175 176 struct di_key { 177 enum di_ktype k_type; 178 union { 179 struct di_dkey dkey; 180 struct di_pkey pkey; 181 } k_u; 182 }; 183 184 185 struct i_lnode; 186 187 typedef struct i_link { 188 /* 189 * If a di_link struct representing this i_link struct makes it 190 * into the snapshot, then self will point to the offset of 191 * the di_link struct in the snapshot 192 */ 193 di_off_t self; 194 195 int spec_type; /* block or char access type */ 196 struct i_lnode *src_lnode; /* src i_lnode */ 197 struct i_lnode *tgt_lnode; /* tgt i_lnode */ 198 struct i_link *src_link_next; /* next src i_link /w same i_lnode */ 199 struct i_link *tgt_link_next; /* next tgt i_link /w same i_lnode */ 200 } i_link_t; 201 202 typedef struct i_lnode { 203 /* 204 * If a di_lnode struct representing this i_lnode struct makes it 205 * into the snapshot, then self will point to the offset of 206 * the di_lnode struct in the snapshot 207 */ 208 di_off_t self; 209 210 /* 211 * used for hashing and comparing i_lnodes 212 */ 213 int modid; 214 215 /* 216 * public information describing a link endpoint 217 */ 218 struct di_node *di_node; /* di_node in snapshot */ 219 dev_t devt; /* devt */ 220 221 /* 222 * i_link ptr to links coming into this i_lnode node 223 * (this i_lnode is the target of these i_links) 224 */ 225 i_link_t *link_in; 226 227 /* 228 * i_link ptr to links going out of this i_lnode node 229 * (this i_lnode is the source of these i_links) 230 */ 231 i_link_t *link_out; 232 } i_lnode_t; 233 234 /* 235 * Soft state associated with each instance of driver open. 236 */ 237 static struct di_state { 238 di_off_t mem_size; /* total # bytes in memlist */ 239 struct di_mem *memlist; /* head of memlist */ 240 uint_t command; /* command from ioctl */ 241 int di_iocstate; /* snapshot ioctl state */ 242 mod_hash_t *reg_dip_hash; 243 mod_hash_t *reg_pip_hash; 244 int lnode_count; 245 int link_count; 246 247 mod_hash_t *lnode_hash; 248 mod_hash_t *link_hash; 249 } **di_states; 250 251 static kmutex_t di_lock; /* serialize instance assignment */ 252 253 typedef enum { 254 DI_QUIET = 0, /* DI_QUIET must always be 0 */ 255 DI_ERR, 256 DI_INFO, 257 DI_TRACE, 258 DI_TRACE1, 259 DI_TRACE2 260 } di_cache_debug_t; 261 262 static uint_t di_chunk = 32; /* I/O chunk size in pages */ 263 264 #define DI_CACHE_LOCK(c) (mutex_enter(&(c).cache_lock)) 265 #define DI_CACHE_UNLOCK(c) (mutex_exit(&(c).cache_lock)) 266 #define DI_CACHE_LOCKED(c) (mutex_owned(&(c).cache_lock)) 267 268 /* 269 * Check that whole device tree is being configured as a pre-condition for 270 * cleaning up /etc/devices files. 271 */ 272 #define DEVICES_FILES_CLEANABLE(st) \ 273 (((st)->command & DINFOSUBTREE) && ((st)->command & DINFOFORCE) && \ 274 strcmp(DI_ALL_PTR(st)->root_path, "/") == 0) 275 276 #define CACHE_DEBUG(args) \ 277 { if (di_cache_debug != DI_QUIET) di_cache_print args; } 278 279 typedef struct phci_walk_arg { 280 di_off_t off; 281 struct di_state *st; 282 } phci_walk_arg_t; 283 284 static int di_open(dev_t *, int, int, cred_t *); 285 static int di_ioctl(dev_t, int, intptr_t, int, cred_t *, int *); 286 static int di_close(dev_t, int, int, cred_t *); 287 static int di_info(dev_info_t *, ddi_info_cmd_t, void *, void **); 288 static int di_attach(dev_info_t *, ddi_attach_cmd_t); 289 static int di_detach(dev_info_t *, ddi_detach_cmd_t); 290 291 static di_off_t di_copyformat(di_off_t, struct di_state *, intptr_t, int); 292 static di_off_t di_snapshot_and_clean(struct di_state *); 293 static di_off_t di_copydevnm(di_off_t *, struct di_state *); 294 static di_off_t di_copytree(struct dev_info *, di_off_t *, struct di_state *); 295 static di_off_t di_copynode(struct dev_info *, struct di_stack *, 296 struct di_state *); 297 static di_off_t di_getmdata(struct ddi_minor_data *, di_off_t *, di_off_t, 298 struct di_state *); 299 static di_off_t di_getppdata(struct dev_info *, di_off_t *, struct di_state *); 300 static di_off_t di_getdpdata(struct dev_info *, di_off_t *, struct di_state *); 301 static di_off_t di_getprop(int, struct ddi_prop **, di_off_t *, 302 struct di_state *, struct dev_info *); 303 static void di_allocmem(struct di_state *, size_t); 304 static void di_freemem(struct di_state *); 305 static void di_copymem(struct di_state *st, caddr_t buf, size_t bufsiz); 306 static di_off_t di_checkmem(struct di_state *, di_off_t, size_t); 307 static void *di_mem_addr(struct di_state *, di_off_t); 308 static int di_setstate(struct di_state *, int); 309 static void di_register_dip(struct di_state *, dev_info_t *, di_off_t); 310 static void di_register_pip(struct di_state *, mdi_pathinfo_t *, di_off_t); 311 static di_off_t di_getpath_data(dev_info_t *, di_off_t *, di_off_t, 312 struct di_state *, int); 313 static di_off_t di_getlink_data(di_off_t, struct di_state *); 314 static int di_dip_find(struct di_state *st, dev_info_t *node, di_off_t *off_p); 315 316 static int cache_args_valid(struct di_state *st, int *error); 317 static int snapshot_is_cacheable(struct di_state *st); 318 static int di_cache_lookup(struct di_state *st); 319 static int di_cache_update(struct di_state *st); 320 static void di_cache_print(di_cache_debug_t msglevel, char *fmt, ...); 321 static int build_vhci_list(dev_info_t *vh_devinfo, void *arg); 322 static int build_phci_list(dev_info_t *ph_devinfo, void *arg); 323 324 extern int modrootloaded; 325 extern void mdi_walk_vhcis(int (*)(dev_info_t *, void *), void *); 326 extern void mdi_vhci_walk_phcis(dev_info_t *, 327 int (*)(dev_info_t *, void *), void *); 328 329 330 static struct cb_ops di_cb_ops = { 331 di_open, /* open */ 332 di_close, /* close */ 333 nodev, /* strategy */ 334 nodev, /* print */ 335 nodev, /* dump */ 336 nodev, /* read */ 337 nodev, /* write */ 338 di_ioctl, /* ioctl */ 339 nodev, /* devmap */ 340 nodev, /* mmap */ 341 nodev, /* segmap */ 342 nochpoll, /* poll */ 343 ddi_prop_op, /* prop_op */ 344 NULL, /* streamtab */ 345 D_NEW | D_MP /* Driver compatibility flag */ 346 }; 347 348 static struct dev_ops di_ops = { 349 DEVO_REV, /* devo_rev, */ 350 0, /* refcnt */ 351 di_info, /* info */ 352 nulldev, /* identify */ 353 nulldev, /* probe */ 354 di_attach, /* attach */ 355 di_detach, /* detach */ 356 nodev, /* reset */ 357 &di_cb_ops, /* driver operations */ 358 NULL /* bus operations */ 359 }; 360 361 /* 362 * Module linkage information for the kernel. 363 */ 364 static struct modldrv modldrv = { 365 &mod_driverops, 366 "DEVINFO Driver", 367 &di_ops 368 }; 369 370 static struct modlinkage modlinkage = { 371 MODREV_1, 372 &modldrv, 373 NULL 374 }; 375 376 int 377 _init(void) 378 { 379 int error; 380 381 mutex_init(&di_lock, NULL, MUTEX_DRIVER, NULL); 382 383 error = mod_install(&modlinkage); 384 if (error != 0) { 385 mutex_destroy(&di_lock); 386 return (error); 387 } 388 389 return (0); 390 } 391 392 int 393 _info(struct modinfo *modinfop) 394 { 395 return (mod_info(&modlinkage, modinfop)); 396 } 397 398 int 399 _fini(void) 400 { 401 int error; 402 403 error = mod_remove(&modlinkage); 404 if (error != 0) { 405 return (error); 406 } 407 408 mutex_destroy(&di_lock); 409 return (0); 410 } 411 412 static dev_info_t *di_dip; 413 414 /*ARGSUSED*/ 415 static int 416 di_info(dev_info_t *dip, ddi_info_cmd_t infocmd, void *arg, void **result) 417 { 418 int error = DDI_FAILURE; 419 420 switch (infocmd) { 421 case DDI_INFO_DEVT2DEVINFO: 422 *result = (void *)di_dip; 423 error = DDI_SUCCESS; 424 break; 425 case DDI_INFO_DEVT2INSTANCE: 426 /* 427 * All dev_t's map to the same, single instance. 428 */ 429 *result = (void *)0; 430 error = DDI_SUCCESS; 431 break; 432 default: 433 break; 434 } 435 436 return (error); 437 } 438 439 static int 440 di_attach(dev_info_t *dip, ddi_attach_cmd_t cmd) 441 { 442 int error = DDI_FAILURE; 443 444 switch (cmd) { 445 case DDI_ATTACH: 446 di_states = kmem_zalloc( 447 di_max_opens * sizeof (struct di_state *), KM_SLEEP); 448 449 if (ddi_create_minor_node(dip, "devinfo", S_IFCHR, 450 DI_FULL_PARENT, DDI_PSEUDO, NULL) == DDI_FAILURE || 451 ddi_create_minor_node(dip, "devinfo,ro", S_IFCHR, 452 DI_READONLY_PARENT, DDI_PSEUDO, NULL) == DDI_FAILURE) { 453 kmem_free(di_states, 454 di_max_opens * sizeof (struct di_state *)); 455 ddi_remove_minor_node(dip, NULL); 456 error = DDI_FAILURE; 457 } else { 458 di_dip = dip; 459 ddi_report_dev(dip); 460 461 error = DDI_SUCCESS; 462 } 463 break; 464 default: 465 error = DDI_FAILURE; 466 break; 467 } 468 469 return (error); 470 } 471 472 static int 473 di_detach(dev_info_t *dip, ddi_detach_cmd_t cmd) 474 { 475 int error = DDI_FAILURE; 476 477 switch (cmd) { 478 case DDI_DETACH: 479 ddi_remove_minor_node(dip, NULL); 480 di_dip = NULL; 481 kmem_free(di_states, di_max_opens * sizeof (struct di_state *)); 482 483 error = DDI_SUCCESS; 484 break; 485 default: 486 error = DDI_FAILURE; 487 break; 488 } 489 490 return (error); 491 } 492 493 /* 494 * Allow multiple opens by tweaking the dev_t such that it looks like each 495 * open is getting a different minor device. Each minor gets a separate 496 * entry in the di_states[] table. Based on the original minor number, we 497 * discriminate opens of the full and read-only nodes. If all of the instances 498 * of the selected minor node are currently open, we return EAGAIN. 499 */ 500 /*ARGSUSED*/ 501 static int 502 di_open(dev_t *devp, int flag, int otyp, cred_t *credp) 503 { 504 int m; 505 minor_t minor_parent = getminor(*devp); 506 507 if (minor_parent != DI_FULL_PARENT && 508 minor_parent != DI_READONLY_PARENT) 509 return (ENXIO); 510 511 mutex_enter(&di_lock); 512 513 for (m = minor_parent; m < di_max_opens; m += DI_NODE_SPECIES) { 514 if (di_states[m] != NULL) 515 continue; 516 517 di_states[m] = kmem_zalloc(sizeof (struct di_state), KM_SLEEP); 518 break; /* It's ours. */ 519 } 520 521 if (m >= di_max_opens) { 522 /* 523 * maximum open instance for device reached 524 */ 525 mutex_exit(&di_lock); 526 dcmn_err((CE_WARN, "devinfo: maximum devinfo open reached")); 527 return (EAGAIN); 528 } 529 mutex_exit(&di_lock); 530 531 ASSERT(m < di_max_opens); 532 *devp = makedevice(getmajor(*devp), (minor_t)(m + DI_NODE_SPECIES)); 533 534 dcmn_err((CE_CONT, "di_open: thread = %p, assigned minor = %d\n", 535 (void *)curthread, m + DI_NODE_SPECIES)); 536 537 return (0); 538 } 539 540 /*ARGSUSED*/ 541 static int 542 di_close(dev_t dev, int flag, int otype, cred_t *cred_p) 543 { 544 struct di_state *st; 545 int m = (int)getminor(dev) - DI_NODE_SPECIES; 546 547 if (m < 0) { 548 cmn_err(CE_WARN, "closing non-existent devinfo minor %d", 549 m + DI_NODE_SPECIES); 550 return (ENXIO); 551 } 552 553 st = di_states[m]; 554 ASSERT(m < di_max_opens && st != NULL); 555 556 di_freemem(st); 557 kmem_free(st, sizeof (struct di_state)); 558 559 /* 560 * empty slot in state table 561 */ 562 mutex_enter(&di_lock); 563 di_states[m] = NULL; 564 dcmn_err((CE_CONT, "di_close: thread = %p, assigned minor = %d\n", 565 (void *)curthread, m + DI_NODE_SPECIES)); 566 mutex_exit(&di_lock); 567 568 return (0); 569 } 570 571 572 /*ARGSUSED*/ 573 static int 574 di_ioctl(dev_t dev, int cmd, intptr_t arg, int mode, cred_t *credp, int *rvalp) 575 { 576 int rv, error; 577 di_off_t off; 578 struct di_all *all; 579 struct di_state *st; 580 int m = (int)getminor(dev) - DI_NODE_SPECIES; 581 major_t i; 582 char *drv_name; 583 size_t map_size, size; 584 struct di_mem *dcp; 585 int ndi_flags; 586 587 if (m < 0 || m >= di_max_opens) { 588 return (ENXIO); 589 } 590 591 st = di_states[m]; 592 ASSERT(st != NULL); 593 594 dcmn_err2((CE_CONT, "di_ioctl: mode = %x, cmd = %x\n", mode, cmd)); 595 596 switch (cmd) { 597 case DINFOIDENT: 598 /* 599 * This is called from di_init to verify that the driver 600 * opened is indeed devinfo. The purpose is to guard against 601 * sending ioctl to an unknown driver in case of an 602 * unresolved major number conflict during bfu. 603 */ 604 *rvalp = DI_MAGIC; 605 return (0); 606 607 case DINFOLODRV: 608 /* 609 * Hold an installed driver and return the result 610 */ 611 if (DI_UNPRIVILEGED_NODE(m)) { 612 /* 613 * Only the fully enabled instances may issue 614 * DINFOLDDRV. 615 */ 616 return (EACCES); 617 } 618 619 drv_name = kmem_alloc(MAXNAMELEN, KM_SLEEP); 620 if (ddi_copyin((void *)arg, drv_name, MAXNAMELEN, mode) != 0) { 621 kmem_free(drv_name, MAXNAMELEN); 622 return (EFAULT); 623 } 624 625 /* 626 * Some 3rd party driver's _init() walks the device tree, 627 * so we load the driver module before configuring driver. 628 */ 629 i = ddi_name_to_major(drv_name); 630 if (ddi_hold_driver(i) == NULL) { 631 kmem_free(drv_name, MAXNAMELEN); 632 return (ENXIO); 633 } 634 635 ndi_flags = NDI_DEVI_PERSIST | NDI_CONFIG | NDI_NO_EVENT; 636 637 /* 638 * i_ddi_load_drvconf() below will trigger a reprobe 639 * via reset_nexus_flags(). NDI_DRV_CONF_REPROBE isn't 640 * needed here. 641 */ 642 modunload_disable(); 643 (void) i_ddi_load_drvconf(i); 644 (void) ndi_devi_config_driver(ddi_root_node(), ndi_flags, i); 645 kmem_free(drv_name, MAXNAMELEN); 646 ddi_rele_driver(i); 647 rv = i_ddi_devs_attached(i); 648 modunload_enable(); 649 650 i_ddi_di_cache_invalidate(KM_SLEEP); 651 652 return ((rv == DDI_SUCCESS)? 0 : ENXIO); 653 654 case DINFOUSRLD: 655 /* 656 * The case for copying snapshot to userland 657 */ 658 if (di_setstate(st, IOC_COPY) == -1) 659 return (EBUSY); 660 661 map_size = DI_ALL_PTR(st)->map_size; 662 if (map_size == 0) { 663 (void) di_setstate(st, IOC_DONE); 664 return (EFAULT); 665 } 666 667 /* 668 * copyout the snapshot 669 */ 670 map_size = (map_size + PAGEOFFSET) & PAGEMASK; 671 672 /* 673 * Return the map size, so caller may do a sanity 674 * check against the return value of snapshot ioctl() 675 */ 676 *rvalp = (int)map_size; 677 678 /* 679 * Copy one chunk at a time 680 */ 681 off = 0; 682 dcp = st->memlist; 683 while (map_size) { 684 size = dcp->buf_size; 685 if (map_size <= size) { 686 size = map_size; 687 } 688 689 if (ddi_copyout(di_mem_addr(st, off), 690 (void *)(arg + off), size, mode) != 0) { 691 (void) di_setstate(st, IOC_DONE); 692 return (EFAULT); 693 } 694 695 map_size -= size; 696 off += size; 697 dcp = dcp->next; 698 } 699 700 di_freemem(st); 701 (void) di_setstate(st, IOC_IDLE); 702 return (0); 703 704 default: 705 if ((cmd & ~DIIOC_MASK) != DIIOC) { 706 /* 707 * Invalid ioctl command 708 */ 709 return (ENOTTY); 710 } 711 /* 712 * take a snapshot 713 */ 714 st->command = cmd & DIIOC_MASK; 715 /*FALLTHROUGH*/ 716 } 717 718 /* 719 * Obtain enough memory to hold header + rootpath. We prevent kernel 720 * memory exhaustion by freeing any previously allocated snapshot and 721 * refusing the operation; otherwise we would be allowing ioctl(), 722 * ioctl(), ioctl(), ..., panic. 723 */ 724 if (di_setstate(st, IOC_SNAP) == -1) 725 return (EBUSY); 726 727 /* 728 * Initial memlist always holds di_all and the root_path - and 729 * is at least a page and size. 730 */ 731 size = sizeof (struct di_all) + 732 sizeof (((struct dinfo_io *)(NULL))->root_path); 733 if (size < PAGESIZE) 734 size = PAGESIZE; 735 off = di_checkmem(st, 0, size); 736 all = DI_ALL_PTR(st); 737 off += sizeof (struct di_all); /* real length of di_all */ 738 739 all->devcnt = devcnt; 740 all->command = st->command; 741 all->version = DI_SNAPSHOT_VERSION; 742 all->top_vhci_devinfo = 0; /* filled by build_vhci_list. */ 743 744 /* 745 * Note the endianness in case we need to transport snapshot 746 * over the network. 747 */ 748 #if defined(_LITTLE_ENDIAN) 749 all->endianness = DI_LITTLE_ENDIAN; 750 #else 751 all->endianness = DI_BIG_ENDIAN; 752 #endif 753 754 /* Copyin ioctl args, store in the snapshot. */ 755 if (copyinstr((void *)arg, all->root_path, 756 sizeof (((struct dinfo_io *)(NULL))->root_path), &size) != 0) { 757 di_freemem(st); 758 (void) di_setstate(st, IOC_IDLE); 759 return (EFAULT); 760 } 761 off += size; /* real length of root_path */ 762 763 if ((st->command & DINFOCLEANUP) && !DEVICES_FILES_CLEANABLE(st)) { 764 di_freemem(st); 765 (void) di_setstate(st, IOC_IDLE); 766 return (EINVAL); 767 } 768 769 error = 0; 770 if ((st->command & DINFOCACHE) && !cache_args_valid(st, &error)) { 771 di_freemem(st); 772 (void) di_setstate(st, IOC_IDLE); 773 return (error); 774 } 775 776 /* 777 * Only the fully enabled version may force load drivers or read 778 * the parent private data from a driver. 779 */ 780 if ((st->command & (DINFOPRIVDATA | DINFOFORCE)) != 0 && 781 DI_UNPRIVILEGED_NODE(m)) { 782 di_freemem(st); 783 (void) di_setstate(st, IOC_IDLE); 784 return (EACCES); 785 } 786 787 /* Do we need private data? */ 788 if (st->command & DINFOPRIVDATA) { 789 arg += sizeof (((struct dinfo_io *)(NULL))->root_path); 790 791 #ifdef _MULTI_DATAMODEL 792 switch (ddi_model_convert_from(mode & FMODELS)) { 793 case DDI_MODEL_ILP32: { 794 /* 795 * Cannot copy private data from 64-bit kernel 796 * to 32-bit app 797 */ 798 di_freemem(st); 799 (void) di_setstate(st, IOC_IDLE); 800 return (EINVAL); 801 } 802 case DDI_MODEL_NONE: 803 if ((off = di_copyformat(off, st, arg, mode)) == 0) { 804 di_freemem(st); 805 (void) di_setstate(st, IOC_IDLE); 806 return (EFAULT); 807 } 808 break; 809 } 810 #else /* !_MULTI_DATAMODEL */ 811 if ((off = di_copyformat(off, st, arg, mode)) == 0) { 812 di_freemem(st); 813 (void) di_setstate(st, IOC_IDLE); 814 return (EFAULT); 815 } 816 #endif /* _MULTI_DATAMODEL */ 817 } 818 819 all->top_devinfo = DI_ALIGN(off); 820 821 /* 822 * For cache lookups we reallocate memory from scratch, 823 * so the value of "all" is no longer valid. 824 */ 825 all = NULL; 826 827 if (st->command & DINFOCACHE) { 828 *rvalp = di_cache_lookup(st); 829 } else if (snapshot_is_cacheable(st)) { 830 DI_CACHE_LOCK(di_cache); 831 *rvalp = di_cache_update(st); 832 DI_CACHE_UNLOCK(di_cache); 833 } else 834 *rvalp = di_snapshot_and_clean(st); 835 836 if (*rvalp) { 837 DI_ALL_PTR(st)->map_size = *rvalp; 838 (void) di_setstate(st, IOC_DONE); 839 } else { 840 di_freemem(st); 841 (void) di_setstate(st, IOC_IDLE); 842 } 843 844 return (0); 845 } 846 847 /* 848 * Get a chunk of memory >= size, for the snapshot 849 */ 850 static void 851 di_allocmem(struct di_state *st, size_t size) 852 { 853 struct di_mem *mem = kmem_zalloc(sizeof (struct di_mem), KM_SLEEP); 854 855 /* 856 * Round up size to nearest power of 2. If it is less 857 * than st->mem_size, set it to st->mem_size (i.e., 858 * the mem_size is doubled every time) to reduce the 859 * number of memory allocations. 860 */ 861 size_t tmp = 1; 862 while (tmp < size) { 863 tmp <<= 1; 864 } 865 size = (tmp > st->mem_size) ? tmp : st->mem_size; 866 867 mem->buf = ddi_umem_alloc(size, DDI_UMEM_SLEEP, &mem->cook); 868 mem->buf_size = size; 869 870 dcmn_err2((CE_CONT, "di_allocmem: mem_size=%x\n", st->mem_size)); 871 872 if (st->mem_size == 0) { /* first chunk */ 873 st->memlist = mem; 874 } else { 875 /* 876 * locate end of linked list and add a chunk at the end 877 */ 878 struct di_mem *dcp = st->memlist; 879 while (dcp->next != NULL) { 880 dcp = dcp->next; 881 } 882 883 dcp->next = mem; 884 } 885 886 st->mem_size += size; 887 } 888 889 /* 890 * Copy upto bufsiz bytes of the memlist to buf 891 */ 892 static void 893 di_copymem(struct di_state *st, caddr_t buf, size_t bufsiz) 894 { 895 struct di_mem *dcp; 896 size_t copysz; 897 898 if (st->mem_size == 0) { 899 ASSERT(st->memlist == NULL); 900 return; 901 } 902 903 copysz = 0; 904 for (dcp = st->memlist; dcp; dcp = dcp->next) { 905 906 ASSERT(bufsiz > 0); 907 908 if (bufsiz <= dcp->buf_size) 909 copysz = bufsiz; 910 else 911 copysz = dcp->buf_size; 912 913 bcopy(dcp->buf, buf, copysz); 914 915 buf += copysz; 916 bufsiz -= copysz; 917 918 if (bufsiz == 0) 919 break; 920 } 921 } 922 923 /* 924 * Free all memory for the snapshot 925 */ 926 static void 927 di_freemem(struct di_state *st) 928 { 929 struct di_mem *dcp, *tmp; 930 931 dcmn_err2((CE_CONT, "di_freemem\n")); 932 933 if (st->mem_size) { 934 dcp = st->memlist; 935 while (dcp) { /* traverse the linked list */ 936 tmp = dcp; 937 dcp = dcp->next; 938 ddi_umem_free(tmp->cook); 939 kmem_free(tmp, sizeof (struct di_mem)); 940 } 941 st->mem_size = 0; 942 st->memlist = NULL; 943 } 944 945 ASSERT(st->mem_size == 0); 946 ASSERT(st->memlist == NULL); 947 } 948 949 /* 950 * Copies cached data to the di_state structure. 951 * Returns: 952 * - size of data copied, on SUCCESS 953 * - 0 on failure 954 */ 955 static int 956 di_cache2mem(struct di_cache *cache, struct di_state *st) 957 { 958 caddr_t pa; 959 960 ASSERT(st->mem_size == 0); 961 ASSERT(st->memlist == NULL); 962 ASSERT(!servicing_interrupt()); 963 ASSERT(DI_CACHE_LOCKED(*cache)); 964 965 if (cache->cache_size == 0) { 966 ASSERT(cache->cache_data == NULL); 967 CACHE_DEBUG((DI_ERR, "Empty cache. Skipping copy")); 968 return (0); 969 } 970 971 ASSERT(cache->cache_data); 972 973 di_allocmem(st, cache->cache_size); 974 975 pa = di_mem_addr(st, 0); 976 977 ASSERT(pa); 978 979 /* 980 * Verify that di_allocmem() allocates contiguous memory, 981 * so that it is safe to do straight bcopy() 982 */ 983 ASSERT(st->memlist != NULL); 984 ASSERT(st->memlist->next == NULL); 985 bcopy(cache->cache_data, pa, cache->cache_size); 986 987 return (cache->cache_size); 988 } 989 990 /* 991 * Copies a snapshot from di_state to the cache 992 * Returns: 993 * - 0 on failure 994 * - size of copied data on success 995 */ 996 static size_t 997 di_mem2cache(struct di_state *st, struct di_cache *cache) 998 { 999 size_t map_size; 1000 1001 ASSERT(cache->cache_size == 0); 1002 ASSERT(cache->cache_data == NULL); 1003 ASSERT(!servicing_interrupt()); 1004 ASSERT(DI_CACHE_LOCKED(*cache)); 1005 1006 if (st->mem_size == 0) { 1007 ASSERT(st->memlist == NULL); 1008 CACHE_DEBUG((DI_ERR, "Empty memlist. Skipping copy")); 1009 return (0); 1010 } 1011 1012 ASSERT(st->memlist); 1013 1014 /* 1015 * The size of the memory list may be much larger than the 1016 * size of valid data (map_size). Cache only the valid data 1017 */ 1018 map_size = DI_ALL_PTR(st)->map_size; 1019 if (map_size == 0 || map_size < sizeof (struct di_all) || 1020 map_size > st->mem_size) { 1021 CACHE_DEBUG((DI_ERR, "cannot cache: bad size: 0x%x", map_size)); 1022 return (0); 1023 } 1024 1025 cache->cache_data = kmem_alloc(map_size, KM_SLEEP); 1026 cache->cache_size = map_size; 1027 di_copymem(st, cache->cache_data, cache->cache_size); 1028 1029 return (map_size); 1030 } 1031 1032 /* 1033 * Make sure there is at least "size" bytes memory left before 1034 * going on. Otherwise, start on a new chunk. 1035 */ 1036 static di_off_t 1037 di_checkmem(struct di_state *st, di_off_t off, size_t size) 1038 { 1039 dcmn_err3((CE_CONT, "di_checkmem: off=%x size=%x\n", 1040 off, (int)size)); 1041 1042 /* 1043 * di_checkmem() shouldn't be called with a size of zero. 1044 * But in case it is, we want to make sure we return a valid 1045 * offset within the memlist and not an offset that points us 1046 * at the end of the memlist. 1047 */ 1048 if (size == 0) { 1049 dcmn_err((CE_WARN, "di_checkmem: invalid zero size used")); 1050 size = 1; 1051 } 1052 1053 off = DI_ALIGN(off); 1054 if ((st->mem_size - off) < size) { 1055 off = st->mem_size; 1056 di_allocmem(st, size); 1057 } 1058 1059 /* verify that return value is aligned */ 1060 ASSERT(off == DI_ALIGN(off)); 1061 return (off); 1062 } 1063 1064 /* 1065 * Copy the private data format from ioctl arg. 1066 * On success, the ending offset is returned. On error 0 is returned. 1067 */ 1068 static di_off_t 1069 di_copyformat(di_off_t off, struct di_state *st, intptr_t arg, int mode) 1070 { 1071 di_off_t size; 1072 struct di_priv_data *priv; 1073 struct di_all *all = DI_ALL_PTR(st); 1074 1075 dcmn_err2((CE_CONT, "di_copyformat: off=%x, arg=%p mode=%x\n", 1076 off, (void *)arg, mode)); 1077 1078 /* 1079 * Copyin data and check version. 1080 * We only handle private data version 0. 1081 */ 1082 priv = kmem_alloc(sizeof (struct di_priv_data), KM_SLEEP); 1083 if ((ddi_copyin((void *)arg, priv, sizeof (struct di_priv_data), 1084 mode) != 0) || (priv->version != DI_PRIVDATA_VERSION_0)) { 1085 kmem_free(priv, sizeof (struct di_priv_data)); 1086 return (0); 1087 } 1088 1089 /* 1090 * Save di_priv_data copied from userland in snapshot. 1091 */ 1092 all->pd_version = priv->version; 1093 all->n_ppdata = priv->n_parent; 1094 all->n_dpdata = priv->n_driver; 1095 1096 /* 1097 * copyin private data format, modify offset accordingly 1098 */ 1099 if (all->n_ppdata) { /* parent private data format */ 1100 /*