1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 22 /* 23 * Copyright 2008 Sun Microsystems, Inc. All rights reserved. 24 * Use is subject to license terms. 25 */ 26 27 #pragma ident "@(#)spa.c 1.46 08/01/02 SMI" 28 29 /* 30 * This file contains all the routines used when modifying on-disk SPA state. 31 * This includes opening, importing, destroying, exporting a pool, and syncing a 32 * pool. 33 */ 34 35 #include <sys/zfs_context.h> 36 #include <sys/fm/fs/zfs.h> 37 #include <sys/spa_impl.h> 38 #include <sys/zio.h> 39 #include <sys/zio_checksum.h> 40 #include <sys/zio_compress.h> 41 #include <sys/dmu.h> 42 #include <sys/dmu_tx.h> 43 #include <sys/zap.h> 44 #include <sys/zil.h> 45 #include <sys/vdev_impl.h> 46 #include <sys/metaslab.h> 47 #include <sys/uberblock_impl.h> 48 #include <sys/txg.h> 49 #include <sys/avl.h> 50 #include <sys/dmu_traverse.h> 51 #include <sys/dmu_objset.h> 52 #include <sys/unique.h> 53 #include <sys/dsl_pool.h> 54 #include <sys/dsl_dataset.h> 55 #include <sys/dsl_dir.h> 56 #include <sys/dsl_prop.h> 57 #include <sys/dsl_synctask.h> 58 #include <sys/fs/zfs.h> 59 #include <sys/arc.h> 60 #include <sys/callb.h> 61 #include <sys/systeminfo.h> 62 #include <sys/sunddi.h> 63 64 #include "zfs_prop.h" 65 66 int zio_taskq_threads = 8; 67 68 static void spa_sync_props(void *arg1, void *arg2, cred_t *cr, dmu_tx_t *tx); 69 70 /* 71 * ========================================================================== 72 * SPA properties routines 73 * ========================================================================== 74 */ 75 76 /* 77 * Add a (source=src, propname=propval) list to an nvlist. 78 */ 79 static int 80 spa_prop_add_list(nvlist_t *nvl, zpool_prop_t prop, char *strval, 81 uint64_t intval, zprop_source_t src) 82 { 83 const char *propname = zpool_prop_to_name(prop); 84 nvlist_t *propval; 85 int err = 0; 86 87 if (err = nvlist_alloc(&propval, NV_UNIQUE_NAME, KM_SLEEP)) 88 return (err); 89 90 if (err = nvlist_add_uint64(propval, ZPROP_SOURCE, src)) 91 goto out; 92 93 if (strval != NULL) { 94 if (err = nvlist_add_string(propval, ZPROP_VALUE, strval)) 95 goto out; 96 } else { 97 if (err = nvlist_add_uint64(propval, ZPROP_VALUE, intval)) 98 goto out; 99 } 100 101 err = nvlist_add_nvlist(nvl, propname, propval); 102 out: 103 nvlist_free(propval); 104 return (err); 105 } 106 107 /* 108 * Get property values from the spa configuration. 109 */ 110 static int 111 spa_prop_get_config(spa_t *spa, nvlist_t **nvp) 112 { 113 uint64_t size = spa_get_space(spa); 114 uint64_t used = spa_get_alloc(spa); 115 uint64_t cap, version; 116 zprop_source_t src = ZPROP_SRC_NONE; 117 int err; 118 char *cachefile; 119 size_t len; 120 121 /* 122 * readonly properties 123 */ 124 if (err = spa_prop_add_list(*nvp, ZPOOL_PROP_NAME, spa->spa_name, 125 0, src)) 126 return (err); 127 128 if (err = spa_prop_add_list(*nvp, ZPOOL_PROP_SIZE, NULL, size, src)) 129 return (err); 130 131 if (err = spa_prop_add_list(*nvp, ZPOOL_PROP_USED, NULL, used, src)) 132 return (err); 133 134 if (err = spa_prop_add_list(*nvp, ZPOOL_PROP_AVAILABLE, NULL, 135 size - used, src)) 136 return (err); 137 138 cap = (size == 0) ? 0 : (used * 100 / size); 139 if (err = spa_prop_add_list(*nvp, ZPOOL_PROP_CAPACITY, NULL, cap, src)) 140 return (err); 141 142 if (err = spa_prop_add_list(*nvp, ZPOOL_PROP_GUID, NULL, 143 spa_guid(spa), src)) 144 return (err); 145 146 if (err = spa_prop_add_list(*nvp, ZPOOL_PROP_HEALTH, NULL, 147 spa->spa_root_vdev->vdev_state, src)) 148 return (err); 149 150 /* 151 * settable properties that are not stored in the pool property object. 152 */ 153 version = spa_version(spa); 154 if (version == zpool_prop_default_numeric(ZPOOL_PROP_VERSION)) 155 src = ZPROP_SRC_DEFAULT; 156 else 157 src = ZPROP_SRC_LOCAL; 158 if (err = spa_prop_add_list(*nvp, ZPOOL_PROP_VERSION, NULL, 159 version, src)) 160 return (err); 161 162 if (spa->spa_root != NULL) { 163 src = ZPROP_SRC_LOCAL; 164 if (err = spa_prop_add_list(*nvp, ZPOOL_PROP_ALTROOT, 165 spa->spa_root, 0, src)) 166 return (err); 167 } 168 169 if (spa->spa_config_dir != NULL) { 170 if (strcmp(spa->spa_config_dir, "none") == 0) { 171 err = spa_prop_add_list(*nvp, ZPOOL_PROP_CACHEFILE, 172 spa->spa_config_dir, 0, ZPROP_SRC_LOCAL); 173 } else { 174 len = strlen(spa->spa_config_dir) + 175 strlen(spa->spa_config_file) + 2; 176 cachefile = kmem_alloc(len, KM_SLEEP); 177 (void) snprintf(cachefile, len, "%s/%s", 178 spa->spa_config_dir, spa->spa_config_file); 179 err = spa_prop_add_list(*nvp, ZPOOL_PROP_CACHEFILE, 180 cachefile, 0, ZPROP_SRC_LOCAL); 181 kmem_free(cachefile, len); 182 } 183 184 if (err) 185 return (err); 186 } 187 188 return (0); 189 } 190 191 /* 192 * Get zpool property values. 193 */ 194 int 195 spa_prop_get(spa_t *spa, nvlist_t **nvp) 196 { 197 zap_cursor_t zc; 198 zap_attribute_t za; 199 objset_t *mos = spa->spa_meta_objset; 200 int err; 201 202 if (err = nvlist_alloc(nvp, NV_UNIQUE_NAME, KM_SLEEP)) 203 return (err); 204 205 /* 206 * Get properties from the spa config. 207 */ 208 if (err = spa_prop_get_config(spa, nvp)) 209 goto out; 210 211 mutex_enter(&spa->spa_props_lock); 212 /* If no pool property object, no more prop to get. */ 213 if (spa->spa_pool_props_object == 0) { 214 mutex_exit(&spa->spa_props_lock); 215 return (0); 216 } 217 218 /* 219 * Get properties from the MOS pool property object. 220 */ 221 for (zap_cursor_init(&zc, mos, spa->spa_pool_props_object); 222 (err = zap_cursor_retrieve(&zc, &za)) == 0; 223 zap_cursor_advance(&zc)) { 224 uint64_t intval = 0; 225 char *strval = NULL; 226 zprop_source_t src = ZPROP_SRC_DEFAULT; 227 zpool_prop_t prop; 228 229 if ((prop = zpool_name_to_prop(za.za_name)) == ZPROP_INVAL) 230 continue; 231 232 switch (za.za_integer_length) { 233 case 8: 234 /* integer property */ 235 if (za.za_first_integer != 236 zpool_prop_default_numeric(prop)) 237 src = ZPROP_SRC_LOCAL; 238 239 if (prop == ZPOOL_PROP_BOOTFS) { 240 dsl_pool_t *dp; 241 dsl_dataset_t *ds = NULL; 242 243 dp = spa_get_dsl(spa); 244 rw_enter(&dp->dp_config_rwlock, RW_READER); 245 if (err = dsl_dataset_open_obj(dp, 246 za.za_first_integer, NULL, DS_MODE_NONE, 247 FTAG, &ds)) { 248 rw_exit(&dp->dp_config_rwlock); 249 break; 250 } 251 252 strval = kmem_alloc( 253 MAXNAMELEN + strlen(MOS_DIR_NAME) + 1, 254 KM_SLEEP); 255 dsl_dataset_name(ds, strval); 256 dsl_dataset_close(ds, DS_MODE_NONE, FTAG); 257 rw_exit(&dp->dp_config_rwlock); 258 } else { 259 strval = NULL; 260 intval = za.za_first_integer; 261 } 262 263 err = spa_prop_add_list(*nvp, prop, strval, 264 intval, src); 265 266 if (strval != NULL) 267 kmem_free(strval, 268 MAXNAMELEN + strlen(MOS_DIR_NAME) + 1); 269 270 break; 271 272 case 1: 273 /* string property */ 274 strval = kmem_alloc(za.za_num_integers, KM_SLEEP); 275 err = zap_lookup(mos, spa->spa_pool_props_object, 276 za.za_name, 1, za.za_num_integers, strval); 277 if (err) { 278 kmem_free(strval, za.za_num_integers); 279 break; 280 } 281 err = spa_prop_add_list(*nvp, prop, strval, 0, src); 282 kmem_free(strval, za.za_num_integers); 283 break; 284 285 default: 286 break; 287 } 288 } 289 zap_cursor_fini(&zc); 290 mutex_exit(&spa->spa_props_lock); 291 out: 292 if (err && err != ENOENT) { 293 nvlist_free(*nvp); 294 return (err); 295 } 296 297 return (0); 298 } 299 300 /* 301 * Validate the given pool properties nvlist and modify the list 302 * for the property values to be set. 303 */ 304 static int 305 spa_prop_validate(spa_t *spa, nvlist_t *props) 306 { 307 nvpair_t *elem; 308 int error = 0, reset_bootfs = 0; 309 uint64_t objnum; 310 311 elem = NULL; 312 while ((elem = nvlist_next_nvpair(props, elem)) != NULL) { 313 zpool_prop_t prop; 314 char *propname, *strval; 315 uint64_t intval; 316 vdev_t *rvdev; 317 char *vdev_type; 318 objset_t *os; 319 char *slash; 320 321 propname = nvpair_name(elem); 322 323 if ((prop = zpool_name_to_prop(propname)) == ZPROP_INVAL) 324 return (EINVAL); 325 326 switch (prop) { 327 case ZPOOL_PROP_VERSION: 328 error = nvpair_value_uint64(elem, &intval); 329 if (!error && 330 (intval < spa_version(spa) || intval > SPA_VERSION)) 331 error = EINVAL; 332 break; 333 334 case ZPOOL_PROP_DELEGATION: 335 case ZPOOL_PROP_AUTOREPLACE: 336 error = nvpair_value_uint64(elem, &intval); 337 if (!error && intval > 1) 338 error = EINVAL; 339 break; 340 341 case ZPOOL_PROP_BOOTFS: 342 if (spa_version(spa) < SPA_VERSION_BOOTFS) { 343 error = ENOTSUP; 344 break; 345 } 346 347 /* 348 * A bootable filesystem can not be on a RAIDZ pool 349 * nor a striped pool with more than 1 device. 350 */ 351 rvdev = spa->spa_root_vdev; 352 vdev_type = 353 rvdev->vdev_child[0]->vdev_ops->vdev_op_type; 354 if (rvdev->vdev_children > 1 || 355 strcmp(vdev_type, VDEV_TYPE_RAIDZ) == 0 || 356 strcmp(vdev_type, VDEV_TYPE_MISSING) == 0) { 357 error = ENOTSUP; 358 break; 359 } 360 361 reset_bootfs = 1; 362 363 error = nvpair_value_string(elem, &strval); 364 365 if (!error) { 366 if (strval == NULL || strval[0] == '\0') { 367 objnum = zpool_prop_default_numeric( 368 ZPOOL_PROP_BOOTFS); 369 break; 370 } 371 372 if (error = dmu_objset_open(strval, DMU_OST_ZFS, 373 DS_MODE_STANDARD | DS_MODE_READONLY, &os)) 374 break; 375 objnum = dmu_objset_id(os); 376 dmu_objset_close(os); 377 } 378 break; 379 case ZPOOL_PROP_FAILUREMODE: 380 error = nvpair_value_uint64(elem, &intval); 381 if (!error && (intval < ZIO_FAILURE_MODE_WAIT || 382 intval > ZIO_FAILURE_MODE_PANIC)) 383 error = EINVAL; 384 385 /* 386 * This is a special case which only occurs when 387 * the pool has completely failed. This allows 388 * the user to change the in-core failmode property 389 * without syncing it out to disk (I/Os might 390 * currently be blocked). We do this by returning 391 * EIO to the caller (spa_prop_set) to trick it 392 * into thinking we encountered a property validation 393 * error. 394 */ 395 if (!error && spa_state(spa) == POOL_STATE_IO_FAILURE) { 396 spa->spa_failmode = intval; 397 error = EIO; 398 } 399 break; 400 401 case ZPOOL_PROP_CACHEFILE: 402 if ((error = nvpair_value_string(elem, &strval)) != 0) 403 break; 404 405 if (strval[0] == '\0') 406 break; 407 408 if (strcmp(strval, "none") == 0) 409 break; 410 411 if (strval[0] != '/') { 412 error = EINVAL; 413 break; 414 } 415 416 slash = strrchr(strval, '/'); 417 ASSERT(slash != NULL); 418 419 if (slash[1] == '\0' || strcmp(slash, "/.") == 0 || 420 strcmp(slash, "/..") == 0) 421 error = EINVAL; 422 break; 423 } 424 425 if (error) 426 break; 427 } 428 429 if (!error && reset_bootfs) { 430 error = nvlist_remove(props, 431 zpool_prop_to_name(ZPOOL_PROP_BOOTFS), DATA_TYPE_STRING); 432 433 if (!error) { 434 error = nvlist_add_uint64(props, 435 zpool_prop_to_name(ZPOOL_PROP_BOOTFS), objnum); 436 } 437 } 438 439 return (error); 440 } 441 442 int 443 spa_prop_set(spa_t *spa, nvlist_t *nvp) 444 { 445 int error; 446 447 if ((error = spa_prop_validate(spa, nvp)) != 0) 448 return (error); 449 450 return (dsl_sync_task_do(spa_get_dsl(spa), NULL, spa_sync_props, 451 spa, nvp, 3)); 452 } 453 454 /* 455 * If the bootfs property value is dsobj, clear it. 456 */ 457 void 458 spa_prop_clear_bootfs(spa_t *spa, uint64_t dsobj, dmu_tx_t *tx) 459 { 460 if (spa->spa_bootfs == dsobj && spa->spa_pool_props_object != 0) { 461 VERIFY(zap_remove(spa->spa_meta_objset, 462 spa->spa_pool_props_object, 463 zpool_prop_to_name(ZPOOL_PROP_BOOTFS), tx) == 0); 464 spa->spa_bootfs = 0; 465 } 466 } 467 468 /* 469 * ========================================================================== 470 * SPA state manipulation (open/create/destroy/import/export) 471 * ========================================================================== 472 */ 473 474 static int 475 spa_error_entry_compare(const void *a, const void *b) 476 { 477 spa_error_entry_t *sa = (spa_error_entry_t *)a; 478 spa_error_entry_t *sb = (spa_error_entry_t *)b; 479 int ret; 480 481 ret = bcmp(&sa->se_bookmark, &sb->se_bookmark, 482 sizeof (zbookmark_t)); 483 484 if (ret < 0) 485 return (-1); 486 else if (ret > 0) 487 return (1); 488 else 489 return (0); 490 } 491 492 /* 493 * Utility function which retrieves copies of the current logs and 494 * re-initializes them in the process. 495 */ 496 void 497 spa_get_errlists(spa_t *spa, avl_tree_t *last, avl_tree_t *scrub) 498 { 499 ASSERT(MUTEX_HELD(&spa->spa_errlist_lock)); 500 501 bcopy(&spa->spa_errlist_last, last, sizeof (avl_tree_t)); 502 bcopy(&spa->spa_errlist_scrub, scrub, sizeof (avl_tree_t)); 503 504 avl_create(&spa->spa_errlist_scrub, 505 spa_error_entry_compare, sizeof (spa_error_entry_t), 506 offsetof(spa_error_entry_t, se_avl)); 507 avl_create(&spa->spa_errlist_last, 508 spa_error_entry_compare, sizeof (spa_error_entry_t), 509 offsetof(spa_error_entry_t, se_avl)); 510 } 511 512 /* 513 * Activate an uninitialized pool. 514 */ 515 static void 516 spa_activate(spa_t *spa) 517 { 518 int t; 519 520 ASSERT(spa->spa_state == POOL_STATE_UNINITIALIZED); 521 522 spa->spa_state = POOL_STATE_ACTIVE; 523 524 spa->spa_normal_class = metaslab_class_create(); 525 spa->spa_log_class = metaslab_class_create(); 526 527 for (t = 0; t < ZIO_TYPES; t++) { 528 spa->spa_zio_issue_taskq[t] = taskq_create("spa_zio_issue", 529 zio_taskq_threads, maxclsyspri, 50, INT_MAX, 530 TASKQ_PREPOPULATE); 531 spa->spa_zio_intr_taskq[t] = taskq_create("spa_zio_intr", 532 zio_taskq_threads, maxclsyspri, 50, INT_MAX, 533 TASKQ_PREPOPULATE); 534 } 535 536 list_create(&spa->spa_dirty_list, sizeof (vdev_t), 537 offsetof(vdev_t, vdev_dirty_node)); 538 list_create(&spa->spa_zio_list, sizeof (zio_t), 539 offsetof(zio_t, zio_link_node)); 540 541 txg_list_create(&spa->spa_vdev_txg_list, 542 offsetof(struct vdev, vdev_txg_node)); 543 544 avl_create(&spa->spa_errlist_scrub, 545 spa_error_entry_compare, sizeof (spa_error_entry_t), 546 offsetof(spa_error_entry_t, se_avl)); 547 avl_create(&spa->spa_errlist_last, 548 spa_error_entry_compare, sizeof (spa_error_entry_t), 549 offsetof(spa_error_entry_t, se_avl)); 550 } 551 552 /* 553 * Opposite of spa_activate(). 554 */ 555 static void 556 spa_deactivate(spa_t *spa) 557 { 558 int t; 559 560 ASSERT(spa->spa_sync_on == B_FALSE); 561 ASSERT(spa->spa_dsl_pool == NULL); 562 ASSERT(spa->spa_root_vdev == NULL); 563 564 ASSERT(spa->spa_state != POOL_STATE_UNINITIALIZED); 565 566 txg_list_destroy(&spa->spa_vdev_txg_list); 567 568 list_destroy(&spa->spa_dirty_list); 569 list_destroy(&spa->spa_zio_list); 570 571 for (t = 0; t < ZIO_TYPES; t++) { 572 taskq_destroy(spa->spa_zio_issue_taskq[t]); 573 taskq_destroy(spa->spa_zio_intr_taskq[t]); 574 spa->spa_zio_issue_taskq[t] = NULL; 575 spa->spa_zio_intr_taskq[t] = NULL; 576 } 577 578 metaslab_class_destroy(spa->spa_normal_class); 579 spa->spa_normal_class = NULL; 580 581 metaslab_class_destroy(spa->spa_log_class); 582 spa->spa_log_class = NULL; 583 584 /* 585 * If this was part of an import or the open otherwise failed, we may 586 * still have errors left in the queues. Empty them just in case. 587 */ 588 spa_errlog_drain(spa); 589 590 avl_destroy(&spa->spa_errlist_scrub); 591 avl_destroy(&spa->spa_errlist_last); 592 593 spa->spa_state = POOL_STATE_UNINITIALIZED; 594 } 595 596 /* 597 * Verify a pool configuration, and construct the vdev tree appropriately. This 598 * will create all the necessary vdevs in the appropriate layout, with each vdev 599 * in the CLOSED state. This will prep the pool before open/creation/import. 600 * All vdev validation is done by the vdev_alloc() routine. 601 */ 602 static int 603 spa_config_parse(spa_t *spa, vdev_t **vdp, nvlist_t *nv, vdev_t *parent, 604 uint_t id, int atype) 605 { 606 nvlist_t **child; 607 uint_t c, children; 608 int error; 609 610 if ((error = vdev_alloc(spa, vdp, nv, parent, id, atype)) != 0) 611 return (error); 612 613 if ((*vdp)->vdev_ops->vdev_op_leaf) 614 return (0); 615 616 if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_CHILDREN, 617 &child, &children) != 0) { 618 vdev_free(*vdp); 619 *vdp = NULL; 620 return (EINVAL); 621 } 622 623 for (c = 0; c < children; c++) { 624 vdev_t *vd; 625 if ((error = spa_config_parse(spa, &vd, child[c], *vdp, c, 626 atype)) != 0) { 627 vdev_free(*vdp); 628 *vdp = NULL; 629 return (error); 630 } 631 } 632 633 ASSERT(*vdp != NULL); 634 635 return (0); 636 } 637 638 /* 639 * Opposite of spa_load(). 640 */ 641 static void 642 spa_unload(spa_t *spa) 643 { 644 int i; 645 646 /* 647 * Stop async tasks. 648 */ 649 spa_async_suspend(spa); 650 651 /* 652 * Stop syncing. 653 */ 654 if (spa->spa_sync_on) { 655 txg_sync_stop(spa->spa_dsl_pool); 656 spa->spa_sync_on = B_FALSE; 657 } 658 659 /* 660 * Wait for any outstanding prefetch I/O to complete. 661 */ 662 spa_config_enter(spa, RW_WRITER, FTAG); 663 spa_config_exit(spa, FTAG); 664 665 /* 666 * Drop and purge level 2 cache 667 */ 668 spa_l2cache_drop(spa); 669 670 /* 671 * Close the dsl pool. 672 */ 673 if (spa->spa_dsl_pool) { 674 dsl_pool_close(spa->spa_dsl_pool); 675 spa->spa_dsl_pool = NULL; 676 } 677 678 /* 679 * Close all vdevs. 680 */ 681 if (spa->spa_root_vdev) 682 vdev_free(spa->spa_root_vdev); 683 ASSERT(spa->spa_root_vdev == NULL); 684 685 for (i = 0; i < spa->spa_spares.sav_count; i++) 686 vdev_free(spa->spa_spares.sav_vdevs[i]); 687 if (spa->spa_spares.sav_vdevs) { 688 kmem_free(spa->spa_spares.sav_vdevs, 689 spa->spa_spares.sav_count * sizeof (void *)); 690 spa->spa_spares.sav_vdevs = NULL; 691 } 692 if (spa->spa_spares.sav_config) { 693 nvlist_free(spa->spa_spares.sav_config); 694 spa->spa_spares.sav_config = NULL; 695 } 696 697 for (i = 0; i < spa->spa_l2cache.sav_count; i++) 698 vdev_free(spa->spa_l2cache.sav_vdevs[i]); 699 if (spa->spa_l2cache.sav_vdevs) { 700 kmem_free(spa->spa_l2cache.sav_vdevs, 701 spa->spa_l2cache.sav_count * sizeof (void *)); 702 spa->spa_l2cache.sav_vdevs = NULL; 703 } 704 if (spa->spa_l2cache.sav_config) { 705 nvlist_free(spa->spa_l2cache.sav_config); 706 spa->spa_l2cache.sav_config = NULL; 707 } 708 709 spa->spa_async_suspended = 0; 710 } 711 712 /* 713 * Load (or re-load) the current list of vdevs describing the active spares for 714 * this pool. When this is called, we have some form of basic information in 715 * 'spa_spares.sav_config'. We parse this into vdevs, try to open them, and 716 * then re-generate a more complete list including status information. 717 */ 718 static void 719 spa_load_spares(spa_t *spa) 720 { 721 nvlist_t **spares; 722 uint_t nspares; 723 int i; 724 vdev_t *vd, *tvd; 725 726 /* 727 * First, close and free any existing spare vdevs. 728 */ 729 for (i = 0; i < spa->spa_spares.sav_count; i++) { 730 vd = spa->spa_spares.sav_vdevs[i]; 731 732 /* Undo the call to spa_activate() below */ 733 if ((tvd = spa_lookup_by_guid(spa, vd->vdev_guid)) != NULL && 734 tvd->vdev_isspare) 735 spa_spare_remove(tvd); 736 vdev_close(vd); 737 vdev_free(vd); 738 } 739 740 if (spa->spa_spares.sav_vdevs) 741 kmem_free(spa->spa_spares.sav_vdevs, 742 spa->spa_spares.sav_count * sizeof (void *)); 743 744 if (spa->spa_spares.sav_config == NULL) 745 nspares = 0; 746 else 747 VERIFY(nvlist_lookup_nvlist_array(spa->spa_spares.sav_config, 748 ZPOOL_CONFIG_SPARES, &spares, &nspares) == 0); 749 750 spa->spa_spares.sav_count = (int)nspares; 751 spa->spa_spares.sav_vdevs = NULL; 752 753 if (nspares == 0) 754 return; 755 756 /* 757 * Construct the array of vdevs, opening them to get status in the 758 * process. For each spare, there is potentially two different vdev_t 759 * structures associated with it: one in the list of spares (used only 760 * for basic validation purposes) and one in the active vdev 761 * configuration (if it's spared in). During this phase we open and 762 * validate each vdev on the spare list. If the vdev also exists in the 763 * active configuration, then we also mark this vdev as an active spare. 764 */ 765 spa->spa_spares.sav_vdevs = kmem_alloc(nspares * sizeof (void *), 766 KM_SLEEP); 767 for (i = 0; i < spa->spa_spares.sav_count; i++) { 768 VERIFY(spa_config_parse(spa, &vd, spares[i], NULL, 0, 769 VDEV_ALLOC_SPARE) == 0); 770 ASSERT(vd != NULL); 771 772 spa->spa_spares.sav_vdevs[i] = vd; 773 774 if ((tvd = spa_lookup_by_guid(spa, vd->vdev_guid)) != NULL) { 775 if (!tvd->vdev_isspare) 776 spa_spare_add(tvd); 777 778 /* 779 * We only mark the spare active if we were successfully 780 * able to load the vdev. Otherwise, importing a pool 781 * with a bad active spare would result in strange 782 * behavior, because multiple pool would think the spare 783 * is actively in use. 784 * 785 * There is a vulnerability here to an equally bizarre 786 * circumstance, where a dead active spare is later 787 * brought back to life (onlined or otherwise). Given 788 * the rarity of this scenario, and the extra complexity 789 * it adds, we ignore the possibility. 790 */ 791 if (!vdev_is_dead(tvd)) 792 spa_spare_activate(tvd); 793 } 794 795 if (vdev_open(vd) != 0) 796 continue; 797 798 vd->vdev_top = vd; 799 if (vdev_validate_aux(vd) == 0) 800 spa_spare_add(vd); 801 } 802 803 /* 804 * Recompute the stashed list of spares, with status information 805 * this time. 806 */ 807 VERIFY(nvlist_remove(spa->spa_spares.sav_config, ZPOOL_CONFIG_SPARES, 808 DATA_TYPE_NVLIST_ARRAY) == 0); 809 810 spares = kmem_alloc(spa->spa_spares.sav_count * sizeof (void *), 811 KM_SLEEP); 812 for (i = 0; i < spa->spa_spares.sav_count; i++) 813 spares[i] = vdev_config_generate(spa, 814 spa->spa_spares.sav_vdevs[i], B_TRUE, B_TRUE, B_FALSE); 815 VERIFY(nvlist_add_nvlist_array(spa->spa_spares.sav_config, 816 ZPOOL_CONFIG_SPARES, spares, spa->spa_spares.sav_count) == 0); 817 for (i = 0; i < spa->spa_spares.sav_count; i++) 818 nvlist_free(spares[i]); 819 kmem_free(spares, spa->spa_spares.sav_count * sizeof (void *)); 820 } 821 822 /* 823 * Load (or re-load) the current list of vdevs describing the active l2cache for 824 * this pool. When this is called, we have some form of basic information in 825 * 'spa_l2cache.sav_config'. We parse this into vdevs, try to open them, and 826 * then re-generate a more complete list including status information. 827 * Devices which are already active have their details maintained, and are 828 * not re-opened. 829 */ 830 static void 831 spa_load_l2cache(spa_t *spa) 832 { 833 nvlist_t **l2cache; 834 uint_t nl2cache; 835 int i, j, oldnvdevs; 836 uint64_t guid; 837 vdev_t *vd, **oldvdevs, **newvdevs; 838 spa_aux_vdev_t *sav = &spa->spa_l2cache; 839 840 if (sav->sav_config != NULL) { 841 VERIFY(nvlist_lookup_nvlist_array(sav->sav_config, 842 ZPOOL_CONFIG_L2CACHE, &l2cache, &nl2cache) == 0); 843 newvdevs = kmem_alloc(nl2cache * sizeof (void *), KM_SLEEP); 844 } else { 845 nl2cache = 0; 846 } 847 848 oldvdevs = sav->sav_vdevs; 849 oldnvdevs = sav->sav_count; 850 sav->sav_vdevs = NULL; 851 sav->sav_count = 0; 852 853 /* 854 * Process new nvlist of vdevs. 855 */ 856 for (i = 0; i < nl2cache; i++) { 857 VERIFY(nvlist_lookup_uint64(l2cache[i], ZPOOL_CONFIG_GUID, 858 &guid) == 0); 859 860 newvdevs[i] = NULL; 861 for (j = 0; j < oldnvdevs; j++) { 862 vd = oldvdevs[j]; 863 if (vd != NULL && guid == vd->vdev_guid) { 864 /* 865 * Retain previous vdev for add/remove ops. 866 */ 867 newvdevs[i] = vd; 868 oldvdevs[j] = NULL; 869 break; 870 } 871 } 872 873 if (newvdevs[i] == NULL) { 874 /* 875 * Create new vdev 876 */ 877 VERIFY(spa_config_parse(spa, &vd, l2cache[i], NULL, 0, 878 VDEV_ALLOC_L2CACHE) == 0); 879 ASSERT(vd != NULL); 880 newvdevs[i] = vd; 881 882 /* 883 * Commit this vdev as an l2cache device, 884 * even if it fails to open. 885 */ 886 spa_l2cache_add(vd); 887 888 if (vdev_open(vd) != 0) 889 continue; 890 891 vd->vdev_top = vd; 892 (void) vdev_validate_aux(vd); 893 894 if (!vdev_is_dead(vd)) { 895 uint64_t size; 896 size = vdev_get_rsize(vd); 897 ASSERT3U(size, >, 0); 898 if (spa_mode & FWRITE) { 899 l2arc_add_vdev(spa, vd, 900 VDEV_LABEL_START_SIZE, 901 size - VDEV_LABEL_START_SIZE); 902 } 903 spa_l2cache_activate(vd); 904 } 905 } 906 } 907 908 /* 909 * Purge vdevs that were dropped 910 */ 911 for (i = 0; i < oldnvdevs; i++) { 912 uint64_t pool; 913 914 vd = oldvdevs[i]; 915 if (vd != NULL) { 916 if (spa_mode & FWRITE && 917 spa_l2cache_exists(vd->vdev_guid, &pool) && 918 pool != 0ULL) { 919 l2arc_remove_vdev(vd); 920 } 921 (void) vdev_close(vd); 922 spa_l2cache_remove(vd); 923 } 924 } 925 926 if (oldvdevs) 927 kmem_free(oldvdevs, oldnvdevs * sizeof (void *)); 928 929 if (sav->sav_config == NULL) 930 goto out; 931 932 sav->sav_vdevs = newvdevs; 933 sav->sav_count = (int)nl2cache; 934 935 /* 936 * Recompute the stashed list of l2cache devices, with status 937 * information this time. 938 */ 939 VERIFY(nvlist_remove(sav->sav_config, ZPOOL_CONFIG_L2CACHE, 940 DATA_TYPE_NVLIST_ARRAY) == 0); 941 942 l2cache = kmem_alloc(sav->sav_count * sizeof (void *), KM_SLEEP); 943 for (i = 0; i < sav->sav_count; i++) 944 l2cache[i] = vdev_config_generate(spa, 945 sav->sav_vdevs[i], B_TRUE, B_FALSE, B_TRUE); 946 VERIFY(nvlist_add_nvlist_array(sav->sav_config, 947 ZPOOL_CONFIG_L2CACHE, l2cache, sav->sav_count) == 0); 948 out: 949 for (i = 0; i < sav->sav_count; i++) 950 nvlist_free(l2cache[i]); 951 if (sav->sav_count) 952 kmem_free(l2cache, sav->sav_count * sizeof (void *)); 953 } 954 955 static int 956 load_nvlist(spa_t *spa, uint64_t obj, nvlist_t **value) 957 { 958 dmu_buf_t *db; 959 char *packed = NULL; 960 size_t nvsize = 0; 961 int error; 962 *value = NULL; 963 964 VERIFY(0 == dmu_bonus_hold(spa->spa_meta_objset, obj, FTAG, &db)); 965 nvsize = *(uint64_t *)db->db_data; 966 dmu_buf_rele(db, FTAG); 967 968 packed = kmem_alloc(nvsize, KM_SLEEP); 969 error = dmu_read(spa->spa_meta_objset, obj, 0, nvsize, packed); 970 if (error == 0) 971 error = nvlist_unpack(packed, nvsize, value, 0); 972 kmem_free(packed, nvsize); 973 974 return (error); 975 } 976 977 /* 978 * Checks to see if the given vdev could not be opened, in which case we post a 979 * sysevent to notify the autoreplace code that the device has been removed. 980 */ 981 static void 982 spa_check_removed(vdev_t *vd) 983 { 984 int c; 985 986 for (c = 0; c < vd->vdev_children; c++) 987 spa_check_removed(vd->vdev_child[c]); 988 989 if (vd->vdev_ops->vdev_op_leaf && vdev_is_dead(vd)) { 990 zfs_post_autoreplace(vd->vdev_spa, vd); 991 spa_event_notify(vd->vdev_spa, vd, ESC_ZFS_VDEV_CHECK); 992 } 993 } 994 995 /* 996 * Load an existing storage pool, using the pool's builtin spa_config as a 997 * source of configuration information. 998 */ 999 static int 1000 spa_load(spa_t *spa, nvlist_t *config, spa_load_state_t state, int mosconfig) 1001 { 1002 int error = 0; 1003 nvlist_t *nvroot = NULL; 1004 vdev_t *rvd; 1005 uberblock_t *ub = &spa->spa_uberblock; 1006 uint64_t config_cache_txg = spa->spa_config_txg; 1007 uint64_t pool_guid; 1008 uint64_t version; 1009 zio_t *zio; 1010 uint64_t autoreplace = 0; 1011 1012 spa->spa_load_state = state; 1013 1014 if (nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE, &nvroot) || 1015 nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_GUID, &pool_guid)) { 1016 error = EINVAL; 1017 goto out; 1018 } 1019 1020 /* 1021 * Versioning wasn't explicitly added to the label until later, so if 1022 * it's not present treat it as the initial version. 1023 */ 1024 if (nvlist_lookup_uint64(config, ZPOOL_CONFIG_VERSION, &version) != 0) 1025 version = SPA_VERSION_INITIAL; 1026 1027 (void) nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_TXG, 1028 &spa->spa_config_txg); 1029 1030 if ((state == SPA_LOAD_IMPORT || state == SPA_LOAD_TRYIMPORT) && 1031 spa_guid_exists(pool_guid, 0)) { 1032 error = EEXIST; 1033 goto out; 1034 } 1035 1036 spa->spa_load_guid = pool_guid; 1037 1038 /* 1039 * Parse the configuration into a vdev tree. We explicitly set the 1040 * value that will be returned by spa_version() since parsing the 1041 * configuration requires knowing the version number. 1042 */ 1043 spa_config_enter(spa, RW_WRITER, FTAG); 1044 spa->spa_ubsync.ub_version = version; 1045 error = spa_config_parse(spa, &rvd, nvroot, NULL, 0, VDEV_ALLOC_LOAD); 1046 spa_config_exit(spa, FTAG); 1047 1048 if (error != 0) 1049 goto out; 1050 1051 ASSERT(spa->spa_root_vdev == rvd); 1052 ASSERT(spa_guid(spa) == pool_guid); 1053 1054 /* 1055 * Try to open all vdevs, loading each label in the process. 1056 */ 1057 error = vdev_open(rvd); 1058 if (error != 0) 1059 goto