1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 22 /* 23 * Copyright 2008 Sun Microsystems, Inc. All rights reserved. 24 * Use is subject to license terms. 25 */ 26 27 /* 28 * This file contains all the routines used when modifying on-disk SPA state. 29 * This includes opening, importing, destroying, exporting a pool, and syncing a 30 * pool. 
*/

#include <sys/zfs_context.h>
#include <sys/fm/fs/zfs.h>
#include <sys/spa_impl.h>
#include <sys/zio.h>
#include <sys/zio_checksum.h>
#include <sys/zio_compress.h>
#include <sys/dmu.h>
#include <sys/dmu_tx.h>
#include <sys/zap.h>
#include <sys/zil.h>
#include <sys/vdev_impl.h>
#include <sys/metaslab.h>
#include <sys/uberblock_impl.h>
#include <sys/txg.h>
#include <sys/avl.h>
#include <sys/dmu_traverse.h>
#include <sys/dmu_objset.h>
#include <sys/unique.h>
#include <sys/dsl_pool.h>
#include <sys/dsl_dataset.h>
#include <sys/dsl_dir.h>
#include <sys/dsl_prop.h>
#include <sys/dsl_synctask.h>
#include <sys/fs/zfs.h>
#include <sys/arc.h>
#include <sys/callb.h>
#include <sys/systeminfo.h>
#include <sys/sunddi.h>
#include <sys/spa_boot.h>

#include "zfs_prop.h"
#include "zfs_comutil.h"

/*
 * Per-pool zio taskq sizing.  There is one row per zio type and one column
 * per taskq type (issue, interrupt).  spa_activate() hands each entry to
 * taskq_create() as its second argument when it builds spa_zio_taskq[][].
 */
int zio_taskq_threads[ZIO_TYPES][ZIO_TASKQ_TYPES] = {
	/*	ISSUE	INTR					*/
	{	1,	1	},	/* ZIO_TYPE_NULL	*/
	{	1,	8	},	/* ZIO_TYPE_READ	*/
	{	8,	1	},	/* ZIO_TYPE_WRITE	*/
	{	1,	1	},	/* ZIO_TYPE_FREE	*/
	{	1,	1	},	/* ZIO_TYPE_CLAIM	*/
	{	1,	1	},	/* ZIO_TYPE_IOCTL	*/
};

/* Forward declarations for routines used before their definitions. */
static void spa_sync_props(void *arg1, void *arg2, cred_t *cr, dmu_tx_t *tx);
static boolean_t spa_has_active_shared_spare(spa_t *spa);

/*
 * ==========================================================================
 * SPA properties routines
 * ==========================================================================
 */

/*
 * Add a (source=src, propname=propval) list to an nvlist.
87 */ 88 static void 89 spa_prop_add_list(nvlist_t *nvl, zpool_prop_t prop, char *strval, 90 uint64_t intval, zprop_source_t src) 91 { 92 const char *propname = zpool_prop_to_name(prop); 93 nvlist_t *propval; 94 95 VERIFY(nvlist_alloc(&propval, NV_UNIQUE_NAME, KM_SLEEP) == 0); 96 VERIFY(nvlist_add_uint64(propval, ZPROP_SOURCE, src) == 0); 97 98 if (strval != NULL) 99 VERIFY(nvlist_add_string(propval, ZPROP_VALUE, strval) == 0); 100 else 101 VERIFY(nvlist_add_uint64(propval, ZPROP_VALUE, intval) == 0); 102 103 VERIFY(nvlist_add_nvlist(nvl, propname, propval) == 0); 104 nvlist_free(propval); 105 } 106 107 /* 108 * Get property values from the spa configuration. 109 */ 110 static void 111 spa_prop_get_config(spa_t *spa, nvlist_t **nvp) 112 { 113 uint64_t size = spa_get_space(spa); 114 uint64_t used = spa_get_alloc(spa); 115 uint64_t cap, version; 116 zprop_source_t src = ZPROP_SRC_NONE; 117 spa_config_dirent_t *dp; 118 119 ASSERT(MUTEX_HELD(&spa->spa_props_lock)); 120 121 /* 122 * readonly properties 123 */ 124 spa_prop_add_list(*nvp, ZPOOL_PROP_NAME, spa_name(spa), 0, src); 125 spa_prop_add_list(*nvp, ZPOOL_PROP_SIZE, NULL, size, src); 126 spa_prop_add_list(*nvp, ZPOOL_PROP_USED, NULL, used, src); 127 spa_prop_add_list(*nvp, ZPOOL_PROP_AVAILABLE, NULL, size - used, src); 128 129 cap = (size == 0) ? 0 : (used * 100 / size); 130 spa_prop_add_list(*nvp, ZPOOL_PROP_CAPACITY, NULL, cap, src); 131 132 spa_prop_add_list(*nvp, ZPOOL_PROP_GUID, NULL, spa_guid(spa), src); 133 spa_prop_add_list(*nvp, ZPOOL_PROP_HEALTH, NULL, 134 spa->spa_root_vdev->vdev_state, src); 135 136 /* 137 * settable properties that are not stored in the pool property object. 
138 */ 139 version = spa_version(spa); 140 if (version == zpool_prop_default_numeric(ZPOOL_PROP_VERSION)) 141 src = ZPROP_SRC_DEFAULT; 142 else 143 src = ZPROP_SRC_LOCAL; 144 spa_prop_add_list(*nvp, ZPOOL_PROP_VERSION, NULL, version, src); 145 146 if (spa->spa_root != NULL) 147 spa_prop_add_list(*nvp, ZPOOL_PROP_ALTROOT, spa->spa_root, 148 0, ZPROP_SRC_LOCAL); 149 150 if ((dp = list_head(&spa->spa_config_list)) != NULL) { 151 if (dp->scd_path == NULL) { 152 spa_prop_add_list(*nvp, ZPOOL_PROP_CACHEFILE, 153 "none", 0, ZPROP_SRC_LOCAL); 154 } else if (strcmp(dp->scd_path, spa_config_path) != 0) { 155 spa_prop_add_list(*nvp, ZPOOL_PROP_CACHEFILE, 156 dp->scd_path, 0, ZPROP_SRC_LOCAL); 157 } 158 } 159 } 160 161 /* 162 * Get zpool property values. 163 */ 164 int 165 spa_prop_get(spa_t *spa, nvlist_t **nvp) 166 { 167 zap_cursor_t zc; 168 zap_attribute_t za; 169 objset_t *mos = spa->spa_meta_objset; 170 int err; 171 172 VERIFY(nvlist_alloc(nvp, NV_UNIQUE_NAME, KM_SLEEP) == 0); 173 174 mutex_enter(&spa->spa_props_lock); 175 176 /* 177 * Get properties from the spa config. 178 */ 179 spa_prop_get_config(spa, nvp); 180 181 /* If no pool property object, no more prop to get. */ 182 if (spa->spa_pool_props_object == 0) { 183 mutex_exit(&spa->spa_props_lock); 184 return (0); 185 } 186 187 /* 188 * Get properties from the MOS pool property object. 
189 */ 190 for (zap_cursor_init(&zc, mos, spa->spa_pool_props_object); 191 (err = zap_cursor_retrieve(&zc, &za)) == 0; 192 zap_cursor_advance(&zc)) { 193 uint64_t intval = 0; 194 char *strval = NULL; 195 zprop_source_t src = ZPROP_SRC_DEFAULT; 196 zpool_prop_t prop; 197 198 if ((prop = zpool_name_to_prop(za.za_name)) == ZPROP_INVAL) 199 continue; 200 201 switch (za.za_integer_length) { 202 case 8: 203 /* integer property */ 204 if (za.za_first_integer != 205 zpool_prop_default_numeric(prop)) 206 src = ZPROP_SRC_LOCAL; 207 208 if (prop == ZPOOL_PROP_BOOTFS) { 209 dsl_pool_t *dp; 210 dsl_dataset_t *ds = NULL; 211 212 dp = spa_get_dsl(spa); 213 rw_enter(&dp->dp_config_rwlock, RW_READER); 214 if (err = dsl_dataset_hold_obj(dp, 215 za.za_first_integer, FTAG, &ds)) { 216 rw_exit(&dp->dp_config_rwlock); 217 break; 218 } 219 220 strval = kmem_alloc( 221 MAXNAMELEN + strlen(MOS_DIR_NAME) + 1, 222 KM_SLEEP); 223 dsl_dataset_name(ds, strval); 224 dsl_dataset_rele(ds, FTAG); 225 rw_exit(&dp->dp_config_rwlock); 226 } else { 227 strval = NULL; 228 intval = za.za_first_integer; 229 } 230 231 spa_prop_add_list(*nvp, prop, strval, intval, src); 232 233 if (strval != NULL) 234 kmem_free(strval, 235 MAXNAMELEN + strlen(MOS_DIR_NAME) + 1); 236 237 break; 238 239 case 1: 240 /* string property */ 241 strval = kmem_alloc(za.za_num_integers, KM_SLEEP); 242 err = zap_lookup(mos, spa->spa_pool_props_object, 243 za.za_name, 1, za.za_num_integers, strval); 244 if (err) { 245 kmem_free(strval, za.za_num_integers); 246 break; 247 } 248 spa_prop_add_list(*nvp, prop, strval, 0, src); 249 kmem_free(strval, za.za_num_integers); 250 break; 251 252 default: 253 break; 254 } 255 } 256 zap_cursor_fini(&zc); 257 mutex_exit(&spa->spa_props_lock); 258 out: 259 if (err && err != ENOENT) { 260 nvlist_free(*nvp); 261 *nvp = NULL; 262 return (err); 263 } 264 265 return (0); 266 } 267 268 /* 269 * Validate the given pool properties nvlist and modify the list 270 * for the property values to be set. 
271 */ 272 static int 273 spa_prop_validate(spa_t *spa, nvlist_t *props) 274 { 275 nvpair_t *elem; 276 int error = 0, reset_bootfs = 0; 277 uint64_t objnum; 278 279 elem = NULL; 280 while ((elem = nvlist_next_nvpair(props, elem)) != NULL) { 281 zpool_prop_t prop; 282 char *propname, *strval; 283 uint64_t intval; 284 objset_t *os; 285 char *slash; 286 287 propname = nvpair_name(elem); 288 289 if ((prop = zpool_name_to_prop(propname)) == ZPROP_INVAL) 290 return (EINVAL); 291 292 switch (prop) { 293 case ZPOOL_PROP_VERSION: 294 error = nvpair_value_uint64(elem, &intval); 295 if (!error && 296 (intval < spa_version(spa) || intval > SPA_VERSION)) 297 error = EINVAL; 298 break; 299 300 case ZPOOL_PROP_DELEGATION: 301 case ZPOOL_PROP_AUTOREPLACE: 302 case ZPOOL_PROP_LISTSNAPS: 303 error = nvpair_value_uint64(elem, &intval); 304 if (!error && intval > 1) 305 error = EINVAL; 306 break; 307 308 case ZPOOL_PROP_BOOTFS: 309 if (spa_version(spa) < SPA_VERSION_BOOTFS) { 310 error = ENOTSUP; 311 break; 312 } 313 314 /* 315 * Make sure the vdev config is bootable 316 */ 317 if (!vdev_is_bootable(spa->spa_root_vdev)) { 318 error = ENOTSUP; 319 break; 320 } 321 322 reset_bootfs = 1; 323 324 error = nvpair_value_string(elem, &strval); 325 326 if (!error) { 327 uint64_t compress; 328 329 if (strval == NULL || strval[0] == '\0') { 330 objnum = zpool_prop_default_numeric( 331 ZPOOL_PROP_BOOTFS); 332 break; 333 } 334 335 if (error = dmu_objset_open(strval, DMU_OST_ZFS, 336 DS_MODE_USER | DS_MODE_READONLY, &os)) 337 break; 338 339 /* We don't support gzip bootable datasets */ 340 if ((error = dsl_prop_get_integer(strval, 341 zfs_prop_to_name(ZFS_PROP_COMPRESSION), 342 &compress, NULL)) == 0 && 343 !BOOTFS_COMPRESS_VALID(compress)) { 344 error = ENOTSUP; 345 } else { 346 objnum = dmu_objset_id(os); 347 } 348 dmu_objset_close(os); 349 } 350 break; 351 352 case ZPOOL_PROP_FAILUREMODE: 353 error = nvpair_value_uint64(elem, &intval); 354 if (!error && (intval < ZIO_FAILURE_MODE_WAIT || 355 
intval > ZIO_FAILURE_MODE_PANIC)) 356 error = EINVAL; 357 358 /* 359 * This is a special case which only occurs when 360 * the pool has completely failed. This allows 361 * the user to change the in-core failmode property 362 * without syncing it out to disk (I/Os might 363 * currently be blocked). We do this by returning 364 * EIO to the caller (spa_prop_set) to trick it 365 * into thinking we encountered a property validation 366 * error. 367 */ 368 if (!error && spa_suspended(spa)) { 369 spa->spa_failmode = intval; 370 error = EIO; 371 } 372 break; 373 374 case ZPOOL_PROP_CACHEFILE: 375 if ((error = nvpair_value_string(elem, &strval)) != 0) 376 break; 377 378 if (strval[0] == '\0') 379 break; 380 381 if (strcmp(strval, "none") == 0) 382 break; 383 384 if (strval[0] != '/') { 385 error = EINVAL; 386 break; 387 } 388 389 slash = strrchr(strval, '/'); 390 ASSERT(slash != NULL); 391 392 if (slash[1] == '\0' || strcmp(slash, "/.") == 0 || 393 strcmp(slash, "/..") == 0) 394 error = EINVAL; 395 break; 396 } 397 398 if (error) 399 break; 400 } 401 402 if (!error && reset_bootfs) { 403 error = nvlist_remove(props, 404 zpool_prop_to_name(ZPOOL_PROP_BOOTFS), DATA_TYPE_STRING); 405 406 if (!error) { 407 error = nvlist_add_uint64(props, 408 zpool_prop_to_name(ZPOOL_PROP_BOOTFS), objnum); 409 } 410 } 411 412 return (error); 413 } 414 415 int 416 spa_prop_set(spa_t *spa, nvlist_t *nvp) 417 { 418 int error; 419 420 if ((error = spa_prop_validate(spa, nvp)) != 0) 421 return (error); 422 423 return (dsl_sync_task_do(spa_get_dsl(spa), NULL, spa_sync_props, 424 spa, nvp, 3)); 425 } 426 427 /* 428 * If the bootfs property value is dsobj, clear it. 
429 */ 430 void 431 spa_prop_clear_bootfs(spa_t *spa, uint64_t dsobj, dmu_tx_t *tx) 432 { 433 if (spa->spa_bootfs == dsobj && spa->spa_pool_props_object != 0) { 434 VERIFY(zap_remove(spa->spa_meta_objset, 435 spa->spa_pool_props_object, 436 zpool_prop_to_name(ZPOOL_PROP_BOOTFS), tx) == 0); 437 spa->spa_bootfs = 0; 438 } 439 } 440 441 /* 442 * ========================================================================== 443 * SPA state manipulation (open/create/destroy/import/export) 444 * ========================================================================== 445 */ 446 447 static int 448 spa_error_entry_compare(const void *a, const void *b) 449 { 450 spa_error_entry_t *sa = (spa_error_entry_t *)a; 451 spa_error_entry_t *sb = (spa_error_entry_t *)b; 452 int ret; 453 454 ret = bcmp(&sa->se_bookmark, &sb->se_bookmark, 455 sizeof (zbookmark_t)); 456 457 if (ret < 0) 458 return (-1); 459 else if (ret > 0) 460 return (1); 461 else 462 return (0); 463 } 464 465 /* 466 * Utility function which retrieves copies of the current logs and 467 * re-initializes them in the process. 468 */ 469 void 470 spa_get_errlists(spa_t *spa, avl_tree_t *last, avl_tree_t *scrub) 471 { 472 ASSERT(MUTEX_HELD(&spa->spa_errlist_lock)); 473 474 bcopy(&spa->spa_errlist_last, last, sizeof (avl_tree_t)); 475 bcopy(&spa->spa_errlist_scrub, scrub, sizeof (avl_tree_t)); 476 477 avl_create(&spa->spa_errlist_scrub, 478 spa_error_entry_compare, sizeof (spa_error_entry_t), 479 offsetof(spa_error_entry_t, se_avl)); 480 avl_create(&spa->spa_errlist_last, 481 spa_error_entry_compare, sizeof (spa_error_entry_t), 482 offsetof(spa_error_entry_t, se_avl)); 483 } 484 485 /* 486 * Activate an uninitialized pool. 
487 */ 488 static void 489 spa_activate(spa_t *spa) 490 { 491 ASSERT(spa->spa_state == POOL_STATE_UNINITIALIZED); 492 493 spa->spa_state = POOL_STATE_ACTIVE; 494 495 spa->spa_normal_class = metaslab_class_create(); 496 spa->spa_log_class = metaslab_class_create(); 497 498 for (int t = 0; t < ZIO_TYPES; t++) { 499 for (int q = 0; q < ZIO_TASKQ_TYPES; q++) { 500 spa->spa_zio_taskq[t][q] = taskq_create("spa_zio", 501 zio_taskq_threads[t][q], maxclsyspri, 50, 502 INT_MAX, TASKQ_PREPOPULATE); 503 } 504 } 505 506 list_create(&spa->spa_config_dirty_list, sizeof (vdev_t), 507 offsetof(vdev_t, vdev_config_dirty_node)); 508 list_create(&spa->spa_state_dirty_list, sizeof (vdev_t), 509 offsetof(vdev_t, vdev_state_dirty_node)); 510 511 txg_list_create(&spa->spa_vdev_txg_list, 512 offsetof(struct vdev, vdev_txg_node)); 513 514 avl_create(&spa->spa_errlist_scrub, 515 spa_error_entry_compare, sizeof (spa_error_entry_t), 516 offsetof(spa_error_entry_t, se_avl)); 517 avl_create(&spa->spa_errlist_last, 518 spa_error_entry_compare, sizeof (spa_error_entry_t), 519 offsetof(spa_error_entry_t, se_avl)); 520 } 521 522 /* 523 * Opposite of spa_activate(). 
524 */ 525 static void 526 spa_deactivate(spa_t *spa) 527 { 528 ASSERT(spa->spa_sync_on == B_FALSE); 529 ASSERT(spa->spa_dsl_pool == NULL); 530 ASSERT(spa->spa_root_vdev == NULL); 531 532 ASSERT(spa->spa_state != POOL_STATE_UNINITIALIZED); 533 534 txg_list_destroy(&spa->spa_vdev_txg_list); 535 536 list_destroy(&spa->spa_config_dirty_list); 537 list_destroy(&spa->spa_state_dirty_list); 538 539 for (int t = 0; t < ZIO_TYPES; t++) { 540 for (int q = 0; q < ZIO_TASKQ_TYPES; q++) { 541 taskq_destroy(spa->spa_zio_taskq[t][q]); 542 spa->spa_zio_taskq[t][q] = NULL; 543 } 544 } 545 546 metaslab_class_destroy(spa->spa_normal_class); 547 spa->spa_normal_class = NULL; 548 549 metaslab_class_destroy(spa->spa_log_class); 550 spa->spa_log_class = NULL; 551 552 /* 553 * If this was part of an import or the open otherwise failed, we may 554 * still have errors left in the queues. Empty them just in case. 555 */ 556 spa_errlog_drain(spa); 557 558 avl_destroy(&spa->spa_errlist_scrub); 559 avl_destroy(&spa->spa_errlist_last); 560 561 spa->spa_state = POOL_STATE_UNINITIALIZED; 562 } 563 564 /* 565 * Verify a pool configuration, and construct the vdev tree appropriately. This 566 * will create all the necessary vdevs in the appropriate layout, with each vdev 567 * in the CLOSED state. This will prep the pool before open/creation/import. 568 * All vdev validation is done by the vdev_alloc() routine. 
569 */ 570 static int 571 spa_config_parse(spa_t *spa, vdev_t **vdp, nvlist_t *nv, vdev_t *parent, 572 uint_t id, int atype) 573 { 574 nvlist_t **child; 575 uint_t c, children; 576 int error; 577 578 if ((error = vdev_alloc(spa, vdp, nv, parent, id, atype)) != 0) 579 return (error); 580 581 if ((*vdp)->vdev_ops->vdev_op_leaf) 582 return (0); 583 584 error = nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_CHILDREN, 585 &child, &children); 586 587 if (error == ENOENT) 588 return (0); 589 590 if (error) { 591 vdev_free(*vdp); 592 *vdp = NULL; 593 return (EINVAL); 594 } 595 596 for (c = 0; c < children; c++) { 597 vdev_t *vd; 598 if ((error = spa_config_parse(spa, &vd, child[c], *vdp, c, 599 atype)) != 0) { 600 vdev_free(*vdp); 601 *vdp = NULL; 602 return (error); 603 } 604 } 605 606 ASSERT(*vdp != NULL); 607 608 return (0); 609 } 610 611 /* 612 * Opposite of spa_load(). 613 */ 614 static void 615 spa_unload(spa_t *spa) 616 { 617 int i; 618 619 ASSERT(MUTEX_HELD(&spa_namespace_lock)); 620 621 /* 622 * Stop async tasks. 623 */ 624 spa_async_suspend(spa); 625 626 /* 627 * Stop syncing. 628 */ 629 if (spa->spa_sync_on) { 630 txg_sync_stop(spa->spa_dsl_pool); 631 spa->spa_sync_on = B_FALSE; 632 } 633 634 /* 635 * Wait for any outstanding async I/O to complete. 636 */ 637 mutex_enter(&spa->spa_async_root_lock); 638 while (spa->spa_async_root_count != 0) 639 cv_wait(&spa->spa_async_root_cv, &spa->spa_async_root_lock); 640 mutex_exit(&spa->spa_async_root_lock); 641 642 /* 643 * Drop and purge level 2 cache 644 */ 645 spa_l2cache_drop(spa); 646 647 /* 648 * Close the dsl pool. 649 */ 650 if (spa->spa_dsl_pool) { 651 dsl_pool_close(spa->spa_dsl_pool); 652 spa->spa_dsl_pool = NULL; 653 } 654 655 /* 656 * Close all vdevs. 
657 */ 658 if (spa->spa_root_vdev) 659 vdev_free(spa->spa_root_vdev); 660 ASSERT(spa->spa_root_vdev == NULL); 661 662 for (i = 0; i < spa->spa_spares.sav_count; i++) 663 vdev_free(spa->spa_spares.sav_vdevs[i]); 664 if (spa->spa_spares.sav_vdevs) { 665 kmem_free(spa->spa_spares.sav_vdevs, 666 spa->spa_spares.sav_count * sizeof (void *)); 667 spa->spa_spares.sav_vdevs = NULL; 668 } 669 if (spa->spa_spares.sav_config) { 670 nvlist_free(spa->spa_spares.sav_config); 671 spa->spa_spares.sav_config = NULL; 672 } 673 spa->spa_spares.sav_count = 0; 674 675 for (i = 0; i < spa->spa_l2cache.sav_count; i++) 676 vdev_free(spa->spa_l2cache.sav_vdevs[i]); 677 if (spa->spa_l2cache.sav_vdevs) { 678 kmem_free(spa->spa_l2cache.sav_vdevs, 679 spa->spa_l2cache.sav_count * sizeof (void *)); 680 spa->spa_l2cache.sav_vdevs = NULL; 681 } 682 if (spa->spa_l2cache.sav_config) { 683 nvlist_free(spa->spa_l2cache.sav_config); 684 spa->spa_l2cache.sav_config = NULL; 685 } 686 spa->spa_l2cache.sav_count = 0; 687 688 spa->spa_async_suspended = 0; 689 } 690 691 /* 692 * Load (or re-load) the current list of vdevs describing the active spares for 693 * this pool. When this is called, we have some form of basic information in 694 * 'spa_spares.sav_config'. We parse this into vdevs, try to open them, and 695 * then re-generate a more complete list including status information. 696 */ 697 static void 698 spa_load_spares(spa_t *spa) 699 { 700 nvlist_t **spares; 701 uint_t nspares; 702 int i; 703 vdev_t *vd, *tvd; 704 705 ASSERT(spa_config_held(spa, SCL_ALL, RW_WRITER) == SCL_ALL); 706 707 /* 708 * First, close and free any existing spare vdevs. 
709 */ 710 for (i = 0; i < spa->spa_spares.sav_count; i++) { 711 vd = spa->spa_spares.sav_vdevs[i]; 712 713 /* Undo the call to spa_activate() below */ 714 if ((tvd = spa_lookup_by_guid(spa, vd->vdev_guid, 715 B_FALSE)) != NULL && tvd->vdev_isspare) 716 spa_spare_remove(tvd); 717 vdev_close(vd); 718 vdev_free(vd); 719 } 720 721 if (spa->spa_spares.sav_vdevs) 722 kmem_free(spa->spa_spares.sav_vdevs, 723 spa->spa_spares.sav_count * sizeof (void *)); 724 725 if (spa->spa_spares.sav_config == NULL) 726 nspares = 0; 727 else 728 VERIFY(nvlist_lookup_nvlist_array(spa->spa_spares.sav_config, 729 ZPOOL_CONFIG_SPARES, &spares, &nspares) == 0); 730 731 spa->spa_spares.sav_count = (int)nspares; 732 spa->spa_spares.sav_vdevs = NULL; 733 734 if (nspares == 0) 735 return; 736 737 /* 738 * Construct the array of vdevs, opening them to get status in the 739 * process. For each spare, there is potentially two different vdev_t 740 * structures associated with it: one in the list of spares (used only 741 * for basic validation purposes) and one in the active vdev 742 * configuration (if it's spared in). During this phase we open and 743 * validate each vdev on the spare list. If the vdev also exists in the 744 * active configuration, then we also mark this vdev as an active spare. 745 */ 746 spa->spa_spares.sav_vdevs = kmem_alloc(nspares * sizeof (void *), 747 KM_SLEEP); 748 for (i = 0; i < spa->spa_spares.sav_count; i++) { 749 VERIFY(spa_config_parse(spa, &vd, spares[i], NULL, 0, 750 VDEV_ALLOC_SPARE) == 0); 751 ASSERT(vd != NULL); 752 753 spa->spa_spares.sav_vdevs[i] = vd; 754 755 if ((tvd = spa_lookup_by_guid(spa, vd->vdev_guid, 756 B_FALSE)) != NULL) { 757 if (!tvd->vdev_isspare) 758 spa_spare_add(tvd); 759 760 /* 761 * We only mark the spare active if we were successfully 762 * able to load the vdev. Otherwise, importing a pool 763 * with a bad active spare would result in strange 764 * behavior, because multiple pool would think the spare 765 * is actively in use. 
766 * 767 * There is a vulnerability here to an equally bizarre 768 * circumstance, where a dead active spare is later 769 * brought back to life (onlined or otherwise). Given 770 * the rarity of this scenario, and the extra complexity 771 * it adds, we ignore the possibility. 772 */ 773 if (!vdev_is_dead(tvd)) 774 spa_spare_activate(tvd); 775 } 776 777 vd->vdev_top = vd; 778 779 if (vdev_open(vd) != 0) 780 continue; 781 782 if (vdev_validate_aux(vd) == 0) 783 spa_spare_add(vd); 784 } 785 786 /* 787 * Recompute the stashed list of spares, with status information 788 * this time. 789 */ 790 VERIFY(nvlist_remove(spa->spa_spares.sav_config, ZPOOL_CONFIG_SPARES, 791 DATA_TYPE_NVLIST_ARRAY) == 0); 792 793 spares = kmem_alloc(spa->spa_spares.sav_count * sizeof (void *), 794 KM_SLEEP); 795 for (i = 0; i < spa->spa_spares.sav_count; i++) 796 spares[i] = vdev_config_generate(spa, 797 spa->spa_spares.sav_vdevs[i], B_TRUE, B_TRUE, B_FALSE); 798 VERIFY(nvlist_add_nvlist_array(spa->spa_spares.sav_config, 799 ZPOOL_CONFIG_SPARES, spares, spa->spa_spares.sav_count) == 0); 800 for (i = 0; i < spa->spa_spares.sav_count; i++) 801 nvlist_free(spares[i]); 802 kmem_free(spares, spa->spa_spares.sav_count * sizeof (void *)); 803 } 804 805 /* 806 * Load (or re-load) the current list of vdevs describing the active l2cache for 807 * this pool. When this is called, we have some form of basic information in 808 * 'spa_l2cache.sav_config'. We parse this into vdevs, try to open them, and 809 * then re-generate a more complete list including status information. 810 * Devices which are already active have their details maintained, and are 811 * not re-opened. 
812 */ 813 static void 814 spa_load_l2cache(spa_t *spa) 815 { 816 nvlist_t **l2cache; 817 uint_t nl2cache; 818 int i, j, oldnvdevs; 819 uint64_t guid, size; 820 vdev_t *vd, **oldvdevs, **newvdevs; 821 spa_aux_vdev_t *sav = &spa->spa_l2cache; 822 823 ASSERT(spa_config_held(spa, SCL_ALL, RW_WRITER) == SCL_ALL); 824 825 if (sav->sav_config != NULL) { 826 VERIFY(nvlist_lookup_nvlist_array(sav->sav_config, 827 ZPOOL_CONFIG_L2CACHE, &l2cache, &nl2cache) == 0); 828 newvdevs = kmem_alloc(nl2cache * sizeof (void *), KM_SLEEP); 829 } else { 830 nl2cache = 0; 831 } 832 833 oldvdevs = sav->sav_vdevs; 834 oldnvdevs = sav->sav_count; 835 sav->sav_vdevs = NULL; 836 sav->sav_count = 0; 837 838 /* 839 * Process new nvlist of vdevs. 840 */ 841 for (i = 0; i < nl2cache; i++) { 842 VERIFY(nvlist_lookup_uint64(l2cache[i], ZPOOL_CONFIG_GUID, 843 &guid) == 0); 844 845 newvdevs[i] = NULL; 846 for (j = 0; j < oldnvdevs; j++) { 847 vd = oldvdevs[j]; 848 if (vd != NULL && guid == vd->vdev_guid) { 849 /* 850 * Retain previous vdev for add/remove ops. 851 */ 852 newvdevs[i] = vd; 853 oldvdevs[j] = NULL; 854 break; 855 } 856 } 857 858 if (newvdevs[i] == NULL) { 859 /* 860 * Create new vdev 861 */ 862 VERIFY(spa_config_parse(spa, &vd, l2cache[i], NULL, 0, 863 VDEV_ALLOC_L2CACHE) == 0); 864 ASSERT(vd != NULL); 865 newvdevs[i] = vd; 866 867 /* 868 * Commit this vdev as an l2cache device, 869 * even if it fails to open. 
870 */ 871 spa_l2cache_add(vd); 872 873 vd->vdev_top = vd; 874 vd->vdev_aux = sav; 875 876 spa_l2cache_activate(vd); 877 878 if (vdev_open(vd) != 0) 879 continue; 880 881 (void) vdev_validate_aux(vd); 882 883 if (!vdev_is_dead(vd)) { 884 size = vdev_get_rsize(vd); 885 l2arc_add_vdev(spa, vd, 886 VDEV_LABEL_START_SIZE, 887 size - VDEV_LABEL_START_SIZE); 888 } 889 } 890 } 891 892 /* 893 * Purge vdevs that were dropped 894 */ 895 for (i = 0; i < oldnvdevs; i++) { 896 uint64_t pool; 897 898 vd = oldvdevs[i]; 899 if (vd != NULL) { 900 if ((spa_mode & FWRITE) && 901 spa_l2cache_exists(vd->vdev_guid, &pool) && 902 pool != 0ULL && 903 l2arc_vdev_present(vd)) { 904 l2arc_remove_vdev(vd); 905 } 906 (void) vdev_close(vd); 907 spa_l2cache_remove(vd); 908 } 909 } 910 911 if (oldvdevs) 912 kmem_free(oldvdevs, oldnvdevs * sizeof (void *)); 913 914 if (sav->sav_config == NULL) 915 goto out; 916 917 sav->sav_vdevs = newvdevs; 918 sav->sav_count = (int)nl2cache; 919 920 /* 921 * Recompute the stashed list of l2cache devices, with status 922 * information this time. 
923 */ 924 VERIFY(nvlist_remove(sav->sav_config, ZPOOL_CONFIG_L2CACHE, 925 DATA_TYPE_NVLIST_ARRAY) == 0); 926 927 l2cache = kmem_alloc(sav->sav_count * sizeof (void *), KM_SLEEP); 928 for (i = 0; i < sav->sav_count; i++) 929 l2cache[i] = vdev_config_generate(spa, 930 sav->sav_vdevs[i], B_TRUE, B_FALSE, B_TRUE); 931 VERIFY(nvlist_add_nvlist_array(sav->sav_config, 932 ZPOOL_CONFIG_L2CACHE, l2cache, sav->sav_count) == 0); 933 out: 934 for (i = 0; i < sav->sav_count; i++) 935 nvlist_free(l2cache[i]); 936 if (sav->sav_count) 937 kmem_free(l2cache, sav->sav_count * sizeof (void *)); 938 } 939 940 static int 941 load_nvlist(spa_t *spa, uint64_t obj, nvlist_t **value) 942 { 943 dmu_buf_t *db; 944 char *packed = NULL; 945 size_t nvsize = 0; 946 int error; 947 *value = NULL; 948 949 VERIFY(0 == dmu_bonus_hold(spa->spa_meta_objset, obj, FTAG, &db)); 950 nvsize = *(uint64_t *)db->db_data; 951 dmu_buf_rele(db, FTAG); 952 953 packed = kmem_alloc(nvsize, KM_SLEEP); 954 error = dmu_read(spa->spa_meta_objset, obj, 0, nvsize, packed); 955 if (error == 0) 956 error = nvlist_unpack(packed, nvsize, value, 0); 957 kmem_free(packed, nvsize); 958 959 return (error); 960 } 961 962 /* 963 * Checks to see if the given vdev could not be opened, in which case we post a 964 * sysevent to notify the autoreplace code that the device has been removed. 
965 */ 966 static void 967 spa_check_removed(vdev_t *vd) 968 { 969 int c; 970 971 for (c = 0; c < vd->vdev_children; c++) 972 spa_check_removed(vd->vdev_child[c]); 973 974 if (vd->vdev_ops->vdev_op_leaf && vdev_is_dead(vd)) { 975 zfs_post_autoreplace(vd->vdev_spa, vd); 976 spa_event_notify(vd->vdev_spa, vd, ESC_ZFS_VDEV_CHECK); 977 } 978 } 979 980 /* 981 * Check for missing log devices 982 */ 983 int 984 spa_check_logs(spa_t *spa) 985 { 986 switch (spa->spa_log_state) { 987 case SPA_LOG_MISSING: 988 /* need to recheck in case slog has been restored */ 989 case SPA_LOG_UNKNOWN: 990 if (dmu_objset_find(spa->spa_name, zil_check_log_chain, NULL, 991 DS_FIND_CHILDREN)) { 992 spa->spa_log_state = SPA_LOG_MISSING; 993 return (1); 994 } 995 break; 996 997 case SPA_LOG_CLEAR: 998 (void) dmu_objset_find(spa->spa_name, zil_clear_log_chain, NULL, 999 DS_FIND_CHILDREN); 1000 break; 1001 } 1002 spa->spa_log_state = SPA_LOG_GOOD; 1003 return (0); 1004 } 1005 1006 /* 1007 * Load an existing storage pool, using the pool's builtin spa_config as a 1008 * source of configuration information. 1009 */ 1010 static int 1011 spa_load(spa_t *spa, nvlist_t *config, spa_load_state_t state, int mosconfig) 1012 { 1013 int error = 0; 1014 nvlist_t *nvroot = NULL; 1015 vdev_t *rvd; 1016 uberblock_t *ub = &spa->spa_uberblock; 1017 uint64_t config_cache_txg = spa->spa_config_txg; 1018 uint64_t pool_guid; 1019 uint64_t version; 1020 uint64_t autoreplace = 0; 1021 char *ereport = FM_EREPORT_ZFS_POOL; 1022 1023 ASSERT(MUTEX_HELD(&spa_namespace_lock)); 1024 1025 spa->spa_load_state = state; 1026 1027 if (nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE, &nvroot) || 1028 nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_GUID, &pool_guid)) { 1029 error = EINVAL; 1030 goto out; 1031 } 1032 1033 /* 1034 * Versioning wasn't explicitly added to the label until later, so if 1035 * it's not present treat it as the initial version. 
1036 */ 1037 if (nvlist_lookup_uint64(config, ZPOOL_CONFIG_VERSION, &version) != 0) 1038 version = SPA_VERSION_INITIAL; 1039 1040 (void) nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_TXG, 1041 &spa->spa_config_txg); 1042 1043 if ((state == SPA_LOAD_IMPORT || state == SPA_LOAD_TRYIMPORT) && 1044 spa_guid_exists(pool_guid, 0)) { 1045 error = EEXIST; 1046 goto out; 1047 } 1048 1049 spa->spa_load_guid = pool_guid; 1050 1051 /* 1052 * Parse the configuration into a vdev tree. We exp