Home | History | Annotate | Download | only in zfs
      1 /*
      2  * CDDL HEADER START
      3  *
      4  * The contents of this file are subject to the terms of the
      5  * Common Development and Distribution License (the "License").
      6  * You may not use this file except in compliance with the License.
      7  *
      8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
      9  * or http://www.opensolaris.org/os/licensing.
     10  * See the License for the specific language governing permissions
     11  * and limitations under the License.
     12  *
     13  * When distributing Covered Code, include this CDDL HEADER in each
     14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
     15  * If applicable, add the following below this CDDL HEADER, with the
     16  * fields enclosed by brackets "[]" replaced with your own identifying
     17  * information: Portions Copyright [yyyy] [name of copyright owner]
     18  *
     19  * CDDL HEADER END
     20  */
     21 
     22 /*
     23  * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
     24  * Use is subject to license terms.
     25  */
     26 
     27 /*
     28  * This file contains all the routines used when modifying on-disk SPA state.
     29  * This includes opening, importing, destroying, exporting a pool, and syncing a
     30  * pool.
     31  */
     32 
     33 #include <sys/zfs_context.h>
     34 #include <sys/fm/fs/zfs.h>
     35 #include <sys/spa_impl.h>
     36 #include <sys/zio.h>
     37 #include <sys/zio_checksum.h>
     38 #include <sys/zio_compress.h>
     39 #include <sys/dmu.h>
     40 #include <sys/dmu_tx.h>
     41 #include <sys/zap.h>
     42 #include <sys/zil.h>
     43 #include <sys/vdev_impl.h>
     44 #include <sys/metaslab.h>
     45 #include <sys/uberblock_impl.h>
     46 #include <sys/txg.h>
     47 #include <sys/avl.h>
     48 #include <sys/dmu_traverse.h>
     49 #include <sys/dmu_objset.h>
     50 #include <sys/unique.h>
     51 #include <sys/dsl_pool.h>
     52 #include <sys/dsl_dataset.h>
     53 #include <sys/dsl_dir.h>
     54 #include <sys/dsl_prop.h>
     55 #include <sys/dsl_synctask.h>
     56 #include <sys/fs/zfs.h>
     57 #include <sys/arc.h>
     58 #include <sys/callb.h>
     59 #include <sys/systeminfo.h>
     60 #include <sys/sunddi.h>
     61 #include <sys/spa_boot.h>
     62 
     63 #include "zfs_prop.h"
     64 #include "zfs_comutil.h"
     65 
     66 int zio_taskq_threads[ZIO_TYPES][ZIO_TASKQ_TYPES] = {
     67 	/*	ISSUE	INTR					*/
     68 	{	1,	1	},	/* ZIO_TYPE_NULL	*/
     69 	{	1,	8	},	/* ZIO_TYPE_READ	*/
     70 	{	8,	1	},	/* ZIO_TYPE_WRITE	*/
     71 	{	1,	1	},	/* ZIO_TYPE_FREE	*/
     72 	{	1,	1	},	/* ZIO_TYPE_CLAIM	*/
     73 	{	1,	1	},	/* ZIO_TYPE_IOCTL	*/
     74 };
     75 
     76 static void spa_sync_props(void *arg1, void *arg2, cred_t *cr, dmu_tx_t *tx);
     77 static boolean_t spa_has_active_shared_spare(spa_t *spa);
     78 
     79 /*
     80  * ==========================================================================
     81  * SPA properties routines
     82  * ==========================================================================
     83  */
     84 
     85 /*
     86  * Add a (source=src, propname=propval) list to an nvlist.
     87  */
     88 static void
     89 spa_prop_add_list(nvlist_t *nvl, zpool_prop_t prop, char *strval,
     90     uint64_t intval, zprop_source_t src)
     91 {
     92 	const char *propname = zpool_prop_to_name(prop);
     93 	nvlist_t *propval;
     94 
     95 	VERIFY(nvlist_alloc(&propval, NV_UNIQUE_NAME, KM_SLEEP) == 0);
     96 	VERIFY(nvlist_add_uint64(propval, ZPROP_SOURCE, src) == 0);
     97 
     98 	if (strval != NULL)
     99 		VERIFY(nvlist_add_string(propval, ZPROP_VALUE, strval) == 0);
    100 	else
    101 		VERIFY(nvlist_add_uint64(propval, ZPROP_VALUE, intval) == 0);
    102 
    103 	VERIFY(nvlist_add_nvlist(nvl, propname, propval) == 0);
    104 	nvlist_free(propval);
    105 }
    106 
    107 /*
    108  * Get property values from the spa configuration.
    109  */
    110 static void
    111 spa_prop_get_config(spa_t *spa, nvlist_t **nvp)
    112 {
    113 	uint64_t size = spa_get_space(spa);
    114 	uint64_t used = spa_get_alloc(spa);
    115 	uint64_t cap, version;
    116 	zprop_source_t src = ZPROP_SRC_NONE;
    117 	spa_config_dirent_t *dp;
    118 
    119 	ASSERT(MUTEX_HELD(&spa->spa_props_lock));
    120 
    121 	/*
    122 	 * readonly properties
    123 	 */
    124 	spa_prop_add_list(*nvp, ZPOOL_PROP_NAME, spa_name(spa), 0, src);
    125 	spa_prop_add_list(*nvp, ZPOOL_PROP_SIZE, NULL, size, src);
    126 	spa_prop_add_list(*nvp, ZPOOL_PROP_USED, NULL, used, src);
    127 	spa_prop_add_list(*nvp, ZPOOL_PROP_AVAILABLE, NULL, size - used, src);
    128 
    129 	cap = (size == 0) ? 0 : (used * 100 / size);
    130 	spa_prop_add_list(*nvp, ZPOOL_PROP_CAPACITY, NULL, cap, src);
    131 
    132 	spa_prop_add_list(*nvp, ZPOOL_PROP_GUID, NULL, spa_guid(spa), src);
    133 	spa_prop_add_list(*nvp, ZPOOL_PROP_HEALTH, NULL,
    134 	    spa->spa_root_vdev->vdev_state, src);
    135 
    136 	/*
    137 	 * settable properties that are not stored in the pool property object.
    138 	 */
    139 	version = spa_version(spa);
    140 	if (version == zpool_prop_default_numeric(ZPOOL_PROP_VERSION))
    141 		src = ZPROP_SRC_DEFAULT;
    142 	else
    143 		src = ZPROP_SRC_LOCAL;
    144 	spa_prop_add_list(*nvp, ZPOOL_PROP_VERSION, NULL, version, src);
    145 
    146 	if (spa->spa_root != NULL)
    147 		spa_prop_add_list(*nvp, ZPOOL_PROP_ALTROOT, spa->spa_root,
    148 		    0, ZPROP_SRC_LOCAL);
    149 
    150 	if ((dp = list_head(&spa->spa_config_list)) != NULL) {
    151 		if (dp->scd_path == NULL) {
    152 			spa_prop_add_list(*nvp, ZPOOL_PROP_CACHEFILE,
    153 			    "none", 0, ZPROP_SRC_LOCAL);
    154 		} else if (strcmp(dp->scd_path, spa_config_path) != 0) {
    155 			spa_prop_add_list(*nvp, ZPOOL_PROP_CACHEFILE,
    156 			    dp->scd_path, 0, ZPROP_SRC_LOCAL);
    157 		}
    158 	}
    159 }
    160 
    161 /*
    162  * Get zpool property values.
    163  */
    164 int
    165 spa_prop_get(spa_t *spa, nvlist_t **nvp)
    166 {
    167 	zap_cursor_t zc;
    168 	zap_attribute_t za;
    169 	objset_t *mos = spa->spa_meta_objset;
    170 	int err;
    171 
    172 	VERIFY(nvlist_alloc(nvp, NV_UNIQUE_NAME, KM_SLEEP) == 0);
    173 
    174 	mutex_enter(&spa->spa_props_lock);
    175 
    176 	/*
    177 	 * Get properties from the spa config.
    178 	 */
    179 	spa_prop_get_config(spa, nvp);
    180 
    181 	/* If no pool property object, no more prop to get. */
    182 	if (spa->spa_pool_props_object == 0) {
    183 		mutex_exit(&spa->spa_props_lock);
    184 		return (0);
    185 	}
    186 
    187 	/*
    188 	 * Get properties from the MOS pool property object.
    189 	 */
    190 	for (zap_cursor_init(&zc, mos, spa->spa_pool_props_object);
    191 	    (err = zap_cursor_retrieve(&zc, &za)) == 0;
    192 	    zap_cursor_advance(&zc)) {
    193 		uint64_t intval = 0;
    194 		char *strval = NULL;
    195 		zprop_source_t src = ZPROP_SRC_DEFAULT;
    196 		zpool_prop_t prop;
    197 
    198 		if ((prop = zpool_name_to_prop(za.za_name)) == ZPROP_INVAL)
    199 			continue;
    200 
    201 		switch (za.za_integer_length) {
    202 		case 8:
    203 			/* integer property */
    204 			if (za.za_first_integer !=
    205 			    zpool_prop_default_numeric(prop))
    206 				src = ZPROP_SRC_LOCAL;
    207 
    208 			if (prop == ZPOOL_PROP_BOOTFS) {
    209 				dsl_pool_t *dp;
    210 				dsl_dataset_t *ds = NULL;
    211 
    212 				dp = spa_get_dsl(spa);
    213 				rw_enter(&dp->dp_config_rwlock, RW_READER);
    214 				if (err = dsl_dataset_hold_obj(dp,
    215 				    za.za_first_integer, FTAG, &ds)) {
    216 					rw_exit(&dp->dp_config_rwlock);
    217 					break;
    218 				}
    219 
    220 				strval = kmem_alloc(
    221 				    MAXNAMELEN + strlen(MOS_DIR_NAME) + 1,
    222 				    KM_SLEEP);
    223 				dsl_dataset_name(ds, strval);
    224 				dsl_dataset_rele(ds, FTAG);
    225 				rw_exit(&dp->dp_config_rwlock);
    226 			} else {
    227 				strval = NULL;
    228 				intval = za.za_first_integer;
    229 			}
    230 
    231 			spa_prop_add_list(*nvp, prop, strval, intval, src);
    232 
    233 			if (strval != NULL)
    234 				kmem_free(strval,
    235 				    MAXNAMELEN + strlen(MOS_DIR_NAME) + 1);
    236 
    237 			break;
    238 
    239 		case 1:
    240 			/* string property */
    241 			strval = kmem_alloc(za.za_num_integers, KM_SLEEP);
    242 			err = zap_lookup(mos, spa->spa_pool_props_object,
    243 			    za.za_name, 1, za.za_num_integers, strval);
    244 			if (err) {
    245 				kmem_free(strval, za.za_num_integers);
    246 				break;
    247 			}
    248 			spa_prop_add_list(*nvp, prop, strval, 0, src);
    249 			kmem_free(strval, za.za_num_integers);
    250 			break;
    251 
    252 		default:
    253 			break;
    254 		}
    255 	}
    256 	zap_cursor_fini(&zc);
    257 	mutex_exit(&spa->spa_props_lock);
    258 out:
    259 	if (err && err != ENOENT) {
    260 		nvlist_free(*nvp);
    261 		*nvp = NULL;
    262 		return (err);
    263 	}
    264 
    265 	return (0);
    266 }
    267 
    268 /*
    269  * Validate the given pool properties nvlist and modify the list
    270  * for the property values to be set.
    271  */
    272 static int
    273 spa_prop_validate(spa_t *spa, nvlist_t *props)
    274 {
    275 	nvpair_t *elem;
    276 	int error = 0, reset_bootfs = 0;
    277 	uint64_t objnum;
    278 
    279 	elem = NULL;
    280 	while ((elem = nvlist_next_nvpair(props, elem)) != NULL) {
    281 		zpool_prop_t prop;
    282 		char *propname, *strval;
    283 		uint64_t intval;
    284 		objset_t *os;
    285 		char *slash;
    286 
    287 		propname = nvpair_name(elem);
    288 
    289 		if ((prop = zpool_name_to_prop(propname)) == ZPROP_INVAL)
    290 			return (EINVAL);
    291 
    292 		switch (prop) {
    293 		case ZPOOL_PROP_VERSION:
    294 			error = nvpair_value_uint64(elem, &intval);
    295 			if (!error &&
    296 			    (intval < spa_version(spa) || intval > SPA_VERSION))
    297 				error = EINVAL;
    298 			break;
    299 
    300 		case ZPOOL_PROP_DELEGATION:
    301 		case ZPOOL_PROP_AUTOREPLACE:
    302 		case ZPOOL_PROP_LISTSNAPS:
    303 			error = nvpair_value_uint64(elem, &intval);
    304 			if (!error && intval > 1)
    305 				error = EINVAL;
    306 			break;
    307 
    308 		case ZPOOL_PROP_BOOTFS:
    309 			if (spa_version(spa) < SPA_VERSION_BOOTFS) {
    310 				error = ENOTSUP;
    311 				break;
    312 			}
    313 
    314 			/*
    315 			 * Make sure the vdev config is bootable
    316 			 */
    317 			if (!vdev_is_bootable(spa->spa_root_vdev)) {
    318 				error = ENOTSUP;
    319 				break;
    320 			}
    321 
    322 			reset_bootfs = 1;
    323 
    324 			error = nvpair_value_string(elem, &strval);
    325 
    326 			if (!error) {
    327 				uint64_t compress;
    328 
    329 				if (strval == NULL || strval[0] == '\0') {
    330 					objnum = zpool_prop_default_numeric(
    331 					    ZPOOL_PROP_BOOTFS);
    332 					break;
    333 				}
    334 
    335 				if (error = dmu_objset_open(strval, DMU_OST_ZFS,
    336 				    DS_MODE_USER | DS_MODE_READONLY, &os))
    337 					break;
    338 
    339 				/* We don't support gzip bootable datasets */
    340 				if ((error = dsl_prop_get_integer(strval,
    341 				    zfs_prop_to_name(ZFS_PROP_COMPRESSION),
    342 				    &compress, NULL)) == 0 &&
    343 				    !BOOTFS_COMPRESS_VALID(compress)) {
    344 					error = ENOTSUP;
    345 				} else {
    346 					objnum = dmu_objset_id(os);
    347 				}
    348 				dmu_objset_close(os);
    349 			}
    350 			break;
    351 
    352 		case ZPOOL_PROP_FAILUREMODE:
    353 			error = nvpair_value_uint64(elem, &intval);
    354 			if (!error && (intval < ZIO_FAILURE_MODE_WAIT ||
    355 			    intval > ZIO_FAILURE_MODE_PANIC))
    356 				error = EINVAL;
    357 
    358 			/*
    359 			 * This is a special case which only occurs when
    360 			 * the pool has completely failed. This allows
    361 			 * the user to change the in-core failmode property
    362 			 * without syncing it out to disk (I/Os might
    363 			 * currently be blocked). We do this by returning
    364 			 * EIO to the caller (spa_prop_set) to trick it
    365 			 * into thinking we encountered a property validation
    366 			 * error.
    367 			 */
    368 			if (!error && spa_suspended(spa)) {
    369 				spa->spa_failmode = intval;
    370 				error = EIO;
    371 			}
    372 			break;
    373 
    374 		case ZPOOL_PROP_CACHEFILE:
    375 			if ((error = nvpair_value_string(elem, &strval)) != 0)
    376 				break;
    377 
    378 			if (strval[0] == '\0')
    379 				break;
    380 
    381 			if (strcmp(strval, "none") == 0)
    382 				break;
    383 
    384 			if (strval[0] != '/') {
    385 				error = EINVAL;
    386 				break;
    387 			}
    388 
    389 			slash = strrchr(strval, '/');
    390 			ASSERT(slash != NULL);
    391 
    392 			if (slash[1] == '\0' || strcmp(slash, "/.") == 0 ||
    393 			    strcmp(slash, "/..") == 0)
    394 				error = EINVAL;
    395 			break;
    396 		}
    397 
    398 		if (error)
    399 			break;
    400 	}
    401 
    402 	if (!error && reset_bootfs) {
    403 		error = nvlist_remove(props,
    404 		    zpool_prop_to_name(ZPOOL_PROP_BOOTFS), DATA_TYPE_STRING);
    405 
    406 		if (!error) {
    407 			error = nvlist_add_uint64(props,
    408 			    zpool_prop_to_name(ZPOOL_PROP_BOOTFS), objnum);
    409 		}
    410 	}
    411 
    412 	return (error);
    413 }
    414 
    415 int
    416 spa_prop_set(spa_t *spa, nvlist_t *nvp)
    417 {
    418 	int error;
    419 
    420 	if ((error = spa_prop_validate(spa, nvp)) != 0)
    421 		return (error);
    422 
    423 	return (dsl_sync_task_do(spa_get_dsl(spa), NULL, spa_sync_props,
    424 	    spa, nvp, 3));
    425 }
    426 
    427 /*
    428  * If the bootfs property value is dsobj, clear it.
    429  */
    430 void
    431 spa_prop_clear_bootfs(spa_t *spa, uint64_t dsobj, dmu_tx_t *tx)
    432 {
    433 	if (spa->spa_bootfs == dsobj && spa->spa_pool_props_object != 0) {
    434 		VERIFY(zap_remove(spa->spa_meta_objset,
    435 		    spa->spa_pool_props_object,
    436 		    zpool_prop_to_name(ZPOOL_PROP_BOOTFS), tx) == 0);
    437 		spa->spa_bootfs = 0;
    438 	}
    439 }
    440 
    441 /*
    442  * ==========================================================================
    443  * SPA state manipulation (open/create/destroy/import/export)
    444  * ==========================================================================
    445  */
    446 
    447 static int
    448 spa_error_entry_compare(const void *a, const void *b)
    449 {
    450 	spa_error_entry_t *sa = (spa_error_entry_t *)a;
    451 	spa_error_entry_t *sb = (spa_error_entry_t *)b;
    452 	int ret;
    453 
    454 	ret = bcmp(&sa->se_bookmark, &sb->se_bookmark,
    455 	    sizeof (zbookmark_t));
    456 
    457 	if (ret < 0)
    458 		return (-1);
    459 	else if (ret > 0)
    460 		return (1);
    461 	else
    462 		return (0);
    463 }
    464 
    465 /*
    466  * Utility function which retrieves copies of the current logs and
    467  * re-initializes them in the process.
    468  */
    469 void
    470 spa_get_errlists(spa_t *spa, avl_tree_t *last, avl_tree_t *scrub)
    471 {
    472 	ASSERT(MUTEX_HELD(&spa->spa_errlist_lock));
    473 
    474 	bcopy(&spa->spa_errlist_last, last, sizeof (avl_tree_t));
    475 	bcopy(&spa->spa_errlist_scrub, scrub, sizeof (avl_tree_t));
    476 
    477 	avl_create(&spa->spa_errlist_scrub,
    478 	    spa_error_entry_compare, sizeof (spa_error_entry_t),
    479 	    offsetof(spa_error_entry_t, se_avl));
    480 	avl_create(&spa->spa_errlist_last,
    481 	    spa_error_entry_compare, sizeof (spa_error_entry_t),
    482 	    offsetof(spa_error_entry_t, se_avl));
    483 }
    484 
    485 /*
    486  * Activate an uninitialized pool.
    487  */
    488 static void
    489 spa_activate(spa_t *spa)
    490 {
    491 	ASSERT(spa->spa_state == POOL_STATE_UNINITIALIZED);
    492 
    493 	spa->spa_state = POOL_STATE_ACTIVE;
    494 
    495 	spa->spa_normal_class = metaslab_class_create();
    496 	spa->spa_log_class = metaslab_class_create();
    497 
    498 	for (int t = 0; t < ZIO_TYPES; t++) {
    499 		for (int q = 0; q < ZIO_TASKQ_TYPES; q++) {
    500 			spa->spa_zio_taskq[t][q] = taskq_create("spa_zio",
    501 			    zio_taskq_threads[t][q], maxclsyspri, 50,
    502 			    INT_MAX, TASKQ_PREPOPULATE);
    503 		}
    504 	}
    505 
    506 	list_create(&spa->spa_config_dirty_list, sizeof (vdev_t),
    507 	    offsetof(vdev_t, vdev_config_dirty_node));
    508 	list_create(&spa->spa_state_dirty_list, sizeof (vdev_t),
    509 	    offsetof(vdev_t, vdev_state_dirty_node));
    510 
    511 	txg_list_create(&spa->spa_vdev_txg_list,
    512 	    offsetof(struct vdev, vdev_txg_node));
    513 
    514 	avl_create(&spa->spa_errlist_scrub,
    515 	    spa_error_entry_compare, sizeof (spa_error_entry_t),
    516 	    offsetof(spa_error_entry_t, se_avl));
    517 	avl_create(&spa->spa_errlist_last,
    518 	    spa_error_entry_compare, sizeof (spa_error_entry_t),
    519 	    offsetof(spa_error_entry_t, se_avl));
    520 }
    521 
    522 /*
    523  * Opposite of spa_activate().
    524  */
    525 static void
    526 spa_deactivate(spa_t *spa)
    527 {
    528 	ASSERT(spa->spa_sync_on == B_FALSE);
    529 	ASSERT(spa->spa_dsl_pool == NULL);
    530 	ASSERT(spa->spa_root_vdev == NULL);
    531 
    532 	ASSERT(spa->spa_state != POOL_STATE_UNINITIALIZED);
    533 
    534 	txg_list_destroy(&spa->spa_vdev_txg_list);
    535 
    536 	list_destroy(&spa->spa_config_dirty_list);
    537 	list_destroy(&spa->spa_state_dirty_list);
    538 
    539 	for (int t = 0; t < ZIO_TYPES; t++) {
    540 		for (int q = 0; q < ZIO_TASKQ_TYPES; q++) {
    541 			taskq_destroy(spa->spa_zio_taskq[t][q]);
    542 			spa->spa_zio_taskq[t][q] = NULL;
    543 		}
    544 	}
    545 
    546 	metaslab_class_destroy(spa->spa_normal_class);
    547 	spa->spa_normal_class = NULL;
    548 
    549 	metaslab_class_destroy(spa->spa_log_class);
    550 	spa->spa_log_class = NULL;
    551 
    552 	/*
    553 	 * If this was part of an import or the open otherwise failed, we may
    554 	 * still have errors left in the queues.  Empty them just in case.
    555 	 */
    556 	spa_errlog_drain(spa);
    557 
    558 	avl_destroy(&spa->spa_errlist_scrub);
    559 	avl_destroy(&spa->spa_errlist_last);
    560 
    561 	spa->spa_state = POOL_STATE_UNINITIALIZED;
    562 }
    563 
    564 /*
    565  * Verify a pool configuration, and construct the vdev tree appropriately.  This
    566  * will create all the necessary vdevs in the appropriate layout, with each vdev
    567  * in the CLOSED state.  This will prep the pool before open/creation/import.
    568  * All vdev validation is done by the vdev_alloc() routine.
    569  */
    570 static int
    571 spa_config_parse(spa_t *spa, vdev_t **vdp, nvlist_t *nv, vdev_t *parent,
    572     uint_t id, int atype)
    573 {
    574 	nvlist_t **child;
    575 	uint_t c, children;
    576 	int error;
    577 
    578 	if ((error = vdev_alloc(spa, vdp, nv, parent, id, atype)) != 0)
    579 		return (error);
    580 
    581 	if ((*vdp)->vdev_ops->vdev_op_leaf)
    582 		return (0);
    583 
    584 	error = nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_CHILDREN,
    585 	    &child, &children);
    586 
    587 	if (error == ENOENT)
    588 		return (0);
    589 
    590 	if (error) {
    591 		vdev_free(*vdp);
    592 		*vdp = NULL;
    593 		return (EINVAL);
    594 	}
    595 
    596 	for (c = 0; c < children; c++) {
    597 		vdev_t *vd;
    598 		if ((error = spa_config_parse(spa, &vd, child[c], *vdp, c,
    599 		    atype)) != 0) {
    600 			vdev_free(*vdp);
    601 			*vdp = NULL;
    602 			return (error);
    603 		}
    604 	}
    605 
    606 	ASSERT(*vdp != NULL);
    607 
    608 	return (0);
    609 }
    610 
    611 /*
    612  * Opposite of spa_load().
    613  */
    614 static void
    615 spa_unload(spa_t *spa)
    616 {
    617 	int i;
    618 
    619 	ASSERT(MUTEX_HELD(&spa_namespace_lock));
    620 
    621 	/*
    622 	 * Stop async tasks.
    623 	 */
    624 	spa_async_suspend(spa);
    625 
    626 	/*
    627 	 * Stop syncing.
    628 	 */
    629 	if (spa->spa_sync_on) {
    630 		txg_sync_stop(spa->spa_dsl_pool);
    631 		spa->spa_sync_on = B_FALSE;
    632 	}
    633 
    634 	/*
    635 	 * Wait for any outstanding async I/O to complete.
    636 	 */
    637 	mutex_enter(&spa->spa_async_root_lock);
    638 	while (spa->spa_async_root_count != 0)
    639 		cv_wait(&spa->spa_async_root_cv, &spa->spa_async_root_lock);
    640 	mutex_exit(&spa->spa_async_root_lock);
    641 
    642 	/*
    643 	 * Drop and purge level 2 cache
    644 	 */
    645 	spa_l2cache_drop(spa);
    646 
    647 	/*
    648 	 * Close the dsl pool.
    649 	 */
    650 	if (spa->spa_dsl_pool) {
    651 		dsl_pool_close(spa->spa_dsl_pool);
    652 		spa->spa_dsl_pool = NULL;
    653 	}
    654 
    655 	/*
    656 	 * Close all vdevs.
    657 	 */
    658 	if (spa->spa_root_vdev)
    659 		vdev_free(spa->spa_root_vdev);
    660 	ASSERT(spa->spa_root_vdev == NULL);
    661 
    662 	for (i = 0; i < spa->spa_spares.sav_count; i++)
    663 		vdev_free(spa->spa_spares.sav_vdevs[i]);
    664 	if (spa->spa_spares.sav_vdevs) {
    665 		kmem_free(spa->spa_spares.sav_vdevs,
    666 		    spa->spa_spares.sav_count * sizeof (void *));
    667 		spa->spa_spares.sav_vdevs = NULL;
    668 	}
    669 	if (spa->spa_spares.sav_config) {
    670 		nvlist_free(spa->spa_spares.sav_config);
    671 		spa->spa_spares.sav_config = NULL;
    672 	}
    673 	spa->spa_spares.sav_count = 0;
    674 
    675 	for (i = 0; i < spa->spa_l2cache.sav_count; i++)
    676 		vdev_free(spa->spa_l2cache.sav_vdevs[i]);
    677 	if (spa->spa_l2cache.sav_vdevs) {
    678 		kmem_free(spa->spa_l2cache.sav_vdevs,
    679 		    spa->spa_l2cache.sav_count * sizeof (void *));
    680 		spa->spa_l2cache.sav_vdevs = NULL;
    681 	}
    682 	if (spa->spa_l2cache.sav_config) {
    683 		nvlist_free(spa->spa_l2cache.sav_config);
    684 		spa->spa_l2cache.sav_config = NULL;
    685 	}
    686 	spa->spa_l2cache.sav_count = 0;
    687 
    688 	spa->spa_async_suspended = 0;
    689 }
    690 
    691 /*
    692  * Load (or re-load) the current list of vdevs describing the active spares for
    693  * this pool.  When this is called, we have some form of basic information in
    694  * 'spa_spares.sav_config'.  We parse this into vdevs, try to open them, and
    695  * then re-generate a more complete list including status information.
    696  */
    697 static void
    698 spa_load_spares(spa_t *spa)
    699 {
    700 	nvlist_t **spares;
    701 	uint_t nspares;
    702 	int i;
    703 	vdev_t *vd, *tvd;
    704 
    705 	ASSERT(spa_config_held(spa, SCL_ALL, RW_WRITER) == SCL_ALL);
    706 
    707 	/*
    708 	 * First, close and free any existing spare vdevs.
    709 	 */
    710 	for (i = 0; i < spa->spa_spares.sav_count; i++) {
    711 		vd = spa->spa_spares.sav_vdevs[i];
    712 
    713 		/* Undo the call to spa_activate() below */
    714 		if ((tvd = spa_lookup_by_guid(spa, vd->vdev_guid,
    715 		    B_FALSE)) != NULL && tvd->vdev_isspare)
    716 			spa_spare_remove(tvd);
    717 		vdev_close(vd);
    718 		vdev_free(vd);
    719 	}
    720 
    721 	if (spa->spa_spares.sav_vdevs)
    722 		kmem_free(spa->spa_spares.sav_vdevs,
    723 		    spa->spa_spares.sav_count * sizeof (void *));
    724 
    725 	if (spa->spa_spares.sav_config == NULL)
    726 		nspares = 0;
    727 	else
    728 		VERIFY(nvlist_lookup_nvlist_array(spa->spa_spares.sav_config,
    729 		    ZPOOL_CONFIG_SPARES, &spares, &nspares) == 0);
    730 
    731 	spa->spa_spares.sav_count = (int)nspares;
    732 	spa->spa_spares.sav_vdevs = NULL;
    733 
    734 	if (nspares == 0)
    735 		return;
    736 
    737 	/*
    738 	 * Construct the array of vdevs, opening them to get status in the
    739 	 * process.   For each spare, there is potentially two different vdev_t
    740 	 * structures associated with it: one in the list of spares (used only
    741 	 * for basic validation purposes) and one in the active vdev
    742 	 * configuration (if it's spared in).  During this phase we open and
    743 	 * validate each vdev on the spare list.  If the vdev also exists in the
    744 	 * active configuration, then we also mark this vdev as an active spare.
    745 	 */
    746 	spa->spa_spares.sav_vdevs = kmem_alloc(nspares * sizeof (void *),
    747 	    KM_SLEEP);
    748 	for (i = 0; i < spa->spa_spares.sav_count; i++) {
    749 		VERIFY(spa_config_parse(spa, &vd, spares[i], NULL, 0,
    750 		    VDEV_ALLOC_SPARE) == 0);
    751 		ASSERT(vd != NULL);
    752 
    753 		spa->spa_spares.sav_vdevs[i] = vd;
    754 
    755 		if ((tvd = spa_lookup_by_guid(spa, vd->vdev_guid,
    756 		    B_FALSE)) != NULL) {
    757 			if (!tvd->vdev_isspare)
    758 				spa_spare_add(tvd);
    759 
    760 			/*
    761 			 * We only mark the spare active if we were successfully
    762 			 * able to load the vdev.  Otherwise, importing a pool
    763 			 * with a bad active spare would result in strange
    764 			 * behavior, because multiple pool would think the spare
    765 			 * is actively in use.
    766 			 *
    767 			 * There is a vulnerability here to an equally bizarre
    768 			 * circumstance, where a dead active spare is later
    769 			 * brought back to life (onlined or otherwise).  Given
    770 			 * the rarity of this scenario, and the extra complexity
    771 			 * it adds, we ignore the possibility.
    772 			 */
    773 			if (!vdev_is_dead(tvd))
    774 				spa_spare_activate(tvd);
    775 		}
    776 
    777 		vd->vdev_top = vd;
    778 
    779 		if (vdev_open(vd) != 0)
    780 			continue;
    781 
    782 		if (vdev_validate_aux(vd) == 0)
    783 			spa_spare_add(vd);
    784 	}
    785 
    786 	/*
    787 	 * Recompute the stashed list of spares, with status information
    788 	 * this time.
    789 	 */
    790 	VERIFY(nvlist_remove(spa->spa_spares.sav_config, ZPOOL_CONFIG_SPARES,
    791 	    DATA_TYPE_NVLIST_ARRAY) == 0);
    792 
    793 	spares = kmem_alloc(spa->spa_spares.sav_count * sizeof (void *),
    794 	    KM_SLEEP);
    795 	for (i = 0; i < spa->spa_spares.sav_count; i++)
    796 		spares[i] = vdev_config_generate(spa,
    797 		    spa->spa_spares.sav_vdevs[i], B_TRUE, B_TRUE, B_FALSE);
    798 	VERIFY(nvlist_add_nvlist_array(spa->spa_spares.sav_config,
    799 	    ZPOOL_CONFIG_SPARES, spares, spa->spa_spares.sav_count) == 0);
    800 	for (i = 0; i < spa->spa_spares.sav_count; i++)
    801 		nvlist_free(spares[i]);
    802 	kmem_free(spares, spa->spa_spares.sav_count * sizeof (void *));
    803 }
    804 
    805 /*
    806  * Load (or re-load) the current list of vdevs describing the active l2cache for
    807  * this pool.  When this is called, we have some form of basic information in
    808  * 'spa_l2cache.sav_config'.  We parse this into vdevs, try to open them, and
    809  * then re-generate a more complete list including status information.
    810  * Devices which are already active have their details maintained, and are
    811  * not re-opened.
    812  */
    813 static void
    814 spa_load_l2cache(spa_t *spa)
    815 {
    816 	nvlist_t **l2cache;
    817 	uint_t nl2cache;
    818 	int i, j, oldnvdevs;
    819 	uint64_t guid, size;
    820 	vdev_t *vd, **oldvdevs, **newvdevs;
    821 	spa_aux_vdev_t *sav = &spa->spa_l2cache;
    822 
    823 	ASSERT(spa_config_held(spa, SCL_ALL, RW_WRITER) == SCL_ALL);
    824 
    825 	if (sav->sav_config != NULL) {
    826 		VERIFY(nvlist_lookup_nvlist_array(sav->sav_config,
    827 		    ZPOOL_CONFIG_L2CACHE, &l2cache, &nl2cache) == 0);
    828 		newvdevs = kmem_alloc(nl2cache * sizeof (void *), KM_SLEEP);
    829 	} else {
    830 		nl2cache = 0;
    831 	}
    832 
    833 	oldvdevs = sav->sav_vdevs;
    834 	oldnvdevs = sav->sav_count;
    835 	sav->sav_vdevs = NULL;
    836 	sav->sav_count = 0;
    837 
    838 	/*
    839 	 * Process new nvlist of vdevs.
    840 	 */
    841 	for (i = 0; i < nl2cache; i++) {
    842 		VERIFY(nvlist_lookup_uint64(l2cache[i], ZPOOL_CONFIG_GUID,
    843 		    &guid) == 0);
    844 
    845 		newvdevs[i] = NULL;
    846 		for (j = 0; j < oldnvdevs; j++) {
    847 			vd = oldvdevs[j];
    848 			if (vd != NULL && guid == vd->vdev_guid) {
    849 				/*
    850 				 * Retain previous vdev for add/remove ops.
    851 				 */
    852 				newvdevs[i] = vd;
    853 				oldvdevs[j] = NULL;
    854 				break;
    855 			}
    856 		}
    857 
    858 		if (newvdevs[i] == NULL) {
    859 			/*
    860 			 * Create new vdev
    861 			 */
    862 			VERIFY(spa_config_parse(spa, &vd, l2cache[i], NULL, 0,
    863 			    VDEV_ALLOC_L2CACHE) == 0);
    864 			ASSERT(vd != NULL);
    865 			newvdevs[i] = vd;
    866 
    867 			/*
    868 			 * Commit this vdev as an l2cache device,
    869 			 * even if it fails to open.
    870 			 */
    871 			spa_l2cache_add(vd);
    872 
    873 			vd->vdev_top = vd;
    874 			vd->vdev_aux = sav;
    875 
    876 			spa_l2cache_activate(vd);
    877 
    878 			if (vdev_open(vd) != 0)
    879 				continue;
    880 
    881 			(void) vdev_validate_aux(vd);
    882 
    883 			if (!vdev_is_dead(vd)) {
    884 				size = vdev_get_rsize(vd);
    885 				l2arc_add_vdev(spa, vd,
    886 				    VDEV_LABEL_START_SIZE,
    887 				    size - VDEV_LABEL_START_SIZE);
    888 			}
    889 		}
    890 	}
    891 
    892 	/*
    893 	 * Purge vdevs that were dropped
    894 	 */
    895 	for (i = 0; i < oldnvdevs; i++) {
    896 		uint64_t pool;
    897 
    898 		vd = oldvdevs[i];
    899 		if (vd != NULL) {
    900 			if ((spa_mode & FWRITE) &&
    901 			    spa_l2cache_exists(vd->vdev_guid, &pool) &&
    902 			    pool != 0ULL &&
    903 			    l2arc_vdev_present(vd)) {
    904 				l2arc_remove_vdev(vd);
    905 			}
    906 			(void) vdev_close(vd);
    907 			spa_l2cache_remove(vd);
    908 		}
    909 	}
    910 
    911 	if (oldvdevs)
    912 		kmem_free(oldvdevs, oldnvdevs * sizeof (void *));
    913 
    914 	if (sav->sav_config == NULL)
    915 		goto out;
    916 
    917 	sav->sav_vdevs = newvdevs;
    918 	sav->sav_count = (int)nl2cache;
    919 
    920 	/*
    921 	 * Recompute the stashed list of l2cache devices, with status
    922 	 * information this time.
    923 	 */
    924 	VERIFY(nvlist_remove(sav->sav_config, ZPOOL_CONFIG_L2CACHE,
    925 	    DATA_TYPE_NVLIST_ARRAY) == 0);
    926 
    927 	l2cache = kmem_alloc(sav->sav_count * sizeof (void *), KM_SLEEP);
    928 	for (i = 0; i < sav->sav_count; i++)
    929 		l2cache[i] = vdev_config_generate(spa,
    930 		    sav->sav_vdevs[i], B_TRUE, B_FALSE, B_TRUE);
    931 	VERIFY(nvlist_add_nvlist_array(sav->sav_config,
    932 	    ZPOOL_CONFIG_L2CACHE, l2cache, sav->sav_count) == 0);
    933 out:
    934 	for (i = 0; i < sav->sav_count; i++)
    935 		nvlist_free(l2cache[i]);
    936 	if (sav->sav_count)
    937 		kmem_free(l2cache, sav->sav_count * sizeof (void *));
    938 }
    939 
    940 static int
    941 load_nvlist(spa_t *spa, uint64_t obj, nvlist_t **value)
    942 {
    943 	dmu_buf_t *db;
    944 	char *packed = NULL;
    945 	size_t nvsize = 0;
    946 	int error;
    947 	*value = NULL;
    948 
    949 	VERIFY(0 == dmu_bonus_hold(spa->spa_meta_objset, obj, FTAG, &db));
    950 	nvsize = *(uint64_t *)db->db_data;
    951 	dmu_buf_rele(db, FTAG);
    952 
    953 	packed = kmem_alloc(nvsize, KM_SLEEP);
    954 	error = dmu_read(spa->spa_meta_objset, obj, 0, nvsize, packed);
    955 	if (error == 0)
    956 		error = nvlist_unpack(packed, nvsize, value, 0);
    957 	kmem_free(packed, nvsize);
    958 
    959 	return (error);
    960 }
    961 
    962 /*
    963  * Checks to see if the given vdev could not be opened, in which case we post a
    964  * sysevent to notify the autoreplace code that the device has been removed.
    965  */
    966 static void
    967 spa_check_removed(vdev_t *vd)
    968 {
    969 	int c;
    970 
    971 	for (c = 0; c < vd->vdev_children; c++)
    972 		spa_check_removed(vd->vdev_child[c]);
    973 
    974 	if (vd->vdev_ops->vdev_op_leaf && vdev_is_dead(vd)) {
    975 		zfs_post_autoreplace(vd->vdev_spa, vd);
    976 		spa_event_notify(vd->vdev_spa, vd, ESC_ZFS_VDEV_CHECK);
    977 	}
    978 }
    979 
    980 /*
    981  * Check for missing log devices
    982  */
    983 int
    984 spa_check_logs(spa_t *spa)
    985 {
    986 	switch (spa->spa_log_state) {
    987 	case SPA_LOG_MISSING:
    988 		/* need to recheck in case slog has been restored */
    989 	case SPA_LOG_UNKNOWN:
    990 		if (dmu_objset_find(spa->spa_name, zil_check_log_chain, NULL,
    991 		    DS_FIND_CHILDREN)) {
    992 			spa->spa_log_state = SPA_LOG_MISSING;
    993 			return (1);
    994 		}
    995 		break;
    996 
    997 	case SPA_LOG_CLEAR:
    998 		(void) dmu_objset_find(spa->spa_name, zil_clear_log_chain, NULL,
    999 		    DS_FIND_CHILDREN);
   1000 		break;
   1001 	}
   1002 	spa->spa_log_state = SPA_LOG_GOOD;
   1003 	return (0);
   1004 }
   1005 
   1006 /*
   1007  * Load an existing storage pool, using the pool's builtin spa_config as a
   1008  * source of configuration information.
   1009  */
   1010 static int
   1011 spa_load(spa_t *spa, nvlist_t *config, spa_load_state_t state, int mosconfig)
   1012 {
   1013 	int error = 0;
   1014 	nvlist_t *nvroot = NULL;
   1015 	vdev_t *rvd;
   1016 	uberblock_t *ub = &spa->spa_uberblock;
   1017 	uint64_t config_cache_txg = spa->spa_config_txg;
   1018 	uint64_t pool_guid;
   1019 	uint64_t version;
   1020 	uint64_t autoreplace = 0;
   1021 	char *ereport = FM_EREPORT_ZFS_POOL;
   1022 
   1023 	ASSERT(MUTEX_HELD(&spa_namespace_lock));
   1024 
   1025 	spa->spa_load_state = state;
   1026 
   1027 	if (nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE, &nvroot) ||
   1028 	    nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_GUID, &pool_guid)) {
   1029 		error = EINVAL;
   1030 		goto out;
   1031 	}
   1032 
   1033 	/*
   1034 	 * Versioning wasn't explicitly added to the label until later, so if
   1035 	 * it's not present treat it as the initial version.
   1036 	 */
   1037 	if (nvlist_lookup_uint64(config, ZPOOL_CONFIG_VERSION, &version) != 0)
   1038 		version = SPA_VERSION_INITIAL;
   1039 
   1040 	(void) nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_TXG,
   1041 	    &spa->spa_config_txg);
   1042 
   1043 	if ((state == SPA_LOAD_IMPORT || state == SPA_LOAD_TRYIMPORT) &&
   1044 	    spa_guid_exists(pool_guid, 0)) {
   1045 		error = EEXIST;
   1046 		goto out;
   1047 	}
   1048 
   1049 	spa->spa_load_guid = pool_guid;
   1050 
   1051 	/*
   1052 	 * Parse the configuration into a vdev tree.  We exp