Home | History | Annotate | Download | only in zfs
      1 /*
      2  * CDDL HEADER START
      3  *
      4  * The contents of this file are subject to the terms of the
      5  * Common Development and Distribution License (the "License").
      6  * You may not use this file except in compliance with the License.
      7  *
      8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
      9  * or http://www.opensolaris.org/os/licensing.
     10  * See the License for the specific language governing permissions
     11  * and limitations under the License.
     12  *
     13  * When distributing Covered Code, include this CDDL HEADER in each
     14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
     15  * If applicable, add the following below this CDDL HEADER, with the
     16  * fields enclosed by brackets "[]" replaced with your own identifying
     17  * information: Portions Copyright [yyyy] [name of copyright owner]
     18  *
     19  * CDDL HEADER END
     20  */
     21 /*
     22  * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
     23  * Use is subject to license terms.
     24  */
     25 
     26 #pragma ident	"@(#)bplist.c	1.5	07/10/29 SMI"
     27 
     28 #include <sys/bplist.h>
     29 #include <sys/zfs_context.h>
     30 
     31 static int
     32 bplist_hold(bplist_t *bpl)
     33 {
     34 	ASSERT(MUTEX_HELD(&bpl->bpl_lock));
     35 	if (bpl->bpl_dbuf == NULL) {
     36 		int err = dmu_bonus_hold(bpl->bpl_mos,
     37 		    bpl->bpl_object, bpl, &bpl->bpl_dbuf);
     38 		if (err)
     39 			return (err);
     40 		bpl->bpl_phys = bpl->bpl_dbuf->db_data;
     41 	}
     42 	return (0);
     43 }
     44 
     45 uint64_t
     46 bplist_create(objset_t *mos, int blocksize, dmu_tx_t *tx)
     47 {
     48 	int size;
     49 
     50 	size = spa_version(dmu_objset_spa(mos)) < SPA_VERSION_BPLIST_ACCOUNT ?
     51 	    BPLIST_SIZE_V0 : sizeof (bplist_phys_t);
     52 
     53 	return (dmu_object_alloc(mos, DMU_OT_BPLIST, blocksize,
     54 	    DMU_OT_BPLIST_HDR, size, tx));
     55 }
     56 
     57 void
     58 bplist_destroy(objset_t *mos, uint64_t object, dmu_tx_t *tx)
     59 {
     60 	VERIFY(dmu_object_free(mos, object, tx) == 0);
     61 }
     62 
     63 int
     64 bplist_open(bplist_t *bpl, objset_t *mos, uint64_t object)
     65 {
     66 	dmu_object_info_t doi;
     67 	int err;
     68 
     69 	err = dmu_object_info(mos, object, &doi);
     70 	if (err)
     71 		return (err);
     72 
     73 	mutex_enter(&bpl->bpl_lock);
     74 
     75 	ASSERT(bpl->bpl_dbuf == NULL);
     76 	ASSERT(bpl->bpl_phys == NULL);
     77 	ASSERT(bpl->bpl_cached_dbuf == NULL);
     78 	ASSERT(bpl->bpl_queue == NULL);
     79 	ASSERT(object != 0);
     80 	ASSERT3U(doi.doi_type, ==, DMU_OT_BPLIST);
     81 	ASSERT3U(doi.doi_bonus_type, ==, DMU_OT_BPLIST_HDR);
     82 
     83 	bpl->bpl_mos = mos;
     84 	bpl->bpl_object = object;
     85 	bpl->bpl_blockshift = highbit(doi.doi_data_block_size - 1);
     86 	bpl->bpl_bpshift = bpl->bpl_blockshift - SPA_BLKPTRSHIFT;
     87 	bpl->bpl_havecomp = (doi.doi_bonus_size == sizeof (bplist_phys_t));
     88 
     89 	mutex_exit(&bpl->bpl_lock);
     90 	return (0);
     91 }
     92 
     93 void
     94 bplist_close(bplist_t *bpl)
     95 {
     96 	mutex_enter(&bpl->bpl_lock);
     97 
     98 	ASSERT(bpl->bpl_queue == NULL);
     99 
    100 	if (bpl->bpl_cached_dbuf) {
    101 		dmu_buf_rele(bpl->bpl_cached_dbuf, bpl);
    102 		bpl->bpl_cached_dbuf = NULL;
    103 	}
    104 	if (bpl->bpl_dbuf) {
    105 		dmu_buf_rele(bpl->bpl_dbuf, bpl);
    106 		bpl->bpl_dbuf = NULL;
    107 		bpl->bpl_phys = NULL;
    108 	}
    109 
    110 	mutex_exit(&bpl->bpl_lock);
    111 }
    112 
    113 boolean_t
    114 bplist_empty(bplist_t *bpl)
    115 {
    116 	boolean_t rv;
    117 
    118 	if (bpl->bpl_object == 0)
    119 		return (B_TRUE);
    120 
    121 	mutex_enter(&bpl->bpl_lock);
    122 	VERIFY(0 == bplist_hold(bpl)); /* XXX */
    123 	rv = (bpl->bpl_phys->bpl_entries == 0);
    124 	mutex_exit(&bpl->bpl_lock);
    125 
    126 	return (rv);
    127 }
    128 
    129 static int
    130 bplist_cache(bplist_t *bpl, uint64_t blkid)
    131 {
    132 	int err = 0;
    133 
    134 	if (bpl->bpl_cached_dbuf == NULL ||
    135 	    bpl->bpl_cached_dbuf->db_offset != (blkid << bpl->bpl_blockshift)) {
    136 		if (bpl->bpl_cached_dbuf != NULL)
    137 			dmu_buf_rele(bpl->bpl_cached_dbuf, bpl);
    138 		err = dmu_buf_hold(bpl->bpl_mos,
    139 		    bpl->bpl_object, blkid << bpl->bpl_blockshift,
    140 		    bpl, &bpl->bpl_cached_dbuf);
    141 		ASSERT(err || bpl->bpl_cached_dbuf->db_size ==
    142 		    1ULL << bpl->bpl_blockshift);
    143 	}
    144 	return (err);
    145 }
    146 
    147 int
    148 bplist_iterate(bplist_t *bpl, uint64_t *itorp, blkptr_t *bp)
    149 {
    150 	uint64_t blk, off;
    151 	blkptr_t *bparray;
    152 	int err;
    153 
    154 	mutex_enter(&bpl->bpl_lock);
    155 
    156 	err = bplist_hold(bpl);
    157 	if (err) {
    158 		mutex_exit(&bpl->bpl_lock);
    159 		return (err);
    160 	}
    161 
    162 	if (*itorp >= bpl->bpl_phys->bpl_entries) {
    163 		mutex_exit(&bpl->bpl_lock);
    164 		return (ENOENT);
    165 	}
    166 
    167 	blk = *itorp >> bpl->bpl_bpshift;
    168 	off = P2PHASE(*itorp, 1ULL << bpl->bpl_bpshift);
    169 
    170 	err = bplist_cache(bpl, blk);
    171 	if (err) {
    172 		mutex_exit(&bpl->bpl_lock);
    173 		return (err);
    174 	}
    175 
    176 	bparray = bpl->bpl_cached_dbuf->db_data;
    177 	*bp = bparray[off];
    178 	(*itorp)++;
    179 	mutex_exit(&bpl->bpl_lock);
    180 	return (0);
    181 }
    182 
    183 int
    184 bplist_enqueue(bplist_t *bpl, blkptr_t *bp, dmu_tx_t *tx)
    185 {
    186 	uint64_t blk, off;
    187 	blkptr_t *bparray;
    188 	int err;
    189 
    190 	ASSERT(!BP_IS_HOLE(bp));
    191 	mutex_enter(&bpl->bpl_lock);
    192 	err = bplist_hold(bpl);
    193 	if (err)
    194 		return (err);
    195 
    196 	blk = bpl->bpl_phys->bpl_entries >> bpl->bpl_bpshift;
    197 	off = P2PHASE(bpl->bpl_phys->bpl_entries, 1ULL << bpl->bpl_bpshift);
    198 
    199 	err = bplist_cache(bpl, blk);
    200 	if (err) {
    201 		mutex_exit(&bpl->bpl_lock);
    202 		return (err);
    203 	}
    204 
    205 	dmu_buf_will_dirty(bpl->bpl_cached_dbuf, tx);
    206 	bparray = bpl->bpl_cached_dbuf->db_data;
    207 	bparray[off] = *bp;
    208 
    209 	/* We never need the fill count. */
    210 	bparray[off].blk_fill = 0;
    211 
    212 	/* The bplist will compress better if we can leave off the checksum */
    213 	bzero(&bparray[off].blk_cksum, sizeof (bparray[off].blk_cksum));
    214 
    215 	dmu_buf_will_dirty(bpl->bpl_dbuf, tx);
    216 	bpl->bpl_phys->bpl_entries++;
    217 	bpl->bpl_phys->bpl_bytes +=
    218 	    bp_get_dasize(dmu_objset_spa(bpl->bpl_mos), bp);
    219 	if (bpl->bpl_havecomp) {
    220 		bpl->bpl_phys->bpl_comp += BP_GET_PSIZE(bp);
    221 		bpl->bpl_phys->bpl_uncomp += BP_GET_UCSIZE(bp);
    222 	}
    223 	mutex_exit(&bpl->bpl_lock);
    224 
    225 	return (0);
    226 }
    227 
    228 /*
    229  * Deferred entry; will be written later by bplist_sync().
    230  */
    231 void
    232 bplist_enqueue_deferred(bplist_t *bpl, blkptr_t *bp)
    233 {
    234 	bplist_q_t *bpq = kmem_alloc(sizeof (*bpq), KM_SLEEP);
    235 
    236 	ASSERT(!BP_IS_HOLE(bp));
    237 	mutex_enter(&bpl->bpl_lock);
    238 	bpq->bpq_blk = *bp;
    239 	bpq->bpq_next = bpl->bpl_queue;
    240 	bpl->bpl_queue = bpq;
    241 	mutex_exit(&bpl->bpl_lock);
    242 }
    243 
    244 void
    245 bplist_sync(bplist_t *bpl, dmu_tx_t *tx)
    246 {
    247 	bplist_q_t *bpq;
    248 
    249 	mutex_enter(&bpl->bpl_lock);
    250 	while ((bpq = bpl->bpl_queue) != NULL) {
    251 		bpl->bpl_queue = bpq->bpq_next;
    252 		mutex_exit(&bpl->bpl_lock);
    253 		VERIFY(0 == bplist_enqueue(bpl, &bpq->bpq_blk, tx));
    254 		kmem_free(bpq, sizeof (*bpq));
    255 		mutex_enter(&bpl->bpl_lock);
    256 	}
    257 	mutex_exit(&bpl->bpl_lock);
    258 }
    259 
    260 void
    261 bplist_vacate(bplist_t *bpl, dmu_tx_t *tx)
    262 {
    263 	mutex_enter(&bpl->bpl_lock);
    264 	ASSERT3P(bpl->bpl_queue, ==, NULL);
    265 	VERIFY(0 == bplist_hold(bpl));
    266 	dmu_buf_will_dirty(bpl->bpl_dbuf, tx);
    267 	VERIFY(0 == dmu_free_range(bpl->bpl_mos,
    268 	    bpl->bpl_object, 0, -1ULL, tx));
    269 	bpl->bpl_phys->bpl_entries = 0;
    270 	bpl->bpl_phys->bpl_bytes = 0;
    271 	if (bpl->bpl_havecomp) {
    272 		bpl->bpl_phys->bpl_comp = 0;
    273 		bpl->bpl_phys->bpl_uncomp = 0;
    274 	}
    275 	mutex_exit(&bpl->bpl_lock);
    276 }
    277 
    278 int
    279 bplist_space(bplist_t *bpl, uint64_t *usedp, uint64_t *compp, uint64_t *uncompp)
    280 {
    281 	int err;
    282 
    283 	mutex_enter(&bpl->bpl_lock);
    284 
    285 	err = bplist_hold(bpl);
    286 	if (err) {
    287 		mutex_exit(&bpl->bpl_lock);
    288 		return (err);
    289 	}
    290 
    291 	*usedp = bpl->bpl_phys->bpl_bytes;
    292 	if (bpl->bpl_havecomp) {
    293 		*compp = bpl->bpl_phys->bpl_comp;
    294 		*uncompp = bpl->bpl_phys->bpl_uncomp;
    295 	}
    296 	mutex_exit(&bpl->bpl_lock);
    297 
    298 	if (!bpl->bpl_havecomp) {
    299 		uint64_t itor = 0, comp = 0, uncomp = 0;
    300 		blkptr_t bp;
    301 
    302 		while ((err = bplist_iterate(bpl, &itor, &bp)) == 0) {
    303 			comp += BP_GET_PSIZE(&bp);
    304 			uncomp += BP_GET_UCSIZE(&bp);
    305 		}
    306 		if (err == ENOENT)
    307 			err = 0;
    308 		*compp = comp;
    309 		*uncompp = uncomp;
    310 	}
    311 
    312 	return (err);
    313 }
    314