Home | History | Annotate | Download | only in sys
      1 /*
      2  * CDDL HEADER START
      3  *
      4  * The contents of this file are subject to the terms of the
      5  * Common Development and Distribution License (the "License").
      6  * You may not use this file except in compliance with the License.
      7  *
      8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
      9  * or http://www.opensolaris.org/os/licensing.
     10  * See the License for the specific language governing permissions
     11  * and limitations under the License.
     12  *
     13  * When distributing Covered Code, include this CDDL HEADER in each
     14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
     15  * If applicable, add the following below this CDDL HEADER, with the
     16  * fields enclosed by brackets "[]" replaced with your own identifying
     17  * information: Portions Copyright [yyyy] [name of copyright owner]
     18  *
     19  * CDDL HEADER END
     20  */
     21 
     22 /*
     23  * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
     24  * Use is subject to license terms.
     25  */
     26 
     27 #ifndef _ZIO_H
     28 #define	_ZIO_H
     29 
     30 #pragma ident	"@(#)zio.h	1.19	07/12/12 SMI"
     31 
     32 #include <sys/zfs_context.h>
     33 #include <sys/spa.h>
     34 #include <sys/txg.h>
     35 #include <sys/avl.h>
     36 #include <sys/dkio.h>
     37 #include <sys/fs/zfs.h>
     38 #include <sys/zio_impl.h>
     39 
     40 #ifdef	__cplusplus
     41 extern "C" {
     42 #endif
     43 /*
         * Presumably the value expected in zbt_magic (confirm against the
         * checksum code).  Its hex digits read as the hexspeak phrase in the
         * trailing comment, which is why "bloc" is spelled without a 'k'.
         */
     44 #define	ZBT_MAGIC	0x210da7ab10c7a11ULL	/* zio data bloc tail */
     45 /*
         * Trailer made of a magic number followed by a 256-bit checksum.
         * zio_gbh_phys_t embeds one as its last member.
         */
     46 typedef struct zio_block_tail {
     47 	uint64_t	zbt_magic;	/* for validation, endianness	*/
     48 	zio_cksum_t	zbt_cksum;	/* 256-bit checksum		*/
     49 } zio_block_tail_t;
     50 
     51 /*
     52  * Gang block headers are self-checksumming and contain an array
     53  * of block pointers.
         *
         * SPA_GANGBLOCKSIZE	size of a gang block header; defined as
         *			SPA_MINBLOCKSIZE
         * SPA_GBH_NBLKPTRS	number of whole blkptr_t entries that fit once
         *			a zio_block_tail_t has been set aside
         * SPA_GBH_FILLER	space left over after the block pointers and
         *			the tail, counted in uint64_t words (the
         *			division truncates)
     54  */
     55 #define	SPA_GANGBLOCKSIZE	SPA_MINBLOCKSIZE
     56 #define	SPA_GBH_NBLKPTRS	((SPA_GANGBLOCKSIZE - \
     57 	sizeof (zio_block_tail_t)) / sizeof (blkptr_t))
     58 #define	SPA_GBH_FILLER		((SPA_GANGBLOCKSIZE - \
     59 	sizeof (zio_block_tail_t) - \
     60 	(SPA_GBH_NBLKPTRS * sizeof (blkptr_t))) /\
     61 	sizeof (uint64_t))
     62 /*
         * I/O size for a zio: SPA_GANGBLOCKSIZE when io_bp is a gang block
         * pointer, otherwise BP_GET_PSIZE() of io_bp.  The argument appears
         * twice in the expansion, so it should be free of side effects.
         */
     63 #define	ZIO_GET_IOSIZE(zio)	\
     64 	(BP_IS_GANG((zio)->io_bp) ? \
     65 	SPA_GANGBLOCKSIZE : BP_GET_PSIZE((zio)->io_bp))
     66 /*
         * Layout of a gang block header: the block pointer array, filler
         * words, then the tail.  With the array sizes derived from
         * SPA_GANGBLOCKSIZE, the members add up to SPA_GANGBLOCKSIZE bytes,
         * provided the space left after the block pointers is a multiple of
         * sizeof (uint64_t) and the compiler inserts no padding.
         */
     67 typedef struct zio_gbh {
     68 	blkptr_t		zg_blkptr[SPA_GBH_NBLKPTRS];
     69 	uint64_t		zg_filler[SPA_GBH_FILLER];
     70 	zio_block_tail_t	zg_tail;
     71 } zio_gbh_phys_t;
     72 /*
         * Checksum settings.  Enumerators after ZIO_CHECKSUM_INHERIT take
         * consecutive values by position, so inserting or reordering entries
         * renumbers everything that follows.  ZIO_CHECKSUM_FUNCTIONS comes
         * last and therefore equals the number of entries before it.
         */
     73 enum zio_checksum {
     74 	ZIO_CHECKSUM_INHERIT = 0,
     75 	ZIO_CHECKSUM_ON,
     76 	ZIO_CHECKSUM_OFF,
     77 	ZIO_CHECKSUM_LABEL,
     78 	ZIO_CHECKSUM_GANG_HEADER,
     79 	ZIO_CHECKSUM_ZILOG,
     80 	ZIO_CHECKSUM_FLETCHER_2,
     81 	ZIO_CHECKSUM_FLETCHER_4,
     82 	ZIO_CHECKSUM_SHA256,
     83 	ZIO_CHECKSUM_FUNCTIONS
     84 };
     85 /*
         * ZIO_CHECKSUM_ON_VALUE names a specific algorithm (presumably what
         * ZIO_CHECKSUM_ON resolves to -- confirm); ZIO_CHECKSUM_DEFAULT is
         * the "on" setting.
         */
     86 #define	ZIO_CHECKSUM_ON_VALUE	ZIO_CHECKSUM_FLETCHER_2
     87 #define	ZIO_CHECKSUM_DEFAULT	ZIO_CHECKSUM_ON
     88 /*
         * Compression settings.  Enumerators after ZIO_COMPRESS_INHERIT take
         * consecutive values by position, so inserting or reordering entries
         * renumbers everything that follows.  ZIO_COMPRESS_FUNCTIONS comes
         * last and therefore equals the number of entries before it.
         */
     89 enum zio_compress {
     90 	ZIO_COMPRESS_INHERIT = 0,
     91 	ZIO_COMPRESS_ON,
     92 	ZIO_COMPRESS_OFF,
     93 	ZIO_COMPRESS_LZJB,
     94 	ZIO_COMPRESS_EMPTY,
     95 	ZIO_COMPRESS_GZIP_1,
     96 	ZIO_COMPRESS_GZIP_2,
     97 	ZIO_COMPRESS_GZIP_3,
     98 	ZIO_COMPRESS_GZIP_4,
     99 	ZIO_COMPRESS_GZIP_5,
    100 	ZIO_COMPRESS_GZIP_6,
    101 	ZIO_COMPRESS_GZIP_7,
    102 	ZIO_COMPRESS_GZIP_8,
    103 	ZIO_COMPRESS_GZIP_9,
    104 	ZIO_COMPRESS_FUNCTIONS
    105 };
    106 /*
         * ZIO_COMPRESS_ON_VALUE names a specific algorithm (presumably what
         * ZIO_COMPRESS_ON resolves to -- confirm); ZIO_COMPRESS_DEFAULT is
         * the "off" setting.
         */
    107 #define	ZIO_COMPRESS_ON_VALUE	ZIO_COMPRESS_LZJB
    108 #define	ZIO_COMPRESS_DEFAULT	ZIO_COMPRESS_OFF
    109 /* Failure-mode settings; where they are consumed is not shown here. */
    110 #define	ZIO_FAILURE_MODE_WAIT		0
    111 #define	ZIO_FAILURE_MODE_CONTINUE	1
    112 #define	ZIO_FAILURE_MODE_PANIC		2
    113 /*
         * Priorities are not compile-time constants: each macro reads an
         * entry of zio_priority_table[].  ZIO_PRIORITY_TABLE_SIZE (10) matches
         * the indices 0 through 9 used here, so adding a priority means
         * bumping the size as well.
         */
    114 #define	ZIO_PRIORITY_NOW		(zio_priority_table[0])
    115 #define	ZIO_PRIORITY_SYNC_READ		(zio_priority_table[1])
    116 #define	ZIO_PRIORITY_SYNC_WRITE		(zio_priority_table[2])
    117 #define	ZIO_PRIORITY_ASYNC_READ		(zio_priority_table[3])
    118 #define	ZIO_PRIORITY_ASYNC_WRITE	(zio_priority_table[4])
    119 #define	ZIO_PRIORITY_FREE		(zio_priority_table[5])
    120 #define	ZIO_PRIORITY_CACHE_FILL		(zio_priority_table[6])
    121 #define	ZIO_PRIORITY_LOG_WRITE		(zio_priority_table[7])
    122 #define	ZIO_PRIORITY_RESILVER		(zio_priority_table[8])
    123 #define	ZIO_PRIORITY_SCRUB		(zio_priority_table[9])
    124 #define	ZIO_PRIORITY_TABLE_SIZE		10
    125 /*
         * I/O flag bits.  ZIO_FLAG_MUSTSUCCEED is 0, so it denotes the absence
         * of the other flags and cannot be detected with a bitwise AND; every
         * other flag occupies its own bit.
         */
    126 #define	ZIO_FLAG_MUSTSUCCEED		0x00000
    127 #define	ZIO_FLAG_CANFAIL		0x00001
    128 #define	ZIO_FLAG_FAILFAST		0x00002
    129 #define	ZIO_FLAG_CONFIG_HELD		0x00004
    130 #define	ZIO_FLAG_CONFIG_GRABBED		0x00008
    131 
    132 #define	ZIO_FLAG_DONT_CACHE		0x00010
    133 #define	ZIO_FLAG_DONT_QUEUE		0x00020
    134 #define	ZIO_FLAG_DONT_PROPAGATE		0x00040
    135 #define	ZIO_FLAG_DONT_RETRY		0x00080
    136 
    137 #define	ZIO_FLAG_PHYSICAL		0x00100
    138 #define	ZIO_FLAG_IO_BYPASS		0x00200
    139 #define	ZIO_FLAG_IO_REPAIR		0x00400
    140 #define	ZIO_FLAG_SPECULATIVE		0x00800
    141 
    142 #define	ZIO_FLAG_RESILVER		0x01000
    143 #define	ZIO_FLAG_SCRUB			0x02000
    144 #define	ZIO_FLAG_SCRUB_THREAD		0x04000
    145 #define	ZIO_FLAG_SUBBLOCK		0x08000
    146 
    147 #define	ZIO_FLAG_NOBOOKMARK		0x10000
    148 #define	ZIO_FLAG_USER			0x20000
    149 #define	ZIO_FLAG_METADATA		0x40000
    150 #define	ZIO_FLAG_WRITE_RETRY		0x80000
    151 /*
         * Flag masks.  Judging by the names these are the flags handed down
         * to gang, vdev and retry I/Os (confirm against the callers).  Each
         * mask is a superset of the one before it: VDEV adds ZIO_FLAG_PHYSICAL
         * to GANG, and RETRY adds three more flags to VDEV.
         */
    152 #define	ZIO_FLAG_GANG_INHERIT		\
    153 	(ZIO_FLAG_CANFAIL |		\
    154 	ZIO_FLAG_FAILFAST |		\
    155 	ZIO_FLAG_CONFIG_HELD |		\
    156 	ZIO_FLAG_DONT_CACHE |		\
    157 	ZIO_FLAG_DONT_RETRY |		\
    158 	ZIO_FLAG_IO_REPAIR |		\
    159 	ZIO_FLAG_SPECULATIVE |		\
    160 	ZIO_FLAG_RESILVER |		\
    161 	ZIO_FLAG_SCRUB |		\
    162 	ZIO_FLAG_SCRUB_THREAD |		\
    163 	ZIO_FLAG_USER | 		\
    164 	ZIO_FLAG_METADATA)
    165 
    166 #define	ZIO_FLAG_VDEV_INHERIT		\
    167 	(ZIO_FLAG_GANG_INHERIT |	\
    168 	ZIO_FLAG_PHYSICAL)
    169 
    170 #define	ZIO_FLAG_RETRY_INHERIT		\
    171 	(ZIO_FLAG_VDEV_INHERIT |	\
    172 	ZIO_FLAG_CONFIG_GRABBED |	\
    173 	ZIO_FLAG_DONT_PROPAGATE |	\
    174 	ZIO_FLAG_NOBOOKMARK)
    175 
    176 /* Pipeline continue/stop codes; where they are returned is not shown here. */
    177 #define	ZIO_PIPELINE_CONTINUE		0x100
    178 #define	ZIO_PIPELINE_STOP		0x101
    179 
    180 /*
    181  * We'll take the unused errno 'EBADE' (from the Convergent graveyard)
    182  * to indicate checksum errors.
    183  */
    184 #define	ECKSUM	EBADE
    185 /*
         * zio_done_func_t is the callback signature used for the io_ready and
         * io_done members of struct zio and for the ready/done arguments of
         * the functions declared in this header.
         */
    186 typedef struct zio zio_t;
    187 typedef void zio_done_func_t(zio_t *zio);
    188 /* Backing array for the ZIO_PRIORITY_* macros, and ZIO_TYPES strings. */
    189 extern uint8_t zio_priority_table[ZIO_PRIORITY_TABLE_SIZE];
    190 extern char *zio_type_name[ZIO_TYPES];
    191 
    192 /*
    193  * A bookmark is a four-tuple <objset, object, level, blkid> that uniquely
    194  * identifies any block in the pool.  By convention, the meta-objset (MOS)
    195  * is objset 0, the meta-dnode is object 0, the root block (osphys_t) is
    196  * level -1 of the meta-dnode, and intent log blocks (which are chained
    197  * off the root block) have blkid == sequence number.  In summary:
    198  *
    199  *	mos is objset 0
    200  *	meta-dnode is object 0
    201  *	root block is <objset, 0, -1, 0>
    202  *	intent log is <objset, 0, -1, ZIL sequence number>
    203  *
    204  * Note: this structure is called a bookmark because its first purpose was
    205  * to remember where to resume a pool-wide traversal.  The absolute ordering
    206  * for block visitation during traversal is defined in compare_bookmark().
    207  *
    208  * Note: this structure is passed between userland and the kernel.
    209  * Therefore it must not change size or alignment between 32/64 bit
    210  * compilation options.
         *
         * All four members are 64 bits wide.  zb_level is the only signed one,
         * which lets it hold the level -1 used for the root block.
    211  */
    212 typedef struct zbookmark {
    213 	uint64_t	zb_objset;
    214 	uint64_t	zb_object;
    215 	int64_t		zb_level;
    216 	uint64_t	zb_blkid;
    217 } zbookmark_t;
    218 /*
         * State carried by a single I/O.  Members are grouped by the section
         * comments below.  io_bp is a pointer, whereas io_bp_copy and
         * io_bp_orig are blkptr_t values embedded in the zio itself.
         * zio_link_node is the one member that does not use the io_ prefix.
         */
    219 struct zio {
    220 	/* Core information about this I/O */
    221 	zio_t		*io_parent;
    222 	zio_t		*io_root;
    223 	spa_t		*io_spa;
    224 	zbookmark_t	io_bookmark;
    225 	enum zio_checksum io_checksum;
    226 	enum zio_compress io_compress;
    227 	int		io_ndvas;
    228 	uint64_t	io_txg;
    229 	blkptr_t	*io_bp;
    230 	blkptr_t	io_bp_copy;
    231 	zio_t		*io_child;
    232 	zio_t		*io_sibling_prev;
    233 	zio_t		*io_sibling_next;
    234 	zio_transform_t *io_transform_stack;
    235 	zio_t		*io_logical;
    236 	list_node_t	zio_link_node;
    237 
    238 	/* Callback info */
    239 	zio_done_func_t	*io_ready;
    240 	zio_done_func_t	*io_done;
    241 	void		*io_private;
    242 	blkptr_t	io_bp_orig;
    243 
    244 	/* Data represented by this I/O */
    245 	void		*io_data;
    246 	uint64_t	io_size;
    247 
    248 	/* Stuff for the vdev stack */
    249 	vdev_t		*io_vd;
    250 	void		*io_vsd;
    251 	uint64_t	io_offset;
    252 	uint64_t	io_deadline;
    253 	uint64_t	io_timestamp;
    254 	avl_node_t	io_offset_node;
    255 	avl_node_t	io_deadline_node;
    256 	avl_tree_t	*io_vdev_tree;
    257 	zio_t		*io_delegate_list;
    258 	zio_t		*io_delegate_next;
    259 
    260 	/* Internal pipeline state */
    261 	int		io_flags;
    262 	int		io_orig_flags;
    263 	enum zio_type	io_type;
    264 	enum zio_stage	io_stage;
    265 	enum zio_stage	io_orig_stage;
    266 	uint8_t		io_stalled;
    267 	uint8_t		io_priority;
    268 	struct dk_callback io_dk_callback;
    269 	int		io_cmd;
    270 	int		io_retries;
    271 	int		io_error;
    272 	uint32_t	io_numerrors;
    273 	uint32_t	io_pipeline;
    274 	uint32_t	io_orig_pipeline;
    275 	uint64_t	io_children_notready;
    276 	uint64_t	io_children_notdone;
    277 	void		*io_waiter;
    278 	kmutex_t	io_lock;
    279 	kcondvar_t	io_cv;
    280 
    281 	/* FMA state */
    282 	uint64_t	io_ena;
    283 };
    284 /*
         * Functions that return a zio_t *.  Every one except zio_root() takes
         * a parent zio (pio) as its first argument.  The done/private pair is
         * presumably the completion callback and the value made available to
         * it (confirm in the implementation); zio_write() additionally
         * accepts a ready callback.
         */
    285 extern zio_t *zio_null(zio_t *pio, spa_t *spa,
    286     zio_done_func_t *done, void *private, int flags);
    287 
    288 extern zio_t *zio_root(spa_t *spa,
    289     zio_done_func_t *done, void *private, int flags);
    290 
    291 extern zio_t *zio_read(zio_t *pio, spa_t *spa, blkptr_t *bp, void *data,
    292     uint64_t size, zio_done_func_t *done, void *private,
    293     int priority, int flags, zbookmark_t *zb);
    294 
    295 extern zio_t *zio_write(zio_t *pio, spa_t *spa, int checksum, int compress,
    296     int ncopies, uint64_t txg, blkptr_t *bp, void *data, uint64_t size,
    297     zio_done_func_t *ready, zio_done_func_t *done, void *private, int priority,
    298     int flags, zbookmark_t *zb);
    299 
    300 extern zio_t *zio_rewrite(zio_t *pio, spa_t *spa, int checksum,
    301     uint64_t txg, blkptr_t *bp, void *data, uint64_t size,
    302     zio_done_func_t *done, void *private, int priority, int flags,
    303     zbookmark_t *zb);
    304 
    305 extern zio_t *zio_free(zio_t *pio, spa_t *spa, uint64_t txg, blkptr_t *bp,
    306     zio_done_func_t *done, void *private);
    307 
    308 extern zio_t *zio_claim(zio_t *pio, spa_t *spa, uint64_t txg, blkptr_t *bp,
    309     zio_done_func_t *done, void *private);
    310 
    311 extern zio_t *zio_ioctl(zio_t *pio, spa_t *spa, vdev_t *vd, int cmd,
    312     zio_done_func_t *done, void *private, int priority, int flags);
    313 
    314 extern zio_t *zio_read_phys(zio_t *pio, vdev_t *vd, uint64_t offset,
    315     uint64_t size, void *data, int checksum,
    316     zio_done_func_t *done, void *private, int priority, int flags,
    317     boolean_t labels);
    318 
    319 extern zio_t *zio_write_phys(zio_t *pio, vdev_t *vd, uint64_t offset,
    320     uint64_t size, void *data, int checksum,
    321     zio_done_func_t *done, void *private, int priority, int flags,
    322     boolean_t labels);
    323 /*
         * Block allocation and free keyed by txg, plus a per-vdev flush.
         * zio_alloc_blk() returns an int (presumably 0 or an errno value --
         * confirm); the other two return nothing.
         */
    324 extern int zio_alloc_blk(spa_t *spa, uint64_t size, blkptr_t *new_bp,
    325     blkptr_t *old_bp, uint64_t txg);
    326 extern void zio_free_blk(spa_t *spa, blkptr_t *bp, uint64_t txg);
    327 extern void zio_flush(zio_t *zio, vdev_t *vd);
    328 /*
         * zio_wait() returns an int (presumably the I/O's error -- confirm);
         * zio_nowait(), zio_execute() and zio_interrupt() return nothing.
         */
    329 extern int zio_wait(zio_t *zio);
    330 extern void zio_nowait(zio_t *zio);
    331 extern void zio_execute(zio_t *zio);
    332 extern void zio_interrupt(zio_t *zio);
    333 
    334 extern int zio_wait_for_children_ready(zio_t *zio);
    335 extern int zio_wait_for_children_done(zio_t *zio);
    336 /*
         * Buffer allocators.  The free routines take the size as well as the
         * pointer, so callers need to keep track of it (presumably the size
         * originally requested -- confirm).
         */
    337 extern void *zio_buf_alloc(size_t size);
    338 extern void zio_buf_free(void *buf, size_t size);
    339 extern void *zio_data_buf_alloc(size_t size);
    340 extern void zio_data_buf_free(void *buf, size_t size);
    341 
    342 extern void zio_resubmit_stage_async(void *);
    343 
    344 /*
    345  * Delegate I/O to a child vdev.
    346  */
    347 extern zio_t *zio_vdev_child_io(zio_t *zio, blkptr_t *bp, vdev_t *vd,
    348     uint64_t offset, void *data, uint64_t size, int type, int priority,
    349     int flags, zio_done_func_t *done, void *private);
    350 
    351 extern void zio_vdev_io_bypass(zio_t *zio);
    352 extern void zio_vdev_io_reissue(zio_t *zio);
    353 extern void zio_vdev_io_redone(zio_t *zio);
    354 
    355 extern void zio_checksum_verified(zio_t *zio);
    356 extern void zio_set_gang_verifier(zio_t *zio, zio_cksum_t *zcp);
    357 /* Note: these take and return uint8_t rather than the enum types. */
    358 extern uint8_t zio_checksum_select(uint8_t child, uint8_t parent);
    359 extern uint8_t zio_compress_select(uint8_t child, uint8_t parent);
    360 
    361 extern boolean_t zio_should_retry(zio_t *zio);
    362 extern int zio_vdev_resume_io(spa_t *);
    363 
    364 /*
    365  * Initial setup and teardown.  Neither call takes arguments or returns
         * a status.
    366  */
    367 extern void zio_init(void);
    368 extern void zio_fini(void);
    369 
    370 /*
    371  * Fault injection
         *
         * struct zinject_record is only forward-declared here; this header
         * refers to it solely through pointers.
    372  */
    373 struct zinject_record;
    374 extern uint32_t zio_injection_enabled;
    375 extern int zio_inject_fault(char *name, int flags, int *id,
    376     struct zinject_record *record);
    377 extern int zio_inject_list_next(int *id, char *name, size_t buflen,
    378     struct zinject_record *record);
    379 extern int zio_clear_fault(int id);
    380 extern int zio_handle_fault_injection(zio_t *zio, int error);
    381 extern int zio_handle_device_injection(vdev_t *vd, int error);
    382 
    383 #ifdef	__cplusplus
    384 }
    385 #endif
    386 
    387 #endif	/* _ZIO_H */
    388