Home | History | Annotate | Download | only in common
      1 /*
      2  * CDDL HEADER START
      3  *
      4  * The contents of this file are subject to the terms of the
      5  * Common Development and Distribution License (the "License").
      6  * You may not use this file except in compliance with the License.
      7  *
      8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
      9  * or http://www.opensolaris.org/os/licensing.
     10  * See the License for the specific language governing permissions
     11  * and limitations under the License.
     12  *
     13  * When distributing Covered Code, include this CDDL HEADER in each
     14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
     15  * If applicable, add the following below this CDDL HEADER, with the
     16  * fields enclosed by brackets "[]" replaced with your own identifying
     17  * information: Portions Copyright [yyyy] [name of copyright owner]
     18  *
     19  * CDDL HEADER END
     20  */
     21 
     22 /*
     23  * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
     24  * Use is subject to license terms.
     25  */
     26 
     27 #pragma ident	"@(#)fmd_ckpt.c	1.6	07/11/16 SMI"
     28 
     29 #include <sys/types.h>
     30 #include <sys/mkdev.h>
     31 #include <sys/stat.h>
     32 
     33 #include <strings.h>
     34 #include <unistd.h>
     35 #include <limits.h>
     36 #include <fcntl.h>
     37 
     38 #include <fmd_module.h>
     39 #include <fmd_error.h>
     40 #include <fmd_alloc.h>
     41 #include <fmd_case.h>
     42 #include <fmd_serd.h>
     43 #include <fmd_subr.h>
     44 #include <fmd_conf.h>
     45 #include <fmd_event.h>
     46 #include <fmd_log.h>
     47 #include <fmd_api.h>
     48 #include <fmd_ckpt.h>
     49 
     50 #include <fmd.h>
     51 
     52 #define	P2ROUNDUP(x, align)	(-(-(x) & -(align)))
     53 #define	IS_P2ALIGNED(v, a)	((((uintptr_t)(v)) & ((uintptr_t)(a) - 1)) == 0)
     54 
     55 /*
     56  * The fmd_ckpt_t structure is used to manage all of the state needed by the
     57  * various subroutines that save and restore checkpoints.  The structure is
     58  * initialized using fmd_ckpt_create() or fmd_ckpt_open() and is destroyed
     59  * by fmd_ckpt_destroy().  Refer to the subroutines below for more details.
     60  */
     61 typedef struct fmd_ckpt {
     62 	char ckp_src[PATH_MAX];	/* ckpt input or output filename */
     63 	char ckp_dst[PATH_MAX];	/* ckpt rename filename */
     64 	uchar_t *ckp_buf;	/* data buffer base address */
     65 	fcf_hdr_t *ckp_hdr;	/* file header pointer */
     66 	uchar_t *ckp_ptr;	/* data buffer pointer */
     67 	size_t ckp_size;	/* data buffer size */
     68 	fcf_sec_t *ckp_secp;	/* section header table pointer */
     69 	fcf_sec_t *ckp_modp;	/* section header for module */
     70 	uint_t ckp_secs;	/* number of sections */
     71 	char *ckp_strs;		/* string table base pointer */
     72 	char *ckp_strp;		/* string table pointer */
     73 	size_t ckp_strn;	/* string table size */
     74 	int ckp_fd;		/* output descriptor */
     75 	fmd_module_t *ckp_mp;	/* checkpoint module */
     76 	void *ckp_arg;		/* private arg for callbacks */
     77 } fmd_ckpt_t;
     78 
     79 typedef struct fmd_ckpt_desc {
     80 	uint64_t secd_size;	/* minimum section size */
     81 	uint32_t secd_entsize;	/* minimum section entry size */
     82 	uint32_t secd_align;	/* section alignment */
     83 } fmd_ckpt_desc_t;
     84 
     85 /*
     86  * Table of FCF section descriptions.  Here we record the minimum size for each
     87  * section (for use during restore) and the expected entry size and alignment
     88  * for each section (for use during both checkpoint and restore).
     89  */
     90 static const fmd_ckpt_desc_t _fmd_ckpt_sections[] = {
     91 { 0, 0, sizeof (uint8_t) },					   /* NONE */
     92 { 1, 0, sizeof (char) },					   /* STRTAB */
     93 { sizeof (fcf_module_t), 0, sizeof (uint32_t) },		   /* MODULE */
     94 { sizeof (fcf_case_t), 0, sizeof (uint32_t) },			   /* CASE */
     95 { sizeof (fcf_buf_t), sizeof (fcf_buf_t), sizeof (uint32_t) },	   /* BUFS */
     96 { 0, 0, _MAX_ALIGNMENT },					   /* BUFFER */
     97 { sizeof (fcf_serd_t), sizeof (fcf_serd_t), sizeof (uint64_t) },   /* SERD */
     98 { sizeof (fcf_event_t), sizeof (fcf_event_t), sizeof (uint64_t) }, /* EVENTS */
     99 { sizeof (fcf_nvl_t), sizeof (fcf_nvl_t), sizeof (uint64_t) },	   /* NVLISTS */
    100 };
    101 
    102 static int
    103 fmd_ckpt_create(fmd_ckpt_t *ckp, fmd_module_t *mp)
    104 {
    105 	const char *dir = mp->mod_ckpt;
    106 	const char *name = mp->mod_name;
    107 	mode_t mode;
    108 
    109 	bzero(ckp, sizeof (fmd_ckpt_t));
    110 	ckp->ckp_mp = mp;
    111 
    112 	ckp->ckp_size = sizeof (fcf_hdr_t);
    113 	ckp->ckp_strn = 1; /* for \0 */
    114 
    115 	(void) snprintf(ckp->ckp_src, PATH_MAX, "%s/%s+", dir, name);
    116 	(void) snprintf(ckp->ckp_dst, PATH_MAX, "%s/%s", dir, name);
    117 
    118 	(void) unlink(ckp->ckp_src);
    119 	(void) fmd_conf_getprop(fmd.d_conf, "ckpt.mode", &mode);
    120 	ckp->ckp_fd = open64(ckp->ckp_src, O_WRONLY | O_CREAT | O_EXCL, mode);
    121 
    122 	return (ckp->ckp_fd);
    123 }
    124 
    125 /*PRINTFLIKE2*/
    126 static int
    127 fmd_ckpt_inval(fmd_ckpt_t *ckp, const char *format, ...)
    128 {
    129 	va_list ap;
    130 
    131 	va_start(ap, format);
    132 	fmd_verror(EFMD_CKPT_INVAL, format, ap);
    133 	va_end(ap);
    134 
    135 	fmd_free(ckp->ckp_buf, ckp->ckp_size);
    136 	return (fmd_set_errno(EFMD_CKPT_INVAL));
    137 }
    138 
    139 static int
    140 fmd_ckpt_open(fmd_ckpt_t *ckp, fmd_module_t *mp)
    141 {
    142 	struct stat64 st;
    143 	uint64_t seclen;
    144 	uint_t i;
    145 	int err;
    146 
    147 	bzero(ckp, sizeof (fmd_ckpt_t));
    148 	ckp->ckp_mp = mp;
    149 
    150 	(void) snprintf(ckp->ckp_src, PATH_MAX, "%s/%s",
    151 	    mp->mod_ckpt, mp->mod_name);
    152 
    153 	if ((ckp->ckp_fd = open(ckp->ckp_src, O_RDONLY)) == -1)
    154 		return (-1); /* failed to open checkpoint file */
    155 
    156 	if (fstat64(ckp->ckp_fd, &st) == -1) {
    157 		err = errno;
    158 		(void) close(ckp->ckp_fd);
    159 		return (fmd_set_errno(err));
    160 	}
    161 
    162 	ckp->ckp_buf = fmd_alloc(st.st_size, FMD_SLEEP);
    163 	ckp->ckp_hdr = (void *)ckp->ckp_buf;
    164 	ckp->ckp_size = read(ckp->ckp_fd, ckp->ckp_buf, st.st_size);
    165 
    166 	if (ckp->ckp_size != st.st_size || ckp->ckp_size < sizeof (fcf_hdr_t) ||
    167 	    ckp->ckp_size != ckp->ckp_hdr->fcfh_filesz) {
    168 		err = ckp->ckp_size == (size_t)-1L ? errno : EFMD_CKPT_SHORT;
    169 		fmd_free(ckp->ckp_buf, st.st_size);
    170 		(void) close(ckp->ckp_fd);
    171 		return (fmd_set_errno(err));
    172 	}
    173 
    174 	(void) close(ckp->ckp_fd);
    175 	ckp->ckp_fd = -1;
    176 
    177 	/*
    178 	 * Once we've read in a consistent copy of the FCF file and we're sure
    179 	 * the header can be accessed, go through it and make sure everything
    180 	 * is valid.  We also check that unused bits are zero so we can expand
    181 	 * to use them safely in the future and support old files if needed.
    182 	 */
    183 	if (bcmp(&ckp->ckp_hdr->fcfh_ident[FCF_ID_MAG0],
    184 	    FCF_MAG_STRING, FCF_MAG_STRLEN) != 0)
    185 		return (fmd_ckpt_inval(ckp, "bad checkpoint magic string\n"));
    186 
    187 	if (ckp->ckp_hdr->fcfh_ident[FCF_ID_MODEL] != FCF_MODEL_NATIVE)
    188 		return (fmd_ckpt_inval(ckp, "bad checkpoint data model\n"));
    189 
    190 	if (ckp->ckp_hdr->fcfh_ident[FCF_ID_ENCODING] != FCF_ENCODE_NATIVE)
    191 		return (fmd_ckpt_inval(ckp, "bad checkpoint data encoding\n"));
    192 
    193 	if (ckp->ckp_hdr->fcfh_ident[FCF_ID_VERSION] != FCF_VERSION_1) {
    194 		return (fmd_ckpt_inval(ckp, "bad checkpoint version %u\n",
    195 		    ckp->ckp_hdr->fcfh_ident[FCF_ID_VERSION]));
    196 	}
    197 
    198 	for (i = FCF_ID_PAD; i < FCF_ID_SIZE; i++) {
    199 		if (ckp->ckp_hdr->fcfh_ident[i] != 0) {
    200 			return (fmd_ckpt_inval(ckp,
    201 			    "bad checkpoint padding at id[%d]", i));
    202 		}
    203 	}
    204 
    205 	if (ckp->ckp_hdr->fcfh_flags & ~FCF_FL_VALID)
    206 		return (fmd_ckpt_inval(ckp, "bad checkpoint flags\n"));
    207 
    208 	if (ckp->ckp_hdr->fcfh_pad != 0)
    209 		return (fmd_ckpt_inval(ckp, "reserved field in use\n"));
    210 
    211 	if (ckp->ckp_hdr->fcfh_hdrsize < sizeof (fcf_hdr_t) ||
    212 	    ckp->ckp_hdr->fcfh_secsize < sizeof (fcf_sec_t)) {
    213 		return (fmd_ckpt_inval(ckp,
    214 		    "bad header and/or section size\n"));
    215 	}
    216 
    217 	seclen = (uint64_t)ckp->ckp_hdr->fcfh_secnum *
    218 	    (uint64_t)ckp->ckp_hdr->fcfh_secsize;
    219 
    220 	if (ckp->ckp_hdr->fcfh_secoff > ckp->ckp_size ||
    221 	    seclen > ckp->ckp_size ||
    222 	    ckp->ckp_hdr->fcfh_secoff + seclen > ckp->ckp_size ||
    223 	    ckp->ckp_hdr->fcfh_secoff + seclen < ckp->ckp_hdr->fcfh_secoff)
    224 		return (fmd_ckpt_inval(ckp, "truncated section headers\n"));
    225 
    226 	if (!IS_P2ALIGNED(ckp->ckp_hdr->fcfh_secoff, sizeof (uint64_t)) ||
    227 	    !IS_P2ALIGNED(ckp->ckp_hdr->fcfh_secsize, sizeof (uint64_t)))
    228 		return (fmd_ckpt_inval(ckp, "misaligned section headers\n"));
    229 
    230 	/*
    231 	 * Once the header is validated, iterate over the section headers
    232 	 * ensuring that each one is valid w.r.t. offset, alignment, and size.
    233 	 * We also pick up the string table pointer during this pass.
    234 	 */
    235 	ckp->ckp_secp = (void *)(ckp->ckp_buf + ckp->ckp_hdr->fcfh_secoff);
    236 	ckp->ckp_secs = ckp->ckp_hdr->fcfh_secnum;
    237 
    238 	for (i = 0; i < ckp->ckp_secs; i++) {
    239 		fcf_sec_t *sp = (void *)(ckp->ckp_buf +
    240 		    ckp->ckp_hdr->fcfh_secoff + ckp->ckp_hdr->fcfh_secsize * i);
    241 
    242 		const fmd_ckpt_desc_t *dp = &_fmd_ckpt_sections[sp->fcfs_type];
    243 
    244 		if (sp->fcfs_flags != 0) {
    245 			return (fmd_ckpt_inval(ckp, "section %u has invalid "
    246 			    "section flags (0x%x)\n", i, sp->fcfs_flags));
    247 		}
    248 
    249 		if (sp->fcfs_align & (sp->fcfs_align - 1)) {
    250 			return (fmd_ckpt_inval(ckp, "section %u has invalid "
    251 			    "alignment (%u)\n", i, sp->fcfs_align));
    252 		}
    253 
    254 		if (sp->fcfs_offset & (sp->fcfs_align - 1)) {
    255 			return (fmd_ckpt_inval(ckp, "section %u is not properly"
    256 			    " aligned (offset %llu)\n", i, sp->fcfs_offset));
    257 		}
    258 
    259 		if (sp->fcfs_entsize != 0 &&
    260 		    (sp->fcfs_entsize & (sp->fcfs_align - 1)) != 0) {
    261 			return (fmd_ckpt_inval(ckp, "section %u has misaligned "
    262 			    "entsize %u\n", i, sp->fcfs_entsize));
    263 		}
    264 
    265 		if (sp->fcfs_offset > ckp->ckp_size ||
    266 		    sp->fcfs_size > ckp->ckp_size ||
    267 		    sp->fcfs_offset + sp->fcfs_size > ckp->ckp_size ||
    268 		    sp->fcfs_offset + sp->fcfs_size < sp->fcfs_offset) {
    269 			return (fmd_ckpt_inval(ckp, "section %u has corrupt "
    270 			    "size or offset\n", i));
    271 		}
    272 
    273 		if (sp->fcfs_type >= sizeof (_fmd_ckpt_sections) /
    274 		    sizeof (_fmd_ckpt_sections[0])) {
    275 			return (fmd_ckpt_inval(ckp, "section %u has unknown "
    276 			    "section type %u\n", i, sp->fcfs_type));
    277 		}
    278 
    279 		if (sp->fcfs_align != dp->secd_align) {
    280 			return (fmd_ckpt_inval(ckp, "section %u has align %u "
    281 			    "(not %u)\n", i, sp->fcfs_align, dp->secd_align));
    282 		}
    283 
    284 		if (sp->fcfs_size < dp->secd_size ||
    285 		    sp->fcfs_entsize < dp->secd_entsize) {
    286 			return (fmd_ckpt_inval(ckp, "section %u has short "
    287 			    "size or entsize\n", i));
    288 		}
    289 
    290 		switch (sp->fcfs_type) {
    291 		case FCF_SECT_STRTAB:
    292 			if (ckp->ckp_strs != NULL) {
    293 				return (fmd_ckpt_inval(ckp, "multiple string "
    294 				    "tables are present in checkpoint file\n"));
    295 			}
    296 
    297 			ckp->ckp_strs = (char *)ckp->ckp_buf + sp->fcfs_offset;
    298 			ckp->ckp_strn = sp->fcfs_size;
    299 
    300 			if (ckp->ckp_strs[ckp->ckp_strn - 1] != '\0') {
    301 				return (fmd_ckpt_inval(ckp, "string table %u "
    302 				    "is missing terminating nul byte\n", i));
    303 			}
    304 			break;
    305 
    306 		case FCF_SECT_MODULE:
    307 			if (ckp->ckp_modp != NULL) {
    308 				return (fmd_ckpt_inval(ckp, "multiple module "
    309 				    "sects are present in checkpoint file\n"));
    310 			}
    311 			ckp->ckp_modp = sp;
    312 			break;
    313 		}
    314 	}
    315 
    316 	/*
    317 	 * Ensure that the first section is an empty one of type FCF_SECT_NONE.
    318 	 * This is done to ensure that links can use index 0 as a null section.
    319 	 */
    320 	if (ckp->ckp_secs == 0 || ckp->ckp_secp->fcfs_type != FCF_SECT_NONE ||
    321 	    ckp->ckp_secp->fcfs_entsize != 0 || ckp->ckp_secp->fcfs_size != 0) {
    322 		return (fmd_ckpt_inval(ckp, "section 0 is not of the "
    323 		    "appropriate size and/or attributes (SECT_NONE)\n"));
    324 	}
    325 
    326 	if (ckp->ckp_modp == NULL) {
    327 		return (fmd_ckpt_inval(ckp,
    328 		    "no module section found in file\n"));
    329 	}
    330 
    331 	return (0);
    332 }
    333 
    334 static void
    335 fmd_ckpt_destroy(fmd_ckpt_t *ckp)
    336 {
    337 	if (ckp->ckp_buf != NULL)
    338 		fmd_free(ckp->ckp_buf, ckp->ckp_size);
    339 	if (ckp->ckp_fd >= 0)
    340 		(void) close(ckp->ckp_fd);
    341 }
    342 
    343 /*
    344  * fmd_ckpt_error() is used as a wrapper around fmd_error() for ckpt routines.
    345  * It calls fmd_module_unlock() on behalf of its caller, logs the error, and
    346  * then aborts the API call and the surrounding module entry point by doing an
    347  * fmd_module_abort(), which longjmps to the place where we entered the module.
    348  * Depending on the type of error and conf settings, we will reset or fail.
    349  */
    350 /*PRINTFLIKE3*/
    351 static void
    352 fmd_ckpt_error(fmd_ckpt_t *ckp, int err, const char *format, ...)
    353 {
    354 	fmd_module_t *mp = ckp->ckp_mp;
    355 	va_list ap;
    356 
    357 	va_start(ap, format);
    358 	fmd_verror(err, format, ap);
    359 	va_end(ap);
    360 
    361 	if (fmd_module_locked(mp))
    362 		fmd_module_unlock(mp);
    363 
    364 	fmd_ckpt_destroy(ckp);
    365 	fmd_module_abort(mp, err);
    366 }
    367 
    368 static fcf_secidx_t
    369 fmd_ckpt_section(fmd_ckpt_t *ckp, const void *data, uint_t type, uint64_t size)
    370 {
    371 	const fmd_ckpt_desc_t *dp;
    372 
    373 	ASSERT(type < sizeof (_fmd_ckpt_sections) / sizeof (fmd_ckpt_desc_t));
    374 	dp = &_fmd_ckpt_sections[type];
    375 
    376 	ckp->ckp_ptr = (uchar_t *)
    377 	    P2ROUNDUP((uintptr_t)ckp->ckp_ptr, dp->secd_align);
    378 
    379 	ckp->ckp_secp->fcfs_type = type;
    380 	ckp->ckp_secp->fcfs_align = dp->secd_align;
    381 	ckp->ckp_secp->fcfs_flags = 0;
    382 	ckp->ckp_secp->fcfs_entsize = dp->secd_entsize;
    383 	ckp->ckp_secp->fcfs_offset = (size_t)(ckp->ckp_ptr - ckp->ckp_buf);
    384 	ckp->ckp_secp->fcfs_size = size;
    385 
    386 	/*
    387 	 * If the data pointer is non-NULL, copy the data to our buffer; else
    388 	 * the caller is responsible for doing so and updating ckp->ckp_ptr.
    389 	 */
    390 	if (data != NULL) {
    391 		bcopy(data, ckp->ckp_ptr, size);
    392 		ckp->ckp_ptr += size;
    393 	}
    394 
    395 	ckp->ckp_secp++;
    396 	return (ckp->ckp_secs++);
    397 }
    398 
    399 static fcf_stridx_t
    400 fmd_ckpt_string(fmd_ckpt_t *ckp, const char *s)
    401 {
    402 	fcf_stridx_t idx = (fcf_stridx_t)(ckp->ckp_strp - ckp->ckp_strs);
    403 
    404 	(void) strcpy(ckp->ckp_strp, s);
    405 	ckp->ckp_strp += strlen(s) + 1;
    406 
    407 	return (idx);
    408 }
    409 
    410 static int
    411 fmd_ckpt_alloc(fmd_ckpt_t *ckp, uint64_t gen)
    412 {
    413 	/*
    414 	 * We've added up all the sections by now: add two more for SECT_NONE
    415 	 * and SECT_STRTAB, and add the size of the section header table and
    416 	 * string table to the total size.  We know that the fcf_hdr_t is
    417 	 * aligned so that that fcf_sec_t's can follow it, and that fcf_sec_t
    418 	 * is aligned so that any section can follow it, so no extra padding
    419 	 * bytes need to be allocated between any of these items.
    420 	 */
    421 	ckp->ckp_secs += 2; /* for FCF_SECT_NONE and FCF_SECT_STRTAB */
    422 	ckp->ckp_size += sizeof (fcf_sec_t) * ckp->ckp_secs;
    423 	ckp->ckp_size += ckp->ckp_strn;
    424 
    425 	TRACE((FMD_DBG_CKPT, "alloc fcf buf size %u", ckp->ckp_size));
    426 	ckp->ckp_buf = fmd_zalloc(ckp->ckp_size, FMD_NOSLEEP);
    427 
    428 	if (ckp->ckp_buf == NULL)
    429 		return (-1); /* errno is set for us */
    430 
    431 	ckp->ckp_hdr = (void *)ckp->ckp_buf;
    432 
    433 	ckp->ckp_hdr->fcfh_ident[FCF_ID_MAG0] = FCF_MAG_MAG0;
    434 	ckp->ckp_hdr->fcfh_ident[FCF_ID_MAG1] = FCF_MAG_MAG1;
    435 	ckp->ckp_hdr->fcfh_ident[FCF_ID_MAG2] = FCF_MAG_MAG2;
    436 	ckp->ckp_hdr->fcfh_ident[FCF_ID_MAG3] = FCF_MAG_MAG3;
    437 	ckp->ckp_hdr->fcfh_ident[FCF_ID_MODEL] = FCF_MODEL_NATIVE;
    438 	ckp->ckp_hdr->fcfh_ident[FCF_ID_ENCODING] = FCF_ENCODE_NATIVE;
    439 	ckp->ckp_hdr->fcfh_ident[FCF_ID_VERSION] = FCF_VERSION;
    440 
    441 	ckp->ckp_hdr->fcfh_hdrsize = sizeof (fcf_hdr_t);
    442 	ckp->ckp_hdr->fcfh_secsize = sizeof (fcf_sec_t);
    443 	ckp->ckp_hdr->fcfh_secnum = ckp->ckp_secs;
    444 	ckp->ckp_hdr->fcfh_secoff = sizeof (fcf_hdr_t);
    445 	ckp->ckp_hdr->fcfh_filesz = ckp->ckp_size;
    446 	ckp->ckp_hdr->fcfh_cgen = gen;
    447 
    448 	ckp->ckp_secs = 0; /* reset section counter for second pass */
    449 	ckp->ckp_secp = (void *)(ckp->ckp_buf + sizeof (fcf_hdr_t));
    450 	ckp->ckp_strs = (char *)ckp->ckp_buf + ckp->ckp_size - ckp->ckp_strn;
    451 	ckp->ckp_strp = ckp->ckp_strs + 1; /* use first byte as \0 */
    452 	ckp->ckp_ptr = (uchar_t *)(ckp->ckp_secp + ckp->ckp_hdr->fcfh_secnum);
    453 
    454 	(void) fmd_ckpt_section(ckp, NULL, FCF_SECT_NONE, 0);
    455 	return (0);
    456 }
    457 
    458 static int
    459 fmd_ckpt_commit(fmd_ckpt_t *ckp)
    460 {
    461 	fcf_sec_t *secbase = (void *)(ckp->ckp_buf + sizeof (fcf_hdr_t));
    462 	size_t stroff = ckp->ckp_size - ckp->ckp_strn;
    463 
    464 	/*
    465 	 * Before committing the checkpoint, we assert that fmd_ckpt_t's sizes
    466 	 * and current pointer locations all add up appropriately.  Any ASSERTs
    467 	 * which trip here likely indicate an inconsistency in the code for the
    468 	 * reservation pass and the buffer update pass of the FCF subroutines.
    469 	 */
    470 	ASSERT((size_t)(ckp->ckp_ptr - ckp->ckp_buf) == stroff);
    471 	(void) fmd_ckpt_section(ckp, NULL, FCF_SECT_STRTAB, ckp->ckp_strn);
    472 	ckp->ckp_ptr += ckp->ckp_strn; /* string table is already filled in */
    473 
    474 	ASSERT(ckp->ckp_secs == ckp->ckp_hdr->fcfh_secnum);
    475 	ASSERT(ckp->ckp_secp == secbase + ckp->ckp_hdr->fcfh_secnum);
    476 	ASSERT(ckp->ckp_ptr == ckp->ckp_buf + ckp->ckp_hdr->fcfh_filesz);
    477 
    478 	if (write(ckp->ckp_fd, ckp->ckp_buf, ckp->ckp_size) != ckp->ckp_size ||
    479 	    fsync(ckp->ckp_fd) != 0 || close(ckp->ckp_fd) != 0)
    480 		return (-1); /* errno is set for us */
    481 
    482 	ckp->ckp_fd = -1; /* fd is now closed */
    483 	return (rename(ckp->ckp_src, ckp->ckp_dst) != 0);
    484 }
    485 
    486 static void
    487 fmd_ckpt_resv(fmd_ckpt_t *ckp, size_t size, size_t align)
    488 {
    489 	if (size != 0) {
    490 		ckp->ckp_size = P2ROUNDUP(ckp->ckp_size, align) + size;
    491 		ckp->ckp_secs++;
    492 	}
    493 }
    494 
    495 static void
    496 fmd_ckpt_resv_buf(fmd_buf_t *bp, fmd_ckpt_t *ckp)
    497 {
    498 	ckp->ckp_size = P2ROUNDUP(ckp->ckp_size, _MAX_ALIGNMENT) + bp->buf_size;
    499 	ckp->ckp_strn += strlen(bp->buf_name) + 1;
    500 	ckp->ckp_secs++;
    501 }
    502 
    503 static void
    504 fmd_ckpt_save_buf(fmd_buf_t *bp, fmd_ckpt_t *ckp)
    505 {
    506 	fcf_buf_t *fcfb = ckp->ckp_arg;
    507 
    508 	fcfb->fcfb_name = fmd_ckpt_string(ckp, bp->buf_name);
    509 	fcfb->fcfb_data = fmd_ckpt_section(ckp,
    510 	    bp->buf_data, FCF_SECT_BUFFER, bp->buf_size);
    511 
    512 	ckp->ckp_arg = fcfb + 1;
    513 }
    514 
    515 static void
    516 fmd_ckpt_save_event(fmd_ckpt_t *ckp, fmd_event_t *e)
    517 {
    518 	fcf_event_t *fcfe = (void *)ckp->ckp_ptr;
    519 	fmd_event_impl_t *ep = (fmd_event_impl_t *)e;
    520 	fmd_log_t *lp = ep->ev_log;
    521 
    522 	fcfe->fcfe_todsec = ep->ev_time.ftv_sec;
    523 	fcfe->fcfe_todnsec = ep->ev_time.ftv_nsec;
    524 	fcfe->fcfe_major = lp ? major(lp->log_stat.st_dev) : -1U;
    525 	fcfe->fcfe_minor = lp ? minor(lp->log_stat.st_dev) : -1U;
    526 	fcfe->fcfe_inode = lp ? lp->log_stat.st_ino : -1ULL;
    527 	fcfe->fcfe_offset = ep->ev_off;
    528 
    529 	ckp->ckp_ptr += sizeof (fcf_event_t);
    530 }
    531 
    532 static void
    533 fmd_ckpt_save_nvlist(fmd_ckpt_t *ckp, nvlist_t *nvl)
    534 {
    535 	fcf_nvl_t *fcfn = (void *)ckp->ckp_ptr;
    536 	char *nvbuf = (char *)ckp->ckp_ptr + sizeof (fcf_nvl_t);
    537 	size_t nvsize = 0;
    538 
    539 	(void) nvlist_size(nvl, &nvsize, NV_ENCODE_NATIVE);
    540 	fcfn->fcfn_size = (uint64_t)nvsize;
    541 
    542 	(void) nvlist_pack(nvl, &nvbuf, &nvsize, NV_ENCODE_NATIVE, 0);
    543 	ckp->ckp_ptr += sizeof (fcf_nvl_t) + nvsize;
    544 
    545 	ckp->ckp_ptr = (uchar_t *)
    546 	    P2ROUNDUP((uintptr_t)ckp->ckp_ptr, sizeof (uint64_t));
    547 }
    548 
    549 static void
    550 fmd_ckpt_resv_serd(fmd_serd_eng_t *sgp, fmd_ckpt_t *ckp)
    551 {
    552 	fmd_ckpt_resv(ckp,
    553 	    sizeof (fcf_event_t) * sgp->sg_count, sizeof (uint64_t));
    554 
    555 	ckp->ckp_strn += strlen(sgp->sg_name) + 1;
    556 }
    557 
    558 static void
    559 fmd_ckpt_save_serd(fmd_serd_eng_t *sgp, fmd_ckpt_t *ckp)
    560 {
    561 	fcf_serd_t *fcfd = ckp->ckp_arg;
    562 	fcf_secidx_t evsec = FCF_SECT_NONE;
    563 	fmd_serd_elem_t *sep;
    564 
    565 	if (sgp->sg_count != 0) {
    566 		evsec = fmd_ckpt_section(ckp, NULL, FCF_SECT_EVENTS,
    567 		    sizeof (fcf_event_t) * sgp->sg_count);
    568 
    569 		for (sep = fmd_list_next(&sgp->sg_list);
    570 		    sep != NULL; sep = fmd_list_next(sep))
    571 			fmd_ckpt_save_event(ckp, sep->se_event);
    572 	}
    573 
    574 	fcfd->fcfd_name = fmd_ckpt_string(ckp, sgp->sg_name);
    575 	fcfd->fcfd_events = evsec;
    576 	fcfd->fcfd_pad = 0;
    577 	fcfd->fcfd_n = sgp->sg_n;
    578 	fcfd->fcfd_t = sgp->sg_t;
    579 
    580 	ckp->ckp_arg = fcfd + 1;
    581 }
    582 
    583 static void
    584 fmd_ckpt_resv_case(fmd_ckpt_t *ckp, fmd_case_t *cp)
    585 {
    586 	fmd_case_impl_t *cip = (fmd_case_impl_t *)cp;
    587 	fmd_case_susp_t *cis;
    588 	uint_t n;
    589 
    590 	if (cip->ci_xprt != NULL)
    591 		return; /* do not checkpoint cases from remote transports */
    592 
    593 	n = fmd_buf_hash_count(&cip->ci_bufs);
    594 	fmd_buf_hash_apply(&cip->ci_bufs, (fmd_buf_f *)fmd_ckpt_resv_buf, ckp);
    595 	fmd_ckpt_resv(ckp, sizeof (fcf_buf_t) * n, sizeof (uint32_t));
    596 
    597 	if (cip->ci_principal != NULL)
    598 		fmd_ckpt_resv(ckp, sizeof (fcf_event_t), sizeof (uint64_t));
    599 
    600 	fmd_ckpt_resv(ckp,
    601 	    sizeof (fcf_event_t) * cip->ci_nitems, sizeof (uint64_t));
    602 
    603 	if (cip->ci_nsuspects != 0)
    604 		ckp->ckp_size = P2ROUNDUP(ckp->ckp_size, sizeof (uint64_t));
    605 
    606 	cip->ci_nvsz = 0; /* compute size of packed suspect nvlist array */
    607 
    608 	for (cis = cip->ci_suspects; cis != NULL; cis = cis->cis_next) {
    609 		size_t nvsize = 0;
    610 
    611 		(void) nvlist_size(cis->cis_nvl, &nvsize, NV_ENCODE_NATIVE);
    612 		cip->ci_nvsz += sizeof (fcf_nvl_t) + nvsize;
    613 		cip->ci_nvsz = P2ROUNDUP(cip->ci_nvsz, sizeof (uint64_t));
    614 	}
    615 
    616 	fmd_ckpt_resv(ckp, cip->ci_nvsz, sizeof (uint64_t));
    617 	fmd_ckpt_resv(ckp, sizeof (fcf_case_t), sizeof (uint32_t));
    618 	ckp->ckp_strn += strlen(cip->ci_uuid) + 1;
    619 }
    620 
    621 static void
    622 fmd_ckpt_save_case(fmd_ckpt_t *ckp, fmd_case_t *cp)
    623 {
    624 	fmd_case_impl_t *cip = (fmd_case_impl_t *)cp;
    625 
    626 	fmd_case_item_t *cit;
    627 	fmd_case_susp_t *cis;
    628 	fcf_case_t fcfc;
    629 	uint_t n;
    630 
    631 	fcf_secidx_t bufsec = FCF_SECIDX_NONE;
    632 	fcf_secidx_t evsec = FCF_SECIDX_NONE;
    633 	fcf_secidx_t nvsec = FCF_SECIDX_NONE;
    634 	fcf_secidx_t prsec = FCF_SECIDX_NONE;
    635 
    636 	if (cip->ci_xprt != NULL)
    637 		return; /* do not checkpoint cases from remote transports */
    638 
    639 	if ((n = fmd_buf_hash_count(&cip->ci_bufs)) != 0) {
    640 		size_t size = sizeof (fcf_buf_t) * n;
    641 		fcf_buf_t *bufs = ckp->ckp_arg = fmd_alloc(size, FMD_SLEEP);
    642 
    643 		fmd_buf_hash_apply(&cip->ci_bufs,
    644 		    (fmd_buf_f *)fmd_ckpt_save_buf, ckp);
    645 
    646 		bufsec = fmd_ckpt_section(ckp, bufs, FCF_SECT_BUFS, size);
    647 		fmd_free(bufs, size);
    648 	}
    649 
    650 	if (cip->ci_principal != NULL) {
    651 		prsec = fmd_ckpt_section(ckp, NULL, FCF_SECT_EVENTS,
    652 		    sizeof (fcf_event_t));
    653 
    654 		fmd_ckpt_save_event(ckp, cip->ci_principal);
    655 	}
    656 
    657 	if (cip->ci_nitems != 0) {
    658 		evsec = fmd_ckpt_section(ckp, NULL, FCF_SECT_EVENTS,
    659 		    sizeof (fcf_event_t) * cip->ci_nitems);
    660 
    661 		for (cit = cip->ci_items; cit != NULL; cit = cit->cit_next)
    662 			fmd_ckpt_save_event(ckp, cit->cit_event);
    663 	}
    664 
    665 	if (cip->ci_nsuspects != 0) {
    666 		nvsec = fmd_ckpt_section(ckp, NULL,
    667 		    FCF_SECT_NVLISTS, cip->ci_nvsz);
    668 
    669 		for (cis = cip->ci_suspects; cis != NULL; cis = cis->cis_next)
    670 			fmd_ckpt_save_nvlist(ckp, cis->cis_nvl);
    671 	}
    672 
    673 	fcfc.fcfc_uuid = fmd_ckpt_string(ckp, cip->ci_uuid);
    674 	fcfc.fcfc_bufs = bufsec;
    675 	fcfc.fcfc_principal = prsec;
    676 	fcfc.fcfc_events = evsec;
    677 	fcfc.fcfc_suspects = nvsec;
    678 
    679 	switch (cip->ci_state) {
    680 	case FMD_CASE_UNSOLVED:
    681 		fcfc.fcfc_state = FCF_CASE_UNSOLVED;
    682 		break;
    683 	case FMD_CASE_SOLVED:
    684 		fcfc.fcfc_state = FCF_CASE_SOLVED;
    685 		break;
    686 	case FMD_CASE_CLOSE_WAIT:
    687 		fcfc.fcfc_state = FCF_CASE_CLOSE_WAIT;
    688 		break;
    689 	default:
    690 		fmd_panic("case %p (%s) has invalid state %u",
    691 		    (void *)cp, cip->ci_uuid, cip->ci_state);
    692 	}
    693 
    694 	(void) fmd_ckpt_section(ckp, &fcfc, FCF_SECT_CASE, sizeof (fcf_case_t));
    695 }
    696 
    697 static void
    698 fmd_ckpt_resv_module(fmd_ckpt_t *ckp, fmd_module_t *mp)
    699 {
    700 	fmd_case_t *cp;
    701 	uint_t n;
    702 
    703 	for (cp = fmd_list_next(&mp->mod_cases); cp; cp = fmd_list_next(cp))
    704 		fmd_ckpt_resv_case(ckp, cp);
    705 
    706 	n = fmd_serd_hash_count(&mp->mod_serds);
    707 	fmd_serd_hash_apply(&mp->mod_serds,
    708 	    (fmd_serd_eng_f *)fmd_ckpt_resv_serd, ckp);
    709 	fmd_ckpt_resv(ckp, sizeof (fcf_serd_t) * n, sizeof (uint64_t));
    710 
    711 	n = fmd_buf_hash_count(&mp->mod_bufs);
    712 	fmd_buf_hash_apply(&mp->mod_bufs, (fmd_buf_f *)fmd_ckpt_resv_buf, ckp);
    713 	fmd_ckpt_resv(ckp, sizeof (fcf_buf_t) * n, sizeof (uint32_t));
    714 
    715 	fmd_ckpt_resv(ckp, sizeof (fcf_module_t), sizeof (uint32_t));
    716 	ckp->ckp_strn += strlen(mp->mod_name) + 1;
    717 	ckp->ckp_strn += strlen(mp->mod_path) + 1;
    718 	ckp->ckp_strn += strlen(mp->mod_info->fmdi_desc) + 1;
    719 	ckp->ckp_strn += strlen(mp->mod_info->fmdi_vers) + 1;
    720 }
    721 
    722 static void
    723 fmd_ckpt_save_module(fmd_ckpt_t *ckp, fmd_module_t *mp)
    724 {
    725 	fcf_secidx_t bufsec = FCF_SECIDX_NONE;
    726 	fcf_module_t fcfm;
    727 	fmd_case_t *cp;
    728 	uint_t n;
    729 
    730 	for (cp = fmd_list_next(&mp->mod_cases); cp; cp = fmd_list_next(cp))
    731 		fmd_ckpt_save_case(ckp, cp);
    732 
    733 	if ((n = fmd_serd_hash_count(&mp->mod_serds)) != 0) {
    734 		size_t size = sizeof (fcf_serd_t) * n;
    735 		fcf_serd_t *serds = ckp->ckp_arg = fmd_alloc(size, FMD_SLEEP);
    736 
    737 		fmd_serd_hash_apply(&mp->mod_serds,
    738 		    (fmd_serd_eng_f *)fmd_ckpt_save_serd, ckp);
    739 
    740 		(void) fmd_ckpt_section(ckp, serds, FCF_SECT_SERD, size);
    741 		fmd_free(serds, size);
    742 	}
    743 
    744 	if ((n = fmd_buf_hash_count(&mp->mod_bufs)) != 0) {
    745 		size_t size = sizeof (fcf_buf_t) * n;
    746 		fcf_buf_t *bufs = ckp->ckp_arg = fmd_alloc(size, FMD_SLEEP);
    747 
    748 		fmd_buf_hash_apply(&mp->mod_bufs,
    749 		    (fmd_buf_f *)fmd_ckpt_save_buf, ckp);
    750 
    751 		bufsec = fmd_ckpt_section(ckp, bufs, FCF_SECT_BUFS, size);
    752 		fmd_free(bufs, size);
    753 	}
    754 
    755 	fcfm.fcfm_name = fmd_ckpt_string(ckp, mp->mod_name);
    756 	fcfm.fcfm_path = fmd_ckpt_string(ckp, mp->mod_path);
    757 	fcfm.fcfm_desc = fmd_ckpt_string(ckp, mp->mod_info->fmdi_desc);
    758 	fcfm.fcfm_vers = fmd_ckpt_string(ckp, mp->mod_info->fmdi_vers);
    759 	fcfm.fcfm_bufs = bufsec;
    760 
    761 	(void) fmd_ckpt_section(ckp, &fcfm,
    762 	    FCF_SECT_MODULE, sizeof (fcf_module_t));
    763 }
    764 
    765 void
    766 fmd_ckpt_save(fmd_module_t *mp)
    767 {
    768 	struct stat64 st;
    769 	char path[PATH_MAX];
    770 	mode_t dirmode;
    771 
    772 	hrtime_t now = gethrtime();
    773 	fmd_ckpt_t ckp;
    774 	int err;
    775 
    776 	ASSERT(fmd_module_locked(mp));
    777 
    778 	/*
    779 	 * If checkpointing is disabled for the module, just return.  We must
    780 	 * commit the module state anyway to transition pending log events.
    781 	 */
    782 	if (mp->mod_stats->ms_ckpt_save.fmds_value.bool == FMD_B_FALSE) {
    783 		fmd_module_commit(mp);
    784 		return;
    785 	}
    786 
    787 	if (!(mp->mod_flags & (FMD_MOD_MDIRTY | FMD_MOD_CDIRTY)))
    788 		return; /* no checkpoint is necessary for this module */
    789 
    790 	TRACE((FMD_DBG_CKPT, "ckpt save begin %s %llu",
    791 	    mp->mod_name, mp->mod_gen + 1));
    792 
    793 	/*
    794 	 * If the per-module checkpoint directory isn't found or isn't of type
    795 	 * directory, move aside whatever is there (if anything) and attempt
    796 	 * to mkdir(2) a new module checkpoint directory.  If this fails, we
    797 	 * have no choice but to abort the checkpoint and try again later.
    798 	 */
    799 	if (stat64(mp->mod_ckpt, &st) != 0 || !S_ISDIR(st.st_mode)) {
    800 		(void) snprintf(path, sizeof (path), "%s-", mp->mod_ckpt);
    801 		(void) rename(mp->mod_ckpt, path);
    802 		(void) fmd_conf_getprop(fmd.d_conf, "ckpt.dirmode", &dirmode);
    803 
    804 		if (mkdir(mp->mod_ckpt, dirmode) != 0) {
    805 			fmd_error(EFMD_CKPT_MKDIR,
    806 			    "failed to mkdir %s", mp->mod_ckpt);
    807 			return; /* return without clearing dirty bits */
    808 		}
    809 	}
    810 
    811 	/*
    812 	 * Create a temporary file to write out the checkpoint into, and create
    813 	 * a fmd_ckpt_t structure to manage construction of the checkpoint.  We
    814 	 * then figure out how much space will be required, and allocate it.
    815 	 */
    816 	if (fmd_ckpt_create(&ckp, mp) == -1) {
    817 		fmd_error(EFMD_CKPT_CREATE, "failed to create %s", ckp.ckp_src);
    818 		return;
    819 	}
    820 
    821 	fmd_ckpt_resv_module(&ckp, mp);
    822 
    823 	if (fmd_ckpt_alloc(&ckp, mp->mod_gen + 1) != 0) {
    824 		fmd_error(EFMD_CKPT_NOMEM, "failed to build %s", ckp.ckp_src);
    825 		fmd_ckpt_destroy(&ckp);
    826 		return;
    827 	}
    828 
    829 	/*
    830 	 * Fill in the checkpoint content, write it to disk, sync it, and then
    831 	 * atomically rename it to the destination path.  If this fails, we
    832 	 * have no choice but to leave all our dirty bits set and return.
    833 	 */
    834 	fmd_ckpt_save_module(&ckp, mp);
    835 	err = fmd_ckpt_commit(&ckp);
    836 	fmd_ckpt_destroy(&ckp);
    837 
    838 	if (err != 0) {
    839 		fmd_error(EFMD_CKPT_COMMIT, "failed to commit %s", ckp.ckp_dst);
    840 		return; /* return without clearing dirty bits */
    841 	}
    842 
    843 	fmd_module_commit(mp);
    844 	TRACE((FMD_DBG_CKPT, "ckpt save end %s", mp->mod_name));
    845 
    846 	mp->mod_stats->ms_ckpt_cnt.fmds_value.ui64++;
    847 	mp->mod_stats->ms_ckpt_time.fmds_value.ui64 += gethrtime() - now;
    848 
    849 	fmd_dprintf(FMD_DBG_CKPT, "saved checkpoint of %s (%llu)\n",
    850 	    mp->mod_name, mp->mod_gen);
    851 }
    852 
    853 /*
    854  * Utility function to retrieve a pointer to a section's header and verify that
    855  * it is of the expected type or it is a FCF_SECT_NONE reference.
    856  */
    857 static const fcf_sec_t *
    858 fmd_ckpt_secptr(fmd_ckpt_t *ckp, fcf_secidx_t sid, uint_t type)
    859 {
    860 	const fcf_sec_t *sp = (void *)(ckp->ckp_buf +
    861 	    ckp->ckp_hdr->fcfh_secoff + ckp->ckp_hdr->fcfh_secsize * sid);
    862 
    863 	return (sid < ckp->ckp_secs && (sp->fcfs_type == type ||
    864 	    sp->fcfs_type == FCF_SECT_NONE) ? sp : NULL);
    865 }
    866 
    867 /*
    868  * Utility function to retrieve the data pointer for a particular section.  The
    869  * validity of the header values has already been checked by fmd_ckpt_open().
    870  */
    871 static const void *
    872 fmd_ckpt_dataptr(fmd_ckpt_t *ckp, const fcf_sec_t *sp)
    873 {
    874 	return (ckp->ckp_buf + sp->fcfs_offset);
    875 }
    876 
    877 /*
    878  * Utility function to retrieve the end of the data region for a particular
    879  * section.  The validity of this value has been confirmed by fmd_ckpt_open().
    880  */
    881 static const void *
    882 fmd_ckpt_datalim(fmd_ckpt_t *ckp, const fcf_sec_t *sp)
    883 {
    884 	return (ckp->ckp_buf + sp->fcfs_offset + sp->fcfs_size);
    885 }
    886 
    887 /*
    888  * Utility function to retrieve a string pointer (fcf_stridx_t).  If the string
    889  * index is valid, the string data is returned; otherwise 'defstr' is returned.
    890  */
    891 static const char *
    892 fmd_ckpt_strptr(fmd_ckpt_t *ckp, fcf_stridx_t sid, const char *defstr)
    893 {
    894 	return (sid < ckp->ckp_strn ? ckp->ckp_strs + sid : defstr);
    895 }
    896 
    897 static void
    898 fmd_ckpt_restore_events(fmd_ckpt_t *ckp, fcf_secidx_t sid,
    899     void (*func)(void *, fmd_event_t *), void *arg)
    900 {
    901 	const fcf_event_t *fcfe;
    902 	const fcf_sec_t *sp;
    903 	fmd_timeval_t ftv;
    904 	fmd_log_t *lp, *errlp;
    905 	uint_t i, n;
    906 	uint32_t e_maj, e_min;
    907 	uint64_t e_ino;
    908 
    909 	if ((sp = fmd_ckpt_secptr(ckp, sid, FCF_SECT_EVENTS)) == NULL) {
    910 		fmd_ckpt_error(ckp, EFMD_CKPT_INVAL,
    911 		    "invalid link to section %u: expected events\n", sid);
    912 	}
    913 
    914 	if (sp->fcfs_size == 0)
    915 		return; /* empty events section or type none */
    916 
    917 	fcfe = fmd_ckpt_dataptr(ckp, sp);
    918 	n = sp->fcfs_size / sp->fcfs_entsize;
    919 
    920 	/*
    921 	 * Hold the reader lock on log pointers to block log rotation during
    922 	 * the section restore so that we can safely insert refs to d_errlog.
    923 	 */
    924 	(void) pthread_rwlock_rdlock(&fmd.d_log_lock);
    925 	errlp = fmd.d_errlog;
    926 
    927 	e_maj = major(errlp->log_stat.st_dev);
    928 	e_min = minor(errlp->log_stat.st_dev);
    929 	e_ino = errlp->log_stat.st_ino;
    930 
    931 	for (i = 0; i < n; i++) {
    932 		fmd_event_t *ep;
    933 
    934 		ftv.ftv_sec = fcfe->fcfe_todsec;
    935 		ftv.ftv_nsec = fcfe->fcfe_todnsec;
    936 
    937 		if (e_ino == fcfe->fcfe_inode &&
    938 		    e_maj == fcfe->fcfe_major &&
    939 		    e_min == fcfe->fcfe_minor)
    940 			lp = errlp;
    941 		else
    942 			lp = NULL;
    943 
    944 		ep = fmd_event_recreate(FMD_EVT_PROTOCOL,
    945 		    &ftv, NULL, NULL, lp, fcfe->fcfe_offset, 0);
    946 		fmd_event_hold(ep);
    947 		func(arg, ep);
    948 		fmd_event_rele(ep);
    949 
    950 		fcfe = (fcf_event_t *)((uintptr_t)fcfe + sp->fcfs_entsize);
    951 	}
    952 
    953 	(void) pthread_rwlock_unlock(&fmd.d_log_lock);
    954 }
    955 
    956 static int
    957 fmd_ckpt_restore_suspects(fmd_ckpt_t *ckp, fmd_case_t *cp, fcf_secidx_t sid)
    958 {
    959 	const fcf_nvl_t *fcfn, *endn;
    960 	const fcf_sec_t *sp;
    961 	nvlist_t *nvl;
    962 	int err, i;
    963 
    964 	if ((sp = fmd_ckpt_secptr(ckp, sid, FCF_SECT_NVLISTS)) == NULL) {
    965 		fmd_ckpt_error(ckp, EFMD_CKPT_INVAL,
    966 		    "invalid link to section %u: expected nvlists\n", sid);
    967 	}
    968 
    969 	fcfn = fmd_ckpt_dataptr(ckp, sp);
    970 	endn = fmd_ckpt_datalim(ckp, sp);
    971 
    972 	for (i = 0; fcfn < endn; i++) {
    973 		char *data = (char *)fcfn + sp->fcfs_entsize;
    974 		size_t size = (size_t)fcfn->fcfn_size;
    975 
    976 		if (fcfn->fcfn_size > (size_t)((char *)endn - data)) {
    977 			fmd_ckpt_error(ckp, EFMD_CKPT_INVAL, "nvlist %u [%d] "
    978 			    "size %u exceeds buffer\n", sid, i, size);
    979 		}
    980 
    981 		if ((err = nvlist_xunpack(data, size, &nvl, &fmd.d_nva)) != 0) {
    982 			fmd_ckpt_error(ckp, EFMD_CKPT_INVAL, "failed to "
    983 			    "unpack nvlist %u [%d]: %s\n", sid, i,
    984 			    fmd_strerror(err));
    985 		}
    986 
    987 		fmd_case_insert_suspect(cp, nvl);
    988 
    989 		size = sp->fcfs_entsize + fcfn->fcfn_size;
    990 		size = P2ROUNDUP(size, sizeof (uint64_t));
    991 		fcfn = (fcf_nvl_t *)((uintptr_t)fcfn + size);
    992 	}
    993 
    994 	return (i);
    995 }
    996 
    997 static void
    998 fmd_ckpt_restore_bufs(fmd_ckpt_t *ckp, fmd_module_t *mp,
    999     fmd_case_t *cp, fcf_secidx_t sid)
   1000 {
   1001 	const fcf_sec_t *sp, *dsp;
   1002 	const fcf_buf_t *fcfb;
   1003 	uint_t i, n;
   1004 
   1005 	if ((sp = fmd_ckpt_secptr(ckp, sid, FCF_SECT_BUFS)) == NULL) {
   1006 		fmd_ckpt_error(ckp, EFMD_CKPT_INVAL,
   1007 		    "invalid link to section %u: expected bufs\n", sid);
   1008 	}
   1009 
   1010 	if (sp->fcfs_size == 0)
   1011 		return; /* empty events section or type none */
   1012 
   1013 	fcfb = fmd_ckpt_dataptr(ckp, sp);
   1014 	n = sp->fcfs_size / sp->fcfs_entsize;
   1015 
   1016 	for (i = 0; i < n; i++) {
   1017 		dsp = fmd_ckpt_secptr(ckp, fcfb->fcfb_data, FCF_SECT_BUFFER);
   1018 
   1019 		if (dsp == NULL) {
   1020 			fmd_ckpt_error(ckp, EFMD_CKPT_INVAL, "invalid %u "
   1021 			    "buffer link %u\n", sid, fcfb->fcfb_data);
   1022 		}
   1023 
   1024 		fmd_buf_write((fmd_hdl_t *)mp, cp,
   1025 		    fmd_ckpt_strptr(ckp, fcfb->fcfb_name, "<CORRUPT>"),
   1026 		    ckp->ckp_buf + dsp->fcfs_offset, dsp->fcfs_size);
   1027 
   1028 		fcfb = (fcf_buf_t *)((uintptr_t)fcfb + sp->fcfs_entsize);
   1029 	}
   1030 }
   1031 
   1032 static void
   1033 fmd_ckpt_restore_case(fmd_ckpt_t *ckp, fmd_module_t *mp, const fcf_sec_t *sp)
   1034 {
   1035 	const fcf_case_t *fcfc = fmd_ckpt_dataptr(ckp, sp);
   1036 	const char *uuid = fmd_ckpt_strptr(ckp, fcfc->fcfc_uuid, NULL);
   1037 	fmd_case_t *cp;
   1038 	int n;
   1039 
   1040 	if (uuid == NULL || fcfc->fcfc_state > FCF_CASE_CLOSE_WAIT) {
   1041 		fmd_ckpt_error(ckp, EFMD_CKPT_INVAL, "corrupt %u case uuid "
   1042 		    "and/or state\n", (uint_t)(sp - ckp->ckp_secp));
   1043 	}
   1044 
   1045 	fmd_module_lock(mp);
   1046 
   1047 	if ((cp = fmd_case_recreate(mp, NULL,
   1048 	    FMD_CASE_UNSOLVED, uuid, NULL)) == NULL) {
   1049 		fmd_ckpt_error(ckp, EFMD_CKPT_INVAL,
   1050 		    "duplicate case uuid: %s\n", uuid);
   1051 	}
   1052 
   1053 	fmd_ckpt_restore_events(ckp, fcfc->fcfc_principal,
   1054 	    (void (*)(void *, fmd_event_t *))fmd_case_insert_principal, cp);
   1055 
   1056 	fmd_ckpt_restore_events(ckp, fcfc->fcfc_events,
   1057 	    (void (*)(void *, fmd_event_t *))fmd_case_insert_event, cp);
   1058 
   1059 	n = fmd_ckpt_restore_suspects(ckp, cp, fcfc->fcfc_suspects);
   1060 
   1061 	if (fcfc->fcfc_state == FCF_CASE_SOLVED)
   1062 		fmd_case_transition_update(cp, FMD_CASE_SOLVED, FMD_CF_SOLVED);
   1063 	else if (fcfc->fcfc_state == FCF_CASE_CLOSE_WAIT && n != 0)
   1064 		fmd_case_transition(cp, FMD_CASE_CLOSE_WAIT, FMD_CF_SOLVED);
   1065 	else if (fcfc->fcfc_state == FCF_CASE_CLOSE_WAIT && n == 0)
   1066 		fmd_case_transition(cp, FMD_CASE_CLOSE_WAIT, 0);
   1067 
   1068 	fmd_module_unlock(mp);
   1069 	fmd_ckpt_restore_bufs(ckp, mp, cp, fcfc->fcfc_bufs);
   1070 }
   1071 
   1072 static void
   1073 fmd_ckpt_restore_serd(fmd_ckpt_t *ckp, fmd_module_t *mp, const fcf_sec_t *sp)
   1074 {
   1075 	const fcf_serd_t *fcfd = fmd_ckpt_dataptr(ckp, sp);
   1076 	uint_t i, n = sp->fcfs_size / sp->fcfs_entsize;
   1077 	const fcf_sec_t *esp;
   1078 	const char *s;
   1079 
   1080 	for (i = 0; i < n; i++) {
   1081 		esp = fmd_ckpt_secptr(ckp, fcfd->fcfd_events, FCF_SECT_EVENTS);
   1082 
   1083 		if (esp == NULL) {
   1084 			fmd_ckpt_error(ckp, EFMD_CKPT_INVAL,
   1085 			    "invalid events link %u\n", fcfd->fcfd_events);
   1086 		}
   1087 
   1088 		if ((s = fmd_ckpt_strptr(ckp, fcfd->fcfd_name, NULL)) == NULL) {
   1089 			fmd_ckpt_error(ckp, EFMD_CKPT_INVAL,
   1090 			    "serd name %u is corrupt\n", fcfd->fcfd_name);
   1091 		}
   1092 
   1093 		fmd_serd_create((fmd_hdl_t *)mp, s, fcfd->fcfd_n, fcfd->fcfd_t);
   1094 		fmd_module_lock(mp);
   1095 
   1096 		fmd_ckpt_restore_events(ckp, fcfd->fcfd_events,
   1097 		    (void (*)(void *, fmd_event_t *))fmd_serd_eng_record,
   1098 		    fmd_serd_eng_lookup(&mp->mod_serds, s));
   1099 
   1100 		fmd_module_unlock(mp);
   1101 		fcfd = (fcf_serd_t *)((uintptr_t)fcfd + sp->fcfs_entsize);
   1102 	}
   1103 }
   1104 
   1105 static void
   1106 fmd_ckpt_restore_module(fmd_ckpt_t *ckp, fmd_module_t *mp)
   1107 {
   1108 	const fcf_module_t *fcfm = fmd_ckpt_dataptr(ckp, ckp->ckp_modp);
   1109 	const fcf_sec_t *sp;
   1110 	uint_t i;
   1111 
   1112 	if (strcmp(mp->mod_name, fmd_ckpt_strptr(ckp, fcfm->fcfm_name, "")) ||
   1113 	    strcmp(mp->mod_path, fmd_ckpt_strptr(ckp, fcfm->fcfm_path, ""))) {
   1114 		fmd_ckpt_error(ckp, EFMD_CKPT_INVAL,
   1115 		    "checkpoint is not for module %s\n", mp->mod_name);
   1116 	}
   1117 
   1118 	for (i = 0; i < ckp->ckp_secs; i++) {
   1119 		sp = (void *)(ckp->ckp_buf +
   1120 		    ckp->ckp_hdr->fcfh_secoff + ckp->ckp_hdr->fcfh_secsize * i);
   1121 
   1122 		switch (sp->fcfs_type) {
   1123 		case FCF_SECT_CASE:
   1124 			fmd_ckpt_restore_case(ckp, mp, sp);
   1125 			break;
   1126 		case FCF_SECT_SERD:
   1127 			fmd_ckpt_restore_serd(ckp, mp, sp);
   1128 			break;
   1129 		}
   1130 	}
   1131 
   1132 	fmd_ckpt_restore_bufs(ckp, mp, NULL, fcfm->fcfm_bufs);
   1133 	mp->mod_gen = ckp->ckp_hdr->fcfh_cgen;
   1134 }
   1135 
   1136 /*
   1137  * Restore a checkpoint for the specified module.  Any errors which occur
   1138  * during restore will call fmd_ckpt_error() or trigger an fmd_api_error(),
   1139  * either of which will automatically unlock the module and trigger an abort.
   1140  */
   1141 void
   1142 fmd_ckpt_restore(fmd_module_t *mp)
   1143 {
   1144 	fmd_ckpt_t ckp;
   1145 
   1146 	if (mp->mod_stats->ms_ckpt_restore.fmds_value.bool == FMD_B_FALSE)
   1147 		return; /* never restore checkpoints for this module */
   1148 
   1149 	TRACE((FMD_DBG_CKPT, "ckpt restore begin %s", mp->mod_name));
   1150 
   1151 	if (fmd_ckpt_open(&ckp, mp) == -1) {
   1152 		if (errno != ENOENT)
   1153 			fmd_error(EFMD_CKPT_OPEN, "can't open %s", ckp.ckp_src);
   1154 		TRACE((FMD_DBG_CKPT, "ckpt restore end %s", mp->mod_name));
   1155 		return;
   1156 	}
   1157 
   1158 	ASSERT(!fmd_module_locked(mp));
   1159 	fmd_ckpt_restore_module(&ckp, mp);
   1160 	fmd_ckpt_destroy(&ckp);
   1161 	fmd_module_clrdirty(mp);
   1162 
   1163 	TRACE((FMD_DBG_CKPT, "ckpt restore end %s", mp->mod_name));
   1164 	fmd_dprintf(FMD_DBG_CKPT, "restored checkpoint of %s\n", mp->mod_name);
   1165 }
   1166 
   1167 /*
   1168  * Delete the module's checkpoint file.  This is used by the ckpt.zero property
   1169  * code or by the fmadm reset RPC service path to force a checkpoint delete.
   1170  */
   1171 void
   1172 fmd_ckpt_delete(fmd_module_t *mp)
   1173 {
   1174 	char path[PATH_MAX];
   1175 
   1176 	(void) snprintf(path, sizeof (path),
   1177 	    "%s/%s", mp->mod_ckpt, mp->mod_name);
   1178 
   1179 	TRACE((FMD_DBG_CKPT, "delete %s ckpt", mp->mod_name));
   1180 
   1181 	if (unlink(path) != 0 && errno != ENOENT)
   1182 		fmd_error(EFMD_CKPT_DELETE, "failed to delete %s", path);
   1183 }
   1184 
   1185 /*
   1186  * Move aside the module's checkpoint file if checkpoint restore has failed.
   1187  * We rename the file rather than deleting it in the hopes that someone might
   1188  * send it to us for post-mortem analysis of whether we have a checkpoint bug.
   1189  */
   1190 void
   1191 fmd_ckpt_rename(fmd_module_t *mp)
   1192 {
   1193 	char src[PATH_MAX], dst[PATH_MAX];
   1194 
   1195 	(void) snprintf(src, sizeof (src), "%s/%s", mp->mod_ckpt, mp->mod_name);
   1196 	(void) snprintf(dst, sizeof (dst), "%s-", src);
   1197 
   1198 	TRACE((FMD_DBG_CKPT, "rename %s ckpt", mp->mod_name));
   1199 
   1200 	if (rename(src, dst) != 0 && errno != ENOENT)
   1201 		fmd_error(EFMD_CKPT_DELETE, "failed to rename %s", src);
   1202 }
   1203