Home | History | Annotate | Download | only in io
      1 /*
      2  * CDDL HEADER START
      3  *
      4  * The contents of this file are subject to the terms of the
      5  * Common Development and Distribution License (the "License").
      6  * You may not use this file except in compliance with the License.
      7  *
      8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
      9  * or http://www.opensolaris.org/os/licensing.
     10  * See the License for the specific language governing permissions
     11  * and limitations under the License.
     12  *
     13  * When distributing Covered Code, include this CDDL HEADER in each
     14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
     15  * If applicable, add the following below this CDDL HEADER, with the
     16  * fields enclosed by brackets "[]" replaced with your own identifying
     17  * information: Portions Copyright [yyyy] [name of copyright owner]
     18  *
     19  * CDDL HEADER END
     20  */
     21 /*
     22  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
     23  * Use is subject to license terms.
     24  */
     25 
     26 /*
     27  * pseudo scsi disk driver
     28  */
     29 
     30 #include <sys/scsi/scsi.h>
     31 #include <sys/ddi.h>
     32 #include <sys/sunddi.h>
     33 #include <sys/kmem.h>
     34 #include <sys/taskq.h>
     35 #include <sys/disp.h>
     36 #include <sys/types.h>
     37 #include <sys/buf.h>
     38 
     39 #include <sys/emul64.h>
     40 #include <sys/emul64cmd.h>
     41 #include <sys/emul64var.h>
     42 
     43 /*
     44  * Mode sense/select page control
     45  */
     46 #define	MODE_SENSE_PC_CURRENT		0
     47 #define	MODE_SENSE_PC_CHANGEABLE	1
     48 #define	MODE_SENSE_PC_DEFAULT		2
     49 #define	MODE_SENSE_PC_SAVED		3
     50 
     51 /*
     52  * Byte conversion macros
     53  */
     54 #if	defined(_BIG_ENDIAN)
     55 #define	ushort_to_scsi_ushort(n)	(n)
     56 #define	uint32_to_scsi_uint32(n)	(n)
     57 #define	uint64_to_scsi_uint64(n)	(n)
     58 #elif	defined(_LITTLE_ENDIAN)
     59 
     60 #define	ushort_to_scsi_ushort(n)			\
     61 		((((n) & 0x00ff) << 8) |		\
     62 		(((n)  & 0xff00) >> 8))
     63 
     64 #define	uint32_to_scsi_uint32(n)			\
     65 		((((n) & 0x000000ff) << 24) |		\
     66 		(((n)  & 0x0000ff00) << 8) |		\
     67 		(((n)  & 0x00ff0000) >> 8) |		\
     68 		(((n)  & 0xff000000) >> 24))
     69 #define	uint64_to_scsi_uint64(n)				\
     70 		((((n) & 0x00000000000000ff) << 56) |           \
     71 		(((n)  & 0x000000000000ff00) << 40) |           \
     72 		(((n)  & 0x0000000000ff0000) << 24) |           \
     73 		(((n)  & 0x00000000ff000000) << 8) |            \
     74 		(((n)  & 0x000000ff00000000) >> 8) |            \
     75 		(((n)  & 0x0000ff0000000000) >> 24) |           \
     76 		(((n)  & 0x00ff000000000000) >> 40) |           \
     77 		(((n)  & 0xff00000000000000) >> 56))
     78 #else
     79 error no _BIG_ENDIAN or _LITTLE_ENDIAN
     80 #endif
     81 #define	uint_to_byte0(n)		((n) & 0xff)
     82 #define	uint_to_byte1(n)		(((n)>>8) & 0xff)
     83 #define	uint_to_byte2(n)		(((n)>>16) & 0xff)
     84 #define	uint_to_byte3(n)		(((n)>>24) & 0xff)
     85 
     86 /*
     87  * struct prop_map
     88  *
     89  * This structure maps a property name to the place to store its value.
     90  */
     91 struct prop_map {
     92 	char 		*pm_name;	/* Name of the property. */
     93 	int		*pm_value;	/* Place to store the value. */
     94 };
     95 
     96 static int emul64_debug_blklist = 0;
     97 
     98 /*
     99  * Some interesting statistics.  These are protected by the
    100  * emul64_stats_mutex.  It would be nice to have an ioctl to print them out,
    101  * but we don't have the development time for that now.  You can at least
    102  * look at them with adb.
    103  */
    104 
    105 int		emul64_collect_stats = 1; /* Collect stats if non-zero */
    106 kmutex_t	emul64_stats_mutex;	/* Protect these variables */
    107 long		emul64_nowrite_count = 0; /* # active nowrite ranges */
    108 static uint64_t	emul64_skipped_io = 0;	/* Skipped I/O operations, because of */
    109 					/* EMUL64_WRITE_OFF. */
    110 static uint64_t	emul64_skipped_blk = 0;	/* Skipped blocks because of */
    111 					/* EMUL64_WRITE_OFF. */
    112 static uint64_t	emul64_io_ops = 0;	/* Total number of I/O operations */
    113 					/* including skipped and actual. */
    114 static uint64_t	emul64_io_blocks = 0;	/* Total number of blocks involved */
    115 					/* in I/O operations. */
    116 static uint64_t	emul64_nonzero = 0;	/* Number of non-zero data blocks */
    117 					/* currently held in memory */
    118 static uint64_t	emul64_max_list_length = 0; /* Maximum size of a linked */
    119 					    /* list of non-zero blocks. */
    120 uint64_t emul64_taskq_max = 0;		/* emul64_scsi_start uses the taskq */
    121 					/* mechanism to dispatch work. */
    122 					/* If the number of entries in the */
    123 					/* exceeds the maximum for the queue */
    124 					/* the queue a 1 second delay is */
    125 					/* encountered in taskq_ent_alloc. */
    126 					/* This counter counts the number */
    127 					/* times that this happens. */
    128 
    129 /*
    130  * Since emul64 does no physical I/O, operations that would normally be I/O
    131  * intensive become CPU bound.  An example of this is RAID 5
    132  * initialization.  When the kernel becomes CPU bound, it looks as if the
    133  * machine is hung.
    134  *
    135  * To avoid this problem, we provide a function, emul64_yield_check, that does a
    136  * delay from time to time to yield up the CPU.  The following variables
    137  * are tunables for this algorithm.
    138  *
    139  *	emul64_num_delay_called	Number of times we called delay.  This is
    140  *				not really a tunable.  Rather it is a
    141  *				counter that provides useful information
    142  *				for adjusting the tunables.
    143  *	emul64_yield_length	Number of microseconds to yield the CPU.
    144  *	emul64_yield_period	Number of I/O operations between yields.
    145  *	emul64_yield_enable	emul64 will yield the CPU, only if this
    146  *				variable contains a non-zero value.  This
    147  *				allows the yield functionality to be turned
    148  *				off for experimentation purposes.
    149  *
    150  * The value of 1000 for emul64_yield_period has been determined by
    151  * experience with running the tests.
    152  */
    153 static uint64_t		emul64_num_delay_called = 0;
    154 static int		emul64_yield_length = 1000;
    155 static int		emul64_yield_period = 1000;
    156 static int		emul64_yield_enable = 1;
    157 static kmutex_t		emul64_yield_mutex;
    158 static kcondvar_t 	emul64_yield_cv;
    159 
    160 /*
    161  * This array establishes a set of tunable variables that can be set by
    162  * defining properties in the emul64.conf file.
    163  */
    164 struct prop_map emul64_properties[] = {
    165 	"emul64_collect_stats",		&emul64_collect_stats,
    166 	"emul64_yield_length",		&emul64_yield_length,
    167 	"emul64_yield_period",		&emul64_yield_period,
    168 	"emul64_yield_enable",		&emul64_yield_enable,
    169 	"emul64_max_task",		&emul64_max_task,
    170 	"emul64_task_nthreads",		&emul64_task_nthreads
    171 };
    172 
    173 static unsigned char *emul64_zeros = NULL; /* Block of 0s for comparison */
    174 
    175 extern void emul64_check_cond(struct scsi_pkt *pkt, uchar_t key,
    176 				uchar_t asc, uchar_t ascq);
    177 /* ncyl=250000 acyl=2 nhead=24 nsect=357 */
    178 uint_t dkg_rpm = 3600;
    179 
    180 static int bsd_mode_sense_dad_mode_geometry(struct scsi_pkt *);
    181 static int bsd_mode_sense_dad_mode_err_recov(struct scsi_pkt *);
    182 static int bsd_mode_sense_modepage_disco_reco(struct scsi_pkt *);
    183 static int bsd_mode_sense_dad_mode_format(struct scsi_pkt *);
    184 static int bsd_mode_sense_dad_mode_cache(struct scsi_pkt *);
    185 static int bsd_readblks(struct emul64 *, ushort_t, ushort_t, diskaddr_t,
    186 				int, unsigned char *);
    187 static int bsd_writeblks(struct emul64 *, ushort_t, ushort_t, diskaddr_t,
    188 				int, unsigned char *);
    189 emul64_tgt_t *find_tgt(struct emul64 *, ushort_t, ushort_t);
    190 static blklist_t *bsd_findblk(emul64_tgt_t *, diskaddr_t, avl_index_t *);
    191 static void bsd_allocblk(emul64_tgt_t *, diskaddr_t, caddr_t, avl_index_t);
    192 static void bsd_freeblk(emul64_tgt_t *, blklist_t *);
    193 static void emul64_yield_check();
    194 static emul64_rng_overlap_t bsd_tgt_overlap(emul64_tgt_t *, diskaddr_t, int);
    195 
    196 char *emul64_name = "emul64";
    197 
    198 
    199 /*
    200  * Initialize globals in this file.
    201  */
    202 void
    203 emul64_bsd_init()
    204 {
    205 	emul64_zeros = (unsigned char *) kmem_zalloc(DEV_BSIZE, KM_SLEEP);
    206 	mutex_init(&emul64_stats_mutex, NULL, MUTEX_DRIVER, NULL);
    207 	mutex_init(&emul64_yield_mutex, NULL, MUTEX_DRIVER, NULL);
    208 	cv_init(&emul64_yield_cv, NULL, CV_DRIVER, NULL);
    209 }
    210 
    211 /*
    212  * Clean up globals in this file.
    213  */
    214 void
    215 emul64_bsd_fini()
    216 {
    217 	cv_destroy(&emul64_yield_cv);
    218 	mutex_destroy(&emul64_yield_mutex);
    219 	mutex_destroy(&emul64_stats_mutex);
    220 	if (emul64_zeros != NULL) {
    221 		kmem_free(emul64_zeros, DEV_BSIZE);
    222 		emul64_zeros = NULL;
    223 	}
    224 }
    225 
    226 /*
    227  * Attempt to get the values of the properties that are specified in the
    228  * emul64_properties array.  If the property exists, copy its value to the
    229  * specified location.  All the properties have been assigned default
    230  * values in this driver, so if we cannot get the property that is not a
    231  * problem.
    232  */
    233 void
    234 emul64_bsd_get_props(dev_info_t *dip)
    235 {
    236 	uint_t		count;
    237 	uint_t		i;
    238 	struct prop_map	*pmp;
    239 	int		*properties;
    240 
    241 	for (pmp = emul64_properties, i = 0;
    242 	    i < sizeof (emul64_properties) / sizeof (struct prop_map);
    243 	    i++, pmp++) {
    244 		if (ddi_prop_lookup_int_array(DDI_DEV_T_ANY, dip,
    245 		    DDI_PROP_DONTPASS, pmp->pm_name, &properties,
    246 		    &count) == DDI_PROP_SUCCESS) {
    247 			if (count >= 1) {
    248 				*pmp->pm_value = *properties;
    249 			}
    250 			ddi_prop_free((void *) properties);
    251 		}
    252 	}
    253 }
    254 
    255 int
    256 emul64_bsd_blkcompare(const void *a1, const void *b1)
    257 {
    258 	blklist_t	*a = (blklist_t *)a1;
    259 	blklist_t	*b = (blklist_t *)b1;
    260 
    261 	if (a->bl_blkno < b->bl_blkno)
    262 		return (-1);
    263 	if (a->bl_blkno == b->bl_blkno)
    264 		return (0);
    265 	return (1);
    266 }
    267 
    268 /* ARGSUSED 0 */
    269 int
    270 bsd_scsi_start_stop_unit(struct scsi_pkt *pkt)
    271 {
    272 	return (0);
    273 }
    274 
    275 /* ARGSUSED 0 */
    276 int
    277 bsd_scsi_test_unit_ready(struct scsi_pkt *pkt)
    278 {
    279 	return (0);
    280 }
    281 
    282 /* ARGSUSED 0 */
    283 int
    284 bsd_scsi_request_sense(struct scsi_pkt *pkt)
    285 {
    286 	return (0);
    287 }
    288 
    289 int
    290 bsd_scsi_inq_page0(struct scsi_pkt *pkt, uchar_t pqdtype)
    291 {
    292 	struct emul64_cmd	*sp = PKT2CMD(pkt);
    293 
    294 	if (sp->cmd_count < 6) {
    295 		cmn_err(CE_CONT, "%s: bsd_scsi_inq_page0: size %d required\n",
    296 		    emul64_name, 6);
    297 		return (EIO);
    298 	}
    299 
    300 	sp->cmd_addr[0] = pqdtype;	/* periph qual., dtype */
    301 	sp->cmd_addr[1] = 0;		/* page code */
    302 	sp->cmd_addr[2] = 0;		/* reserved */
    303 	sp->cmd_addr[3] = 6 - 3;	/* length */
    304 	sp->cmd_addr[4] = 0;		/* 1st page */
    305 	sp->cmd_addr[5] = 0x83;		/* 2nd page */
    306 
    307 	pkt->pkt_resid = sp->cmd_count - 6;
    308 	return (0);
    309 }
    310 
    311 int
    312 bsd_scsi_inq_page83(struct scsi_pkt *pkt, uchar_t pqdtype)
    313 {
    314 	struct emul64		*emul64 = PKT2EMUL64(pkt);
    315 	struct emul64_cmd	*sp = PKT2CMD(pkt);
    316 	int			instance = ddi_get_instance(emul64->emul64_dip);
    317 
    318 	if (sp->cmd_count < 22) {
    319 		cmn_err(CE_CONT, "%s: bsd_scsi_inq_page83: size %d required\n",
    320 		    emul64_name, 22);
    321 		return (EIO);
    322 	}
    323 
    324 	sp->cmd_addr[0] = pqdtype;	/* periph qual., dtype */
    325 	sp->cmd_addr[1] = 0x83;		/* page code */
    326 	sp->cmd_addr[2] = 0;		/* reserved */
    327 	sp->cmd_addr[3] = (22 - 8) + 4;	/* length */
    328 
    329 	sp->cmd_addr[4] = 1;		/* code set - binary */
    330 	sp->cmd_addr[5] = 3;		/* association and device ID type 3 */
    331 	sp->cmd_addr[6] = 0;		/* reserved */
    332 	sp->cmd_addr[7] = 22 - 8;	/* ID length */
    333 
    334 	sp->cmd_addr[8] = 0xde;		/* @8: identifier, byte 0 */
    335 	sp->cmd_addr[9] = 0xca;
    336 	sp->cmd_addr[10] = 0xde;
    337 	sp->cmd_addr[11] = 0x80;
    338 
    339 	sp->cmd_addr[12] = 0xba;
    340 	sp->cmd_addr[13] = 0xbe;
    341 	sp->cmd_addr[14] = 0xab;
    342 	sp->cmd_addr[15] = 0xba;
    343 					/* @22: */
    344 
    345 	/*
    346 	 * Instances seem to be assigned sequentially, so it unlikely that we
    347 	 * will have more than 65535 of them.
    348 	 */
    349 	sp->cmd_addr[16] = uint_to_byte1(instance);
    350 	sp->cmd_addr[17] = uint_to_byte0(instance);
    351 	sp->cmd_addr[18] = uint_to_byte1(TGT(sp));
    352 	sp->cmd_addr[19] = uint_to_byte0(TGT(sp));
    353 	sp->cmd_addr[20] = uint_to_byte1(LUN(sp));
    354 	sp->cmd_addr[21] = uint_to_byte0(LUN(sp));
    355 
    356 	pkt->pkt_resid = sp->cmd_count - 22;
    357 	return (0);
    358 }
    359 
    360 int
    361 bsd_scsi_inquiry(struct scsi_pkt *pkt)
    362 {
    363 	struct emul64_cmd	*sp = PKT2CMD(pkt);
    364 	union scsi_cdb		*cdb = (union scsi_cdb *)pkt->pkt_cdbp;
    365 	emul64_tgt_t		*tgt;
    366 	uchar_t			pqdtype;
    367 	struct scsi_inquiry	inq;
    368 
    369 	EMUL64_MUTEX_ENTER(sp->cmd_emul64);
    370 	tgt = find_tgt(sp->cmd_emul64,
    371 	    pkt->pkt_address.a_target, pkt->pkt_address.a_lun);
    372 	EMUL64_MUTEX_EXIT(sp->cmd_emul64);
    373 
    374 	if (sp->cmd_count < sizeof (inq)) {
    375 		cmn_err(CE_CONT, "%s: bsd_scsi_inquiry: size %d required\n",
    376 		    emul64_name, (int)sizeof (inq));
    377 		return (EIO);
    378 	}
    379 
    380 	if (cdb->cdb_opaque[1] & 0xfc) {
    381 		cmn_err(CE_WARN, "%s: bsd_scsi_inquiry: 0x%x",
    382 		    emul64_name, cdb->cdb_opaque[1]);
    383 		emul64_check_cond(pkt, 0x5, 0x24, 0x0);	/* inv. fld in cdb */
    384 		return (0);
    385 	}
    386 
    387 	pqdtype = tgt->emul64_tgt_dtype;
    388 	if (cdb->cdb_opaque[1] & 0x1) {
    389 		switch (cdb->cdb_opaque[2]) {
    390 		case 0x00:
    391 			return (bsd_scsi_inq_page0(pkt, pqdtype));
    392 		case 0x83:
    393 			return (bsd_scsi_inq_page83(pkt, pqdtype));
    394 		default:
    395 			cmn_err(CE_WARN, "%s: bsd_scsi_inquiry: "
    396 			    "unsupported 0x%x",
    397 			    emul64_name, cdb->cdb_opaque[2]);
    398 			return (0);
    399 		}
    400 	}
    401 
    402 	/* set up the inquiry data we return */
    403 	(void) bzero((void *)&inq, sizeof (inq));
    404 
    405 	inq.inq_dtype = pqdtype;
    406 	inq.inq_ansi = 2;
    407 	inq.inq_rdf = 2;
    408 	inq.inq_len = sizeof (inq) - 4;
    409 	inq.inq_wbus16 = 1;
    410 	inq.inq_cmdque = 1;
    411 
    412 	(void) bcopy(tgt->emul64_tgt_inq, inq.inq_vid,
    413 	    sizeof (tgt->emul64_tgt_inq));
    414 	(void) bcopy("1", inq.inq_revision, 2);
    415 	(void) bcopy((void *)&inq, sp->cmd_addr, sizeof (inq));
    416 
    417 	pkt->pkt_resid = sp->cmd_count - sizeof (inq);
    418 	return (0);
    419 }
    420 
    421 /* ARGSUSED 0 */
    422 int
    423 bsd_scsi_format(struct scsi_pkt *pkt)
    424 {
    425 	return (0);
    426 }
    427 
    428 int
    429 bsd_scsi_io(struct scsi_pkt *pkt)
    430 {
    431 	struct emul64_cmd	*sp = PKT2CMD(pkt);
    432 	union scsi_cdb		*cdb = (union scsi_cdb *)pkt->pkt_cdbp;
    433 	diskaddr_t		lblkno;
    434 	int			nblks;
    435 
    436 	switch (cdb->scc_cmd) {
    437 	case SCMD_READ:
    438 			lblkno = (uint32_t)GETG0ADDR(cdb);
    439 			nblks = GETG0COUNT(cdb);
    440 			pkt->pkt_resid = bsd_readblks(sp->cmd_emul64,
    441 			    pkt->pkt_address.a_target, pkt->pkt_address.a_lun,
    442 			    lblkno, nblks, sp->cmd_addr);
    443 			if (emul64debug) {
    444 				cmn_err(CE_CONT, "%s: bsd_scsi_io: "
    445 				    "read g0 blk=%lld (0x%llx) nblks=%d\n",
    446 				    emul64_name, lblkno, lblkno, nblks);
    447 			}
    448 		break;
    449 	case SCMD_WRITE:
    450 			lblkno = (uint32_t)GETG0ADDR(cdb);
    451 			nblks = GETG0COUNT(cdb);
    452 			pkt->pkt_resid = bsd_writeblks(sp->cmd_emul64,
    453 			    pkt->pkt_address.a_target, pkt->pkt_address.a_lun,
    454 			    lblkno, nblks, sp->cmd_addr);
    455 			if (emul64debug) {
    456 				cmn_err(CE_CONT, "%s: bsd_scsi_io: "
    457 				    "write g0 blk=%lld (0x%llx) nblks=%d\n",
    458 				    emul64_name, lblkno, lblkno, nblks);
    459 			}
    460 		break;
    461 	case SCMD_READ_G1:
    462 			lblkno = (uint32_t)GETG1ADDR(cdb);
    463 			nblks = GETG1COUNT(cdb);
    464 			pkt->pkt_resid = bsd_readblks(sp->cmd_emul64,
    465 			    pkt->pkt_address.a_target, pkt->pkt_address.a_lun,
    466 			    lblkno, nblks, sp->cmd_addr);
    467 			if (emul64debug) {
    468 				cmn_err(CE_CONT, "%s: bsd_scsi_io: "
    469 				    "read g1 blk=%lld (0x%llx) nblks=%d\n",
    470 				    emul64_name, lblkno, lblkno, nblks);
    471 			}
    472 		break;
    473 	case SCMD_WRITE_G1:
    474 			lblkno = (uint32_t)GETG1ADDR(cdb);
    475 			nblks = GETG1COUNT(cdb);
    476 			pkt->pkt_resid = bsd_writeblks(sp->cmd_emul64,
    477 			    pkt->pkt_address.a_target, pkt->pkt_address.a_lun,
    478 			    lblkno, nblks, sp->cmd_addr);
    479 			if (emul64debug) {
    480 				cmn_err(CE_CONT, "%s: bsd_scsi_io: "
    481 				    "write g1 blk=%lld (0x%llx) nblks=%d\n",
    482 				    emul64_name, lblkno, lblkno, nblks);
    483 			}
    484 		break;
    485 	case SCMD_READ_G4:
    486 			lblkno = GETG4ADDR(cdb);
    487 			lblkno <<= 32;
    488 			lblkno |= (uint32_t)GETG4ADDRTL(cdb);
    489 			nblks = GETG4COUNT(cdb);
    490 			pkt->pkt_resid = bsd_readblks(sp->cmd_emul64,
    491 			    pkt->pkt_address.a_target, pkt->pkt_address.a_lun,
    492 			    lblkno, nblks, sp->cmd_addr);
    493 			if (emul64debug) {
    494 				cmn_err(CE_CONT, "%s: bsd_scsi_io: "
    495 				    "read g4 blk=%lld (0x%llx) nblks=%d\n",
    496 				    emul64_name, lblkno, lblkno, nblks);
    497 			}
    498 		break;
    499 	case SCMD_WRITE_G4:
    500 			lblkno = GETG4ADDR(cdb);
    501 			lblkno <<= 32;
    502 			lblkno |= (uint32_t)GETG4ADDRTL(cdb);
    503 			nblks = GETG4COUNT(cdb);
    504 			pkt->pkt_resid = bsd_writeblks(sp->cmd_emul64,
    505 			    pkt->pkt_address.a_target, pkt->pkt_address.a_lun,
    506 			    lblkno, nblks, sp->cmd_addr);
    507 			if (emul64debug) {
    508 				cmn_err(CE_CONT, "%s: bsd_scsi_io: "
    509 				    "write g4 blk=%lld (0x%llx) nblks=%d\n",
    510 				    emul64_name, lblkno, lblkno, nblks);
    511 			}
    512 		break;
    513 	default:
    514 		cmn_err(CE_WARN, "%s: bsd_scsi_io: unhandled I/O: 0x%x",
    515 		    emul64_name, cdb->scc_cmd);
    516 		break;
    517 	}
    518 
    519 	if (pkt->pkt_resid != 0)
    520 		cmn_err(CE_WARN, "%s: bsd_scsi_io: "
    521 		    "pkt_resid: 0x%lx, lblkno %lld, nblks %d",
    522 		    emul64_name, pkt->pkt_resid, lblkno, nblks);
    523 
    524 	return (0);
    525 }
    526 
    527 int
    528 bsd_scsi_log_sense(struct scsi_pkt *pkt)
    529 {
    530 	union scsi_cdb		*cdb = (union scsi_cdb *)pkt->pkt_cdbp;
    531 	struct emul64_cmd	*sp = PKT2CMD(pkt);
    532 	int			page_code;
    533 
    534 	if (sp->cmd_count < 9) {
    535 		cmn_err(CE_CONT, "%s: bsd_scsi_log_sense size %d required\n",
    536 		    emul64_name, 9);
    537 		return (EIO);
    538 	}
    539 
    540 	page_code = cdb->cdb_opaque[2] & 0x3f;
    541 	if (page_code) {
    542 		cmn_err(CE_CONT, "%s: bsd_scsi_log_sense: "
    543 		    "page 0x%x not supported\n", emul64_name, page_code);
    544 		emul64_check_cond(pkt, 0x5, 0x24, 0x0); /* inv. fld in cdb */
    545 		return (0);
    546 	}
    547 
    548 	sp->cmd_addr[0] = 0;		/* page code */
    549 	sp->cmd_addr[1] = 0;		/* reserved */
    550 	sp->cmd_addr[2] = 0;		/* MSB of page length */
    551 	sp->cmd_addr[3] = 8 - 3;	/* LSB of page length */
    552 
    553 	sp->cmd_addr[4] = 0;		/* MSB of parameter code */
    554 	sp->cmd_addr[5] = 0;		/* LSB of parameter code */
    555 	sp->cmd_addr[6] = 0;		/* parameter control byte */
    556 	sp->cmd_addr[7] = 4 - 3;	/* parameter length */
    557 	sp->cmd_addr[8] = 0x0;		/* parameter value */
    558 
    559 	pkt->pkt_resid = sp->cmd_count - 9;
    560 	return (0);
    561 }
    562 
    563 int
    564 bsd_scsi_mode_sense(struct scsi_pkt *pkt)
    565 {
    566 	union scsi_cdb	*cdb = (union scsi_cdb *)pkt->pkt_cdbp;
    567 	int		page_control;
    568 	int		page_code;
    569 	int		rval = 0;
    570 
    571 	switch (cdb->scc_cmd) {
    572 	case SCMD_MODE_SENSE:
    573 			page_code = cdb->cdb_opaque[2] & 0x3f;
    574 			page_control = (cdb->cdb_opaque[2] >> 6) & 0x03;
    575 			if (emul64debug) {
    576 				cmn_err(CE_CONT, "%s: bsd_scsi_mode_sense: "
    577 				    "page=0x%x control=0x%x nbytes=%d\n",
    578 				    emul64_name, page_code, page_control,
    579 				    GETG0COUNT(cdb));
    580 			}
    581 		break;
    582 	case SCMD_MODE_SENSE_G1:
    583 			page_code = cdb->cdb_opaque[2] & 0x3f;
    584 			page_control = (cdb->cdb_opaque[2] >> 6) & 0x03;
    585 			if (emul64debug) {
    586 				cmn_err(CE_CONT, "%s: bsd_scsi_mode_sense: "
    587 				    "page=0x%x control=0x%x nbytes=%d\n",
    588 				    emul64_name, page_code, page_control,
    589 				    GETG1COUNT(cdb));
    590 			}
    591 		break;
    592 	default:
    593 		cmn_err(CE_CONT, "%s: bsd_scsi_mode_sense: "
    594 		    "cmd 0x%x not supported\n", emul64_name, cdb->scc_cmd);
    595 		return (EIO);
    596 	}
    597 
    598 	switch (page_code) {
    599 	case DAD_MODE_GEOMETRY:
    600 		rval = bsd_mode_sense_dad_mode_geometry(pkt);
    601 		break;
    602 	case DAD_MODE_ERR_RECOV:
    603 		rval = bsd_mode_sense_dad_mode_err_recov(pkt);
    604 		break;
    605 	case MODEPAGE_DISCO_RECO:
    606 		rval = bsd_mode_sense_modepage_disco_reco(pkt);
    607 		break;
    608 	case DAD_MODE_FORMAT:
    609 		rval = bsd_mode_sense_dad_mode_format(pkt);
    610 		break;
    611 	case DAD_MODE_CACHE:
    612 		rval = bsd_mode_sense_dad_mode_cache(pkt);
    613 		break;
    614 	default:
    615 		cmn_err(CE_CONT, "%s: bsd_scsi_mode_sense: "
    616 		    "page 0x%x not supported\n", emul64_name, page_code);
    617 		rval = EIO;
    618 		break;
    619 	}
    620 
    621 	return (rval);
    622 }
    623 
    624 
    625 static int
    626 bsd_mode_sense_dad_mode_geometry(struct scsi_pkt *pkt)
    627 {
    628 	struct emul64_cmd	*sp = PKT2CMD(pkt);
    629 	union scsi_cdb		*cdb = (union scsi_cdb *)pkt->pkt_cdbp;
    630 	uchar_t			*addr = (uchar_t *)sp->cmd_addr;
    631 	emul64_tgt_t		*tgt;
    632 	int			page_control;
    633 	struct mode_header	header;
    634 	struct mode_geometry	page4;
    635 	int			ncyl;
    636 	int			rval = 0;
    637 
    638 	page_control = (cdb->cdb_opaque[2] >> 6) & 0x03;
    639 
    640 	if (emul64debug) {
    641 		cmn_err(CE_CONT, "%s: bsd_mode_sense_dad_mode_geometry: "
    642 		    "pc=%d n=%d\n", emul64_name, page_control, sp->cmd_count);
    643 	}
    644 
    645 	if (sp->cmd_count < (sizeof (header) + sizeof (page4))) {
    646 		cmn_err(CE_CONT, "%s: bsd_mode_sense_dad_mode_geometry: "
    647 		    "size %d required\n",
    648 		    emul64_name, (int)(sizeof (header) + sizeof (page4)));
    649 		return (EIO);
    650 	}
    651 
    652 	(void) bzero(&header, sizeof (header));
    653 	(void) bzero(&page4, sizeof (page4));
    654 
    655 	header.length = sizeof (header) + sizeof (page4) - 1;
    656 	header.bdesc_length = 0;
    657 
    658 	page4.mode_page.code = DAD_MODE_GEOMETRY;
    659 	page4.mode_page.ps = 1;
    660 	page4.mode_page.length = sizeof (page4) - sizeof (struct mode_page);
    661 
    662 	switch (page_control) {
    663 	case MODE_SENSE_PC_CURRENT:
    664 	case MODE_SENSE_PC_DEFAULT:
    665 	case MODE_SENSE_PC_SAVED:
    666 		EMUL64_MUTEX_ENTER(sp->cmd_emul64);
    667 		tgt = find_tgt(sp->cmd_emul64,
    668 		    pkt->pkt_address.a_target, pkt->pkt_address.a_lun);
    669 		EMUL64_MUTEX_EXIT(sp->cmd_emul64);
    670 		ncyl = tgt->emul64_tgt_ncyls;
    671 		page4.cyl_ub = uint_to_byte2(ncyl);
    672 		page4.cyl_mb = uint_to_byte1(ncyl);
    673 		page4.cyl_lb = uint_to_byte0(ncyl);
    674 		page4.heads = uint_to_byte0(tgt->emul64_tgt_nheads);
    675 		page4.rpm = ushort_to_scsi_ushort(dkg_rpm);
    676 		break;
    677 	case MODE_SENSE_PC_CHANGEABLE:
    678 		page4.cyl_ub = 0xff;
    679 		page4.cyl_mb = 0xff;
    680 		page4.cyl_lb = 0xff;
    681 		page4.heads = 0xff;
    682 		page4.rpm = 0xffff;
    683 		break;
    684 	}
    685 
    686 	(void) bcopy(&header, addr, sizeof (header));
    687 	(void) bcopy(&page4, addr + sizeof (header), sizeof (page4));
    688 
    689 	pkt->pkt_resid = sp->cmd_count - sizeof (page4) - sizeof (header);
    690 	rval = 0;
    691 
    692 	return (rval);
    693 }
    694 
    695 static int
    696 bsd_mode_sense_dad_mode_err_recov(struct scsi_pkt *pkt)
    697 {
    698 	struct emul64_cmd	*sp = PKT2CMD(pkt);
    699 	union scsi_cdb		*cdb = (union scsi_cdb *)pkt->pkt_cdbp;
    700 	uchar_t			*addr = (uchar_t *)sp->cmd_addr;
    701 	int			page_control;
    702 	struct mode_header	header;
    703 	struct mode_err_recov	page1;
    704 	int			rval = 0;
    705 
    706 	page_control = (cdb->cdb_opaque[2] >> 6) & 0x03;
    707 
    708 	if (emul64debug) {
    709 		cmn_err(CE_CONT, "%s: bsd_mode_sense_dad_mode_err_recov: "
    710 		    "pc=%d n=%d\n", emul64_name, page_control, sp->cmd_count);
    711 	}
    712 
    713 	if (sp->cmd_count < (sizeof (header) + sizeof (page1))) {
    714 		cmn_err(CE_CONT, "%s: bsd_mode_sense_dad_mode_err_recov: "
    715 		    "size %d required\n",
    716 		    emul64_name, (int)(sizeof (header) + sizeof (page1)));
    717 		return (EIO);
    718 	}
    719 
    720 	(void) bzero(&header, sizeof (header));
    721 	(void) bzero(&page1, sizeof (page1));
    722 
    723 	header.length = sizeof (header) + sizeof (page1) - 1;
    724 	header.bdesc_length = 0;
    725 
    726 	page1.mode_page.code = DAD_MODE_ERR_RECOV;
    727 	page1.mode_page.ps = 1;
    728 	page1.mode_page.length = sizeof (page1) - sizeof (struct mode_page);
    729 
    730 	switch (page_control) {
    731 	case MODE_SENSE_PC_CURRENT:
    732 	case MODE_SENSE_PC_DEFAULT:
    733 	case MODE_SENSE_PC_SAVED:
    734 		break;
    735 	case MODE_SENSE_PC_CHANGEABLE:
    736 		break;
    737 	}
    738 
    739 	(void) bcopy(&header, addr, sizeof (header));
    740 	(void) bcopy(&page1, addr + sizeof (header), sizeof (page1));
    741 
    742 	pkt->pkt_resid = sp->cmd_count - sizeof (page1) - sizeof (header);
    743 	rval = 0;
    744 
    745 	return (rval);
    746 }
    747 
    748 static int
    749 bsd_mode_sense_modepage_disco_reco(struct scsi_pkt *pkt)
    750 {
    751 	struct emul64_cmd	*sp = PKT2CMD(pkt);
    752 	union scsi_cdb		*cdb = (union scsi_cdb *)pkt->pkt_cdbp;
    753 	int			rval = 0;
    754 	uchar_t			*addr = (uchar_t *)sp->cmd_addr;
    755 	int			page_control;
    756 	struct mode_header	header;
    757 	struct mode_disco_reco	page2;
    758 
    759 	page_control = (cdb->cdb_opaque[2] >> 6) & 0x03;
    760 
    761 	if (emul64debug) {
    762 		cmn_err(CE_CONT, "%s: bsd_mode_sense_modepage_disco_reco: "
    763 		    "pc=%d n=%d\n", emul64_name, page_control, sp->cmd_count);
    764 	}
    765 
    766 	if (sp->cmd_count < (sizeof (header) + sizeof (page2))) {
    767 		cmn_err(CE_CONT, "%s: bsd_mode_sense_modepage_disco_reco: "
    768 		    "size %d required\n",
    769 		    emul64_name, (int)(sizeof (header) + sizeof (page2)));
    770 		return (EIO);
    771 	}
    772 
    773 	(void) bzero(&header, sizeof (header));
    774 	(void) bzero(&page2, sizeof (page2));
    775 
    776 	header.length = sizeof (header) + sizeof (page2) - 1;
    777 	header.bdesc_length = 0;
    778 
    779 	page2.mode_page.code = MODEPAGE_DISCO_RECO;
    780 	page2.mode_page.ps = 1;
    781 	page2.mode_page.length = sizeof (page2) - sizeof (struct mode_page);
    782 
    783 	switch (page_control) {
    784 	case MODE_SENSE_PC_CURRENT:
    785 	case MODE_SENSE_PC_DEFAULT:
    786 	case MODE_SENSE_PC_SAVED:
    787 		break;
    788 	case MODE_SENSE_PC_CHANGEABLE:
    789 		break;
    790 	}
    791 
    792 	(void) bcopy(&header, addr, sizeof (header));
    793 	(void) bcopy(&page2, addr + sizeof (header), sizeof (page2));
    794 
    795 	pkt->pkt_resid = sp->cmd_count - sizeof (page2) - sizeof (header);
    796 	rval = 0;
    797 
    798 	return (rval);
    799 }
    800 
    801 static int
    802 bsd_mode_sense_dad_mode_format(struct scsi_pkt *pkt)
    803 {
    804 	struct emul64_cmd	*sp = PKT2CMD(pkt);
    805 	union scsi_cdb		*cdb = (union scsi_cdb *)pkt->pkt_cdbp;
    806 	uchar_t			*addr = (uchar_t *)sp->cmd_addr;
    807 	emul64_tgt_t		*tgt;
    808 	int			page_control;
    809 	struct mode_header	header;
    810 	struct mode_format	page3;
    811 	int			rval = 0;
    812 
    813 	page_control = (cdb->cdb_opaque[2] >> 6) & 0x03;
    814 
    815 	if (emul64debug) {
    816 		cmn_err(CE_CONT, "%s: bsd_mode_sense_dad_mode_format: "
    817 		    "pc=%d n=%d\n", emul64_name, page_control, sp->cmd_count);
    818 	}
    819 
    820 	if (sp->cmd_count < (sizeof (header) + sizeof (page3))) {
    821 		cmn_err(CE_CONT, "%s: bsd_mode_sense_dad_mode_format: "
    822 		    "size %d required\n",
    823 		    emul64_name, (int)(sizeof (header) + sizeof (page3)));
    824 		return (EIO);
    825 	}
    826 
    827 	(void) bzero(&header, sizeof (header));
    828 	(void) bzero(&page3, sizeof (page3));
    829 
    830 	header.length = sizeof (header) + sizeof (page3) - 1;
    831 	header.bdesc_length = 0;
    832 
    833 	page3.mode_page.code = DAD_MODE_FORMAT;
    834 	page3.mode_page.ps = 1;
    835 	page3.mode_page.length = sizeof (page3) - sizeof (struct mode_page);
    836 
    837 	switch (page_control) {
    838 	case MODE_SENSE_PC_CURRENT:
    839 	case MODE_SENSE_PC_DEFAULT:
    840 	case MODE_SENSE_PC_SAVED:
    841 		page3.data_bytes_sect = ushort_to_scsi_ushort(DEV_BSIZE);
    842 		page3.interleave = ushort_to_scsi_ushort(1);
    843 		EMUL64_MUTEX_ENTER(sp->cmd_emul64);
    844 		tgt = find_tgt(sp->cmd_emul64,
    845 		    pkt->pkt_address.a_target, pkt->pkt_address.a_lun);
    846 		EMUL64_MUTEX_EXIT(sp->cmd_emul64);
    847 		page3.sect_track = ushort_to_scsi_ushort(tgt->emul64_tgt_nsect);
    848 		break;
    849 	case MODE_SENSE_PC_CHANGEABLE:
    850 		break;
    851 	}
    852 
    853 	(void) bcopy(&header, addr, sizeof (header));
    854 	(void) bcopy(&page3, addr + sizeof (header), sizeof (page3));
    855 
    856 	pkt->pkt_resid = sp->cmd_count - sizeof (page3) - sizeof (header);
    857 	rval = 0;
    858 
    859 	return (rval);
    860 }
    861 
    862 static int
    863 bsd_mode_sense_dad_mode_cache(struct scsi_pkt *pkt)
    864 {
    865 	struct emul64_cmd	*sp = PKT2CMD(pkt);
    866 	union scsi_cdb		*cdb = (union scsi_cdb *)pkt->pkt_cdbp;
    867 	uchar_t			*addr = (uchar_t *)sp->cmd_addr;
    868 	int			page_control;
    869 	struct mode_header	header;
    870 	struct mode_cache	page8;
    871 	int			rval = 0;
    872 
    873 	page_control = (cdb->cdb_opaque[2] >> 6) & 0x03;
    874 
    875 	if (emul64debug) {
    876 		cmn_err(CE_CONT, "%s: bsd_mode_sense_dad_mode_cache: "
    877 		    "pc=%d n=%d\n", emul64_name, page_control, sp->cmd_count);
    878 	}
    879 
    880 	if (sp->cmd_count < (sizeof (header) + sizeof (page8))) {
    881 		cmn_err(CE_CONT, "%s: bsd_mode_sense_dad_mode_cache: "
    882 		    "size %d required\n",
    883 		    emul64_name, (int)(sizeof (header) + sizeof (page8)));
    884 		return (EIO);
    885 	}
    886 
    887 	(void) bzero(&header, sizeof (header));
    888 	(void) bzero(&page8, sizeof (page8));
    889 
    890 	header.length = sizeof (header) + sizeof (page8) - 1;
    891 	header.bdesc_length = 0;
    892 
    893 	page8.mode_page.code = DAD_MODE_CACHE;
    894 	page8.mode_page.ps = 1;
    895 	page8.mode_page.length = sizeof (page8) - sizeof (struct mode_page);
    896 
    897 	switch (page_control) {
    898 	case MODE_SENSE_PC_CURRENT:
    899 	case MODE_SENSE_PC_DEFAULT:
    900 	case MODE_SENSE_PC_SAVED:
    901 		break;
    902 	case MODE_SENSE_PC_CHANGEABLE:
    903 		break;
    904 	}
    905 
    906 	(void) bcopy(&header, addr, sizeof (header));
    907 	(void) bcopy(&page8, addr + sizeof (header), sizeof (page8));
    908 
    909 	pkt->pkt_resid = sp->cmd_count - sizeof (page8) - sizeof (header);
    910 	rval = 0;
    911 
    912 	return (rval);
    913 }
    914 
    915 /* ARGSUSED 0 */
    916 int
    917 bsd_scsi_mode_select(struct scsi_pkt *pkt)
    918 {
    919 	return (0);
    920 }
    921 
    922 int
    923 bsd_scsi_read_capacity_8(struct scsi_pkt *pkt)
    924 {
    925 	struct emul64_cmd	*sp = PKT2CMD(pkt);
    926 	emul64_tgt_t		*tgt;
    927 	struct scsi_capacity	cap;
    928 	int			rval = 0;
    929 
    930 	EMUL64_MUTEX_ENTER(sp->cmd_emul64);
    931 	tgt = find_tgt(sp->cmd_emul64,
    932 	    pkt->pkt_address.a_target, pkt->pkt_address.a_lun);
    933 	EMUL64_MUTEX_EXIT(sp->cmd_emul64);
    934 	if (tgt->emul64_tgt_sectors > 0xffffffff)
    935 		cap.capacity = 0xffffffff;
    936 	else
    937 		cap.capacity =
    938 		    uint32_to_scsi_uint32(tgt->emul64_tgt_sectors);
    939 	cap.lbasize = uint32_to_scsi_uint32((uint_t)DEV_BSIZE);
    940 
    941 	pkt->pkt_resid = sp->cmd_count - sizeof (struct scsi_capacity);
    942 
    943 	(void) bcopy(&cap, (caddr_t)sp->cmd_addr,
    944 	    sizeof (struct scsi_capacity));
    945 	return (rval);
    946 }
    947 
    948 int
    949 bsd_scsi_read_capacity_16(struct scsi_pkt *pkt)
    950 {
    951 	struct emul64_cmd	*sp = PKT2CMD(pkt);
    952 	emul64_tgt_t		*tgt;
    953 	struct scsi_capacity_16 cap;
    954 	int			rval = 0;
    955 
    956 	EMUL64_MUTEX_ENTER(sp->cmd_emul64);
    957 	tgt = find_tgt(sp->cmd_emul64,
    958 	    pkt->pkt_address.a_target, pkt->pkt_address.a_lun);
    959 	EMUL64_MUTEX_EXIT(sp->cmd_emul64);
    960 
    961 	cap.sc_capacity = uint64_to_scsi_uint64(tgt->emul64_tgt_sectors);
    962 	cap.sc_lbasize = uint32_to_scsi_uint32((uint_t)DEV_BSIZE);
    963 	cap.sc_rto_en = 0;
    964 	cap.sc_prot_en = 0;
    965 	cap.sc_rsvd0 = 0;
    966 	bzero(&cap.sc_rsvd1[0], sizeof (cap.sc_rsvd1));
    967 
    968 	pkt->pkt_resid = sp->cmd_count - sizeof (struct scsi_capacity_16);
    969 
    970 	(void) bcopy(&cap, (caddr_t)sp->cmd_addr,
    971 	    sizeof (struct scsi_capacity_16));
    972 	return (rval);
    973 }
    974 int
    975 bsd_scsi_read_capacity(struct scsi_pkt *pkt)
    976 {
    977 	return (bsd_scsi_read_capacity_8(pkt));
    978 }
    979 
    980 
    981 /* ARGSUSED 0 */
    982 int
    983 bsd_scsi_reserve(struct scsi_pkt *pkt)
    984 {
    985 	return (0);
    986 }
    987 
    988 /* ARGSUSED 0 */
    989 int
    990 bsd_scsi_release(struct scsi_pkt *pkt)
    991 {
    992 	return (0);
    993 }
    994 
    995 
    996 int
    997 bsd_scsi_read_defect_list(struct scsi_pkt *pkt)
    998 {
    999 	pkt->pkt_resid = 0;
   1000 	return (0);
   1001 }
   1002 
   1003 
   1004 /* ARGSUSED 0 */
   1005 int
   1006 bsd_scsi_reassign_block(struct scsi_pkt *pkt)
   1007 {
   1008 	return (0);
   1009 }
   1010 
   1011 
   1012 static int
   1013 bsd_readblks(struct emul64 *emul64, ushort_t target, ushort_t lun,
   1014     diskaddr_t blkno, int nblks, unsigned char *bufaddr)
   1015 {
   1016 	emul64_tgt_t	*tgt;
   1017 	blklist_t	*blk;
   1018 	emul64_rng_overlap_t overlap;
   1019 	int		i = 0;
   1020 
   1021 	if (emul64debug) {
   1022 		cmn_err(CE_CONT, "%s: bsd_readblks: "
   1023 		    "<%d,%d> blk %llu (0x%llx) nblks %d\n",
   1024 		    emul64_name, target, lun, blkno, blkno, nblks);
   1025 	}
   1026 
   1027 	emul64_yield_check();
   1028 
   1029 	EMUL64_MUTEX_ENTER(emul64);
   1030 	tgt = find_tgt(emul64, target, lun);
   1031 	EMUL64_MUTEX_EXIT(emul64);
   1032 	if (tgt == NULL) {
   1033 		cmn_err(CE_WARN, "%s: bsd_readblks: no target for %d,%d\n",
   1034 		    emul64_name, target, lun);
   1035 		goto unlocked_out;
   1036 	}
   1037 
   1038 	if (emul64_collect_stats) {
   1039 		mutex_enter(&emul64_stats_mutex);
   1040 		emul64_io_ops++;
   1041 		emul64_io_blocks += nblks;
   1042 		mutex_exit(&emul64_stats_mutex);
   1043 	}
   1044 	mutex_enter(&tgt->emul64_tgt_blk_lock);
   1045 
   1046 	/*
   1047 	 * Keep the ioctls from changing the nowrite list for the duration
   1048 	 * of this I/O by grabbing emul64_tgt_nw_lock.  This will keep the
   1049 	 * results from our call to bsd_tgt_overlap from changing while we
   1050 	 * do the I/O.
   1051 	 */
   1052 	rw_enter(&tgt->emul64_tgt_nw_lock, RW_READER);
   1053 
   1054 	overlap = bsd_tgt_overlap(tgt, blkno, nblks);
   1055 	switch (overlap) {
   1056 	case O_SAME:
   1057 	case O_SUBSET:
   1058 	case O_OVERLAP:
   1059 		cmn_err(CE_WARN, "%s: bsd_readblks: "
   1060 		    "read to blocked area %lld,%d\n",
   1061 		    emul64_name, blkno, nblks);
   1062 		rw_exit(&tgt->emul64_tgt_nw_lock);
   1063 		goto errout;
   1064 	case O_NONE:
   1065 		break;
   1066 	}
   1067 	for (i = 0; i < nblks; i++) {
   1068 		if (emul64_debug_blklist)
   1069 			cmn_err(CE_CONT, "%s: bsd_readblks: "
   1070 			    "%d of %d: blkno %lld\n",
   1071 			    emul64_name, i+1, nblks, blkno);
   1072 		if (blkno > tgt->emul64_tgt_sectors)
   1073 			break;
   1074 		blk = bsd_findblk(tgt, blkno, NULL);
   1075 		if (blk) {
   1076 			(void) bcopy(blk->bl_data, bufaddr, DEV_BSIZE);
   1077 		} else {
   1078 			(void) bzero(bufaddr, DEV_BSIZE);
   1079 		}
   1080 		blkno++;
   1081 		bufaddr += DEV_BSIZE;
   1082 	}
   1083 	rw_exit(&tgt->emul64_tgt_nw_lock);
   1084 
   1085 errout:
   1086 	mutex_exit(&tgt->emul64_tgt_blk_lock);
   1087 
   1088 unlocked_out:
   1089 	return ((nblks - i) * DEV_BSIZE);
   1090 }
   1091 
   1092 
   1093 static int
   1094 bsd_writeblks(struct emul64 *emul64, ushort_t target, ushort_t lun,
   1095     diskaddr_t blkno, int nblks, unsigned char *bufaddr)
   1096 {
   1097 	emul64_tgt_t	*tgt;
   1098 	blklist_t	*blk;
   1099 	emul64_rng_overlap_t overlap;
   1100 	avl_index_t	where;
   1101 	int		i = 0;
   1102 
   1103 	if (emul64debug) {
   1104 		cmn_err(CE_CONT, "%s: bsd_writeblks: "
   1105 		    "<%d,%d> blk %llu (0x%llx) nblks %d\n",
   1106 		    emul64_name, target, lun, blkno, blkno, nblks);
   1107 	}
   1108 
   1109 	emul64_yield_check();
   1110 
   1111 	EMUL64_MUTEX_ENTER(emul64);
   1112 	tgt = find_tgt(emul64, target, lun);
   1113 	EMUL64_MUTEX_EXIT(emul64);
   1114 	if (tgt == NULL) {
   1115 		cmn_err(CE_WARN, "%s: bsd_writeblks: no target for %d,%d\n",
   1116 		    emul64_name, target, lun);
   1117 		goto unlocked_out;
   1118 	}
   1119 
   1120 	if (emul64_collect_stats) {
   1121 		mutex_enter(&emul64_stats_mutex);
   1122 		emul64_io_ops++;
   1123 		emul64_io_blocks += nblks;
   1124 		mutex_exit(&emul64_stats_mutex);
   1125 	}
   1126 	mutex_enter(&tgt->emul64_tgt_blk_lock);
   1127 
   1128 	/*
   1129 	 * Keep the ioctls from changing the nowrite list for the duration
   1130 	 * of this I/O by grabbing emul64_tgt_nw_lock.  This will keep the
   1131 	 * results from our call to bsd_tgt_overlap from changing while we
   1132 	 * do the I/O.
   1133 	 */
   1134 	rw_enter(&tgt->emul64_tgt_nw_lock, RW_READER);
   1135 	overlap = bsd_tgt_overlap(tgt, blkno, nblks);
   1136 	switch (overlap) {
   1137 	case O_SAME:
   1138 	case O_SUBSET:
   1139 		if (emul64_collect_stats) {
   1140 			mutex_enter(&emul64_stats_mutex);
   1141 			emul64_skipped_io++;
   1142 			emul64_skipped_blk += nblks;
   1143 			mutex_exit(&emul64_stats_mutex);
   1144 		}
   1145 		rw_exit(&tgt->emul64_tgt_nw_lock);
   1146 		mutex_exit(&tgt->emul64_tgt_blk_lock);
   1147 		return (0);
   1148 	case O_OVERLAP:
   1149 	case O_NONE:
   1150 		break;
   1151 	}
   1152 	for (i = 0; i < nblks; i++) {
   1153 		if ((overlap == O_NONE) ||
   1154 		    (bsd_tgt_overlap(tgt, blkno, 1) == O_NONE)) {
   1155 			/*
   1156 			 * If there was no overlap for the entire I/O range
   1157 			 * or if there is no overlap for this particular
   1158 			 * block, then we need to do the write.
   1159 			 */
   1160 			if (emul64_debug_blklist)
   1161 				cmn_err(CE_CONT, "%s: bsd_writeblks: "
   1162 				    "%d of %d: blkno %lld\n",
   1163 				    emul64_name, i+1, nblks, blkno);
   1164 			if (blkno > tgt->emul64_tgt_sectors) {
   1165 				cmn_err(CE_WARN, "%s: bsd_writeblks: "
   1166 				    "blkno %lld, tgt_sectors %lld\n",
   1167 				    emul64_name, blkno,
   1168 				    tgt->emul64_tgt_sectors);
   1169 				break;
   1170 			}
   1171 
   1172 			blk = bsd_findblk(tgt, blkno, &where);
   1173 			if (bcmp(bufaddr, emul64_zeros, DEV_BSIZE) == 0) {
   1174 				if (blk) {
   1175 					bsd_freeblk(tgt, blk);
   1176 				}
   1177 			} else {
   1178 				if (blk) {
   1179 					(void) bcopy(bufaddr, blk->bl_data,
   1180 					    DEV_BSIZE);
   1181 				} else {
   1182 					bsd_allocblk(tgt, blkno,
   1183 					    (caddr_t)bufaddr, where);
   1184 				}
   1185 			}
   1186 		}
   1187 		blkno++;
   1188 		bufaddr += DEV_BSIZE;
   1189 	}
   1190 
   1191 	/*
   1192 	 * Now that we're done with our I/O, allow the ioctls to change the
   1193 	 * nowrite list.
   1194 	 */
   1195 	rw_exit(&tgt->emul64_tgt_nw_lock);
   1196 
   1197 errout:
   1198 	mutex_exit(&tgt->emul64_tgt_blk_lock);
   1199 
   1200 unlocked_out:
   1201 	return ((nblks - i) * DEV_BSIZE);
   1202 }
   1203 
   1204 emul64_tgt_t *
   1205 find_tgt(struct emul64 *emul64, ushort_t target, ushort_t lun)
   1206 {
   1207 	emul64_tgt_t	*tgt;
   1208 
   1209 	tgt = emul64->emul64_tgt;
   1210 	while (tgt) {
   1211 		if (tgt->emul64_tgt_saddr.a_target == target &&
   1212 		    tgt->emul64_tgt_saddr.a_lun == lun) {
   1213 			break;
   1214 		}
   1215 		tgt = tgt->emul64_tgt_next;
   1216 	}
   1217 	return (tgt);
   1218 
   1219 }
   1220 
   1221 /*
   1222  * Free all blocks that are part of the specified range.
   1223  */
   1224 int
   1225 bsd_freeblkrange(emul64_tgt_t *tgt, emul64_range_t *range)
   1226 {
   1227 	blklist_t	*blk;
   1228 	blklist_t	*nextblk;
   1229 
   1230 	ASSERT(mutex_owned(&tgt->emul64_tgt_blk_lock));
   1231 	for (blk = (blklist_t *)avl_first(&tgt->emul64_tgt_data);
   1232 	    blk != NULL;
   1233 	    blk = nextblk) {
   1234 		/*
   1235 		 * We need to get the next block pointer now, because blk
   1236 		 * will be freed inside the if statement.
   1237 		 */
   1238 		nextblk = AVL_NEXT(&tgt->emul64_tgt_data, blk);
   1239 
   1240 		if (emul64_overlap(range, blk->bl_blkno, (size_t)1) != O_NONE) {
   1241 			bsd_freeblk(tgt, blk);
   1242 		}
   1243 	}
   1244 	return (0);
   1245 }
   1246 
   1247 static blklist_t *
   1248 bsd_findblk(emul64_tgt_t *tgt, diskaddr_t blkno, avl_index_t *where)
   1249 {
   1250 	blklist_t	*blk;
   1251 	blklist_t	search;
   1252 
   1253 	ASSERT(mutex_owned(&tgt->emul64_tgt_blk_lock));
   1254 
   1255 	search.bl_blkno = blkno;
   1256 	blk = (blklist_t *)avl_find(&tgt->emul64_tgt_data, &search, where);
   1257 	return (blk);
   1258 }
   1259 
   1260 
   1261 static void
   1262 bsd_allocblk(emul64_tgt_t *tgt,
   1263 		diskaddr_t blkno,
   1264 		caddr_t data,
   1265 		avl_index_t where)
   1266 {
   1267 	blklist_t	*blk;
   1268 
   1269 	if (emul64_debug_blklist)
   1270 		cmn_err(CE_CONT, "%s: bsd_allocblk: %llu\n",
   1271 		    emul64_name, blkno);
   1272 
   1273 	ASSERT(mutex_owned(&tgt->emul64_tgt_blk_lock));
   1274 
   1275 	blk = (blklist_t *)kmem_zalloc(sizeof (blklist_t), KM_SLEEP);
   1276 	blk->bl_data = (uchar_t *)kmem_zalloc(DEV_BSIZE, KM_SLEEP);
   1277 	blk->bl_blkno = blkno;
   1278 	(void) bcopy(data, blk->bl_data, DEV_BSIZE);
   1279 	avl_insert(&tgt->emul64_tgt_data, (void *) blk, where);
   1280 
   1281 	if (emul64_collect_stats) {
   1282 		mutex_enter(&emul64_stats_mutex);
   1283 		emul64_nonzero++;
   1284 		tgt->emul64_list_length++;
   1285 		if (tgt->emul64_list_length > emul64_max_list_length) {
   1286 			emul64_max_list_length = tgt->emul64_list_length;
   1287 		}
   1288 		mutex_exit(&emul64_stats_mutex);
   1289 	}
   1290 }
   1291 
   1292 static void
   1293 bsd_freeblk(emul64_tgt_t *tgt, blklist_t *blk)
   1294 {
   1295 	if (emul64_debug_blklist)
   1296 		cmn_err(CE_CONT, "%s: bsd_freeblk: <%d,%d> blk=%lld\n",
   1297 		    emul64_name, tgt->emul64_tgt_saddr.a_target,
   1298 		    tgt->emul64_tgt_saddr.a_lun, blk->bl_blkno);
   1299 
   1300 	ASSERT(mutex_owned(&tgt->emul64_tgt_blk_lock));
   1301 
   1302 	avl_remove(&tgt->emul64_tgt_data, (void *) blk);
   1303 	if (emul64_collect_stats) {
   1304 		mutex_enter(&emul64_stats_mutex);
   1305 		emul64_nonzero--;
   1306 		tgt->emul64_list_length--;
   1307 		mutex_exit(&emul64_stats_mutex);
   1308 	}
   1309 	kmem_free(blk->bl_data, DEV_BSIZE);
   1310 	kmem_free(blk, sizeof (blklist_t));
   1311 }
   1312 
   1313 /*
   1314  * Look for overlap between a nowrite range and a block range.
   1315  *
   1316  * NOTE:  Callers of this function must hold the tgt->emul64_tgt_nw_lock
   1317  *	  lock.  For the purposes of this function, a reader lock is
   1318  *	  sufficient.
   1319  */
   1320 static emul64_rng_overlap_t
   1321 bsd_tgt_overlap(emul64_tgt_t *tgt, diskaddr_t blkno, int count)
   1322 {
   1323 	emul64_nowrite_t	*nw;
   1324 	emul64_rng_overlap_t	rv = O_NONE;
   1325 
   1326 	for (nw = tgt->emul64_tgt_nowrite;
   1327 	    (nw != NULL) && (rv == O_NONE);
   1328 	    nw = nw->emul64_nwnext) {
   1329 		rv = emul64_overlap(&nw->emul64_blocked, blkno, (size_t)count);
   1330 	}
   1331 	return (rv);
   1332 }
   1333 
   1334 /*
   1335  * Operations that do a lot of I/O, such as RAID 5 initializations, result
   1336  * in a CPU bound kernel when the device is an emul64 device.  This makes
   1337  * the machine look hung.  To avoid this problem, give up the CPU from time
   1338  * to time.
   1339  */
   1340 
   1341 static void
   1342 emul64_yield_check()
   1343 {
   1344 	static uint_t	emul64_io_count = 0;	/* # I/Os since last wait */
   1345 	static uint_t	emul64_waiting = FALSE;	/* TRUE -> a thread is in */
   1346 						/*   cv_timed wait. */
   1347 	clock_t		ticks;
   1348 
   1349 	if (emul64_yield_enable == 0)
   1350 		return;
   1351 
   1352 	mutex_enter(&emul64_yield_mutex);
   1353 
   1354 	if (emul64_waiting == TRUE) {
   1355 		/*
   1356 		 * Another thread has already started the timer.  We'll
   1357 		 * just wait here until their time expires, and they
   1358 		 * broadcast to us.  When they do that, we'll return and
   1359 		 * let our caller do more I/O.
   1360 		 */
   1361 		cv_wait(&emul64_yield_cv, &emul64_yield_mutex);
   1362 	} else if (emul64_io_count++ > emul64_yield_period) {
   1363 		/*
   1364 		 * Set emul64_waiting to let other threads know that we
   1365 		 * have started the timer.
   1366 		 */
   1367 		emul64_waiting = TRUE;
   1368 		emul64_num_delay_called++;
   1369 		ticks = drv_usectohz(emul64_yield_length);
   1370 		if (ticks == 0)
   1371 			ticks = 1;
   1372 		(void) cv_reltimedwait(&emul64_yield_cv, &emul64_yield_mutex,
   1373 		    ticks, TR_CLOCK_TICK);
   1374 		emul64_io_count = 0;
   1375 		emul64_waiting = FALSE;
   1376 
   1377 		/* Broadcast in case others are waiting. */
   1378 		cv_broadcast(&emul64_yield_cv);
   1379 	}
   1380 
   1381 	mutex_exit(&emul64_yield_mutex);
   1382 }
   1383