Home | History | Annotate | Download | only in ip
      1 /*
      2  * CDDL HEADER START
      3  *
      4  * The contents of this file are subject to the terms of the
      5  * Common Development and Distribution License (the "License").
      6  * You may not use this file except in compliance with the License.
      7  *
      8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
      9  * or http://www.opensolaris.org/os/licensing.
     10  * See the License for the specific language governing permissions
     11  * and limitations under the License.
     12  *
     13  * When distributing Covered Code, include this CDDL HEADER in each
     14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
     15  * If applicable, add the following below this CDDL HEADER, with the
     16  * fields enclosed by brackets "[]" replaced with your own identifying
     17  * information: Portions Copyright [yyyy] [name of copyright owner]
     18  *
     19  * CDDL HEADER END
     20  */
     21 /*
     22  * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
     23  * Use is subject to license terms.
     24  */
     25 
     26 #pragma ident	"%Z%%M%	%I%	%E% SMI"
     27 
     28 #include <sys/types.h>
     29 #include <sys/systm.h>
     30 #include <sys/kmem.h>
     31 #include <sys/disp.h>
     32 #include <sys/stream.h>
     33 #include <sys/strsubr.h>
     34 #include <sys/strsun.h>
     35 #include <sys/policy.h>
     36 #include <sys/tsol/label_macro.h>
     37 #include <sys/tsol/tndb.h>
     38 #include <sys/tsol/tnet.h>
     39 #include <inet/ip.h>
     40 #include <inet/ip6.h>
     41 #include <inet/tcp.h>
     42 #include <inet/ipclassifier.h>
     43 #include <inet/ip_ire.h>
     44 #include <inet/ip_ftable.h>
     45 
     46 /*
     47  * This routine takes a sensitivity label as input and creates a CIPSO
     48  * option in the specified buffer.  It returns the size of the CIPSO option.
     49  * If the sensitivity label is too large for the CIPSO option, then 0
     50  * is returned.
     51  *
     52  * tsol2cipso_tt1 returns 0 for failure and greater than 0 for success
     53  * (more accurately, success means a return value between 10 and 40).
     54  */
     55 
     56 static int
     57 tsol2cipso_tt1(const bslabel_t *sl, unsigned char *cop, uint32_t doi)
     58 {
     59 	struct cipso_tag_type_1 *tt1;
     60 	const _bslabel_impl_t *bsl;
     61 	const uchar_t *ucp;
     62 	int i;
     63 
     64 	if (doi == 0)
     65 		return (0);
     66 
     67 	/* check for Admin High sensitivity label */
     68 	if (blequal(sl, label2bslabel(l_admin_high)))
     69 		return (0);
     70 
     71 	/* check whether classification will fit in one octet */
     72 	bsl = (const _bslabel_impl_t *)sl;
     73 	if (LCLASS(bsl) & 0xFF00)
     74 		return (0);
     75 
     76 	/*
     77 	 * Check whether compartments will fit in 30 octets.
     78 	 * Compartments 241 - 256 are not allowed.
     79 	 */
     80 	if (ntohl(bsl->compartments.c8) & 0x0000FFFF)
     81 		return (0);
     82 
     83 	/*
     84 	 * Compute option length and tag length.
     85 	 * 'p' points to the last two bytes in the Sensitivity Label's
     86 	 * compartments; these cannot be mapped into CIPSO compartments.
     87 	 */
     88 	ucp = (const uchar_t *)&bsl->compartments.c8 + 2;
     89 	while (--ucp >= (const uchar_t *)&bsl->compartments.c1)
     90 		if (*ucp != 0)
     91 			break;
     92 
     93 	i =  ucp - (const uchar_t *)&bsl->compartments.c1 + 1;
     94 
     95 	if (cop == NULL)
     96 		return (10 + i);
     97 
     98 	doi = htonl(doi);
     99 	ucp = (const uchar_t *)&doi;
    100 	cop[IPOPT_OPTVAL] = IPOPT_COMSEC;
    101 	cop[IPOPT_OLEN] = 10 + i;
    102 	cop[IPOPT_OLEN+1] = ucp[0];
    103 	cop[IPOPT_OLEN+2] = ucp[1];
    104 	cop[IPOPT_OLEN+3] = ucp[2];
    105 	cop[IPOPT_OLEN+4] = ucp[3];
    106 	tt1 = (struct cipso_tag_type_1 *)&cop[IPOPT_OLEN + 5];
    107 	tt1->tag_type = 1;
    108 	tt1->tag_align = 0;
    109 	tt1->tag_sl = LCLASS(bsl);
    110 	tt1->tag_length = 4 + i;
    111 
    112 	bcopy(&bsl->compartments.c1, tt1->tag_cat, i);
    113 
    114 	return (cop[IPOPT_OLEN]);
    115 }
    116 
    117 /*
    118  * The following routine copies a datagram's option into the specified buffer
    119  * (if buffer pointer is non-null), or returns a pointer to the label within
    120  * the streams message (if buffer is null).  In both cases, tsol_get_option
    121  * returns the option's type.
    122  *
    123  * tsol_get_option assumes that the specified buffer is large enough to
    124  * hold the largest valid CIPSO option.  Since the total number of
    125  * IP header options cannot exceed 40 bytes, a 40 byte buffer is a good choice.
    126  */
    127 
    128 tsol_ip_label_t
    129 tsol_get_option(mblk_t *mp, uchar_t **buffer)
    130 {
    131 	ipha_t	*ipha;
    132 	uchar_t	*opt;
    133 	uint32_t	totallen;
    134 	uint32_t	optval;
    135 	uint32_t	optlen;
    136 
    137 	ipha = (ipha_t *)mp->b_rptr;
    138 
    139 	/*
    140 	 * Get length (in 4 byte octets) of IP header options.
    141 	 * If header doesn't contain options, then return OPT_NONE.
    142 	 */
    143 	totallen = ipha->ipha_version_and_hdr_length -
    144 	    (uint8_t)((IP_VERSION << 4) + IP_SIMPLE_HDR_LENGTH_IN_WORDS);
    145 
    146 	if (totallen == 0)
    147 		return (OPT_NONE);
    148 
    149 	totallen <<= 2;
    150 
    151 	/*
    152 	 * Search for CIPSO option.
    153 	 * If no such option is present, then return OPT_NONE.
    154 	 */
    155 	opt = (uchar_t *)&ipha[1];
    156 	while (totallen != 0) {
    157 		switch (optval = opt[IPOPT_OPTVAL]) {
    158 		case IPOPT_EOL:
    159 			return (OPT_NONE);
    160 		case IPOPT_NOP:
    161 			optlen = 1;
    162 			break;
    163 		default:
    164 			if (totallen <= IPOPT_OLEN)
    165 				return (OPT_NONE);
    166 			optlen = opt[IPOPT_OLEN];
    167 			if (optlen < 2)
    168 				return (OPT_NONE);
    169 		}
    170 		if (optlen > totallen)
    171 			return (OPT_NONE);
    172 		/*
    173 		 * Copy pointer to option into '*buffer' and
    174 		 * return the option type.
    175 		 */
    176 		switch (optval) {
    177 		case IPOPT_COMSEC:
    178 			*buffer = opt;
    179 			if (TSOL_CIPSO_TAG_OFFSET < optlen &&
    180 			    opt[TSOL_CIPSO_TAG_OFFSET] == 1)
    181 				return (OPT_CIPSO);
    182 			return (OPT_NONE);
    183 		}
    184 		totallen -= optlen;
    185 		opt += optlen;
    186 	}
    187 	return (OPT_NONE);
    188 }
    189 
    190 /*
    191  * tsol_compute_label()
    192  *
    193  * This routine computes the IP label that should be on a packet based on the
    194  * connection and destination information.
    195  *
    196  * Returns:
    197  *      0		Fetched label
    198  *      EACCES		The packet failed the remote host accreditation
    199  *      ENOMEM		Memory allocation failure
    200  *	EINVAL		Label cannot be computed
    201  */
    202 int
    203 tsol_compute_label(const cred_t *credp, ipaddr_t dst, uchar_t *opt_storage,
    204     boolean_t isexempt, ip_stack_t *ipst)
    205 {
    206 	uint_t		sec_opt_len;
    207 	ts_label_t	*tsl;
    208 	tsol_tpc_t	*dst_rhtp;
    209 	ire_t		*ire, *sire = NULL;
    210 	boolean_t	compute_label = B_FALSE;
    211 	tsol_ire_gw_secattr_t *attrp;
    212 	zoneid_t	zoneid, ip_zoneid;
    213 
    214 	if (opt_storage != NULL)
    215 		opt_storage[IPOPT_OLEN] = 0;
    216 
    217 	if ((tsl = crgetlabel(credp)) == NULL)
    218 		return (0);
    219 
    220 	/* always pass multicast */
    221 	if (CLASSD(dst))
    222 		return (0);
    223 
    224 	if ((dst_rhtp = find_tpc(&dst, IPV4_VERSION, B_FALSE)) == NULL) {
    225 		DTRACE_PROBE3(tx__tnopt__log__info__labeling__lookupdst__v4,
    226 		    char *, "destination ip(1) not in database (with creds(2))",
    227 		    ipaddr_t, dst, cred_t *, credp);
    228 		return (EINVAL);
    229 	}
    230 
    231 	zoneid = crgetzoneid(credp);
    232 
    233 	/*
    234 	 * For exclusive stacks we set the zoneid to zero
    235 	 * to operate as if in the global zone for IRE and conn_t comparisons.
    236 	 */
    237 	if (ipst->ips_netstack->netstack_stackid != GLOBAL_NETSTACKID)
    238 		ip_zoneid = GLOBAL_ZONEID;
    239 	else
    240 		ip_zoneid = zoneid;
    241 
    242 	switch (dst_rhtp->tpc_tp.host_type) {
    243 	case UNLABELED:
    244 		/*
    245 		 * Only add a label if the unlabeled destination is
    246 		 * not broadcast/local/loopback address, that it is
    247 		 * not on the same subnet, and that the next-hop
    248 		 * gateway is labeled.
    249 		 */
    250 		ire = ire_cache_lookup(dst, ip_zoneid, tsl, ipst);
    251 
    252 		if (ire != NULL && (ire->ire_type & (IRE_BROADCAST | IRE_LOCAL |
    253 		    IRE_LOOPBACK | IRE_INTERFACE)) != 0) {
    254 			IRE_REFRELE(ire);
    255 			TPC_RELE(dst_rhtp);
    256 			return (0);
    257 		} else if (ire == NULL) {
    258 			ire = ire_ftable_lookup(dst, 0, 0, 0, NULL, &sire,
    259 			    ip_zoneid, 0, tsl, (MATCH_IRE_RECURSIVE |
    260 			    MATCH_IRE_DEFAULT | MATCH_IRE_SECATTR), ipst);
    261 		}
    262 
    263 		/* no route to destination */
    264 		if (ire == NULL) {
    265 			DTRACE_PROBE4(
    266 			    tx__tnopt__log__info__labeling__routedst__v4,
    267 			    char *, "No route to unlabeled dest ip(1)/tpc(2) "
    268 			    "with creds(3).", ipaddr_t, dst, tsol_tpc_t *,
    269 			    dst_rhtp, cred_t *, credp);
    270 			TPC_RELE(dst_rhtp);
    271 			return (EINVAL);
    272 		}
    273 
    274 		/*
    275 		 * Prefix IRE from f-table lookup means that the destination
    276 		 * is not directly connected; check the next-hop attributes.
    277 		 */
    278 		if (sire != NULL) {
    279 			ASSERT(ire != NULL);
    280 			IRE_REFRELE(ire);
    281 			ire = sire;
    282 		}
    283 
    284 		attrp = ire->ire_gw_secattr;
    285 		if (attrp != NULL && attrp->igsa_rhc != NULL &&
    286 		    attrp->igsa_rhc->rhc_tpc->tpc_tp.host_type != UNLABELED)
    287 			compute_label = B_TRUE;
    288 
    289 		/*
    290 		 * Can talk to unlabeled hosts if
    291 		 * (1) zone's label matches the default label, or
    292 		 * (2) SO_MAC_EXEMPT is on and we dominate the peer's label
    293 		 * (3) SO_MAC_EXEMPT is on and this is the global zone
    294 		 */
    295 		if (dst_rhtp->tpc_tp.tp_doi != tsl->tsl_doi ||
    296 		    (!blequal(&dst_rhtp->tpc_tp.tp_def_label,
    297 		    &tsl->tsl_label) && (!isexempt ||
    298 		    (zoneid != GLOBAL_ZONEID && !bldominates(&tsl->tsl_label,
    299 		    &dst_rhtp->tpc_tp.tp_def_label))))) {
    300 			DTRACE_PROBE4(tx__tnopt__log__info__labeling__mac__v4,
    301 			    char *, "unlabeled dest ip(1)/tpc(2) "
    302 			    "non-matching creds(3).", ipaddr_t, dst,
    303 			    tsol_tpc_t *, dst_rhtp, cred_t *, credp);
    304 			IRE_REFRELE(ire);
    305 			TPC_RELE(dst_rhtp);
    306 			return (EACCES);
    307 		}
    308 
    309 		IRE_REFRELE(ire);
    310 		break;
    311 
    312 	case SUN_CIPSO:
    313 		/*
    314 		 * Can talk to labeled hosts if zone's label is within target's
    315 		 * label range or set.
    316 		 */
    317 		if (dst_rhtp->tpc_tp.tp_cipso_doi_cipso != tsl->tsl_doi ||
    318 		    (!_blinrange(&tsl->tsl_label,
    319 		    &dst_rhtp->tpc_tp.tp_sl_range_cipso) &&
    320 		    !blinlset(&tsl->tsl_label,
    321 		    dst_rhtp->tpc_tp.tp_sl_set_cipso))) {
    322 			DTRACE_PROBE4(tx__tnopt__log__info__labeling__mac__v4,
    323 			    char *, "labeled dest ip(1)/tpc(2) "
    324 			    "non-matching creds(3).", ipaddr_t, dst,
    325 			    tsol_tpc_t *, dst_rhtp, cred_t *, credp);
    326 			TPC_RELE(dst_rhtp);
    327 			return (EACCES);
    328 		}
    329 		compute_label = B_TRUE;
    330 		break;
    331 
    332 	default:
    333 		TPC_RELE(dst_rhtp);
    334 		return (EACCES);
    335 	}
    336 
    337 	if (!compute_label) {
    338 		TPC_RELE(dst_rhtp);
    339 		return (0);
    340 	}
    341 
    342 	/* compute the CIPSO option */
    343 	if (dst_rhtp->tpc_tp.host_type != UNLABELED)
    344 		sec_opt_len = tsol2cipso_tt1(&tsl->tsl_label, opt_storage,
    345 		    tsl->tsl_doi);
    346 	else
    347 		sec_opt_len = tsol2cipso_tt1(&dst_rhtp->tpc_tp.tp_def_label,
    348 		    opt_storage, tsl->tsl_doi);
    349 	TPC_RELE(dst_rhtp);
    350 
    351 	if (sec_opt_len == 0) {
    352 		DTRACE_PROBE4(tx__tnopt__log__error__labeling__lostops__v4,
    353 		    char *,
    354 		    "options lack length for dest ip(1)/tpc(2) with creds(3).",
    355 		    ipaddr_t, dst, tsol_tpc_t *, dst_rhtp, cred_t *, credp);
    356 		return (EINVAL);
    357 	}
    358 
    359 	return (0);
    360 }
    361 
    362 /*
    363  * Remove any existing security option (CIPSO) from the given IP
    364  * header, move the 'buflen' bytes back to fill the gap, and return the number
    365  * of bytes removed (as zero or negative number).  Assumes that the headers are
    366  * sane.
    367  */
    368 int
    369 tsol_remove_secopt(ipha_t *ipha, int buflen)
    370 {
    371 	int remlen, olen, oval, delta;
    372 	uchar_t *fptr, *tptr;
    373 	boolean_t noop_keep;
    374 
    375 	remlen = IPH_HDR_LENGTH(ipha) - IP_SIMPLE_HDR_LENGTH;
    376 	fptr = tptr = (uchar_t *)(ipha + 1);
    377 	noop_keep = B_TRUE;
    378 	while (remlen > 0) {
    379 		oval = fptr[IPOPT_OPTVAL];
    380 
    381 		/* terminate on end of list */
    382 		if (oval == IPOPT_EOL)
    383 			break;
    384 
    385 		/*
    386 		 * Delete any no-ops following a deleted option, at least up
    387 		 * to a 4 octet alignment; copy others.
    388 		 */
    389 		if (oval == IPOPT_NOP) {
    390 			if (((fptr - (uchar_t *)ipha) & 3) == 0)
    391 				noop_keep = B_TRUE;
    392 			if (noop_keep)
    393 				*tptr++ = oval;
    394 			fptr++;
    395 			remlen--;
    396 			continue;
    397 		}
    398 
    399 		/* stop on corrupted list; just do nothing. */
    400 		if (remlen < 2)
    401 			return (0);
    402 		olen = fptr[IPOPT_OLEN];
    403 		if (olen < 2 || olen > remlen)
    404 			return (0);
    405 
    406 		/* skip over security options to delete them */
    407 		if (oval == IPOPT_COMSEC || oval == IPOPT_SECURITY) {
    408 			noop_keep = B_FALSE;
    409 			fptr += olen;
    410 			remlen -= olen;
    411 			continue;
    412 		}
    413 
    414 		/* copy the rest */
    415 		noop_keep = B_TRUE;
    416 		if (tptr != fptr)
    417 			ovbcopy(fptr, tptr, olen);
    418 		fptr += olen;
    419 		tptr += olen;
    420 		remlen -= olen;
    421 	}
    422 
    423 	fptr += remlen;
    424 
    425 	/* figure how much padding we'll need for header alignment */
    426 	olen = (tptr - (uchar_t *)ipha) & 3;
    427 	if (olen > 0) {
    428 		olen = 4 - olen;
    429 		/* pad with end-of-list */
    430 		bzero(tptr, olen);
    431 		tptr += olen;
    432 	}
    433 
    434 	/* slide back the headers that follow and update the IP header */
    435 	delta = fptr - tptr;
    436 	if (delta != 0) {
    437 		ovbcopy(fptr, tptr, ((uchar_t *)ipha + buflen) - fptr);
    438 		ipha->ipha_version_and_hdr_length -= delta / 4;
    439 	}
    440 	return (-delta);
    441 }
    442 
    443 /*
    444  * Insert the option in 'optbuf' into the IP header pointed to by 'ipha', and
    445  * move the data following the IP header (up to buflen) to accomodate the new
    446  * option.  Assumes that up to IP_MAX_OPT_LENGTH bytes are available (in total)
    447  * for IP options.  Returns the number of bytes actually inserted, or -1 if the
    448  * option cannot be inserted.  (Note that negative return values are possible
    449  * when noops must be compressed, and that only -1 indicates error.  Successful
    450  * return value is always evenly divisible by 4, by definition.)
    451  */
    452 int
    453 tsol_prepend_option(uchar_t *optbuf, ipha_t *ipha, int buflen)
    454 {
    455 	int remlen, padding, lastpad, totlen;
    456 	int oval, olen;
    457 	int delta;
    458 	uchar_t *optr;
    459 	uchar_t tempopt[IP_MAX_OPT_LENGTH], *toptr;
    460 
    461 	if (optbuf[IPOPT_OPTVAL] == IPOPT_EOL ||
    462 	    optbuf[IPOPT_OPTVAL] == IPOPT_NOP ||
    463 	    optbuf[IPOPT_OLEN] == 0)
    464 		return (0);
    465 
    466 	ASSERT(optbuf[IPOPT_OLEN] >= 2 &&
    467 	    optbuf[IPOPT_OLEN] <= IP_MAX_OPT_LENGTH);
    468 
    469 	/* first find the real (unpadded) length of the existing options */
    470 	remlen = IPH_HDR_LENGTH(ipha) - IP_SIMPLE_HDR_LENGTH;
    471 	padding = totlen = lastpad = 0;
    472 	optr = (uchar_t *)(ipha + 1);
    473 	while (remlen > 0) {
    474 		oval = optr[IPOPT_OPTVAL];
    475 
    476 		/* stop at end of list */
    477 		if (oval == IPOPT_EOL)
    478 			break;
    479 
    480 		/* skip no-ops, noting that length byte isn't present */
    481 		if (oval == IPOPT_NOP) {
    482 			optr++;
    483 			padding++;
    484 			lastpad++;
    485 			totlen++;
    486 			remlen--;
    487 			continue;
    488 		}
    489 
    490 		/* give up on a corrupted list; report failure */
    491 		if (remlen < 2)
    492 			return (-1);
    493 		olen = optr[IPOPT_OLEN];
    494 		if (olen < 2 || olen > remlen)
    495 			return (-1);
    496 
    497 		lastpad = 0;
    498 		optr += olen;
    499 		totlen += olen;
    500 		remlen -= olen;
    501 	}
    502 
    503 	/* completely ignore any trailing padding */
    504 	totlen -= lastpad;
    505 	padding -= lastpad;
    506 
    507 	/*
    508 	 * If some sort of inter-option alignment was present, try to preserve
    509 	 * that alignment.  If alignment pushes us out past the maximum, then
    510 	 * discard it and try to compress to fit.  (We just "assume" that any
    511 	 * padding added was attempting to get 32 bit alignment.  If that's
    512 	 * wrong, that's just too bad.)
    513 	 */
    514 	if (padding > 0) {
    515 		olen = (optbuf[IPOPT_OLEN] + 3) & ~3;
    516 		if (olen + totlen > IP_MAX_OPT_LENGTH) {
    517 			totlen -= padding;
    518 			if (olen + totlen > IP_MAX_OPT_LENGTH)
    519 				return (-1);
    520 			padding = 0;
    521 		}
    522 	}
    523 
    524 	/*
    525 	 * Since we may need to compress or expand the option list, we write to
    526 	 * a temporary buffer and then copy the results back to the IP header.
    527 	 */
    528 	toptr = tempopt;
    529 
    530 	/* compute actual option to insert */
    531 	olen = optbuf[IPOPT_OLEN];
    532 	bcopy(optbuf, toptr, olen);
    533 	toptr += olen;
    534 	if (padding > 0) {
    535 		while ((olen & 3) != 0) {
    536 			*toptr++ = IPOPT_NOP;
    537 			olen++;
    538 		}
    539 	}
    540 
    541 	/* copy over the existing options */
    542 	optr = (uchar_t *)(ipha + 1);
    543 	while (totlen > 0) {
    544 		oval = optr[IPOPT_OPTVAL];
    545 
    546 		/* totlen doesn't include end-of-list marker */
    547 		ASSERT(oval != IPOPT_EOL);
    548 
    549 		/* handle no-ops; copy if desired, ignore otherwise */
    550 		if (oval == IPOPT_NOP) {
    551 			if (padding > 0) {
    552 				/* note: cannot overflow due to checks above */
    553 				ASSERT(toptr < tempopt + IP_MAX_OPT_LENGTH);
    554 				*toptr++ = oval;
    555 			}
    556 			optr++;
    557 			totlen--;
    558 			continue;
    559 		}
    560 
    561 		/* list cannot be corrupt at this point */
    562 		ASSERT(totlen >= 2);
    563 		olen = optr[IPOPT_OLEN];
    564 		ASSERT(olen >= 2 && olen <= totlen);
    565 
    566 		/* cannot run out of room due to tests above */
    567 		ASSERT(toptr + olen <= tempopt + IP_MAX_OPT_LENGTH);
    568 
    569 		bcopy(optr, toptr, olen);
    570 		optr += olen;
    571 		toptr += olen;
    572 		totlen -= olen;
    573 	}
    574 
    575 	/* figure how much padding we'll need for header alignment */
    576 	olen = (toptr - tempopt) & 3;
    577 	if (olen > 0) {
    578 		olen = 4 - olen;
    579 		ASSERT(toptr + olen <= tempopt + IP_MAX_OPT_LENGTH);
    580 		/* pad with end-of-list value */
    581 		bzero(toptr, olen);
    582 		toptr += olen;
    583 	}
    584 
    585 	/* move the headers as needed and update IP header */
    586 	olen = (toptr - tempopt) + IP_SIMPLE_HDR_LENGTH;
    587 	remlen = IPH_HDR_LENGTH(ipha);
    588 	delta = olen - remlen;
    589 	if (delta != 0) {
    590 		ovbcopy((uchar_t *)ipha + remlen, (uchar_t *)ipha + olen,
    591 		    buflen - remlen);
    592 		ipha->ipha_version_and_hdr_length += delta / 4;
    593 	}
    594 
    595 	/* slap in the new options */
    596 	bcopy(tempopt, ipha + 1, olen - IP_SIMPLE_HDR_LENGTH);
    597 
    598 	return (delta);
    599 }
    600 
    601 /*
    602  * tsol_check_label()
    603  *
    604  * This routine computes the IP label that should be on the packet based on the
    605  * connection and destination information.  If the label is there, it returns
    606  * zero, so the caller knows that the label is syncronized, and further calls
    607  * are not required.  If the label isn't right, then the right one is inserted.
    608  *
    609  * The packet's header is clear before entering IPsec's engine.
    610  *
    611  * Returns:
    612  *      0		Label on packet (was|is now) correct
    613  *      EACCES		The packet failed the remote host accreditation.
    614  *      ENOMEM		Memory allocation failure.
    615  *	EINVAL		Label cannot be computed
    616  */
    617 int
    618 tsol_check_label(const cred_t *credp, mblk_t **mpp, boolean_t isexempt,
    619     ip_stack_t *ipst)
    620 {
    621 	mblk_t *mp = *mpp;
    622 	ipha_t  *ipha;
    623 	uchar_t opt_storage[IP_MAX_OPT_LENGTH];
    624 	uint_t hlen;
    625 	uint_t sec_opt_len;
    626 	uchar_t *optr;
    627 	int delta_remove = 0, delta_add, adjust;
    628 	int retv;
    629 
    630 	opt_storage[IPOPT_OPTVAL] = 0;
    631 
    632 	ipha = (ipha_t *)mp->b_rptr;
    633 
    634 	retv = tsol_compute_label(credp, ipha->ipha_dst, opt_storage, isexempt,
    635 	    ipst);
    636 	if (retv != 0)
    637 		return (retv);
    638 
    639 	optr = (uchar_t *)(ipha + 1);
    640 	hlen = IPH_HDR_LENGTH(ipha) - IP_SIMPLE_HDR_LENGTH;
    641 	sec_opt_len = opt_storage[IPOPT_OLEN];
    642 
    643 	if (hlen >= sec_opt_len) {
    644 		/* If no option is supposed to be there, make sure it's not */
    645 		if (sec_opt_len == 0 && hlen > 0 &&
    646 		    optr[IPOPT_OPTVAL] != IPOPT_COMSEC &&
    647 		    optr[IPOPT_OPTVAL] != IPOPT_SECURITY)
    648 			return (0);
    649 		/* if the option is there, it's always first */
    650 		if (sec_opt_len != 0 &&
    651 		    bcmp(opt_storage, optr, sec_opt_len) == 0)
    652 			return (0);
    653 	}
    654 
    655 	/*
    656 	 * If there is an option there, then it must be the wrong one; delete.
    657 	 */
    658 	if (hlen > 0) {
    659 		delta_remove = tsol_remove_secopt(ipha, MBLKL(mp));
    660 		mp->b_wptr += delta_remove;
    661 	}
    662 
    663 	/* Make sure we have room for the worst-case addition */
    664 	hlen = IPH_HDR_LENGTH(ipha) + opt_storage[IPOPT_OLEN];
    665 	hlen = (hlen + 3) & ~3;
    666 	if (hlen > IP_MAX_HDR_LENGTH)
    667 		hlen = IP_MAX_HDR_LENGTH;
    668 	hlen -= IPH_HDR_LENGTH(ipha);
    669 	if (mp->b_wptr + hlen > mp->b_datap->db_lim) {
    670 		int copylen;
    671 		mblk_t *new_mp;
    672 
    673 		/* allocate enough to be meaningful, but not *too* much */
    674 		copylen = MBLKL(mp);
    675 		if (copylen > 256)
    676 			copylen = 256;
    677 		new_mp = allocb_cred(hlen + copylen +
    678 		    (mp->b_rptr - mp->b_datap->db_base), DB_CRED(mp));
    679 		if (new_mp == NULL)
    680 			return (ENOMEM);
    681 
    682 		/* keep the bias */
    683 		new_mp->b_rptr += mp->b_rptr - mp->b_datap->db_base;
    684 		new_mp->b_wptr = new_mp->b_rptr + copylen;
    685 		bcopy(mp->b_rptr, new_mp->b_rptr, copylen);
    686 		new_mp->b_cont = mp;
    687 		if ((mp->b_rptr += copylen) >= mp->b_wptr) {
    688 			new_mp->b_cont = mp->b_cont;
    689 			freeb(mp);
    690 		}
    691 		*mpp = mp = new_mp;
    692 		ipha = (ipha_t *)mp->b_rptr;
    693 	}
    694 
    695 	delta_add = tsol_prepend_option(opt_storage, ipha, MBLKL(mp));
    696 	if (delta_add == -1)
    697 		goto param_prob;
    698 
    699 	ASSERT((mp->b_wptr + delta_add) <= DB_LIM(mp));
    700 	mp->b_wptr += delta_add;
    701 
    702 	adjust = delta_remove + delta_add;
    703 	adjust += ntohs(ipha->ipha_length);
    704 	ipha->ipha_length = htons(adjust);
    705 
    706 	return (0);
    707 
    708 param_prob:
    709 	return (EINVAL);
    710 }
    711 
    712 /*
    713  * IPv6 HopOpt extension header for the label option layout:
    714  *	- One octet giving the type of the 'next extension header'
    715  *	- Header extension length in 8-byte words, not including the
    716  *	  1st 8 bytes, but including any pad bytes at the end.
    717  *	  Eg. A value of 2 means 16 bytes not including the 1st 8 bytes.
    718  *	- Followed by TLV encoded IPv6 label option. Option layout is
    719  *		* One octet, IP6OPT_LS
    720  *		* One octet option length in bytes of the option data following
    721  *		  the length, but not including any pad bytes at the end.
    722  *		* Four-octet DOI (IP6LS_DOI_V4)
    723  *		* One octet suboption, IP6LS_TT_V4
    724  *		* One octet suboption length in bytes of the suboption
    725  *		  following the suboption length, including the suboption
    726  *		  header length, but not including any pad bytes at the end.
    727  *	- Pad to make the extension header a multiple of 8 bytes.
    728  *
    729  * This function returns the contents of 'IPv6 option structure' in the above.
    730  * i.e starting from the IP6OPT_LS but not including the pad at the end.
    731  * The user must prepend two octets (either padding or next header / length)
    732  * and append padding out to the next 8 octet boundary.
    733  */
    734 int
    735 tsol_compute_label_v6(const cred_t *credp, const in6_addr_t *dst,
    736     uchar_t *opt_storage, boolean_t isexempt, ip_stack_t *ipst)
    737 {
    738 	tsol_tpc_t	*dst_rhtp;
    739 	ts_label_t	*tsl;
    740 	uint_t		sec_opt_len;
    741 	uint32_t	doi;
    742 	zoneid_t	zoneid, ip_zoneid;
    743 	ire_t		*ire, *sire;
    744 	tsol_ire_gw_secattr_t *attrp;
    745 	boolean_t	compute_label;
    746 
    747 	if (ip6opt_ls == 0)
    748 		return (EINVAL);
    749 
    750 	if (opt_storage != NULL)
    751 		opt_storage[IPOPT_OLEN] = 0;
    752 
    753 	if ((tsl = crgetlabel(credp)) == NULL)
    754 		return (0);
    755 
    756 	/* Always pass multicast */
    757 	if (IN6_IS_ADDR_MULTICAST(dst))
    758 		return (0);
    759 
    760 	if ((dst_rhtp = find_tpc(dst, IPV6_VERSION, B_FALSE)) == NULL) {
    761 		DTRACE_PROBE3(tx__tnopt__log__info__labeling__lookupdst__v6,
    762 		    char *, "destination ip6(1) not in database with creds(2)",
    763 		    in6_addr_t *, dst, cred_t *, credp);
    764 		return (EINVAL);
    765 	}
    766 
    767 	zoneid = crgetzoneid(credp);
    768 
    769 	/*
    770 	 * For exclusive stacks we set the zoneid to zero
    771 	 * to operate as if in the global zone for IRE and conn_t comparisons.
    772 	 */
    773 	if (ipst->ips_netstack->netstack_stackid != GLOBAL_NETSTACKID)
    774 		ip_zoneid = GLOBAL_ZONEID;
    775 	else
    776 		ip_zoneid = zoneid;
    777 
    778 	/*
    779 	 * Fill in a V6 label.  If a new format is added here, make certain
    780 	 * that the maximum size of this label is reflected in sys/tsol/tnet.h
    781 	 * as TSOL_MAX_IPV6_OPTION.
    782 	 */
    783 	compute_label = B_FALSE;
    784 	switch (dst_rhtp->tpc_tp.host_type) {
    785 	case UNLABELED:
    786 		/*
    787 		 * Only add a label if the unlabeled destination is
    788 		 * not local or loopback address, that it is
    789 		 * not on the same subnet, and that the next-hop
    790 		 * gateway is labeled.
    791 		 */
    792 		sire = NULL;
    793 		ire = ire_cache_lookup_v6(dst, ip_zoneid, tsl, ipst);
    794 
    795 		if (ire != NULL && (ire->ire_type & (IRE_LOCAL |
    796 		    IRE_LOOPBACK | IRE_INTERFACE)) != 0) {
    797 			IRE_REFRELE(ire);
    798 			TPC_RELE(dst_rhtp);
    799 			return (0);
    800 		} else if (ire == NULL) {
    801 			ire = ire_ftable_lookup_v6(dst, NULL, NULL, 0, NULL,
    802 			    &sire, ip_zoneid, 0, tsl, (MATCH_IRE_RECURSIVE |
    803 			    MATCH_IRE_DEFAULT | MATCH_IRE_SECATTR), ipst);
    804 		}
    805 
    806 		/* no route to destination */
    807 		if (ire == NULL) {
    808 			DTRACE_PROBE4(
    809 			    tx__tnopt__log__info__labeling__routedst__v6,
    810 			    char *, "No route to unlabeled dest ip6(1)/tpc(2) "
    811 			    "with creds(3).", in6_addr_t *, dst, tsol_tpc_t *,
    812 			    dst_rhtp, cred_t *, credp);
    813 			TPC_RELE(dst_rhtp);
    814 			return (EINVAL);
    815 		}
    816 
    817 		/*
    818 		 * Prefix IRE from f-table lookup means that the destination
    819 		 * is not directly connected; check the next-hop attributes.
    820 		 */
    821 		if (sire != NULL) {
    822 			ASSERT(ire != NULL);
    823 			IRE_REFRELE(ire);
    824 			ire = sire;
    825 		}
    826 
    827 		attrp = ire->ire_gw_secattr;
    828 		if (attrp != NULL && attrp->igsa_rhc != NULL &&
    829 		    attrp->igsa_rhc->rhc_tpc->tpc_tp.host_type != UNLABELED)
    830 			compute_label = B_TRUE;
    831 
    832 		if (dst_rhtp->tpc_tp.tp_doi != tsl->tsl_doi ||
    833 		    (!blequal(&dst_rhtp->tpc_tp.tp_def_label,
    834 		    &tsl->tsl_label) && (!isexempt ||
    835 		    (zoneid != GLOBAL_ZONEID && !bldominates(&tsl->tsl_label,
    836 		    &dst_rhtp->tpc_tp.tp_def_label))))) {
    837 			DTRACE_PROBE4(tx__tnopt__log__info__labeling__mac__v6,
    838 			    char *, "unlabeled dest ip6(1)/tpc(2) "
    839 			    "non-matching creds(3)", in6_addr_t *, dst,
    840 			    tsol_tpc_t *, dst_rhtp, cred_t *, credp);
    841 			IRE_REFRELE(ire);
    842 			TPC_RELE(dst_rhtp);
    843 			return (EACCES);
    844 		}
    845 
    846 		IRE_REFRELE(ire);
    847 		break;
    848 
    849 	case SUN_CIPSO:
    850 		if (dst_rhtp->tpc_tp.tp_cipso_doi_cipso != tsl->tsl_doi ||
    851 		    (!_blinrange(&tsl->tsl_label,
    852 		    &dst_rhtp->tpc_tp.tp_sl_range_cipso) &&
    853 		    !blinlset(&tsl->tsl_label,
    854 		    dst_rhtp->tpc_tp.tp_sl_set_cipso))) {
    855 			DTRACE_PROBE4(tx__tnopt__log__info__labeling__mac__v6,
    856 			    char *,
    857 			    "labeled dest ip6(1)/tpc(2) non-matching creds(3).",
    858 			    in6_addr_t *, dst, tsol_tpc_t *, dst_rhtp,
    859 			    cred_t *, credp);
    860 			TPC_RELE(dst_rhtp);
    861 			return (EACCES);
    862 		}
    863 		compute_label = B_TRUE;
    864 		break;
    865 
    866 	default:
    867 		TPC_RELE(dst_rhtp);
    868 		return (EACCES);
    869 	}
    870 
    871 	if (!compute_label) {
    872 		TPC_RELE(dst_rhtp);
    873 		return (0);
    874 	}
    875 
    876 	/* compute the CIPSO option */
    877 	if (opt_storage != NULL)
    878 		opt_storage += 8;
    879 	if (dst_rhtp->tpc_tp.host_type != UNLABELED) {
    880 		sec_opt_len = tsol2cipso_tt1(&tsl->tsl_label, opt_storage,
    881 		    tsl->tsl_doi);
    882 	} else {
    883 		sec_opt_len = tsol2cipso_tt1(&dst_rhtp->tpc_tp.tp_def_label,
    884 		    opt_storage, tsl->tsl_doi);
    885 	}
    886 	TPC_RELE(dst_rhtp);
    887 
    888 	if (sec_opt_len == 0) {
    889 		DTRACE_PROBE4(tx__tnopt__log__error__labeling__lostops__v6,
    890 		    char *,
    891 		    "options lack length for dest ip6(1)/tpc(2) with creds(3).",
    892 		    in6_addr_t *, dst, tsol_tpc_t *, dst_rhtp, cred_t *, credp);
    893 		return (EINVAL);
    894 	}
    895 
    896 	if (opt_storage == NULL)
    897 		return (0);
    898 
    899 	if (sec_opt_len < IP_MAX_OPT_LENGTH)
    900 		opt_storage[sec_opt_len] = IPOPT_EOL;
    901 
    902 	/*
    903 	 * Just in case the option length is odd, round it up to the next even
    904 	 * multiple.  The IPv6 option definition doesn't like odd numbers for
    905 	 * some reason.
    906 	 *
    907 	 * Length in the overall option header (IP6OPT_LS) does not include the
    908 	 * option header itself, but the length in the suboption does include
    909 	 * the suboption header.  Thus, when there's just one suboption, the
    910 	 * length in the option header is the suboption length plus 4 (for the
    911 	 * DOI value).
    912 	 */
    913 	opt_storage[-2] = IP6LS_TT_V4;
    914 	opt_storage[-1] = (sec_opt_len + 2 + 1) & ~1;
    915 	opt_storage[-8] = ip6opt_ls;
    916 	opt_storage[-7] = opt_storage[-1] + 4;
    917 	doi = htons(IP6LS_DOI_V4);
    918 	bcopy(&doi, opt_storage - 6, 4);
    919 
    920 	return (0);
    921 }
    922 
    923 /*
    924  * Locate the start of the IP6OPT_LS label option and return it.
    925  * Also return the start of the next non-pad option in after_secoptp.
    926  * Usually the label option is the first option at least when packets
    927  * are generated, but for generality we don't assume that on received packets.
    928  */
    929 uchar_t *
    930 tsol_find_secopt_v6(
    931     const uchar_t *ip6hbh,	/* Start of the hop-by-hop extension header */
    932     uint_t hbhlen,		/* Length of the hop-by-hop extension header */
    933     uchar_t **after_secoptp,	/* Non-pad option following the label option */
    934     boolean_t *hbh_needed)	/* Is hop-by-hop hdr needed w/o label */
    935 {
    936 	uint_t	optlen;
    937 	uint_t	optused;
    938 	const uchar_t *optptr;
    939 	uchar_t	opt_type;
    940 	const uchar_t *secopt = NULL;
    941 
    942 	*hbh_needed = B_FALSE;
    943 	*after_secoptp = NULL;
    944 	optlen = hbhlen - 2;
    945 	optptr = ip6hbh + 2;
    946 	while (optlen != 0) {
    947 		opt_type = *optptr;
    948 		if (opt_type == IP6OPT_PAD1) {
    949 			optptr++;
    950 			optlen--;
    951 			continue;
    952 		}
    953 		if (optlen == 1)
    954 			break;
    955 		optused = 2 + optptr[1];
    956 		if (optused > optlen)
    957 			break;
    958 		/*
    959 		 * if we get here, ip6opt_ls can
    960 		 * not be 0 because it will always
    961 		 * match the IP6OPT_PAD1 above.
    962 		 * Therefore ip6opt_ls == 0 forces
    963 		 * this test to always fail here.
    964 		 */
    965 		if (opt_type == ip6opt_ls)
    966 			secopt = optptr;
    967 		else switch (opt_type) {
    968 		case IP6OPT_PADN:
    969 			break;
    970 		default:
    971 			/*
    972 			 * There is at least 1 option other than
    973 			 * the label option. So the hop-by-hop header is needed
    974 			 */
    975 			*hbh_needed = B_TRUE;
    976 			if (secopt != NULL) {
    977 				*after_secoptp = (uchar_t *)optptr;
    978 				return ((uchar_t *)secopt);
    979 			}
    980 			break;
    981 		}
    982 		optlen -= optused;
    983 		optptr += optused;
    984 	}
    985 	return ((uchar_t *)secopt);
    986 }
    987 
    988 /*
    989  * Remove the label option from the hop-by-hop options header if it exists.
    990  * 'buflen' is the total length of the packet typically b_wptr - b_rptr.
    991  * Header and data following the label option that is deleted are copied
    992  * (i.e. slid backward) to the right position, and returns the number
    993  * of bytes removed (as zero or negative number.)
    994  */
    995 int
    996 tsol_remove_secopt_v6(ip6_t *ip6h, int buflen)
    997 {
    998 	uchar_t	*ip6hbh;	/* hop-by-hop header */
    999 	uint_t	hbhlen;		/* hop-by-hop extension header length */
   1000 	uchar_t *secopt = NULL;
   1001 	uchar_t *after_secopt;
   1002 	uint_t	pad;
   1003 	uint_t	delta;
   1004 	boolean_t hbh_needed;
   1005 
   1006 	/*
   1007 	 * hop-by-hop extension header must appear first, if it does not
   1008 	 * exist, there is no label option.
   1009 	 */
   1010 	if (ip6h->ip6_nxt != IPPROTO_HOPOPTS)
   1011 		return (0);
   1012 
   1013 	ip6hbh = (uchar_t *)&ip6h[1];
   1014 	hbhlen = (ip6hbh[1] + 1) << 3;
   1015 	/*
   1016 	 * Locate the start of the label option if it exists and the end
   1017 	 * of the label option including pads if any.
   1018 	 */
   1019 	secopt = tsol_find_secopt_v6(ip6hbh, hbhlen, &after_secopt,
   1020 	    &hbh_needed);
   1021 	if (secopt == NULL)
   1022 		return (0);
   1023 	if (!hbh_needed) {
   1024 		uchar_t	next_hdr;
   1025 		/*
   1026 		 * The label option was the only option in the hop-by-hop
   1027 		 * header. We don't need the hop-by-hop header itself any
   1028 		 * longer.
   1029 		 */
   1030 		next_hdr = ip6hbh[0];
   1031 		ovbcopy(ip6hbh + hbhlen, ip6hbh,
   1032 		    buflen - (IPV6_HDR_LEN + hbhlen));
   1033 		ip6h->ip6_plen = htons(ntohs(ip6h->ip6_plen) - hbhlen);
   1034 		ip6h->ip6_nxt = next_hdr;
   1035 		return (-hbhlen);
   1036 	}
   1037 
   1038 	if (after_secopt == NULL) {
   1039 		/* There is no option following the label option */
   1040 		after_secopt = ip6hbh + hbhlen;
   1041 	}
   1042 
   1043 	/*
   1044 	 * After deleting the label option, we need to slide the headers
   1045 	 * and data back, while still maintaining the same alignment (module 8)
   1046 	 * for the other options. So we slide the headers and data back only
   1047 	 * by an integral multiple of 8 bytes, and fill the remaining bytes
   1048 	 * with pads.
   1049 	 */
   1050 	delta = after_secopt - secopt;
   1051 	pad = delta % 8;
   1052 	if (pad == 1) {
   1053 		secopt[0] = IP6OPT_PAD1;
   1054 	} else if (pad > 1) {
   1055 		secopt[0] = IP6OPT_PADN;
   1056 		secopt[1] = pad - 2;
   1057 		if (pad > 2)
   1058 			bzero(&secopt[2], pad - 2);
   1059 	}
   1060 	secopt += pad;
   1061 	delta -= pad;
   1062 	ovbcopy(after_secopt, secopt,
   1063 	    (uchar_t *)ip6h + buflen - after_secopt);
   1064 	ip6hbh[1] -= delta/8;
   1065 	ip6h->ip6_plen = htons(ntohs(ip6h->ip6_plen) - delta);
   1066 
   1067 	return (-delta);
   1068 }
   1069 
   1070 /*
   1071  * 'optbuf' contains a CIPSO label embedded in an IPv6 hop-by-hop option,
   1072  * starting with the IP6OPT_LS option type. The format of this hop-by-hop
   1073  * option is described in the block comment above tsol_compute_label_v6.
   1074  * This function prepends this hop-by-hop option before any other hop-by-hop
   1075  * options in the hop-by-hop header if one already exists, else a new
   1076  * hop-by-hop header is created and stuffed into the packet following
   1077  * the IPv6 header. 'buflen' is the total length of the packet i.e.
   1078  * b_wptr - b_rptr. The caller ensures that there is enough space for the
   1079  * extra option being added. Header and data following the position where
   1080  * the label option is inserted are copied (i.e. slid forward) to the right
   1081  * position.
   1082  */
   1083 int
   1084 tsol_prepend_option_v6(uchar_t *optbuf, ip6_t *ip6h, int buflen)
   1085 {
   1086 	/*
   1087 	 * rawlen is the length of the label option in bytes, not including
   1088 	 * any pads, starting from the IP6OPT_LS (option type) byte.
   1089 	 */
   1090 	uint_t	rawlen;
   1091 
   1092 	uint_t	optlen;		/* rawlen rounded to an 8 byte multiple */
   1093 	uchar_t	*ip6hbh;	/* start of the hop-by-hop extension header */
   1094 	uint_t	hbhlen;		/* Length of the hop-by-hop extension header */
   1095 	uint_t	pad_len;
   1096 	uchar_t	*pad_position;
   1097 	int	delta;		/* Actual numbe