Home | History | Annotate | Download | only in ip
      1 /*
      2  * CDDL HEADER START
      3  *
      4  * The contents of this file are subject to the terms of the
      5  * Common Development and Distribution License (the "License").
      6  * You may not use this file except in compliance with the License.
      7  *
      8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
      9  * or http://www.opensolaris.org/os/licensing.
     10  * See the License for the specific language governing permissions
     11  * and limitations under the License.
     12  *
     13  * When distributing Covered Code, include this CDDL HEADER in each
     14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
     15  * If applicable, add the following below this CDDL HEADER, with the
     16  * fields enclosed by brackets "[]" replaced with your own identifying
     17  * information: Portions Copyright [yyyy] [name of copyright owner]
     18  *
     19  * CDDL HEADER END
     20  */
     21 /*
     22  * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
     23  * Use is subject to license terms.
     24  */
     25 
     26 #pragma ident	"%Z%%M%	%I%	%E% SMI"
     27 
     28 #include <sys/types.h>
     29 #include <sys/stream.h>
     30 #include <sys/strsubr.h>
     31 #include <sys/stropts.h>
     32 #include <sys/sunddi.h>
     33 #include <sys/cred.h>
     34 #include <sys/debug.h>
     35 #include <sys/kmem.h>
     36 #include <sys/errno.h>
     37 #include <sys/disp.h>
     38 #include <netinet/in.h>
     39 #include <netinet/in_systm.h>
     40 #include <netinet/ip.h>
     41 #include <netinet/ip_icmp.h>
     42 #include <netinet/tcp.h>
     43 #include <inet/common.h>
     44 #include <inet/ipclassifier.h>
     45 #include <inet/ip.h>
     46 #include <inet/mib2.h>
     47 #include <inet/nd.h>
     48 #include <inet/tcp.h>
     49 #include <inet/ip_rts.h>
     50 #include <inet/ip_ire.h>
     51 #include <inet/ip_if.h>
     52 #include <sys/modhash.h>
     53 
     54 #include <sys/tsol/label.h>
     55 #include <sys/tsol/label_macro.h>
     56 #include <sys/tsol/tnet.h>
     57 #include <sys/tsol/tndb.h>
     58 #include <sys/strsun.h>
     59 
/*
 * Tunable for strict error-reply behavior (TCP RST and ICMP Unreachable).
 * As a file-scope object without an initializer it starts out as zero.
 * NOTE(review): the code that consults this flag is not in this part of
 * the file; presumably a non-zero value selects the strict behavior --
 * confirm at the point of use.
 */
int tsol_strict_error;
     62 
     63 /*
     64  * Some notes on the Trusted Solaris IRE gateway security attributes:
     65  *
     66  * When running in Trusted mode, the routing subsystem determines whether or
     67  * not a packet can be delivered to an off-link host (not directly reachable
     68  * through an interface) based on the accreditation checks of the packet's
     69  * security attributes against those associated with the next-hop gateway.
     70  *
     71  * The next-hop gateway's security attributes can be derived from two sources
     72  * (in order of preference): route-related and the host database.  A Trusted
     73  * system must be configured with at least the host database containing an
     74  * entry for the next-hop gateway, or otherwise no accreditation checks can
     75  * be performed, which may result in the inability to send packets to any
     76  * off-link destination host.
     77  *
     78  * The major differences between the two sources are the number and type of
     79  * security attributes used for accreditation checks.  A host database entry
     80  * can contain at most one set of security attributes, specific only to the
     81  * next-hop gateway.  In contrast, route-related security attributes are made
     82  * up of a collection of security attributes for the distant networks, and
     83  * are grouped together per next-hop gateway used to reach those networks.
     84  * This is the preferred method, and the routing subsystem will fallback to
     85  * the host database entry only if there are no route-related attributes
     86  * associated with the next-hop gateway.
     87  *
     88  * In Trusted mode, all of the IRE entries (except LOCAL/LOOPBACK/BROADCAST/
     89  * INTERFACE type) are initialized to contain a placeholder to store this
     90  * information.  The ire_gw_secattr structure gets allocated, initialized
     91  * and associated with the IRE during the time of the IRE creation.  The
     92  * initialization process also includes resolving the host database entry
     93  * of the next-hop gateway for fallback purposes.  It does not include any
     94  * route-related attribute setup, as that process comes separately as part
     95  * of the route requests (add/change) made to the routing subsystem.
     96  *
     97  * The underlying logic which involves associating IREs with the gateway
     98  * security attributes are represented by the following data structures:
     99  *
    100  * tsol_gcdb_t, or "gcdb"
    101  *
    102  *	- This is a system-wide collection of records containing the
    103  *	  currently used route-related security attributes, which are fed
    104  *	  through the routing socket interface, e.g. "route add/change".
    105  *
    106  * tsol_gc_t, or "gc"
    107  *
    108  *	- This is the gateway credential structure, and it provides the
    109  *	  only mechanism to access the contents of gcdb.  More than one gc
    110  *	  entry may refer to the same gcdb record.  gc's in the system are
    111  *	  grouped according to the next-hop gateway address.
    112  *
    113  * tsol_gcgrp_t, or "gcgrp"
    114  *
    115  *	- Group of gateway credentials, and is unique per next-hop gateway
    116  *	  address.  When the group is not empty, i.e. when gcgrp_count is
    117  *	  greater than zero, it contains one or more gc's, each pointing to
    118  *	  a gcdb record which indicates the gateway security attributes
    119  *	  associated with the next-hop gateway.
    120  *
    121  * The fields of the tsol_ire_gw_secattr_t used from within the IRE are:
    122  *
    123  * igsa_lock
    124  *
    125  *	- Lock that protects all fields within tsol_ire_gw_secattr_t.
    126  *
    127  * igsa_rhc
    128  *
    129  *	- Remote host cache database entry of next-hop gateway.  This is
    130  *	  used in the case when there are no route-related attributes
    131  *	  configured for the IRE.
    132  *
    133  * igsa_gc
    134  *
    135  *	- A set of route-related attributes that only get set for prefix
    136  *	  IREs.  If this is non-NULL, the prefix IRE has been associated
    137  *	  with a set of gateway security attributes by way of route add/
    138  *	  change functionality.  This field stays NULL for IRE_CACHEs.
    139  *
    140  * igsa_gcgrp
    141  *
    142  *	- Group of gc's which only gets set for IRE_CACHEs.  Each of the gc
    143  *	  points to a gcdb record that contains the security attributes
    144  *	  used to perform the credential checks of the packet which uses
    145  *	  the IRE.  If the group is not empty, the list of gc's can be
    146  *	  traversed starting at gcgrp_head.  This field stays NULL for
    147  *	  prefix IREs.
    148  */
    149 
/* kmem cache from which every tsol_ire_gw_secattr_t is allocated */
static kmem_cache_t *ire_gw_secattr_cache;

/* Default sizes handed to mod_hash for the gcdb and gcgrp tables */
#define	GCDB_HASH_SIZE	101
#define	GCGRP_HASH_SIZE	101

/*
 * Drop one reference on gcdb record `p'.  The count is adjusted while
 * holding gcdb_lock, and the record is passed to gcdb_inactive() when the
 * count reaches zero.  The macro acquires and releases gcdb_lock itself.
 * Note that `p' is evaluated more than once.
 */
#define	GCDB_REFRELE(p) {		\
	mutex_enter(&gcdb_lock);	\
	ASSERT((p)->gcdb_refcnt > 0);	\
	if (--((p)->gcdb_refcnt) == 0)	\
		gcdb_inactive(p);	\
	ASSERT(MUTEX_HELD(&gcdb_lock));	\
	mutex_exit(&gcdb_lock);		\
}

/* Sizes actually passed to mod_hash_create_extended() by tnet_init() */
static int gcdb_hash_size = GCDB_HASH_SIZE;
static int gcgrp_hash_size = GCGRP_HASH_SIZE;
/* gcdb records, hashed by their security attributes */
static mod_hash_t *gcdb_hash;
/* gcgrp's, hashed by gateway address; one table per address family */
static mod_hash_t *gcgrp4_hash;
static mod_hash_t *gcgrp6_hash;

/* Held around gcdb_hash lookups/updates and gcdb_refcnt changes */
static kmutex_t gcdb_lock;
/* Held around gcgrp hash lookups/updates and gcgrp_refcnt changes */
kmutex_t gcgrp_lock;

/* gcdb hash callbacks and record management */
static uint_t gcdb_hash_by_secattr(void *, mod_hash_key_t);
static int gcdb_hash_cmp(mod_hash_key_t, mod_hash_key_t);
static tsol_gcdb_t *gcdb_lookup(struct rtsa_s *, boolean_t);
static void gcdb_inactive(tsol_gcdb_t *);

/* gcgrp hash callbacks */
static uint_t gcgrp_hash_by_addr(void *, mod_hash_key_t);
static int gcgrp_hash_cmp(mod_hash_key_t, mod_hash_key_t);

/* kmem cache constructor/destructor for tsol_ire_gw_secattr_t */
static int ire_gw_secattr_constructor(void *, void *, int);
static void ire_gw_secattr_destructor(void *, void *);
    183 
    184 void
    185 tnet_init(void)
    186 {
    187 	ire_gw_secattr_cache = kmem_cache_create("ire_gw_secattr_cache",
    188 	    sizeof (tsol_ire_gw_secattr_t), 64, ire_gw_secattr_constructor,
    189 	    ire_gw_secattr_destructor, NULL, NULL, NULL, 0);
    190 
    191 	gcdb_hash = mod_hash_create_extended("gcdb_hash",
    192 	    gcdb_hash_size, mod_hash_null_keydtor, mod_hash_null_valdtor,
    193 	    gcdb_hash_by_secattr, NULL, gcdb_hash_cmp, KM_SLEEP);
    194 
    195 	gcgrp4_hash = mod_hash_create_extended("gcgrp4_hash",
    196 	    gcgrp_hash_size, mod_hash_null_keydtor, mod_hash_null_valdtor,
    197 	    gcgrp_hash_by_addr, NULL, gcgrp_hash_cmp, KM_SLEEP);
    198 
    199 	gcgrp6_hash = mod_hash_create_extended("gcgrp6_hash",
    200 	    gcgrp_hash_size, mod_hash_null_keydtor, mod_hash_null_valdtor,
    201 	    gcgrp_hash_by_addr, NULL, gcgrp_hash_cmp, KM_SLEEP);
    202 
    203 	mutex_init(&gcdb_lock, NULL, MUTEX_DEFAULT, NULL);
    204 	mutex_init(&gcgrp_lock, NULL, MUTEX_DEFAULT, NULL);
    205 }
    206 
    207 void
    208 tnet_fini(void)
    209 {
    210 	kmem_cache_destroy(ire_gw_secattr_cache);
    211 	mod_hash_destroy_hash(gcdb_hash);
    212 	mod_hash_destroy_hash(gcgrp4_hash);
    213 	mod_hash_destroy_hash(gcgrp6_hash);
    214 	mutex_destroy(&gcdb_lock);
    215 	mutex_destroy(&gcgrp_lock);
    216 }
    217 
    218 /* ARGSUSED */
    219 static int
    220 ire_gw_secattr_constructor(void *buf, void *cdrarg, int kmflags)
    221 {
    222 	tsol_ire_gw_secattr_t *attrp = buf;
    223 
    224 	mutex_init(&attrp->igsa_lock, NULL, MUTEX_DEFAULT, NULL);
    225 
    226 	attrp->igsa_rhc = NULL;
    227 	attrp->igsa_gc = NULL;
    228 	attrp->igsa_gcgrp = NULL;
    229 
    230 	return (0);
    231 }
    232 
    233 /* ARGSUSED */
    234 static void
    235 ire_gw_secattr_destructor(void *buf, void *cdrarg)
    236 {
    237 	tsol_ire_gw_secattr_t *attrp = (tsol_ire_gw_secattr_t *)buf;
    238 
    239 	mutex_destroy(&attrp->igsa_lock);
    240 }
    241 
    242 tsol_ire_gw_secattr_t *
    243 ire_gw_secattr_alloc(int kmflags)
    244 {
    245 	return (kmem_cache_alloc(ire_gw_secattr_cache, kmflags));
    246 }
    247 
    248 void
    249 ire_gw_secattr_free(tsol_ire_gw_secattr_t *attrp)
    250 {
    251 	ASSERT(MUTEX_NOT_HELD(&attrp->igsa_lock));
    252 
    253 	if (attrp->igsa_rhc != NULL) {
    254 		TNRHC_RELE(attrp->igsa_rhc);
    255 		attrp->igsa_rhc = NULL;
    256 	}
    257 
    258 	if (attrp->igsa_gc != NULL) {
    259 		GC_REFRELE(attrp->igsa_gc);
    260 		attrp->igsa_gc = NULL;
    261 	}
    262 	if (attrp->igsa_gcgrp != NULL) {
    263 		GCGRP_REFRELE(attrp->igsa_gcgrp);
    264 		attrp->igsa_gcgrp = NULL;
    265 	}
    266 
    267 	ASSERT(attrp->igsa_rhc == NULL);
    268 	ASSERT(attrp->igsa_gc == NULL);
    269 	ASSERT(attrp->igsa_gcgrp == NULL);
    270 
    271 	kmem_cache_free(ire_gw_secattr_cache, attrp);
    272 }
    273 
    274 /* ARGSUSED */
    275 static uint_t
    276 gcdb_hash_by_secattr(void *hash_data, mod_hash_key_t key)
    277 {
    278 	const struct rtsa_s *rp = (struct rtsa_s *)key;
    279 	const uint32_t *up, *ue;
    280 	uint_t hash;
    281 	int i;
    282 
    283 	ASSERT(rp != NULL);
    284 
    285 	/* See comments in hash_bylabel in zone.c for details */
    286 	hash = rp->rtsa_doi + (rp->rtsa_doi << 1);
    287 	up = (const uint32_t *)&rp->rtsa_slrange;
    288 	ue = up + sizeof (rp->rtsa_slrange) / sizeof (*up);
    289 	i = 1;
    290 	while (up < ue) {
    291 		/* using 2^n + 1, 1 <= n <= 16 as source of many primes */
    292 		hash += *up + (*up << ((i % 16) + 1));
    293 		up++;
    294 		i++;
    295 	}
    296 	return (hash);
    297 }
    298 
    299 static int
    300 gcdb_hash_cmp(mod_hash_key_t key1, mod_hash_key_t key2)
    301 {
    302 	struct rtsa_s *rp1 = (struct rtsa_s *)key1;
    303 	struct rtsa_s *rp2 = (struct rtsa_s *)key2;
    304 
    305 	ASSERT(rp1 != NULL && rp2 != NULL);
    306 
    307 	if (blequal(&rp1->rtsa_slrange.lower_bound,
    308 	    &rp2->rtsa_slrange.lower_bound) &&
    309 	    blequal(&rp1->rtsa_slrange.upper_bound,
    310 	    &rp2->rtsa_slrange.upper_bound) &&
    311 	    rp1->rtsa_doi == rp2->rtsa_doi)
    312 		return (0);
    313 
    314 	/* No match; not found */
    315 	return (-1);
    316 }
    317 
    318 /* ARGSUSED */
    319 static uint_t
    320 gcgrp_hash_by_addr(void *hash_data, mod_hash_key_t key)
    321 {
    322 	tsol_gcgrp_addr_t *ga = (tsol_gcgrp_addr_t *)key;
    323 	uint_t		idx = 0;
    324 	uint32_t	*ap;
    325 
    326 	ASSERT(ga != NULL);
    327 	ASSERT(ga->ga_af == AF_INET || ga->ga_af == AF_INET6);
    328 
    329 	ap = (uint32_t *)&ga->ga_addr.s6_addr32[0];
    330 	idx ^= *ap++;
    331 	idx ^= *ap++;
    332 	idx ^= *ap++;
    333 	idx ^= *ap;
    334 
    335 	return (idx);
    336 }
    337 
    338 static int
    339 gcgrp_hash_cmp(mod_hash_key_t key1, mod_hash_key_t key2)
    340 {
    341 	tsol_gcgrp_addr_t *ga1 = (tsol_gcgrp_addr_t *)key1;
    342 	tsol_gcgrp_addr_t *ga2 = (tsol_gcgrp_addr_t *)key2;
    343 
    344 	ASSERT(ga1 != NULL && ga2 != NULL);
    345 
    346 	/* Address family must match */
    347 	if (ga1->ga_af != ga2->ga_af)
    348 		return (-1);
    349 
    350 	if (ga1->ga_addr.s6_addr32[0] == ga2->ga_addr.s6_addr32[0] &&
    351 	    ga1->ga_addr.s6_addr32[1] == ga2->ga_addr.s6_addr32[1] &&
    352 	    ga1->ga_addr.s6_addr32[2] == ga2->ga_addr.s6_addr32[2] &&
    353 	    ga1->ga_addr.s6_addr32[3] == ga2->ga_addr.s6_addr32[3])
    354 		return (0);
    355 
    356 	/* No match; not found */
    357 	return (-1);
    358 }
    359 
/*
 * Bit-name string for the rtsa_mask flags.
 * NOTE(review): the layout looks like the kernel "%b" format convention:
 * a leading base byte (\20 octal, i.e. 16) followed by <bit number><name>
 * pairs (bit 9 cipso, bit 3 doi, bit 2 max_sl, bit 1 min_sl).  No user of
 * this macro is visible in this part of the file; confirm before relying
 * on that reading.
 */
#define	RTSAFLAGS	"\20\11cipso\3doi\2max_sl\1min_sl"
    361 
    362 int
    363 rtsa_validate(const struct rtsa_s *rp)
    364 {
    365 	uint32_t mask = rp->rtsa_mask;
    366 
    367 	/* RTSA_CIPSO must be set, and DOI must not be zero */
    368 	if ((mask & RTSA_CIPSO) == 0 || rp->rtsa_doi == 0) {
    369 		DTRACE_PROBE2(tx__gcdb__log__error__rtsa__validate, char *,
    370 		    "rtsa(1) lacks flag or has 0 doi.",
    371 		    rtsa_s *, rp);
    372 		return (EINVAL);
    373 	}
    374 	/*
    375 	 * SL range must be specified, and it must have its
    376 	 * upper bound dominating its lower bound.
    377 	 */
    378 	if ((mask & RTSA_SLRANGE) != RTSA_SLRANGE ||
    379 	    !bldominates(&rp->rtsa_slrange.upper_bound,
    380 	    &rp->rtsa_slrange.lower_bound)) {
    381 		DTRACE_PROBE2(tx__gcdb__log__error__rtsa__validate, char *,
    382 		    "rtsa(1) min_sl and max_sl not set or max_sl is "
    383 		    "not dominating.", rtsa_s *, rp);
    384 		return (EINVAL);
    385 	}
    386 	return (0);
    387 }
    388 
    389 /*
    390  * A brief explanation of the reference counting scheme:
    391  *
    392  * Prefix IREs have a non-NULL igsa_gc and a NULL igsa_gcgrp;
    393  * IRE_CACHEs have it vice-versa.
    394  *
    395  * Apart from dynamic references due to reference holds done
    396  * actively by threads, we have the following references:
    397  *
    398  * gcdb_refcnt:
    399  *	- Every tsol_gc_t pointing to a tsol_gcdb_t contributes a reference
    400  *	  to the gcdb_refcnt.
    401  *
    402  * gc_refcnt:
    403  *	- A prefix IRE that points to an igsa_gc contributes a reference
    404  *	  to the gc_refcnt.
    405  *
    406  * gcgrp_refcnt:
    407  *	- An IRE_CACHE that points to an igsa_gcgrp contributes a reference
    408  *	  to the gcgrp_refcnt of the associated tsol_gcgrp_t.
    409  *	- Every tsol_gc_t in the chain headed by tsol_gcgrp_t contributes
    410  *	  a reference to the gcgrp_refcnt.
    411  */
    412 static tsol_gcdb_t *
    413 gcdb_lookup(struct rtsa_s *rp, boolean_t alloc)
    414 {
    415 	tsol_gcdb_t *gcdb = NULL;
    416 
    417 	if (rtsa_validate(rp) != 0)
    418 		return (NULL);
    419 
    420 	mutex_enter(&gcdb_lock);
    421 	/* Find a copy in the cache; otherwise, create one and cache it */
    422 	if (mod_hash_find(gcdb_hash, (mod_hash_key_t)rp,
    423 	    (mod_hash_val_t *)&gcdb) == 0) {
    424 		gcdb->gcdb_refcnt++;
    425 		ASSERT(gcdb->gcdb_refcnt != 0);
    426 
    427 		DTRACE_PROBE2(tx__gcdb__log__info__gcdb__lookup, char *,
    428 		    "gcdb(1) is in gcdb_hash(global)", tsol_gcdb_t *, gcdb);
    429 	} else if (alloc) {
    430 		gcdb = kmem_zalloc(sizeof (*gcdb), KM_NOSLEEP);
    431 		if (gcdb != NULL) {
    432 			gcdb->gcdb_refcnt = 1;
    433 			gcdb->gcdb_mask = rp->rtsa_mask;
    434 			gcdb->gcdb_doi = rp->rtsa_doi;
    435 			gcdb->gcdb_slrange = rp->rtsa_slrange;
    436 
    437 			if (mod_hash_insert(gcdb_hash,
    438 			    (mod_hash_key_t)&gcdb->gcdb_attr,
    439 			    (mod_hash_val_t)gcdb) != 0) {
    440 				mutex_exit(&gcdb_lock);
    441 				kmem_free(gcdb, sizeof (*gcdb));
    442 				return (NULL);
    443 			}
    444 
    445 			DTRACE_PROBE2(tx__gcdb__log__info__gcdb__insert, char *,
    446 			    "gcdb(1) inserted in gcdb_hash(global)",
    447 			    tsol_gcdb_t *, gcdb);
    448 		}
    449 	}
    450 	mutex_exit(&gcdb_lock);
    451 	return (gcdb);
    452 }
    453 
    454 static void
    455 gcdb_inactive(tsol_gcdb_t *gcdb)
    456 {
    457 	ASSERT(MUTEX_HELD(&gcdb_lock));
    458 	ASSERT(gcdb != NULL && gcdb->gcdb_refcnt == 0);
    459 
    460 	(void) mod_hash_remove(gcdb_hash, (mod_hash_key_t)&gcdb->gcdb_attr,
    461 	    (mod_hash_val_t *)&gcdb);
    462 
    463 	DTRACE_PROBE2(tx__gcdb__log__info__gcdb__remove, char *,
    464 	    "gcdb(1) removed from gcdb_hash(global)",
    465 	    tsol_gcdb_t *, gcdb);
    466 	kmem_free(gcdb, sizeof (*gcdb));
    467 }
    468 
    469 tsol_gc_t *
    470 gc_create(struct rtsa_s *rp, tsol_gcgrp_t *gcgrp, boolean_t *gcgrp_xtrarefp)
    471 {
    472 	tsol_gc_t *gc;
    473 	tsol_gcdb_t *gcdb;
    474 
    475 	*gcgrp_xtrarefp = B_TRUE;
    476 
    477 	rw_enter(&gcgrp->gcgrp_rwlock, RW_WRITER);
    478 	if ((gcdb = gcdb_lookup(rp, B_TRUE)) == NULL) {
    479 		rw_exit(&gcgrp->gcgrp_rwlock);
    480 		return (NULL);
    481 	}
    482 
    483 	for (gc = gcgrp->gcgrp_head; gc != NULL; gc = gc->gc_next) {
    484 		if (gc->gc_db == gcdb) {
    485 			ASSERT(gc->gc_grp == gcgrp);
    486 
    487 			gc->gc_refcnt++;
    488 			ASSERT(gc->gc_refcnt != 0);
    489 
    490 			GCDB_REFRELE(gcdb);
    491 
    492 			DTRACE_PROBE3(tx__gcdb__log__info__gc__create,
    493 			    char *, "found gc(1) in gcgrp(2)",
    494 			    tsol_gc_t *, gc, tsol_gcgrp_t *, gcgrp);
    495 			rw_exit(&gcgrp->gcgrp_rwlock);
    496 			return (gc);
    497 		}
    498 	}
    499 
    500 	gc = kmem_zalloc(sizeof (*gc), KM_NOSLEEP);
    501 	if (gc != NULL) {
    502 		if (gcgrp->gcgrp_head == NULL) {
    503 			gcgrp->gcgrp_head = gcgrp->gcgrp_tail = gc;
    504 		} else {
    505 			gcgrp->gcgrp_tail->gc_next = gc;
    506 			gc->gc_prev = gcgrp->gcgrp_tail;
    507 			gcgrp->gcgrp_tail = gc;
    508 		}
    509 		gcgrp->gcgrp_count++;
    510 		ASSERT(gcgrp->gcgrp_count != 0);
    511 
    512 		/* caller has incremented gcgrp reference for us */
    513 		gc->gc_grp = gcgrp;
    514 
    515 		gc->gc_db = gcdb;
    516 		gc->gc_refcnt = 1;
    517 
    518 		DTRACE_PROBE3(tx__gcdb__log__info__gc__create, char *,
    519 		    "added gc(1) to gcgrp(2)", tsol_gc_t *, gc,
    520 		    tsol_gcgrp_t *, gcgrp);
    521 
    522 		*gcgrp_xtrarefp = B_FALSE;
    523 	}
    524 	rw_exit(&gcgrp->gcgrp_rwlock);
    525 
    526 	return (gc);
    527 }
    528 
    529 void
    530 gc_inactive(tsol_gc_t *gc)
    531 {
    532 	tsol_gcgrp_t *gcgrp = gc->gc_grp;
    533 
    534 	ASSERT(gcgrp != NULL);
    535 	ASSERT(RW_WRITE_HELD(&gcgrp->gcgrp_rwlock));
    536 	ASSERT(gc->gc_refcnt == 0);
    537 
    538 	if (gc->gc_prev != NULL)
    539 		gc->gc_prev->gc_next = gc->gc_next;
    540 	else
    541 		gcgrp->gcgrp_head = gc->gc_next;
    542 	if (gc->gc_next != NULL)
    543 		gc->gc_next->gc_prev = gc->gc_prev;
    544 	else
    545 		gcgrp->gcgrp_tail = gc->gc_prev;
    546 	ASSERT(gcgrp->gcgrp_count > 0);
    547 	gcgrp->gcgrp_count--;
    548 
    549 	/* drop lock before it's destroyed */
    550 	rw_exit(&gcgrp->gcgrp_rwlock);
    551 
    552 	DTRACE_PROBE3(tx__gcdb__log__info__gc__remove, char *,
    553 	    "removed inactive gc(1) from gcgrp(2)",
    554 	    tsol_gc_t *, gc, tsol_gcgrp_t *, gcgrp);
    555 
    556 	GCGRP_REFRELE(gcgrp);
    557 
    558 	gc->gc_grp = NULL;
    559 	gc->gc_prev = gc->gc_next = NULL;
    560 
    561 	if (gc->gc_db != NULL)
    562 		GCDB_REFRELE(gc->gc_db);
    563 
    564 	kmem_free(gc, sizeof (*gc));
    565 }
    566 
    567 tsol_gcgrp_t *
    568 gcgrp_lookup(tsol_gcgrp_addr_t *ga, boolean_t alloc)
    569 {
    570 	tsol_gcgrp_t *gcgrp = NULL;
    571 	mod_hash_t *hashp;
    572 
    573 	ASSERT(ga->ga_af == AF_INET || ga->ga_af == AF_INET6);
    574 
    575 	hashp = (ga->ga_af == AF_INET) ? gcgrp4_hash : gcgrp6_hash;
    576 
    577 	mutex_enter(&gcgrp_lock);
    578 	if (mod_hash_find(hashp, (mod_hash_key_t)ga,
    579 	    (mod_hash_val_t *)&gcgrp) == 0) {
    580 		gcgrp->gcgrp_refcnt++;
    581 		ASSERT(gcgrp->gcgrp_refcnt != 0);
    582 
    583 		DTRACE_PROBE3(tx__gcdb__log__info__gcgrp__lookup, char *,
    584 		    "found gcgrp(1) in hash(2)", tsol_gcgrp_t *, gcgrp,
    585 		    mod_hash_t *, hashp);
    586 
    587 	} else if (alloc) {
    588 		gcgrp = kmem_zalloc(sizeof (*gcgrp), KM_NOSLEEP);
    589 		if (gcgrp != NULL) {
    590 			gcgrp->gcgrp_refcnt = 1;
    591 			rw_init(&gcgrp->gcgrp_rwlock, NULL, RW_DEFAULT, NULL);
    592 			bcopy(ga, &gcgrp->gcgrp_addr, sizeof (*ga));
    593 
    594 			if (mod_hash_insert(hashp,
    595 			    (mod_hash_key_t)&gcgrp->gcgrp_addr,
    596 			    (mod_hash_val_t)gcgrp) != 0) {
    597 				mutex_exit(&gcgrp_lock);
    598 				kmem_free(gcgrp, sizeof (*gcgrp));
    599 				return (NULL);
    600 			}
    601 
    602 			DTRACE_PROBE3(tx__gcdb__log__info__gcgrp__insert,
    603 			    char *, "inserted gcgrp(1) in hash(2)",
    604 			    tsol_gcgrp_t *, gcgrp, mod_hash_t *, hashp);
    605 		}
    606 	}
    607 	mutex_exit(&gcgrp_lock);
    608 	return (gcgrp);
    609 }
    610 
    611 void
    612 gcgrp_inactive(tsol_gcgrp_t *gcgrp)
    613 {
    614 	tsol_gcgrp_addr_t *ga;
    615 	mod_hash_t *hashp;
    616 
    617 	ASSERT(MUTEX_HELD(&gcgrp_lock));
    618 	ASSERT(!RW_LOCK_HELD(&gcgrp->gcgrp_rwlock));
    619 	ASSERT(gcgrp != NULL && gcgrp->gcgrp_refcnt == 0);
    620 	ASSERT(gcgrp->gcgrp_head == NULL && gcgrp->gcgrp_count == 0);
    621 
    622 	ga = &gcgrp->gcgrp_addr;
    623 	ASSERT(ga->ga_af == AF_INET || ga->ga_af == AF_INET6);
    624 
    625 	hashp = (ga->ga_af == AF_INET) ? gcgrp4_hash : gcgrp6_hash;
    626 	(void) mod_hash_remove(hashp, (mod_hash_key_t)ga,
    627 	    (mod_hash_val_t *)&gcgrp);
    628 	rw_destroy(&gcgrp->gcgrp_rwlock);
    629 
    630 	DTRACE_PROBE3(tx__gcdb__log__info__gcgrp__remove, char *,
    631 	    "removed inactive gcgrp(1) from hash(2)",
    632 	    tsol_gcgrp_t *, gcgrp, mod_hash_t *, hashp);
    633 
    634 	kmem_free(gcgrp, sizeof (*gcgrp));
    635 }
    636 
    637 /*
    638  * Converts CIPSO option to sensitivity label.
    639  * Validity checks based on restrictions defined in
    640  * COMMERCIAL IP SECURITY OPTION (CIPSO 2.2) (draft-ietf-cipso-ipsecurity)
    641  */
    642 static boolean_t
    643 cipso_to_sl(const uchar_t *option, bslabel_t *sl)
    644 {
    645 	const struct cipso_option *co = (const struct cipso_option *)option;
    646 	const struct cipso_tag_type_1 *tt1;
    647 
    648 	tt1 = (struct cipso_tag_type_1 *)&co->cipso_tag_type[0];
    649 	if (tt1->tag_type != 1 ||
    650 	    tt1->tag_length < TSOL_TT1_MIN_LENGTH ||
    651 	    tt1->tag_length > TSOL_TT1_MAX_LENGTH ||
    652 	    tt1->tag_length + TSOL_CIPSO_TAG_OFFSET > co->cipso_length)
    653 		return (B_FALSE);
    654 
    655 	bsllow(sl);	/* assumed: sets compartments to all zeroes */
    656 	LCLASS_SET((_bslabel_impl_t *)sl, tt1->tag_sl);
    657 	bcopy(tt1->tag_cat, &((_bslabel_impl_t *)sl)->compartments,
    658 	    tt1->tag_length - TSOL_TT1_MIN_LENGTH);
    659 	return (B_TRUE);
    660 }
    661 
    662 /*
    663  * Parse the CIPSO label in the incoming packet and construct a ts_label_t
    664  * that reflects the CIPSO label and attach it to the dblk cred. Later as
    665  * the mblk flows up through the stack any code that needs to examine the
    666  * packet label can inspect the label from the dblk cred. This function is
    667  * called right in ip_rput for all packets, i.e. locally destined and
    668  * to be forwarded packets. The forwarding path needs to examine the label
    669  * to determine how to forward the packet.
    670  *
    671  * For IPv4, IP header options have been pulled up, but other headers might not
    672  * have been.  For IPv6, any hop-by-hop options have been pulled up, but any
    673  * other headers might not be present.
    674  */
    675 boolean_t
    676 tsol_get_pkt_label(mblk_t *mp, int version)
    677 {
    678 	tsol_tpc_t	*src_rhtp;
    679 	uchar_t		*opt_ptr = NULL;
    680 	const ipha_t	*ipha;
    681 	bslabel_t	sl;
    682 	uint32_t	doi;
    683 	tsol_ip_label_t	label_type;
    684 	const cipso_option_t *co;
    685 	const void	*src;
    686 	const ip6_t	*ip6h;
    687 
    688 	ASSERT(DB_TYPE(mp) == M_DATA);
    689 
    690 	if (version == IPV4_VERSION) {
    691 		ipha = (const ipha_t *)mp->b_rptr;
    692 		src = &ipha->ipha_src;
    693 		label_type = tsol_get_option(mp, &opt_ptr);
    694 	} else {
    695 		uchar_t		*after_secopt;
    696 		boolean_t	hbh_needed;
    697 		const uchar_t	*ip6hbh;
    698 		size_t		optlen;
    699 
    700 		label_type = OPT_NONE;
    701 		ip6h = (const ip6_t *)mp->b_rptr;
    702 		src = &ip6h->ip6_src;
    703 		if (ip6h->ip6_nxt == IPPROTO_HOPOPTS) {
    704 			ip6hbh = (const uchar_t *)&ip6h[1];
    705 			optlen = (ip6hbh[1] + 1) << 3;
    706 			ASSERT(ip6hbh + optlen <= mp->b_wptr);
    707 			opt_ptr = tsol_find_secopt_v6(ip6hbh, optlen,
    708 			    &after_secopt, &hbh_needed);
    709 			/* tsol_find_secopt_v6 guarantees some sanity */
    710 			if (opt_ptr != NULL &&
    711 			    (optlen = opt_ptr[1]) >= 8) {
    712 				opt_ptr += 2;
    713 				bcopy(opt_ptr, &doi, sizeof (doi));
    714 				doi = ntohl(doi);
    715 				if (doi == IP6LS_DOI_V4 &&
    716 				    opt_ptr[4] == IP6LS_TT_V4 &&
    717 				    opt_ptr[5] <= optlen - 4 &&
    718 				    opt_ptr[7] <= optlen - 6) {
    719 					opt_ptr += sizeof (doi) + 2;
    720 					label_type = OPT_CIPSO;
    721 				}
    722 			}
    723 		}
    724 	}
    725 
    726 	switch (label_type) {
    727 	case OPT_CIPSO:
    728 		/*
    729 		 * Convert the CIPSO label to the internal format
    730 		 * and attach it to the dblk cred.
    731 		 * Validity checks based on restrictions defined in
    732 		 * COMMERCIAL IP SECURITY OPTION (CIPSO 2.2)
    733 		 * (draft-ietf-cipso-ipsecurity)
    734 		 */
    735 		if (version == IPV6_VERSION && ip6opt_ls == 0)
    736 			return (B_FALSE);
    737 		co = (const struct cipso_option *)opt_ptr;
    738 		if ((co->cipso_length <
    739 		    TSOL_CIPSO_TAG_OFFSET + TSOL_TT1_MIN_LENGTH) ||
    740 		    (co->cipso_length > IP_MAX_OPT_LENGTH))
    741 			return (B_FALSE);
    742 		bcopy(co->cipso_doi, &doi, sizeof (doi));
    743 		doi = ntohl(doi);
    744 		if (!cipso_to_sl(opt_ptr, &sl))
    745 			return (B_FALSE);
    746 		setbltype(&sl, SUN_SL_ID);
    747 		break;
    748 
    749 	case OPT_NONE:
    750 		/*
    751 		 * Handle special cases that are not currently labeled, even
    752 		 * though the sending system may otherwise be configured as
    753 		 * labeled.
    754 		 *	- IGMP
    755 		 *	- IPv4 ICMP Router Discovery
    756 		 *	- IPv6 Neighbor Discovery
    757 		 */
    758 		if (version == IPV4_VERSION) {
    759 			if (ipha->ipha_protocol == IPPROTO_IGMP)
    760 				return (B_TRUE);
    761 			if (ipha->ipha_protocol == IPPROTO_ICMP) {
    762 				const struct icmp *icmp = (const struct icmp *)
    763 				    (mp->b_rptr + IPH_HDR_LENGTH(ipha));
    764 
    765 				if ((uchar_t *)icmp > mp->b_wptr) {
    766 					if (!pullupmsg(mp,
    767 					    (uchar_t *)icmp - mp->b_rptr + 1))
    768 						return (B_FALSE);
    769 					icmp = (const struct icmp *)
    770 					    (mp->b_rptr +
    771 					    IPH_HDR_LENGTH(ipha));
    772 				}
    773 				if (icmp->icmp_type == ICMP_ROUTERADVERT ||
    774 				    icmp->icmp_type == ICMP_ROUTERSOLICIT)
    775 					return (B_TRUE);
    776 			}
    777 			src = &ipha->ipha_src;
    778 		} else {
    779 			if (ip6h->ip6_nxt == IPPROTO_ICMPV6) {
    780 				const icmp6_t *icmp6 = (const icmp6_t *)
    781 				    (mp->b_rptr + IPV6_HDR_LEN);
    782 
    783 				if ((uchar_t *)icmp6 + ICMP6_MINLEN >
    784 				    mp->b_wptr) {
    785 					if (!pullupmsg(mp,
    786 					    (uchar_t *)icmp6 - mp->b_rptr +
    787 					    ICMP6_MINLEN))
    788 						return (B_FALSE);
    789 					icmp6 = (const icmp6_t *)
    790 					    (mp->b_rptr + IPV6_HDR_LEN);
    791 				}
    792 				if (icmp6->icmp6_type >= MLD_LISTENER_QUERY &&
    793 				    icmp6->icmp6_type <= ICMP6_MAX_INFO_TYPE)
    794 					return (B_TRUE);
    795 			}
    796 			src = &ip6h->ip6_src;
    797 		}
    798 
    799 		/*
    800 		 * Look up the tnrhtp database and get the implicit label
    801 		 * that is associated with this unlabeled host and attach
    802 		 * it to the packet.
    803 		 */
    804 		if ((src_rhtp = find_tpc(src, version, B_FALSE)) == NULL)
    805 			return (B_FALSE);
    806 
    807 		/* If the sender is labeled, drop the unlabeled packet. */
    808 		if (src_rhtp->tpc_tp.host_type != UNLABELED) {
    809 			TPC_RELE(src_rhtp);
    810 			pr_addr_dbg("unlabeled packet forged from %s\n",
    811 			    version == IPV4_VERSION ? AF_INET : AF_INET6, src);
    812 			return (B_FALSE);
    813 		}
    814 
    815 		sl = src_rhtp->tpc_tp.tp_def_label;
    816 		setbltype(&sl, SUN_SL_ID);
    817 		doi = src_rhtp->tpc_tp.tp_doi;
    818 		TPC_RELE(src_rhtp);
    819 		break;
    820 
    821 	default:
    822 		return (B_FALSE);
    823 	}
    824 
    825 	/* Make sure no other thread is messing with this mblk */
    826 	ASSERT(DB_REF(mp) == 1);
    827 	if (DB_CRED(mp) == NULL) {
    828 		DB_CRED(mp) = newcred_from_bslabel(&sl, doi, KM_NOSLEEP);
    829 		if (DB_CRED(mp) == NULL)
    830 			return (B_FALSE);
    831 	} else {
    832 		cred_t	*newcr;
    833 
    834 		newcr = copycred_from_bslabel(DB_CRED(mp), &sl, doi,
    835 		    KM_NOSLEEP);
    836 		if (newcr == NULL)
    837 			return (B_FALSE);
    838 		crfree(DB_CRED(mp));
    839 		DB_CRED(mp) = newcr;
    840 	}
    841 
    842 	/*
    843 	 * If the source was unlabeled, then flag as such,
    844 	 * while remembering that CIPSO routers add headers.
    845 	 */
    846 	if (label_type == OPT_NONE)
    847 		crgetlabel(DB_CRED(mp))->tsl_flags |= TSLF_UNLABELED;
    848 	else if (label_type == OPT_CIPSO) {
    849 		if ((src_rhtp = find_tpc(src, version, B_FALSE)) == NULL)
    850 			return (B_FALSE);
    851 		if (src_rhtp->tpc_tp.host_type == UNLABELED)
    852 			crgetlabel(DB_CRED(mp))->tsl_flags |=
    853 			    TSLF_UNLABELED;
    854 		TPC_RELE(src_rhtp);
    855 	}
    856 
    857 	return (B_TRUE);
    858 }
    859 
    860 /*
    861  * This routine determines whether the given packet should be accepted locally.
    862  * It does a range/set check on the packet's label by looking up the given
    863  * address in the remote host database.
    864  */
    865 boolean_t
    866 tsol_receive_local(const mblk_t *mp, const void *addr, uchar_t version,
    867     boolean_t shared_addr, const conn_t *connp)
    868 {
    869 	const cred_t *credp;
    870 	ts_label_t *plabel, *conn_plabel;
    871 	tsol_tpc_t *tp;
    872 	boolean_t retv;
    873 	const bslabel_t *label, *conn_label;
    874 
    875 	/*
    876 	 * The cases in which this can happen are:
    877 	 *	- IPv6 Router Alert, where ip_rput_data_v6 deliberately skips
    878 	 *	  over the label attachment process.
    879 	 *	- MLD output looped-back to ourselves.
    880 	 *	- IPv4 Router Discovery, where tsol_get_pkt_label intentionally
    881 	 *	  avoids the labeling process.
    882 	 * We trust that all valid paths in the code set the cred pointer when
    883 	 * needed.
    884 	 */
    885 	if ((credp = DB_CRED(mp)) == NULL)
    886 		return (B_TRUE);
    887 
    888 	/*
    889 	 * If this packet is from the inside (not a remote host) and has the
    890 	 * same zoneid as the selected destination, then no checks are
    891 	 * necessary.  Membership in the zone is enough proof.  This is
    892 	 * intended to be a hot path through this function.
    893 	 */
    894 	if (!crisremote(credp) &&
    895 	    crgetzone(credp) == crgetzone(connp->conn_cred))
    896 		return (B_TRUE);
    897 
    898 	plabel = crgetlabel(credp);
    899 	conn_plabel = crgetlabel(connp->conn_cred);
    900 	ASSERT(plabel != NULL && conn_plabel != NULL);
    901 
    902 	label = label2bslabel(plabel);
    903 	conn_label = label2bslabel(crgetlabel(connp->conn_cred));
    904 
    905 	/*
    906 	 * MLPs are always validated using the range and set of the local
    907 	 * address, even when the remote host is unlabeled.
    908 	 */
    909 	if (connp->conn_mlp_type == mlptBoth ||
    910 	/* LINTED: no consequent */
    911 	    connp->conn_mlp_type == (shared_addr ? mlptShared : mlptPrivate)) {
    912 		;
    913 
    914 	/*
    915 	 * If this is a packet from an unlabeled sender, then we must apply
    916 	 * different rules.  If the label is equal to the zone's label, then
    917 	 * it's allowed.  If it's not equal, but the zone is either the global
    918 	 * zone or the label is dominated by the zone's label, then allow it
    919 	 * as long as it's in the range configured for the destination.
    920 	 */
    921 	} else if (plabel->tsl_flags & TSLF_UNLABELED) {
    922 		if (plabel->tsl_doi == conn_plabel->tsl_doi &&
    923 		    blequal(label, conn_label))
    924 			return (B_TRUE);
    925 
    926 		/*
    927 		 * conn_zoneid is global for an exclusive stack, thus we use
    928 		 * conn_cred to get the zoneid
    929 		 */
    930 		if (!connp->conn_mac_exempt ||
    931 		    (crgetzoneid(connp->conn_cred) != GLOBAL_ZONEID &&
    932 		    (plabel->tsl_doi != conn_plabel->tsl_doi ||
    933 		    !bldominates(conn_label, label)))) {
    934 			DTRACE_PROBE3(
    935 			    tx__ip__log__drop__receivelocal__mac_unl,
    936 			    char *,
    937 			    "unlabeled packet mp(1) fails mac for conn(2)",
    938 			    mblk_t *, mp, conn_t *, connp);
    939 			return (B_FALSE);
    940 		}
    941 
    942 	/*
    943 	 * If this is a packet from a labeled sender, verify the
    944 	 * label on the packet matches the connection label.
    945 	 */
    946 	} else {
    947 		if (plabel->tsl_doi != conn_plabel->tsl_doi ||
    948 		    !blequal(label, conn_label)) {
    949 			DTRACE_PROBE3(tx__ip__log__drop__receivelocal__mac__slp,
    950 			    char *,
    951 			    "packet mp(1) failed label match to SLP conn(2)",
    952 			    mblk_t *, mp, conn_t *, connp);
    953 			return (B_FALSE);
    954 		}
    955 		/*
    956 		 * No further checks will be needed if this is a zone-
    957 		 * specific address because (1) The process for bringing up
    958 		 * the interface ensures the zone's label is within the zone-
    959 		 * specific address's valid label range; (2) For cases where
    960 		 * the conn is bound to the unspecified addresses, ip fanout
    961 		 * logic ensures conn's zoneid equals the dest addr's zoneid;
    962 		 * (3) Mac-exempt and mlp logic above already handle all
    963 		 * cases where the zone label may not be the same as the
    964 		 * conn label.
    965 		 */
    966 		if (!shared_addr)
    967 			return (B_TRUE);
    968 	}
    969 
    970 	tp = find_tpc(addr, version, B_FALSE);
    971 	if (tp == NULL) {
    972 		DTRACE_PROBE3(tx__ip__log__drop__receivelocal__no__tnr,
    973 		    char *, "dropping mp(1), host(2) lacks entry",
    974 		    mblk_t *, mp, void *, addr);
    975 		return (B_FALSE);
    976 	}
    977 
    978 	/*
    979 	 * The local host address should not be unlabeled at this point.  The
    980 	 * only way this can happen is that the destination isn't unicast.  We
    981 	 * assume that the packet should not have had a label, and thus should
    982 	 * have been handled by the TSLF_UNLABELED logic above.
    983 	 */
    984 	if (tp->tpc_tp.host_type == UNLABELED) {
    985 		retv = B_FALSE;
    986 		DTRACE_PROBE3(tx__ip__log__drop__receivelocal__flag, char *,
    987 		    "mp(1) unlabeled source, but tp is not unlabeled.",
    988 		    mblk_t *, mp, tsol_tpc_t *, tp);
    989 
    990 	} else if (tp->tpc_tp.host_type != SUN_CIPSO) {
    991 		retv = B_FALSE;
    992 		DTRACE_PROBE3(tx__ip__log__drop__receivelocal__tptype, char *,
    993 		    "delivering mp(1), found unrecognized tpc(2) type.",
    994 		    mblk_t *, mp, tsol_tpc_t *, tp);
    995 
    996 	} else if (plabel->tsl_doi != tp->tpc_tp.tp_doi) {
    997 		retv = B_FALSE;
    998 		DTRACE_PROBE3(tx__ip__log__drop__receivelocal__mac, char *,
    999 		    "mp(1) could not be delievered to tp(2), doi mismatch",
   1000 		    mblk_t *, mp, tsol_tpc_t *, tp);
   1001 
   1002 	} else if (!_blinrange(label, &tp->tpc_tp.tp_sl_range_cipso) &&
   1003 	    !blinlset(label, tp->tpc_tp.tp_sl_set_cipso)) {
   1004 		retv = B_FALSE;
   1005 		DTRACE_PROBE3(tx__ip__log__drop__receivelocal__mac, char *,
   1006 		    "mp(1) could not be delievered to tp(2), bad mac",
   1007 		    mblk_t *, mp, tsol_tpc_t *, tp);
   1008 	} else {
   1009 		retv = B_TRUE;
   1010 	}
   1011 
   1012 	TPC_RELE(tp);
   1013 
   1014 	return (retv);
   1015 }
   1016 
   1017 boolean_t
   1018 tsol_can_accept_raw(mblk_t *mp, boolean_t check_host)
   1019 {
   1020 	ts_label_t	*plabel = NULL;
   1021 	tsol_tpc_t	*src_rhtp, *dst_rhtp;
   1022 	boolean_t	retv;
   1023 
   1024