Home | History | Annotate | Download | only in os
      1 /*
      2  * CDDL HEADER START
      3  *
      4  * The contents of this file are subject to the terms of the
      5  * Common Development and Distribution License (the "License").
      6  * You may not use this file except in compliance with the License.
      7  *
      8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
      9  * or http://www.opensolaris.org/os/licensing.
     10  * See the License for the specific language governing permissions
     11  * and limitations under the License.
     12  *
     13  * When distributing Covered Code, include this CDDL HEADER in each
     14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
     15  * If applicable, add the following below this CDDL HEADER, with the
     16  * fields enclosed by brackets "[]" replaced with your own identifying
     17  * information: Portions Copyright [yyyy] [name of copyright owner]
     18  *
     19  * CDDL HEADER END
     20  */
     21 /*
     22  * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
     23  * Use is subject to license terms.
     24  */
     25 
     26 #pragma ident	"@(#)labelsys.c	1.2	06/12/15 SMI"
     27 
     28 #include <sys/systm.h>
     29 #include <sys/types.h>
     30 #include <sys/stream.h>
     31 #include <sys/kmem.h>
     32 #include <sys/strsubr.h>
     33 #include <sys/cmn_err.h>
     34 #include <sys/debug.h>
     35 #include <sys/param.h>
     36 #include <sys/model.h>
     37 #include <sys/errno.h>
     38 #include <sys/modhash.h>
     39 
     40 #include <sys/policy.h>
     41 #include <sys/tsol/label.h>
     42 #include <sys/tsol/tsyscall.h>
     43 #include <sys/tsol/tndb.h>
     44 #include <sys/tsol/tnet.h>
     45 #include <sys/disp.h>
     46 
     47 #include <inet/ip.h>
     48 #include <inet/ip6.h>
     49 #include <sys/sdt.h>
     50 
     51 static mod_hash_t *tpc_name_hash;	/* hash of cache entries by name */
     52 static kmutex_t tpc_lock;
     53 
     54 static tsol_tpc_t *tpc_unlab;
     55 
     56 /*
     57  * tnrhc_table and tnrhc_table_v6 are similar to the IP forwarding tables
     58  * in organization and search. The tnrhc_table[_v6] is an array of 33/129
     59  * pointers to the 33/129 tnrhc tables indexed by the prefix length.
     60  * A largest prefix match search is done by find_rhc and it walks the
     61  * tables from the most specific to the least specific table. Table 0
     62  * corresponds to the single entry for 0.0.0.0/0 or ::0/0.
     63  */
     64 tnrhc_hash_t *tnrhc_table[TSOL_MASK_TABLE_SIZE];
     65 tnrhc_hash_t *tnrhc_table_v6[TSOL_MASK_TABLE_SIZE_V6];
     66 kmutex_t tnrhc_g_lock;
     67 
     68 static void tsol_create_i_tmpls(void);
     69 
     70 static void tsol_create_i_tnrh(const tnaddr_t *);
     71 
     72 /* List of MLPs valid on shared addresses */
     73 static tsol_mlp_list_t shared_mlps;
     74 
     75 /*
     76  * Convert length for a mask to the mask.
     77  */
     78 static ipaddr_t
     79 tsol_plen_to_mask(uint_t masklen)
     80 {
     81 	return (masklen == 0 ? 0 : htonl(IP_HOST_MASK << (IP_ABITS - masklen)));
     82 }
     83 
     84 /*
     85  * Convert a prefix length to the mask for that prefix.
     86  * Returns the argument bitmask.
     87  */
     88 static void
     89 tsol_plen_to_mask_v6(uint_t plen, in6_addr_t *bitmask)
     90 {
     91 	uint32_t *ptr;
     92 
     93 	ASSERT(plen <= IPV6_ABITS);
     94 
     95 	ptr = (uint32_t *)bitmask;
     96 	while (plen >= 32) {
     97 		*ptr++ = 0xffffffffU;
     98 		plen -= 32;
     99 	}
    100 	if (plen > 0)
    101 		*ptr++ = htonl(0xffffffff << (32 - plen));
    102 	while (ptr < (uint32_t *)(bitmask + 1))
    103 		*ptr++ = 0;
    104 }
    105 
    106 boolean_t
    107 tnrhc_init_table(tnrhc_hash_t *table[], short prefix_len, int kmflag)
    108 {
    109 	int	i;
    110 
    111 	mutex_enter(&tnrhc_g_lock);
    112 
    113 	if (table[prefix_len] == NULL) {
    114 		table[prefix_len] = (tnrhc_hash_t *)
    115 		    kmem_zalloc(TNRHC_SIZE * sizeof (tnrhc_hash_t), kmflag);
    116 		if (table[prefix_len] == NULL) {
    117 			mutex_exit(&tnrhc_g_lock);
    118 			return (B_FALSE);
    119 		}
    120 		for (i = 0; i < TNRHC_SIZE; i++) {
    121 			mutex_init(&table[prefix_len][i].tnrh_lock,
    122 			    NULL, MUTEX_DEFAULT, 0);
    123 		}
    124 	}
    125 	mutex_exit(&tnrhc_g_lock);
    126 	return (B_TRUE);
    127 }
    128 
    129 void
    130 tcache_init(void)
    131 {
    132 	tnaddr_t address;
    133 
    134 	/*
    135 	 * Note: unable to use mod_hash_create_strhash here, since it's
    136 	 * assymetric.  It assumes that the user has allocated exactly
    137 	 * strlen(key) + 1 bytes for the key when inserted, and attempts to
    138 	 * kmem_free that memory on a delete.
    139 	 */
    140 	tpc_name_hash = mod_hash_create_extended("tnrhtpc_by_name", 256,
    141 	    mod_hash_null_keydtor,  mod_hash_null_valdtor, mod_hash_bystr,
    142 	    NULL, mod_hash_strkey_cmp, KM_SLEEP);
    143 	mutex_init(&tpc_lock, NULL, MUTEX_DEFAULT, NULL);
    144 
    145 	mutex_init(&tnrhc_g_lock, NULL, MUTEX_DEFAULT, NULL);
    146 
    147 	/* label_init always called before tcache_init */
    148 	ASSERT(l_admin_low != NULL && l_admin_high != NULL);
    149 
    150 	/* Initialize the zeroth table prior to loading the 0.0.0.0 entry */
    151 	(void) tnrhc_init_table(tnrhc_table, 0, KM_SLEEP);
    152 	(void) tnrhc_init_table(tnrhc_table_v6, 0, KM_SLEEP);
    153 	/*
    154 	 * create an internal host template called "_unlab"
    155 	 */
    156 	tsol_create_i_tmpls();
    157 
    158 	/*
    159 	 * create a host entry, 0.0.0.0 = _unlab
    160 	 */
    161 	bzero(&address, sizeof (tnaddr_t));
    162 	address.ta_family = AF_INET;
    163 	tsol_create_i_tnrh(&address);
    164 
    165 	/*
    166 	 * create a host entry, ::0 = _unlab
    167 	 */
    168 	address.ta_family = AF_INET6;
    169 	tsol_create_i_tnrh(&address);
    170 
    171 	rw_init(&shared_mlps.mlpl_rwlock, NULL, RW_DEFAULT, NULL);
    172 }
    173 
    174 /* Called only by the TNRHC_RELE macro when the refcount goes to zero. */
    175 void
    176 tnrhc_free(tsol_tnrhc_t *tnrhc)
    177 {
    178 	/*
    179 	 * We assert rhc_invalid here to make sure that no new thread could
    180 	 * possibly end up finding this entry.  If it could, then the
    181 	 * mutex_destroy would panic.
    182 	 */
    183 	DTRACE_PROBE1(tx__tndb__l3__tnrhcfree, tsol_tnrhc_t *, tnrhc);
    184 	ASSERT(tnrhc->rhc_next == NULL && tnrhc->rhc_invalid);
    185 	mutex_exit(&tnrhc->rhc_lock);
    186 	mutex_destroy(&tnrhc->rhc_lock);
    187 	if (tnrhc->rhc_tpc != NULL)
    188 		TPC_RELE(tnrhc->rhc_tpc);
    189 	kmem_free(tnrhc, sizeof (*tnrhc));
    190 }
    191 
    192 /* Called only by the TPC_RELE macro when the refcount goes to zero. */
    193 void
    194 tpc_free(tsol_tpc_t *tpc)
    195 {
    196 	DTRACE_PROBE1(tx__tndb__l3__tpcfree, tsol_tpc_t *, tpc);
    197 	ASSERT(tpc->tpc_invalid);
    198 	mutex_exit(&tpc->tpc_lock);
    199 	mutex_destroy(&tpc->tpc_lock);
    200 	kmem_free(tpc, sizeof (*tpc));
    201 }
    202 
    203 /*
    204  * Find and hold a reference to a template entry by name.  Ignores entries that
    205  * are being deleted.
    206  */
    207 static tsol_tpc_t *
    208 tnrhtp_find(const char *name, mod_hash_t *hash)
    209 {
    210 	mod_hash_val_t hv;
    211 	tsol_tpc_t *tpc = NULL;
    212 
    213 	mutex_enter(&tpc_lock);
    214 	if (mod_hash_find(hash, (mod_hash_key_t)name, &hv) == 0) {
    215 		tpc = (tsol_tpc_t *)hv;
    216 		if (tpc->tpc_invalid)
    217 			tpc = NULL;
    218 		else
    219 			TPC_HOLD(tpc);
    220 	}
    221 	mutex_exit(&tpc_lock);
    222 	return (tpc);
    223 }
    224 
    225 static int
    226 tnrh_delete(const tsol_rhent_t *rhent)
    227 {
    228 	tsol_tnrhc_t *current;
    229 	tsol_tnrhc_t **prevp;
    230 	ipaddr_t tmpmask;
    231 	in6_addr_t tmpmask_v6;
    232 	tnrhc_hash_t *tnrhc_hash;
    233 
    234 	if (rhent->rh_address.ta_family == AF_INET) {
    235 		if (rhent->rh_prefix < 0 || rhent->rh_prefix > IP_ABITS)
    236 			return (EINVAL);
    237 		if (tnrhc_table[rhent->rh_prefix] == NULL)
    238 			return (ENOENT);
    239 		tmpmask = tsol_plen_to_mask(rhent->rh_prefix);
    240 		tnrhc_hash = &tnrhc_table[rhent->rh_prefix][
    241 		    TSOL_ADDR_HASH(rhent->rh_address.ta_addr_v4.s_addr &
    242 		    tmpmask, TNRHC_SIZE)];
    243 	} else if (rhent->rh_address.ta_family == AF_INET6) {
    244 		if (rhent->rh_prefix < 0 || rhent->rh_prefix > IPV6_ABITS)
    245 			return (EINVAL);
    246 		if (tnrhc_table_v6[rhent->rh_prefix] == NULL)
    247 			return (ENOENT);
    248 		tsol_plen_to_mask_v6(rhent->rh_prefix, &tmpmask_v6);
    249 		tnrhc_hash = &tnrhc_table_v6[rhent->rh_prefix][
    250 		    TSOL_ADDR_MASK_HASH_V6(rhent->rh_address.ta_addr_v6,
    251 		    tmpmask_v6, TNRHC_SIZE)];
    252 	} else {
    253 		return (EAFNOSUPPORT);
    254 	}
    255 
    256 	/* search for existing entry */
    257 	mutex_enter(&tnrhc_hash->tnrh_lock);
    258 	prevp = &tnrhc_hash->tnrh_list;
    259 	while ((current = *prevp) != NULL) {
    260 		if (TNADDR_EQ(&rhent->rh_address, &current->rhc_host))
    261 			break;
    262 		prevp = &current->rhc_next;
    263 	}
    264 
    265 	if (current != NULL) {
    266 		DTRACE_PROBE(tx__tndb__l2__tnrhdelete_existingrhentry);
    267 		*prevp = current->rhc_next;
    268 		mutex_enter(&current->rhc_lock);
    269 		current->rhc_next = NULL;
    270 		current->rhc_invalid = 1;
    271 		mutex_exit(&current->rhc_lock);
    272 		TNRHC_RELE(current);
    273 	}
    274 	mutex_exit(&tnrhc_hash->tnrh_lock);
    275 	return (current == NULL ? ENOENT : 0);
    276 }
    277 
    278 /*
    279  * Flush all remote host entries from the database.
    280  *
    281  * Note that the htable arrays themselves do not have reference counters, so,
    282  * unlike the remote host entries, they cannot be freed.
    283  */
    284 static void
    285 flush_rh_table(tnrhc_hash_t **htable, int nbits)
    286 {
    287 	tnrhc_hash_t *hent, *hend;
    288 	tsol_tnrhc_t *rhc, *rhnext;
    289 
    290 	while (--nbits >= 0) {
    291 		if ((hent = htable[nbits]) == NULL)
    292 			continue;
    293 		hend = hent + TNRHC_SIZE;
    294 		while (hent < hend) {
    295 			/*
    296 			 * List walkers hold this lock during the walk.  It
    297 			 * protects tnrh_list and rhc_next.
    298 			 */
    299 			mutex_enter(&hent->tnrh_lock);
    300 			rhnext = hent->tnrh_list;
    301 			hent->tnrh_list = NULL;
    302 			mutex_exit(&hent->tnrh_lock);
    303 			/*
    304 			 * There may still be users of the rhcs at this point,
    305 			 * but not of the list or its next pointer.  Thus, the
    306 			 * only thing that would need to be done under a lock
    307 			 * is setting the invalid bit, but that's atomic
    308 			 * anyway, so no locks needed here.
    309 			 */
    310 			while ((rhc = rhnext) != NULL) {
    311 				rhnext = rhc->rhc_next;
    312 				rhc->rhc_next = NULL;
    313 				rhc->rhc_invalid = 1;
    314 				TNRHC_RELE(rhc);
    315 			}
    316 			hent++;
    317 		}
    318 	}
    319 }
    320 
    321 /*
    322  * Load a remote host entry into kernel cache.  Create a new one if a matching
    323  * entry isn't found, otherwise replace the contents of the previous one by
    324  * deleting it and recreating it.  (Delete and recreate is used to avoid
    325  * allowing other threads to see an unstable data structure.)
    326  *
    327  * A "matching" entry is the one whose address matches that of the one
    328  * being loaded.
    329  *
    330  * Return 0 for success, error code for failure.
    331  */
    332 static int
    333 tnrh_hash_add(tsol_tnrhc_t *new, short prefix)
    334 {
    335 	tsol_tnrhc_t **rhp;
    336 	tsol_tnrhc_t *rh;
    337 	ipaddr_t tmpmask;
    338 	in6_addr_t tmpmask_v6;
    339 	tnrhc_hash_t *tnrhc_hash;
    340 
    341 	/* Find the existing entry, if any, leaving the hash locked */
    342 	if (new->rhc_host.ta_family == AF_INET) {
    343 		if (prefix < 0 || prefix > IP_ABITS)
    344 			return (EINVAL);
    345 		if (tnrhc_table[prefix] == NULL &&
    346 		    !tnrhc_init_table(tnrhc_table, prefix,
    347 		    KM_NOSLEEP))
    348 			return (ENOMEM);
    349 		tmpmask = tsol_plen_to_mask(prefix);
    350 		tnrhc_hash = &tnrhc_table[prefix][
    351 		    TSOL_ADDR_HASH(new->rhc_host.ta_addr_v4.s_addr &
    352 		    tmpmask, TNRHC_SIZE)];
    353 		mutex_enter(&tnrhc_hash->tnrh_lock);
    354 		for (rhp = &tnrhc_hash->tnrh_list; (rh = *rhp) != NULL;
    355 		    rhp = &rh->rhc_next) {
    356 			ASSERT(rh->rhc_host.ta_family == AF_INET);
    357 			if (((rh->rhc_host.ta_addr_v4.s_addr ^
    358 			    new->rhc_host.ta_addr_v4.s_addr) & tmpmask) ==
    359 			    0)
    360 				break;
    361 		}
    362 	} else if (new->rhc_host.ta_family == AF_INET6) {
    363 		if (prefix < 0 || prefix > IPV6_ABITS)
    364 			return (EINVAL);
    365 		if (tnrhc_table_v6[prefix] == NULL &&
    366 		    !tnrhc_init_table(tnrhc_table_v6, prefix,
    367 		    KM_NOSLEEP))
    368 			return (ENOMEM);
    369 		tsol_plen_to_mask_v6(prefix, &tmpmask_v6);
    370 		tnrhc_hash = &tnrhc_table_v6[prefix][
    371 		    TSOL_ADDR_MASK_HASH_V6(new->rhc_host.ta_addr_v6,
    372 		    tmpmask_v6, TNRHC_SIZE)];
    373 		mutex_enter(&tnrhc_hash->tnrh_lock);
    374 		for (rhp = &tnrhc_hash->tnrh_list; (rh = *rhp) != NULL;
    375 		    rhp = &rh->rhc_next) {
    376 			ASSERT(rh->rhc_host.ta_family == AF_INET6);
    377 			if (V6_MASK_EQ_2(rh->rhc_host.ta_addr_v6, tmpmask_v6,
    378 			    new->rhc_host.ta_addr_v6))
    379 				break;
    380 		}
    381 	} else {
    382 		return (EAFNOSUPPORT);
    383 	}
    384 
    385 	/* Clobber the old remote host entry. */
    386 	if (rh != NULL) {
    387 		ASSERT(!rh->rhc_invalid);
    388 		rh->rhc_invalid = 1;
    389 		*rhp = rh->rhc_next;
    390 		rh->rhc_next = NULL;
    391 		DTRACE_PROBE1(tx__tndb__l2__tnrhhashadd__invalidaterh,
    392 		    tsol_tnrhc_t *, rh);
    393 		TNRHC_RELE(rh);
    394 	}
    395 
    396 	TNRHC_HOLD(new);
    397 	new->rhc_next = tnrhc_hash->tnrh_list;
    398 	tnrhc_hash->tnrh_list = new;
    399 	DTRACE_PROBE1(tx__tndb__l2__tnrhhashadd__addedrh, tsol_tnrhc_t *, new);
    400 	mutex_exit(&tnrhc_hash->tnrh_lock);
    401 
    402 	return (0);
    403 }
    404 
    405 /*
    406  * Load a remote host entry into kernel cache.
    407  *
    408  * Return 0 for success, error code for failure.
    409  */
    410 int
    411 tnrh_load(const tsol_rhent_t *rhent)
    412 {
    413 	tsol_tnrhc_t *new;
    414 	tsol_tpc_t *tpc;
    415 	int status;
    416 
    417 	/* Find and bump the reference count on the named template */
    418 	if ((tpc = tnrhtp_find(rhent->rh_template, tpc_name_hash)) == NULL) {
    419 		return (EINVAL);
    420 	}
    421 	ASSERT(tpc->tpc_tp.host_type == UNLABELED ||
    422 	    tpc->tpc_tp.host_type == SUN_CIPSO);
    423 
    424 	if ((new = kmem_zalloc(sizeof (*new), KM_NOSLEEP)) == NULL) {
    425 		TPC_RELE(tpc);
    426 		return (ENOMEM);
    427 	}
    428 
    429 	/* Initialize the new entry. */
    430 	mutex_init(&new->rhc_lock, NULL, MUTEX_DEFAULT, NULL);
    431 	new->rhc_host = rhent->rh_address;
    432 
    433 	/* The rhc now owns this tpc reference, so no TPC_RELE past here */
    434 	new->rhc_tpc = tpc;
    435 
    436 	/*
    437 	 * tnrh_hash_add handles the tnrh entry ref count for hash
    438 	 * table inclusion. The ref count is incremented and decremented
    439 	 * here to trigger deletion of the new hash table entry in the
    440 	 * event that tnrh_hash_add fails.
    441 	 */
    442 	TNRHC_HOLD(new);
    443 	status = tnrh_hash_add(new, rhent->rh_prefix);
    444 	TNRHC_RELE(new);
    445 
    446 	return (status);
    447 }
    448 
    449 static int
    450 tnrh_get(tsol_rhent_t *rhent)
    451 {
    452 	tsol_tpc_t *tpc;
    453 
    454 	switch (rhent->rh_address.ta_family) {
    455 	case AF_INET:
    456 		tpc = find_tpc(&rhent->rh_address.ta_addr_v4, IPV4_VERSION,
    457 		    B_TRUE);
    458 		break;
    459 
    460 	case AF_INET6:
    461 		tpc = find_tpc(&rhent->rh_address.ta_addr_v6, IPV6_VERSION,
    462 		    B_TRUE);
    463 		break;
    464 
    465 	default:
    466 		return (EINVAL);
    467 	}
    468 	if (tpc == NULL)
    469 		return (ENOENT);
    470 
    471 	DTRACE_PROBE2(tx__tndb__l4__tnrhget__foundtpc, tsol_rhent_t *,
    472 	    rhent, tsol_tpc_t *, tpc);
    473 	bcopy(tpc->tpc_tp.name, rhent->rh_template,
    474 	    sizeof (rhent->rh_template));
    475 	TPC_RELE(tpc);
    476 	return (0);
    477 }
    478 
    479 static boolean_t
    480 template_name_ok(const char *name)
    481 {
    482 	const char *name_end = name + TNTNAMSIZ;
    483 
    484 	while (name < name_end) {
    485 		if (*name == '\0')
    486 			break;
    487 		name++;
    488 	}
    489 	return (name < name_end);
    490 }
    491 
    492 static int
    493 tnrh(int cmd, void *buf)
    494 {
    495 	int retv;
    496 	tsol_rhent_t rhent;
    497 
    498 	/* Make sure user has sufficient privilege */
    499 	if (cmd != TNDB_GET &&
    500 	    (retv = secpolicy_net_config(CRED(), B_FALSE)) != 0)
    501 		return (set_errno(retv));
    502 
    503 	/*
    504 	 * Get arguments
    505 	 */
    506 	if (cmd != TNDB_FLUSH &&
    507 	    copyin(buf, &rhent, sizeof (rhent)) != 0) {
    508 		DTRACE_PROBE(tx__tndb__l0__tnrhdelete__copyin);
    509 		return (set_errno(EFAULT));
    510 	}
    511 
    512 	switch (cmd) {
    513 	case TNDB_LOAD:
    514 		DTRACE_PROBE(tx__tndb__l2__tnrhdelete__tndbload);
    515 		if (!template_name_ok(rhent.rh_template)) {
    516 			retv = EINVAL;
    517 		} else {
    518 			retv = tnrh_load(&rhent);
    519 		}
    520 		break;
    521 
    522 	case TNDB_DELETE:
    523 		DTRACE_PROBE(tx__tndb__l2__tnrhdelete__tndbdelete);
    524 		retv = tnrh_delete(&rhent);
    525 		break;
    526 
    527 	case TNDB_GET:
    528 		DTRACE_PROBE(tx__tndb__l4__tnrhdelete__tndbget);
    529 		if (!template_name_ok(rhent.rh_template)) {
    530 			retv = EINVAL;
    531 			break;
    532 		}
    533 
    534 		retv = tnrh_get(&rhent);
    535 		if (retv != 0)
    536 			break;
    537 
    538 		/*
    539 		 * Copy out result
    540 		 */
    541 		if (copyout(&rhent, buf, sizeof (rhent)) != 0) {
    542 			DTRACE_PROBE(tx__tndb__l0__tnrhdelete__copyout);
    543 			retv = EFAULT;
    544 		}
    545 		break;
    546 
    547 	case TNDB_FLUSH:
    548 		DTRACE_PROBE(tx__tndb__l2__tnrhdelete__flush);
    549 		flush_rh_table(tnrhc_table, TSOL_MASK_TABLE_SIZE);
    550 		flush_rh_table(tnrhc_table_v6, TSOL_MASK_TABLE_SIZE_V6);
    551 		break;
    552 
    553 	default:
    554 		DTRACE_PROBE1(tx__tndb__l0__tnrhdelete__unknowncmd,
    555 		    int, cmd);
    556 		retv = EOPNOTSUPP;
    557 		break;
    558 	}
    559 
    560 	if (retv != 0)
    561 		return (set_errno(retv));
    562 	else
    563 		return (retv);
    564 }
    565 
    566 static tsol_tpc_t *
    567 tnrhtp_create(const tsol_tpent_t *tpent, int kmflags)
    568 {
    569 	tsol_tpc_t *tpc;
    570 	mod_hash_val_t hv;
    571 
    572 	/*
    573 	 * We intentionally allocate a new entry before taking the lock on the
    574 	 * entire database.
    575 	 */
    576 	if ((tpc = kmem_zalloc(sizeof (*tpc), kmflags)) == NULL)
    577 		return (NULL);
    578 
    579 	mutex_enter(&tpc_lock);
    580 	if (mod_hash_find(tpc_name_hash, (mod_hash_key_t)tpent->name,
    581 	    &hv) == 0) {
    582 		tsol_tpc_t *found_tpc = (tsol_tpc_t *)hv;
    583 
    584 		found_tpc->tpc_invalid = 1;
    585 		(void) mod_hash_destroy(tpc_name_hash,
    586 		    (mod_hash_key_t)tpent->name);
    587 		TPC_RELE(found_tpc);
    588 	}
    589 
    590 	mutex_init(&tpc->tpc_lock, NULL, MUTEX_DEFAULT, NULL);
    591 	/* tsol_tpent_t is the same on LP64 and ILP32 */
    592 	bcopy(tpent, &tpc->tpc_tp, sizeof (tpc->tpc_tp));
    593 	(void) mod_hash_insert(tpc_name_hash, (mod_hash_key_t)tpc->tpc_tp.name,
    594 	    (mod_hash_val_t)tpc);
    595 	TPC_HOLD(tpc);
    596 	mutex_exit(&tpc_lock);
    597 
    598 	return (tpc);
    599 }
    600 
    601 static int
    602 tnrhtp_delete(const char *tname)
    603 {
    604 	tsol_tpc_t *tpc;
    605 	mod_hash_val_t hv;
    606 	int retv = ENOENT;
    607 
    608 	mutex_enter(&tpc_lock);
    609 	if (mod_hash_find(tpc_name_hash, (mod_hash_key_t)tname, &hv) == 0) {
    610 		tpc = (tsol_tpc_t *)hv;
    611 		ASSERT(!tpc->tpc_invalid);
    612 		tpc->tpc_invalid = 1;
    613 		(void) mod_hash_destroy(tpc_name_hash,
    614 		    (mod_hash_key_t)tpc->tpc_tp.name);
    615 		TPC_RELE(tpc);
    616 		retv = 0;
    617 	}
    618 	mutex_exit(&tpc_lock);
    619 	return (retv);
    620 }
    621 
    622 /* ARGSUSED */
    623 static uint_t
    624 tpc_delete(mod_hash_key_t key, mod_hash_val_t *val, void *arg)
    625 {
    626 	tsol_tpc_t *tpc = (tsol_tpc_t *)val;
    627 
    628 	ASSERT(!tpc->tpc_invalid);
    629 	tpc->tpc_invalid = 1;
    630 	TPC_RELE(tpc);
    631 	return (MH_WALK_CONTINUE);
    632 }
    633 
    634 static void
    635 tnrhtp_flush(void)
    636 {
    637 	mutex_enter(&tpc_lock);
    638 	mod_hash_walk(tpc_name_hash, tpc_delete, NULL);
    639 	mod_hash_clear(tpc_name_hash);
    640 	mutex_exit(&tpc_lock);
    641 }
    642 
    643 static int
    644 tnrhtp(int cmd, void *buf)
    645 {
    646 	int retv;
    647 	int type;
    648 	tsol_tpent_t rhtpent;
    649 	tsol_tpc_t *tpc;
    650 
    651 	/* Make sure user has sufficient privilege */
    652 	if (cmd != TNDB_GET &&
    653 	    (retv = secpolicy_net_config(CRED(), B_FALSE)) != 0)
    654 		return (set_errno(retv));
    655 
    656 	/*
    657 	 * Get argument.  Note that tsol_tpent_t is the same on LP64 and ILP32,
    658 	 * so no special handling is required.
    659 	 */
    660 	if (cmd != TNDB_FLUSH) {
    661 		if (copyin(buf, &rhtpent, sizeof (rhtpent)) != 0) {
    662 			DTRACE_PROBE(tx__tndb__l0__tnrhtp__copyin);
    663 			return (set_errno(EFAULT));
    664 		}
    665 
    666 		/*
    667 		 * Don't let the user give us a bogus (unterminated) template
    668 		 * name.
    669 		 */
    670 		if (!template_name_ok(rhtpent.name))
    671 			return (set_errno(EINVAL));
    672 	}
    673 
    674 	switch (cmd) {
    675 	case TNDB_LOAD:
    676 		DTRACE_PROBE1(tx__tndb__l2__tnrhtp__tndbload, char *,
    677 			rhtpent.name);
    678 		type = rhtpent.host_type;
    679 		if (type != UNLABELED && type != SUN_CIPSO) {
    680 			retv = EINVAL;
    681 			break;
    682 		}
    683 
    684 		if (tnrhtp_create(&rhtpent, KM_NOSLEEP) == NULL)
    685 			retv = ENOMEM;
    686 		else
    687 			retv = 0;
    688 		break;
    689 
    690 	case TNDB_GET:
    691 		DTRACE_PROBE1(tx__tndb__l4__tnrhtp__tndbget, char *,
    692 		    rhtpent.name);
    693 		tpc = tnrhtp_find(rhtpent.name, tpc_name_hash);
    694 		if (tpc == NULL) {
    695 			retv = ENOENT;
    696 			break;
    697 		}
    698 
    699 		/* Copy out result */
    700 		if (copyout(&tpc->tpc_tp, buf, sizeof (tpc->tpc_tp)) != 0) {
    701 			DTRACE_PROBE(tx__tndb__l0__tnrhtp__copyout);
    702 			retv = EFAULT;
    703 		} else {
    704 			retv = 0;
    705 		}
    706 		TPC_RELE(tpc);
    707 		break;
    708 
    709 	case TNDB_DELETE:
    710 		DTRACE_PROBE1(tx__tndb__l4__tnrhtp__tndbdelete, char *,
    711 		    rhtpent.name);
    712 		retv = tnrhtp_delete(rhtpent.name);
    713 		break;
    714 
    715 	case TNDB_FLUSH:
    716 		DTRACE_PROBE(tx__tndb__l4__tnrhtp__flush);
    717 		tnrhtp_flush();
    718 		retv = 0;
    719 		break;
    720 
    721 	default:
    722 		DTRACE_PROBE1(tx__tndb__l0__tnrhtp__unknowncmd, int,
    723 		    cmd);
    724 		retv = EOPNOTSUPP;
    725 		break;
    726 	}
    727 
    728 	if (retv != 0)
    729 		return (set_errno(retv));
    730 	else
    731 		return (retv);
    732 }
    733 
    734 /*
    735  * MLP entry ordering logic
    736  *
    737  * There are two loops in this routine.  The first loop finds the entry that
    738  * either logically follows the new entry to be inserted, or is the entry that
    739  * precedes and overlaps the new entry, or is NULL to mean end-of-list.  This
    740  * is 'tme.'  The second loop scans ahead from that point to find any overlap
    741  * on the front or back of this new entry.
    742  *
    743  * For the first loop, we can have the following cases in the list (note that
    744  * the port-portmax range is inclusive):
    745  *
    746  *	       port   portmax
    747  *		+--------+
    748  * 1: +------+ ................... precedes; skip to next
    749  * 2:	    +------+ ............. overlaps; stop here if same protocol
    750  * 3:		+------+ ......... overlaps; stop if same or higher protocol
    751  * 4:		    +-------+ .... overlaps or succeeds; stop here
    752  *
    753  * For the second loop, we can have the following cases (note that we need not
    754  * care about other protocol entries at this point, because we're only looking
    755  * for overlap, not an insertion point):
    756  *
    757  *	       port   portmax
    758  *		+--------+
    759  * 5:	    +------+ ............. overlaps; stop if same protocol
    760  * 6:		+------+ ......... overlaps; stop if same protocol
    761  * 7:		    +-------+ .... overlaps; stop if same protocol
    762  * 8:			   +---+ . follows; search is done
    763  *
    764  * In other words, this second search needs to consider only whether the entry
    765  * has a starting port number that's greater than the end point of the new
    766  * entry.  All others are overlaps.
    767  */
    768 static int
    769 mlp_add_del(tsol_mlp_list_t *mlpl, zoneid_t zoneid, uint8_t proto,
    770     uint16_t port, uint16_t portmax, boolean_t addflag)
    771 {
    772 	int retv;
    773 	tsol_mlp_entry_t *tme, *tme2, *newent;
    774 
    775 	if (addflag) {
    776 		if ((newent = kmem_zalloc(sizeof (*newent), KM_NOSLEEP)) ==
    777 		    NULL)
    778 			return (ENOMEM);
    779 	} else {
    780 		newent = NULL;
    781 	}
    782 	rw_enter(&mlpl->mlpl_rwlock, RW_WRITER);
    783 
    784 	/*
    785 	 * First loop: find logical insertion point or overlap.  Table is kept
    786 	 * in order of port number first, and then, within that, by protocol
    787 	 * number.
    788 	 */
    789 	for (tme = mlpl->mlpl_first; tme != NULL; tme = tme->mlpe_next) {
    790 		/* logically next (case 4) */
    791 		if (tme->mlpe_mlp.mlp_port > port)
    792 			break;
    793 		/* if this is logically next or overlap, then stop (case 3) */
    794 		if (tme->mlpe_mlp.mlp_port == port &&
    795 		    tme->mlpe_mlp.mlp_ipp >= proto)
    796 			break;
    797 		/* earlier or same port sequence; check for overlap (case 2) */
    798 		if (tme->mlpe_mlp.mlp_ipp == proto &&
    799 		    tme->mlpe_mlp.mlp_port_upper >= port)
    800 			break;
    801 		/* otherwise, loop again (case 1) */
    802 	}
    803 
    804 	/* Second loop: scan ahead for overlap */
    805 	for (tme2 = tme; tme2 != NULL; tme2 = tme2->mlpe_next) {
    806 		/* check if entry follows; no overlap (case 8) */
    807 		if (tme2->mlpe_mlp.mlp_port > portmax) {
    808 			tme2 = NULL;
    809 			break;
    810 		}
    811 		/* only exact protocol matches at this point (cases 5-7) */
    812 		if (tme2->mlpe_mlp.mlp_ipp == proto)
    813 			break;
    814 	}
    815 
    816 	retv = 0;
    817 	if (addflag) {
    818 		if (tme2 != NULL) {
    819 			retv = EEXIST;
    820 		} else {
    821 			newent->mlpe_zoneid = zoneid;
    822 			newent->mlpe_mlp.mlp_ipp = proto;
    823 			newent->mlpe_mlp.mlp_port = port;
    824 			newent->mlpe_mlp.mlp_port_upper = portmax;
    825 			newent->mlpe_next = tme;
    826 			if (tme == NULL) {
    827 				tme2 = mlpl->mlpl_last;
    828 				mlpl->mlpl_last = newent;
    829 			} else {
    830 				tme2 = tme->mlpe_prev;
    831 				tme->mlpe_prev = newent;
    832 			}
    833 			newent->mlpe_prev = tme2;
    834 			if (tme2 == NULL)
    835 				mlpl->mlpl_first = newent;
    836 			else
    837 				tme2->mlpe_next = newent;
    838 			newent = NULL;
    839 		}
    840 	} else {
    841 		if (tme2 == NULL || tme2->mlpe_mlp.mlp_port != port ||
    842 		    tme2->mlpe_mlp.mlp_port_upper != portmax) {
    843 			retv = ENOENT;
    844 		} else {
    845 			if ((tme2 = tme->mlpe_prev) == NULL)
    846 				mlpl->mlpl_first = tme->mlpe_next;
    847 			else
    848 				tme2->mlpe_next = tme->mlpe_next;
    849 			if ((tme2 = tme->mlpe_next) == NULL)
    850 				mlpl->mlpl_last = tme->mlpe_prev;
    851 			else
    852 				tme2->mlpe_prev = tme->mlpe_prev;
    853 			newent = tme;
    854 		}
    855 	}
    856 	rw_exit(&mlpl->mlpl_rwlock);
    857 
    858 	if (newent != NULL)
    859 		kmem_free(newent, sizeof (*newent));
    860 
    861 	return (retv);
    862 }
    863 
    864 /*
    865  * Add or remove an MLP entry from the database so that the classifier can find
    866  * it.
    867  *
    868  * Note: port number is in host byte order.
    869  */
    870 int
    871 tsol_mlp_anon(zone_t *zone, mlp_type_t mlptype, uchar_t proto, uint16_t port,
    872     boolean_t addflag)
    873 {
    874 	int retv = 0;
    875 
    876 	if (mlptype == mlptBoth || mlptype == mlptPrivate)
    877 		retv = mlp_add_del(&zone->zone_mlps, zone->zone_id, proto,
    878 		    port, port, addflag);
    879 	if ((retv == 0 || !addflag) &&
    880 	    (mlptype == mlptBoth || mlptype == mlptShared)) {
    881 		retv = mlp_add_del(&shared_mlps, zone->zone_id, proto, port,
    882 		    port, addflag);
    883 		if (retv != 0 && addflag)
    884 			(void) mlp_add_del(&zone->zone_mlps, zone->zone_id,
    885 			    proto, port, port, B_FALSE);
    886 	}
    887 	return (retv);
    888 }
    889 
    890 static void
    891 mlp_flush(tsol_mlp_list_t *mlpl, zoneid_t zoneid)
    892 {
    893 	tsol_mlp_entry_t *tme, *tme2, *tmnext;
    894 
    895 	rw_enter(&mlpl->mlpl_rwlock, RW_WRITER);
    896 	for (tme = mlpl->mlpl_first; tme != NULL; tme = tmnext) {
    897 		tmnext = tme->mlpe_next;
    898 		if (zoneid == ALL_ZONES || tme->mlpe_zoneid == zoneid) {
    899 			if ((tme2 = tme->mlpe_prev) == NULL)
    900 				mlpl->mlpl_first = tmnext;
    901 			else
    902 				tme2->mlpe_next = tmnext;
    903 			if (tmnext == NULL)
    904 				mlpl->mlpl_last = tme2;
    905 			else
    906 				tmnext->mlpe_prev = tme2;
    907 			kmem_free(tme, sizeof (*tme));
    908 		}
    909 	}
    910 	rw_exit(&mlpl->mlpl_rwlock);
    911 }
    912 
    913 /*
    914  * Note: user supplies port numbers in host byte order.
    915  */
    916 static int
    917 tnmlp(int cmd, void *buf)
    918 {
    919 	int retv;
    920 	tsol_mlpent_t tsme;
    921 	zone_t *zone;
    922 	tsol_mlp_list_t *mlpl;
    923 	tsol_mlp_entry_t *tme;
    924 
    925 	/* Make sure user has sufficient privilege */
    926 	if (cmd != TNDB_GET &&
    927 	    (retv = secpolicy_net_config(CRED(), B_FALSE)) != 0)
    928 		return (set_errno(retv));
    929 
    930 	/*
    931 	 * Get argument.  Note that tsol_mlpent_t is the same on LP64 and
    932 	 * ILP32, so no special handling is required.
    933 	 */
    934 	if (copyin(buf, &tsme, sizeof (tsme)) != 0) {
    935 		DTRACE_PROBE(tx__tndb__l0__tnmlp__copyin);
    936 		return (set_errno(EFAULT));
    937 	}
    938 
    939 	/* MLPs on shared IP addresses */
    940 	if (tsme.tsme_flags & TSOL_MEF_SHARED) {
    941 		zone = NULL;
    942 		mlpl = &shared_mlps;
    943 	} else {
    944 		zone = zone_find_by_id(tsme.tsme_zoneid);
    945 		if (zone == NULL)
    946 			return (set_errno(EINVAL));
    947 		mlpl = &zone->zone_mlps;
    948 	}
    949 	if (tsme.tsme_mlp.mlp_port_upper == 0)
    950 		tsme.tsme_mlp.mlp_port_upper = tsme.tsme_mlp.mlp_port;
    951 
    952 	switch (cmd) {
    953 	case TNDB_LOAD:
    954 		DTRACE_PROBE1(tx__tndb__l2__tnmlp__tndbload,
    955 		    tsol_mlpent_t *, &tsme);
    956 		if (tsme.tsme_mlp.mlp_ipp == 0 || tsme.tsme_mlp.mlp_port == 0 ||
    957 		    tsme.tsme_mlp.mlp_port > tsme.tsme_mlp.mlp_port_upper) {
    958 			retv = EINVAL;
    959 			break;
    960 		}
    961 		retv = mlp_add_del(mlpl, tsme.tsme_zoneid,
    962 		    tsme.tsme_mlp.mlp_ipp, tsme.tsme_mlp.mlp_port,
    963 		    tsme.tsme_mlp.mlp_port_upper, B_TRUE);
    964 		break;
    965 
    966 	case TNDB_GET:
    967 		DTRACE_PROBE1(tx__tndb__l2__tnmlp__tndbget,
    968 		    tsol_mlpent_t *, &tsme);
    969 
    970 		/*
    971 		 * Search for the requested element or, failing that, the one
    972 		 * that's logically next in the sequence.
    973 		 */
    974 		rw_enter(&mlpl->mlpl_rwlock, RW_READER);
    975 		for (tme = mlpl->mlpl_first; tme != NULL;
    976 		    tme = tme->mlpe_next) {
    977 			if (tsme.tsme_zoneid != ALL_ZONES &&
    978 			    tme->mlpe_zoneid != tsme.tsme_zoneid)
    979 				continue;
    980 			if (tme->mlpe_mlp.mlp_ipp >= tsme.tsme_mlp.mlp_ipp &&
    981 			    tme->mlpe_mlp.mlp_port == tsme.tsme_mlp.mlp_port)
    982 				break;
    983 			if (tme->mlpe_mlp.mlp_port > tsme.tsme_mlp.mlp_port)
    984 				break;
    985 		}
    986 		if (tme == NULL) {
    987 			retv = ENOENT;
    988 		} else {
    989 			tsme.tsme_zoneid = tme->mlpe_zoneid;
    990 			tsme.tsme_mlp = tme->mlpe_mlp;
    991 			retv = 0;
    992 		}
    993 		rw_exit(&mlpl->mlpl_rwlock);
    994 		break;
    995 
    996 	case TNDB_DELETE:
    997 		DTRACE_PROBE1(tx__tndb__l4__tnmlp__tndbdelete,
    998 		    tsol_mlpent_t *, &tsme);
    999 		retv = mlp_add_del(mlpl, tsme.tsme_zoneid,
   1000 		    tsme.tsme_mlp.mlp_ipp, tsme.tsme_mlp.mlp_port,
   1001 		    tsme.tsme_mlp.mlp_port_upper, B_FALSE);
   1002 		break;
   1003 
   1004 	case TNDB_FLUSH:
   1005 		DTRACE_PROBE1(tx__tndb__l4__tnmlp__tndbflush,
   1006 		    tsol_mlpent_t *, &tsme);
   1007 		mlp_flush(mlpl, ALL_ZONES);
   1008 		mlp_flush(&shared_mlps, tsme.tsme_zoneid);
   1009 		retv = 0;
   1010 		break;
   1011 
   1012 	default:
   1013 		DTRACE_PROBE1(tx__tndb__l0__tnmlp__unknowncmd, int,
   1014 		    cmd);
   1015 		retv = EOPNOTSUPP;
   1016 		break;
   1017 	}
   1018 
   1019 	if (zone != NULL)
   1020 		zone_rele(zone);
   1021 
   1022 	if (cmd == TNDB_GET && retv == 0) {
   1023 		/* Copy out result */
   1024 		if (copyout(&tsme, buf, sizeof (tsme)) != 0) {
   1025 			DTRACE_PROBE(tx__tndb__l0__tnmlp__copyout);
   1026 			retv = EFAULT;
   1027 		}
   1028 	}
   1029 
   1030 	if (retv != 0)
   1031 		return (set_errno(retv));
   1032 	else
   1033 		return (retv);
   1034 }
   1035 
   1036 /*
   1037  * Returns a tnrhc matching the addr address.
   1038  * The returned rhc's refcnt is incremented.
   1039  */
   1040 tsol_tnrhc_t *
   1041 find_rhc(const void *addr, uchar_t version, boolean_t staleok)
   1042 {
   1043 	tsol_tnrhc_t *rh = NULL;
   1044 	tsol_tnrhc_t *new;
   1045 	tsol_tpc_t *tpc;
   1046 	tnrhc_hash_t *tnrhc_hash;
   1047 	ipaddr_t tmpmask;
   1048 	in_addr_t *in4 = (in_addr_t *)addr;
   1049 	in6_addr_t *in6 = (in6_addr_t *)addr;
   1050 	in_addr_t tmpin4;
   1051 	in6_addr_t tmpmask6;
   1052 	int	i;
   1053 	int	prefix;
   1054 
   1055 	/*
   1056 	 * An IPv4-mapped IPv6 address is really an IPv4 address
   1057 	 * in IPv6 format.
   1058 	 */