Home | History | Annotate | Download | only in ip
      1 /*
      2  * CDDL HEADER START
      3  *
      4  * The contents of this file are subject to the terms of the
      5  * Common Development and Distribution License (the "License").
      6  * You may not use this file except in compliance with the License.
      7  *
      8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
      9  * or http://www.opensolaris.org/os/licensing.
     10  * See the License for the specific language governing permissions
     11  * and limitations under the License.
     12  *
     13  * When distributing Covered Code, include this CDDL HEADER in each
     14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
     15  * If applicable, add the following below this CDDL HEADER, with the
     16  * fields enclosed by brackets "[]" replaced with your own identifying
     17  * information: Portions Copyright [yyyy] [name of copyright owner]
     18  *
     19  * CDDL HEADER END
     20  */
     21 /*
     22  * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
     23  * Use is subject to license terms.
     24  */
     25 
     26 #pragma ident	"%Z%%M%	%I%	%E% SMI"
     27 
     28 #include <sys/types.h>
     29 #include <sys/stream.h>
     30 #include <sys/stropts.h>
     31 #include <sys/errno.h>
     32 #include <sys/ddi.h>
     33 #include <sys/debug.h>
     34 #include <sys/cmn_err.h>
     35 #include <sys/stream.h>
     36 #include <sys/strlog.h>
     37 #include <sys/kmem.h>
     38 #include <sys/sunddi.h>
     39 #include <sys/tihdr.h>
     40 #include <sys/atomic.h>
     41 #include <sys/socket.h>
     42 #include <sys/sysmacros.h>
     43 #include <sys/crypto/common.h>
     44 #include <sys/crypto/api.h>
     45 #include <sys/zone.h>
     46 #include <netinet/in.h>
     47 #include <net/if.h>
     48 #include <net/pfkeyv2.h>
     49 #include <inet/common.h>
     50 #include <netinet/ip6.h>
     51 #include <inet/ip.h>
     52 #include <inet/ip_ire.h>
     53 #include <inet/ip6.h>
     54 #include <inet/ipsec_info.h>
     55 #include <inet/tcp.h>
     56 #include <inet/sadb.h>
     57 #include <inet/ipsec_impl.h>
     58 #include <inet/ipsecah.h>
     59 #include <inet/ipsecesp.h>
     60 #include <sys/random.h>
     61 #include <sys/dlpi.h>
     62 #include <sys/iphada.h>
     63 #include <inet/ip_if.h>
     64 #include <inet/ipdrop.h>
     65 #include <inet/ipclassifier.h>
     66 #include <inet/sctp_ip.h>
     67 #include <inet/tun.h>
     68 
     69 /*
     70  * This source file contains Security Association Database (SADB) common
     71  * routines.  They are linked in with the AH module.  Since AH has no chance
     72  * of falling under export control, it was safe to link it in there.
     73  */
     74 
     75 static mblk_t *sadb_extended_acquire(ipsec_selector_t *, ipsec_policy_t *,
     76     ipsec_action_t *, boolean_t, uint32_t, uint32_t, netstack_t *);
     77 static void sadb_ill_df(ill_t *, mblk_t *, isaf_t *, int, boolean_t);
     78 static ipsa_t *sadb_torch_assoc(isaf_t *, ipsa_t *, boolean_t, mblk_t **);
     79 static void sadb_drain_torchq(queue_t *, mblk_t *);
     80 static void sadb_destroy_acqlist(iacqf_t **, uint_t, boolean_t,
     81 			    netstack_t *);
     82 static void sadb_destroy(sadb_t *, netstack_t *);
     83 static mblk_t *sadb_sa2msg(ipsa_t *, sadb_msg_t *);
     84 
     85 static time_t sadb_add_time(time_t, uint64_t);
     86 static void lifetime_fuzz(ipsa_t *);
     87 static void age_pair_peer_list(templist_t *, sadb_t *, boolean_t);
     88 
     89 /*
     90  * ipsacq_maxpackets is defined here to make it tunable
     91  * from /etc/system.
     92  */
     93 extern uint64_t ipsacq_maxpackets;
     94 
/*
 * SET_EXPIRE(sa, delta, exp): if the lifetime field (sa)->ipsa_<delta> is
 * non-zero, set (sa)->ipsa_<exp> to (sa)->ipsa_addtime plus that lifetime,
 * using sadb_add_time().  A zero lifetime leaves ipsa_<exp> untouched.
 *
 * The expansion is a bare { } block (not do { } while (0)) and "sa" is
 * evaluated more than once, so pass a side-effect-free expression.
 */
#define	SET_EXPIRE(sa, delta, exp) {				\
	if (((sa)->ipsa_ ## delta) != 0) {				\
		(sa)->ipsa_ ## exp = sadb_add_time((sa)->ipsa_addtime,	\
			(sa)->ipsa_ ## delta);				\
	}								\
}

/*
 * UPDATE_EXPIRE(sa, delta, exp): if the lifetime field (sa)->ipsa_<delta>
 * is non-zero, compute (sa)->ipsa_usetime plus that lifetime with
 * sadb_add_time().  If (sa)->ipsa_<exp> is currently 0 it is set to that
 * value; otherwise it becomes the smaller (MIN) of its current value and
 * the newly computed one.
 *
 * The expansion declares a block-local "tmp" and evaluates "sa" more than
 * once, so pass a side-effect-free expression.
 */
#define	UPDATE_EXPIRE(sa, delta, exp) {					\
	if (((sa)->ipsa_ ## delta) != 0) {				\
		time_t tmp = sadb_add_time((sa)->ipsa_usetime,		\
			(sa)->ipsa_ ## delta);				\
		if (((sa)->ipsa_ ## exp) == 0)				\
			(sa)->ipsa_ ## exp = tmp;			\
		else							\
			(sa)->ipsa_ ## exp = 				\
			    MIN((sa)->ipsa_ ## exp, tmp); 		\
	}								\
}
    113 
    114 
    115 /* wrap the macro so we can pass it as a function pointer */
    116 void
    117 sadb_sa_refrele(void *target)
    118 {
    119 	IPSA_REFRELE(((ipsa_t *)target));
    120 }
    121 
    122 /*
    123  * We presume that sizeof (long) == sizeof (time_t) and that time_t is
    124  * a signed type.
    125  */
    126 #define	TIME_MAX LONG_MAX
    127 
    128 /*
    129  * PF_KEY gives us lifetimes in uint64_t seconds.  We presume that
    130  * time_t is defined to be a signed type with the same range as
    131  * "long".  On ILP32 systems, we thus run the risk of wrapping around
    132  * at end of time, as well as "overwrapping" the clock back around
    133  * into a seemingly valid but incorrect future date earlier than the
    134  * desired expiration.
    135  *
    136  * In order to avoid odd behavior (either negative lifetimes or loss
    137  * of high order bits) when someone asks for bizarrely long SA
    138  * lifetimes, we do a saturating add for expire times.
    139  *
    140  * We presume that ILP32 systems will be past end of support life when
    141  * the 32-bit time_t overflows (a dangerous assumption, mind you..).
    142  *
    143  * On LP64, 2^64 seconds are about 5.8e11 years, at which point we
    144  * will hopefully have figured out clever ways to avoid the use of
    145  * fixed-sized integers in computation.
    146  */
    147 static time_t
    148 sadb_add_time(time_t base, uint64_t delta)
    149 {
    150 	time_t sum;
    151 
    152 	/*
    153 	 * Clip delta to the maximum possible time_t value to
    154 	 * prevent "overwrapping" back into a shorter-than-desired
    155 	 * future time.
    156 	 */
    157 	if (delta > TIME_MAX)
    158 		delta = TIME_MAX;
    159 	/*
    160 	 * This sum may still overflow.
    161 	 */
    162 	sum = base + delta;
    163 
    164 	/*
    165 	 * .. so if the result is less than the base, we overflowed.
    166 	 */
    167 	if (sum < base)
    168 		sum = TIME_MAX;
    169 
    170 	return (sum);
    171 }
    172 
    173 /*
    174  * Callers of this function have already created a working security
    175  * association, and have found the appropriate table & hash chain.  All this
    176  * function does is check duplicates, and insert the SA.  The caller needs to
    177  * hold the hash bucket lock and increment the refcnt before insertion.
    178  *
    179  * Return 0 if success, EEXIST if collision.
    180  */
    181 #define	SA_UNIQUE_MATCH(sa1, sa2) \
    182 	(((sa1)->ipsa_unique_id & (sa1)->ipsa_unique_mask) == \
    183 	((sa2)->ipsa_unique_id & (sa2)->ipsa_unique_mask))
    184 
    185 int
    186 sadb_insertassoc(ipsa_t *ipsa, isaf_t *bucket)
    187 {
    188 	ipsa_t **ptpn = NULL;
    189 	ipsa_t *walker;
    190 	boolean_t unspecsrc;
    191 
    192 	ASSERT(MUTEX_HELD(&bucket->isaf_lock));
    193 
    194 	unspecsrc = IPSA_IS_ADDR_UNSPEC(ipsa->ipsa_srcaddr, ipsa->ipsa_addrfam);
    195 
    196 	walker = bucket->isaf_ipsa;
    197 	ASSERT(walker == NULL || ipsa->ipsa_addrfam == walker->ipsa_addrfam);
    198 
    199 	/*
    200 	 * Find insertion point (pointed to with **ptpn).  Insert at the head
    201 	 * of the list unless there's an unspecified source address, then
    202 	 * insert it after the last SA with a specified source address.
    203 	 *
    204 	 * BTW, you'll have to walk the whole chain, matching on {DST, SPI}
    205 	 * checking for collisions.
    206 	 */
    207 
    208 	while (walker != NULL) {
    209 		if (IPSA_ARE_ADDR_EQUAL(walker->ipsa_dstaddr,
    210 		    ipsa->ipsa_dstaddr, ipsa->ipsa_addrfam)) {
    211 			if (walker->ipsa_spi == ipsa->ipsa_spi)
    212 				return (EEXIST);
    213 
    214 			mutex_enter(&walker->ipsa_lock);
    215 			if (ipsa->ipsa_state == IPSA_STATE_MATURE &&
    216 			    (walker->ipsa_flags & IPSA_F_USED) &&
    217 			    SA_UNIQUE_MATCH(walker, ipsa)) {
    218 				walker->ipsa_flags |= IPSA_F_CINVALID;
    219 			}
    220 			mutex_exit(&walker->ipsa_lock);
    221 		}
    222 
    223 		if (ptpn == NULL && unspecsrc) {
    224 			if (IPSA_IS_ADDR_UNSPEC(walker->ipsa_srcaddr,
    225 			    walker->ipsa_addrfam))
    226 				ptpn = walker->ipsa_ptpn;
    227 			else if (walker->ipsa_next == NULL)
    228 				ptpn = &walker->ipsa_next;
    229 		}
    230 
    231 		walker = walker->ipsa_next;
    232 	}
    233 
    234 	if (ptpn == NULL)
    235 		ptpn = &bucket->isaf_ipsa;
    236 	ipsa->ipsa_next = *ptpn;
    237 	ipsa->ipsa_ptpn = ptpn;
    238 	if (ipsa->ipsa_next != NULL)
    239 		ipsa->ipsa_next->ipsa_ptpn = &ipsa->ipsa_next;
    240 	*ptpn = ipsa;
    241 	ipsa->ipsa_linklock = &bucket->isaf_lock;
    242 
    243 	return (0);
    244 }
    245 #undef SA_UNIQUE_MATCH
    246 
    247 /*
    248  * Free a security association.  Its reference count is 0, which means
    249  * I must free it.  The SA must be unlocked and must not be linked into
    250  * any fanout list.
    251  */
    252 static void
    253 sadb_freeassoc(ipsa_t *ipsa)
    254 {
    255 	ipsec_stack_t	*ipss = ipsa->ipsa_netstack->netstack_ipsec;
    256 
    257 	ASSERT(ipss != NULL);
    258 	ASSERT(!MUTEX_HELD(&ipsa->ipsa_lock));
    259 	ASSERT(ipsa->ipsa_refcnt == 0);
    260 	ASSERT(ipsa->ipsa_next == NULL);
    261 	ASSERT(ipsa->ipsa_ptpn == NULL);
    262 
    263 	ip_drop_packet(sadb_clear_lpkt(ipsa), B_TRUE, NULL, NULL,
    264 	    DROPPER(ipss, ipds_sadb_inlarval_timeout),
    265 	    &ipss->ipsec_sadb_dropper);
    266 
    267 	mutex_enter(&ipsa->ipsa_lock);
    268 	ipsec_destroy_ctx_tmpl(ipsa, IPSEC_ALG_AUTH);
    269 	ipsec_destroy_ctx_tmpl(ipsa, IPSEC_ALG_ENCR);
    270 	mutex_exit(&ipsa->ipsa_lock);
    271 
    272 	/* bzero() these fields for paranoia's sake. */
    273 	if (ipsa->ipsa_authkey != NULL) {
    274 		bzero(ipsa->ipsa_authkey, ipsa->ipsa_authkeylen);
    275 		kmem_free(ipsa->ipsa_authkey, ipsa->ipsa_authkeylen);
    276 	}
    277 	if (ipsa->ipsa_encrkey != NULL) {
    278 		bzero(ipsa->ipsa_encrkey, ipsa->ipsa_encrkeylen);
    279 		kmem_free(ipsa->ipsa_encrkey, ipsa->ipsa_encrkeylen);
    280 	}
    281 	if (ipsa->ipsa_src_cid != NULL) {
    282 		IPSID_REFRELE(ipsa->ipsa_src_cid);
    283 	}
    284 	if (ipsa->ipsa_dst_cid != NULL) {
    285 		IPSID_REFRELE(ipsa->ipsa_dst_cid);
    286 	}
    287 	if (ipsa->ipsa_integ != NULL)
    288 		kmem_free(ipsa->ipsa_integ, ipsa->ipsa_integlen);
    289 	if (ipsa->ipsa_sens != NULL)
    290 		kmem_free(ipsa->ipsa_sens, ipsa->ipsa_senslen);
    291 
    292 	mutex_destroy(&ipsa->ipsa_lock);
    293 	kmem_free(ipsa, sizeof (*ipsa));
    294 }
    295 
    296 /*
    297  * Unlink a security association from a hash bucket.  Assume the hash bucket
    298  * lock is held, but the association's lock is not.
    299  *
    300  * Note that we do not bump the bucket's generation number here because
    301  * we might not be making a visible change to the set of visible SA's.
    302  * All callers MUST bump the bucket's generation number before they unlock
 * the bucket if they use sadb_unlinkassoc to permanently remove an SA which
    304  * was present in the bucket at the time it was locked.
    305  */
    306 void
    307 sadb_unlinkassoc(ipsa_t *ipsa)
    308 {
    309 	ASSERT(ipsa->ipsa_linklock != NULL);
    310 	ASSERT(MUTEX_HELD(ipsa->ipsa_linklock));
    311 
    312 	/* These fields are protected by the link lock. */
    313 	*(ipsa->ipsa_ptpn) = ipsa->ipsa_next;
    314 	if (ipsa->ipsa_next != NULL) {
    315 		ipsa->ipsa_next->ipsa_ptpn = ipsa->ipsa_ptpn;
    316 		ipsa->ipsa_next = NULL;
    317 	}
    318 
    319 	ipsa->ipsa_ptpn = NULL;
    320 
    321 	/* This may destroy the SA. */
    322 	IPSA_REFRELE(ipsa);
    323 }
    324 
    325 /*
    326  * Create a larval security association with the specified SPI.	 All other
    327  * fields are zeroed.
    328  */
    329 static ipsa_t *
    330 sadb_makelarvalassoc(uint32_t spi, uint32_t *src, uint32_t *dst, int addrfam,
    331     netstack_t *ns)
    332 {
    333 	ipsa_t *newbie;
    334 
    335 	/*
    336 	 * Allocate...
    337 	 */
    338 
    339 	newbie = (ipsa_t *)kmem_zalloc(sizeof (ipsa_t), KM_NOSLEEP);
    340 	if (newbie == NULL) {
    341 		/* Can't make new larval SA. */
    342 		return (NULL);
    343 	}
    344 
    345 	/* Assigned requested SPI, assume caller does SPI allocation magic. */
    346 	newbie->ipsa_spi = spi;
    347 	newbie->ipsa_netstack = ns;	/* No netstack_hold */
    348 
    349 	/*
    350 	 * Copy addresses...
    351 	 */
    352 
    353 	IPSA_COPY_ADDR(newbie->ipsa_srcaddr, src, addrfam);
    354 	IPSA_COPY_ADDR(newbie->ipsa_dstaddr, dst, addrfam);
    355 
    356 	newbie->ipsa_addrfam = addrfam;
    357 
    358 	/*
    359 	 * Set common initialization values, including refcnt.
    360 	 */
    361 	mutex_init(&newbie->ipsa_lock, NULL, MUTEX_DEFAULT, NULL);
    362 	newbie->ipsa_state = IPSA_STATE_LARVAL;
    363 	newbie->ipsa_refcnt = 1;
    364 	newbie->ipsa_freefunc = sadb_freeassoc;
    365 
    366 	/*
    367 	 * There aren't a lot of other common initialization values, as
    368 	 * they are copied in from the PF_KEY message.
    369 	 */
    370 
    371 	return (newbie);
    372 }
    373 
    374 /*
    375  * Call me to initialize a security association fanout.
    376  */
    377 static int
    378 sadb_init_fanout(isaf_t **tablep, uint_t size, int kmflag)
    379 {
    380 	isaf_t *table;
    381 	int i;
    382 
    383 	table = (isaf_t *)kmem_alloc(size * sizeof (*table), kmflag);
    384 	*tablep = table;
    385 
    386 	if (table == NULL)
    387 		return (ENOMEM);
    388 
    389 	for (i = 0; i < size; i++) {
    390 		mutex_init(&(table[i].isaf_lock), NULL, MUTEX_DEFAULT, NULL);
    391 		table[i].isaf_ipsa = NULL;
    392 		table[i].isaf_gen = 0;
    393 	}
    394 
    395 	return (0);
    396 }
    397 
    398 /*
    399  * Call me to initialize an acquire fanout
    400  */
    401 static int
    402 sadb_init_acfanout(iacqf_t **tablep, uint_t size, int kmflag)
    403 {
    404 	iacqf_t *table;
    405 	int i;
    406 
    407 	table = (iacqf_t *)kmem_alloc(size * sizeof (*table), kmflag);
    408 	*tablep = table;
    409 
    410 	if (table == NULL)
    411 		return (ENOMEM);
    412 
    413 	for (i = 0; i < size; i++) {
    414 		mutex_init(&(table[i].iacqf_lock), NULL, MUTEX_DEFAULT, NULL);
    415 		table[i].iacqf_ipsacq = NULL;
    416 	}
    417 
    418 	return (0);
    419 }
    420 
    421 /*
    422  * Attempt to initialize an SADB instance.  On failure, return ENOMEM;
    423  * caller must clean up partial allocations.
    424  */
    425 static int
    426 sadb_init_trial(sadb_t *sp, uint_t size, int kmflag)
    427 {
    428 	ASSERT(sp->sdb_of == NULL);
    429 	ASSERT(sp->sdb_if == NULL);
    430 	ASSERT(sp->sdb_acq == NULL);
    431 
    432 	sp->sdb_hashsize = size;
    433 	if (sadb_init_fanout(&sp->sdb_of, size, kmflag) != 0)
    434 		return (ENOMEM);
    435 	if (sadb_init_fanout(&sp->sdb_if, size, kmflag) != 0)
    436 		return (ENOMEM);
    437 	if (sadb_init_acfanout(&sp->sdb_acq, size, kmflag) != 0)
    438 		return (ENOMEM);
    439 
    440 	return (0);
    441 }
    442 
    443 /*
    444  * Call me to initialize an SADB instance; fall back to default size on failure.
    445  */
    446 static void
    447 sadb_init(const char *name, sadb_t *sp, uint_t size, uint_t ver,
    448     netstack_t *ns)
    449 {
    450 	ASSERT(sp->sdb_of == NULL);
    451 	ASSERT(sp->sdb_if == NULL);
    452 	ASSERT(sp->sdb_acq == NULL);
    453 
    454 	if (size < IPSEC_DEFAULT_HASH_SIZE)
    455 		size = IPSEC_DEFAULT_HASH_SIZE;
    456 
    457 	if (sadb_init_trial(sp, size, KM_NOSLEEP) != 0) {
    458 
    459 		cmn_err(CE_WARN,
    460 		    "Unable to allocate %u entry IPv%u %s SADB hash table",
    461 		    size, ver, name);
    462 
    463 		sadb_destroy(sp, ns);
    464 		size = IPSEC_DEFAULT_HASH_SIZE;
    465 		cmn_err(CE_WARN, "Falling back to %d entries", size);
    466 		(void) sadb_init_trial(sp, size, KM_SLEEP);
    467 	}
    468 }
    469 
    470 
    471 /*
    472  * Initialize an SADB-pair.
    473  */
    474 void
    475 sadbp_init(const char *name, sadbp_t *sp, int type, int size, netstack_t *ns)
    476 {
    477 	sadb_init(name, &sp->s_v4, size, 4, ns);
    478 	sadb_init(name, &sp->s_v6, size, 6, ns);
    479 
    480 	sp->s_satype = type;
    481 
    482 	ASSERT((type == SADB_SATYPE_AH) || (type == SADB_SATYPE_ESP));
    483 	if (type == SADB_SATYPE_AH) {
    484 		ipsec_stack_t	*ipss = ns->netstack_ipsec;
    485 
    486 		ip_drop_register(&ipss->ipsec_sadb_dropper, "IPsec SADB");
    487 	}
    488 }
    489 
    490 /*
    491  * Deliver a single SADB_DUMP message representing a single SA.  This is
    492  * called many times by sadb_dump().
    493  *
    494  * If the return value of this is ENOBUFS (not the same as ENOMEM), then
    495  * the caller should take that as a hint that dupb() on the "original answer"
    496  * failed, and that perhaps the caller should try again with a copyb()ed
    497  * "original answer".
    498  */
    499 static int
    500 sadb_dump_deliver(queue_t *pfkey_q, mblk_t *original_answer, ipsa_t *ipsa,
    501     sadb_msg_t *samsg)
    502 {
    503 	mblk_t *answer;
    504 
    505 	answer = dupb(original_answer);
    506 	if (answer == NULL)
    507 		return (ENOBUFS);
    508 	answer->b_cont = sadb_sa2msg(ipsa, samsg);
    509 	if (answer->b_cont == NULL) {
    510 		freeb(answer);
    511 		return (ENOMEM);
    512 	}
    513 
    514 	/* Just do a putnext, and let keysock deal with flow control. */
    515 	putnext(pfkey_q, answer);
    516 	return (0);
    517 }
    518 
    519 /*
    520  * Common function to allocate and prepare a keysock_out_t M_CTL message.
    521  */
    522 mblk_t *
    523 sadb_keysock_out(minor_t serial)
    524 {
    525 	mblk_t *mp;
    526 	keysock_out_t *kso;
    527 
    528 	mp = allocb(sizeof (ipsec_info_t), BPRI_HI);
    529 	if (mp != NULL) {
    530 		mp->b_datap->db_type = M_CTL;
    531 		mp->b_wptr += sizeof (ipsec_info_t);
    532 		kso = (keysock_out_t *)mp->b_rptr;
    533 		kso->ks_out_type = KEYSOCK_OUT;
    534 		kso->ks_out_len = sizeof (*kso);
    535 		kso->ks_out_serial = serial;
    536 	}
    537 
    538 	return (mp);
    539 }
    540 
    541 /*
    542  * Perform an SADB_DUMP, spewing out every SA in an array of SA fanouts
    543  * to keysock.
    544  */
    545 static int
    546 sadb_dump_fanout(queue_t *pfkey_q, mblk_t *mp, minor_t serial, isaf_t *fanout,
    547     int num_entries, boolean_t do_peers)
    548 {
    549 	int i, error = 0;
    550 	mblk_t *original_answer;
    551 	ipsa_t *walker;
    552 	sadb_msg_t *samsg;
    553 
    554 	/*
    555 	 * For each IPSA hash bucket do:
    556 	 *	- Hold the mutex
    557 	 *	- Walk each entry, doing an sadb_dump_deliver() on it.
    558 	 */
    559 	ASSERT(mp->b_cont != NULL);
    560 	samsg = (sadb_msg_t *)mp->b_cont->b_rptr;
    561 
    562 	original_answer = sadb_keysock_out(serial);
    563 	if (original_answer == NULL)
    564 		return (ENOMEM);
    565 
    566 	for (i = 0; i < num_entries; i++) {
    567 		mutex_enter(&fanout[i].isaf_lock);
    568 		for (walker = fanout[i].isaf_ipsa; walker != NULL;
    569 		    walker = walker->ipsa_next) {
    570 			if (!do_peers && walker->ipsa_haspeer)
    571 				continue;
    572 			error = sadb_dump_deliver(pfkey_q, original_answer,
    573 			    walker, samsg);
    574 			if (error == ENOBUFS) {
    575 				mblk_t *new_original_answer;
    576 
    577 				/* Ran out of dupb's.  Try a copyb. */
    578 				new_original_answer = copyb(original_answer);
    579 				if (new_original_answer == NULL) {
    580 					error = ENOMEM;
    581 				} else {
    582 					freeb(original_answer);
    583 					original_answer = new_original_answer;
    584 					error = sadb_dump_deliver(pfkey_q,
    585 					    original_answer, walker, samsg);
    586 				}
    587 			}
    588 			if (error != 0)
    589 				break;	/* out of for loop. */
    590 		}
    591 		mutex_exit(&fanout[i].isaf_lock);
    592 		if (error != 0)
    593 			break;	/* out of for loop. */
    594 	}
    595 
    596 	freeb(original_answer);
    597 	return (error);
    598 }
    599 
    600 /*
    601  * Dump an entire SADB; outbound first, then inbound.
    602  */
    603 
    604 int
    605 sadb_dump(queue_t *pfkey_q, mblk_t *mp, minor_t serial, sadb_t *sp)
    606 {
    607 	int error;
    608 
    609 	/* Dump outbound */
    610 	error = sadb_dump_fanout(pfkey_q, mp, serial, sp->sdb_of,
    611 	    sp->sdb_hashsize, B_TRUE);
    612 	if (error)
    613 		return (error);
    614 
    615 	/* Dump inbound */
    616 	return sadb_dump_fanout(pfkey_q, mp, serial, sp->sdb_if,
    617 	    sp->sdb_hashsize, B_FALSE);
    618 }
    619 
    620 /*
    621  * Generic sadb table walker.
    622  *
    623  * Call "walkfn" for each SA in each bucket in "table"; pass the
    624  * bucket, the entry and "cookie" to the callback function.
    625  * Take care to ensure that walkfn can delete the SA without screwing
    626  * up our traverse.
    627  *
    628  * The bucket is locked for the duration of the callback, both so that the
    629  * callback can just call sadb_unlinkassoc() when it wants to delete something,
    630  * and so that no new entries are added while we're walking the list.
    631  */
    632 static void
    633 sadb_walker(isaf_t *table, uint_t numentries,
    634     void (*walkfn)(isaf_t *head, ipsa_t *entry, void *cookie),
    635     void *cookie)
    636 {
    637 	int i;
    638 	for (i = 0; i < numentries; i++) {
    639 		ipsa_t *entry, *next;
    640 
    641 		mutex_enter(&table[i].isaf_lock);
    642 
    643 		for (entry = table[i].isaf_ipsa; entry != NULL;
    644 		    entry = next) {
    645 			next = entry->ipsa_next;
    646 			(*walkfn)(&table[i], entry, cookie);
    647 		}
    648 		mutex_exit(&table[i].isaf_lock);
    649 	}
    650 }
    651 
    652 /*
    653  * From the given SA, construct a dl_ct_ipsec_key and
    654  * a dl_ct_ipsec structures to be sent to the adapter as part
    655  * of a DL_CONTROL_REQ.
    656  *
    657  * ct_sa must point to the storage allocated for the key
    658  * structure and must be followed by storage allocated
    659  * for the SA information that must be sent to the driver
    660  * as part of the DL_CONTROL_REQ request.
    661  *
    662  * The is_inbound boolean indicates whether the specified
    663  * SA is part of an inbound SA table.
    664  *
    665  * Returns B_TRUE if the corresponding SA must be passed to
    666  * a provider, B_FALSE otherwise; frees *mp if it returns B_FALSE.
    667  */
    668 static boolean_t
    669 sadb_req_from_sa(ipsa_t *sa, mblk_t *mp, boolean_t is_inbound)
    670 {
    671 	dl_ct_ipsec_key_t *keyp;
    672 	dl_ct_ipsec_t *sap;
    673 	void *ct_sa = mp->b_wptr;
    674 
    675 	ASSERT(MUTEX_HELD(&sa->ipsa_lock));
    676 
    677 	keyp = (dl_ct_ipsec_key_t *)(ct_sa);
    678 	sap = (dl_ct_ipsec_t *)(keyp + 1);
    679 
    680 	IPSECHW_DEBUG(IPSECHW_CAPAB, ("sadb_req_from_sa: "
    681 	    "is_inbound = %d\n", is_inbound));
    682 
    683 	/* initialize flag */
    684 	sap->sadb_sa_flags = 0;
    685 	if (is_inbound) {
    686 		sap->sadb_sa_flags |= DL_CT_IPSEC_INBOUND;
    687 		/*
    688 		 * If an inbound SA has a peer, then mark it has being
    689 		 * an outbound SA as well.
    690 		 */
    691 		if (sa->ipsa_haspeer)
    692 			sap->sadb_sa_flags |= DL_CT_IPSEC_OUTBOUND;
    693 	} else {
    694 		/*
    695 		 * If an outbound SA has a peer, then don't send it,
    696 		 * since we will send the copy from the inbound table.
    697 		 */
    698 		if (sa->ipsa_haspeer) {
    699 			freemsg(mp);
    700 			return (B_FALSE);
    701 		}
    702 		sap->sadb_sa_flags |= DL_CT_IPSEC_OUTBOUND;
    703 	}
    704 
    705 	keyp->dl_key_spi = sa->ipsa_spi;
    706 	bcopy(sa->ipsa_dstaddr, keyp->dl_key_dest_addr,
    707 	    DL_CTL_IPSEC_ADDR_LEN);
    708 	keyp->dl_key_addr_family = sa->ipsa_addrfam;
    709 
    710 	sap->sadb_sa_auth = sa->ipsa_auth_alg;
    711 	sap->sadb_sa_encrypt = sa->ipsa_encr_alg;
    712 
    713 	sap->sadb_key_len_a = sa->ipsa_authkeylen;
    714 	sap->sadb_key_bits_a = sa->ipsa_authkeybits;
    715 	bcopy(sa->ipsa_authkey,
    716 	    sap->sadb_key_data_a, sap->sadb_key_len_a);
    717 
    718 	sap->sadb_key_len_e = sa->ipsa_encrkeylen;
    719 	sap->sadb_key_bits_e = sa->ipsa_encrkeybits;
    720 	bcopy(sa->ipsa_encrkey,
    721 	    sap->sadb_key_data_e, sap->sadb_key_len_e);
    722 
    723 	mp->b_wptr += sizeof (dl_ct_ipsec_t) + sizeof (dl_ct_ipsec_key_t);
    724 	return (B_TRUE);
    725 }
    726 
    727 /*
    728  * Called from AH or ESP to format a message which will be used to inform
    729  * IPsec-acceleration-capable ills of a SADB change.
    730  * (It is not possible to send the message to IP directly from this function
    731  * since the SA, if any, is locked during the call).
    732  *
    733  * dl_operation: DL_CONTROL_REQ operation (add, delete, update, etc)
    734  * sa_type: identifies whether the operation applies to AH or ESP
    735  *	(must be one of SADB_SATYPE_AH or SADB_SATYPE_ESP)
    736  * sa: Pointer to an SA.  Must be non-NULL and locked
    737  *	for ADD, DELETE, GET, and UPDATE operations.
    738  * This function returns an mblk chain that must be passed to IP
    739  * for forwarding to the IPsec capable providers.
    740  */
    741 mblk_t *
    742 sadb_fmt_sa_req(uint_t dl_operation, uint_t sa_type, ipsa_t *sa,
    743     boolean_t is_inbound)
    744 {
    745 	mblk_t *mp;
    746 	dl_control_req_t *ctrl;
    747 	boolean_t need_key = B_FALSE;
    748 	mblk_t *ctl_mp = NULL;
    749 	ipsec_ctl_t *ctl;
    750 
    751 	/*
    752 	 * 1 allocate and initialize DL_CONTROL_REQ M_PROTO
    753 	 * 2 if a key is needed for the operation
    754 	 *    2.1 initialize key
    755 	 *    2.2 if a full SA is needed for the operation
    756 	 *	2.2.1 initialize full SA info
    757 	 * 3 return message; caller will call ill_ipsec_capab_send_all()
    758 	 * to send the resulting message to IPsec capable ills.
    759 	 */
    760 
    761 	ASSERT(sa_type == SADB_SATYPE_AH || sa_type == SADB_SATYPE_ESP);
    762 
    763 	/*
    764 	 * Allocate DL_CONTROL_REQ M_PROTO
    765 	 * We allocate room for the SA even if it's not needed
    766 	 * by some of the operations (for example flush)
    767 	 */
    768 	mp = allocb(sizeof (dl_control_req_t) +
    769 	    sizeof (dl_ct_ipsec_key_t) + sizeof (dl_ct_ipsec_t), BPRI_HI);
    770 	if (mp == NULL)
    771 		return (NULL);
    772 	mp->b_datap->db_type = M_PROTO;
    773 
    774 	/* initialize dl_control_req_t */
    775 	ctrl = (dl_control_req_t *)mp->b_wptr;
    776 	ctrl->dl_primitive = DL_CONTROL_REQ;
    777 	ctrl->dl_operation = dl_operation;
    778 	ctrl->dl_type = sa_type == SADB_SATYPE_AH ? DL_CT_IPSEC_AH :
    779 	    DL_CT_IPSEC_ESP;
    780 	ctrl->dl_key_offset = sizeof (dl_control_req_t);
    781 	ctrl->dl_key_length = sizeof (dl_ct_ipsec_key_t);
    782 	ctrl->dl_data_offset = sizeof (dl_control_req_t) +
    783 	    sizeof (dl_ct_ipsec_key_t);
    784 	ctrl->dl_data_length = sizeof (dl_ct_ipsec_t);
    785 	mp->b_wptr += sizeof (dl_control_req_t);
    786 
    787 	if ((dl_operation == DL_CO_SET) || (dl_operation == DL_CO_DELETE)) {
    788 		ASSERT(sa != NULL);
    789 		ASSERT(MUTEX_HELD(&sa->ipsa_lock));
    790 
    791 		need_key = B_TRUE;
    792 
    793 		/*
    794 		 * Initialize key and SA data. Note that for some
    795 		 * operations the SA data is ignored by the provider
    796 		 * (delete, etc.)
    797 		 */
    798 		if (!sadb_req_from_sa(sa, mp, is_inbound))
    799 			return (NULL);
    800 	}
    801 
    802 	/* construct control message */
    803 	ctl_mp = allocb(sizeof (ipsec_ctl_t), BPRI_HI);
    804 	if (ctl_mp == NULL) {
    805 		cmn_err(CE_WARN, "sadb_fmt_sa_req: allocb failed\n");
    806 		freemsg(mp);
    807 		return (NULL);
    808 	}
    809 
    810 	ctl_mp->b_datap->db_type = M_CTL;
    811 	ctl_mp->b_wptr += sizeof (ipsec_ctl_t);
    812 	ctl_mp->b_cont = mp;
    813 
    814 	ctl = (ipsec_ctl_t *)ctl_mp->b_rptr;
    815 	ctl->ipsec_ctl_type = IPSEC_CTL;
    816 	ctl->ipsec_ctl_len  = sizeof (ipsec_ctl_t);
    817 	ctl->ipsec_ctl_sa_type = sa_type;
    818 
    819 	if (need_key) {
    820 		/*
    821 		 * Keep an additional reference on SA, since it will be
    822 		 * needed by IP to send control messages corresponding
    823 		 * to that SA from its perimeter. IP will do a
    824 		 * IPSA_REFRELE when done with the request.
    825 		 */
    826 		ASSERT(MUTEX_HELD(&sa->ipsa_lock));
    827 		IPSA_REFHOLD(sa);
    828 		ctl->ipsec_ctl_sa = sa;
    829 	} else
    830 		ctl->ipsec_ctl_sa = NULL;
    831 
    832 	return (ctl_mp);
    833 }
    834 
    835 
    836 /*
    837  * Called by sadb_ill_download() to dump the entries for a specific
    838  * fanout table.  For each SA entry in the table passed as argument,
    839  * use mp as a template and constructs a full DL_CONTROL message, and
    840  * call ill_dlpi_send(), provided by IP, to send the resulting
    841  * messages to the ill.
    842  */
    843 static void
    844 sadb_ill_df(ill_t *ill, mblk_t *mp, isaf_t *fanout, int num_entries,
    845     boolean_t is_inbound)
    846 {
    847 	ipsa_t *walker;
    848 	mblk_t *nmp, *salist;
    849 	int i, error = 0;
    850 	ip_stack_t	*ipst = ill->ill_ipst;
    851 	netstack_t	*ns = ipst->ips_netstack;
    852 
    853 	IPSECHW_DEBUG(IPSECHW_SADB, ("sadb_ill_df: fanout at 0x%p ne=%d\n",
    854 	    (void *)fanout, num_entries));
    855 	/*
    856 	 * For each IPSA hash bucket do:
    857 	 *	- Hold the mutex
    858 	 *	- Walk each entry, sending a corresponding request to IP
    859 	 *	  for it.
    860 	 */
    861 	ASSERT(mp->b_datap->db_type == M_PROTO);
    862 
    863 	for (i = 0; i < num_entries; i++) {
    864 		mutex_enter(&fanout[i].isaf_lock);
    865 		salist = NULL;
    866 
    867 		for (walker = fanout[i].isaf_ipsa; walker != NULL;
    868 		    walker = walker->ipsa_next) {
    869 			IPSECHW_DEBUG(IPSECHW_SADB,
    870 			    ("sadb_ill_df: sending SA to ill via IP \n"));
    871 			/*
    872 			 * Duplicate the template mp passed and
    873 			 * complete DL_CONTROL_REQ data.
    874 			 * To be more memory efficient, we could use
    875 			 * dupb() for the M_CTL and copyb() for the M_PROTO
    876 			 * as the M_CTL, since the M_CTL is the same for
    877 			 * every SA entry passed down to IP for the same ill.
    878 			 *
    879 			 * Note that copymsg/copyb ensure that the new mblk
    880 			 * is at least as large as the source mblk even if it's
    881 			 * not using all its storage -- therefore, nmp
    882 			 * has trailing space for sadb_req_from_sa to add
    883 			 * the SA-specific bits.
    884 			 */
    885 			mutex_enter(&walker->ipsa_lock);
    886 			if (ipsec_capab_match(ill,
    887 			    ill->ill_phyint->phyint_ifindex, ill->ill_isv6,
    888 			    walker, ns)) {
    889 				nmp = copymsg(mp);
    890 				if (nmp == NULL) {
    891 					IPSECHW_DEBUG(IPSECHW_SADB,
    892 					    ("sadb_ill_df: alloc error\n"));
    893 					error = ENOMEM;
    894 					mutex_exit(&walker->ipsa_lock);
    895 					break;
    896 				}
    897 				if (sadb_req_from_sa(walker, nmp, is_inbound)) {
    898 					nmp->b_next = salist;
    899 					salist = nmp;
    900 				}
    901 			}
    902 			mutex_exit(&walker->ipsa_lock);
    903 		}
    904 		mutex_exit(&fanout[i].isaf_lock);
    905 		while (salist != NULL) {
    906 			nmp = salist;
    907 			salist = nmp->b_next;
    908 			nmp->b_next = NULL;
    909 			ill_dlpi_send(ill, nmp);
    910 		}
    911 		if (error != 0)
    912 			break;	/* out of for loop. */
    913 	}
    914 }
    915 
    916 /*
    917  * Send the AH or ESP SADB (as selected by sa_type) down to ill.
    918  * ill_ipsec_capab_add() is the caller.
    919  *
    920  * A single DL_CONTROL_REQ template is built here.  sadb_ill_df() is
    921  * then run over both fanouts of the v4 or v6 SADB (whichever matches
    922  * ill) and calls ill_dlpi_send() with one copy of the template per SA
    923  * it passes down.
    924  */
    925 void
    926 sadb_ill_download(ill_t *ill, uint_t sa_type)
    927 {
    928 	netstack_t *ns = ill->ill_ipst->ips_netstack;
    929 	dl_control_req_t *req;
    930 	mblk_t *tmpl;		/* template DL_CONTROL_REQ */
    931 	sadbp_t *pair;
    932 	sadb_t *db;
    933 
    934 	ASSERT(sa_type == SADB_SATYPE_AH || sa_type == SADB_SATYPE_ESP);
    935 
    936 	/*
    937 	 * The M_PROTO template has room for the request header plus the
    938 	 * key and data parts that are completed separately for each SA.
    939 	 */
    940 	tmpl = allocb(sizeof (*req) + sizeof (dl_ct_ipsec_key_t) +
    941 	    sizeof (dl_ct_ipsec_t), BPRI_HI);
    942 	if (tmpl == NULL)
    943 		return;
    944 	tmpl->b_datap->db_type = M_PROTO;
    945 
    946 	/* Only the header is written here, so b_wptr stops right after it. */
    947 	req = (dl_control_req_t *)tmpl->b_wptr;
    948 	tmpl->b_wptr += sizeof (*req);
    949 	req->dl_primitive = DL_CONTROL_REQ;
    950 	req->dl_operation = DL_CO_SET;
    951 	if (sa_type == SADB_SATYPE_AH)
    952 		req->dl_type = DL_CT_IPSEC_AH;
    953 	else
    954 		req->dl_type = DL_CT_IPSEC_ESP;
    955 	req->dl_key_offset = sizeof (*req);
    956 	req->dl_key_length = sizeof (dl_ct_ipsec_key_t);
    957 	req->dl_data_offset = sizeof (*req) + sizeof (dl_ct_ipsec_key_t);
    958 	req->dl_data_length = sizeof (dl_ct_ipsec_t);
    959 
    960 	/* ESP has its own database pair; every other type gets AH's. */
    961 	if (sa_type == SADB_SATYPE_ESP)
    962 		pair = &ns->netstack_ipsecesp->esp_sadb;
    963 	else
    964 		pair = &ns->netstack_ipsecah->ah_sadb;
    965 
    966 	/* Pick the address family that matches the interface. */
    967 	if (ill->ill_isv6)
    968 		db = &pair->s_v6;
    969 	else
    970 		db = &pair->s_v4;
    971 
    972 	/*
    973 	 * sdb_of is walked with the inbound flag clear, sdb_if with it
    974 	 * set.  sadb_ill_df() sends copies, so the template is still ours
    975 	 * to free afterwards.
    976 	 */
    977 	sadb_ill_df(ill, tmpl, db->sdb_of, db->sdb_hashsize, B_FALSE);
    978 	sadb_ill_df(ill, tmpl, db->sdb_if, db->sdb_hashsize, B_TRUE);
    979 	freemsg(tmpl);
    980 }
    981 
    982 /*
    983  * Unlink every SA in the fanout *tablep (no-op if it is NULL) and bump
    984  * each bucket's isaf_gen.  forever == B_FALSE (e.g. SADB_FLUSH) leaves
    985  * the table usable; forever == B_TRUE (module unload) also destroys the
    986  * bucket mutexes, frees the table and sets *tablep to NULL.
    987  */
    988 static void
    989 sadb_destroyer(isaf_t **tablep, uint_t numentries, boolean_t forever)
    990 {
    991 	uint_t i;		/* unsigned to match numentries */
    992 	isaf_t *table = *tablep;
    993 
    994 	if (table == NULL)
    995 		return;
    996 
    997 	for (i = 0; i < numentries; i++) {
    998 		mutex_enter(&table[i].isaf_lock);
    999 		while (table[i].isaf_ipsa != NULL)
   1000 			sadb_unlinkassoc(table[i].isaf_ipsa);
   1001 		table[i].isaf_gen++;
   1002 		mutex_exit(&table[i].isaf_lock);
   1003 		if (forever)
   1004 			mutex_destroy(&(table[i].isaf_lock));
   1005 	}
   1006 	/* Only a permanent teardown gives the array itself back. */
   1007 	if (forever) {
   1008 		*tablep = NULL;
   1009 		kmem_free(table, numentries * sizeof (*table));
   1010 	}
   1011 }
   1012 
   1013 /*
   1014  * Entry points to sadb_destroyer().  sadb_flush() empties sp in place.
   1015  */
   1016 static void
   1017 sadb_flush(sadb_t *sp, netstack_t *ns)
   1018 {
   1019 	/*
   1020 	 * Flush out each bucket, one at a time; forever == B_FALSE keeps the
   1021 	 * fanout tables and their bucket mutexes.  Were it not for keysock's
   1022 	 * enforcement, an add could land on the heels of a flush.  With that
   1023 	 * enforcement, however, this makes ESP's job easy.
   1024 	 */
   1025 	sadb_destroyer(&sp->sdb_of, sp->sdb_hashsize, B_FALSE);
   1026 	sadb_destroyer(&sp->sdb_if, sp->sdb_hashsize, B_FALSE);
   1027 
   1028 	/* For each acquire, destroy it; leave the bucket mutex alone. */
   1029 	sadb_destroy_acqlist(&sp->sdb_acq, sp->sdb_hashsize, B_FALSE, ns);
   1030 }
   1031 /* Permanent teardown of sp: with B_TRUE the fanout tables are freed too. */
   1032 static void
   1033 sadb_destroy(sadb_t *sp, netstack_t *ns)
   1034 {
   1035 	sadb_destroyer(&sp->sdb_of, sp->sdb_hashsize, B_TRUE);
   1036 	sadb_destroyer(&sp->sdb_if, sp->sdb_hashsize, B_TRUE);
   1037 
   1038 	/* For each acquire, destroy it, including the bucket mutex. */
   1039 	sadb_destroy_acqlist(&sp->sdb_acq, sp->sdb_hashsize, B_TRUE, ns);
   1040 	/* sadb_destroyer() NULLed both fanouts; sdb_acq should be NULL too. */
   1041 	ASSERT(sp->sdb_of == NULL);
   1042 	ASSERT(sp->sdb_if == NULL);
   1043 	ASSERT(sp->sdb_acq == NULL);
   1044 }
   1045 
   1046 static void
   1047 sadb_send_flush_req(sadbp_t *spp)
   1048 {
   1049 	mblk_t *ctl_mp;
   1050 
   1051 	/*
   1052 	 * we've been unplumbed, or never were plumbed; don't go there.
   1053 	 */
   1054 	if (spp->s_ip_q == NULL)
   1055 		return;
   1056 
   1057 	/* have IP send a flush msg to the IPsec accelerators */
   1058 	ctl_mp = sadb_fmt_sa_req(DL_CO_FLUSH, spp