Home | History | Annotate | Download | only in ip
      1 /*
      2  * CDDL HEADER START
      3  *
      4  * The contents of this file are subject to the terms of the
      5  * Common Development and Distribution License (the "License").
      6  * You may not use this file except in compliance with the License.
      7  *
      8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
      9  * or http://www.opensolaris.org/os/licensing.
     10  * See the License for the specific language governing permissions
     11  * and limitations under the License.
     12  *
     13  * When distributing Covered Code, include this CDDL HEADER in each
     14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
     15  * If applicable, add the following below this CDDL HEADER, with the
     16  * fields enclosed by brackets "[]" replaced with your own identifying
     17  * information: Portions Copyright [yyyy] [name of copyright owner]
     18  *
     19  * CDDL HEADER END
     20  */
     21 /*
     22  * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
     23  * Use is subject to license terms.
     24  */
     25 
     26 #include <sys/types.h>
     27 #include <sys/stream.h>
     28 #include <sys/stropts.h>
     29 #include <sys/errno.h>
     30 #include <sys/strlog.h>
     31 #include <sys/tihdr.h>
     32 #include <sys/socket.h>
     33 #include <sys/ddi.h>
     34 #include <sys/sunddi.h>
     35 #include <sys/mkdev.h>
     36 #include <sys/kmem.h>
     37 #include <sys/zone.h>
     38 #include <sys/sysmacros.h>
     39 #include <sys/cmn_err.h>
     40 #include <sys/vtrace.h>
     41 #include <sys/debug.h>
     42 #include <sys/atomic.h>
     43 #include <sys/strsun.h>
     44 #include <sys/random.h>
     45 #include <netinet/in.h>
     46 #include <net/if.h>
     47 #include <netinet/ip6.h>
     48 #include <netinet/icmp6.h>
     49 #include <net/pfkeyv2.h>
     50 
     51 #include <inet/common.h>
     52 #include <inet/mi.h>
     53 #include <inet/ip.h>
     54 #include <inet/ip6.h>
     55 #include <inet/nd.h>
     56 #include <inet/ipsec_info.h>
     57 #include <inet/ipsec_impl.h>
     58 #include <inet/sadb.h>
     59 #include <inet/ipsecah.h>
     60 #include <inet/ipsec_impl.h>
     61 #include <inet/ipdrop.h>
     62 #include <sys/taskq.h>
     63 #include <sys/policy.h>
     64 #include <sys/iphada.h>
     65 #include <sys/strsun.h>
     66 
     67 #include <sys/crypto/common.h>
     68 #include <sys/crypto/api.h>
     69 #include <sys/kstat.h>
     70 #include <sys/strsubr.h>
     71 
/*
 * Table of ND variables supported by ipsecah.  This array is the template:
 * ipsecah_stack_init() copies it into each stack's ipsecah_params, and
 * ipsecah_param_register() loads that copy into the stack's ipsecah_g_nd.
 * All of these are alterable, within the min/max values given, at run time.
 *
 * The accessor macros that follow the table select entries by position, so
 * the order of the rows must stay in step with the macro indices.
 */
static	ipsecahparam_t	lcl_param_arr[] = {
	/* min	max			value	name */
	{ 0,	3,			0,	"ipsecah_debug"},
	{ 125,	32000, SADB_AGE_INTERVAL_DEFAULT,	"ipsecah_age_interval"},
	{ 1,	10,			1,	"ipsecah_reap_delay"},
	{ 1,	SADB_MAX_REPLAY,	64,	"ipsecah_replay_size"},
	{ 1,	300,			15,	"ipsecah_acquire_timeout"},
	{ 1,	1800,			90,	"ipsecah_larval_timeout"},
	/* Default lifetime values for ACQUIRE messages. */
	{ 0,	0xffffffffU,		0,	"ipsecah_default_soft_bytes"},
	{ 0,	0xffffffffU,		0,	"ipsecah_default_hard_bytes"},
	{ 0,	0xffffffffU,		24000,	"ipsecah_default_soft_addtime"},
	{ 0,	0xffffffffU,		28800,	"ipsecah_default_hard_addtime"},
	{ 0,	0xffffffffU,		0,	"ipsecah_default_soft_usetime"},
	{ 0,	0xffffffffU,		0,	"ipsecah_default_hard_usetime"},
	{ 0,	1,			0,	"ipsecah_log_unknown_spi"},
};
/*
 * Positional accessors.  They are written to be used as a member expression
 * on an ipsecah_stack_t pointer, e.g. ahstack->ipsecah_debug, which expands
 * to ahstack->ipsecah_params[0].ipsecah_param_value.
 */
#define	ipsecah_debug			ipsecah_params[0].ipsecah_param_value
#define	ipsecah_age_interval		ipsecah_params[1].ipsecah_param_value
/* Upper bound of the age interval, taken from the same row as the value. */
#define	ipsecah_age_int_max		ipsecah_params[1].ipsecah_param_max
#define	ipsecah_reap_delay		ipsecah_params[2].ipsecah_param_value
#define	ipsecah_replay_size		ipsecah_params[3].ipsecah_param_value
#define	ipsecah_acquire_timeout		ipsecah_params[4].ipsecah_param_value
#define	ipsecah_larval_timeout		ipsecah_params[5].ipsecah_param_value
#define	ipsecah_default_soft_bytes	ipsecah_params[6].ipsecah_param_value
#define	ipsecah_default_hard_bytes	ipsecah_params[7].ipsecah_param_value
#define	ipsecah_default_soft_addtime	ipsecah_params[8].ipsecah_param_value
#define	ipsecah_default_hard_addtime	ipsecah_params[9].ipsecah_param_value
#define	ipsecah_default_soft_usetime	ipsecah_params[10].ipsecah_param_value
#define	ipsecah_default_hard_usetime	ipsecah_params[11].ipsecah_param_value
#define	ipsecah_log_unknown_spi		ipsecah_params[12].ipsecah_param_value
    108 
/*
 * Debug output helpers.  The `a' argument is a complete, parenthesized
 * printf argument list, e.g. ah1dbg(ahstack, ("x = %d\n", x)).
 * ah0dbg() prints unconditionally; ah1dbg(), ah2dbg() and ah3dbg() print
 * only when the stack's ipsecah_debug value is at least 1, 2 or 3.
 *
 * NOTE(review): the level-gated forms expand to a bare `if' statement and
 * do not parenthesize `ahstack'.  Used as the unbraced body of an if/else
 * they would capture the following `else'; keep call sites braced.
 */
#define	ah0dbg(a)	printf a
/* NOTE:  != 0 instead of > 0 so lint doesn't complain. */
#define	ah1dbg(ahstack, a)	if (ahstack->ipsecah_debug != 0) printf a
#define	ah2dbg(ahstack, a)	if (ahstack->ipsecah_debug > 1) printf a
#define	ah3dbg(ahstack, a)	if (ahstack->ipsecah_debug > 2) printf a
    114 
/*
 * XXX This is broken. Padding should be determined dynamically
 * depending on the ICV size and IP version number so that the
 * total AH header size is a multiple of 32 bits or 64 bits
 * for V4 and V6 respectively. For 96bit ICVs we have no problems.
 * Anything different from that, we need to fix our code.
 *
 * Note that both constants below are currently 4 bytes (32 bits), i.e.
 * the IPv6 value does not implement the 64-bit rule described above.
 */
#define	IPV4_PADDING_ALIGN	0x04	/* Multiple of 32 bits */
#define	IPV6_PADDING_ALIGN	0x04	/* Multiple of 32 bits */

/*
 * Helper macro. Avoids a call to msgdsize if there is only one
 * mblk in the chain.
 *
 * The argument is evaluated more than once, so it must not have side
 * effects.
 */
#define	AH_MSGSIZE(mp) ((mp)->b_cont != NULL ? msgdsize(mp) : MBLKL(mp))
    130 
    131 
/* Forward declarations: AH packet-processing routines. */
static ipsec_status_t ah_auth_out_done(mblk_t *);
static ipsec_status_t ah_auth_in_done(mblk_t *);
static mblk_t *ah_process_ip_options_v4(mblk_t *, ipsa_t *, int *, uint_t,
    boolean_t, ipsecah_stack_t *);
static mblk_t *ah_process_ip_options_v6(mblk_t *, ipsa_t *, int *, uint_t,
    boolean_t, ipsecah_stack_t *);
static void ah_getspi(mblk_t *, keysock_in_t *, ipsecah_stack_t *);
static ipsec_status_t ah_inbound_accelerated(mblk_t *, boolean_t, ipsa_t *,
    uint32_t);
static ipsec_status_t ah_outbound_accelerated_v4(mblk_t *, ipsa_t *);
static ipsec_status_t ah_outbound_accelerated_v6(mblk_t *, ipsa_t *);
static ipsec_status_t ah_outbound(mblk_t *);

/* Forward declarations: STREAMS entry points and per-stack setup. */
static int ipsecah_open(queue_t *, dev_t *, int, int, cred_t *);
static int ipsecah_close(queue_t *);
static void ipsecah_rput(queue_t *, mblk_t *);
static void ipsecah_wput(queue_t *, mblk_t *);
static void ah_send_acquire(ipsacq_t *, mblk_t *, netstack_t *);
static boolean_t ah_register_out(uint32_t, uint32_t, uint_t, ipsecah_stack_t *);
static void	*ipsecah_stack_init(netstackid_t stackid, netstack_t *ns);
static void	ipsecah_stack_fini(netstackid_t stackid, void *arg);

/* Function pointer defined outside this file. */
extern void (*cl_inet_getspi)(uint8_t, uint8_t *, size_t);

/* Setable in /etc/system */
/* Passed to sadbp_init() as the hash size for each stack's AH SADB. */
uint32_t ah_hash_size = IPSEC_DEFAULT_HASH_SIZE;

/* Module-wide task queue; created in ipsecah_ddi_init(). */
static taskq_t *ah_taskq;

/*
 * STREAMS module identification.  In module_info(9S) order the fields are
 * module id, module name, min packet size, max packet size, high-water
 * mark and low-water mark.  The id (info.mi_idnum) is also handed to
 * sadb_retimeout() by ah_ager().
 */
static struct module_info info = {
	5136, "ipsecah", 0, INFPSZ, 65536, 1024
};

/*
 * Read-side queue initialization: put routine, no service routine,
 * open and close routines, and the module_info above.
 */
static struct qinit rinit = {
	(pfi_t)ipsecah_rput, NULL, ipsecah_open, ipsecah_close, NULL, &info,
	NULL
};

/* Write-side queue initialization; same shape as the read side. */
static struct qinit winit = {
	(pfi_t)ipsecah_wput, NULL, ipsecah_open, ipsecah_close, NULL, &info,
	NULL
};

/* Exported streamtab tying the read and write qinit structures together. */
struct streamtab ipsecahinfo = {
	&rinit, &winit, NULL, NULL
};

static int ah_kstat_update(kstat_t *, int);

/* Published through IP_ACQUIRE_STAT() in ah_kstat_init(). */
uint64_t ipsacq_maxpackets = IPSACQ_MAXPACKETS;
    182 
    183 static boolean_t
    184 ah_kstat_init(ipsecah_stack_t *ahstack, netstackid_t stackid)
    185 {
    186 	ipsec_stack_t	*ipss = ahstack->ipsecah_netstack->netstack_ipsec;
    187 
    188 	ahstack->ah_ksp = kstat_create_netstack("ipsecah", 0, "ah_stat", "net",
    189 	    KSTAT_TYPE_NAMED, sizeof (ah_kstats_t) / sizeof (kstat_named_t),
    190 	    KSTAT_FLAG_PERSISTENT, stackid);
    191 
    192 	if (ahstack->ah_ksp == NULL || ahstack->ah_ksp->ks_data == NULL)
    193 		return (B_FALSE);
    194 
    195 	ahstack->ah_kstats = ahstack->ah_ksp->ks_data;
    196 
    197 	ahstack->ah_ksp->ks_update = ah_kstat_update;
    198 	ahstack->ah_ksp->ks_private = (void *)(uintptr_t)stackid;
    199 
    200 #define	K64 KSTAT_DATA_UINT64
    201 #define	KI(x) kstat_named_init(&(ahstack->ah_kstats->ah_stat_##x), #x, K64)
    202 
    203 	KI(num_aalgs);
    204 	KI(good_auth);
    205 	KI(bad_auth);
    206 	KI(replay_failures);
    207 	KI(replay_early_failures);
    208 	KI(keysock_in);
    209 	KI(out_requests);
    210 	KI(acquire_requests);
    211 	KI(bytes_expired);
    212 	KI(out_discards);
    213 	KI(in_accelerated);
    214 	KI(out_accelerated);
    215 	KI(noaccel);
    216 	KI(crypto_sync);
    217 	KI(crypto_async);
    218 	KI(crypto_failures);
    219 
    220 #undef KI
    221 #undef K64
    222 
    223 	kstat_install(ahstack->ah_ksp);
    224 	IP_ACQUIRE_STAT(ipss, maxpackets, ipsacq_maxpackets);
    225 	return (B_TRUE);
    226 }
    227 
    228 static int
    229 ah_kstat_update(kstat_t *kp, int rw)
    230 {
    231 	ah_kstats_t	*ekp;
    232 	netstackid_t	stackid = (netstackid_t)(uintptr_t)kp->ks_private;
    233 	netstack_t	*ns;
    234 	ipsec_stack_t	*ipss;
    235 
    236 	if ((kp == NULL) || (kp->ks_data == NULL))
    237 		return (EIO);
    238 
    239 	if (rw == KSTAT_WRITE)
    240 		return (EACCES);
    241 
    242 	ns = netstack_find_by_stackid(stackid);
    243 	if (ns == NULL)
    244 		return (-1);
    245 	ipss = ns->netstack_ipsec;
    246 	if (ipss == NULL) {
    247 		netstack_rele(ns);
    248 		return (-1);
    249 	}
    250 	ekp = (ah_kstats_t *)kp->ks_data;
    251 
    252 	mutex_enter(&ipss->ipsec_alg_lock);
    253 	ekp->ah_stat_num_aalgs.value.ui64 = ipss->ipsec_nalgs[IPSEC_ALG_AUTH];
    254 	mutex_exit(&ipss->ipsec_alg_lock);
    255 
    256 	netstack_rele(ns);
    257 	return (0);
    258 }
    259 
    260 /*
    261  * Don't have to lock ipsec_age_interval, as only one thread will access it at
    262  * a time, because I control the one function that does a qtimeout() on
    263  * ah_pfkey_q.
    264  */
    265 static void
    266 ah_ager(void *arg)
    267 {
    268 	ipsecah_stack_t *ahstack = (ipsecah_stack_t *)arg;
    269 	netstack_t	*ns = ahstack->ipsecah_netstack;
    270 	hrtime_t begin = gethrtime();
    271 
    272 	sadb_ager(&ahstack->ah_sadb.s_v4, ahstack->ah_pfkey_q,
    273 	    ahstack->ah_sadb.s_ip_q, ahstack->ipsecah_reap_delay, ns);
    274 	sadb_ager(&ahstack->ah_sadb.s_v6, ahstack->ah_pfkey_q,
    275 	    ahstack->ah_sadb.s_ip_q, ahstack->ipsecah_reap_delay, ns);
    276 
    277 	ahstack->ah_event = sadb_retimeout(begin, ahstack->ah_pfkey_q,
    278 	    ah_ager, ahstack,
    279 	    &ahstack->ipsecah_age_interval, ahstack->ipsecah_age_int_max,
    280 	    info.mi_idnum);
    281 }
    282 
    283 /*
    284  * Get an AH NDD parameter.
    285  */
    286 /* ARGSUSED */
    287 static int
    288 ipsecah_param_get(q, mp, cp, cr)
    289 	queue_t	*q;
    290 	mblk_t	*mp;
    291 	caddr_t	cp;
    292 	cred_t *cr;
    293 {
    294 	ipsecahparam_t	*ipsecahpa = (ipsecahparam_t *)cp;
    295 	uint_t value;
    296 	ipsecah_stack_t	*ahstack = (ipsecah_stack_t *)q->q_ptr;
    297 
    298 	mutex_enter(&ahstack->ipsecah_param_lock);
    299 	value = ipsecahpa->ipsecah_param_value;
    300 	mutex_exit(&ahstack->ipsecah_param_lock);
    301 
    302 	(void) mi_mpprintf(mp, "%u", value);
    303 	return (0);
    304 }
    305 
    306 /*
    307  * This routine sets an NDD variable in a ipsecahparam_t structure.
    308  */
    309 /* ARGSUSED */
    310 static int
    311 ipsecah_param_set(q, mp, value, cp, cr)
    312 	queue_t	*q;
    313 	mblk_t	*mp;
    314 	char	*value;
    315 	caddr_t	cp;
    316 	cred_t *cr;
    317 {
    318 	ulong_t	new_value;
    319 	ipsecahparam_t	*ipsecahpa = (ipsecahparam_t *)cp;
    320 	ipsecah_stack_t	*ahstack = (ipsecah_stack_t *)q->q_ptr;
    321 
    322 	/*
    323 	 * Fail the request if the new value does not lie within the
    324 	 * required bounds.
    325 	 */
    326 	if (ddi_strtoul(value, NULL, 10, &new_value) != 0 ||
    327 	    new_value < ipsecahpa->ipsecah_param_min ||
    328 	    new_value > ipsecahpa->ipsecah_param_max) {
    329 		    return (EINVAL);
    330 	}
    331 
    332 	/* Set the new value */
    333 	mutex_enter(&ahstack->ipsecah_param_lock);
    334 	ipsecahpa->ipsecah_param_value = new_value;
    335 	mutex_exit(&ahstack->ipsecah_param_lock);
    336 	return (0);
    337 }
    338 
    339 /*
    340  * Using lifetime NDD variables, fill in an extended combination's
    341  * lifetime information.
    342  */
    343 void
    344 ipsecah_fill_defs(sadb_x_ecomb_t *ecomb, netstack_t *ns)
    345 {
    346 	ipsecah_stack_t	*ahstack = ns->netstack_ipsecah;
    347 
    348 	ecomb->sadb_x_ecomb_soft_bytes = ahstack->ipsecah_default_soft_bytes;
    349 	ecomb->sadb_x_ecomb_hard_bytes = ahstack->ipsecah_default_hard_bytes;
    350 	ecomb->sadb_x_ecomb_soft_addtime =
    351 	    ahstack->ipsecah_default_soft_addtime;
    352 	ecomb->sadb_x_ecomb_hard_addtime =
    353 	    ahstack->ipsecah_default_hard_addtime;
    354 	ecomb->sadb_x_ecomb_soft_usetime =
    355 	    ahstack->ipsecah_default_soft_usetime;
    356 	ecomb->sadb_x_ecomb_hard_usetime =
    357 	    ahstack->ipsecah_default_hard_usetime;
    358 }
    359 
/*
 * Initialize things for AH at module load time.
 *
 * Creates the module-wide ah_taskq and registers the per-stack create and
 * destroy callbacks with the netstack framework.
 *
 * Always returns B_TRUE.
 */
boolean_t
ipsecah_ddi_init(void)
{
	ah_taskq = taskq_create("ah_taskq", 1, minclsyspri,
	    IPSEC_TASKQ_MIN, IPSEC_TASKQ_MAX, 0);

	/*
	 * We want to be informed each time a stack is created or
	 * destroyed in the kernel, so we can maintain the
	 * set of ipsecah_stack_t's.
	 */
	/* Create = ipsecah_stack_init, no shutdown hook, destroy = _fini. */
	netstack_register(NS_IPSECAH, ipsecah_stack_init, NULL,
	    ipsecah_stack_fini);

	return (B_TRUE);
}
    379 
    380 /*
    381  * Walk through the param array specified registering each element with the
    382  * named dispatch handler.
    383  */
    384 static boolean_t
    385 ipsecah_param_register(IDP *ndp, ipsecahparam_t *ahp, int cnt)
    386 {
    387 	for (; cnt-- > 0; ahp++) {
    388 		if (ahp->ipsecah_param_name != NULL &&
    389 		    ahp->ipsecah_param_name[0]) {
    390 			if (!nd_load(ndp,
    391 			    ahp->ipsecah_param_name,
    392 			    ipsecah_param_get, ipsecah_param_set,
    393 			    (caddr_t)ahp)) {
    394 				nd_free(ndp);
    395 				return (B_FALSE);
    396 			}
    397 		}
    398 	}
    399 	return (B_TRUE);
    400 }
    401 
    402 /*
    403  * Initialize things for AH for each stack instance
    404  */
    405 static void *
    406 ipsecah_stack_init(netstackid_t stackid, netstack_t *ns)
    407 {
    408 	ipsecah_stack_t	*ahstack;
    409 	ipsecahparam_t	*ahp;
    410 
    411 	ahstack = (ipsecah_stack_t *)kmem_zalloc(sizeof (*ahstack), KM_SLEEP);
    412 	ahstack->ipsecah_netstack = ns;
    413 
    414 	ahp = (ipsecahparam_t *)kmem_alloc(sizeof (lcl_param_arr), KM_SLEEP);
    415 	ahstack->ipsecah_params = ahp;
    416 	bcopy(lcl_param_arr, ahp, sizeof (lcl_param_arr));
    417 
    418 	(void) ipsecah_param_register(&ahstack->ipsecah_g_nd, ahp,
    419 	    A_CNT(lcl_param_arr));
    420 
    421 	(void) ah_kstat_init(ahstack, stackid);
    422 
    423 	ahstack->ah_sadb.s_acquire_timeout = &ahstack->ipsecah_acquire_timeout;
    424 	ahstack->ah_sadb.s_acqfn = ah_send_acquire;
    425 	sadbp_init("AH", &ahstack->ah_sadb, SADB_SATYPE_AH, ah_hash_size,
    426 	    ahstack->ipsecah_netstack);
    427 
    428 	mutex_init(&ahstack->ipsecah_param_lock, NULL, MUTEX_DEFAULT, 0);
    429 
    430 	ip_drop_register(&ahstack->ah_dropper, "IPsec AH");
    431 	return (ahstack);
    432 }
    433 
/*
 * Destroy things for AH at module unload time.
 *
 * Undoes ipsecah_ddi_init(): unregisters from the netstack framework and
 * then destroys the module-wide task queue.
 */
void
ipsecah_ddi_destroy(void)
{
	netstack_unregister(NS_IPSECAH);
	taskq_destroy(ah_taskq);
}
    443 
/*
 * Destroy things for AH for one stack.
 *
 * Registered with netstack_register() in ipsecah_ddi_init() as the destroy
 * callback.  Releases everything ipsecah_stack_init() set up.
 *
 * stackid - id of the netstack being destroyed (needed to delete the kstat).
 * arg     - the ipsecah_stack_t returned by ipsecah_stack_init().
 */
static void
ipsecah_stack_fini(netstackid_t stackid, void *arg)
{
	ipsecah_stack_t *ahstack = (ipsecah_stack_t *)arg;

	/* Cancel the pending ager timeout, if keysock is still attached. */
	if (ahstack->ah_pfkey_q != NULL) {
		(void) quntimeout(ahstack->ah_pfkey_q, ahstack->ah_event);
	}
	/* Detach the ACQUIRE hooks before tearing down the SADB. */
	ahstack->ah_sadb.s_acqfn = NULL;
	ahstack->ah_sadb.s_acquire_timeout = NULL;
	sadbp_destroy(&ahstack->ah_sadb, ahstack->ipsecah_netstack);
	ip_drop_unregister(&ahstack->ah_dropper);
	mutex_destroy(&ahstack->ipsecah_param_lock);
	nd_free(&ahstack->ipsecah_g_nd);

	/* Size must match the kmem_alloc() in ipsecah_stack_init(). */
	kmem_free(ahstack->ipsecah_params, sizeof (lcl_param_arr));
	ahstack->ipsecah_params = NULL;
	kstat_delete_netstack(ahstack->ah_ksp, stackid);
	ahstack->ah_ksp = NULL;
	ahstack->ah_kstats = NULL;

	kmem_free(ahstack, sizeof (*ahstack));
}
    470 
    471 /*
    472  * AH module open routine. The module should be opened by keysock.
    473  */
    474 /* ARGSUSED */
    475 static int
    476 ipsecah_open(queue_t *q, dev_t *devp, int flag, int sflag, cred_t *credp)
    477 {
    478 	netstack_t	*ns;
    479 	ipsecah_stack_t	*ahstack;
    480 
    481 	if (secpolicy_ip_config(credp, B_FALSE) != 0)
    482 		return (EPERM);
    483 
    484 	if (q->q_ptr != NULL)
    485 		return (0);  /* Re-open of an already open instance. */
    486 
    487 	if (sflag != MODOPEN)
    488 		return (EINVAL);
    489 
    490 	ns = netstack_find_by_cred(credp);
    491 	ASSERT(ns != NULL);
    492 	ahstack = ns->netstack_ipsecah;
    493 	ASSERT(ahstack != NULL);
    494 
    495 	/*
    496 	 * ASSUMPTIONS (because I'm MT_OCEXCL):
    497 	 *
    498 	 *	* I'm being pushed on top of IP for all my opens (incl. #1).
    499 	 *	* Only ipsecah_open() can write into ah_sadb.s_ip_q.
    500 	 *	* Because of this, I can check lazily for ah_sadb.s_ip_q.
    501 	 *
    502 	 *  If these assumptions are wrong, I'm in BIG trouble...
    503 	 */
    504 
    505 	q->q_ptr = ahstack;
    506 	WR(q)->q_ptr = q->q_ptr;
    507 
    508 	if (ahstack->ah_sadb.s_ip_q == NULL) {
    509 		struct T_unbind_req *tur;
    510 
    511 		ahstack->ah_sadb.s_ip_q = WR(q);
    512 		/* Allocate an unbind... */
    513 		ahstack->ah_ip_unbind = allocb(sizeof (struct T_unbind_req),
    514 		    BPRI_HI);
    515 
    516 		/*
    517 		 * Send down T_BIND_REQ to bind IPPROTO_AH.
    518 		 * Handle the ACK here in AH.
    519 		 */
    520 		qprocson(q);
    521 		if (ahstack->ah_ip_unbind == NULL ||
    522 		    !sadb_t_bind_req(ahstack->ah_sadb.s_ip_q, IPPROTO_AH)) {
    523 			if (ahstack->ah_ip_unbind != NULL) {
    524 				freeb(ahstack->ah_ip_unbind);
    525 				ahstack->ah_ip_unbind = NULL;
    526 			}
    527 			q->q_ptr = NULL;
    528 			qprocsoff(q);
    529 			netstack_rele(ahstack->ipsecah_netstack);
    530 			return (ENOMEM);
    531 		}
    532 
    533 		ahstack->ah_ip_unbind->b_datap->db_type = M_PROTO;
    534 		tur = (struct T_unbind_req *)ahstack->ah_ip_unbind->b_rptr;
    535 		tur->PRIM_type = T_UNBIND_REQ;
    536 	} else {
    537 		qprocson(q);
    538 	}
    539 
    540 	/*
    541 	 * For now, there's not much I can do.  I'll be getting a message
    542 	 * passed down to me from keysock (in my wput), and a T_BIND_ACK
    543 	 * up from IP (in my rput).
    544 	 */
    545 
    546 	return (0);
    547 }
    548 
/*
 * AH module close routine.
 *
 * q - read queue of the instance being closed; q_ptr is the
 *     ipsecah_stack_t set up by ipsecah_open().
 *
 * Sends the preallocated T_UNBIND_REQ if this instance carries the stack's
 * IP queue, turns the queue procedures off, detaches keysock state if this
 * was the PF_KEY queue, and tries to move ah_sadb.s_ip_q to another
 * instance.  Finally drops the netstack hold.
 *
 * Always returns 0.
 */
static int
ipsecah_close(queue_t *q)
{
	ipsecah_stack_t	*ahstack = (ipsecah_stack_t *)q->q_ptr;

	/*
	 * If ah_sadb.s_ip_q is attached to this instance, send a
	 * T_UNBIND_REQ to IP for the instance before doing
	 * a qprocsoff().
	 */
	if (WR(q) == ahstack->ah_sadb.s_ip_q &&
	    ahstack->ah_ip_unbind != NULL) {
		/* putnext() hands the mblk downstream; forget our pointer. */
		putnext(WR(q), ahstack->ah_ip_unbind);
		ahstack->ah_ip_unbind = NULL;
	}

	/*
	 * Clean up q_ptr, if needed.
	 */
	/* NOTE(review): q_ptr itself is never cleared in this function. */
	qprocsoff(q);

	/* Keysock queue check is safe, because of OCEXCL perimeter. */

	if (q == ahstack->ah_pfkey_q) {
		ah1dbg(ahstack,
		    ("ipsecah_close:  Ummm... keysock is closing AH.\n"));
		ahstack->ah_pfkey_q = NULL;
		/* Detach qtimeouts. */
		(void) quntimeout(q, ahstack->ah_event);
	}

	if (WR(q) == ahstack->ah_sadb.s_ip_q) {
		/*
		 * If the ah_sadb.s_ip_q is attached to this instance, find
		 * another.  The OCEXCL outer perimeter helps us here.
		 */

		ahstack->ah_sadb.s_ip_q = NULL;

		/*
		 * Find a replacement queue for ah_sadb.s_ip_q.
		 */
		/* The only candidate considered is the keysock instance. */
		if (ahstack->ah_pfkey_q != NULL &&
		    ahstack->ah_pfkey_q != RD(q)) {
			/*
			 * See if we can use the pfkey_q.
			 */
			ahstack->ah_sadb.s_ip_q = WR(ahstack->ah_pfkey_q);
		}

		/* Re-bind IPPROTO_AH through the replacement, if there is one. */
		if (ahstack->ah_sadb.s_ip_q == NULL ||
		    !sadb_t_bind_req(ahstack->ah_sadb.s_ip_q, IPPROTO_AH)) {
			ah1dbg(ahstack,
			    ("ipsecah: Can't reassign ah_sadb.s_ip_q.\n"));
			ahstack->ah_sadb.s_ip_q = NULL;
		} else {
			/* Preallocate the unbind for the new IP queue. */
			ahstack->ah_ip_unbind =
			    allocb(sizeof (struct T_unbind_req), BPRI_HI);

			if (ahstack->ah_ip_unbind != NULL) {
				struct T_unbind_req *tur;

				/*
				 * NOTE(review): as in ipsecah_open(), b_wptr
				 * is not advanced past the primitive.
				 */
				ahstack->ah_ip_unbind->b_datap->db_type =
				    M_PROTO;
				tur = (struct T_unbind_req *)
				    ahstack->ah_ip_unbind->b_rptr;
				tur->PRIM_type = T_UNBIND_REQ;
			}
			/* If it's NULL, I can't do much here. */
		}
	}

	/* Drop the hold taken in ipsecah_open(). */
	netstack_rele(ahstack->ipsecah_netstack);
	return (0);
}
    627 
    628 /*
    629  * AH module read put routine.
    630  */
    631 /* ARGSUSED */
    632 static void
    633 ipsecah_rput(queue_t *q, mblk_t *mp)
    634 {
    635 	ipsecah_stack_t	*ahstack = (ipsecah_stack_t *)q->q_ptr;
    636 
    637 	ASSERT(mp->b_datap->db_type != M_CTL);	/* No more IRE_DB_REQ. */
    638 
    639 	switch (mp->b_datap->db_type) {
    640 	case M_PROTO:
    641 	case M_PCPROTO:
    642 		/* TPI message of some sort. */
    643 		switch (*((t_scalar_t *)mp->b_rptr)) {
    644 		case T_BIND_ACK:
    645 			/* We expect this. */
    646 			ah3dbg(ahstack,
    647 			    ("Thank you IP from AH for T_BIND_ACK\n"));
    648 			break;
    649 		case T_ERROR_ACK:
    650 			cmn_err(CE_WARN,
    651 			    "ipsecah:  AH received T_ERROR_ACK from IP.");
    652 			break;
    653 		case T_OK_ACK:
    654 			/* Probably from a (rarely sent) T_UNBIND_REQ. */
    655 			break;
    656 		default:
    657 			ah1dbg(ahstack, ("Unknown M_{,PC}PROTO message.\n"));
    658 		}
    659 		freemsg(mp);
    660 		break;
    661 	default:
    662 		/* For now, passthru message. */
    663 		ah2dbg(ahstack, ("AH got unknown mblk type %d.\n",
    664 		    mp->b_datap->db_type));
    665 		putnext(q, mp);
    666 	}
    667 }
    668 
    669 /*
    670  * Construct an SADB_REGISTER message with the current algorithms.
    671  */
    672 static boolean_t
    673 ah_register_out(uint32_t sequence, uint32_t pid, uint_t serial,
    674     ipsecah_stack_t *ahstack)
    675 {
    676 	mblk_t *mp;
    677 	boolean_t rc = B_TRUE;
    678 	sadb_msg_t *samsg;
    679 	sadb_supported_t *sasupp;
    680 	sadb_alg_t *saalg;
    681 	uint_t allocsize = sizeof (*samsg);
    682 	uint_t i, numalgs_snap;
    683 	ipsec_alginfo_t **authalgs;
    684 	uint_t num_aalgs;
    685 	ipsec_stack_t	*ipss = ahstack->ipsecah_netstack->netstack_ipsec;
    686 
    687 	/* Allocate the KEYSOCK_OUT. */
    688 	mp = sadb_keysock_out(serial);
    689 	if (mp == NULL) {
    690 		ah0dbg(("ah_register_out: couldn't allocate mblk.\n"));
    691 		return (B_FALSE);
    692 	}
    693 
    694 	/*
    695 	 * Allocate the PF_KEY message that follows KEYSOCK_OUT.
    696 	 * The alg reader lock needs to be held while allocating
    697 	 * the variable part (i.e. the algorithms) of the message.
    698 	 */
    699 
    700 	mutex_enter(&ipss->ipsec_alg_lock);
    701 
    702 	/*
    703 	 * Return only valid algorithms, so the number of algorithms
    704 	 * to send up may be less than the number of algorithm entries
    705 	 * in the table.
    706 	 */
    707 	authalgs = ipss->ipsec_alglists[IPSEC_ALG_AUTH];
    708 	for (num_aalgs = 0, i = 0; i < IPSEC_MAX_ALGS; i++)
    709 		if (authalgs[i] != NULL && ALG_VALID(authalgs[i]))
    710 			num_aalgs++;
    711 
    712 	/*
    713 	 * Fill SADB_REGISTER message's algorithm descriptors.  Hold
    714 	 * down the lock while filling it.
    715 	 */
    716 	if (num_aalgs != 0) {
    717 		allocsize += (num_aalgs * sizeof (*saalg));
    718 		allocsize += sizeof (*sasupp);
    719 	}
    720 	mp->b_cont = allocb(allocsize, BPRI_HI);
    721 	if (mp->b_cont == NULL) {
    722 		mutex_exit(&ipss->ipsec_alg_lock);
    723 		freemsg(mp);
    724 		return (B_FALSE);
    725 	}
    726 
    727 	mp->b_cont->b_wptr += allocsize;
    728 	if (num_aalgs != 0) {
    729 
    730 		saalg = (sadb_alg_t *)(mp->b_cont->b_rptr + sizeof (*samsg) +
    731 		    sizeof (*sasupp));
    732 		ASSERT(((ulong_t)saalg & 0x7) == 0);
    733 
    734 		numalgs_snap = 0;
    735 		for (i = 0;
    736 		    ((i < IPSEC_MAX_ALGS) && (numalgs_snap < num_aalgs));
    737 		    i++) {
    738 			if (authalgs[i] == NULL || !ALG_VALID(authalgs[i]))
    739 				continue;
    740 
    741 			saalg->sadb_alg_id = authalgs[i]->alg_id;
    742 			saalg->sadb_alg_ivlen = 0;
    743 			saalg->sadb_alg_minbits = authalgs[i]->alg_ef_minbits;
    744 			saalg->sadb_alg_maxbits = authalgs[i]->alg_ef_maxbits;
    745 			saalg->sadb_x_alg_increment =
    746 			    authalgs[i]->alg_increment;
    747 			saalg->sadb_x_alg_defincr =
    748 			    authalgs[i]->alg_ef_default;
    749 			numalgs_snap++;
    750 			saalg++;
    751 		}
    752 		ASSERT(numalgs_snap == num_aalgs);
    753 #ifdef DEBUG
    754 		/*
    755 		 * Reality check to make sure I snagged all of the
    756 		 * algorithms.
    757 		 */
    758 		for (; i < IPSEC_MAX_ALGS; i++)
    759 			if (authalgs[i] != NULL && ALG_VALID(authalgs[i]))
    760 				cmn_err(CE_PANIC,
    761 				    "ah_register_out()!  Missed #%d.\n", i);
    762 #endif /* DEBUG */
    763 	}
    764 
    765 	mutex_exit(&ipss->ipsec_alg_lock);
    766 
    767 	/* Now fill the restof the SADB_REGISTER message. */
    768 
    769 	samsg = (sadb_msg_t *)mp->b_cont->b_rptr;
    770 	samsg->sadb_msg_version = PF_KEY_V2;
    771 	samsg->sadb_msg_type = SADB_REGISTER;
    772 	samsg->sadb_msg_errno = 0;
    773 	samsg->sadb_msg_satype = SADB_SATYPE_AH;
    774 	samsg->sadb_msg_len = SADB_8TO64(allocsize);
    775 	samsg->sadb_msg_reserved = 0;
    776 	/*
    777 	 * Assume caller has sufficient sequence/pid number info.  If it's one
    778 	 * from me over a new alg., I could give two hoots about sequence.
    779 	 */
    780 	samsg->sadb_msg_seq = sequence;
    781 	samsg->sadb_msg_pid = pid;
    782 
    783 	if (allocsize > sizeof (*samsg)) {
    784 		sasupp = (sadb_supported_t *)(samsg + 1);
    785 		sasupp->sadb_supported_len =
    786 		    SADB_8TO64(allocsize - sizeof (sadb_msg_t));
    787 		sasupp->sadb_supported_exttype = SADB_EXT_SUPPORTED_AUTH;
    788 		sasupp->sadb_supported_reserved = 0;
    789 	}
    790 
    791 	if (ahstack->ah_pfkey_q != NULL)
    792 		putnext(ahstack->ah_pfkey_q, mp);
    793 	else {
    794 		rc = B_FALSE;
    795 		freemsg(mp);
    796 	}
    797 
    798 	return (rc);
    799 }
    800 
    801 /*
    802  * Invoked when the algorithm table changes. Causes SADB_REGISTER
    803  * messages continaining the current list of algorithms to be
    804  * sent up to the AH listeners.
    805  */
    806 void
    807 ipsecah_algs_changed(netstack_t *ns)
    808 {
    809 	ipsecah_stack_t	*ahstack = ns->netstack_ipsecah;
    810 
    811 	/*
    812 	 * Time to send a PF_KEY SADB_REGISTER message to AH listeners
    813 	 * everywhere.  (The function itself checks for NULL ah_pfkey_q.)
    814 	 */
    815 	(void) ah_register_out(0, 0, 0, ahstack);
    816 }
    817 
    818 /*
    819  * Stub function that taskq_dispatch() invokes to take the mblk (in arg)
    820  * and put() it into AH and STREAMS again.
    821  */
    822 static void
    823 inbound_task(void *arg)
    824 {
    825 	ah_t *ah;
    826 	mblk_t *mp = (mblk_t *)arg;
    827 	ipsec_in_t *ii = (ipsec_in_t *)mp->b_rptr;
    828 	int ipsec_rc;
    829 	netstack_t	*ns = ii->ipsec_in_ns;
    830 	ipsecah_stack_t	*ahstack = ns->netstack_ipsecah;
    831 
    832 	ah2dbg(ahstack, ("in AH inbound_task"));
    833 
    834 	ASSERT(ahstack != NULL);
    835 	ah = ipsec_inbound_ah_sa(mp, ns);
    836 	if (ah == NULL)
    837 		return;
    838 	ASSERT(ii->ipsec_in_ah_sa != NULL);
    839 	ipsec_rc = ii->ipsec_in_ah_sa->ipsa_input_func(mp, ah);
    840 	if (ipsec_rc != IPSEC_STATUS_SUCCESS)
    841 		return;
    842 	ip_fanout_proto_again(mp, NULL, NULL, NULL);
    843 }
    844 
    845 
    846 /*
    847  * Now that weak-key passed, actually ADD the security association, and
    848  * send back a reply ADD message.
    849  */
    850 static int
    851 ah_add_sa_finish(mblk_t *mp, sadb_msg_t *samsg, keysock_in_t *ksi,
    852     int *diagnostic, ipsecah_stack_t *ahstack)
    853 {
    854 	isaf_t *primary = NULL, *secondary, *inbound, *outbound;
    855 	sadb_sa_t *assoc = (sadb_sa_t *)ksi->ks_in_extv[SADB_EXT_SA];
    856 	sadb_address_t *dstext =
    857 	    (sadb_address_t *)ksi->ks_in_extv[SADB_EXT_ADDRESS_DST];
    858 	struct sockaddr_in *dst;
    859 	struct sockaddr_in6 *dst6;
    860 	boolean_t is_ipv4, clone = B_FALSE, is_inbound = B_FALSE;
    861 	uint32_t *dstaddr;
    862 	ipsa_t *larval;
    863 	ipsacq_t *acqrec;
    864 	iacqf_t *acq_bucket;
    865 	mblk_t *acq_msgs = NULL;
    866 	mblk_t *lpkt;
    867 	int rc;
    868 	sadb_t *sp;
    869 	int outhash;
    870 	netstack_t	*ns = ahstack->ipsecah_netstack;
    871 	ipsec_stack_t	*ipss = ns->netstack_ipsec;
    872 
    873 	/*
    874 	 * Locate the appropriate table(s).
    875 	 */
    876 
    877 	dst = (struct sockaddr_in *)(dstext + 1);
    878 	dst6 = (struct sockaddr_in6 *)dst;
    879 	is_ipv4 = (dst->sin_family == AF_INET);
    880 	if (is_ipv4) {
    881 		sp = &ahstack->ah_sadb.s_v4;
    882 		dstaddr = (uint32_t *)(&dst->sin_addr);
    883 		outhash = OUTBOUND_HASH_V4(sp, *(ipaddr_t *)dstaddr);
    884 	} else {
    885 		ASSERT(dst->sin_family == AF_INET6);
    886 		sp = &ahstack->ah_sadb.s_v6;
    887 		dstaddr = (uint32_t *)(&dst6->sin6_addr);
    888 		outhash = OUTBOUND_HASH_V6(sp, *(in6_addr_t *)dstaddr);
    889 	}
    890 
    891 	inbound = INBOUND_BUCKET(sp, assoc->sadb_sa_spi);
    892 	outbound = &sp->sdb_of[outhash];
    893 	/*
    894 	 * Use the direction flags provided by the KMD to determine
    895 	 * if the inbound or outbound table should be the primary
    896 	 * for this SA. If these flags were absent then make this
    897 	 * decision based on the addresses.
    898 	 */
    899 	if (assoc->sadb_sa_flags & IPSA_F_INBOUND) {
    900 		primary = inbound;
    901 		secondary = outbound;
    902 		is_inbound = B_TRUE;
    903 		if (assoc->sadb_sa_flags & IPSA_F_OUTBOUND)
    904 			clone = B_TRUE;
    905 	} else {
    906 		if (assoc->sadb_sa_flags & IPSA_F_OUTBOUND) {
    907 			primary = outbound;
    908 			secondary = inbound;
    909 		}
    910 	}
    911 
    912 	if (primary == NULL) {
    913 		/*
    914 		 * The KMD did not set a direction flag, determine which
    915 		 * table to insert the SA into based on addresses.
    916 		 */
    917 		switch (ksi->ks_in_dsttype) {
    918 		case KS_IN_ADDR_MBCAST:
    919 			clone = B_TRUE;	/* All mcast SAs can be bidirectional */
    920 			assoc->sadb_sa_flags |= IPSA_F_OUTBOUND;
    921 			/* FALLTHRU */
    922 		/*
    923 		 * If the source address is either one of mine, or unspecified
    924 		 * (which is best summed up by saying "not 'not mine'"),
    925 		 * then the association is potentially bi-directional,
    926 		 * in that it can be used for inbound traffic and outbound
    927 		 * traffic.  The best example of such and SA is a multicast
    928 		 * SA (which allows me to receive the outbound traffic).
    929 		 */
    930 		case KS_IN_ADDR_ME:
    931 			assoc->sadb_sa_flags |= IPSA_F_INBOUND;
    932 			primary = inbound;
    933 			secondary = outbound;
    934 			if (ksi->ks_in_srctype != KS_IN_ADDR_NOTME)
    935 				clone = B_TRUE;
    936 			is_inbound = B_TRUE;
    937 			break;
    938 		/*
    939 		 * If the source address literally not mine (either
    940 		 * unspecified or not mine), then this SA may have an
    941 		 * address that WILL be mine after some configuration.
    942 		 * We pay the price for this by making it a bi-directional
    943 		 * SA.
    944 		 */
    945 		case KS_IN_ADDR_NOTME:
    946 			assoc->sadb_sa_flags |= IPSA_F_OUTBOUND;
    947 			primary = outbound;
    948 			secondary = inbound;
    949 			if (ksi->ks_in_srctype != KS_IN_ADDR_ME) {
    950 				assoc->sadb_sa_flags |= IPSA_F_INBOUND;
    951 				clone = B_TRUE;
    952 			}
    953 			break;
    954 		default:
    955 			*diagnostic = SADB_X_DIAGNOSTIC_BAD_DST;
    956 			return (EINVAL);
    957 		}
    958 	}
    959 
    960 	/*
    961 	 * Find a ACQUIRE list entry if possible.  If we've added an SA that
    962 	 * suits the needs of an ACQUIRE list entry, we can eliminate the
    963 	 * ACQUIRE list entry and transmit the enqueued packets.  Use the
    964 	 * high-bit of the sequence number to queue it.  Key off destination
    965 	 * addr, and change acqrec's state.
    966 	 */
    967 
    968 	if (samsg->sadb_msg_seq & IACQF_LOWEST_SEQ) {
    969 		acq_bucket = &sp->sdb_acq[outhash];
    970 		mutex_enter(&acq_bucket->iacqf_lock);
    971 		for (acqrec = acq_bucket->iacqf_ipsacq; acqrec != NULL;
    972 		    acqrec = acqrec->ipsacq_next) {
    973 			mutex_enter(&acqrec->ipsacq_lock);
    974 			/*
    975 			 * Q:  I only check sequence.  Should I check dst?
    976 			 * A: Yes, check dest because those are the packets
    977 			 *    that are queued up.
    978 			 */
    979 			if (acqrec->ipsacq_seq == samsg->sadb_msg_seq &&
    980 			    IPSA_ARE_ADDR_EQUAL(dstaddr,
    981 			    acqrec->ipsacq_dstaddr, acqrec->ipsacq_addrfam))
    982 				break;
    983 			mutex_exit(&acqrec->ipsacq_lock);
    984 		}
    985 		if (acqrec != NULL) {
    986 			/*
    987 			 * AHA!  I found an ACQUIRE record for this SA.
    988 			 * Grab the msg list, and free the acquire record.
    989 			 * I already am holding the lock for this record,
    990 			 * so all I have to do is free it.
    991 			 */
    992 			acq_msgs = acqrec->ipsacq_mp;
    993 			acqrec->ipsacq_mp = NULL;
    994