Home | History | Annotate | Download | only in ip
      1 /*
      2  * CDDL HEADER START
      3  *
      4  * The contents of this file are subject to the terms of the
      5  * Common Development and Distribution License (the "License").
      6  * You may not use this file except in compliance with the License.
      7  *
      8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
      9  * or http://www.opensolaris.org/os/licensing.
     10  * See the License for the specific language governing permissions
     11  * and limitations under the License.
     12  *
     13  * When distributing Covered Code, include this CDDL HEADER in each
     14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
     15  * If applicable, add the following below this CDDL HEADER, with the
     16  * fields enclosed by brackets "[]" replaced with your own identifying
     17  * information: Portions Copyright [yyyy] [name of copyright owner]
     18  *
     19  * CDDL HEADER END
     20  *
     21  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
     22  * Use is subject to license terms.
     23  */
     24 
     25 #include <inet/arp.h>
     26 #include <inet/ip.h>
     27 #include <inet/ip6.h>
     28 #include <inet/ip_if.h>
     29 #include <inet/ip_ire.h>
     30 #include <inet/ip_multi.h>
     31 #include <inet/ip_rts.h>
     32 #include <inet/mi.h>
     33 #include <net/if_types.h>
     34 #include <sys/dlpi.h>
     35 #include <sys/kmem.h>
     36 #include <sys/modhash.h>
     37 #include <sys/sdt.h>
     38 #include <sys/strsun.h>
     39 #include <sys/sunddi.h>
     40 #include <sys/types.h>
     41 
     /*
      * Shorthand for reaching the owning ip_stack_t: from an ipmp_grp_t via
      * its cached phyint, and from an ipmp_illgrp_t via its IPMP meta-interface
      * ill.
      */
     #define	IPMP_GRP_TO_IPST(g)		PHYINT_TO_IPST((g)->gr_phyint)
     #define	IPMP_ILLGRP_TO_IPST(ig)		((ig)->ig_ipmp_ill->ill_ipst)
     48 
     /*
      * Fixed parameters that are deliberately not exposed as tunables.
      */
     #define	IPMP_ILL_REFRESH_TIMEOUT	120	/* seconds */
     #define	IPMP_GRP_HASH_SIZE		64	/* for ipmp_grp_hash */
     54 
     55 /*
     56  * Templates for IPMP ARP messages.
     57  */
     58 static const arie_t ipmp_aract_template = {
     59 	AR_IPMP_ACTIVATE,
     60 	sizeof (arie_t),		/* Name offset */
     61 	sizeof (arie_t)			/* Name length (set by ill_arp_alloc) */
     62 };
     63 
     64 static const arie_t ipmp_ardeact_template = {
     65 	AR_IPMP_DEACTIVATE,
     66 	sizeof (arie_t),		/* Name offset */
     67 	sizeof (arie_t)			/* Name length (set by ill_arp_alloc) */
     68 };
     69 
     70 /*
     71  * IPMP meta-interface kstats (based on those in PSARC/1997/198).
     72  */
     73 static const kstat_named_t ipmp_kstats[IPMP_KSTAT_MAX] = {
     74 	{ "obytes",	KSTAT_DATA_UINT32 },
     75 	{ "obytes64",	KSTAT_DATA_UINT64 },
     76 	{ "rbytes",	KSTAT_DATA_UINT32 },
     77 	{ "rbytes64",	KSTAT_DATA_UINT64 },
     78 	{ "opackets",	KSTAT_DATA_UINT32 },
     79 	{ "opackets64",	KSTAT_DATA_UINT64 },
     80 	{ "oerrors",	KSTAT_DATA_UINT32 },
     81 	{ "ipackets",	KSTAT_DATA_UINT32 },
     82 	{ "ipackets64",	KSTAT_DATA_UINT64 },
     83 	{ "ierrors",	KSTAT_DATA_UINT32 },
     84 	{ "multircv",	KSTAT_DATA_UINT32 },
     85 	{ "multixmt",	KSTAT_DATA_UINT32 },
     86 	{ "brdcstrcv",	KSTAT_DATA_UINT32 },
     87 	{ "brdcstxmt",	KSTAT_DATA_UINT32 },
     88 	{ "link_up",	KSTAT_DATA_UINT32 }
     89 };
     90 
     91 static void	ipmp_grp_insert(ipmp_grp_t *, mod_hash_hndl_t);
     92 static int	ipmp_grp_create_kstats(ipmp_grp_t *);
     93 static int	ipmp_grp_update_kstats(kstat_t *, int);
     94 static void	ipmp_grp_destroy_kstats(ipmp_grp_t *);
     95 static ill_t	*ipmp_illgrp_min_ill(ipmp_illgrp_t *);
     96 static ill_t	*ipmp_illgrp_max_ill(ipmp_illgrp_t *);
     97 static void	ipmp_illgrp_set_cast(ipmp_illgrp_t *, ill_t *);
     98 static void	ipmp_illgrp_set_mtu(ipmp_illgrp_t *, uint_t);
     99 static boolean_t ipmp_ill_activate(ill_t *);
    100 static void	ipmp_ill_deactivate(ill_t *);
    101 static void	ipmp_ill_ire_mark_testhidden(ire_t *, char *);
    102 static void	ipmp_ill_ire_clear_testhidden(ire_t *, char *);
    103 static void	ipmp_ill_refresh_active_timer_start(ill_t *);
    104 static void	ipmp_ill_rtsaddrmsg(ill_t *, int);
    105 static void	ipmp_ill_bind_ipif(ill_t *, ipif_t *, enum ip_resolver_action);
    106 static ipif_t	*ipmp_ill_unbind_ipif(ill_t *, ipif_t *, boolean_t);
    107 static void	ipmp_phyint_get_kstats(phyint_t *, uint64_t *);
    108 static boolean_t ipmp_ipif_is_up_dataaddr(const ipif_t *);
    109 
    110 /*
    111  * Initialize IPMP state for IP stack `ipst'; called from ip_stack_init().
    112  */
    113 void
    114 ipmp_init(ip_stack_t *ipst)
    115 {
    116 	ipst->ips_ipmp_grp_hash = mod_hash_create_extended("ipmp_grp_hash",
    117 	    IPMP_GRP_HASH_SIZE, mod_hash_null_keydtor, mod_hash_null_valdtor,
    118 	    mod_hash_bystr, NULL, mod_hash_strkey_cmp, KM_SLEEP);
    119 	rw_init(&ipst->ips_ipmp_lock, NULL, RW_DEFAULT, 0);
    120 }
    121 
    122 /*
    123  * Destroy IPMP state for IP stack `ipst'; called from ip_stack_fini().
    124  */
    125 void
    126 ipmp_destroy(ip_stack_t *ipst)
    127 {
    128 	mod_hash_destroy_hash(ipst->ips_ipmp_grp_hash);
    129 	rw_destroy(&ipst->ips_ipmp_lock);
    130 }
    131 
    132 /*
    133  * Create an IPMP group named `grname', associate it with IPMP phyint `phyi',
    134  * and add it to the hash.  On success, return a pointer to the created group.
    135  * Caller must ensure `grname' is not yet in the hash.  Assumes that the IPMP
    136  * meta-interface associated with the group also has the same name (but they
    137  * may differ later via ipmp_grp_rename()).
    138  */
    139 ipmp_grp_t *
    140 ipmp_grp_create(const char *grname, phyint_t *phyi)
    141 {
    142 	ipmp_grp_t *grp;
    143 	ip_stack_t *ipst = PHYINT_TO_IPST(phyi);
    144 	mod_hash_hndl_t mh;
    145 
    146 	ASSERT(RW_WRITE_HELD(&ipst->ips_ipmp_lock));
    147 
    148 	if ((grp = kmem_zalloc(sizeof (ipmp_grp_t), KM_NOSLEEP)) == NULL)
    149 		return (NULL);
    150 
    151 	(void) strlcpy(grp->gr_name, grname, sizeof (grp->gr_name));
    152 	(void) strlcpy(grp->gr_ifname, grname, sizeof (grp->gr_ifname));
    153 
    154 	/*
    155 	 * Cache the group's phyint.  This is safe since a phyint_t will
    156 	 * outlive its ipmp_grp_t.
    157 	 */
    158 	grp->gr_phyint = phyi;
    159 
    160 	/*
    161 	 * Create IPMP group kstats.
    162 	 */
    163 	if (ipmp_grp_create_kstats(grp) != 0) {
    164 		kmem_free(grp, sizeof (ipmp_grp_t));
    165 		return (NULL);
    166 	}
    167 
    168 	/*
    169 	 * Insert the group into the hash.
    170 	 */
    171 	if (mod_hash_reserve_nosleep(ipst->ips_ipmp_grp_hash, &mh) != 0) {
    172 		ipmp_grp_destroy_kstats(grp);
    173 		kmem_free(grp, sizeof (ipmp_grp_t));
    174 		return (NULL);
    175 	}
    176 	ipmp_grp_insert(grp, mh);
    177 
    178 	return (grp);
    179 }
    180 
    181 /*
    182  * Create IPMP kstat structures for `grp'.  Return an errno upon failure.
    183  */
    184 static int
    185 ipmp_grp_create_kstats(ipmp_grp_t *grp)
    186 {
    187 	kstat_t *ksp;
    188 	netstackid_t id = IPMP_GRP_TO_IPST(grp)->ips_netstack->netstack_stackid;
    189 
    190 	ksp = kstat_create_netstack("ipmp", 0, grp->gr_ifname, "net",
    191 	    KSTAT_TYPE_NAMED, IPMP_KSTAT_MAX, 0, id);
    192 	if (ksp == NULL)
    193 		return (ENOMEM);
    194 
    195 	ksp->ks_update = ipmp_grp_update_kstats;
    196 	ksp->ks_private = grp;
    197 	bcopy(ipmp_kstats, ksp->ks_data, sizeof (ipmp_kstats));
    198 
    199 	kstat_install(ksp);
    200 	grp->gr_ksp = ksp;
    201 	return (0);
    202 }
    203 
    204 /*
    205  * Update the IPMP kstats tracked by `ksp'; called by the kstats framework.
    206  */
    207 static int
    208 ipmp_grp_update_kstats(kstat_t *ksp, int rw)
    209 {
    210 	uint_t		i;
    211 	kstat_named_t	*kn = KSTAT_NAMED_PTR(ksp);
    212 	ipmp_grp_t	*grp = ksp->ks_private;
    213 	ip_stack_t	*ipst = IPMP_GRP_TO_IPST(grp);
    214 	ipsq_t		*ipsq, *grp_ipsq = grp->gr_phyint->phyint_ipsq;
    215 	phyint_t	*phyi;
    216 	uint64_t	phyi_kstats[IPMP_KSTAT_MAX];
    217 
    218 	if (rw == KSTAT_WRITE)
    219 		return (EACCES);
    220 
    221 	/*
    222 	 * Start with the group's baseline values.
    223 	 */
    224 	for (i = 0; i < IPMP_KSTAT_MAX; i++) {
    225 		if (kn[i].data_type == KSTAT_DATA_UINT32) {
    226 			kn[i].value.ui32 = grp->gr_kstats0[i];
    227 		} else {
    228 			ASSERT(kn[i].data_type == KSTAT_DATA_UINT64);
    229 			kn[i].value.ui64 = grp->gr_kstats0[i];
    230 		}
    231 	}
    232 
    233 	/*
    234 	 * Add in the stats of each phyint currently in the group.  Since we
    235 	 * don't directly track the phyints in a group, we cheat by walking
    236 	 * the IPSQ set under ill_g_lock.  (The IPSQ list cannot change while
    237 	 * ill_g_lock is held.)
    238 	 */
    239 	rw_enter(&ipst->ips_ill_g_lock, RW_READER);
    240 	ipsq = grp_ipsq->ipsq_next;
    241 	for (; ipsq != grp_ipsq; ipsq = ipsq->ipsq_next) {
    242 		phyi = ipsq->ipsq_phyint;
    243 
    244 		/*
    245 		 * If a phyint in a group is being unplumbed, it's possible
    246 		 * that ill_glist_delete() -> phyint_free() already freed the
    247 		 * phyint (and set ipsq_phyint to NULL), but the unplumb
    248 		 * operation has yet to complete (and thus ipsq_dq() has yet
    249 		 * to remove the phyint's IPSQ from the group IPSQ's phyint
    250 		 * list).  We skip those phyints here (note that their kstats
    251 		 * have already been added to gr_kstats0[]).
    252 		 */
    253 		if (phyi == NULL)
    254 			continue;
    255 
    256 		ipmp_phyint_get_kstats(phyi, phyi_kstats);
    257 
    258 		for (i = 0; i < IPMP_KSTAT_MAX; i++) {
    259 			phyi_kstats[i] -= phyi->phyint_kstats0[i];
    260 			if (kn[i].data_type == KSTAT_DATA_UINT32)
    261 				kn[i].value.ui32 += phyi_kstats[i];
    262 			else
    263 				kn[i].value.ui64 += phyi_kstats[i];
    264 		}
    265 	}
    266 
    267 	kn[IPMP_KSTAT_LINK_UP].value.ui32 =
    268 	    (grp->gr_phyint->phyint_flags & PHYI_RUNNING) != 0;
    269 
    270 	rw_exit(&ipst->ips_ill_g_lock);
    271 	return (0);
    272 }
    273 
    274 /*
    275  * Destroy IPMP kstat structures for `grp'.
    276  */
    277 static void
    278 ipmp_grp_destroy_kstats(ipmp_grp_t *grp)
    279 {
    280 	netstackid_t id = IPMP_GRP_TO_IPST(grp)->ips_netstack->netstack_stackid;
    281 
    282 	kstat_delete_netstack(grp->gr_ksp, id);
    283 	bzero(grp->gr_kstats0, sizeof (grp->gr_kstats0));
    284 	grp->gr_ksp = NULL;
    285 }
    286 
    287 /*
    288  * Look up an IPMP group named `grname' on IP stack `ipst'.  Return NULL if it
    289  * does not exist.
    290  */
    291 ipmp_grp_t *
    292 ipmp_grp_lookup(const char *grname, ip_stack_t *ipst)
    293 {
    294 	ipmp_grp_t *grp;
    295 
    296 	ASSERT(RW_LOCK_HELD(&ipst->ips_ipmp_lock));
    297 
    298 	if (mod_hash_find(ipst->ips_ipmp_grp_hash, (mod_hash_key_t)grname,
    299 	    (mod_hash_val_t *)&grp) == 0)
    300 		return (grp);
    301 
    302 	return (NULL);
    303 }
    304 
    305 /*
    306  * Place information about group `grp' into `lifgr'.
    307  */
    308 void
    309 ipmp_grp_info(const ipmp_grp_t *grp, lifgroupinfo_t *lifgr)
    310 {
    311 	ill_t *ill;
    312 	ip_stack_t *ipst = IPMP_GRP_TO_IPST(grp);
    313 
    314 	ASSERT(RW_LOCK_HELD(&ipst->ips_ipmp_lock));
    315 
    316 	lifgr->gi_v4 = (grp->gr_v4 != NULL);
    317 	lifgr->gi_v6 = (grp->gr_v6 != NULL);
    318 	lifgr->gi_nv4 = grp->gr_nv4 + grp->gr_pendv4;
    319 	lifgr->gi_nv6 = grp->gr_nv6 + grp->gr_pendv6;
    320 	lifgr->gi_mactype = grp->gr_nif > 0 ? grp->gr_mactype : SUNW_DL_IPMP;
    321 	(void) strlcpy(lifgr->gi_grifname, grp->gr_ifname, LIFNAMSIZ);
    322 	lifgr->gi_m4ifname[0] = '\0';
    323 	lifgr->gi_m6ifname[0] = '\0';
    324 	lifgr->gi_bcifname[0] = '\0';
    325 
    326 	if (grp->gr_v4 != NULL && (ill = grp->gr_v4->ig_cast_ill) != NULL) {
    327 		(void) strlcpy(lifgr->gi_m4ifname, ill->ill_name, LIFNAMSIZ);
    328 		(void) strlcpy(lifgr->gi_bcifname, ill->ill_name, LIFNAMSIZ);
    329 	}
    330 
    331 	if (grp->gr_v6 != NULL && (ill = grp->gr_v6->ig_cast_ill) != NULL)
    332 		(void) strlcpy(lifgr->gi_m6ifname, ill->ill_name, LIFNAMSIZ);
    333 }
    334 
    335 /*
    336  * Insert `grp' into the hash using the reserved hash entry `mh'.
    337  * Caller must ensure `grp' is not yet in the hash.
    338  */
    339 static void
    340 ipmp_grp_insert(ipmp_grp_t *grp, mod_hash_hndl_t mh)
    341 {
    342 	int err;
    343 	ip_stack_t *ipst = IPMP_GRP_TO_IPST(grp);
    344 
    345 	ASSERT(RW_WRITE_HELD(&ipst->ips_ipmp_lock));
    346 
    347 	/*
    348 	 * Since grp->gr_name will exist at least as long as `grp' is in the
    349 	 * hash, we use it directly as the key.
    350 	 */
    351 	err = mod_hash_insert_reserve(ipst->ips_ipmp_grp_hash,
    352 	    (mod_hash_key_t)grp->gr_name, (mod_hash_val_t)grp, mh);
    353 	if (err != 0) {
    354 		/*
    355 		 * This should never happen since `mh' was preallocated.
    356 		 */
    357 		panic("cannot insert IPMP group \"%s\" (err %d)",
    358 		    grp->gr_name, err);
    359 	}
    360 }
    361 
    362 /*
    363  * Remove `grp' from the hash.  Caller must ensure `grp' is in it.
    364  */
    365 static void
    366 ipmp_grp_remove(ipmp_grp_t *grp)
    367 {
    368 	int err;
    369 	mod_hash_val_t val;
    370 	mod_hash_key_t key = (mod_hash_key_t)grp->gr_name;
    371 	ip_stack_t *ipst = IPMP_GRP_TO_IPST(grp);
    372 
    373 	ASSERT(RW_WRITE_HELD(&ipst->ips_ipmp_lock));
    374 
    375 	err = mod_hash_remove(ipst->ips_ipmp_grp_hash, key, &val);
    376 	if (err != 0 || val != grp) {
    377 		panic("cannot remove IPMP group \"%s\" (err %d)",
    378 		    grp->gr_name, err);
    379 	}
    380 }
    381 
    382 /*
    383  * Attempt to rename `grp' to new name `grname'.  Return an errno if the new
    384  * group name already exists or is invalid, or if there isn't enough memory.
    385  */
    386 int
    387 ipmp_grp_rename(ipmp_grp_t *grp, const char *grname)
    388 {
    389 	mod_hash_hndl_t mh;
    390 	ip_stack_t *ipst = IPMP_GRP_TO_IPST(grp);
    391 
    392 	ASSERT(RW_WRITE_HELD(&ipst->ips_ipmp_lock));
    393 
    394 	if (grname[0] == '\0')
    395 		return (EINVAL);
    396 
    397 	if (mod_hash_find(ipst->ips_ipmp_grp_hash, (mod_hash_key_t)grname,
    398 	    (mod_hash_val_t *)&grp) != MH_ERR_NOTFOUND)
    399 		return (EEXIST);
    400 
    401 	/*
    402 	 * Before we remove the group from the hash, ensure we'll be able to
    403 	 * re-insert it by reserving space.
    404 	 */
    405 	if (mod_hash_reserve_nosleep(ipst->ips_ipmp_grp_hash, &mh) != 0)
    406 		return (ENOMEM);
    407 
    408 	ipmp_grp_remove(grp);
    409 	(void) strlcpy(grp->gr_name, grname, sizeof (grp->gr_name));
    410 	ipmp_grp_insert(grp, mh);
    411 
    412 	return (0);
    413 }
    414 
    415 /*
    416  * Destroy `grp' and remove it from the hash.  Caller must ensure `grp' is in
    417  * the hash, and that there are no interfaces on it.
    418  */
    419 void
    420 ipmp_grp_destroy(ipmp_grp_t *grp)
    421 {
    422 	ip_stack_t *ipst = IPMP_GRP_TO_IPST(grp);
    423 
    424 	ASSERT(RW_WRITE_HELD(&ipst->ips_ipmp_lock));
    425 
    426 	/*
    427 	 * If there are still interfaces using this group, panic before things
    428 	 * go really off the rails.
    429 	 */
    430 	if (grp->gr_nif != 0)
    431 		panic("cannot destroy IPMP group \"%s\": in use", grp->gr_name);
    432 
    433 	ipmp_grp_remove(grp);
    434 	ipmp_grp_destroy_kstats(grp);
    435 
    436 	ASSERT(grp->gr_v4 == NULL);
    437 	ASSERT(grp->gr_v6 == NULL);
    438 	ASSERT(grp->gr_nv4 == 0);
    439 	ASSERT(grp->gr_nv6 == 0);
    440 	ASSERT(grp->gr_nactif == 0);
    441 	ASSERT(grp->gr_linkdownmp == NULL);
    442 	grp->gr_phyint = NULL;
    443 
    444 	kmem_free(grp, sizeof (ipmp_grp_t));
    445 }
    446 
    447 /*
    448  * Check whether `ill' is suitable for inclusion into `grp', and return an
    449  * errno describing the problem (if any).  NOTE: many of these errno values
    450  * are interpreted by ifconfig, which will take corrective action and retry
    451  * the SIOCSLIFGROUPNAME, so please exercise care when changing them.
    452  */
    453 static int
    454 ipmp_grp_vet_ill(ipmp_grp_t *grp, ill_t *ill)
    455 {
    456 	ip_stack_t *ipst = IPMP_GRP_TO_IPST(grp);
    457 
    458 	ASSERT(IAM_WRITER_ILL(ill));
    459 	ASSERT(RW_LOCK_HELD(&ipst->ips_ipmp_lock));
    460 
    461 	/*
    462 	 * To sidestep complicated address migration logic in the kernel and
    463 	 * to force the kernel's all-hosts multicast memberships to be blown
    464 	 * away, all addresses that had been brought up must be brought back
    465 	 * down prior to adding an interface to a group.  (This includes
    466 	 * addresses currently down due to DAD.)  Once the interface has been
    467 	 * added to the group, its addresses can then be brought back up, at
    468 	 * which point they will be moved to the IPMP meta-interface.
    469 	 * NOTE: we do this before ill_appaddr_cnt() since bringing down the
    470 	 * link-local causes in.ndpd to remove its ADDRCONF'd addresses.
    471 	 */
    472 	if (ill->ill_ipif_up_count + ill->ill_ipif_dup_count > 0)
    473 		return (EADDRINUSE);
    474 
    475 	/*
    476 	 * To avoid confusing applications by changing addresses that are
    477 	 * under their control, all such control must be removed prior to
    478 	 * adding an interface into a group.
    479 	 */
    480 	if (ill_appaddr_cnt(ill) != 0)
    481 		return (EADDRNOTAVAIL);
    482 
    483 	/*
    484 	 * Since PTP addresses do not share the same broadcast domain, they
    485 	 * are not allowed to be in an IPMP group.
    486 	 */
    487 	if (ill_ptpaddr_cnt(ill) != 0)
    488 		return (EINVAL);
    489 
    490 	/*
    491 	 * An ill must support multicast to be allowed into a group.
    492 	 */
    493 	if (!(ill->ill_flags & ILLF_MULTICAST))
    494 		return (ENOTSUP);
    495 
    496 	/*
    497 	 * An ill must strictly be using ARP and/or ND for address
    498 	 * resolution for it to be allowed into a group.
    499 	 */
    500 	if (ill->ill_flags & (ILLF_NONUD | ILLF_NOARP | ILLF_XRESOLV))
    501 		return (ENOTSUP);
    502 
    503 	/*
    504 	 * An ill cannot also be using usesrc groups.  (Although usesrc uses
    505 	 * ill_g_usesrc_lock, we don't need to grab it since usesrc also does
    506 	 * all its modifications as writer.)
    507 	 */
    508 	if (IS_USESRC_ILL(ill) || IS_USESRC_CLI_ILL(ill))
    509 		return (ENOTSUP);
    510 
    511 	/*
    512 	 * All ills in a group must be the same mactype.
    513 	 */
    514 	if (grp->gr_nif > 0 && grp->gr_mactype != ill->ill_mactype)
    515 		return (EINVAL);
    516 
    517 	return (0);
    518 }
    519 
    520 /*
    521  * Check whether `phyi' is suitable for inclusion into `grp', and return an
    522  * errno describing the problem (if any).  See comment above ipmp_grp_vet_ill()
    523  * regarding errno values.
    524  */
    525 int
    526 ipmp_grp_vet_phyint(ipmp_grp_t *grp, phyint_t *phyi)
    527 {
    528 	int err = 0;
    529 	ip_stack_t *ipst = IPMP_GRP_TO_IPST(grp);
    530 
    531 	ASSERT(IAM_WRITER_IPSQ(phyi->phyint_ipsq));
    532 	ASSERT(RW_LOCK_HELD(&ipst->ips_ipmp_lock));
    533 
    534 	/*
    535 	 * An interface cannot have address families plumbed that are not
    536 	 * configured in the group.
    537 	 */
    538 	if (phyi->phyint_illv4 != NULL && grp->gr_v4 == NULL ||
    539 	    phyi->phyint_illv6 != NULL && grp->gr_v6 == NULL)
    540 		return (EAFNOSUPPORT);
    541 
    542 	if (phyi->phyint_illv4 != NULL)
    543 		err = ipmp_grp_vet_ill(grp, phyi->phyint_illv4);
    544 	if (err == 0 && phyi->phyint_illv6 != NULL)
    545 		err = ipmp_grp_vet_ill(grp, phyi->phyint_illv6);
    546 
    547 	return (err);
    548 }
    549 
    550 /*
    551  * Create a new illgrp on IPMP meta-interface `ill'.
    552  */
    553 ipmp_illgrp_t *
    554 ipmp_illgrp_create(ill_t *ill)
    555 {
    556 	uint_t mtu = ill->ill_isv6 ? IPV6_MIN_MTU : IP_MIN_MTU;
    557 	ipmp_illgrp_t *illg;
    558 
    559 	ASSERT(IAM_WRITER_ILL(ill));
    560 	ASSERT(IS_IPMP(ill));
    561 	ASSERT(ill->ill_grp == NULL);
    562 
    563 	if ((illg = kmem_zalloc(sizeof (ipmp_illgrp_t), KM_NOSLEEP)) == NULL)
    564 		return (NULL);
    565 
    566 	list_create(&illg->ig_if, sizeof (ill_t), offsetof(ill_t, ill_grpnode));
    567 	list_create(&illg->ig_actif, sizeof (ill_t),
    568 	    offsetof(ill_t, ill_actnode));
    569 	list_create(&illg->ig_arpent, sizeof (ipmp_arpent_t),
    570 	    offsetof(ipmp_arpent_t, ia_node));
    571 
    572 	illg->ig_ipmp_ill = ill;
    573 	ill->ill_grp = illg;
    574 	ipmp_illgrp_set_mtu(illg, mtu);
    575 
    576 	return (illg);
    577 }
    578 
    579 /*
    580  * Destroy illgrp `illg', and disconnect it from its IPMP meta-interface.
    581  */
    582 void
    583 ipmp_illgrp_destroy(ipmp_illgrp_t *illg)
    584 {
    585 	ASSERT(IAM_WRITER_ILL(illg->ig_ipmp_ill));
    586 	ASSERT(IS_IPMP(illg->ig_ipmp_ill));
    587 
    588 	/*
    589 	 * Verify `illg' is empty.
    590 	 */
    591 	ASSERT(illg->ig_next_ill == NULL);
    592 	ASSERT(illg->ig_cast_ill == NULL);
    593 	ASSERT(list_is_empty(&illg->ig_arpent));
    594 	ASSERT(list_is_empty(&illg->ig_if));
    595 	ASSERT(list_is_empty(&illg->ig_actif));
    596 	ASSERT(illg->ig_nactif == 0);
    597 
    598 	/*
    599 	 * Destroy `illg'.
    600 	 */
    601 	illg->ig_ipmp_ill->ill_grp = NULL;
    602 	illg->ig_ipmp_ill = NULL;
    603 	list_destroy(&illg->ig_if);
    604 	list_destroy(&illg->ig_actif);
    605 	list_destroy(&illg->ig_arpent);
    606 	kmem_free(illg, sizeof (ipmp_illgrp_t));
    607 }
    608 
    609 /*
    610  * Add `ipif' to the pool of usable data addresses on `illg' and attempt to
    611  * bind it to an underlying ill, while keeping an even address distribution.
    612  * If the bind is successful, return a pointer to the bound ill.
    613  */
    614 ill_t *
    615 ipmp_illgrp_add_ipif(ipmp_illgrp_t *illg, ipif_t *ipif)
    616 {
    617 	ill_t *minill;
    618 	ipmp_arpent_t *entp;
    619 
    620 	ASSERT(IAM_WRITER_IPIF(ipif));
    621 	ASSERT(ipmp_ipif_is_dataaddr(ipif));
    622 
    623 	/*
    624 	 * IPMP data address mappings are internally managed by IP itself, so
    625 	 * delete any existing ARP entries associated with the address.
    626 	 */
    627 	if (!ipif->ipif_isv6) {
    628 		entp = ipmp_illgrp_lookup_arpent(illg, &ipif->ipif_lcl_addr);
    629 		if (entp != NULL)
    630 			ipmp_illgrp_destroy_arpent(illg, entp);
    631 	}
    632 
    633 	if ((minill = ipmp_illgrp_min_ill(illg)) != NULL)
    634 		ipmp_ill_bind_ipif(minill, ipif, Res_act_none);
    635 
    636 	return (ipif->ipif_bound ? ipif->ipif_bound_ill : NULL);
    637 }
    638 
    639 /*
    640  * Delete `ipif' from the pool of usable data addresses on `illg'.  If it's
    641  * bound, unbind it from the underlying ill while keeping an even address
    642  * distribution.
    643  */
    644 void
    645 ipmp_illgrp_del_ipif(ipmp_illgrp_t *illg, ipif_t *ipif)
    646 {
    647 	ill_t *maxill, *boundill = ipif->ipif_bound_ill;
    648 
    649 	ASSERT(IAM_WRITER_IPIF(ipif));
    650 
    651 	if (boundill != NULL) {
    652 		(void) ipmp_ill_unbind_ipif(boundill, ipif, B_FALSE);
    653 
    654 		maxill = ipmp_illgrp_max_ill(illg);
    655 		if (maxill->ill_bound_cnt > boundill->ill_bound_cnt + 1) {
    656 			ipif = ipmp_ill_unbind_ipif(maxill, NULL, B_TRUE);
    657 			ipmp_ill_bind_ipif(boundill, ipif, Res_act_rebind);
    658 		}
    659 	}
    660 }
    661 
    662 /*
    663  * Return the active ill with the greatest number of data addresses in `illg'.
    664  */
    665 static ill_t *
    666 ipmp_illgrp_max_ill(ipmp_illgrp_t *illg)
    667 {
    668 	ill_t *ill, *bestill = NULL;
    669 
    670 	ASSERT(IAM_WRITER_ILL(illg->ig_ipmp_ill));
    671 
    672 	ill = list_head(&illg->ig_actif);
    673 	for (; ill != NULL; ill = list_next(&illg->ig_actif, ill)) {
    674 		if (bestill == NULL ||
    675 		    ill->ill_bound_cnt > bestill->ill_bound_cnt) {
    676 			bestill = ill;
    677 		}
    678 	}
    679 	return (bestill);
    680 }
    681 
    682 /*
    683  * Return the active ill with the fewest number of data addresses in `illg'.
    684  */
    685 static ill_t *
    686 ipmp_illgrp_min_ill(ipmp_illgrp_t *illg)
    687 {
    688 	ill_t *ill, *bestill = NULL;
    689 
    690 	ASSERT(IAM_WRITER_ILL(illg->ig_ipmp_ill));
    691 
    692 	ill = list_head(&illg->ig_actif);
    693 	for (; ill != NULL; ill = list_next(&illg->ig_actif, ill)) {
    694 		if (bestill == NULL ||
    695 		    ill->ill_bound_cnt < bestill->ill_bound_cnt) {
    696 			if (ill->ill_bound_cnt == 0)
    697 				return (ill);	 /* can't get better */
    698 			bestill = ill;
    699 		}
    700 	}
    701 	return (bestill);
    702 }
    703 
    704 /*
    705  * Return a pointer to IPMP meta-interface for `illg' (which must exist).
    706  * Since ig_ipmp_ill never changes for a given illg, no locks are needed.
    707  */
    708 ill_t *
    709 ipmp_illgrp_ipmp_ill(ipmp_illgrp_t *illg)
    710 {
    711 	return (illg->ig_ipmp_ill);
    712 }
    713 
    714 /*
    715  * Return a pointer to the next available underlying ill in `illg', or NULL if
    716  * one doesn't exist.  Caller must be inside the IPSQ.
    717  */
    718 ill_t *
    719 ipmp_illgrp_next_ill(ipmp_illgrp_t *illg)
    720 {
    721 	ill_t *ill;
    722 	ip_stack_t *ipst = IPMP_ILLGRP_TO_IPST(illg);
    723 
    724 	ASSERT(IAM_WRITER_ILL(illg->ig_ipmp_ill));
    725 
    726 	rw_enter(&ipst->ips_ipmp_lock, RW_WRITER);
    727 	if ((ill = illg->ig_next_ill) != NULL) {
    728 		illg->ig_next_ill = list_next(&illg->ig_actif, ill);
    729 		if (illg->ig_next_ill == NULL)
    730 			illg->ig_next_ill = list_head(&illg->ig_actif);
    731 	}
    732 	rw_exit(&ipst->ips_ipmp_lock);
    733 
    734 	return (ill);
    735 }
    736 
    737 /*
    738  * Return a held pointer to the next available underlying ill in `illg', or
    739  * NULL if one doesn't exist.  Caller need not be inside the IPSQ.
    740  */
    741 ill_t *
    742 ipmp_illgrp_hold_next_ill(ipmp_illgrp_t *illg)
    743 {
    744 	ill_t *ill;
    745 	uint_t i;
    746 	ip_stack_t *ipst = IPMP_ILLGRP_TO_IPST(illg);
    747 
    748 	rw_enter(&ipst->ips_ipmp_lock, RW_WRITER);
    749 	for (i = 0; i < illg->ig_nactif; i++) {
    750 		ill = illg->ig_next_ill;
    751 		illg->ig_next_ill = list_next(&illg->ig_actif, ill);
    752 		if (illg->ig_next_ill == NULL)
    753 			illg->ig_next_ill = list_head(&illg->ig_actif);
    754 
    755 		if (ill_check_and_refhold(ill) == 0) {
    756 			rw_exit(&ipst->ips_ipmp_lock);
    757 			return (ill);
    758 		}
    759 	}
    760 	rw_exit(&ipst->ips_ipmp_lock);
    761 
    762 	return (NULL);
    763 }
    764 
    765 /*
    766  * Return a pointer to the nominated multicast ill in `illg', or NULL if one
    767  * doesn't exist.  Caller must be inside the IPSQ.
    768  */
    769 ill_t *
    770 ipmp_illgrp_cast_ill(ipmp_illgrp_t *illg)
    771 {
    772 	ASSERT(IAM_WRITER_ILL(illg->ig_ipmp_ill));
    773 	return (illg->ig_cast_ill);
    774 }
    775 
    776 /*
    777  * Return a held pointer to the nominated multicast ill in `illg', or NULL if
    778  * one doesn't exist.  Caller need not be inside the IPSQ.
    779  */
    780 ill_t *
    781 ipmp_illgrp_hold_cast_ill(ipmp_illgrp_t *illg)
    782 {
    783 	ill_t *castill;
    784 	ip_stack_t *ipst = IPMP_ILLGRP_TO_IPST(illg);
    785 
    786 	rw_enter(&ipst->ips_ipmp_lock, RW_READER);
    787 	castill = illg->ig_cast_ill;
    788 	if (castill != NULL && ill_check_and_refhold(castill) == 0) {
    789 		rw_exit(&ipst->ips_ipmp_lock);
    790 		return (castill);
    791 	}
    792 	rw_exit(&ipst->ips_ipmp_lock);
    793 	return (NULL);
    794 }
    795 
    796 /*
    797  * Set the nominated cast ill on `illg' to `castill'.  If `castill' is NULL,
    798  * any existing nomination is removed.  Caller must be inside the IPSQ.
    799  */
    800 static void
    801 ipmp_illgrp_set_cast(ipmp_illgrp_t *illg, ill_t *castill)
    802 {
    803 	ill_t *ocastill = illg->ig_cast_ill;
    804 	ill_t *ipmp_ill = illg->ig_ipmp_ill;
    805 	ip_stack_t *ipst = IPMP_ILLGRP_TO_IPST(illg);
    806 
    807 	ASSERT(IAM_WRITER_ILL(ipmp_ill));
    808 
    809 	/*
    810 	 * Disable old nominated ill (if any).
    811 	 */
    812 	if (ocastill != NULL) {
    813 		DTRACE_PROBE2(ipmp__illgrp__cast__disable, ipmp_illgrp_t *,
    814 		    illg, ill_t *, ocastill);
    815 		ASSERT(ocastill->ill_nom_cast);
    816 		ocastill->ill_nom_cast = B_FALSE;
    817 		/*
    818 		 * If the IPMP meta-interface is down, we never did the join,
    819 		 * so we must not try to leave.
    820 		 */
    821 		if (ipmp_ill->ill_dl_up)
    822 			ill_leave_multicast(ipmp_ill);
    823 	}
    824 
    825 	/*
    826 	 * Set new nomination.
    827 	 */
    828 	rw_enter(&ipst->ips_ipmp_lock, RW_WRITER);
    829 	illg->ig_cast_ill = castill;
    830 	rw_exit(&ipst->ips_ipmp_lock);
    831 
    832 	if (ocastill != NULL) {
    833 		/*
    834 		 * Delete any IREs tied to the old nomination.  We must do
    835 		 * this after the new castill is set and has reached global
    836 		 * visibility since the datapath has not been quiesced.
    837 		 */
    838 		ire_walk_ill(MATCH_IRE_ILL | MATCH_IRE_TYPE, IRE_CACHE,
    839 		    ill_stq_cache_delete, ocastill, ocastill);
    840 	}
    841 
    842 	/*
    843 	 * Enable new nominated ill (if any).
    844 	 */
    845 	if (castill != NULL) {
    846 		DTRACE_PROBE2(ipmp__illgrp__cast__enable, ipmp_illgrp_t *,
    847 		    illg, ill_t *, castill);
    848 		ASSERT(!castill->ill_nom_cast);
    849 		castill->ill_nom_cast = B_TRUE;
    850 		/*
    851 		 * If the IPMP meta-interface is down, the attempt to recover
    852 		 * will silently fail but ill_need_recover_multicast will be
    853 		 * erroneously cleared -- so check first.
    854 		 */
    855 		if (ipmp_ill->ill_dl_up)
    856 			ill_recover_multicast(ipmp_ill);
    857 	}
    858 
    859 	/*
    860 	 * For IPv4, refresh our broadcast IREs.  This needs to be done even
    861 	 * if there's no new nomination since ill_refresh_bcast() still must
    862 	 * update the IPMP meta-interface's broadcast IREs to point back at
    863 	 * the IPMP meta-interface itself.
    864 	 */
    865 	if (!ipmp_ill->ill_isv6)
    866 		ill_refresh_bcast(ipmp_ill);
    867 }
    868 
    869 /*
    870  * Create an IPMP ARP entry and add it to the set tracked on `illg'.  If an
    871  * entry for the same IP address already exists, destroy it first.  Return the
    872  * created IPMP ARP entry, or NULL on failure.
    873  */
    874 ipmp_arpent_t *
    875 ipmp_illgrp_create_arpent(ipmp_illgrp_t *illg, mblk_t *mp, boolean_t proxyarp)
    876 {
    877 	uchar_t *addrp;
    878 	area_t *area = (area_t *)mp->b_rptr;
    879 	ipmp_arpent_t *entp, *oentp;
    880 
    881 	ASSERT(IAM_WRITER_ILL(illg->ig_ipmp_ill));
    882 	ASSERT(area->area_proto_addr_length == sizeof (ipaddr_t));
    883 
    884 	if ((entp = kmem_zalloc(sizeof (ipmp_arpent_t), KM_NOSLEEP)) == NULL)
    885 		return (NULL);
    886 
    887 	if ((mp = copyb(mp)) == NULL) {
    888 		kmem_free(entp, sizeof (ipmp_arpent_t));
    889 		return (NULL);
    890 	}
    891 
    892 	DB_TYPE(mp) = M_PROTO;
    893 	entp->ia_area_mp = mp;
    894 	entp->ia_proxyarp = proxyarp;
    895 	addrp = mi_offset_paramc(mp, area->area_proto_addr_offset,
    896 	    sizeof (ipaddr_t));
    897 	bcopy(addrp, &entp->ia_ipaddr, sizeof (ipaddr_t));
    898 
    899 	if ((oentp = ipmp_illgrp_lookup_arpent(illg, &entp->ia_ipaddr)) != NULL)
    900 		ipmp_illgrp_destroy_arpent(illg, oentp);
    901 
    902 	list_insert_head(&illg->ig_arpent, entp);
    903 	return (entp);
    904 }
    905 
    906 /*
    907  * Remove IPMP ARP entry `entp' from the set tracked on `illg' and destroy it.
    908  */
    909 void
    910 ipmp_illgrp_destroy_arpent(ipmp_illgrp_t *illg, ipmp_arpent_t *entp)
    911 {
    912 	ASSERT(IAM_WRITER_ILL(illg->ig_ipmp_ill));
    913 
    914 	list_remove(&illg->ig_arpent, entp);
    915 	freeb(entp->ia_area_mp);
    916 	kmem_free(entp, sizeof (ipmp_arpent_t));
    917 }
    918 
    919 /*
    920  * Mark that ARP has been notified about the IP address on `entp'; `illg' is
    921  * taken as a debugging aid for DTrace FBT probes.
    922  */
    923 /* ARGSUSED */
    924 void
    925 ipmp_illgrp_mark_arpent(ipmp_illgrp_t *illg, ipmp_arpent_t *entp)
    926 {
    927 	entp->ia_notified = B_TRUE;
    928 }
    929 
    930 /*
    931  * Look up the IPMP ARP entry for IP address `addrp' on `illg'; if `addrp' is
    932  * NULL, any IPMP ARP entry is requested.  Return NULL if it does not exist.
    933  */
    934 ipmp_arpent_t *
    935 ipmp_illgrp_lookup_arpent(ipmp_illgrp_t *illg, ipaddr_t *addrp)
    936 {
    937 	ipmp_arpent_t *entp = list_head(&illg->ig_arpent);
    938 
    939 	ASSERT(IAM_WRITER_ILL(illg->ig_ipmp_ill));
    940 
    941 	if (addrp == NULL)
    942 		return (entp);
    943 
    944 	for (; entp != NULL; entp = list_next(&illg->ig_arpent, entp))
    945 		if (entp->ia_ipaddr == *addrp)
    946 			break;
    947 	return (entp);
    948 }
    949 
    950 /*
    951  * Refresh ARP entries on `illg' to be distributed across its active
    952  * interfaces.  Entries that cannot be refreshed (e.g., because there are no
    953  * active interfaces) are marked so that subsequent calls can try again.
    954  */
    955 void
    956 ipmp_illgrp_refresh_arpent(ipmp_illgrp_t *illg)
    957 {
    958 	ill_t *ill, *ipmp_ill = illg->ig_ipmp_ill;
    959 	uint_t paddrlen = ipmp_ill->ill_phys_addr_length;
    960 	area_t *area;
    961 	mblk_t *area_mp;
    962 	uchar_t *physaddr;
    963 	ipmp_arpent_t *entp;
    964 
    965 	ASSERT(IAM_WRITER_ILL(ipmp_ill));
    966 	ASSERT(!ipmp_ill->ill_isv6);
    967 
    968 	ill = list_head(&illg->ig_actif);
    969 	entp = list_head(&illg->ig_arpent);
    970 	for (; entp != NULL; entp = list_next(&illg->ig_arpent, entp)) {
    971 		if (ill == NULL || ipmp_ill->ill_ipif_up_count == 0) {
    972 			entp->ia_notified = B_FALSE;
    973 			continue;
    974 		}
    975 
    976 		area = (area_t *)entp->ia_area_mp->b_rptr;
    977 		ASSERT(paddrlen == ill->ill_phys_addr_length);
    978 		ASSERT(paddrlen == area->area_hw_addr_length);
    979 		physaddr = mi_offset_paramc(entp->ia_area_mp,
    980 		    area->area_hw_addr_offset, paddrlen);
    981 
    982 		/*
    983 		 * If this is a proxy ARP entry, we can skip notifying ARP if
    984 		 * the entry is already up-to-date.  If it has changed, we
    985 		 * update the entry's hardware address before notifying ARP.
    986 		 */
    987 		if (entp->ia_proxyarp) {
    988 			if (bcmp(ill->ill_phys_addr, physaddr, paddrlen) == 0 &&
    989 			    entp->ia_notified)
    990 				continue;
    991 			bcopy(ill->ill_phys_addr, physaddr, paddrlen);
    992 		}
    993 
    994 		if ((area_mp = copyb(entp->ia_area_mp)) == NULL) {
    995 			entp->ia_notified = B_FALSE;
    996 			continue;
    997 		}
    998 
    999 		putnext(ipmp_ill->ill_rq, area_mp);
   1000 		ipmp_illgrp_mark_arpent(illg, entp);
   1001 
   1002 		if ((ill = list_next(&illg->ig_actif, ill)) == NULL)
   1003 			ill = list_head(&illg->ig_actif);
   1004 	}
   1005 }
   1006 
   1007 /*
   1008  * Return an interface in `illg' with the specified `physaddr', or NULL if one
   1009  * doesn't exist.  Caller must hold ill_g_lock if it's not inside the IPSQ.
   1010  */
   1011 ill_t *
   1012 ipmp_illgrp_find_ill(ipmp_illgrp_t *illg, uchar_t *physaddr, uint_t paddrlen)
   1013 {
   1014 	ill_t *ill;
   1015 	ill_t *ipmp_ill = illg->ig_ipmp_ill;
   1016 	ip_stack_t *ipst = IPMP_ILLGRP_TO_IPST(illg);
   1017 
   1018 	ASSERT(IAM_WRITER_ILL(ipmp_ill) || RW_LOCK_HELD(&ipst->ips_ill_g_lock));
   1019 
   1020 	ill = list_head(&illg->ig_if);
   1021 	for (; ill != NULL; ill = list_next(&illg->ig_if, ill)) {
   1022 		if (ill->ill_phys_addr_length == paddrlen &&
   1023 		    bcmp(ill->ill_phys_addr, physaddr, paddrlen) == 0)
   1024 			return (ill);
   1025 	}
   1026 	return (NULL);
   1027 }
   1028 
   1029 /*
   1030  * Asynchronously update the MTU for an IPMP ill by injecting a DL_NOTIFY_IND.
   1031  * Caller must be inside the IPSQ unless this is initialization.
   1032  */
   1033 static void
   1034 ipmp_illgrp_set_mtu(ipmp_illgrp_t *illg, uint_t mtu)
   1035 {
   1036 	ill_t *ill = illg->ig_ipmp_ill;
   1037 	mblk_t *mp;
   1038 
   1039 	ASSERT(illg->ig_mtu == 0 || IAM_WRITER_ILL(ill));
   1040 
   1041 	/*
   1042 	 * If allocation fails, we have bigger problems than MTU.
   1043 	 */
   1044 	if ((mp = ip_dlnotify_alloc(DL_NOTE_SDU_SIZE, mtu)) != NULL) {
   1045 		illg->ig_mtu = mtu;
   1046 		put(ill->ill_rq, mp);
   1047 	}
   1048 }
   1049 
   1050 /*
   1051  * Recalculate the IPMP group MTU for `illg', and update its associated IPMP
   1052  * ill MTU if necessary.
   1053  */
   1054 void
   1055 ipmp_illgrp_refresh_mtu(ipmp_illgrp_t *illg)
   1056 {
   1057 	ill_t *ill;
   1058 	ill_t *ipmp_ill = illg->ig_ipmp_ill;
   1059 	uint_t mtu = 0;
   1060 
   1061 	ASSERT(IAM_WRITER_ILL(ipmp_ill));
   1062 
   1063 	/*
   1064 	 * Since ill_max_mtu can only change under ill_lock, we hold ill_lock
   1065 	 * for each ill as we iterate through the list.  Any changes to the
   1066 	 * ill_max_mtu will also trigger an update, so even if we missed it
   1067 	 * this time around, the update will catch it.
   1068 	 */
   1069 	ill = list_head(&illg->ig_if);
   1070 	for (; ill != NULL; ill = list_next(&illg->ig_if, ill)) {
   1071 		mutex_enter(&ill->ill_lock);
   1072 		if (mtu == 0 || ill->ill_max_mtu < mtu)
   1073 			mtu = ill->ill_max_mtu;
   1074 		mutex_exit(&ill->ill_lock);
   1075 	}
   1076 
   1077 	/*
   1078 	 * MTU must be at least the minimum MTU.
   1079 	 */
   1080 	mtu = MAX(mtu, ipmp_ill->ill_isv6 ? IPV6_MIN_MTU : IP_MIN_MTU);
   1081 
   1082 	if (illg->ig_mtu != mtu)
   1083 		ipmp_illgrp_set_mtu(illg, mtu);
   1084 }
   1085 
   1086 /*
   1087  * Link illgrp `illg' to IPMP group `grp'.  To simplify the caller, silently
   1088  * allow the same link to be established more than once.
   1089  */
   1090 void
   1091 ipmp_illgrp_link_grp(ipmp_illgrp_t *illg, ipmp_grp_t *grp)
   1092 {
   1093 	ip_stack_t *ipst = IPMP_ILLGRP_TO_IPST(illg);
   1094 
   1095 	ASSERT(RW_WRITE_HELD(&ipst->ips_ipmp_lock));
   1096 
   1097 	if (illg->ig_ipmp_ill->ill_isv6) {
   1098 		ASSERT(grp->gr_v6 == NULL || grp->gr_v6 == illg);
   1099 		grp->gr_v6 = illg;
   1100 	} else {
   1101 		ASSERT(grp->gr_v4 == NULL || grp->gr_v4 == illg);
   1102 		grp->gr_v4 = illg;
   1103 	}
   1104 }
   1105 
   1106 /*
   1107  * Unlink illgrp `illg' from its IPMP group.  Return an errno if the illgrp
   1108  * cannot be unlinked (e.g., because there are still interfaces using it).
   1109  */
   1110 int
   1111 ipmp_illgrp_unlink_grp(ipmp_illgrp_t *illg)
   1112 {
   1113 	ipmp_grp_t *grp = illg->ig_ipmp_ill->ill_phyint->phyint_grp;
   1114 	ip_stack_t *ipst = IPMP_ILLGRP_TO_IPST(illg);
   1115 
   1116 	ASSERT(RW_WRITE_HELD(&ipst->ips_ipmp_lock));
   1117 
   1118 	if (illg->ig_ipmp_ill->ill_isv6) {
   1119 		if (grp->gr_nv6 + grp->gr_pendv6 != 0)
   1120 			return (EBUSY);
   1121 		grp->gr_v6 = NULL;
   1122 	} else {
   1123 		if (grp->gr_nv4 + grp->gr_pendv4 != 0)
   1124 			return (EBUSY);
   1125 		grp->gr_v4 = NULL;
   1126 	}
   1127 	return (0);
   1128 }
   1129 
   1130 /*
   1131  * Place `ill' into `illg', and rebalance the data addresses on `illg'
   1132  * to be spread evenly across the ills now in it.  Also, adjust the IPMP
   1133  * ill as necessary to account for `ill' (e.g., MTU).
   1134  */
   1135 void
   1136 ipmp_ill_join_illgrp(ill_t *ill, ipmp_illgrp_t *illg)
   1137 {
   1138 	ill_t *ipmp_ill;
   1139 	ipif_t *ipif;
   1140 	ip_stack_t *ipst = ill->ill_ipst;
   1141 
   1142 	/* IS_UNDER_IPMP() requires ill_grp to be non-NULL */
   1143 	ASSERT(!IS_IPMP(ill) && ill->ill_phyint->phyint_grp != NULL);
   1144 	ASSERT(IAM_WRITER_ILL(ill));
   1145 	ASSERT(ill->ill_grp == NULL);
   1146 
   1147 	ipmp_ill = illg->ig_ipmp_ill;
   1148 
   1149 	/*
   1150 	 * Account for `ill' joining the illgrp.
   1151 	 */
   1152 	rw_enter(&ipst->ips_ipmp_lock, RW_WRITER);
   1153 	if (ill->ill_isv6)
   1154 		ill->ill_phyint->phyint_grp->gr_nv6++;
   1155 	else
   1156 		ill->ill_phyint->phyint_grp->gr_nv4++;
   1157 	rw_exit(&ipst->ips_ipmp_lock);
   1158 
   1159 	/*
   1160 	 * Ensure the ILLF_ROUTER flag remains consistent across the group.
   1161 	 */
   1162 	mutex_enter(&ill->ill_lock);
   1163 	if (ipmp_ill->ill_flags & ILLF_ROUTER)
   1164 		ill->ill_flags |= ILLF_ROUTER;
   1165 	else
   1166 		ill->ill_flags &= ~ILLF_ROUTER;
   1167 	mutex_exit(&ill->ill_lock);
   1168 
   1169 	/*
   1170 	 * Blow away all multicast memberships that currently exist on `ill'.
   1171 	 * This may seem odd, but it's consistent with the application view
   1172 	 * that `ill' no longer exists (e.g., due to ipmp_ill_rtsaddrmsg()).
   1173 	 */
   1174 	if (ill->ill_isv6) {
   1175 		reset_conn_ill(ill);
   1176 		reset_mrt_ill(ill);
   1177 	} else {
   1178 		ipif = ill->ill_ipif;
   1179 		for (; ipif != NULL; ipif = ipif->ipif_next) {
   1180 			reset_conn_ipif(ipif);
   1181 			reset_mrt_vif_ipif(ipif);
   1182 		}
   1183 	}
   1184 	ip_purge_allmulti(ill);
   1185 
   1186 	/*
   1187 	 * Borrow the first ill's ill_phys_addr_length value for the illgrp's
   1188 	 * physical address length.  All other ills must have the same value,
   1189 	 * since they are required to all be the same mactype.  Also update
   1190 	 * the IPMP ill's MTU and CoS marking, if necessary.
   1191 	 */
   1192 	if (list_is_empty(&illg->ig_if)) {
   1193 		ASSERT(ipmp_ill->ill_phys_addr_length == 0);
   1194 		/*
   1195 		 * NOTE: we leave ill_phys_addr NULL since the IPMP group
   1196 		 * doesn't have a physical address.  This means that code must
   1197 		 * not assume that ill_phys_addr is non-NULL just because
   1198 		 * ill_phys_addr_length is non-zero.  Likewise for ill_nd_lla.
   1199 		 */
   1200 		ipmp_ill->ill_phys_addr_length = ill->ill_phys_addr_length;
   1201 		ipmp_ill->ill_nd_lla_len = ill->ill_phys_addr_length;
   1202 		ipmp_ill->ill_type = ill->ill_type;
   1203 
   1204 		if (ill->ill_flags & ILLF_COS_ENABLED) {
   1205 			mutex_enter(&ipmp_ill->ill_lock);
   1206 			ipmp_ill->ill_flags |= ILLF_COS_ENABLED;
   1207 			mutex_exit(&ipmp_ill->ill_lock);
   1208 		}
   1209 		ipmp_illgrp_set_mtu(illg, ill->ill_max_mtu);
   1210 	} else {
   1211 		ASSERT(ipmp_ill->ill_phys_addr_length ==
   1212 		    ill->ill_phys_addr_length);
   1213 		ASSERT(ipmp_ill->ill_type == ill->ill_type);
   1214 
   1215 		if (!(ill->ill_flags & ILLF_COS_ENABLED)) {
   1216 			mutex_enter(&ipmp_ill->ill_lock);
   1217 			ipmp_ill->ill_flags &= ~ILLF_COS_ENABLED;
   1218 			mutex_exit(&ipmp_ill->ill_lock);
   1219 		}
   1220 		if (illg->ig_mtu > ill->ill_max_mtu)
   1221 			ipmp_illgrp_set_mtu(illg, ill->ill_max_mtu);
   1222 	}
   1223 
   1224 	rw_enter(&ipst->ips_ill_g_lock, RW_WRITER);
   1225 	list_insert_tail(&illg->ig_if, ill);
   1226 	ill->ill_grp = illg;
   1227 	rw_exit(&ipst->ips_ill_g_lock);
   1228 
   1229 	/*
   1230 	 * Hide the IREs on `ill' so that we don't accidentally find them when
   1231 	 * sending data traffic.
   1232 	 */
   1233 	ire_walk_ill(MATCH_IRE_ILL, 0, ipmp_ill_ire_mark_testhidden, ill, ill);
   1234 
   1235 	/*
   1236 	 * Merge any broadcast IREs, if need be.
   1237 	 */
   1238 	if (!ill->ill_isv6)
   1239 		ill_refresh_bcast(ill);
   1240 
   1241 	ipmp_ill_refresh_active(ill);
   1242 }
   1243 
   1244 /*
   1245  * Remove `ill' from its illgrp, and rebalance the data addresses in that
   1246  * illgrp to be spread evenly across the remaining ills.  Also, adjust the
   1247  * IPMP ill as necessary now that `ill' is removed (e.g., MTU).
   1248  */
   1249 void
   1250 ipmp_ill_leave_illgrp(ill_t *ill)
   1251 {
   1252 	ill_t *ipmp_ill;
   1253 	ipif_t *ipif;
   1254 	ipmp_arpent_t *entp;
   1255 	ipmp_illgrp_t *illg = ill->ill_grp;
   1256 	ip_stack_t *ipst = IPMP_ILLGRP_TO_IPST(illg);
   1257 
   1258 	ASSERT(IS_UNDER_IPMP(ill));
   1259 	ASSERT(IAM_WRITER_ILL(ill));
   1260 	ASSERT(illg != NULL);
   1261 
   1262 	ipmp_ill = illg->ig_ipmp_ill;
   1263 
   1264 	/*
   1265 	 * Cancel IPMP-specific ill timeouts.
   1266 	 */
   1267 	(void) untimeout(ill->ill_refresh_tid);
   1268 
   1269 	/*
   1270 	 * Expose any previously-hidden IREs on `ill'.
   1271 	 */
   1272 	ire_walk_ill(MATCH_IRE_ILL, 0, ipmp_ill_ire_clear_testhidden, ill, ill);
   1273 
   1274 	/*
   1275 	 * Ensure the multicast state for each ipif on `ill' is down so that
   1276 	 * our ipif_multicast_up() (once `ill' leaves the group) will rejoin
   1277 	 * all eligible groups.
   1278 	 */
   1279 	for (ipif = ill->ill_ipif; ipif != NULL; ipif = ipif->ipif_next)
   1280 		if (ipif->ipif_flags & IPIF_UP)
   1281 			ipif_multicast_down(ipif);
   1282 
   1283 	/*
   1284 	 * Account for `ill' leaving the illgrp.
   1285 	 */
   1286 	rw_enter(&ipst->ips_ipmp_lock, RW_WRITER);
   1287 	if (ill->ill_isv6)
   1288 		ill->ill_phyint->phyint_grp->gr_nv6--;
   1289 	else
   1290 		ill->ill_phyint->phyint_grp->gr_nv4--;
   1291 	rw_exit(&ipst->ips_ipmp_lock);
   1292 
   1293 	/*
   1294 	 * Pull `ill' out of the interface lists.
   1295 	 */
   1296 	if (list_link_active(&ill->ill_actnode))
   1297 		ipmp_ill_deactivate(ill);
   1298 	rw_enter(&ipst->ips_ill_g_lock, RW_WRITER);
   1299 	list_remove(&illg->ig_if, ill);
   1300 	ill->ill_grp = NULL;
   1301 	rw_exit(&ipst->ips_ill_g_lock);
   1302 
   1303 	/*
   1304 	 * Recreate any broadcast IREs that had been shared, if need be.
   1305 	 */
   1306 	if (!ill->ill_isv6)
   1307 		ill_refresh_bcast(ill);
   1308 
   1309 	/*
   1310 	 * Re-establish multicast memberships that were previously being
   1311 	 * handled by the IPMP meta-interface.
   1312 	 */
   1313 	for (ipif = ill->ill_ipif; ipif != NULL; ipif = ipif->ipif_next)
   1314 		if (ipif->ipif_flags & IPIF_UP)
   1315 			ipif_multicast_up(ipif);
   1316 
   1317 	/*
   1318 	 * Refresh the group MTU based on the new interface list.
   1319 	 */
   1320 	ipmp_illgrp_refresh_mtu(illg);
   1321 
   1322 	if (list_is_empty(&illg->ig_if)) {
   1323 		/*
   1324 		 * No ills left in the illgrp; we no longer have a physical
   1325 		 * address length, nor can we support ARP, CoS, or anything
   1326 		 * else that depends on knowing the link layer type.
   1327 		 */
   1328 		while ((entp = ipmp_illgrp_lookup_arpent(illg, NULL)) != NULL)
   1329 			ipmp_illgrp_destroy_arpent(illg, entp);
   1330 
   1331 		ipmp_ill->ill_phys_addr_length = 0;
   1332 		ipmp_ill->ill_nd_lla_len = 0;
   1333 		ipmp_ill->ill_type = IFT_OTHER;
   1334 		mutex_enter(&ipmp_ill->ill_lock);
   1335 		ipmp_ill->ill_flags &= ~ILLF_COS_ENABLED;
   1336 		mutex_exit(&ipmp_ill->ill_lock);
   1337 	} else {
   1338 		/*
   1339 		 * If `ill' didn't support CoS, see if it can now be enabled.
   1340 		 */
   1341 		if (!(ill->ill_flags & ILLF_COS_ENABLED)) {
   1342 			ASSERT(!(ipmp_ill->ill_flags & ILLF_COS_ENABLED));
   1343 
   1344 			ill = list_head(&illg->ig_if);
   1345 			do {
   1346 				if (!(ill->ill_flags & ILLF_COS_ENABLED))
   1347 					break;
   1348 			} while ((ill = list_next(&illg->ig_if, ill)) != NULL);
   1349 
   1350 			if (ill == NULL) {
   1351 				mutex_enter(&ipmp_ill->ill_lock);
   1352 				ipmp_ill->ill_flags |= ILLF_COS_ENABLED;
   1353 				mutex_exit(&ipmp_ill->ill_lock);
   1354 			}
   1355 		}
   1356 	}
   1357 }
   1358 
   1359 /*
   1360  * Check if `ill' should be active, and activate or deactivate if need be.
   1361  * Return B_FALSE if a refresh was necessary but could not be performed.
   1362  */
   1363 static boolean_t
   1364 ipmp_ill_try_refresh_active(ill_t *ill)
   1365 {
   1366 	boolean_t refreshed = B_TRUE;
   1367 
   1368 	ASSERT(IAM_WRITER_ILL(ill));
   1369 	ASSERT(IS_UNDER_IPMP(ill));
   1370 
   1371 	if (ipmp_ill_is_active(ill)) {
   1372 		if (!list_link_active(&ill->ill_actnode))
   1373 			refreshed = ipmp_ill_activate(ill);
   1374 	} else {
   1375 		if (list_link_active(&ill->ill_actnode))
   1376 			ipmp_ill_deactivate(ill);
   1377 	}
   1378 
   1379 	return (refreshed);
   1380 }
   1381 
   1382 /*
   1383  * Check if `ill' should be active, and activate or deactivate if need be.
   1384  * If the refresh fails, schedule a timer to try again later.
   1385  */
   1386 void
   1387 ipmp_ill_refresh_active(ill_t *ill)
   1388 {
   1389 	if (!ipmp_ill_try_refresh_active(ill))
   1390 		ipmp_ill_refresh_active_timer_start(ill);
   1391 }
   1392 
   1393 /*
   1394  * Retry ipmp_ill_try_refresh_active() on the ill named by `ill_arg'.
   1395  */
   1396 static void
   1397 ipmp_ill_refresh_active_timer(void *ill_arg)
   1398 {
   1399 	ill_t *ill = ill_arg;
   1400 	boolean_t refreshed = B_FALSE;
   1401 
   1402 	/*
   1403 	 * Clear ill_refresh_tid to indicate that no timeout is pending
   1404 	 * (another thread could schedule a new timeout while we're still
   1405 	 * running, but that's harmless).  If the ill is going away, bail.
   1406 	 */
   1407 	mutex_enter(&ill->ill_lock);
   1408 	ill->ill_refresh_tid = 0;
   1409 	if (ill->ill_state_flags & ILL_CONDEMNED) {
   1410 		mutex_exit(&ill->ill_lock);
   1411 		return;
   1412 	}
   1413 	mutex_exit(&ill->ill_lock);
   1414 
   1415 	if (ipsq_try_enter(NULL, ill, NULL, NULL, NULL, NEW_OP, B_FALSE)) {
   1416 		refreshed = ipmp_ill_try_refresh_active(ill);
   1417 		ipsq_exit(ill->ill_phyint->phyint_ipsq);
   1418 	}
   1419 
   1420 	/*
   1421 	 * If the refresh failed, schedule another attempt.
   1422 	 */
   1423 	if (!refreshed)
   1424 		ipmp_ill_refresh_active_timer_start(ill);
   1425 }
   1426 
   1427 /*
   1428  * Retry an ipmp_ill_try_refresh_active() on the ill named by `arg'.
   1429  */
   1430 static void
   1431 ipmp_ill_refresh_active_timer_start(ill_t *ill)
   1432 {
   1433 	mutex_enter(&ill->ill_lock);
   1434 
   1435 	/*
   1436 	 * If the ill is going away or a refresh is already scheduled, bail.
   1437 	 */
   1438 	if (ill->ill_refresh_tid != 0 ||
   1439 	    (ill->ill_state_flags & ILL_CONDEMNED)) {
   1440 		mutex_exit(&ill->ill_lock);
   1441 		return;
   1442 	}
   1443 
   1444 	ill->ill_refresh_tid = timeout(ipmp_ill_refresh_active_timer, ill,
   1445 	    SEC_TO_TICK(IPMP_ILL_REFRESH_TIMEOUT));
   1446 
   1447 	mutex_exit(&ill->ill_lock);
   1448 }
   1449 
   1450 /*
   1451  * Activate `ill' so it will be used to send and receive data traffic.  Return
   1452  * B_FALSE if `ill' cannot be activated.  Note that we allocate any messages
   1453  * needed to deactivate `ill' here as well so that deactivation cannot fail.
   1454  */
   1455 static boolean_t
   1456 ipmp_ill_activate(ill_t *ill)
   1457 {
   1458 	ipif_t		*ipif;
   1459 	mblk_t		*actmp = NULL, *deactmp = NULL;
   1460 	mblk_t		*linkupmp = NULL, *linkdownmp = NULL;
   1461 	ipmp_grp_t	*grp = ill->ill_phyint->phyint_grp;
   1462 	const char	*grifname = grp->gr_ifname;
   1463 	ipmp_illgrp_t	*illg = ill->ill_grp;
   1464 	ill_t		*maxill;
   1465 	ip_stack_t	*ipst = IPMP_ILLGRP_TO_IPST(illg);
   1466 
   1467 	ASSERT(IAM_WRITER_ILL(ill));
   1468 	ASSERT(IS_UNDER_IPMP(ill));
   1469 
   1470 	/*
   1471 	 * If this will be the first active interface in the group, allocate
   1472 	 * the link-up and link-down messages.
   1473 	 */
   1474 	if (grp->gr_nactif == 0) {
   1475 		linkupmp = ip_dlnotify_alloc(DL_NOTE_LINK_UP, 0);
   1476 		linkdownmp = ip_dlnotify_alloc(DL_NOTE_LINK_DOWN, 0);
   1477 		if (linkupmp == NULL || linkdownmp == NULL)
   1478 			goto fail;
   1479 	}
   1480 
   1481 	/*
   1482 	 * For IPv4, allocate the activate/deactivate messages, and tell ARP.
   1483 	 */
   1484 	if (!ill->ill_isv6) {
   1485 		actmp = ill_arie_alloc(ill, grifname, &ipmp_aract_template);
   1486 		deactmp = ill_arie_alloc(ill, grifname, &ipmp_ardeact_template);
   1487 		if (actmp == NULL || deactmp == NULL)
   1488 			goto fail;
   1489 
   1490 		ASSERT(ill->ill_ardeact_mp == NULL);
   1491 		ill->ill_ardeact_mp = deactmp;
   1492 		putnext(illg->ig_ipmp_ill->ill_rq, actmp);
   1493 	}
   1494 
   1495 	if (list_is_empty(&illg->ig_actif)) {
   1496 		/*
   1497 		 * Now that we have an active ill, nominate it for multicast
   1498 		 * and broadcast duties.  Do this before ipmp_ill_bind_ipif()
   1499 		 * since that may need to send multicast packets (e.g., IPv6
   1500 		 * neighbor discovery probes).
   1501 		 */
   1502 		ipmp_illgrp_set_cast(illg, ill);
   1503 
   1504 		/*
   1505 		 * This is the first active ill in the illgrp -- add 'em all.
   1506 		 * We can access/walk ig_ipmp_ill's ipif list since we're
   1507 		 * writer on its IPSQ as well.
   1508 		 */
   1509 		ipif = illg->ig_ipmp_ill->ill_ipif;
   1510 		for (; ipif != NULL; ipif = ipif->ipif_next)
   1511 			if (ipmp_ipif_is_up_dataaddr(ipif))
   1512 				ipmp_ill_bind_ipif(ill, ipif, Res_act_initial);
   1513 	} else {
   1514 		/*
   1515 		 * Redistribute the addresses by moving them from the ill with
   1516 		 * the most addresses until the ill being activated is at the
   1517 		 * same level as the rest of the ills.
   1518 		 */
   1519 		for (;;) {
   1520 			maxill = ipmp_illgrp_max_ill(illg);
   1521 			ASSERT(maxill != NULL);
   1522 			if (ill->ill_bound_cnt + 1 >= maxill->ill_bound_cnt)
   1523 				break;
   1524 			ipif = ipmp_ill_unbind_ipif(maxill, NULL, B_TRUE);
   1525 			ipmp_ill_bind_ipif(ill, ipif, Res_act_rebind);
   1526 		}
   1527 
   1528 		/*
   1529 		 * TODO: explore whether it's advantageous to flush IRE_CACHE
   1530 		 * bindings to force existing connections to be redistributed
   1531 		 * to the new ill.
   1532 		 */
   1533 	}
   1534 
   1535 	/*
   1536 	 * Put the interface in the active list.
   1537 	 */
   1538 	rw_enter(&ipst->ips_ipmp_lock, RW_WRITER);
   1539 	list_insert_tail(&illg->ig_actif, ill);
   1540 	illg->ig_nactif++;
   1541 	illg->ig_next_ill = ill;
   1542 	rw_exit(&ipst->ips_ipmp_lock);
   1543 
   1544 	/*
   1545 	 * Refresh ARP entries to use `ill', if need be.
   1546 	 */
   1547 	if (!ill->ill_isv6)
   1548 		ipmp_illgrp_refresh_arpent(illg);
   1549 
   1550 	/*
   1551 	 * Finally, mark the group link up, if necessary.
   1552 	 */
   1553 	if (grp->gr_nactif++ == 0) {
   1554 		ASSERT(grp->gr_linkdownmp == NULL);
   1555 		grp->gr_linkdownmp = linkdownmp;
   1556 		put(illg->ig_ipmp_ill->ill_rq, linkupmp);
   1557 	}
   1558 	return (B_TRUE);
   1559 fail:
   1560 	freemsg(actmp);
   1561 	freemsg(deactmp);
   1562 	freemsg(linkupmp);
   1563 	freemsg(linkdownmp);
   1564 	return (B_FALSE);
   1565 }
   1566 
   1567 /*
   1568  * Deactivate `ill' so it will not be used to send or receive data traffic.
   1569  */
   1570 static void
   1571 ipmp_ill_deactivate(ill_t *ill)
   1572 {
   1573 	ill_t		*minill;
   1574 	ipif_t		*ipif, *ubnextipif, *ubheadipif = NULL;
   1575 	mblk_t		*mp;
   1576 	ipmp_grp_t	*grp = ill->ill_phyint->phyint_grp;
   1577 	ipmp_illgrp_t	*illg = ill->ill_grp;
   1578 	ip_stack_t	*ipst = IPMP_ILLGRP_TO_IPST(illg);
   1579 
   1580 	ASSERT(IAM_WRITER_ILL(ill));
   1581 	ASSERT(IS_UNDER_IPMP(ill));
   1582 
   1583 	/*
   1584 	 * Delete all IRE_CACHE entries for the group.  (We cannot restrict
   1585 	 * ourselves to entries with ire_stq == ill since there may be other
   1586 	 * IREs that are backed by ACEs that are tied to this ill -- and thus
   1587 	 * when those ACEs are deleted, the IREs will be adrift without any
   1588 	 * AR_CN_ANNOUNCE notification from ARP.)
   1589 	 */
   1590 	if (ill->ill_isv6)
   1591 		ire_walk_v6(ill_grp_cache_delete, ill, ALL_ZONES, ipst);
   1592 	else
   1593 		ire_walk_v4(ill_grp_cache_delete, ill, ALL_ZONES, ipst);
   1594 
   1595 	/*
   1596 	 * Pull the interface out of the active list.
   1597 	 */
   1598 	rw_enter(&ipst->ips_ipmp_lock, RW_WRITER);
   1599 	list_remove(&illg->ig_actif, ill);
   1600 	illg->ig_nactif--;
   1601 	illg->ig_next_ill = list_head(&illg->ig_actif);
   1602 	rw_exit(&ipst->ips_ipmp_lock);
   1603 
   1604 	/*
   1605 	 * If the ill that's being deactivated had been nominated for
   1606 	 * multicast/broadcast, nominate a new one.
   1607 	 */
   1608 	if (ill == illg->ig_cast_ill)
   1609 		ipmp_illgrp_set_cast(illg, list_head(&illg->ig_actif));
   1610 
   1611 	/*
   1612 	 * Unbind all of the ipifs bound to this ill, and save 'em in a list;
   1613 	 * we'll rebind them after we tell the resolver the ill is no longer
   1614 	 * active.  We must do things in this order or the resolver could
   1615 	 * accidentally rebind to the ill we're trying to remove if multiple
   1616 	 * ills in the group have the same hardware address (which is
   1617 	 * unsupported, but shouldn't lead to a wedged machine).
   1618 	 */
   1619 	while ((ipif = ipmp_ill_unbind_ipif(ill, NULL, B_TRUE)) != NULL) {
   1620 		ipif->ipif_bound_next = ubheadipif;
   1621 		ubheadipif = ipif;
   1622 	}
   1623 
   1624 	if (!ill->ill_isv6) {
   1625 		/*
   1626 		 * Tell ARP `ill' is no longer active in the group.
   1627 		 */
   1628 		mp = ill->ill_ardeact_mp;
   1629 		ill->ill_ardeact_mp = NULL;
   1630 		ASSERT(mp != NULL);
   1631 		putnext(illg->ig_ipmp_ill->ill_rq, mp);
   1632 
   1633 		/*
   1634 		 * Refresh any ARP entries that had been using `ill'.
   1635 		 */
   1636 		ipmp_illgrp_refresh_arpent(illg);
   1637 	}
   1638 
   1639 	/*
   1640 	 * Rebind each ipif from the deactivated ill to the active ill with
   1641 	 * the fewest ipifs.  If there are no active ills, the ipifs will
   1642 	 * remain unbound.
   1643 	 */
   1644 	for (ipif = ubheadipif; ipif != NULL; ipif = ubnextipif) {
   1645 		ubnextipif = ipif->ipif_bound_next;
   1646 		ipif->ipif_bound_next = NULL;
   1647 
   1648 		if ((minill = ipmp_illgrp_min_ill(illg)) != NULL)
   1649 			ipmp_ill_bind_ipif(minill, ipif, Res_act_rebind);
   1650 	}
   1651 
   1652 	/*
   1653 	 * Finally, mark the group link down, if necessary.
   1654 	 */
   1655 	if (--grp->gr_nactif == 0) {
   1656 		mp = grp->gr_linkdownmp;
   1657 		grp->gr_linkdownmp = NULL;
   1658 		ASSERT(mp != NULL);
   1659 		put(illg->ig_ipmp_ill->ill_rq, mp);
   1660 	}
   1661 }
   1662 
   1663 /*
   1664  * Send the routing socket messages needed to make `ill' "appear" (RTM_ADD)
   1665  * or "disappear" (RTM_DELETE) to non-IPMP-aware routing socket listeners.
   1666  */
   1667 static void
   1668 ipmp_ill_rtsaddrmsg(ill_t *ill, int cmd)
   1669 {
   1670 	ipif_t *ipif;
   1671 
   1672 	ASSERT(IAM_WRITER_ILL(ill));
   1673 	ASSERT(cmd == RTM_ADD || cmd == RTM_DELETE);
   1674 
   1675 	/*
   1676 	 * If `ill' is truly down, there are no messages to generate since:
   1677 	 *
   1678 	 * 1. If cmd == RTM_DELETE, then we're supposed to hide the interface
   1679 	 *    and its addresses by bringing them down.  But that's already
   1680 	 *    true, so there's nothing to hide.
   1681 	 *
   1682 	 * 2. If cmd == RTM_ADD, then we're supposed to generate messages
   1683 	 *    indicating that any previously-hidden up addresses are again
   1684 	 *    back up (along with the interface).  But they aren't, so
   1685 	 *    there's nothing to expose.
   1686 	 */
   1687 	if (ill->ill_ipif_up_count == 0)
   1688 		return;
   1689 
   1690 	if (cmd == RTM_ADD)
   1691 		ip_rts_xifmsg(ill->ill_ipif, IPIF_UP, 0, RTSQ_NORMAL);
   1692 
   1693 	for (ipif = ill->ill_ipif; ipif != NULL; ipif = ipif->ipif_next)
   1694 		if (ipif->ipif_flags & IPIF_UP)
   1695 			ip_rts_newaddrmsg(cmd, 0, ipif, RTSQ_NORMAL);
   1696 
   1697 	if (cmd == RTM_DELETE)
   1698 		ip_rts_xifmsg(ill->ill_ipif, 0, IPIF_UP, RTSQ_NORMAL);
   1699 }
   1700 
   1701 /*
   1702  * Bind the address named by `ipif' to the underlying ill named by `ill'.
   1703  * If `act' is Res_act_none, don't notify the resolver.  Otherwise, `act'
   1704  * will indicate to the resolver whether this is an initial bringup of
   1705  * `ipif', or just a rebind to another ill.
   1706  */
   1707 static void
   1708 ipmp_ill_bind_ipif(ill_t *ill, ipif_t *ipif, enum ip_resolver_action act)
   1709 {
   1710 	int err = 0;
   1711 	ip_stack_t *ipst = ill->ill_ipst;
   1712 
   1713 	ASSERT(IAM_WRITER_ILL(ill) && IAM_WRITER_IPIF(ipif));
   1714 	ASSERT(IS_UNDER_IPMP(ill) && IS_IPMP(ipif->ipif_ill));
   1715 	ASSERT(act == Res_act_none || ipmp_ipif_is_up_dataaddr(ipif));
   1716 	ASSERT(ipif->ipif_bound_ill == NULL);
   1717 	ASSERT(ipif->ipif_bound_next == NULL);
   1718 
   1719 	ipif->ipif_bound_next = ill->ill_bound_ipif;
   1720 	ill->ill_bound_ipif = ipif;
   1721 	ill->ill_bound_cnt++;
   1722 	rw_enter(&ipst->ips_ipmp_lock, RW_WRITER);
   1723 	ipif->ipif_bound_ill = ill;
   1724 	rw_exit(&ipst->ips_ipmp_lock);
   1725 
   1726 	/*
   1727 	 * If necessary, tell ARP/NDP about the new mapping.  Note that
   1728 	 * ipif_resolver_up() cannot fail for non-XRESOLV IPv6 ills.
   1729 	 */
   1730 	if (act != Res_act_none) {
   1731 		if (ill->ill_isv6) {
   1732 			VERIFY(ipif_resolver_up(ipif, act) == 0);
   1733 			err = ipif_ndp_up(ipif, act == Res_act_initial);
   1734 		} else {
   1735 			err = ipif_resolver_up(ipif, act);
   1736 		}
   1737 
   1738 		/*
   1739 		 * Since ipif_ndp_up() never returns EINPROGRESS and
   1740 		 * ipif_resolver_up() only returns EINPROGRESS when the
   1741 		 * associated ill is not up, we should never be here with
   1742 		 * EINPROGRESS.  We rely on this to simplify the design.
   1743 		 */
   1744 		ASSERT(err != EINPROGRESS);
   1745 	}
   1746 	/* TODO: retry binding on failure? when? */
   1747 	ipif->ipif_bound = (err == 0);
   1748 }
   1749 
   1750 /*
   1751  * Unbind the address named by `ipif' from the underlying ill named by `ill'.
   1752  * If `ipif' is NULL, then an arbitrary ipif on `ill' is unbound and returned.
   1753  * If no ipifs are bound to `ill', NULL is returned.  If `notifyres' is
   1754  * B_TRUE, notify the resolver about the change.
   1755  */
   1756 static ipif_t *
   1757 ipmp_ill_unbind_ipif(ill_t *ill, ipif_t *ipif, boolean_t notifyres)
   1758 {
   1759 	ill_t *ipmp_ill;
   1760 	ipif_t *previpif;
   1761 	ip_stack_t *ipst = ill->ill_ipst;
   1762 
   1763 	ASSERT(IAM_WRITER_ILL(ill));
   1764 	ASSERT(IS_UNDER_IPMP(ill));
   1765 
   1766 	ipmp_ill = ill->ill_grp->ig_ipmp_ill;
   1767 
   1768 	/*
   1769 	 * If necessary, find an ipif to unbind.
   1770 	 */
   1771 	if (ipif == NULL) {
   1772 		if ((ipif = ill->ill_bound_ipif) == NULL) {
   1773 			ASSERT(ill->ill_bound_cnt == 0);
   1774 			return (NULL);
   1775 		}
   1776 	}
   1777 
   1778 	ASSERT(IAM_WRITER_IPIF(ipif));
   1779 	ASSERT(IS_IPMP(ipif->ipif_ill));
   1780 	ASSERT(ipif->ipif_bound_ill == ill);
   1781 	ASSERT(ill->ill_bound_cnt > 0);
   1782 
   1783 	/*
   1784 	 * Unbind it.
   1785 	 */
   1786 	rw_enter(&ipst->ips_ipmp_lock, RW_WRITER);
   1787 	ipif->ipif_bound_ill = NULL;
   1788 	rw_exit(&ipst->ips_ipmp_lock);
   1789 	ill->ill_bound_cnt--;
   1790 
   1791 	if (ill->ill_bound_ipif == ipif) {
   1792 		ill->ill_bound_ipif = ipif->ipif_bound_next;
   1793 	} else {
   1794 		previpif = ill->ill_bound_ipif;
   1795 		while (previpif->ipif_bound_next != ipif)
   1796 			previpif = previpif->ipif_bound_next;
   1797 
   1798 		previpif->ipif_bound_next = ipif->ipif_bound_next;
   1799 	}
   1800 	ipif->ipif_bound_next = NULL;
   1801 
   1802 	/*
   1803 	 * If requested, notify the resolvers (provided we're bound).
   1804 	 */
   1805 	if (notifyres && ipif->ipif_bound) {
   1806 		if (ill->ill_isv6) {
   1807 			ipif_ndp_down(ipif);
   1808 		} else {
   1809 			ASSERT(ipif->ipif_arp_del_mp != NULL);
   1810 			putnext(ipmp_ill->ill_rq, ipif->ipif_arp_del_mp);
   1811 			ipif->ipif_arp_del_mp = NULL;
   1812 		}
   1813 	}
   1814 	ipif->ipif_bound = B_FALSE;
   1815 
   1816 	return (ipif);
   1817 }
   1818 
   1819 /*
   1820  * Check if `ill' is active.  Caller must hold ill_lock and phyint_lock if
   1821  * it's not inside the IPSQ.  Since ipmp_ill_try_refresh_active() calls this
   1822  * to determine whether an ill should be considered active, other consumers
   1823  * may race and learn about an ill that should be deactivated/activated before
   1824  * IPMP has performed the activation/deactivation.  This should be safe though
   1825  * since at worst e.g. ire_atomic_start() will prematurely delete an IRE that
   1826  * would've been cleaned up by ipmp_ill_deactivate().
   1827  */
   1828 boolean_t
   1829 ipmp_ill_is_active(ill_t *ill)
   1830 {
   1831 	phyint_t *phyi = ill->ill_phyint;
   1832 
   1833 	ASSERT(IS_UNDER_IPMP(ill));
   1834 	ASSERT(IAM_WRITER_ILL(ill) ||
   1835 	    (MUTEX_HELD(&ill->ill_lock) && MUTEX_HELD(&phyi->phyint_lock)));
   1836 
   1837 	/*
   1838 	 * Note that PHYI_RUNNING isn't checked since we rely on in.mpathd to
   1839 	 * set PHYI_FAILED whenever PHYI_RUNNING is cleared.  This allows the
   1840 	 * link flapping logic to be just in in.mpathd and allows us to ignore
   1841 	 * changes to PHYI_RUNNING.
   1842 	 */
   1843 	return (!(ill->ill_ipif_up_count == 0 ||
   1844 	    (phyi->phyint_flags & (PHYI_OFFLINE|PHYI_INACTIVE|PHYI_FAILED))));
   1845 }
   1846 
   1847 /*
   1848  * IRE walker callback: set IRE_MARK_TESTHIDDEN on cache/interface/offsubnet
   1849  * IREs with a source address on `ill_arg'.
   1850  */
   1851 static void
   1852 ipmp_ill_ire_mark_testhidden(ire_t *ire, char *ill_arg)
   1853 {
   1854 	ill_t *ill = (ill_t *)ill_arg;
   1855 
   1856 	ASSERT(IAM_WRITER_ILL(ill));
   1857 	ASSERT(!IS_IPMP(ill));
   1858 
   1859 	if (ire->ire_ipif->ipif_ill != ill)
   1860 		return;
   1861 
   1862 	switch (ire->ire_type) {
   1863 	case IRE_HOST:
   1864 	case IRE_PREFIX:
   1865 	case IRE_DEFAULT:
   1866 	case IRE_CACHE:
   1867 	case IRE_IF_RESOLVER:
   1868 	case IRE_IF_NORESOLVER:
   1869 		DTRACE_PROBE1(ipmp__mark__testhidden, ire_t *, ire);
   1870 		ire->ire_marks |= IRE_MARK_TESTHIDDEN;
   1871 		break;
   1872 	default:
   1873 		break;
   1874 	}
   1875 }
   1876 
   1877 /*
   1878  * IRE walker callback: clear IRE_MARK_TESTHIDDEN if the IRE has a source
   1879  * address on `ill_arg'.
   1880  */
   1881 static void
   1882 ipmp_ill_ire_clear_testhidden(ire_t *ire, char *ill_arg)
   1883 {
   1884 	ill_t *ill = (ill_t *)ill_arg;
   1885 
   1886 	ASSERT(IAM_WRITER_ILL(ill));
   1887 	ASSERT(!IS_IPMP(ill));
   1888 
   1889 	if (ire->ire_ipif->ipif_ill == ill) {
   1890 		DTRACE_PROBE1(ipmp__clear__testhidden, ire_t *, ire);
   1891 		ire->ire_marks &= ~IRE_MARK_TESTHIDDEN;
   1892 	}
   1893 }
   1894 
   1895 /*
   1896  * Return a held pointer to the IPMP ill for underlying interface `ill', or
   1897  * NULL if one doesn't exist.  (Unfortunately, this function needs to take an
   1898  * underlying ill rather than an ipmp_illgrp_t because an underlying ill's
   1899  * ill_grp pointer may become stale when not inside an IPSQ and not holding
   1900  * ipmp_lock.)  Caller need not be inside the IPSQ.
   1901  */
   1902 ill_t *
   1903 ipmp_ill_hold_ipmp_ill(ill_t *ill)
   1904 {
   1905 	ip_stack_t *ipst = ill->ill_ipst;
   1906 	ipmp_illgrp_t *illg;
   1907 
   1908 	ASSERT(!IS_IPMP(ill));
   1909 
   1910 	rw_enter(&ipst->ips_ipmp_lock, RW_READER);
   1911 	illg = ill->ill_grp;
   1912 	if (illg != NULL && ill_check_and_refhold(illg->ig_ipmp_ill) == 0) {
   1913 		rw_exit(&ipst->ips_ipmp_lock);
   1914 		return (illg->ig_ipmp_ill);
   1915 	}
   1916 	/*
   1917 	 * Assume `ill' was removed from the illgrp in the meantime.
   1918 	 */
   1919 	rw_exit(&ill->ill_ipst->ips_ipmp_lock);
   1920 	return (NULL);
   1921 }
   1922 
   1923 /*
   1924  * Return the interface index for the IPMP ill tied to underlying interface
   1925  * `ill', or zero if one doesn't exist.  Caller need not be inside the IPSQ.
   1926  */
   1927 uint_t
   1928 ipmp_ill_get_ipmp_ifindex(const ill_t *ill)
   1929 {
   1930 	uint_t ifindex = 0;
   1931 	ip_stack_t *ipst = ill->ill_ipst;
   1932 	ipmp_grp_t *grp;
   1933 
   1934 	ASSERT(!IS_IPMP(ill));
   1935 
   1936 	rw_enter(&ipst->ips_ipmp_lock, RW_READER);
   1937 	if ((grp = ill->ill_phyint->phyint_grp) != NULL)
   1938 		ifindex = grp->gr_phyint->phyint_ifindex;
   1939 	rw_exit(&ipst->ips_ipmp_lock);
   1940 	return (ifindex);
   1941 }
   1942 
   1943 /*
   1944  * Place phyint `phyi' into IPMP group `grp'.
   1945  */
   1946 void
   1947 ipmp_phyint_join_grp(phyint_t *phyi, ipmp_grp_t *grp)
   1948 {
   1949 	ill_t *ill;
   1950 	ipsq_t *ipsq = phyi->phyint_ipsq;
   1951 	ipsq_t *grp_ipsq = grp->gr_phyint->phyint_ipsq;
   1952 	ip_stack_t *ipst = PHYINT_TO_IPST(phyi);
   1953 
   1954 	ASSERT(IAM_WRITER_IPSQ(ipsq));
   1955 	ASSERT(phyi->phyint_illv4 != NULL || phyi->phyint_illv6 != NULL);
   1956 
   1957 	/*
   1958 	 * Send routing socket messages indicating that the phyint's ills
   1959 	 * and ipifs vanished.
   1960 	 */
   1961 	if (phyi->phyint_illv4 != NULL) {
   1962 		ill = phyi->phyint_illv4;
   1963 		ipmp_ill_rtsaddrmsg(ill, RTM_DELETE);
   1964 	}
   1965 
   1966 	if (phyi->phyint_illv6 != NULL) {
   1967 		ill = phyi->phyint_illv6;
   1968 		ipmp_ill_rtsaddrmsg(ill, RTM_DELETE);
   1969 	}
   1970 
   1971 	/*
   1972 	 * Snapshot the phyint's initial kstats as a baseline.
   1973 	 */
   1974 	ipmp_phyint_get_kstats(phyi, phyi->phyint_kstats0);
   1975 
   1976 	rw_enter(&ipst->ips_ipmp_lock, RW_WRITER);
   1977 
   1978 	phyi->phyint_grp = grp;
   1979 	if (++grp->gr_nif == 1)
   1980 		grp->gr_mactype = ill->ill_mactype;
   1981 	else
   1982 		ASSERT(grp->gr_mactype == ill->ill_mactype);
   1983 
   1984 	/*
   1985 	 * Now that we're in the group, request a switch to the group's xop
   1986 	 * when we ipsq_exit().  All future operations will be exclusive on
   1987 	 * the group xop until ipmp_phyint_leave_grp() is called.
   1988 	 */
   1989 	ASSERT(ipsq->ipsq_swxop == NULL);
   1990 	ASSERT(grp_ipsq->ipsq_xop == &grp_ipsq->ipsq_ownxop);
   1991 	ipsq->ipsq_swxop = &grp_ipsq->ipsq_ownxop;
   1992 
   1993 	rw_exit(&ipst->ips_ipmp_lock);
   1994 }
   1995 
   1996 /*
   1997  * Remove phyint `phyi' from its current IPMP group.
   1998  */
   1999 void
   2000 ipmp_phyint_leave_grp(phyint_t *phyi)
   2001 {
   2002 	uint_t i;
   2003 	ipsq_t *ipsq = phyi->phyint_ipsq;
   2004 	ip_stack_t *ipst = PHYINT_TO_IPST(phyi);
   2005 	uint64_t phyi_kstats[IPMP_KSTAT_MAX];
   2006 
   2007 	ASSERT(IAM_WRITER_IPSQ(ipsq));
   2008 
   2009 	/*
   2010 	 * If any of the phyint's ills are still in an illgrp, kick 'em out.
   2011 	 */
   2012 	if (phyi->phyint_illv4 != NULL && IS_UNDER_IPMP(phyi->phyint_illv4))
   2013 		ipmp_ill_leave_illgrp(phyi->phyint_illv4);
   2014 	if (phyi->phyint_illv6 != NULL && IS_UNDER_IPMP(phyi->phyint_illv6))
   2015 		ipmp_ill_leave_illgrp(phyi->phyint_illv6);
   2016 
   2017 	/*
   2018 	 * Send routing socket messages indicating that the phyint's ills
   2019 	 * and ipifs have reappeared.
   2020 	 */
   2021 	if (phyi->phyint_illv4 != NULL)
   2022 		ipmp_ill_rtsaddrmsg(phyi->phyint_illv4, RTM_ADD);
   2023 	if (phyi->phyint_illv6 != NULL)
   2024 		ipmp_ill_rtsaddrmsg(phyi->phyint_illv6, RTM_ADD);
   2025 
   2026 	/*
   2027 	 * Calculate the phyint's cumulative kstats while it was in the group,
   2028 	 * and add that to the group's baseline.
   2029 	 */
   2030 	ipmp_phyint_get_kstats(phyi, phyi_kstats);
   2031 	for (i = 0; i < IPMP_KSTAT_MAX; i++) {
   2032 		phyi_kstats[i] -= phyi->phyint_kstats0[i];
   2033 		atomic_add_64(&phyi->phyint_grp->gr_kstats0[i], phyi_kstats[i]);
   2034 	}
   2035 
   2036 	rw_enter(&ipst->ips_ipmp_lock, RW_WRITER);
   2037 
   2038 	phyi->phyint_grp->gr_nif--;
   2039 	phyi->phyint_grp = NULL;
   2040 
   2041 	/*
   2042 	 * As our final act in leaving the group, request a switch back to our
   2043 	 * IPSQ's own xop when we ipsq_exit().
   2044 	 */
   2045 	ASSERT(ipsq->ipsq_swxop == NULL);
   2046 	ipsq->ipsq_swxop = &ipsq->ipsq_ownxop;
   2047 
   2048 	rw_exit(&ipst->ips_ipmp_lock);
   2049 }
   2050 
   2051 /*
   2052  * Store the IPMP-related kstats for `phyi' into the array named by `kstats'.
   2053  * Assumes that `kstats' has at least IPMP_KSTAT_MAX elements.
   2054  */
   2055 static void
   2056 ipmp_phyint_get_kstats(phyint_t *phyi, uint64_t kstats[])
   2057 {
   2058 	uint_t		i, j;
   2059 	const char	*name;
   2060 	kstat_t		*ksp;
   2061 	kstat_named_t	*kn;
   2062 	ip_stack_t	*ipst = PHYINT_TO_IPST(phyi);
   2063 	zoneid_t	zoneid;
   2064 
   2065 	bzero(kstats, sizeof (kstats[0]) * IPMP_KSTAT_MAX);
   2066 	zoneid = netstackid_to_zoneid(ipst->ips_netstack->netstack_stackid);
   2067 	ksp = kstat_hold_byname("link", 0, phyi->phyint_name, zoneid);
   2068 	if (ksp == NULL)
   2069 		return;
   2070 
   2071 	KSTAT_ENTER(ksp);
   2072 
   2073 	if (ksp->ks_data != NULL && ksp->ks_type == KSTAT_TYPE_NAMED) {
   2074 		/*
   2075 		 * Bring kstats up-to-date before recording.
   2076 		 */
   2077 		(void) KSTAT_UPDATE(ksp, KSTAT_READ);
   2078 
   2079 		kn = KSTAT_NAMED_PTR(ksp);
   2080 		for (i = 0; i < IPMP_KSTAT_MAX; i++) {
   2081 			name = ipmp_kstats[i].name;
   2082 			kstats[i] = 0;
   2083 			for (j = 0; j < ksp->ks_ndata; j++) {
   2084 				if (strcmp(kn[j].name, name) != 0)
   2085 					continue;
   2086 
   2087 				switch (kn[j].data_type) {
   2088 				case KSTAT_DATA_INT32:
   2089 				case KSTAT_DATA_UINT32:
   2090 					kstats[i] = kn[j].value.ui32;
   2091 					break;
   2092 #ifdef	_LP64
   2093 				case KSTAT_DATA_LONG:
   2094 				case KSTAT_DATA_ULONG:
   2095 					kstats[i] = kn[j].value.ul;
   2096 					break;
   2097 #endif
   2098 				case KSTAT_DATA_INT64:
   2099 				case KSTAT_DATA_UINT64:
   2100 					kstats[i] = kn[j].value.ui64;
   2101 					break;
   2102 				}
   2103 				break;
   2104 			}
   2105 		}
   2106 	}
   2107 
   2108 	KSTAT_EXIT(ksp);
   2109 	kstat_rele(ksp);
   2110 }
   2111 
   2112 /*
   2113  * Refresh the active state of all ills on `phyi'.
   2114  */
   2115 void
   2116 ipmp_phyint_refresh_active(phyint_t *phyi)
   2117 {
   2118 	if (phyi->phyint_illv4 != NULL)
   2119 		ipmp_ill_refresh_active(phyi->phyint_illv4);
   2120 	if (phyi->phyint_illv6 != NULL)
   2121 		ipmp_ill_refresh_active(phyi->phyint_illv6);
   2122 }
   2123 
   2124 /*
   2125  * Return a held pointer to the underlying ill bound to `ipif', or NULL if one
   2126  * doesn't exist.  Caller need not be inside the IPSQ.
   2127  */
   2128 ill_t *
   2129 ipmp_ipif_hold_bound_ill(const ipif_t *ipif)
   2130 {
   2131 	ill_t *boundill;
   2132 	ip_stack_t *ipst = ipif->ipif_ill->ill_ipst;
   2133 
   2134 	ASSERT(IS_IPMP(ipif->ipif_ill));
   2135 
   2136 	rw_enter(&ipst->ips_ipmp_lock, RW_READER);
   2137 	boundill = ipif->ipif_bound_ill;
   2138 	if (boundill != NULL && ill_check_and_refhold(boundill) == 0) {
   2139 		rw_exit(&ipst->ips_ipmp_lock);
   2140 		return (boundill);
   2141 	}
   2142 	rw_exit(&ipst->ips_ipmp_lock);
   2143 	return (NULL);
   2144 }
   2145 
   2146 /*
   2147  * Return a pointer to the underlying ill bound to `ipif', or NULL if one
   2148  * doesn't exist.  Caller must be inside the IPSQ.
   2149  */
   2150 ill_t *
   2151 ipmp_ipif_bound_ill(const ipif_t *ipif)
   2152 {
   2153 	ASSERT(IAM_WRITER_ILL(ipif->ipif_ill));
   2154 	ASSERT(IS_IPMP(ipif->ipif_ill));
   2155 
   2156 	return (ipif->ipif_bound_ill);
   2157 }
   2158 
   2159 /*
   2160  * Check if `ipif' is a "stub" (placeholder address not being used).
   2161  */
   2162 boolean_t
   2163 ipmp_ipif_is_stubaddr(const ipif_t *ipif)
   2164 {
   2165 	if (ipif->ipif_flags & IPIF_UP)
   2166 		return (B_FALSE);
   2167 	if (ipif->ipif_ill->ill_isv6)
   2168 		return (IN6_IS_ADDR_UNSPECIFIED(&ipif->ipif_v6lcl_addr));
   2169 	else
   2170 		return (ipif->ipif_lcl_addr == INADDR_ANY);
   2171 }
   2172 
   2173 /*
   2174  * Check if `ipif' is an IPMP data address.
   2175  */
   2176 boolean_t
   2177 ipmp_ipif_is_dataaddr(const ipif_t *ipif)
   2178 {
   2179 	if (ipif->ipif_flags & IPIF_NOFAILOVER)
   2180 		return (B_FALSE);
   2181 	if (ipif->ipif_ill->ill_isv6)
   2182 		return (!IN6_IS_ADDR_UNSPECIFIED(&ipif->ipif_v6lcl_addr));
   2183 	else
   2184 		return (ipif->ipif_lcl_addr != INADDR_ANY);
   2185 }
   2186 
   2187 /*
   2188  * Check if `ipif' is an IPIF_UP IPMP data address.
   2189  */
   2190 static boolean_t
   2191 ipmp_ipif_is_up_dataaddr(const ipif_t *ipif)
   2192 {
   2193 	return (ipmp_ipif_is_dataaddr(ipif) && (ipif->ipif_flags & IPIF_UP));
   2194 }
   2195