Home | History | Annotate | Download | only in ip
      1   8485      Peter /*
      2   8485      Peter  * CDDL HEADER START
      3   8485      Peter  *
      4   8485      Peter  * The contents of this file are subject to the terms of the
      5   8485      Peter  * Common Development and Distribution License (the "License").
      6   8485      Peter  * You may not use this file except in compliance with the License.
      7   8485      Peter  *
      8   8485      Peter  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
      9   8485      Peter  * or http://www.opensolaris.org/os/licensing.
     10   8485      Peter  * See the License for the specific language governing permissions
     11   8485      Peter  * and limitations under the License.
     12   8485      Peter  *
     13   8485      Peter  * When distributing Covered Code, include this CDDL HEADER in each
     14   8485      Peter  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
     15   8485      Peter  * If applicable, add the following below this CDDL HEADER, with the
     16   8485      Peter  * fields enclosed by brackets "[]" replaced with your own identifying
     17   8485      Peter  * information: Portions Copyright [yyyy] [name of copyright owner]
     18   8485      Peter  *
     19   8485      Peter  * CDDL HEADER END
     20   8485      Peter  *
     21   8485      Peter  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
     22   8485      Peter  * Use is subject to license terms.
     23   8485      Peter  */
     24   8485      Peter 
     25   8485      Peter #include <inet/ip.h>
     26   8485      Peter #include <inet/ip6.h>
     27   8485      Peter #include <inet/ip_if.h>
     28   8485      Peter #include <inet/ip_ire.h>
     29   8485      Peter #include <inet/ip_multi.h>
     30  11042       Erik #include <inet/ip_ndp.h>
     31   8485      Peter #include <inet/ip_rts.h>
     32   8485      Peter #include <inet/mi.h>
     33   8485      Peter #include <net/if_types.h>
     34   8485      Peter #include <sys/dlpi.h>
     35   8485      Peter #include <sys/kmem.h>
     36   8485      Peter #include <sys/modhash.h>
     37   8485      Peter #include <sys/sdt.h>
     38   8485      Peter #include <sys/strsun.h>
     39   8485      Peter #include <sys/sunddi.h>
     40   8485      Peter #include <sys/types.h>
     41   8485      Peter 
     42   8485      Peter /*
     43   8485      Peter  * Convenience macros for getting the ip_stack_t associated with an
     44   8485      Peter  * ipmp_illgrp_t or ipmp_grp_t.
     45   8485      Peter  */
     46   8485      Peter #define	IPMP_GRP_TO_IPST(grp)		PHYINT_TO_IPST((grp)->gr_phyint)
     47   8485      Peter #define	IPMP_ILLGRP_TO_IPST(illg)	((illg)->ig_ipmp_ill->ill_ipst)
     48   8485      Peter 
     49   8485      Peter /*
     50   8485      Peter  * Assorted constants that aren't important enough to be tunable.
     51   8485      Peter  */
     52   8485      Peter #define	IPMP_GRP_HASH_SIZE		64
     53   8485      Peter #define	IPMP_ILL_REFRESH_TIMEOUT	120	/* seconds */
     54   8485      Peter 
     55   8485      Peter 
     56   8485      Peter /*
     57   8485      Peter  * IPMP meta-interface kstats (based on those in PSARC/1997/198).
     58   8485      Peter  */
     59   8485      Peter static const kstat_named_t ipmp_kstats[IPMP_KSTAT_MAX] = {
     60   8485      Peter 	{ "obytes",	KSTAT_DATA_UINT32 },
     61   8485      Peter 	{ "obytes64",	KSTAT_DATA_UINT64 },
     62   8485      Peter 	{ "rbytes",	KSTAT_DATA_UINT32 },
     63   8485      Peter 	{ "rbytes64",	KSTAT_DATA_UINT64 },
     64   8485      Peter 	{ "opackets",	KSTAT_DATA_UINT32 },
     65   8485      Peter 	{ "opackets64",	KSTAT_DATA_UINT64 },
     66   8485      Peter 	{ "oerrors",	KSTAT_DATA_UINT32 },
     67   8485      Peter 	{ "ipackets",	KSTAT_DATA_UINT32 },
     68   8485      Peter 	{ "ipackets64",	KSTAT_DATA_UINT64 },
     69   8485      Peter 	{ "ierrors",	KSTAT_DATA_UINT32 },
     70   8485      Peter 	{ "multircv",	KSTAT_DATA_UINT32 },
     71   8485      Peter 	{ "multixmt",	KSTAT_DATA_UINT32 },
     72   8485      Peter 	{ "brdcstrcv",	KSTAT_DATA_UINT32 },
     73   8485      Peter 	{ "brdcstxmt",	KSTAT_DATA_UINT32 },
     74   8485      Peter 	{ "link_up",	KSTAT_DATA_UINT32 }
     75   8485      Peter };
     76   8485      Peter 
     77   8485      Peter static void	ipmp_grp_insert(ipmp_grp_t *, mod_hash_hndl_t);
     78   8485      Peter static int	ipmp_grp_create_kstats(ipmp_grp_t *);
     79   8485      Peter static int	ipmp_grp_update_kstats(kstat_t *, int);
     80   8485      Peter static void	ipmp_grp_destroy_kstats(ipmp_grp_t *);
     81   8485      Peter static ill_t	*ipmp_illgrp_min_ill(ipmp_illgrp_t *);
     82   8485      Peter static ill_t	*ipmp_illgrp_max_ill(ipmp_illgrp_t *);
     83   8485      Peter static void	ipmp_illgrp_set_cast(ipmp_illgrp_t *, ill_t *);
     84   8485      Peter static void	ipmp_illgrp_set_mtu(ipmp_illgrp_t *, uint_t);
     85   8485      Peter static boolean_t ipmp_ill_activate(ill_t *);
     86   8485      Peter static void	ipmp_ill_deactivate(ill_t *);
     87   8485      Peter static void	ipmp_ill_ire_mark_testhidden(ire_t *, char *);
     88   8485      Peter static void	ipmp_ill_ire_clear_testhidden(ire_t *, char *);
     89   8485      Peter static void	ipmp_ill_refresh_active_timer_start(ill_t *);
     90   8485      Peter static void	ipmp_ill_rtsaddrmsg(ill_t *, int);
     91   8485      Peter static void	ipmp_ill_bind_ipif(ill_t *, ipif_t *, enum ip_resolver_action);
     92   8485      Peter static ipif_t	*ipmp_ill_unbind_ipif(ill_t *, ipif_t *, boolean_t);
     93   8485      Peter static void	ipmp_phyint_get_kstats(phyint_t *, uint64_t *);
     94   8485      Peter static boolean_t ipmp_ipif_is_up_dataaddr(const ipif_t *);
     95   8485      Peter 
     96   8485      Peter /*
     97   8485      Peter  * Initialize IPMP state for IP stack `ipst'; called from ip_stack_init().
     98   8485      Peter  */
     99   8485      Peter void
    100   8485      Peter ipmp_init(ip_stack_t *ipst)
    101   8485      Peter {
    102   8485      Peter 	ipst->ips_ipmp_grp_hash = mod_hash_create_extended("ipmp_grp_hash",
    103   8485      Peter 	    IPMP_GRP_HASH_SIZE, mod_hash_null_keydtor, mod_hash_null_valdtor,
    104   8485      Peter 	    mod_hash_bystr, NULL, mod_hash_strkey_cmp, KM_SLEEP);
    105   8485      Peter 	rw_init(&ipst->ips_ipmp_lock, NULL, RW_DEFAULT, 0);
    106   8485      Peter }
    107   8485      Peter 
    108   8485      Peter /*
    109   8485      Peter  * Destroy IPMP state for IP stack `ipst'; called from ip_stack_fini().
    110   8485      Peter  */
    111   8485      Peter void
    112   8485      Peter ipmp_destroy(ip_stack_t *ipst)
    113   8485      Peter {
    114   8485      Peter 	mod_hash_destroy_hash(ipst->ips_ipmp_grp_hash);
    115   8485      Peter 	rw_destroy(&ipst->ips_ipmp_lock);
    116   8485      Peter }
    117   8485      Peter 
    118   8485      Peter /*
    119   8485      Peter  * Create an IPMP group named `grname', associate it with IPMP phyint `phyi',
    120   8485      Peter  * and add it to the hash.  On success, return a pointer to the created group.
    121   8485      Peter  * Caller must ensure `grname' is not yet in the hash.  Assumes that the IPMP
    122   8485      Peter  * meta-interface associated with the group also has the same name (but they
    123   8485      Peter  * may differ later via ipmp_grp_rename()).
    124   8485      Peter  */
    125   8485      Peter ipmp_grp_t *
    126   8485      Peter ipmp_grp_create(const char *grname, phyint_t *phyi)
    127   8485      Peter {
    128   8485      Peter 	ipmp_grp_t *grp;
    129   8485      Peter 	ip_stack_t *ipst = PHYINT_TO_IPST(phyi);
    130   8485      Peter 	mod_hash_hndl_t mh;
    131   8485      Peter 
    132   8485      Peter 	ASSERT(RW_WRITE_HELD(&ipst->ips_ipmp_lock));
    133   8485      Peter 
    134   8485      Peter 	if ((grp = kmem_zalloc(sizeof (ipmp_grp_t), KM_NOSLEEP)) == NULL)
    135   8485      Peter 		return (NULL);
    136   8485      Peter 
    137   8485      Peter 	(void) strlcpy(grp->gr_name, grname, sizeof (grp->gr_name));
    138   8485      Peter 	(void) strlcpy(grp->gr_ifname, grname, sizeof (grp->gr_ifname));
    139   8485      Peter 
    140   8485      Peter 	/*
    141   8485      Peter 	 * Cache the group's phyint.  This is safe since a phyint_t will
    142   8485      Peter 	 * outlive its ipmp_grp_t.
    143   8485      Peter 	 */
    144   8485      Peter 	grp->gr_phyint = phyi;
    145   8485      Peter 
    146   8485      Peter 	/*
    147   8485      Peter 	 * Create IPMP group kstats.
    148   8485      Peter 	 */
    149   8485      Peter 	if (ipmp_grp_create_kstats(grp) != 0) {
    150   8485      Peter 		kmem_free(grp, sizeof (ipmp_grp_t));
    151   8485      Peter 		return (NULL);
    152   8485      Peter 	}
    153   8485      Peter 
    154   8485      Peter 	/*
    155   8485      Peter 	 * Insert the group into the hash.
    156   8485      Peter 	 */
    157   8485      Peter 	if (mod_hash_reserve_nosleep(ipst->ips_ipmp_grp_hash, &mh) != 0) {
    158   8485      Peter 		ipmp_grp_destroy_kstats(grp);
    159   8485      Peter 		kmem_free(grp, sizeof (ipmp_grp_t));
    160   8485      Peter 		return (NULL);
    161   8485      Peter 	}
    162   8485      Peter 	ipmp_grp_insert(grp, mh);
    163   8485      Peter 
    164   8485      Peter 	return (grp);
    165   8485      Peter }
    166   8485      Peter 
    167   8485      Peter /*
    168   8485      Peter  * Create IPMP kstat structures for `grp'.  Return an errno upon failure.
    169   8485      Peter  */
    170   8485      Peter static int
    171   8485      Peter ipmp_grp_create_kstats(ipmp_grp_t *grp)
    172   8485      Peter {
    173   8485      Peter 	kstat_t *ksp;
    174   8485      Peter 	netstackid_t id = IPMP_GRP_TO_IPST(grp)->ips_netstack->netstack_stackid;
    175   8485      Peter 
    176   8485      Peter 	ksp = kstat_create_netstack("ipmp", 0, grp->gr_ifname, "net",
    177   8485      Peter 	    KSTAT_TYPE_NAMED, IPMP_KSTAT_MAX, 0, id);
    178   8485      Peter 	if (ksp == NULL)
    179   8485      Peter 		return (ENOMEM);
    180   8485      Peter 
    181   8485      Peter 	ksp->ks_update = ipmp_grp_update_kstats;
    182   8485      Peter 	ksp->ks_private = grp;
    183   8485      Peter 	bcopy(ipmp_kstats, ksp->ks_data, sizeof (ipmp_kstats));
    184   8485      Peter 
    185   8485      Peter 	kstat_install(ksp);
    186   8485      Peter 	grp->gr_ksp = ksp;
    187   8485      Peter 	return (0);
    188   8485      Peter }
    189   8485      Peter 
    190   8485      Peter /*
    191   8485      Peter  * Update the IPMP kstats tracked by `ksp'; called by the kstats framework.
    192   8485      Peter  */
    193   8485      Peter static int
    194   8485      Peter ipmp_grp_update_kstats(kstat_t *ksp, int rw)
    195   8485      Peter {
    196   8485      Peter 	uint_t		i;
    197   8485      Peter 	kstat_named_t	*kn = KSTAT_NAMED_PTR(ksp);
    198   8485      Peter 	ipmp_grp_t	*grp = ksp->ks_private;
    199   8485      Peter 	ip_stack_t	*ipst = IPMP_GRP_TO_IPST(grp);
    200   8485      Peter 	ipsq_t		*ipsq, *grp_ipsq = grp->gr_phyint->phyint_ipsq;
    201   8485      Peter 	phyint_t	*phyi;
    202   8485      Peter 	uint64_t	phyi_kstats[IPMP_KSTAT_MAX];
    203   8485      Peter 
    204   8485      Peter 	if (rw == KSTAT_WRITE)
    205   8485      Peter 		return (EACCES);
    206   8485      Peter 
    207   8485      Peter 	/*
    208   8485      Peter 	 * Start with the group's baseline values.
    209   8485      Peter 	 */
    210   8485      Peter 	for (i = 0; i < IPMP_KSTAT_MAX; i++) {
    211   8485      Peter 		if (kn[i].data_type == KSTAT_DATA_UINT32) {
    212   8485      Peter 			kn[i].value.ui32 = grp->gr_kstats0[i];
    213   8485      Peter 		} else {
    214   8485      Peter 			ASSERT(kn[i].data_type == KSTAT_DATA_UINT64);
    215   8485      Peter 			kn[i].value.ui64 = grp->gr_kstats0[i];
    216   8485      Peter 		}
    217   8485      Peter 	}
    218   8485      Peter 
    219   8485      Peter 	/*
    220   8485      Peter 	 * Add in the stats of each phyint currently in the group.  Since we
    221   8485      Peter 	 * don't directly track the phyints in a group, we cheat by walking
    222   8485      Peter 	 * the IPSQ set under ill_g_lock.  (The IPSQ list cannot change while
    223   8485      Peter 	 * ill_g_lock is held.)
    224   8485      Peter 	 */
    225   8485      Peter 	rw_enter(&ipst->ips_ill_g_lock, RW_READER);
    226   8485      Peter 	ipsq = grp_ipsq->ipsq_next;
    227   8485      Peter 	for (; ipsq != grp_ipsq; ipsq = ipsq->ipsq_next) {
    228   8485      Peter 		phyi = ipsq->ipsq_phyint;
    229   8485      Peter 
    230   8485      Peter 		/*
    231   8485      Peter 		 * If a phyint in a group is being unplumbed, it's possible
    232   8485      Peter 		 * that ill_glist_delete() -> phyint_free() already freed the
    233   8485      Peter 		 * phyint (and set ipsq_phyint to NULL), but the unplumb
    234   8485      Peter 		 * operation has yet to complete (and thus ipsq_dq() has yet
    235   8485      Peter 		 * to remove the phyint's IPSQ from the group IPSQ's phyint
    236   8485      Peter 		 * list).  We skip those phyints here (note that their kstats
    237   8485      Peter 		 * have already been added to gr_kstats0[]).
    238   8485      Peter 		 */
    239   8485      Peter 		if (phyi == NULL)
    240   8485      Peter 			continue;
    241   8485      Peter 
    242   8485      Peter 		ipmp_phyint_get_kstats(phyi, phyi_kstats);
    243   8485      Peter 
    244   8485      Peter 		for (i = 0; i < IPMP_KSTAT_MAX; i++) {
    245   8485      Peter 			phyi_kstats[i] -= phyi->phyint_kstats0[i];
    246   8485      Peter 			if (kn[i].data_type == KSTAT_DATA_UINT32)
    247   8485      Peter 				kn[i].value.ui32 += phyi_kstats[i];
    248   8485      Peter 			else
    249   8485      Peter 				kn[i].value.ui64 += phyi_kstats[i];
    250   8485      Peter 		}
    251   8485      Peter 	}
    252   8485      Peter 
    253   8485      Peter 	kn[IPMP_KSTAT_LINK_UP].value.ui32 =
    254   8485      Peter 	    (grp->gr_phyint->phyint_flags & PHYI_RUNNING) != 0;
    255   8485      Peter 
    256   8485      Peter 	rw_exit(&ipst->ips_ill_g_lock);
    257   8485      Peter 	return (0);
    258   8485      Peter }
    259   8485      Peter 
    260   8485      Peter /*
    261   8485      Peter  * Destroy IPMP kstat structures for `grp'.
    262   8485      Peter  */
    263   8485      Peter static void
    264   8485      Peter ipmp_grp_destroy_kstats(ipmp_grp_t *grp)
    265   8485      Peter {
    266   8485      Peter 	netstackid_t id = IPMP_GRP_TO_IPST(grp)->ips_netstack->netstack_stackid;
    267   8485      Peter 
    268   8485      Peter 	kstat_delete_netstack(grp->gr_ksp, id);
    269   8485      Peter 	bzero(grp->gr_kstats0, sizeof (grp->gr_kstats0));
    270   8485      Peter 	grp->gr_ksp = NULL;
    271   8485      Peter }
    272   8485      Peter 
    273   8485      Peter /*
    274   8485      Peter  * Look up an IPMP group named `grname' on IP stack `ipst'.  Return NULL if it
    275   8485      Peter  * does not exist.
    276   8485      Peter  */
    277   8485      Peter ipmp_grp_t *
    278   8485      Peter ipmp_grp_lookup(const char *grname, ip_stack_t *ipst)
    279   8485      Peter {
    280   8485      Peter 	ipmp_grp_t *grp;
    281   8485      Peter 
    282   8485      Peter 	ASSERT(RW_LOCK_HELD(&ipst->ips_ipmp_lock));
    283   8485      Peter 
    284   8485      Peter 	if (mod_hash_find(ipst->ips_ipmp_grp_hash, (mod_hash_key_t)grname,
    285   8485      Peter 	    (mod_hash_val_t *)&grp) == 0)
    286   8485      Peter 		return (grp);
    287   8485      Peter 
    288   8485      Peter 	return (NULL);
    289   8485      Peter }
    290   8485      Peter 
    291   8485      Peter /*
    292   8485      Peter  * Place information about group `grp' into `lifgr'.
    293   8485      Peter  */
    294   8485      Peter void
    295   8485      Peter ipmp_grp_info(const ipmp_grp_t *grp, lifgroupinfo_t *lifgr)
    296   8485      Peter {
    297   8485      Peter 	ill_t *ill;
    298   8485      Peter 	ip_stack_t *ipst = IPMP_GRP_TO_IPST(grp);
    299   8485      Peter 
    300   8485      Peter 	ASSERT(RW_LOCK_HELD(&ipst->ips_ipmp_lock));
    301   8485      Peter 
    302   8485      Peter 	lifgr->gi_v4 = (grp->gr_v4 != NULL);
    303   8485      Peter 	lifgr->gi_v6 = (grp->gr_v6 != NULL);
    304   8485      Peter 	lifgr->gi_nv4 = grp->gr_nv4 + grp->gr_pendv4;
    305   8485      Peter 	lifgr->gi_nv6 = grp->gr_nv6 + grp->gr_pendv6;
    306   8485      Peter 	lifgr->gi_mactype = grp->gr_nif > 0 ? grp->gr_mactype : SUNW_DL_IPMP;
    307   8485      Peter 	(void) strlcpy(lifgr->gi_grifname, grp->gr_ifname, LIFNAMSIZ);
    308   8485      Peter 	lifgr->gi_m4ifname[0] = '\0';
    309   8485      Peter 	lifgr->gi_m6ifname[0] = '\0';
    310   8485      Peter 	lifgr->gi_bcifname[0] = '\0';
    311   8485      Peter 
    312   8485      Peter 	if (grp->gr_v4 != NULL && (ill = grp->gr_v4->ig_cast_ill) != NULL) {
    313   8485      Peter 		(void) strlcpy(lifgr->gi_m4ifname, ill->ill_name, LIFNAMSIZ);
    314   8485      Peter 		(void) strlcpy(lifgr->gi_bcifname, ill->ill_name, LIFNAMSIZ);
    315   8485      Peter 	}
    316   8485      Peter 
    317   8485      Peter 	if (grp->gr_v6 != NULL && (ill = grp->gr_v6->ig_cast_ill) != NULL)
    318   8485      Peter 		(void) strlcpy(lifgr->gi_m6ifname, ill->ill_name, LIFNAMSIZ);
    319   8485      Peter }
    320   8485      Peter 
    321   8485      Peter /*
    322   8485      Peter  * Insert `grp' into the hash using the reserved hash entry `mh'.
    323   8485      Peter  * Caller must ensure `grp' is not yet in the hash.
    324   8485      Peter  */
    325   8485      Peter static void
    326   8485      Peter ipmp_grp_insert(ipmp_grp_t *grp, mod_hash_hndl_t mh)
    327   8485      Peter {
    328   8485      Peter 	int err;
    329   8485      Peter 	ip_stack_t *ipst = IPMP_GRP_TO_IPST(grp);
    330   8485      Peter 
    331   8485      Peter 	ASSERT(RW_WRITE_HELD(&ipst->ips_ipmp_lock));
    332   8485      Peter 
    333   8485      Peter 	/*
    334   8485      Peter 	 * Since grp->gr_name will exist at least as long as `grp' is in the
    335   8485      Peter 	 * hash, we use it directly as the key.
    336   8485      Peter 	 */
    337   8485      Peter 	err = mod_hash_insert_reserve(ipst->ips_ipmp_grp_hash,
    338   8485      Peter 	    (mod_hash_key_t)grp->gr_name, (mod_hash_val_t)grp, mh);
    339   8485      Peter 	if (err != 0) {
    340   8485      Peter 		/*
    341   8485      Peter 		 * This should never happen since `mh' was preallocated.
    342   8485      Peter 		 */
    343   8485      Peter 		panic("cannot insert IPMP group \"%s\" (err %d)",
    344   8485      Peter 		    grp->gr_name, err);
    345   8485      Peter 	}
    346   8485      Peter }
    347   8485      Peter 
    348   8485      Peter /*
    349   8485      Peter  * Remove `grp' from the hash.  Caller must ensure `grp' is in it.
    350   8485      Peter  */
    351   8485      Peter static void
    352   8485      Peter ipmp_grp_remove(ipmp_grp_t *grp)
    353   8485      Peter {
    354   8485      Peter 	int err;
    355   8485      Peter 	mod_hash_val_t val;
    356   8485      Peter 	mod_hash_key_t key = (mod_hash_key_t)grp->gr_name;
    357   8485      Peter 	ip_stack_t *ipst = IPMP_GRP_TO_IPST(grp);
    358   8485      Peter 
    359   8485      Peter 	ASSERT(RW_WRITE_HELD(&ipst->ips_ipmp_lock));
    360   8485      Peter 
    361   8485      Peter 	err = mod_hash_remove(ipst->ips_ipmp_grp_hash, key, &val);
    362   8485      Peter 	if (err != 0 || val != grp) {
    363   8485      Peter 		panic("cannot remove IPMP group \"%s\" (err %d)",
    364   8485      Peter 		    grp->gr_name, err);
    365   8485      Peter 	}
    366   8485      Peter }
    367   8485      Peter 
    368   8485      Peter /*
    369   8485      Peter  * Attempt to rename `grp' to new name `grname'.  Return an errno if the new
    370   8485      Peter  * group name already exists or is invalid, or if there isn't enough memory.
    371   8485      Peter  */
    372   8485      Peter int
    373   8485      Peter ipmp_grp_rename(ipmp_grp_t *grp, const char *grname)
    374   8485      Peter {
    375   8485      Peter 	mod_hash_hndl_t mh;
    376   8485      Peter 	ip_stack_t *ipst = IPMP_GRP_TO_IPST(grp);
    377   8485      Peter 
    378   8485      Peter 	ASSERT(RW_WRITE_HELD(&ipst->ips_ipmp_lock));
    379   8485      Peter 
    380   8485      Peter 	if (grname[0] == '\0')
    381   8485      Peter 		return (EINVAL);
    382   8485      Peter 
    383   8485      Peter 	if (mod_hash_find(ipst->ips_ipmp_grp_hash, (mod_hash_key_t)grname,
    384   8485      Peter 	    (mod_hash_val_t *)&grp) != MH_ERR_NOTFOUND)
    385   8485      Peter 		return (EEXIST);
    386   8485      Peter 
    387   8485      Peter 	/*
    388   8485      Peter 	 * Before we remove the group from the hash, ensure we'll be able to
    389   8485      Peter 	 * re-insert it by reserving space.
    390   8485      Peter 	 */
    391   8485      Peter 	if (mod_hash_reserve_nosleep(ipst->ips_ipmp_grp_hash, &mh) != 0)
    392   8485      Peter 		return (ENOMEM);
    393   8485      Peter 
    394   8485      Peter 	ipmp_grp_remove(grp);
    395   8485      Peter 	(void) strlcpy(grp->gr_name, grname, sizeof (grp->gr_name));
    396   8485      Peter 	ipmp_grp_insert(grp, mh);
    397   8485      Peter 
    398   8485      Peter 	return (0);
    399   8485      Peter }
    400   8485      Peter 
    401   8485      Peter /*
    402   8485      Peter  * Destroy `grp' and remove it from the hash.  Caller must ensure `grp' is in
    403   8485      Peter  * the hash, and that there are no interfaces on it.
    404   8485      Peter  */
    405   8485      Peter void
    406   8485      Peter ipmp_grp_destroy(ipmp_grp_t *grp)
    407   8485      Peter {
    408   8485      Peter 	ip_stack_t *ipst = IPMP_GRP_TO_IPST(grp);
    409   8485      Peter 
    410   8485      Peter 	ASSERT(RW_WRITE_HELD(&ipst->ips_ipmp_lock));
    411   8485      Peter 
    412   8485      Peter 	/*
    413   8485      Peter 	 * If there are still interfaces using this group, panic before things
    414   8485      Peter 	 * go really off the rails.
    415   8485      Peter 	 */
    416   8485      Peter 	if (grp->gr_nif != 0)
    417   8485      Peter 		panic("cannot destroy IPMP group \"%s\": in use", grp->gr_name);
    418   8485      Peter 
    419   8485      Peter 	ipmp_grp_remove(grp);
    420   8485      Peter 	ipmp_grp_destroy_kstats(grp);
    421   8485      Peter 
    422   8485      Peter 	ASSERT(grp->gr_v4 == NULL);
    423   8485      Peter 	ASSERT(grp->gr_v6 == NULL);
    424   8485      Peter 	ASSERT(grp->gr_nv4 == 0);
    425   8485      Peter 	ASSERT(grp->gr_nv6 == 0);
    426   8485      Peter 	ASSERT(grp->gr_nactif == 0);
    427   8485      Peter 	ASSERT(grp->gr_linkdownmp == NULL);
    428   8485      Peter 	grp->gr_phyint = NULL;
    429   8485      Peter 
    430   8485      Peter 	kmem_free(grp, sizeof (ipmp_grp_t));
    431   8485      Peter }
    432   8485      Peter 
    433   8485      Peter /*
    434   8485      Peter  * Check whether `ill' is suitable for inclusion into `grp', and return an
    435   8485      Peter  * errno describing the problem (if any).  NOTE: many of these errno values
    436   8485      Peter  * are interpreted by ifconfig, which will take corrective action and retry
    437   8485      Peter  * the SIOCSLIFGROUPNAME, so please exercise care when changing them.
    438   8485      Peter  */
    439   8485      Peter static int
    440   8485      Peter ipmp_grp_vet_ill(ipmp_grp_t *grp, ill_t *ill)
    441   8485      Peter {
    442   8485      Peter 	ip_stack_t *ipst = IPMP_GRP_TO_IPST(grp);
    443   8485      Peter 
    444   8485      Peter 	ASSERT(IAM_WRITER_ILL(ill));
    445   8485      Peter 	ASSERT(RW_LOCK_HELD(&ipst->ips_ipmp_lock));
    446   8485      Peter 
    447   8485      Peter 	/*
    448   8485      Peter 	 * To sidestep complicated address migration logic in the kernel and
    449   8485      Peter 	 * to force the kernel's all-hosts multicast memberships to be blown
    450   8485      Peter 	 * away, all addresses that had been brought up must be brought back
    451   8485      Peter 	 * down prior to adding an interface to a group.  (This includes
    452   8485      Peter 	 * addresses currently down due to DAD.)  Once the interface has been
    453   8485      Peter 	 * added to the group, its addresses can then be brought back up, at
    454   8485      Peter 	 * which point they will be moved to the IPMP meta-interface.
    455   8485      Peter 	 * NOTE: we do this before ill_appaddr_cnt() since bringing down the
    456   8485      Peter 	 * link-local causes in.ndpd to remove its ADDRCONF'd addresses.
    457   8485      Peter 	 */
    458   8485      Peter 	if (ill->ill_ipif_up_count + ill->ill_ipif_dup_count > 0)
    459   8485      Peter 		return (EADDRINUSE);
    460   8485      Peter 
    461   8485      Peter 	/*
    462   8485      Peter 	 * To avoid confusing applications by changing addresses that are
    463   8485      Peter 	 * under their control, all such control must be removed prior to
    464   8485      Peter 	 * adding an interface into a group.
    465   8485      Peter 	 */
    466   8485      Peter 	if (ill_appaddr_cnt(ill) != 0)
    467   8485      Peter 		return (EADDRNOTAVAIL);
    468   8485      Peter 
    469   8485      Peter 	/*
    470   8485      Peter 	 * Since PTP addresses do not share the same broadcast domain, they
    471   8485      Peter 	 * are not allowed to be in an IPMP group.
    472   8485      Peter 	 */
    473   8485      Peter 	if (ill_ptpaddr_cnt(ill) != 0)
    474   8485      Peter 		return (EINVAL);
    475   8485      Peter 
    476   8485      Peter 	/*
    477   8485      Peter 	 * An ill must support multicast to be allowed into a group.
    478   8485      Peter 	 */
    479   8485      Peter 	if (!(ill->ill_flags & ILLF_MULTICAST))
    480   8485      Peter 		return (ENOTSUP);
    481   8485      Peter 
    482   8485      Peter 	/*
    483   8485      Peter 	 * An ill must strictly be using ARP and/or ND for address
    484   8485      Peter 	 * resolution for it to be allowed into a group.
    485   8485      Peter 	 */
    486  11042       Erik 	if (ill->ill_flags & (ILLF_NONUD | ILLF_NOARP))
    487   8485      Peter 		return (ENOTSUP);
    488   8485      Peter 
    489   8485      Peter 	/*
    490   8485      Peter 	 * An ill cannot also be using usesrc groups.  (Although usesrc uses
    491   8485      Peter 	 * ill_g_usesrc_lock, we don't need to grab it since usesrc also does
    492   8485      Peter 	 * all its modifications as writer.)
    493   8485      Peter 	 */
    494   8485      Peter 	if (IS_USESRC_ILL(ill) || IS_USESRC_CLI_ILL(ill))
    495   8485      Peter 		return (ENOTSUP);
    496   8485      Peter 
    497   8485      Peter 	/*
    498   8485      Peter 	 * All ills in a group must be the same mactype.
    499   8485      Peter 	 */
    500   8485      Peter 	if (grp->gr_nif > 0 && grp->gr_mactype != ill->ill_mactype)
    501   8485      Peter 		return (EINVAL);
    502   8485      Peter 
    503   8485      Peter 	return (0);
    504   8485      Peter }
    505   8485      Peter 
    506   8485      Peter /*
    507   8485      Peter  * Check whether `phyi' is suitable for inclusion into `grp', and return an
    508   8485      Peter  * errno describing the problem (if any).  See comment above ipmp_grp_vet_ill()
    509   8485      Peter  * regarding errno values.
    510   8485      Peter  */
    511   8485      Peter int
    512   8485      Peter ipmp_grp_vet_phyint(ipmp_grp_t *grp, phyint_t *phyi)
    513   8485      Peter {
    514   8485      Peter 	int err = 0;
    515   8485      Peter 	ip_stack_t *ipst = IPMP_GRP_TO_IPST(grp);
    516   8485      Peter 
    517   8485      Peter 	ASSERT(IAM_WRITER_IPSQ(phyi->phyint_ipsq));
    518   8485      Peter 	ASSERT(RW_LOCK_HELD(&ipst->ips_ipmp_lock));
    519   8485      Peter 
    520   8485      Peter 	/*
    521   8485      Peter 	 * An interface cannot have address families plumbed that are not
    522   8485      Peter 	 * configured in the group.
    523   8485      Peter 	 */
    524   8485      Peter 	if (phyi->phyint_illv4 != NULL && grp->gr_v4 == NULL ||
    525   8485      Peter 	    phyi->phyint_illv6 != NULL && grp->gr_v6 == NULL)
    526   8485      Peter 		return (EAFNOSUPPORT);
    527   8485      Peter 
    528   8485      Peter 	if (phyi->phyint_illv4 != NULL)
    529   8485      Peter 		err = ipmp_grp_vet_ill(grp, phyi->phyint_illv4);
    530   8485      Peter 	if (err == 0 && phyi->phyint_illv6 != NULL)
    531   8485      Peter 		err = ipmp_grp_vet_ill(grp, phyi->phyint_illv6);
    532   8485      Peter 
    533   8485      Peter 	return (err);
    534   8485      Peter }
    535   8485      Peter 
    536   8485      Peter /*
    537   8485      Peter  * Create a new illgrp on IPMP meta-interface `ill'.
    538   8485      Peter  */
    539   8485      Peter ipmp_illgrp_t *
    540   8485      Peter ipmp_illgrp_create(ill_t *ill)
    541   8485      Peter {
    542   8485      Peter 	uint_t mtu = ill->ill_isv6 ? IPV6_MIN_MTU : IP_MIN_MTU;
    543   8485      Peter 	ipmp_illgrp_t *illg;
    544   8485      Peter 
    545   8485      Peter 	ASSERT(IAM_WRITER_ILL(ill));
    546   8485      Peter 	ASSERT(IS_IPMP(ill));
    547   8485      Peter 	ASSERT(ill->ill_grp == NULL);
    548   8485      Peter 
    549   8485      Peter 	if ((illg = kmem_zalloc(sizeof (ipmp_illgrp_t), KM_NOSLEEP)) == NULL)
    550   8485      Peter 		return (NULL);
    551   8485      Peter 
    552   8485      Peter 	list_create(&illg->ig_if, sizeof (ill_t), offsetof(ill_t, ill_grpnode));
    553   8485      Peter 	list_create(&illg->ig_actif, sizeof (ill_t),
    554   8485      Peter 	    offsetof(ill_t, ill_actnode));
    555   8485      Peter 	list_create(&illg->ig_arpent, sizeof (ipmp_arpent_t),
    556   8485      Peter 	    offsetof(ipmp_arpent_t, ia_node));
    557   8485      Peter 
    558   8485      Peter 	illg->ig_ipmp_ill = ill;
    559   8485      Peter 	ill->ill_grp = illg;
    560   8485      Peter 	ipmp_illgrp_set_mtu(illg, mtu);
    561   8485      Peter 
    562   8485      Peter 	return (illg);
    563   8485      Peter }
    564   8485      Peter 
    565   8485      Peter /*
    566   8485      Peter  * Destroy illgrp `illg', and disconnect it from its IPMP meta-interface.
    567   8485      Peter  */
    568   8485      Peter void
    569   8485      Peter ipmp_illgrp_destroy(ipmp_illgrp_t *illg)
    570   8485      Peter {
    571   8485      Peter 	ASSERT(IAM_WRITER_ILL(illg->ig_ipmp_ill));
    572   8485      Peter 	ASSERT(IS_IPMP(illg->ig_ipmp_ill));
    573   8485      Peter 
    574   8485      Peter 	/*
    575   8485      Peter 	 * Verify `illg' is empty.
    576   8485      Peter 	 */
    577   8485      Peter 	ASSERT(illg->ig_next_ill == NULL);
    578   8485      Peter 	ASSERT(illg->ig_cast_ill == NULL);
    579   8485      Peter 	ASSERT(list_is_empty(&illg->ig_arpent));
    580   8485      Peter 	ASSERT(list_is_empty(&illg->ig_if));
    581   8485      Peter 	ASSERT(list_is_empty(&illg->ig_actif));
    582   8485      Peter 	ASSERT(illg->ig_nactif == 0);
    583   8485      Peter 
    584   8485      Peter 	/*
    585   8485      Peter 	 * Destroy `illg'.
    586   8485      Peter 	 */
    587   8485      Peter 	illg->ig_ipmp_ill->ill_grp = NULL;
    588   8485      Peter 	illg->ig_ipmp_ill = NULL;
    589   8485      Peter 	list_destroy(&illg->ig_if);
    590   8485      Peter 	list_destroy(&illg->ig_actif);
    591   8485      Peter 	list_destroy(&illg->ig_arpent);
    592   8485      Peter 	kmem_free(illg, sizeof (ipmp_illgrp_t));
    593   8485      Peter }
    594   8485      Peter 
    595   8485      Peter /*
    596   8485      Peter  * Add `ipif' to the pool of usable data addresses on `illg' and attempt to
    597   8485      Peter  * bind it to an underlying ill, while keeping an even address distribution.
    598   8485      Peter  * If the bind is successful, return a pointer to the bound ill.
    599   8485      Peter  */
    600   8485      Peter ill_t *
    601   8485      Peter ipmp_illgrp_add_ipif(ipmp_illgrp_t *illg, ipif_t *ipif)
    602   8485      Peter {
    603   8485      Peter 	ill_t *minill;
    604   8485      Peter 	ipmp_arpent_t *entp;
    605   8485      Peter 
    606   8485      Peter 	ASSERT(IAM_WRITER_IPIF(ipif));
    607   8485      Peter 	ASSERT(ipmp_ipif_is_dataaddr(ipif));
    608   8485      Peter 
    609   8485      Peter 	/*
    610   8485      Peter 	 * IPMP data address mappings are internally managed by IP itself, so
    611   8485      Peter 	 * delete any existing ARP entries associated with the address.
    612   8485      Peter 	 */
    613   8485      Peter 	if (!ipif->ipif_isv6) {
    614   8485      Peter 		entp = ipmp_illgrp_lookup_arpent(illg, &ipif->ipif_lcl_addr);
    615   8485      Peter 		if (entp != NULL)
    616   8485      Peter 			ipmp_illgrp_destroy_arpent(illg, entp);
    617   8485      Peter 	}
    618   8485      Peter 
    619   8485      Peter 	if ((minill = ipmp_illgrp_min_ill(illg)) != NULL)
    620   8485      Peter 		ipmp_ill_bind_ipif(minill, ipif, Res_act_none);
    621   8485      Peter 
    622   8485      Peter 	return (ipif->ipif_bound ? ipif->ipif_bound_ill : NULL);
    623   8485      Peter }
    624   8485      Peter 
    625   8485      Peter /*
    626   8485      Peter  * Delete `ipif' from the pool of usable data addresses on `illg'.  If it's
    627   8485      Peter  * bound, unbind it from the underlying ill while keeping an even address
    628   8485      Peter  * distribution.
    629   8485      Peter  */
    630   8485      Peter void
    631   8485      Peter ipmp_illgrp_del_ipif(ipmp_illgrp_t *illg, ipif_t *ipif)
    632   8485      Peter {
    633   8485      Peter 	ill_t *maxill, *boundill = ipif->ipif_bound_ill;
    634   8485      Peter 
    635   8485      Peter 	ASSERT(IAM_WRITER_IPIF(ipif));
    636   8485      Peter 
    637   8485      Peter 	if (boundill != NULL) {
    638   8485      Peter 		(void) ipmp_ill_unbind_ipif(boundill, ipif, B_FALSE);
    639   8485      Peter 
    640   8485      Peter 		maxill = ipmp_illgrp_max_ill(illg);
    641   8485      Peter 		if (maxill->ill_bound_cnt > boundill->ill_bound_cnt + 1) {
    642   8485      Peter 			ipif = ipmp_ill_unbind_ipif(maxill, NULL, B_TRUE);
    643   8485      Peter 			ipmp_ill_bind_ipif(boundill, ipif, Res_act_rebind);
    644   8485      Peter 		}
    645   8485      Peter 	}
    646   8485      Peter }
    647   8485      Peter 
    648   8485      Peter /*
    649   8485      Peter  * Return the active ill with the greatest number of data addresses in `illg'.
    650   8485      Peter  */
    651   8485      Peter static ill_t *
    652   8485      Peter ipmp_illgrp_max_ill(ipmp_illgrp_t *illg)
    653   8485      Peter {
    654   8485      Peter 	ill_t *ill, *bestill = NULL;
    655   8485      Peter 
    656   8485      Peter 	ASSERT(IAM_WRITER_ILL(illg->ig_ipmp_ill));
    657   8485      Peter 
    658   8485      Peter 	ill = list_head(&illg->ig_actif);
    659   8485      Peter 	for (; ill != NULL; ill = list_next(&illg->ig_actif, ill)) {
    660   8485      Peter 		if (bestill == NULL ||
    661   8485      Peter 		    ill->ill_bound_cnt > bestill->ill_bound_cnt) {
    662   8485      Peter 			bestill = ill;
    663   8485      Peter 		}
    664   8485      Peter 	}
    665   8485      Peter 	return (bestill);
    666   8485      Peter }
    667   8485      Peter 
    668   8485      Peter /*
    669   8485      Peter  * Return the active ill with the fewest number of data addresses in `illg'.
    670   8485      Peter  */
    671   8485      Peter static ill_t *
    672   8485      Peter ipmp_illgrp_min_ill(ipmp_illgrp_t *illg)
    673   8485      Peter {
    674   8485      Peter 	ill_t *ill, *bestill = NULL;
    675   8485      Peter 
    676   8485      Peter 	ASSERT(IAM_WRITER_ILL(illg->ig_ipmp_ill));
    677   8485      Peter 
    678   8485      Peter 	ill = list_head(&illg->ig_actif);
    679   8485      Peter 	for (; ill != NULL; ill = list_next(&illg->ig_actif, ill)) {
    680   8485      Peter 		if (bestill == NULL ||
    681   8485      Peter 		    ill->ill_bound_cnt < bestill->ill_bound_cnt) {
    682   8485      Peter 			if (ill->ill_bound_cnt == 0)
    683   8485      Peter 				return (ill);	 /* can't get better */
    684   8485      Peter 			bestill = ill;
    685   8485      Peter 		}
    686   8485      Peter 	}
    687   8485      Peter 	return (bestill);
    688   8485      Peter }
    689   8485      Peter 
    690   8485      Peter /*
    691   8485      Peter  * Return a pointer to IPMP meta-interface for `illg' (which must exist).
    692   8485      Peter  * Since ig_ipmp_ill never changes for a given illg, no locks are needed.
    693   8485      Peter  */
    694   8485      Peter ill_t *
    695   8485      Peter ipmp_illgrp_ipmp_ill(ipmp_illgrp_t *illg)
    696   8485      Peter {
    697   8485      Peter 	return (illg->ig_ipmp_ill);
    698   8485      Peter }
    699   8485      Peter 
    700   8485      Peter /*
    701   8485      Peter  * Return a pointer to the next available underlying ill in `illg', or NULL if
    702   8485      Peter  * one doesn't exist.  Caller must be inside the IPSQ.
    703   8485      Peter  */
    704   8485      Peter ill_t *
    705   8485      Peter ipmp_illgrp_next_ill(ipmp_illgrp_t *illg)
    706   8485      Peter {
    707   8485      Peter 	ill_t *ill;
    708   8485      Peter 	ip_stack_t *ipst = IPMP_ILLGRP_TO_IPST(illg);
    709   8485      Peter 
    710   8485      Peter 	ASSERT(IAM_WRITER_ILL(illg->ig_ipmp_ill));
    711   8485      Peter 
    712   8485      Peter 	rw_enter(&ipst->ips_ipmp_lock, RW_WRITER);
    713   8485      Peter 	if ((ill = illg->ig_next_ill) != NULL) {
    714   8485      Peter 		illg->ig_next_ill = list_next(&illg->ig_actif, ill);
    715   8485      Peter 		if (illg->ig_next_ill == NULL)
    716   8485      Peter 			illg->ig_next_ill = list_head(&illg->ig_actif);
    717   8485      Peter 	}
    718   8485      Peter 	rw_exit(&ipst->ips_ipmp_lock);
    719   8485      Peter 
    720   8485      Peter 	return (ill);
    721   8485      Peter }
    722   8485      Peter 
    723   8485      Peter /*
    724   8485      Peter  * Return a held pointer to the next available underlying ill in `illg', or
    725   8485      Peter  * NULL if one doesn't exist.  Caller need not be inside the IPSQ.
    726   8485      Peter  */
    727   8485      Peter ill_t *
    728   8485      Peter ipmp_illgrp_hold_next_ill(ipmp_illgrp_t *illg)
    729   8485      Peter {
    730   8485      Peter 	ill_t *ill;
    731   8485      Peter 	uint_t i;
    732   8485      Peter 	ip_stack_t *ipst = IPMP_ILLGRP_TO_IPST(illg);
    733   8485      Peter 
    734   8485      Peter 	rw_enter(&ipst->ips_ipmp_lock, RW_WRITER);
    735   8485      Peter 	for (i = 0; i < illg->ig_nactif; i++) {
    736   8485      Peter 		ill = illg->ig_next_ill;
    737   8485      Peter 		illg->ig_next_ill = list_next(&illg->ig_actif, ill);
    738   8485      Peter 		if (illg->ig_next_ill == NULL)
    739   8485      Peter 			illg->ig_next_ill = list_head(&illg->ig_actif);
    740   8485      Peter 
    741  11042       Erik 		if (ill_check_and_refhold(ill)) {
    742   8485      Peter 			rw_exit(&ipst->ips_ipmp_lock);
    743   8485      Peter 			return (ill);
    744   8485      Peter 		}
    745   8485      Peter 	}
    746   8485      Peter 	rw_exit(&ipst->ips_ipmp_lock);
    747   8485      Peter 
    748   8485      Peter 	return (NULL);
    749   8485      Peter }
    750   8485      Peter 
    751   8485      Peter /*
    752   8485      Peter  * Return a held pointer to the nominated multicast ill in `illg', or NULL if
    753   8485      Peter  * one doesn't exist.  Caller need not be inside the IPSQ.
    754   8485      Peter  */
    755   8485      Peter ill_t *
    756   8485      Peter ipmp_illgrp_hold_cast_ill(ipmp_illgrp_t *illg)
    757   8485      Peter {
    758   8485      Peter 	ill_t *castill;
    759   8485      Peter 	ip_stack_t *ipst = IPMP_ILLGRP_TO_IPST(illg);
    760   8485      Peter 
    761   8485      Peter 	rw_enter(&ipst->ips_ipmp_lock, RW_READER);
    762   8485      Peter 	castill = illg->ig_cast_ill;
    763  11042       Erik 	if (castill != NULL && ill_check_and_refhold(castill)) {
    764   8485      Peter 		rw_exit(&ipst->ips_ipmp_lock);
    765   8485      Peter 		return (castill);
    766   8485      Peter 	}
    767   8485      Peter 	rw_exit(&ipst->ips_ipmp_lock);
    768   8485      Peter 	return (NULL);
    769  11042       Erik }
    770  11042       Erik 
    771  11042       Erik /*
    772  11042       Erik  * Callback routine for ncec_walk() that deletes `nce' if it is associated with
    773  11042       Erik  * the `(ill_t *)arg' and it is not one of the local addresses.  Caller must be
    774  11042       Erik  * inside the IPSQ.
    775  11042       Erik  */
    776  11042       Erik static void
    777  11042       Erik ipmp_ncec_delete_nonlocal(ncec_t *ncec, uchar_t *arg)
    778  11042       Erik {
    779  11042       Erik 	if ((ncec != NULL) && !NCE_MYADDR(ncec) &&
    780  11042       Erik 	    ncec->ncec_ill == (ill_t *)arg) {
    781  11042       Erik 		ncec_delete(ncec);
    782  11042       Erik 	}
    783   8485      Peter }
    784   8485      Peter 
    785   8485      Peter /*
    786   8485      Peter  * Set the nominated cast ill on `illg' to `castill'.  If `castill' is NULL,
    787   8485      Peter  * any existing nomination is removed.  Caller must be inside the IPSQ.
    788   8485      Peter  */
    789   8485      Peter static void
    790   8485      Peter ipmp_illgrp_set_cast(ipmp_illgrp_t *illg, ill_t *castill)
    791   8485      Peter {
    792   8485      Peter 	ill_t *ocastill = illg->ig_cast_ill;
    793   8485      Peter 	ill_t *ipmp_ill = illg->ig_ipmp_ill;
    794   8485      Peter 	ip_stack_t *ipst = IPMP_ILLGRP_TO_IPST(illg);
    795   8485      Peter 
    796   8485      Peter 	ASSERT(IAM_WRITER_ILL(ipmp_ill));
    797   8485      Peter 
    798   8485      Peter 	/*
    799   8485      Peter 	 * Disable old nominated ill (if any).
    800   8485      Peter 	 */
    801   8485      Peter 	if (ocastill != NULL) {
    802   8485      Peter 		DTRACE_PROBE2(ipmp__illgrp__cast__disable, ipmp_illgrp_t *,
    803   8485      Peter 		    illg, ill_t *, ocastill);
    804   8485      Peter 		ASSERT(ocastill->ill_nom_cast);
    805   8485      Peter 		ocastill->ill_nom_cast = B_FALSE;
    806   8485      Peter 		/*
    807   8485      Peter 		 * If the IPMP meta-interface is down, we never did the join,
    808   8485      Peter 		 * so we must not try to leave.
    809   8485      Peter 		 */
    810   8485      Peter 		if (ipmp_ill->ill_dl_up)
    811   8485      Peter 			ill_leave_multicast(ipmp_ill);
    812  11042       Erik 
    813  11042       Erik 		/*
    814  11042       Erik 		 * Delete any NCEs tied to the old nomination.  We must do this
    815  11042       Erik 		 * last since ill_leave_multicast() may trigger IREs to be
    816  11042       Erik 		 * built using ig_cast_ill.
    817  11042       Erik 		 */
    818  11042       Erik 		ncec_walk(ocastill, (pfi_t)ipmp_ncec_delete_nonlocal, ocastill,
    819  11042       Erik 		    ocastill->ill_ipst);
    820   8485      Peter 	}
    821   8485      Peter 
    822   8485      Peter 	/*
    823   8485      Peter 	 * Set new nomination.
    824   8485      Peter 	 */
    825   8485      Peter 	rw_enter(&ipst->ips_ipmp_lock, RW_WRITER);
    826   8485      Peter 	illg->ig_cast_ill = castill;
    827   8485      Peter 	rw_exit(&ipst->ips_ipmp_lock);
    828   8485      Peter 
    829   8485      Peter 	/*
    830   8485      Peter 	 * Enable new nominated ill (if any).
    831   8485      Peter 	 */
    832   8485      Peter 	if (castill != NULL) {
    833   8485      Peter 		DTRACE_PROBE2(ipmp__illgrp__cast__enable, ipmp_illgrp_t *,
    834   8485      Peter 		    illg, ill_t *, castill);
    835   8485      Peter 		ASSERT(!castill->ill_nom_cast);
    836   8485      Peter 		castill->ill_nom_cast = B_TRUE;
    837   8485      Peter 		/*
    838   8485      Peter 		 * If the IPMP meta-interface is down, the attempt to recover
    839   8485      Peter 		 * will silently fail but ill_need_recover_multicast will be
    840   8485      Peter 		 * erroneously cleared -- so check first.
    841   8485      Peter 		 */
    842   8485      Peter 		if (ipmp_ill->ill_dl_up)
    843   8485      Peter 			ill_recover_multicast(ipmp_ill);
    844   8485      Peter 	}
    845   8485      Peter }
    846   8485      Peter 
    847   8485      Peter /*
    848   8485      Peter  * Create an IPMP ARP entry and add it to the set tracked on `illg'.  If an
    849   8485      Peter  * entry for the same IP address already exists, destroy it first.  Return the
    850   8485      Peter  * created IPMP ARP entry, or NULL on failure.
    851   8485      Peter  */
    852   8485      Peter ipmp_arpent_t *
    853  11042       Erik ipmp_illgrp_create_arpent(ipmp_illgrp_t *illg, boolean_t proxyarp,
    854  11042       Erik     ipaddr_t ipaddr, uchar_t *lladdr, size_t lladdr_len, uint16_t flags)
    855   8485      Peter {
    856   8485      Peter 	ipmp_arpent_t *entp, *oentp;
    857   8485      Peter 
    858   8485      Peter 	ASSERT(IAM_WRITER_ILL(illg->ig_ipmp_ill));
    859   8485      Peter 
    860  11042       Erik 	if ((entp = kmem_alloc(sizeof (ipmp_arpent_t) + lladdr_len,
    861  11042       Erik 	    KM_NOSLEEP)) == NULL)
    862   8485      Peter 		return (NULL);
    863   8485      Peter 
    864  11042       Erik 	/*
    865  11042       Erik 	 * Delete any existing ARP entry for this address.
    866  11042       Erik 	 */
    867   8485      Peter 	if ((oentp = ipmp_illgrp_lookup_arpent(illg, &entp->ia_ipaddr)) != NULL)
    868   8485      Peter 		ipmp_illgrp_destroy_arpent(illg, oentp);
    869   8485      Peter 
    870  11042       Erik 	/*
    871  11042       Erik 	 * Prepend the new entry.
    872  11042       Erik 	 */
    873  11042       Erik 	entp->ia_ipaddr = ipaddr;
    874  11042       Erik 	entp->ia_flags = flags;
    875  11042       Erik 	entp->ia_lladdr_len = lladdr_len;
    876  11042       Erik 	entp->ia_lladdr = (uchar_t *)&entp[1];
    877  11042       Erik 	bcopy(lladdr, entp->ia_lladdr, lladdr_len);
    878  11042       Erik 	entp->ia_proxyarp = proxyarp;
    879  11042       Erik 	entp->ia_notified = B_TRUE;
    880   8485      Peter 	list_insert_head(&illg->ig_arpent, entp);
    881   8485      Peter 	return (entp);
    882   8485      Peter }
    883   8485      Peter 
    884   8485      Peter /*
    885   8485      Peter  * Remove IPMP ARP entry `entp' from the set tracked on `illg' and destroy it.
    886   8485      Peter  */
    887   8485      Peter void
    888   8485      Peter ipmp_illgrp_destroy_arpent(ipmp_illgrp_t *illg, ipmp_arpent_t *entp)
    889   8485      Peter {
    890   8485      Peter 	ASSERT(IAM_WRITER_ILL(illg->ig_ipmp_ill));
    891   8485      Peter 
    892   8485      Peter 	list_remove(&illg->ig_arpent, entp);
    893  11042       Erik 	kmem_free(entp, sizeof (ipmp_arpent_t) + entp->ia_lladdr_len);
    894   8485      Peter }
    895   8485      Peter 
    896   8485      Peter /*
    897   8485      Peter  * Mark that ARP has been notified about the IP address on `entp'; `illg' is
    898   8485      Peter  * taken as a debugging aid for DTrace FBT probes.
    899   8485      Peter  */
    900   8485      Peter /* ARGSUSED */
    901   8485      Peter void
    902   8485      Peter ipmp_illgrp_mark_arpent(ipmp_illgrp_t *illg, ipmp_arpent_t *entp)
    903   8485      Peter {
    904   8485      Peter 	entp->ia_notified = B_TRUE;
    905   8485      Peter }
    906   8485      Peter 
    907   8485      Peter /*
    908   8485      Peter  * Look up the IPMP ARP entry for IP address `addrp' on `illg'; if `addrp' is
    909   8485      Peter  * NULL, any IPMP ARP entry is requested.  Return NULL if it does not exist.
    910   8485      Peter  */
    911   8485      Peter ipmp_arpent_t *
    912   8485      Peter ipmp_illgrp_lookup_arpent(ipmp_illgrp_t *illg, ipaddr_t *addrp)
    913   8485      Peter {
    914   8485      Peter 	ipmp_arpent_t *entp = list_head(&illg->ig_arpent);
    915   8485      Peter 
    916   8485      Peter 	ASSERT(IAM_WRITER_ILL(illg->ig_ipmp_ill));
    917   8485      Peter 
    918   8485      Peter 	if (addrp == NULL)
    919   8485      Peter 		return (entp);
    920   8485      Peter 
    921   8485      Peter 	for (; entp != NULL; entp = list_next(&illg->ig_arpent, entp))
    922   8485      Peter 		if (entp->ia_ipaddr == *addrp)
    923   8485      Peter 			break;
    924   8485      Peter 	return (entp);
    925   8485      Peter }
    926   8485      Peter 
    927   8485      Peter /*
    928   8485      Peter  * Refresh ARP entries on `illg' to be distributed across its active
    929   8485      Peter  * interfaces.  Entries that cannot be refreshed (e.g., because there are no
    930   8485      Peter  * active interfaces) are marked so that subsequent calls can try again.
    931   8485      Peter  */
    932   8485      Peter void
    933   8485      Peter ipmp_illgrp_refresh_arpent(ipmp_illgrp_t *illg)
    934   8485      Peter {
    935   8485      Peter 	ill_t *ill, *ipmp_ill = illg->ig_ipmp_ill;
    936   8485      Peter 	uint_t paddrlen = ipmp_ill->ill_phys_addr_length;
    937   8485      Peter 	ipmp_arpent_t *entp;
    938  11042       Erik 	ncec_t *ncec;
    939  11042       Erik 	nce_t  *nce;
    940   8485      Peter 
    941   8485      Peter 	ASSERT(IAM_WRITER_ILL(ipmp_ill));
    942   8485      Peter 	ASSERT(!ipmp_ill->ill_isv6);
    943   8485      Peter 
    944   8485      Peter 	ill = list_head(&illg->ig_actif);
    945   8485      Peter 	entp = list_head(&illg->ig_arpent);
    946   8485      Peter 	for (; entp != NULL; entp = list_next(&illg->ig_arpent, entp)) {
    947   8485      Peter 		if (ill == NULL || ipmp_ill->ill_ipif_up_count == 0) {
    948   8485      Peter 			entp->ia_notified = B_FALSE;
    949   8485      Peter 			continue;
    950   8485      Peter 		}
    951   8485      Peter 
    952   8485      Peter 		ASSERT(paddrlen == ill->ill_phys_addr_length);
    953   8485      Peter 
    954   8485      Peter 		/*
    955   8485      Peter 		 * If this is a proxy ARP entry, we can skip notifying ARP if
    956   8485      Peter 		 * the entry is already up-to-date.  If it has changed, we
    957   8485      Peter 		 * update the entry's hardware address before notifying ARP.
    958   8485      Peter 		 */
    959   8485      Peter 		if (entp->ia_proxyarp) {
    960  11042       Erik 			if (bcmp(ill->ill_phys_addr, entp->ia_lladdr,
    961  11042       Erik 			    paddrlen) == 0 && entp->ia_notified)
    962   8485      Peter 				continue;
    963  11042       Erik 			bcopy(ill->ill_phys_addr, entp->ia_lladdr, paddrlen);
    964   8485      Peter 		}
    965   8485      Peter 
    966  11042       Erik 		(void) nce_lookup_then_add_v4(ipmp_ill, entp->ia_lladdr,
    967  11042       Erik 		    paddrlen, &entp->ia_ipaddr, entp->ia_flags, ND_UNCHANGED,
    968  11042       Erik 		    &nce);
    969  11042       Erik 		if (nce == NULL || !entp->ia_proxyarp) {
    970  11042       Erik 			if (nce != NULL)
    971  11042       Erik 				nce_refrele(nce);
    972   8485      Peter 			continue;
    973   8485      Peter 		}
    974  11042       Erik 		ncec = nce->nce_common;
    975  11042       Erik 		mutex_enter(&ncec->ncec_lock);
    976  11042       Erik 		nce_update(ncec, ND_UNCHANGED, ill->ill_phys_addr);
    977  11042       Erik 		mutex_exit(&ncec->ncec_lock);
    978  11042       Erik 		nce_refrele(nce);
    979   8485      Peter 		ipmp_illgrp_mark_arpent(illg, entp);
    980   8485      Peter 
    981   8485      Peter 		if ((ill = list_next(&illg->ig_actif, ill)) == NULL)
    982   8485      Peter 			ill = list_head(&illg->ig_actif);
    983   8485      Peter 	}
    984   8485      Peter }
    985   8485      Peter 
    986   8485      Peter /*
    987   8485      Peter  * Return an interface in `illg' with the specified `physaddr', or NULL if one
    988   8485      Peter  * doesn't exist.  Caller must hold ill_g_lock if it's not inside the IPSQ.
    989   8485      Peter  */
    990   8485      Peter ill_t *
    991   8485      Peter ipmp_illgrp_find_ill(ipmp_illgrp_t *illg, uchar_t *physaddr, uint_t paddrlen)
    992   8485      Peter {
    993   8485      Peter 	ill_t *ill;
    994   8485      Peter 	ill_t *ipmp_ill = illg->ig_ipmp_ill;
    995   8485      Peter 	ip_stack_t *ipst = IPMP_ILLGRP_TO_IPST(illg);
    996   8485      Peter 
    997   8485      Peter 	ASSERT(IAM_WRITER_ILL(ipmp_ill) || RW_LOCK_HELD(&ipst->ips_ill_g_lock));
    998   8485      Peter 
    999   8485      Peter 	ill = list_head(&illg->ig_if);
   1000   8485      Peter 	for (; ill != NULL; ill = list_next(&illg->ig_if, ill)) {
   1001   8485      Peter 		if (ill->ill_phys_addr_length == paddrlen &&
   1002   8485      Peter 		    bcmp(ill->ill_phys_addr, physaddr, paddrlen) == 0)
   1003   8485      Peter 			return (ill);
   1004   8485      Peter 	}
   1005   8485      Peter 	return (NULL);
   1006   8485      Peter }
   1007   8485      Peter 
   1008   8485      Peter /*
   1009   8485      Peter  * Asynchronously update the MTU for an IPMP ill by injecting a DL_NOTIFY_IND.
   1010   8485      Peter  * Caller must be inside the IPSQ unless this is initialization.
   1011   8485      Peter  */
   1012   8485      Peter static void
   1013   8485      Peter ipmp_illgrp_set_mtu(ipmp_illgrp_t *illg, uint_t mtu)
   1014   8485      Peter {
   1015   8485      Peter 	ill_t *ill = illg->ig_ipmp_ill;
   1016   8485      Peter 	mblk_t *mp;
   1017   8485      Peter 
   1018   8485      Peter 	ASSERT(illg->ig_mtu == 0 || IAM_WRITER_ILL(ill));
   1019   8485      Peter 
   1020   8485      Peter 	/*
   1021   8485      Peter 	 * If allocation fails, we have bigger problems than MTU.
   1022   8485      Peter 	 */
   1023   8485      Peter 	if ((mp = ip_dlnotify_alloc(DL_NOTE_SDU_SIZE, mtu)) != NULL) {
   1024   8485      Peter 		illg->ig_mtu = mtu;
   1025   8485      Peter 		put(ill->ill_rq, mp);
   1026   8485      Peter 	}
   1027   8485      Peter }
   1028   8485      Peter 
   1029   8485      Peter /*
   1030   8485      Peter  * Recalculate the IPMP group MTU for `illg', and update its associated IPMP
   1031   8485      Peter  * ill MTU if necessary.
   1032   8485      Peter  */
   1033   8485      Peter void
   1034   8485      Peter ipmp_illgrp_refresh_mtu(ipmp_illgrp_t *illg)
   1035   8485      Peter {
   1036   8485      Peter 	ill_t *ill;
   1037   8485      Peter 	ill_t *ipmp_ill = illg->ig_ipmp_ill;
   1038   8485      Peter 	uint_t mtu = 0;
   1039   8485      Peter 
   1040   8485      Peter 	ASSERT(IAM_WRITER_ILL(ipmp_ill));
   1041   8485      Peter 
   1042   8485      Peter 	/*
   1043  11042       Erik 	 * Since ill_mtu can only change under ill_lock, we hold ill_lock
   1044   8485      Peter 	 * for each ill as we iterate through the list.  Any changes to the
   1045  11042       Erik 	 * ill_mtu will also trigger an update, so even if we missed it
   1046   8485      Peter 	 * this time around, the update will catch it.
   1047   8485      Peter 	 */
   1048   8485      Peter 	ill = list_head(&illg->ig_if);
   1049   8485      Peter 	for (; ill != NULL; ill = list_next(&illg->ig_if, ill)) {
   1050   8485      Peter 		mutex_enter(&ill->ill_lock);
   1051  11042       Erik 		if (mtu == 0 || ill->ill_mtu < mtu)
   1052  11042       Erik 			mtu = ill->ill_mtu;
   1053   8485      Peter 		mutex_exit(&ill->ill_lock);
   1054   8485      Peter 	}
   1055   8485      Peter 
   1056   8485      Peter 	/*
   1057   8485      Peter 	 * MTU must be at least the minimum MTU.
   1058   8485      Peter 	 */
   1059   8485      Peter 	mtu = MAX(mtu, ipmp_ill->ill_isv6 ? IPV6_MIN_MTU : IP_MIN_MTU);
   1060   8485      Peter 
   1061   8485      Peter 	if (illg->ig_mtu != mtu)
   1062   8485      Peter 		ipmp_illgrp_set_mtu(illg, mtu);
   1063   8485      Peter }
   1064   8485      Peter 
   1065   8485      Peter /*
   1066   8485      Peter  * Link illgrp `illg' to IPMP group `grp'.  To simplify the caller, silently
   1067   8485      Peter  * allow the same link to be established more than once.
   1068   8485      Peter  */
   1069   8485      Peter void
   1070   8485      Peter ipmp_illgrp_link_grp(ipmp_illgrp_t *illg, ipmp_grp_t *grp)
   1071   8485      Peter {
   1072   8485      Peter 	ip_stack_t *ipst = IPMP_ILLGRP_TO_IPST(illg);
   1073   8485      Peter 
   1074   8485      Peter 	ASSERT(RW_WRITE_HELD(&ipst->ips_ipmp_lock));
   1075   8485      Peter 
   1076   8485      Peter 	if (illg->ig_ipmp_ill->ill_isv6) {
   1077   8485      Peter 		ASSERT(grp->gr_v6 == NULL || grp->gr_v6 == illg);
   1078   8485      Peter 		grp->gr_v6 = illg;
   1079   8485      Peter 	} else {
   1080   8485      Peter 		ASSERT(grp->gr_v4 == NULL || grp->gr_v4 == illg);
   1081   8485      Peter 		grp->gr_v4 = illg;
   1082   8485      Peter 	}
   1083   8485      Peter }
   1084   8485      Peter 
   1085   8485      Peter /*
   1086   8485      Peter  * Unlink illgrp `illg' from its IPMP group.  Return an errno if the illgrp
   1087   8485      Peter  * cannot be unlinked (e.g., because there are still interfaces using it).
   1088   8485      Peter  */
   1089   8485      Peter int
   1090   8485      Peter ipmp_illgrp_unlink_grp(ipmp_illgrp_t *illg)
   1091   8485      Peter {
   1092   8485      Peter 	ipmp_grp_t *grp = illg->ig_ipmp_ill->ill_phyint->phyint_grp;
   1093   8485      Peter 	ip_stack_t *ipst = IPMP_ILLGRP_TO_IPST(illg);
   1094   8485      Peter 
   1095   8485      Peter 	ASSERT(RW_WRITE_HELD(&ipst->ips_ipmp_lock));
   1096   8485      Peter 
   1097   8485      Peter 	if (illg->ig_ipmp_ill->ill_isv6) {
   1098   8485      Peter 		if (grp->gr_nv6 + grp->gr_pendv6 != 0)
   1099   8485      Peter 			return (EBUSY);
   1100   8485      Peter 		grp->gr_v6 = NULL;
   1101   8485      Peter 	} else {
   1102   8485      Peter 		if (grp->gr_nv4 + grp->gr_pendv4 != 0)
   1103   8485      Peter 			return (EBUSY);
   1104   8485      Peter 		grp->gr_v4 = NULL;
   1105   8485      Peter 	}
   1106   8485      Peter 	return (0);
   1107   8485      Peter }
   1108   8485      Peter 
   1109   8485      Peter /*
   1110   8485      Peter  * Place `ill' into `illg', and rebalance the data addresses on `illg'
   1111   8485      Peter  * to be spread evenly across the ills now in it.  Also, adjust the IPMP
   1112   8485      Peter  * ill as necessary to account for `ill' (e.g., MTU).
   1113   8485      Peter  */
   1114   8485      Peter void
   1115   8485      Peter ipmp_ill_join_illgrp(ill_t *ill, ipmp_illgrp_t *illg)
   1116   8485      Peter {
   1117   8485      Peter 	ill_t *ipmp_ill;
   1118   8485      Peter 	ipif_t *ipif;
   1119   8485      Peter 	ip_stack_t *ipst = ill->ill_ipst;
   1120   8485      Peter 
   1121   8485      Peter 	/* IS_UNDER_IPMP() requires ill_grp to be non-NULL */
   1122   8485      Peter 	ASSERT(!IS_IPMP(ill) && ill->ill_phyint->phyint_grp != NULL);
   1123   8485      Peter 	ASSERT(IAM_WRITER_ILL(ill));
   1124   8485      Peter 	ASSERT(ill->ill_grp == NULL);
   1125   8485      Peter 
   1126   8485      Peter 	ipmp_ill = illg->ig_ipmp_ill;
   1127   8485      Peter 
   1128   8485      Peter 	/*
   1129   8485      Peter 	 * Account for `ill' joining the illgrp.
   1130   8485      Peter 	 */
   1131   8485      Peter 	rw_enter(&ipst->ips_ipmp_lock, RW_WRITER);
   1132   8485      Peter 	if (ill->ill_isv6)
   1133   8485      Peter 		ill->ill_phyint->phyint_grp->gr_nv6++;
   1134   8485      Peter 	else
   1135   8485      Peter 		ill->ill_phyint->phyint_grp->gr_nv4++;
   1136   8485      Peter 	rw_exit(&ipst->ips_ipmp_lock);
   1137   8485      Peter 
   1138   8485      Peter 	/*
   1139   8485      Peter 	 * Ensure the ILLF_ROUTER flag remains consistent across the group.
   1140   8485      Peter 	 */
   1141   8485      Peter 	mutex_enter(&ill->ill_lock);
   1142   8485      Peter 	if (ipmp_ill->ill_flags & ILLF_ROUTER)
   1143   8485      Peter 		ill->ill_flags |= ILLF_ROUTER;
   1144   8485      Peter 	else
   1145   8485      Peter 		ill->ill_flags &= ~ILLF_ROUTER;
   1146   8485      Peter 	mutex_exit(&ill->ill_lock);
   1147   8485      Peter 
   1148   8485      Peter 	/*
   1149   8485      Peter 	 * Blow away all multicast memberships that currently exist on `ill'.
   1150   8485      Peter 	 * This may seem odd, but it's consistent with the application view
   1151   8485      Peter 	 * that `ill' no longer exists (e.g., due to ipmp_ill_rtsaddrmsg()).
   1152   8485      Peter 	 */
   1153  11042       Erik 	update_conn_ill(ill, ill->ill_ipst);
   1154   8485      Peter 	if (ill->ill_isv6) {
   1155   8485      Peter 		reset_mrt_ill(ill);
   1156   8485      Peter 	} else {
   1157   8485      Peter 		ipif = ill->ill_ipif;
   1158   8485      Peter 		for (; ipif != NULL; ipif = ipif->ipif_next) {
   1159   8485      Peter 			reset_mrt_vif_ipif(ipif);
   1160   8485      Peter 		}
   1161   8485      Peter 	}
   1162   8485      Peter 	ip_purge_allmulti(ill);
   1163   8485      Peter 
   1164   8485      Peter 	/*
   1165   8485      Peter 	 * Borrow the first ill's ill_phys_addr_length value for the illgrp's
   1166   8485      Peter 	 * physical address length.  All other ills must have the same value,
   1167   8485      Peter 	 * since they are required to all be the same mactype.  Also update
   1168   8485      Peter 	 * the IPMP ill's MTU and CoS marking, if necessary.
   1169   8485      Peter 	 */
   1170   8485      Peter 	if (list_is_empty(&illg->ig_if)) {
   1171   8485      Peter 		ASSERT(ipmp_ill->ill_phys_addr_length == 0);
   1172   8485      Peter 		/*
   1173   8485      Peter 		 * NOTE: we leave ill_phys_addr NULL since the IPMP group
   1174   8485      Peter 		 * doesn't have a physical address.  This means that code must
   1175   8485      Peter 		 * not assume that ill_phys_addr is non-NULL just because
   1176   8485      Peter 		 * ill_phys_addr_length is non-zero.  Likewise for ill_nd_lla.
   1177   8485      Peter 		 */
   1178   8485      Peter 		ipmp_ill->ill_phys_addr_length = ill->ill_phys_addr_length;
   1179   8485      Peter 		ipmp_ill->ill_nd_lla_len = ill->ill_phys_addr_length;
   1180   8485      Peter 		ipmp_ill->ill_type = ill->ill_type;
   1181   8485      Peter 
   1182   8485      Peter 		if (ill->ill_flags & ILLF_COS_ENABLED) {
   1183   8485      Peter 			mutex_enter(&ipmp_ill->ill_lock);
   1184   8485      Peter 			ipmp_ill->ill_flags |= ILLF_COS_ENABLED;
   1185   8485      Peter 			mutex_exit(&ipmp_ill->ill_lock);
   1186   8485      Peter 		}
   1187  11042       Erik 		ipmp_illgrp_set_mtu(illg, ill->ill_mtu);
   1188   8485      Peter 	} else {
   1189   8485      Peter 		ASSERT(ipmp_ill->ill_phys_addr_length ==
   1190   8485      Peter 		    ill->ill_phys_addr_length);
   1191   8485      Peter 		ASSERT(ipmp_ill->ill_type == ill->ill_type);
   1192   8485      Peter 
   1193   8485      Peter 		if (!(ill->ill_flags & ILLF_COS_ENABLED)) {
   1194   8485      Peter 			mutex_enter(&ipmp_ill->ill_lock);
   1195   8485      Peter 			ipmp_ill->ill_flags &= ~ILLF_COS_ENABLED;
   1196   8485      Peter 			mutex_exit(&ipmp_ill->ill_lock);
   1197   8485      Peter 		}
   1198  11042       Erik 		if (illg->ig_mtu > ill->ill_mtu)
   1199  11042       Erik 			ipmp_illgrp_set_mtu(illg, ill->ill_mtu);
   1200   8485      Peter 	}
   1201   8485      Peter 
   1202   8485      Peter 	rw_enter(&ipst->ips_ill_g_lock, RW_WRITER);
   1203   8485      Peter 	list_insert_tail(&illg->ig_if, ill);
   1204   8485      Peter 	ill->ill_grp = illg;
   1205   8485      Peter 	rw_exit(&ipst->ips_ill_g_lock);
   1206   8485      Peter 
   1207   8485      Peter 	/*
   1208   8485      Peter 	 * Hide the IREs on `ill' so that we don't accidentally find them when
   1209   8485      Peter 	 * sending data traffic.
   1210   8485      Peter 	 */
   1211   8485      Peter 	ire_walk_ill(MATCH_IRE_ILL, 0, ipmp_ill_ire_mark_testhidden, ill, ill);
   1212   8485      Peter 
   1213   8485      Peter 	ipmp_ill_refresh_active(ill);
   1214   8485      Peter }
   1215   8485      Peter 
   1216   8485      Peter /*
   1217   8485      Peter  * Remove `ill' from its illgrp, and rebalance the data addresses in that
   1218   8485      Peter  * illgrp to be spread evenly across the remaining ills.  Also, adjust the
   1219   8485      Peter  * IPMP ill as necessary now that `ill' is removed (e.g., MTU).
   1220   8485      Peter  */
   1221   8485      Peter void
   1222   8485      Peter ipmp_ill_leave_illgrp(ill_t *ill)
   1223   8485      Peter {
   1224   8485      Peter 	ill_t *ipmp_ill;
   1225   8485      Peter 	ipif_t *ipif;
   1226   8485      Peter 	ipmp_arpent_t *entp;
   1227   8485      Peter 	ipmp_illgrp_t *illg = ill->ill_grp;
   1228   8485      Peter 	ip_stack_t *ipst = IPMP_ILLGRP_TO_IPST(illg);
   1229   8485      Peter 
   1230   8485      Peter 	ASSERT(IS_UNDER_IPMP(ill));
   1231   8485      Peter 	ASSERT(IAM_WRITER_ILL(ill));
   1232   8485      Peter 	ASSERT(illg != NULL);
   1233   8485      Peter 
   1234   8485      Peter 	ipmp_ill = illg->ig_ipmp_ill;
   1235   8485      Peter 
   1236   8485      Peter 	/*
   1237   8485      Peter 	 * Cancel IPMP-specific ill timeouts.
   1238   8485      Peter 	 */
   1239   8485      Peter 	(void) untimeout(ill->ill_refresh_tid);
   1240   8485      Peter 
   1241   8485      Peter 	/*
   1242   8485      Peter 	 * Expose any previously-hidden IREs on `ill'.
   1243   8485      Peter 	 */
   1244   8485      Peter 	ire_walk_ill(MATCH_IRE_ILL, 0, ipmp_ill_ire_clear_testhidden, ill, ill);
   1245   8485      Peter 
   1246   8485      Peter 	/*
   1247   8485      Peter 	 * Ensure the multicast state for each ipif on `ill' is down so that
   1248   8485      Peter 	 * our ipif_multicast_up() (once `ill' leaves the group) will rejoin
   1249   8485      Peter 	 * all eligible groups.
   1250   8485      Peter 	 */
   1251   8485      Peter 	for (ipif = ill->ill_ipif; ipif != NULL; ipif = ipif->ipif_next)
   1252   8485      Peter 		if (ipif->ipif_flags & IPIF_UP)
   1253   8485      Peter 			ipif_multicast_down(ipif);
   1254   8485      Peter 
   1255   8485      Peter 	/*
   1256   8485      Peter 	 * Account for `ill' leaving the illgrp.
   1257   8485      Peter 	 */
   1258   8485      Peter 	rw_enter(&ipst->ips_ipmp_lock, RW_WRITER);
   1259   8485      Peter 	if (ill->ill_isv6)
   1260   8485      Peter 		ill->ill_phyint->phyint_grp->gr_nv6--;
   1261   8485      Peter 	else
   1262   8485      Peter 		ill->ill_phyint->phyint_grp->gr_nv4--;
   1263   8485      Peter 	rw_exit(&ipst->ips_ipmp_lock);
   1264   8485      Peter 
   1265   8485      Peter 	/*
   1266   8485      Peter 	 * Pull `ill' out of the interface lists.
   1267   8485      Peter 	 */
   1268   8485      Peter 	if (list_link_active(&ill->ill_actnode))
   1269   8485      Peter 		ipmp_ill_deactivate(ill);
   1270   8485      Peter 	rw_enter(&ipst->ips_ill_g_lock, RW_WRITER);
   1271   8485      Peter 	list_remove(&illg->ig_if, ill);
   1272   8485      Peter 	ill->ill_grp = NULL;
   1273   8485      Peter 	rw_exit(&ipst->ips_ill_g_lock);
   1274   8485      Peter 
   1275   8485      Peter 	/*
   1276   8485      Peter 	 * Re-establish multicast memberships that were previously being
   1277   8485      Peter 	 * handled by the IPMP meta-interface.
   1278   8485      Peter 	 */
   1279   8485      Peter 	for (ipif = ill->ill_ipif; ipif != NULL; ipif = ipif->ipif_next)
   1280   8485      Peter 		if (ipif->ipif_flags & IPIF_UP)
   1281   8485      Peter 			ipif_multicast_up(ipif);
   1282   8485      Peter 
   1283   8485      Peter 	/*
   1284   8485      Peter 	 * Refresh the group MTU based on the new interface list.
   1285   8485      Peter 	 */
   1286   8485      Peter 	ipmp_illgrp_refresh_mtu(illg);
   1287   8485      Peter 
   1288   8485      Peter 	if (list_is_empty(&illg->ig_if)) {
   1289   8485      Peter 		/*
   1290   8485      Peter 		 * No ills left in the illgrp; we no longer have a physical
   1291   8485      Peter 		 * address length, nor can we support ARP, CoS, or anything
   1292   8485      Peter 		 * else that depends on knowing the link layer type.
   1293   8485      Peter 		 */
   1294   8485      Peter 		while ((entp = ipmp_illgrp_lookup_arpent(illg, NULL)) != NULL)
   1295   8485      Peter 			ipmp_illgrp_destroy_arpent(illg, entp);
   1296   8485      Peter 
   1297   8485      Peter 		ipmp_ill->ill_phys_addr_length = 0;
   1298   8485      Peter 		ipmp_ill->ill_nd_lla_len = 0;
   1299   8485      Peter 		ipmp_ill->ill_type = IFT_OTHER;
   1300   8485      Peter 		mutex_enter(&ipmp_ill->ill_lock);
   1301   8485      Peter 		ipmp_ill->ill_flags &= ~ILLF_COS_ENABLED;
   1302   8485      Peter 		mutex_exit(&ipmp_ill->ill_lock);
   1303   8485      Peter 	} else {
   1304   8485      Peter 		/*
   1305   8485      Peter 		 * If `ill' didn't support CoS, see if it can now be enabled.
   1306   8485      Peter 		 */
   1307   8485      Peter 		if (!(ill->ill_flags & ILLF_COS_ENABLED)) {
   1308   8485      Peter 			ASSERT(!(ipmp_ill->ill_flags & ILLF_COS_ENABLED));
   1309   8485      Peter 
   1310   8485      Peter 			ill = list_head(&illg->ig_if);
   1311   8485      Peter 			do {
   1312   8485      Peter 				if (!(ill->ill_flags & ILLF_COS_ENABLED))
   1313   8485      Peter 					break;
   1314   8485      Peter 			} while ((ill = list_next(&illg->ig_if, ill)) != NULL);
   1315   8485      Peter 
   1316   8485      Peter 			if (ill == NULL) {
   1317   8485      Peter 				mutex_enter(&ipmp_ill->ill_lock);
   1318   8485      Peter 				ipmp_ill->ill_flags |= ILLF_COS_ENABLED;
   1319   8485      Peter 				mutex_exit(&ipmp_ill->ill_lock);
   1320   8485      Peter 			}
   1321   8485      Peter 		}
   1322   8485      Peter 	}
   1323   8485      Peter }
   1324   8485      Peter 
   1325   8485      Peter /*
   1326   8485      Peter  * Check if `ill' should be active, and activate or deactivate if need be.
   1327   8485      Peter  * Return B_FALSE if a refresh was necessary but could not be performed.
   1328   8485      Peter  */
   1329   8485      Peter static boolean_t
   1330   8485      Peter ipmp_ill_try_refresh_active(ill_t *ill)
   1331   8485      Peter {
   1332   8485      Peter 	boolean_t refreshed = B_TRUE;
   1333   8485      Peter 
   1334   8485      Peter 	ASSERT(IAM_WRITER_ILL(ill));
   1335   8485      Peter 	ASSERT(IS_UNDER_IPMP(ill));
   1336   8485      Peter 
   1337   8485      Peter 	if (ipmp_ill_is_active(ill)) {
   1338   8485      Peter 		if (!list_link_active(&ill->ill_actnode))
   1339   8485      Peter 			refreshed = ipmp_ill_activate(ill);
   1340   8485      Peter 	} else {
   1341   8485      Peter 		if (list_link_active(&ill->ill_actnode))
   1342   8485      Peter 			ipmp_ill_deactivate(ill);
   1343   8485      Peter 	}
   1344   8485      Peter 
   1345   8485      Peter 	return (refreshed);
   1346   8485      Peter }
   1347   8485      Peter 
   1348   8485      Peter /*
   1349   8485      Peter  * Check if `ill' should be active, and activate or deactivate if need be.
   1350   8485      Peter  * If the refresh fails, schedule a timer to try again later.
   1351   8485      Peter  */
   1352   8485      Peter void
   1353   8485      Peter ipmp_ill_refresh_active(ill_t *ill)
   1354   8485      Peter {
   1355   8485      Peter 	if (!ipmp_ill_try_refresh_active(ill))
   1356   8485      Peter 		ipmp_ill_refresh_active_timer_start(ill);
   1357   8485      Peter }
   1358   8485      Peter 
   1359   8485      Peter /*
   1360   8485      Peter  * Retry ipmp_ill_try_refresh_active() on the ill named by `ill_arg'.
   1361   8485      Peter  */
   1362   8485      Peter static void
   1363   8485      Peter ipmp_ill_refresh_active_timer(void *ill_arg)
   1364   8485      Peter {
   1365   8485      Peter 	ill_t *ill = ill_arg;
   1366   8485      Peter 	boolean_t refreshed = B_FALSE;
   1367   8485      Peter 
   1368   8485      Peter 	/*
   1369   8485      Peter 	 * Clear ill_refresh_tid to indicate that no timeout is pending
   1370   8485      Peter 	 * (another thread could schedule a new timeout while we're still
   1371   8485      Peter 	 * running, but that's harmless).  If the ill is going away, bail.
   1372   8485      Peter 	 */
   1373   8485      Peter 	mutex_enter(&ill->ill_lock);
   1374   8485      Peter 	ill->ill_refresh_tid = 0;
   1375   8485      Peter 	if (ill->ill_state_flags & ILL_CONDEMNED) {
   1376   8485      Peter 		mutex_exit(&ill->ill_lock);
   1377   8485      Peter 		return;
   1378   8485      Peter 	}
   1379   8485      Peter 	mutex_exit(&ill->ill_lock);
   1380   8485      Peter 
   1381   8485      Peter 	if (ipsq_try_enter(NULL, ill, NULL, NULL, NULL, NEW_OP, B_FALSE)) {
   1382   8485      Peter 		refreshed = ipmp_ill_try_refresh_active(ill);
   1383   8485      Peter 		ipsq_exit(ill->ill_phyint->phyint_ipsq);
   1384   8485      Peter 	}
   1385   8485      Peter 
   1386   8485      Peter 	/*
   1387   8485      Peter 	 * If the refresh failed, schedule another attempt.
   1388   8485      Peter 	 */
   1389   8485      Peter 	if (!refreshed)
   1390   8485      Peter 		ipmp_ill_refresh_active_timer_start(ill);
   1391   8485      Peter }
   1392   8485      Peter 
   1393   8485      Peter /*
   1394   8485      Peter  * Retry an ipmp_ill_try_refresh_active() on the ill named by `arg'.
   1395   8485      Peter  */
   1396   8485      Peter static void
   1397   8485      Peter ipmp_ill_refresh_active_timer_start(ill_t *ill)
   1398   8485      Peter {
   1399   8485      Peter 	mutex_enter(&ill->ill_lock);
   1400   8485      Peter 
   1401   8485      Peter 	/*
   1402   8485      Peter 	 * If the ill is going away or a refresh is already scheduled, bail.
   1403   8485      Peter 	 */
   1404   8485      Peter 	if (ill->ill_refresh_tid != 0 ||
   1405   8485      Peter 	    (ill->ill_state_flags & ILL_CONDEMNED)) {
   1406   8485      Peter 		mutex_exit(&ill->ill_lock);
   1407   8485      Peter 		return;
   1408   8485      Peter 	}
   1409   8485      Peter 
   1410   8485      Peter 	ill->ill_refresh_tid = timeout(ipmp_ill_refresh_active_timer, ill,
   1411   8485      Peter 	    SEC_TO_TICK(IPMP_ILL_REFRESH_TIMEOUT));
   1412   8485      Peter 
   1413   8485      Peter 	mutex_exit(&ill->ill_lock);
   1414   8485      Peter }
   1415   8485      Peter 
   1416   8485      Peter /*
   1417   8485      Peter  * Activate `ill' so it will be used to send and receive data traffic.  Return
   1418   8485      Peter  * B_FALSE if `ill' cannot be activated.  Note that we allocate any messages
   1419   8485      Peter  * needed to deactivate `ill' here as well so that deactivation cannot fail.
   1420   8485      Peter  */
   1421   8485      Peter static boolean_t
   1422   8485      Peter ipmp_ill_activate(ill_t *ill)
   1423   8485      Peter {
   1424   8485      Peter 	ipif_t		*ipif;
   1425   8485      Peter 	mblk_t		*linkupmp = NULL, *linkdownmp = NULL;
   1426   8485      Peter 	ipmp_grp_t	*grp = ill->ill_phyint->phyint_grp;
   1427   8485      Peter 	ipmp_illgrp_t	*illg = ill->ill_grp;
   1428   8485      Peter 	ill_t		*maxill;
   1429   8485      Peter 	ip_stack_t	*ipst = IPMP_ILLGRP_TO_IPST(illg);
   1430   8485      Peter 
   1431   8485      Peter 	ASSERT(IAM_WRITER_ILL(ill));
   1432   8485      Peter 	ASSERT(IS_UNDER_IPMP(ill));
   1433   8485      Peter 
   1434   8485      Peter 	/*
   1435   8485      Peter 	 * If this will be the first active interface in the group, allocate
   1436   8485      Peter 	 * the link-up and link-down messages.
   1437   8485      Peter 	 */
   1438   8485      Peter 	if (grp->gr_nactif == 0) {
   1439   8485      Peter 		linkupmp = ip_dlnotify_alloc(DL_NOTE_LINK_UP, 0);
   1440   8485      Peter 		linkdownmp = ip_dlnotify_alloc(DL_NOTE_LINK_DOWN, 0);
   1441   8485      Peter 		if (linkupmp == NULL || linkdownmp == NULL)
   1442   8485      Peter 			goto fail;
   1443   8485      Peter 	}
   1444   8485      Peter 
   1445   8485      Peter 	if (list_is_empty(&illg->ig_actif)) {
   1446   8485      Peter 		/*
   1447   8485      Peter 		 * Now that we have an active ill, nominate it for multicast
   1448   8485      Peter 		 * and broadcast duties.  Do this before ipmp_ill_bind_ipif()
   1449   8485      Peter 		 * since that may need to send multicast packets (e.g., IPv6
   1450   8485      Peter 		 * neighbor discovery probes).
   1451   8485      Peter 		 */
   1452   8485      Peter 		ipmp_illgrp_set_cast(illg, ill);
   1453   8485      Peter 
   1454   8485      Peter 		/*
   1455   8485      Peter 		 * This is the first active ill in the illgrp -- add 'em all.
   1456   8485      Peter 		 * We can access/walk ig_ipmp_ill's ipif list since we're
   1457   8485      Peter 		 * writer on its IPSQ as well.
   1458   8485      Peter 		 */
   1459   8485      Peter 		ipif = illg->ig_ipmp_ill->ill_ipif;
   1460   8485      Peter 		for (; ipif != NULL; ipif = ipif->ipif_next)
   1461   8485      Peter 			if (ipmp_ipif_is_up_dataaddr(ipif))
   1462   8485      Peter 				ipmp_ill_bind_ipif(ill, ipif, Res_act_initial);
   1463   8485      Peter 	} else {
   1464   8485      Peter 		/*
   1465   8485      Peter 		 * Redistribute the addresses by moving them from the ill with
   1466   8485      Peter 		 * the most addresses until the ill being activated is at the
   1467   8485      Peter 		 * same level as the rest of the ills.
   1468   8485      Peter 		 */
   1469   8485      Peter 		for (;;) {
   1470   8485      Peter 			maxill = ipmp_illgrp_max_ill(illg);
   1471   8485      Peter 			ASSERT(maxill != NULL);
   1472   8485      Peter 			if (ill->ill_bound_cnt + 1 >= maxill->ill_bound_cnt)
   1473   8485      Peter 				break;
   1474   8485      Peter 			ipif = ipmp_ill_unbind_ipif(maxill, NULL, B_TRUE);
   1475   8485      Peter 			ipmp_ill_bind_ipif(ill, ipif, Res_act_rebind);
   1476   8485      Peter 		}
   1477   8485      Peter 	}
   1478   8485      Peter 
   1479   8485      Peter 	/*
   1480   8485      Peter 	 * Put the interface in the active list.
   1481   8485      Peter 	 */
   1482   8485      Peter 	rw_enter(&ipst->ips_ipmp_lock, RW_WRITER);
   1483   8485      Peter 	list_insert_tail(&illg->ig_actif, ill);
   1484   8485      Peter 	illg->ig_nactif++;
   1485   8485      Peter 	illg->ig_next_ill = ill;
   1486   8485      Peter 	rw_exit(&ipst->ips_ipmp_lock);
   1487   8485      Peter 
   1488   8485      Peter 	/*
   1489  11042       Erik 	 * Refresh static/proxy ARP entries to use `ill', if need be.
   1490   8485      Peter 	 */
   1491   8485      Peter 	if (!ill->ill_isv6)
   1492   8485      Peter 		ipmp_illgrp_refresh_arpent(illg);
   1493   8485      Peter 
   1494   8485      Peter 	/*
   1495   8485      Peter 	 * Finally, mark the group link up, if necessary.
   1496   8485      Peter 	 */
   1497   8485      Peter 	if (grp->gr_nactif++ == 0) {
   1498   8485      Peter 		ASSERT(grp->gr_linkdownmp == NULL);
   1499   8485      Peter 		grp->gr_linkdownmp = linkdownmp;
   1500   8485      Peter 		put(illg->ig_ipmp_ill->ill_rq, linkupmp);
   1501   8485      Peter 	}
   1502   8485      Peter 	return (B_TRUE);
   1503   8485      Peter fail:
   1504   8485      Peter 	freemsg(linkupmp);
   1505   8485      Peter 	freemsg(linkdownmp);
   1506   8485      Peter 	return (B_FALSE);
   1507   8485      Peter }
   1508   8485      Peter 
   1509   8485      Peter /*
   1510   8485      Peter  * Deactivate `ill' so it will not be used to send or receive data traffic.
   1511   8485      Peter  */
   1512   8485      Peter static void
   1513   8485      Peter ipmp_ill_deactivate(ill_t *ill)
   1514   8485      Peter {
   1515   8485      Peter 	ill_t		*minill;
   1516   8485      Peter 	ipif_t		*ipif, *ubnextipif, *ubheadipif = NULL;
   1517   8485      Peter 	mblk_t		*mp;
   1518   8485      Peter 	ipmp_grp_t	*grp = ill->ill_phyint->phyint_grp;
   1519   8485      Peter 	ipmp_illgrp_t	*illg = ill->ill_grp;
   1520   8485      Peter 	ip_stack_t	*ipst = IPMP_ILLGRP_TO_IPST(illg);
   1521   8485      Peter 
   1522   8485      Peter 	ASSERT(IAM_WRITER_ILL(ill));
   1523   8485      Peter 	ASSERT(IS_UNDER_IPMP(ill));
   1524   8485      Peter 
   1525   8485      Peter 	/*
   1526   8485      Peter 	 * Pull the interface out of the active list.
   1527   8485      Peter 	 */
   1528   8485      Peter 	rw_enter(&ipst->ips_ipmp_lock, RW_WRITER);
   1529   8485      Peter 	list_remove(&illg->ig_actif, ill);
   1530   8485      Peter 	illg->ig_nactif--;
   1531   8485      Peter 	illg->ig_next_ill = list_head(&illg->ig_actif);
   1532   8485      Peter 	rw_exit(&ipst->ips_ipmp_lock);
   1533   8485      Peter 
   1534   8485      Peter 	/*
   1535   8485      Peter 	 * If the ill that's being deactivated had been nominated for
   1536   8485      Peter 	 * multicast/broadcast, nominate a new one.
   1537   8485      Peter 	 */
   1538   8485      Peter 	if (ill == illg->ig_cast_ill)
   1539   8485      Peter 		ipmp_illgrp_set_cast(illg, list_head(&illg->ig_actif));
   1540   8485      Peter 
   1541   8485      Peter 	/*
   1542  11042       Erik 	 * Delete all nce_t entries using this ill, so that the next attempt
   1543  11042       Erik 	 * to send data traffic will revalidate cached nce's.
   1544  11042       Erik 	 */
   1545  11042       Erik 	nce_flush(ill, B_TRUE);
   1546  11042       Erik 
   1547  11042       Erik 	/*
   1548   8485      Peter 	 * Unbind all of the ipifs bound to this ill, and save 'em in a list;
   1549   8485      Peter 	 * we'll rebind them after we tell the resolver the ill is no longer
   1550   8485      Peter 	 * active.  We must do things in this order or the resolver could
   1551   8485      Peter 	 * accidentally rebind to the ill we're trying to remove if multiple
   1552   8485      Peter 	 * ills in the group have the same hardware address (which is
   1553   8485      Peter 	 * unsupported, but shouldn't lead to a wedged machine).
   1554   8485      Peter 	 */
   1555   8485      Peter 	while ((ipif = ipmp_ill_unbind_ipif(ill, NULL, B_TRUE)) != NULL) {
   1556   8485      Peter 		ipif->ipif_bound_next = ubheadipif;
   1557   8485      Peter 		ubheadipif = ipif;
   1558   8485      Peter 	}
   1559   8485      Peter 	if (!ill->ill_isv6) {
   1560   8485      Peter 
   1561   8485      Peter 		/*
   1562  11042       Erik 		 * Refresh static/proxy ARP entries that had been using `ill'.
   1563   8485      Peter 		 */
   1564   8485      Peter 		ipmp_illgrp_refresh_arpent(illg);
   1565   8485      Peter 	}
   1566   8485      Peter 
   1567   8485      Peter 	/*
   1568   8485      Peter 	 * Rebind each ipif from the deactivated ill to the active ill with
   1569   8485      Peter 	 * the fewest ipifs.  If there are no active ills, the ipifs will
   1570   8485      Peter 	 * remain unbound.
   1571   8485      Peter 	 */
   1572   8485      Peter 	for (ipif = ubheadipif; ipif != NULL; ipif = ubnextipif) {
   1573   8485      Peter 		ubnextipif = ipif->ipif_bound_next;
   1574   8485      Peter 		ipif->ipif_bound_next = NULL;
   1575   8485      Peter 
   1576   8485      Peter 		if ((minill = ipmp_illgrp_min_ill(illg)) != NULL)
   1577   8485      Peter 			ipmp_ill_bind_ipif(minill, ipif, Res_act_rebind);
   1578   8485      Peter 	}
   1579  11042       Erik 
   1580  11042       Erik 	if (list_is_empty(&illg->ig_actif)) {
   1581  11042       Erik 		ill_t *ipmp_ill = illg->ig_ipmp_ill;
   1582  11042       Erik 
   1583  11042       Erik 		ncec_walk(ipmp_ill, (pfi_t)ncec_delete_per_ill,
   1584  11042       Erik 		    (uchar_t *)ipmp_ill, ipmp_ill->ill_ipst);
   1585  11042       Erik 	}
   1586  11042       Erik 
   1587  11042       Erik 	/*
   1588  11042       Erik 	 * Remove any IRE_IF_CLONE for this ill since they might have
   1589  11042       Erik 	 * an ire_nce_cache/nce_common which refers to another ill in the group.
   1590  11042       Erik 	 */
   1591  11042       Erik 	ire_walk_ill(MATCH_IRE_TYPE, IRE_IF_CLONE, ill_downi_if_clone,
   1592  11042       Erik 	    ill, ill);
   1593   8485      Peter 
   1594   8485      Peter 	/*
   1595   8485      Peter 	 * Finally, mark the group link down, if necessary.
   1596   8485      Peter 	 */
   1597   8485      Peter 	if (--grp->gr_nactif == 0) {
   1598   8485      Peter 		mp = grp->gr_linkdownmp;
   1599   8485      Peter 		grp->gr_linkdownmp = NULL;
   1600   8485      Peter 		ASSERT(mp != NULL);
   1601   8485      Peter 		put(illg->ig_ipmp_ill->ill_rq, mp);
   1602   8485      Peter 	}
   1603   8485      Peter }
   1604   8485      Peter 
   1605   8485      Peter /*
   1606   8485      Peter  * Send the routing socket messages needed to make `ill' "appear" (RTM_ADD)
   1607   8485      Peter  * or "disappear" (RTM_DELETE) to non-IPMP-aware routing socket listeners.
   1608   8485      Peter  */
   1609   8485      Peter static void
   1610   8485      Peter ipmp_ill_rtsaddrmsg(ill_t *ill, int cmd)
   1611   8485      Peter {
   1612   8485      Peter 	ipif_t *ipif;
   1613   8485      Peter 
   1614   8485      Peter 	ASSERT(IAM_WRITER_ILL(ill));
   1615   8485      Peter 	ASSERT(cmd == RTM_ADD || cmd == RTM_DELETE);
   1616   8485      Peter 
   1617   8485      Peter 	/*
   1618   8485      Peter 	 * If `ill' is truly down, there are no messages to generate since:
   1619   8485      Peter 	 *
   1620   8485      Peter 	 * 1. If cmd == RTM_DELETE, then we're supposed to hide the interface
   1621   8485      Peter 	 *    and its addresses by bringing them down.  But that's already
   1622   8485      Peter 	 *    true, so there's nothing to hide.
   1623   8485      Peter 	 *
   1624   8485      Peter 	 * 2. If cmd == RTM_ADD, then we're supposed to generate messages
   1625   8485      Peter 	 *    indicating that any previously-hidden up addresses are again
   1626   8485      Peter 	 *    back up (along with the interface).  But they aren't, so
   1627   8485      Peter 	 *    there's nothing to expose.
   1628   8485      Peter 	 */
   1629   8485      Peter 	if (ill->ill_ipif_up_count == 0)
   1630   8485      Peter 		return;
   1631   8485      Peter 
   1632   8485      Peter 	if (cmd == RTM_ADD)
   1633   8485      Peter 		ip_rts_xifmsg(ill->ill_ipif, IPIF_UP, 0, RTSQ_NORMAL);
   1634   8485      Peter 
   1635   8485      Peter 	for (ipif = ill->ill_ipif; ipif != NULL; ipif = ipif->ipif_next)
   1636   8485      Peter 		if (ipif->ipif_flags & IPIF_UP)
   1637   8485      Peter 			ip_rts_newaddrmsg(cmd, 0, ipif, RTSQ_NORMAL);
   1638   8485      Peter 
   1639   8485      Peter 	if (cmd == RTM_DELETE)
   1640   8485      Peter 		ip_rts_xifmsg(ill->ill_ipif, 0, IPIF_UP, RTSQ_NORMAL);
   1641   8485      Peter }
   1642   8485      Peter 
   1643   8485      Peter /*
   1644   8485      Peter  * Bind the address named by `ipif' to the underlying ill named by `ill'.
   1645   8485      Peter  * If `act' is Res_act_none, don't notify the resolver.  Otherwise, `act'
   1646   8485      Peter  * will indicate to the resolver whether this is an initial bringup of
   1647   8485      Peter  * `ipif', or just a rebind to another ill.
   1648   8485      Peter  */
   1649   8485      Peter static void
   1650   8485      Peter ipmp_ill_bind_ipif(ill_t *ill, ipif_t *ipif, enum ip_resolver_action act)
   1651   8485      Peter {
   1652   8485      Peter 	int err = 0;
   1653   8485      Peter 	ip_stack_t *ipst = ill->ill_ipst;
   1654   8485      Peter 
   1655   8485      Peter 	ASSERT(IAM_WRITER_ILL(ill) && IAM_WRITER_IPIF(ipif));
   1656   8485      Peter 	ASSERT(IS_UNDER_IPMP(ill) && IS_IPMP(ipif->ipif_ill));
   1657   8485      Peter 	ASSERT(act == Res_act_none || ipmp_ipif_is_up_dataaddr(ipif));
   1658   8485      Peter 	ASSERT(ipif->ipif_bound_ill == NULL);
   1659   8485      Peter 	ASSERT(ipif->ipif_bound_next == NULL);
   1660   8485      Peter 
   1661   8485      Peter 	ipif->ipif_bound_next = ill->ill_bound_ipif;
   1662   8485      Peter 	ill->ill_bound_ipif = ipif;
   1663   8485      Peter 	ill->ill_bound_cnt++;
   1664   8485      Peter 	rw_enter(&ipst->ips_ipmp_lock, RW_WRITER);
   1665   8485      Peter 	ipif->ipif_bound_ill = ill;
   1666   8485      Peter 	rw_exit(&ipst->ips_ipmp_lock);
   1667   8485      Peter 
   1668   8485      Peter 	/*
   1669   8485      Peter 	 * If necessary, tell ARP/NDP about the new mapping.  Note that
   1670  11042       Erik 	 * ipif_resolver_up() cannot fail for IPv6 ills.
   1671   8485      Peter 	 */
   1672   8485      Peter 	if (act != Res_act_none) {
   1673   8485      Peter 		if (ill->ill_isv6) {
   1674   8485      Peter 			VERIFY(ipif_resolver_up(ipif, act) == 0);
   1675   8485      Peter 			err = ipif_ndp_up(ipif, act == Res_act_initial);
   1676   8485      Peter 		} else {
   1677   8485      Peter 			err = ipif_resolver_up(ipif, act);
   1678   8485      Peter 		}
   1679   8485      Peter 
   1680   8485      Peter 		/*
   1681   8485      Peter 		 * Since ipif_ndp_up() never returns EINPROGRESS and
   1682   8485      Peter 		 * ipif_resolver_up() only returns EINPROGRESS when the
   1683   8485      Peter 		 * associated ill is not up, we should never be here with
   1684   8485      Peter 		 * EINPROGRESS.  We rely on this to simplify the design.
   1685   8485      Peter 		 */
   1686   8485      Peter 		ASSERT(err != EINPROGRESS);
   1687   8485      Peter 	}
   1688   8485      Peter 	/* TODO: retry binding on failure? when? */
   1689   8485      Peter 	ipif->ipif_bound = (err == 0);
   1690   8485      Peter }
   1691   8485      Peter 
   1692   8485      Peter /*
   1693   8485      Peter  * Unbind the address named by `ipif' from the underlying ill named by `ill'.
   1694   8485      Peter  * If `ipif' is NULL, then an arbitrary ipif on `ill' is unbound and returned.
   1695   8485      Peter  * If no ipifs are bound to `ill', NULL is returned.  If `notifyres' is
   1696   8485      Peter  * B_TRUE, notify the resolver about the change.
   1697   8485      Peter  */
   1698   8485      Peter static ipif_t *
   1699   8485      Peter ipmp_ill_unbind_ipif(ill_t *ill, ipif_t *ipif, boolean_t notifyres)
   1700   8485      Peter {
   1701   8485      Peter 	ipif_t *previpif;
   1702   8485      Peter 	ip_stack_t *ipst = ill->ill_ipst;
   1703   8485      Peter 
   1704   8485      Peter 	ASSERT(IAM_WRITER_ILL(ill));
   1705   8485      Peter 	ASSERT(IS_UNDER_IPMP(ill));
   1706   8485      Peter 
   1707   8485      Peter 	/*
   1708   8485      Peter 	 * If necessary, find an ipif to unbind.
   1709   8485      Peter 	 */
   1710   8485      Peter 	if (ipif == NULL) {
   1711   8485      Peter 		if ((ipif = ill->ill_bound_ipif) == NULL) {
   1712   8485      Peter 			ASSERT(ill->ill_bound_cnt == 0);
   1713   8485      Peter 			return (NULL);
   1714   8485      Peter 		}
   1715   8485      Peter 	}
   1716   8485      Peter 
   1717   8485      Peter 	ASSERT(IAM_WRITER_IPIF(ipif));
   1718   8485      Peter 	ASSERT(IS_IPMP(ipif->ipif_ill));
   1719   8485      Peter 	ASSERT(ipif->ipif_bound_ill == ill);
   1720   8485      Peter 	ASSERT(ill->ill_bound_cnt > 0);
   1721   8485      Peter 
   1722   8485      Peter 	/*
   1723   8485      Peter 	 * Unbind it.
   1724   8485      Peter 	 */
   1725   8485      Peter 	rw_enter(&ipst->ips_ipmp_lock, RW_WRITER);
   1726   8485      Peter 	ipif->ipif_bound_ill = NULL;
   1727   8485      Peter 	rw_exit(&ipst->ips_ipmp_lock);
   1728   8485      Peter 	ill->ill_bound_cnt--;
   1729   8485      Peter 
   1730   8485      Peter 	if (ill->ill_bound_ipif == ipif) {
   1731   8485      Peter 		ill->ill_bound_ipif = ipif->ipif_bound_next;
   1732   8485      Peter 	} else {
   1733   8485      Peter 		previpif = ill->ill_bound_ipif;
   1734   8485      Peter 		while (previpif->ipif_bound_next != ipif)
   1735   8485      Peter 			previpif = previpif->ipif_bound_next;
   1736   8485      Peter 
   1737   8485      Peter 		previpif->ipif_bound_next = ipif->ipif_bound_next;
   1738   8485      Peter 	}
   1739   8485      Peter 	ipif->ipif_bound_next = NULL;
   1740   8485      Peter 
   1741   8485      Peter 	/*
   1742   8485      Peter 	 * If requested, notify the resolvers (provided we're bound).
   1743   8485      Peter 	 */
   1744   8485      Peter 	if (notifyres && ipif->ipif_bound) {
   1745  11042       Erik 		if (ill->ill_isv6)
   1746   8485      Peter 			ipif_ndp_down(ipif);
   1747  11042       Erik 		else
   1748  11042       Erik 			(void) ipif_arp_down(ipif);
   1749   8485      Peter 	}
   1750   8485      Peter 	ipif->ipif_bound = B_FALSE;
   1751   8485      Peter 
   1752   8485      Peter 	return (ipif);
   1753   8485      Peter }
   1754   8485      Peter 
   1755   8485      Peter /*
   1756   8485      Peter  * Check if `ill' is active.  Caller must hold ill_lock and phyint_lock if
   1757   8485      Peter  * it's not inside the IPSQ.  Since ipmp_ill_try_refresh_active() calls this
   1758   8485      Peter  * to determine whether an ill should be considered active, other consumers
   1759   8485      Peter  * may race and learn about an ill that should be deactivated/activated before
   1760   8485      Peter  * IPMP has performed the activation/deactivation.  This should be safe though
   1761   8485      Peter  * since at worst e.g. ire_atomic_start() will prematurely delete an IRE that
   1762   8485      Peter  * would've been cleaned up by ipmp_ill_deactivate().
   1763   8485      Peter  */
   1764   8485      Peter boolean_t
   1765   8485      Peter ipmp_ill_is_active(ill_t *ill)
   1766   8485      Peter {
   1767   8485      Peter 	phyint_t *phyi = ill->ill_phyint;
   1768   8485      Peter 
   1769   8485      Peter 	ASSERT(IS_UNDER_IPMP(ill));
   1770   8485      Peter 	ASSERT(IAM_WRITER_ILL(ill) ||
   1771   8485      Peter 	    (MUTEX_HELD(&ill->ill_lock) && MUTEX_HELD(&phyi->phyint_lock)));
   1772   8485      Peter 
   1773   8485      Peter 	/*
   1774   8485      Peter 	 * Note that PHYI_RUNNING isn't checked since we rely on in.mpathd to
   1775   8485      Peter 	 * set PHYI_FAILED whenever PHYI_RUNNING is cleared.  This allows the
   1776   8485      Peter 	 * link flapping logic to be just in in.mpathd and allows us to ignore
   1777   8485      Peter 	 * changes to PHYI_RUNNING.
   1778   8485      Peter 	 */
   1779   8485      Peter 	return (!(ill->ill_ipif_up_count == 0 ||
   1780   8485      Peter 	    (phyi->phyint_flags & (PHYI_OFFLINE|PHYI_INACTIVE|PHYI_FAILED))));
   1781   8485      Peter }
   1782   8485      Peter 
   1783   8485      Peter /*
   1784  11042       Erik  * IRE walker callback: set ire_testhidden on IRE_HIDDEN_TYPE IREs associated
   1785  11042       Erik  * with `ill_arg'.
   1786   8485      Peter  */
   1787   8485      Peter static void
   1788   8485      Peter ipmp_ill_ire_mark_testhidden(ire_t *ire, char *ill_arg)
   1789   8485      Peter {
   1790   8485      Peter 	ill_t *ill = (ill_t *)ill_arg;
   1791   8485      Peter 
   1792   8485      Peter 	ASSERT(IAM_WRITER_ILL(ill));
   1793   8485      Peter 	ASSERT(!IS_IPMP(ill));
   1794   8485      Peter 
   1795  11042       Erik 	if (ire->ire_ill != ill)
   1796   8485      Peter 		return;
   1797   8485      Peter 
   1798  11042       Erik 	if (IRE_HIDDEN_TYPE(ire->ire_type)) {
   1799   8485      Peter 		DTRACE_PROBE1(ipmp__mark__testhidden, ire_t *, ire);
   1800  11042       Erik 		ire->ire_testhidden = B_TRUE;
   1801   8485      Peter 	}
   1802   8485      Peter }
   1803   8485      Peter 
   1804   8485      Peter /*
   1805  11042       Erik  * IRE walker callback: clear ire_testhidden if the IRE has a source address
   1806  11042       Erik  * on `ill_arg'.
   1807   8485      Peter  */
   1808   8485      Peter static void
   1809   8485      Peter ipmp_ill_ire_clear_testhidden(ire_t *ire, char *ill_arg)
   1810   8485      Peter {
   1811   8485      Peter 	ill_t *ill = (ill_t *)ill_arg;
   1812   8485      Peter 
   1813   8485      Peter 	ASSERT(IAM_WRITER_ILL(ill));
   1814   8485      Peter 	ASSERT(!IS_IPMP(ill));
   1815   8485      Peter 
   1816  11042       Erik 	if (ire->ire_ill == ill) {
   1817   8485      Peter 		DTRACE_PROBE1(ipmp__clear__testhidden, ire_t *, ire);
   1818  11042       Erik 		ire->ire_testhidden = B_FALSE;
   1819   8485      Peter 	}
   1820   8485      Peter }
   1821   8485      Peter 
   1822   8485      Peter /*
   1823   8485      Peter  * Return a held pointer to the IPMP ill for underlying interface `ill', or
   1824   8485      Peter  * NULL if one doesn't exist.  (Unfortunately, this function needs to take an
   1825   8485      Peter  * underlying ill rather than an ipmp_illgrp_t because an underlying ill's
   1826   8564      Peter  * ill_grp pointer may become stale when not inside an IPSQ and not holding
   1827   8485      Peter  * ipmp_lock.)  Caller need not be inside the IPSQ.
   1828   8485      Peter  */
   1829   8485      Peter ill_t *
   1830   8485      Peter ipmp_ill_hold_ipmp_ill(ill_t *ill)
   1831   8485      Peter {
   1832   8485      Peter 	ip_stack_t *ipst = ill->ill_ipst;
   1833   8485      Peter 	ipmp_illgrp_t *illg;
   1834   8485      Peter 
   1835   8485      Peter 	ASSERT(!IS_IPMP(ill));
   1836   8485      Peter 
   1837   8485      Peter 	rw_enter(&ipst->ips_ipmp_lock, RW_READER);
   1838   8485      Peter 	illg = ill->ill_grp;
   1839  11042       Erik 	if (illg != NULL && ill_check_and_refhold(illg->ig_ipmp_ill)) {
   1840   8485      Peter 		rw_exit(&ipst->ips_ipmp_lock);
   1841   8485      Peter 		return (illg->ig_ipmp_ill);
   1842   8485      Peter 	}
   1843   8485      Peter 	/*
   1844   8485      Peter 	 * Assume `ill' was removed from the illgrp in the meantime.
   1845   8485      Peter 	 */
   1846   8485      Peter 	rw_exit(&ill->ill_ipst->ips_ipmp_lock);
   1847   8485      Peter 	return (NULL);
   1848   8485      Peter }
   1849   8485      Peter 
   1850   8485      Peter /*
   1851   8485      Peter  * Return the interface index for the IPMP ill tied to underlying interface
   1852   8485      Peter  * `ill', or zero if one doesn't exist.  Caller need not be inside the IPSQ.
   1853   8485      Peter  */
   1854   8485      Peter uint_t
   1855   8485      Peter ipmp_ill_get_ipmp_ifindex(const ill_t *ill)
   1856   8485      Peter {
   1857   8485      Peter 	uint_t ifindex = 0;
   1858   8485      Peter 	ip_stack_t *ipst = ill->ill_ipst;
   1859   8485      Peter 	ipmp_grp_t *grp;
   1860   8485      Peter 
   1861   8485      Peter 	ASSERT(!IS_IPMP(ill));
   1862   8485      Peter 
   1863   8485      Peter 	rw_enter(&ipst->ips_ipmp_lock, RW_READER);
   1864   8485      Peter 	if ((grp = ill->ill_phyint->phyint_grp) != NULL)
   1865   8485      Peter 		ifindex = grp->gr_phyint->phyint_ifindex;
   1866   8485      Peter 	rw_exit(&ipst->ips_ipmp_lock);
   1867   8485      Peter 	return (ifindex);
   1868   8485      Peter }
   1869   8485      Peter 
   1870   8485      Peter /*
   1871   8485      Peter  * Place phyint `phyi' into IPMP group `grp'.
   1872   8485      Peter  */
   1873   8485      Peter void
   1874   8485      Peter ipmp_phyint_join_grp(phyint_t *phyi, ipmp_grp_t *grp)
   1875   8485      Peter {
   1876   8485      Peter 	ill_t *ill;
   1877   8485      Peter 	ipsq_t *ipsq = phyi->phyint_ipsq;
   1878   8485      Peter 	ipsq_t *grp_ipsq = grp->gr_phyint->phyint_ipsq;
   1879   8485      Peter 	ip_stack_t *ipst = PHYINT_TO_IPST(phyi);
   1880   8485      Peter 
   1881   8485      Peter 	ASSERT(IAM_WRITER_IPSQ(ipsq));
   1882   8485      Peter 	ASSERT(phyi->phyint_illv4 != NULL || phyi->phyint_illv6 != NULL);
   1883   8485      Peter 
   1884   8485      Peter 	/*
   1885   8485      Peter 	 * Send routing socket messages indicating that the phyint's ills
   1886   8485      Peter 	 * and ipifs vanished.
   1887   8485      Peter 	 */
   1888   8485      Peter 	if (phyi->phyint_illv4 != NULL) {
   1889   8485      Peter 		ill = phyi->phyint_illv4;
   1890   8485      Peter 		ipmp_ill_rtsaddrmsg(ill, RTM_DELETE);
   1891   8485      Peter 	}
   1892   8485      Peter 
   1893   8485      Peter 	if (phyi->phyint_illv6 != NULL) {
   1894   8485      Peter 		ill = phyi->phyint_illv6;
   1895   8485      Peter 		ipmp_ill_rtsaddrmsg(ill, RTM_DELETE);
   1896   8485      Peter 	}
   1897   8485      Peter 
   1898   8485      Peter 	/*
   1899   8485      Peter 	 * Snapshot the phyint's initial kstats as a baseline.
   1900   8485      Peter 	 */
   1901   8485      Peter 	ipmp_phyint_get_kstats(phyi, phyi->phyint_kstats0);
   1902   8485      Peter 
   1903   8485      Peter 	rw_enter(&ipst->ips_ipmp_lock, RW_WRITER);
   1904   8485      Peter 
   1905   8485      Peter 	phyi->phyint_grp = grp;
   1906   8485      Peter 	if (++grp->gr_nif == 1)
   1907   8485      Peter 		grp->gr_mactype = ill->ill_mactype;
   1908   8485      Peter 	else
   1909   8485      Peter 		ASSERT(grp->gr_mactype == ill->ill_mactype);
   1910   8485      Peter 
   1911   8485      Peter 	/*
   1912   8485      Peter 	 * Now that we're in the group, request a switch to the group's xop
   1913   8485      Peter 	 * when we ipsq_exit().  All future operations will be exclusive on
   1914   8485      Peter 	 * the group xop until ipmp_phyint_leave_grp() is called.
   1915   8485      Peter 	 */
   1916   8485      Peter 	ASSERT(ipsq->ipsq_swxop == NULL);
   1917   8485      Peter 	ASSERT(grp_ipsq->ipsq_xop == &grp_ipsq->ipsq_ownxop);
   1918   8485      Peter 	ipsq->ipsq_swxop = &grp_ipsq->ipsq_ownxop;
   1919   8485      Peter 
   1920   8485      Peter 	rw_exit(&ipst->ips_ipmp_lock);
   1921   8485      Peter }
   1922   8485      Peter 
   1923   8485      Peter /*
   1924   8485      Peter  * Remove phyint `phyi' from its current IPMP group.
   1925   8485      Peter  */
   1926   8485      Peter void
   1927   8485      Peter ipmp_phyint_leave_grp(phyint_t *phyi)
   1928   8485      Peter {
   1929   8485      Peter 	uint_t i;
   1930   8485      Peter 	ipsq_t *ipsq = phyi->phyint_ipsq;
   1931   8485      Peter 	ip_stack_t *ipst = PHYINT_TO_IPST(phyi);
   1932   8485      Peter 	uint64_t phyi_kstats[IPMP_KSTAT_MAX];
   1933   8485      Peter 
   1934   8485      Peter 	ASSERT(IAM_WRITER_IPSQ(ipsq));
   1935   8485      Peter 
   1936   8485      Peter 	/*
   1937   8485      Peter 	 * If any of the phyint's ills are still in an illgrp, kick 'em out.
   1938   8485      Peter 	 */
   1939   8485      Peter 	if (phyi->phyint_illv4 != NULL && IS_UNDER_IPMP(phyi->phyint_illv4))
   1940   8485      Peter 		ipmp_ill_leave_illgrp(phyi->phyint_illv4);
   1941   8485      Peter 	if (phyi->phyint_illv6 != NULL && IS_UNDER_IPMP(phyi->phyint_illv6))
   1942   8485      Peter 		ipmp_ill_leave_illgrp(phyi->phyint_illv6);
   1943   8485      Peter 
   1944   8485      Peter 	/*
   1945   8485      Peter 	 * Send routing socket messages indicating that the phyint's ills
   1946   8485      Peter 	 * and ipifs have reappeared.
   1947   8485      Peter 	 */
   1948   8485      Peter 	if (phyi->phyint_illv4 != NULL)
   1949   8485      Peter 		ipmp_ill_rtsaddrmsg(phyi->phyint_illv4, RTM_ADD);
   1950   8485      Peter 	if (phyi->phyint_illv6 != NULL)
   1951   8485      Peter 		ipmp_ill_rtsaddrmsg(phyi->phyint_illv6, RTM_ADD);
   1952   8485      Peter 
   1953   8485      Peter 	/*
   1954   8485      Peter 	 * Calculate the phyint's cumulative kstats while it was in the group,
   1955   8485      Peter 	 * and add that to the group's baseline.
   1956   8485      Peter 	 */
   1957   8485      Peter 	ipmp_phyint_get_kstats(phyi, phyi_kstats);
   1958   8485      Peter 	for (i = 0; i < IPMP_KSTAT_MAX; i++) {
   1959   8485      Peter 		phyi_kstats[i] -= phyi->phyint_kstats0[i];
   1960   8485      Peter 		atomic_add_64(&phyi->phyint_grp->gr_kstats0[i], phyi_kstats[i]);
   1961   8485      Peter 	}
   1962   8485      Peter 
   1963   8485      Peter 	rw_enter(&ipst->ips_ipmp_lock, RW_WRITER);
   1964   8485      Peter 
   1965   8485      Peter 	phyi->phyint_grp->gr_nif--;
   1966   8485      Peter 	phyi->phyint_grp = NULL;
   1967   8485      Peter 
   1968   8485      Peter 	/*
   1969   8485      Peter 	 * As our final act in leaving the group, request a switch back to our
   1970   8485      Peter 	 * IPSQ's own xop when we ipsq_exit().
   1971   8485      Peter 	 */
   1972   8485      Peter 	ASSERT(ipsq->ipsq_swxop == NULL);
   1973   8485      Peter 	ipsq->ipsq_swxop = &ipsq->ipsq_ownxop;
   1974   8485      Peter 
   1975   8485      Peter 	rw_exit(&ipst->ips_ipmp_lock);
   1976   8485      Peter }
   1977   8485      Peter 
   1978   8485      Peter /*
   1979   8485      Peter  * Store the IPMP-related kstats for `phyi' into the array named by `kstats'.
   1980   8485      Peter  * Assumes that `kstats' has at least IPMP_KSTAT_MAX elements.
   1981   8485      Peter  */
   1982   8485      Peter static void
   1983   8485      Peter ipmp_phyint_get_kstats(phyint_t *phyi, uint64_t kstats[])
   1984   8485      Peter {
   1985   8485      Peter 	uint_t		i, j;
   1986   8485      Peter 	const char	*name;
   1987   8485      Peter 	kstat_t		*ksp;
   1988   8485      Peter 	kstat_named_t	*kn;
   1989  10616  Sebastien 	ip_stack_t	*ipst = PHYINT_TO_IPST(phyi);
   1990  10616  Sebastien 	zoneid_t	zoneid;
   1991   8485      Peter 
   1992   8485      Peter 	bzero(kstats, sizeof (kstats[0]) * IPMP_KSTAT_MAX);
   1993  10616  Sebastien 	zoneid = netstackid_to_zoneid(ipst->ips_netstack->netstack_stackid);
   1994  10616  Sebastien 	ksp = kstat_hold_byname("link", 0, phyi->phyint_name, zoneid);
   1995   8485      Peter 	if (ksp == NULL)
   1996   8485      Peter 		return;
   1997   8485      Peter 
   1998   8485      Peter 	KSTAT_ENTER(ksp);
   1999   8485      Peter 
   2000   8485      Peter 	if (ksp->ks_data != NULL && ksp->ks_type == KSTAT_TYPE_NAMED) {
   2001   8485      Peter 		/*
   2002   8485      Peter 		 * Bring kstats up-to-date before recording.
   2003   8485      Peter 		 */
   2004   8485      Peter 		(void) KSTAT_UPDATE(ksp, KSTAT_READ);
   2005   8485      Peter 
   2006   8485      Peter 		kn = KSTAT_NAMED_PTR(ksp);
   2007   8485      Peter 		for (i = 0; i < IPMP_KSTAT_MAX; i++) {
   2008   8485      Peter 			name = ipmp_kstats[i].name;
   2009   8485      Peter 			kstats[i] = 0;
   2010   8485      Peter 			for (j = 0; j < ksp->ks_ndata; j++) {
   2011   8485      Peter 				if (strcmp(kn[j].name, name) != 0)
   2012   8485      Peter 					continue;
   2013   8485      Peter 
   2014   8485      Peter 				switch (kn[j].data_type) {
   2015   8485      Peter 				case KSTAT_DATA_INT32:
   2016   8485      Peter 				case KSTAT_DATA_UINT32:
   2017   8485      Peter 					kstats[i] = kn[j].value.ui32;
   2018   8485      Peter 					break;
   2019   8485      Peter #ifdef	_LP64
   2020   8485      Peter 				case KSTAT_DATA_LONG:
   2021   8485      Peter 				case KSTAT_DATA_ULONG:
   2022   8485      Peter 					kstats[i] = kn[j].value.ul;
   2023   8485      Peter 					break;
   2024   8485      Peter #endif
   2025   8485      Peter 				case KSTAT_DATA_INT64:
   2026   8485      Peter 				case KSTAT_DATA_UINT64:
   2027   8485      Peter 					kstats[i] = kn[j].value.ui64;
   2028   8485      Peter 					break;
   2029   8485      Peter 				}
   2030   8485      Peter 				break;
   2031   8485      Peter 			}
   2032   8485      Peter 		}
   2033   8485      Peter 	}
   2034   8485      Peter 
   2035   8485      Peter 	KSTAT_EXIT(ksp);
   2036   8485      Peter 	kstat_rele(ksp);
   2037   8485      Peter }
   2038   8485      Peter 
   2039   8485      Peter /*
   2040   8485      Peter  * Refresh the active state of all ills on `phyi'.
   2041   8485      Peter  */
   2042   8485      Peter void
   2043   8485      Peter ipmp_phyint_refresh_active(phyint_t *phyi)
   2044   8485      Peter {
   2045   8485      Peter 	if (phyi->phyint_illv4 != NULL)
   2046   8485      Peter 		ipmp_ill_refresh_active(phyi->phyint_illv4);
   2047   8485      Peter 	if (phyi->phyint_illv6 != NULL)
   2048   8485      Peter 		ipmp_ill_refresh_active(phyi->phyint_illv6);
   2049   8485      Peter }
   2050   8485      Peter 
   2051   8485      Peter /*
   2052   8485      Peter  * Return a held pointer to the underlying ill bound to `ipif', or NULL if one
   2053   8485      Peter  * doesn't exist.  Caller need not be inside the IPSQ.
   2054   8485      Peter  */
   2055   8485      Peter ill_t *
   2056   8485      Peter ipmp_ipif_hold_bound_ill(const ipif_t *ipif)
   2057   8485      Peter {
   2058   8485      Peter 	ill_t *boundill;
   2059   8485      Peter 	ip_stack_t *ipst = ipif->ipif_ill->ill_ipst;
   2060   8485      Peter 
   2061   8485      Peter 	ASSERT(IS_IPMP(ipif->ipif_ill));
   2062   8485      Peter 
   2063   8485      Peter 	rw_enter(&ipst->ips_ipmp_lock, RW_READER);
   2064   8485      Peter 	boundill = ipif->ipif_bound_ill;
   2065  11042       Erik 	if (boundill != NULL && ill_check_and_refhold(boundill)) {
   2066   8485      Peter 		rw_exit(&ipst->ips_ipmp_lock);
   2067   8485      Peter 		return (boundill);
   2068   8485      Peter 	}
   2069   8485      Peter 	rw_exit(&ipst->ips_ipmp_lock);
   2070   8485      Peter 	return (NULL);
   2071   8485      Peter }
   2072   8485      Peter 
   2073   8485      Peter /*
   2074   8485      Peter  * Return a pointer to the underlying ill bound to `ipif', or NULL if one
   2075   8485      Peter  * doesn't exist.  Caller must be inside the IPSQ.
   2076   8485      Peter  */
   2077   8485      Peter ill_t *
   2078   8485      Peter ipmp_ipif_bound_ill(const ipif_t *ipif)
   2079   8485      Peter {
   2080   8485      Peter 	ASSERT(IAM_WRITER_ILL(ipif->ipif_ill));
   2081   8485      Peter 	ASSERT(IS_IPMP(ipif->ipif_ill));
   2082   8485      Peter 
   2083   8485      Peter 	return (ipif->ipif_bound_ill);
   2084   8485      Peter }
   2085   8485      Peter 
   2086   8485      Peter /*
   2087   8485      Peter  * Check if `ipif' is a "stub" (placeholder address not being used).
   2088   8485      Peter  */
   2089   8485      Peter boolean_t
   2090   8485      Peter ipmp_ipif_is_stubaddr(const ipif_t *ipif)
   2091   8485      Peter {
   2092   8485      Peter 	if (ipif->ipif_flags & IPIF_UP)
   2093   8485      Peter 		return (B_FALSE);
   2094   8485      Peter 	if (ipif->ipif_ill->ill_isv6)
   2095   8485      Peter 		return (IN6_IS_ADDR_UNSPECIFIED(&ipif->ipif_v6lcl_addr));
   2096   8485      Peter 	else
   2097   8485      Peter 		return (ipif->ipif_lcl_addr == INADDR_ANY);
   2098   8485      Peter }
   2099   8485      Peter 
   2100   8485      Peter /*
   2101   8485      Peter  * Check if `ipif' is an IPMP data address.
   2102   8485      Peter  */
   2103   8485      Peter boolean_t
   2104   8485      Peter ipmp_ipif_is_dataaddr(const ipif_t *ipif)
   2105   8485      Peter {
   2106   8485      Peter 	if (ipif->ipif_flags & IPIF_NOFAILOVER)
   2107   8485      Peter 		return (B_FALSE);
   2108   8485      Peter 	if (ipif->ipif_ill->ill_isv6)
   2109   8485      Peter 		return (!IN6_IS_ADDR_UNSPECIFIED(&ipif->ipif_v6lcl_addr));
   2110   8485      Peter 	else
   2111   8485      Peter 		return (ipif->ipif_lcl_addr != INADDR_ANY);
   2112   8485      Peter }
   2113   8485      Peter 
   2114   8485      Peter /*
   2115   8485      Peter  * Check if `ipif' is an IPIF_UP IPMP data address.
   2116   8485      Peter  */
   2117   8485      Peter static boolean_t
   2118   8485      Peter ipmp_ipif_is_up_dataaddr(const ipif_t *ipif)
   2119   8485      Peter {
   2120   8485      Peter 	return (ipmp_ipif_is_dataaddr(ipif) && (ipif->ipif_flags & IPIF_UP));
   2121   8485      Peter }
   2122  11042       Erik 
   2123  11042       Erik /*
   2124  11042       Erik  * Check if `mp' contains a probe packet by verifying if the IP source address
   2125  11042       Erik  * is a test address on an underlying interface `ill'. Caller need not be inside
   2126  11042       Erik  * the IPSQ.
   2127  11042       Erik  */
   2128  11042       Erik boolean_t
   2129  11042       Erik ipmp_packet_is_probe(mblk_t *mp, ill_t *ill)
   2130  11042       Erik {
   2131  11042       Erik 	ip6_t *ip6h = (ip6_t *)mp->b_rptr;
   2132  11042       Erik 	ipha_t *ipha = (ipha_t *)mp->b_rptr;
   2133  11042       Erik 
   2134  11042       Erik 	ASSERT(DB_TYPE(mp) != M_CTL);
   2135  11042       Erik 
   2136  11042       Erik 	if (!IS_UNDER_IPMP(ill))
   2137  11042       Erik 		return (B_FALSE);
   2138  11042       Erik 
   2139  11042       Erik 	if (ill->ill_isv6) {
   2140  11042       Erik 		if (!IN6_IS_ADDR_UNSPECIFIED(&ip6h->ip6_src) &&
   2141  11042       Erik 		    ipif_lookup_testaddr_v6(ill, &ip6h->ip6_src, NULL))
   2142  11042       Erik 			return (B_TRUE);
   2143  11042       Erik 	} else {
   2144  11042       Erik 		if ((ipha->ipha_src != INADDR_ANY) &&
   2145  11042       Erik 		    ipif_lookup_testaddr_v4(ill, &ipha->ipha_src, NULL))
   2146  11042       Erik 			return (B_TRUE);
   2147  11042       Erik 	}
   2148  11042       Erik 	return (B_FALSE);
   2149  11042       Erik }
   2150  11042       Erik 
   2151  11042       Erik /*
   2152  11042       Erik  * Pick out an appropriate underlying interface for packet transmit.  This
   2153  11042       Erik  * function may be called from the data path, so we need to verify that the
   2154  11042       Erik  * IPMP group associated with `ill' is non-null after holding the ill_g_lock.
   2155  11042       Erik  * Caller need not be inside the IPSQ.
   2156  11042       Erik  */
   2157  11042       Erik ill_t *
   2158  11042       Erik ipmp_ill_get_xmit_ill(ill_t *ill, boolean_t is_unicast)
   2159  11042       Erik {
   2160  11042       Erik 	ill_t *xmit_ill;
   2161  11042       Erik 	ip_stack_t *ipst = ill->ill_ipst;
   2162  11042       Erik 
   2163  11042       Erik 	rw_enter(&ipst->ips_ill_g_lock, RW_READER);
   2164  11042       Erik 	if (ill->ill_grp == NULL) {
   2165  11042       Erik 		/*
   2166  11042       Erik 		 * The interface was taken out of the group. Return ill itself,
   2167  11042       Erik 		 * but take a ref so that callers will always be able to do
   2168  11042       Erik 		 * ill_refrele(ill);
   2169  11042       Erik 		 */
   2170  11042       Erik 		rw_exit(&ipst->ips_ill_g_lock);
   2171  11042       Erik 		ill_refhold(ill);
   2172  11042       Erik 		return (ill);
   2173  11042       Erik 	}
   2174  11042       Erik 	if (!is_unicast)
   2175  11042       Erik 		xmit_ill = ipmp_illgrp_hold_cast_ill(ill->ill_grp);
   2176  11042       Erik 	else
   2177  11042       Erik 		xmit_ill = ipmp_illgrp_hold_next_ill(ill->ill_grp);
   2178  11042       Erik 	rw_exit(&ipst->ips_ill_g_lock);
   2179  11042       Erik 	return (xmit_ill);
   2180  11042       Erik }
   2181  11042       Erik 
   2182  11042       Erik /*
   2183  11042       Erik  * Flush out any nce that points at `ncec' from an underlying interface
   2184  11042       Erik  */
   2185  11042       Erik void
   2186  11042       Erik ipmp_ncec_flush_nce(ncec_t *ncec)
   2187  11042       Erik {
   2188  11042       Erik 	ill_t		*ncec_ill = ncec->ncec_ill;
   2189  11042       Erik 	ill_t		*ill;
   2190  11042       Erik 	ipmp_illgrp_t	*illg;
   2191  11042       Erik 	ip_stack_t	*ipst = ncec_ill->ill_ipst;
   2192  11042       Erik 	list_t		dead;
   2193  11042       Erik 	nce_t		*nce;
   2194  11042       Erik 
   2195  11042       Erik 	if (!IS_IPMP(ncec_ill))
   2196  11042       Erik 		return;
   2197  11042       Erik 
   2198  11042       Erik 	illg = ncec_ill->ill_grp;
   2199  11042       Erik 	list_create(&dead, sizeof (nce_t), offsetof(nce_t, nce_node));
   2200  11042       Erik 
   2201  11042       Erik 	rw_enter(&ipst->ips_ill_g_lock, RW_READER);
   2202  11042       Erik 	ill = list_head(&illg->ig_if);
   2203  11042       Erik 	for (; ill != NULL; ill = list_next(&illg->ig_if, ill)) {
   2204  11042       Erik 		nce_fastpath_list_delete(ill, ncec, &dead);
   2205  11042       Erik 	}
   2206  11042       Erik 	rw_exit(&ipst->ips_ill_g_lock);
   2207  11042       Erik 
   2208  11042       Erik 	/*
   2209  11042       Erik 	 * we may now nce_refrele() all dead entries since all locks have been
   2210  11042       Erik 	 * dropped.
   2211  11042       Erik 	 */
   2212  11042       Erik 	while ((nce = list_head(&dead)) != NULL) {
   2213  11042       Erik 		list_remove(&dead, nce);
   2214  11042       Erik 		nce_refrele(nce);
   2215  11042       Erik 	}
   2216  11042       Erik 	ASSERT(list_is_empty(&dead));
   2217  11042       Erik 	list_destroy(&dead);
   2218  11042       Erik }
   2219  11042       Erik 
   2220  11042       Erik /*
   2221  11042       Erik  * For each interface in the IPMP group, if there are nce_t entries for the IP
   2222  11042       Erik  * address corresponding to `ncec', then their dl_unitdata_req_t and fastpath
   2223  11042       Erik  * information must be updated to match the link-layer address information in
   2224  11042       Erik  * `ncec'.
   2225  11042       Erik  */
   2226  11042       Erik void
   2227  11042       Erik ipmp_ncec_fastpath(ncec_t *ncec, ill_t *ipmp_ill)
   2228  11042       Erik {
   2229  11042       Erik 	ill_t		*ill;
   2230  11042       Erik 	ipmp_illgrp_t	*illg = ipmp_ill->ill_grp;
   2231  11042       Erik 	ip_stack_t	*ipst = ipmp_ill->ill_ipst;
   2232  11042       Erik 	nce_t		*nce, *nce_next;
   2233  11042       Erik 	list_t		replace;
   2234  11042       Erik 
   2235  11042       Erik 	ASSERT(IS_IPMP(ipmp_ill));
   2236  11042       Erik 
   2237  11042       Erik 	/*
   2238  11042       Erik 	 * if ncec itself is not reachable, there is no use in creating nce_t
   2239  11042       Erik 	 * entries on the underlying interfaces in the group.
   2240  11042       Erik 	 */
   2241  11042       Erik 	if (!NCE_ISREACHABLE(ncec))
   2242  11042       Erik 		return;
   2243  11042       Erik 
   2244  11042       Erik 	list_create(&replace, sizeof (nce_t), offsetof(nce_t, nce_node));
   2245  11042       Erik 	rw_enter(&ipst->ips_ipmp_lock, RW_READER);
   2246  11042       Erik 	ill = list_head(&illg->ig_actif);
   2247  11042       Erik 	for (; ill != NULL; ill = list_next(&illg->ig_actif, ill)) {
   2248  11042       Erik 		/*
   2249  11042       Erik 		 * For each underlying interface, we first check if there is an
   2250  11042       Erik 		 * nce_t for the address in ncec->ncec_addr. If one exists,
   2251  11042       Erik 		 * we should trigger nce_fastpath for that nce_t. However, the
   2252  11042       Erik 		 * catch is that we are holding the ips_ipmp_lock to prevent
   2253  11042       Erik 		 * changes to the IPMP group membership, so that we cannot
   2254  11042       Erik 		 * putnext() to the driver.  So we nce_delete the
   2255  11042       Erik 		 * list nce_t entries that need to be updated into the
   2256  11042       Erik 		 * `replace' list, and then process the `replace' list
   2257  11042       Erik 		 * after dropping the ips_ipmp_lock.
   2258  11042       Erik 		 */
   2259  11042       Erik 		mutex_enter(&ill->ill_lock);
   2260  11042       Erik 		for (nce = list_head(&ill->ill_nce); nce != NULL; ) {
   2261  11042       Erik 			nce_next = list_next(&ill->ill_nce, nce);
   2262  11042       Erik 			if (!IN6_ARE_ADDR_EQUAL(&nce->nce_addr,
   2263  11042       Erik 			    &ncec->ncec_addr)) {
   2264  11042       Erik 				nce = nce_next;
   2265  11042       Erik 				continue;
   2266  11042       Erik 			}
   2267  11042       Erik 			nce_refhold(nce);
   2268  11042       Erik 			nce_delete(nce);
   2269  11042       Erik 			list_insert_tail(&replace, nce);
   2270  11042       Erik 			nce = nce_next;
   2271  11042       Erik 		}
   2272  11042       Erik 		mutex_exit(&ill->ill_lock);
   2273  11042       Erik 	}
   2274  11042       Erik 	rw_exit(&ipst->ips_ipmp_lock);
   2275  11042       Erik 	/*
   2276  11042       Erik 	 * `replace' now has the list of nce's on which we should be triggering
   2277  11042       Erik 	 * nce_fastpath(). We now retrigger fastpath by setting up the nce
   2278  11042       Erik 	 * again. The code in nce_lookup_then_add_v* ensures that nce->nce_ill
   2279  11042       Erik 	 * is still in the group for ncec->ncec_ill
   2280  11042       Erik 	 */
   2281  11042       Erik 	while ((nce = list_head(&replace)) != NULL) {
   2282  11042       Erik 		list_remove(&replace, nce);
   2283  11042       Erik 		if (ncec->ncec_ill->ill_isv6) {
   2284  11042       Erik 			(void) nce_lookup_then_add_v6(nce->nce_ill,
   2285  11042       Erik 			    ncec->ncec_lladdr,  ncec->ncec_lladdr_length,
   2286  11042       Erik 			    &nce->nce_addr, ncec->ncec_flags, ND_UNCHANGED,
   2287  11042       Erik 			    NULL);
   2288  11042       Erik 		} else {
   2289  11042       Erik 			ipaddr_t ipaddr;
   2290  11042       Erik 
   2291  11042       Erik 			IN6_V4MAPPED_TO_IPADDR(&ncec->ncec_addr, ipaddr);
   2292  11042       Erik 			(void) nce_lookup_then_add_v4(nce->nce_ill,
   2293  11042       Erik 			    ncec->ncec_lladdr, ncec->ncec_lladdr_length,
   2294  11042       Erik 			    &ipaddr, ncec->ncec_flags, ND_UNCHANGED, NULL);
   2295  11042       Erik 		}
   2296  11042       Erik 		nce_refrele(nce);
   2297  11042       Erik 	}
   2298  11042       Erik 	ASSERT(list_is_empty(&replace));
   2299  11042       Erik 	list_destroy(&replace);
   2300  11042       Erik }
   2301