Home | History | Annotate | Download | only in ip
      1 /*
      2  * CDDL HEADER START
      3  *
      4  * The contents of this file are subject to the terms of the
      5  * Common Development and Distribution License (the "License").
      6  * You may not use this file except in compliance with the License.
      7  *
      8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
      9  * or http://www.opensolaris.org/os/licensing.
     10  * See the License for the specific language governing permissions
     11  * and limitations under the License.
     12  *
     13  * When distributing Covered Code, include this CDDL HEADER in each
     14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
     15  * If applicable, add the following below this CDDL HEADER, with the
     16  * fields enclosed by brackets "[]" replaced with your own identifying
     17  * information: Portions Copyright [yyyy] [name of copyright owner]
     18  *
     19  * CDDL HEADER END
     20  */
     21 /*
     22  * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
     23  * Use is subject to license terms.
     24  */
     25 /* Copyright (c) 1990 Mentat Inc. */
     26 
     27 #pragma ident	"%Z%%M%	%I%	%E% SMI"
     28 
     29 #include <sys/types.h>
     30 #include <sys/stream.h>
     31 #include <sys/dlpi.h>
     32 #include <sys/stropts.h>
     33 #include <sys/strsun.h>
     34 #include <sys/ddi.h>
     35 #include <sys/cmn_err.h>
     36 #include <sys/sdt.h>
     37 #include <sys/zone.h>
     38 
     39 #include <sys/param.h>
     40 #include <sys/socket.h>
     41 #include <sys/sockio.h>
     42 #include <net/if.h>
     43 #include <sys/systm.h>
     44 #include <sys/strsubr.h>
     45 #include <net/route.h>
     46 #include <netinet/in.h>
     47 #include <net/if_dl.h>
     48 #include <netinet/ip6.h>
     49 #include <netinet/icmp6.h>
     50 
     51 #include <inet/common.h>
     52 #include <inet/mi.h>
     53 #include <inet/nd.h>
     54 #include <inet/arp.h>
     55 #include <inet/ip.h>
     56 #include <inet/ip6.h>
     57 #include <inet/ip_if.h>
     58 #include <inet/ip_ndp.h>
     59 #include <inet/ip_multi.h>
     60 #include <inet/ipclassifier.h>
     61 #include <inet/ipsec_impl.h>
     62 #include <inet/sctp_ip.h>
     63 #include <inet/ip_listutils.h>
     64 #include <inet/udp_impl.h>
     65 
/*
 * Forward declarations of file-local (static) helpers.  Being static,
 * each must be defined somewhere in this translation unit.
 */

/* igmpv3/mldv2 source filter manipulation */
static void	ilm_bld_flists(conn_t *conn, void *arg);
static void	ilm_gen_filter(ilm_t *ilm, mcast_record_t *fmode,
    slist_t *flist);

/*
 * ilm creation/removal, and the ip_ll_* link-layer requests
 * (ip_ll_delmulti_v6 presumably mirrors ip_ll_addmulti_v6 -- confirm
 * against its definition).
 */
static ilm_t	*ilm_add_v6(ipif_t *ipif, const in6_addr_t *group,
    ilg_stat_t ilgstat, mcast_record_t ilg_fmode, slist_t *ilg_flist,
    int orig_ifindex, zoneid_t zoneid);
static void	ilm_delete(ilm_t *ilm);
static int	ip_ll_addmulti_v6(ipif_t *ipif, const in6_addr_t *group);
static int	ip_ll_delmulti_v6(ipif_t *ipif, const in6_addr_t *group);
/* per-conn ilg lookup, insertion and removal */
static ilg_t	*ilg_lookup_ill_index_v6(conn_t *connp,
    const in6_addr_t *v6group, int index);
static ilg_t	*ilg_lookup_ipif(conn_t *connp, ipaddr_t group,
    ipif_t *ipif);
static int	ilg_add(conn_t *connp, ipaddr_t group, ipif_t *ipif,
    mcast_record_t fmode, ipaddr_t src);
static int	ilg_add_v6(conn_t *connp, const in6_addr_t *group, ill_t *ill,
    mcast_record_t fmode, const in6_addr_t *v6src);
static void	ilg_delete(conn_t *connp, ilg_t *ilg, const in6_addr_t *src);
/* construction of the DLPI request and the ARP squery that carries it */
static mblk_t	*ill_create_dl(ill_t *ill, uint32_t dl_primitive,
    uint32_t length, uint32_t *addr_lenp, uint32_t *addr_offp);
static mblk_t	*ill_create_squery(ill_t *ill, ipaddr_t ipaddr,
    uint32_t addrlen, uint32_t addroff, mblk_t *mp_tail);
/* compaction of a conn's ilg array once no walker is active */
static void	conn_ilg_reap(conn_t *connp);
/* socket-option group deletion helpers (defined outside this view) */
static int	ip_opt_delete_group_excl(conn_t *connp, ipaddr_t group,
    ipif_t *ipif, mcast_record_t fmode, ipaddr_t src);
static int	ip_opt_delete_group_excl_v6(conn_t *connp,
    const in6_addr_t *v6group, ill_t *ill, mcast_record_t fmode,
    const in6_addr_t *v6src);
     96 
     97 /*
     98  * MT notes:
     99  *
 * Multicast joins operate on both the ilg and ilm structures. Multiple
 * threads operating on a conn (socket) trying to do multicast joins
 * need to synchronize when operating on the ilg. Multiple threads
 * potentially operating on different conns (socket endpoints) trying to
 * do multicast joins could eventually end up trying to manipulate the
 * ilm simultaneously and need to synchronize on the access to the ilm.
 * Both are amenable to standard Solaris MT techniques, but it would be
 * complex to handle a failover or failback which needs to manipulate
 * ilg/ilms if an application can also simultaneously join/leave
 * multicast groups. Hence multicast join/leave also go through the ipsq_t
 * serialization.
    111  *
    112  * Multicast joins and leaves are single-threaded per phyint/IPMP group
    113  * using the ipsq serialization mechanism.
    114  *
    115  * An ilm is an IP data structure used to track multicast join/leave.
    116  * An ilm is associated with a <multicast group, ipif> tuple in IPv4 and
    117  * with just <multicast group> in IPv6. ilm_refcnt is the number of ilg's
    118  * referencing the ilm. ilms are created / destroyed only as writer. ilms
    119  * are not passed around, instead they are looked up and used under the
    120  * ill_lock or as writer. So we don't need a dynamic refcount of the number
    121  * of threads holding reference to an ilm.
    122  *
    123  * Multicast Join operation:
    124  *
    125  * The first step is to determine the ipif (v4) or ill (v6) on which
    126  * the join operation is to be done. The join is done after becoming
    127  * exclusive on the ipsq associated with the ipif or ill. The conn->conn_ilg
    128  * and ill->ill_ilm are thus accessed and modified exclusively per ill.
    129  * Multiple threads can attempt to join simultaneously on different ipif/ill
    130  * on the same conn. In this case the ipsq serialization does not help in
    131  * protecting the ilg. It is the conn_lock that is used to protect the ilg.
    132  * The conn_lock also protects all the ilg_t members.
    133  *
    134  * Leave operation.
    135  *
 * Similar to the join operation, the first step is to determine the ipif
 * (v4) or ill (v6) on which the leave operation is to be done. The leave
 * operation is done after becoming exclusive on the ipsq associated with
 * the ipif or ill. As with join, ilg modification is done under the
 * protection of the conn_lock.
    140  */
    141 
    142 #define	IPSQ_ENTER_IPIF(ipif, connp, first_mp, func, ipsq, type)	\
    143 	ASSERT(connp != NULL);					\
    144 	(ipsq) = ipsq_try_enter((ipif), NULL, CONNP_TO_WQ(connp),	\
    145 	    (first_mp), (func), (type), B_TRUE);		\
    146 	if ((ipsq) == NULL) {					\
    147 		ipif_refrele(ipif);				\
    148 		return (EINPROGRESS);				\
    149 	}
    150 
    151 #define	IPSQ_ENTER_ILL(ill, connp, first_mp, func, ipsq, type)	\
    152 	ASSERT(connp != NULL);					\
    153 	(ipsq) = ipsq_try_enter(NULL, ill, CONNP_TO_WQ(connp),	\
    154 	    (first_mp),	(func), (type), B_TRUE);		\
    155 	if ((ipsq) == NULL) {					\
    156 		ill_refrele(ill);				\
    157 		return (EINPROGRESS);				\
    158 	}
    159 
    160 #define	IPSQ_EXIT(ipsq)	\
    161 	if (ipsq != NULL)	\
    162 		ipsq_exit(ipsq);
    163 
/*
 * ILG_WALKER_HOLD: note that a walker of connp->conn_ilg[] is active by
 * bumping conn_ilg_walker_cnt.  While the count is non-zero,
 * conn_ilg_alloc() refuses to grow (and thereby replace) the array.
 */
#define	ILG_WALKER_HOLD(connp)	(connp)->conn_ilg_walker_cnt++

/*
 * ILG_WALKER_RELE: drop one walker hold; when the last hold goes away,
 * call conn_ilg_reap() to compact the array.  conn_ilg_reap() asserts
 * that conn_lock is held, so this must be invoked with conn_lock held.
 * `connp' is evaluated more than once; pass a side-effect-free expression.
 */
#define	ILG_WALKER_RELE(connp)				\
	{						\
		(connp)->conn_ilg_walker_cnt--;		\
		if ((connp)->conn_ilg_walker_cnt == 0)	\
			conn_ilg_reap(connp);		\
	}
    172 
    173 static void
    174 conn_ilg_reap(conn_t *connp)
    175 {
    176 	int	to;
    177 	int	from;
    178 	ilg_t	*ilg;
    179 
    180 	ASSERT(MUTEX_HELD(&connp->conn_lock));
    181 
    182 	to = 0;
    183 	from = 0;
    184 	while (from < connp->conn_ilg_inuse) {
    185 		if (connp->conn_ilg[from].ilg_flags & ILG_DELETED) {
    186 			ilg = &connp->conn_ilg[from];
    187 			FREE_SLIST(ilg->ilg_filter);
    188 			ilg->ilg_flags &= ~ILG_DELETED;
    189 			from++;
    190 			continue;
    191 		}
    192 		if (to != from)
    193 			connp->conn_ilg[to] = connp->conn_ilg[from];
    194 		to++;
    195 		from++;
    196 	}
    197 
    198 	connp->conn_ilg_inuse = to;
    199 
    200 	if (connp->conn_ilg_inuse == 0) {
    201 		mi_free((char *)connp->conn_ilg);
    202 		connp->conn_ilg = NULL;
    203 		cv_broadcast(&connp->conn_refcv);
    204 	}
    205 }
    206 
    207 #define	GETSTRUCT(structure, number)	\
    208 	((structure *)mi_zalloc(sizeof (structure) * (number)))
    209 
    210 #define	ILG_ALLOC_CHUNK	16
    211 
    212 /*
    213  * Returns a pointer to the next available ilg in conn_ilg.  Allocs more
    214  * buffers in size of ILG_ALLOC_CHUNK ilgs when needed, and updates conn's
    215  * ilg tracking fields appropriately (conn_ilg_inuse reflects usage of the
    216  * returned ilg).  Returns NULL on failure (ENOMEM).
    217  *
    218  * Assumes connp->conn_lock is held.
    219  */
    220 static ilg_t *
    221 conn_ilg_alloc(conn_t *connp)
    222 {
    223 	ilg_t *new, *ret;
    224 	int curcnt;
    225 
    226 	ASSERT(MUTEX_HELD(&connp->conn_lock));
    227 	ASSERT(connp->conn_ilg_inuse <= connp->conn_ilg_allocated);
    228 
    229 	if (connp->conn_ilg == NULL) {
    230 		connp->conn_ilg = GETSTRUCT(ilg_t, ILG_ALLOC_CHUNK);
    231 		if (connp->conn_ilg == NULL)
    232 			return (NULL);
    233 		connp->conn_ilg_allocated = ILG_ALLOC_CHUNK;
    234 		connp->conn_ilg_inuse = 0;
    235 	}
    236 	if (connp->conn_ilg_inuse == connp->conn_ilg_allocated) {
    237 		if (connp->conn_ilg_walker_cnt != 0) {
    238 			/*
    239 			 * XXX We cannot grow the array at this point
    240 			 * because a list walker could be in progress, and
    241 			 * we cannot wipe out the existing array until the
    242 			 * walker is done. Just return NULL for now.
    243 			 * ilg_delete_all() will have to be changed when
    244 			 * this logic is changed.
    245 			 */
    246 			return (NULL);
    247 		}
    248 		curcnt = connp->conn_ilg_allocated;
    249 		new = GETSTRUCT(ilg_t, curcnt + ILG_ALLOC_CHUNK);
    250 		if (new == NULL)
    251 			return (NULL);
    252 		bcopy(connp->conn_ilg, new, sizeof (ilg_t) * curcnt);
    253 		mi_free((char *)connp->conn_ilg);
    254 		connp->conn_ilg = new;
    255 		connp->conn_ilg_allocated += ILG_ALLOC_CHUNK;
    256 	}
    257 
    258 	ret = &connp->conn_ilg[connp->conn_ilg_inuse++];
    259 	ASSERT((ret->ilg_flags & ILG_DELETED) == 0);
    260 	bzero(ret, sizeof (*ret));
    261 	return (ret);
    262 }
    263 
/*
 * Accumulator handed (as the opaque `arg') to ilm_bld_flists() for every
 * conn visited by ipcl_walk().  ilm_gen_filter() initializes it and
 * afterwards derives the interface filter from what was collected.
 */
typedef struct ilm_fbld_s {
	/* ilm whose filter state is being rebuilt */
	ilm_t		*fbld_ilm;
	/* number of matching ilgs whose mode is MODE_IS_INCLUDE */
	int		fbld_in_cnt;
	/* number of matching ilgs in any other mode (i.e. exclude) */
	int		fbld_ex_cnt;
	/* running union of the matching include-mode source lists */
	slist_t		fbld_in;
	/* running intersection of the matching exclude-mode source lists */
	slist_t		fbld_ex;
	/* written by l_union_in_a(); read as "include list overflowed" */
	boolean_t	fbld_in_overflow;
} ilm_fbld_t;
    272 
    273 static void
    274 ilm_bld_flists(conn_t *conn, void *arg)
    275 {
    276 	int i;
    277 	ilm_fbld_t *fbld = (ilm_fbld_t *)(arg);
    278 	ilm_t *ilm = fbld->fbld_ilm;
    279 	in6_addr_t *v6group = &ilm->ilm_v6addr;
    280 
    281 	if (conn->conn_ilg_inuse == 0)
    282 		return;
    283 
    284 	/*
    285 	 * Since we can't break out of the ipcl_walk once started, we still
    286 	 * have to look at every conn.  But if we've already found one
    287 	 * (EXCLUDE, NULL) list, there's no need to keep checking individual
    288 	 * ilgs--that will be our state.
    289 	 */
    290 	if (fbld->fbld_ex_cnt > 0 && fbld->fbld_ex.sl_numsrc == 0)
    291 		return;
    292 
    293 	/*
    294 	 * Check this conn's ilgs to see if any are interested in our
    295 	 * ilm (group, interface match).  If so, update the master
    296 	 * include and exclude lists we're building in the fbld struct
    297 	 * with this ilg's filter info.
    298 	 */
    299 	mutex_enter(&conn->conn_lock);
    300 	for (i = 0; i < conn->conn_ilg_inuse; i++) {
    301 		ilg_t *ilg = &conn->conn_ilg[i];
    302 		if ((ilg->ilg_ill == ilm->ilm_ill) &&
    303 		    (ilg->ilg_ipif == ilm->ilm_ipif) &&
    304 		    IN6_ARE_ADDR_EQUAL(&ilg->ilg_v6group, v6group)) {
    305 			if (ilg->ilg_fmode == MODE_IS_INCLUDE) {
    306 				fbld->fbld_in_cnt++;
    307 				if (!fbld->fbld_in_overflow)
    308 					l_union_in_a(&fbld->fbld_in,
    309 					    ilg->ilg_filter,
    310 					    &fbld->fbld_in_overflow);
    311 			} else {
    312 				fbld->fbld_ex_cnt++;
    313 				/*
    314 				 * On the first exclude list, don't try to do
    315 				 * an intersection, as the master exclude list
    316 				 * is intentionally empty.  If the master list
    317 				 * is still empty on later iterations, that
    318 				 * means we have at least one ilg with an empty
    319 				 * exclude list, so that should be reflected
    320 				 * when we take the intersection.
    321 				 */
    322 				if (fbld->fbld_ex_cnt == 1) {
    323 					if (ilg->ilg_filter != NULL)
    324 						l_copy(ilg->ilg_filter,
    325 						    &fbld->fbld_ex);
    326 				} else {
    327 					l_intersection_in_a(&fbld->fbld_ex,
    328 					    ilg->ilg_filter);
    329 				}
    330 			}
    331 			/* there will only be one match, so break now. */
    332 			break;
    333 		}
    334 	}
    335 	mutex_exit(&conn->conn_lock);
    336 }
    337 
    338 static void
    339 ilm_gen_filter(ilm_t *ilm, mcast_record_t *fmode, slist_t *flist)
    340 {
    341 	ilm_fbld_t fbld;
    342 	ip_stack_t *ipst = ilm->ilm_ipst;
    343 
    344 	fbld.fbld_ilm = ilm;
    345 	fbld.fbld_in_cnt = fbld.fbld_ex_cnt = 0;
    346 	fbld.fbld_in.sl_numsrc = fbld.fbld_ex.sl_numsrc = 0;
    347 	fbld.fbld_in_overflow = B_FALSE;
    348 
    349 	/* first, construct our master include and exclude lists */
    350 	ipcl_walk(ilm_bld_flists, (caddr_t)&fbld, ipst);
    351 
    352 	/* now use those master lists to generate the interface filter */
    353 
    354 	/* if include list overflowed, filter is (EXCLUDE, NULL) */
    355 	if (fbld.fbld_in_overflow) {
    356 		*fmode = MODE_IS_EXCLUDE;
    357 		flist->sl_numsrc = 0;
    358 		return;
    359 	}
    360 
    361 	/* if nobody interested, interface filter is (INCLUDE, NULL) */
    362 	if (fbld.fbld_in_cnt == 0 && fbld.fbld_ex_cnt == 0) {
    363 		*fmode = MODE_IS_INCLUDE;
    364 		flist->sl_numsrc = 0;
    365 		return;
    366 	}
    367 
    368 	/*
    369 	 * If there are no exclude lists, then the interface filter
    370 	 * is INCLUDE, with its filter list equal to fbld_in.  A single
    371 	 * exclude list makes the interface filter EXCLUDE, with its
    372 	 * filter list equal to (fbld_ex - fbld_in).
    373 	 */
    374 	if (fbld.fbld_ex_cnt == 0) {
    375 		*fmode = MODE_IS_INCLUDE;
    376 		l_copy(&fbld.fbld_in, flist);
    377 	} else {
    378 		*fmode = MODE_IS_EXCLUDE;
    379 		l_difference(&fbld.fbld_ex, &fbld.fbld_in, flist);
    380 	}
    381 }
    382 
    383 /*
    384  * If the given interface has failed, choose a new one to join on so
    385  * that we continue to receive packets.  ilg_orig_ifindex remembers
    386  * what the application used to join on so that we know the ilg to
    387  * delete even though we change the ill here.  Callers will store the
    388  * ilg returned from this function in ilg_ill.  Thus when we receive
    389  * a packet on ilg_ill, conn_wantpacket_v6 will deliver the packets.
    390  *
    391  * This function must be called as writer so we can walk the group
    392  * list and examine flags without holding a lock.
    393  */
    394 ill_t *
    395 ip_choose_multi_ill(ill_t *ill, const in6_addr_t *grp)
    396 {
    397 	ill_t	*till;
    398 	ill_group_t *illgrp = ill->ill_group;
    399 
    400 	ASSERT(IAM_WRITER_ILL(ill));
    401 
    402 	if (IN6_IS_ADDR_UNSPECIFIED(grp) || illgrp == NULL)
    403 		return (ill);
    404 
    405 	if ((ill->ill_phyint->phyint_flags & (PHYI_FAILED|PHYI_INACTIVE)) == 0)
    406 		return (ill);
    407 
    408 	till = illgrp->illgrp_ill;
    409 	while (till != NULL &&
    410 	    (till->ill_phyint->phyint_flags & (PHYI_FAILED|PHYI_INACTIVE))) {
    411 		till = till->ill_group_next;
    412 	}
    413 	if (till != NULL)
    414 		return (till);
    415 
    416 	return (ill);
    417 }
    418 
    419 static int
    420 ilm_update_add(ilm_t *ilm, ilg_stat_t ilgstat, slist_t *ilg_flist,
    421     boolean_t isv6)
    422 {
    423 	mcast_record_t fmode;
    424 	slist_t *flist;
    425 	boolean_t fdefault;
    426 	char buf[INET6_ADDRSTRLEN];
    427 	ill_t *ill = isv6 ? ilm->ilm_ill : ilm->ilm_ipif->ipif_ill;
    428 
    429 	/*
    430 	 * There are several cases where the ilm's filter state
    431 	 * defaults to (EXCLUDE, NULL):
    432 	 *	- we've had previous joins without associated ilgs
    433 	 *	- this join has no associated ilg
    434 	 *	- the ilg's filter state is (EXCLUDE, NULL)
    435 	 */
    436 	fdefault = (ilm->ilm_no_ilg_cnt > 0) ||
    437 	    (ilgstat == ILGSTAT_NONE) || SLIST_IS_EMPTY(ilg_flist);
    438 
    439 	/* attempt mallocs (if needed) before doing anything else */
    440 	if ((flist = l_alloc()) == NULL)
    441 		return (ENOMEM);
    442 	if (!fdefault && ilm->ilm_filter == NULL) {
    443 		ilm->ilm_filter = l_alloc();
    444 		if (ilm->ilm_filter == NULL) {
    445 			l_free(flist);
    446 			return (ENOMEM);
    447 		}
    448 	}
    449 
    450 	if (ilgstat != ILGSTAT_CHANGE)
    451 		ilm->ilm_refcnt++;
    452 
    453 	if (ilgstat == ILGSTAT_NONE)
    454 		ilm->ilm_no_ilg_cnt++;
    455 
    456 	/*
    457 	 * Determine new filter state.  If it's not the default
    458 	 * (EXCLUDE, NULL), we must walk the conn list to find
    459 	 * any ilgs interested in this group, and re-build the
    460 	 * ilm filter.
    461 	 */
    462 	if (fdefault) {
    463 		fmode = MODE_IS_EXCLUDE;
    464 		flist->sl_numsrc = 0;
    465 	} else {
    466 		ilm_gen_filter(ilm, &fmode, flist);
    467 	}
    468 
    469 	/* make sure state actually changed; nothing to do if not. */
    470 	if ((ilm->ilm_fmode == fmode) &&
    471 	    !lists_are_different(ilm->ilm_filter, flist)) {
    472 		l_free(flist);
    473 		return (0);
    474 	}
    475 
    476 	/* send the state change report */
    477 	if (!IS_LOOPBACK(ill)) {
    478 		if (isv6)
    479 			mld_statechange(ilm, fmode, flist);
    480 		else
    481 			igmp_statechange(ilm, fmode, flist);
    482 	}
    483 
    484 	/* update the ilm state */
    485 	ilm->ilm_fmode = fmode;
    486 	if (flist->sl_numsrc > 0)
    487 		l_copy(flist, ilm->ilm_filter);
    488 	else
    489 		CLEAR_SLIST(ilm->ilm_filter);
    490 
    491 	ip1dbg(("ilm_update: new if filter mode %d, group %s\n", ilm->ilm_fmode,
    492 	    inet_ntop(AF_INET6, &ilm->ilm_v6addr, buf, sizeof (buf))));
    493 
    494 	l_free(flist);
    495 	return (0);
    496 }
    497 
    498 static int
    499 ilm_update_del(ilm_t *ilm, boolean_t isv6)
    500 {
    501 	mcast_record_t fmode;
    502 	slist_t *flist;
    503 	ill_t *ill = isv6 ? ilm->ilm_ill : ilm->ilm_ipif->ipif_ill;
    504 
    505 	ip1dbg(("ilm_update_del: still %d left; updating state\n",
    506 	    ilm->ilm_refcnt));
    507 
    508 	if ((flist = l_alloc()) == NULL)
    509 		return (ENOMEM);
    510 
    511 	/*
    512 	 * If present, the ilg in question has already either been
    513 	 * updated or removed from our list; so all we need to do
    514 	 * now is walk the list to update the ilm filter state.
    515 	 *
    516 	 * Skip the list walk if we have any no-ilg joins, which
    517 	 * cause the filter state to revert to (EXCLUDE, NULL).
    518 	 */
    519 	if (ilm->ilm_no_ilg_cnt != 0) {
    520 		fmode = MODE_IS_EXCLUDE;
    521 		flist->sl_numsrc = 0;
    522 	} else {
    523 		ilm_gen_filter(ilm, &fmode, flist);
    524 	}
    525 
    526 	/* check to see if state needs to be updated */
    527 	if ((ilm->ilm_fmode == fmode) &&
    528 	    (!lists_are_different(ilm->ilm_filter, flist))) {
    529 		l_free(flist);
    530 		return (0);
    531 	}
    532 
    533 	if (!IS_LOOPBACK(ill)) {
    534 		if (isv6)
    535 			mld_statechange(ilm, fmode, flist);
    536 		else
    537 			igmp_statechange(ilm, fmode, flist);
    538 	}
    539 
    540 	ilm->ilm_fmode = fmode;
    541 	if (flist->sl_numsrc > 0) {
    542 		if (ilm->ilm_filter == NULL) {
    543 			ilm->ilm_filter = l_alloc();
    544 			if (ilm->ilm_filter == NULL) {
    545 				char buf[INET6_ADDRSTRLEN];
    546 				ip1dbg(("ilm_update_del: failed to alloc ilm "
    547 				    "filter; no source filtering for %s on %s",
    548 				    inet_ntop(AF_INET6, &ilm->ilm_v6addr,
    549 				    buf, sizeof (buf)), ill->ill_name));
    550 				ilm->ilm_fmode = MODE_IS_EXCLUDE;
    551 				l_free(flist);
    552 				return (0);
    553 			}
    554 		}
    555 		l_copy(flist, ilm->ilm_filter);
    556 	} else {
    557 		CLEAR_SLIST(ilm->ilm_filter);
    558 	}
    559 
    560 	l_free(flist);
    561 	return (0);
    562 }
    563 
    564 /*
    565  * INADDR_ANY means all multicast addresses. This is only used
    566  * by the multicast router.
    567  * INADDR_ANY is stored as IPv6 unspecified addr.
    568  */
    569 int
    570 ip_addmulti(ipaddr_t group, ipif_t *ipif, ilg_stat_t ilgstat,
    571     mcast_record_t ilg_fmode, slist_t *ilg_flist)
    572 {
    573 	ill_t	*ill = ipif->ipif_ill;
    574 	ilm_t 	*ilm;
    575 	in6_addr_t v6group;
    576 	int	ret;
    577 
    578 	ASSERT(IAM_WRITER_IPIF(ipif));
    579 
    580 	if (!CLASSD(group) && group != INADDR_ANY)
    581 		return (EINVAL);
    582 
    583 	/*
    584 	 * INADDR_ANY is represented as the IPv6 unspecifed addr.
    585 	 */
    586 	if (group == INADDR_ANY)
    587 		v6group = ipv6_all_zeros;
    588 	else
    589 		IN6_IPADDR_TO_V4MAPPED(group, &v6group);
    590 
    591 	mutex_enter(&ill->ill_lock);
    592 	ilm = ilm_lookup_ipif(ipif, group);
    593 	mutex_exit(&ill->ill_lock);
    594 	/*
    595 	 * Since we are writer, we know the ilm_flags itself cannot
    596 	 * change at this point, and ilm_lookup_ipif would not have
    597 	 * returned a DELETED ilm. However, the data path can free
    598 	 * ilm->next via ilm_walker_cleanup() so we can safely
    599 	 * access anything in ilm except ilm_next (for safe access to
    600 	 * ilm_next we'd have  to take the ill_lock).
    601 	 */
    602 	if (ilm != NULL)
    603 		return (ilm_update_add(ilm, ilgstat, ilg_flist, B_FALSE));
    604 
    605 	/*
    606 	 * ilms are associated with ipifs in IPv4. It moves with the
    607 	 * ipif if the ipif moves to a new ill when the interface
    608 	 * fails. Thus we really don't check whether the ipif_ill
    609 	 * has failed like in IPv6. If it has FAILED the ipif
    610 	 * will move (daemon will move it) and hence the ilm, if the
    611 	 * ipif is not IPIF_NOFAILOVER. For the IPIF_NOFAILOVER ipifs,
    612 	 * we continue to receive in the same place even if the
    613 	 * interface fails.
    614 	 */
    615 	ilm = ilm_add_v6(ipif, &v6group, ilgstat, ilg_fmode, ilg_flist,
    616 	    ill->ill_phyint->phyint_ifindex, ipif->ipif_zoneid);
    617 	if (ilm == NULL)
    618 		return (ENOMEM);
    619 
    620 	if (group == INADDR_ANY) {
    621 		/*
    622 		 * Check how many ipif's have members in this group -
    623 		 * if more then one we should not tell the driver to join
    624 		 * this time
    625 		 */
    626 		if (ilm_numentries_v6(ill, &v6group) > 1)
    627 			return (0);
    628 		if (ill->ill_group == NULL)
    629 			ret = ip_join_allmulti(ipif);
    630 		else
    631 			ret = ill_nominate_mcast_rcv(ill->ill_group);
    632 		if (ret != 0)
    633 			ilm_delete(ilm);
    634 		return (ret);
    635 	}
    636 
    637 	if (!IS_LOOPBACK(ill))
    638 		igmp_joingroup(ilm);
    639 
    640 	if (ilm_numentries_v6(ill, &v6group) > 1)
    641 		return (0);
    642 
    643 	ret = ip_ll_addmulti_v6(ipif, &v6group);
    644 	if (ret != 0)
    645 		ilm_delete(ilm);
    646 	return (ret);
    647 }
    648 
    649 /*
    650  * The unspecified address means all multicast addresses.
    651  * This is only used by the multicast router.
    652  *
    653  * ill identifies the interface to join on; it may not match the
    654  * interface requested by the application of a failover has taken
    655  * place.  orig_ifindex always identifies the interface requested
    656  * by the app.
    657  *
    658  * ilgstat tells us if there's an ilg associated with this join,
    659  * and if so, if it's a new ilg or a change to an existing one.
    660  * ilg_fmode and ilg_flist give us the current filter state of
    661  * the ilg (and will be EXCLUDE {NULL} in the case of no ilg).
    662  */
    663 int
    664 ip_addmulti_v6(const in6_addr_t *v6group, ill_t *ill, int orig_ifindex,
    665     zoneid_t zoneid, ilg_stat_t ilgstat, mcast_record_t ilg_fmode,
    666     slist_t *ilg_flist)
    667 {
    668 	ilm_t	*ilm;
    669 	int	ret;
    670 
    671 	ASSERT(IAM_WRITER_ILL(ill));
    672 
    673 	if (!IN6_IS_ADDR_MULTICAST(v6group) &&
    674 	    !IN6_IS_ADDR_UNSPECIFIED(v6group)) {
    675 		return (EINVAL);
    676 	}
    677 
    678 	/*
    679 	 * An ilm is uniquely identified by the tuple of (group, ill,
    680 	 * orig_ill).  group is the multicast group address, ill is
    681 	 * the interface on which it is currently joined, and orig_ill
    682 	 * is the interface on which the application requested the
    683 	 * join.  orig_ill and ill are the same unless orig_ill has
    684 	 * failed over.
    685 	 *
    686 	 * Both orig_ill and ill are required, which means we may have
    687 	 * 2 ilms on an ill for the same group, but with different
    688 	 * orig_ills.  These must be kept separate, so that when failback
    689 	 * occurs, the appropriate ilms are moved back to their orig_ill
    690 	 * without disrupting memberships on the ill to which they had
    691 	 * been moved.
    692 	 *
    693 	 * In order to track orig_ill, we store orig_ifindex in the
    694 	 * ilm and ilg.
    695 	 */
    696 	mutex_enter(&ill->ill_lock);
    697 	ilm = ilm_lookup_ill_index_v6(ill, v6group, orig_ifindex, zoneid);
    698 	mutex_exit(&ill->ill_lock);
    699 	if (ilm != NULL)
    700 		return (ilm_update_add(ilm, ilgstat, ilg_flist, B_TRUE));
    701 
    702 	/*
    703 	 * We need to remember where the application really wanted
    704 	 * to join. This will be used later if we want to failback
    705 	 * to the original interface.
    706 	 */
    707 	ilm = ilm_add_v6(ill->ill_ipif, v6group, ilgstat, ilg_fmode,
    708 	    ilg_flist, orig_ifindex, zoneid);
    709 	if (ilm == NULL)
    710 		return (ENOMEM);
    711 
    712 	if (IN6_IS_ADDR_UNSPECIFIED(v6group)) {
    713 		/*
    714 		 * Check how many ipif's that have members in this group -
    715 		 * if more then one we should not tell the driver to join
    716 		 * this time
    717 		 */
    718 		if (ilm_numentries_v6(ill, v6group) > 1)
    719 			return (0);
    720 		if (ill->ill_group == NULL)
    721 			ret = ip_join_allmulti(ill->ill_ipif);
    722 		else
    723 			ret = ill_nominate_mcast_rcv(ill->ill_group);
    724 
    725 		if (ret != 0)
    726 			ilm_delete(ilm);
    727 		return (ret);
    728 	}
    729 
    730 	if (!IS_LOOPBACK(ill))
    731 		mld_joingroup(ilm);
    732 
    733 	/*
    734 	 * If we have more then one we should not tell the driver
    735 	 * to join this time.
    736 	 */
    737 	if (ilm_numentries_v6(ill, v6group) > 1)
    738 		return (0);
    739 
    740 	ret = ip_ll_addmulti_v6(ill->ill_ipif, v6group);
    741 	if (ret != 0)
    742 		ilm_delete(ilm);
    743 	return (ret);
    744 }
    745 
    746 /*
    747  * Send a multicast request to the driver for enabling multicast reception
    748  * for v6groupp address. The caller has already checked whether it is
    749  * appropriate to send one or not.
    750  */
    751 int
    752 ip_ll_send_enabmulti_req(ill_t *ill, const in6_addr_t *v6groupp)
    753 {
    754 	mblk_t	*mp;
    755 	uint32_t addrlen, addroff;
    756 	char	group_buf[INET6_ADDRSTRLEN];
    757 
    758 	ASSERT(IAM_WRITER_ILL(ill));
    759 
    760 	/*
    761 	 * Create a AR_ENTRY_SQUERY message with a dl_enabmulti_req tacked
    762 	 * on.
    763 	 */
    764 	mp = ill_create_dl(ill, DL_ENABMULTI_REQ, sizeof (dl_enabmulti_req_t),
    765 	    &addrlen, &addroff);
    766 	if (!mp)
    767 		return (ENOMEM);
    768 	if (IN6_IS_ADDR_V4MAPPED(v6groupp)) {
    769 		ipaddr_t v4group;
    770 
    771 		IN6_V4MAPPED_TO_IPADDR(v6groupp, v4group);
    772 		/*
    773 		 * NOTE!!!
    774 		 * The "addroff" passed in here was calculated by
    775 		 * ill_create_dl(), and will be used by ill_create_squery()
    776 		 * to perform some twisted coding magic. It is the offset
    777 		 * into the dl_xxx_req of the hw addr. Here, it will be
    778 		 * added to b_wptr - b_rptr to create a magic number that
    779 		 * is not an offset into this squery mblk.
    780 		 * The actual hardware address will be accessed only in the
    781 		 * dl_xxx_req, not in the squery. More importantly,
    782 		 * that hardware address can *only* be accessed in this
    783 		 * mblk chain by calling mi_offset_param_c(), which uses
    784 		 * the magic number in the squery hw offset field to go
    785 		 * to the *next* mblk (the dl_xxx_req), subtract the
    786 		 * (b_wptr - b_rptr), and find the actual offset into
    787 		 * the dl_xxx_req.
    788 		 * Any method that depends on using the
    789 		 * offset field in the dl_disabmulti_req or squery
    790 		 * to find either hardware address will similarly fail.
    791 		 *
    792 		 * Look in ar_entry_squery() in arp.c to see how this offset
    793 		 * is used.
    794 		 */
    795 		mp = ill_create_squery(ill, v4group, addrlen, addroff, mp);
    796 		if (!mp)
    797 			return (ENOMEM);
    798 		ip1dbg(("ip_ll_send_enabmulti_req: IPv4 putnext %s on %s\n",
    799 		    inet_ntop(AF_INET6, v6groupp, group_buf,
    800 		    sizeof (group_buf)),
    801 		    ill->ill_name));
    802 		putnext(ill->ill_rq, mp);
    803 	} else {
    804 		ip1dbg(("ip_ll_send_enabmulti_req: IPv6 ndp_mcastreq %s on"
    805 		    " %s\n",
    806 		    inet_ntop(AF_INET6, v6groupp, group_buf,
    807 		    sizeof (group_buf)),
    808 		    ill->ill_name));
    809 		return (ndp_mcastreq(ill, v6groupp, addrlen, addroff, mp));
    810 	}
    811 	return (0);
    812 }
    813 
    814 /*
    815  * Send a multicast request to the driver for enabling multicast
    816  * membership for v6group if appropriate.
    817  */
    818 static int
    819 ip_ll_addmulti_v6(ipif_t *ipif, const in6_addr_t *v6groupp)
    820 {
    821 	ill_t	*ill = ipif->ipif_ill;
    822 
    823 	ASSERT(IAM_WRITER_IPIF(ipif));
    824 
    825 	if (ill->ill_net_type != IRE_IF_RESOLVER ||
    826 	    ipif->ipif_flags & IPIF_POINTOPOINT) {
    827 		ip1dbg(("ip_ll_addmulti_v6: not resolver\n"));
    828 		return (0);	/* Must be IRE_IF_NORESOLVER */
    829 	}
    830 
    831 	if (ill->ill_phyint->phyint_flags & PHYI_MULTI_BCAST) {
    832 		ip1dbg(("ip_ll_addmulti_v6: MULTI_BCAST\n"));
    833 		return (0);
    834 	}
    835 	if (!ill->ill_dl_up) {
    836 		/*
    837 		 * Nobody there. All multicast addresses will be re-joined
    838 		 * when we get the DL_BIND_ACK bringing the interface up.
    839 		 */
    840 		ip1dbg(("ip_ll_addmulti_v6: nobody up\n"));
    841 		return (0);
    842 	}
    843 	return (ip_ll_send_enabmulti_req(ill, v6groupp));
    844 }
    845 
    846 /*
    847  * INADDR_ANY means all multicast addresses. This is only used
    848  * by the multicast router.
    849  * INADDR_ANY is stored as the IPv6 unspecifed addr.
    850  */
    851 int
    852 ip_delmulti(ipaddr_t group, ipif_t *ipif, boolean_t no_ilg, boolean_t leaving)
    853 {
    854 	ill_t	*ill = ipif->ipif_ill;
    855 	ilm_t *ilm;
    856 	in6_addr_t v6group;
    857 	int	ret;
    858 
    859 	ASSERT(IAM_WRITER_IPIF(ipif));
    860 
    861 	if (!CLASSD(group) && group != INADDR_ANY)
    862 		return (EINVAL);
    863 
    864 	/*
    865 	 * INADDR_ANY is represented as the IPv6 unspecifed addr.
    866 	 */
    867 	if (group == INADDR_ANY)
    868 		v6group = ipv6_all_zeros;
    869 	else
    870 		IN6_IPADDR_TO_V4MAPPED(group, &v6group);
    871 
    872 	/*
    873 	 * Look for a match on the ipif.
    874 	 * (IP_DROP_MEMBERSHIP specifies an ipif using an IP address).
    875 	 */
    876 	mutex_enter(&ill->ill_lock);
    877 	ilm = ilm_lookup_ipif(ipif, group);
    878 	mutex_exit(&ill->ill_lock);
    879 	if (ilm == NULL)
    880 		return (ENOENT);
    881 
    882 	/* Update counters */
    883 	if (no_ilg)
    884 		ilm->ilm_no_ilg_cnt--;
    885 
    886 	if (leaving)
    887 		ilm->ilm_refcnt--;
    888 
    889 	if (ilm->ilm_refcnt > 0)
    890 		return (ilm_update_del(ilm, B_FALSE));
    891 
    892 	if (group == INADDR_ANY) {
    893 		ilm_delete(ilm);
    894 		/*
    895 		 * Check how many ipif's that have members in this group -
    896 		 * if there are still some left then don't tell the driver
    897 		 * to drop it.
    898 		 */
    899 		if (ilm_numentries_v6(ill, &v6group) != 0)
    900 			return (0);
    901 
    902 		/*
    903 		 * If we never joined, then don't leave.  This can happen
    904 		 * if we're in an IPMP group, since only one ill per IPMP
    905 		 * group receives all multicast packets.
    906 		 */
    907 		if (!ill->ill_join_allmulti) {
    908 			ASSERT(ill->ill_group != NULL);
    909 			return (0);
    910 		}
    911 
    912 		ret = ip_leave_allmulti(ipif);
    913 		if (ill->ill_group != NULL)
    914 			(void) ill_nominate_mcast_rcv(ill->ill_group);
    915 		return (ret);
    916 	}
    917 
    918 	if (!IS_LOOPBACK(ill))
    919 		igmp_leavegroup(ilm);
    920 
    921 	ilm_delete(ilm);
    922 	/*
    923 	 * Check how many ipif's that have members in this group -
    924 	 * if there are still some left then don't tell the driver
    925 	 * to drop it.
    926 	 */
    927 	if (ilm_numentries_v6(ill, &v6group) != 0)
    928 		return (0);
    929 	return (ip_ll_delmulti_v6(ipif, &v6group));
    930 }
    931 
    932 /*
    933  * The unspecified address means all multicast addresses.
    934  * This is only used by the multicast router.
    935  */
    936 int
    937 ip_delmulti_v6(const in6_addr_t *v6group, ill_t *ill, int orig_ifindex,
    938     zoneid_t zoneid, boolean_t no_ilg, boolean_t leaving)
    939 {
    940 	ipif_t	*ipif;
    941 	ilm_t *ilm;
    942 	int	ret;
    943 
    944 	ASSERT(IAM_WRITER_ILL(ill));
    945 
    946 	if (!IN6_IS_ADDR_MULTICAST(v6group) &&
    947 	    !IN6_IS_ADDR_UNSPECIFIED(v6group))
    948 		return (EINVAL);
    949 
    950 	/*
    951 	 * Look for a match on the ill.
    952 	 * (IPV6_LEAVE_GROUP specifies an ill using an ifindex).
    953 	 *
    954 	 * Similar to ip_addmulti_v6, we should always look using
    955 	 * the orig_ifindex.
    956 	 *
    957 	 * 1) If orig_ifindex is different from ill's ifindex
    958 	 *    we should have an ilm with orig_ifindex created in
    959 	 *    ip_addmulti_v6. We should delete that here.
    960 	 *
    961 	 * 2) If orig_ifindex is same as ill's ifindex, we should
    962 	 *    not delete the ilm that is temporarily here because of
    963 	 *    a FAILOVER. Those ilms will have a ilm_orig_ifindex
    964 	 *    different from ill's ifindex.
    965 	 *
    966 	 * Thus, always lookup using orig_ifindex.
    967 	 */
    968 	mutex_enter(&ill->ill_lock);
    969 	ilm = ilm_lookup_ill_index_v6(ill, v6group, orig_ifindex, zoneid);
    970 	mutex_exit(&ill->ill_lock);
    971 	if (ilm == NULL)
    972 		return (ENOENT);
    973 
    974 	ASSERT(ilm->ilm_ill == ill);
    975 
    976 	ipif = ill->ill_ipif;
    977 
    978 	/* Update counters */
    979 	if (no_ilg)
    980 		ilm->ilm_no_ilg_cnt--;
    981 
    982 	if (leaving)
    983 		ilm->ilm_refcnt--;
    984 
    985 	if (ilm->ilm_refcnt > 0)
    986 		return (ilm_update_del(ilm, B_TRUE));
    987 
    988 	if (IN6_IS_ADDR_UNSPECIFIED(v6group)) {
    989 		ilm_delete(ilm);
    990 		/*
    991 		 * Check how many ipif's that have members in this group -
    992 		 * if there are still some left then don't tell the driver
    993 		 * to drop it.
    994 		 */
    995 		if (ilm_numentries_v6(ill, v6group) != 0)
    996 			return (0);
    997 
    998 		/*
    999 		 * If we never joined, then don't leave.  This can happen
   1000 		 * if we're in an IPMP group, since only one ill per IPMP
   1001 		 * group receives all multicast packets.
   1002 		 */
   1003 		if (!ill->ill_join_allmulti) {
   1004 			ASSERT(ill->ill_group != NULL);
   1005 			return (0);
   1006 		}
   1007 
   1008 		ret = ip_leave_allmulti(ipif);
   1009 		if (ill->ill_group != NULL)
   1010 			(void) ill_nominate_mcast_rcv(ill->ill_group);
   1011 		return (ret);
   1012 	}
   1013 
   1014 	if (!IS_LOOPBACK(ill))
   1015 		mld_leavegroup(ilm);
   1016 
   1017 	ilm_delete(ilm);
   1018 	/*
   1019 	 * Check how many ipif's that have members in this group -
   1020 	 * if there are still some left then don't tell the driver
   1021 	 * to drop it.
   1022 	 */
   1023 	if (ilm_numentries_v6(ill, v6group) != 0)
   1024 		return (0);
   1025 	return (ip_ll_delmulti_v6(ipif, v6group));
   1026 }
   1027 
   1028 /*
   1029  * Send a multicast request to the driver for disabling multicast reception
   1030  * for v6groupp address. The caller has already checked whether it is
   1031  * appropriate to send one or not.
   1032  */
   1033 int
   1034 ip_ll_send_disabmulti_req(ill_t *ill, const in6_addr_t *v6groupp)
   1035 {
   1036 	mblk_t	*mp;
   1037 	char	group_buf[INET6_ADDRSTRLEN];
   1038 	uint32_t	addrlen, addroff;
   1039 
   1040 	ASSERT(IAM_WRITER_ILL(ill));
   1041 	/*
   1042 	 * Create a AR_ENTRY_SQUERY message with a dl_disabmulti_req tacked
   1043 	 * on.
   1044 	 */
   1045 	mp = ill_create_dl(ill, DL_DISABMULTI_REQ,
   1046 	    sizeof (dl_disabmulti_req_t), &addrlen, &addroff);
   1047 
   1048 	if (!mp)
   1049 		return (ENOMEM);
   1050 
   1051 	if (IN6_IS_ADDR_V4MAPPED(v6groupp)) {
   1052 		ipaddr_t v4group;
   1053 
   1054 		IN6_V4MAPPED_TO_IPADDR(v6groupp, v4group);
   1055 		/*
   1056 		 * NOTE!!!
   1057 		 * The "addroff" passed in here was calculated by
   1058 		 * ill_create_dl(), and will be used by ill_create_squery()
   1059 		 * to perform some twisted coding magic. It is the offset
   1060 		 * into the dl_xxx_req of the hw addr. Here, it will be
   1061 		 * added to b_wptr - b_rptr to create a magic number that
   1062 		 * is not an offset into this mblk.
   1063 		 *
   1064 		 * Please see the comment in ip_ll_send)enabmulti_req()
   1065 		 * for a complete explanation.
   1066 		 *
   1067 		 * Look in ar_entry_squery() in arp.c to see how this offset
   1068 		 * is used.
   1069 		 */
   1070 		mp = ill_create_squery(ill, v4group, addrlen, addroff, mp);
   1071 		if (!mp)
   1072 			return (ENOMEM);
   1073 		ip1dbg(("ip_ll_send_disabmulti_req: IPv4 putnext %s on %s\n",
   1074 		    inet_ntop(AF_INET6, v6groupp, group_buf,
   1075 		    sizeof (group_buf)),
   1076 		    ill->ill_name));
   1077 		putnext(ill->ill_rq, mp);
   1078 	} else {
   1079 		ip1dbg(("ip_ll_send_disabmulti_req: IPv6 ndp_mcastreq %s on"
   1080 		    " %s\n",
   1081 		    inet_ntop(AF_INET6, v6groupp, group_buf,
   1082 		    sizeof (group_buf)),
   1083 		    ill->ill_name));
   1084 		return (ndp_mcastreq(ill, v6groupp, addrlen, addroff, mp));
   1085 	}
   1086 	return (0);
   1087 }
   1088 
   1089 /*
   1090  * Send a multicast request to the driver for disabling multicast
   1091  * membership for v6group if appropriate.
   1092  */
   1093 static int
   1094 ip_ll_delmulti_v6(ipif_t *ipif, const in6_addr_t *v6group)
   1095 {
   1096 	ill_t	*ill = ipif->ipif_ill;
   1097 
   1098 	ASSERT(IAM_WRITER_IPIF(ipif));
   1099 
   1100 	if (ill->ill_net_type != IRE_IF_RESOLVER ||
   1101 	    ipif->ipif_flags & IPIF_POINTOPOINT) {
   1102 		return (0);	/* Must be IRE_IF_NORESOLVER */
   1103 	}
   1104 	if (ill->ill_phyint->phyint_flags & PHYI_MULTI_BCAST) {
   1105 		ip1dbg(("ip_ll_delmulti_v6: MULTI_BCAST\n"));
   1106 		return (0);
   1107 	}
   1108 	if (!ill->ill_dl_up) {
   1109 		/*
   1110 		 * Nobody there. All multicast addresses will be re-joined
   1111 		 * when we get the DL_BIND_ACK bringing the interface up.
   1112 		 */
   1113