Home | History | Annotate | Download | only in ip
      1 /*
      2  * CDDL HEADER START
      3  *
      4  * The contents of this file are subject to the terms of the
      5  * Common Development and Distribution License (the "License").
      6  * You may not use this file except in compliance with the License.
      7  *
      8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
      9  * or http://www.opensolaris.org/os/licensing.
     10  * See the License for the specific language governing permissions
     11  * and limitations under the License.
     12  *
     13  * When distributing Covered Code, include this CDDL HEADER in each
     14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
     15  * If applicable, add the following below this CDDL HEADER, with the
     16  * fields enclosed by brackets "[]" replaced with your own identifying
     17  * information: Portions Copyright [yyyy] [name of copyright owner]
     18  *
     19  * CDDL HEADER END
     20  */
     21 /*
     22  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
     23  * Use is subject to license terms.
     24  */
     25 /* Copyright (c) 1990 Mentat Inc. */
     26 
     27 /*
     28  * Internet Group Management Protocol (IGMP) routines.
     29  * Multicast Listener Discovery Protocol (MLD) routines.
     30  *
     31  * Written by Steve Deering, Stanford, May 1988.
     32  * Modified by Rosen Sharma, Stanford, Aug 1994.
     33  * Modified by Bill Fenner, Xerox PARC, Feb. 1995.
     34  *
     35  * MULTICAST 3.5.1.1
     36  */
     37 
     38 #include <sys/types.h>
     39 #include <sys/stream.h>
     40 #include <sys/stropts.h>
     41 #include <sys/strlog.h>
     42 #include <sys/strsun.h>
     43 #include <sys/systm.h>
     44 #include <sys/ddi.h>
     45 #include <sys/sunddi.h>
     46 #include <sys/cmn_err.h>
     47 #include <sys/atomic.h>
     48 #include <sys/zone.h>
     49 #include <sys/callb.h>
     50 #include <sys/param.h>
     51 #include <sys/socket.h>
     52 #include <inet/ipclassifier.h>
     53 #include <net/if.h>
     54 #include <net/route.h>
     55 #include <netinet/in.h>
     56 #include <netinet/igmp_var.h>
     57 #include <netinet/ip6.h>
     58 #include <netinet/icmp6.h>
     59 
     60 #include <inet/common.h>
     61 #include <inet/mi.h>
     62 #include <inet/nd.h>
     63 #include <inet/ip.h>
     64 #include <inet/ip6.h>
     65 #include <inet/ip_multi.h>
     66 #include <inet/ip_listutils.h>
     67 
     68 #include <netinet/igmp.h>
     69 #include <inet/ip_if.h>
     70 #include <net/pfkeyv2.h>
     71 #include <inet/ipsec_info.h>
     72 
     73 static uint_t	igmp_query_in(ipha_t *ipha, igmpa_t *igmpa, ill_t *ill);
     74 static uint_t	igmpv3_query_in(igmp3qa_t *igmp3qa, ill_t *ill, int igmplen);
     75 static uint_t	mld_query_in(mld_hdr_t *mldh, ill_t *ill);
     76 static uint_t	mldv2_query_in(mld2q_t *mld2q, ill_t *ill, int mldlen);
     77 static void	igmp_sendpkt(ilm_t *ilm, uchar_t type, ipaddr_t addr);
     78 static void	mld_sendpkt(ilm_t *ilm, uchar_t type, const in6_addr_t *v6addr);
     79 static void	igmpv3_sendrpt(ipif_t *ipif, mrec_t *reclist);
     80 static void	mldv2_sendrpt(ill_t *ill, mrec_t *reclist);
     81 static mrec_t	*mcast_bldmrec(mcast_record_t type, in6_addr_t *grp,
     82 		    slist_t *srclist, mrec_t *next);
     83 static void	mcast_init_rtx(ill_t *ill, rtx_state_t *rtxp,
     84 		    mcast_record_t rtype, slist_t *flist);
     85 static mrec_t	*mcast_merge_rtx(ilm_t *ilm, mrec_t *rp, slist_t *flist);
     86 static void	mcast_signal_restart_thread(ip_stack_t *ipst);
     87 
     88 /*
     89  * Macros used to do timer len conversions.  Timer values are always
     90  * stored and passed to the timer functions as milliseconds; but the
     91  * default values and values from the wire may not be.
     92  *
     93  * And yes, it's obscure, but decisecond is easier to abbreviate than
     94  * "tenths of a second".
     95  */
     96 #define	DSEC_TO_MSEC(dsec)	((dsec) * 100)
     97 #define	SEC_TO_MSEC(sec)	((sec) * 1000)
     98 
     99 /*
    100  * A running timer (scheduled thru timeout) can be cancelled if another
    101  * timer with a shorter timeout value is scheduled before it has timed
    102  * out.  When the shorter timer expires, the original timer is updated
    103  * to account for the time elapsed while the shorter timer ran; but this
    104  * does not take into account the amount of time already spent in timeout
    105  * state before being preempted by the shorter timer, that is the time
    106  * interval between time scheduled to time cancelled.  This can cause
    107  * delays in sending out multicast membership reports.  To resolve this
    108  * problem, wallclock time (absolute time) is used instead of deltas
    109  * (relative time) to track timers.
    110  *
    111  * The MACRO below gets the lbolt value, used for proper timer scheduling
    112  * and firing. Therefore multicast membership reports are sent on time.
    113  * The timer does not exactly fire at the time it was scheduled to fire,
    114  * there is a difference of a few milliseconds observed. An offset is used
    115  * to take care of the difference.
    116  */
    117 
    118 #define	CURRENT_MSTIME	((uint_t)TICK_TO_MSEC(ddi_get_lbolt()))
    119 #define	CURRENT_OFFSET	(999)
    120 
    121 /*
    122  * The first multicast join will trigger the igmp timers / mld timers
    123  * The unit for next is milliseconds.
    124  */
    125 static void
    126 igmp_start_timers(unsigned next, ip_stack_t *ipst)
    127 {
    128 	int	time_left;
    129 	int	ret;
    130 
    131 	ASSERT(next != 0 && next != INFINITY);
    132 
    133 	mutex_enter(&ipst->ips_igmp_timer_lock);
    134 
    135 	if (ipst->ips_igmp_timer_setter_active) {
    136 		/*
    137 		 * Serialize timer setters, one at a time. If the
    138 		 * timer is currently being set by someone,
    139 		 * just record the next time when it has to be
    140 		 * invoked and return. The current setter will
    141 		 * take care.
    142 		 */
    143 		ipst->ips_igmp_time_to_next =
    144 		    MIN(ipst->ips_igmp_time_to_next, next);
    145 		mutex_exit(&ipst->ips_igmp_timer_lock);
    146 		return;
    147 	} else {
    148 		ipst->ips_igmp_timer_setter_active = B_TRUE;
    149 	}
    150 	if (ipst->ips_igmp_timeout_id == 0) {
    151 		/*
    152 		 * The timer is inactive. We need to start a timer
    153 		 */
    154 		ipst->ips_igmp_time_to_next = next;
    155 		ipst->ips_igmp_timeout_id = timeout(igmp_timeout_handler,
    156 		    (void *)ipst, MSEC_TO_TICK(ipst->ips_igmp_time_to_next));
    157 		ipst->ips_igmp_timer_scheduled_last = ddi_get_lbolt();
    158 		ipst->ips_igmp_timer_setter_active = B_FALSE;
    159 		mutex_exit(&ipst->ips_igmp_timer_lock);
    160 		return;
    161 	}
    162 
    163 	/*
    164 	 * The timer was scheduled sometime back for firing in
    165 	 * 'igmp_time_to_next' ms and is active. We need to
    166 	 * reschedule the timeout if the new 'next' will happen
    167 	 * earlier than the currently scheduled timeout
    168 	 */
    169 	time_left = ipst->ips_igmp_timer_scheduled_last +
    170 	    MSEC_TO_TICK(ipst->ips_igmp_time_to_next) - ddi_get_lbolt();
    171 	if (time_left < MSEC_TO_TICK(next)) {
    172 		ipst->ips_igmp_timer_setter_active = B_FALSE;
    173 		mutex_exit(&ipst->ips_igmp_timer_lock);
    174 		return;
    175 	}
    176 
    177 	mutex_exit(&ipst->ips_igmp_timer_lock);
    178 	ret = untimeout(ipst->ips_igmp_timeout_id);
    179 	mutex_enter(&ipst->ips_igmp_timer_lock);
    180 	/*
    181 	 * The timeout was cancelled, or the timeout handler
    182 	 * completed, while we were blocked in the untimeout.
    183 	 * No other thread could have set the timer meanwhile
    184 	 * since we serialized all the timer setters. Thus
    185 	 * no timer is currently active nor executing nor will
    186 	 * any timer fire in the future. We start the timer now
    187 	 * if needed.
    188 	 */
    189 	if (ret == -1) {
    190 		ASSERT(ipst->ips_igmp_timeout_id == 0);
    191 	} else {
    192 		ASSERT(ipst->ips_igmp_timeout_id != 0);
    193 		ipst->ips_igmp_timeout_id = 0;
    194 	}
    195 	if (ipst->ips_igmp_time_to_next != 0) {
    196 		ipst->ips_igmp_time_to_next =
    197 		    MIN(ipst->ips_igmp_time_to_next, next);
    198 		ipst->ips_igmp_timeout_id = timeout(igmp_timeout_handler,
    199 		    (void *)ipst, MSEC_TO_TICK(ipst->ips_igmp_time_to_next));
    200 		ipst->ips_igmp_timer_scheduled_last = ddi_get_lbolt();
    201 	}
    202 	ipst->ips_igmp_timer_setter_active = B_FALSE;
    203 	mutex_exit(&ipst->ips_igmp_timer_lock);
    204 }
    205 
    206 /*
    207  * mld_start_timers:
    208  * The unit for next is milliseconds.
    209  */
    210 static void
    211 mld_start_timers(unsigned next, ip_stack_t *ipst)
    212 {
    213 	int	time_left;
    214 	int	ret;
    215 
    216 	ASSERT(next != 0 && next != INFINITY);
    217 
    218 	mutex_enter(&ipst->ips_mld_timer_lock);
    219 	if (ipst->ips_mld_timer_setter_active) {
    220 		/*
    221 		 * Serialize timer setters, one at a time. If the
    222 		 * timer is currently being set by someone,
    223 		 * just record the next time when it has to be
    224 		 * invoked and return. The current setter will
    225 		 * take care.
    226 		 */
    227 		ipst->ips_mld_time_to_next =
    228 		    MIN(ipst->ips_mld_time_to_next, next);
    229 		mutex_exit(&ipst->ips_mld_timer_lock);
    230 		return;
    231 	} else {
    232 		ipst->ips_mld_timer_setter_active = B_TRUE;
    233 	}
    234 	if (ipst->ips_mld_timeout_id == 0) {
    235 		/*
    236 		 * The timer is inactive. We need to start a timer
    237 		 */
    238 		ipst->ips_mld_time_to_next = next;
    239 		ipst->ips_mld_timeout_id = timeout(mld_timeout_handler,
    240 		    (void *)ipst, MSEC_TO_TICK(ipst->ips_mld_time_to_next));
    241 		ipst->ips_mld_timer_scheduled_last = ddi_get_lbolt();
    242 		ipst->ips_mld_timer_setter_active = B_FALSE;
    243 		mutex_exit(&ipst->ips_mld_timer_lock);
    244 		return;
    245 	}
    246 
    247 	/*
    248 	 * The timer was scheduled sometime back for firing in
    249 	 * 'igmp_time_to_next' ms and is active. We need to
    250 	 * reschedule the timeout if the new 'next' will happen
    251 	 * earlier than the currently scheduled timeout
    252 	 */
    253 	time_left = ipst->ips_mld_timer_scheduled_last +
    254 	    MSEC_TO_TICK(ipst->ips_mld_time_to_next) - ddi_get_lbolt();
    255 	if (time_left < MSEC_TO_TICK(next)) {
    256 		ipst->ips_mld_timer_setter_active = B_FALSE;
    257 		mutex_exit(&ipst->ips_mld_timer_lock);
    258 		return;
    259 	}
    260 
    261 	mutex_exit(&ipst->ips_mld_timer_lock);
    262 	ret = untimeout(ipst->ips_mld_timeout_id);
    263 	mutex_enter(&ipst->ips_mld_timer_lock);
    264 	/*
    265 	 * The timeout was cancelled, or the timeout handler
    266 	 * completed, while we were blocked in the untimeout.
    267 	 * No other thread could have set the timer meanwhile
    268 	 * since we serialized all the timer setters. Thus
    269 	 * no timer is currently active nor executing nor will
    270 	 * any timer fire in the future. We start the timer now
    271 	 * if needed.
    272 	 */
    273 	if (ret == -1) {
    274 		ASSERT(ipst->ips_mld_timeout_id == 0);
    275 	} else {
    276 		ASSERT(ipst->ips_mld_timeout_id != 0);
    277 		ipst->ips_mld_timeout_id = 0;
    278 	}
    279 	if (ipst->ips_mld_time_to_next != 0) {
    280 		ipst->ips_mld_time_to_next =
    281 		    MIN(ipst->ips_mld_time_to_next, next);
    282 		ipst->ips_mld_timeout_id = timeout(mld_timeout_handler,
    283 		    (void *)ipst, MSEC_TO_TICK(ipst->ips_mld_time_to_next));
    284 		ipst->ips_mld_timer_scheduled_last = ddi_get_lbolt();
    285 	}
    286 	ipst->ips_mld_timer_setter_active = B_FALSE;
    287 	mutex_exit(&ipst->ips_mld_timer_lock);
    288 }
    289 
    290 /*
    291  * igmp_input:
    292  * Return NULL for a bad packet that is discarded here.
    293  * Return mp if the message is OK and should be handed to "raw" receivers.
    294  * Callers of igmp_input() may need to reinitialize variables that were copied
    295  * from the mblk as this calls pullupmsg().
    296  */
    297 /* ARGSUSED */
    298 mblk_t *
    299 igmp_input(queue_t *q, mblk_t *mp, ill_t *ill)
    300 {
    301 	igmpa_t 	*igmpa;
    302 	ipha_t		*ipha = (ipha_t *)(mp->b_rptr);
    303 	int		iphlen, igmplen, mblklen;
    304 	ilm_t 		*ilm;
    305 	uint32_t	src, dst;
    306 	uint32_t 	group;
    307 	uint_t		next;
    308 	ipif_t 		*ipif;
    309 	ip_stack_t	*ipst;
    310 	ilm_walker_t	ilw;
    311 
    312 	ASSERT(ill != NULL);
    313 	ASSERT(!ill->ill_isv6);
    314 	ipst = ill->ill_ipst;
    315 	++ipst->ips_igmpstat.igps_rcv_total;
    316 
    317 	mblklen = MBLKL(mp);
    318 	if (mblklen < 1 || mblklen < (iphlen = IPH_HDR_LENGTH(ipha))) {
    319 		++ipst->ips_igmpstat.igps_rcv_tooshort;
    320 		goto bad_pkt;
    321 	}
    322 	igmplen = ntohs(ipha->ipha_length) - iphlen;
    323 	/*
    324 	 * Since msg sizes are more variable with v3, just pullup the
    325 	 * whole thing now.
    326 	 */
    327 	if (MBLKL(mp) < (igmplen + iphlen)) {
    328 		mblk_t *mp1;
    329 		if ((mp1 = msgpullup(mp, -1)) == NULL) {
    330 			++ipst->ips_igmpstat.igps_rcv_tooshort;
    331 			goto bad_pkt;
    332 		}
    333 		freemsg(mp);
    334 		mp = mp1;
    335 		ipha = (ipha_t *)(mp->b_rptr);
    336 	}
    337 
    338 	/*
    339 	 * Validate lengths
    340 	 */
    341 	if (igmplen < IGMP_MINLEN) {
    342 		++ipst->ips_igmpstat.igps_rcv_tooshort;
    343 		goto bad_pkt;
    344 	}
    345 	/*
    346 	 * Validate checksum
    347 	 */
    348 	if (IP_CSUM(mp, iphlen, 0)) {
    349 		++ipst->ips_igmpstat.igps_rcv_badsum;
    350 		goto bad_pkt;
    351 	}
    352 
    353 	igmpa = (igmpa_t *)(&mp->b_rptr[iphlen]);
    354 	src = ipha->ipha_src;
    355 	dst = ipha->ipha_dst;
    356 	if (ip_debug > 1)
    357 		(void) mi_strlog(ill->ill_rq, 1, SL_TRACE,
    358 		    "igmp_input: src 0x%x, dst 0x%x on %s\n",
    359 		    (int)ntohl(src), (int)ntohl(dst),
    360 		    ill->ill_name);
    361 
    362 	switch (igmpa->igmpa_type) {
    363 	case IGMP_MEMBERSHIP_QUERY:
    364 		/*
    365 		 * packet length differentiates between v1/v2 and v3
    366 		 * v1/v2 should be exactly 8 octets long; v3 is >= 12
    367 		 */
    368 		if ((igmplen == IGMP_MINLEN) ||
    369 		    (ipst->ips_igmp_max_version <= IGMP_V2_ROUTER)) {
    370 			next = igmp_query_in(ipha, igmpa, ill);
    371 		} else if (igmplen >= IGMP_V3_QUERY_MINLEN) {
    372 			next = igmpv3_query_in((igmp3qa_t *)igmpa, ill,
    373 			    igmplen);
    374 		} else {
    375 			++ipst->ips_igmpstat.igps_rcv_tooshort;
    376 			goto bad_pkt;
    377 		}
    378 		if (next == 0)
    379 			goto bad_pkt;
    380 
    381 		if (next != INFINITY)
    382 			igmp_start_timers(next, ipst);
    383 
    384 		break;
    385 
    386 	case IGMP_V1_MEMBERSHIP_REPORT:
    387 	case IGMP_V2_MEMBERSHIP_REPORT:
    388 		/*
    389 		 * For fast leave to work, we have to know that we are the
    390 		 * last person to send a report for this group. Reports
    391 		 * generated by us are looped back since we could potentially
    392 		 * be a multicast router, so discard reports sourced by me.
    393 		 */
    394 		mutex_enter(&ill->ill_lock);
    395 		for (ipif = ill->ill_ipif; ipif != NULL;
    396 		    ipif = ipif->ipif_next) {
    397 			if (ipif->ipif_lcl_addr == src) {
    398 				if (ip_debug > 1) {
    399 					(void) mi_strlog(ill->ill_rq,
    400 					    1,
    401 					    SL_TRACE,
    402 					    "igmp_input: we are only "
    403 					    "member src 0x%x ipif_local 0x%x",
    404 					    (int)ntohl(src),
    405 					    (int)ntohl(ipif->ipif_lcl_addr));
    406 				}
    407 				mutex_exit(&ill->ill_lock);
    408 				return (mp);
    409 			}
    410 		}
    411 		mutex_exit(&ill->ill_lock);
    412 
    413 		++ipst->ips_igmpstat.igps_rcv_reports;
    414 		group = igmpa->igmpa_group;
    415 		if (!CLASSD(group)) {
    416 			++ipst->ips_igmpstat.igps_rcv_badreports;
    417 			goto bad_pkt;
    418 		}
    419 
    420 		/*
    421 		 * KLUDGE: if the IP source address of the report has an
    422 		 * unspecified (i.e., zero) subnet number, as is allowed for
    423 		 * a booting host, replace it with the correct subnet number
    424 		 * so that a process-level multicast routing demon can
    425 		 * determine which subnet it arrived from.  This is necessary
    426 		 * to compensate for the lack of any way for a process to
    427 		 * determine the arrival interface of an incoming packet.
    428 		 *
    429 		 * Requires that a copy of *this* message it passed up
    430 		 * to the raw interface which is done by our caller.
    431 		 */
    432 		if ((src & htonl(0xFF000000U)) == 0) {	/* Minimum net mask */
    433 			/* Pick the first ipif on this ill */
    434 			mutex_enter(&ill->ill_lock);
    435 			src = ill->ill_ipif->ipif_subnet;
    436 			mutex_exit(&ill->ill_lock);
    437 			ip1dbg(("igmp_input: changed src to 0x%x\n",
    438 			    (int)ntohl(src)));
    439 			ipha->ipha_src = src;
    440 		}
    441 
    442 		/*
    443 		 * If our ill has ILMs that belong to the group being
    444 		 * reported, and we are a 'Delaying Member' in the RFC
    445 		 * terminology, stop our timer for that group and 'clear
    446 		 * flag' i.e. mark as IGMP_OTHERMEMBER.
    447 		 */
    448 		ilm = ilm_walker_start(&ilw, ill);
    449 		for (; ilm != NULL; ilm = ilm_walker_step(&ilw, ilm)) {
    450 			if (ilm->ilm_addr == group) {
    451 				++ipst->ips_igmpstat.igps_rcv_ourreports;
    452 				ilm->ilm_timer = INFINITY;
    453 				ilm->ilm_state = IGMP_OTHERMEMBER;
    454 			}
    455 		}
    456 		ilm_walker_finish(&ilw);
    457 		break;
    458 
    459 	case IGMP_V3_MEMBERSHIP_REPORT:
    460 		/*
    461 		 * Currently nothing to do here; IGMP router is not
    462 		 * implemented in ip, and v3 hosts don't pay attention
    463 		 * to membership reports.
    464 		 */
    465 		break;
    466 	}
    467 	/*
    468 	 * Pass all valid IGMP packets up to any process(es) listening
    469 	 * on a raw IGMP socket. Do not free the packet.
    470 	 */
    471 	return (mp);
    472 
    473 bad_pkt:
    474 	freemsg(mp);
    475 	return (NULL);
    476 }
    477 
    478 static uint_t
    479 igmp_query_in(ipha_t *ipha, igmpa_t *igmpa, ill_t *ill)
    480 {
    481 	ilm_t	*ilm;
    482 	int	timer;
    483 	uint_t	next, current;
    484 	ip_stack_t	 *ipst;
    485 	ilm_walker_t 	ilw;
    486 
    487 	ipst = ill->ill_ipst;
    488 	++ipst->ips_igmpstat.igps_rcv_queries;
    489 
    490 	/*
    491 	 * In the IGMPv2 specification, there are 3 states and a flag.
    492 	 *
    493 	 * In Non-Member state, we simply don't have a membership record.
    494 	 * In Delaying Member state, our timer is running (ilm->ilm_timer
    495 	 * < INFINITY).  In Idle Member state, our timer is not running
    496 	 * (ilm->ilm_timer == INFINITY).
    497 	 *
    498 	 * The flag is ilm->ilm_state, it is set to IGMP_OTHERMEMBER if
    499 	 * we have heard a report from another member, or IGMP_IREPORTEDLAST
    500 	 * if I sent the last report.
    501 	 */
    502 	if ((igmpa->igmpa_code == 0) ||
    503 	    (ipst->ips_igmp_max_version == IGMP_V1_ROUTER)) {
    504 		/*
    505 		 * Query from an old router.
    506 		 * Remember that the querier on this interface is old,
    507 		 * and set the timer to the value in RFC 1112.
    508 		 */
    509 
    510 
    511 		mutex_enter(&ill->ill_lock);
    512 		ill->ill_mcast_v1_time = 0;
    513 		ill->ill_mcast_v1_tset = 1;
    514 		if (ill->ill_mcast_type != IGMP_V1_ROUTER) {
    515 			ip1dbg(("Received IGMPv1 Query on %s, switching mode "
    516 			    "to IGMP_V1_ROUTER\n", ill->ill_name));
    517 			atomic_add_16(&ill->ill_ifptr->illif_mcast_v1, 1);
    518 			ill->ill_mcast_type = IGMP_V1_ROUTER;
    519 		}
    520 		mutex_exit(&ill->ill_lock);
    521 
    522 		timer = SEC_TO_MSEC(IGMP_MAX_HOST_REPORT_DELAY);
    523 
    524 		if (ipha->ipha_dst != htonl(INADDR_ALLHOSTS_GROUP) ||
    525 		    igmpa->igmpa_group != 0) {
    526 			++ipst->ips_igmpstat.igps_rcv_badqueries;
    527 			return (0);
    528 		}
    529 
    530 	} else {
    531 		in_addr_t group;
    532 
    533 		/*
    534 		 * Query from a new router
    535 		 * Simply do a validity check
    536 		 */
    537 		group = igmpa->igmpa_group;
    538 		if (group != 0 && (!CLASSD(group))) {
    539 			++ipst->ips_igmpstat.igps_rcv_badqueries;
    540 			return (0);
    541 		}
    542 
    543 		/*
    544 		 * Switch interface state to v2 on receipt of a v2 query
    545 		 * ONLY IF current state is v3.  Let things be if current
    546 		 * state if v1 but do reset the v2-querier-present timer.
    547 		 */
    548 		mutex_enter(&ill->ill_lock);
    549 		if (ill->ill_mcast_type == IGMP_V3_ROUTER) {
    550 			ip1dbg(("Received IGMPv2 Query on %s, switching mode "
    551 			    "to IGMP_V2_ROUTER", ill->ill_name));
    552 			atomic_add_16(&ill->ill_ifptr->illif_mcast_v2, 1);
    553 			ill->ill_mcast_type = IGMP_V2_ROUTER;
    554 		}
    555 		ill->ill_mcast_v2_time = 0;
    556 		ill->ill_mcast_v2_tset = 1;
    557 		mutex_exit(&ill->ill_lock);
    558 
    559 		timer = DSEC_TO_MSEC((int)igmpa->igmpa_code);
    560 	}
    561 
    562 	if (ip_debug > 1) {
    563 		mutex_enter(&ill->ill_lock);
    564 		(void) mi_strlog(ill->ill_rq, 1, SL_TRACE,
    565 		    "igmp_input: TIMER = igmp_code %d igmp_type 0x%x",
    566 		    (int)ntohs(igmpa->igmpa_code),
    567 		    (int)ntohs(igmpa->igmpa_type));
    568 		mutex_exit(&ill->ill_lock);
    569 	}
    570 
    571 	/*
    572 	 * -Start the timers in all of our membership records
    573 	 *  for the physical interface on which the query
    574 	 *  arrived, excluding those that belong to the "all
    575 	 *  hosts" group (224.0.0.1).
    576 	 *
    577 	 * -Restart any timer that is already running but has
    578 	 *  a value longer than the requested timeout.
    579 	 *
    580 	 * -Use the value specified in the query message as
    581 	 *  the maximum timeout.
    582 	 */
    583 	next = (unsigned)INFINITY;
    584 
    585 	ilm = ilm_walker_start(&ilw, ill);
    586 	mutex_enter(&ill->ill_lock);
    587 	current = CURRENT_MSTIME;
    588 
    589 	for (; ilm != NULL; ilm = ilm_walker_step(&ilw, ilm)) {
    590 		/*
    591 		 * A multicast router joins INADDR_ANY address
    592 		 * to enable promiscuous reception of all
    593 		 * mcasts from the interface. This INADDR_ANY
    594 		 * is stored in the ilm_v6addr as V6 unspec addr
    595 		 */
    596 		if (!IN6_IS_ADDR_V4MAPPED(&ilm->ilm_v6addr))
    597 			continue;
    598 		if (ilm->ilm_addr == htonl(INADDR_ANY))
    599 			continue;
    600 		if (ilm->ilm_addr != htonl(INADDR_ALLHOSTS_GROUP) &&
    601 		    (igmpa->igmpa_group == 0) ||
    602 		    (igmpa->igmpa_group == ilm->ilm_addr)) {
    603 			if (ilm->ilm_timer > timer) {
    604 				MCAST_RANDOM_DELAY(ilm->ilm_timer, timer);
    605 				if (ilm->ilm_timer < next)
    606 					next = ilm->ilm_timer;
    607 				ilm->ilm_timer += current;
    608 			}
    609 		}
    610 	}
    611 	mutex_exit(&ill->ill_lock);
    612 	ilm_walker_finish(&ilw);
    613 
    614 	return (next);
    615 }
    616 
    617 static uint_t
    618 igmpv3_query_in(igmp3qa_t *igmp3qa, ill_t *ill, int igmplen)
    619 {
    620 	uint_t		i, next, mrd, qqi, timer, delay, numsrc;
    621 	uint_t		current;
    622 	ilm_t		*ilm;
    623 	ipaddr_t	*src_array;
    624 	uint8_t		qrv;
    625 	ip_stack_t	 *ipst;
    626 	ilm_walker_t	ilw;
    627 
    628 	ipst = ill->ill_ipst;
    629 	/* make sure numsrc matches packet size */
    630 	numsrc = ntohs(igmp3qa->igmp3qa_numsrc);
    631 	if (igmplen < IGMP_V3_QUERY_MINLEN + (numsrc * sizeof (ipaddr_t))) {
    632 		++ipst->ips_igmpstat.igps_rcv_tooshort;
    633 		return (0);
    634 	}
    635 	src_array = (ipaddr_t *)&igmp3qa[1];
    636 
    637 	++ipst->ips_igmpstat.igps_rcv_queries;
    638 
    639 	if ((mrd = (uint_t)igmp3qa->igmp3qa_mxrc) >= IGMP_V3_MAXRT_FPMIN) {
    640 		uint_t hdrval, mant, exp;
    641 		hdrval = (uint_t)igmp3qa->igmp3qa_mxrc;
    642 		mant = hdrval & IGMP_V3_MAXRT_MANT_MASK;
    643 		exp = (hdrval & IGMP_V3_MAXRT_EXP_MASK) >> 4;
    644 		mrd = (mant | 0x10) << (exp + 3);
    645 	}
    646 	if (mrd == 0)
    647 		mrd = MCAST_DEF_QUERY_RESP_INTERVAL;
    648 	timer = DSEC_TO_MSEC(mrd);
    649 	MCAST_RANDOM_DELAY(delay, timer);
    650 	next = (unsigned)INFINITY;
    651 	current = CURRENT_MSTIME;
    652 
    653 	if ((qrv = igmp3qa->igmp3qa_sqrv & IGMP_V3_RV_MASK) == 0)
    654 		ill->ill_mcast_rv = MCAST_DEF_ROBUSTNESS;
    655 	else
    656 		ill->ill_mcast_rv = qrv;
    657 
    658 	if ((qqi = (uint_t)igmp3qa->igmp3qa_qqic) >= IGMP_V3_QQI_FPMIN) {
    659 		uint_t hdrval, mant, exp;
    660 		hdrval = (uint_t)igmp3qa->igmp3qa_qqic;
    661 		mant = hdrval & IGMP_V3_QQI_MANT_MASK;
    662 		exp = (hdrval & IGMP_V3_QQI_EXP_MASK) >> 4;
    663 		qqi = (mant | 0x10) << (exp + 3);
    664 	}
    665 	ill->ill_mcast_qi = (qqi == 0) ? MCAST_DEF_QUERY_INTERVAL : qqi;
    666 
    667 	/*
    668 	 * If we have a pending general query response that's scheduled
    669 	 * sooner than the delay we calculated for this response, then
    670 	 * no action is required (RFC3376 section 5.2 rule 1)
    671 	 */
    672 	mutex_enter(&ill->ill_lock);
    673 	if (ill->ill_global_timer < (current + delay)) {
    674 		mutex_exit(&ill->ill_lock);
    675 		return (next);
    676 	}
    677 	mutex_exit(&ill->ill_lock);
    678 
    679 	/*
    680 	 * Now take action depending upon query type:
    681 	 * general, group specific, or group/source specific.
    682 	 */
    683 	if ((numsrc == 0) && (igmp3qa->igmp3qa_group == INADDR_ANY)) {
    684 		/*
    685 		 * general query
    686 		 * We know global timer is either not running or is
    687 		 * greater than our calculated delay, so reset it to
    688 		 * our delay (random value in range [0, response time]).
    689 		 */
    690 		mutex_enter(&ill->ill_lock);
    691 		ill->ill_global_timer =  current + delay;
    692 		mutex_exit(&ill->ill_lock);
    693 		next = delay;
    694 
    695 	} else {
    696 		/* group or group/source specific query */
    697 		ilm = ilm_walker_start(&ilw, ill);
    698 		mutex_enter(&ill->ill_lock);
    699 		for (; ilm != NULL; ilm = ilm_walker_step(&ilw, ilm)) {
    700 			if (!IN6_IS_ADDR_V4MAPPED(&ilm->ilm_v6addr) ||
    701 			    (ilm->ilm_addr == htonl(INADDR_ANY)) ||
    702 			    (ilm->ilm_addr == htonl(INADDR_ALLHOSTS_GROUP)) ||
    703 			    (igmp3qa->igmp3qa_group != ilm->ilm_addr))
    704 				continue;
    705 			/*
    706 			 * If the query is group specific or we have a
    707 			 * pending group specific query, the response is
    708 			 * group specific (pending sources list should be
    709 			 * empty).  Otherwise, need to update the pending
    710 			 * sources list for the group and source specific
    711 			 * response.
    712 			 */
    713 			if (numsrc == 0 || (ilm->ilm_timer < INFINITY &&
    714 			    SLIST_IS_EMPTY(ilm->ilm_pendsrcs))) {
    715 group_query:
    716 				FREE_SLIST(ilm->ilm_pendsrcs);
    717 				ilm->ilm_pendsrcs = NULL;
    718 			} else {
    719 				boolean_t overflow;
    720 				slist_t *pktl;
    721 				if (numsrc > MAX_FILTER_SIZE ||
    722 				    (ilm->ilm_pendsrcs == NULL &&
    723 				    (ilm->ilm_pendsrcs = l_alloc()) == NULL)) {
    724 					/*
    725 					 * We've been sent more sources than
    726 					 * we can deal with; or we can't deal
    727 					 * with a source list at all.  Revert
    728 					 * to a group specific query.
    729 					 */
    730 					goto group_query;
    731 				}
    732 				if ((pktl = l_alloc()) == NULL)
    733 					goto group_query;
    734 				pktl->sl_numsrc = numsrc;
    735 				for (i = 0; i < numsrc; i++)
    736 					IN6_IPADDR_TO_V4MAPPED(src_array[i],
    737 					    &(pktl->sl_addr[i]));
    738 				l_union_in_a(ilm->ilm_pendsrcs, pktl,
    739 				    &overflow);
    740 				l_free(pktl);
    741 				if (overflow)
    742 					goto group_query;
    743 			}
    744 
    745 			ilm->ilm_timer = (ilm->ilm_timer == INFINITY) ?
    746 			    INFINITY : (ilm->ilm_timer - current);
    747 			/* choose soonest timer */
    748 			ilm->ilm_timer = MIN(ilm->ilm_timer, delay);
    749 			if (ilm->ilm_timer < next)
    750 				next = ilm->ilm_timer;
    751 			ilm->ilm_timer += current;
    752 		}
    753 		mutex_exit(&ill->ill_lock);
    754 		ilm_walker_finish(&ilw);
    755 	}
    756 
    757 	return (next);
    758 }
    759 
    760 void
    761 igmp_joingroup(ilm_t *ilm)
    762 {
    763 	uint_t	timer;
    764 	ill_t	*ill;
    765 	ip_stack_t	*ipst = ilm->ilm_ipst;
    766 
    767 	ill = ilm->ilm_ipif->ipif_ill;
    768 
    769 	ASSERT(IAM_WRITER_ILL(ill));
    770 	ASSERT(ilm->ilm_ill == NULL && !ilm->ilm_ipif->ipif_isv6);
    771 
    772 	mutex_enter(&ill->ill_lock);
    773 	if (ilm->ilm_addr == htonl(INADDR_ALLHOSTS_GROUP)) {
    774 		ilm->ilm_rtx.rtx_timer = INFINITY;
    775 		ilm->ilm_state = IGMP_OTHERMEMBER;
    776 		mutex_exit(&ill->ill_lock);
    777 	} else {
    778 		ip1dbg(("Querier mode %d, sending report, group %x\n",
    779 		    ill->ill_mcast_type, htonl(ilm->ilm_addr)));
    780 		if (ill->ill_mcast_type == IGMP_V1_ROUTER) {
    781 			mutex_exit(&ill->ill_lock);
    782 			igmp_sendpkt(ilm, IGMP_V1_MEMBERSHIP_REPORT, 0);
    783 			mutex_enter(&ill->ill_lock);
    784 		} else if (ill->ill_mcast_type == IGMP_V2_ROUTER) {
    785 			mutex_exit(&ill->ill_lock);
    786 			igmp_sendpkt(ilm, IGMP_V2_MEMBERSHIP_REPORT, 0);
    787 			mutex_enter(&ill->ill_lock);
    788 		} else if (ill->ill_mcast_type == IGMP_V3_ROUTER) {
    789 			mrec_t *rp;
    790 			mcast_record_t rtype;
    791 			/*
    792 			 * The possible state changes we need to handle here:
    793 			 *   Old State	New State	Report
    794 			 *
    795 			 *   INCLUDE(0)	INCLUDE(X)	ALLOW(X),BLOCK(0)
    796 			 *   INCLUDE(0)	EXCLUDE(X)	TO_EX(X)
    797 			 *
    798 			 * No need to send the BLOCK(0) report; ALLOW(X)
    799 			 * is enough.
    800 			 */
    801 			rtype = (ilm->ilm_fmode == MODE_IS_INCLUDE) ?
    802 			    ALLOW_NEW_SOURCES : CHANGE_TO_EXCLUDE;
    803 			rp = mcast_bldmrec(rtype, &ilm->ilm_v6addr,
    804 			    ilm->ilm_filter, NULL);
    805 			mutex_exit(&ill->ill_lock);
    806 			igmpv3_sendrpt(ilm->ilm_ipif, rp);
    807 			mutex_enter(&ill->ill_lock);
    808 			/*
    809 			 * Set up retransmission state.  Timer is set below,
    810 			 * for both v3 and older versions.
    811 			 */
    812 			mcast_init_rtx(ill, &ilm->ilm_rtx, rtype,
    813 			    ilm->ilm_filter);
    814 		}
    815 
    816 		/* Set the ilm timer value */
    817 		ilm->ilm_rtx.rtx_cnt = ill->ill_mcast_rv;
    818 		MCAST_RANDOM_DELAY(ilm->ilm_rtx.rtx_timer,
    819 		    SEC_TO_MSEC(IGMP_MAX_HOST_REPORT_DELAY));
    820 		timer = ilm->ilm_rtx.rtx_timer;
    821 		ilm->ilm_rtx.rtx_timer += CURRENT_MSTIME;
    822 		ilm->ilm_state = IGMP_IREPORTEDLAST;
    823 		mutex_exit(&ill->ill_lock);
    824 
    825 		/*
    826 		 * We need to restart the IGMP timers, but we can't do it here
    827 		 * since we're inside the IPSQ and thus igmp_start_timers() ->
    828 		 * untimeout() (inside the IPSQ, waiting for a running timeout
    829 		 * to finish) could deadlock with igmp_timeout_handler() ->
    830 		 * ipsq_enter() (running the timeout, waiting to get inside
    831 		 * the IPSQ).  We also can't just delay it until after we
    832 		 * ipsq_exit() since we could be inside more than one IPSQ and
    833 		 * thus still have the other IPSQs pinned after we exit -- and
    834 		 * igmp_start_timers() may be trying to enter one of those.
    835 		 * Instead, signal a dedicated thread that will do it for us.
    836 		 */
    837 		mutex_enter(&ipst->ips_igmp_timer_lock);
    838 		ipst->ips_igmp_deferred_next = MIN(timer,
    839 		    ipst->ips_igmp_deferred_next);
    840 		mutex_exit(&ipst->ips_igmp_timer_lock);
    841 		mcast_signal_restart_thread(ipst);
    842 	}
    843 
    844 	if (ip_debug > 1) {
    845 		(void) mi_strlog(ilm->ilm_ipif->ipif_ill->ill_rq, 1, SL_TRACE,
    846 		    "igmp_joingroup: multicast_type %d timer %d",
    847 		    (ilm->ilm_ipif->ipif_ill->ill_mcast_type),
    848 		    (int)ntohl(timer));
    849 	}
    850 }
    851 
    852 void
    853 mld_joingroup(ilm_t *ilm)
    854 {
    855 	uint_t	timer;
    856 	ill_t	*ill;
    857 	ip_stack_t	*ipst = ilm->ilm_ipst;
    858 
    859 	ill = ilm->ilm_ill;
    860 
    861 	ASSERT(IAM_WRITER_ILL(ill));
    862 	ASSERT(ilm->ilm_ipif == NULL && ill->ill_isv6);
    863 
    864 	mutex_enter(&ill->ill_lock);
    865 	if (IN6_ARE_ADDR_EQUAL(&ipv6_all_hosts_mcast, &ilm->ilm_v6addr)) {
    866 		ilm->ilm_rtx.rtx_timer = INFINITY;
    867 		ilm->ilm_state = IGMP_OTHERMEMBER;
    868 		mutex_exit(&ill->ill_lock);
    869 	} else {
    870 		if (ill->ill_mcast_type == MLD_V1_ROUTER) {
    871 			mutex_exit(&ill->ill_lock);
    872 			mld_sendpkt(ilm, MLD_LISTENER_REPORT, NULL);
    873 			mutex_enter(&ill->ill_lock);
    874 		} else {
    875 			mrec_t *rp;
    876 			mcast_record_t rtype;
    877 			/*
    878 			 * The possible state changes we need to handle here:
    879 			 *	Old State   New State	Report
    880 			 *
    881 			 *	INCLUDE(0)  INCLUDE(X)	ALLOW(X),BLOCK(0)
    882 			 *	INCLUDE(0)  EXCLUDE(X)	TO_EX(X)
    883 			 *
    884 			 * No need to send the BLOCK(0) report; ALLOW(X)
    885 			 * is enough
    886 			 */
    887 			rtype = (ilm->ilm_fmode == MODE_IS_INCLUDE) ?
    888 			    ALLOW_NEW_SOURCES : CHANGE_TO_EXCLUDE;
    889 			rp = mcast_bldmrec(rtype, &ilm->ilm_v6addr,
    890 			    ilm->ilm_filter, NULL);
    891 			mutex_exit(&ill->ill_lock);
    892 			mldv2_sendrpt(ill, rp);
    893 			mutex_enter(&ill->ill_lock);
    894 			/*
    895 			 * Set up retransmission state.  Timer is set below,
    896 			 * for both v2 and v1.
    897 			 */
    898 			mcast_init_rtx(ill, &ilm->ilm_rtx, rtype,
    899 			    ilm->ilm_filter);
    900 		}
    901 
    902 		/* Set the ilm timer value */
    903 		ASSERT(ill->ill_mcast_type != MLD_V2_ROUTER ||
    904 		    ilm->ilm_rtx.rtx_cnt > 0);
    905 
    906 		ilm->ilm_rtx.rtx_cnt = ill->ill_mcast_rv;
    907 		MCAST_RANDOM_DELAY(ilm->ilm_rtx.rtx_timer,
    908 		    SEC_TO_MSEC(ICMP6_MAX_HOST_REPORT_DELAY));
    909 		timer = ilm->ilm_rtx.rtx_timer;
    910 		ilm->ilm_rtx.rtx_timer += CURRENT_MSTIME;
    911 		ilm->ilm_state = IGMP_IREPORTEDLAST;
    912 		mutex_exit(&ill->ill_lock);
    913 
    914 		/*
    915 		 * Signal another thread to restart the timers.  See the
    916 		 * comment in igmp_joingroup() for details.
    917 		 */
    918 		mutex_enter(&ipst->ips_mld_timer_lock);
    919 		ipst->ips_mld_deferred_next = MIN(timer,
    920 		    ipst->ips_mld_deferred_next);
    921 		mutex_exit(&ipst->ips_mld_timer_lock);
    922 		mcast_signal_restart_thread(ipst);
    923 	}
    924 
    925 	if (ip_debug > 1) {
    926 		(void) mi_strlog(ilm->ilm_ill->ill_rq, 1, SL_TRACE,
    927 		    "mld_joingroup: multicast_type %d timer %d",
    928 		    (ilm->ilm_ill->ill_mcast_type),
    929 		    (int)ntohl(timer));
    930 	}
    931 }
    932 
    933 void
    934 igmp_leavegroup(ilm_t *ilm)
    935 {
    936 	ill_t *ill = ilm->ilm_ipif->ipif_ill;
    937 
    938 	ASSERT(ilm->ilm_ill == NULL);
    939 	ASSERT(!ill->ill_isv6);
    940 
    941 	mutex_enter(&ill->ill_lock);
    942 	if (ilm->ilm_state == IGMP_IREPORTEDLAST &&
    943 	    ill->ill_mcast_type == IGMP_V2_ROUTER &&
    944 	    (ilm->ilm_addr != htonl(INADDR_ALLHOSTS_GROUP))) {
    945 		mutex_exit(&ill->ill_lock);
    946 		igmp_sendpkt(ilm, IGMP_V2_LEAVE_GROUP,
    947 		    (htonl(INADDR_ALLRTRS_GROUP)));
    948 		return;
    949 	} else if ((ill->ill_mcast_type == IGMP_V3_ROUTER) &&
    950 	    (ilm->ilm_addr != htonl(INADDR_ALLHOSTS_GROUP))) {
    951 		mrec_t *rp;
    952 		/*
    953 		 * The possible state changes we need to handle here:
    954 		 *	Old State	New State	Report
    955 		 *
    956 		 *	INCLUDE(X)	INCLUDE(0)	ALLOW(0),BLOCK(X)
    957 		 *	EXCLUDE(X)	INCLUDE(0)	TO_IN(0)
    958 		 *
    959 		 * No need to send the ALLOW(0) report; BLOCK(X) is enough
    960 		 */
    961 		if (ilm->ilm_fmode == MODE_IS_INCLUDE) {
    962 			rp = mcast_bldmrec(BLOCK_OLD_SOURCES, &ilm->ilm_v6addr,
    963 			    ilm->ilm_filter, NULL);
    964 		} else {
    965 			rp = mcast_bldmrec(CHANGE_TO_INCLUDE, &ilm->ilm_v6addr,
    966 			    NULL, NULL);
    967 		}
    968 		mutex_exit(&ill->ill_lock);
    969 		igmpv3_sendrpt(ilm->ilm_ipif, rp);
    970 		return;
    971 	}
    972 	mutex_exit(&ill->ill_lock);
    973 }
    974 
    975 void
    976 mld_leavegroup(ilm_t *ilm)
    977 {
    978 	ill_t *ill = ilm->ilm_ill;
    979 
    980 	ASSERT(ilm->ilm_ipif == NULL);
    981 	ASSERT(ill->ill_isv6);
    982 
    983 	mutex_enter(&ill->ill_lock);
    984 	if (ilm->ilm_state == IGMP_IREPORTEDLAST &&
    985 	    ill->ill_mcast_type == MLD_V1_ROUTER &&
    986 	    (!IN6_ARE_ADDR_EQUAL(&ipv6_all_hosts_mcast, &ilm->ilm_v6addr))) {
    987 		mutex_exit(&ill->ill_lock);
    988 		mld_sendpkt(ilm, MLD_LISTENER_REDUCTION, &ipv6_all_rtrs_mcast);
    989 		return;
    990 	} else if ((ill->ill_mcast_type == MLD_V2_ROUTER) &&
    991 	    (!IN6_ARE_ADDR_EQUAL(&ipv6_all_hosts_mcast, &ilm->ilm_v6addr))) {
    992 		mrec_t *rp;
    993 		/*
    994 		 * The possible state changes we need to handle here:
    995 		 *	Old State	New State	Report
    996 		 *
    997 		 *	INCLUDE(X)	INCLUDE(0)	ALLOW(0),BLOCK(X)
    998 		 *	EXCLUDE(X)	INCLUDE(0)	TO_IN(0)
    999 		 *
   1000 		 * No need to send the ALLOW(0) report; BLOCK(X) is enough
   1001 		 */
   1002 		if (ilm->ilm_fmode == MODE_IS_INCLUDE) {
   1003 			rp = mcast_bldmrec(BLOCK_OLD_SOURCES, &ilm->ilm_v6addr,
   1004 			    ilm->ilm_filter, NULL);
   1005 		} else {
   1006 			rp = mcast_bldmrec(CHANGE_TO_INCLUDE, &ilm->ilm_v6addr,
   1007 			    NULL, NULL);
   1008 		}
   1009 		mutex_exit(&ill->ill_lock);
   1010 		mldv2_sendrpt(ill, rp);
   1011 		return;
   1012 	}
   1013 	mutex_exit(&ill->ill_lock);
   1014 }
   1015 
   1016 void
   1017 igmp_statechange(ilm_t *ilm, mcast_record_t fmode, slist_t *flist)
   1018 {
   1019 	ill_t *ill;
   1020 	mrec_t *rp;
   1021 	ip_stack_t	*ipst = ilm->ilm_ipst;
   1022 
   1023 	ASSERT(ilm != NULL);
   1024 
   1025 	/* state change reports should only be sent if the router is v3 */
   1026 	if (ilm->ilm_ipif->ipif_ill->ill_mcast_type != IGMP_V3_ROUTER)
   1027 		return;
   1028 
   1029 	if (ilm->ilm_ill == NULL) {
   1030 		ASSERT(ilm->ilm_ipif != NULL);
   1031 		ill = ilm->ilm_ipif->ipif_ill;
   1032 	} else {
   1033 		ill = ilm->ilm_ill;
   1034 	}
   1035 
   1036 	mutex_enter(&ill->ill_lock);
   1037 
   1038 	/*
   1039 	 * Compare existing(old) state with the new state and prepare
   1040 	 * State Change Report, according to the rules in RFC 3376:
   1041 	 *
   1042 	 *	Old State	New State	State Change Report
   1043 	 *
   1044 	 *	INCLUDE(A)	INCLUDE(B)	ALLOW(B-A),BLOCK(A-B)
   1045 	 *	EXCLUDE(A)	EXCLUDE(B)	ALLOW(A-B),BLOCK(B-A)
   1046 	 *	INCLUDE(A)	EXCLUDE(B)	TO_EX(B)
   1047 	 *	EXCLUDE(A)	INCLUDE(B)	TO_IN(B)
   1048 	 */
   1049 
   1050 	if (ilm->ilm_fmode == fmode) {
   1051 		slist_t	*a_minus_b = NULL, *b_minus_a = NULL;
   1052 		slist_t *allow, *block;
   1053 		if (((a_minus_b = l_alloc()) == NULL) ||
   1054 		    ((b_minus_a = l_alloc()) == NULL)) {
   1055 			l_free(a_minus_b);
   1056 			if (ilm->ilm_fmode == MODE_IS_INCLUDE)
   1057 				goto send_to_ex;
   1058 			else
   1059 				goto send_to_in;
   1060 		}
   1061 		l_difference(ilm->ilm_filter, flist, a_minus_b);
   1062 		l_difference(flist, ilm->ilm_filter, b_minus_a);
   1063 		if (ilm->ilm_fmode == MODE_IS_INCLUDE) {
   1064 			allow = b_minus_a;
   1065 			block = a_minus_b;
   1066 		} else {
   1067 			allow = a_minus_b;
   1068 			block = b_minus_a;
   1069 		}
   1070 		rp = NULL;
   1071 		if (!SLIST_IS_EMPTY(allow))
   1072 			rp = mcast_bldmrec(ALLOW_NEW_SOURCES, &ilm->ilm_v6addr,
   1073 			    allow, rp);
   1074 		if (!SLIST_IS_EMPTY(block))
   1075 			rp = mcast_bldmrec(BLOCK_OLD_SOURCES, &ilm->ilm_v6addr,
   1076 			    block, rp);
   1077 		l_free(a_minus_b);
   1078 		l_free(b_minus_a);
   1079 	} else if (ilm->ilm_fmode == MODE_IS_INCLUDE) {
   1080 send_to_ex:
   1081 		rp = mcast_bldmrec(CHANGE_TO_EXCLUDE, &ilm->ilm_v6addr, flist,
   1082 		    NULL);
   1083 	} else {
   1084 send_to_in:
   1085 		rp = mcast_bldmrec(CHANGE_TO_INCLUDE, &ilm->ilm_v6addr, flist,
   1086 		    NULL);
   1087 	}
   1088 
   1089 	/*
   1090 	 * Need to set up retransmission state; merge the new info with the
   1091 	 * current state (which may be null).  If the timer is not currently
   1092 	 * running, signal a thread to restart it -- see the comment in
   1093 	 * igmp_joingroup() for details.
   1094 	 */
   1095 	rp = mcast_merge_rtx(ilm, rp, flist);
   1096 	if (ilm->ilm_rtx.rtx_timer == INFINITY) {
   1097 		ilm->ilm_rtx.rtx_cnt = ill->ill_mcast_rv;
   1098 		MCAST_RANDOM_DELAY(ilm->ilm_rtx.rtx_timer,
   1099 		    SEC_TO_MSEC(IGMP_MAX_HOST_REPORT_DELAY));
   1100 		mutex_enter(&ipst->ips_igmp_timer_lock);
   1101 		ipst->ips_igmp_deferred_next = MIN(ipst->ips_igmp_deferred_next,
   1102 		    ilm->ilm_rtx.rtx_timer);
   1103 		ilm->ilm_rtx.rtx_timer += CURRENT_MSTIME;
   1104 		mutex_exit(&ipst->ips_igmp_timer_lock);
   1105 		mcast_signal_restart_thread(ipst);
   1106 	}
   1107 
   1108 	mutex_exit(&ill->ill_lock);
   1109 	igmpv3_sendrpt(ilm->ilm_ipif, rp);
   1110 }
   1111 
   1112 void
   1113 mld_statechange(ilm_t *ilm, mcast_record_t fmode, slist_t *flist)
   1114 {
   1115 	ill_t *ill;
   1116 	mrec_t *rp = NULL;
   1117 	ip_stack_t	*ipst = ilm->ilm_ipst;
   1118 
   1119 	ASSERT(ilm != NULL);
   1120 
   1121 	ill = ilm->ilm_ill;
   1122 
   1123 	/* only need to send if we have an mldv2-capable router */
   1124 	mutex_enter(&ill->ill_lock);
   1125 	if (ill->ill_mcast_type != MLD_V2_ROUTER) {
   1126 		mutex_exit(&ill->ill_lock);
   1127 		return;
   1128 	}
   1129 
   1130 	/*
   1131 	 * Compare existing (old) state with the new state passed in
   1132 	 * and send appropriate MLDv2 State Change Report.
   1133 	 *
   1134 	 *	Old State	New State	State Change Report
   1135 	 *
   1136 	 *	INCLUDE(A)	INCLUDE(B)	ALLOW(B-A),BLOCK(A-B)
   1137 	 *	EXCLUDE(A)	EXCLUDE(B)	ALLOW(A-B),BLOCK(B-A)
   1138 	 *	INCLUDE(A)	EXCLUDE(B)	TO_EX(B)
   1139 	 *	EXCLUDE(A)	INCLUDE(B)	TO_IN(B)
   1140 	 */
   1141 	if (ilm->ilm_fmode == fmode) {
   1142 		slist_t	*a_minus_b = NULL, *b_minus_a = NULL;
   1143 		slist_t *allow, *block;
   1144 		if (((a_minus_b = l_alloc()) == NULL) ||
   1145 		    ((b_minus_a = l_alloc()) == NULL)) {
   1146 			l_free(a_minus_b);
   1147 			if (ilm->ilm_fmode == MODE_IS_INCLUDE)
   1148 				goto send_to_ex;
   1149 			else
   1150 				goto send_to_in;
   1151 		}
   1152 		l_difference(ilm->ilm_filter, flist, a_minus_b);
   1153 		l_difference(flist, ilm->ilm_filter, b_minus_a);
   1154 		if (ilm->ilm_fmode == MODE_IS_INCLUDE) {
   1155 			allow = b_minus_a;
   1156 			block = a_minus_b;
   1157 		} else {
   1158 			allow = a_minus_b;
   1159 			block = b_minus_a;
   1160 		}
   1161 		if (!SLIST_IS_EMPTY(allow))
   1162 			rp = mcast_bldmrec(ALLOW_NEW_SOURCES, &ilm->ilm_v6addr,
   1163 			    allow, rp);
   1164 		if (!SLIST_IS_EMPTY(block))
   1165 			rp = mcast_bldmrec(BLOCK_OLD_SOURCES, &ilm->ilm_v6addr,
   1166 			    block, rp);
   1167 		l_free(a_minus_b);
   1168 		l_free(b_minus_a);
   1169 	} else if (ilm->ilm_fmode == MODE_IS_INCLUDE) {
   1170 send_to_ex:
   1171 		rp = mcast_bldmrec(CHANGE_TO_EXCLUDE, &ilm->ilm_v6addr, flist,
   1172 		    NULL);
   1173 	} else {
   1174 send_to_in:
   1175 		rp = mcast_bldmrec(CHANGE_TO_INCLUDE, &ilm->ilm_v6addr, flist,
   1176 		    NULL);
   1177 	}
   1178 
   1179 	/*
   1180 	 * Need to set up retransmission state; merge the new info with the
   1181 	 * current state (which may be null).  If the timer is not currently
   1182 	 * running, signal a thread to restart it -- see the comment in
   1183 	 * igmp_joingroup() for details.
   1184 	 */
   1185 	rp = mcast_merge_rtx(ilm, rp, flist);
   1186 	ASSERT(ilm->ilm_rtx.rtx_cnt > 0);
   1187 	if (ilm->ilm_rtx.rtx_timer == INFINITY) {
   1188 		ilm->ilm_rtx.rtx_cnt = ill->ill_mcast_rv;
   1189 		MCAST_RANDOM_DELAY(ilm->ilm_rtx.rtx_timer,
   1190 		    SEC_TO_MSEC(ICMP6_MAX_HOST_REPORT_DELAY));
   1191 		mutex_enter(&ipst->ips_mld_timer_lock);
   1192 		ipst->ips_mld_deferred_next =
   1193 		    MIN(ipst->ips_mld_deferred_next, ilm->ilm_rtx.rtx_timer);
   1194 		ilm->ilm_rtx.rtx_timer += CURRENT_MSTIME;
   1195 		mutex_exit(&ipst->ips_mld_timer_lock);
   1196 		mcast_signal_restart_thread(ipst);
   1197 	}
   1198 
   1199 	mutex_exit(&ill->ill_lock);
   1200 	mldv2_sendrpt(ill, rp);
   1201 }
   1202 
   1203 uint_t
   1204 igmp_timeout_handler_per_ill(ill_t *ill)
   1205 {
   1206 	uint_t	next = INFINITY, current;
   1207 	ilm_t	*ilm;
   1208 	ipif_t	*ipif;
   1209 	mrec_t	*rp = NULL;
   1210 	mrec_t	*rtxrp = NULL;
   1211 	rtx_state_t *rtxp;
   1212 	mcast_record_t	rtype;
   1213 
   1214 	ASSERT(IAM_WRITER_ILL(ill));
   1215 
   1216 	mutex_enter(&ill->ill_lock);
   1217 
   1218 	current = CURRENT_MSTIME;
   1219 	/* First check the global timer on this interface */
   1220 	if (ill->ill_global_timer == INFINITY)
   1221 		goto per_ilm_timer;
   1222 	if (ill->ill_global_timer <= (current + CURRENT_OFFSET)) {
   1223 		ill->ill_global_timer = INFINITY;
   1224 		/*
   1225 		 * Send report for each group on this interface.
   1226 		 * Since we just set the global timer (received a v3 general
   1227 		 * query), need to skip the all hosts addr (224.0.0.1), per
   1228 		 * RFC 3376 section 5.
   1229 		 */
   1230 		for (ilm = ill->ill_ilm; ilm != NULL; ilm = ilm->ilm_next) {
   1231 			if (ilm->ilm_addr == htonl(INADDR_ALLHOSTS_GROUP))
   1232 				continue;
   1233 			ASSERT(ilm->ilm_ipif != NULL);
   1234 			ilm->ilm_ipif->ipif_igmp_rpt =
   1235 			    mcast_bldmrec(ilm->ilm_fmode, &ilm->ilm_v6addr,
   1236 			    ilm->ilm_filter, ilm->ilm_ipif->ipif_igmp_rpt);
   1237 			/*
   1238 			 * Since we're sending a report on this group, okay
   1239 			 * to delete pending group-specific timers.  Note
   1240 			 * that group-specific retransmit timers still need
   1241 			 * to be checked in the per_ilm_timer for-loop.
   1242 			 */
   1243 			ilm->ilm_timer = INFINITY;
   1244 			ilm->ilm_state = IGMP_IREPORTEDLAST;
   1245 			FREE_SLIST(ilm->ilm_pendsrcs);
   1246 			ilm->ilm_pendsrcs = NULL;
   1247 		}
   1248 		/*
   1249 		 * We've built per-ipif mrec lists; walk the ill's ipif list
   1250 		 * and send a report for each ipif that has an mrec list.
   1251 		 */
   1252 		for (ipif = ill->ill_ipif; ipif != NULL;
   1253 		    ipif = ipif->ipif_next) {
   1254 			if (ipif->ipif_igmp_rpt == NULL)
   1255 				continue;
   1256 			mutex_exit(&ill->ill_lock);
   1257 			igmpv3_sendrpt(ipif, ipif->ipif_igmp_rpt);
   1258 			mutex_enter(&ill->ill_lock);
   1259 			/* mrec list was freed by igmpv3_sendrpt() */
   1260 			ipif->ipif_igmp_rpt = NULL;
   1261 		}
   1262 	} else {
   1263 		if ((ill->ill_global_timer - current) < next)
   1264 			next = ill->ill_global_timer - current;
   1265 	}
   1266 
   1267 per_ilm_timer:
   1268 	for (ilm = ill->ill_ilm; ilm != NULL; ilm = ilm->ilm_next) {
   1269 		if (ilm->ilm_timer == INFINITY)
   1270 			goto per_ilm_rtxtimer;
   1271 
   1272 		if (ilm->ilm_timer > (current + CURRENT_OFFSET)) {
   1273 			if ((ilm->ilm_timer - current) < next)
   1274 				next = ilm->ilm_timer - current;
   1275 
   1276 			if (ip_debug > 1) {
   1277 				(void) mi_strlog(ill->ill_rq, 1, SL_TRACE,
   1278 				    "igmp_timo_hlr 2: ilm_timr %d "
   1279 				    "typ %d nxt %d",
   1280 				    (int)ntohl(ilm->ilm_timer - current),
   1281 				    (ill->ill_mcast_type), next);
   1282 			}
   1283 
   1284 			goto per_ilm_rtxtimer;
   1285 		}
   1286 
   1287 		/* the timer has expired, need to take action */
   1288 		ilm->ilm_timer = INFINITY;
   1289 		ilm->ilm_state = IGMP_IREPORTEDLAST;
   1290 		if (ill->ill_mcast_type == IGMP_V1_ROUTER) {
   1291 			mutex_exit(&ill->ill_lock);
   1292 			igmp_sendpkt(ilm, IGMP_V1_MEMBERSHIP_REPORT, 0);
   1293 			mutex_enter(&ill->ill_lock);
   1294 		} else if (ill->ill_mcast_type == IGMP_V2_ROUTER) {
   1295 			mutex_exit(&ill->ill_lock);
   1296 			igmp_sendpkt(ilm, IGMP_V2_MEMBERSHIP_REPORT, 0);
   1297 			mutex_enter(&ill->ill_lock);
   1298 		} else {
   1299 			slist_t *rsp;
   1300 			if (!SLIST_IS_EMPTY(ilm->ilm_pendsrcs) &&
   1301 			    (rsp = l_alloc()) != NULL) {
   1302 				/*
   1303 				 * Contents of reply depend on pending
   1304 				 * requested source list.
   1305 				 */
   1306 				if (ilm->ilm_fmode == MODE_IS_INCLUDE) {
   1307 					l_intersection(ilm->ilm_filter,
   1308 					    ilm->ilm_pendsrcs, rsp);
   1309 				} else {
   1310 					l_difference(ilm->ilm_pendsrcs,
   1311 					    ilm->ilm_filter, rsp);
   1312 				}
   1313 				FREE_SLIST(ilm->ilm_pendsrcs);
   1314 				ilm->ilm_pendsrcs = NULL;
   1315 				if (!SLIST_IS_EMPTY(rsp))
   1316 					rp = mcast_bldmrec(MODE_IS_INCLUDE,
   1317 					    &ilm->ilm_v6addr, rsp, rp);
   1318 				FREE_SLIST(rsp);
   1319 			} else {
   1320 				/*
   1321 				 * Either the pending request is just group-
   1322 				 * specific, or we couldn't get the resources
   1323 				 * (rsp) to build a source-specific reply.
   1324 				 */
   1325 				rp = mcast_bldmrec(ilm->ilm_fmode,
   1326 				    &ilm->ilm_v6addr, ilm->ilm_filter, rp);
   1327 			}
   1328 			mutex_exit(&ill->ill_lock);
   1329 			igmpv3_sendrpt(ill->ill_ipif, rp);
   1330 			mutex_enter(&ill->ill_lock);
   1331 			rp = NULL;
   1332 		}
   1333 
   1334 per_ilm_rtxtimer:
   1335 		rtxp = &ilm->ilm_rtx;
   1336 
   1337 		if (rtxp->rtx_timer == INFINITY)
   1338 			continue;
   1339 		if (rtxp->rtx_timer > (current + CURRENT_OFFSET)) {
   1340 			if ((rtxp->rtx_timer - current) < next)
   1341 				next = rtxp->rtx_timer - current;
   1342 			continue;
   1343 		}
   1344 
   1345 		rtxp->rtx_timer = INFINITY;
   1346 		ilm->ilm_state = IGMP_IREPORTEDLAST;
   1347 		if (ill->ill_mcast_type == IGMP_V1_ROUTER) {
   1348 			mutex_exit(&ill->ill_lock);
   1349 			igmp_sendpkt(ilm, IGMP_V1_MEMBERSHIP_REPORT, 0);
   1350 			mutex_enter(&ill->ill_lock);
   1351 			continue;
   1352 		} else if (ill->ill_mcast_type == IGMP_V2_ROUTER) {
   1353 			mutex_exit(&ill->ill_lock);
   1354 			igmp_sendpkt(ilm, IGMP_V2_MEMBERSHIP_REPORT, 0);
   1355 			mutex_enter(&ill->ill_lock);
   1356 			continue;
   1357 		}
   1358 
   1359 		/*
   1360 		 * The retransmit timer has popped, and our router is
   1361 		 * IGMPv3.  We have to delve into the retransmit state
   1362 		 * stored in the ilm.
   1363 		 *
   1364 		 * Decrement the retransmit count.  If the fmode rtx
   1365 		 * count is active, decrement it, and send a filter
   1366 		 * mode change report with the ilm's source list.
   1367 		 * Otherwise, send a source list change report with
   1368 		 * the current retransmit lists.
   1369 		 */
   1370 		ASSERT(rtxp->rtx_cnt > 0);
   1371 		ASSERT(rtxp->rtx_cnt >= rtxp->rtx_fmode_cnt);
   1372 		rtxp->rtx_cnt--;
   1373 		if (rtxp->rtx_fmode_cnt > 0) {
   1374 			rtxp->rtx_fmode_cnt--;
   1375 			rtype = (ilm->ilm_fmode == MODE_IS_INCLUDE) ?
   1376 			    CHANGE_TO_INCLUDE : CHANGE_TO_EXCLUDE;
   1377 			rtxrp = mcast_bldmrec(rtype, &ilm->ilm_v6addr,
   1378 			    ilm->ilm_filter, rtxrp);
   1379 		} else {
   1380 			rtxrp = mcast_bldmrec(ALLOW_NEW_SOURCES,
   1381 			    &ilm->ilm_v6addr, rtxp->rtx_allow, rtxrp);
   1382 			rtxrp = mcast_bldmrec(BLOCK_OLD_SOURCES,
   1383 			    &ilm->ilm_v6addr, rtxp->rtx_block, rtxrp);
   1384 		}
   1385 		if (rtxp->rtx_cnt > 0) {
   1386 			MCAST_RANDOM_DELAY(rtxp->rtx_timer,
   1387 			    SEC_TO_MSEC(IGMP_MAX_HOST_REPORT_DELAY));
   1388 			if (rtxp->rtx_timer < next)
   1389 				next = rtxp->rtx_timer;
   1390 			rtxp->rtx_timer += current;
   1391 		} else {
   1392 			ASSERT(rtxp->rtx_timer == INFINITY);
   1393 			CLEAR_SLIST(rtxp->rtx_allow);
   1394 			CLEAR_SLIST(rtxp->rtx_block);
   1395 		}
   1396 		mutex_exit(&ill->ill_lock);
   1397 		igmpv3_sendrpt(ilm->ilm_ipif, rtxrp);
   1398 		mutex_enter(&ill->ill_lock);
   1399 		rtxrp = NULL;
   1400 	}
   1401 
   1402 	mutex_exit(&ill->ill_lock);
   1403 
   1404 	return (next);
   1405 }
   1406 
   1407 /*
   1408  * igmp_timeout_handler:
   1409  * Called when there are timeout events, every next * TMEOUT_INTERVAL (tick).
   1410  * Returns number of ticks to next event (or 0 if none).
   1411  *
   1412  * As part of multicast join and leave igmp we may need to send out an
   1413  * igmp request. The igmp related state variables in the ilm are protected
   1414  * by ill_lock. A single global igmp timer is used to track igmp timeouts.
   1415  * igmp_timer_lock protects the global igmp_timeout_id. igmp_start_timers
   1416  * starts the igmp timer if needed. It serializes multiple threads trying to
   1417  * simultaneously start the timer using the igmp_timer_setter_active flag.
   1418  *
   1419  * igmp_input() receives igmp queries and responds to the queries
   1420  * in a delayed fashion by posting a timer i.e. it calls igmp_start_timers().
   1421  * Later the igmp_timer fires, the timeout handler igmp_timeout_handler()
   1422  * performs the action exclusively after entering each ill's ipsq as writer.
   1423  * (The need to enter the IPSQ is largely historical but there are still some
   1424  * fields like ilm_filter that rely on it.)
   1425  *
   1426  * The igmp_slowtimeo() function is called thru another timer.
   1427  * igmp_slowtimeout_lock protects the igmp_slowtimeout_id
   1428  */
   1429 void
   1430 igmp_timeout_handler(void *arg)
   1431 {
   1432 	ill_t	*ill;
   1433 	uint_t  global_next = INFINITY;
   1434 	uint_t  next;
   1435 	ill_walk_context_t ctx;
   1436 	boolean_t success;
   1437 	ip_stack_t *ipst = arg;
   1438 
   1439 	ASSERT(arg != NULL);
   1440 	mutex_enter(&ipst->ips_igmp_timer_lock);
   1441 	ASSERT(ipst->ips_igmp_timeout_id != 0);
   1442 	ipst->ips_igmp_timer_scheduled_last = 0;
   1443 	ipst->ips_igmp_time_to_next = 0;
   1444 	mutex_exit(&ipst->ips_igmp_timer_lock);
   1445 
   1446 	rw_enter(&ipst->ips_ill_g_lock, RW_READER);
   1447 	ill = ILL_START_WALK_V4(&ctx, ipst);
   1448 	for (; ill != NULL; ill = ill_next(&ctx, ill)) {
   1449 		ASSERT(!ill->ill_isv6);
   1450 		/*
   1451 		 * We may not be able to refhold the ill if the ill/ipif
   1452 		 * is changing. But we need to make sure that the ill will
   1453 		 * not vanish. So we just bump up the ill_waiter count.
   1454 		 */
   1455 		if (!ill_waiter_inc(ill))
   1456 			continue;
   1457 		rw_exit(&ipst->ips_ill_g_lock);
   1458 		success = ipsq_enter(ill, B_TRUE, NEW_OP);
   1459 		if (success) {
   1460 			next = igmp_timeout_handler_per_ill(ill);
   1461 			if (next < global_next)
   1462 				global_next = next;
   1463 			ipsq_exit(ill->ill_phyint->phyint_ipsq);
   1464 		}
   1465 		rw_enter(&ipst->ips_ill_g_lock, RW_READER);
   1466 		ill_waiter_dcr(ill);
   1467 	}
   1468 	rw_exit(&ipst->ips_ill_g_lock);
   1469 
   1470 	mutex_enter(&ipst->ips_igmp_timer_lock);
   1471 	ASSERT(ipst->ips_igmp_timeout_id != 0);
   1472 	ipst->ips_igmp_timeout_id = 0;
   1473 	mutex_exit(&ipst->ips_igmp_timer_lock);
   1474 
   1475 	if (global_next != INFINITY)
   1476 		igmp_start_timers(global_next, ipst);
   1477 }
   1478 
   1479 /*
   1480  * mld_timeout_handler:
   1481  * Called when there are timeout events, every next (tick).
   1482  * Returns number of ticks to next event (or 0 if none).
   1483  */
   1484 /* ARGSUSED */
   1485 uint_t
   1486 mld_timeout_handler_per_ill(ill_t *ill)
   1487 {
   1488 	ilm_t 	*ilm;
   1489 	uint_t	next = INFINITY, current;
   1490 	mrec_t	*rp, *rtxrp;
   1491 	rtx_state_t *rtxp;
   1492 	mcast_record_t	rtype;
   1493 
   1494 	ASSERT(IAM_WRITER_ILL(ill));
   1495 
   1496 	mutex_enter(&ill->ill_lock);
   1497 
   1498 	current = CURRENT_MSTIME;
   1499 	/*
   1500 	 * First check the global timer on this interface; the global timer
   1501 	 * is not used for MLDv1, so if it's set we can assume we're v2.
   1502 	 */
   1503 	if (ill->ill_global_timer == INFINITY)
   1504 		goto per_ilm_timer;
   1505 	if (ill->ill_global_timer <= (current + CURRENT_OFFSET)) {
   1506 		ill->ill_global_timer = INFINITY;
   1507 		/*
   1508 		 * Send report for each group on this interface.
   1509 		 * Since we just set the global timer (received a v2 general
   1510 		 * query), need to skip the all hosts addr (ff02::1), per
   1511 		 * RFC 3810 section 6.
   1512 		 */
   1513 		rp = NULL;
   1514 		for (ilm = ill->ill_ilm; ilm != NULL; ilm = ilm->ilm_next) {
   1515 			if (IN6_ARE_ADDR_EQUAL(&ilm->ilm_v6addr,
   1516 			    &ipv6_all_hosts_mcast))
   1517 				continue;
   1518 			rp = mcast_bldmrec(ilm->ilm_fmode, &ilm->ilm_v6addr,
   1519 			    ilm->ilm_filter, rp);
   1520 			/*
   1521 			 * Since we're sending a report on this group, okay
   1522 			 * to delete pending group-specific timers.  Note
   1523 			 * that group-specific retransmit timers still need
   1524 			 * to be checked in the per_ilm_timer for-loop.
   1525 			 */
   1526 			ilm->ilm_timer = INFINITY;
   1527 			ilm->ilm_state = IGMP_IREPORTEDLAST;
   1528 			FREE_SLIST(ilm->ilm_pendsrcs);
   1529 			ilm->ilm_pendsrcs = NULL;
   1530 		}
   1531 		mutex_exit(&ill->ill_lock);
   1532 		mldv2_sendrpt(ill, rp);
   1533 		mutex_enter(&ill->ill_lock);
   1534 	} else {
   1535 		if ((ill->ill_global_timer - current) < next)
   1536 			next = ill->ill_global_timer - current;
   1537 	}
   1538 
   1539 per_ilm_timer:
   1540 	rp = rtxrp = NULL;
   1541 	for (ilm = ill->ill_ilm; ilm != NULL; ilm = ilm->ilm_next) {
   1542 		if (ilm->ilm_timer == INFINITY)
   1543 			goto per_ilm_rtxtimer;
   1544 
   1545 		if (ilm->ilm_timer > (current + CURRENT_OFFSET)) {
   1546 			if ((ilm->ilm_timer - current) < next)
   1547 				next = ilm->ilm_timer - current;
   1548 
   1549 			if (ip_debug > 1) {
   1550 				(void) mi_strlog(ill->ill_rq, 1, SL_TRACE,
   1551 				    "igmp_timo_hlr 2: ilm_timr"
   1552 				    " %d typ %d nxt %d",
   1553 				    (int)ntohl(ilm->ilm_timer - current),
   1554 				    (ill->ill_mcast_type), next);
   1555 			}
   1556 
   1557 			goto per_ilm_rtxtimer;
   1558 		}
   1559 
   1560 		/* the timer has expired, need to take action */
   1561 		ilm->ilm_timer = INFINITY;
   1562 		ilm->ilm_state = IGMP_IREPORTEDLAST;
   1563 		if (ill->ill_mcast_type == MLD_V1_ROUTER) {
   1564 			mutex_exit(&ill->ill_lock);
   1565 			mld_sendpkt(ilm, MLD_LISTENER_REPORT, NULL);
   1566 			mutex_enter(&ill->ill_lock);
   1567 		} else {
   1568 			slist_t *rsp;
   1569 			if (!SLIST_IS_EMPTY(ilm->ilm_pendsrcs) &&
   1570 			    (rsp = l_alloc()) != NULL) {
   1571 				/*
   1572 				 * Contents of reply depend on pending
   1573 				 * requested source list.
   1574 				 */
   1575 				if (ilm->ilm_fmode == MODE_IS_INCLUDE) {
   1576 					l_intersection(ilm->ilm_filter,
   1577 					    ilm->ilm_pendsrcs, rsp);
   1578 				} else {
   1579 					l_difference(ilm->ilm_pendsrcs,
   1580 					    ilm->ilm_filter, rsp);
   1581 				}
   1582 				FREE_SLIST(ilm->ilm_pendsrcs);
   1583 				ilm->ilm_pendsrcs = NULL;
   1584 				if (!SLIST_IS_EMPTY(rsp))
   1585 					rp = mcast_bldmrec(MODE_IS_INCLUDE,
   1586 					    &ilm->ilm_v6addr, rsp, rp);
   1587 				FREE_SLIST(rsp);
   1588 			} else {
   1589 				rp = mcast_bldmrec(ilm->ilm_fmode,
   1590 				    &ilm->ilm_v6addr, ilm->ilm_filter, rp);
   1591 			}
   1592 		}
   1593 
   1594 per_ilm_rtxtimer:
   1595 		rtxp = &ilm->ilm_rtx;
   1596 
   1597 		if (rtxp->rtx_timer == INFINITY)
   1598 			continue;
   1599 		if (rtxp->rtx_timer > (current + CURRENT_OFFSET)) {
   1600 			if ((rtxp->rtx_timer - current) < next)
   1601 				next = rtxp->rtx_timer - current;
   1602 			continue;
   1603 		}
   1604 
   1605 		rtxp->rtx_timer = INFINITY;
   1606 		ilm->ilm_state = IGMP_IREPORTEDLAST;
   1607 		if (ill->ill_mcast_type == MLD_V1_ROUTER) {
   1608 			mutex_exit(&ill->ill_lock);
   1609 			mld_sendpkt(ilm, MLD_LISTENER_REPORT, NULL);
   1610 			mutex_enter(&ill->ill_lock);
   1611 			continue;
   1612 		}
   1613 
   1614 		/*
   1615 		 * The retransmit timer has popped, and our router is
   1616 		 * MLDv2.  We have to delve into the retransmit state
   1617 		 * stored in the ilm.
   1618 		 *
   1619 		 * Decrement the retransmit count.  If the fmode rtx
   1620 		 * count is active, decrement it, and send a filter
   1621 		 * mode change report with the ilm's source list.
   1622 		 * Otherwise, send a source list change report with
   1623 		 * the current retransmit lists.
   1624 		 */
   1625 		ASSERT(rtxp->rtx_cnt > 0);
   1626 		ASSERT(rtxp->rtx_cnt >= rtxp->rtx_fmode_cnt);
   1627 		rtxp->rtx_cnt--;
   1628 		if (rtxp->rtx_fmode_cnt > 0) {
   1629 			rtxp->rtx_fmode_cnt--;
   1630 			rtype = (ilm->ilm_fmode == MODE_IS_INCLUDE) ?
   1631 			    CHANGE_TO_INCLUDE : CHANGE_TO_EXCLUDE;
   1632 			rtxrp = mcast_bldmrec(rtype, &ilm->ilm_v6addr,
   1633 			    ilm->ilm_filter, rtxrp);
   1634 		} else {
   1635 			rtxrp = mcast_bldmrec(ALLOW_NEW_SOURCES,
   1636 			    &ilm->ilm_v6addr, rtxp->rtx_allow, rtxrp);
   1637 			rtxrp = mcast_bldmrec(BLOCK_OLD_SOURCES,
   1638 			    &ilm->ilm_v6addr, rtxp->rtx_block, rtxrp);
   1639 		}
   1640 		if (rtxp->rtx_cnt > 0) {
   1641 			MCAST_RANDOM_DELAY(rtxp->rtx_timer,
   1642 			    SEC_TO_MSEC(ICMP6_MAX_HOST_REPORT_DELAY));
   1643 			if (rtxp->rtx_timer < next)
   1644 				next = rtxp->rtx_timer;
   1645 			rtxp->rtx_timer += current;
   1646 		} else {
   1647 			ASSERT(rtxp->rtx_timer == INFINITY);
   1648 			CLEAR_SLIST(rtxp->rtx_allow);
   1649 			CLEAR_SLIST(rtxp->rtx_block);
   1650 		}
   1651 	}
   1652 
   1653 	if (ill->ill_mcast_type == MLD_V2_ROUTER) {
   1654 		mutex_exit(&ill->ill_lock);
   1655 		mldv2_sendrpt(ill, rp);
   1656 		mldv2_sendrpt(ill, rtxrp);
   1657 		return (next);
   1658 	}
   1659 
   1660 	mutex_exit(&ill->ill_lock);
   1661 
   1662 	return (next);
   1663 }
   1664 
   1665 /*
   1666  * mld_timeout_handler:
   1667  * Called when there are timeout events, every next * TMEOUT_INTERVAL (tick).
   1668  * Returns number of ticks to next event (or 0 if none).
   1669  * MT issues are same as igmp_timeout_handler
   1670  */
   1671 void
   1672 mld_timeout_handler(void *arg)
   1673 {
   1674 	ill_t	*ill;
   1675 	uint_t  global_next = INFINITY;
   1676 	uint_t  next;
   1677 	ill_walk_context_t ctx;
   1678 	boolean_t success;
   1679 	ip_stack_t *ipst = arg;
   1680 
   1681 	ASSERT(arg != NULL);
   1682 	mutex_enter(&ipst->ips_mld_timer_lock);
   1683 	ASSERT(ipst->ips_mld_timeout_id != 0);
   1684 	ipst->ips_mld_timer_scheduled_last = 0;
   1685 	ipst->ips_mld_time_to_next = 0;
   1686 	mutex_exit(&ipst->ips_mld_timer_lock);
   1687 
   1688 	rw_enter(&ipst->ips_ill_g_lock, RW_READER);
   1689 	ill = ILL_START_WALK_V6(&ctx, ipst);
   1690 	for (; ill != NULL; ill = ill_next(&ctx, ill)) {
   1691 		ASSERT(ill->ill_isv6);
   1692 		/*
   1693 		 * We may not be able to refhold the ill if the ill/ipif
   1694 		 * is changing. But we need to make sure that the ill will
   1695 		 * not vanish. So we just bump up the ill_waiter count.
   1696 		 */
   1697 		if (!ill_waiter_inc(ill))
   1698 			continue;
   1699 		rw_exit(&ipst->ips_ill_g_lock);
   1700 		success = ipsq_enter(ill, B_TRUE, NEW_OP);
   1701 		if (success) {
   1702 			next = mld_timeout_handler_per_ill(ill);
   1703 			if (next < global_next)
   1704 				global_next = next;
   1705 			ipsq_exit(ill->ill_phyint->phyint_ipsq);
   1706 		}
   1707 		rw_enter(&ipst->ips_ill_g_lock, RW_READER);
   1708 		ill_waiter_dcr(ill);
   1709 	}
   1710 	rw_exit(&ipst->ips_ill_g_lock);
   1711 
   1712 	mutex_enter(&ipst->ips_mld_timer_lock);
   1713 	ASSERT(ipst->ips_mld_timeout_id != 0);
   1714 	ipst->ips_mld_timeout_id = 0;
   1715 	mutex_exit(&ipst->ips_mld_timer_lock);
   1716 
   1717 	if (global_next != INFINITY)
   1718 		mld_start_timers(global_next, ipst);
   1719 }
   1720 
   1721 /*
   1722  * Calculate the Older Version Querier Present timeout value, in number
   1723  * of slowtimo intervals, for the given ill.
   1724  */
   1725 #define	OVQP(ill) \
   1726 	((1000 * (((ill)->ill_mcast_rv * (ill)->ill_mcast_qi) \
   1727 	+ MCAST_QUERY_RESP_INTERVAL)) / MCAST_SLOWTIMO_INTERVAL)
   1728 
   1729 /*
   1730  * igmp_slowtimo:
   1731  * - Resets to new router if we didnt we hear from the router
   1732  *   in IGMP_AGE_THRESHOLD seconds.
   1733  * - Resets slowtimeout.
   1734  * Check for ips_igmp_max_version ensures that we don't revert to a higher
   1735  * IGMP version than configured.
   1736  */
   1737 void
   1738 igmp_slowtimo(void *arg)
   1739 {
   1740 	ill_t	*ill;
   1741 	ill_if_t *ifp;
   1742 	avl_tree_t *avl_tree;
   1743 	ip_stack_t *ipst = (ip_stack_t *)arg;
   1744 
   1745 	ASSERT(arg != NULL);
   1746 	/* Hold the ill_g_lock so that we can safely walk the ill list */
   1747 	rw_enter(&ipst->ips_ill_g_lock, RW_READER);
   1748 
   1749 	/*
   1750 	 * The ill_if_t list is circular, hence the odd loop parameters.
   1751 	 *
   1752 	 * We can't use the ILL_START_WALK and ill_next() wrappers for this
   1753 	 * walk, as we need to check the illif_mcast_* fields in the ill_if_t
   1754 	 * structure (allowing us to skip if none of the instances have timers
   1755 	 * running).
   1756 	 */
   1757 	for (ifp = IP_V4_ILL_G_LIST(ipst);
   1758 	    ifp != (ill_if_t *)&IP_V4_ILL_G_LIST(ipst);
   1759 	    ifp = ifp->illif_next) {
   1760 		/*
   1761 		 * illif_mcast_v[12] are set using atomics. If an ill hears
   1762 		 * a V1 or V2 query now and we miss seeing the count now,
   1763 		 * we will see it the next time igmp_slowtimo is called.
   1764 		 */
   1765 		if (ifp->illif_mcast_v1 == 0 && ifp->illif_mcast_v2 == 0)
   1766 			continue;
   1767 
   1768 		avl_tree = &ifp->illif_avl_by_ppa;
   1769 		for (ill = avl_first(avl_tree); ill != NULL;
   1770 		    ill = avl_walk(avl_tree, ill, AVL_AFTER)) {
   1771 			mutex_enter(&ill->ill_lock);
   1772 			if (ill->ill_mcast_v1_tset == 1)
   1773 				ill->ill_mcast_v1_time++;
   1774 			if (ill->ill_mcast_v2_tset == 1)
   1775 				ill->ill_mcast_v2_time++;
   1776 			if ((ill->ill_mcast_type == IGMP_V1_ROUTER) &&
   1777 			    (ipst->ips_igmp_max_version >= IGMP_V2_ROUTER) &&
   1778 			    (ill->ill_mcast_v1_time >= OVQP(ill))) {
   1779 				if ((ill->ill_mcast_v2_tset > 0) ||
   1780 				    (ipst->ips_igmp_max_version ==
   1781 				    IGMP_V2_ROUTER)) {
   1782 					ip1dbg(("V1 query timer "
   1783 					    "expired on %s; switching "
   1784 					    "mode to IGMP_V2\n",
   1785 					    ill->ill_name));
   1786 					ill->ill_mcast_type =
   1787 					    IGMP_V2_ROUTER;
   1788 				} else {
   1789 					ip1dbg(("V1 query timer "
   1790 					    "expired on %s; switching "
   1791 					    "mode to IGMP_V3\n",
   1792 					    ill->ill_name));
   1793 					ill->ill_mcast_type =
   1794 					    IGMP_V3_ROUTER;
   1795 				}
   1796 				ill->ill_mcast_v1_time = 0;
   1797 				ill->ill_mcast_v1_tset = 0;
   1798 				atomic_add_16(&ifp->illif_mcast_v1, -1);
   1799 			}
   1800 			if ((ill->ill_mcast_type == IGMP_V2_ROUTER) &&
   1801 			    (ipst->ips_igmp_max_version >= IGMP_V3_ROUTER) &&
   1802 			    (ill->ill_mcast_v2_time >= OVQP(ill))) {
   1803 				ip1dbg(("V2 query timer expired on "
   1804 				    "%s; switching mode to IGMP_V3\n",
   1805 				    ill->ill_name));
   1806 				ill->ill_mcast_type = IGMP_V3_ROUTER;
   1807 				ill->ill_mcast_v2_time = 0;
   1808 				ill->ill_mcast_v2_tset = 0;
   1809 				atomic_add_16(&ifp->illif_mcast_v2, -1);
   1810 			}
   1811 			mutex_exit(&ill->ill_lock);
   1812 		}
   1813 	}
   1814 	rw_exit(&ipst->ips_ill_g_lock);
   1815 	mutex_enter(&ipst->ips_igmp_slowtimeout_lock);
   1816 	ipst->ips_igmp_slowtimeout_id = timeout(igmp_slowtimo, (void *)ipst,
   1817 	    MSEC_TO_TICK(MCAST_SLOWTIMO_INTERVAL));
   1818 	mutex_exit(&ipst->ips_igmp_slowtimeout_lock);
   1819 }
   1820 
   1821 /*
   1822  * mld_slowtimo:
   1823  * - Resets to newer version if we didn't hear from the older version router
   1824  *   in MLD_AGE_THRESHOLD seconds.
   1825  * - Restarts slowtimeout.
   1826  * Check for ips_mld_max_version ensures that we don't revert to a higher
   1827  * IGMP version than configured.
   1828  */
   1829 /* ARGSUSED */
   1830 void
   1831 mld_slowtimo(void *arg)
   1832 {
   1833 	ill_t *ill;
   1834 	ill_if_t *ifp;
   1835 	avl_tree_t *avl_tree;
   1836 	ip_stack_t *ipst = (ip_stack_t *)arg;
   1837 
   1838 	ASSERT(arg != NULL);
   1839 	/* See comments in igmp_slowtimo() above... */
   1840 	rw_enter(&ipst->ips_ill_g_lock, RW_READER);
   1841 	for (ifp = IP_V6_ILL_G_LIST(ipst);
   1842 	    ifp != (ill_if_t *)&IP_V6_ILL_G_LIST(ipst);
   1843 	    ifp = ifp->illif_next) {
   1844 		if (ifp->illif_mcast_v1 == 0)
   1845 			continue;
   1846 
   1847 		avl_tree = &ifp->illif_avl_by_ppa;
   1848 		for (ill = avl_first(avl_tree); ill != NULL;
   1849 		    ill = avl_walk(avl_tree, ill, AVL_AFTER)) {
   1850 			mutex_enter(&ill->ill_lock);
   1851 			if (ill->ill_mcast_v1_tset == 1)
   1852 				ill->ill_mcast_v1_time++;
   1853 			if ((ill->ill_mcast_type == MLD_V1_ROUTER) &&
   1854 			    (ipst->ips_mld_max_version >= MLD_V2_ROUTER) &&
   1855 			    (ill->ill_mcast_v1_time >= OVQP(ill))) {
   1856 				ip1dbg(("MLD query timer expired on"
   1857 				    " %s; switching mode to MLD_V2\n",
   1858 				    ill->ill_name));
   1859 				ill->ill_mcast_type = MLD_V2_ROUTER;
   1860 				ill->ill_mcast_v1_time = 0;
   1861 				ill->ill_mcast_v1_tset = 0;
   1862 				atomic_add_16(&ifp->illif_mcast_v1, -1);
   1863 			}
   1864 			mutex_exit(&ill->ill_lock);
   1865 		}
   1866 	}
   1867 	rw_exit(&ipst->ips_ill_g_lock);
   1868 	mutex_enter(&ipst->ips_mld_slowtimeout_lock);
   1869 	ipst->ips_mld_slowtimeout_id = timeout(mld_slowtimo, (void *)ipst,
   1870 	    MSEC_TO_TICK(MCAST_SLOWTIMO_INTERVAL));
   1871 	mutex_exit(&ipst->ips_mld_slowtimeout_lock);
   1872 }
   1873 
   1874 /*
   1875  * igmp_sendpkt:
   1876  * This will send to ip_wput like icmp_inbound.
   1877  * Note that the lower ill (on which the membership is kept) is used
   1878  * as an upper ill to pass in the multicast parameters.
   1879  */
   1880 static void
   1881 igmp_sendpkt(ilm_t *ilm, uchar_t type, ipaddr_t addr)
   1882 {
   1883 	mblk_t	*mp;
   1884 	igmpa_t	*igmpa;
   1885 	uint8_t *rtralert;
   1886 	ipha_t	*ipha;
   1887 	int	hdrlen = sizeof (ipha_t) + RTRALERT_LEN;
   1888 	size_t	size  = hdrlen + sizeof (igmpa_t);
   1889 	ipif_t 	*ipif = ilm->ilm_ipif;
   1890 	ill_t 	*ill  = ipif->ipif_ill;
   1891 	mblk_t	*first_mp;
   1892 	ipsec_out_t *io;
   1893 	zoneid_t zoneid;
   1894 	ip_stack_t *ipst = ill->ill_ipst;
   1895 
   1896 	/*
   1897 	 * We need to make sure this packet goes out on an ipif. If
   1898 	 * there is some global policy match in ip_wput_ire, we need
   1899 	 * to get to the right interface after IPSEC processing.
   1900 	 * To make sure this multicast packet goes out on the right
   1901 	 * interface, we attach an ipsec_out and initialize ill_index
   1902 	 * like we did in ip_wput. To make sure that this packet does
   1903 	 * not get forwarded on other interfaces or looped back, we
   1904 	 * set ipsec_out_dontroute to B_TRUE and ipsec_out_multicast_loop
   1905 	 * to B_FALSE.
   1906 	 */
   1907 	first_mp = allocb(sizeof (ipsec_info_t), BPRI_HI);
   1908 	if (first_mp == NULL)
   1909 		return;
   1910 
   1911 	first_mp->b_datap->db_type = M_CTL;
   1912 	first_mp->b_wptr += sizeof (ipsec_info_t);
   1913 	bzero(first_mp->b_rptr, sizeof (ipsec_info_t));
   1914 	/* ipsec_out_secure is B_FALSE now */
   1915 	io = (ipsec_out_t *)first_mp->b_rptr;
   1916 	io->ipsec_out_type = IPSEC_OUT;
   1917 	io->ipsec_out_len = sizeof (ipsec_out_t);
   1918 	io->ipsec_out_use_global_policy = B_TRUE;
   1919 	io->ipsec_out_ill_index = ill->ill_phyint->phyint_ifindex;
   1920 	io->ipsec_out_multicast_loop = B_FALSE;
   1921 	io->ipsec_out_dontroute = B_TRUE;
   1922 	if ((zoneid = ilm->ilm_zoneid) == ALL_ZONES)
   1923 		zoneid = GLOBAL_ZONEID;
   1924 	io->ipsec_out_zoneid = zoneid;
   1925 	io->ipsec_out_ns = ipst->ips_netstack;	/* No netstack_hold */
   1926 
   1927 	mp = allocb(size, BPRI_HI);
   1928 	if (mp == NULL) {
   1929 		freemsg(first_mp);
   1930 		return;
   1931 	}
   1932 	mp->b_wptr = mp->b_rptr + size;
   1933 	first_mp->b_cont = mp;
   1934 
   1935 	ipha = (ipha_t *)mp->b_rptr;
   1936 	rtralert = (uint8_t *)&(ipha[1]);
   1937 	igmpa = (igmpa_t *)&(rtralert[RTRALERT_LEN]);
   1938 	igmpa->igmpa_type   = type;
   1939 	igmpa->igmpa_code   = 0;
   1940 	igmpa->igmpa_group  = ilm->ilm_addr;
   1941 	igmpa->igmpa_cksum  = 0;
   1942 	igmpa->igmpa_cksum  = IP_CSUM(mp, hdrlen, 0);
   1943 
   1944 	rtralert[0] = IPOPT_COPY | IPOPT_RTRALERT;
   1945 	rtralert[1] = RTRALERT_LEN;
   1946 	rtralert[2] = 0;
   1947 	rtralert[3] = 0;
   1948 
   1949 	ipha->ipha_version_and_hdr_length = (IP_VERSION << 4)
   1950 	    | (IP_SIMPLE_HDR_LENGTH_IN_WORDS + RTRALERT_LEN_IN_WORDS);
   1951 	ipha->ipha_type_of_service 	= 0;
   1952 	ipha->ipha_length = htons(size);
   1953 	ipha->ipha_ident = 0;
   1954 	ipha->ipha_fragment_offset_and_flags = 0;
   1955 	ipha->ipha_ttl 		= IGMP_TTL;
   1956 	ipha->ipha_protocol 	= IPPROTO_IGMP;
   1957 	ipha->ipha_hdr_checksum 	= 0;
   1958 	ipha->ipha_dst 		= addr ? addr : igmpa->igmpa_group;
   1959 	ipha->ipha_src 		= ipif->ipif_src_addr;
   1960 	/*
   1961 	 * Request loopback of the report if we are acting as a multicast
   1962 	 * router, so that the process-level routing demon can hear it.
   1963 	 */
   1964 	/*
   1965 	 * This will run multiple times for the same group if there are members
   1966 	 * on the same group for multiple ipif's on the same ill. The
   1967 	 * igmp_input code will suppress this due to the loopback thus we
   1968 	 * always loopback membership report.
   1969 	 */
   1970 	ASSERT(ill->ill_rq != NULL);
   1971 	ip_multicast_loopback(ill->ill_rq, ill, first_mp, 0, ilm->ilm_zoneid);
   1972 
   1973 	ip_wput_multicast(ill->ill_wq, first_mp, ipif, zoneid);
   1974 
   1975 	++ipst->ips_igmpstat.igps_snd_reports;
   1976 }
   1977 
   1978 /*
   1979  * Sends an IGMP_V3_MEMBERSHIP_REPORT message out the ill associated
   1980  * with the passed-in ipif.  The report will contain one group record
   1981  * for each element of reclist.  If this causes packet length to
   1982  * exceed ipif->ipif_ill->ill_max_frag, multiple reports are sent.
   1983  * reclist is assumed to be made up of buffers allocated by mcast_bldmrec(),
   1984  * and those buffers are freed here.
   1985  */
   1986 static void
   1987 igmpv3_sendrpt(ipif_t *ipif, mrec_t *reclist)
   1988 {
   1989 	ipsec_out_t *io;
   1990 	igmp3ra_t *igmp3ra;
   1991 	grphdra_t *grphdr;
   1992 	mblk_t *first_mp, *mp;
   1993 	ipha_t *ipha;
   1994 	uint8_t *rtralert;
   1995 	ipaddr_t *src_array;
   1996 	int i, j, numrec, more_src_cnt;
   1997 	size_t hdrsize, size, rsize;
   1998 	ill_t *ill = ipif->ipif_ill;
   1999 	mrec_t *rp, *cur_reclist;
   2000 	mrec_t *next_reclist = reclist;
   2001 	boolean_t morepkts;
   2002 	zoneid_t zoneid;
   2003 	ip_stack_t	 *ipst = ill->ill_ipst;
   2004 
   2005 	ASSERT(IAM_WRITER_IPIF(ipif));
   2006 
   2007 	/* if there aren't any records, there's nothing to send */
   2008 	if (reclist == NULL)
   2009 		return;
   2010 
   2011 	hdrsize = sizeof (ipha_t) + RTRALERT_LEN;
   2012 nextpkt:
   2013 	size = hdrsize + sizeof (igmp3ra_t);
   2014 	morepkts = B_FALSE;
   2015 	more_src_cnt = 0;
   2016 	cur_reclist = next_reclist;
   2017 	numrec = 0;
   2018 	for (rp = cur_reclist; rp != NULL; rp = rp->mrec_next) {
   2019 		rsize = sizeof (grphdra_t) +
   2020 		    (rp->mrec_srcs.sl_numsrc * sizeof (ipaddr_t));
   2021 		if (size + rsize > ill->ill_max_frag) {
   2022 			if (rp == cur_reclist) {
   2023 				/*
   2024 				 * If the first mrec we looked at is too big
   2025 				 * to fit in a single packet (i.e the source
   2026 				 * list is too big), we must either truncate
   2027 				 * the list (if TO_EX or IS_EX), or send
   2028 				 * multiple reports for the same group (all
   2029 				 * other types).
   2030 				 */
   2031 				int srcspace, srcsperpkt;
   2032 				srcspace = ill->ill_max_frag - (size +
   2033 				    sizeof (grphdra_t));
   2034 
   2035 				/*
   2036 				 * Skip if there's not even enough room in
   2037 				 * a single packet to send something useful.
   2038 				 */
   2039 				if (srcspace <= sizeof (ipaddr_t))
   2040 					continue;
   2041 
   2042 				srcsperpkt = srcspace / sizeof (ipaddr_t);
   2043 				/*
   2044 				 * Increment size and numrec, because we will
   2045 				 * be sending a record for the mrec we're
   2046 				 * looking at now.
   2047 				 */
   2048 				size += sizeof (grphdra_t) +
   2049 				    (srcsperpkt * sizeof (ipaddr_t));
   2050 				numrec++;
   2051 				if (rp->mrec_type == MODE_IS_EXCLUDE ||
   2052 				    rp->mrec_type == CHANGE_TO_EXCLUDE) {
   2053 					rp->mrec_srcs.sl_numsrc = srcsperpkt;
   2054 					if (rp->mrec_next == NULL) {
   2055 						/* no more packets to send */
   2056 						break;
   2057 					} else {
   2058 						/*
   2059 						 * more packets, but we're
   2060 						 * done with this mrec.
   2061 						 */
   2062 						next_reclist = rp->mrec_next;
   2063 					}
   2064 				} else {
   2065 					more_src_cnt = rp->mrec_srcs.sl_numsrc
   2066 					    - srcsperpkt;
   2067 					rp->mrec_srcs.sl_numsrc = srcsperpkt;
   2068 					/*
   2069 					 * We'll fix up this mrec (remove the
   2070 					 * srcs we've already sent) before
   2071 					 * returning to nextpkt above.
   2072 					 */
   2073 					next_reclist = rp;
   2074 				}
   2075 			} else {
   2076 				next_reclist = rp;
   2077 			}
   2078 			morepkts = B_TRUE;
   2079 			break;
   2080 		}
   2081 		size += rsize;
   2082 		numrec++;
   2083 	}
   2084 
   2085 	/*
   2086 	 * See comments in igmp_sendpkt() about initializing for ipsec and
   2087 	 * load balancing requirements.
   2088 	 */
   2089 	first_mp = allocb(sizeof (ipsec_info_t), BPRI_HI);
   2090 	if (first_mp == NULL)
   2091 		goto free_reclist;
   2092 
   2093 	first_mp->b_datap->db_type = M_CTL;
   2094 	first_mp->b_wptr += sizeof (ipsec_info_t);
   2095 	bzero(first_mp->b_rptr, sizeof (ipsec_info_t));
   2096 	/* ipsec_out_secure is B_FALSE now */
   2097 	io = (ipsec_out_t *)first_mp->b_rptr;
   2098 	io->ipsec_out_type = IPSEC_OUT;
   2099 	io->ipsec_out_len = sizeof (ipsec_out_t);
   2100 	io->ipsec_out_use_global_policy = B_TRUE;
   2101 	io->ipsec_out_ill_index = ill->ill_phyint->phyint_ifindex;
   2102 	io->ipsec_out_multicast_loop = B_FALSE;
   2103 	io->ipsec_out_dontroute = B_TRUE;
   2104 	if ((zoneid = ipif->ipif_zoneid) == ALL_ZONES)
   2105 		zoneid = GLOBAL_ZONEID;
   2106 	io->ipsec_out_zoneid = zoneid;
   2107 
   2108 	mp = allocb(size, BPRI_HI);
   2109 	if (mp == NULL) {
   2110 		freemsg(first_mp);
   2111 		goto free_reclist;
   2112 	}
   2113 	bzero((char *)mp->b_rptr, size);
   2114 	mp->b_wptr = (uchar_t *)(mp->b_rptr + size);
   2115 	first_mp->b_cont = mp;
   2116 
   2117 	ipha = (ipha_t *)mp->b_rptr;
   2118 	rtralert = (uint8_t *)&(ipha[1]);
   2119 	igmp3ra = (igmp3ra_t *)&(rtralert[RTRALERT_LEN]);
   2120 	grphdr = (grphdra_t *)&(igmp3ra[1]);
   2121 
   2122 	rp = cur_reclist;
   2123 	for (i = 0; i < numrec; i++) {
   2124 		grphdr->grphdra_type = rp->mrec_type;
   2125 		grphdr->grphdra_numsrc = htons(rp->mrec_srcs.sl_numsrc);
   2126 		grphdr->grphdra_group = V4_PART_OF_V6(rp->mrec_group);
   2127 		src_array = (ipaddr_t *)&(grphdr[1]);
   2128 
   2129 		for (j = 0; j < rp->mrec_srcs.sl_numsrc; j++)
   2130 			src_array[j] = V4_PART_OF_V6(rp->mrec_srcs.sl_addr[j]);
   2131 
   2132 		grphdr = (grphdra_t *)&(src_array[j]);
   2133 		rp = rp->mrec_next;
   2134 	}
   2135 
   2136 	igmp3ra->igmp3ra_type = IGMP_V3_MEMBERSHIP_REPORT;
   2137 	igmp3ra->igmp3ra_numrec = htons(numrec);
   2138 	igmp3ra->igmp3ra_cksum = IP_CSUM(mp, hdrsize, 0);
   2139 
   2140 	rtralert[0] = IPOPT_COPY | IPOPT_RTRALERT;
   2141 	rtralert[1] = RTRALERT_LEN;
   2142 	rtralert[2] = 0;
   2143 	rtralert[3] = 0;
   2144 
   2145 	ipha->ipha_version_and_hdr_length = IP_VERSION << 4
   2146 	    | (IP_SIMPLE_HDR_LENGTH_IN_WORDS + RTRALERT_LEN_IN_WORDS);
   2147 	ipha->ipha_type_of_service = IPTOS_PREC_INTERNETCONTROL;
   2148 	ipha->ipha_length = htons(size);
   2149 	ipha->ipha_ttl = IGMP_TTL;
   2150 	ipha->ipha_protocol = IPPROTO_IGMP;
   2151 	ipha->ipha_dst = htonl(INADDR_ALLRPTS_GROUP);
   2152 	ipha->ipha_src = ipif->ipif_src_addr;
   2153 
   2154 	/*
   2155 	 * Request loopback of the report if we are acting as a multicast
   2156 	 * router, so that the process-level routing daemon can hear it.
   2157 	 *
   2158 	 * This will run multiple times for the same group if there are
   2159 	 * members on the same group for multiple ipifs on the same ill.
   2160 	 * The igmp_input code will suppress this due to the loopback;
   2161 	 * thus we always loopback membership report.
   2162 	 */
   2163 	ASSERT(ill->ill_rq != NULL);
   2164 	ip_multicast_loopback(ill->ill_rq, ill, mp, 0, ipif->ipif_zoneid);
   2165 
   2166 	ip_wput_multicast(ill->ill_wq, first_mp, ipif, zoneid);
   2167 
   2168 	++ipst->ips_igmpstat.igps_snd_reports;
   2169 
   2170 	if (morepkts) {
   2171 		if (more_src_cnt > 0) {
   2172 			int index, mvsize;
   2173 			slist_t *sl = &next_reclist->mrec_srcs;
   2174 			index = sl->sl_numsrc;
   2175 			mvsize = more_src_cnt * sizeof (in6_addr_t);
   2176 			(void) memmove(&sl->sl_addr[0], &sl->sl_addr[index],
   2177 			    mvsize);
   2178 			sl->sl_numsrc = more_src_cnt;
   2179 		}
   2180 		goto nextpkt;
   2181 	}
   2182 
   2183 free_reclist:
   2184 	while (reclist != NULL) {
   2185 		rp = reclist->mrec_next;
   2186 		mi_free(reclist);
   2187 		reclist = rp;
   2188 	}
   2189 }
   2190 
   2191 /*
   2192  * mld_input:
   2193  */
   2194 /* ARGSUSED */
   2195 void
   2196 mld_input(queue_t *q, mblk_t *mp, ill_t *ill)
   2197 {
   2198 	ip6_t		*ip6h = (ip6_t *)(mp->b_rptr);
   2199 	mld_hdr_t	*mldh;
   2200 	ilm_t		*ilm;
   2201 	ipif_t		*ipif;
   2202 	uint16_t	hdr_length, exthdr_length;
   2203 	in6_addr_t	*v6group_ptr, *lcladdr_ptr;
   2204 	uint_t		next;
   2205 	int		mldlen;
   2206 	ip_stack_t	*ipst = ill->ill_ipst;
   2207 	ilm_walker_t	ilw;
   2208 
   2209 	BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInGroupMembTotal);
   2210 
   2211 	/* Make sure the src address of the packet is link-local */
   2212 	if (!(IN6_IS_ADDR_LINKLOCAL(&ip6h->ip6_src))) {
   2213 		BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInErrors);
   2214 		freemsg(mp);
   2215 		return;
   2216 	}
   2217 
   2218 	if (ip6h->ip6_hlim != 1) {
   2219 		BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpBadHoplimit);
   2220 		freemsg(mp);
   2221 		return;
   2222 	}
   2223 
   2224 	/* Get to the icmp header part */
   2225 	if (ip6h->ip6_nxt != IPPROTO_ICMPV6) {
   2226 		hdr_length = ip_hdr_length_v6(mp, ip6h);
   2227 		exthdr_length = hdr_length - IPV6_HDR_LEN;
   2228 	} else {
   2229 		hdr_length = IPV6_HDR_LEN;
   2230 		exthdr_length = 0;
   2231 	}
   2232 	mldlen = ntohs(ip6h->ip6_plen) - exthdr_length;
   2233 
   2234 	/* An MLD packet must at least be 24 octets to be valid */
   2235 	if (mldlen < MLD_MINLEN) {
   2236 		BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInErrors);
   2237 		freemsg(mp);
   2238 		return;
   2239 	}
   2240 
   2241 	mldh = (mld_hdr_t *)(&mp->b_rptr[hdr_length]);
   2242 
   2243 	switch (mldh->mld_type) {
   2244 	case MLD_LISTENER_QUERY:
   2245 		/*
   2246 		 * packet length differentiates between v1 and v2.  v1
   2247 		 * query should be exactly 24 octets long; v2 is >= 28.
   2248 		 */
   2249 		if ((mldlen == MLD_MINLEN) ||
   2250 		    (ipst->ips_mld_max_version < MLD_V2_ROUTER)) {
   2251 			next = mld_query_in(mldh, ill);
   2252 		} else if (mldlen >= MLD_V2_QUERY_MINLEN) {
   2253 			next = mldv2_query_in((mld2q_t *)mldh, ill, mldlen);
   2254 		} else {
   2255 			BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInErrors);
   2256 			freemsg(mp);
   2257 			return;
   2258 		}
   2259 		if (next == 0) {
   2260 			freemsg(mp);
   2261 			return;
   2262 		}
   2263 
   2264 		if (next != INFINITY)
   2265 			mld_start_timers(next, ipst);
   2266 		break;
   2267 
   2268 	case MLD_LISTENER_REPORT: {
   2269 
   2270 		ASSERT(ill->ill_ipif != NULL);
   2271 		/*
   2272 		 * For fast leave to work, we have to know that we are the
   2273 		 * last person to send a report for this group.  Reports
   2274 		 * generated by us are looped back since we could potentially
   2275 		 * be a multicast router, so discard reports sourced by me.
   2276 		 */
   2277 		lcladdr_ptr = &(ill->ill_ipif->ipif_v6subnet);
   2278 		mutex_enter(&ill->ill_lock);
   2279 		for (ipif = ill->ill_ipif; ipif != NULL;
   2280 		    ipif = ipif->ipif_next) {
   2281 			if (IN6_ARE_ADDR_EQUAL(&ipif->ipif_v6lcl_addr,
   2282 			    lcladdr_ptr)) {
   2283 				if (ip_debug > 1) {
   2284 					char    buf1[INET6_ADDRSTRLEN];
   2285 					char	buf2[INET6_ADDRSTRLEN];
   2286 
   2287 					(void) mi_strlog(ill->ill_rq,
   2288 					    1,
   2289 					    SL_TRACE,
   2290 					    "mld_input: we are only "
   2291 					    "member src %s ipif_local %s",
   2292 					    inet_ntop(AF_INET6, lcladdr_ptr,
   2293 					    buf1, sizeof (buf1)),
   2294 					    inet_ntop(AF_INET6,
   2295 					    &ipif->ipif_v6lcl_addr,
   2296 					    buf2, sizeof (buf2)));
   2297 				}
   2298 				mutex_exit(&ill->ill_lock);
   2299 				freemsg(mp);
   2300 				return;
   2301 			}
   2302 		}
   2303 		mutex_exit(&ill->ill_lock);
   2304 		BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInGroupMembResponses);
   2305 
   2306 		v6group_ptr = &mldh->mld_addr;
   2307 		if (!IN6_IS_ADDR_MULTICAST(v6group_ptr)) {
   2308 			BUMP_MIB(ill->ill_icmp6_mib,
   2309 			    ipv6IfIcmpInGroupMembBadReports);
   2310 			freemsg(mp);
   2311 			return;
   2312 		}
   2313 
   2314 		/*
   2315 		 * If we belong to the group being reported, and we are a
   2316 		 * 'Delaying member' per the RFC terminology, stop our timer
   2317 		 * for that group and 'clear flag' i.e. mark ilm_state as
   2318 		 * IGMP_OTHERMEMBER. With zones, there can be multiple group
   2319 		 * membership entries for the same group address (one per zone)
   2320 		 * so we need to walk the ill_ilm list.
   2321 		 */
   2322 		ilm = ilm_walker_start(&ilw, ill);
   2323 		for (; ilm != NULL; ilm = ilm_walker_step(&ilw, ilm)) {
   2324 			if (!IN6_ARE_ADDR_EQUAL(&ilm->ilm_v6addr, v6group_ptr))
   2325 				continue;
   2326 			BUMP_MIB(ill->ill_icmp6_mib,
   2327 			    ipv6IfIcmpInGroupMembOurReports);
   2328 
   2329 			ilm->ilm_timer = INFINITY;
   2330 			ilm->ilm_state = IGMP_OTHERMEMBER;
   2331 		}
   2332 		ilm_walker_finish(&ilw);
   2333 		break;
   2334 	}
   2335 	case MLD_LISTENER_REDUCTION:
   2336 		BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInGroupMembReductions);
   2337 		break;
   2338 	}
   2339 	/*
   2340 	 * All MLD packets have already been passed up to any
   2341 	 * process(es) listening on a ICMP6 raw socket. This
   2342 	 * has been accomplished in ip_deliver_local_v6 prior to
   2343 	 * this function call. It is assumed that the multicast daemon
   2344 	 * will have a SOCK_RAW IPPROTO_ICMPV6 (and presumbly use the
   2345 	 * ICMP6_FILTER socket option to only receive the MLD messages)
   2346 	 * Thus we can free the MLD message block here
   2347 	 */
   2348 	freemsg(mp);
   2349 }
   2350 
   2351 /*
   2352  * Handles an MLDv1 Listener Query.  Returns 0 on error, or the appropriate
   2353  * (non-zero, unsigned) timer value to be set on success.
   2354  */
   2355 static uint_t
   2356 mld_query_in(mld_hdr_t *mldh, ill_t *ill)
   2357 {
   2358 	ilm_t	*ilm;
   2359 	int	timer;
   2360 	uint_t	next, current;
   2361 	in6_addr_t *v6group;
   2362 	ilm_walker_t ilw;
   2363 
   2364 	BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInGroupMembQueries);
   2365 
   2366 	/*
   2367 	 * In the MLD specification, there are 3 states and a flag.
   2368 	 *
   2369 	 * In Non-Listener state, we simply don't have a membership record.
   2370 	 * In Delaying state, our timer is running (ilm->ilm_timer < INFINITY)
   2371 	 * In Idle Member state, our timer is not running (ilm->ilm_timer ==
   2372 	 * INFINITY)
   2373 	 *
   2374 	 * The flag is ilm->ilm_state, it is set to IGMP_OTHERMEMBER if
   2375 	 * we have heard a report from another member, or IGMP_IREPORTEDLAST
   2376 	 * if I sent the last report.
   2377 	 */
   2378 	v6group = &mldh->mld_addr;
   2379 	if (!(IN6_IS_ADDR_UNSPECIFIED(v6group)) &&
   2380 	    ((!IN6_IS_ADDR_MULTICAST(v6group)))) {
   2381 		BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInGroupMembBadQueries);
   2382 		return (0);
   2383 	}
   2384 
   2385 	/* Need to do compatibility mode checking */
   2386 	mutex_enter(&ill->ill_lock);
   2387 	ill->ill_mcast_v1_time = 0;
   2388 	ill->ill_mcast_v1_tset = 1;
   2389 	if (ill->ill_mcast_type == MLD_V2_ROUTER) {
   2390 		ip1dbg(("Received MLDv1 Query on %s, switching mode to "
   2391 		    "MLD_V1_ROUTER\n", ill->ill_name));
   2392 		atomic_add_16(&ill->ill_ifptr->illif_mcast_v1, 1);
   2393 		ill->ill_mcast_type = MLD_V1_ROUTER;
   2394 	}
   2395 	mutex_exit(&ill->ill_lock);
   2396 
   2397 	timer = (int)ntohs(mldh->mld_maxdelay);
   2398 	if (ip_debug > 1) {
   2399 		(void) mi_strlog(ill->ill_rq, 1, SL_TRACE,
   2400 		    "mld_input: TIMER = mld_maxdelay %d mld_type 0x%x",
   2401 		    timer, (int)mldh->mld_type);
   2402 	}
   2403 
   2404 	/*
   2405 	 * -Start the timers in all of our membership records for
   2406 	 * the physical interface on which the query arrived,
   2407 	 * excl:
   2408 	 *	1.  those that belong to the "all hosts" group,
   2409 	 *	2.  those with 0 scope, or 1 node-local scope.
   2410 	 *
   2411 	 * -Restart any timer that is already running but has a value
   2412 	 * longer that the requested timeout.
   2413 	 * -Use the value specified in the query message as the
   2414 	 * maximum timeout.
   2415 	 */
   2416 	next = INFINITY;
   2417 
   2418 	ilm = ilm_walker_start(&ilw, ill);
   2419 	mutex_enter(&ill->ill_lock);
   2420 	current = CURRENT_MSTIME;
   2421 
   2422 	for (; ilm != NULL; ilm = ilm_walker_step(&ilw, ilm)) {
   2423 		ASSERT(!IN6_IS_ADDR_V4MAPPED(&ilm->ilm_v6addr));
   2424 
   2425 		if (IN6_IS_ADDR_UNSPECIFIED(&ilm->ilm_v6addr) ||
   2426 		    IN6_IS_ADDR_MC_NODELOCAL(&ilm->ilm_v6addr) ||
   2427 		    IN6_IS_ADDR_MC_RESERVED(&ilm->ilm_v6addr))
   2428 			continue;
   2429 		if ((!IN6_ARE_ADDR_EQUAL(&ilm->ilm_v6addr,
   2430 		    &ipv6_all_hosts_mcast)) &&
   2431 		    (IN6_IS_ADDR_UNSPECIFIED(v6group)) ||
   2432 		    (IN6_ARE_ADDR_EQUAL(v6group, &ilm->ilm_v6addr))) {
   2433 			if (timer == 0) {
   2434 				/* Respond immediately */
   2435 				ilm->ilm_timer = INFINITY;
   2436 				ilm->ilm_state = IGMP_IREPORTEDLAST;
   2437 				mutex_exit(&ill->ill_lock);
   2438 				mld_sendpkt(ilm, MLD_LISTENER_REPORT, NULL);
   2439 				mutex_enter(&ill->ill_lock);
   2440 				break;
   2441 			}
   2442 			if (ilm->ilm_timer > timer) {
   2443 				MCAST_RANDOM_DELAY(ilm->ilm_timer, timer);
   2444 				if (ilm->ilm_timer < next)
   2445 					next = ilm->ilm_timer;
   2446 				ilm->ilm_timer += current;
   2447 			}
   2448 			break;
   2449 		}
   2450 	}
   2451 	mutex_exit(&ill->ill_lock);
   2452 	ilm_walker_finish(&ilw);
   2453 
   2454 	return (next);
   2455 }
   2456 
   2457 /*
   2458  * Handles an MLDv2 Listener Query.  On error, returns 0; on success,
   2459  * returns the appropriate (non-zero, unsigned) timer value (which may
   2460  * be INFINITY) to be set.
   2461  */
   2462 static uint_t
   2463 mldv2_query_in(mld2q_t *mld2q, ill_t *ill, int mldlen)
   2464 {
   2465 	ilm_t	*ilm;
   2466 	in6_addr_t *v6group, *src_array;
   2467 	uint_t	next, numsrc, i, mrd, delay, qqi, current;
   2468 	uint8_t	qrv;
   2469 	ilm_walker_t ilw;
   2470 
   2471 	v6group = &mld2q->mld2q_addr;
   2472 	numsrc = ntohs(mld2q->mld2q_numsrc);
   2473 
   2474 	/* make sure numsrc matches packet size */
   2475 	if (mldlen < MLD_V2_QUERY_MINLEN + (numsrc * sizeof (in6_addr_t))) {
   2476 		BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInErrors);
   2477 		return (0);
   2478 	}
   2479 	src_array = (in6_addr_t *)&mld2q[1];
   2480 
   2481 	BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInGroupMembQueries);
   2482 
   2483 	/* extract Maximum Response Delay from code in header */
   2484 	mrd = ntohs(mld2q->mld2q_mxrc);
   2485 	if (mrd >= MLD_V2_MAXRT_FPMIN) {
   2486 		uint_t hdrval, mant, exp;
   2487 		hdrval = mrd;
   2488 		mant = hdrval & MLD_V2_MAXRT_MANT_MASK;
   2489 		exp = (hdrval & MLD_V2_MAXRT_EXP_MASK) >> 12;
   2490 		mrd = (mant | 0x1000) << (exp + 3);
   2491 	}
   2492 	if (mrd == 0)
   2493 		mrd = DSEC_TO_MSEC(MCAST_DEF_QUERY_RESP_INTERVAL);
   2494 
   2495 	MCAST_RANDOM_DELAY(delay, mrd);
   2496 	next = (unsigned)INFINITY;
   2497 	current = CURRENT_MSTIME;
   2498 
   2499 	if ((qrv = mld2q->mld2q_sqrv & MLD_V2_RV_MASK) == 0)
   2500 		ill->ill_mcast_rv = MCAST_DEF_ROBUSTNESS;
   2501 	else
   2502 		ill->ill_mcast_rv = qrv;
   2503 
   2504 	if ((qqi = (uint_t)mld2q->mld2q_qqic) >= MLD_V2_QQI_FPMIN) {
   2505 		uint_t mant, exp;
   2506 		mant = qqi & MLD_V2_QQI_MANT_MASK;
   2507 		exp = (qqi & MLD_V2_QQI_EXP_MASK) >> 12;
   2508 		qqi = (mant | 0x10) << (exp + 3);
   2509 	}
   2510 	ill->ill_mcast_qi = (qqi == 0) ? MCAST_DEF_QUERY_INTERVAL : qqi;
   2511 
   2512 	/*
   2513 	 * If we have a pending general query response that's scheduled
   2514 	 * sooner than the delay we calculated for this response, then
   2515 	 * no action is required (MLDv2 draft section 6.2 rule 1)
   2516 	 */
   2517 	mutex_enter(&ill->ill_lock);
   2518 	if (ill->ill_global_timer < (current + delay)) {
   2519 		mutex_exit(&ill->ill_lock);
   2520 		return (next);
   2521 	}
   2522 	mutex_exit(&ill->ill_lock);
   2523 
   2524 	/*
   2525 	 * Now take action depending on query type: general,
   2526 	 * group specific, or group/source specific.
   2527 	 */
   2528 	if ((numsrc == 0) && IN6_IS_ADDR_UNSPECIFIED(v6group)) {
   2529 		/*
   2530 		 * general query
   2531 		 * We know global timer is either not running or is
   2532 		 * greater than our calculated delay, so reset it to
   2533 		 * our delay (random value in range [0, response time])
   2534 		 */
   2535 		mutex_enter(&ill->ill_lock);
   2536 		ill->ill_global_timer = current + delay;
   2537 		mutex_exit(&ill->ill_lock);
   2538 		next = delay;
   2539 
   2540 	} else {
   2541 		/* group or group/source specific query */
   2542 		ilm = ilm_walker_start(&ilw, ill);
   2543 		mutex_enter(&ill->ill_lock);
   2544 		for (; ilm != NULL; ilm = ilm_walker_step(&ilw, ilm)) {
   2545 			if (IN6_IS_ADDR_UNSPECIFIED(&ilm->ilm_v6addr) ||
   2546 			    IN6_IS_ADDR_MC_NODELOCAL(&ilm->ilm_v6addr) ||
   2547 			    IN6_IS_ADDR_MC_RESERVED(&ilm->ilm_v6addr) ||
   2548 			    !IN6_ARE_ADDR_EQUAL(v6group, &ilm->ilm_v6addr))
   2549 				continue;
   2550 
   2551 			/*
   2552 			 * If the query is group specific or we have a
   2553 			 * pending group specific query, the response is
   2554 			 * group specific (pending sources list should be
   2555 			 * empty).  Otherwise, need to update the pending
   2556 			 * sources list for the group and source specific
   2557 			 * response.
   2558 			 */
   2559 			if (numsrc == 0 || (ilm->ilm_timer < INFINITY &&
   2560 			    SLIST_IS_EMPTY(ilm->ilm_pendsrcs))) {
   2561 group_query:
   2562 				FREE_SLIST(ilm->ilm_pendsrcs);
   2563 				ilm->ilm_pendsrcs = NULL;
   2564 			} else {
   2565 				boolean_t overflow;
   2566 				slist_t *pktl;
   2567 				if (numsrc > MAX_FILTER_SIZE ||
   2568 				    (ilm->ilm_pendsrcs == NULL &&
   2569 				    (ilm->ilm_pendsrcs = l_alloc()) == NULL)) {
   2570 					/*
   2571 					 * We've been sent more sources than
   2572 					 * we can deal with; or we can't deal
   2573 					 * with a source list at all. Revert
   2574 					 * to a group specific query.
   2575 					 */
   2576 					goto group_query;
   2577 				}
   2578 				if ((pktl = l_alloc()) == NULL)
   2579 					goto group_query;
   2580 				pktl->sl_numsrc = numsrc;
   2581 				for (i = 0; i < numsrc; i++)
   2582 					pktl->sl_addr[i] = src_array[i];
   2583 				l_union_in_a(ilm->ilm_pendsrcs, pktl,
   2584 				    &overflow);
   2585 				l_free(pktl);
   2586 				if (overflow)
   2587 					goto group_query;
   2588 			}
   2589 			ilm->ilm_timer = (ilm->ilm_timer == INFINITY) ?
   2590 			    INFINITY : (ilm->ilm_timer - current);
   2591 			/* set timer to soonest value */
   2592 			ilm->ilm_timer = MIN(ilm->ilm_timer, delay);
   2593 			if (ilm->ilm_timer < next)
   2594 				next = ilm->ilm_timer;
   2595 			ilm->ilm_timer += current;
   2596 			break;
   2597 		}
   2598 		mutex_exit(&ill->ill_lock);
   2599 		ilm_walker_finish(&ilw);
   2600 	}
   2601 
   2602 	return (next);
   2603 }
   2604 
   2605 /*
   2606  * Send MLDv1 response packet with hoplimit 1
   2607  */
   2608 static void
   2609 mld_sendpkt(ilm_t *ilm, uchar_t type, const in6_addr_t *v6addr)
   2610 {
   2611 	mblk_t		*mp;
   2612 	mld_hdr_t	*mldh;
   2613 	ip6_t 		*ip6h;
   2614 	ip6_hbh_t	*ip6hbh;
   2615 	struct ip6_opt_router	*ip6router;
   2616 	size_t		size = IPV6_HDR_LEN + sizeof (mld_hdr_t);
   2617 	ill_t		*ill = ilm->ilm_ill;
   2618 	ipif_t		*ipif;
   2619 
   2620 	/*
   2621 	 * We need to place a router alert option in this packet.  The length
   2622 	 * of the options must be a multiple of 8.  The hbh option header is 2
   2623 	 * bytes followed by the 4 byte router alert option.  That leaves
   2624 	 * 2 bytes of pad for a total of 8 bytes.
   2625 	 */
   2626 	const int	router_alert_length = 8;
   2627 
   2628 	ASSERT(ill->ill_isv6);
   2629 
   2630 	size += router_alert_length;
   2631 	mp = allocb(size, BPRI_HI);
   2632 	if (mp == NULL)
   2633 		return;
   2634 	bzero(mp->b_rptr, size);
   2635 	mp->b_wptr = mp->b_rptr + size;
   2636 
   2637 	ip6h = (ip6_t *)mp->b_rptr;
   2638 	ip6hbh = (struct ip6_hbh *)&ip6h[1];
   2639 	ip6router = (struct ip6_opt_router *)&ip6hbh[1];
   2640 	/*
   2641 	 * A zero is a pad option of length 1.  The bzero of the whole packet
   2642 	 * above will pad between ip6router and mld.
   2643 	 */
   2644 	mldh = (mld_hdr_t *)((uint8_t *)ip6hbh + router_alert_length);
   2645 
   2646 	mldh->mld_type = type;
   2647 	mldh->mld_addr = ilm->ilm_v6addr;
   2648 
   2649 	ip6router->ip6or_type = IP6OPT_ROUTER_ALERT;
   2650 	ip6router->ip6or_len = 2;
   2651 	ip6router->ip6or_value[0] = 0;
   2652 	ip6router->ip6or_value[1] = IP6_ALERT_MLD;
   2653 
   2654 	ip6hbh->ip6h_nxt = IPPROTO_ICMPV6;
   2655 	ip6hbh->ip6h_len = 0;
   2656 
   2657 	ip6h->ip6_vcf = IPV6_DEFAULT_VERS_AND_FLOW;
   2658 	ip6h->ip6_plen = htons(sizeof (*mldh) + router_alert_length);
   2659 	ip6h->ip6_nxt = IPPROTO_HOPOPTS;
   2660 	ip6h->ip6_hops = MLD_HOP_LIMIT;
   2661 	if (v6addr == NULL)
   2662 		ip6h->ip6_dst =  ilm->ilm_v6addr;
   2663 	else
   2664 		ip6h->ip6_dst = *v6addr;
   2665 
   2666 	/* ipif returned by ipif_lookup_zoneid is link-local (if present) */
   2667 	if (ipif_lookup_zoneid(ill, ilm->ilm_zoneid, IPIF_UP, &ipif)) {
   2668 		ip6h->ip6_src = ipif->ipif_v6src_addr;
   2669 		ipif_refrele(ipif);
   2670 	} else {
   2671 		/* Otherwise, use IPv6 default address selection. */
   2672 		ip6h->ip6_src = ipv6_all_zeros;
   2673 	}
   2674 
   2675 	/*
   2676 	 * Prepare for checksum by putting icmp length in the icmp
   2677 	 * checksum field. The checksum is calculated in ip_wput_v6.
   2678 	 */
   2679 	mldh->mld_cksum = htons(sizeof (*mldh));
   2680 
   2681 	/*
   2682 	 * ip_wput will automatically loopback the multicast packet to
   2683 	 * the conn if multicast loopback is enabled.
   2684 	 * The MIB stats corresponding to this outgoing MLD packet
   2685 	 * will be accounted for in ip_wput->ip_wput_v6->ip_wput_ire_v6
   2686 	 * ->icmp_update_out_mib_v6 function call.
   2687 	 */
   2688 	(void) ip_output_v6(NULL, mp, ill->ill_wq, IP_WPUT);
   2689 }
   2690 
   2691 /*
   2692  * Sends an MLD_V2_LISTENER_REPORT message out the passed-in ill.  The
   2693  * report will contain one multicast address record for each element of
   2694  * reclist.  If this causes packet length to exceed ill->ill_max_frag,
   2695  * multiple reports are sent.  reclist is assumed to be made up of
   2696  * buffers allocated by mcast_bldmrec(), and those buffers are freed here.
   2697  */
   2698 static void
   2699 mldv2_sendrpt(ill_t *ill, mrec_t *reclist)
   2700 {
   2701 	mblk_t		*mp;
   2702 	mld2r_t		*mld2r;
   2703 	mld2mar_t	*mld2mar;
   2704 	in6_addr_t	*srcarray;
   2705 	ip6_t		*ip6h;
   2706 	ip6_hbh_t	*ip6hbh;
   2707 	struct ip6_opt_router	*ip6router;
   2708 	size_t		size, optlen, padlen, icmpsize, rsize;
   2709 	ipif_t		*ipif;
   2710 	int		i, numrec, more_src_cnt;
   2711 	mrec_t		*rp, *cur_reclist;
   2712 	mrec_t		*next_reclist = reclist;
   2713 	boolean_t	morepkts;
   2714 
   2715 	ASSERT(IAM_WRITER_ILL(ill));
   2716 
   2717 	/* If there aren't any records, there's nothing to send */
   2718 	if (reclist == NULL)
   2719 		return;
   2720 
   2721 	ASSERT(ill->ill_isv6);
   2722 
   2723 	/*
   2724 	 * Total option length (optlen + padlen) must be a multiple of
   2725 	 * 8 bytes.  We assume here that optlen <= 8, so the total option
   2726 	 * length will be 8.  Assert this in case anything ever changes.
   2727 	 */
   2728 	optlen = sizeof (ip6_hbh_t) + sizeof (struct ip6_opt_router);
   2729 	ASSERT(optlen <= 8);
   2730 	padlen = 8 - optlen;
   2731 nextpkt:
   2732 	icmpsize = sizeof (mld2r_t);
   2733 	size = IPV6_HDR_LEN + optlen + padlen + icmpsize;
   2734 	morepkts = B_FALSE;
   2735 	more_src_cnt = 0;
   2736 	for (rp = cur_reclist = next_reclist, numrec = 0; rp != NULL;
   2737 	    rp = rp->mrec_next, numrec++) {
   2738 		rsize = sizeof (mld2mar_t) +
   2739 		    (rp->mrec_srcs.sl_numsrc * sizeof (in6_addr_t));
   2740 		if (size + rsize > ill->ill_max_frag) {
   2741 			if (rp == cur_reclist) {
   2742 				/*
   2743 				 * If the first mrec we looked at is too big
   2744 				 * to fit in a single packet (i.e the source
   2745 				 * list is too big), we must either truncate
   2746 				 * the list (if TO_EX or IS_EX), or send
   2747 				 * multiple reports for the same group (all
   2748 				 * other types).
   2749 				 */
   2750 				int srcspace, srcsperpkt;
   2751 				srcspace = ill->ill_max_frag -
   2752 				    (size + sizeof (mld2mar_t));
   2753 
   2754 				/*
   2755 				 * Skip if there's not even enough room in
   2756 				 * a single packet to send something useful.
   2757 				 */
   2758 				if (srcspace <= sizeof (in6_addr_t))
   2759 					continue;
   2760 
   2761 				srcsperpkt = srcspace / sizeof (in6_addr_t);
   2762 				/*
   2763 				 * Increment icmpsize and size, because we will
   2764 				 * be sending a record for the mrec we're
   2765 				 * looking at now.
   2766 				 */
   2767 				rsize = sizeof (mld2mar_t) +
   2768 				    (srcsperpkt * sizeof (in6_addr_t));
   2769 				icmpsize += rsize;
   2770 				size += rsize;
   2771 				if (rp->mrec_type == MODE_IS_EXCLUDE ||
   2772 				    rp->mrec_type == CHANGE_TO_EXCLUDE) {
   2773 					rp->mrec_srcs.sl_numsrc = srcsperpkt;
   2774 					if (rp->mrec_next == NULL) {
   2775 						/* no more packets to send */
   2776 						break;
   2777 					} else {
   2778 						/*
   2779 						 * more packets, but we're
   2780 						 * done with this mrec.
   2781 						 */
   2782 						next_reclist = rp->mrec_next;
   2783 					}
   2784 				} else {
   2785 					more_src_cnt = rp->mrec_srcs.sl_numsrc
   2786 					    - srcsperpkt;
   2787 					rp->mrec_srcs.sl_numsrc = srcsperpkt;
   2788 					/*
   2789 					 * We'll fix up this mrec (remove the
   2790 					 * srcs we've already sent) before
   2791 					 * returning to nextpkt above.
   2792 					 */
   2793 					next_reclist = rp;
   2794 				}
   2795 			} else {
   2796 				next_reclist = rp;
   2797 			}
   2798 			morepkts = B_TRUE;
   2799 			break;
   2800 		}
   2801 		icmpsize += rsize;
   2802 		size += rsize;
   2803 	}
   2804 
   2805 	mp = allocb(size, BPRI_HI);
   2806 	if (mp == NULL)
   2807 		goto free_reclist;
   2808 	bzero(mp->b_rptr, size);
   2809 	mp->b_wptr = mp->b_rptr + size;
   2810 
   2811 	ip6h = (ip6_t *)mp->b_rptr;
   2812 	ip6hbh = (ip6_hbh_t *)&(ip6h[1]);
   2813 	ip6router = (struct ip6_opt_router *)&(ip6hbh[1]);
   2814 	mld2r = (mld2r_t *)((uint8_t *)ip6hbh + optlen + padlen);
   2815 	mld2mar = (mld2mar_t *)&(mld2r[1]);
   2816 
   2817 	ip6h->ip6_vcf = IPV6_DEFAULT_VERS_AND_FLOW;
   2818 	ip6h->ip6_plen = htons(optlen + padlen + icmpsize);
   2819 	ip6h->ip6_nxt = IPPROTO_HOPOPTS;
   2820 	ip6h->ip6_hops = MLD_HOP_LIMIT;
   2821 	ip6h->ip6_dst = ipv6_all_v2rtrs_mcast;
   2822 	/* ipif returned by ipif_lookup_zoneid is link-local (if present) */
   2823 	if (ipif_lookup_zoneid(ill, ALL_ZONES, IPIF_UP, &ipif)) {
   2824 		ip6h->ip6_src = ipif->ipif_v6src_addr;
   2825 		ipif_refrele(ipif);
   2826 	} else {
   2827 		/* otherwise, use IPv6 default address selection. */
   2828 		ip6h->ip6_src = ipv6_all_zeros;
   2829 	}
   2830 
   2831 	ip6hbh->ip6h_nxt = IPPROTO_ICMPV6;
   2832 	/*
   2833 	 * ip6h_len is the number of 8-byte words, not including the first
   2834 	 * 8 bytes; we've assumed optlen + padlen == 8 bytes; hence len = 0.
   2835 	 */
   2836 	ip6hbh->ip6h_len = 0;
   2837 
   2838 	ip6router->ip6or_type = IP6OPT_ROUTER_ALERT;
   2839 	ip6router->ip6or_len = 2;
   2840 	ip6router->ip6or_value[0] = 0;
   2841 	ip6router->ip6or_value[1] = IP6_ALERT_MLD;
   2842 
   2843 	mld2r->mld2r_type = MLD_V2_LISTENER_REPORT;
   2844 	mld2r->mld2r_nummar = htons(numrec);
   2845 	/*
   2846 	 * Prepare for the checksum by putting icmp length in the icmp
   2847 	 * checksum field. The checksum is calculated in ip_wput_v6.
   2848 	 */
   2849 	mld2r->mld2r_cksum = htons(icmpsize);
   2850 
   2851 	for (rp = cur_reclist; rp != NULL; rp = rp->mrec_next) {
   2852 		mld2mar->mld2mar_type = rp->mrec_type;
   2853 		mld2mar->mld2mar_auxlen = 0;
   2854 		mld2mar->mld2mar_numsrc = htons(rp->mrec_srcs.sl_numsrc);
   2855 		mld2mar->mld2mar_group = rp->mrec_group;
   2856 		srcarray = (in6_addr_t *)&(mld2mar[1]);
   2857 
   2858 		for (i = 0; i < rp->mrec_srcs.sl_numsrc; i++)
   2859 			srcarray[i] = rp->mrec_srcs.sl_addr[i];
   2860 
   2861 		mld2mar = (mld2mar_t *)&(srcarray[i]);
   2862 	}
   2863 
   2864 	/*
   2865 	 * ip_wput will automatically loopback the multicast packet to
   2866 	 * the conn if multicast loopback is enabled.
   2867 	 * The MIB stats corresponding to this outgoing MLD packet
   2868 	 * will be accounted for in ip_wput->ip_wput_v6->ip_wput_ire_v6
   2869 	 * ->icmp_update_out_mib_v6 function call.
   2870 	 */
   2871 	(void) ip_output_v6(NULL, mp, ill->ill_wq, IP_WPUT);
   2872 
   2873 	if (morepkts) {
   2874 		if (more_src_cnt > 0) {
   2875 			int index, mvsize;
   2876 			slist_t *sl = &next_reclist->mrec_srcs;
   2877 			index = sl->sl_numsrc;
   2878 			mvsize = more_src_cnt * sizeof (in6_addr_t);
   2879 			(void) memmove(&sl->sl_addr[0], &sl->sl_addr[index],
   2880 			    mvsize);
   2881 			sl->sl_numsrc = more_src_cnt;
   2882 		}
   2883 		goto nextpkt;
   2884 	}
   2885 
   2886 free_reclist:
   2887 	while (reclist != NULL) {
   2888 		rp = reclist->mrec_next;
   2889 		mi_free(reclist);
   2890 		reclist = rp;
   2891 	}
   2892 }
   2893 
   2894 static mrec_t *
   2895 mcast_bldmrec(mcast_record_t type, in6_addr_t *grp, slist_t *srclist,
   2896     mrec_t *next)
   2897 {
   2898 	mrec_t *rp;
   2899 	int i;
   2900 
   2901 	if ((type == ALLOW_NEW_SOURCES || type == BLOCK_OLD_SOURCES) &&
   2902 	    SLIST_IS_EMPTY(srclist))
   2903 		return (next);
   2904 
   2905 	rp = (mrec_t *)mi_alloc(sizeof (mrec_t), BPRI_HI);
   2906 	if (rp == NULL)
   2907 		return (next);
   2908 
   2909 	rp->mrec_next = next;
   2910 	rp->mrec_type = type;
   2911 	rp->mrec_auxlen = 0;
   2912 	rp->mrec_group = *grp;
   2913 	if (srclist == NULL) {
   2914 		rp->mrec_srcs.sl_numsrc = 0;
   2915 	} else {
   2916 		rp->mrec_srcs.sl_numsrc = srclist->sl_numsrc;
   2917 		for (i = 0; i < srclist->sl_numsrc; i++)
   2918 			rp->mrec_srcs.sl_addr[i] = srclist->sl_addr[i];
   2919 	}
   2920 
   2921 	return (rp);
   2922 }
   2923 
   2924 /*
   2925  * Set up initial retransmit state.  If memory cannot be allocated for
   2926  * the source lists, simply create as much state as is possible; memory
   2927  * allocation failures are considered one type of transient error that
   2928  * the retransmissions are designed to overcome (and if they aren't
   2929  * transient, there are bigger problems than failing to notify the
   2930  * router about multicast group membership state changes).
   2931  */
   2932 static void
   2933 mcast_init_rtx(ill_t *ill, rtx_state_t *rtxp, mcast_record_t rtype,
   2934     slist_t *flist)
   2935 {
   2936 	/*
   2937 	 * There are only three possibilities for rtype:
   2938 	 *	New join, transition from INCLUDE {} to INCLUDE {flist}
   2939 	 *	  => rtype is ALLOW_NEW_SOURCES
   2940 	 *	New join, transition from INCLUDE {} to EXCLUDE {flist}
   2941 	 *	  => rtype is CHANGE_TO_EXCLUDE
   2942 	 *	State change that involves a filter mode change
   2943 	 *	  => rtype is either CHANGE_TO_INCLUDE or CHANGE_TO_EXCLUDE
   2944 	 */
   2945 	ASSERT(rtype == CHANGE_TO_EXCLUDE || rtype == CHANGE_TO_INCLUDE ||
   2946 	    rtype == ALLOW_NEW_SOURCES);
   2947 
   2948 	rtxp->rtx_cnt = ill->ill_mcast_rv;
   2949 
   2950 	switch (rtype) {
   2951 	case CHANGE_TO_EXCLUDE:
   2952 		rtxp->rtx_fmode_cnt = ill->ill_mcast_rv;
   2953 		CLEAR_SLIST(rtxp->rtx_allow);
   2954 		COPY_SLIST(flist, rtxp->rtx_block);
   2955 		break;
   2956 	case ALLOW_NEW_SOURCES:
   2957 	case CHANGE_TO_INCLUDE:
   2958 		rtxp->rtx_fmode_cnt =
   2959 		    rtype == ALLOW_NEW_SOURCES ? 0 : ill->ill_mcast_rv;
   2960 		CLEAR_SLIST(rtxp->rtx_block);
   2961 		COPY_SLIST(flist, rtxp->rtx_allow);
   2962 		break;
   2963 	}
   2964 }
   2965 
   2966 /*
   2967  * The basic strategy here, as extrapolated from RFC 3810 section 6.1 and
   2968  * RFC 3376 section 5.1, covers three cases:
   2969  *	* The current state change is a filter mode change
   2970  *		Set filter mode retransmit counter; set retransmit allow or
   2971  *		block list to new source list as appropriate, and clear the
   2972  *		retransmit list that was not set; send TO_IN or TO_EX with
   2973  *		new source list.
   2974  *	* The current state change is a source list change, but the filter
   2975  *	  mode retransmit counter is > 0
   2976  *		Decrement filter mode retransmit counter; set retransmit
   2977  *		allow or block list to  new source list as appropriate,
   2978  *		and clear the retransmit list that was not set; send TO_IN
   2979  *		or TO_EX with new source list.
   2980  *	* The current state change is a source list change, and the filter
   2981  *	  mode retransmit counter is 0.
   2982  *		Merge existing rtx allow and block lists with new state:
   2983  *		  rtx_allow = (new allow + rtx_allow) - new block
   2984  *		  rtx_block = (new block + rtx_block) - new allow
   2985  *		Send ALLOW and BLOCK records for new retransmit lists;
   2986  *		decrement retransmit counter.
   2987  *
   2988  * As is the case for mcast_init_rtx(), memory allocation failures are
   2989  * acceptable; we just create as much state as we can.
   2990  */
   2991 static mrec_t *
   2992 mcast_merge_rtx(ilm_t *ilm, mrec_t *mreclist, slist_t *flist)
   2993 {
   2994 	ill_t *ill;
   2995 	rtx_state_t *rtxp = &ilm->ilm_rtx;
   2996 	mcast_record_t txtype;
   2997 	mrec_t *rp, *rpnext, *rtnmrec;
   2998 	boolean_t ovf;
   2999 
   3000 	ill = (ilm->ilm_ill == NULL ? ilm->ilm_ipif->ipif_ill : ilm->ilm_ill);
   3001 
   3002 	if (mreclist == NULL)
   3003 		return (mreclist);
   3004 
   3005 	/*
   3006 	 * A filter mode change is indicated by a single mrec, which is
   3007 	 * either TO_IN or TO_EX.  In this case, we just need to set new
   3008 	 * retransmit state as if this were an initial join.  There is
   3009 	 * no change to the mrec list.
   3010 	 */
   3011 	if (mreclist->mrec_type == CHANGE_TO_INCLUDE ||
   3012 	    mreclist->mrec_type == CHANGE_TO_EXCLUDE) {
   3013 		mcast_init_rtx(ill, rtxp, mreclist->mrec_type,
   3014 		    &mreclist->mrec_srcs);
   3015 		return (mreclist);
   3016 	}
   3017 
   3018 	/*
   3019 	 * Only the source list has changed
   3020 	 */
   3021 	rtxp->rtx_cnt = ill->ill_mcast_rv;
   3022 	if (rtxp->rtx_fmode_cnt > 0) {
   3023 		/* but we're still sending filter mode change reports */
   3024 		rtxp->rtx_fmode_cnt--;
   3025 		if (ilm->ilm_fmode == MODE_IS_INCLUDE) {
   3026 			CLEAR_SLIST(rtxp->rtx_block);
   3027 			COPY_SLIST(flist, rtxp->rtx_allow);
   3028 			txtype = CHANGE_TO_INCLUDE;
   3029 		} else {
   3030 			CLEAR_SLIST(rtxp->rtx_allow);
   3031 			COPY_SLIST(flist, rtxp->rtx_block);
   3032 			txtype = CHANGE_TO_EXCLUDE;
   3033 		}
   3034 		/* overwrite first mrec with new info */
   3035 		mreclist->mrec_type = txtype;
   3036 		l_copy(flist, &mreclist->mrec_srcs);
   3037 		/* then free any remaining mrecs */
   3038 		for (rp = mreclist->mrec_next; rp != NULL; rp = rpnext) {
   3039 			rpnext = rp->mrec_next;
   3040 			mi_free(rp);
   3041 		}
   3042 		mreclist->mrec_next = NULL;
   3043 		rtnmrec = mreclist;
   3044 	} else {
   3045 		mrec_t *allow_mrec, *block_mrec;
   3046 		/*
   3047 		 * Just send the source change reports; but we need to
   3048 		 * recalculate the ALLOW and BLOCK lists based on previous
   3049 		 * state and new changes.
   3050 		 */
   3051 		rtnmrec = mreclist;
   3052 		allow_mrec = block_mrec = NULL;
   3053 		for (rp = mreclist; rp != NULL; rp = rp->mrec_next) {
   3054 			ASSERT(rp->mrec_type == ALLOW_NEW_SOURCES ||
   3055 			    rp->mrec_type == BLOCK_OLD_SOURCES);
   3056 			if (rp->mrec_type == ALLOW_NEW_SOURCES)
   3057 				allow_mrec = rp;
   3058 			else
   3059 				block_mrec = rp;
   3060 		}
   3061 		/*
   3062 		 * Perform calculations:
   3063 		 *   new_allow = mrec_allow + (rtx_allow - mrec_block)
   3064 		 *   new_block = mrec_block + (rtx_block - mrec_allow)
   3065 		 *
   3066 		 * Each calc requires two steps, for example:
   3067 		 *   rtx_allow = rtx_allow - mrec_block;
   3068 		 *   new_allow = mrec_allow + rtx_allow;
   3069 		 *
   3070 		 * Store results in mrec lists, and then copy into rtx lists.
   3071 		 * We do it in this order in case the rtx list hasn't been
   3072 		 * alloc'd yet; if it hasn't and our alloc fails, that's okay,
   3073 		 * Overflows are also okay.
   3074 		 */
   3075 		if (block_mrec != NULL) {
   3076 			l_difference_in_a(rtxp->rtx_allow,
   3077 			    &block_mrec->mrec_srcs);
   3078 		}
   3079 		if (allow_mrec != NULL) {
   3080 			l_difference_in_a(rtxp->rtx_block,
   3081 			    &allow_mrec->mrec_srcs);
   3082 			l_union_in_a(&allow_mrec->mrec_srcs, rtxp->rtx_allow,
   3083 			    &ovf);
   3084 		}
   3085 		if (block_mrec != NULL) {
   3086 			l_union_in_a(&block_mrec->mrec_srcs, rtxp->rtx_block,
   3087 			    &ovf);
   3088 			COPY_SLIST(&block_mrec->mrec_srcs, rtxp->rtx_block);
   3089 		} else {
   3090 			rtnmrec = mcast_bldmrec(BLOCK_OLD_SOURCES,
   3091 			    &ilm->ilm_v6addr, rtxp->rtx_block, allow_mrec);
   3092 		}
   3093 		if (allow_mrec != NULL) {
   3094 			COPY_SLIST(&allow_mrec->mrec_srcs, rtxp->rtx_allow);
   3095 		} else {
   3096 			rtnmrec = mcast_bldmrec(ALLOW_NEW_SOURCES,
   3097 			    &ilm->ilm_v6addr, rtxp->rtx_allow, block_mrec);
   3098 		}
   3099 	}
   3100 
   3101 	return (rtnmrec);
   3102 }
   3103 
   3104 /*
   3105  * Convenience routine to signal the restart-timer thread.
   3106  */
   3107 static void
   3108 mcast_signal_restart_thread(ip_stack_t *ipst)
   3109 {
   3110 	mutex_enter(&ipst->ips_mrt_lock);
   3111 	ipst->ips_mrt_flags |= IP_MRT_RUN;
   3112 	cv_signal(&ipst->ips_mrt_cv);
   3113 	mutex_exit(&ipst->ips_mrt_lock);
   3114 }
   3115 
   3116 /*
   3117  * Thread to restart IGMP/MLD timers.  See the comment in igmp_joingroup() for
   3118  * the story behind this unfortunate thread.
   3119  */
   3120 void
   3121 mcast_restart_timers_thread(ip_stack_t *ipst)
   3122 {
   3123 	int next;
   3124 	char name[64];
   3125 	callb_cpr_t cprinfo;
   3126 
   3127 	(void) snprintf(name, sizeof (name), "mcast_restart_timers_thread_%d",
   3128 	    ipst->ips_netstack->netstack_stackid);
   3129 	CALLB_CPR_INIT(&cprinfo, &ipst->ips_mrt_lock, callb_generic_cpr, name);
   3130 
   3131 	for (;;) {
   3132 		mutex_enter(&ipst->ips_mrt_lock);
   3133 		while (!(ipst->ips_mrt_flags & (IP_MRT_STOP|IP_MRT_RUN))) {
   3134 			CALLB_CPR_SAFE_BEGIN(&cprinfo);
   3135 			cv_wait(&ipst->ips_mrt_cv, &ipst->ips_mrt_lock);
   3136 			CALLB_CPR_SAFE_END(&cprinfo, &ipst->ips_mrt_lock);
   3137 		}
   3138 		if (ipst->ips_mrt_flags & IP_MRT_STOP)
   3139 			break;
   3140 		ipst->ips_mrt_flags &= ~IP_MRT_RUN;
   3141 		mutex_exit(&ipst->ips_mrt_lock);
   3142 
   3143 		mutex_enter(&ipst->ips_igmp_timer_lock);
   3144 		next = ipst->ips_igmp_deferred_next;
   3145 		ipst->ips_igmp_deferred_next = INFINITY;
   3146 		mutex_exit(&ipst->ips_igmp_timer_lock);
   3147 
   3148 		if (next != INFINITY)
   3149 			igmp_start_timers(next, ipst);
   3150 
   3151 		mutex_enter(&ipst->ips_mld_timer_lock);
   3152 		next = ipst->ips_mld_deferred_next;
   3153 		ipst->ips_mld_deferred_next = INFINITY;
   3154 		mutex_exit(&ipst->ips_mld_timer_lock);
   3155 		if (next != INFINITY)
   3156 			mld_start_timers(next, ipst);
   3157 	}
   3158 
   3159 	ipst->ips_mrt_flags |= IP_MRT_DONE;
   3160 	cv_signal(&ipst->ips_mrt_done_cv);
   3161 	CALLB_CPR_EXIT(&cprinfo);	/* drops ips_mrt_lock */
   3162 	thread_exit();
   3163 }
   3164