1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2007 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 /* Copyright (c) 1990 Mentat Inc. */ 26 27 #pragma ident "%Z%%M% %I% %E% SMI" 28 29 /* 30 * Internet Group Management Protocol (IGMP) routines. 31 * Multicast Listener Discovery Protocol (MLD) routines. 32 * 33 * Written by Steve Deering, Stanford, May 1988. 34 * Modified by Rosen Sharma, Stanford, Aug 1994. 35 * Modified by Bill Fenner, Xerox PARC, Feb. 1995. 36 * 37 * MULTICAST 3.5.1.1 38 */ 39 40 #include <sys/types.h> 41 #include <sys/stream.h> 42 #include <sys/stropts.h> 43 #include <sys/strlog.h> 44 #include <sys/strsun.h> 45 #include <sys/systm.h> 46 #include <sys/ddi.h> 47 #include <sys/sunddi.h> 48 #include <sys/cmn_err.h> 49 #include <sys/atomic.h> 50 #include <sys/zone.h> 51 52 #include <sys/param.h> 53 #include <sys/socket.h> 54 #include <inet/ipclassifier.h> 55 #include <net/if.h> 56 #include <net/route.h> 57 #include <netinet/in.h> 58 #include <netinet/igmp_var.h> 59 #include <netinet/ip6.h> 60 #include <netinet/icmp6.h> 61 62 #include <inet/common.h> 63 #include <inet/mi.h> 64 #include <inet/nd.h> 65 #include <inet/ip.h> 66 #include <inet/ip6.h> 67 #include <inet/ip_multi.h> 68 #include <inet/ip_listutils.h> 69 70 #include <netinet/igmp.h> 71 #include <inet/ip_if.h> 72 #include <net/pfkeyv2.h> 73 #include <inet/ipsec_info.h> 74 75 static uint_t igmp_query_in(ipha_t *ipha, igmpa_t *igmpa, ill_t *ill); 76 static uint_t igmpv3_query_in(igmp3qa_t *igmp3qa, ill_t *ill, int igmplen); 77 static uint_t mld_query_in(mld_hdr_t *mldh, ill_t *ill); 78 static uint_t mldv2_query_in(mld2q_t *mld2q, ill_t *ill, int mldlen); 79 static void igmp_sendpkt(ilm_t *ilm, uchar_t type, ipaddr_t addr); 80 static void mld_sendpkt(ilm_t *ilm, uchar_t type, const in6_addr_t *v6addr); 81 static void igmpv3_sendrpt(ipif_t *ipif, mrec_t *reclist); 82 static void mldv2_sendrpt(ill_t *ill, mrec_t *reclist); 83 static mrec_t *mcast_bldmrec(mcast_record_t type, in6_addr_t *grp, 84 slist_t *srclist, mrec_t *next); 85 static void mcast_init_rtx(ill_t *ill, rtx_state_t *rtxp, 86 mcast_record_t rtype, slist_t *flist); 87 static mrec_t *mcast_merge_rtx(ilm_t *ilm, mrec_t *rp, slist_t *flist); 88 89 90 /* 91 * Macros used to do timer len conversions. Timer values are always 92 * stored and passed to the timer functions as milliseconds; but the 93 * default values and values from the wire may not be. 94 * 95 * And yes, it's obscure, but decisecond is easier to abbreviate than 96 * "tenths of a second". 97 */ 98 #define DSEC_TO_MSEC(dsec) ((dsec) * 100) 99 #define SEC_TO_MSEC(sec) ((sec) * 1000) 100 101 /* 102 * A running timer (scheduled thru timeout) can be cancelled if another 103 * timer with a shorter timeout value is scheduled before it has timed 104 * out. When the shorter timer expires, the original timer is updated 105 * to account for the time elapsed while the shorter timer ran; but this 106 * does not take into account the amount of time already spent in timeout 107 * state before being preempted by the shorter timer, that is the time 108 * interval between time scheduled to time cancelled. This can cause 109 * delays in sending out multicast membership reports. To resolve this 110 * problem, wallclock time (absolute time) is used instead of deltas 111 * (relative time) to track timers. 112 * 113 * The MACRO below gets the lbolt value, used for proper timer scheduling 114 * and firing. Therefore multicast membership reports are sent on time. 115 * The timer does not exactly fire at the time it was scehduled to fire, 116 * there is a difference of a few milliseconds observed. An offset is used 117 * to take care of the difference. 118 */ 119 120 #define CURRENT_MSTIME ((uint_t)TICK_TO_MSEC(ddi_get_lbolt())) 121 #define CURRENT_OFFSET (999) 122 123 /* 124 * The first multicast join will trigger the igmp timers / mld timers 125 * The unit for next is milliseconds. 126 */ 127 void 128 igmp_start_timers(unsigned next, ip_stack_t *ipst) 129 { 130 int time_left; 131 int ret; 132 133 ASSERT(next != 0 && next != INFINITY); 134 135 mutex_enter(&ipst->ips_igmp_timer_lock); 136 137 if (ipst->ips_igmp_timer_setter_active) { 138 /* 139 * Serialize timer setters, one at a time. If the 140 * timer is currently being set by someone, 141 * just record the next time when it has to be 142 * invoked and return. The current setter will 143 * take care. 144 */ 145 ipst->ips_igmp_time_to_next = 146 MIN(ipst->ips_igmp_time_to_next, next); 147 mutex_exit(&ipst->ips_igmp_timer_lock); 148 return; 149 } else { 150 ipst->ips_igmp_timer_setter_active = B_TRUE; 151 } 152 if (ipst->ips_igmp_timeout_id == 0) { 153 /* 154 * The timer is inactive. We need to start a timer 155 */ 156 ipst->ips_igmp_time_to_next = next; 157 ipst->ips_igmp_timeout_id = timeout(igmp_timeout_handler, 158 (void *)ipst, MSEC_TO_TICK(ipst->ips_igmp_time_to_next)); 159 ipst->ips_igmp_timer_scheduled_last = ddi_get_lbolt(); 160 ipst->ips_igmp_timer_setter_active = B_FALSE; 161 mutex_exit(&ipst->ips_igmp_timer_lock); 162 return; 163 } 164 165 /* 166 * The timer was scheduled sometime back for firing in 167 * 'igmp_time_to_next' ms and is active. We need to 168 * reschedule the timeout if the new 'next' will happen 169 * earlier than the currently scheduled timeout 170 */ 171 time_left = ipst->ips_igmp_timer_scheduled_last + 172 MSEC_TO_TICK(ipst->ips_igmp_time_to_next) - ddi_get_lbolt(); 173 if (time_left < MSEC_TO_TICK(next)) { 174 ipst->ips_igmp_timer_setter_active = B_FALSE; 175 mutex_exit(&ipst->ips_igmp_timer_lock); 176 return; 177 } 178 179 mutex_exit(&ipst->ips_igmp_timer_lock); 180 ret = untimeout(ipst->ips_igmp_timeout_id); 181 mutex_enter(&ipst->ips_igmp_timer_lock); 182 /* 183 * The timeout was cancelled, or the timeout handler 184 * completed, while we were blocked in the untimeout. 185 * No other thread could have set the timer meanwhile 186 * since we serialized all the timer setters. Thus 187 * no timer is currently active nor executing nor will 188 * any timer fire in the future. We start the timer now 189 * if needed. 190 */ 191 if (ret == -1) { 192 ASSERT(ipst->ips_igmp_timeout_id == 0); 193 } else { 194 ASSERT(ipst->ips_igmp_timeout_id != 0); 195 ipst->ips_igmp_timeout_id = 0; 196 } 197 if (ipst->ips_igmp_time_to_next != 0) { 198 ipst->ips_igmp_time_to_next = 199 MIN(ipst->ips_igmp_time_to_next, next); 200 ipst->ips_igmp_timeout_id = timeout(igmp_timeout_handler, 201 (void *)ipst, MSEC_TO_TICK(ipst->ips_igmp_time_to_next)); 202 ipst->ips_igmp_timer_scheduled_last = ddi_get_lbolt(); 203 } 204 ipst->ips_igmp_timer_setter_active = B_FALSE; 205 mutex_exit(&ipst->ips_igmp_timer_lock); 206 } 207 208 /* 209 * mld_start_timers: 210 * The unit for next is milliseconds. 211 */ 212 void 213 mld_start_timers(unsigned next, ip_stack_t *ipst) 214 { 215 int time_left; 216 int ret; 217 218 ASSERT(next != 0 && next != INFINITY); 219 220 mutex_enter(&ipst->ips_mld_timer_lock); 221 if (ipst->ips_mld_timer_setter_active) { 222 /* 223 * Serialize timer setters, one at a time. If the 224 * timer is currently being set by someone, 225 * just record the next time when it has to be 226 * invoked and return. The current setter will 227 * take care. 228 */ 229 ipst->ips_mld_time_to_next = 230 MIN(ipst->ips_mld_time_to_next, next); 231 mutex_exit(&ipst->ips_mld_timer_lock); 232 return; 233 } else { 234 ipst->ips_mld_timer_setter_active = B_TRUE; 235 } 236 if (ipst->ips_mld_timeout_id == 0) { 237 /* 238 * The timer is inactive. We need to start a timer 239 */ 240 ipst->ips_mld_time_to_next = next; 241 ipst->ips_mld_timeout_id = timeout(mld_timeout_handler, 242 (void *)ipst, MSEC_TO_TICK(ipst->ips_mld_time_to_next)); 243 ipst->ips_mld_timer_scheduled_last = ddi_get_lbolt(); 244 ipst->ips_mld_timer_setter_active = B_FALSE; 245 mutex_exit(&ipst->ips_mld_timer_lock); 246 return; 247 } 248 249 /* 250 * The timer was scheduled sometime back for firing in 251 * 'igmp_time_to_next' ms and is active. We need to 252 * reschedule the timeout if the new 'next' will happen 253 * earlier than the currently scheduled timeout 254 */ 255 time_left = ipst->ips_mld_timer_scheduled_last + 256 MSEC_TO_TICK(ipst->ips_mld_time_to_next) - ddi_get_lbolt(); 257 if (time_left < MSEC_TO_TICK(next)) { 258 ipst->ips_mld_timer_setter_active = B_FALSE; 259 mutex_exit(&ipst->ips_mld_timer_lock); 260 return; 261 } 262 263 mutex_exit(&ipst->ips_mld_timer_lock); 264 ret = untimeout(ipst->ips_mld_timeout_id); 265 mutex_enter(&ipst->ips_mld_timer_lock); 266 /* 267 * The timeout was cancelled, or the timeout handler 268 * completed, while we were blocked in the untimeout. 269 * No other thread could have set the timer meanwhile 270 * since we serialized all the timer setters. Thus 271 * no timer is currently active nor executing nor will 272 * any timer fire in the future. We start the timer now 273 * if needed. 274 */ 275 if (ret == -1) { 276 ASSERT(ipst->ips_mld_timeout_id == 0); 277 } else { 278 ASSERT(ipst->ips_mld_timeout_id != 0); 279 ipst->ips_mld_timeout_id = 0; 280 } 281 if (ipst->ips_mld_time_to_next != 0) { 282 ipst->ips_mld_time_to_next = 283 MIN(ipst->ips_mld_time_to_next, next); 284 ipst->ips_mld_timeout_id = timeout(mld_timeout_handler, 285 (void *)ipst, MSEC_TO_TICK(ipst->ips_mld_time_to_next)); 286 ipst->ips_mld_timer_scheduled_last = ddi_get_lbolt(); 287 } 288 ipst->ips_mld_timer_setter_active = B_FALSE; 289 mutex_exit(&ipst->ips_mld_timer_lock); 290 } 291 292 /* 293 * igmp_input: 294 * Return NULL for a bad packet that is discarded here. 295 * Return mp if the message is OK and should be handed to "raw" receivers. 296 * Callers of igmp_input() may need to reinitialize variables that were copied 297 * from the mblk as this calls pullupmsg(). 298 */ 299 /* ARGSUSED */ 300 mblk_t * 301 igmp_input(queue_t *q, mblk_t *mp, ill_t *ill) 302 { 303 igmpa_t *igmpa; 304 ipha_t *ipha = (ipha_t *)(mp->b_rptr); 305 int iphlen, igmplen, mblklen; 306 ilm_t *ilm; 307 uint32_t src, dst; 308 uint32_t group; 309 uint_t next; 310 ipif_t *ipif; 311 ip_stack_t *ipst; 312 313 ASSERT(ill != NULL); 314 ASSERT(!ill->ill_isv6); 315 ipst = ill->ill_ipst; 316 ++ipst->ips_igmpstat.igps_rcv_total; 317 318 mblklen = MBLKL(mp); 319 if (mblklen < 1 || mblklen < (iphlen = IPH_HDR_LENGTH(ipha))) { 320 ++ipst->ips_igmpstat.igps_rcv_tooshort; 321 goto bad_pkt; 322 } 323 igmplen = ntohs(ipha->ipha_length) - iphlen; 324 /* 325 * Since msg sizes are more variable with v3, just pullup the 326 * whole thing now. 327 */ 328 if (MBLKL(mp) < (igmplen + iphlen)) { 329 mblk_t *mp1; 330 if ((mp1 = msgpullup(mp, -1)) == NULL) { 331 ++ipst->ips_igmpstat.igps_rcv_tooshort; 332 goto bad_pkt; 333 } 334 freemsg(mp); 335 mp = mp1; 336 ipha = (ipha_t *)(mp->b_rptr); 337 } 338 339 /* 340 * Validate lengths 341 */ 342 if (igmplen < IGMP_MINLEN) { 343 ++ipst->ips_igmpstat.igps_rcv_tooshort; 344 goto bad_pkt; 345 } 346 /* 347 * Validate checksum 348 */ 349 if (IP_CSUM(mp, iphlen, 0)) { 350 ++ipst->ips_igmpstat.igps_rcv_badsum; 351 goto bad_pkt; 352 } 353 354 igmpa = (igmpa_t *)(&mp->b_rptr[iphlen]); 355 src = ipha->ipha_src; 356 dst = ipha->ipha_dst; 357 if (ip_debug > 1) 358 (void) mi_strlog(ill->ill_rq, 1, SL_TRACE, 359 "igmp_input: src 0x%x, dst 0x%x on %s\n", 360 (int)ntohl(src), (int)ntohl(dst), 361 ill->ill_name); 362 363 switch (igmpa->igmpa_type) { 364 case IGMP_MEMBERSHIP_QUERY: 365 /* 366 * packet length differentiates between v1/v2 and v3 367 * v1/v2 should be exactly 8 octets long; v3 is >= 12 368 */ 369 if ((igmplen == IGMP_MINLEN) || 370 (ipst->ips_igmp_max_version <= IGMP_V2_ROUTER)) { 371 next = igmp_query_in(ipha, igmpa, ill); 372 } else if (igmplen >= IGMP_V3_QUERY_MINLEN) { 373 next = igmpv3_query_in((igmp3qa_t *)igmpa, ill, 374 igmplen); 375 } else { 376 ++ipst->ips_igmpstat.igps_rcv_tooshort; 377 goto bad_pkt; 378 } 379 if (next == 0) 380 goto bad_pkt; 381 382 if (next != INFINITY) 383 igmp_start_timers(next, ipst); 384 385 break; 386 387 case IGMP_V1_MEMBERSHIP_REPORT: 388 case IGMP_V2_MEMBERSHIP_REPORT: 389 /* 390 * For fast leave to work, we have to know that we are the 391 * last person to send a report for this group. Reports 392 * generated by us are looped back since we could potentially 393 * be a multicast router, so discard reports sourced by me. 394 */ 395 mutex_enter(&ill->ill_lock); 396 for (ipif = ill->ill_ipif; ipif != NULL; 397 ipif = ipif->ipif_next) { 398 if (ipif->ipif_lcl_addr == src) { 399 if (ip_debug > 1) { 400 (void) mi_strlog(ill->ill_rq, 401 1, 402 SL_TRACE, 403 "igmp_input: we are only " 404 "member src 0x%x ipif_local 0x%x", 405 (int)ntohl(src), 406 (int) 407 ntohl(ipif->ipif_lcl_addr)); 408 } 409 mutex_exit(&ill->ill_lock); 410 return (mp); 411 } 412 } 413 mutex_exit(&ill->ill_lock); 414 415 ++ipst->ips_igmpstat.igps_rcv_reports; 416 group = igmpa->igmpa_group; 417 if (!CLASSD(group)) { 418 ++ipst->ips_igmpstat.igps_rcv_badreports; 419 goto bad_pkt; 420 } 421 422 /* 423 * KLUDGE: if the IP source address of the report has an 424 * unspecified (i.e., zero) subnet number, as is allowed for 425 * a booting host, replace it with the correct subnet number 426 * so that a process-level multicast routing demon can 427 * determine which subnet it arrived from. This is necessary 428 * to compensate for the lack of any way for a process to 429 * determine the arrival interface of an incoming packet. 430 * 431 * Requires that a copy of *this* message it passed up 432 * to the raw interface which is done by our caller. 433 */ 434 if ((src & htonl(0xFF000000U)) == 0) { /* Minimum net mask */ 435 /* Pick the first ipif on this ill */ 436 mutex_enter(&ill->ill_lock); 437 src = ill->ill_ipif->ipif_subnet; 438 mutex_exit(&ill->ill_lock); 439 ip1dbg(("igmp_input: changed src to 0x%x\n", 440 (int)ntohl(src))); 441 ipha->ipha_src = src; 442 } 443 444 /* 445 * If we belong to the group being reported, and 446 * we are a 'Delaying member' in the RFC terminology, 447 * stop our timer for that group and 'clear flag' i.e. 448 * mark as IGMP_OTHERMEMBER. Do this for all logical 449 * interfaces on the given physical interface. 450 */ 451 mutex_enter(&ill->ill_lock); 452 for (ipif = ill->ill_ipif; ipif != NULL; 453 ipif = ipif->ipif_next) { 454 ilm = ilm_lookup_ipif(ipif, group); 455 if (ilm != NULL) { 456 ++ipst->ips_igmpstat.igps_rcv_ourreports; 457 ilm->ilm_timer = INFINITY; 458 ilm->ilm_state = IGMP_OTHERMEMBER; 459 } 460 } /* for */ 461 mutex_exit(&ill->ill_lock); 462 break; 463 464 case IGMP_V3_MEMBERSHIP_REPORT: 465 /* 466 * Currently nothing to do here; IGMP router is not 467 * implemented in ip, and v3 hosts don't pay attention 468 * to membership reports. 469 */ 470 break; 471 } 472 /* 473 * Pass all valid IGMP packets up to any process(es) listening 474 * on a raw IGMP socket. Do not free the packet. 475 */ 476 return (mp); 477 478 bad_pkt: 479 freemsg(mp); 480 return (NULL); 481 } 482 483 static uint_t 484 igmp_query_in(ipha_t *ipha, igmpa_t *igmpa, ill_t *ill) 485 { 486 ilm_t *ilm; 487 int timer; 488 uint_t next, current; 489 ip_stack_t *ipst; 490 491 ipst = ill->ill_ipst; 492 ++ipst->ips_igmpstat.igps_rcv_queries; 493 494 /* 495 * In the IGMPv2 specification, there are 3 states and a flag. 496 * 497 * In Non-Member state, we simply don't have a membership record. 498 * In Delaying Member state, our timer is running (ilm->ilm_timer 499 * < INFINITY). In Idle Member state, our timer is not running 500 * (ilm->ilm_timer == INFINITY). 501 * 502 * The flag is ilm->ilm_state, it is set to IGMP_OTHERMEMBER if 503 * we have heard a report from another member, or IGMP_IREPORTEDLAST 504 * if I sent the last report. 505 */ 506 if ((igmpa->igmpa_code == 0) || 507 (ipst->ips_igmp_max_version == IGMP_V1_ROUTER)) { 508 /* 509 * Query from an old router. 510 * Remember that the querier on this interface is old, 511 * and set the timer to the value in RFC 1112. 512 */ 513 514 515 mutex_enter(&ill->ill_lock); 516 ill->ill_mcast_v1_time = 0; 517 ill->ill_mcast_v1_tset = 1; 518 if (ill->ill_mcast_type != IGMP_V1_ROUTER) { 519 ip1dbg(("Received IGMPv1 Query on %s, switching mode " 520 "to IGMP_V1_ROUTER\n", ill->ill_name)); 521 atomic_add_16(&ill->ill_ifptr->illif_mcast_v1, 1); 522 ill->ill_mcast_type = IGMP_V1_ROUTER; 523 } 524 mutex_exit(&ill->ill_lock); 525 526 timer = SEC_TO_MSEC(IGMP_MAX_HOST_REPORT_DELAY); 527 528 if (ipha->ipha_dst != htonl(INADDR_ALLHOSTS_GROUP) || 529 igmpa->igmpa_group != 0) { 530 ++ipst->ips_igmpstat.igps_rcv_badqueries; 531 return (0); 532 } 533 534 } else { 535 in_addr_t group; 536 537 /* 538 * Query from a new router 539 * Simply do a validity check 540 */ 541 group = igmpa->igmpa_group; 542 if (group != 0 && (!CLASSD(group))) { 543 ++ipst->ips_igmpstat.igps_rcv_badqueries; 544 return (0); 545 } 546 547 /* 548 * Switch interface state to v2 on receipt of a v2 query 549 * ONLY IF current state is v3. Let things be if current 550 * state if v1 but do reset the v2-querier-present timer. 551 */ 552 mutex_enter(&ill->ill_lock); 553 if (ill->ill_mcast_type == IGMP_V3_ROUTER) { 554 ip1dbg(("Received IGMPv2 Query on %s, switching mode " 555 "to IGMP_V2_ROUTER", ill->ill_name)); 556 atomic_add_16(&ill->ill_ifptr->illif_mcast_v2, 1); 557 ill->ill_mcast_type = IGMP_V2_ROUTER; 558 } 559 ill->ill_mcast_v2_time = 0; 560 ill->ill_mcast_v2_tset = 1; 561 mutex_exit(&ill->ill_lock); 562 563 timer = DSEC_TO_MSEC((int)igmpa->igmpa_code); 564 } 565 566 if (ip_debug > 1) { 567 mutex_enter(&ill->ill_lock); 568 (void) mi_strlog(ill->ill_rq, 1, SL_TRACE, 569 "igmp_input: TIMER = igmp_code %d igmp_type 0x%x", 570 (int)ntohs(igmpa->igmpa_code), 571 (int)ntohs(igmpa->igmpa_type)); 572 mutex_exit(&ill->ill_lock); 573 } 574 575 /* 576 * -Start the timers in all of our membership records 577 * for the physical interface on which the query 578 * arrived, excluding those that belong to the "all 579 * hosts" group (224.0.0.1). 580 * 581 * -Restart any timer that is already running but has 582 * a value longer than the requested timeout. 583 * 584 * -Use the value specified in the query message as 585 * the maximum timeout. 586 */ 587 next = (unsigned)INFINITY; 588 mutex_enter(&ill->ill_lock); 589 590 current = CURRENT_MSTIME; 591 for (ilm = ill->ill_ilm; ilm; ilm = ilm->ilm_next) { 592 593 /* 594 * A multicast router joins INADDR_ANY address 595 * to enable promiscuous reception of all 596 * mcasts from the interface. This INADDR_ANY 597 * is stored in the ilm_v6addr as V6 unspec addr 598 */ 599 if (!IN6_IS_ADDR_V4MAPPED(&ilm->ilm_v6addr)) 600 continue; 601 if (ilm->ilm_addr == htonl(INADDR_ANY)) 602 continue; 603 if (ilm->ilm_addr != htonl(INADDR_ALLHOSTS_GROUP) && 604 (igmpa->igmpa_group == 0) || 605 (igmpa->igmpa_group == ilm->ilm_addr)) { 606 if (ilm->ilm_timer > timer) { 607 MCAST_RANDOM_DELAY(ilm->ilm_timer, timer); 608 if (ilm->ilm_timer < next) 609 next = ilm->ilm_timer; 610 ilm->ilm_timer += current; 611 } 612 } 613 } 614 mutex_exit(&ill->ill_lock); 615 616 return (next); 617 } 618 619 static uint_t 620 igmpv3_query_in(igmp3qa_t *igmp3qa, ill_t *ill, int igmplen) 621 { 622 uint_t i, next, mrd, qqi, timer, delay, numsrc; 623 uint_t current; 624 ilm_t *ilm; 625 ipaddr_t *src_array; 626 uint8_t qrv; 627 ip_stack_t *ipst; 628 629 ipst = ill->ill_ipst; 630 /* make sure numsrc matches packet size */ 631 numsrc = ntohs(igmp3qa->igmp3qa_numsrc); 632 if (igmplen < IGMP_V3_QUERY_MINLEN + (numsrc * sizeof (ipaddr_t))) { 633 ++ipst->ips_igmpstat.igps_rcv_tooshort; 634 return (0); 635 } 636 src_array = (ipaddr_t *)&igmp3qa[1]; 637 638 ++ipst->ips_igmpstat.igps_rcv_queries; 639 640 if ((mrd = (uint_t)igmp3qa->igmp3qa_mxrc) >= IGMP_V3_MAXRT_FPMIN) { 641 uint_t hdrval, mant, exp; 642 hdrval = (uint_t)igmp3qa->igmp3qa_mxrc; 643 mant = hdrval & IGMP_V3_MAXRT_MANT_MASK; 644 exp = (hdrval & IGMP_V3_MAXRT_EXP_MASK) >> 4; 645 mrd = (mant | 0x10) << (exp + 3); 646 } 647 if (mrd == 0) 648 mrd = MCAST_DEF_QUERY_RESP_INTERVAL; 649 timer = DSEC_TO_MSEC(mrd); 650 MCAST_RANDOM_DELAY(delay, timer); 651 next = (unsigned)INFINITY; 652 current = CURRENT_MSTIME; 653 654 if ((qrv = igmp3qa->igmp3qa_sqrv & IGMP_V3_RV_MASK) == 0) 655 ill->ill_mcast_rv = MCAST_DEF_ROBUSTNESS; 656 else 657 ill->ill_mcast_rv = qrv; 658 659 if ((qqi = (uint_t)igmp3qa->igmp3qa_qqic) >= IGMP_V3_QQI_FPMIN) { 660 uint_t hdrval, mant, exp; 661 hdrval = (uint_t)igmp3qa->igmp3qa_qqic; 662 mant = hdrval & IGMP_V3_QQI_MANT_MASK; 663 exp = (hdrval & IGMP_V3_QQI_EXP_MASK) >> 4; 664 qqi = (mant | 0x10) << (exp + 3); 665 } 666 ill->ill_mcast_qi = (qqi == 0) ? MCAST_DEF_QUERY_INTERVAL : qqi; 667 668 /* 669 * If we have a pending general query response that's scheduled 670 * sooner than the delay we calculated for this response, then 671 * no action is required (RFC3376 section 5.2 rule 1) 672 */ 673 mutex_enter(&ill->ill_lock); 674 if (ill->ill_global_timer < (current + delay)) { 675 mutex_exit(&ill->ill_lock); 676 return (next); 677 } 678 mutex_exit(&ill->ill_lock); 679 680 /* 681 * Now take action depending upon query type: 682 * general, group specific, or group/source specific. 683 */ 684 if ((numsrc == 0) && (igmp3qa->igmp3qa_group == INADDR_ANY)) { 685 /* 686 * general query 687 * We know global timer is either not running or is 688 * greater than our calculated delay, so reset it to 689 * our delay (random value in range [0, response time]). 690 */ 691 mutex_enter(&ill->ill_lock); 692 ill->ill_global_timer = current + delay; 693 mutex_exit(&ill->ill_lock); 694 next = delay; 695 696 } else { 697 /* group or group/source specific query */ 698 mutex_enter(&ill->ill_lock); 699 for (ilm = ill->ill_ilm; ilm; ilm = ilm->ilm_next) { 700 if (!IN6_IS_ADDR_V4MAPPED(&ilm->ilm_v6addr) || 701 (ilm->ilm_addr == htonl(INADDR_ANY)) || 702 (ilm->ilm_addr == htonl(INADDR_ALLHOSTS_GROUP)) || 703 (igmp3qa->igmp3qa_group != ilm->ilm_addr)) 704 continue; 705 /* 706 * If the query is group specific or we have a 707 * pending group specific query, the response is 708 * group specific (pending sources list should be 709 * empty). Otherwise, need to update the pending 710 * sources list for the group and source specific 711 * response. 712 */ 713 if (numsrc == 0 || (ilm->ilm_timer < INFINITY && 714 SLIST_IS_EMPTY(ilm->ilm_pendsrcs))) { 715 group_query: 716 FREE_SLIST(ilm->ilm_pendsrcs); 717 ilm->ilm_pendsrcs = NULL; 718 } else { 719 boolean_t overflow; 720 slist_t *pktl; 721 if (numsrc > MAX_FILTER_SIZE || 722 (ilm->ilm_pendsrcs == NULL && 723 (ilm->ilm_pendsrcs = l_alloc()) == NULL)) { 724 /* 725 * We've been sent more sources than 726 * we can deal with; or we can't deal 727 * with a source list at all. Revert 728 * to a group specific query. 729 */ 730 goto group_query; 731 } 732 if ((pktl = l_alloc()) == NULL) 733 goto group_query; 734 pktl->sl_numsrc = numsrc; 735 for (i = 0; i < numsrc; i++) 736 IN6_IPADDR_TO_V4MAPPED(src_array[i], 737 &(pktl->sl_addr[i])); 738 l_union_in_a(ilm->ilm_pendsrcs, pktl, 739 &overflow); 740 l_free(pktl); 741 if (overflow) 742 goto group_query; 743 } 744 745 ilm->ilm_timer = (ilm->ilm_timer == INFINITY) ? 746 INFINITY : (ilm->ilm_timer - current); 747 /* choose soonest timer */ 748 ilm->ilm_timer = MIN(ilm->ilm_timer, delay); 749 if (ilm->ilm_timer < next) 750 next = ilm->ilm_timer; 751 ilm->ilm_timer += current; 752 } 753 mutex_exit(&ill->ill_lock); 754 } 755 756 return (next); 757 } 758 759 void 760 igmp_joingroup(ilm_t *ilm) 761 { 762 uint_t timer; 763 ill_t *ill; 764 ip_stack_t *ipst = ilm->ilm_ipst; 765 766 ill = ilm->ilm_ipif->ipif_ill; 767 768 ASSERT(IAM_WRITER_ILL(ill)); 769 ASSERT(ilm->ilm_ill == NULL && !ilm->ilm_ipif->ipif_isv6); 770 771 mutex_enter(&ill->ill_lock); 772 if (ilm->ilm_addr == htonl(INADDR_ALLHOSTS_GROUP)) { 773 ilm->ilm_rtx.rtx_timer = INFINITY; 774 ilm->ilm_state = IGMP_OTHERMEMBER; 775 mutex_exit(&ill->ill_lock); 776 } else { 777 ip1dbg(("Querier mode %d, sending report, group %x\n", 778 ill->ill_mcast_type, htonl(ilm->ilm_addr))); 779 if (ill->ill_mcast_type == IGMP_V1_ROUTER) { 780 mutex_exit(&ill->ill_lock); 781 igmp_sendpkt(ilm, IGMP_V1_MEMBERSHIP_REPORT, 0); 782 mutex_enter(&ill->ill_lock); 783 } else if (ill->ill_mcast_type == IGMP_V2_ROUTER) { 784 mutex_exit(&ill->ill_lock); 785 igmp_sendpkt(ilm, IGMP_V2_MEMBERSHIP_REPORT, 0); 786 mutex_enter(&ill->ill_lock); 787 } else if (ill->ill_mcast_type == IGMP_V3_ROUTER) { 788 mrec_t *rp; 789 mcast_record_t rtype; 790 /* 791 * The possible state changes we need to handle here: 792 * Old State New State Report 793 * 794 * INCLUDE(0) INCLUDE(X) ALLOW(X),BLOCK(0) 795 * INCLUDE(0) EXCLUDE(X) TO_EX(X) 796 * 797 * No need to send the BLOCK(0) report; ALLOW(X) 798 * is enough. 799 */ 800 rtype = (ilm->ilm_fmode == MODE_IS_INCLUDE) ? 801 ALLOW_NEW_SOURCES : CHANGE_TO_EXCLUDE; 802 rp = mcast_bldmrec(rtype, &ilm->ilm_v6addr, 803 ilm->ilm_filter, NULL); 804 mutex_exit(&ill->ill_lock); 805 igmpv3_sendrpt(ilm->ilm_ipif, rp); 806 mutex_enter(&ill->ill_lock); 807 /* 808 * Set up retransmission state. Timer is set below, 809 * for both v3 and older versions. 810 */ 811 mcast_init_rtx(ill, &ilm->ilm_rtx, rtype, 812 ilm->ilm_filter); 813 } 814 815 /* Set the ilm timer value */ 816 MCAST_RANDOM_DELAY(ilm->ilm_rtx.rtx_timer, 817 SEC_TO_MSEC(IGMP_MAX_HOST_REPORT_DELAY)); 818 timer = ilm->ilm_rtx.rtx_timer; 819 ilm->ilm_rtx.rtx_timer += CURRENT_MSTIME; 820 ilm->ilm_state = IGMP_IREPORTEDLAST; 821 mutex_exit(&ill->ill_lock); 822 823 /* 824 * To avoid deadlock, we don't call igmp_start_timers from 825 * here. igmp_start_timers needs to call untimeout, and we 826 * can't hold the ipsq across untimeout since 827 * igmp_timeout_handler could be blocking trying to 828 * acquire the ipsq. Instead we start the timer after we get 829 * out of the ipsq in ipsq_exit. 830 */ 831 mutex_enter(&ipst->ips_igmp_timer_lock); 832 ipst->ips_igmp_deferred_next = MIN(timer, 833 ipst->ips_igmp_deferred_next); 834 mutex_exit(&ipst->ips_igmp_timer_lock); 835 } 836 837 if (ip_debug > 1) { 838 (void) mi_strlog(ilm->ilm_ipif->ipif_ill->ill_rq, 1, SL_TRACE, 839 "igmp_joingroup: multicast_type %d timer %d", 840 (ilm->ilm_ipif->ipif_ill->ill_mcast_type), 841 (int)ntohl(timer)); 842 } 843 } 844 845 void 846 mld_joingroup(ilm_t *ilm) 847 { 848 uint_t timer; 849 ill_t *ill; 850 ip_stack_t *ipst = ilm->ilm_ipst; 851 852 ill = ilm->ilm_ill; 853 854 ASSERT(IAM_WRITER_ILL(ill)); 855 ASSERT(ilm->ilm_ipif == NULL && ill->ill_isv6); 856 857 mutex_enter(&ill->ill_lock); 858 if (IN6_ARE_ADDR_EQUAL(&ipv6_all_hosts_mcast, &ilm->ilm_v6addr)) { 859 ilm->ilm_rtx.rtx_timer = INFINITY; 860 ilm->ilm_state = IGMP_OTHERMEMBER; 861 mutex_exit(&ill->ill_lock); 862 } else { 863 if (ill->ill_mcast_type == MLD_V1_ROUTER) { 864 mutex_exit(&ill->ill_lock); 865 mld_sendpkt(ilm, MLD_LISTENER_REPORT, NULL); 866 mutex_enter(&ill->ill_lock); 867 } else { 868 mrec_t *rp; 869 mcast_record_t rtype; 870 /* 871 * The possible state changes we need to handle here: 872 * Old State New State Report 873 * 874 * INCLUDE(0) INCLUDE(X) ALLOW(X),BLOCK(0) 875 * INCLUDE(0) EXCLUDE(X) TO_EX(X) 876 * 877 * No need to send the BLOCK(0) report; ALLOW(X) 878 * is enough 879 */ 880 rtype = (ilm->ilm_fmode == MODE_IS_INCLUDE) ? 881 ALLOW_NEW_SOURCES : CHANGE_TO_EXCLUDE; 882 rp = mcast_bldmrec(rtype, &ilm->ilm_v6addr, 883 ilm->ilm_filter, NULL); 884 mutex_exit(&ill->ill_lock); 885 mldv2_sendrpt(ill, rp); 886 mutex_enter(&ill->ill_lock); 887 /* 888 * Set up retransmission state. Timer is set below, 889 * for both v2 and v1. 890 */ 891 mcast_init_rtx(ill, &ilm->ilm_rtx, rtype, 892 ilm->ilm_filter); 893 } 894 895 /* Set the ilm timer value */ 896 ASSERT(ill->ill_mcast_type != MLD_V2_ROUTER || 897 ilm->ilm_rtx.rtx_cnt > 0); 898 MCAST_RANDOM_DELAY(ilm->ilm_rtx.rtx_timer, 899 SEC_TO_MSEC(ICMP6_MAX_HOST_REPORT_DELAY)); 900 timer = ilm->ilm_rtx.rtx_timer; 901 ilm->ilm_rtx.rtx_timer += CURRENT_MSTIME; 902 ilm->ilm_state = IGMP_IREPORTEDLAST; 903 mutex_exit(&ill->ill_lock); 904 905 /* 906 * To avoid deadlock, we don't call mld_start_timers from 907 * here. mld_start_timers needs to call untimeout, and we 908 * can't hold the ipsq (i.e. the lock) across untimeout 909 * since mld_timeout_handler could be blocking trying to 910 * acquire the ipsq. Instead we start the timer after we get 911 * out of the ipsq in ipsq_exit 912 */ 913 mutex_enter(&ipst->ips_mld_timer_lock); 914 ipst->ips_mld_deferred_next = MIN(timer, 915 ipst->ips_mld_deferred_next); 916 mutex_exit(&ipst->ips_mld_timer_lock); 917 } 918 919 if (ip_debug > 1) { 920 (void) mi_strlog(ilm->ilm_ill->ill_rq, 1, SL_TRACE, 921 "mld_joingroup: multicast_type %d timer %d", 922 (ilm->ilm_ill->ill_mcast_type), 923 (int)ntohl(timer)); 924 } 925 } 926 927 void 928 igmp_leavegroup(ilm_t *ilm) 929 { 930 ill_t *ill = ilm->ilm_ipif->ipif_ill; 931 932 ASSERT(ilm->ilm_ill == NULL); 933 ASSERT(!ill->ill_isv6); 934 935 mutex_enter(&ill->ill_lock); 936 if (ilm->ilm_state == IGMP_IREPORTEDLAST && 937 ill->ill_mcast_type == IGMP_V2_ROUTER && 938 (ilm->ilm_addr != htonl(INADDR_ALLHOSTS_GROUP))) { 939 mutex_exit(&ill->ill_lock); 940 igmp_sendpkt(ilm, IGMP_V2_LEAVE_GROUP, 941 (htonl(INADDR_ALLRTRS_GROUP))); 942 return; 943 } else if ((ill->ill_mcast_type == IGMP_V3_ROUTER) && 944 (ilm->ilm_addr != htonl(INADDR_ALLHOSTS_GROUP))) { 945 mrec_t *rp; 946 /* 947 * The possible state changes we need to handle here: 948 * Old State New State Report 949 * 950 * INCLUDE(X) INCLUDE(0) ALLOW(0),BLOCK(X) 951 * EXCLUDE(X) INCLUDE(0) TO_IN(0) 952 * 953 * No need to send the ALLOW(0) report; BLOCK(X) is enough 954 */ 955 if (ilm->ilm_fmode == MODE_IS_INCLUDE) { 956 rp = mcast_bldmrec(BLOCK_OLD_SOURCES, &ilm->ilm_v6addr, 957 ilm->ilm_filter, NULL); 958 } else { 959 rp = mcast_bldmrec(CHANGE_TO_INCLUDE, &ilm->ilm_v6addr, 960 NULL, NULL); 961 } 962 mutex_exit(&ill->ill_lock); 963 igmpv3_sendrpt(ilm->ilm_ipif, rp); 964 return; 965 } 966 mutex_exit(&ill->ill_lock); 967 } 968 969 void 970 mld_leavegroup(ilm_t *ilm) 971 { 972 ill_t *ill = ilm->ilm_ill; 973 974 ASSERT(ilm->ilm_ipif == NULL); 975 ASSERT(ill->ill_isv6); 976 977 mutex_enter(&ill->ill_lock); 978 if (ilm->ilm_state == IGMP_IREPORTEDLAST && 979 ill->ill_mcast_type == MLD_V1_ROUTER && 980 (!IN6_ARE_ADDR_EQUAL(&ipv6_all_hosts_mcast, &ilm->ilm_v6addr))) { 981 mutex_exit(&ill->ill_lock); 982 mld_sendpkt(ilm, MLD_LISTENER_REDUCTION, &ipv6_all_rtrs_mcast); 983 return; 984 } else if ((ill->ill_mcast_type == MLD_V2_ROUTER) && 985 (!IN6_ARE_ADDR_EQUAL(&ipv6_all_hosts_mcast, &ilm->ilm_v6addr))) { 986 mrec_t *rp; 987 /* 988 * The possible state changes we need to handle here: 989 * Old State New State Report 990 * 991 * INCLUDE(X) INCLUDE(0) ALLOW(0),BLOCK(X) 992 * EXCLUDE(X) INCLUDE(0) TO_IN(0) 993 * 994 * No need to send the ALLOW(0) report; BLOCK(X) is enough 995 */ 996 if (ilm->ilm_fmode == MODE_IS_INCLUDE) { 997 rp = mcast_bldmrec(BLOCK_OLD_SOURCES, &ilm->ilm_v6addr, 998 ilm->ilm_filter, NULL); 999 } else { 1000 rp = mcast_bldmrec(CHANGE_TO_INCLUDE, &ilm->ilm_v6addr, 1001 NULL, NULL); 1002 } 1003 mutex_exit(&ill->ill_lock); 1004 mldv2_sendrpt(ill, rp); 1005 return; 1006 } 1007 mutex_exit(&ill->ill_lock); 1008 } 1009 1010 void 1011 igmp_statechange(ilm_t *ilm, mcast_record_t fmode, slist_t *flist) 1012 { 1013 ill_t *ill; 1014 mrec_t *rp; 1015 ip_stack_t *ipst = ilm->ilm_ipst; 1016 1017 ASSERT(ilm != NULL); 1018 1019 /* state change reports should only be sent if the router is v3 */ 1020 if (ilm->ilm_ipif->ipif_ill->ill_mcast_type != IGMP_V3_ROUTER) 1021 return; 1022