1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2008 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 /* Copyright (c) 1990 Mentat Inc. 
*/ 26 27 #pragma ident "%Z%%M% %I% %E% SMI" 28 29 #include <sys/types.h> 30 #include <sys/stream.h> 31 #include <sys/dlpi.h> 32 #include <sys/stropts.h> 33 #include <sys/strsun.h> 34 #include <sys/ddi.h> 35 #include <sys/cmn_err.h> 36 #include <sys/sdt.h> 37 #include <sys/zone.h> 38 39 #include <sys/param.h> 40 #include <sys/socket.h> 41 #include <sys/sockio.h> 42 #include <net/if.h> 43 #include <sys/systm.h> 44 #include <sys/strsubr.h> 45 #include <net/route.h> 46 #include <netinet/in.h> 47 #include <net/if_dl.h> 48 #include <netinet/ip6.h> 49 #include <netinet/icmp6.h> 50 51 #include <inet/common.h> 52 #include <inet/mi.h> 53 #include <inet/nd.h> 54 #include <inet/arp.h> 55 #include <inet/ip.h> 56 #include <inet/ip6.h> 57 #include <inet/ip_if.h> 58 #include <inet/ip_ndp.h> 59 #include <inet/ip_multi.h> 60 #include <inet/ipclassifier.h> 61 #include <inet/ipsec_impl.h> 62 #include <inet/sctp_ip.h> 63 #include <inet/ip_listutils.h> 64 #include <inet/udp_impl.h> 65 66 /* igmpv3/mldv2 source filter manipulation */ 67 static void ilm_bld_flists(conn_t *conn, void *arg); 68 static void ilm_gen_filter(ilm_t *ilm, mcast_record_t *fmode, 69 slist_t *flist); 70 71 static ilm_t *ilm_add_v6(ipif_t *ipif, const in6_addr_t *group, 72 ilg_stat_t ilgstat, mcast_record_t ilg_fmode, slist_t *ilg_flist, 73 int orig_ifindex, zoneid_t zoneid); 74 static void ilm_delete(ilm_t *ilm); 75 static int ip_ll_addmulti_v6(ipif_t *ipif, const in6_addr_t *group); 76 static int ip_ll_delmulti_v6(ipif_t *ipif, const in6_addr_t *group); 77 static ilg_t *ilg_lookup_ill_index_v6(conn_t *connp, 78 const in6_addr_t *v6group, int index); 79 static ilg_t *ilg_lookup_ipif(conn_t *connp, ipaddr_t group, 80 ipif_t *ipif); 81 static int ilg_add(conn_t *connp, ipaddr_t group, ipif_t *ipif, 82 mcast_record_t fmode, ipaddr_t src); 83 static int ilg_add_v6(conn_t *connp, const in6_addr_t *group, ill_t *ill, 84 mcast_record_t fmode, const in6_addr_t *v6src); 85 static void ilg_delete(conn_t *connp, ilg_t *ilg, 
const in6_addr_t *src); 86 static mblk_t *ill_create_dl(ill_t *ill, uint32_t dl_primitive, 87 uint32_t length, uint32_t *addr_lenp, uint32_t *addr_offp); 88 static mblk_t *ill_create_squery(ill_t *ill, ipaddr_t ipaddr, 89 uint32_t addrlen, uint32_t addroff, mblk_t *mp_tail); 90 static void conn_ilg_reap(conn_t *connp); 91 static int ip_opt_delete_group_excl(conn_t *connp, ipaddr_t group, 92 ipif_t *ipif, mcast_record_t fmode, ipaddr_t src); 93 static int ip_opt_delete_group_excl_v6(conn_t *connp, 94 const in6_addr_t *v6group, ill_t *ill, mcast_record_t fmode, 95 const in6_addr_t *v6src); 96 97 /* 98 * MT notes: 99 * 100 * Multicast joins operate on both the ilg and ilm structures. Multiple 101 * threads operating on a conn (socket) trying to do multicast joins 102 * need to synchronize when operating on the ilg. Multiple threads 103 * potentially operating on different conn (socket endpoints) trying to 104 * do multicast joins could eventually end up trying to manipulate the 105 * ilm simultaneously and need to synchronize on the access to the ilm. 106 * Both are amenable to standard Solaris MT techniques, but it would be 107 * complex to handle a failover or failback which needs to manipulate 108 * ilg/ilms if an application can also simultaneously join/leave 109 * multicast groups. Hence multicast join/leave also go through the ipsq_t 110 * serialization. 111 * 112 * Multicast joins and leaves are single-threaded per phyint/IPMP group 113 * using the ipsq serialization mechanism. 114 * 115 * An ilm is an IP data structure used to track multicast join/leave. 116 * An ilm is associated with a <multicast group, ipif> tuple in IPv4 and 117 * with just <multicast group> in IPv6. ilm_refcnt is the number of ilg's 118 * referencing the ilm. ilms are created / destroyed only as writer. ilms 119 * are not passed around, instead they are looked up and used under the 120 * ill_lock or as writer. 
So we don't need a dynamic refcount of the number 121 * of threads holding reference to an ilm. 122 * 123 * Multicast Join operation: 124 * 125 * The first step is to determine the ipif (v4) or ill (v6) on which 126 * the join operation is to be done. The join is done after becoming 127 * exclusive on the ipsq associated with the ipif or ill. The conn->conn_ilg 128 * and ill->ill_ilm are thus accessed and modified exclusively per ill. 129 * Multiple threads can attempt to join simultaneously on different ipif/ill 130 * on the same conn. In this case the ipsq serialization does not help in 131 * protecting the ilg. It is the conn_lock that is used to protect the ilg. 132 * The conn_lock also protects all the ilg_t members. 133 * 134 * Leave operation. 135 * 136 * Similar to the join operation, the first step is to determine the ipif 137 * or ill (v6) on which the leave operation is to be done. The leave operation 138 * is done after becoming exclusive on the ipsq associated with the ipif or ill. 139 * As with join ilg modification is done under the protection of the conn lock. 
140 */ 141 142 #define IPSQ_ENTER_IPIF(ipif, connp, first_mp, func, ipsq, type) \ 143 ASSERT(connp != NULL); \ 144 (ipsq) = ipsq_try_enter((ipif), NULL, CONNP_TO_WQ(connp), \ 145 (first_mp), (func), (type), B_TRUE); \ 146 if ((ipsq) == NULL) { \ 147 ipif_refrele(ipif); \ 148 return (EINPROGRESS); \ 149 } 150 151 #define IPSQ_ENTER_ILL(ill, connp, first_mp, func, ipsq, type) \ 152 ASSERT(connp != NULL); \ 153 (ipsq) = ipsq_try_enter(NULL, ill, CONNP_TO_WQ(connp), \ 154 (first_mp), (func), (type), B_TRUE); \ 155 if ((ipsq) == NULL) { \ 156 ill_refrele(ill); \ 157 return (EINPROGRESS); \ 158 } 159 160 #define IPSQ_EXIT(ipsq) \ 161 if (ipsq != NULL) \ 162 ipsq_exit(ipsq); 163 164 #define ILG_WALKER_HOLD(connp) (connp)->conn_ilg_walker_cnt++ 165 166 #define ILG_WALKER_RELE(connp) \ 167 { \ 168 (connp)->conn_ilg_walker_cnt--; \ 169 if ((connp)->conn_ilg_walker_cnt == 0) \ 170 conn_ilg_reap(connp); \ 171 } 172 173 static void 174 conn_ilg_reap(conn_t *connp) 175 { 176 int to; 177 int from; 178 ilg_t *ilg; 179 180 ASSERT(MUTEX_HELD(&connp->conn_lock)); 181 182 to = 0; 183 from = 0; 184 while (from < connp->conn_ilg_inuse) { 185 if (connp->conn_ilg[from].ilg_flags & ILG_DELETED) { 186 ilg = &connp->conn_ilg[from]; 187 FREE_SLIST(ilg->ilg_filter); 188 ilg->ilg_flags &= ~ILG_DELETED; 189 from++; 190 continue; 191 } 192 if (to != from) 193 connp->conn_ilg[to] = connp->conn_ilg[from]; 194 to++; 195 from++; 196 } 197 198 connp->conn_ilg_inuse = to; 199 200 if (connp->conn_ilg_inuse == 0) { 201 mi_free((char *)connp->conn_ilg); 202 connp->conn_ilg = NULL; 203 cv_broadcast(&connp->conn_refcv); 204 } 205 } 206 207 #define GETSTRUCT(structure, number) \ 208 ((structure *)mi_zalloc(sizeof (structure) * (number))) 209 210 #define ILG_ALLOC_CHUNK 16 211 212 /* 213 * Returns a pointer to the next available ilg in conn_ilg. 
Allocs more 214 * buffers in size of ILG_ALLOC_CHUNK ilgs when needed, and updates conn's 215 * ilg tracking fields appropriately (conn_ilg_inuse reflects usage of the 216 * returned ilg). Returns NULL on failure (ENOMEM). 217 * 218 * Assumes connp->conn_lock is held. 219 */ 220 static ilg_t * 221 conn_ilg_alloc(conn_t *connp) 222 { 223 ilg_t *new, *ret; 224 int curcnt; 225 226 ASSERT(MUTEX_HELD(&connp->conn_lock)); 227 ASSERT(connp->conn_ilg_inuse <= connp->conn_ilg_allocated); 228 229 if (connp->conn_ilg == NULL) { 230 connp->conn_ilg = GETSTRUCT(ilg_t, ILG_ALLOC_CHUNK); 231 if (connp->conn_ilg == NULL) 232 return (NULL); 233 connp->conn_ilg_allocated = ILG_ALLOC_CHUNK; 234 connp->conn_ilg_inuse = 0; 235 } 236 if (connp->conn_ilg_inuse == connp->conn_ilg_allocated) { 237 if (connp->conn_ilg_walker_cnt != 0) { 238 /* 239 * XXX We cannot grow the array at this point 240 * because a list walker could be in progress, and 241 * we cannot wipe out the existing array until the 242 * walker is done. Just return NULL for now. 243 * ilg_delete_all() will have to be changed when 244 * this logic is changed. 
245 */ 246 return (NULL); 247 } 248 curcnt = connp->conn_ilg_allocated; 249 new = GETSTRUCT(ilg_t, curcnt + ILG_ALLOC_CHUNK); 250 if (new == NULL) 251 return (NULL); 252 bcopy(connp->conn_ilg, new, sizeof (ilg_t) * curcnt); 253 mi_free((char *)connp->conn_ilg); 254 connp->conn_ilg = new; 255 connp->conn_ilg_allocated += ILG_ALLOC_CHUNK; 256 } 257 258 ret = &connp->conn_ilg[connp->conn_ilg_inuse++]; 259 ASSERT((ret->ilg_flags & ILG_DELETED) == 0); 260 bzero(ret, sizeof (*ret)); 261 return (ret); 262 } 263 264 typedef struct ilm_fbld_s { 265 ilm_t *fbld_ilm; 266 int fbld_in_cnt; 267 int fbld_ex_cnt; 268 slist_t fbld_in; 269 slist_t fbld_ex; 270 boolean_t fbld_in_overflow; 271 } ilm_fbld_t; 272 273 static void 274 ilm_bld_flists(conn_t *conn, void *arg) 275 { 276 int i; 277 ilm_fbld_t *fbld = (ilm_fbld_t *)(arg); 278 ilm_t *ilm = fbld->fbld_ilm; 279 in6_addr_t *v6group = &ilm->ilm_v6addr; 280 281 if (conn->conn_ilg_inuse == 0) 282 return; 283 284 /* 285 * Since we can't break out of the ipcl_walk once started, we still 286 * have to look at every conn. But if we've already found one 287 * (EXCLUDE, NULL) list, there's no need to keep checking individual 288 * ilgs--that will be our state. 289 */ 290 if (fbld->fbld_ex_cnt > 0 && fbld->fbld_ex.sl_numsrc == 0) 291 return; 292 293 /* 294 * Check this conn's ilgs to see if any are interested in our 295 * ilm (group, interface match). If so, update the master 296 * include and exclude lists we're building in the fbld struct 297 * with this ilg's filter info. 
298 */ 299 mutex_enter(&conn->conn_lock); 300 for (i = 0; i < conn->conn_ilg_inuse; i++) { 301 ilg_t *ilg = &conn->conn_ilg[i]; 302 if ((ilg->ilg_ill == ilm->ilm_ill) && 303 (ilg->ilg_ipif == ilm->ilm_ipif) && 304 IN6_ARE_ADDR_EQUAL(&ilg->ilg_v6group, v6group)) { 305 if (ilg->ilg_fmode == MODE_IS_INCLUDE) { 306 fbld->fbld_in_cnt++; 307 if (!fbld->fbld_in_overflow) 308 l_union_in_a(&fbld->fbld_in, 309 ilg->ilg_filter, 310 &fbld->fbld_in_overflow); 311 } else { 312 fbld->fbld_ex_cnt++; 313 /* 314 * On the first exclude list, don't try to do 315 * an intersection, as the master exclude list 316 * is intentionally empty. If the master list 317 * is still empty on later iterations, that 318 * means we have at least one ilg with an empty 319 * exclude list, so that should be reflected 320 * when we take the intersection. 321 */ 322 if (fbld->fbld_ex_cnt == 1) { 323 if (ilg->ilg_filter != NULL) 324 l_copy(ilg->ilg_filter, 325 &fbld->fbld_ex); 326 } else { 327 l_intersection_in_a(&fbld->fbld_ex, 328 ilg->ilg_filter); 329 } 330 } 331 /* there will only be one match, so break now. 
*/ 332 break; 333 } 334 } 335 mutex_exit(&conn->conn_lock); 336 } 337 338 static void 339 ilm_gen_filter(ilm_t *ilm, mcast_record_t *fmode, slist_t *flist) 340 { 341 ilm_fbld_t fbld; 342 ip_stack_t *ipst = ilm->ilm_ipst; 343 344 fbld.fbld_ilm = ilm; 345 fbld.fbld_in_cnt = fbld.fbld_ex_cnt = 0; 346 fbld.fbld_in.sl_numsrc = fbld.fbld_ex.sl_numsrc = 0; 347 fbld.fbld_in_overflow = B_FALSE; 348 349 /* first, construct our master include and exclude lists */ 350 ipcl_walk(ilm_bld_flists, (caddr_t)&fbld, ipst); 351 352 /* now use those master lists to generate the interface filter */ 353 354 /* if include list overflowed, filter is (EXCLUDE, NULL) */ 355 if (fbld.fbld_in_overflow) { 356 *fmode = MODE_IS_EXCLUDE; 357 flist->sl_numsrc = 0; 358 return; 359 } 360 361 /* if nobody interested, interface filter is (INCLUDE, NULL) */ 362 if (fbld.fbld_in_cnt == 0 && fbld.fbld_ex_cnt == 0) { 363 *fmode = MODE_IS_INCLUDE; 364 flist->sl_numsrc = 0; 365 return; 366 } 367 368 /* 369 * If there are no exclude lists, then the interface filter 370 * is INCLUDE, with its filter list equal to fbld_in. A single 371 * exclude list makes the interface filter EXCLUDE, with its 372 * filter list equal to (fbld_ex - fbld_in). 373 */ 374 if (fbld.fbld_ex_cnt == 0) { 375 *fmode = MODE_IS_INCLUDE; 376 l_copy(&fbld.fbld_in, flist); 377 } else { 378 *fmode = MODE_IS_EXCLUDE; 379 l_difference(&fbld.fbld_ex, &fbld.fbld_in, flist); 380 } 381 } 382 383 /* 384 * If the given interface has failed, choose a new one to join on so 385 * that we continue to receive packets. ilg_orig_ifindex remembers 386 * what the application used to join on so that we know the ilg to 387 * delete even though we change the ill here. Callers will store the 388 * ilg returned from this function in ilg_ill. Thus when we receive 389 * a packet on ilg_ill, conn_wantpacket_v6 will deliver the packets. 390 * 391 * This function must be called as writer so we can walk the group 392 * list and examine flags without holding a lock. 
393 */ 394 ill_t * 395 ip_choose_multi_ill(ill_t *ill, const in6_addr_t *grp) 396 { 397 ill_t *till; 398 ill_group_t *illgrp = ill->ill_group; 399 400 ASSERT(IAM_WRITER_ILL(ill)); 401 402 if (IN6_IS_ADDR_UNSPECIFIED(grp) || illgrp == NULL) 403 return (ill); 404 405 if ((ill->ill_phyint->phyint_flags & (PHYI_FAILED|PHYI_INACTIVE)) == 0) 406 return (ill); 407 408 till = illgrp->illgrp_ill; 409 while (till != NULL && 410 (till->ill_phyint->phyint_flags & (PHYI_FAILED|PHYI_INACTIVE))) { 411 till = till->ill_group_next; 412 } 413 if (till != NULL) 414 return (till); 415 416 return (ill); 417 } 418 419 static int 420 ilm_update_add(ilm_t *ilm, ilg_stat_t ilgstat, slist_t *ilg_flist, 421 boolean_t isv6) 422 { 423 mcast_record_t fmode; 424 slist_t *flist; 425 boolean_t fdefault; 426 char buf[INET6_ADDRSTRLEN]; 427 ill_t *ill = isv6 ? ilm->ilm_ill : ilm->ilm_ipif->ipif_ill; 428 429 /* 430 * There are several cases where the ilm's filter state 431 * defaults to (EXCLUDE, NULL): 432 * - we've had previous joins without associated ilgs 433 * - this join has no associated ilg 434 * - the ilg's filter state is (EXCLUDE, NULL) 435 */ 436 fdefault = (ilm->ilm_no_ilg_cnt > 0) || 437 (ilgstat == ILGSTAT_NONE) || SLIST_IS_EMPTY(ilg_flist); 438 439 /* attempt mallocs (if needed) before doing anything else */ 440 if ((flist = l_alloc()) == NULL) 441 return (ENOMEM); 442 if (!fdefault && ilm->ilm_filter == NULL) { 443 ilm->ilm_filter = l_alloc(); 444 if (ilm->ilm_filter == NULL) { 445 l_free(flist); 446 return (ENOMEM); 447 } 448 } 449 450 if (ilgstat != ILGSTAT_CHANGE) 451 ilm->ilm_refcnt++; 452 453 if (ilgstat == ILGSTAT_NONE) 454 ilm->ilm_no_ilg_cnt++; 455 456 /* 457 * Determine new filter state. If it's not the default 458 * (EXCLUDE, NULL), we must walk the conn list to find 459 * any ilgs interested in this group, and re-build the 460 * ilm filter. 
461 */ 462 if (fdefault) { 463 fmode = MODE_IS_EXCLUDE; 464 flist->sl_numsrc = 0; 465 } else { 466 ilm_gen_filter(ilm, &fmode, flist); 467 } 468 469 /* make sure state actually changed; nothing to do if not. */ 470 if ((ilm->ilm_fmode == fmode) && 471 !lists_are_different(ilm->ilm_filter, flist)) { 472 l_free(flist); 473 return (0); 474 } 475 476 /* send the state change report */ 477 if (!IS_LOOPBACK(ill)) { 478 if (isv6) 479 mld_statechange(ilm, fmode, flist); 480 else 481 igmp_statechange(ilm, fmode, flist); 482 } 483 484 /* update the ilm state */ 485 ilm->ilm_fmode = fmode; 486 if (flist->sl_numsrc > 0) 487 l_copy(flist, ilm->ilm_filter); 488 else 489 CLEAR_SLIST(ilm->ilm_filter); 490 491 ip1dbg(("ilm_update: new if filter mode %d, group %s\n", ilm->ilm_fmode, 492 inet_ntop(AF_INET6, &ilm->ilm_v6addr, buf, sizeof (buf)))); 493 494 l_free(flist); 495 return (0); 496 } 497 498 static int 499 ilm_update_del(ilm_t *ilm, boolean_t isv6) 500 { 501 mcast_record_t fmode; 502 slist_t *flist; 503 ill_t *ill = isv6 ? ilm->ilm_ill : ilm->ilm_ipif->ipif_ill; 504 505 ip1dbg(("ilm_update_del: still %d left; updating state\n", 506 ilm->ilm_refcnt)); 507 508 if ((flist = l_alloc()) == NULL) 509 return (ENOMEM); 510 511 /* 512 * If present, the ilg in question has already either been 513 * updated or removed from our list; so all we need to do 514 * now is walk the list to update the ilm filter state. 515 * 516 * Skip the list walk if we have any no-ilg joins, which 517 * cause the filter state to revert to (EXCLUDE, NULL). 
518 */ 519 if (ilm->ilm_no_ilg_cnt != 0) { 520 fmode = MODE_IS_EXCLUDE; 521 flist->sl_numsrc = 0; 522 } else { 523 ilm_gen_filter(ilm, &fmode, flist); 524 } 525 526 /* check to see if state needs to be updated */ 527 if ((ilm->ilm_fmode == fmode) && 528 (!lists_are_different(ilm->ilm_filter, flist))) { 529 l_free(flist); 530 return (0); 531 } 532 533 if (!IS_LOOPBACK(ill)) { 534 if (isv6) 535 mld_statechange(ilm, fmode, flist); 536 else 537 igmp_statechange(ilm, fmode, flist); 538 } 539 540 ilm->ilm_fmode = fmode; 541 if (flist->sl_numsrc > 0) { 542 if (ilm->ilm_filter == NULL) { 543 ilm->ilm_filter = l_alloc(); 544 if (ilm->ilm_filter == NULL) { 545 char buf[INET6_ADDRSTRLEN]; 546 ip1dbg(("ilm_update_del: failed to alloc ilm " 547 "filter; no source filtering for %s on %s", 548 inet_ntop(AF_INET6, &ilm->ilm_v6addr, 549 buf, sizeof (buf)), ill->ill_name)); 550 ilm->ilm_fmode = MODE_IS_EXCLUDE; 551 l_free(flist); 552 return (0); 553 } 554 } 555 l_copy(flist, ilm->ilm_filter); 556 } else { 557 CLEAR_SLIST(ilm->ilm_filter); 558 } 559 560 l_free(flist); 561 return (0); 562 } 563 564 /* 565 * INADDR_ANY means all multicast addresses. This is only used 566 * by the multicast router. 567 * INADDR_ANY is stored as IPv6 unspecified addr. 568 */ 569 int 570 ip_addmulti(ipaddr_t group, ipif_t *ipif, ilg_stat_t ilgstat, 571 mcast_record_t ilg_fmode, slist_t *ilg_flist) 572 { 573 ill_t *ill = ipif->ipif_ill; 574 ilm_t *ilm; 575 in6_addr_t v6group; 576 int ret; 577 578 ASSERT(IAM_WRITER_IPIF(ipif)); 579 580 if (!CLASSD(group) && group != INADDR_ANY) 581 return (EINVAL); 582 583 /* 584 * INADDR_ANY is represented as the IPv6 unspecifed addr. 
585 */ 586 if (group == INADDR_ANY) 587 v6group = ipv6_all_zeros; 588 else 589 IN6_IPADDR_TO_V4MAPPED(group, &v6group); 590 591 mutex_enter(&ill->ill_lock); 592 ilm = ilm_lookup_ipif(ipif, group); 593 mutex_exit(&ill->ill_lock); 594 /* 595 * Since we are writer, we know the ilm_flags itself cannot 596 * change at this point, and ilm_lookup_ipif would not have 597 * returned a DELETED ilm. However, the data path can free 598 * ilm->next via ilm_walker_cleanup() so we can safely 599 * access anything in ilm except ilm_next (for safe access to 600 * ilm_next we'd have to take the ill_lock). 601 */ 602 if (ilm != NULL) 603 return (ilm_update_add(ilm, ilgstat, ilg_flist, B_FALSE)); 604 605 /* 606 * ilms are associated with ipifs in IPv4. It moves with the 607 * ipif if the ipif moves to a new ill when the interface 608 * fails. Thus we really don't check whether the ipif_ill 609 * has failed like in IPv6. If it has FAILED the ipif 610 * will move (daemon will move it) and hence the ilm, if the 611 * ipif is not IPIF_NOFAILOVER. For the IPIF_NOFAILOVER ipifs, 612 * we continue to receive in the same place even if the 613 * interface fails. 
614 */ 615 ilm = ilm_add_v6(ipif, &v6group, ilgstat, ilg_fmode, ilg_flist, 616 ill->ill_phyint->phyint_ifindex, ipif->ipif_zoneid); 617 if (ilm == NULL) 618 return (ENOMEM); 619 620 if (group == INADDR_ANY) { 621 /* 622 * Check how many ipif's have members in this group - 623 * if more then one we should not tell the driver to join 624 * this time 625 */ 626 if (ilm_numentries_v6(ill, &v6group) > 1) 627 return (0); 628 if (ill->ill_group == NULL) 629 ret = ip_join_allmulti(ipif); 630 else 631 ret = ill_nominate_mcast_rcv(ill->ill_group); 632 if (ret != 0) 633 ilm_delete(ilm); 634 return (ret); 635 } 636 637 if (!IS_LOOPBACK(ill)) 638 igmp_joingroup(ilm); 639 640 if (ilm_numentries_v6(ill, &v6group) > 1) 641 return (0); 642 643 ret = ip_ll_addmulti_v6(ipif, &v6group); 644 if (ret != 0) 645 ilm_delete(ilm); 646 return (ret); 647 } 648 649 /* 650 * The unspecified address means all multicast addresses. 651 * This is only used by the multicast router. 652 * 653 * ill identifies the interface to join on; it may not match the 654 * interface requested by the application of a failover has taken 655 * place. orig_ifindex always identifies the interface requested 656 * by the app. 657 * 658 * ilgstat tells us if there's an ilg associated with this join, 659 * and if so, if it's a new ilg or a change to an existing one. 660 * ilg_fmode and ilg_flist give us the current filter state of 661 * the ilg (and will be EXCLUDE {NULL} in the case of no ilg). 662 */ 663 int 664 ip_addmulti_v6(const in6_addr_t *v6group, ill_t *ill, int orig_ifindex, 665 zoneid_t zoneid, ilg_stat_t ilgstat, mcast_record_t ilg_fmode, 666 slist_t *ilg_flist) 667 { 668 ilm_t *ilm; 669 int ret; 670 671 ASSERT(IAM_WRITER_ILL(ill)); 672 673 if (!IN6_IS_ADDR_MULTICAST(v6group) && 674 !IN6_IS_ADDR_UNSPECIFIED(v6group)) { 675 return (EINVAL); 676 } 677 678 /* 679 * An ilm is uniquely identified by the tuple of (group, ill, 680 * orig_ill). 
group is the multicast group address, ill is 681 * the interface on which it is currently joined, and orig_ill 682 * is the interface on which the application requested the 683 * join. orig_ill and ill are the same unless orig_ill has 684 * failed over. 685 * 686 * Both orig_ill and ill are required, which means we may have 687 * 2 ilms on an ill for the same group, but with different 688 * orig_ills. These must be kept separate, so that when failback 689 * occurs, the appropriate ilms are moved back to their orig_ill 690 * without disrupting memberships on the ill to which they had 691 * been moved. 692 * 693 * In order to track orig_ill, we store orig_ifindex in the 694 * ilm and ilg. 695 */ 696 mutex_enter(&ill->ill_lock); 697 ilm = ilm_lookup_ill_index_v6(ill, v6group, orig_ifindex, zoneid); 698 mutex_exit(&ill->ill_lock); 699 if (ilm != NULL) 700 return (ilm_update_add(ilm, ilgstat, ilg_flist, B_TRUE)); 701 702 /* 703 * We need to remember where the application really wanted 704 * to join. This will be used later if we want to failback 705 * to the original interface. 706 */ 707 ilm = ilm_add_v6(ill->ill_ipif, v6group, ilgstat, ilg_fmode, 708 ilg_flist, orig_ifindex, zoneid); 709 if (ilm == NULL) 710 return (ENOMEM); 711 712 if (IN6_IS_ADDR_UNSPECIFIED(v6group)) { 713 /* 714 * Check how many ipif's that have members in this group - 715 * if more then one we should not tell the driver to join 716 * this time 717 */ 718 if (ilm_numentries_v6(ill, v6group) > 1) 719 return (0); 720 if (ill->ill_group == NULL) 721 ret = ip_join_allmulti(ill->ill_ipif); 722 else 723 ret = ill_nominate_mcast_rcv(ill->ill_group); 724 725 if (ret != 0) 726 ilm_delete(ilm); 727 return (ret); 728 } 729 730 if (!IS_LOOPBACK(ill)) 731 mld_joingroup(ilm); 732 733 /* 734 * If we have more then one we should not tell the driver 735 * to join this time. 
736 */ 737 if (ilm_numentries_v6(ill, v6group) > 1) 738 return (0); 739 740 ret = ip_ll_addmulti_v6(ill->ill_ipif, v6group); 741 if (ret != 0) 742 ilm_delete(ilm); 743 return (ret); 744 } 745 746 /* 747 * Send a multicast request to the driver for enabling multicast reception 748 * for v6groupp address. The caller has already checked whether it is 749 * appropriate to send one or not. 750 */ 751 int 752 ip_ll_send_enabmulti_req(ill_t *ill, const in6_addr_t *v6groupp) 753 { 754 mblk_t *mp; 755 uint32_t addrlen, addroff; 756 char group_buf[INET6_ADDRSTRLEN]; 757 758 ASSERT(IAM_WRITER_ILL(ill)); 759 760 /* 761 * Create a AR_ENTRY_SQUERY message with a dl_enabmulti_req tacked 762 * on. 763 */ 764 mp = ill_create_dl(ill, DL_ENABMULTI_REQ, sizeof (dl_enabmulti_req_t), 765 &addrlen, &addroff); 766 if (!mp) 767 return (ENOMEM); 768 if (IN6_IS_ADDR_V4MAPPED(v6groupp)) { 769 ipaddr_t v4group; 770 771 IN6_V4MAPPED_TO_IPADDR(v6groupp, v4group); 772 /* 773 * NOTE!!! 774 * The "addroff" passed in here was calculated by 775 * ill_create_dl(), and will be used by ill_create_squery() 776 * to perform some twisted coding magic. It is the offset 777 * into the dl_xxx_req of the hw addr. Here, it will be 778 * added to b_wptr - b_rptr to create a magic number that 779 * is not an offset into this squery mblk. 780 * The actual hardware address will be accessed only in the 781 * dl_xxx_req, not in the squery. More importantly, 782 * that hardware address can *only* be accessed in this 783 * mblk chain by calling mi_offset_param_c(), which uses 784 * the magic number in the squery hw offset field to go 785 * to the *next* mblk (the dl_xxx_req), subtract the 786 * (b_wptr - b_rptr), and find the actual offset into 787 * the dl_xxx_req. 788 * Any method that depends on using the 789 * offset field in the dl_disabmulti_req or squery 790 * to find either hardware address will similarly fail. 791 * 792 * Look in ar_entry_squery() in arp.c to see how this offset 793 * is used. 
794 */ 795 mp = ill_create_squery(ill, v4group, addrlen, addroff, mp); 796 if (!mp) 797 return (ENOMEM); 798 ip1dbg(("ip_ll_send_enabmulti_req: IPv4 putnext %s on %s\n", 799 inet_ntop(AF_INET6, v6groupp, group_buf, 800 sizeof (group_buf)), 801 ill->ill_name)); 802 putnext(ill->ill_rq, mp); 803 } else { 804 ip1dbg(("ip_ll_send_enabmulti_req: IPv6 ndp_mcastreq %s on" 805 " %s\n", 806 inet_ntop(AF_INET6, v6groupp, group_buf, 807 sizeof (group_buf)), 808 ill->ill_name)); 809 return (ndp_mcastreq(ill, v6groupp, addrlen, addroff, mp)); 810 } 811 return (0); 812 } 813 814 /* 815 * Send a multicast request to the driver for enabling multicast 816 * membership for v6group if appropriate. 817 */ 818 static int 819 ip_ll_addmulti_v6(ipif_t *ipif, const in6_addr_t *v6groupp) 820 { 821 ill_t *ill = ipif->ipif_ill; 822 823 ASSERT(IAM_WRITER_IPIF(ipif)); 824 825 if (ill->ill_net_type != IRE_IF_RESOLVER || 826 ipif->ipif_flags & IPIF_POINTOPOINT) { 827 ip1dbg(("ip_ll_addmulti_v6: not resolver\n")); 828 return (0); /* Must be IRE_IF_NORESOLVER */ 829 } 830 831 if (ill->ill_phyint->phyint_flags & PHYI_MULTI_BCAST) { 832 ip1dbg(("ip_ll_addmulti_v6: MULTI_BCAST\n")); 833 return (0); 834 } 835 if (!ill->ill_dl_up) { 836 /* 837 * Nobody there. All multicast addresses will be re-joined 838 * when we get the DL_BIND_ACK bringing the interface up. 839 */ 840 ip1dbg(("ip_ll_addmulti_v6: nobody up\n")); 841 return (0); 842 } 843 return (ip_ll_send_enabmulti_req(ill, v6groupp)); 844 } 845 846 /* 847 * INADDR_ANY means all multicast addresses. This is only used 848 * by the multicast router. 849 * INADDR_ANY is stored as the IPv6 unspecifed addr. 
850 */ 851 int 852 ip_delmulti(ipaddr_t group, ipif_t *ipif, boolean_t no_ilg, boolean_t leaving) 853 { 854 ill_t *ill = ipif->ipif_ill; 855 ilm_t *ilm; 856 in6_addr_t v6group; 857 int ret; 858 859 ASSERT(IAM_WRITER_IPIF(ipif)); 860 861 if (!CLASSD(group) && group != INADDR_ANY) 862 return (EINVAL); 863 864 /* 865 * INADDR_ANY is represented as the IPv6 unspecifed addr. 866 */ 867 if (group == INADDR_ANY) 868 v6group = ipv6_all_zeros; 869 else 870 IN6_IPADDR_TO_V4MAPPED(group, &v6group); 871 872 /* 873 * Look for a match on the ipif. 874 * (IP_DROP_MEMBERSHIP specifies an ipif using an IP address). 875 */ 876 mutex_enter(&ill->ill_lock); 877 ilm = ilm_lookup_ipif(ipif, group); 878 mutex_exit(&ill->ill_lock); 879 if (ilm == NULL) 880 return (ENOENT); 881 882 /* Update counters */ 883 if (no_ilg) 884 ilm->ilm_no_ilg_cnt--; 885 886 if (leaving) 887 ilm->ilm_refcnt--; 888 889 if (ilm->ilm_refcnt > 0) 890 return (ilm_update_del(ilm, B_FALSE)); 891 892 if (group == INADDR_ANY) { 893 ilm_delete(ilm); 894 /* 895 * Check how many ipif's that have members in this group - 896 * if there are still some left then don't tell the driver 897 * to drop it. 898 */ 899 if (ilm_numentries_v6(ill, &v6group) != 0) 900 return (0); 901 902 /* 903 * If we never joined, then don't leave. This can happen 904 * if we're in an IPMP group, since only one ill per IPMP 905 * group receives all multicast packets. 906 */ 907 if (!ill->ill_join_allmulti) { 908 ASSERT(ill->ill_group != NULL); 909 return (0); 910 } 911 912 ret = ip_leave_allmulti(ipif); 913 if (ill->ill_group != NULL) 914 (void) ill_nominate_mcast_rcv(ill->ill_group); 915 return (ret); 916 } 917 918 if (!IS_LOOPBACK(ill)) 919 igmp_leavegroup(ilm); 920 921 ilm_delete(ilm); 922 /* 923 * Check how many ipif's that have members in this group - 924 * if there are still some left then don't tell the driver 925 * to drop it. 
926 */ 927 if (ilm_numentries_v6(ill, &v6group) != 0) 928 return (0); 929 return (ip_ll_delmulti_v6(ipif, &v6group)); 930 } 931 932 /* 933 * The unspecified address means all multicast addresses. 934 * This is only used by the multicast router. 935 */ 936 int 937 ip_delmulti_v6(const in6_addr_t *v6group, ill_t *ill, int orig_ifindex, 938 zoneid_t zoneid, boolean_t no_ilg, boolean_t leaving) 939 { 940 ipif_t *ipif; 941 ilm_t *ilm; 942 int ret; 943 944 ASSERT(IAM_WRITER_ILL(ill)); 945 946 if (!IN6_IS_ADDR_MULTICAST(v6group) && 947 !IN6_IS_ADDR_UNSPECIFIED(v6group)) 948 return (EINVAL); 949 950 /* 951 * Look for a match on the ill. 952 * (IPV6_LEAVE_GROUP specifies an ill using an ifindex). 953 * 954 * Similar to ip_addmulti_v6, we should always look using 955 * the orig_ifindex. 956 * 957 * 1) If orig_ifindex is different from ill's ifindex 958 * we should have an ilm with orig_ifindex created in 959 * ip_addmulti_v6. We should delete that here. 960 * 961 * 2) If orig_ifindex is same as ill's ifindex, we should 962 * not delete the ilm that is temporarily here because of 963 * a FAILOVER. Those ilms will have a ilm_orig_ifindex 964 * different from ill's ifindex. 965 * 966 * Thus, always lookup using orig_ifindex. 967 */ 968 mutex_enter(&ill->ill_lock); 969 ilm = ilm_lookup_ill_index_v6(ill, v6group, orig_ifindex, zoneid); 970 mutex_exit(&ill->ill_lock); 971 if (ilm == NULL) 972 return (ENOENT); 973 974 ASSERT(ilm->ilm_ill == ill); 975 976 ipif = ill->ill_ipif; 977 978 /* Update counters */ 979 if (no_ilg) 980 ilm->ilm_no_ilg_cnt--; 981 982 if (leaving) 983 ilm->ilm_refcnt--; 984 985 if (ilm->ilm_refcnt > 0) 986 return (ilm_update_del(ilm, B_TRUE)); 987 988 if (IN6_IS_ADDR_UNSPECIFIED(v6group)) { 989 ilm_delete(ilm); 990 /* 991 * Check how many ipif's that have members in this group - 992 * if there are still some left then don't tell the driver 993 * to drop it. 
994 */ 995 if (ilm_numentries_v6(ill, v6group) != 0) 996 return (0); 997 998 /* 999 * If we never joined, then don't leave. This can happen 1000 * if we're in an IPMP group, since only one ill per IPMP 1001 * group receives all multicast packets. 1002 */ 1003 if (!ill->ill_join_allmulti) { 1004 ASSERT(ill->ill_group != NULL); 1005 return (0); 1006 } 1007 1008 ret = ip_leave_allmulti(ipif); 1009 if (ill->ill_group != NULL) 1010 (void) ill_nominate_mcast_rcv(ill->ill_group); 1011 return (ret); 1012 } 1013 1014 if (!IS_LOOPBACK(ill)) 1015 mld_leavegroup(ilm); 1016 1017 ilm_delete(ilm); 1018 /* 1019 * Check how many ipif's that have members in this group - 1020 * if there are still some left then don't tell the driver 1021 * to drop it. 1022 */ 1023 if (ilm_numentries_v6(ill, v6group) != 0) 1024 return (0); 1025 return (ip_ll_delmulti_v6(ipif, v6group)); 1026 } 1027 1028 /* 1029 * Send a multicast request to the driver for disabling multicast reception 1030 * for v6groupp address. The caller has already checked whether it is 1031 * appropriate to send one or not. 1032 */ 1033 int 1034 ip_ll_send_disabmulti_req(ill_t *ill, const in6_addr_t *v6groupp) 1035 { 1036 mblk_t *mp; 1037 char group_buf[INET6_ADDRSTRLEN]; 1038 uint32_t addrlen, addroff; 1039 1040 ASSERT(IAM_WRITER_ILL(ill)); 1041 /* 1042 * Create a AR_ENTRY_SQUERY message with a dl_disabmulti_req tacked 1043 * on. 1044 */ 1045 mp = ill_create_dl(ill, DL_DISABMULTI_REQ, 1046 sizeof (dl_disabmulti_req_t), &addrlen, &addroff); 1047 1048 if (!mp) 1049 return (ENOMEM); 1050 1051 if (IN6_IS_ADDR_V4MAPPED(v6groupp)) { 1052 ipaddr_t v4group; 1053 1054 IN6_V4MAPPED_TO_IPADDR(v6groupp, v4group); 1055 /* 1056 * NOTE!!! 1057 * The "addroff" passed in here was calculated by 1058 * ill_create_dl(), and will be used by ill_create_squery() 1059 * to perform some twisted coding magic. It is the offset 1060 * into the dl_xxx_req of the hw addr. 
Here, it will be 1061 * added to b_wptr - b_rptr to create a magic number that 1062 * is not an offset into this mblk. 1063 * 1064 * Please see the comment in ip_ll_send)enabmulti_req() 1065 * for a complete explanation. 1066 * 1067 * Look in ar_entry_squery() in arp.c to see how this offset 1068 * is used. 1069 */ 1070 mp = ill_create_squery(ill, v4group, addrlen, addroff, mp); 1071 if (!mp) 1072 return (ENOMEM); 1073 ip1dbg(("ip_ll_send_disabmulti_req: IPv4 putnext %s on %s\n", 1074 inet_ntop(AF_INET6, v6groupp, group_buf, 1075 sizeof (group_buf)), 1076 ill->ill_name)); 1077 putnext(ill->ill_rq, mp); 1078 } else { 1079 ip1dbg(("ip_ll_send_disabmulti_req: IPv6 ndp_mcastreq %s on" 1080 " %s\n", 1081 inet_ntop(AF_INET6, v6groupp, group_buf, 1082 sizeof (group_buf)), 1083 ill->ill_name)); 1084 return (ndp_mcastreq(ill, v6groupp, addrlen, addroff, mp)); 1085 } 1086 return (0); 1087 } 1088 1089 /* 1090 * Send a multicast request to the driver for disabling multicast 1091 * membership for v6group if appropriate. 1092 */ 1093 static int 1094 ip_ll_delmulti_v6(ipif_t *ipif, const in6_addr_t *v6group) 1095 { 1096 ill_t *ill = ipif->ipif_ill; 1097 1098 ASSERT(IAM_WRITER_IPIF(ipif)); 1099 1100 if (ill->ill_net_type != IRE_IF_RESOLVER || 1101 ipif->ipif_flags & IPIF_POINTOPOINT) { 1102 return (0); /* Must be IRE_IF_NORESOLVER */ 1103 } 1104 if (ill->ill_phyint->phyint_flags & PHYI_MULTI_BCAST) { 1105 ip1dbg(("ip_ll_delmulti_v6: MULTI_BCAST\n")); 1106 return (0); 1107 } 1108 if (!ill->ill_dl_up) { 1109 /* 1110 * Nobody there. All multicast addresses will be re-joined 1111 * when we get the DL_BIND_ACK bringing the interface up. 1112 */ 1113