1 0 stevel /* 2 0 stevel * CDDL HEADER START 3 0 stevel * 4 0 stevel * The contents of this file are subject to the terms of the 5 1676 jpk * Common Development and Distribution License (the "License"). 6 1676 jpk * You may not use this file except in compliance with the License. 7 0 stevel * 8 0 stevel * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 0 stevel * or http://www.opensolaris.org/os/licensing. 10 0 stevel * See the License for the specific language governing permissions 11 0 stevel * and limitations under the License. 12 0 stevel * 13 0 stevel * When distributing Covered Code, include this CDDL HEADER in each 14 0 stevel * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 0 stevel * If applicable, add the following below this CDDL HEADER, with the 16 0 stevel * fields enclosed by brackets "[]" replaced with your own identifying 17 0 stevel * information: Portions Copyright [yyyy] [name of copyright owner] 18 0 stevel * 19 0 stevel * CDDL HEADER END 20 0 stevel */ 21 0 stevel /* 22 8485 Peter * Copyright 2009 Sun Microsystems, Inc. All rights reserved. 23 0 stevel * Use is subject to license terms. 24 0 stevel */ 25 0 stevel /* 26 0 stevel * Copyright (c) 1990 Mentat Inc. 27 0 stevel */ 28 0 stevel 29 0 stevel /* 30 0 stevel * This file contains the interface control functions for IPv6. 31 0 stevel */ 32 0 stevel 33 0 stevel #include <sys/types.h> 34 0 stevel #include <sys/sysmacros.h> 35 0 stevel #include <sys/stream.h> 36 0 stevel #include <sys/dlpi.h> 37 0 stevel #include <sys/stropts.h> 38 0 stevel #include <sys/ddi.h> 39 0 stevel #include <sys/cmn_err.h> 40 0 stevel #include <sys/kstat.h> 41 0 stevel #include <sys/debug.h> 42 0 stevel #include <sys/zone.h> 43 3448 dh155122 #include <sys/policy.h> 44 0 stevel 45 0 stevel #include <sys/systm.h> 46 0 stevel #include <sys/param.h> 47 0 stevel #include <sys/socket.h> 48 0 stevel #include <sys/isa_defs.h> 49 0 stevel #include <net/if.h> 50 0 stevel #include <net/if_dl.h> 51 0 stevel #include <net/route.h> 52 0 stevel #include <netinet/in.h> 53 0 stevel #include <netinet/igmp_var.h> 54 0 stevel #include <netinet/ip6.h> 55 0 stevel #include <netinet/icmp6.h> 56 0 stevel 57 0 stevel #include <inet/common.h> 58 0 stevel #include <inet/nd.h> 59 0 stevel #include <inet/mib2.h> 60 0 stevel #include <inet/ip.h> 61 0 stevel #include <inet/ip6.h> 62 0 stevel #include <inet/ip_multi.h> 63 0 stevel #include <inet/ip_ire.h> 64 0 stevel #include <inet/ip_rts.h> 65 0 stevel #include <inet/ip_ndp.h> 66 0 stevel #include <inet/ip_if.h> 67 0 stevel #include <inet/ip6_asp.h> 68 0 stevel #include <inet/ipclassifier.h> 69 0 stevel #include <inet/sctp_ip.h> 70 0 stevel 71 1676 jpk #include <sys/tsol/tndb.h> 72 1676 jpk #include <sys/tsol/tnet.h> 73 0 stevel 74 0 stevel static in6_addr_t ipv6_ll_template = 75 0 stevel {(uint32_t)V6_LINKLOCAL, 0x0, 0x0, 0x0}; 76 0 stevel 77 0 stevel static ipif_t * 78 0 stevel ipif_lookup_interface_v6(const in6_addr_t *if_addr, const in6_addr_t *dst, 79 11042 Erik ip_stack_t *ipst); 80 11042 Erik 81 11042 Erik static int ipif_add_ires_v6(ipif_t *, boolean_t); 82 0 stevel 83 0 stevel /* 84 11042 Erik * This function is called when an application does not specify an interface 85 11042 Erik * to be used for multicast traffic. It calls ire_lookup_multi_v6() to look 86 4459 kcpoon * for an interface route for the specified multicast group. Doing 87 4459 kcpoon * this allows the administrator to add prefix routes for multicast to 88 4459 kcpoon * indicate which interface to be used for multicast traffic in the above 89 4459 kcpoon * scenario. The route could be for all multicast (ff00::/8), for a single 90 4459 kcpoon * multicast group (a /128 route) or anything in between. If there is no 91 4459 kcpoon * such multicast route, we just find any multicast capable interface and 92 4459 kcpoon * return it. 93 11042 Erik * 94 11042 Erik * We support MULTIRT and RTF_SETSRC on the multicast routes added to the 95 11042 Erik * unicast table. This is used by CGTP. 96 0 stevel */ 97 11042 Erik ill_t * 98 11042 Erik ill_lookup_group_v6(const in6_addr_t *group, zoneid_t zoneid, ip_stack_t *ipst, 99 11042 Erik boolean_t *multirtp, in6_addr_t *setsrcp) 100 0 stevel { 101 11042 Erik ill_t *ill; 102 0 stevel 103 11042 Erik ill = ire_lookup_multi_ill_v6(group, zoneid, ipst, multirtp, setsrcp); 104 11042 Erik if (ill != NULL) 105 11042 Erik return (ill); 106 4459 kcpoon 107 11042 Erik return (ill_lookup_multicast(ipst, zoneid, B_TRUE)); 108 0 stevel } 109 0 stevel 110 0 stevel /* 111 0 stevel * Look for an ipif with the specified interface address and destination. 112 0 stevel * The destination address is used only for matching point-to-point interfaces. 113 0 stevel */ 114 0 stevel static ipif_t * 115 0 stevel ipif_lookup_interface_v6(const in6_addr_t *if_addr, const in6_addr_t *dst, 116 11042 Erik ip_stack_t *ipst) 117 0 stevel { 118 0 stevel ipif_t *ipif; 119 0 stevel ill_t *ill; 120 0 stevel ill_walk_context_t ctx; 121 0 stevel 122 0 stevel /* 123 0 stevel * First match all the point-to-point interfaces 124 0 stevel * before looking at non-point-to-point interfaces. 125 0 stevel * This is done to avoid returning non-point-to-point 126 0 stevel * ipif instead of unnumbered point-to-point ipif. 127 0 stevel */ 128 3448 dh155122 rw_enter(&ipst->ips_ill_g_lock, RW_READER); 129 3448 dh155122 ill = ILL_START_WALK_V6(&ctx, ipst); 130 0 stevel for (; ill != NULL; ill = ill_next(&ctx, ill)) { 131 0 stevel mutex_enter(&ill->ill_lock); 132 2733 nordmark for (ipif = ill->ill_ipif; ipif != NULL; 133 2733 nordmark ipif = ipif->ipif_next) { 134 0 stevel /* Allow the ipif to be down */ 135 0 stevel if ((ipif->ipif_flags & IPIF_POINTOPOINT) && 136 0 stevel (IN6_ARE_ADDR_EQUAL(&ipif->ipif_v6lcl_addr, 137 0 stevel if_addr)) && 138 0 stevel (IN6_ARE_ADDR_EQUAL(&ipif->ipif_v6pp_dst_addr, 139 0 stevel dst))) { 140 11042 Erik if (!IPIF_IS_CONDEMNED(ipif)) { 141 0 stevel ipif_refhold_locked(ipif); 142 0 stevel mutex_exit(&ill->ill_lock); 143 3448 dh155122 rw_exit(&ipst->ips_ill_g_lock); 144 0 stevel return (ipif); 145 0 stevel } 146 0 stevel } 147 0 stevel } 148 0 stevel mutex_exit(&ill->ill_lock); 149 0 stevel } 150 3448 dh155122 rw_exit(&ipst->ips_ill_g_lock); 151 0 stevel /* lookup the ipif based on interface address */ 152 11042 Erik ipif = ipif_lookup_addr_v6(if_addr, NULL, ALL_ZONES, ipst); 153 0 stevel ASSERT(ipif == NULL || ipif->ipif_isv6); 154 0 stevel return (ipif); 155 0 stevel } 156 0 stevel 157 0 stevel /* 158 8485 Peter * Common function for ipif_lookup_addr_v6() and ipif_lookup_addr_exact_v6(). 159 0 stevel */ 160 8485 Peter static ipif_t * 161 8485 Peter ipif_lookup_addr_common_v6(const in6_addr_t *addr, ill_t *match_ill, 162 11042 Erik uint32_t match_flags, zoneid_t zoneid, ip_stack_t *ipst) 163 0 stevel { 164 0 stevel ipif_t *ipif; 165 0 stevel ill_t *ill; 166 0 stevel boolean_t ptp = B_FALSE; 167 0 stevel ill_walk_context_t ctx; 168 11042 Erik boolean_t match_illgrp = (match_flags & IPIF_MATCH_ILLGRP); 169 11042 Erik boolean_t no_duplicate = (match_flags & IPIF_MATCH_NONDUP); 170 0 stevel 171 3448 dh155122 rw_enter(&ipst->ips_ill_g_lock, RW_READER); 172 0 stevel /* 173 0 stevel * Repeat twice, first based on local addresses and 174 0 stevel * next time for pointopoint. 175 0 stevel */ 176 0 stevel repeat: 177 3448 dh155122 ill = ILL_START_WALK_V6(&ctx, ipst); 178 0 stevel for (; ill != NULL; ill = ill_next(&ctx, ill)) { 179 8485 Peter if (match_ill != NULL && ill != match_ill && 180 8485 Peter (!match_illgrp || !IS_IN_SAME_ILLGRP(ill, match_ill))) { 181 0 stevel continue; 182 0 stevel } 183 0 stevel mutex_enter(&ill->ill_lock); 184 2733 nordmark for (ipif = ill->ill_ipif; ipif != NULL; 185 2733 nordmark ipif = ipif->ipif_next) { 186 1676 jpk if (zoneid != ALL_ZONES && 187 1676 jpk ipif->ipif_zoneid != zoneid && 188 1676 jpk ipif->ipif_zoneid != ALL_ZONES) 189 0 stevel continue; 190 11042 Erik 191 11042 Erik if (no_duplicate && 192 11042 Erik !(ipif->ipif_flags & IPIF_UP)) { 193 11042 Erik continue; 194 11042 Erik } 195 11042 Erik 196 0 stevel /* Allow the ipif to be down */ 197 0 stevel if ((!ptp && (IN6_ARE_ADDR_EQUAL( 198 0 stevel &ipif->ipif_v6lcl_addr, addr) && 199 0 stevel (ipif->ipif_flags & IPIF_UNNUMBERED) == 0)) || 200 0 stevel (ptp && (ipif->ipif_flags & IPIF_POINTOPOINT) && 201 0 stevel IN6_ARE_ADDR_EQUAL(&ipif->ipif_v6pp_dst_addr, 202 0 stevel addr))) { 203 11042 Erik if (!IPIF_IS_CONDEMNED(ipif)) { 204 0 stevel ipif_refhold_locked(ipif); 205 0 stevel mutex_exit(&ill->ill_lock); 206 3448 dh155122 rw_exit(&ipst->ips_ill_g_lock); 207 0 stevel return (ipif); 208 0 stevel } 209 0 stevel } 210 0 stevel } 211 0 stevel mutex_exit(&ill->ill_lock); 212 0 stevel } 213 0 stevel 214 2733 nordmark /* If we already did the ptp case, then we are done */ 215 0 stevel if (ptp) { 216 3448 dh155122 rw_exit(&ipst->ips_ill_g_lock); 217 0 stevel return (NULL); 218 2733 nordmark } 219 2733 nordmark ptp = B_TRUE; 220 2733 nordmark goto repeat; 221 2733 nordmark } 222 2733 nordmark 223 2733 nordmark /* 224 8485 Peter * Lookup an ipif with the specified address. For point-to-point links we 225 8485 Peter * look for matches on either the destination address or the local address, 226 8485 Peter * but we skip the local address check if IPIF_UNNUMBERED is set. If the 227 8485 Peter * `match_ill' argument is non-NULL, the lookup is restricted to that ill 228 8485 Peter * (or illgrp if `match_ill' is in an IPMP group). 229 8485 Peter */ 230 8485 Peter ipif_t * 231 8485 Peter ipif_lookup_addr_v6(const in6_addr_t *addr, ill_t *match_ill, zoneid_t zoneid, 232 11042 Erik ip_stack_t *ipst) 233 8485 Peter { 234 11042 Erik return (ipif_lookup_addr_common_v6(addr, match_ill, IPIF_MATCH_ILLGRP, 235 11042 Erik zoneid, ipst)); 236 11042 Erik } 237 11042 Erik 238 11042 Erik /* 239 11042 Erik * Lookup an ipif with the specified address. Similar to ipif_lookup_addr, 240 11042 Erik * except that we will only return an address if it is not marked as 241 11042 Erik * IPIF_DUPLICATE 242 11042 Erik */ 243 11042 Erik ipif_t * 244 11042 Erik ipif_lookup_addr_nondup_v6(const in6_addr_t *addr, ill_t *match_ill, 245 11042 Erik zoneid_t zoneid, ip_stack_t *ipst) 246 11042 Erik { 247 11042 Erik return (ipif_lookup_addr_common_v6(addr, match_ill, 248 11042 Erik (IPIF_MATCH_ILLGRP | IPIF_MATCH_NONDUP), zoneid, 249 11042 Erik ipst)); 250 8485 Peter } 251 8485 Peter 252 8485 Peter /* 253 8485 Peter * Special abbreviated version of ipif_lookup_addr_v6() that doesn't match 254 8485 Peter * `match_ill' across the IPMP group. This function is only needed in some 255 8485 Peter * corner-cases; almost everything should use ipif_lookup_addr_v6(). 256 8485 Peter */ 257 8485 Peter ipif_t * 258 8485 Peter ipif_lookup_addr_exact_v6(const in6_addr_t *addr, ill_t *match_ill, 259 8485 Peter ip_stack_t *ipst) 260 8485 Peter { 261 8485 Peter ASSERT(match_ill != NULL); 262 11042 Erik return (ipif_lookup_addr_common_v6(addr, match_ill, 0, ALL_ZONES, 263 11042 Erik ipst)); 264 8485 Peter } 265 8485 Peter 266 8485 Peter /* 267 2733 nordmark * Look for an ipif with the specified address. For point-point links 268 2733 nordmark * we look for matches on either the destination address and the local 269 2733 nordmark * address, but we ignore the check on the local address if IPIF_UNNUMBERED 270 2733 nordmark * is set. 271 8485 Peter * If the `match_ill' argument is non-NULL, the lookup is restricted to that 272 8485 Peter * ill (or illgrp if `match_ill' is in an IPMP group). 273 2733 nordmark * Return the zoneid for the ipif. ALL_ZONES if none found. 274 2733 nordmark */ 275 2733 nordmark zoneid_t 276 3448 dh155122 ipif_lookup_addr_zoneid_v6(const in6_addr_t *addr, ill_t *match_ill, 277 3448 dh155122 ip_stack_t *ipst) 278 2733 nordmark { 279 2733 nordmark ipif_t *ipif; 280 2733 nordmark ill_t *ill; 281 2733 nordmark boolean_t ptp = B_FALSE; 282 2733 nordmark ill_walk_context_t ctx; 283 2733 nordmark zoneid_t zoneid; 284 2733 nordmark 285 3448 dh155122 rw_enter(&ipst->ips_ill_g_lock, RW_READER); 286 2733 nordmark /* 287 2733 nordmark * Repeat twice, first based on local addresses and 288 2733 nordmark * next time for pointopoint. 289 2733 nordmark */ 290 2733 nordmark repeat: 291 3448 dh155122 ill = ILL_START_WALK_V6(&ctx, ipst); 292 2733 nordmark for (; ill != NULL; ill = ill_next(&ctx, ill)) { 293 8485 Peter if (match_ill != NULL && ill != match_ill && 294 8485 Peter !IS_IN_SAME_ILLGRP(ill, match_ill)) { 295 2733 nordmark continue; 296 2733 nordmark } 297 2733 nordmark mutex_enter(&ill->ill_lock); 298 2733 nordmark for (ipif = ill->ill_ipif; ipif != NULL; 299 2733 nordmark ipif = ipif->ipif_next) { 300 2733 nordmark /* Allow the ipif to be down */ 301 2733 nordmark if ((!ptp && (IN6_ARE_ADDR_EQUAL( 302 2733 nordmark &ipif->ipif_v6lcl_addr, addr) && 303 2733 nordmark (ipif->ipif_flags & IPIF_UNNUMBERED) == 0)) || 304 2733 nordmark (ptp && (ipif->ipif_flags & IPIF_POINTOPOINT) && 305 2733 nordmark IN6_ARE_ADDR_EQUAL(&ipif->ipif_v6pp_dst_addr, 306 2733 nordmark addr)) && 307 2733 nordmark !(ipif->ipif_state_flags & IPIF_CONDEMNED)) { 308 2733 nordmark zoneid = ipif->ipif_zoneid; 309 2733 nordmark mutex_exit(&ill->ill_lock); 310 3448 dh155122 rw_exit(&ipst->ips_ill_g_lock); 311 2733 nordmark /* 312 2733 nordmark * If ipif_zoneid was ALL_ZONES then we have 313 2733 nordmark * a trusted extensions shared IP address. 314 2733 nordmark * In that case GLOBAL_ZONEID works to send. 315 2733 nordmark */ 316 2733 nordmark if (zoneid == ALL_ZONES) 317 2733 nordmark zoneid = GLOBAL_ZONEID; 318 2733 nordmark return (zoneid); 319 2733 nordmark } 320 2733 nordmark } 321 2733 nordmark mutex_exit(&ill->ill_lock); 322 2733 nordmark } 323 2733 nordmark 324 2733 nordmark /* If we already did the ptp case, then we are done */ 325 2733 nordmark if (ptp) { 326 3448 dh155122 rw_exit(&ipst->ips_ill_g_lock); 327 2733 nordmark return (ALL_ZONES); 328 0 stevel } 329 0 stevel ptp = B_TRUE; 330 0 stevel goto repeat; 331 0 stevel } 332 0 stevel 333 0 stevel /* 334 0 stevel * Perform various checks to verify that an address would make sense as a local 335 0 stevel * interface address. This is currently only called when an attempt is made 336 0 stevel * to set a local address. 337 0 stevel * 338 0 stevel * Does not allow a v4-mapped address, an address that equals the subnet 339 0 stevel * anycast address, ... a multicast address, ... 340 0 stevel */ 341 0 stevel boolean_t 342 0 stevel ip_local_addr_ok_v6(const in6_addr_t *addr, const in6_addr_t *subnet_mask) 343 0 stevel { 344 0 stevel in6_addr_t subnet; 345 0 stevel 346 0 stevel if (IN6_IS_ADDR_UNSPECIFIED(addr)) 347 0 stevel return (B_TRUE); /* Allow all zeros */ 348 0 stevel 349 0 stevel /* 350 0 stevel * Don't allow all zeroes or host part, but allow 351 0 stevel * all ones netmask. 352 0 stevel */ 353 0 stevel V6_MASK_COPY(*addr, *subnet_mask, subnet); 354 0 stevel if (IN6_IS_ADDR_V4MAPPED(addr) || 355 0 stevel (IN6_ARE_ADDR_EQUAL(addr, &subnet) && 356 0 stevel !IN6_ARE_ADDR_EQUAL(subnet_mask, &ipv6_all_ones)) || 357 0 stevel (IN6_IS_ADDR_V4COMPAT(addr) && CLASSD(V4_PART_OF_V6((*addr)))) || 358 0 stevel IN6_IS_ADDR_MULTICAST(addr)) 359 0 stevel return (B_FALSE); 360 0 stevel 361 0 stevel return (B_TRUE); 362 0 stevel } 363 0 stevel 364 0 stevel /* 365 0 stevel * Perform various checks to verify that an address would make sense as a 366 0 stevel * remote/subnet interface address. 367 0 stevel */ 368 0 stevel boolean_t 369 0 stevel ip_remote_addr_ok_v6(const in6_addr_t *addr, const in6_addr_t *subnet_mask) 370 0 stevel { 371 0 stevel in6_addr_t subnet; 372 0 stevel 373 0 stevel if (IN6_IS_ADDR_UNSPECIFIED(addr)) 374 0 stevel return (B_TRUE); /* Allow all zeros */ 375 0 stevel 376 0 stevel V6_MASK_COPY(*addr, *subnet_mask, subnet); 377 0 stevel if (IN6_IS_ADDR_V4MAPPED(addr) || 378 0 stevel (IN6_ARE_ADDR_EQUAL(addr, &subnet) && 379 0 stevel !IN6_ARE_ADDR_EQUAL(subnet_mask, &ipv6_all_ones)) || 380 0 stevel IN6_IS_ADDR_MULTICAST(addr) || 381 0 stevel (IN6_IS_ADDR_V4COMPAT(addr) && CLASSD(V4_PART_OF_V6((*addr))))) 382 0 stevel return (B_FALSE); 383 0 stevel 384 0 stevel return (B_TRUE); 385 0 stevel } 386 0 stevel 387 0 stevel /* 388 0 stevel * ip_rt_add_v6 is called to add an IPv6 route to the forwarding table. 389 11042 Erik * ill is passed in to associate it with the correct interface 390 0 stevel * (for link-local destinations and gateways). 391 11042 Erik * If ire_arg is set, then we return the held IRE in that location. 392 0 stevel */ 393 0 stevel /* ARGSUSED1 */ 394 0 stevel int 395 0 stevel ip_rt_add_v6(const in6_addr_t *dst_addr, const in6_addr_t *mask, 396 0 stevel const in6_addr_t *gw_addr, const in6_addr_t *src_addr, int flags, 397 11042 Erik ill_t *ill, ire_t **ire_arg, struct rtsa_s *sp, ip_stack_t *ipst, 398 11042 Erik zoneid_t zoneid) 399 0 stevel { 400 11042 Erik ire_t *ire, *nire; 401 0 stevel ire_t *gw_ire = NULL; 402 0 stevel ipif_t *ipif; 403 0 stevel uint_t type; 404 0 stevel int match_flags = MATCH_IRE_TYPE; 405 1676 jpk tsol_gc_t *gc = NULL; 406 1676 jpk tsol_gcgrp_t *gcgrp = NULL; 407 1676 jpk boolean_t gcgrp_xtraref = B_FALSE; 408 0 stevel 409 0 stevel if (ire_arg != NULL) 410 0 stevel *ire_arg = NULL; 411 0 stevel 412 0 stevel /* 413 0 stevel * Prevent routes with a zero gateway from being created (since 414 0 stevel * interfaces can currently be plumbed and brought up with no assigned 415 0 stevel * address). 416 0 stevel */ 417 0 stevel if (IN6_IS_ADDR_UNSPECIFIED(gw_addr)) 418 0 stevel return (ENETUNREACH); 419 0 stevel 420 0 stevel /* 421 0 stevel * If this is the case of RTF_HOST being set, then we set the netmask 422 0 stevel * to all ones (regardless if one was supplied). 423 0 stevel */ 424 0 stevel if (flags & RTF_HOST) 425 0 stevel mask = &ipv6_all_ones; 426 0 stevel 427 0 stevel /* 428 0 stevel * Get the ipif, if any, corresponding to the gw_addr 429 11042 Erik * If -ifp was specified we restrict ourselves to the ill, otherwise 430 11042 Erik * we match on the gatway and destination to handle unnumbered pt-pt 431 11042 Erik * interfaces. 432 0 stevel */ 433 11042 Erik if (ill != NULL) 434 11042 Erik ipif = ipif_lookup_addr_v6(gw_addr, ill, ALL_ZONES, ipst); 435 11042 Erik else 436 11042 Erik ipif = ipif_lookup_interface_v6(gw_addr, dst_addr, ipst); 437 11042 Erik if (ipif != NULL) { 438 11042 Erik if (IS_VNI(ipif->ipif_ill)) { 439 11042 Erik ipif_refrele(ipif); 440 11042 Erik return (EINVAL); 441 11042 Erik } 442 0 stevel } 443 0 stevel 444 0 stevel /* 445 0 stevel * GateD will attempt to create routes with a loopback interface 446 0 stevel * address as the gateway and with RTF_GATEWAY set. We allow 447 0 stevel * these routes to be added, but create them as interface routes 448 0 stevel * since the gateway is an interface address. 449 0 stevel */ 450 1822 rk129064 if ((ipif != NULL) && (ipif->ipif_ire_type == IRE_LOOPBACK)) { 451 0 stevel flags &= ~RTF_GATEWAY; 452 1822 rk129064 if (IN6_ARE_ADDR_EQUAL(gw_addr, &ipv6_loopback) && 453 1822 rk129064 IN6_ARE_ADDR_EQUAL(dst_addr, &ipv6_loopback) && 454 1822 rk129064 IN6_ARE_ADDR_EQUAL(mask, &ipv6_all_ones)) { 455 11042 Erik ire = ire_ftable_lookup_v6(dst_addr, 0, 0, IRE_LOOPBACK, 456 11042 Erik NULL, ALL_ZONES, NULL, MATCH_IRE_TYPE, 0, ipst, 457 11042 Erik NULL); 458 1822 rk129064 if (ire != NULL) { 459 1822 rk129064 ire_refrele(ire); 460 11042 Erik ipif_refrele(ipif); 461 1822 rk129064 return (EEXIST); 462 1822 rk129064 } 463 11042 Erik ip1dbg(("ip_rt_add_v6: 0x%p creating IRE 0x%x" 464 1822 rk129064 "for 0x%x\n", (void *)ipif, 465 1822 rk129064 ipif->ipif_ire_type, 466 1822 rk129064 ntohl(ipif->ipif_lcl_addr))); 467 1822 rk129064 ire = ire_create_v6( 468 1822 rk129064 dst_addr, 469 1822 rk129064 mask, 470 1822 rk129064 NULL, 471 11042 Erik ipif->ipif_ire_type, /* LOOPBACK */ 472 11042 Erik ipif->ipif_ill, 473 11042 Erik zoneid, 474 11042 Erik (ipif->ipif_flags & IPIF_PRIVATE) ? RTF_PRIVATE : 0, 475 3448 dh155122 NULL, 476 3448 dh155122 ipst); 477 11042 Erik 478 1822 rk129064 if (ire == NULL) { 479 11042 Erik ipif_refrele(ipif); 480 1822 rk129064 return (ENOMEM); 481 1822 rk129064 } 482 11042 Erik /* src address assigned by the caller? */ 483 11042 Erik if ((flags & RTF_SETSRC) && 484 11042 Erik !IN6_IS_ADDR_UNSPECIFIED(src_addr)) 485 11042 Erik ire->ire_setsrc_addr_v6 = *src_addr; 486 11042 Erik 487 11042 Erik nire = ire_add(ire); 488 11042 Erik if (nire == NULL) { 489 11042 Erik /* 490 11042 Erik * In the result of failure, ire_add() will have 491 11042 Erik * already deleted the ire in question, so there 492 11042 Erik * is no need to do that here. 493 11042 Erik */ 494 11042 Erik ipif_refrele(ipif); 495 11042 Erik return (ENOMEM); 496 11042 Erik } 497 1822 rk129064 /* 498 11042 Erik * Check if it was a duplicate entry. This handles 499 11042 Erik * the case of two racing route adds for the same route 500 1822 rk129064 */ 501 11042 Erik if (nire != ire) { 502 11042 Erik ASSERT(nire->ire_identical_ref > 1); 503 11042 Erik ire_delete(nire); 504 11042 Erik ire_refrele(nire); 505 1822 rk129064 ipif_refrele(ipif); 506 11042 Erik return (EEXIST); 507 11042 Erik } 508 11042 Erik ire = nire; 509 11042 Erik goto save_ire; 510 1822 rk129064 } 511 1822 rk129064 } 512 11042 Erik 513 11042 Erik /* 514 11042 Erik * The routes for multicast with CGTP are quite special in that 515 11042 Erik * the gateway is the local interface address, yet RTF_GATEWAY 516 11042 Erik * is set. We turn off RTF_GATEWAY to provide compatibility with 517 11042 Erik * this undocumented and unusual use of multicast routes. 518 11042 Erik */ 519 11042 Erik if ((flags & RTF_MULTIRT) && ipif != NULL) 520 11042 Erik flags &= ~RTF_GATEWAY; 521 0 stevel 522 0 stevel /* 523 0 stevel * Traditionally, interface routes are ones where RTF_GATEWAY isn't set 524 0 stevel * and the gateway address provided is one of the system's interface 525 0 stevel * addresses. By using the routing socket interface and supplying an 526 0 stevel * RTA_IFP sockaddr with an interface index, an alternate method of 527 0 stevel * specifying an interface route to be created is available which uses 528 0 stevel * the interface index that specifies the outgoing interface rather than 529 0 stevel * the address of an outgoing interface (which may not be able to 530 0 stevel * uniquely identify an interface). When coupled with the RTF_GATEWAY 531 0 stevel * flag, routes can be specified which not only specify the next-hop to 532 0 stevel * be used when routing to a certain prefix, but also which outgoing 533 0 stevel * interface should be used. 534 0 stevel * 535 0 stevel * Previously, interfaces would have unique addresses assigned to them 536 0 stevel * and so the address assigned to a particular interface could be used 537 0 stevel * to identify a particular interface. One exception to this was the 538 0 stevel * case of an unnumbered interface (where IPIF_UNNUMBERED was set). 539 0 stevel * 540 0 stevel * With the advent of IPv6 and its link-local addresses, this 541 0 stevel * restriction was relaxed and interfaces could share addresses between 542 0 stevel * themselves. In fact, typically all of the link-local interfaces on 543 0 stevel * an IPv6 node or router will have the same link-local address. In 544 0 stevel * order to differentiate between these interfaces, the use of an 545 0 stevel * interface index is necessary and this index can be carried inside a 546 0 stevel * RTA_IFP sockaddr (which is actually a sockaddr_dl). One restriction 547 0 stevel * of using the interface index, however, is that all of the ipif's that 548 0 stevel * are part of an ill have the same index and so the RTA_IFP sockaddr 549 0 stevel * cannot be used to differentiate between ipif's (or logical 550 0 stevel * interfaces) that belong to the same ill (physical interface). 551 0 stevel * 552 0 stevel * For example, in the following case involving IPv4 interfaces and 553 0 stevel * logical interfaces 554 0 stevel * 555 0 stevel * 192.0.2.32 255.255.255.224 192.0.2.33 U if0 556 11042 Erik * 192.0.2.32 255.255.255.224 192.0.2.34 U if0 557 11042 Erik * 192.0.2.32 255.255.255.224 192.0.2.35 U if0 558 0 stevel * 559 0 stevel * the ipif's corresponding to each of these interface routes can be 560 0 stevel * uniquely identified by the "gateway" (actually interface address). 561 0 stevel * 562 0 stevel * In this case involving multiple IPv6 default routes to a particular 563 0 stevel * link-local gateway, the use of RTA_IFP is necessary to specify which 564 0 stevel * default route is of interest: 565 0 stevel * 566 0 stevel * default fe80::123:4567:89ab:cdef U if0 567 0 stevel * default fe80::123:4567:89ab:cdef U if1 568 0 stevel */ 569 0 stevel 570 0 stevel /* RTF_GATEWAY not set */ 571 0 stevel if (!(flags & RTF_GATEWAY)) { 572 1676 jpk if (sp != NULL) { 573 1676 jpk ip2dbg(("ip_rt_add_v6: gateway security attributes " 574 1676 jpk "cannot be set with interface route\n")); 575 11042 Erik if (ipif != NULL) 576 1676 jpk ipif_refrele(ipif); 577 1676 jpk return (EINVAL); 578 1676 jpk } 579 1676 jpk 580 0 stevel /* 581 11042 Erik * Whether or not ill (RTA_IFP) is set, we require that 582 11042 Erik * the gateway is one of our local addresses. 583 0 stevel */ 584 11042 Erik if (ipif == NULL) 585 11042 Erik return (ENETUNREACH); 586 11042 Erik 587 11042 Erik /* 588 11042 Erik * We use MATCH_IRE_ILL here. If the caller specified an 589 11042 Erik * interface (from the RTA_IFP sockaddr) we use it, otherwise 590 11042 Erik * we use the ill derived from the gateway address. 591 11042 Erik * We can always match the gateway address since we record it 592 11042 Erik * in ire_gateway_addr. 593 11042 Erik * We don't allow RTA_IFP to specify a different ill than the 594 11042 Erik * one matching the ipif to make sure we can delete the route. 595 11042 Erik */ 596 11042 Erik match_flags |= MATCH_IRE_GW | MATCH_IRE_ILL; 597 11042 Erik if (ill == NULL) { 598 11042 Erik ill = ipif->ipif_ill; 599 11042 Erik } else if (ill != ipif->ipif_ill) { 600 11042 Erik ipif_refrele(ipif); 601 11042 Erik return (EINVAL); 602 0 stevel } 603 0 stevel 604 0 stevel /* 605 0 stevel * We check for an existing entry at this point. 606 0 stevel */ 607 0 stevel match_flags |= MATCH_IRE_MASK; 608 11042 Erik ire = ire_ftable_lookup_v6(dst_addr, mask, gw_addr, 609 11042 Erik IRE_INTERFACE, ill, ALL_ZONES, NULL, match_flags, 0, ipst, 610 11042 Erik NULL); 611 0 stevel if (ire != NULL) { 612 0 stevel ire_refrele(ire); 613 11042 Erik ipif_refrele(ipif); 614 0 stevel return (EEXIST); 615 0 stevel } 616 0 stevel 617 0 stevel /* 618 0 stevel * Create a copy of the IRE_LOOPBACK, IRE_IF_NORESOLVER or 619 11042 Erik * IRE_IF_RESOLVER with the modified address, netmask, and 620 11042 Erik * gateway. 621 0 stevel */ 622 0 stevel ire = ire_create_v6( 623 0 stevel dst_addr, 624 0 stevel mask, 625 11042 Erik gw_addr, 626 11042 Erik ill->ill_net_type, 627 11042 Erik ill, 628 11042 Erik zoneid, 629 0 stevel flags, 630 3448 dh155122 NULL, 631 3448 dh155122 ipst); 632 0 stevel if (ire == NULL) { 633 11042 Erik ipif_refrele(ipif); 634 0 stevel return (ENOMEM); 635 0 stevel } 636 0 stevel 637 0 stevel /* 638 0 stevel * Some software (for example, GateD and Sun Cluster) attempts 639 0 stevel * to create (what amount to) IRE_PREFIX routes with the 640 0 stevel * loopback address as the gateway. This is primarily done to 641 0 stevel * set up prefixes with the RTF_REJECT flag set (for example, 642 5907 ja97890 * when generating aggregate routes). We also OR in the 643 5907 ja97890 * RTF_BLACKHOLE flag as these interface routes, by 644 5907 ja97890 * definition, can only be that. 645 0 stevel * 646 11042 Erik * If the IRE type (as defined by ill->ill_net_type) is 647 0 stevel * IRE_LOOPBACK, then we map the request into a 648 0 stevel * IRE_IF_NORESOLVER. 649 0 stevel * 650 0 stevel * Needless to say, the real IRE_LOOPBACK is NOT created by this 651 0 stevel * routine, but rather using ire_create_v6() directly. 652 0 stevel */ 653 11042 Erik if (ill->ill_net_type == IRE_LOOPBACK) { 654 0 stevel ire->ire_type = IRE_IF_NORESOLVER; 655 5907 ja97890 ire->ire_flags |= RTF_BLACKHOLE; 656 5907 ja97890 } 657 11042 Erik /* src address assigned by the caller? */ 658 11042 Erik if ((flags & RTF_SETSRC) && !IN6_IS_ADDR_UNSPECIFIED(src_addr)) 659 11042 Erik ire->ire_setsrc_addr_v6 = *src_addr; 660 11042 Erik 661 11042 Erik nire = ire_add(ire); 662 11042 Erik if (nire == NULL) { 663 11042 Erik /* 664 11042 Erik * In the result of failure, ire_add() will have 665 11042 Erik * already deleted the ire in question, so there 666 11042 Erik * is no need to do that here. 667 11042 Erik */ 668 11042 Erik ipif_refrele(ipif); 669 11042 Erik return (ENOMEM); 670 11042 Erik } 671 0 stevel /* 672 11042 Erik * Check if it was a duplicate entry. This handles 673 11042 Erik * the case of two racing route adds for the same route 674 0 stevel */ 675 11042 Erik if (nire != ire) { 676 11042 Erik ASSERT(nire->ire_identical_ref > 1); 677 11042 Erik ire_delete(nire); 678 11042 Erik ire_refrele(nire); 679 0 stevel ipif_refrele(ipif); 680 11042 Erik return (EEXIST); 681 11042 Erik } 682 11042 Erik ire = nire; 683 11042 Erik goto save_ire; 684 0 stevel } 685 0 stevel 686 0 stevel /* 687 0 stevel * Get an interface IRE for the specified gateway. 688 0 stevel * If we don't have an IRE_IF_NORESOLVER or IRE_IF_RESOLVER for the 689 0 stevel * gateway, it is currently unreachable and we fail the request 690 0 stevel * accordingly. 691 11042 Erik * If RTA_IFP was specified we look on that particular ill. 692 0 stevel */ 693 11042 Erik if (ill != NULL) 694 0 stevel match_flags |= MATCH_IRE_ILL; 695 11042 Erik 696 11042 Erik /* Check whether the gateway is reachable. */ 697 11042 Erik type = IRE_INTERFACE; 698 11042 Erik if (flags & RTF_INDIRECT) 699 11042 Erik type |= IRE_OFFLINK; 700 11042 Erik 701 11042 Erik gw_ire = ire_ftable_lookup_v6(gw_addr, 0, 0, type, ill, 702 11042 Erik ALL_ZONES, NULL, match_flags, 0, ipst, NULL); 703 11042 Erik if (gw_ire == NULL) { 704 11042 Erik if (ipif != NULL) 705 11042 Erik ipif_refrele(ipif); 706 0 stevel return (ENETUNREACH); 707 11042 Erik } 708 0 stevel 709 0 stevel /* 710 0 stevel * We create one of three types of IREs as a result of this request 711 0 stevel * based on the netmask. A netmask of all ones (which is automatically 712 0 stevel * assumed when RTF_HOST is set) results in an IRE_HOST being created. 713 0 stevel * An all zeroes netmask implies a default route so an IRE_DEFAULT is 714 0 stevel * created. Otherwise, an IRE_PREFIX route is created for the 715 0 stevel * destination prefix. 716 0 stevel */ 717 0 stevel if (IN6_ARE_ADDR_EQUAL(mask, &ipv6_all_ones)) 718 0 stevel type = IRE_HOST; 719 0 stevel else if (IN6_IS_ADDR_UNSPECIFIED(mask)) 720 0 stevel type = IRE_DEFAULT; 721 0 stevel else 722 0 stevel type = IRE_PREFIX; 723 0 stevel 724 0 stevel /* check for a duplicate entry */ 725 11042 Erik ire = ire_ftable_lookup_v6(dst_addr, mask, gw_addr, type, ill, 726 11042 Erik ALL_ZONES, NULL, 727 11042 Erik match_flags | MATCH_IRE_MASK | MATCH_IRE_GW, 0, ipst, NULL); 728 0 stevel if (ire != NULL) { 729 11042 Erik if (ipif != NULL) 730 11042 Erik ipif_refrele(ipif); 731 0 stevel ire_refrele(gw_ire); 732 0 stevel ire_refrele(ire); 733 0 stevel return (EEXIST); 734 1676 jpk } 735 1676 jpk 736 1676 jpk /* Security attribute exists */ 737 1676 jpk if (sp != NULL) { 738 1676 jpk tsol_gcgrp_addr_t ga; 739 1676 jpk 740 1676 jpk /* find or create the gateway credentials group */ 741 1676 jpk ga.ga_af = AF_INET6; 742 1676 jpk ga.ga_addr = *gw_addr; 743 1676 jpk 744 1676 jpk /* we hold reference to it upon success */ 745 1676 jpk gcgrp = gcgrp_lookup(&ga, B_TRUE); 746 1676 jpk if (gcgrp == NULL) { 747 11042 Erik if (ipif != NULL) 748 11042 Erik ipif_refrele(ipif); 749 1676 jpk ire_refrele(gw_ire); 750 1676 jpk return (ENOMEM); 751 1676 jpk } 752 1676 jpk 753 1676 jpk /* 754 1676 jpk * Create and add the security attribute to the group; a 755 1676 jpk * reference to the group is made upon allocating a new 756 1676 jpk * entry successfully. If it finds an already-existing 757 1676 jpk * entry for the security attribute in the group, it simply 758 1676 jpk * returns it and no new reference is made to the group. 759 1676 jpk */ 760 1676 jpk gc = gc_create(sp, gcgrp, &gcgrp_xtraref); 761 1676 jpk if (gc == NULL) { 762 1676 jpk /* release reference held by gcgrp_lookup */ 763 1676 jpk GCGRP_REFRELE(gcgrp); 764 11042 Erik if (ipif != NULL) 765 11042 Erik ipif_refrele(ipif); 766 1676 jpk ire_refrele(gw_ire); 767 1676 jpk return (ENOMEM); 768 1676 jpk } 769 0 stevel } 770 0 stevel 771 0 stevel /* Create the IRE. */ 772 0 stevel ire = ire_create_v6( 773 0 stevel dst_addr, /* dest address */ 774 0 stevel mask, /* mask */ 775 0 stevel gw_addr, /* gateway address */ 776 0 stevel (ushort_t)type, /* IRE type */ 777 11042 Erik ill, 778 11042 Erik zoneid, 779 0 stevel flags, 780 1676 jpk gc, /* security attribute */ 781 3448 dh155122 ipst); 782 3448 dh155122 783 1676 jpk /* 784 1676 jpk * The ire holds a reference to the 'gc' and the 'gc' holds a 785 1676 jpk * reference to the 'gcgrp'. We can now release the extra reference 786 1676 jpk * the 'gcgrp' acquired in the gcgrp_lookup, if it was not used. 787 1676 jpk */ 788 1676 jpk if (gcgrp_xtraref) 789 1676 jpk GCGRP_REFRELE(gcgrp); 790 0 stevel if (ire == NULL) { 791 1676 jpk if (gc != NULL) 792 1676 jpk GC_REFRELE(gc); 793 11042 Erik if (ipif != NULL) 794 11042 Erik ipif_refrele(ipif); 795 0 stevel ire_refrele(gw_ire); 796 0 stevel return (ENOMEM); 797 0 stevel } 798 11042 Erik 799 11042 Erik /* src address assigned by the caller? */ 800 11042 Erik if ((flags & RTF_SETSRC) && !IN6_IS_ADDR_UNSPECIFIED(src_addr)) 801 11042 Erik ire->ire_setsrc_addr_v6 = *src_addr; 802 0 stevel 803 0 stevel /* 804 0 stevel * POLICY: should we allow an RTF_HOST with address INADDR_ANY? 805 0 stevel * SUN/OS socket stuff does but do we really want to allow ::0 ? 806 0 stevel */ 807 0 stevel 808 0 stevel /* Add the new IRE. */ 809 11042 Erik nire = ire_add(ire); 810 11042 Erik if (nire == NULL) { 811 11042 Erik /* 812 11042 Erik * In the result of failure, ire_add() will have 813 11042 Erik * already deleted the ire in question, so there 814 11042 Erik * is no need to do that here. 815 11042 Erik */ 816 11042 Erik if (ipif != NULL) 817 11042 Erik ipif_refrele(ipif); 818 11042 Erik ire_refrele(gw_ire); 819 11042 Erik return (ENOMEM); 820 11042 Erik } 821 0 stevel /* 822 11042 Erik * Check if it was a duplicate entry. This handles 823 11042 Erik * the case of two racing route adds for the same route 824 0 stevel */ 825 11042 Erik if (nire != ire) { 826 11042 Erik ASSERT(nire->ire_identical_ref > 1); 827 11042 Erik ire_delete(nire); 828 11042 Erik ire_refrele(nire); 829 11042 Erik if (ipif != NULL) 830 11042 Erik ipif_refrele(ipif); 831 0 stevel ire_refrele(gw_ire); 832 11042 Erik return (EEXIST); 833 0 stevel } 834 11042 Erik ire = nire; 835 0 stevel 836 0 stevel if (flags & RTF_MULTIRT) { 837 0 stevel /* 838 0 stevel * Invoke the CGTP (multirouting) filtering module 839 0 stevel * to add the dst address in the filtering database. 840 0 stevel * Replicated inbound packets coming from that address 841 0 stevel * will be filtered to discard the duplicates. 842 0 stevel * It is not necessary to call the CGTP filter hook 843 0 stevel * when the dst address is a multicast, because an 844 0 stevel * IP source address cannot be a multicast. 845 0 stevel */ 846 4961 nordmark if (ipst->ips_ip_cgtp_filter_ops != NULL && 847 0 stevel !IN6_IS_ADDR_MULTICAST(&(ire->ire_addr_v6))) { 848 4961 nordmark int res; 849 11042 Erik ipif_t *src_ipif; 850 4961 nordmark 851 11042 Erik /* Find the source address corresponding to gw_ire */ 852 11042 Erik src_ipif = ipif_lookup_addr_v6( 853 11042 Erik &gw_ire->ire_gateway_addr_v6, NULL, zoneid, ipst); 854 11042 Erik if (src_ipif != NULL) { 855 11042 Erik res = ipst->ips_ip_cgtp_filter_ops-> 856 11042 Erik cfo_add_dest_v6( 857 11042 Erik ipst->ips_netstack->netstack_stackid, 858 11042 Erik &ire->ire_addr_v6, 859 11042 Erik &ire->ire_gateway_addr_v6, 860 11042 Erik &ire->ire_setsrc_addr_v6, 861 11042 Erik &src_ipif->ipif_v6lcl_addr); 862 11042 Erik ipif_refrele(src_ipif); 863 11042 Erik } else { 864 11042 Erik res = EADDRNOTAVAIL; 865 11042 Erik } 866 0 stevel if (res != 0) { 867 11042 Erik if (ipif != NULL) 868 11042 Erik ipif_refrele(ipif); 869 0 stevel ire_refrele(gw_ire); 870 0 stevel ire_delete(ire); 871 11042 Erik ire_refrele(ire); /* Held in ire_add */ 872 0 stevel return (res); 873 0 stevel } 874 0 stevel } 875 1676 jpk } 876 1676 jpk 877 0 stevel save_ire: 878 0 stevel if (gw_ire != NULL) { 879 0 stevel ire_refrele(gw_ire); 880 11042 Erik gw_ire = NULL; 881 0 stevel } 882 11042 Erik if (ire->ire_ill != NULL) { 883 0 stevel /* 884 0 stevel * Save enough information so that we can recreate the IRE if 885 11042 Erik * the ILL goes down and then up. The metrics associated 886 0 stevel * with the route will be saved as well when rts_setmetrics() is 887 0 stevel * called after the IRE has been created. In the case where 888 0 stevel * memory cannot be allocated, none of this information will be 889 0 stevel * saved. 890 0 stevel */ 891 11042 Erik ill_save_ire(ire->ire_ill, ire); 892 11042 Erik } 893 0 stevel 894 0 stevel if (ire_arg != NULL) { 895 0 stevel /* 896 0 stevel * Store the ire that was successfully added into where ire_arg 897 0 stevel * points to so that callers don't have to look it up 898 0 stevel * themselves (but they are responsible for ire_refrele()ing 899 0 stevel * the ire when they are finished with it). 900 0 stevel */ 901 0 stevel *ire_arg = ire; 902 0 stevel } else { 903 0 stevel ire_refrele(ire); /* Held in ire_add */ 904 0 stevel } 905 11042 Erik if (ipif != NULL) 906 0 stevel ipif_refrele(ipif); 907 0 stevel return (0); 908 0 stevel } 909 0 stevel 910 0 stevel /* 911 0 stevel * ip_rt_delete_v6 is called to delete an IPv6 route. 912 11042 Erik * ill is passed in to associate it with the correct interface. 913 0 stevel * (for link-local destinations and gateways). 914 0 stevel */ 915 0 stevel /* ARGSUSED4 */ 916 0 stevel int 917 0 stevel ip_rt_delete_v6(const in6_addr_t *dst_addr, const in6_addr_t *mask, 918 11042 Erik const in6_addr_t *gw_addr, uint_t rtm_addrs, int flags, ill_t *ill, 919 11042 Erik ip_stack_t *ipst, zoneid_t zoneid) 920 0 stevel { 921 0 stevel ire_t *ire = NULL; 922 0 stevel ipif_t *ipif; 923 0 stevel uint_t type; 924 0 stevel uint_t match_flags = MATCH_IRE_TYPE; 925 0 stevel int err = 0; 926 0 stevel 927 0 stevel /* 928 0 stevel * If this is the case of RTF_HOST being set, then we set the netmask 929 0 stevel * to all ones. Otherwise, we use the netmask if one was supplied. 930 0 stevel */ 931 0 stevel if (flags & RTF_HOST) { 932 0 stevel mask = &ipv6_all_ones; 933 0 stevel match_flags |= MATCH_IRE_MASK; 934 0 stevel } else if (rtm_addrs & RTA_NETMASK) { 935 0 stevel match_flags |= MATCH_IRE_MASK; 936 0 stevel } 937 0 stevel 938 0 stevel /* 939 0 stevel * Note that RTF_GATEWAY is never set on a delete, therefore 940 0 stevel * we check if the gateway address is one of our interfaces first, 941 0 stevel * and fall back on RTF_GATEWAY routes. 942 0 stevel * 943 0 stevel * This makes it possible to delete an original 944 0 stevel * IRE_IF_NORESOLVER/IRE_IF_RESOLVER - consistent with SunOS 4.1. 945 11042 Erik * However, we have RTF_KERNEL set on the ones created by ipif_up 946 11042 Erik * and those can not be deleted here. 947 0 stevel * 948 11042 Erik * We use MATCH_IRE_ILL if we know the interface. If the caller 949 11042 Erik * specified an interface (from the RTA_IFP sockaddr) we use it, 950 11042 Erik * otherwise we use the ill derived from the gateway address. 951 11042 Erik * We can always match the gateway address since we record it 952 11042 Erik * in ire_gateway_addr. 953 0 stevel * 954 0 stevel * For more detail on specifying routes by gateway address and by 955 0 stevel * interface index, see the comments in ip_rt_add_v6(). 956 0 stevel */ 957 11042 Erik ipif = ipif_lookup_interface_v6(gw_addr, dst_addr, ipst); 958 0 stevel if (ipif != NULL) { 959 11042 Erik ill_t *ill_match; 960 11042 Erik 961 11042 Erik if (ill != NULL) 962 11042 Erik ill_match = ill; 963 11042 Erik else 964 11042 Erik ill_match = ipif->ipif_ill; 965 11042 Erik 966 11042 Erik match_flags |= MATCH_IRE_ILL; 967 11042 Erik if (ipif->ipif_ire_type == IRE_LOOPBACK) { 968 11042 Erik ire = ire_ftable_lookup_v6(dst_addr, 0, 0, IRE_LOOPBACK, 969 11042 Erik ill_match, ALL_ZONES, NULL, match_flags, 0, ipst, 970 11042 Erik NULL); 971 11042 Erik } 972 11042 Erik if (ire == NULL) { 973 11042 Erik match_flags |= MATCH_IRE_GW; 974 11042 Erik ire = ire_ftable_lookup_v6(dst_addr, mask, gw_addr, 975 11042 Erik IRE_INTERFACE, ill_match, ALL_ZONES, NULL, 976 11042 Erik match_flags, 0, ipst, NULL); 977 11042 Erik } 978 11042 Erik /* Avoid deleting routes created by kernel from an ipif */ 979 11042 Erik if (ire != NULL && (ire->ire_flags & RTF_KERNEL)) { 980 11042 Erik ire_refrele(ire); 981 11042 Erik ire = NULL; 982 0 stevel } 983 0 stevel 984 11042 Erik /* Restore in case we didn't find a match */ 985 11042 Erik match_flags &= ~(MATCH_IRE_GW|MATCH_IRE_ILL); 986 0 stevel } 987 11042 Erik 988 0 stevel if (ire == NULL) { 989 0 stevel /* 990 0 stevel * At this point, the gateway address is not one of our own 991 0 stevel * addresses or a matching interface route was not found. We 992 0 stevel * set the IRE type to lookup based on whether 993 0 stevel * this is a host route, a default route or just a prefix. 994 0 stevel * 995 11042 Erik * If an ill was passed in, then the lookup is based on an 996 0 stevel * interface index so MATCH_IRE_ILL is added to match_flags. 997 0 stevel */ 998 0 stevel match_flags |= MATCH_IRE_GW; 999 11042 Erik if (ill != NULL) 1000 0 stevel match_flags |= MATCH_IRE_ILL; 1001 0 stevel if (IN6_ARE_ADDR_EQUAL(mask, &ipv6_all_ones)) 1002 0 stevel type = IRE_HOST; 1003 0 stevel else if (IN6_IS_ADDR_UNSPECIFIED(mask)) 1004 0 stevel type = IRE_DEFAULT; 1005 0 stevel else 1006 0 stevel type = IRE_PREFIX; 1007 0 stevel ire = ire_ftable_lookup_v6(dst_addr, mask, gw_addr, type, 1008 11042 Erik ill, ALL_ZONES, NULL, match_flags, 0, ipst, NULL); 1009 0 stevel } 1010 0 stevel 1011 11042 Erik if (ipif != NULL) { 1012 0 stevel ipif_refrele(ipif); 1013 11042 Erik ipif = NULL; 1014 0 stevel } 1015 0 stevel if (ire == NULL) 1016 0 stevel return (ESRCH); 1017 0 stevel 1018 0 stevel if (ire->ire_flags & RTF_MULTIRT) { 1019 0 stevel /* 1020 0 stevel * Invoke the CGTP (multirouting) filtering module 1021 0 stevel * to remove the dst address from the filtering database. 1022 0 stevel * Packets coming from that address will no longer be 1023 0 stevel * filtered to remove duplicates. 1024 0 stevel */ 1025 4961 nordmark if (ipst->ips_ip_cgtp_filter_ops != NULL) { 1026 4961 nordmark err = ipst->ips_ip_cgtp_filter_ops->cfo_del_dest_v6( 1027 4961 nordmark ipst->ips_netstack->netstack_stackid, 1028 0 stevel &ire->ire_addr_v6, &ire->ire_gateway_addr_v6); 1029 0 stevel } 1030 0 stevel } 1031 0 stevel 1032 11042 Erik ill = ire->ire_ill; 1033 11042 Erik if (ill != NULL) 1034 11042 Erik ill_remove_saved_ire(ill, ire); 1035 0 stevel ire_delete(ire); 1036 0 stevel ire_refrele(ire); 1037 0 stevel return (err); 1038 0 stevel } 1039 0 stevel 1040 0 stevel /* 1041 10616 Sebastien * Derive an interface id from the link layer address. 1042 0 stevel */ 1043 10616 Sebastien void 1044 0 stevel ill_setdefaulttoken(ill_t *ill) 1045 0 stevel { 1046 10773 Sebastien if (!ill->ill_manual_token) { 1047 10616 Sebastien bzero(&ill->ill_token, sizeof (ill->ill_token)); 1048 10616 Sebastien MEDIA_V6INTFID(ill->ill_media, ill, &ill->ill_token); 1049 10616 Sebastien ill->ill_token_length = IPV6_TOKEN_LEN; 1050 10616 Sebastien } 1051 10616 Sebastien } 1052 0 stevel 1053 10616 Sebastien void 1054 10616 Sebastien ill_setdesttoken(ill_t *ill) 1055 10616 Sebastien { 1056 10616 Sebastien bzero(&ill->ill_dest_token, sizeof (ill->ill_dest_token)); 1057 10616 Sebastien MEDIA_V6DESTINTFID(ill->ill_media, ill, &ill->ill_dest_token); 1058 0 stevel } 1059 0 stevel 1060 0 stevel /* 1061 0 stevel * Create a link-local address from a token. 1062 0 stevel */ 1063 0 stevel static void 1064 0 stevel ipif_get_linklocal(in6_addr_t *dest, const in6_addr_t *token) 1065 0 stevel { 1066 0 stevel int i; 1067 0 stevel 1068 0 stevel for (i = 0; i < 4; i++) { 1069 0 stevel dest->s6_addr32[i] = 1070 0 stevel token->s6_addr32[i] | ipv6_ll_template.s6_addr32[i]; 1071 0 stevel } 1072 0 stevel } 1073 0 stevel 1074 0 stevel /* 1075 10616 Sebastien * Set a default IPv6 address for a 6to4 tunnel interface 2002:<tsrc>::1/16 1076 0 stevel */ 1077 0 stevel static void 1078 10616 Sebastien ipif_set6to4addr(ipif_t *ipif) 1079 0 stevel { 1080 10616 Sebastien ill_t *ill = ipif->ipif_ill; 1081 10616 Sebastien struct in_addr v4phys; 1082 0 stevel 1083 10616 Sebastien ASSERT(ill->ill_mactype == DL_6TO4); 1084 10616 Sebastien ASSERT(ill->ill_phys_addr_length == sizeof (struct in_addr)); 1085 10616 Sebastien ASSERT(ipif->ipif_isv6); 1086 10616 Sebastien 1087 10616 Sebastien if (ipif->ipif_flags & IPIF_UP) 1088 0 stevel return; 1089 0 stevel 1090 10616 Sebastien (void) ip_plen_to_mask_v6(16, &ipif->ipif_v6net_mask); 1091 10616 Sebastien bcopy(ill->ill_phys_addr, &v4phys, sizeof (struct in_addr)); 1092 10616 Sebastien IN6_V4ADDR_TO_6TO4(&v4phys, &ipif->ipif_v6lcl_addr); 1093 10616 Sebastien V6_MASK_COPY(ipif->ipif_v6lcl_addr, ipif->ipif_v6net_mask, 1094 10616 Sebastien ipif->ipif_v6subnet); 1095 0 stevel } 1096 0 stevel 1097 0 stevel /* 1098 0 stevel * Is it not possible to set the link local address? 1099 0 stevel * The address can be set if the token is set, and the token 1100 0 stevel * isn't too long. 1101 0 stevel * Return B_TRUE if the address can't be set, or B_FALSE if it can. 1102 0 stevel */ 1103 0 stevel boolean_t 1104 0 stevel ipif_cant_setlinklocal(ipif_t *ipif) 1105 0 stevel { 1106 0 stevel ill_t *ill = ipif->ipif_ill; 1107 0 stevel 1108 0 stevel if (IN6_IS_ADDR_UNSPECIFIED(&ill->ill_token) || 1109 0 stevel ill->ill_token_length > IPV6_ABITS - IPV6_LL_PREFIXLEN) 1110 0 stevel return (B_TRUE); 1111 0 stevel 1112 0 stevel return (B_FALSE); 1113 0 stevel } 1114 0 stevel 1115 0 stevel /* 1116 0 stevel * Generate a link-local address from the token. 1117 0 stevel */ 1118 10616 Sebastien void 1119 0 stevel ipif_setlinklocal(ipif_t *ipif) 1120 0 stevel { 1121 3706 vi117747 ill_t *ill = ipif->ipif_ill; 1122 3706 vi117747 in6_addr_t ov6addr; 1123 0 stevel 1124 0 stevel ASSERT(IAM_WRITER_ILL(ill)); 1125 0 stevel 1126 10616 Sebastien /* 1127 10616 Sebastien * ill_manual_linklocal is set when the link-local address was 1128 10616 Sebastien * manually configured. 1129 10616 Sebastien */ 1130 10616 Sebastien if (ill->ill_manual_linklocal) 1131 10616 Sebastien return; 1132 10616 Sebastien 1133 10616 Sebastien /* 1134 10616 Sebastien * IPv6 interfaces over 6to4 tunnels are special. They do not have 1135 10616 Sebastien * link-local addresses, but instead have a single automatically 1136 10616 Sebastien * generated global address. 1137 10616 Sebastien */ 1138 10616 Sebastien if (ill->ill_mactype == DL_6TO4) { 1139 10616 Sebastien ipif_set6to4addr(ipif); 1140 10616 Sebastien return; 1141 10616 Sebastien } 1142 10616 Sebastien 1143 0 stevel if (ipif_cant_setlinklocal(ipif)) 1144 10616 Sebastien return; 1145 0 stevel 1146 3706 vi117747 ov6addr = ipif->ipif_v6lcl_addr; 1147 0 stevel ipif_get_linklocal(&ipif->ipif_v6lcl_addr, &ill->ill_token); 1148 3706 vi117747 sctp_update_ipif_addr(ipif, ov6addr); 1149 0 stevel (void) ip_plen_to_mask_v6(IPV6_LL_PREFIXLEN, &ipif->ipif_v6net_mask); 1150 10616 Sebastien if (IN6_IS_ADDR_UNSPECIFIED(&ipif->ipif_v6pp_dst_addr)) { 1151 10616 Sebastien V6_MASK_COPY(ipif->ipif_v6lcl_addr, ipif->ipif_v6net_mask, 1152 10616 Sebastien ipif->ipif_v6subnet); 1153 10616 Sebastien } 1154 0 stevel 1155 11076 Cathy ip_rts_newaddrmsg(RTM_CHGADDR, 0, ipif, RTSQ_DEFAULT); 1156 10616 Sebastien } 1157 10616 Sebastien 1158 10616 Sebastien /* 1159 10616 Sebastien * Set the destination link-local address for a point-to-point IPv6 1160 10616 Sebastien * interface with a destination interface id (IP tunnels are such 1161 10616 Sebastien * interfaces). 1162 10616 Sebastien */ 1163 10616 Sebastien void 1164 10616 Sebastien ipif_setdestlinklocal(ipif_t *ipif) 1165 10616 Sebastien { 1166 10616 Sebastien ill_t *ill = ipif->ipif_ill; 1167 10616 Sebastien 1168 10616 Sebastien ASSERT(IAM_WRITER_ILL(ill)); 1169 10616 Sebastien if (IN6_IS_ADDR_UNSPECIFIED(&ill->ill_dest_token)) 1170 10616 Sebastien return; 1171 11042 Erik /* Skip if we've already set the pp_dst_addr */ 1172 11042 Erik if (!IN6_IS_ADDR_UNSPECIFIED(&ipif->ipif_v6pp_dst_addr)) 1173 11042 Erik return; 1174 11042 Erik 1175 10616 Sebastien ipif_get_linklocal(&ipif->ipif_v6pp_dst_addr, &ill->ill_dest_token); 1176 10616 Sebastien ipif->ipif_v6subnet = ipif->ipif_v6pp_dst_addr; 1177 0 stevel } 1178 0 stevel 1179 0 stevel /* 1180 4972 meem * Get the resolver set up for a new ipif. (Always called as writer.) 1181 0 stevel */ 1182 0 stevel int 1183 8485 Peter ipif_ndp_up(ipif_t *ipif, boolean_t initial) 1184 0 stevel { 1185 0 stevel ill_t *ill = ipif->ipif_ill; 1186 0 stevel int err = 0; 1187 0 stevel nce_t *nce = NULL; 1188 8485 Peter boolean_t added_ipif = B_FALSE; 1189 0 stevel 1190 11042 Erik DTRACE_PROBE3(ipif__downup, char *, "ipif_ndp_up", 1191 11042 Erik ill_t *, ill, ipif_t *, ipif); 1192 4972 meem ip1dbg(("ipif_ndp_up(%s:%u)\n", ill->ill_name, ipif->ipif_id)); 1193 0 stevel 1194 11042 Erik if (IN6_IS_ADDR_UNSPECIFIED(&ipif->ipif_v6lcl_addr) || 1195 2546 carlsonj (!(ill->ill_net_type & IRE_INTERFACE))) { 1196 2546 carlsonj ipif->ipif_addr_ready = 1; 1197 0 stevel return (0); 1198 2546 carlsonj } 1199 0 stevel 1200 0 stevel if ((ipif->ipif_flags & (IPIF_UNNUMBERED|IPIF_NOLOCAL)) == 0) { 1201 0 stevel uint16_t flags; 1202 8485 Peter uint16_t state; 1203 11042 Erik uchar_t *hw_addr; 1204 8485 Peter ill_t *bound_ill; 1205 8485 Peter ipmp_illgrp_t *illg = ill->ill_grp; 1206 11042 Erik uint_t hw_addr_len; 1207 0 stevel 1208 11042 Erik flags = NCE_F_MYADDR | NCE_F_NONUD | NCE_F_PUBLISH | 1209 11042 Erik NCE_F_AUTHORITY; 1210 0 stevel if (ill->ill_flags & ILLF_ROUTER) 1211 0 stevel flags |= NCE_F_ISROUTER; 1212 0 stevel 1213 0 stevel if (ipif->ipif_flags & IPIF_ANYCAST) 1214 0 stevel flags |= NCE_F_ANYCAST; 1215 0 stevel 1216 8485 Peter if (IS_IPMP(ill)) { 1217 8485 Peter ASSERT(ill->ill_net_type == IRE_IF_RESOLVER); 1218 8485 Peter /* 1219 8485 Peter * If we're here via ipif_up(), then the ipif won't be 1220 8485 Peter * bound yet -- add it to the group, which will bind 1221 8485 Peter * it if possible. (We would add it in ipif_up(), but 1222 8485 Peter * deleting on failure there is gruesome.) If we're 1223 8485 Peter * here via ipmp_ill_bind_ipif(), then the ipif has 1224 8485 Peter * already been added to the group and we just need to 1225 8485 Peter * use the binding. 1226 8485 Peter */ 1227 8485 Peter if ((bound_ill = ipmp_ipif_bound_ill(ipif)) == NULL) { 1228 8485 Peter bound_ill = ipmp_illgrp_add_ipif(illg, ipif); 1229 8485 Peter if (bound_ill == NULL) { 1230 8485 Peter /* 1231 8485 Peter * We couldn't bind the ipif to an ill 1232 8485 Peter * yet, so we have nothing to publish. 1233 8485 Peter * Set ipif_addr_ready so that this 1234 8485 Peter * address can be used locally for now. 1235 8485 Peter * The routing socket message will be 1236 8485 Peter * sent from ipif_up_done_v6(). 1237 8485 Peter */ 1238 8485 Peter ipif->ipif_addr_ready = 1; 1239 8485 Peter return (0); 1240 8485 Peter } 1241 8485 Peter added_ipif = B_TRUE; 1242 8485 Peter } 1243 8485 Peter hw_addr = bound_ill->ill_nd_lla; 1244 11042 Erik hw_addr_len = bound_ill->ill_phys_addr_length; 1245 8485 Peter } else { 1246 8485 Peter bound_ill = ill; 1247 11111 Sowmini hw_addr = ill->ill_nd_lla; 1248 11111 Sowmini hw_addr_len = ill->ill_phys_addr_length; 1249 8485 Peter } 1250 0 stevel 1251 8485 Peter /* 1252 8485 Peter * If this is an initial bring-up (or the ipif was never 1253 8485 Peter * completely brought up), do DAD. Otherwise, we're here 1254 8485 Peter * because IPMP has rebound an address to this ill: send 1255 8485 Peter * unsolicited advertisements to inform others. 1256 8485 Peter */ 1257 8485 Peter if (initial || !ipif->ipif_addr_ready) { 1258 11042 Erik /* Causes Duplicate Address Detection to run */ 1259 8485 Peter state = ND_PROBE; 1260 8485 Peter } else { 1261 8485 Peter state = ND_REACHABLE; 1262 8485 Peter flags |= NCE_F_UNSOL_ADV; 1263 0 stevel } 1264 11042 Erik 1265 9814 Sowmini retry: 1266 11042 Erik err = nce_lookup_then_add_v6(ill, hw_addr, hw_addr_len, 1267 11042 Erik &ipif->ipif_v6lcl_addr, flags, state, &nce); 1268 0 stevel switch (err) { 1269 0 stevel case 0: 1270 0 stevel ip1dbg(("ipif_ndp_up: NCE created for %s\n", 1271 2546 carlsonj ill->ill_name)); 1272 2546 carlsonj ipif->ipif_addr_ready = 1; 1273 9287 Sowmini ipif->ipif_added_nce = 1; 1274 9571 Sowmini nce->nce_ipif_cnt++; 1275 2546 carlsonj break; 1276 2546 carlsonj case EINPROGRESS: 1277 2546 carlsonj ip1dbg(("ipif_ndp_up: running DAD now for %s\n", 1278 0 stevel ill->ill_name)); 1279 9287 Sowmini ipif->ipif_added_nce = 1; 1280 9571 Sowmini nce->nce_ipif_cnt++; 1281 0 stevel break; 1282 0 stevel case EEXIST: 1283 0 stevel ip1dbg(("ipif_ndp_up: NCE already exists for %s\n", 1284 0 stevel ill->ill_name)); 1285 11042 Erik if (!NCE_MYADDR(nce->nce_common)) { 1286 11042 Erik /* 1287 11042 Erik * A leftover nce from before this address 1288 11042 Erik * existed 1289 11042 Erik */ 1290 11042 Erik ncec_delete(nce->nce_common); 1291 11042 Erik nce_refrele(nce); 1292 9814 Sowmini nce = NULL; 1293 9814 Sowmini goto retry; 1294 9814 Sowmini } 1295 9571 Sowmini if ((ipif->ipif_flags & IPIF_POINTOPOINT) == 0) { 1296 11042 Erik nce_refrele(nce); 1297 11042 Erik nce = NULL; 1298 11042 Erik ip1dbg(("ipif_ndp_up: NCE already exists " 1299 11042 Erik "for %s\n", ill->ill_name)); 1300 9571 Sowmini goto fail; 1301 9571 Sowmini } 1302 9571 Sowmini /* 1303 9571 Sowmini * Duplicate local addresses are permissible for 1304 9571 Sowmini * IPIF_POINTOPOINT interfaces which will get marked 1305 9571 Sowmini * IPIF_UNNUMBERED later in 1306 9571 Sowmini * ip_addr_availability_check(). 1307 9571 Sowmini * 1308 9571 Sowmini * The nce_ipif_cnt field tracks the number of 1309 9571 Sowmini * ipifs that have nce_addr as their local address. 1310 9571 Sowmini */ 1311 9571 Sowmini ipif->ipif_addr_ready = 1; 1312 9571 Sowmini ipif->ipif_added_nce = 1; 1313 9571 Sowmini nce->nce_ipif_cnt++; 1314 11042 Erik err = 0; 1315 9571 Sowmini break; 1316 0 stevel default: 1317 8485 Peter ip1dbg(("ipif_ndp_up: NCE creation failed for %s\n", 1318 0 stevel ill->ill_name)); 1319 8485 Peter goto fail; 1320 0 stevel } 1321 2546 carlsonj } else { 1322 2546 carlsonj /* No local NCE for this entry */ 1323 2546 carlsonj ipif->ipif_addr_ready = 1; 1324 0 stevel } 1325 0 stevel if (nce != NULL) 1326 11042 Erik nce_refrele(nce); 1327 0 stevel return (0); 1328 8485 Peter fail: 1329 8485 Peter if (added_ipif) 1330 8485 Peter ipmp_illgrp_del_ipif(ill->ill_grp, ipif); 1331 8485 Peter 1332 8485 Peter return (err); 1333 0 stevel } 1334 0 stevel 1335 0 stevel /* Remove all cache entries for this logical interface */ 1336 0 stevel void 1337 0 stevel ipif_ndp_down(ipif_t *ipif) 1338 0 stevel { 1339 11042 Erik ipif_nce_down(ipif); 1340 0 stevel } 1341 0 stevel 1342 0 stevel /* 1343 0 stevel * Return the scope of the given IPv6 address. If the address is an 1344 0 stevel * IPv4 mapped IPv6 address, return the scope of the corresponding 1345 0 stevel * IPv4 address. 1346 0 stevel */ 1347 0 stevel in6addr_scope_t 1348 0 stevel ip_addr_scope_v6(const in6_addr_t *addr) 1349 0 stevel { 1350 0 stevel static in6_addr_t ipv6loopback = IN6ADDR_LOOPBACK_INIT; 1351 0 stevel 1352 0 stevel if (IN6_IS_ADDR_V4MAPPED(addr)) { 1353 0 stevel in_addr_t v4addr_h = ntohl(V4_PART_OF_V6((*addr))); 1354 0 stevel if ((v4addr_h >> IN_CLASSA_NSHIFT) == IN_LOOPBACKNET || 1355 0 stevel (v4addr_h & IN_AUTOCONF_MASK) == IN_AUTOCONF_NET) 1356 0 stevel return (IP6_SCOPE_LINKLOCAL); 1357 0 stevel if ((v4addr_h & IN_PRIVATE8_MASK) == IN_PRIVATE8_NET || 1358 0 stevel (v4addr_h & IN_PRIVATE12_MASK) == IN_PRIVATE12_NET || 1359 0 stevel (v4addr_h & IN_PRIVATE16_MASK) == IN_PRIVATE16_NET) 1360 0 stevel return (IP6_SCOPE_SITELOCAL); 1361 0 stevel return (IP6_SCOPE_GLOBAL); 1362 0 stevel } 1363 0 stevel 1364 0 stevel if (IN6_IS_ADDR_MULTICAST(addr)) 1365 0 stevel return (IN6_ADDR_MC_SCOPE(addr)); 1366 0 stevel 1367 0 stevel /* link-local and loopback addresses are of link-local scope */ 1368 0 stevel if (IN6_IS_ADDR_LINKLOCAL(addr) || 1369 0 stevel IN6_ARE_ADDR_EQUAL(addr, &ipv6loopback)) 1370 0 stevel return (IP6_SCOPE_LINKLOCAL); 1371 0 stevel if (IN6_IS_ADDR_SITELOCAL(addr)) 1372 0 stevel return (IP6_SCOPE_SITELOCAL); 1373 0 stevel return (IP6_SCOPE_GLOBAL); 1374 0 stevel } 1375 0 stevel 1376 0 stevel 1377 0 stevel /* 1378 3431 carlsonj * Returns the length of the common prefix of a1 and a2, as per 1379 3431 carlsonj * CommonPrefixLen() defined in RFC 3484. 1380 0 stevel */ 1381 3431 carlsonj static int 1382 3431 carlsonj ip_common_prefix_v6(const in6_addr_t *a1, const in6_addr_t *a2) 1383 0 stevel { 1384 0 stevel int i; 1385 3431 carlsonj uint32_t a1val, a2val, mask; 1386 0 stevel 1387 3431 carlsonj for (i = 0; i < 4; i++) { 1388 3431 carlsonj if ((a1val = a1->s6_addr32[i]) != (a2val = a2->s6_addr32[i])) { 1389 3431 carlsonj a1val ^= a2val; 1390 3431 carlsonj i *= 32; 1391 3431 carlsonj mask = 0x80000000u; 1392 3431 carlsonj while (!(a1val & mask)) { 1393 3431 carlsonj mask >>= 1; 1394 3431 carlsonj i++; 1395 3431 carlsonj } 1396 3431 carlsonj return (i); 1397 3431 carlsonj } 1398 3431 carlsonj } 1399 3431 carlsonj return (IPV6_ABITS); 1400 0 stevel } 1401 0 stevel 1402 0 stevel #define IPIF_VALID_IPV6_SOURCE(ipif) \ 1403 0 stevel (((ipif)->ipif_flags & IPIF_UP) && \ 1404 11076 Cathy !((ipif)->ipif_flags & (IPIF_NOLOCAL|IPIF_ANYCAST)) && \ 1405 11076 Cathy !((ipif)->ipif_ill->ill_flags & ILLF_NOACCEPT)) 1406 0 stevel 1407 0 stevel /* source address candidate */ 1408 0 stevel typedef struct candidate { 1409 0 stevel ipif_t *cand_ipif; 1410 0 stevel /* The properties of this candidate */ 1411 0 stevel boolean_t cand_isdst; 1412 0 stevel boolean_t cand_isdst_set; 1413 0 stevel in6addr_scope_t cand_scope; 1414 0 stevel boolean_t cand_scope_set; 1415 0 stevel boolean_t cand_isdeprecated; 1416 0 stevel boolean_t cand_isdeprecated_set; 1417 0 stevel boolean_t cand_ispreferred; 1418 0 stevel boolean_t cand_ispreferred_set; 1419 0 stevel boolean_t cand_matchedinterface; 1420 0 stevel boolean_t cand_matchedinterface_set; 1421 0 stevel boolean_t cand_matchedlabel; 1422 0 stevel boolean_t cand_matchedlabel_set; 1423 0 stevel boolean_t cand_istmp; 1424 0 stevel boolean_t cand_istmp_set; 1425 3431 carlsonj int cand_common_pref; 1426 3431 carlsonj boolean_t cand_common_pref_set; 1427 3431 carlsonj boolean_t cand_pref_eq; 1428 3431 carlsonj boolean_t cand_pref_eq_set; 1429 3431 carlsonj int cand_pref_len; 1430 3431 carlsonj boolean_t cand_pref_len_set; 1431 0 stevel } cand_t; 1432 0 stevel #define cand_srcaddr cand_ipif->ipif_v6lcl_addr 1433 3431 carlsonj #define cand_mask cand_ipif->ipif_v6net_mask 1434 0 stevel #define cand_flags cand_ipif->ipif_flags 1435 0 stevel #define cand_ill cand_ipif->ipif_ill 1436 1676 jpk #define cand_zoneid cand_ipif->ipif_zoneid 1437 0 stevel 1438 0 stevel /* information about the destination for source address selection */ 1439 0 stevel typedef struct dstinfo { 1440 0 stevel const in6_addr_t *dst_addr; 1441 0 stevel ill_t *dst_ill; 1442 2202 rk129064 uint_t dst_restrict_ill; 1443 0 stevel boolean_t dst_prefer_src_tmp; 1444 0 stevel in6addr_scope_t dst_scope; 1445 0 stevel char *dst_label; 1446 0 stevel } dstinfo_t; 1447 0 stevel 1448 0 stevel /* 1449 0 stevel * The following functions are rules used to select a source address in 1450 0 stevel * ipif_select_source_v6(). Each rule compares a current candidate (cc) 1451 0 stevel * against the best candidate (bc). Each rule has three possible outcomes; 1452 0 stevel * the candidate is preferred over the best candidate (CAND_PREFER), the 1453 0 stevel * candidate is not preferred over the best candidate (CAND_AVOID), or the 1454 0 stevel * candidate is of equal value as the best candidate (CAND_TIE). 1455 0 stevel * 1456 0 stevel * These rules are part of a greater "Default Address Selection for IPv6" 1457 0 stevel * sheme, which is standards based work coming out of the IETF ipv6 working 1458 0 stevel * group. The IETF document defines both IPv6 source address selection and 1459 0 stevel * destination address ordering. The rules defined here implement the IPv6 1460 0 stevel * source address selection. Destination address ordering is done by 1461 0 stevel * libnsl, and uses a similar set of rules to implement the sorting. 1462 3431 carlsonj * 1463 3431 carlsonj * Most of the rules are defined by the RFC and are not typically altered. The 1464 3431 carlsonj * last rule, number 8, has language that allows for local preferences. In the 1465 3431 carlsonj * scheme below, this means that new Solaris rules should normally go between 1466 3431 carlsonj * rule_ifprefix and rule_prefix. 1467 0 stevel */ 1468 0 stevel typedef enum {CAND_AVOID, CAND_TIE, CAND_PREFER} rule_res_t; 1469 3448 dh155122 typedef rule_res_t (*rulef_t)(cand_t *, cand_t *, const dstinfo_t *, 1470 3448 dh155122 ip_stack_t *); 1471 0 stevel 1472 0 stevel /* Prefer an address if it is equal to the destination address. */ 1473 3448 dh155122 /* ARGSUSED3 */ 1474 0 stevel static rule_res_t 1475 3448 dh155122 rule_isdst(cand_t *bc, cand_t *cc, const dstinfo_t *dstinfo, ip_stack_t *ipst) 1476 0 stevel { 1477 0 stevel if (!bc->cand_isdst_set) { 1478 0 stevel bc->cand_isdst = 1479 0 stevel IN6_ARE_ADDR_EQUAL(&bc->cand_srcaddr, dstinfo->dst_addr); 1480 0 stevel bc->cand_isdst_set = B_TRUE; 1481 0 stevel } 1482 0 stevel 1483 0 stevel cc->cand_isdst = 1484 0 stevel IN6_ARE_ADDR_EQUAL(&cc->cand_srcaddr, dstinfo->dst_addr); 1485 0 stevel cc->cand_isdst_set = B_TRUE; 1486 0 stevel 1487 0 stevel if (cc->cand_isdst == bc->cand_isdst) 1488 0 stevel return (CAND_TIE); 1489 0 stevel else if (cc->cand_isdst) 1490 0 stevel return (CAND_PREFER); 1491 0 stevel else 1492 0 stevel return (CAND_AVOID); 1493 0 stevel } 1494 0 stevel 1495 0 stevel /* 1496 0 stevel * Prefer addresses that are of closest scope to the destination. Always 1497 0 stevel * prefer addresses that are of greater scope than the destination over 1498 0 stevel * those that are of lesser scope than the destination. 1499 0 stevel */ 1500 3448 dh155122 /* ARGSUSED3 */ 1501 0 stevel static rule_res_t 1502 3448 dh155122 rule_scope(cand_t *bc, cand_t *cc, const dstinfo_t *dstinfo, ip_stack_t *ipst) 1503 0 stevel { 1504 0 stevel if (!bc->cand_scope_set) { 1505 0 stevel bc->cand_scope = ip_addr_scope_v6(&bc->cand_srcaddr); 1506 0 stevel bc->cand_scope_set = B_TRUE; 1507 0 stevel } 1508 0 stevel 1509 0 stevel cc->cand_scope = ip_addr_scope_v6(&cc->cand_srcaddr); 1510 0 stevel cc->cand_scope_set = B_TRUE; 1511 0 stevel 1512 0 stevel if (cc->cand_scope < bc->cand_scope) { 1513 0 stevel if (cc->cand_scope < dstinfo->dst_scope) 1514 0 stevel return (CAND_AVOID); 1515 0 stevel else 1516 0 stevel return (CAND_PREFER); 1517 0 stevel } else if (bc->cand_scope < cc->cand_scope) { 1518 0 stevel if (bc->cand_scope < dstinfo->dst_scope) 1519 0 stevel return (CAND_PREFER); 1520 0 stevel else 1521 0 stevel return (CAND_AVOID); 1522 0 stevel } else { 1523 0 stevel return (CAND_TIE); 1524 0 stevel } 1525 0 stevel } 1526 0 stevel 1527 0 stevel /* 1528 0 stevel * Prefer non-deprecated source addresses. 1529 0 stevel */ 1530 0 stevel /* ARGSUSED2 */ 1531 0 stevel static rule_res_t 1532 3448 dh155122 rule_deprecated(cand_t *bc, cand_t *cc, const dstinfo_t *dstinfo, 1533 3448 dh155122 ip_stack_t *ipst) 1534 0 stevel { 1535 0 stevel if (!bc->cand_isdeprecated_set) { 1536 0 stevel bc->cand_isdeprecated = 1537 0 stevel ((bc->cand_flags & IPIF_DEPRECATED) != 0); 1538 0 stevel bc->cand_isdeprecated_set = B_TRUE; 1539 0 stevel } 1540 0 stevel 1541 0 stevel cc->cand_isdeprecated = ((cc->cand_flags & IPIF_DEPRECATED) != 0); 1542 0 stevel cc->cand_isdeprecated_set = B_TRUE; 1543 0 stevel 1544 0 stevel if (bc->cand_isdeprecated == cc->cand_isdeprecated) 1545 0 stevel return (CAND_TIE); 1546 0 stevel else if (cc->cand_isdeprecated) 1547 0 stevel return (CAND_AVOID); 1548 0 stevel else 1549 0 stevel return (CAND_PREFER); 1550 0 stevel } 1551 0 stevel 1552 0 stevel /* 1553 0 stevel * Prefer source addresses that have the IPIF_PREFERRED flag set. This 1554 0 stevel * rule must be before rule_interface because the flag could be set on any 1555 0 stevel * interface, not just the interface being used for outgoing packets (for 1556 0 stevel * example, the IFF_PREFERRED could be set on an address assigned to the 1557 0 stevel * loopback interface). 1558 0 stevel */ 1559 0 stevel /* ARGSUSED2 */ 1560 0 stevel static rule_res_t 1561 3448 dh155122 rule_preferred(cand_t *bc, cand_t *cc, const dstinfo_t *dstinfo, 1562 3448 dh155122 ip_stack_t *ipst) 1563 0 stevel { 1564 0 stevel if (!bc->cand_ispreferred_set) { 1565 0 stevel bc->cand_ispreferred = ((bc->cand_flags & IPIF_PREFERRED) != 0); 1566 0 stevel bc->cand_ispreferred_set = B_TRUE; 1567 0 stevel } 1568 0 stevel 1569 0 stevel cc->cand_ispreferred = ((cc->cand_flags & IPIF_PREFERRED) != 0); 1570 0 stevel cc->cand_ispreferred_set = B_TRUE; 1571 0 stevel 1572 0 stevel if (bc->cand_ispreferred == cc->cand_ispreferred) 1573 0 stevel return (CAND_TIE); 1574 0 stevel else if (cc->cand_ispreferred) 1575 0 stevel return (CAND_PREFER); 1576 0 stevel else 1577 0 stevel return (CAND_AVOID); 1578 0 stevel } 1579 0 stevel 1580 0 stevel /* 1581 8485 Peter * Prefer source addresses that are assigned to the outgoing interface. 1582 0 stevel */ 1583 3448 dh155122 /* ARGSUSED3 */ 1584 0 stevel static rule_res_t 1585 3448 dh155122 rule_interface(cand_t *bc, cand_t *cc, const dstinfo_t *dstinfo, 1586 3448 dh155122 ip_stack_t *ipst) 1587 0 stevel { 1588 0 stevel ill_t *dstill = dstinfo->dst_ill; 1589 0 stevel 1590 0 stevel /* 1591 0 stevel * If dstinfo->dst_restrict_ill is set, this rule is unnecessary 1592 0 stevel * since we know all candidates will be on the same link. 1593 0 stevel */ 1594 0 stevel if (dstinfo->dst_restrict_ill) 1595 0 stevel return (CAND_TIE); 1596 0 stevel 1597 0 stevel if (!bc->cand_matchedinterface_set) { 1598 8485 Peter bc->cand_matchedinterface = bc->cand_ill == dstill; 1599 0 stevel bc->cand_matchedinterface_set = B_TRUE; 1600 0 stevel } 1601 0 stevel 1602 8485 Peter cc->cand_matchedinterface = cc->cand_ill == dstill; 1603 0 stevel cc->cand_matchedinterface_set = B_TRUE; 1604 0 stevel 1605 0 stevel if (bc->cand_matchedinterface == cc->cand_matchedinterface) 1606 0 stevel return (CAND_TIE); 1607 0 stevel else if (cc->cand_matchedinterface) 1608 0 stevel return (CAND_PREFER); 1609 0 stevel else 1610 0 stevel return (CAND_AVOID); 1611 0 stevel } 1612 0 stevel 1613 0 stevel /* 1614 0 stevel * Prefer source addresses whose label matches the destination's label. 1615 0 stevel */ 1616 0 stevel static rule_res_t 1617 3448 dh155122 rule_label(cand_t *bc, cand_t *cc, const dstinfo_t *dstinfo, ip_stack_t *ipst) 1618 0 stevel { 1619 0 stevel char *label; 1620 0 stevel 1621 0 stevel if (!bc->cand_matchedlabel_set) { 1622 3448 dh155122 label = ip6_asp_lookup(&bc->cand_srcaddr, NULL, ipst); 1623 0 stevel bc->cand_matchedlabel = 1624 0 stevel ip6_asp_labelcmp(label, dstinfo->dst_label); 1625 0 stevel bc->cand_matchedlabel_set = B_TRUE; 1626 0 stevel } 1627 0 stevel 1628 3448 dh155122 label = ip6_asp_lookup(&cc->cand_srcaddr, NULL, ipst); 1629 0 stevel cc->cand_matchedlabel = ip6_asp_labelcmp(label, dstinfo->dst_label); 1630 0 stevel cc->cand_matchedlabel_set = B_TRUE; 1631 0 stevel 1632 0 stevel if (bc->cand_matchedlabel == cc->cand_matchedlabel) 1633 0 stevel return (CAND_TIE); 1634 0 stevel else if (cc->cand_matchedlabel) 1635 0 stevel return (CAND_PREFER); 1636 0 stevel else 1637 0 stevel return (CAND_AVOID); 1638 0 stevel } 1639 0 stevel 1640 0 stevel /* 1641 0 stevel * Prefer public addresses over temporary ones. An application can reverse 1642 0 stevel * the logic of this rule and prefer temporary addresses by using the 1643 0 stevel * IPV6_SRC_PREFERENCES socket option. 1644 0 stevel */ 1645 3448 dh155122 /* ARGSUSED3 */ 1646 0 stevel static rule_res_t 1647 3448 dh155122 rule_temporary(cand_t *bc, cand_t *cc, const dstinfo_t *dstinfo, 1648 3448 dh155122 ip_stack_t *ipst) 1649 0 stevel { 1650 0 stevel if (!bc->cand_istmp_set) { 1651 0 stevel bc->cand_istmp = ((bc->cand_flags & IPIF_TEMPORARY) != 0); 1652 0 stevel bc->cand_istmp_set = B_TRUE; 1653 0 stevel } 1654 0 stevel 1655 0 stevel cc->cand_istmp = ((cc->cand_flags & IPIF_TEMPORARY) != 0); 1656 0 stevel cc->cand_istmp_set = B_TRUE; 1657 0 stevel 1658 0 stevel if (bc->cand_istmp == cc->cand_istmp) 1659 0 stevel return (CAND_TIE); 1660 0 stevel 1661 0 stevel if (dstinfo->dst_prefer_src_tmp && cc->cand_istmp) 1662 0 stevel return (CAND_PREFER); 1663 0 stevel else if (!dstinfo->dst_prefer_src_tmp && !cc->cand_istmp) 1664 0 stevel return (CAND_PREFER); 1665 0 stevel else 1666 0 stevel return (CAND_AVOID); 1667 0 stevel } 1668 0 stevel 1669 0 stevel /* 1670 3431 carlsonj * Prefer source addresses with longer matching prefix with the destination 1671 3431 carlsonj * under the interface mask. This gets us on the same subnet before applying 1672 3431 carlsonj * any Solaris-specific rules. 1673 0 stevel */ 1674 3448 dh155122 /* ARGSUSED3 */ 1675 0 stevel static rule_res_t 1676 3448 dh155122 rule_ifprefix(cand_t *bc, cand_t *cc, const dstinfo_t *dstinfo, 1677 3448 dh155122 ip_stack_t *ipst) 1678 0 stevel { 1679 3431 carlsonj if (!bc->cand_pref_eq_set) { 1680 3431 carlsonj bc->cand_pref_eq = V6_MASK_EQ_2(bc->cand_srcaddr, 1681 3431 carlsonj bc->cand_mask, *dstinfo->dst_addr); 1682 3431 carlsonj bc->cand_pref_eq_set = B_TRUE; 1683 0 stevel } 1684 0 stevel 1685 3431 carlsonj cc->cand_pref_eq = V6_MASK_EQ_2(cc->cand_srcaddr, cc->cand_mask, 1686 3431 carlsonj *dstinfo->dst_addr); 1687 3431 carlsonj cc->cand_pref_eq_set = B_TRUE; 1688 0 stevel 1689 3431 carlsonj if (bc->cand_pref_eq) { 1690 3431 carlsonj if (cc->cand_pref_eq) { 1691 3431 carlsonj if (!bc->cand_pref_len_set) { 1692 3431 carlsonj bc->cand_pref_len = 1693 3431 carlsonj ip_mask_to_plen_v6(&bc->cand_mask); 1694 3431 carlsonj bc->cand_pref_len_set = B_TRUE; 1695 3431 carlsonj } 1696 3431 carlsonj cc->cand_pref_len = ip_mask_to_plen_v6(&cc->cand_mask); 1697 3431 carlsonj cc->cand_pref_len_set = B_TRUE; 1698 3431 carlsonj if (bc->cand_pref_len == cc->cand_pref_len) 1699 3431 carlsonj return (CAND_TIE); 1700 3431 carlsonj else if (bc->cand_pref_len > cc->cand_pref_len) 1701 3431 carlsonj return (CAND_AVOID); 1702 3431 carlsonj else 1703 3431 carlsonj return (CAND_PREFER); 1704 3431 carlsonj } else { 1705 3431 carlsonj return (CAND_AVOID); 1706 3431 carlsonj } 1707 3431 carlsonj } else { 1708 3431 carlsonj if (cc->cand_pref_eq) 1709 3431 carlsonj return (CAND_PREFER); 1710 3431 carlsonj else 1711 3431 carlsonj return (CAND_TIE); 1712 0 stevel } 1713 0 stevel } 1714 0 stevel 1715 0 stevel /* 1716 1676 jpk * Prefer to use zone-specific addresses when possible instead of all-zones 1717 1676 jpk * addresses. 1718 1676 jpk */ 1719 1676 jpk /* ARGSUSED2 */ 1720 1676 jpk static rule_res_t 1721 3448 dh155122 rule_zone_specific(cand_t *bc, cand_t *cc, const dstinfo_t *dstinfo, 1722 3448 dh155122 ip_stack_t *ipst) 1723 1676 jpk { 1724 1676 jpk if ((bc->cand_zoneid == ALL_ZONES) == 1725 1676 jpk (cc->cand_zoneid == ALL_ZONES)) 1726 1676 jpk return (CAND_TIE); 1727 1676 jpk else if (cc->cand_zoneid == ALL_ZONES) 1728 1676 jpk return (CAND_AVOID); 1729 1676 jpk else 1730 1676 jpk return (CAND_PREFER); 1731 3431 carlsonj } 1732 3431 carlsonj 1733 3431 carlsonj /* 1734 3431 carlsonj * Prefer to use DHCPv6 (first) and static addresses (second) when possible 1735 3431 carlsonj * instead of statelessly autoconfigured addresses. 1736 3431 carlsonj * 1737 3431 carlsonj * This is done after trying all other preferences (and before the final tie 1738 3431 carlsonj * breaker) so that, if all else is equal, we select addresses configured by 1739 3431 carlsonj * DHCPv6 over other addresses. We presume that DHCPv6 addresses, unlike 1740 3431 carlsonj * stateless autoconfigured addresses, are deliberately configured by an 1741 3431 carlsonj * administrator, and thus are correctly set up in DNS and network packet 1742 3431 carlsonj * filters. 1743 3431 carlsonj */ 1744 3431 carlsonj /* ARGSUSED2 */ 1745 3431 carlsonj static rule_res_t 1746 3448 dh155122 rule_addr_type(cand_t *bc, cand_t *cc, const dstinfo_t *dstinfo, 1747 3448 dh155122 ip_stack_t *ipst) 1748 3431 carlsonj { 1749 3431 carlsonj #define ATYPE(x) \ 1750 3431 carlsonj ((x) & IPIF_DHCPRUNNING) ? 1 : ((x) & IPIF_ADDRCONF) ? 3 : 2 1751 3431 carlsonj int bcval = ATYPE(bc->cand_flags); 1752 3431 carlsonj int ccval = ATYPE(cc->cand_flags); 1753 3431 carlsonj #undef ATYPE 1754 3431 carlsonj 1755 3431 carlsonj if (bcval == ccval) 1756 3431 carlsonj return (CAND_TIE); 1757 3431 carlsonj else if (ccval < bcval) 1758 3431 carlsonj return (CAND_PREFER); 1759 3431 carlsonj else 1760 3431 carlsonj return (CAND_AVOID); 1761 3431 carlsonj } 1762 3431 carlsonj 1763 3431 carlsonj /* 1764 3431 carlsonj * Prefer source addresses with longer matching prefix with the destination. 1765 3431 carlsonj * We do the longest matching prefix calculation by doing an xor of both 1766 3431 carlsonj * addresses with the destination, and pick the address with the longest string 1767 3431 carlsonj * of leading zeros, as per CommonPrefixLen() defined in RFC 3484. 1768 3431 carlsonj */ 1769 3448 dh155122 /* ARGSUSED3 */ 1770 3431 carlsonj static rule_res_t 1771 3448 dh155122 rule_prefix(cand_t *bc, cand_t *cc, const dstinfo_t *dstinfo, ip_stack_t *ipst) 1772 3431 carlsonj { 1773 3431 carlsonj if (!bc->cand_common_pref_set) { 1774 3431 carlsonj bc->cand_common_pref = ip_common_prefix_v6(&bc->cand_srcaddr, 1775 3431 carlsonj dstinfo->dst_addr); 1776 3431 carlsonj bc->cand_common_pref_set = B_TRUE; 1777 3431 carlsonj } 1778 3431 carlsonj 1779 3431 carlsonj cc->cand_common_pref = ip_common_prefix_v6(&cc->cand_srcaddr, 1780 3431 carlsonj dstinfo->dst_addr); 1781 3431 carlsonj cc->cand_common_pref_set = B_TRUE; 1782 3431 carlsonj 1783 3431 carlsonj if (bc->cand_common_pref == cc->cand_common_pref) 1784 3431 carlsonj return (CAND_TIE); 1785 3431 carlsonj else if (bc->cand_common_pref > cc->cand_common_pref) 1786 3431 carlsonj return (CAND_AVOID); 1787 3431 carlsonj else 1788 3431 carlsonj return (CAND_PREFER); 1789 3431 carlsonj } 1790 3431 carlsonj 1791 3431 carlsonj /* 1792 3431 carlsonj * Last rule: we must pick something, so just prefer the current best 1793 3431 carlsonj * candidate. 1794 3431 carlsonj */ 1795 3431 carlsonj /* ARGSUSED */ 1796 3431 carlsonj static rule_res_t 1797 3448 dh155122 rule_must_be_last(cand_t *bc, cand_t *cc, const dstinfo_t *dstinfo, 1798 3448 dh155122 ip_stack_t *ipst) 1799 3431 carlsonj { 1800 3431 carlsonj return (CAND_AVOID); 1801 1676 jpk } 1802 1676 jpk 1803 1676 jpk /* 1804 0 stevel * Determine the best source address given a destination address and a 1805 0 stevel * destination ill. If no suitable source address is found, it returns 1806 0 stevel * NULL. If there is a usable address pointed to by the usesrc 1807 0 stevel * (i.e ill_usesrc_ifindex != 0) then return that first since it is more 1808 0 stevel * fine grained (i.e per interface) 1809 0 stevel * 1810 0 stevel * This implementation is based on the "Default Address Selection for IPv6" 1811 0 stevel * specification produced by the IETF IPv6 working group. It has been 1812 0 stevel * implemented so that the list of addresses is only traversed once (the 1813 0 stevel * specification's algorithm could traverse the list of addresses once for 1814 0 stevel * every rule). 1815 0 stevel * 1816 8485 Peter * The restrict_ill argument restricts the algorithm to choose a source 1817 8485 Peter * address that is assigned to the destination ill. This is used when 1818 8485 Peter * the destination address is a link-local or multicast address, and when 1819 0 stevel * ipv6_strict_dst_multihoming is turned on. 1820 0 stevel * 1821 0 stevel * src_prefs is the caller's set of source address preferences. If source 1822 0 stevel * address selection is being called to determine the source address of a 1823 11042 Erik * connected socket (from ip_set_destination_v6()), then the preferences are 1824 11042 Erik * taken from conn_ixa->ixa_src_preferences. These preferences can be set on a 1825 0 stevel * per-socket basis using the IPV6_SRC_PREFERENCES socket option. The only 1826 0 stevel * preference currently implemented is for rfc3041 temporary addresses. 1827 0 stevel */ 1828 0 stevel ipif_t * 1829 0 stevel ipif_select_source_v6(ill_t *dstill, const in6_addr_t *dst, 1830 11042 Erik boolean_t restrict_ill, uint32_t src_prefs, zoneid_t zoneid, 1831 11042 Erik boolean_t allow_usesrc, boolean_t *notreadyp) 1832 0 stevel { 1833 0 stevel dstinfo_t dstinfo; 1834 0 stevel char dstr[INET6_ADDRSTRLEN]; 1835 0 stevel char sstr[INET6_ADDRSTRLEN]; 1836 8485 Peter ipif_t *ipif, *start_ipif, *next_ipif; 1837 8485 Peter ill_t *ill, *usesrc_ill = NULL, *ipmp_ill = NULL; 1838 0 stevel ill_walk_context_t ctx; 1839 0 stevel cand_t best_c; /* The best candidate */ 1840 0 stevel cand_t curr_c; /* The current candidate */ 1841 0 stevel uint_t index; 1842 0 stevel boolean_t first_candidate = B_TRUE; 1843 0 stevel rule_res_t rule_result; 1844 1676 jpk tsol_tpc_t *src_rhtp, *dst_rhtp; 1845 3448 dh155122 ip_stack_t *ipst = dstill->ill_ipst; 1846 0 stevel 1847 0 stevel /* 1848 0 stevel * The list of ordering rules. They are applied in the order they 1849 0 stevel * appear in the list. 1850 0 stevel * 1851 3431 carlsonj * Solaris doesn't currently support Mobile IPv6, so there's no 1852 3431 carlsonj * rule_mipv6 corresponding to rule 4 in the specification. 1853 0 stevel */ 1854 0 stevel rulef_t rules[] = { 1855 0 stevel rule_isdst, 1856 0 stevel rule_scope, 1857 0 stevel rule_deprecated, 1858 0 stevel rule_preferred, 1859 0 stevel rule_interface, 1860 0 stevel rule_label, 1861 0 stevel rule_temporary, 1862 3431 carlsonj rule_ifprefix, /* local rules after this */ 1863 1676 jpk rule_zone_specific, 1864 3431 carlsonj rule_addr_type, 1865 3431 carlsonj rule_prefix, /* local rules before this */ 1866 3431 carlsonj rule_must_be_last, /* must always be last */ 1867 0 stevel NULL 1868 0 stevel }; 1869 0 stevel 1870 0 stevel ASSERT(dstill->ill_isv6); 1871 0 stevel ASSERT(!IN6_IS_ADDR_V4MAPPED(dst)); 1872 0 stevel 1873 0 stevel /* 1874 0 stevel * Check if there is a usable src address pointed to by the 1875 0 stevel * usesrc ifindex. This has higher precedence since it is 1876 0 stevel * finer grained (i.e per interface) v/s being system wide. 1877 0 stevel */ 1878 11042 Erik if (dstill->ill_usesrc_ifindex != 0 && allow_usesrc) { 1879 0 stevel if ((usesrc_ill = 1880 0 stevel ill_lookup_on_ifindex(dstill->ill_usesrc_ifindex, B_TRUE, 1881 11042 Erik ipst)) != NULL) { 1882 0 stevel dstinfo.dst_ill = usesrc_ill; 1883 0 stevel } else { 1884 0 stevel return (NULL); 1885 0 stevel } 1886 8485 Peter } else if (IS_UNDER_IPMP(dstill)) { 1887 8485 Peter /* 1888 8485 Peter * Test addresses should never be used for source address 1889 8485 Peter * selection, so if we were passed an underlying ill, switch 1890 8485 Peter * to the IPMP meta-interface. 1891 8485 Peter */ 1892 8485 Peter if ((ipmp_ill = ipmp_ill_hold_ipmp_ill(dstill)) != NULL) 1893 8485 Peter dstinfo.dst_ill = ipmp_ill; 1894 8485 Peter else 1895 8485 Peter return (NULL); 1896 0 stevel } else { 1897 0 stevel dstinfo.dst_ill = dstill; 1898 1676 jpk } 1899 1676 jpk 1900 1676 jpk /* 1901 1676 jpk * If we're dealing with an unlabeled destination on a labeled system, 1902 1676 jpk * make sure that we ignore source addresses that are incompatible with 1903 1676 jpk * the destination's default label. That destination's default label 1904 1676 jpk * must dominate the minimum label on the source address. 1905 1676 jpk * 1906 1676 jpk * (Note that this has to do with Trusted Solaris. It's not related to 1907 1676 jpk * the labels described by ip6_asp_lookup.) 1908 1676 jpk */ 1909 1676 jpk dst_rhtp = NULL; 1910 1676 jpk if (is_system_labeled()) { 1911 1676 jpk dst_rhtp = find_tpc(dst, IPV6_VERSION, B_FALSE); 1912 1676 jpk if (dst_rhtp == NULL) 1913 1676 jpk return (NULL); 1914 1676 jpk if (dst_rhtp->tpc_tp.host_type != UNLABELED) { 1915 1676 jpk TPC_RELE(dst_rhtp); 1916 1676 jpk dst_rhtp = NULL; 1917 1676 jpk } 1918 0 stevel } 1919 0 stevel 1920 0 stevel dstinfo.dst_addr = dst; 1921 0 stevel dstinfo.dst_scope = ip_addr_scope_v6(dst); 1922 3448 dh155122 dstinfo.dst_label = ip6_asp_lookup(dst, NULL, ipst); 1923 0 stevel dstinfo.dst_prefer_src_tmp = ((src_prefs & IPV6_PREFER_SRC_TMP) != 0); 1924 3448 dh155122 rw_enter(&ipst->ips_ill_g_lock, RW_READER); 1925 0 stevel /* 1926 0 stevel * Section three of the I-D states that for multicast and 1927 0 stevel * link-local destinations, the candidate set must be restricted to 1928 0 stevel * an interface that is on the same link as the outgoing interface. 1929 0 stevel * Also, when ipv6_strict_dst_multihoming is turned on, always 1930 0 stevel * restrict the source address to the destination link as doing 1931 0 stevel * otherwise will almost certainly cause problems. 1932 0 stevel */ 1933 0 stevel if (IN6_IS_ADDR_LINKLOCAL(dst) || IN6_IS_ADDR_MULTICAST(dst) || 1934 3448 dh155122 ipst->ips_ipv6_strict_dst_multihoming || usesrc_ill != NULL) { 1935 8485 Peter dstinfo.dst_restrict_ill = B_TRUE; 1936 2202 rk129064 } else { 1937 0 stevel dstinfo.dst_restrict_ill = restrict_ill; 1938 2202 rk129064 } 1939 0 stevel 1940 0 stevel bzero(&best_c, sizeof (cand_t)); 1941 0 stevel 1942 0 stevel /* 1943 8485 Peter * Take a pass through the list of IPv6 interfaces to choose the best 1944 8485 Peter * possible source address. If restrict_ill is set, just use dst_ill. 1945 0 stevel */ 1946 8485 Peter if (dstinfo.dst_restrict_ill) 1947 8485 Peter ill = dstinfo.dst_ill; 1948 8485 Peter else 1949 3448 dh155122 ill = ILL_START_WALK_V6(&ctx, ipst); 1950 0 stevel 1951 8485 Peter for (; ill != NULL; ill = ill_next(&ctx, ill)) { 1952 0 stevel ASSERT(ill->ill_isv6); 1953 2202 rk129064 1954 2202 rk129064 /* 1955 8485 Peter * Test addresses should never be used for source address 1956 8485 Peter * selection, so ignore underlying ills. 1957 2202 rk129064 */ 1958 8485 Peter if (IS_UNDER_IPMP(ill)) 1959 8485 Peter continue; 1960 0 stevel 1961 9658 Sowmini if (ill->ill_ipif == NULL) 1962 9658 Sowmini continue; 1963 8485 Peter /* 1964 8485 Peter * For source address selection, we treat the ipif list as 1965 8485 Peter * circular and continue until we get back to where we 1966 8485 Peter * started. This allows IPMP to vary source address selection 1967 8485 Peter * (which improves inbound load spreading) by caching its last 1968 8485 Peter * ending point and starting from there. NOTE: we don't have 1969 8485 Peter * to worry about ill_src_ipif changing ills since that can't 1970 8485 Peter * happen on the IPMP ill. 1971 8485 Peter */ 1972 8485 Peter start_ipif = ill->ill_ipif; 1973 8485 Peter if (IS_IPMP(ill) && ill->ill_src_ipif != NULL) 1974 8485 Peter start_ipif = ill->ill_src_ipif; 1975 8485 Peter 1976 8485 Peter ipif = start_ipif; 1977 8485 Peter do { 1978 8485 Peter if ((next_ipif = ipif->ipif_next) == NULL) 1979 8485 Peter next_ipif = ill->ill_ipif; 1980 0 stevel 1981 0 stevel if (!IPIF_VALID_IPV6_SOURCE(ipif)) 1982 0 stevel continue; 1983 0 stevel 1984 11042 Erik if (!ipif->ipif_addr_ready) { 1985 11042 Erik if (notreadyp != NULL) 1986 11042 Erik *notreadyp = B_TRUE; 1987 11042 Erik continue; 1988 11042 Erik } 1989 11042 Erik 1990 1676 jpk if (zoneid != ALL_ZONES && 1991 1676 jpk ipif->ipif_zoneid != zoneid && 1992 1676 jpk ipif->ipif_zoneid != ALL_ZONES) 1993 0 stevel continue; 1994 1676 jpk 1995 1676 jpk /* 1996 1676 jpk * Check compatibility of local address for 1997 1676 jpk * destination's default label if we're on a labeled 1998 1676 jpk * system. Incompatible addresses can't be used at 1999 1676 jpk * all and must be skipped over. 2000 1676 jpk */ 2001 1676 jpk if (dst_rhtp != NULL) { 2002 1676 jpk boolean_t incompat; 2003 1676 jpk 2004 1676 jpk src_rhtp = find_tpc(&ipif->ipif_v6lcl_addr, 2005 1676 jpk IPV6_VERSION, B_FALSE); 2006 1676 jpk if (src_rhtp == NULL) 2007 1676 jpk continue; 2008 1676 jpk incompat = 2009 1676 jpk src_rhtp->tpc_tp.host_type != SUN_CIPSO || 2010 1676 jpk src_rhtp->tpc_tp.tp_doi != 2011 1676 jpk dst_rhtp->tpc_tp.tp_doi || 2012 1676 jpk (!_blinrange(&dst_rhtp->tpc_tp.tp_def_label, 2013 1676 jpk &src_rhtp->tpc_tp.tp_sl_range_cipso) && 2014 1676 jpk !blinlset(&dst_rhtp->tpc_tp.tp_def_label, 2015 1676 jpk src_rhtp->tpc_tp.tp_sl_set_cipso)); 2016 1676 jpk TPC_RELE(src_rhtp); 2017 1676 jpk if (incompat) 2018 1676 jpk continue; 2019 1676 jpk } 2020 0 stevel 2021 0 stevel if (first_candidate) { 2022 0 stevel /* 2023 0 stevel * This is first valid address in the list. 2024 0 stevel * It is automatically the best candidate 2025 0 stevel * so far. 2026 0 stevel */ 2027 0 stevel best_c.cand_ipif = ipif; 2028 0 stevel first_candidate = B_FALSE; 2029 0 stevel continue; 2030 0 stevel } 2031 0 stevel 2032 0 stevel bzero(&curr_c, sizeof (cand_t)); 2033 0 stevel curr_c.cand_ipif = ipif; 2034 0 stevel 2035 0 stevel /* 2036 0 stevel * Compare this current candidate (curr_c) with the 2037 0 stevel * best candidate (best_c) by applying the 2038 0 stevel * comparison rules in order until one breaks the 2039 0 stevel * tie. 2040 0 stevel */ 2041 0 stevel for (index = 0; rules[index] != NULL; index++) { 2042 0 stevel /* Apply a comparison rule. */ 2043 8485 Peter rule_result = (rules[index])(&best_c, &curr_c, 2044 8485 Peter &dstinfo, ipst); 2045 0 stevel if (rule_result == CAND_AVOID) { 2046 0 stevel /* 2047 0 stevel * The best candidate is still the 2048 0 stevel * best candidate. Forget about 2049 0 stevel * this current candidate and go on 2050 0 stevel * to the next one. 2051 0 stevel */ 2052 0 stevel break; 2053 0 stevel } else if (rule_result == CAND_PREFER) { 2054 0 stevel /* 2055 0 stevel * This candidate is prefered. It 2056 0 stevel * becomes the best candidate so 2057 0 stevel * far. Go on to the next address. 2058 0 stevel */ 2059 0 stevel best_c = curr_c; 2060 0 stevel break; 2061 0 stevel } 2062 0 stevel /* We have a tie, apply the next rule. */ 2063 0 stevel } 2064 0 stevel 2065 0 stevel /* 2066 0 stevel * The last rule must be a tie breaker rule and 2067 0 stevel * must never produce a tie. At this point, the 2068 0 stevel * candidate should have either been rejected, or 2069 0 stevel * have been prefered as the best candidate so far. 2070 0 stevel */ 2071 0 stevel ASSERT(rule_result != CAND_TIE); 2072 8485 Peter } while ((ipif = next_ipif) != start_ipif); 2073 8485 Peter 2074 8485 Peter /* 2075 8485 Peter * For IPMP, update the source ipif rotor to the next ipif, 2076 8485 Peter * provided we can look it up. (We must not use it if it's 2077 8485 Peter * IPIF_CONDEMNED since we may have grabbed ill_g_lock after 2078 8485 Peter * ipif_free() checked ill_src_ipif.) 2079 8485 Peter */ 2080 8485 Peter if (IS_IPMP(ill) && ipif != NULL) { 2081 8485 Peter mutex_enter(&ipif->ipif_ill->ill_lock); 2082 8485 Peter next_ipif = ipif->ipif_next; 2083 11042 Erik if (next_ipif != NULL && !IPIF_IS_CONDEMNED(next_ipif)) 2084 8485 Peter ill->ill_src_ipif = next_ipif; 2085 8485 Peter else 2086 8485 Peter ill->ill_src_ipif = NULL; 2087 8485 Peter mutex_exit(&ipif->ipif_ill->ill_lock); 2088 0 stevel } 2089 0 stevel 2090 0 stevel /* 2091 8485 Peter * Only one ill to consider if dst_restrict_ill is set. 2092 0 stevel */ 2093 8485 Peter if (dstinfo.dst_restrict_ill) 2094 8485 Peter break; 2095 0 stevel } 2096 0 stevel 2097 0 stevel ipif = best_c.cand_ipif; 2098 0 stevel ip1dbg(("ipif_select_source_v6(%s, %s) -> %s\n", 2099 0 stevel dstinfo.dst_ill->ill_name, 2100 0 stevel inet_ntop(AF_INET6, dstinfo.dst_addr, dstr, sizeof (dstr)), 2101 0 stevel (ipif == NULL ? "NULL" : 2102 0 stevel inet_ntop(AF_INET6, &ipif->ipif_v6lcl_addr, sstr, sizeof (sstr))))); 2103 0 stevel 2104 0 stevel if (usesrc_ill != NULL) 2105 0 stevel ill_refrele(usesrc_ill); 2106 8485 Peter 2107 8485 Peter if (ipmp_ill != NULL) 2108 8485 Peter ill_refrele(ipmp_ill); 2109 1676 jpk 2110 1676 jpk if (dst_rhtp != NULL) 2111 1676 jpk TPC_RELE(dst_rhtp); 2112 0 stevel 2113 0 stevel if (ipif == NULL) { 2114 3448 dh155122 rw_exit(&ipst->ips_ill_g_lock); 2115 0 stevel return (NULL); 2116 0 stevel } 2117 0 stevel 2118 0 stevel mutex_enter(&ipif->ipif_ill->ill_lock); 2119 11042 Erik if (!IPIF_IS_CONDEMNED(ipif)) { 2120 0 stevel ipif_refhold_locked(ipif); 2121 0 stevel mutex_exit(&ipif->ipif_ill->ill_lock); 2122 3448 dh155122 rw_exit(&ipst->ips_ill_g_lock); 2123 0 stevel return (ipif); 2124 0 stevel } 2125 0 stevel mutex_exit(&ipif->ipif_ill->ill_lock); 2126 3448 dh155122 rw_exit(&ipst->ips_ill_g_lock); 2127 0 stevel ip1dbg(("ipif_select_source_v6 cannot lookup ipif %p" 2128 0 stevel " returning null \n", (void *)ipif)); 2129 0 stevel 2130 0 stevel return (NULL); 2131 0 stevel } 2132 0 stevel 2133 0 stevel /* 2134 11042 Erik * Pick a source address based on the destination ill and an optional setsrc 2135 11042 Erik * address. 2136 11042 Erik * The result is stored in srcp. If generation is set, then put the source 2137 11042 Erik * generation number there before we look for the source address (to avoid 2138 11042 Erik * missing changes in the set of source addresses. 2139 11042 Erik * If flagsp is set, then us it to pass back ipif_flags. 2140 0 stevel * 2141 11042 Erik * If the caller wants to cache the returned source address and detect when 2142 11042 Erik * that might be stale, the caller should pass in a generation argument, 2143 11042 Erik * which the caller can later compare against ips_src_generation 2144 11042 Erik * 2145 11042 Erik * The precedence order for selecting an IPv6 source address is: 2146 11042 Erik * - RTF_SETSRC on the first ire in the recursive lookup always wins. 2147 11042 Erik * - If usrsrc is set, swap the ill to be the usesrc one. 2148 11042 Erik * - If IPMP is used on the ill, select a random address from the most 2149 11042 Erik * preferred ones below: 2150 11042 Erik * That is followed by the long list of IPv6 source address selection rules 2151 11042 Erik * starting with rule_isdst(), rule_scope(), etc. 2152 11042 Erik * 2153 11042 Erik * We have lower preference for ALL_ZONES IP addresses, 2154 11042 Erik * as they pose problems with unlabeled destinations. 2155 11042 Erik * 2156 11042 Erik * Note that when multiple IP addresses match e.g., with rule_scope() we pick 2157 11042 Erik * the first one if IPMP is not in use. With IPMP we randomize. 2158 0 stevel */ 2159 11042 Erik int 2160 11042 Erik ip_select_source_v6(ill_t *ill, const in6_addr_t *setsrc, const in6_addr_t *dst, 2161 11042 Erik zoneid_t zoneid, ip_stack_t *ipst, uint_t restrict_ill, uint32_t src_prefs, 2162 11042 Erik in6_addr_t *srcp, uint32_t *generation, uint64_t *flagsp) 2163 0 stevel { 2164 11042 Erik ipif_t *ipif; 2165 11042 Erik boolean_t notready = B_FALSE; /* Set if !ipif_addr_ready found */ 2166 0 stevel 2167 11042 Erik if (flagsp != NULL) 2168 11042 Erik *flagsp = 0; 2169 0 stevel 2170 11042 Erik /* 2171 11042 Erik * Need to grab the generation number before we check to 2172 11042 Erik * avoid a race with a change to the set of local addresses. 2173 11042 Erik * No lock needed since the thread which updates the set of local 2174 11042 Erik * addresses use ipif/ill locks and exit those (hence a store memory 2175 11042 Erik * barrier) before doing the atomic increase of ips_src_generation. 2176 11042 Erik */ 2177 11042 Erik if (generation != NULL) { 2178 11042 Erik *generation = ipst->ips_src_generation; 2179 0 stevel } 2180 0 stevel 2181 11042 Erik /* Was RTF_SETSRC set on the first IRE in the recursive lookup? */ 2182 11042 Erik if (setsrc != NULL && !IN6_IS_ADDR_UNSPECIFIED(setsrc)) { 2183 11042 Erik *srcp = *setsrc; 2184 11042 Erik return (0); 2185 0 stevel } 2186 0 stevel 2187 11042 Erik ipif = ipif_select_source_v6(ill, dst, restrict_ill, src_prefs, zoneid, 2188 11042 Erik B_TRUE, ¬ready); 2189 11042 Erik if (ipif == NULL) { 2190 11042 Erik if (notready) 2191 11042 Erik return (ENETDOWN); 2192 11042 Erik else 2193 11042 Erik return (EADDRNOTAVAIL); 2194 0 stevel } 2195 11042 Erik *srcp = ipif->ipif_v6lcl_addr; 2196 11042 Erik if (flagsp != NULL) 2197 11042 Erik *flagsp = ipif->ipif_flags; 2198 11042 Erik ipif_refrele(ipif); 2199 11042 Erik return (0); 2200 0 stevel } 2201 0 stevel 2202 0 stevel /* 2203 0 stevel * Perform an attach and bind to get phys addr plus info_req for 2204 0 stevel * the physical device. 2205 0 stevel * q and mp represents an ioctl which will be queued waiting for 2206 0 stevel * completion of the DLPI message exchange. 2207 11042 Erik * MUST be called on an ill queue. 2208 0 stevel * 2209 11042 Erik * Returns EINPROGRESS when mp has been consumed by queueing it. 2210 11042 Erik * The ioctl will complete in ip_rput. 2211 0 stevel */ 2212 0 stevel int 2213 0 stevel ill_dl_phys(ill_t *ill, ipif_t *ipif, mblk_t *mp, queue_t *q) 2214 0 stevel { 2215 0 stevel mblk_t *v6token_mp = NULL; 2216 0 stevel mblk_t *v6lla_mp = NULL; 2217 10616 Sebastien mblk_t *dest_mp = NULL; 2218 0 stevel mblk_t *phys_mp = NULL; 2219 0 stevel mblk_t *info_mp = NULL; 2220 0 stevel mblk_t *attach_mp = NULL; 2221 0 stevel mblk_t *bind_mp = NULL; 2222 0 stevel mblk_t *unbind_mp = NULL; 2223 0 stevel mblk_t *notify_mp = NULL; 2224 11076 Cathy mblk_t *capab_mp = NULL; 2225 0 stevel 2226 0 stevel ip1dbg(("ill_dl_phys(%s:%u)\n", ill->ill_name, ipif->ipif_id)); 2227 0 stevel ASSERT(ill->ill_dlpi_style_set); 2228 0 stevel ASSERT(WR(q)->q_next != NULL); 2229 0 stevel 2230 0 stevel if (ill->ill_isv6) { 2231 0 stevel v6token_mp = ip_dlpi_alloc(sizeof (dl_phys_addr_req_t) + 2232 0 stevel sizeof (t_scalar_t), DL_PHYS_ADDR_REQ); 2233 0 stevel if (v6token_mp == NULL) 2234 0 stevel goto bad; 2235 0 stevel ((dl_phys_addr_req_t *)v6token_mp->b_rptr)->dl_addr_type = 2236 0 stevel DL_IPV6_TOKEN; 2237 0 stevel 2238 0 stevel v6lla_mp = ip_dlpi_alloc(sizeof (dl_phys_addr_req_t) + 2239 0 stevel sizeof (t_scalar_t), DL_PHYS_ADDR_REQ); 2240 0 stevel if (v6lla_mp == NULL) 2241 0 stevel goto bad; 2242 0 stevel ((dl_phys_addr_req_t *)v6lla_mp->b_rptr)->dl_addr_type = 2243 0 stevel DL_IPV6_LINK_LAYER_ADDR; 2244 10616 Sebastien } 2245 10616 Sebastien 2246 10616 Sebastien if (ill->ill_mactype == DL_IPV4 || ill->ill_mactype == DL_IPV6) { 2247 10616 Sebastien dest_mp = ip_dlpi_alloc(sizeof (dl_phys_addr_req_t) + 2248 10616 Sebastien sizeof (t_scalar_t), DL_PHYS_ADDR_REQ); 2249 10616 Sebastien if (dest_mp == NULL) 2250 10616 Sebastien goto bad; 2251 10616 Sebastien ((dl_phys_addr_req_t *)dest_mp->b_rptr)->dl_addr_type = 2252 10616 Sebastien DL_CURR_DEST_ADDR; 2253 0 stevel } 2254 0 stevel 2255 0 stevel /* 2256 0 stevel * Allocate a DL_NOTIFY_REQ and set the notifications we want. 2257 0 stevel */ 2258 0 stevel notify_mp = ip_dlpi_alloc(sizeof (dl_notify_req_t) + sizeof (long), 2259 0 stevel DL_NOTIFY_REQ); 2260 0 stevel if (notify_mp == NULL) 2261 0 stevel goto bad; 2262 0 stevel ((dl_notify_req_t *)notify_mp->b_rptr)->dl_notifications = 2263 0 stevel (DL_NOTE_PHYS_ADDR | DL_NOTE_SDU_SIZE | DL_NOTE_FASTPATH_FLUSH | 2264 9073 Cathy DL_NOTE_LINK_UP | DL_NOTE_LINK_DOWN | DL_NOTE_CAPAB_RENEG | 2265 9743 Girish DL_NOTE_PROMISC_ON_PHYS | DL_NOTE_PROMISC_OFF_PHYS | 2266 9073 Cathy DL_NOTE_REPLUMB); 2267 0 stevel 2268 0 stevel phys_mp = ip_dlpi_alloc(sizeof (dl_phys_addr_req_t) + 2269 0 stevel sizeof (t_scalar_t), DL_PHYS_ADDR_REQ); 2270 0 stevel if (phys_mp == NULL) 2271 0 stevel goto bad; 2272 0 stevel ((dl_phys_addr_req_t *)phys_mp->b_rptr)->dl_addr_type = 2273 0 stevel DL_CURR_PHYS_ADDR; 2274 0 stevel 2275 0 stevel info_mp = ip_dlpi_alloc( 2276 0 stevel sizeof (dl_info_req_t) + sizeof (dl_info_ack_t), 2277 0 stevel DL_INFO_REQ); 2278 0 stevel if (info_mp == NULL) 2279 0 stevel goto bad; 2280 0 stevel 2281 11076 Cathy ASSERT(ill->ill_dlpi_capab_state == IDCS_UNKNOWN); 2282 11076 Cathy capab_mp = ip_dlpi_alloc(sizeof (dl_capability_req_t), 2283 11076 Cathy DL_CAPABILITY_REQ); 2284 11076 Cathy if (capab_mp == NULL) 2285 11076 Cathy goto bad; 2286 11076 Cathy 2287 0 stevel bind_mp = ip_dlpi_alloc(sizeof (dl_bind_req_t) + sizeof (long), 2288 0 stevel DL_BIND_REQ); 2289 0 stevel if (bind_mp == NULL) 2290 0 stevel goto bad; 2291 0 stevel ((dl_bind_req_t *)bind_mp->b_rptr)->dl_sap = ill->ill_sap; 2292 0 stevel ((dl_bind_req_t *)bind_mp->b_rptr)->dl_service_mode = DL_CLDLS; 2293 0 stevel 2294 0 stevel unbind_mp = ip_dlpi_alloc(sizeof (dl_unbind_req_t), DL_UNBIND_REQ); 2295 0 stevel if (unbind_mp == NULL) 2296 0 stevel goto bad; 2297 0 stevel 2298 4360 meem /* If we need to attach, pre-alloc and initialize the mblk */ 2299 0 stevel if (ill->ill_needs_attach) { 2300 0 stevel attach_mp = ip_dlpi_alloc(sizeof (dl_attach_req_t), 2301 0 stevel DL_ATTACH_REQ); 2302 0 stevel if (attach_mp == NULL) 2303 0 stevel goto bad; 2304 0 stevel ((dl_attach_req_t *)attach_mp->b_rptr)->dl_ppa = ill->ill_ppa; 2305 0 stevel } 2306 0 stevel 2307 0 stevel /* 2308 0 stevel * Here we are going to delay the ioctl ack until after 2309 0 stevel * ACKs from DL_PHYS_ADDR_REQ. So need to save the 2310 0 stevel * original ioctl message before sending the requests 2311 0 stevel */ 2312 0 stevel mutex_enter(&ill->ill_lock); 2313 0 stevel /* ipsq_pending_mp_add won't fail since we pass in a NULL connp */ 2314 0 stevel (void) ipsq_pending_mp_add(NULL, ipif, ill->ill_wq, mp, 0); 2315 0 stevel /* 2316 0 stevel * Set ill_phys_addr_pend to zero. It will be set to the addr_type of 2317 0 stevel * the DL_PHYS_ADDR_REQ in ill_dlpi_send() and ill_dlpi_done(). It will 2318 0 stevel * be used to track which DL_PHYS_ADDR_REQ is being ACK'd/NAK'd. 2319 0 stevel */ 2320 0 stevel ill->ill_phys_addr_pend = 0; 2321 0 stevel mutex_exit(&ill->ill_lock); 2322 0 stevel 2323 0 stevel if (attach_mp != NULL) { 2324 0 stevel ip1dbg(("ill_dl_phys: attach\n")); 2325 0 stevel ill_dlpi_send(ill, attach_mp); 2326 0 stevel } 2327 0 stevel ill_dlpi_send(ill, bind_mp); 2328 0 stevel ill_dlpi_send(ill, info_mp); 2329 11076 Cathy 2330 11076 Cathy /* 2331 11076 Cathy * Send the capability request to get the VRRP capability information. 2332 11076 Cathy */ 2333 11076 Cathy ill_capability_send(ill, capab_mp); 2334 11076 Cathy 2335 10616 Sebastien if (v6token_mp != NULL) 2336 0 stevel ill_dlpi_send(ill, v6token_mp); 2337 10616 Sebastien if (v6lla_mp != NULL) 2338 0 stevel ill_dlpi_send(ill, v6lla_mp); 2339 10616 Sebastien if (dest_mp != NULL) 2340 10616 Sebastien ill_dlpi_send(ill, dest_mp); 2341 0 stevel ill_dlpi_send(ill, phys_mp); 2342 0 stevel ill_dlpi_send(ill, notify_mp); 2343 0 stevel ill_dlpi_send(ill, unbind_mp); 2344 0 stevel 2345 0 stevel /* 2346 0 stevel * This operation will complete in ip_rput_dlpi_writer with either 2347 0 stevel * a DL_PHYS_ADDR_ACK or DL_ERROR_ACK. 2348 0 stevel */ 2349 0 stevel return (EINPROGRESS); 2350 0 stevel bad: 2351 4360 meem freemsg(v6token_mp); 2352 4360 meem freemsg(v6lla_mp); 2353 10616 Sebastien freemsg(dest_mp); 2354 4360 meem freemsg(phys_mp); 2355 4360 meem freemsg(info_mp); 2356 4360 meem freemsg(attach_mp); 2357 4360 meem freemsg(bind_mp); 2358 11076 Cathy freemsg(capab_mp); 2359 4360 meem freemsg(unbind_mp); 2360 4360 meem freemsg(notify_mp); 2361 0 stevel return (ENOMEM); 2362 0 stevel } 2363 0 stevel 2364 11042 Erik /* Add room for tcp+ip headers */ 2365 0 stevel uint_t ip_loopback_mtu_v6plus = IP_LOOPBACK_MTU + IPV6_HDR_LEN + 20; 2366 0 stevel 2367 0 stevel /* 2368 0 stevel * DLPI is up. 2369 0 stevel * Create all the IREs associated with an interface bring up multicast. 2370 0 stevel * Set the interface flag and finish other initialization 2371 0 stevel * that potentially had to be differed to after DL_BIND_ACK. 2372 0 stevel */ 2373 0 stevel int 2374 0 stevel ipif_up_done_v6(ipif_t *ipif) 2375 0 stevel { 2376 0 stevel ill_t *ill = ipif->ipif_ill; 2377 0 stevel int err; 2378 0 stevel boolean_t loopback = B_FALSE; 2379 0 stevel 2380 0 stevel ip1dbg(("ipif_up_done_v6(%s:%u)\n", 2381 4459 kcpoon ipif->ipif_ill->ill_name, ipif->ipif_id)); 2382 11042 Erik DTRACE_PROBE3(ipif__downup, char *, "ipif_up_done_v6", 2383 11042 Erik ill_t *, ill, ipif_t *, ipif); 2384 0 stevel 2385 0 stevel /* Check if this is a loopback interface */ 2386 0 stevel if (ipif->ipif_ill->ill_wq == NULL) 2387 0 stevel loopback = B_TRUE; 2388 0 stevel 2389 0 stevel ASSERT(ipif->ipif_isv6); 2390 0 stevel ASSERT(!MUTEX_HELD(&ipif->ipif_ill->ill_lock)); 2391 0 stevel 2392 11042 Erik if (IS_LOOPBACK(ill) || ill->ill_net_type == IRE_IF_NORESOLVER) { 2393 11042 Erik nce_t *loop_nce = NULL; 2394 11042 Erik uint16_t flags = (NCE_F_MYADDR | NCE_F_NONUD | NCE_F_AUTHORITY); 2395 0 stevel 2396 0 stevel /* 2397 0 stevel * lo0:1 and subsequent ipifs were marked IRE_LOCAL in 2398 0 stevel * ipif_lookup_on_name(), but in the case of zones we can have 2399 0 stevel * several loopback addresses on lo0. So all the interfaces with 2400 0 stevel * loopback addresses need to be marked IRE_LOOPBACK. 2401 0 stevel */ 2402 0 stevel if (IN6_ARE_ADDR_EQUAL(&ipif->ipif_v6lcl_addr, &ipv6_loopback)) 2403 0 stevel ipif->ipif_ire_type = IRE_LOOPBACK; 2404 0 stevel else 2405 0 stevel ipif->ipif_ire_type = IRE_LOCAL; 2406 11042 Erik if (ill->ill_net_type != IRE_LOOPBACK) 2407 11042 Erik flags |= NCE_F_PUBLISH; 2408 11042 Erik err = nce_lookup_then_add_v6(ill, NULL, 2409 11042 Erik ill->ill_phys_addr_length, 2410 11042 Erik &ipif->ipif_v6lcl_addr, flags, ND_REACHABLE, &loop_nce); 2411 11042 Erik 2412 11042 Erik /* A shared-IP zone sees EEXIST for lo0:N */ 2413 11042 Erik if (err == 0 || err == EEXIST) { 2414 11042 Erik ipif->ipif_added_nce = 1; 2415 11042 Erik loop_nce->nce_ipif_cnt++; 2416 11042 Erik nce_refrele(loop_nce); 2417 11042 Erik err = 0; 2418 11042 Erik } else { 2419 11042 Erik ASSERT(loop_nce == NULL); 2420 11042 Erik return (err); 2421 11042 Erik } 2422 0 stevel } 2423 0 stevel 2424 11042 Erik err = ipif_add_ires_v6(ipif, loopback); 2425 11042 Erik if (err != 0) { 2426 0 stevel /* 2427 11042 Erik * See comments about return value from 2428 11042 Erik * ipif_addr_availability_check() in ipif_add_ires_v6(). 2429 0 stevel */ 2430 11042 Erik if (err != EADDRINUSE) { 2431 11042 Erik ipif_ndp_down(ipif); 2432 11042 Erik } else { 2433 11042 Erik /* 2434 11042 Erik * Make IPMP aware of the deleted ipif so that 2435 11042 Erik * the needed ipmp cleanup (e.g., of ipif_bound_ill) 2436 11042 Erik * can be completed. Note that we do not want to 2437 11042 Erik * destroy the nce that was created on the ipmp_ill 2438 11042 Erik * for the active copy of the duplicate address in 2439 11042 Erik * use. 2440 11042 Erik */ 2441 11042 Erik if (IS_IPMP(ill)) 2442 11042 Erik ipmp_illgrp_del_ipif(ill->ill_grp, ipif); 2443 11042 Erik err = EADDRNOTAVAIL; 2444 0 stevel } 2445 11042 Erik return (err); 2446 0 stevel } 2447 11042 Erik 2448 11042 Erik if (ill->ill_ipif_up_count == 1 && !loopback) { 2449 11042 Erik /* Recover any additional IREs entries for this ill */ 2450 11042 Erik (void) ill_recover_saved_ire(ill); 2451 11042 Erik } 2452 11042 Erik 2453 11042 Erik if (ill->ill_need_recover_multicast) { 2454 11042 Erik /* 2455 11042 Erik * Need to recover all multicast memberships in the driver. 2456 11042 Erik * This had to be deferred until we had attached. 2457 11042 Erik */ 2458 11042 Erik ill_recover_multicast(ill); 2459 11042 Erik } 2460 11042 Erik 2461 11042 Erik if (ill->ill_ipif_up_count == 1) { 2462 11042 Erik /* 2463 11042 Erik * Since the interface is now up, it may now be active. 2464 11042 Erik */ 2465 11042 Erik if (IS_UNDER_IPMP(ill)) 2466 11042 Erik ipmp_ill_refresh_active(ill); 2467 11042 Erik } 2468 11042 Erik 2469 11042 Erik /* Join the allhosts multicast address and the solicited node MC */ 2470 11042 Erik ipif_multicast_up(ipif); 2471 11042 Erik 2472 11042 Erik /* Perhaps ilgs should use this ill */ 2473 11042 Erik update_conn_ill(NULL, ill->ill_ipst); 2474 11042 Erik 2475 11042 Erik if (ipif->ipif_addr_ready) 2476 11042 Erik ipif_up_notify(ipif); 2477 11042 Erik 2478 11042 Erik return (0); 2479 11042 Erik } 2480 11042 Erik 2481 11042 Erik /* 2482 11042 Erik * Add the IREs associated with the ipif. 2483 11042 Erik * Those MUST be explicitly removed in ipif_delete_ires_v6. 2484 11042 Erik */ 2485 11042 Erik static int 2486 11042 Erik ipif_add_ires_v6(ipif_t *ipif, boolean_t loopback) 2487 11042 Erik { 2488 11042 Erik ill_t *ill = ipif->ipif_ill; 2489 11042 Erik ip_stack_t *ipst = ill->ill_ipst; 2490 11042 Erik in6_addr_t v6addr; 2491 11042 Erik in6_addr_t route_mask; 2492 11042 Erik int err; 2493 11042 Erik char buf[INET6_ADDRSTRLEN]; 2494 11042 Erik ire_t *ire_local = NULL; /* LOCAL or LOOPBACK */ 2495 11077 Erik ire_t *ire_if = NULL; 2496 0 stevel 2497 0 stevel if (!IN6_IS_ADDR_UNSPECIFIED(&ipif->ipif_v6lcl_addr) && 2498 0 stevel !(ipif->ipif_flags & IPIF_NOLOCAL)) { 2499 1676 jpk 2500 1676 jpk /* 2501 1676 jpk * If we're on a labeled system then make sure that zone- 2502 1676 jpk * private addresses have proper remote host database entries. 2503 1676 jpk */ 2504 1676 jpk if (is_system_labeled() && 2505 1676 jpk ipif->ipif_ire_type != IRE_LOOPBACK) { 2506 1676 jpk if (ip6opt_ls == 0) { 2507 1676 jpk cmn_err(CE_WARN, "IPv6 not enabled " 2508 1676 jpk "via /etc/system"); 2509 1676 jpk return (EINVAL); 2510 1676 jpk } 2511 1676 jpk if (!tsol_check_interface_address(ipif)) 2512 1676 jpk return (EINVAL); 2513 1676 jpk } 2514 1676 jpk 2515 0 stevel /* Register the source address for __sin6_src_id */ 2516 0 stevel err = ip_srcid_insert(&ipif->ipif_v6lcl_addr, 2517 3448 dh155122 ipif->ipif_zoneid, ipst); 2518 0 stevel if (err != 0) { 2519 11042 Erik ip0dbg(("ipif_add_ires_v6: srcid_insert %d\n", err)); 2520 0 stevel return (err); 2521 0 stevel } 2522 0 stevel /* 2523 0 stevel * If the interface address is set, create the LOCAL 2524 0 stevel * or LOOPBACK IRE. 2525 0 stevel */ 2526 11042 Erik ip1dbg(("ipif_add_ires_v6: creating IRE %d for %s\n", 2527 0 stevel ipif->ipif_ire_type, 2528 0 stevel inet_ntop(AF_INET6, &ipif->ipif_v6lcl_addr, 2529 0 stevel buf, sizeof (buf)))); 2530 0 stevel 2531 11042 Erik ire_local = ire_create_v6( 2532 0 stevel &ipif->ipif_v6lcl_addr, /* dest address */ 2533 0 stevel &ipv6_all_ones, /* mask */ 2534 0 stevel NULL, /* no gateway */ 2535 0 stevel ipif->ipif_ire_type, /* LOCAL or LOOPBACK */ 2536 11042 Erik ipif->ipif_ill, /* interface */ 2537 11042 Erik ipif->ipif_zoneid, 2538 11042 Erik ((ipif->ipif_flags & IPIF_PRIVATE) ? 2539 11042 Erik RTF_PRIVATE : 0) | RTF_KERNEL, 2540 3448 dh155122 NULL, 2541 3448 dh155122 ipst); 2542 11042 Erik if (ire_local == NULL) { 2543 11042 Erik ip1dbg(("ipif_up_done_v6: NULL ire_local\n")); 2544 11042 Erik err = ENOMEM; 2545 11042 Erik goto bad; 2546 11042 Erik } 2547 0 stevel } 2548 0 stevel 2549 10616 Sebastien /* Set up the IRE_IF_RESOLVER or IRE_IF_NORESOLVER, as appropriate. */ 2550 11042 Erik if (!loopback && !(ipif->ipif_flags & IPIF_NOXMIT) && 2551 0 stevel !(IN6_IS_ADDR_UNSPECIFIED(&ipif->ipif_v6subnet) && 2552 0 stevel IN6_IS_ADDR_UNSPECIFIED(&ipif->ipif_v6net_mask))) { 2553 0 stevel /* ipif_v6subnet is ipif_v6pp_dst_addr for pt-pt */ 2554 0 stevel v6addr = ipif->ipif_v6subnet; 2555 0 stevel 2556 0 stevel if (ipif->ipif_flags & IPIF_POINTOPOINT) { 2557 0 stevel route_mask = ipv6_all_ones; 2558 0 stevel } else { 2559 0 stevel route_mask = ipif->ipif_v6net_mask; 2560 0 stevel } 2561 0 stevel 2562 11042 Erik ip1dbg(("ipif_add_ires_v6: creating if IRE %d for %s\n", 2563 0 stevel ill->ill_net_type, 2564 0 stevel inet_ntop(AF_INET6, &v6addr, buf, sizeof (buf)))); 2565 0 stevel 2566 11077 Erik ire_if = ire_create_v6( 2567 0 stevel &v6addr, /* dest pref */ 2568 0 stevel &route_mask, /* mask */ 2569 11042 Erik &ipif->ipif_v6lcl_addr, /* gateway */ 2570 0 stevel ill->ill_net_type, /* IF_[NO]RESOLVER */ 2571 11042 Erik ipif->ipif_ill, 2572 11042 Erik ipif->ipif_zoneid, 2573 11042 Erik ((ipif->ipif_flags & IPIF_PRIVATE) ? 2574 11042 Erik RTF_PRIVATE : 0) | RTF_KERNEL, 2575 3448 dh155122 NULL, 2576 3448 dh155122 ipst); 2577 11077 Erik if (ire_if == NULL) { 2578 11077 Erik ip1dbg(("ipif_up_done: NULL ire_if\n")); 2579 0 stevel err = ENOMEM; 2580 0 stevel goto bad; 2581 0 stevel } 2582 0 stevel } 2583 0 stevel 2584 0 stevel /* 2585 8485 Peter * Need to atomically check for IP address availability under 2586 8485 Peter * ip_addr_avail_lock. ill_g_lock is held as reader to ensure no new 2587 8485 Peter * ills or new ipifs can be added while we are checking availability. 2588 0 stevel */ 2589 3448 dh155122 rw_enter(&ipst->ips_ill_g_lock, RW_READER); 2590 3448 dh155122 mutex_enter(&ipst->ips_ip_addr_avail_lock); 2591 0 stevel ill->ill_ipif_up_count++; 2592 0 stevel ipif->ipif_flags |= IPIF_UP; 2593 0 stevel err = ip_addr_availability_check(ipif); 2594 3448 dh155122 mutex_exit(&ipst->ips_ip_addr_avail_lock); 2595 3448 dh155122 rw_exit(&ipst->ips_ill_g_lock); 2596 0 stevel 2597 0 stevel if (err != 0) { 2598 0 stevel /* 2599 0 stevel * Our address may already be up on the same ill. In this case, 2600 0 stevel * the external resolver entry for our ipif replaced the one for 2601 0 stevel * the other ipif. So we don't want to delete it (otherwise the 2602 0 stevel * other ipif would be unable to send packets). 2603 0 stevel * ip_addr_availability_check() identifies this case for us and 2604 11042 Erik * returns EADDRINUSE; Caller must turn it into EADDRNOTAVAIL 2605 0 stevel * which is the expected error code. 2606 9287 Sowmini * 2607 11042 Erik * Note that ipif_ndp_down() will only delete the nce in the 2608 11042 Erik * case when the nce_ipif_cnt drops to 0. 2609 0 stevel */ 2610 0 stevel ill->ill_ipif_up_count--; 2611 0 stevel ipif->ipif_flags &= ~IPIF_UP; 2612 0 stevel goto bad; 2613 0 stevel } 2614 0 stevel 2615 0 stevel /* 2616 8485 Peter * Add in all newly created IREs. 2617 11077 Erik * We add the IRE_INTERFACE before the IRE_LOCAL to ensure 2618 11077 Erik * that lookups find the IRE_LOCAL even if the IRE_INTERFACE is 2619 11077 Erik * a /128 route. 2620 0 stevel */ 2621 11077 Erik if (ire_if != NULL) { 2622 11077 Erik ire_if = ire_add(ire_if); 2623 11077 Erik if (ire_if == NULL) { 2624 11077 Erik err = ENOMEM; 2625 11077 Erik goto bad2; 2626 11077 Erik } 2627 11077 Erik #ifdef DEBUG 2628 11077 Erik ire_refhold_notr(ire_if); 2629 11077 Erik ire_refrele(ire_if); 2630 11077 Erik #endif 2631 11077 Erik } 2632 11042 Erik if (ire_local != NULL) { 2633 11042 Erik ire_local = ire_add(ire_local); 2634 11077 Erik if (ire_local == NULL) { 2635 11077 Erik err = ENOMEM; 2636 11077 Erik goto bad2; 2637 11077 Erik } 2638 11042 Erik #ifdef DEBUG 2639 11077 Erik ire_refhold_notr(ire_local); 2640 11077 Erik ire_refrele(ire_local); 2641 11042 Erik #endif 2642 11042 Erik } 2643 11042 Erik rw_enter(&ipst->ips_ill_g_lock, RW_WRITER); 2644 11042 Erik if (ire_local != NULL) 2645 11042 Erik ipif->ipif_ire_local = ire_local; 2646 11077 Erik if (ire_if != NULL) 2647 11077 Erik ipif->ipif_ire_if = ire_if; 2648 11042 Erik rw_exit(&ipst->ips_ill_g_lock); 2649 11042 Erik ire_local = NULL; 2650 11077 Erik ire_if = NULL; 2651 0 stevel 2652 8023 Phil if (ipif->ipif_addr_ready) 2653 8023 Phil ipif_up_notify(ipif); 2654 0 stevel return (0); 2655 0 stevel 2656 11077 Erik bad2: 2657 11077 Erik ill->ill_ipif_up_count--; 2658 11077 Erik ipif->ipif_flags &= ~IPIF_UP; 2659 11077 Erik 2660 0 stevel bad: 2661 11042 Erik if (ire_local != NULL) 2662 11042 Erik ire_delete(ire_local); 2663 11077 Erik if (ire_if != NULL) 2664 11077 Erik ire_delete(ire_if); 2665 11077 Erik 2666 11077 Erik rw_enter(&ipst->ips_ill_g_lock, RW_WRITER); 2667 11077 Erik ire_local = ipif->ipif_ire_local; 2668 11077 Erik ipif->ipif_ire_local = NULL; 2669 11077 Erik ire_if = ipif->ipif_ire_if; 2670 11077 Erik ipif->ipif_ire_if = NULL; 2671 11077 Erik rw_exit(&ipst->ips_ill_g_lock); 2672 11077 Erik if (ire_local != NULL) { 2673 11077 Erik ire_delete(ire_local); 2674 11077 Erik ire_refrele_notr(ire_local); 2675 11077 Erik } 2676 11077 Erik if (ire_if != NULL) { 2677 11077 Erik ire_delete(ire_if); 2678 11077 Erik ire_refrele_notr(ire_if); 2679 0 stevel } 2680 3448 dh155122 (void) ip_srcid_remove(&ipif->ipif_v6lcl_addr, ipif->ipif_zoneid, ipst); 2681 0 stevel 2682 0 stevel return (err); 2683 0 stevel } 2684 0 stevel 2685 11042 Erik /* Remove all the IREs created by ipif_add_ires_v6 */ 2686 11042 Erik void 2687 11042 Erik ipif_delete_ires_v6(ipif_t *ipif) 2688 11042 Erik { 2689 11042 Erik ill_t *ill = ipif->ipif_ill; 2690 11042 Erik ip_stack_t *ipst = ill->ill_ipst; 2691 11042 Erik ire_t *ire; 2692 11042 Erik 2693 11042 Erik rw_enter(&ipst->ips_ill_g_lock, RW_WRITER); 2694 11077 Erik ire = ipif->ipif_ire_local; 2695 11077 Erik ipif->ipif_ire_local = NULL; 2696 11077 Erik rw_exit(&ipst->ips_ill_g_lock); 2697 11077 Erik if (ire != NULL) { 2698 11042 Erik /* 2699 11042 Erik * Move count to ipif so we don't loose the count due to 2700 11042 Erik * a down/up dance. 2701 11042 Erik */ 2702 11042 Erik atomic_add_32(&ipif->ipif_ib_pkt_count, ire->ire_ib_pkt_count); 2703 11042 Erik 2704 11042 Erik ire_delete(ire); 2705 11042 Erik ire_refrele_notr(ire); 2706 11042 Erik } 2707 11077 Erik rw_enter(&ipst->ips_ill_g_lock, RW_WRITER); 2708 11077 Erik ire = ipif->ipif_ire_if; 2709 11077 Erik ipif->ipif_ire_if = NULL; 2710 11077 Erik rw_exit(&ipst->ips_ill_g_lock); 2711 11077 Erik if (ire != NULL) { 2712 11042 Erik ire_delete(ire); 2713 11077 Erik ire_refrele_notr(ire); 2714 11042 Erik } 2715 11042 Erik } 2716 11042 Erik 2717 0 stevel /* 2718 11042 Erik * Delete an ND entry if it exists. 2719 0 stevel */ 2720 0 stevel /* ARGSUSED */ 2721 0 stevel int 2722 0 stevel ip_siocdelndp_v6(ipif_t *ipif, sin_t *dummy_sin, queue_t *q, mblk_t *mp, 2723 0 stevel ip_ioctl_cmd_t *ipip, void *dummy_ifreq) 2724 0 stevel { 2725 0 stevel sin6_t *sin6; 2726 0 stevel struct lifreq *lifr; 2727 0 stevel lif_nd_req_t *lnr; 2728 8485 Peter ill_t *ill = ipif->ipif_ill; 2729 11042 Erik nce_t *nce; 2730 0 stevel 2731 8485 Peter lifr = (struct lifreq *)mp->b_cont->b_cont->b_rptr; 2732 0 stevel lnr = &lifr->lifr_nd; 2733 0 stevel /* Only allow for logical unit zero i.e. not on "le0:17" */ 2734 0 stevel if (ipif->ipif_id != 0) 2735 0 stevel return (EINVAL); 2736 0 stevel 2737 0 stevel if (!ipif->ipif_isv6) 2738 0 stevel return (EINVAL); 2739 0 stevel 2740 0 stevel if (lnr->lnr_addr.ss_family != AF_INET6) 2741 0 stevel return (EAFNOSUPPORT); 2742 0 stevel 2743 0 stevel sin6 = (sin6_t *)&lnr->lnr_addr; 2744 8485 Peter 2745 8485 Peter /* 2746 8485 Peter * Since ND mappings must be consistent across an IPMP group, prohibit 2747 11042 Erik * deleting ND mappings on underlying interfaces. 2748 11042 Erik * Don't allow deletion of mappings for local addresses. 2749 8485 Peter */ 2750 8485 Peter if (IS_UNDER_IPMP(ill)) 2751 8485 Peter return (EPERM); 2752 8485 Peter 2753 11042 Erik nce = nce_lookup_v6(ill, &sin6->sin6_addr); 2754 11042 Erik if (nce == NULL) 2755 11042 Erik return (ESRCH); 2756 11042 Erik 2757 11042 Erik if (NCE_MYADDR(nce->nce_common)) { 2758 11042 Erik nce_refrele(nce); 2759 11042 Erik return (EPERM); 2760 8485 Peter } 2761 8485 Peter 2762 11042 Erik /* 2763 11042 Erik * delete the nce_common which will also delete the nces on any 2764 11042 Erik * under_ill in the case of ipmp. 2765 11042 Erik */ 2766 11042 Erik ncec_delete(nce->nce_common); 2767 11042 Erik nce_refrele(nce); 2768 0 stevel return (0); 2769 0 stevel } 2770 0 stevel 2771 0 stevel /* 2772 0 stevel * Return nbr cache info. 2773 0 stevel */ 2774 0 stevel /* ARGSUSED */ 2775 0 stevel int 2776 0 stevel ip_siocqueryndp_v6(ipif_t *ipif, sin_t *dummy_sin, queue_t *q, mblk_t *mp, 2777 0 stevel ip_ioctl_cmd_t *ipip, void *dummy_ifreq) 2778 0 stevel { 2779 0 stevel ill_t *ill = ipif->ipif_ill; 2780 0 stevel struct lifreq *lifr; 2781 0 stevel lif_nd_req_t *lnr; 2782 0 stevel 2783 0 stevel lifr = (struct lifreq *)mp->b_cont->b_cont->b_rptr; 2784 0 stevel lnr = &lifr->lifr_nd; 2785 0 stevel /* Only allow for logical unit zero i.e. not on "le0:17" */ 2786 0 stevel if (ipif->ipif_id != 0) 2787 0 stevel return (EINVAL); 2788 0 stevel 2789 0 stevel if (!ipif->ipif_isv6) 2790 0 stevel return (EINVAL); 2791 0 stevel 2792 0 stevel if (lnr->lnr_addr.ss_family != AF_INET6) 2793 0 stevel return (EAFNOSUPPORT); 2794 0 stevel 2795 0 stevel if (ill->ill_phys_addr_length > sizeof (lnr->lnr_hdw_addr)) 2796 0 stevel return (EINVAL); 2797 0 stevel 2798 0 stevel return (ndp_query(ill, lnr)); 2799 0 stevel } 2800 0 stevel 2801 0 stevel /* 2802 0 stevel * Perform an update of the nd entry for the specified address. 2803 0 stevel */ 2804 0 stevel /* ARGSUSED */ 2805 0 stevel int 2806 0 stevel ip_siocsetndp_v6(ipif_t *ipif, sin_t *dummy_sin, queue_t *q, mblk_t *mp, 2807 0 stevel ip_ioctl_cmd_t *ipip, void *dummy_ifreq) 2808 0 stevel { 2809 8485 Peter sin6_t *sin6; 2810 0 stevel ill_t *ill = ipif->ipif_ill; 2811 0 stevel struct lifreq *lifr; 2812 0 stevel lif_nd_req_t *lnr; 2813 8485 Peter ire_t *ire; 2814 3448 dh155122 2815 0 stevel lifr = (struct lifreq *)mp->b_cont->b_cont->b_rptr; 2816 0 stevel lnr = &lifr->lifr_nd; 2817 0 stevel /* Only allow for logical unit zero i.e. not on "le0:17" */ 2818 0 stevel if (ipif->ipif_id != 0) 2819 0 stevel return (EINVAL); 2820 0 stevel 2821 0 stevel if (!ipif->ipif_isv6) 2822 0 stevel return (EINVAL); 2823 0 stevel 2824 0 stevel if (lnr->lnr_addr.ss_family != AF_INET6) 2825 0 stevel return (EAFNOSUPPORT); 2826 0 stevel 2827 8485 Peter sin6 = (sin6_t *)&lnr->lnr_addr; 2828 8485 Peter 2829 8485 Peter /* 2830 8485 Peter * Since ND mappings must be consistent across an IPMP group, prohibit 2831 8485 Peter * updating ND mappings on underlying interfaces. Also, since ND 2832 8485 Peter * mappings for IPMP data addresses are owned by IP itself, prohibit 2833 8485 Peter * updating them. 2834 8485 Peter */ 2835 8485 Peter if (IS_UNDER_IPMP(ill)) 2836 8485 Peter return (EPERM); 2837 8485 Peter 2838 8485 Peter if (IS_IPMP(ill)) { 2839 11042 Erik ire = ire_ftable_lookup_v6(&sin6->sin6_addr, NULL, NULL, 2840 11042 Erik IRE_LOCAL, ill, ALL_ZONES, NULL, 2841 11042 Erik MATCH_IRE_TYPE | MATCH_IRE_ILL, 0, ill->ill_ipst, NULL); 2842 8485 Peter if (ire != NULL) { 2843 8485 Peter ire_refrele(ire); 2844 8485 Peter return (EPERM); 2845 8485 Peter } 2846 8485 Peter } 2847 8485 Peter 2848 0 stevel return (ndp_sioc_update(ill, lnr)); 2849 0 stevel } 2850