1 0 stevel /* 2 0 stevel * CDDL HEADER START 3 0 stevel * 4 0 stevel * The contents of this file are subject to the terms of the 5 1676 jpk * Common Development and Distribution License (the "License"). 6 1676 jpk * You may not use this file except in compliance with the License. 7 0 stevel * 8 0 stevel * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 0 stevel * or http://www.opensolaris.org/os/licensing. 10 0 stevel * See the License for the specific language governing permissions 11 0 stevel * and limitations under the License. 12 0 stevel * 13 0 stevel * When distributing Covered Code, include this CDDL HEADER in each 14 0 stevel * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 0 stevel * If applicable, add the following below this CDDL HEADER, with the 16 0 stevel * fields enclosed by brackets "[]" replaced with your own identifying 17 0 stevel * information: Portions Copyright [yyyy] [name of copyright owner] 18 0 stevel * 19 0 stevel * CDDL HEADER END 20 0 stevel */ 21 0 stevel /* 22 8485 Peter * Copyright 2009 Sun Microsystems, Inc. All rights reserved. 23 0 stevel * Use is subject to license terms. 24 0 stevel */ 25 0 stevel /* 26 0 stevel * Copyright (c) 1990 Mentat Inc. 27 0 stevel */ 28 0 stevel 29 0 stevel /* 30 0 stevel * This file contains routines that manipulate Internet Routing Entries (IREs). 31 0 stevel */ 32 0 stevel #include <sys/types.h> 33 0 stevel #include <sys/stream.h> 34 0 stevel #include <sys/stropts.h> 35 0 stevel #include <sys/ddi.h> 36 0 stevel #include <sys/cmn_err.h> 37 0 stevel 38 0 stevel #include <sys/systm.h> 39 0 stevel #include <sys/param.h> 40 0 stevel #include <sys/socket.h> 41 0 stevel #include <net/if.h> 42 0 stevel #include <net/route.h> 43 0 stevel #include <netinet/in.h> 44 0 stevel #include <net/if_dl.h> 45 0 stevel #include <netinet/ip6.h> 46 0 stevel #include <netinet/icmp6.h> 47 0 stevel 48 0 stevel #include <inet/common.h> 49 0 stevel #include <inet/mi.h> 50 0 stevel #include <inet/ip.h> 51 0 stevel #include <inet/ip6.h> 52 0 stevel #include <inet/ip_ndp.h> 53 0 stevel #include <inet/ip_if.h> 54 0 stevel #include <inet/ip_ire.h> 55 0 stevel #include <inet/ipclassifier.h> 56 0 stevel #include <inet/nd.h> 57 0 stevel #include <sys/kmem.h> 58 0 stevel #include <sys/zone.h> 59 1676 jpk 60 1676 jpk #include <sys/tsol/label.h> 61 1676 jpk #include <sys/tsol/tnet.h> 62 0 stevel 63 11042 Erik #define IS_DEFAULT_ROUTE_V6(ire) \ 64 11042 Erik (((ire)->ire_type & IRE_DEFAULT) || \ 65 11042 Erik (((ire)->ire_type & IRE_INTERFACE) && \ 66 11042 Erik (IN6_IS_ADDR_UNSPECIFIED(&(ire)->ire_addr_v6)))) 67 11042 Erik 68 0 stevel static ire_t ire_null; 69 0 stevel 70 11042 Erik static ire_t * 71 11042 Erik ire_ftable_lookup_impl_v6(const in6_addr_t *addr, const in6_addr_t *mask, 72 11042 Erik const in6_addr_t *gateway, int type, const ill_t *ill, 73 11042 Erik zoneid_t zoneid, const ts_label_t *tsl, int flags, 74 11042 Erik ip_stack_t *ipst); 75 0 stevel 76 0 stevel /* 77 0 stevel * Initialize the ire that is specific to IPv6 part and call 78 0 stevel * ire_init_common to finish it. 79 11042 Erik * Returns zero or errno. 80 0 stevel */ 81 11042 Erik int 82 4714 sowmini ire_init_v6(ire_t *ire, const in6_addr_t *v6addr, const in6_addr_t *v6mask, 83 11042 Erik const in6_addr_t *v6gateway, ushort_t type, ill_t *ill, 84 11042 Erik zoneid_t zoneid, uint_t flags, tsol_gc_t *gc, ip_stack_t *ipst) 85 0 stevel { 86 11042 Erik int error; 87 2535 sangeeta 88 1676 jpk /* 89 11042 Erik * Reject IRE security attmakeribute creation/initialization 90 1676 jpk * if system is not running in Trusted mode. 91 1676 jpk */ 92 11042 Erik if (gc != NULL && !is_system_labeled()) 93 11042 Erik return (EINVAL); 94 0 stevel 95 3448 dh155122 BUMP_IRE_STATS(ipst->ips_ire_stats_v6, ire_stats_alloced); 96 11042 Erik if (v6addr != NULL) 97 11042 Erik ire->ire_addr_v6 = *v6addr; 98 0 stevel if (v6gateway != NULL) 99 0 stevel ire->ire_gateway_addr_v6 = *v6gateway; 100 0 stevel 101 11042 Erik /* Make sure we don't have stray values in some fields */ 102 11042 Erik switch (type) { 103 11042 Erik case IRE_LOOPBACK: 104 11042 Erik ire->ire_gateway_addr_v6 = ire->ire_addr_v6; 105 11042 Erik /* FALLTHRU */ 106 11042 Erik case IRE_HOST: 107 11042 Erik case IRE_LOCAL: 108 11042 Erik case IRE_IF_CLONE: 109 11042 Erik ire->ire_mask_v6 = ipv6_all_ones; 110 11042 Erik ire->ire_masklen = IPV6_ABITS; 111 11042 Erik break; 112 11042 Erik case IRE_PREFIX: 113 11042 Erik case IRE_DEFAULT: 114 11042 Erik case IRE_IF_RESOLVER: 115 11042 Erik case IRE_IF_NORESOLVER: 116 11042 Erik if (v6mask != NULL) { 117 11042 Erik ire->ire_mask_v6 = *v6mask; 118 11042 Erik ire->ire_masklen = 119 11042 Erik ip_mask_to_plen_v6(&ire->ire_mask_v6); 120 11042 Erik } 121 11042 Erik break; 122 11042 Erik case IRE_MULTICAST: 123 11042 Erik case IRE_NOROUTE: 124 11042 Erik ASSERT(v6mask == NULL); 125 11042 Erik break; 126 11042 Erik default: 127 11042 Erik ASSERT(0); 128 11042 Erik return (EINVAL); 129 0 stevel } 130 0 stevel 131 11042 Erik error = ire_init_common(ire, type, ill, zoneid, flags, IPV6_VERSION, 132 11042 Erik gc, ipst); 133 11042 Erik if (error != NULL) 134 11042 Erik return (error); 135 0 stevel 136 11042 Erik /* Determine which function pointers to use */ 137 11042 Erik ire->ire_postfragfn = ip_xmit; /* Common case */ 138 0 stevel 139 11042 Erik switch (ire->ire_type) { 140 11042 Erik case IRE_LOCAL: 141 11042 Erik ire->ire_sendfn = ire_send_local_v6; 142 11042 Erik ire->ire_recvfn = ire_recv_local_v6; 143 11042 Erik ASSERT(ire->ire_ill != NULL); 144 11076 Cathy if (ire->ire_ill->ill_flags & ILLF_NOACCEPT) 145 11042 Erik ire->ire_recvfn = ire_recv_noaccept_v6; 146 11042 Erik break; 147 11042 Erik case IRE_LOOPBACK: 148 11042 Erik ire->ire_sendfn = ire_send_local_v6; 149 11042 Erik ire->ire_recvfn = ire_recv_loopback_v6; 150 11042 Erik break; 151 11042 Erik case IRE_MULTICAST: 152 11042 Erik ire->ire_postfragfn = ip_postfrag_loopcheck; 153 11042 Erik ire->ire_sendfn = ire_send_multicast_v6; 154 11042 Erik ire->ire_recvfn = ire_recv_multicast_v6; 155 11042 Erik break; 156 11042 Erik default: 157 11042 Erik /* 158 11042 Erik * For IRE_IF_ALL and IRE_OFFLINK we forward received 159 11042 Erik * packets by default. 160 11042 Erik */ 161 11042 Erik ire->ire_sendfn = ire_send_wire_v6; 162 11042 Erik ire->ire_recvfn = ire_recv_forward_v6; 163 11042 Erik break; 164 0 stevel } 165 11042 Erik if (ire->ire_flags & (RTF_REJECT|RTF_BLACKHOLE)) { 166 11042 Erik ire->ire_sendfn = ire_send_noroute_v6; 167 11042 Erik ire->ire_recvfn = ire_recv_noroute_v6; 168 11042 Erik } else if (ire->ire_flags & RTF_MULTIRT) { 169 11042 Erik ire->ire_postfragfn = ip_postfrag_multirt_v6; 170 11042 Erik ire->ire_sendfn = ire_send_multirt_v6; 171 11042 Erik ire->ire_recvfn = ire_recv_multirt_v6; 172 0 stevel } 173 11042 Erik ire->ire_nce_capable = ire_determine_nce_capable(ire); 174 11042 Erik return (0); 175 0 stevel } 176 0 stevel 177 0 stevel /* 178 0 stevel * ire_create_v6 is called to allocate and initialize a new IRE. 179 0 stevel * 180 0 stevel * NOTE : This is called as writer sometimes though not required 181 0 stevel * by this function. 182 0 stevel */ 183 4714 sowmini /* ARGSUSED */ 184 0 stevel ire_t * 185 0 stevel ire_create_v6(const in6_addr_t *v6addr, const in6_addr_t *v6mask, 186 11042 Erik const in6_addr_t *v6gateway, ushort_t type, ill_t *ill, zoneid_t zoneid, 187 11042 Erik uint_t flags, tsol_gc_t *gc, ip_stack_t *ipst) 188 0 stevel { 189 0 stevel ire_t *ire; 190 11042 Erik int error; 191 0 stevel 192 0 stevel ASSERT(!IN6_IS_ADDR_V4MAPPED(v6addr)); 193 0 stevel 194 0 stevel ire = kmem_cache_alloc(ire_cache, KM_NOSLEEP); 195 0 stevel if (ire == NULL) { 196 11042 Erik DTRACE_PROBE(kmem__cache__alloc); 197 0 stevel return (NULL); 198 0 stevel } 199 0 stevel *ire = ire_null; 200 0 stevel 201 11042 Erik error = ire_init_v6(ire, v6addr, v6mask, v6gateway, 202 11042 Erik type, ill, zoneid, flags, gc, ipst); 203 0 stevel 204 11042 Erik if (error != 0) { 205 11042 Erik DTRACE_PROBE2(ire__init__v6, ire_t *, ire, int, error); 206 0 stevel kmem_cache_free(ire_cache, ire); 207 0 stevel return (NULL); 208 0 stevel } 209 0 stevel return (ire); 210 0 stevel } 211 0 stevel 212 0 stevel /* 213 11042 Erik * Find the ill matching a multicast group. 214 0 stevel * Allows different routes for multicast addresses 215 0 stevel * in the unicast routing table (akin to FF::0/8 but could be more specific) 216 0 stevel * which point at different interfaces. This is used when IPV6_MULTICAST_IF 217 0 stevel * isn't specified (when sending) and when IPV6_JOIN_GROUP doesn't 218 0 stevel * specify the interface to join on. 219 0 stevel * 220 11042 Erik * Supports link-local addresses by using ire_route_recursive which follows 221 11042 Erik * the ill when recursing. 222 11042 Erik * 223 11042 Erik * To handle CGTP, since we don't have a separate IRE_MULTICAST for each group 224 11042 Erik * and the MULTIRT property can be different for different groups, we 225 11042 Erik * extract RTF_MULTIRT from the special unicast route added for a group 226 11042 Erik * with CGTP and pass that back in the multirtp argument. 227 11042 Erik * This is used in ip_set_destination etc to set ixa_postfragfn for multicast. 228 11042 Erik * We have a setsrcp argument for the same reason. 229 0 stevel */ 230 11042 Erik ill_t * 231 11042 Erik ire_lookup_multi_ill_v6(const in6_addr_t *group, zoneid_t zoneid, 232 11042 Erik ip_stack_t *ipst, boolean_t *multirtp, in6_addr_t *setsrcp) 233 0 stevel { 234 0 stevel ire_t *ire; 235 11042 Erik ill_t *ill; 236 0 stevel 237 11042 Erik ire = ire_route_recursive_v6(group, 0, NULL, zoneid, NULL, 238 11042 Erik MATCH_IRE_DSTONLY, B_FALSE, 0, ipst, setsrcp, NULL, NULL); 239 11042 Erik ASSERT(ire != NULL); 240 0 stevel 241 11042 Erik if (ire->ire_flags & (RTF_REJECT|RTF_BLACKHOLE)) { 242 0 stevel ire_refrele(ire); 243 0 stevel return (NULL); 244 0 stevel } 245 0 stevel 246 11042 Erik if (multirtp != NULL) 247 11042 Erik *multirtp = (ire->ire_flags & RTF_MULTIRT) != 0; 248 0 stevel 249 11042 Erik ill = ire_nexthop_ill(ire); 250 11042 Erik ire_refrele(ire); 251 11042 Erik return (ill); 252 0 stevel } 253 0 stevel 254 0 stevel /* 255 0 stevel * This function takes a mask and returns number of bits set in the 256 0 stevel * mask (the represented prefix length). Assumes a contiguous mask. 257 0 stevel */ 258 0 stevel int 259 0 stevel ip_mask_to_plen_v6(const in6_addr_t *v6mask) 260 0 stevel { 261 0 stevel int bits; 262 0 stevel int plen = IPV6_ABITS; 263 0 stevel int i; 264 0 stevel 265 0 stevel for (i = 3; i >= 0; i--) { 266 0 stevel if (v6mask->s6_addr32[i] == 0) { 267 0 stevel plen -= 32; 268 0 stevel continue; 269 0 stevel } 270 0 stevel bits = ffs(ntohl(v6mask->s6_addr32[i])) - 1; 271 0 stevel if (bits == 0) 272 0 stevel break; 273 0 stevel plen -= bits; 274 0 stevel } 275 0 stevel 276 0 stevel return (plen); 277 0 stevel } 278 0 stevel 279 0 stevel /* 280 0 stevel * Convert a prefix length to the mask for that prefix. 281 0 stevel * Returns the argument bitmask. 282 0 stevel */ 283 0 stevel in6_addr_t * 284 0 stevel ip_plen_to_mask_v6(uint_t plen, in6_addr_t *bitmask) 285 0 stevel { 286 0 stevel uint32_t *ptr; 287 0 stevel 288 0 stevel if (plen < 0 || plen > IPV6_ABITS) 289 0 stevel return (NULL); 290 0 stevel *bitmask = ipv6_all_zeros; 291 11042 Erik if (plen == 0) 292 11042 Erik return (bitmask); 293 0 stevel 294 0 stevel ptr = (uint32_t *)bitmask; 295 0 stevel while (plen > 32) { 296 0 stevel *ptr++ = 0xffffffffU; 297 0 stevel plen -= 32; 298 0 stevel } 299 0 stevel *ptr = htonl(0xffffffffU << (32 - plen)); 300 0 stevel return (bitmask); 301 0 stevel } 302 0 stevel 303 0 stevel /* 304 11042 Erik * Add a fully initialized IPv6 IRE to the forwarding table. 305 11042 Erik * This returns NULL on failure, or a held IRE on success. 306 11042 Erik * Normally the returned IRE is the same as the argument. But a different 307 11042 Erik * IRE will be returned if the added IRE is deemed identical to an existing 308 11042 Erik * one. In that case ire_identical_ref will be increased. 309 11042 Erik * The caller always needs to do an ire_refrele() on the returned IRE. 310 0 stevel */ 311 11042 Erik ire_t * 312 11042 Erik ire_add_v6(ire_t *ire) 313 0 stevel { 314 0 stevel ire_t *ire1; 315 0 stevel int mask_table_index; 316 0 stevel irb_t *irb_ptr; 317 0 stevel ire_t **irep; 318 11042 Erik int match_flags; 319 0 stevel int error; 320 3448 dh155122 ip_stack_t *ipst = ire->ire_ipst; 321 0 stevel 322 0 stevel ASSERT(ire->ire_ipversion == IPV6_VERSION); 323 0 stevel 324 0 stevel /* Make sure the address is properly masked. */ 325 0 stevel V6_MASK_COPY(ire->ire_addr_v6, ire->ire_mask_v6, ire->ire_addr_v6); 326 0 stevel 327 11042 Erik mask_table_index = ip_mask_to_plen_v6(&ire->ire_mask_v6); 328 11042 Erik if ((ipst->ips_ip_forwarding_table_v6[mask_table_index]) == NULL) { 329 11042 Erik irb_t *ptr; 330 11042 Erik int i; 331 11042 Erik 332 11042 Erik ptr = (irb_t *)mi_zalloc((ipst->ips_ip6_ftable_hash_size * 333 11042 Erik sizeof (irb_t))); 334 11042 Erik if (ptr == NULL) { 335 11042 Erik ire_delete(ire); 336 11042 Erik return (NULL); 337 11042 Erik } 338 11042 Erik for (i = 0; i < ipst->ips_ip6_ftable_hash_size; i++) { 339 11042 Erik rw_init(&ptr[i].irb_lock, NULL, RW_DEFAULT, NULL); 340 11042 Erik } 341 11042 Erik mutex_enter(&ipst->ips_ire_ft_init_lock); 342 11042 Erik if (ipst->ips_ip_forwarding_table_v6[mask_table_index] == 343 3448 dh155122 NULL) { 344 11042 Erik ipst->ips_ip_forwarding_table_v6[mask_table_index] = 345 11042 Erik ptr; 346 11042 Erik mutex_exit(&ipst->ips_ire_ft_init_lock); 347 11042 Erik } else { 348 11042 Erik /* 349 11042 Erik * Some other thread won the race in 350 11042 Erik * initializing the forwarding table at the 351 11042 Erik * same index. 352 11042 Erik */ 353 11042 Erik mutex_exit(&ipst->ips_ire_ft_init_lock); 354 11042 Erik for (i = 0; i < ipst->ips_ip6_ftable_hash_size; i++) { 355 11042 Erik rw_destroy(&ptr[i].irb_lock); 356 0 stevel } 357 11042 Erik mi_free(ptr); 358 0 stevel } 359 0 stevel } 360 11042 Erik irb_ptr = &(ipst->ips_ip_forwarding_table_v6[mask_table_index][ 361 11042 Erik IRE_ADDR_MASK_HASH_V6(ire->ire_addr_v6, ire->ire_mask_v6, 362 11042 Erik ipst->ips_ip6_ftable_hash_size)]); 363 0 stevel 364 11042 Erik match_flags = (MATCH_IRE_MASK | MATCH_IRE_TYPE | MATCH_IRE_GW); 365 11042 Erik if (ire->ire_ill != NULL) 366 11042 Erik match_flags |= MATCH_IRE_ILL; 367 0 stevel /* 368 11042 Erik * Start the atomic add of the ire. Grab the bucket lock and the 369 11042 Erik * ill lock. Check for condemned. 370 0 stevel */ 371 11042 Erik error = ire_atomic_start(irb_ptr, ire); 372 11042 Erik if (error != 0) { 373 11042 Erik ire_delete(ire); 374 11042 Erik return (NULL); 375 11042 Erik } 376 8485 Peter 377 0 stevel /* 378 8485 Peter * If we are creating a hidden IRE, make sure we search for 379 8485 Peter * hidden IREs when searching for duplicates below. 380 8485 Peter * Otherwise, we might find an IRE on some other interface 381 8485 Peter * that's not marked hidden. 382 0 stevel */ 383 11042 Erik if (ire->ire_testhidden) 384 11042 Erik match_flags |= MATCH_IRE_TESTHIDDEN; 385 0 stevel 386 0 stevel /* 387 0 stevel * Atomically check for duplicate and insert in the table. 388 0 stevel */ 389 0 stevel for (ire1 = irb_ptr->irb_ire; ire1 != NULL; ire1 = ire1->ire_next) { 390 11042 Erik if (IRE_IS_CONDEMNED(ire1)) 391 11042 Erik continue; 392 11042 Erik /* 393 11042 Erik * Here we need an exact match on zoneid, i.e., 394 11042 Erik * ire_match_args doesn't fit. 395 11042 Erik */ 396 11042 Erik if (ire1->ire_zoneid != ire->ire_zoneid) 397 0 stevel continue; 398 0 stevel 399 11042 Erik if (ire1->ire_type != ire->ire_type) 400 0 stevel continue; 401 11042 Erik 402 11042 Erik /* 403 11042 Erik * Note: We do not allow multiple routes that differ only 404 11042 Erik * in the gateway security attributes; such routes are 405 11042 Erik * considered duplicates. 406 11042 Erik * To change that we explicitly have to treat them as 407 11042 Erik * different here. 408 11042 Erik */ 409 0 stevel if (ire_match_args_v6(ire1, &ire->ire_addr_v6, 410 0 stevel &ire->ire_mask_v6, &ire->ire_gateway_addr_v6, 411 11042 Erik ire->ire_type, ire->ire_ill, ire->ire_zoneid, NULL, 412 11042 Erik match_flags)) { 413 0 stevel /* 414 0 stevel * Return the old ire after doing a REFHOLD. 415 0 stevel * As most of the callers continue to use the IRE 416 0 stevel * after adding, we return a held ire. This will 417 0 stevel * avoid a lookup in the caller again. If the callers 418 0 stevel * don't want to use it, they need to do a REFRELE. 419 0 stevel */ 420 0 stevel ip1dbg(("found dup ire existing %p new %p", 421 0 stevel (void *)ire1, (void *)ire)); 422 11042 Erik ire_refhold(ire1); 423 11042 Erik atomic_add_32(&ire1->ire_identical_ref, 1); 424 0 stevel ire_atomic_end(irb_ptr, ire); 425 0 stevel ire_delete(ire); 426 11042 Erik return (ire1); 427 0 stevel } 428 0 stevel } 429 0 stevel 430 11042 Erik /* 431 11042 Erik * Normally we do head insertion since most things do not care about 432 11042 Erik * the order of the IREs in the bucket. 433 11042 Erik * However, due to shared-IP zones (and restrict_interzone_loopback) 434 11042 Erik * we can have an IRE_LOCAL as well as IRE_IF_CLONE for the same 435 11042 Erik * address. For that reason we do tail insertion for IRE_IF_CLONE. 436 11042 Erik */ 437 11042 Erik irep = (ire_t **)irb_ptr; 438 11042 Erik if (ire->ire_type & IRE_IF_CLONE) { 439 11042 Erik while ((ire1 = *irep) != NULL) 440 11042 Erik irep = &ire1->ire_next; 441 0 stevel } 442 0 stevel /* Insert at *irep */ 443 0 stevel ire1 = *irep; 444 0 stevel if (ire1 != NULL) 445 0 stevel ire1->ire_ptpn = &ire->ire_next; 446 0 stevel ire->ire_next = ire1; 447 0 stevel /* Link the new one in. */ 448 0 stevel ire->ire_ptpn = irep; 449 0 stevel /* 450 0 stevel * ire_walk routines de-reference ire_next without holding 451 0 stevel * a lock. Before we point to the new ire, we want to make 452 0 stevel * sure the store that sets the ire_next of the new ire 453 0 stevel * reaches global visibility, so that ire_walk routines 454 0 stevel * don't see a truncated list of ires i.e if the ire_next 455 0 stevel * of the new ire gets set after we do "*irep = ire" due 456 0 stevel * to re-ordering, the ire_walk thread will see a NULL 457 0 stevel * once it accesses the ire_next of the new ire. 458 0 stevel * membar_producer() makes sure that the following store 459 0 stevel * happens *after* all of the above stores. 460 0 stevel */ 461 0 stevel membar_producer(); 462 0 stevel *irep = ire; 463 0 stevel ire->ire_bucket = irb_ptr; 464 0 stevel /* 465 0 stevel * We return a bumped up IRE above. Keep it symmetrical 466 0 stevel * so that the callers will always have to release. This 467 0 stevel * helps the callers of this function because they continue 468 0 stevel * to use the IRE after adding and hence they don't have to 469 0 stevel * lookup again after we return the IRE. 470 0 stevel * 471 0 stevel * NOTE : We don't have to use atomics as this is appearing 472 0 stevel * in the list for the first time and no one else can bump 473 0 stevel * up the reference count on this yet. 474 0 stevel */ 475 11042 Erik ire_refhold_locked(ire); 476 3448 dh155122 BUMP_IRE_STATS(ipst->ips_ire_stats_v6, ire_stats_inserted); 477 0 stevel irb_ptr->irb_ire_cnt++; 478 0 stevel 479 11042 Erik if (ire->ire_ill != NULL) { 480 11042 Erik DTRACE_PROBE3(ill__incr__cnt, (ill_t *), ire->ire_ill, 481 6255 sowmini (char *), "ire", (void *), ire); 482 11042 Erik ire->ire_ill->ill_ire_cnt++; 483 11042 Erik ASSERT(ire->ire_ill->ill_ire_cnt != 0); /* Wraparound */ 484 0 stevel } 485 0 stevel ire_atomic_end(irb_ptr, ire); 486 0 stevel 487 11042 Erik /* Make any caching of the IREs be notified or updated */ 488 11042 Erik ire_flush_cache_v6(ire, IRE_FLUSH_ADD); 489 0 stevel 490 11042 Erik return (ire); 491 0 stevel } 492 0 stevel 493 0 stevel /* 494 0 stevel * Search for all HOST REDIRECT routes that are 495 0 stevel * pointing at the specified gateway and 496 0 stevel * delete them. This routine is called only 497 0 stevel * when a default gateway is going away. 498 0 stevel */ 499 0 stevel static void 500 3448 dh155122 ire_delete_host_redirects_v6(const in6_addr_t *gateway, ip_stack_t *ipst) 501 0 stevel { 502 0 stevel irb_t *irb_ptr; 503 0 stevel irb_t *irb; 504 0 stevel ire_t *ire; 505 0 stevel in6_addr_t gw_addr_v6; 506 0 stevel int i; 507 0 stevel 508 0 stevel /* get the hash table for HOST routes */ 509 3448 dh155122 irb_ptr = ipst->ips_ip_forwarding_table_v6[(IP6_MASK_TABLE_SIZE - 1)]; 510 0 stevel if (irb_ptr == NULL) 511 0 stevel return; 512 3448 dh155122 for (i = 0; (i < ipst->ips_ip6_ftable_hash_size); i++) { 513 0 stevel irb = &irb_ptr[i]; 514 11042 Erik irb_refhold(irb); 515 0 stevel for (ire = irb->irb_ire; ire != NULL; ire = ire->ire_next) { 516 3004 dd193516 if (!(ire->ire_flags & RTF_DYNAMIC)) 517 0 stevel continue; 518 0 stevel mutex_enter(&ire->ire_lock); 519 0 stevel gw_addr_v6 = ire->ire_gateway_addr_v6; 520 0 stevel mutex_exit(&ire->ire_lock); 521 0 stevel if (IN6_ARE_ADDR_EQUAL(&gw_addr_v6, gateway)) 522 0 stevel ire_delete(ire); 523 0 stevel } 524 11042 Erik irb_refrele(irb); 525 0 stevel } 526 0 stevel } 527 0 stevel 528 0 stevel /* 529 0 stevel * Delete the specified IRE. 530 0 stevel * All calls should use ire_delete(). 531 0 stevel * Sometimes called as writer though not required by this function. 532 0 stevel * 533 0 stevel * NOTE : This function is called only if the ire was added 534 0 stevel * in the list. 535 0 stevel */ 536 0 stevel void 537 0 stevel ire_delete_v6(ire_t *ire) 538 0 stevel { 539 0 stevel in6_addr_t gw_addr_v6; 540 3448 dh155122 ip_stack_t *ipst = ire->ire_ipst; 541 0 stevel 542 11042 Erik /* 543 11042 Erik * Make sure ire_generation increases from ire_flush_cache happen 544 11042 Erik * after any lookup/reader has read ire_generation. 545 11042 Erik * Since the rw_enter makes us wait until any lookup/reader has 546 11042 Erik * completed we can exit the lock immediately. 547 11042 Erik */ 548 11042 Erik rw_enter(&ipst->ips_ip6_ire_head_lock, RW_WRITER); 549 11042 Erik rw_exit(&ipst->ips_ip6_ire_head_lock); 550 11042 Erik 551 0 stevel ASSERT(ire->ire_refcnt >= 1); 552 0 stevel ASSERT(ire->ire_ipversion == IPV6_VERSION); 553 0 stevel 554 11042 Erik ire_flush_cache_v6(ire, IRE_FLUSH_DELETE); 555 11042 Erik 556 0 stevel if (ire->ire_type == IRE_DEFAULT) { 557 0 stevel /* 558 0 stevel * when a default gateway is going away 559 0 stevel * delete all the host redirects pointing at that 560 0 stevel * gateway. 561 0 stevel */ 562 0 stevel mutex_enter(&ire->ire_lock); 563 0 stevel gw_addr_v6 = ire->ire_gateway_addr_v6; 564 0 stevel mutex_exit(&ire->ire_lock); 565 3448 dh155122 ire_delete_host_redirects_v6(&gw_addr_v6, ipst); 566 0 stevel } 567 11042 Erik 568 11042 Erik /* 569 11042 Erik * If we are deleting an IRE_INTERFACE then we make sure we also 570 11042 Erik * delete any IRE_IF_CLONE that has been created from it. 571 11042 Erik * Those are always in ire_dep_children. 572 11042 Erik */ 573 11042 Erik if ((ire->ire_type & IRE_INTERFACE) && ire->ire_dep_children != 0) 574 11042 Erik ire_dep_delete_if_clone(ire); 575 11042 Erik 576 11042 Erik /* Remove from parent dependencies and child */ 577 11042 Erik rw_enter(&ipst->ips_ire_dep_lock, RW_WRITER); 578 11042 Erik if (ire->ire_dep_parent != NULL) { 579 11042 Erik ire_dep_remove(ire); 580 11042 Erik } 581 11042 Erik while (ire->ire_dep_children != NULL) 582 11042 Erik ire_dep_remove(ire->ire_dep_children); 583 11042 Erik rw_exit(&ipst->ips_ire_dep_lock); 584 0 stevel } 585 0 stevel 586 0 stevel /* 587 11042 Erik * When an IRE is added or deleted this routine is called to make sure 588 11042 Erik * any caching of IRE information is notified or updated. 589 0 stevel * 590 11042 Erik * The flag argument indicates if the flush request is due to addition 591 11042 Erik * of new route (IRE_FLUSH_ADD), deletion of old route (IRE_FLUSH_DELETE), 592 11042 Erik * or a change to ire_gateway_addr (IRE_FLUSH_GWCHANGE). 593 0 stevel */ 594 0 stevel void 595 0 stevel ire_flush_cache_v6(ire_t *ire, int flag) 596 0 stevel { 597 11042 Erik ip_stack_t *ipst = ire->ire_ipst; 598 0 stevel 599 11042 Erik /* 600 11042 Erik * IRE_IF_CLONE ire's don't provide any new information 601 11042 Erik * than the parent from which they are cloned, so don't 602 11042 Erik * perturb the generation numbers. 603 11042 Erik */ 604 11042 Erik if (ire->ire_type & IRE_IF_CLONE) 605 4714 sowmini return; 606 0 stevel 607 0 stevel /* 608 11042 Erik * Ensure that an ire_add during a lookup serializes the updates of 609 11042 Erik * the generation numbers under ire_head_lock so that the lookup gets 610 11042 Erik * either the old ire and old generation number, or a new ire and new 611 11042 Erik * generation number. 612 0 stevel */ 613 11042 Erik rw_enter(&ipst->ips_ip6_ire_head_lock, RW_WRITER); 614 11042 Erik 615 11042 Erik /* 616 11042 Erik * If a route was just added, we need to notify everybody that 617 11042 Erik * has cached an IRE_NOROUTE since there might now be a better 618 11042 Erik * route for them. 619 11042 Erik */ 620 11042 Erik if (flag == IRE_FLUSH_ADD) { 621 11042 Erik ire_increment_generation(ipst->ips_ire_reject_v6); 622 11042 Erik ire_increment_generation(ipst->ips_ire_blackhole_v6); 623 11042 Erik } 624 11042 Erik 625 11042 Erik /* Adding a default can't otherwise provide a better route */ 626 11042 Erik if (ire->ire_type == IRE_DEFAULT && flag == IRE_FLUSH_ADD) { 627 11042 Erik rw_exit(&ipst->ips_ip6_ire_head_lock); 628 0 stevel return; 629 11042 Erik } 630 11042 Erik 631 11042 Erik switch (flag) { 632 11042 Erik case IRE_FLUSH_DELETE: 633 11042 Erik case IRE_FLUSH_GWCHANGE: 634 0 stevel /* 635 11042 Erik * Update ire_generation for all ire_dep_children chains 636 11042 Erik * starting with this IRE 637 0 stevel */ 638 11042 Erik ire_dep_incr_generation(ire); 639 11042 Erik break; 640 11042 Erik case IRE_FLUSH_ADD: { 641 11042 Erik in6_addr_t addr; 642 11042 Erik in6_addr_t mask; 643 11042 Erik ip_stack_t *ipst = ire->ire_ipst; 644 11042 Erik uint_t masklen; 645 11042 Erik 646 11042 Erik /* 647 11042 Erik * Find an IRE which is a shorter match than the ire to be added 648 11042 Erik * For any such IRE (which we repeat) we update the 649 11042 Erik * ire_generation the same way as in the delete case. 650 11042 Erik */ 651 11042 Erik addr = ire->ire_addr_v6; 652 11042 Erik mask = ire->ire_mask_v6; 653 11042 Erik masklen = ip_mask_to_plen_v6(&mask); 654 11042 Erik 655 11042 Erik ire = ire_ftable_lookup_impl_v6(&addr, &mask, NULL, 0, NULL, 656 11042 Erik ALL_ZONES, NULL, MATCH_IRE_SHORTERMASK, ipst); 657 11042 Erik while (ire != NULL) { 658 11042 Erik /* We need to handle all in the same bucket */ 659 11042 Erik irb_increment_generation(ire->ire_bucket); 660 11042 Erik 661 11042 Erik mask = ire->ire_mask_v6; 662 11042 Erik ASSERT(masklen > ip_mask_to_plen_v6(&mask)); 663 11042 Erik masklen = ip_mask_to_plen_v6(&mask); 664 11042 Erik ire_refrele(ire); 665 11042 Erik ire = ire_ftable_lookup_impl_v6(&addr, &mask, NULL, 0, 666 11042 Erik NULL, ALL_ZONES, NULL, MATCH_IRE_SHORTERMASK, ipst); 667 0 stevel } 668 0 stevel } 669 11042 Erik break; 670 0 stevel } 671 11042 Erik rw_exit(&ipst->ips_ip6_ire_head_lock); 672 0 stevel } 673 0 stevel 674 0 stevel /* 675 0 stevel * Matches the arguments passed with the values in the ire. 676 0 stevel * 677 11042 Erik * Note: for match types that match using "ill" passed in, ill 678 0 stevel * must be checked for non-NULL before calling this routine. 679 0 stevel */ 680 11042 Erik boolean_t 681 0 stevel ire_match_args_v6(ire_t *ire, const in6_addr_t *addr, const in6_addr_t *mask, 682 11042 Erik const in6_addr_t *gateway, int type, const ill_t *ill, zoneid_t zoneid, 683 11042 Erik const ts_label_t *tsl, int match_flags) 684 0 stevel { 685 0 stevel in6_addr_t masked_addr; 686 0 stevel in6_addr_t gw_addr_v6; 687 0 stevel ill_t *ire_ill = NULL, *dst_ill; 688 11042 Erik ip_stack_t *ipst = ire->ire_ipst; 689 0 stevel 690 0 stevel ASSERT(ire->ire_ipversion == IPV6_VERSION); 691 0 stevel ASSERT(addr != NULL); 692 0 stevel ASSERT(mask != NULL); 693 0 stevel ASSERT((!(match_flags & MATCH_IRE_GW)) || gateway != NULL); 694 8485 Peter ASSERT((!(match_flags & MATCH_IRE_ILL)) || 695 11042 Erik (ill != NULL && ill->ill_isv6)); 696 0 stevel 697 0 stevel /* 698 11042 Erik * If MATCH_IRE_TESTHIDDEN is set, then only return the IRE if it 699 11042 Erik * is in fact hidden, to ensure the caller gets the right one. 700 0 stevel */ 701 11042 Erik if (ire->ire_testhidden) { 702 11042 Erik if (!(match_flags & MATCH_IRE_TESTHIDDEN)) 703 8485 Peter return (B_FALSE); 704 8485 Peter } 705 0 stevel 706 1676 jpk if (zoneid != ALL_ZONES && zoneid != ire->ire_zoneid && 707 1676 jpk ire->ire_zoneid != ALL_ZONES) { 708 0 stevel /* 709 11042 Erik * If MATCH_IRE_ZONEONLY has been set and the supplied zoneid 710 11042 Erik * does not match that of ire_zoneid, a failure to 711 0 stevel * match is reported at this point. Otherwise, since some IREs 712 0 stevel * that are available in the global zone can be used in local 713 0 stevel * zones, additional checks need to be performed: 714 0 stevel * 715 11042 Erik * IRE_LOOPBACK 716 11042 Erik * entries should never be matched in this situation. 717 11042 Erik * Each zone has its own IRE_LOOPBACK. 718 0 stevel * 719 11042 Erik * IRE_LOCAL 720 11042 Erik * We allow them for any zoneid. ire_route_recursive 721 11042 Erik * does additional checks when 722 11042 Erik * ip_restrict_interzone_loopback is set. 723 0 stevel * 724 11042 Erik * If ill_usesrc_ifindex is set 725 11042 Erik * Then we check if the zone has a valid source address 726 11042 Erik * on the usesrc ill. 727 0 stevel * 728 11042 Erik * If ire_ill is set, then check that the zone has an ipif 729 11042 Erik * on that ill. 730 11042 Erik * 731 11042 Erik * Outside of this function (in ire_round_robin) we check 732 11042 Erik * that any IRE_OFFLINK has a gateway that reachable from the 733 11042 Erik * zone when we have multiple choices (ECMP). 734 0 stevel */ 735 0 stevel if (match_flags & MATCH_IRE_ZONEONLY) 736 0 stevel return (B_FALSE); 737 11042 Erik if (ire->ire_type & IRE_LOOPBACK) 738 0 stevel return (B_FALSE); 739 11042 Erik 740 11042 Erik if (ire->ire_type & IRE_LOCAL) 741 11042 Erik goto matchit; 742 11042 Erik 743 0 stevel /* 744 11042 Erik * The normal case of IRE_ONLINK has a matching zoneid. 745 11042 Erik * Here we handle the case when shared-IP zones have been 746 11042 Erik * configured with IP addresses on vniN. In that case it 747 11042 Erik * is ok for traffic from a zone to use IRE_ONLINK routes 748 11042 Erik * if the ill has a usesrc pointing at vniN 749 11042 Erik * Applies to IRE_INTERFACE. 750 0 stevel */ 751 11042 Erik dst_ill = ire->ire_ill; 752 11042 Erik if (ire->ire_type & IRE_ONLINK) { 753 11042 Erik uint_t ifindex; 754 11042 Erik 755 11042 Erik /* 756 11042 Erik * Note there is no IRE_INTERFACE on vniN thus 757 11042 Erik * can't do an IRE lookup for a matching route. 758 11042 Erik */ 759 11042 Erik ifindex = dst_ill->ill_usesrc_ifindex; 760 11042 Erik if (ifindex == 0) 761 11042 Erik return (B_FALSE); 762 11042 Erik 763 0 stevel /* 764 0 stevel * If there is a usable source address in the 765 11042 Erik * zone, then it's ok to return this IRE_INTERFACE 766 0 stevel */ 767 11042 Erik if (!ipif_zone_avail(ifindex, dst_ill->ill_isv6, 768 11042 Erik zoneid, ipst)) { 769 11042 Erik ip3dbg(("ire_match_args: no usrsrc for zone" 770 0 stevel " dst_ill %p\n", (void *)dst_ill)); 771 0 stevel return (B_FALSE); 772 0 stevel } 773 0 stevel } 774 11042 Erik /* 775 11042 Erik * For exampe, with 776 11042 Erik * route add 11.0.0.0 gw1 -ifp bge0 777 11042 Erik * route add 11.0.0.0 gw2 -ifp bge1 778 11042 Erik * this code would differentiate based on 779 11042 Erik * where the sending zone has addresses. 780 11042 Erik * Only if the zone has an address on bge0 can it use the first 781 11042 Erik * route. It isn't clear if this behavior is documented 782 11042 Erik * anywhere. 783 11042 Erik */ 784 11042 Erik if (dst_ill != NULL && (ire->ire_type & IRE_OFFLINK)) { 785 0 stevel ipif_t *tipif; 786 0 stevel 787 11042 Erik mutex_enter(&dst_ill->ill_lock); 788 11042 Erik for (tipif = dst_ill->ill_ipif; 789 0 stevel tipif != NULL; tipif = tipif->ipif_next) { 790 11042 Erik if (!IPIF_IS_CONDEMNED(tipif) && 791 0 stevel (tipif->ipif_flags & IPIF_UP) && 792 1676 jpk (tipif->ipif_zoneid == zoneid || 793 1676 jpk tipif->ipif_zoneid == ALL_ZONES)) 794 0 stevel break; 795 0 stevel } 796 11042 Erik mutex_exit(&dst_ill->ill_lock); 797 0 stevel if (tipif == NULL) 798 0 stevel return (B_FALSE); 799 0 stevel } 800 0 stevel } 801 0 stevel 802 11042 Erik matchit: 803 0 stevel if (match_flags & MATCH_IRE_GW) { 804 0 stevel mutex_enter(&ire->ire_lock); 805 0 stevel gw_addr_v6 = ire->ire_gateway_addr_v6; 806 0 stevel mutex_exit(&ire->ire_lock); 807 0 stevel } 808 11042 Erik if (match_flags & MATCH_IRE_ILL) { 809 11042 Erik ire_ill = ire->ire_ill; 810 8485 Peter 811 11042 Erik /* 812 11042 Erik * If asked to match an ill, we *must* match 813 11042 Erik * on the ire_ill for ipmp test addresses, or 814 11042 Erik * any of the ill in the group for data addresses. 815 11042 Erik * If we don't, we may as well fail. 816 11042 Erik * However, we need an exception for IRE_LOCALs to ensure 817 11042 Erik * we loopback packets even sent to test addresses on different 818 11042 Erik * interfaces in the group. 819 11042 Erik */ 820 11042 Erik if ((match_flags & MATCH_IRE_TESTHIDDEN) && 821 11042 Erik !(ire->ire_type & IRE_LOCAL)) { 822 11042 Erik if (ire->ire_ill != ill) 823 11042 Erik return (B_FALSE); 824 11042 Erik } else { 825 11042 Erik match_flags &= ~MATCH_IRE_TESTHIDDEN; 826 11042 Erik /* 827 11042 Erik * We know that ill is not NULL, but ire_ill could be 828 11042 Erik * NULL 829 11042 Erik */ 830 11042 Erik if (ire_ill == NULL || !IS_ON_SAME_LAN(ill, ire_ill)) 831 11042 Erik return (B_FALSE); 832 11042 Erik } 833 0 stevel } 834 0 stevel /* No ire_addr_v6 bits set past the mask */ 835 0 stevel ASSERT(V6_MASK_EQ(ire->ire_addr_v6, ire->ire_mask_v6, 836 0 stevel ire->ire_addr_v6)); 837 0 stevel V6_MASK_COPY(*addr, *mask, masked_addr); 838 0 stevel if (V6_MASK_EQ(*addr, *mask, ire->ire_addr_v6) && 839 0 stevel ((!(match_flags & MATCH_IRE_GW)) || 840 4714 sowmini IN6_ARE_ADDR_EQUAL(&gw_addr_v6, gateway)) && 841 11042 Erik ((!(match_flags & MATCH_IRE_TYPE)) || (ire->ire_type & type)) && 842 11042 Erik ((!(match_flags & MATCH_IRE_TESTHIDDEN)) || ire->ire_testhidden) && 843 11042 Erik ((!(match_flags & MATCH_IRE_MASK)) || 844 11042 Erik (IN6_ARE_ADDR_EQUAL(&ire->ire_mask_v6, mask))) && 845 1676 jpk ((!(match_flags & MATCH_IRE_SECATTR)) || 846 4714 sowmini (!is_system_labeled()) || 847 4714 sowmini (tsol_ire_match_gwattr(ire, tsl) == 0))) { 848 0 stevel /* We found the matched IRE */ 849 0 stevel return (B_TRUE); 850 0 stevel } 851 0 stevel return (B_FALSE); 852 0 stevel } 853 0 stevel 854 0 stevel /* 855 11042 Erik * Check if the zoneid (not ALL_ZONES) has an IRE_INTERFACE for the specified 856 11042 Erik * gateway address. If ill is non-NULL we also match on it. 857 11042 Erik * The caller must hold a read lock on RADIX_NODE_HEAD if lock_held is set. 858 0 stevel */ 859 11042 Erik boolean_t 860 11042 Erik ire_gateway_ok_zone_v6(const in6_addr_t *gateway, zoneid_t zoneid, ill_t *ill, 861 11042 Erik const ts_label_t *tsl, ip_stack_t *ipst, boolean_t lock_held) 862 0 stevel { 863 11042 Erik ire_t *ire; 864 11042 Erik uint_t match_flags; 865 0 stevel 866 11042 Erik if (lock_held) 867 11042 Erik ASSERT(RW_READ_HELD(&ipst->ips_ip6_ire_head_lock)); 868 11042 Erik else 869 11042 Erik rw_enter(&ipst->ips_ip6_ire_head_lock, RW_READER); 870 0 stevel 871 11042 Erik match_flags = MATCH_IRE_TYPE | MATCH_IRE_SECATTR; 872 11042 Erik if (ill != NULL) 873 11042 Erik match_flags |= MATCH_IRE_ILL; 874 11042 Erik 875 11042 Erik ire = ire_ftable_lookup_impl_v6(gateway, &ipv6_all_zeros, 876 11042 Erik &ipv6_all_zeros, IRE_INTERFACE, ill, zoneid, tsl, match_flags, 877 11042 Erik ipst); 878 11042 Erik 879 11042 Erik if (!lock_held) 880 11042 Erik rw_exit(&ipst->ips_ip6_ire_head_lock); 881 11042 Erik if (ire != NULL) { 882 11042 Erik ire_refrele(ire); 883 11042 Erik return (B_TRUE); 884 11042 Erik } else { 885 11042 Erik return (B_FALSE); 886 0 stevel } 887 0 stevel } 888 0 stevel 889 0 stevel /* 890 0 stevel * Lookup a route in forwarding table. 891 0 stevel * specific lookup is indicated by passing the 892 0 stevel * required parameters and indicating the 893 0 stevel * match required in flag field. 894 0 stevel * 895 0 stevel * Supports link-local addresses by following the ipif/ill when recursing. 896 0 stevel */ 897 0 stevel ire_t * 898 0 stevel ire_ftable_lookup_v6(const in6_addr_t *addr, const in6_addr_t *mask, 899 11042 Erik const in6_addr_t *gateway, int type, const ill_t *ill, 900 11042 Erik zoneid_t zoneid, const ts_label_t *tsl, int flags, 901 11042 Erik uint32_t xmit_hint, ip_stack_t *ipst, uint_t *generationp) 902 0 stevel { 903 0 stevel ire_t *ire = NULL; 904 0 stevel 905 0 stevel ASSERT(addr != NULL); 906 0 stevel ASSERT((!(flags & MATCH_IRE_MASK)) || mask != NULL); 907 0 stevel ASSERT((!(flags & MATCH_IRE_GW)) || gateway != NULL); 908 11042 Erik ASSERT(ill == NULL || ill->ill_isv6); 909 11042 Erik 910 11042 Erik ASSERT(!IN6_IS_ADDR_V4MAPPED(addr)); 911 0 stevel 912 0 stevel /* 913 11042 Erik * ire_match_args_v6() will dereference ill if MATCH_IRE_ILL 914 11042 Erik * is set. 915 0 stevel */ 916 11042 Erik if ((flags & (MATCH_IRE_ILL)) && (ill == NULL)) 917 11042 Erik return (NULL); 918 11042 Erik 919 11042 Erik rw_enter(&ipst->ips_ip6_ire_head_lock, RW_READER); 920 11042 Erik ire = ire_ftable_lookup_impl_v6(addr, mask, gateway, type, ill, zoneid, 921 11042 Erik tsl, flags, ipst); 922 11042 Erik if (ire == NULL) { 923 11042 Erik rw_exit(&ipst->ips_ip6_ire_head_lock); 924 11042 Erik return (NULL); 925 11042 Erik } 926 11042 Erik 927 0 stevel /* 928 11042 Erik * round-robin only if we have more than one route in the bucket. 929 11042 Erik * ips_ip_ecmp_behavior controls when we do ECMP 930 11042 Erik * 2: always 931 11042 Erik * 1: for IRE_DEFAULT and /0 IRE_INTERFACE 932 11042 Erik * 0: never 933 11042 Erik * 934 11042 Erik * Note: if we found an IRE_IF_CLONE we won't look at the bucket with 935 11042 Erik * other ECMP IRE_INTERFACEs since the IRE_IF_CLONE is a /128 match 936 11042 Erik * and the IRE_INTERFACESs are likely to be shorter matches. 937 0 stevel */ 938 11042 Erik if (ire->ire_bucket->irb_ire_cnt > 1 && !(flags & MATCH_IRE_GW)) { 939 11042 Erik if (ipst->ips_ip_ecmp_behavior == 2 || 940 11042 Erik (ipst->ips_ip_ecmp_behavior == 1 && 941 11042 Erik IS_DEFAULT_ROUTE_V6(ire))) { 942 11042 Erik ire_t *next_ire; 943 11042 Erik ire_ftable_args_t margs; 944 11042 Erik 945 11131 Erik bzero(&margs, sizeof (margs)); 946 11042 Erik margs.ift_addr_v6 = *addr; 947 11042 Erik if (mask != NULL) 948 11042 Erik margs.ift_mask_v6 = *mask; 949 11042 Erik if (gateway != NULL) 950 11042 Erik margs.ift_gateway_v6 = *gateway; 951 11042 Erik margs.ift_type = type; 952 11042 Erik margs.ift_ill = ill; 953 11042 Erik margs.ift_zoneid = zoneid; 954 11042 Erik margs.ift_tsl = tsl; 955 11042 Erik margs.ift_flags = flags; 956 11042 Erik 957 11042 Erik next_ire = ire_round_robin(ire->ire_bucket, &margs, 958 11042 Erik xmit_hint, ire, ipst); 959 11042 Erik if (next_ire == NULL) { 960 11042 Erik /* keep ire if next_ire is null */ 961 11042 Erik goto done; 962 11042 Erik } 963 11042 Erik ire_refrele(ire); 964 11042 Erik ire = next_ire; 965 11042 Erik } 966 11042 Erik } 967 11042 Erik 968 11042 Erik done: 969 11042 Erik /* Return generation before dropping lock */ 970 11042 Erik if (generationp != NULL) 971 11042 Erik *generationp = ire->ire_generation; 972 11042 Erik 973 11042 Erik rw_exit(&ipst->ips_ip6_ire_head_lock); 974 11042 Erik 975 11042 Erik /* 976 11042 Erik * For shared-IP zones we need additional checks to what was 977 11042 Erik * done in ire_match_args to make sure IRE_LOCALs are handled. 978 11042 Erik * 979 11042 Erik * When ip_restrict_interzone_loopback is set, then 980 11042 Erik * we ensure that IRE_LOCAL are only used for loopback 981 11042 Erik * between zones when the logical "Ethernet" would 982 11042 Erik * have looped them back. That is, if in the absense of 983 11042 Erik * the IRE_LOCAL we would have sent to packet out the 984 11042 Erik * same ill. 985 11042 Erik */ 986 11042 Erik if ((ire->ire_type & IRE_LOCAL) && zoneid != ALL_ZONES && 987 11042 Erik ire->ire_zoneid != zoneid && ire->ire_zoneid != ALL_ZONES && 988 11042 Erik ipst->ips_ip_restrict_interzone_loopback) { 989 11042 Erik ire = ire_alt_local(ire, zoneid, tsl, ill, generationp); 990 11042 Erik ASSERT(ire != NULL); 991 11042 Erik } 992 11042 Erik 993 11042 Erik return (ire); 994 11042 Erik } 995 11042 Erik 996 11042 Erik /* 997 11042 Erik * Look up a single ire. The caller holds either the read or write lock. 998 11042 Erik */ 999 11042 Erik ire_t * 1000 11042 Erik ire_ftable_lookup_impl_v6(const in6_addr_t *addr, const in6_addr_t *mask, 1001 11042 Erik const in6_addr_t *gateway, int type, const ill_t *ill, 1002 11042 Erik zoneid_t zoneid, const ts_label_t *tsl, int flags, 1003 11042 Erik ip_stack_t *ipst) 1004 11042 Erik { 1005 11042 Erik irb_t *irb_ptr; 1006 11042 Erik ire_t *ire = NULL; 1007 11042 Erik int i; 1008 11042 Erik 1009 11042 Erik ASSERT(RW_LOCK_HELD(&ipst->ips_ip6_ire_head_lock)); 1010 0 stevel 1011 0 stevel /* 1012 0 stevel * If the mask is known, the lookup 1013 0 stevel * is simple, if the mask is not known 1014 0 stevel * we need to search. 1015 0 stevel */ 1016 0 stevel if (flags & MATCH_IRE_MASK) { 1017 0 stevel uint_t masklen; 1018 0 stevel 1019 0 stevel masklen = ip_mask_to_plen_v6(mask); 1020 11042 Erik if (ipst->ips_ip_forwarding_table_v6[masklen] == NULL) { 1021 0 stevel return (NULL); 1022 11042 Erik } 1023 3448 dh155122 irb_ptr = &(ipst->ips_ip_forwarding_table_v6[masklen][ 1024 3448 dh155122 IRE_ADDR_MASK_HASH_V6(*addr, *mask, 1025 4714 sowmini ipst->ips_ip6_ftable_hash_size)]); 1026 0 stevel rw_enter(&irb_ptr->irb_lock, RW_READER); 1027 0 stevel for (ire = irb_ptr->irb_ire; ire != NULL; 1028 0 stevel ire = ire->ire_next) { 1029 11042 Erik if (IRE_IS_CONDEMNED(ire)) 1030 0 stevel continue; 1031 0 stevel if (ire_match_args_v6(ire, addr, mask, gateway, type, 1032 11042 Erik ill, zoneid, tsl, flags)) 1033 0 stevel goto found_ire; 1034 0 stevel } 1035 0 stevel rw_exit(&irb_ptr->irb_lock); 1036 0 stevel } else { 1037 11042 Erik uint_t masklen; 1038 11042 Erik 1039 0 stevel /* 1040 0 stevel * In this case we don't know the mask, we need to 1041 0 stevel * search the table assuming different mask sizes. 1042 0 stevel */ 1043 11042 Erik if (flags & MATCH_IRE_SHORTERMASK) { 1044 11042 Erik masklen = ip_mask_to_plen_v6(mask); 1045 11042 Erik if (masklen == 0) { 1046 11042 Erik /* Nothing shorter than zero */ 1047 11042 Erik return (NULL); 1048 11042 Erik } 1049 11042 Erik masklen--; 1050 11042 Erik } else { 1051 11042 Erik masklen = IP6_MASK_TABLE_SIZE - 1; 1052 11042 Erik } 1053 11042 Erik 1054 11042 Erik for (i = masklen; i >= 0; i--) { 1055 0 stevel in6_addr_t tmpmask; 1056 0 stevel 1057 3448 dh155122 if ((ipst->ips_ip_forwarding_table_v6[i]) == NULL) 1058 0 stevel continue; 1059 0 stevel (void) ip_plen_to_mask_v6(i, &tmpmask); 1060 3448 dh155122 irb_ptr = &ipst->ips_ip_forwarding_table_v6[i][ 1061 0 stevel IRE_ADDR_MASK_HASH_V6(*addr, tmpmask, 1062 3448 dh155122 ipst->ips_ip6_ftable_hash_size)]; 1063 0 stevel rw_enter(&irb_ptr->irb_lock, RW_READER); 1064 0 stevel for (ire = irb_ptr->irb_ire; ire != NULL; 1065 0 stevel ire = ire->ire_next) { 1066 11042 Erik if (IRE_IS_CONDEMNED(ire)) 1067 0 stevel continue; 1068 0 stevel if (ire_match_args_v6(ire, addr, 1069 11042 Erik &ire->ire_mask_v6, gateway, type, ill, 1070 11042 Erik zoneid, tsl, flags)) 1071 0 stevel goto found_ire; 1072 0 stevel } 1073 0 stevel rw_exit(&irb_ptr->irb_lock); 1074 0 stevel } 1075 0 stevel } 1076 11042 Erik ASSERT(ire == NULL); 1077 11042 Erik ip1dbg(("ire_ftable_lookup_v6: returning NULL ire")); 1078 11042 Erik return (NULL); 1079 11042 Erik 1080 11042 Erik found_ire: 1081 11042 Erik ire_refhold(ire); 1082 11042 Erik rw_exit(&irb_ptr->irb_lock); 1083 11042 Erik return (ire); 1084 11042 Erik } 1085 11042 Erik 1086 11042 Erik 1087 11042 Erik /* 1088 11042 Erik * This function is called by 1089 11042 Erik * ip_input/ire_route_recursive when doing a route lookup on only the 1090 11042 Erik * destination address. 1091 11042 Erik * 1092 11042 Erik * The optimizations of this function over ire_ftable_lookup are: 1093 11042 Erik * o removing unnecessary flag matching 1094 11042 Erik * o doing longest prefix match instead of overloading it further 1095 11042 Erik * with the unnecessary "best_prefix_match" 1096 11042 Erik * 1097 11042 Erik * If no route is found we return IRE_NOROUTE. 1098 11042 Erik */ 1099 11042 Erik ire_t * 1100 11042 Erik ire_ftable_lookup_simple_v6(const in6_addr_t *addr, uint32_t xmit_hint, 1101 11042 Erik ip_stack_t *ipst, uint_t *generationp) 1102 11042 Erik { 1103 11042 Erik ire_t *ire; 1104 11042 Erik 1105 11042 Erik ire = ire_ftable_lookup_v6(addr, NULL, NULL, 0, NULL, ALL_ZONES, NULL, 1106 11042 Erik MATCH_IRE_DSTONLY, xmit_hint, ipst, generationp); 1107 11042 Erik if (ire == NULL) { 1108 11042 Erik ire = ire_reject(ipst, B_TRUE); 1109 11042 Erik if (generationp != NULL) 1110 11042 Erik *generationp = IRE_GENERATION_VERIFY; 1111 11042 Erik } 1112 11042 Erik /* ftable_lookup did round robin */ 1113 11042 Erik return (ire); 1114 11042 Erik } 1115 11042 Erik 1116 11042 Erik ire_t * 1117 11042 Erik ip_select_route_v6(const in6_addr_t *dst, ip_xmit_attr_t *ixa, 1118 11042 Erik uint_t *generationp, in6_addr_t *setsrcp, int *errorp, boolean_t *multirtp) 1119 11042 Erik { 1120 11042 Erik ASSERT(!(ixa->ixa_flags & IXAF_IS_IPV4)); 1121 11042 Erik 1122 11042 Erik return (ip_select_route(dst, ixa, generationp, setsrcp, errorp, 1123 11042 Erik multirtp)); 1124 11042 Erik } 1125 11042 Erik 1126 11042 Erik /* 1127 11042 Erik * Recursively look for a route to the destination. Can also match on 1128 11042 Erik * the zoneid, ill, and label. Used for the data paths. See also 1129 11042 Erik * ire_route_recursive_dstonly. 1130 11042 Erik * 1131 11042 Erik * If ill is set this means we will match it by adding MATCH_IRE_ILL. 1132 11042 Erik * 1133 11042 Erik * If allocate is not set then we will only inspect the existing IREs; never 1134 11042 Erik * create an IRE_IF_CLONE. This is used on the receive side when we are not 1135 11042 Erik * forwarding. 1136 11042 Erik * 1137 11042 Erik * Note that this function never returns NULL. It returns an IRE_NOROUTE 1138 11042 Erik * instead. 1139 11042 Erik * 1140 11042 Erik * If we find any IRE_LOCAL|BROADCAST etc past the first iteration it 1141 11042 Erik * is an error. 1142 11042 Erik * Allow at most one RTF_INDIRECT. 1143 11042 Erik */ 1144 11042 Erik ire_t * 1145 11042 Erik ire_route_recursive_impl_v6(ire_t *ire, 1146 11042 Erik const in6_addr_t *nexthop, uint_t ire_type, const ill_t *ill_arg, 1147 11042 Erik zoneid_t zoneid, const ts_label_t *tsl, uint_t match_args, 1148 11042 Erik boolean_t allocate, uint32_t xmit_hint, ip_stack_t *ipst, 1149 11042 Erik in6_addr_t *setsrcp, tsol_ire_gw_secattr_t **gwattrp, uint_t *generationp) 1150 11042 Erik { 1151 11042 Erik int i, j; 1152 11042 Erik in6_addr_t v6nexthop = *nexthop; 1153 11042 Erik ire_t *ires[MAX_IRE_RECURSION]; 1154 11042 Erik uint_t generation; 1155 11042 Erik uint_t generations[MAX_IRE_RECURSION]; 1156 11042 Erik boolean_t need_refrele = B_FALSE; 1157 11042 Erik boolean_t invalidate = B_FALSE; 1158 11042 Erik int prefs[MAX_IRE_RECURSION]; 1159 11042 Erik ill_t *ill = NULL; 1160 11042 Erik 1161 11042 Erik if (setsrcp != NULL) 1162 11042 Erik ASSERT(IN6_IS_ADDR_UNSPECIFIED(setsrcp)); 1163 11042 Erik if (gwattrp != NULL) 1164 11042 Erik ASSERT(*gwattrp == NULL); 1165 11042 Erik 1166 11042 Erik if (ill_arg != NULL) 1167 11042 Erik match_args |= MATCH_IRE_ILL; 1168 0 stevel 1169 0 stevel /* 1170 11042 Erik * We iterate up to three times to resolve a route, even though 1171 11042 Erik * we have four slots in the array. The extra slot is for an 1172 11042 Erik * IRE_IF_CLONE we might need to create. 1173 0 stevel */ 1174 11042 Erik i = 0; 1175 11042 Erik while (i < MAX_IRE_RECURSION - 1) { 1176 11042 Erik /* ire_ftable_lookup handles round-robin/ECMP */ 1177 11042 Erik if (ire == NULL) { 1178 11042 Erik ire = ire_ftable_lookup_v6(&v6nexthop, 0, 0, ire_type, 1179 11042 Erik (ill_arg != NULL ? ill_arg : ill), zoneid, tsl, 1180 11042 Erik match_args, xmit_hint, ipst, &generation); 1181 11042 Erik } else { 1182 11042 Erik /* Caller passed it; extra hold since we will rele */ 1183 11042 Erik ire_refhold(ire); 1184 11042 Erik if (generationp != NULL) 1185 11042 Erik generation = *generationp; 1186 11042 Erik else 1187 11042 Erik generation = IRE_GENERATION_VERIFY; 1188 11042 Erik } 1189 0 stevel 1190 11042 Erik if (ire == NULL) 1191 11042 Erik ire = ire_reject(ipst, B_TRUE); 1192 0 stevel 1193 11042 Erik /* Need to return the ire with RTF_REJECT|BLACKHOLE */ 1194 11042 Erik if (ire->ire_flags & (RTF_REJECT|RTF_BLACKHOLE)) 1195 11042 Erik goto error; 1196 11042 Erik 1197 11042 Erik ASSERT(!(ire->ire_type & IRE_MULTICAST)); /* Not in ftable */ 1198 11042 Erik 1199 11042 Erik if (i != 0) { 1200 11131 Erik prefs[i] = ire_pref(ire); 1201 11042 Erik /* 1202 11042 Erik * Don't allow anything unusual past the first 1203 11042 Erik * iteration. 1204 11042 Erik */ 1205 11042 Erik if ((ire->ire_type & 1206 11042 Erik (IRE_LOCAL|IRE_LOOPBACK|IRE_BROADCAST)) || 1207 11042 Erik prefs[i] <= prefs[i-1]) { 1208 11042 Erik ire_refrele(ire); 1209 11042 Erik ire = ire_reject(ipst, B_TRUE); 1210 11042 Erik goto error; 1211 0 stevel } 1212 0 stevel } 1213 11042 Erik /* We have a usable IRE */ 1214 11042 Erik ires[i] = ire; 1215 11042 Erik generations[i] = generation; 1216 11042 Erik i++; 1217 0 stevel 1218 11042 Erik /* The first RTF_SETSRC address is passed back if setsrcp */ 1219 11042 Erik if ((ire->ire_flags & RTF_SETSRC) && 1220 11042 Erik setsrcp != NULL && IN6_IS_ADDR_UNSPECIFIED(setsrcp)) { 1221 11042 Erik ASSERT(!IN6_IS_ADDR_UNSPECIFIED( 1222 11042 Erik &ire->ire_setsrc_addr_v6)); 1223 11042 Erik *setsrcp = ire->ire_setsrc_addr_v6; 1224 11042 Erik } 1225 11042 Erik 1226 11042 Erik /* The first ire_gw_secattr is passed back if gwattrp */ 1227 11042 Erik if (ire->ire_gw_secattr != NULL && 1228 11042 Erik gwattrp != NULL && *gwattrp == NULL) 1229 11042 Erik *gwattrp = ire->ire_gw_secattr; 1230 0 stevel 1231 0 stevel /* 1232 11042 Erik * Check if we have a short-cut pointer to an IRE for this 1233 11042 Erik * destination, and that the cached dependency isn't stale. 1234 11042 Erik * In that case we've rejoined an existing tree towards a 1235 11042 Erik * parent, thus we don't need to continue the loop to 1236 11042 Erik * discover the rest of the tree. 1237 0 stevel */ 1238 11042 Erik mutex_enter(&ire->ire_lock); 1239 11042 Erik if (ire->ire_dep_parent != NULL && 1240 11042 Erik ire->ire_dep_parent->ire_generation == 1241 11042 Erik ire->ire_dep_parent_generation) { 1242 11042 Erik mutex_exit(&ire->ire_lock); 1243 11042 Erik ire = NULL; 1244 11042 Erik goto done; 1245 11042 Erik } 1246 11042 Erik mutex_exit(&ire->ire_lock); 1247 11042 Erik 1248 11042 Erik /* 1249 11042 Erik * If this type should have an ire_nce_cache (even if it 1250 11042 Erik * doesn't yet have one) then we are done. Includes 1251 11042 Erik * IRE_INTERFACE with a full 128 bit mask. 1252 11042 Erik */ 1253 11042 Erik if (ire->ire_nce_capable) { 1254 11042 Erik ire = NULL; 1255 11042 Erik goto done; 1256 11042 Erik } 1257 11042 Erik ASSERT(!(ire->ire_type & IRE_IF_CLONE)); 1258 11042 Erik /* 1259 11042 Erik * For an IRE_INTERFACE we create an IRE_IF_CLONE for this 1260 11042 Erik * particular destination 1261 11042 Erik */ 1262 11042 Erik if (ire->ire_type & IRE_INTERFACE) { 1263 11042 Erik ire_t *clone; 1264 11042 Erik 1265 11042 Erik ASSERT(ire->ire_masklen != IPV6_ABITS); 1266 11042 Erik 1267 11042 Erik /* 1268 11042 Erik * In the case of ip_input and ILLF_FORWARDING not 1269 11042 Erik * being set, and in the case of RTM_GET, 1270 11042 Erik * there is no point in allocating 1271 11042 Erik * an IRE_IF_CLONE. We return the IRE_INTERFACE. 1272 11042 Erik * Note that !allocate can result in a ire_dep_parent 1273 11042 Erik * which is IRE_IF_* without an IRE_IF_CLONE. 1274 11042 Erik * We recover from that when we need to send packets 1275 11042 Erik * by ensuring that the generations become 1276 11042 Erik * IRE_GENERATION_VERIFY in this case. 1277 11042 Erik */ 1278 11042 Erik if (!allocate) { 1279 11042 Erik invalidate = B_TRUE; 1280 11042 Erik ire = NULL; 1281 11042 Erik goto done; 1282 11042 Erik } 1283 11042 Erik 1284 11042 Erik clone = ire_create_if_clone(ire, &v6nexthop, 1285 11042 Erik &generation); 1286 11042 Erik if (clone == NULL) { 1287 11042 Erik /* 1288 11042 Erik * Temporary failure - no memory. 1289 11042 Erik * Don't want caller to cache IRE_NOROUTE. 1290 11042 Erik */ 1291 11042 Erik invalidate = B_TRUE; 1292 11042 Erik ire = ire_blackhole(ipst, B_TRUE); 1293 11042 Erik goto error; 1294 11042 Erik } 1295 11042 Erik /* 1296 11042 Erik * Make clone next to last entry and the 1297 11042 Erik * IRE_INTERFACE the last in the dependency 1298 11042 Erik * chain since the clone depends on the 1299 11042 Erik * IRE_INTERFACE. 1300 11042 Erik */ 1301 11042 Erik ASSERT(i >= 1); 1302 11042 Erik ASSERT(i < MAX_IRE_RECURSION); 1303 11042 Erik 1304 11042 Erik ires[i] = ires[i-1]; 1305 11042 Erik generations[i] = generations[i-1]; 1306 11042 Erik ires[i-1] = clone; 1307 11042 Erik generations[i-1] = generation; 1308 11042 Erik i++; 1309 11042 Erik 1310 11042 Erik ire = NULL; 1311 11042 Erik goto done; 1312 0 stevel } 1313 0 stevel 1314 0 stevel /* 1315 11042 Erik * We only match on the type and optionally ILL when 1316 11042 Erik * recursing. The type match is used by some callers 1317 11042 Erik * to exclude certain types (such as IRE_IF_CLONE or 1318 11042 Erik * IRE_LOCAL|IRE_LOOPBACK). 1319 0 stevel */ 1320 11042 Erik match_args &= MATCH_IRE_TYPE; 1321 11042 Erik v6nexthop = ire->ire_gateway_addr_v6; 1322 11042 Erik if (ill == NULL && ire->ire_ill != NULL) { 1323 11042 Erik ill = ire->ire_ill; 1324 11042 Erik need_refrele = B_TRUE; 1325 11042 Erik ill_refhold(ill); 1326 11042 Erik match_args |= MATCH_IRE_ILL; 1327 0 stevel } 1328 11131 Erik /* 1329 11131 Erik * We set the prefs[i] value above if i > 0. We've already 1330 11131 Erik * done i++ so i is one in the case of the first time around. 1331 11131 Erik */ 1332 11131 Erik if (i == 1) 1333 11131 Erik prefs[0] = ire_pref(ire); 1334 11042 Erik ire = NULL; 1335 11042 Erik } 1336 11042 Erik ASSERT(ire == NULL); 1337 11042 Erik ire = ire_reject(ipst, B_TRUE); 1338 11042 Erik 1339 11042 Erik error: 1340 11042 Erik ASSERT(ire != NULL); 1341 11042 Erik if (need_refrele) 1342 11042 Erik ill_refrele(ill); 1343 11042 Erik 1344 11042 Erik /* 1345 11042 Erik * In the case of MULTIRT we want to try a different IRE the next 1346 11042 Erik * time. We let the next packet retry in that case. 1347 11042 Erik */ 1348 11042 Erik if (i > 0 && (ires[0]->ire_flags & RTF_MULTIRT)) 1349 11042 Erik (void) ire_no_good(ires[0]); 1350 11042 Erik 1351 11042 Erik cleanup: 1352 11042 Erik /* cleanup ires[i] */ 1353 11042 Erik ire_dep_unbuild(ires, i); 1354 11042 Erik for (j = 0; j < i; j++) 1355 11042 Erik ire_refrele(ires[j]); 1356 11042 Erik 1357 11042 Erik ASSERT(ire->ire_flags & (RTF_REJECT|RTF_BLACKHOLE)); 1358 11042 Erik /* 1359 11042 Erik * Use IRE_GENERATION_VERIFY to ensure that ip_output will redo the 1360 11042 Erik * ip_select_route since the reject or lack of memory might be gone. 1361 11042 Erik */ 1362 11042 Erik if (generationp != NULL) 1363 11042 Erik *generationp = IRE_GENERATION_VERIFY; 1364 11042 Erik return (ire); 1365 11042 Erik 1366 11042 Erik done: 1367 11042 Erik ASSERT(ire == NULL); 1368 11042 Erik if (need_refrele) 1369 11042 Erik ill_refrele(ill); 1370 11042 Erik 1371 11042 Erik /* Build dependencies */ 1372 11131 Erik if (i > 1 && !ire_dep_build(ires, generations, i)) { 1373 11042 Erik /* Something in chain was condemned; tear it apart */ 1374 11042 Erik ire = ire_blackhole(ipst, B_TRUE); 1375 11042 Erik goto cleanup; 1376 11042 Erik } 1377 11042 Erik 1378 11042 Erik /* 1379 11042 Erik * Release all refholds except the one for ires[0] that we 1380 11042 Erik * will return to the caller. 1381 11042 Erik */ 1382 11042 Erik for (j = 1; j < i; j++) 1383 11042 Erik ire_refrele(ires[j]); 1384 11042 Erik 1385 11042 Erik if (invalidate) { 1386 0 stevel /* 1387 11042 Erik * Since we needed to allocate but couldn't we need to make 1388 11042 Erik * sure that the dependency chain is rebuilt the next time. 1389 0 stevel */ 1390 11042 Erik ire_dep_invalidate_generations(ires[0]); 1391 11042 Erik generation = IRE_GENERATION_VERIFY; 1392 11042 Erik } else { 1393 11042 Erik /* 1394 11042 Erik * IREs can have been added or deleted while we did the 1395 11042 Erik * recursive lookup and we can't catch those until we've built 1396 11042 Erik * the dependencies. We verify the stored 1397 11042 Erik * ire_dep_parent_generation to catch any such changes and 1398 11042 Erik * return IRE_GENERATION_VERIFY (which will cause 1399 11042 Erik * ip_select_route to be called again so we can redo the 1400 11042 Erik * recursive lookup next time we send a packet. 1401 11042 Erik */ 1402 11131 Erik if (ires[0]->ire_dep_parent == NULL) 1403 11131 Erik generation = ires[0]->ire_generation; 1404 11131 Erik else 1405 11131 Erik generation = ire_dep_validate_generations(ires[0]); 1406 11042 Erik if (generations[0] != ires[0]->ire_generation) { 1407 11042 Erik /* Something changed at the top */ 1408 11042 Erik generation = IRE_GENERATION_VERIFY; 1409 0 stevel } 1410 0 stevel } 1411 11042 Erik if (generationp != NULL) 1412 11042 Erik *generationp = generation; 1413 0 stevel 1414 11042 Erik return (ires[0]); 1415 11042 Erik } 1416 0 stevel 1417 11042 Erik ire_t * 1418 11042 Erik ire_route_recursive_v6(const in6_addr_t *nexthop, uint_t ire_type, 1419 11042 Erik const ill_t *ill, zoneid_t zoneid, const ts_label_t *tsl, uint_t match_args, 1420 11042 Erik boolean_t allocate, uint32_t xmit_hint, ip_stack_t *ipst, 1421 11042 Erik in6_addr_t *setsrcp, tsol_ire_gw_secattr_t **gwattrp, uint_t *generationp) 1422 11042 Erik { 1423 11042 Erik return (ire_route_recursive_impl_v6(NULL, nexthop, ire_type, ill, 1424 11042 Erik zoneid, tsl, match_args, allocate, xmit_hint, ipst, setsrcp, 1425 11042 Erik gwattrp, generationp)); 1426 0 stevel } 1427 0 stevel 1428 0 stevel /* 1429 11042 Erik * Recursively look for a route to the destination. 1430 11042 Erik * We only handle a destination match here, yet we have the same arguments 1431 11042 Erik * as the full match to allow function pointers to select between the two. 1432 11042 Erik * 1433 11042 Erik * Note that this function never returns NULL. It returns an IRE_NOROUTE 1434 11042 Erik * instead. 1435 11042 Erik * 1436 11042 Erik * If we find any IRE_LOCAL|BROADCAST etc past the first iteration it 1437 11042 Erik * is an error. 1438 11042 Erik * Allow at most one RTF_INDIRECT. 1439 0 stevel */ 1440 0 stevel ire_t * 1441 11042 Erik ire_route_recursive_dstonly_v6(const in6_addr_t *nexthop, boolean_t allocate, 1442 11042 Erik uint32_t xmit_hint, ip_stack_t *ipst) 1443 0 stevel { 1444 0 stevel ire_t *ire; 1445 11042 Erik ire_t *ire1; 1446 11042 Erik uint_t generation; 1447 0 stevel 1448 11042 Erik /* ire_ftable_lookup handles round-robin/ECMP */ 1449 11042 Erik ire = ire_ftable_lookup_simple_v6(nexthop, xmit_hint, ipst, 1450 11042 Erik &generation); 1451 11042 Erik ASSERT(ire != NULL); 1452 0 stevel 1453 0 stevel /* 1454 11042 Erik * If this type should have an ire_nce_cache (even if it 1455 11042 Erik * doesn't yet have one) then we are done. Includes 1456 11042 Erik * IRE_INTERFACE with a full 128 bit mask. 1457 0 stevel */ 1458 11042 Erik if (ire->ire_nce_capable) 1459 0 stevel return (ire); 1460 11042 Erik 1461 0 stevel /* 1462 11042 Erik * If the IRE has a current cached parent we know that the whole 1463 11042 Erik * parent chain is current, hence we don't need to discover and 1464 11042 Erik * build any dependencies by doing a recursive lookup. 1465 0 stevel */ 1466 0 stevel mutex_enter(&ire->ire_lock); 1467 11042 Erik if (ire->ire_dep_parent != NULL && 1468 11042 Erik ire->ire_dep_parent->ire_generation == 1469 11042 Erik ire->ire_dep_parent_generation) { 1470 11042 Erik mutex_exit(&ire->ire_lock); 1471 11042 Erik return (ire); 1472 11042 Erik } 1473 0 stevel mutex_exit(&ire->ire_lock); 1474 8485 Peter 1475 8485 Peter /* 1476 11042 Erik * Fallback to loop in the normal code starting with the ire 1477 11042 Erik * we found. Normally this would return the same ire. 1478 8485 Peter */ 1479 11042 Erik ire1 = ire_route_recursive_impl_v6(ire, nexthop, 0, NULL, ALL_ZONES, 1480 11042 Erik NULL, MATCH_IRE_DSTONLY, allocate, xmit_hint, ipst, NULL, NULL, 1481 11042 Erik &generation); 1482 11042 Erik ire_refrele(ire); 1483 11042 Erik return (ire1); 1484 0 stevel } 1485