1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2008 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 26 #pragma ident "%Z%%M% %I% %E% SMI" 27 28 #include <sys/types.h> 29 #include <sys/stream.h> 30 #include <sys/stropts.h> 31 #include <sys/strsun.h> 32 #include <sys/sysmacros.h> 33 #include <sys/errno.h> 34 #include <sys/dlpi.h> 35 #include <sys/socket.h> 36 #include <sys/ddi.h> 37 #include <sys/sunddi.h> 38 #include <sys/cmn_err.h> 39 #include <sys/debug.h> 40 #include <sys/vtrace.h> 41 #include <sys/kmem.h> 42 #include <sys/zone.h> 43 #include <sys/ethernet.h> 44 #include <sys/sdt.h> 45 46 #include <net/if.h> 47 #include <net/if_types.h> 48 #include <net/if_dl.h> 49 #include <net/route.h> 50 #include <netinet/in.h> 51 #include <netinet/ip6.h> 52 #include <netinet/icmp6.h> 53 54 #include <inet/common.h> 55 #include <inet/mi.h> 56 #include <inet/mib2.h> 57 #include <inet/nd.h> 58 #include <inet/ip.h> 59 #include <inet/ip_impl.h> 60 #include <inet/ipclassifier.h> 61 #include <inet/ip_if.h> 62 #include <inet/ip_ire.h> 63 #include <inet/ip_rts.h> 64 #include <inet/ip6.h> 65 #include <inet/ip_ndp.h> 66 #include <inet/ipsec_impl.h> 67 #include <inet/ipsec_info.h> 68 #include <inet/sctp_ip.h> 69 70 /* 71 * Function names with nce_ prefix are static while function 72 * names with ndp_ prefix are used by rest of the IP. 73 * 74 * Lock ordering: 75 * 76 * ndp_g_lock -> ill_lock -> nce_lock 77 * 78 * The ndp_g_lock protects the NCE hash (nce_hash_tbl, NCE_HASH_PTR) and 79 * nce_next. Nce_lock protects the contents of the NCE (particularly 80 * nce_refcnt). 81 */ 82 83 static boolean_t nce_cmp_ll_addr(const nce_t *nce, const uchar_t *new_ll_addr, 84 uint32_t ll_addr_len); 85 static void nce_ire_delete(nce_t *nce); 86 static void nce_ire_delete1(ire_t *ire, char *nce_arg); 87 static void nce_set_ll(nce_t *nce, uchar_t *ll_addr); 88 static nce_t *nce_lookup_addr(ill_t *, const in6_addr_t *, nce_t *); 89 static nce_t *nce_lookup_mapping(ill_t *ill, const in6_addr_t *addr); 90 static void nce_make_mapping(nce_t *nce, uchar_t *addrpos, 91 uchar_t *addr); 92 static int nce_set_multicast(ill_t *ill, const in6_addr_t *addr); 93 static void nce_queue_mp(nce_t *nce, mblk_t *mp); 94 static mblk_t *nce_udreq_alloc(ill_t *ill); 95 static void nce_update(nce_t *nce, uint16_t new_state, 96 uchar_t *new_ll_addr); 97 static uint32_t nce_solicit(nce_t *nce, mblk_t *mp); 98 static boolean_t nce_xmit(ill_t *ill, uint32_t operation, 99 ill_t *hwaddr_ill, boolean_t use_lla_addr, const in6_addr_t *sender, 100 const in6_addr_t *target, int flag); 101 static int ndp_add_v4(ill_t *, const in_addr_t *, uint16_t, 102 nce_t **, nce_t *); 103 104 #ifdef DEBUG 105 static void nce_trace_cleanup(const nce_t *); 106 #endif 107 108 #define NCE_HASH_PTR_V4(ipst, addr) \ 109 (&((ipst)->ips_ndp4->nce_hash_tbl[IRE_ADDR_HASH(addr, NCE_TABLE_SIZE)])) 110 111 #define NCE_HASH_PTR_V6(ipst, addr) \ 112 (&((ipst)->ips_ndp6->nce_hash_tbl[NCE_ADDR_HASH_V6(addr, \ 113 NCE_TABLE_SIZE)])) 114 115 /* 116 * Compute default flags to use for an advertisement of this nce's address. 117 */ 118 static int 119 nce_advert_flags(const nce_t *nce) 120 { 121 int flag = 0; 122 123 if (nce->nce_flags & NCE_F_ISROUTER) 124 flag |= NDP_ISROUTER; 125 if (!(nce->nce_flags & NCE_F_ANYCAST)) 126 flag |= NDP_ORIDE; 127 128 return (flag); 129 } 130 131 /* Non-tunable probe interval, based on link capabilities */ 132 #define ILL_PROBE_INTERVAL(ill) ((ill)->ill_note_link ? 150 : 1500) 133 134 /* 135 * NDP Cache Entry creation routine. 136 * Mapped entries will never do NUD . 137 * This routine must always be called with ndp6->ndp_g_lock held. 138 * Prior to return, nce_refcnt is incremented. 139 */ 140 int 141 ndp_add_v6(ill_t *ill, uchar_t *hw_addr, const in6_addr_t *addr, 142 const in6_addr_t *mask, const in6_addr_t *extract_mask, 143 uint32_t hw_extract_start, uint16_t flags, uint16_t state, 144 nce_t **newnce) 145 { 146 static nce_t nce_nil; 147 nce_t *nce; 148 mblk_t *mp; 149 mblk_t *template; 150 nce_t **ncep; 151 int err; 152 boolean_t dropped = B_FALSE; 153 ip_stack_t *ipst = ill->ill_ipst; 154 155 ASSERT(MUTEX_HELD(&ipst->ips_ndp6->ndp_g_lock)); 156 ASSERT(ill != NULL && ill->ill_isv6); 157 if (IN6_IS_ADDR_UNSPECIFIED(addr)) { 158 ip0dbg(("ndp_add_v6: no addr\n")); 159 return (EINVAL); 160 } 161 if ((flags & ~NCE_EXTERNAL_FLAGS_MASK)) { 162 ip0dbg(("ndp_add_v6: flags = %x\n", (int)flags)); 163 return (EINVAL); 164 } 165 if (IN6_IS_ADDR_UNSPECIFIED(extract_mask) && 166 (flags & NCE_F_MAPPING)) { 167 ip0dbg(("ndp_add_v6: extract mask zero for mapping")); 168 return (EINVAL); 169 } 170 /* 171 * Allocate the mblk to hold the nce. 172 * 173 * XXX This can come out of a separate cache - nce_cache. 174 * We don't need the mp anymore as there are no more 175 * "qwriter"s 176 */ 177 mp = allocb(sizeof (nce_t), BPRI_MED); 178 if (mp == NULL) 179 return (ENOMEM); 180 181 nce = (nce_t *)mp->b_rptr; 182 mp->b_wptr = (uchar_t *)&nce[1]; 183 *nce = nce_nil; 184 185 /* 186 * This one holds link layer address 187 */ 188 if (ill->ill_net_type == IRE_IF_RESOLVER) { 189 template = nce_udreq_alloc(ill); 190 } else { 191 if (ill->ill_resolver_mp == NULL) { 192 freeb(mp); 193 return (EINVAL); 194 } 195 ASSERT((ill->ill_net_type == IRE_IF_NORESOLVER)); 196 template = copyb(ill->ill_resolver_mp); 197 } 198 if (template == NULL) { 199 freeb(mp); 200 return (ENOMEM); 201 } 202 nce->nce_ill = ill; 203 nce->nce_ipversion = IPV6_VERSION; 204 nce->nce_flags = flags; 205 nce->nce_state = state; 206 nce->nce_pcnt = ND_MAX_UNICAST_SOLICIT; 207 nce->nce_rcnt = ill->ill_xmit_count; 208 nce->nce_addr = *addr; 209 nce->nce_mask = *mask; 210 nce->nce_extract_mask = *extract_mask; 211 nce->nce_ll_extract_start = hw_extract_start; 212 nce->nce_fp_mp = NULL; 213 nce->nce_res_mp = template; 214 if (state == ND_REACHABLE) 215 nce->nce_last = TICK_TO_MSEC(lbolt64); 216 else 217 nce->nce_last = 0; 218 nce->nce_qd_mp = NULL; 219 nce->nce_mp = mp; 220 if (hw_addr != NULL) 221 nce_set_ll(nce, hw_addr); 222 /* This one is for nce getting created */ 223 nce->nce_refcnt = 1; 224 mutex_init(&nce->nce_lock, NULL, MUTEX_DEFAULT, NULL); 225 if (nce->nce_flags & NCE_F_MAPPING) { 226 ASSERT(IN6_IS_ADDR_MULTICAST(addr)); 227 ASSERT(!IN6_IS_ADDR_UNSPECIFIED(&nce->nce_mask)); 228 ASSERT(!IN6_IS_ADDR_UNSPECIFIED(&nce->nce_extract_mask)); 229 ncep = &ipst->ips_ndp6->nce_mask_entries; 230 } else { 231 ncep = ((nce_t **)NCE_HASH_PTR_V6(ipst, *addr)); 232 } 233 234 nce->nce_trace_disable = B_FALSE; 235 236 /* 237 * Atomically ensure that the ill is not CONDEMNED, before 238 * adding the NCE. 239 */ 240 mutex_enter(&ill->ill_lock); 241 if (ill->ill_state_flags & ILL_CONDEMNED) { 242 mutex_exit(&ill->ill_lock); 243 freeb(mp); 244 freeb(template); 245 return (EINVAL); 246 } 247 if ((nce->nce_next = *ncep) != NULL) 248 nce->nce_next->nce_ptpn = &nce->nce_next; 249 *ncep = nce; 250 nce->nce_ptpn = ncep; 251 *newnce = nce; 252 /* This one is for nce being used by an active thread */ 253 NCE_REFHOLD(*newnce); 254 255 /* Bump up the number of nce's referencing this ill */ 256 DTRACE_PROBE3(ill__incr__cnt, (ill_t *), ill, 257 (char *), "nce", (void *), nce); 258 ill->ill_nce_cnt++; 259 mutex_exit(&ill->ill_lock); 260 261 err = 0; 262 if ((flags & NCE_F_PERMANENT) && state == ND_PROBE) { 263 mutex_enter(&nce->nce_lock); 264 mutex_exit(&ipst->ips_ndp6->ndp_g_lock); 265 nce->nce_pcnt = ND_MAX_UNICAST_SOLICIT; 266 mutex_exit(&nce->nce_lock); 267 dropped = nce_xmit(ill, ND_NEIGHBOR_SOLICIT, NULL, B_FALSE, 268 &ipv6_all_zeros, addr, NDP_PROBE); 269 if (dropped) { 270 mutex_enter(&nce->nce_lock); 271 nce->nce_pcnt++; 272 mutex_exit(&nce->nce_lock); 273 } 274 NDP_RESTART_TIMER(nce, ILL_PROBE_INTERVAL(ill)); 275 mutex_enter(&ipst->ips_ndp6->ndp_g_lock); 276 err = EINPROGRESS; 277 } else if (flags & NCE_F_UNSOL_ADV) { 278 /* 279 * We account for the transmit below by assigning one 280 * less than the ndd variable. Subsequent decrements 281 * are done in ndp_timer. 282 */ 283 mutex_enter(&nce->nce_lock); 284 mutex_exit(&ipst->ips_ndp6->ndp_g_lock); 285 nce->nce_unsolicit_count = ipst->ips_ip_ndp_unsolicit_count - 1; 286 mutex_exit(&nce->nce_lock); 287 dropped = nce_xmit(ill, 288 ND_NEIGHBOR_ADVERT, 289 ill, /* ill to be used for extracting ill_nd_lla */ 290 B_TRUE, /* use ill_nd_lla */ 291 addr, /* Source and target of the advertisement pkt */ 292 &ipv6_all_hosts_mcast, /* Destination of the packet */ 293 nce_advert_flags(nce)); 294 mutex_enter(&nce->nce_lock); 295 if (dropped) 296 nce->nce_unsolicit_count++; 297 if (nce->nce_unsolicit_count != 0) { 298 nce->nce_timeout_id = timeout(ndp_timer, nce, 299 MSEC_TO_TICK(ipst->ips_ip_ndp_unsolicit_interval)); 300 } 301 mutex_exit(&nce->nce_lock); 302 mutex_enter(&ipst->ips_ndp6->ndp_g_lock); 303 } 304 /* 305 * If the hw_addr is NULL, typically for ND_INCOMPLETE nces, then 306 * we call nce_fastpath as soon as the nce is resolved in ndp_process. 307 * We call nce_fastpath from nce_update if the link layer address of 308 * the peer changes from nce_update 309 */ 310 if (hw_addr != NULL || ill->ill_net_type == IRE_IF_NORESOLVER) 311 nce_fastpath(nce); 312 return (err); 313 } 314 315 int 316 ndp_lookup_then_add_v6(ill_t *ill, uchar_t *hw_addr, const in6_addr_t *addr, 317 const in6_addr_t *mask, const in6_addr_t *extract_mask, 318 uint32_t hw_extract_start, uint16_t flags, uint16_t state, 319 nce_t **newnce) 320 { 321 int err = 0; 322 nce_t *nce; 323 ip_stack_t *ipst = ill->ill_ipst; 324 325 ASSERT(ill->ill_isv6); 326 mutex_enter(&ipst->ips_ndp6->ndp_g_lock); 327 328 /* Get head of v6 hash table */ 329 nce = *((nce_t **)NCE_HASH_PTR_V6(ipst, *addr)); 330 nce = nce_lookup_addr(ill, addr, nce); 331 if (nce == NULL) { 332 err = ndp_add_v6(ill, 333 hw_addr, 334 addr, 335 mask, 336 extract_mask, 337 hw_extract_start, 338 flags, 339 state, 340 newnce); 341 } else { 342 *newnce = nce; 343 err = EEXIST; 344 } 345 mutex_exit(&ipst->ips_ndp6->ndp_g_lock); 346 return (err); 347 } 348 349 /* 350 * Remove all the CONDEMNED nces from the appropriate hash table. 351 * We create a private list of NCEs, these may have ires pointing 352 * to them, so the list will be passed through to clean up dependent 353 * ires and only then we can do NCE_REFRELE which can make NCE inactive. 354 */ 355 static void 356 nce_remove(ndp_g_t *ndp, nce_t *nce, nce_t **free_nce_list) 357 { 358 nce_t *nce1; 359 nce_t **ptpn; 360 361 ASSERT(MUTEX_HELD(&ndp->ndp_g_lock)); 362 ASSERT(ndp->ndp_g_walker == 0); 363 for (; nce; nce = nce1) { 364 nce1 = nce->nce_next; 365 mutex_enter(&nce->nce_lock); 366 if (nce->nce_flags & NCE_F_CONDEMNED) { 367 ptpn = nce->nce_ptpn; 368 nce1 = nce->nce_next; 369 if (nce1 != NULL) 370 nce1->nce_ptpn = ptpn; 371 *ptpn = nce1; 372 nce->nce_ptpn = NULL; 373 nce->nce_next = NULL; 374 nce->nce_next = *free_nce_list; 375 *free_nce_list = nce; 376 } 377 mutex_exit(&nce->nce_lock); 378 } 379 } 380 381 /* 382 * 1. Mark the nce CONDEMNED. This ensures that no new nce_lookup() 383 * will return this NCE. Also no new IREs will be created that 384 * point to this NCE (See ire_add_v6). Also no new timeouts will 385 * be started (See NDP_RESTART_TIMER). 386 * 2. Cancel any currently running timeouts. 387 * 3. If there is an ndp walker, return. The walker will do the cleanup. 388 * This ensures that walkers see a consistent list of NCEs while walking. 389 * 4. Otherwise remove the NCE from the list of NCEs 390 * 5. Delete all IREs pointing to this NCE. 391 */ 392 void 393 ndp_delete(nce_t *nce) 394 { 395 nce_t **ptpn; 396 nce_t *nce1; 397 int ipversion = nce->nce_ipversion; 398 ndp_g_t *ndp; 399 ip_stack_t *ipst = nce->nce_ill->ill_ipst; 400 401 if (ipversion == IPV4_VERSION) 402 ndp = ipst->ips_ndp4; 403 else 404 ndp = ipst->ips_ndp6; 405 406 /* Serialize deletes */ 407 mutex_enter(&nce->nce_lock); 408 if (nce->nce_flags & NCE_F_CONDEMNED) { 409 /* Some other thread is doing the delete */ 410 mutex_exit(&nce->nce_lock); 411 return; 412 } 413 /* 414 * Caller has a refhold. Also 1 ref for being in the list. Thus 415 * refcnt has to be >= 2 416 */ 417 ASSERT(nce->nce_refcnt >= 2); 418 nce->nce_flags |= NCE_F_CONDEMNED; 419 mutex_exit(&nce->nce_lock); 420 421 nce_fastpath_list_delete(nce); 422 423 /* 424 * Cancel any running timer. Timeout can't be restarted 425 * since CONDEMNED is set. Can't hold nce_lock across untimeout. 426 * Passing invalid timeout id is fine. 427 */ 428 if (nce->nce_timeout_id != 0) { 429 (void) untimeout(nce->nce_timeout_id); 430 nce->nce_timeout_id = 0; 431 } 432 433 mutex_enter(&ndp->ndp_g_lock); 434 if (nce->nce_ptpn == NULL) { 435 /* 436 * The last ndp walker has already removed this nce from 437 * the list after we marked the nce CONDEMNED and before 438 * we grabbed the global lock. 439 */ 440 mutex_exit(&ndp->ndp_g_lock); 441 return; 442 } 443 if (ndp->ndp_g_walker > 0) { 444 /* 445 * Can't unlink. The walker will clean up 446 */ 447 ndp->ndp_g_walker_cleanup = B_TRUE; 448 mutex_exit(&ndp->ndp_g_lock); 449 return; 450 } 451 452 /* 453 * Now remove the nce from the list. NDP_RESTART_TIMER won't restart 454 * the timer since it is marked CONDEMNED. 455 */ 456 ptpn = nce->nce_ptpn; 457 nce1 = nce->nce_next; 458 if (nce1 != NULL) 459 nce1->nce_ptpn = ptpn; 460 *ptpn = nce1; 461 nce->nce_ptpn = NULL; 462 nce->nce_next = NULL; 463 mutex_exit(&ndp->ndp_g_lock); 464 465 nce_ire_delete(nce); 466 } 467 468 void 469 ndp_inactive(nce_t *nce) 470 { 471 mblk_t **mpp; 472 ill_t *ill; 473 474 ASSERT(nce->nce_refcnt == 0); 475 ASSERT(MUTEX_HELD(&nce->nce_lock)); 476 ASSERT(nce->nce_fastpath == NULL); 477 478 /* Free all nce allocated messages */ 479 mpp = &nce->nce_first_mp_to_free; 480 do { 481 while (*mpp != NULL) { 482 mblk_t *mp; 483 484 mp = *mpp; 485 *mpp = mp->b_next; 486 487 inet_freemsg(mp); 488 } 489 } while (mpp++ != &nce->nce_last_mp_to_free); 490 491 #ifdef DEBUG 492 nce_trace_cleanup(nce); 493 #endif 494 495 ill = nce->nce_ill; 496 mutex_enter(&ill->ill_lock); 497 DTRACE_PROBE3(ill__decr__cnt, (ill_t *), ill, 498 (char *), "nce", (void *), nce); 499 ill->ill_nce_cnt--; 500 /* 501 * If the number of nce's associated with this ill have dropped 502 * to zero, check whether we need to restart any operation that 503 * is waiting for this to happen. 504 */ 505 if (ILL_DOWN_OK(ill)) { 506 /* ipif_ill_refrele_tail drops the ill_lock */ 507 ipif_ill_refrele_tail(ill); 508 } else { 509 mutex_exit(&ill->ill_lock); 510 } 511 mutex_destroy(&nce->nce_lock); 512 if (nce->nce_mp != NULL) 513 inet_freemsg(nce->nce_mp); 514 } 515 516 /* 517 * ndp_walk routine. Delete the nce if it is associated with the ill 518 * that is going away. Always called as a writer. 519 */ 520 void 521 ndp_delete_per_ill(nce_t *nce, uchar_t *arg) 522 { 523 if ((nce != NULL) && nce->nce_ill == (ill_t *)arg) { 524 ndp_delete(nce); 525 } 526 } 527 528 /* 529 * Walk a list of to be inactive NCEs and blow away all the ires. 530 */ 531 static void 532 nce_ire_delete_list(nce_t *nce) 533 { 534 nce_t *nce_next; 535 536 ASSERT(nce != NULL); 537 while (nce != NULL) { 538 nce_next = nce->nce_next; 539 nce->nce_next = NULL; 540 541 /* 542 * It is possible for the last ndp walker (this thread) 543 * to come here after ndp_delete has marked the nce CONDEMNED 544 * and before it has removed the nce from the fastpath list 545 * or called untimeout. So we need to do it here. It is safe 546 * for both ndp_delete and this thread to do it twice or 547 * even simultaneously since each of the threads has a 548 * reference on the nce. 549 */ 550 nce_fastpath_list_delete(nce); 551 /* 552 * Cancel any running timer. Timeout can't be restarted 553 * since CONDEMNED is set. Can't hold nce_lock across untimeout. 554 * Passing invalid timeout id is fine. 555 */ 556 if (nce->nce_timeout_id != 0) { 557 (void) untimeout(nce->nce_timeout_id); 558 nce->nce_timeout_id = 0; 559 } 560 /* 561 * We might hit this func thus in the v4 case: 562 * ipif_down->ipif_ndp_down->ndp_walk 563 */ 564 565 if (nce->nce_ipversion == IPV4_VERSION) { 566 ire_walk_ill_v4(MATCH_IRE_ILL | MATCH_IRE_TYPE, 567 IRE_CACHE, nce_ire_delete1, 568 (char *)nce, nce->nce_ill); 569 } else { 570 ASSERT(nce->nce_ipversion == IPV6_VERSION); 571 ire_walk_ill_v6(MATCH_IRE_ILL | MATCH_IRE_TYPE, 572 IRE_CACHE, nce_ire_delete1, 573 (char *)nce, nce->nce_ill); 574 } 575 NCE_REFRELE_NOTR(nce); 576 nce = nce_next; 577 } 578 } 579 580 /* 581 * Delete an ire when the nce goes away. 582 */ 583 /* ARGSUSED */ 584 static void 585 nce_ire_delete(nce_t *nce) 586 { 587 if (nce->nce_ipversion == IPV6_VERSION) { 588 ire_walk_ill_v6(MATCH_IRE_ILL | MATCH_IRE_TYPE, IRE_CACHE, 589 nce_ire_delete1, (char *)nce, nce->nce_ill); 590 NCE_REFRELE_NOTR(nce); 591 } else { 592 ire_walk_ill_v4(MATCH_IRE_ILL | MATCH_IRE_TYPE, IRE_CACHE, 593 nce_ire_delete1, (char *)nce, nce->nce_ill); 594 NCE_REFRELE_NOTR(nce); 595 } 596 } 597 598 /* 599 * ire_walk routine used to delete every IRE that shares this nce 600 */ 601 static void 602 nce_ire_delete1(ire_t *ire, char *nce_arg) 603 { 604 nce_t *nce = (nce_t *)nce_arg; 605 606 ASSERT(ire->ire_type == IRE_CACHE); 607 608 if (ire->ire_nce == nce) { 609 ASSERT(ire->ire_ipversion == nce->nce_ipversion); 610 ire_delete(ire); 611 } 612 } 613 614 /* 615 * Restart DAD on given NCE. Returns B_TRUE if DAD has been restarted. 616 */ 617 boolean_t 618 ndp_restart_dad(nce_t *nce) 619 { 620 boolean_t started; 621 boolean_t dropped; 622 623 if (nce == NULL) 624 return (B_FALSE); 625 mutex_enter(&nce->nce_lock); 626 if (nce->nce_state == ND_PROBE) { 627 mutex_exit(&nce->nce_lock); 628 started = B_TRUE; 629 } else if (nce->nce_state == ND_REACHABLE) { 630 nce->nce_state = ND_PROBE; 631 nce->nce_pcnt = ND_MAX_UNICAST_SOLICIT - 1; 632 mutex_exit(&nce->nce_lock); 633 dropped = nce_xmit(nce->nce_ill, ND_NEIGHBOR_SOLICIT, NULL, 634 B_FALSE, &ipv6_all_zeros, &nce->nce_addr, NDP_PROBE); 635 if (dropped) { 636 mutex_enter(&nce->nce_lock); 637 nce->nce_pcnt++; 638 mutex_exit(&nce->nce_lock); 639 } 640 NDP_RESTART_TIMER(nce, ILL_PROBE_INTERVAL(nce->nce_ill)); 641 started = B_TRUE; 642 } else { 643 mutex_exit(&nce->nce_lock); 644 started = B_FALSE; 645 } 646 return (started); 647 } 648 649 /* 650 * IPv6 Cache entry lookup. Try to find an nce matching the parameters passed. 651 * If one is found, the refcnt on the nce will be incremented. 652 */ 653 nce_t * 654 ndp_lookup_v6(ill_t *ill, const in6_addr_t *addr, boolean_t caller_holds_lock) 655 { 656 nce_t *nce; 657 ip_stack_t *ipst; 658 659 ASSERT(ill != NULL); 660 ipst = ill->ill_ipst; 661 662 ASSERT(ill != NULL && ill->ill_isv6); 663 if (!caller_holds_lock) { 664 mutex_enter(&ipst->ips_ndp6->ndp_g_lock); 665 } 666 667 /* Get head of v6 hash table */ 668 nce = *((nce_t **)NCE_HASH_PTR_V6(ipst, *addr)); 669 nce = nce_lookup_addr(ill, addr, nce); 670 if (nce == NULL) 671 nce = nce_lookup_mapping(ill, addr); 672 if (!caller_holds_lock) 673 mutex_exit(&ipst->ips_ndp6->ndp_g_lock); 674 return (nce); 675 } 676 /* 677 * IPv4 Cache entry lookup. Try to find an nce matching the parameters passed. 678 * If one is found, the refcnt on the nce will be incremented. 679 * Since multicast mappings are handled in arp, there are no nce_mcast_entries 680 * so we skip the nce_lookup_mapping call. 681 * XXX TODO: if the nce is found to be ND_STALE, ndp_delete it and return NULL 682 */ 683 nce_t * 684 ndp_lookup_v4(ill_t *ill, const in_addr_t *addr, boolean_t caller_holds_lock) 685 { 686 nce_t *nce; 687 in6_addr_t addr6; 688 ip_stack_t *ipst = ill->ill_ipst; 689 690 if (!caller_holds_lock) { 691 mutex_enter(&ipst->ips_ndp4->ndp_g_lock); 692 } 693 694 /* Get head of v4 hash table */ 695 nce = *((nce_t **)NCE_HASH_PTR_V4(ipst, *addr)); 696 IN6_IPADDR_TO_V4MAPPED(*addr, &addr6); 697 nce = nce_lookup_addr(ill, &addr6, nce); 698 if (!caller_holds_lock) 699 mutex_exit(&ipst->ips_ndp4->ndp_g_lock); 700 return (nce); 701 } 702 703 /* 704 * Cache entry lookup. Try to find an nce matching the parameters passed. 705 * Look only for exact entries (no mappings). If an nce is found, increment 706 * the hold count on that nce. The caller passes in the start of the 707 * appropriate hash table, and must be holding the appropriate global 708 * lock (ndp_g_lock). 709 */ 710 static nce_t * 711 nce_lookup_addr(ill_t *ill, const in6_addr_t *addr, nce_t *nce) 712 { 713 ndp_g_t *ndp; 714 ip_stack_t *ipst = ill->ill_ipst; 715 716 if (ill->ill_isv6) 717 ndp = ipst->ips_ndp6; 718 else 719 ndp = ipst->ips_ndp4; 720 721 ASSERT(ill != NULL); 722 ASSERT(MUTEX_HELD(&ndp->ndp_g_lock)); 723 if (IN6_IS_ADDR_UNSPECIFIED(addr)) 724 return (NULL); 725 for (; nce != NULL; nce = nce->nce_next) { 726 if (nce->nce_ill == ill) { 727 if (IN6_ARE_ADDR_EQUAL(&nce->nce_addr, addr) && 728 IN6_ARE_ADDR_EQUAL(&nce->nce_mask, 729 &ipv6_all_ones)) { 730 mutex_enter(&nce->nce_lock); 731 if (!(nce->nce_flags & NCE_F_CONDEMNED)) { 732 NCE_REFHOLD_LOCKED(nce); 733 mutex_exit(&nce->nce_lock); 734 break; 735 } 736 mutex_exit(&nce->nce_lock); 737 } 738 } 739 } 740 return (nce); 741 } 742 743 /* 744 * Cache entry lookup. Try to find an nce matching the parameters passed. 745 * Look only for mappings. 746 */ 747 static nce_t * 748 nce_lookup_mapping(ill_t *ill, const in6_addr_t *addr) 749 { 750 nce_t *nce; 751 ip_stack_t *ipst = ill->ill_ipst; 752 753 ASSERT(ill != NULL && ill->ill_isv6); 754 ASSERT(MUTEX_HELD(&ipst->ips_ndp6->ndp_g_lock)); 755 if (!IN6_IS_ADDR_MULTICAST(addr)) 756 return (NULL); 757 nce = ipst->ips_ndp6->nce_mask_entries; 758 for (; nce != NULL; nce = nce->nce_next) 759 if (nce->nce_ill == ill && 760 (V6_MASK_EQ(*addr, nce->nce_mask, nce->nce_addr))) { 761 mutex_enter(&nce->nce_lock); 762 if (!(nce->nce_flags & NCE_F_CONDEMNED)) { 763 NCE_REFHOLD_LOCKED(nce); 764 mutex_exit(&nce->nce_lock); 765 break; 766 } 767 mutex_exit(&nce->nce_lock); 768 } 769 return (nce); 770 } 771 772 /* 773 * Process passed in parameters either from an incoming packet or via 774 * user ioctl. 775 */ 776 void 777 ndp_process(nce_t *nce, uchar_t *hw_addr, uint32_t flag, boolean_t is_adv) 778 { 779 ill_t *ill = nce->nce_ill; 780 uint32_t hw_addr_len = ill->ill_nd_lla_len; 781 mblk_t *mp; 782 boolean_t ll_updated = B_FALSE; 783 boolean_t ll_changed; 784 ip_stack_t *ipst = ill->ill_ipst; 785 786 ASSERT(nce->nce_ipversion == IPV6_VERSION); 787 /* 788 * No updates of link layer address or the neighbor state is 789 * allowed, when the cache is in NONUD state. This still 790 * allows for responding to reachability solicitation. 791 */ 792 mutex_enter(&nce->nce_lock); 793 if (nce->nce_state == ND_INCOMPLETE) { 794 if (hw_addr == NULL) { 795 mutex_exit(&nce->nce_lock); 796 return; 797 } 798 nce_set_ll(nce, hw_addr); 799 /* 800 * Update nce state and send the queued packets 801 * back to ip this time ire will be added. 802 */ 803 if (flag & ND_NA_FLAG_SOLICITED) { 804 nce_update(nce, ND_REACHABLE, NULL); 805 } else { 806 nce_update(nce, ND_STALE, NULL); 807 } 808 mutex_exit(&nce->nce_lock); 809 nce_fastpath(nce); 810 mutex_enter(&nce->nce_lock); 811 mp = nce->nce_qd_mp; 812 nce->nce_qd_mp = NULL; 813 mutex_exit(&nce->nce_lock); 814 while (mp != NULL) { 815 mblk_t *nxt_mp, *data_mp; 816 817 nxt_mp = mp->b_next; 818 mp->b_next = NULL; 819 820 if (mp->b_datap->db_type == M_CTL) 821 data_mp = mp->b_cont; 822 else 823 data_mp = mp; 824 if (data_mp->b_prev != NULL) { 825 ill_t *inbound_ill; 826 queue_t *fwdq = NULL; 827 uint_t ifindex; 828 829 ifindex = (uint_t)(uintptr_t)data_mp->b_prev; 830 inbound_ill = ill_lookup_on_ifindex(ifindex, 831 B_TRUE, NULL, NULL, NULL, NULL, ipst); 832 if (inbound_ill == NULL) { 833 data_mp->b_prev = NULL; 834 freemsg(mp); 835 return; 836 } else { 837 fwdq = inbound_ill->ill_rq; 838 } 839 data_mp->b_prev = NULL; 840 /* 841 * Send a forwarded packet back into ip_rput_v6 842 * just as in ire_send_v6(). 843 * Extract the queue from b_prev (set in 844 * ip_rput_data_v6). 845 */ 846 if (fwdq != NULL) { 847 /* 848 * Forwarded packets hop count will 849 * get decremented in ip_rput_data_v6 850 */ 851 if (data_mp != mp) 852 freeb(mp); 853 put(fwdq, data_mp); 854 } else { 855 /* 856 * Send locally originated packets back 857 * into * ip_wput_v6. 858 */ 859 put(ill->ill_wq, mp); 860 } 861 ill_refrele(inbound_ill); 862 } else { 863 put(ill->ill_wq, mp); 864 } 865 mp = nxt_mp; 866 } 867 return; 868 } 869 ll_changed = nce_cmp_ll_addr(nce, hw_addr, hw_addr_len); 870 if (!is_adv) { 871 /* If this is a SOLICITATION request only */ 872 if (ll_changed) 873 nce_update(nce, ND_STALE, hw_addr); 874 mutex_exit(&nce->nce_lock); 875 return; 876 } 877 if (!(flag & ND_NA_FLAG_OVERRIDE) && ll_changed) { 878 /* If in any other state than REACHABLE, ignore */ 879 if (nce->nce_state == ND_REACHABLE) { 880 nce_update(nce, ND_STALE, NULL); 881 } 882 mutex_exit(&nce->nce_lock); 883 return; 884 } else { 885 if (ll_changed) { 886 nce_update(nce, ND_UNCHANGED, hw_addr); 887 ll_updated = B_TRUE; 888 } 889 if (flag & ND_NA_FLAG_SOLICITED) { 890 nce_update(nce, ND_REACHABLE, NULL); 891 } else { 892 if (ll_updated) { 893 nce_update(nce, ND_STALE, NULL); 894 } 895 } 896 mutex_exit(&nce->nce_lock); 897 if (!(flag & ND_NA_FLAG_ROUTER) && (nce->nce_flags & 898 NCE_F_ISROUTER)) { 899 ire_t *ire; 900 901 /* 902 * Router turned to host. We need to remove the 903 * entry as well as any default route that may be 904 * using this as a next hop. This is required by 905 * section 7.2.5 of RFC 2461. 906 */ 907 ire = ire_ftable_lookup_v6(&ipv6_all_zeros, 908 &ipv6_all_zeros, &nce->nce_addr, IRE_DEFAULT, 909 nce->nce_ill->ill_ipif, NULL, ALL_ZONES, 0, NULL, 910 MATCH_IRE_ILL | MATCH_IRE_TYPE | MATCH_IRE_GW | 911 MATCH_IRE_DEFAULT, ipst); 912 if (ire != NULL) { 913 ip_rts_rtmsg(RTM_DELETE, ire, 0, ipst); 914 ire_delete(ire); 915 ire_refrele(ire); 916 } 917 ndp_delete(nce); 918 } 919 } 920 } 921 922 /* 923 * Pass arg1 to the pfi supplied, along with each nce in existence. 924 * ndp_walk() places a REFHOLD on the nce and drops the lock when 925 * walking the hash list. 926 */ 927 void 928 ndp_walk_common(ndp_g_t *ndp, ill_t *ill, pfi_t pfi, void *arg1, 929 boolean_t trace) 930 { 931 932 nce_t *nce; 933 nce_t *nce1; 934 nce_t **ncep; 935 nce_t *free_nce_list = NULL; 936 937 mutex_enter(&ndp->ndp_g_lock); 938 /* Prevent ndp_delete from unlink and free of NCE */ 939 ndp->ndp_g_walker++; 940 mutex_exit(&ndp->ndp_g_lock); 941 for (ncep = ndp->nce_hash_tbl; 942 ncep < A_END(ndp->nce_hash_tbl); ncep++) { 943 for (nce = *ncep; nce != NULL; nce = nce1) { 944 nce1 = nce->nce_next; 945 if (ill == NULL || nce->nce_ill == ill) { 946 if (trace) { 947 NCE_REFHOLD(nce); 948 (*pfi)(nce, arg1); 949 NCE_REFRELE(nce); 950 } else { 951 NCE_REFHOLD_NOTR(nce); 952 (*pfi)(nce, arg1); 953 NCE_REFRELE_NOTR(nce); 954 } 955 } 956 } 957 } 958 for (nce = ndp->nce_mask_entries; nce != NULL; nce = nce1) { 959 nce1 = nce->nce_next; 960 if (ill == NULL || nce->nce_ill == ill) { 961 if (trace) { 962 NCE_REFHOLD(nce); 963 (*pfi)(nce, arg1); 964 NCE_REFRELE(nce); 965 } else { 966 NCE_REFHOLD_NOTR(nce); 967 (*pfi)(nce, arg1); 968 NCE_REFRELE_NOTR(nce); 969 } 970 } 971 } 972 mutex_enter(&ndp->ndp_g_lock); 973 ndp->ndp_g_walker--; 974 /* 975 * While NCE's are removed from global list they are placed 976 * in a private list, to be passed to nce_ire_delete_list(). 977 * The reason is, there may be ires pointing to this nce 978 * which needs to cleaned up. 979 */ 980 if (ndp->ndp_g_walker_cleanup && ndp->ndp_g_walker == 0) { 981 /* Time to delete condemned entries */ 982 for (ncep = ndp->nce_hash_tbl; 983 ncep < A_END(ndp->nce_hash_tbl); ncep++) { 984 nce = *ncep; 985 if (nce != NULL) { 986 nce_remove(ndp, nce, &free_nce_list); 987 } 988 } 989 nce = ndp->nce_mask_entries; 990 if (nce != NULL) { 991 nce_remove(ndp, nce, &free_nce_list); 992 } 993 ndp->ndp_g_walker_cleanup = B_FALSE; 994 } 995 996 mutex_exit(&ndp->ndp_g_lock); 997 998 if (free_nce_list != NULL) { 999 nce_ire_delete_list(free_nce_list); 1000 } 1001 } 1002 1003 /* 1004 * Walk everything. 1005 * Note that ill can be NULL hence can't derive the ipst from it. 1006 */ 1007 void 1008 ndp_walk(ill_t *ill, pfi_t pfi, void *arg1, ip_stack_t *ipst) 1009 { 1010 ndp_walk_common(ipst->ips_ndp4, ill, pfi, arg1, B_TRUE); 1011 ndp_walk_common(ipst->ips_ndp6, ill, pfi, arg1, B_TRUE); 1012 } 1013 1014 /* 1015 * Process resolve requests. Handles both mapped entries 1016 * as well as cases that needs to be send out on the wire. 1017 * Lookup a NCE for a given IRE. Regardless of whether one exists 1018 * or one is created, we defer making ire point to nce until the 1019 * ire is actually added at which point the nce_refcnt on the nce is 1020 * incremented. This is done primarily to have symmetry between ire_add() 1021 * and ire_delete() which decrements the nce_refcnt, when an ire is deleted. 1022 */ 1023 int 1024 ndp_resolver(ill_t *ill, const in6_addr_t *dst, mblk_t *mp, zoneid_t zoneid) 1025 { 1026 nce_t *nce; 1027 int err = 0; 1028 uint32_t ms; 1029 mblk_t *mp_nce = NULL; 1030 ip_stack_t *ipst = ill->ill_ipst; 1031 1032 ASSERT(ill->ill_isv6); 1033 if (IN6_IS_ADDR_MULTICAST(