1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2008 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 /* 26 * Copyright (c) 1990 Mentat Inc. 
27 */ 28 29 #include <sys/types.h> 30 #include <sys/stream.h> 31 #include <sys/dlpi.h> 32 #include <sys/stropts.h> 33 #include <sys/sysmacros.h> 34 #include <sys/strsun.h> 35 #include <sys/strlog.h> 36 #include <sys/strsubr.h> 37 #define _SUN_TPI_VERSION 2 38 #include <sys/tihdr.h> 39 #include <sys/ddi.h> 40 #include <sys/sunddi.h> 41 #include <sys/cmn_err.h> 42 #include <sys/debug.h> 43 #include <sys/sdt.h> 44 #include <sys/kobj.h> 45 #include <sys/zone.h> 46 #include <sys/neti.h> 47 #include <sys/hook.h> 48 49 #include <sys/kmem.h> 50 #include <sys/systm.h> 51 #include <sys/param.h> 52 #include <sys/socket.h> 53 #include <sys/vtrace.h> 54 #include <sys/isa_defs.h> 55 #include <sys/atomic.h> 56 #include <sys/iphada.h> 57 #include <sys/policy.h> 58 #include <net/if.h> 59 #include <net/if_types.h> 60 #include <net/route.h> 61 #include <net/if_dl.h> 62 #include <sys/sockio.h> 63 #include <netinet/in.h> 64 #include <netinet/ip6.h> 65 #include <netinet/icmp6.h> 66 #include <netinet/sctp.h> 67 68 #include <inet/common.h> 69 #include <inet/mi.h> 70 #include <inet/optcom.h> 71 #include <inet/mib2.h> 72 #include <inet/nd.h> 73 #include <inet/arp.h> 74 75 #include <inet/ip.h> 76 #include <inet/ip_impl.h> 77 #include <inet/ip6.h> 78 #include <inet/ip6_asp.h> 79 #include <inet/tcp.h> 80 #include <inet/tcp_impl.h> 81 #include <inet/udp_impl.h> 82 #include <inet/ipp_common.h> 83 84 #include <inet/ip_multi.h> 85 #include <inet/ip_if.h> 86 #include <inet/ip_ire.h> 87 #include <inet/ip_rts.h> 88 #include <inet/ip_ndp.h> 89 #include <net/pfkeyv2.h> 90 #include <inet/ipsec_info.h> 91 #include <inet/sadb.h> 92 #include <inet/ipsec_impl.h> 93 #include <inet/tun.h> 94 #include <inet/sctp_ip.h> 95 #include <sys/pattr.h> 96 #include <inet/ipclassifier.h> 97 #include <inet/ipsecah.h> 98 #include <inet/udp_impl.h> 99 #include <inet/rawip_impl.h> 100 #include <inet/rts_impl.h> 101 #include <sys/squeue.h> 102 103 #include <sys/tsol/label.h> 104 #include <sys/tsol/tnet.h> 105 106 #include 
<rpc/pmap_prot.h> 107 108 /* Temporary; for CR 6451644 work-around */ 109 #include <sys/ethernet.h> 110 111 extern squeue_func_t ip_input_proc; 112 113 /* 114 * Naming conventions: 115 * These rules should be judiciously applied 116 * if there is a need to identify something as IPv6 versus IPv4 117 * IPv6 functions will end with _v6 in the ip module. 118 * IPv6 functions will end with _ipv6 in the transport modules. 119 * IPv6 macros: 120 * Some macros end with _V6; e.g. ILL_FRAG_HASH_V6 121 * Some macros start with V6_; e.g. V6_OR_V4_INADDR_ANY 122 * And then there are ..V4_PART_OF_V6. 123 * The intent is that macros in the ip module end with _V6. 124 * IPv6 global variables will start with ipv6_ 125 * IPv6 structures will start with ipv6 126 * IPv6 defined constants should start with IPV6_ 127 * (but then there are NDP_DEFAULT_VERS_PRI_AND_FLOW, etc) 128 */ 129 130 /* 131 * ip6opt_ls is used to enable IPv6 (via /etc/system on TX systems). 132 * We need to do this because we didn't obtain the IP6OPT_LS (0x0a) 133 * from IANA. This mechanism will remain in effect until an official 134 * number is obtained. 
135 */ 136 uchar_t ip6opt_ls; 137 138 const in6_addr_t ipv6_all_ones = 139 { 0xffffffffU, 0xffffffffU, 0xffffffffU, 0xffffffffU }; 140 const in6_addr_t ipv6_all_zeros = { 0, 0, 0, 0 }; 141 142 #ifdef _BIG_ENDIAN 143 const in6_addr_t ipv6_unspecified_group = { 0xff000000U, 0, 0, 0 }; 144 #else /* _BIG_ENDIAN */ 145 const in6_addr_t ipv6_unspecified_group = { 0x000000ffU, 0, 0, 0 }; 146 #endif /* _BIG_ENDIAN */ 147 148 #ifdef _BIG_ENDIAN 149 const in6_addr_t ipv6_loopback = { 0, 0, 0, 0x00000001U }; 150 #else /* _BIG_ENDIAN */ 151 const in6_addr_t ipv6_loopback = { 0, 0, 0, 0x01000000U }; 152 #endif /* _BIG_ENDIAN */ 153 154 #ifdef _BIG_ENDIAN 155 const in6_addr_t ipv6_all_hosts_mcast = { 0xff020000U, 0, 0, 0x00000001U }; 156 #else /* _BIG_ENDIAN */ 157 const in6_addr_t ipv6_all_hosts_mcast = { 0x000002ffU, 0, 0, 0x01000000U }; 158 #endif /* _BIG_ENDIAN */ 159 160 #ifdef _BIG_ENDIAN 161 const in6_addr_t ipv6_all_rtrs_mcast = { 0xff020000U, 0, 0, 0x00000002U }; 162 #else /* _BIG_ENDIAN */ 163 const in6_addr_t ipv6_all_rtrs_mcast = { 0x000002ffU, 0, 0, 0x02000000U }; 164 #endif /* _BIG_ENDIAN */ 165 166 #ifdef _BIG_ENDIAN 167 const in6_addr_t ipv6_all_v2rtrs_mcast = { 0xff020000U, 0, 0, 0x00000016U }; 168 #else /* _BIG_ENDIAN */ 169 const in6_addr_t ipv6_all_v2rtrs_mcast = { 0x000002ffU, 0, 0, 0x16000000U }; 170 #endif /* _BIG_ENDIAN */ 171 172 #ifdef _BIG_ENDIAN 173 const in6_addr_t ipv6_solicited_node_mcast = 174 { 0xff020000U, 0, 0x00000001U, 0xff000000U }; 175 #else /* _BIG_ENDIAN */ 176 const in6_addr_t ipv6_solicited_node_mcast = 177 { 0x000002ffU, 0, 0x01000000U, 0x000000ffU }; 178 #endif /* _BIG_ENDIAN */ 179 180 /* Leave room for ip_newroute to tack on the src and target addresses */ 181 #define OK_RESOLVER_MP_V6(mp) \ 182 ((mp) && ((mp)->b_wptr - (mp)->b_rptr) >= (2 * IPV6_ADDR_LEN)) 183 184 #define IP6_MBLK_OK 0 185 #define IP6_MBLK_HDR_ERR 1 186 #define IP6_MBLK_LEN_ERR 2 187 188 static void icmp_inbound_too_big_v6(queue_t *, mblk_t *, ill_t *ill, 189 
boolean_t, zoneid_t); 190 static void icmp_pkt_v6(queue_t *, mblk_t *, void *, size_t, 191 const in6_addr_t *, boolean_t, zoneid_t, ip_stack_t *); 192 static void icmp_redirect_v6(queue_t *, mblk_t *, ill_t *ill); 193 static int ip_bind_connected_v6(conn_t *, mblk_t *, in6_addr_t *, 194 uint16_t, const in6_addr_t *, ip6_pkt_t *, uint16_t, 195 boolean_t, boolean_t, boolean_t, boolean_t); 196 static boolean_t ip_bind_insert_ire_v6(mblk_t *, ire_t *, const in6_addr_t *, 197 iulp_t *, ip_stack_t *); 198 static int ip_bind_laddr_v6(conn_t *, mblk_t *, const in6_addr_t *, 199 uint16_t, boolean_t, boolean_t, boolean_t); 200 static void ip_fanout_proto_v6(queue_t *, mblk_t *, ip6_t *, ill_t *, 201 ill_t *, uint8_t, uint_t, uint_t, boolean_t, zoneid_t); 202 static void ip_fanout_tcp_v6(queue_t *, mblk_t *, ip6_t *, ill_t *, 203 ill_t *, uint_t, uint_t, boolean_t, zoneid_t); 204 static void ip_fanout_udp_v6(queue_t *, mblk_t *, ip6_t *, uint32_t, 205 ill_t *, ill_t *, uint_t, boolean_t, zoneid_t); 206 static int ip_process_options_v6(queue_t *, mblk_t *, ip6_t *, 207 uint8_t *, uint_t, uint8_t, ip_stack_t *); 208 static mblk_t *ip_rput_frag_v6(queue_t *, mblk_t *, ip6_t *, 209 ip6_frag_t *, uint_t, uint_t *, uint32_t *, uint16_t *); 210 static boolean_t ip_source_routed_v6(ip6_t *, mblk_t *, ip_stack_t *); 211 static void ip_wput_ire_v6(queue_t *, mblk_t *, ire_t *, int, int, 212 conn_t *, int, int, int, zoneid_t); 213 214 /* 215 * A template for an IPv6 AR_ENTRY_QUERY 216 */ 217 static areq_t ipv6_areq_template = { 218 AR_ENTRY_QUERY, /* cmd */ 219 sizeof (areq_t)+(2*IPV6_ADDR_LEN), /* name offset */ 220 sizeof (areq_t), /* name len (filled by ill_arp_alloc) */ 221 IP6_DL_SAP, /* protocol, from arps perspective */ 222 sizeof (areq_t), /* target addr offset */ 223 IPV6_ADDR_LEN, /* target addr_length */ 224 0, /* flags */ 225 sizeof (areq_t) + IPV6_ADDR_LEN, /* sender addr offset */ 226 IPV6_ADDR_LEN, /* sender addr length */ 227 6, /* xmit_count */ 228 1000, /* 
(re)xmit_interval in milliseconds */ 229 4 /* max # of requests to buffer */ 230 /* anything else filled in by the code */ 231 }; 232 233 /* 234 * Handle IPv6 ICMP packets sent to us. Consume the mblk passed in. 235 * The message has already been checksummed and if needed, 236 * a copy has been made to be sent any interested ICMP client (conn) 237 * Note that this is different than icmp_inbound() which does the fanout 238 * to conn's as well as local processing of the ICMP packets. 239 * 240 * All error messages are passed to the matching transport stream. 241 * 242 * Zones notes: 243 * The packet is only processed in the context of the specified zone: typically 244 * only this zone will reply to an echo request. This means that the caller must 245 * call icmp_inbound_v6() for each relevant zone. 246 */ 247 static void 248 icmp_inbound_v6(queue_t *q, mblk_t *mp, ill_t *ill, uint_t hdr_length, 249 boolean_t mctl_present, uint_t flags, zoneid_t zoneid, mblk_t *dl_mp) 250 { 251 icmp6_t *icmp6; 252 ip6_t *ip6h; 253 boolean_t interested; 254 ip6i_t *ip6i; 255 in6_addr_t origsrc; 256 ire_t *ire; 257 mblk_t *first_mp; 258 ipsec_in_t *ii; 259 ip_stack_t *ipst = ill->ill_ipst; 260 261 ASSERT(ill != NULL); 262 first_mp = mp; 263 if (mctl_present) { 264 mp = first_mp->b_cont; 265 ASSERT(mp != NULL); 266 267 ii = (ipsec_in_t *)first_mp->b_rptr; 268 ASSERT(ii->ipsec_in_type == IPSEC_IN); 269 } 270 271 ip6h = (ip6_t *)mp->b_rptr; 272 273 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInMsgs); 274 275 if ((mp->b_wptr - mp->b_rptr) < (hdr_length + ICMP6_MINLEN)) { 276 if (!pullupmsg(mp, hdr_length + ICMP6_MINLEN)) { 277 ip1dbg(("icmp_inbound_v6: pullupmsg failed\n")); 278 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInErrors); 279 freemsg(first_mp); 280 return; 281 } 282 ip6h = (ip6_t *)mp->b_rptr; 283 } 284 if (ipst->ips_icmp_accept_clear_messages == 0) { 285 first_mp = ipsec_check_global_policy(first_mp, NULL, 286 NULL, ip6h, mctl_present, ipst->ips_netstack); 287 if (first_mp == NULL) 288 
return; 289 } 290 291 /* 292 * On a labeled system, we have to check whether the zone itself is 293 * permitted to receive raw traffic. 294 */ 295 if (is_system_labeled()) { 296 if (zoneid == ALL_ZONES) 297 zoneid = tsol_packet_to_zoneid(mp); 298 if (!tsol_can_accept_raw(mp, B_FALSE)) { 299 ip1dbg(("icmp_inbound_v6: zone %d can't receive raw", 300 zoneid)); 301 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInErrors); 302 freemsg(first_mp); 303 return; 304 } 305 } 306 307 icmp6 = (icmp6_t *)(&mp->b_rptr[hdr_length]); 308 ip2dbg(("icmp_inbound_v6: type %d code %d\n", icmp6->icmp6_type, 309 icmp6->icmp6_code)); 310 interested = !(icmp6->icmp6_type & ICMP6_INFOMSG_MASK); 311 312 /* Initiate IPPF processing here */ 313 if (IP6_IN_IPP(flags, ipst)) { 314 315 /* 316 * If the ifindex changes due to SIOCSLIFINDEX 317 * packet may return to IP on the wrong ill. 318 */ 319 ip_process(IPP_LOCAL_IN, &mp, ill->ill_phyint->phyint_ifindex); 320 if (mp == NULL) { 321 if (mctl_present) { 322 freeb(first_mp); 323 } 324 return; 325 } 326 } 327 328 switch (icmp6->icmp6_type) { 329 case ICMP6_DST_UNREACH: 330 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInDestUnreachs); 331 if (icmp6->icmp6_code == ICMP6_DST_UNREACH_ADMIN) 332 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInAdminProhibs); 333 break; 334 335 case ICMP6_TIME_EXCEEDED: 336 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInTimeExcds); 337 break; 338 339 case ICMP6_PARAM_PROB: 340 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInParmProblems); 341 break; 342 343 case ICMP6_PACKET_TOO_BIG: 344 icmp_inbound_too_big_v6(q, first_mp, ill, mctl_present, 345 zoneid); 346 return; 347 case ICMP6_ECHO_REQUEST: 348 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInEchos); 349 if (IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst) && 350 !ipst->ips_ipv6_resp_echo_mcast) 351 break; 352 353 /* 354 * We must have exclusive use of the mblk to convert it to 355 * a response. 356 * If not, we copy it. 
357 */ 358 if (mp->b_datap->db_ref > 1) { 359 mblk_t *mp1; 360 361 mp1 = copymsg(mp); 362 freemsg(mp); 363 if (mp1 == NULL) { 364 BUMP_MIB(ill->ill_icmp6_mib, 365 ipv6IfIcmpInErrors); 366 if (mctl_present) 367 freeb(first_mp); 368 return; 369 } 370 mp = mp1; 371 ip6h = (ip6_t *)mp->b_rptr; 372 icmp6 = (icmp6_t *)(&mp->b_rptr[hdr_length]); 373 if (mctl_present) 374 first_mp->b_cont = mp; 375 else 376 first_mp = mp; 377 } 378 379 /* 380 * Turn the echo into an echo reply. 381 * Remove any extension headers (do not reverse a source route) 382 * and clear the flow id (keep traffic class for now). 383 */ 384 if (hdr_length != IPV6_HDR_LEN) { 385 int i; 386 387 for (i = 0; i < IPV6_HDR_LEN; i++) 388 mp->b_rptr[hdr_length - i - 1] = 389 mp->b_rptr[IPV6_HDR_LEN - i - 1]; 390 mp->b_rptr += (hdr_length - IPV6_HDR_LEN); 391 ip6h = (ip6_t *)mp->b_rptr; 392 ip6h->ip6_nxt = IPPROTO_ICMPV6; 393 hdr_length = IPV6_HDR_LEN; 394 } 395 ip6h->ip6_vcf &= ~IPV6_FLOWINFO_FLOWLABEL; 396 icmp6->icmp6_type = ICMP6_ECHO_REPLY; 397 398 ip6h->ip6_plen = 399 htons((uint16_t)(msgdsize(mp) - IPV6_HDR_LEN)); 400 origsrc = ip6h->ip6_src; 401 /* 402 * Reverse the source and destination addresses. 403 * If the return address is a multicast, zero out the source 404 * (ip_wput_v6 will set an address). 405 */ 406 if (IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst)) { 407 ip6h->ip6_src = ipv6_all_zeros; 408 ip6h->ip6_dst = origsrc; 409 } else { 410 ip6h->ip6_src = ip6h->ip6_dst; 411 ip6h->ip6_dst = origsrc; 412 } 413 414 /* set the hop limit */ 415 ip6h->ip6_hops = ipst->ips_ipv6_def_hops; 416 417 /* 418 * Prepare for checksum by putting icmp length in the icmp 419 * checksum field. The checksum is calculated in ip_wput_v6. 420 */ 421 icmp6->icmp6_cksum = ip6h->ip6_plen; 422 /* 423 * ICMP echo replies should go out on the same interface 424 * the request came on as probes used by in.mpathd for 425 * detecting NIC failures are ECHO packets. 
We turn-off load 426 * spreading by allocating a ip6i and setting ip6i_attach_if 427 * to B_TRUE which is handled both by ip_wput_v6 and 428 * ip_newroute_v6. If we don't turnoff load spreading, 429 * the packets might get dropped if there are no 430 * non-FAILED/INACTIVE interfaces for it to go out on and 431 * in.mpathd would wrongly detect a failure or mis-detect 432 * a NIC failure as a link failure. As load spreading can 433 * happen only if ill_group is not NULL, we do only for 434 * that case and this does not affect the normal case. 435 * 436 * We force this only on echo packets that came from on-link 437 * hosts. We restrict this to link-local addresses which 438 * is used by in.mpathd for probing. In the IPv6 case, 439 * default routes typically have an ire_ipif pointer and 440 * hence a MATCH_IRE_ILL later in ip_newroute_v6/ip_wput_v6 441 * might work. As a default route out of this interface 442 * may not be present, enforcing this packet to go out in 443 * this case may not work. 444 */ 445 if (ill->ill_group != NULL && 446 IN6_IS_ADDR_LINKLOCAL(&origsrc)) { 447 /* 448 * If we are sending replies to ourselves, don't 449 * set ATTACH_IF as we may not be able to find 450 * the IRE_LOCAL on this ill i.e setting ATTACH_IF 451 * causes ip_wput_v6 to look for an IRE_LOCAL on 452 * "ill" which it may not find and will try to 453 * create an IRE_CACHE for our local address. Once 454 * we do this, we will try to forward all packets 455 * meant to our LOCAL address. 
456 */ 457 ire = ire_cache_lookup_v6(&ip6h->ip6_dst, ALL_ZONES, 458 NULL, ipst); 459 if (ire == NULL || ire->ire_type != IRE_LOCAL) { 460 mp = ip_add_info_v6(mp, NULL, &ip6h->ip6_dst); 461 if (mp == NULL) { 462 BUMP_MIB(ill->ill_icmp6_mib, 463 ipv6IfIcmpInErrors); 464 if (ire != NULL) 465 ire_refrele(ire); 466 if (mctl_present) 467 freeb(first_mp); 468 return; 469 } else if (mctl_present) { 470 first_mp->b_cont = mp; 471 } else { 472 first_mp = mp; 473 } 474 ip6i = (ip6i_t *)mp->b_rptr; 475 ip6i->ip6i_flags = IP6I_ATTACH_IF; 476 ip6i->ip6i_ifindex = 477 ill->ill_phyint->phyint_ifindex; 478 } 479 if (ire != NULL) 480 ire_refrele(ire); 481 } 482 483 if (!mctl_present) { 484 /* 485 * This packet should go out the same way as it 486 * came in i.e in clear. To make sure that global 487 * policy will not be applied to this in ip_wput, 488 * we attach a IPSEC_IN mp and clear ipsec_in_secure. 489 */ 490 ASSERT(first_mp == mp); 491 first_mp = ipsec_in_alloc(B_FALSE, ipst->ips_netstack); 492 if (first_mp == NULL) { 493 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 494 freemsg(mp); 495 return; 496 } 497 ii = (ipsec_in_t *)first_mp->b_rptr; 498 499 /* This is not a secure packet */ 500 ii->ipsec_in_secure = B_FALSE; 501 first_mp->b_cont = mp; 502 } 503 ii->ipsec_in_zoneid = zoneid; 504 ASSERT(zoneid != ALL_ZONES); 505 if (!ipsec_in_to_out(first_mp, NULL, ip6h)) { 506 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 507 return; 508 } 509 put(WR(q), first_mp); 510 return; 511 512 case ICMP6_ECHO_REPLY: 513 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInEchoReplies); 514 break; 515 516 case ND_ROUTER_SOLICIT: 517 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInRouterSolicits); 518 break; 519 520 case ND_ROUTER_ADVERT: 521 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInRouterAdvertisements); 522 break; 523 524 case ND_NEIGHBOR_SOLICIT: 525 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInNeighborSolicits); 526 if (mctl_present) 527 freeb(first_mp); 528 /* XXX may wish to pass first_mp up to ndp_input 
someday. */ 529 ndp_input(ill, mp, dl_mp); 530 return; 531 532 case ND_NEIGHBOR_ADVERT: 533 BUMP_MIB(ill->ill_icmp6_mib, 534 ipv6IfIcmpInNeighborAdvertisements); 535 if (mctl_present) 536 freeb(first_mp); 537 /* XXX may wish to pass first_mp up to ndp_input someday. */ 538 ndp_input(ill, mp, dl_mp); 539 return; 540 541 case ND_REDIRECT: { 542 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInRedirects); 543 544 if (ipst->ips_ipv6_ignore_redirect) 545 break; 546 547 /* 548 * As there is no upper client to deliver, we don't 549 * need the first_mp any more. 550 */ 551 if (mctl_present) 552 freeb(first_mp); 553 if (!pullupmsg(mp, -1)) { 554 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInBadRedirects); 555 break; 556 } 557 icmp_redirect_v6(q, mp, ill); 558 return; 559 } 560 561 /* 562 * The next three icmp messages will be handled by MLD. 563 * Pass all valid MLD packets up to any process(es) 564 * listening on a raw ICMP socket. MLD messages are 565 * freed by mld_input function. 566 */ 567 case MLD_LISTENER_QUERY: 568 case MLD_LISTENER_REPORT: 569 case MLD_LISTENER_REDUCTION: 570 if (mctl_present) 571 freeb(first_mp); 572 mld_input(q, mp, ill); 573 return; 574 default: 575 break; 576 } 577 if (interested) { 578 icmp_inbound_error_fanout_v6(q, first_mp, ip6h, icmp6, ill, 579 mctl_present, zoneid); 580 } else { 581 freemsg(first_mp); 582 } 583 } 584 585 /* 586 * Process received IPv6 ICMP Packet too big. 587 * After updating any IRE it does the fanout to any matching transport streams. 588 * Assumes the IPv6 plus ICMPv6 headers have been pulled up but nothing else. 
589 */ 590 /* ARGSUSED */ 591 static void 592 icmp_inbound_too_big_v6(queue_t *q, mblk_t *mp, ill_t *ill, 593 boolean_t mctl_present, zoneid_t zoneid) 594 { 595 ip6_t *ip6h; 596 ip6_t *inner_ip6h; 597 icmp6_t *icmp6; 598 uint16_t hdr_length; 599 uint32_t mtu; 600 ire_t *ire, *first_ire; 601 mblk_t *first_mp; 602 ip_stack_t *ipst = ill->ill_ipst; 603 604 first_mp = mp; 605 if (mctl_present) 606 mp = first_mp->b_cont; 607 /* 608 * We must have exclusive use of the mblk to update the MTU 609 * in the packet. 610 * If not, we copy it. 611 * 612 * If there's an M_CTL present, we know that allocated first_mp 613 * earlier in this function, so we know first_mp has refcnt of one. 614 */ 615 ASSERT(!mctl_present || first_mp->b_datap->db_ref == 1); 616 if (mp->b_datap->db_ref > 1) { 617 mblk_t *mp1; 618 619 mp1 = copymsg(mp); 620 freemsg(mp); 621 if (mp1 == NULL) { 622 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 623 if (mctl_present) 624 freeb(first_mp); 625 return; 626 } 627 mp = mp1; 628 if (mctl_present) 629 first_mp->b_cont = mp; 630 else 631 first_mp = mp; 632 } 633 ip6h = (ip6_t *)mp->b_rptr; 634 if (ip6h->ip6_nxt != IPPROTO_ICMPV6) 635 hdr_length = ip_hdr_length_v6(mp, ip6h); 636 else 637 hdr_length = IPV6_HDR_LEN; 638 639 icmp6 = (icmp6_t *)(&mp->b_rptr[hdr_length]); 640 ASSERT((size_t)(mp->b_wptr - mp->b_rptr) >= hdr_length + ICMP6_MINLEN); 641 inner_ip6h = (ip6_t *)&icmp6[1]; /* Packet in error */ 642 if ((uchar_t *)&inner_ip6h[1] > mp->b_wptr) { 643 if (!pullupmsg(mp, (uchar_t *)&inner_ip6h[1] - mp->b_rptr)) { 644 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 645 freemsg(first_mp); 646 return; 647 } 648 ip6h = (ip6_t *)mp->b_rptr; 649 icmp6 = (icmp6_t *)&mp->b_rptr[hdr_length]; 650 inner_ip6h = (ip6_t *)&icmp6[1]; 651 } 652 653 /* 654 * For link local destinations matching simply on IRE type is not 655 * sufficient. Same link local addresses for different ILL's is 656 * possible. 
657 */ 658 659 if (IN6_IS_ADDR_LINKLOCAL(&inner_ip6h->ip6_dst)) { 660 first_ire = ire_ctable_lookup_v6(&inner_ip6h->ip6_dst, NULL, 661 IRE_CACHE, ill->ill_ipif, ALL_ZONES, NULL, 662 MATCH_IRE_TYPE | MATCH_IRE_ILL_GROUP, ipst); 663 664 if (first_ire == NULL) { 665 if (ip_debug > 2) { 666 /* ip1dbg */ 667 pr_addr_dbg("icmp_inbound_too_big_v6:" 668 "no ire for dst %s\n", AF_INET6, 669 &inner_ip6h->ip6_dst); 670 } 671 freemsg(first_mp); 672 return; 673 } 674 675 mtu = ntohl(icmp6->icmp6_mtu); 676 rw_enter(&first_ire->ire_bucket->irb_lock, RW_READER); 677 for (ire = first_ire; ire != NULL && 678 IN6_ARE_ADDR_EQUAL(&ire->ire_addr_v6, &inner_ip6h->ip6_dst); 679 ire = ire->ire_next) { 680 mutex_enter(&ire->ire_lock); 681 if (mtu < IPV6_MIN_MTU) { 682 ip1dbg(("Received mtu less than IPv6 " 683 "min mtu %d: %d\n", IPV6_MIN_MTU, mtu)); 684 mtu = IPV6_MIN_MTU; 685 /* 686 * If an mtu less than IPv6 min mtu is received, 687 * we must include a fragment header in 688 * subsequent packets. 689 */ 690 ire->ire_frag_flag |= IPH_FRAG_HDR; 691 } 692 ip1dbg(("Received mtu from router: %d\n", mtu)); 693 ire->ire_max_frag = MIN(ire->ire_max_frag, mtu); 694 /* Record the new max frag size for the ULP. */ 695 if (ire->ire_frag_flag & IPH_FRAG_HDR) { 696 /* 697 * If we need a fragment header in every packet 698 * (above case or multirouting), make sure the 699 * ULP takes it into account when computing the 700 * payload size. 
701 */ 702 icmp6->icmp6_mtu = htonl(ire->ire_max_frag - 703 sizeof (ip6_frag_t)); 704 } else { 705 icmp6->icmp6_mtu = htonl(ire->ire_max_frag); 706 } 707 mutex_exit(&ire->ire_lock); 708 } 709 rw_exit(&first_ire->ire_bucket->irb_lock); 710 ire_refrele(first_ire); 711 } else { 712 irb_t *irb = NULL; 713 /* 714 * for non-link local destinations we match only on the IRE type 715 */ 716 ire = ire_ctable_lookup_v6(&inner_ip6h->ip6_dst, NULL, 717 IRE_CACHE, ill->ill_ipif, ALL_ZONES, NULL, MATCH_IRE_TYPE, 718 ipst); 719 if (ire == NULL) { 720 if (ip_debug > 2) { 721 /* ip1dbg */ 722 pr_addr_dbg("icmp_inbound_too_big_v6:" 723 "no ire for dst %s\n", 724 AF_INET6, &inner_ip6h->ip6_dst); 725 } 726 freemsg(first_mp); 727 return; 728 } 729 irb = ire->ire_bucket; 730 ire_refrele(ire); 731 rw_enter(&irb->irb_lock, RW_READER); 732 for (ire = irb->irb_ire; ire != NULL; ire = ire->ire_next) { 733 if (IN6_ARE_ADDR_EQUAL(&ire->ire_addr_v6, 734 &inner_ip6h->ip6_dst)) { 735 mtu = ntohl(icmp6->icmp6_mtu); 736 mutex_enter(&ire->ire_lock); 737 if (mtu < IPV6_MIN_MTU) { 738 ip1dbg(("Received mtu less than IPv6" 739 "min mtu %d: %d\n", 740 IPV6_MIN_MTU, mtu)); 741 mtu = IPV6_MIN_MTU; 742 /* 743 * If an mtu less than IPv6 min mtu is 744 * received, we must include a fragment 745 * header in subsequent packets. 746 */ 747 ire->ire_frag_flag |= IPH_FRAG_HDR; 748 } 749 750 ip1dbg(("Received mtu from router: %d\n", mtu)); 751 ire->ire_max_frag = MIN(ire->ire_max_frag, mtu); 752 /* Record the new max frag size for the ULP. */ 753 if (ire->ire_frag_flag & IPH_FRAG_HDR) { 754 /* 755 * If we need a fragment header in 756 * every packet (above case or 757 * multirouting), make sure the ULP 758 * takes it into account when computing 759 * the payload size. 
760 */ 761 icmp6->icmp6_mtu = 762 htonl(ire->ire_max_frag - 763 sizeof (ip6_frag_t)); 764 } else { 765 icmp6->icmp6_mtu = 766 htonl(ire->ire_max_frag); 767 } 768 mutex_exit(&ire->ire_lock); 769 } 770 } 771 rw_exit(&irb->irb_lock); 772 } 773 icmp_inbound_error_fanout_v6(q, first_mp, ip6h, icmp6, ill, 774 mctl_present, zoneid); 775 } 776 777 /* 778 * Fanout received ICMPv6 error packets to the transports. 779 * Assumes the IPv6 plus ICMPv6 headers have been pulled up but nothing else. 780 */ 781 void 782 icmp_inbound_error_fanout_v6(queue_t *q, mblk_t *mp, ip6_t *ip6h, 783 icmp6_t *icmp6, ill_t *ill, boolean_t mctl_present, zoneid_t zoneid) 784 { 785 uint16_t *up; /* Pointer to ports in ULP header */ 786 uint32_t ports; /* reversed ports for fanout */ 787 ip6_t rip6h; /* With reversed addresses */ 788 uint16_t hdr_length; 789 uint8_t *nexthdrp; 790 uint8_t nexthdr; 791 mblk_t *first_mp; 792 ipsec_in_t *ii; 793 tcpha_t *tcpha; 794 conn_t *connp; 795 ip_stack_t *ipst = ill->ill_ipst; 796 797 first_mp = mp; 798 if (mctl_present) { 799 mp = first_mp->b_cont; 800 ASSERT(mp != NULL); 801 802 ii = (ipsec_in_t *)first_mp->b_rptr; 803 ASSERT(ii->ipsec_in_type == IPSEC_IN); 804 } else { 805 ii = NULL; 806 } 807 808 hdr_length = (uint16_t)((uchar_t *)icmp6 - (uchar_t *)ip6h); 809 ASSERT((size_t)(mp->b_wptr - (uchar_t *)icmp6) >= ICMP6_MINLEN); 810 811 /* 812 * Need to pullup everything in order to use 813 * ip_hdr_length_nexthdr_v6() 814 */ 815 if (mp->b_cont != NULL) { 816 if (!pullupmsg(mp, -1)) { 817 ip1dbg(("icmp_inbound_error_fanout_v6: " 818 "pullupmsg failed\n")); 819 goto drop_pkt; 820 } 821 ip6h = (ip6_t *)mp->b_rptr; 822 icmp6 = (icmp6_t *)(&mp->b_rptr[hdr_length]); 823 } 824 825 ip6h = (ip6_t *)&icmp6[1]; /* Packet in error */ 826 if ((uchar_t *)&ip6h[1] > mp->b_wptr) 827 goto drop_pkt; 828 829 if (!ip_hdr_length_nexthdr_v6(mp, ip6h, &hdr_length, &nexthdrp)) 830 goto drop_pkt; 831 nexthdr = *nexthdrp; 832 833 /* Set message type, must be done after pullups */ 834 
mp->b_datap->db_type = M_CTL; 835 836 /* Try to pass the ICMP message to clients who need it */ 837 switch (nexthdr) { 838 case IPPROTO_UDP: { 839 /* 840 * Verify we have at least ICMP_MIN_TP_HDR_LEN bytes of 841 * UDP header to get the port information. 842 */ 843 if ((uchar_t *)ip6h + hdr_length + ICMP_MIN_TP_HDR_LEN > 844 mp->b_wptr) { 845 break; 846 } 847 /* 848 * Attempt to find a client stream based on port. 849 * Note that we do a reverse lookup since the header is 850 * in the form we sent it out. 851 * The rip6h header is only used for the IPCL_UDP_MATCH_V6 852 * and we only set the src and dst addresses and nexthdr. 853 */ 854 up = (uint16_t *)((uchar_t *)ip6h + hdr_length); 855 rip6h.ip6_src = ip6h->ip6_dst; 856 rip6h.ip6_dst = ip6h->ip6_src; 857 rip6h.ip6_nxt = nexthdr; 858 ((uint16_t *)&ports)[0] = up[1]; 859 ((uint16_t *)&ports)[1] = up[0]; 860 861 ip_fanout_udp_v6(q, first_mp, &rip6h, ports, ill, ill, 862 IP6_NO_IPPOLICY, mctl_present, zoneid); 863 return; 864 } 865 case IPPROTO_TCP: { 866 /* 867 * Verify we have at least ICMP_MIN_TP_HDR_LEN bytes of 868 * the TCP header to get the port information. 869 */ 870 if ((uchar_t *)ip6h + hdr_length + ICMP_MIN_TP_HDR_LEN > 871 mp->b_wptr) { 872 break; 873 } 874 875 /* 876 * Attempt to find a client stream based on port. 877 * Note that we do a reverse lookup since the header is 878 * in the form we sent it out. 879 * The rip6h header is only used for the IP_TCP_*MATCH_V6 and 880 * we only set the src and dst addresses and nexthdr. 881 */ 882 883 tcpha = (tcpha_t *)((char *)ip6h + hdr_length); 884 connp = ipcl_tcp_lookup_reversed_ipv6(ip6h, tcpha, 885 TCPS_LISTEN, ill->ill_phyint->phyint_ifindex, ipst); 886 if (connp == NULL) { 887 goto drop_pkt; 888 } 889 890 squeue_fill(connp->conn_sqp, first_mp, tcp_input, 891 connp, SQTAG_TCP6_INPUT_ICMP_ERR); 892 return; 893 894 } 895 case IPPROTO_SCTP: 896 /* 897 * Verify we have at least ICMP_MIN_TP_HDR_LEN bytes of 898 * the SCTP header to get the port information. 
899 */ 900 if ((uchar_t *)ip6h + hdr_length + ICMP_MIN_TP_HDR_LEN > 901 mp->b_wptr) { 902 break; 903 } 904 905 up = (uint16_t *)((uchar_t *)ip6h + hdr_length); 906 ((uint16_t *)&ports)[0] = up[1]; 907 ((uint16_t *)&ports)[1] = up[0]; 908 ip_fanout_sctp(first_mp, ill, (ipha_t *)ip6h, ports, 0, 909 mctl_present, IP6_NO_IPPOLICY, zoneid); 910 return; 911 case IPPROTO_ESP: 912 case IPPROTO_AH: { 913 int ipsec_rc; 914 ipsec_stack_t *ipss = ipst->ips_netstack->netstack_ipsec; 915 916 /* 917 * We need a IPSEC_IN in the front to fanout to AH/ESP. 918 * We will re-use the IPSEC_IN if it is already present as 919 * AH/ESP will not affect any fields in the IPSEC_IN for 920 * ICMP errors. If there is no IPSEC_IN, allocate a new 921 * one and attach it in the front. 922 */ 923 if (ii != NULL) { 924 /* 925 * ip_fanout_proto_again converts the ICMP errors 926 * that come back from AH/ESP to M_DATA so that 927 * if it is non-AH/ESP and we do a pullupmsg in 928 * this function, it would work. Convert it back 929 * to M_CTL before we send up as this is a ICMP 930 * error. This could have been generated locally or 931 * by some router. Validate the inner IPSEC 932 * headers. 933 * 934 * NOTE : ill_index is used by ip_fanout_proto_again 935 * to locate the ill. 936 */ 937 ASSERT(ill != NULL); 938 ii->ipsec_in_ill_index = 939 ill->ill_phyint->phyint_ifindex; 940 ii->ipsec_in_rill_index = ii->ipsec_in_ill_index; 941 first_mp->b_cont->b_datap->db_type = M_CTL; 942 } else { 943 /* 944 * IPSEC_IN is not present. We attach a ipsec_in 945 * message and send up to IPSEC for validating 946 * and removing the IPSEC headers. Clear 947 * ipsec_in_secure so that when we return 948 * from IPSEC, we don't mistakenly think that this 949 * is a secure packet came from the network. 950 * 951 * NOTE : ill_index is used by ip_fanout_proto_again 952 * to locate the ill. 
953 */ 954 ASSERT(first_mp == mp); 955 first_mp = ipsec_in_alloc(B_FALSE, ipst->ips_netstack); 956 ASSERT(ill != NULL); 957 if (first_mp == NULL) { 958 freemsg(mp); 959 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 960 return; 961 } 962 ii = (ipsec_in_t *)first_mp->b_rptr; 963 964 /* This is not a secure packet */ 965 ii->ipsec_in_secure = B_FALSE; 966 first_mp->b_cont = mp; 967 mp->b_datap->db_type = M_CTL; 968 ii->ipsec_in_ill_index = 969 ill->ill_phyint->phyint_ifindex; 970 ii->ipsec_in_rill_index = ii->ipsec_in_ill_index; 971 } 972 973 if (!ipsec_loaded(ipss)) { 974 ip_proto_not_sup(q, first_mp, 0, zoneid, ipst); 975 return; 976 } 977 978 if (nexthdr == IPPROTO_ESP) 979 ipsec_rc = ipsecesp_icmp_error(first_mp); 980 else 981 ipsec_rc = ipsecah_icmp_error(first_mp); 982 if (ipsec_rc == IPSEC_STATUS_FAILED) 983 return; 984 985 ip_fanout_proto_again(first_mp, ill, ill, NULL); 986 return; 987 } 988 case IPPROTO_ENCAP: 989 case IPPROTO_IPV6: 990 if ((uint8_t *)ip6h + hdr_length + 991 (nexthdr == IPPROTO_ENCAP ? sizeof (ipha_t) : 992 sizeof (ip6_t)) > mp->b_wptr) { 993 goto drop_pkt; 994 } 995 996 if (nexthdr == IPPROTO_ENCAP || 997 !IN6_ARE_ADDR_EQUAL( 998 &((ip6_t *)(((uint8_t *)ip6h) + hdr_length))->ip6_src, 999 &ip6h->ip6_src) || 1000 !IN6_ARE_ADDR_EQUAL( 1001 &((ip6_t *)(((uint8_t *)ip6h) + hdr_length))->ip6_dst, 1002 &ip6h->ip6_dst)) { 1003 /* 1004 * For tunnels that have used IPsec protection, 1005 * we need to adjust the MTU to take into account 1006 * the IPsec overhead. 1007 */ 1008 if (ii != NULL) 1009 icmp6->icmp6_mtu = htonl( 1010 ntohl(icmp6->icmp6_mtu) - 1011 ipsec_in_extra_length(first_mp)); 1012 } else { 1013 /* 1014 * Self-encapsulated case. As in the ipv4 case, 1015 * we need to strip the 2nd IP header. Since mp 1016 * is already pulled-up, we can simply bcopy 1017 * the 3rd header + data over the 2nd header. 
1018 */ 1019 uint16_t unused_len; 1020 ip6_t *inner_ip6h = (ip6_t *) 1021 ((uchar_t *)ip6h + hdr_length); 1022 1023 /* 1024 * Make sure we don't do recursion more than once. 1025 */ 1026 if (!ip_hdr_length_nexthdr_v6(mp, inner_ip6h, 1027 &unused_len, &nexthdrp) || 1028 *nexthdrp == IPPROTO_IPV6) { 1029 goto drop_pkt; 1030 } 1031 1032 /* 1033 * We are about to modify the packet. Make a copy if 1034 * someone else has a reference to it. 1035 */ 1036 if (DB_REF(mp) > 1) { 1037 mblk_t *mp1; 1038 uint16_t icmp6_offset; 1039 1040 mp1 = copymsg(mp); 1041 if (mp1 == NULL) { 1042 goto drop_pkt; 1043 } 1044 icmp6_offset = (uint16_t) 1045 ((uchar_t *)icmp6 - mp->b_rptr); 1046 freemsg(mp); 1047 mp = mp1; 1048 1049 icmp6 = (icmp6_t *)(m