Home | History | Annotate | Download | only in ip
      1 /*
      2  * CDDL HEADER START
      3  *
      4  * The contents of this file are subject to the terms of the
      5  * Common Development and Distribution License (the "License").
      6  * You may not use this file except in compliance with the License.
      7  *
      8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
      9  * or http://www.opensolaris.org/os/licensing.
     10  * See the License for the specific language governing permissions
     11  * and limitations under the License.
     12  *
     13  * When distributing Covered Code, include this CDDL HEADER in each
     14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
     15  * If applicable, add the following below this CDDL HEADER, with the
     16  * fields enclosed by brackets "[]" replaced with your own identifying
     17  * information: Portions Copyright [yyyy] [name of copyright owner]
     18  *
     19  * CDDL HEADER END
     20  */
     21 /*
     22  * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
     23  * Use is subject to license terms.
     24  */
     25 
     26 #pragma ident	"%Z%%M%	%I%	%E% SMI"
     27 
     28 #include <sys/param.h>
     29 #include <sys/types.h>
     30 #include <sys/systm.h>
     31 #include <sys/stream.h>
     32 #include <sys/strsubr.h>
     33 #include <sys/pattr.h>
     34 #include <sys/dlpi.h>
     35 #include <sys/atomic.h>
     36 #include <sys/sunddi.h>
     37 #include <sys/socket.h>
     38 #include <sys/neti.h>
     39 #include <sys/sdt.h>
     40 
     41 #include <netinet/in.h>
     42 #include <inet/common.h>
     43 #include <inet/mib2.h>
     44 #include <inet/ip.h>
     45 #include <inet/ip6.h>
     46 #include <inet/ip_if.h>
     47 #include <inet/ip_ire.h>
     48 #include <inet/ip_impl.h>
     49 #include <inet/ip_ndp.h>
     50 #include <inet/ipclassifier.h>
     51 #include <inet/ipp_common.h>
     52 #include <inet/ip_ftable.h>
     53 
     54 /*
     55  * IPv4 netinfo entry point declarations.
     56  */
     57 static int 		ip_getifname(phy_if_t, char *, const size_t,
     58     netstack_t *);
     59 static int 		ip_getmtu(phy_if_t, lif_if_t, netstack_t *);
     60 static int 		ip_getpmtuenabled(netstack_t *);
     61 static int 		ip_getlifaddr(phy_if_t, lif_if_t, size_t,
     62 			    net_ifaddr_t [], void *, netstack_t *);
     63 static phy_if_t		ip_phygetnext(phy_if_t, netstack_t *);
     64 static phy_if_t 	ip_phylookup(const char *, netstack_t *);
     65 static lif_if_t 	ip_lifgetnext(phy_if_t, lif_if_t, netstack_t *);
     66 static int 		ip_inject(inject_t, net_inject_t *, netstack_t *);
     67 static phy_if_t 	ip_routeto(struct sockaddr *, netstack_t *);
     68 static int 		ip_ispartialchecksum(mblk_t *);
     69 static int 		ip_isvalidchecksum(mblk_t *);
     70 
     71 static int 		ipv6_getifname(phy_if_t, char *, const size_t,
     72     netstack_t *);
     73 static int 		ipv6_getmtu(phy_if_t, lif_if_t, netstack_t *);
     74 static int 		ipv6_getlifaddr(phy_if_t, lif_if_t, size_t,
     75 			    net_ifaddr_t [], void *, netstack_t *);
     76 static phy_if_t 	ipv6_phygetnext(phy_if_t, netstack_t *);
     77 static phy_if_t 	ipv6_phylookup(const char *, netstack_t *);
     78 static lif_if_t 	ipv6_lifgetnext(phy_if_t, lif_if_t, netstack_t *);
     79 static int 		ipv6_inject(inject_t, net_inject_t *, netstack_t *);
     80 static phy_if_t 	ipv6_routeto(struct sockaddr *, netstack_t *);
     81 static int 		ipv6_isvalidchecksum(mblk_t *);
     82 
     83 /* Netinfo private functions */
     84 static	int		ip_getifname_impl(phy_if_t, char *,
     85     const size_t, boolean_t, ip_stack_t *);
     86 static	int		ip_getmtu_impl(phy_if_t, lif_if_t, boolean_t,
     87     ip_stack_t *);
     88 static	phy_if_t	ip_phylookup_impl(const char *, boolean_t,
     89     ip_stack_t *ipst);
     90 static	lif_if_t	ip_lifgetnext_impl(phy_if_t, lif_if_t, boolean_t,
     91     ip_stack_t *ipst);
     92 static	int		ip_inject_impl(inject_t, net_inject_t *, boolean_t,
     93     ip_stack_t *);
     94 static	int		ip_getifaddr_type(sa_family_t, ipif_t *, lif_if_t,
     95 			    void *);
     96 static	phy_if_t	ip_routeto_impl(struct sockaddr *, ip_stack_t *);
     97 static	int		ip_getlifaddr_impl(sa_family_t, phy_if_t, lif_if_t,
     98 			    size_t, net_ifaddr_t [], struct sockaddr *,
     99 			    ip_stack_t *);
    100 static	void		ip_ni_queue_in_func(void *);
    101 static	void		ip_ni_queue_out_func(void *);
    102 static	void		ip_ni_queue_func_impl(injection_t *,  boolean_t);
    103 
    104 
/*
 * IPv4 netinfo implementation table.  ip_net_init() passes this to
 * net_register_impl().  After the version and the NHF_INET family, the
 * remaining slots are filled positionally with the IPv4 entry points
 * declared earlier in this file.
 */
static net_info_t ipv4info = {
	NETINFO_VERSION,
	NHF_INET,
	ip_getifname,
	ip_getmtu,
	ip_getpmtuenabled,
	ip_getlifaddr,
	ip_phygetnext,
	ip_phylookup,
	ip_lifgetnext,
	ip_inject,
	ip_routeto,
	ip_ispartialchecksum,
	ip_isvalidchecksum
};
    120 
    121 
/*
 * IPv6 netinfo implementation table.  ip_net_init() passes this to
 * net_register_impl().  It mirrors ipv4info slot for slot, except that
 * ip_getpmtuenabled and ip_ispartialchecksum are shared with IPv4
 * rather than having IPv6-specific versions.
 */
static net_info_t ipv6info = {
	NETINFO_VERSION,
	NHF_INET6,
	ipv6_getifname,
	ipv6_getmtu,
	ip_getpmtuenabled,
	ipv6_getlifaddr,
	ipv6_phygetnext,
	ipv6_phylookup,
	ipv6_lifgetnext,
	ipv6_inject,
	ipv6_routeto,
	ip_ispartialchecksum,
	ipv6_isvalidchecksum
};
    137 
/*
 * Task queues used by the netinfo code.  They are created by
 * ip_net_g_init() and destroyed by ip_net_g_destroy(); a queue whose
 * creation failed stays NULL.
 *
 * The taskq eventq_queue_in is used to process the upside inject messages.
 * The taskq eventq_queue_out is used to process the downside inject messages.
 * The taskq eventq_queue_nic is used to process the nic event messages.
 *
 * Unlike the two inject queues, eventq_queue_nic is not static and is
 * therefore visible outside this file.
 */
static ddi_taskq_t 	*eventq_queue_in = NULL;
static ddi_taskq_t 	*eventq_queue_out = NULL;
ddi_taskq_t 	*eventq_queue_nic = NULL;
    146 
    147 /*
    148  * Initialize queues for inject.
    149  */
    150 void
    151 ip_net_g_init()
    152 {
    153 	if (eventq_queue_out == NULL) {
    154 		eventq_queue_out = ddi_taskq_create(NULL,
    155 		    "IP_INJECT_QUEUE_OUT", 1, TASKQ_DEFAULTPRI, 0);
    156 
    157 		if (eventq_queue_out == NULL)
    158 			cmn_err(CE_NOTE, "ipv4_net_init: "
    159 			    "ddi_taskq_create failed for IP_INJECT_QUEUE_OUT");
    160 	}
    161 
    162 	if (eventq_queue_in == NULL) {
    163 		eventq_queue_in = ddi_taskq_create(NULL,
    164 		    "IP_INJECT_QUEUE_IN", 1, TASKQ_DEFAULTPRI, 0);
    165 
    166 		if (eventq_queue_in == NULL)
    167 			cmn_err(CE_NOTE, "ipv4_net_init: "
    168 			    "ddi_taskq_create failed for IP_INJECT_QUEUE_IN");
    169 	}
    170 
    171 	if (eventq_queue_nic == NULL) {
    172 		eventq_queue_nic = ddi_taskq_create(NULL,
    173 		    "IP_NIC_EVENT_QUEUE", 1, TASKQ_DEFAULTPRI, 0);
    174 
    175 		if (eventq_queue_nic == NULL)
    176 			cmn_err(CE_NOTE, "ipv4_net_init: "
    177 			    "ddi_taskq_create failed for IP_NIC_EVENT_QUEUE");
    178 	}
    179 }
    180 
    181 /*
    182  * Destroy inject queues
    183  */
    184 void
    185 ip_net_g_destroy()
    186 {
    187 	if (eventq_queue_nic != NULL) {
    188 		ddi_taskq_destroy(eventq_queue_nic);
    189 		eventq_queue_nic = NULL;
    190 	}
    191 
    192 	if (eventq_queue_in != NULL) {
    193 		ddi_taskq_destroy(eventq_queue_in);
    194 		eventq_queue_in = NULL;
    195 	}
    196 
    197 	if (eventq_queue_out != NULL) {
    198 		ddi_taskq_destroy(eventq_queue_out);
    199 		eventq_queue_out = NULL;
    200 	}
    201 }
    202 
/*
 * Register the IPv4 and IPv6 netinfo implementations (ipv4info and
 * ipv6info) for the given netstack and store the values returned by
 * net_register_impl() in the IP stack instance.
 *
 * The inject task queues are not set up here; ip_net_g_init() creates
 * them.
 */
void
ip_net_init(ip_stack_t *ipst, netstack_t *ns)
{

	ipst->ips_ipv4_net_data = net_register_impl(&ipv4info, ns);
	/* A failed registration is only caught by this ASSERT. */
	ASSERT(ipst->ips_ipv4_net_data != NULL);

	ipst->ips_ipv6_net_data = net_register_impl(&ipv6info, ns);
	/* Likewise for IPv6. */
	ASSERT(ipst->ips_ipv6_net_data != NULL);
}
    216 
    217 
    218 /*
    219  * Unregister IPv4 and IPv6 functions and inject queues
    220  */
    221 void
    222 ip_net_destroy(ip_stack_t *ipst)
    223 {
    224 	if (ipst->ips_ipv4_net_data != NULL) {
    225 		if (net_unregister(ipst->ips_ipv4_net_data) == 0)
    226 			ipst->ips_ipv4_net_data = NULL;
    227 	}
    228 
    229 	if (ipst->ips_ipv6_net_data != NULL) {
    230 		if (net_unregister(ipst->ips_ipv6_net_data) == 0)
    231 			ipst->ips_ipv6_net_data = NULL;
    232 	}
    233 }
    234 
    235 /*
    236  * Initialize IPv4 hooks family the event
    237  */
    238 void
    239 ipv4_hook_init(ip_stack_t *ipst)
    240 {
    241 	HOOK_FAMILY_INIT(&ipst->ips_ipv4root, Hn_IPV4);
    242 	if (net_register_family(ipst->ips_ipv4_net_data, &ipst->ips_ipv4root)
    243 	    != 0) {
    244 		cmn_err(CE_NOTE, "ipv4_hook_init: "
    245 		    "net_register_family failed for ipv4");
    246 	}
    247 
    248 	HOOK_EVENT_INIT(&ipst->ips_ip4_physical_in_event, NH_PHYSICAL_IN);
    249 	ipst->ips_ipv4firewall_physical_in = net_register_event(
    250 	    ipst->ips_ipv4_net_data, &ipst->ips_ip4_physical_in_event);
    251 	if (ipst->ips_ipv4firewall_physical_in == NULL) {
    252 		cmn_err(CE_NOTE, "ipv4_hook_init: "
    253 		    "net_register_event failed for ipv4/physical_in");
    254 	}
    255 
    256 	HOOK_EVENT_INIT(&ipst->ips_ip4_physical_out_event, NH_PHYSICAL_OUT);
    257 	ipst->ips_ipv4firewall_physical_out = net_register_event(
    258 	    ipst->ips_ipv4_net_data, &ipst->ips_ip4_physical_out_event);
    259 	if (ipst->ips_ipv4firewall_physical_out == NULL) {
    260 		cmn_err(CE_NOTE, "ipv4_hook_init: "
    261 		    "net_register_event failed for ipv4/physical_out");
    262 	}
    263 
    264 	HOOK_EVENT_INIT(&ipst->ips_ip4_forwarding_event, NH_FORWARDING);
    265 	ipst->ips_ipv4firewall_forwarding = net_register_event(
    266 	    ipst->ips_ipv4_net_data, &ipst->ips_ip4_forwarding_event);
    267 	if (ipst->ips_ipv4firewall_forwarding == NULL) {
    268 		cmn_err(CE_NOTE, "ipv4_hook_init: "
    269 		    "net_register_event failed for ipv4/forwarding");
    270 	}
    271 
    272 	HOOK_EVENT_INIT(&ipst->ips_ip4_loopback_in_event, NH_LOOPBACK_IN);
    273 	ipst->ips_ipv4firewall_loopback_in = net_register_event(
    274 	    ipst->ips_ipv4_net_data, &ipst->ips_ip4_loopback_in_event);
    275 	if (ipst->ips_ipv4firewall_loopback_in == NULL) {
    276 		cmn_err(CE_NOTE, "ipv4_hook_init: "
    277 		    "net_register_event failed for ipv4/loopback_in");
    278 	}
    279 
    280 	HOOK_EVENT_INIT(&ipst->ips_ip4_loopback_out_event, NH_LOOPBACK_OUT);
    281 	ipst->ips_ipv4firewall_loopback_out = net_register_event(
    282 	    ipst->ips_ipv4_net_data, &ipst->ips_ip4_loopback_out_event);
    283 	if (ipst->ips_ipv4firewall_loopback_out == NULL) {
    284 		cmn_err(CE_NOTE, "ipv4_hook_init: "
    285 		    "net_register_event failed for ipv4/loopback_out");
    286 	}
    287 
    288 	HOOK_EVENT_INIT(&ipst->ips_ip4_nic_events, NH_NIC_EVENTS);
    289 	ipst->ips_ip4_nic_events.he_flags = HOOK_RDONLY;
    290 	ipst->ips_ipv4nicevents = net_register_event(
    291 	    ipst->ips_ipv4_net_data, &ipst->ips_ip4_nic_events);
    292 	if (ipst->ips_ipv4nicevents == NULL) {
    293 		cmn_err(CE_NOTE, "ipv4_hook_init: "
    294 		    "net_register_event failed for ipv4/nic_events");
    295 	}
    296 }
    297 
    298 void
    299 ipv4_hook_destroy(ip_stack_t *ipst)
    300 {
    301 	if (ipst->ips_ipv4firewall_forwarding != NULL) {
    302 		if (net_unregister_event(ipst->ips_ipv4_net_data,
    303 		    &ipst->ips_ip4_forwarding_event) == 0)
    304 			ipst->ips_ipv4firewall_forwarding = NULL;
    305 	}
    306 
    307 	if (ipst->ips_ipv4firewall_physical_in != NULL) {
    308 		if (net_unregister_event(ipst->ips_ipv4_net_data,
    309 		    &ipst->ips_ip4_physical_in_event) == 0)
    310 			ipst->ips_ipv4firewall_physical_in = NULL;
    311 	}
    312 
    313 	if (ipst->ips_ipv4firewall_physical_out != NULL) {
    314 		if (net_unregister_event(ipst->ips_ipv4_net_data,
    315 		    &ipst->ips_ip4_physical_out_event) == 0)
    316 			ipst->ips_ipv4firewall_physical_out = NULL;
    317 	}
    318 
    319 	if (ipst->ips_ipv4firewall_loopback_in != NULL) {
    320 		if (net_unregister_event(ipst->ips_ipv4_net_data,
    321 		    &ipst->ips_ip4_loopback_in_event) == 0)
    322 			ipst->ips_ipv4firewall_loopback_in = NULL;
    323 	}
    324 
    325 	if (ipst->ips_ipv4firewall_loopback_out != NULL) {
    326 		if (net_unregister_event(ipst->ips_ipv4_net_data,
    327 		    &ipst->ips_ip4_loopback_out_event) == 0)
    328 			ipst->ips_ipv4firewall_loopback_out = NULL;
    329 	}
    330 
    331 	if (ipst->ips_ipv4nicevents != NULL) {
    332 		if (net_unregister_event(ipst->ips_ipv4_net_data,
    333 		    &ipst->ips_ip4_nic_events) == 0)
    334 			ipst->ips_ipv4nicevents = NULL;
    335 	}
    336 
    337 	(void) net_unregister_family(ipst->ips_ipv4_net_data,
    338 	    &ipst->ips_ipv4root);
    339 }
    340 
    341 /*
    342  * Initialize IPv6 hooks family and event
    343  */
    344 void
    345 ipv6_hook_init(ip_stack_t *ipst)
    346 {
    347 
    348 	HOOK_FAMILY_INIT(&ipst->ips_ipv6root, Hn_IPV6);
    349 	if (net_register_family(ipst->ips_ipv6_net_data, &ipst->ips_ipv6root)
    350 	    != 0) {
    351 		cmn_err(CE_NOTE, "ipv6_hook_init: "
    352 		    "net_register_family failed for ipv6");
    353 	}
    354 
    355 	HOOK_EVENT_INIT(&ipst->ips_ip6_physical_in_event, NH_PHYSICAL_IN);
    356 	ipst->ips_ipv6firewall_physical_in = net_register_event(
    357 	    ipst->ips_ipv6_net_data, &ipst->ips_ip6_physical_in_event);
    358 	if (ipst->ips_ipv6firewall_physical_in == NULL) {
    359 		cmn_err(CE_NOTE, "ipv6_hook_init: "
    360 		    "net_register_event failed for ipv6/physical_in");
    361 	}
    362 
    363 	HOOK_EVENT_INIT(&ipst->ips_ip6_physical_out_event, NH_PHYSICAL_OUT);
    364 	ipst->ips_ipv6firewall_physical_out = net_register_event(
    365 	    ipst->ips_ipv6_net_data, &ipst->ips_ip6_physical_out_event);
    366 	if (ipst->ips_ipv6firewall_physical_out == NULL) {
    367 		cmn_err(CE_NOTE, "ipv6_hook_init: "
    368 		    "net_register_event failed for ipv6/physical_out");
    369 	}
    370 
    371 	HOOK_EVENT_INIT(&ipst->ips_ip6_forwarding_event, NH_FORWARDING);
    372 	ipst->ips_ipv6firewall_forwarding = net_register_event(
    373 	    ipst->ips_ipv6_net_data, &ipst->ips_ip6_forwarding_event);
    374 	if (ipst->ips_ipv6firewall_forwarding == NULL) {
    375 		cmn_err(CE_NOTE, "ipv6_hook_init: "
    376 		    "net_register_event failed for ipv6/forwarding");
    377 	}
    378 
    379 	HOOK_EVENT_INIT(&ipst->ips_ip6_loopback_in_event, NH_LOOPBACK_IN);
    380 	ipst->ips_ipv6firewall_loopback_in = net_register_event(
    381 	    ipst->ips_ipv6_net_data, &ipst->ips_ip6_loopback_in_event);
    382 	if (ipst->ips_ipv6firewall_loopback_in == NULL) {
    383 		cmn_err(CE_NOTE, "ipv6_hook_init: "
    384 		    "net_register_event failed for ipv6/loopback_in");
    385 	}
    386 
    387 	HOOK_EVENT_INIT(&ipst->ips_ip6_loopback_out_event, NH_LOOPBACK_OUT);
    388 	ipst->ips_ipv6firewall_loopback_out = net_register_event(
    389 	    ipst->ips_ipv6_net_data, &ipst->ips_ip6_loopback_out_event);
    390 	if (ipst->ips_ipv6firewall_loopback_out == NULL) {
    391 		cmn_err(CE_NOTE, "ipv6_hook_init: "
    392 		    "net_register_event failed for ipv6/loopback_out");
    393 	}
    394 
    395 	HOOK_EVENT_INIT(&ipst->ips_ip6_nic_events, NH_NIC_EVENTS);
    396 	ipst->ips_ip6_nic_events.he_flags = HOOK_RDONLY;
    397 	ipst->ips_ipv6nicevents = net_register_event(
    398 	    ipst->ips_ipv6_net_data, &ipst->ips_ip6_nic_events);
    399 	if (ipst->ips_ipv6nicevents == NULL) {
    400 		cmn_err(CE_NOTE, "ipv6_hook_init: "
    401 		    "net_register_event failed for ipv6/nic_events");
    402 	}
    403 }
    404 
    405 void
    406 ipv6_hook_destroy(ip_stack_t *ipst)
    407 {
    408 	if (ipst->ips_ipv6firewall_forwarding != NULL) {
    409 		if (net_unregister_event(ipst->ips_ipv6_net_data,
    410 		    &ipst->ips_ip6_forwarding_event) == 0)
    411 			ipst->ips_ipv6firewall_forwarding = NULL;
    412 	}
    413 
    414 	if (ipst->ips_ipv6firewall_physical_in != NULL) {
    415 		if (net_unregister_event(ipst->ips_ipv6_net_data,
    416 		    &ipst->ips_ip6_physical_in_event) == 0)
    417 			ipst->ips_ipv6firewall_physical_in = NULL;
    418 	}
    419 
    420 	if (ipst->ips_ipv6firewall_physical_out != NULL) {
    421 		if (net_unregister_event(ipst->ips_ipv6_net_data,
    422 		    &ipst->ips_ip6_physical_out_event) == 0)
    423 			ipst->ips_ipv6firewall_physical_out = NULL;
    424 	}
    425 
    426 	if (ipst->ips_ipv6firewall_loopback_in != NULL) {
    427 		if (net_unregister_event(ipst->ips_ipv6_net_data,
    428 		    &ipst->ips_ip6_loopback_in_event) == 0)
    429 			ipst->ips_ipv6firewall_loopback_in = NULL;
    430 	}
    431 
    432 	if (ipst->ips_ipv6firewall_loopback_out != NULL) {
    433 		if (net_unregister_event(ipst->ips_ipv6_net_data,
    434 		    &ipst->ips_ip6_loopback_out_event) == 0)
    435 			ipst->ips_ipv6firewall_loopback_out = NULL;
    436 	}
    437 
    438 	if (ipst->ips_ipv6nicevents != NULL) {
    439 		if (net_unregister_event(ipst->ips_ipv6_net_data,
    440 		    &ipst->ips_ip6_nic_events) == 0)
    441 			ipst->ips_ipv6nicevents = NULL;
    442 	}
    443 
    444 	(void) net_unregister_family(ipst->ips_ipv6_net_data,
    445 	    &ipst->ips_ipv6root);
    446 }
    447 
    448 /*
    449  * Determine the name of an IPv4 interface
    450  */
    451 static int
    452 ip_getifname(phy_if_t phy_ifdata, char *buffer, const size_t buflen,
    453     netstack_t *ns)
    454 {
    455 	return (ip_getifname_impl(phy_ifdata, buffer, buflen, B_FALSE,
    456 	    ns->netstack_ip));
    457 }
    458 
    459 /*
    460  * Determine the name of an IPv6 interface
    461  */
    462 static int
    463 ipv6_getifname(phy_if_t phy_ifdata, char *buffer, const size_t buflen,
    464     netstack_t *ns)
    465 {
    466 	return (ip_getifname_impl(phy_ifdata, buffer, buflen, B_TRUE,
    467 	    ns->netstack_ip));
    468 }
    469 
    470 /*
    471  * Shared implementation to determine the name of a given network interface
    472  */
    473 /* ARGSUSED */
    474 static int
    475 ip_getifname_impl(phy_if_t phy_ifdata,
    476     char *buffer, const size_t buflen, boolean_t isv6, ip_stack_t *ipst)
    477 {
    478 	ill_t *ill;
    479 	char *name;
    480 
    481 	ASSERT(buffer != NULL);
    482 
    483 	ill = ill_lookup_on_ifindex((uint_t)phy_ifdata, isv6, NULL, NULL,
    484 	    NULL, NULL, ipst);
    485 	if (ill != NULL) {
    486 		name = ill->ill_name;
    487 	} else {
    488 		/* Fallback to group names only if hook_emulation is set */
    489 		if (ipst->ips_ipmp_hook_emulation) {
    490 			ill = ill_group_lookup_on_ifindex((uint_t)phy_ifdata,
    491 			    isv6, ipst);
    492 		}
    493 		if (ill == NULL)
    494 			return (1);
    495 		name = ill->ill_phyint->phyint_groupname;
    496 	}
    497 	if (name != NULL) {
    498 		(void) strlcpy(buffer, name, buflen);
    499 		ill_refrele(ill);
    500 		return (0);
    501 	} else {
    502 		ill_refrele(ill);
    503 		return (1);
    504 	}
    505 
    506 }
    507 
    508 /*
    509  * Determine the MTU of an IPv4 network interface
    510  */
    511 static int
    512 ip_getmtu(phy_if_t phy_ifdata, lif_if_t ifdata, netstack_t *ns)
    513 {
    514 	ASSERT(ns != NULL);
    515 	return (ip_getmtu_impl(phy_ifdata, ifdata, B_FALSE, ns->netstack_ip));
    516 }
    517 
    518 /*
    519  * Determine the MTU of an IPv6 network interface
    520  */
    521 static int
    522 ipv6_getmtu(phy_if_t phy_ifdata, lif_if_t ifdata, netstack_t *ns)
    523 {
    524 	ASSERT(ns != NULL);
    525 	return (ip_getmtu_impl(phy_ifdata, ifdata, B_TRUE, ns->netstack_ip));
    526 }
    527 
    528 /*
    529  * Shared implementation to determine the MTU of a network interface
    530  *
    531  * Note: this does not handle a non-zero ifdata when ipmp_hook_emulation is set.
    532  * But IP Filter only uses a zero ifdata.
    533  */
    534 /* ARGSUSED */
    535 static int
    536 ip_getmtu_impl(phy_if_t phy_ifdata, lif_if_t ifdata, boolean_t isv6,
    537     ip_stack_t *ipst)
    538 {
    539 	lif_if_t ipifid;
    540 	ipif_t *ipif;
    541 	int mtu;
    542 
    543 	ipifid = UNMAP_IPIF_ID(ifdata);
    544 
    545 	ipif = ipif_getby_indexes((uint_t)phy_ifdata, (uint_t)ipifid,
    546 	    isv6, ipst);
    547 	if (ipif == NULL)
    548 		return (0);
    549 
    550 	mtu = ipif->ipif_mtu;
    551 	ipif_refrele(ipif);
    552 
    553 	if (mtu == 0) {
    554 		ill_t *ill;
    555 
    556 		if ((ill = ill_lookup_on_ifindex((uint_t)phy_ifdata, isv6,
    557 		    NULL, NULL, NULL, NULL, ipst)) == NULL) {
    558 			/*
    559 			 * Fallback to group names only if hook_emulation
    560 			 * is set
    561 			 */
    562 			if (ipst->ips_ipmp_hook_emulation) {
    563 				ill = ill_group_lookup_on_ifindex(
    564 				    (uint_t)phy_ifdata, isv6, ipst);
    565 			}
    566 			if (ill == NULL)
    567 				return (0);
    568 		}
    569 		mtu = ill->ill_max_frag;
    570 		ill_refrele(ill);
    571 	}
    572 
    573 	return (mtu);
    574 }
    575 
    576 /*
    577  * Determine if path MTU discovery is enabled for IP
    578  */
    579 static int
    580 ip_getpmtuenabled(netstack_t *ns)
    581 {
    582 	ASSERT(ns != NULL);
    583 	return ((ns->netstack_ip)->ips_ip_path_mtu_discovery);
    584 }
    585 
    586 /*
    587  * Get next interface from the current list of IPv4 physical network interfaces
    588  *
    589  * Note: this does not handle the case when ipmp_hook_emulation is set.
    590  * But IP Filter does not use this function.
    591  */
    592 static phy_if_t
    593 ip_phygetnext(phy_if_t phy_ifdata, netstack_t *ns)
    594 {
    595 	ASSERT(ns != NULL);
    596 	return (ill_get_next_ifindex(phy_ifdata, B_FALSE, ns->netstack_ip));
    597 }
    598 
    599 /*
    600  * Get next interface from the current list of IPv6 physical network interfaces
    601  */
    602 static phy_if_t
    603 ipv6_phygetnext(phy_if_t phy_ifdata, netstack_t *ns)
    604 {
    605 	ASSERT(ns != NULL);
    606 	return (ill_get_next_ifindex(phy_ifdata, B_TRUE, ns->netstack_ip));
    607 }
    608 
    609 /*
    610  * Determine if a network interface name exists for IPv4
    611  */
    612 static phy_if_t
    613 ip_phylookup(const char *name, netstack_t *ns)
    614 {
    615 	ASSERT(ns != NULL);
    616 	return (ip_phylookup_impl(name, B_FALSE, ns->netstack_ip));
    617 }
    618 
    619 /*
    620  * Determine if a network interface name exists for IPv6
    621  */
    622 static phy_if_t
    623 ipv6_phylookup(const char *name, netstack_t *ns)
    624 {
    625 	ASSERT(ns != NULL);
    626 	return (ip_phylookup_impl(name, B_TRUE, ns->netstack_ip));
    627 }
    628 
    629 /*
    630  * Implement looking up an ill_t based on the name supplied and matching
    631  * it up with either IPv4 or IPv6.  ill_get_ifindex_by_name() is not used
    632  * because it does not match on the address family in addition to the name.
    633  */
    634 static phy_if_t
    635 ip_phylookup_impl(const char *name, boolean_t isv6, ip_stack_t *ipst)
    636 {
    637 	phy_if_t phy;
    638 	ill_t *ill;
    639 
    640 	ill = ill_lookup_on_name((char *)name, B_FALSE, isv6, NULL, NULL,
    641 	    NULL, NULL, NULL, ipst);
    642 
    643 	/* Fallback to group names only if hook_emulation is set */
    644 	if (ill == NULL && ipst->ips_ipmp_hook_emulation) {
    645 		ill = ill_group_lookup_on_name((char *)name, isv6, ipst);
    646 	}
    647 	if (ill == NULL)
    648 		return (0);
    649 
    650 	phy = ill->ill_phyint->phyint_hook_ifindex;
    651 
    652 	ill_refrele(ill);
    653 
    654 	return (phy);
    655 }
    656 
    657 /*
    658  * Get next interface from the current list of IPv4 logical network interfaces
    659  */
    660 static lif_if_t
    661 ip_lifgetnext(phy_if_t phy_ifdata, lif_if_t ifdata, netstack_t *ns)
    662 {
    663 	ASSERT(ns != NULL);
    664 	return (ip_lifgetnext_impl(phy_ifdata, ifdata, B_FALSE,
    665 	    ns->netstack_ip));
    666 }
    667 
    668 /*
    669  * Get next interface from the current list of IPv6 logical network interfaces
    670  */
    671 static lif_if_t
    672 ipv6_lifgetnext(phy_if_t phy_ifdata, lif_if_t ifdata, netstack_t *ns)
    673 {
    674 	ASSERT(ns != NULL);
    675 	return (ip_lifgetnext_impl(phy_ifdata, ifdata, B_TRUE,
    676 	    ns->netstack_ip));
    677 }
    678 
    679 /*
    680  * Shared implementation to get next interface from the current list of
    681  * logical network interfaces
    682  *
    683  * Note: this does not handle the case when ipmp_hook_emulation is set.
    684  * But IP Filter does not use this function.
    685  */
    686 static lif_if_t
    687 ip_lifgetnext_impl(phy_if_t phy_ifdata, lif_if_t ifdata, boolean_t isv6,
    688     ip_stack_t *ipst)
    689 {
    690 	lif_if_t newidx, oldidx;
    691 	boolean_t nextok;
    692 	ipif_t *ipif;
    693 	ill_t *ill;
    694 
    695 	ill = ill_lookup_on_ifindex(phy_ifdata, isv6, NULL, NULL,
    696 	    NULL, NULL, ipst);
    697 	if (ill == NULL)
    698 		return (0);
    699 
    700 	if (ifdata != 0) {
    701 		oldidx = UNMAP_IPIF_ID(ifdata);
    702 		nextok = B_FALSE;
    703 	} else {
    704 		oldidx = 0;
    705 		nextok = B_TRUE;
    706 	}
    707 
    708 	mutex_enter(&ill->ill_lock);
    709 	if (ill->ill_state_flags & ILL_CONDEMNED) {
    710 		mutex_exit(&ill->ill_lock);
    711 		ill_refrele(ill);
    712 		return (0);
    713 	}
    714 
    715 	/*
    716 	 * It's safe to iterate the ill_ipif list when holding an ill_lock.
    717 	 * And it's also safe to access ipif_id without ipif refhold.
    718 	 * See ipif_get_id().
    719 	 */
    720 	for (ipif = ill->ill_ipif; ipif != NULL; ipif = ipif->ipif_next) {
    721 		if (!IPIF_CAN_LOOKUP(ipif))
    722 			continue;
    723 		if (nextok) {
    724 			ipif_refhold_locked(ipif);
    725 			break;
    726 		} else if (oldidx == ipif->ipif_id) {
    727 			nextok = B_TRUE;
    728 		}
    729 	}
    730 
    731 	mutex_exit(&ill->ill_lock);
    732 	ill_refrele(ill);
    733 
    734 	if (ipif == NULL)
    735 		return (0);
    736 
    737 	newidx = ipif->ipif_id;
    738 	ipif_refrele(ipif);
    739 
    740 	return (MAP_IPIF_ID(newidx));
    741 }
    742 
    743 /*
    744  * Inject an IPv4 packet to or from an interface
    745  */
    746 static int
    747 ip_inject(inject_t style, net_inject_t *packet, netstack_t *ns)
    748 {
    749 	ASSERT(ns != NULL);
    750 	return (ip_inject_impl(style, packet, B_FALSE, ns->netstack_ip));
    751 }
    752 
    753 
    754 /*
    755  * Inject an IPv6 packet to or from an interface
    756  */
    757 static int
    758 ipv6_inject(inject_t style, net_inject_t *packet, netstack_t *ns)
    759 {
    760 	ASSERT(ns != NULL);
    761 	return (ip_inject_impl(style, packet, B_TRUE, ns->netstack_ip));
    762 }
    763 
    764 /*
    765  * Shared implementation to inject a packet to or from an interface
    766  * Return value:
    767  *   0: successful
    768  *  -1: memory allocation failed
    769  *   1: other errors
    770  */
    771 static int
    772 ip_inject_impl(inject_t style, net_inject_t *packet, boolean_t isv6,
    773     ip_stack_t *ipst)
    774 {
    775 	struct sockaddr_in6 *sin6;
    776 	ddi_taskq_t *tq = NULL;
    777 	void (* func)(void *);
    778 	injection_t *inject;
    779 	ip6_t *ip6h;
    780 	ire_t *ire;
    781 	mblk_t *mp;
    782 
    783 	ASSERT(packet != NULL);
    784 	ASSERT(packet->ni_packet != NULL);
    785 	ASSERT(packet->ni_packet->b_datap->db_type == M_DATA);
    786 
    787 	switch (style) {
    788 	case NI_QUEUE_IN:
    789 		inject = kmem_alloc(sizeof (*inject), KM_NOSLEEP);
    790 		if (inject == NULL)
    791 			return (-1);
    792 		inject->inj_data = *packet;
    793 		inject->inj_isv6 = isv6;
    794 		/*
    795 		 * deliver up into the kernel, immitating its reception by a
    796 		 * network interface, add to list and schedule timeout
    797 		 */
    798 		func = ip_ni_queue_in_func;
    799 		tq = eventq_queue_in;
    800 		break;
    801 
    802 	case NI_QUEUE_OUT:
    803 		inject = kmem_alloc(sizeof (*inject), KM_NOSLEEP);
    804 		if (inject == NULL)
    805 			return (-1);
    806 		inject->inj_data = *packet;
    807 		inject->inj_isv6 = isv6;
    808 		/*
    809 		 * deliver out of the kernel, as if it were being sent via a
    810 		 * raw socket so that IPFilter will see it again, add to list
    811 		 * and schedule timeout
    812 		 */
    813 		func = ip_ni_queue_out_func;
    814 		tq = eventq_queue_out;
    815 		break;
    816 
    817 	case NI_DIRECT_OUT:
    818 		/*
    819 		 * Note:
    820 		 * For IPv4, the code path below will be greatly simplified
    821 		 * with the delivery of surya - it will become a single
    822 		 * function call to X.  A follow on project is aimed to
    823 		 * provide similar functionality for IPv6.
    824 		 */
    825 		mp = packet->ni_packet;
    826 
    827 		if (!isv6) {
    828 			struct sockaddr *sock;
    829 
    830 			sock = (struct sockaddr *)&packet->ni_addr;
    831 			/*
    832 			 * ipfil_sendpkt was provided by surya to ease the
    833 			 * problems associated with sending out a packet.
    834 			 * Currently this function only supports IPv4.
    835 			 */
    836 			switch (ipfil_sendpkt(sock, mp, packet->ni_physical,
    837 			    netstackid_to_zoneid(
    838 			    ipst->ips_netstack->netstack_stackid))) {
    839 			case 0 :
    840 			case EINPROGRESS:
    841 				return (0);
    842 			case ECOMM :
    843 			case ENONET :
    844 				return (1);
    845 			default :
    846 				return (1);
    847 			}
    848 			/* NOTREACHED */
    849 
    850 		}
    851 
    852 		ip6h = (ip6_t *)mp->b_rptr;
    853 		sin6 = (struct sockaddr_in6 *)&packet->ni_addr;
    854 		ASSERT(sin6->sin6_family == AF_INET6);
    855 
    856 		ire = ire_route_lookup_v6(&sin6->sin6_addr, 0, 0, 0,
    857 		    NULL, NULL, ALL_ZONES, NULL,
    858 		    MATCH_IRE_DSTONLY|MATCH_IRE_DEFAULT|MATCH_IRE_RECURSIVE,
    859 		    ipst);
    860 
    861 		if (ire == NULL) {
    862 			ip2dbg(("ip_inject: ire_cache_lookup failed\n"));
    863 			freemsg(mp);
    864 			return (1);
    865 		}
    866 
    867 		if (ire->ire_stq == NULL) {
    868 			/* Send to loopback destination. */
    869 			if (ire->ire_rfq == NULL) {
    870 				ip2dbg(("ip_inject: bad nexthop\n"));
    871 				ire_refrele(ire);
    872 				freemsg(mp);
    873 				return (1);
    874 			}
    875 			DTRACE_IP7(send, mblk_t *, mp, conn_t *, NULL,
    876 			    void_ip_t *, ip6h, __dtrace_ipsr_ill_t *,
    877 			    ire->ire_ipif->ipif_ill, ipha_t *, NULL, ip6_t *,
    878 			    ip6h, int, 1);
    879 			ip_wput_local_v6(ire->ire_rfq,
    880 			    ire->ire_ipif->ipif_ill, ip6h, mp, ire, 0);
    881 			ire_refrele(ire);
    882 			return (0);
    883 		}
    884 
    885 		mp->b_queue = ire->ire_stq;
    886 
    887 		if (ire->ire_nce == NULL ||
    888 		    ire->ire_nce->nce_fp_mp == NULL &&
    889 		    ire->ire_nce->nce_res_mp == NULL) {
    890 			ip_newroute_v6(ire->ire_stq, mp,
    891 			    &sin6->sin6_addr, NULL, NULL, ALL_ZONES, ipst);
    892 
    893 			ire_refrele(ire);
    894 			return (0);
    895 		} else {
    896 			/* prepend L2 header for IPv6 packets. */
    897 			mblk_t *llmp;
    898 
    899 			/*
    900 			 * Lock IREs, see 6420438
    901 			 */
    902 			mutex_enter(&ire->ire_lock);
    903 			llmp = ire->ire_nce->nce_fp_mp ?
    904 			    ire->ire_nce->nce_fp_mp :
    905 			    ire->ire_nce->nce_res_mp;
    906 
    907 			if ((mp = dupb(llmp)) == NULL &&
    908 			    (mp = copyb(llmp)) == NULL) {
    909 				ip2dbg(("ip_inject: llhdr failed\n"));
    910 				mutex_exit(&ire->ire_lock);
    911 				ire_refrele(ire);
    912 				freemsg(mp);
    913 				return (1);
    914 			}
    915 			mutex_exit(&ire->ire_lock);
    916 			linkb(mp, packet->ni_packet);
    917 		}
    918 
    919 		mp->b_queue = ire->ire_stq;
    920 
    921 		break;
    922 	default:
    923 		freemsg(packet->ni_packet);
    924 		return (1);
    925 	}
    926 
    927 	if (tq) {
    928 		inject->inj_ptr = ipst;
    929 		if (ddi_taskq_dispatch(tq, func, (void *)inject,
    930 		    DDI_SLEEP) == DDI_FAILURE) {
    931 			ip2dbg(("ip_inject:  ddi_taskq_dispatch failed\n"));
    932 			freemsg(packet->ni_packet);
    933 			return (1);
    934 		}
    935 	} else {
    936 		putnext(ire->ire_stq, mp);
    937 		ire_refrele(ire);
    938 	}
    939 
    940 	return (0);
    941 }
    942 
    943 /*
    944  * Find the interface used for traffic to a given IPv4 address
    945  */
    946 static phy_if_t
    947 ip_routeto(struct sockaddr *address, netstack_t *ns)
    948 {
    949 	ASSERT(address != NULL);
    950 	ASSERT(ns != NULL);
    951 
    952 	if (address->sa_family != AF_INET)
    953 		return (0);
    954 	return (ip_routeto_impl(address, ns->netstack_ip));
    955 }
    956 
    957 /*
    958  * Find the interface used for traffic to a given IPv6 address
    959  */
    960 static phy_if_t
    961 ipv6_routeto(struct sockaddr *address, netstack_t *ns)
    962 {
    963 	ASSERT(address != NULL);
    964 	ASSERT(ns != NULL);
    965 
    966 	if (address->sa_family != AF_INET6)
    967 		return (0);
    968 	return (ip_routeto_impl(address, ns->netstack_ip));
    969 }
    970 
    971 
    972 /*
    973  * Find the interface used for traffic to an address
    974  */
    975 static phy_if_t
    976 ip_routeto_impl(struct sockaddr *address, ip_stack_t *ipst)
    977 {
    978 	ire_t *ire;
    979 	ill_t *ill;
    980 	phy_if_t phy_if;
    981 
    982 	if (address->sa_family == AF_INET6) {
    983 		struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)address;
    984 		ire = ire_route_lookup_v6(&sin6->sin6_addr, NULL,
    985 		    0, 0, NULL, NULL, ALL_ZONES, NULL,
    986 		    MATCH_IRE_DSTONLY|MATCH_IRE_DEFAULT|MATCH_IRE_RECURSIVE,
    987 		    ipst);
    988 	} else {
    989 		struct sockaddr_in *sin = (struct sockaddr_in *)address;
    990 		ire = ire_route_lookup(sin->sin_addr.s_addr, 0,
    991 		    0, 0, NULL, NULL, ALL_ZONES, NULL,
    992 		    MATCH_IRE_DSTONLY|MATCH_IRE_DEFAULT|MATCH_IRE_RECURSIVE,
    993 		    ipst);
    994 	}
    995 
    996 	if (ire == NULL)
    997 		return (0);
    998 
    999 	ill = ire_to_ill(ire);
   1000 	if (ill == NULL) {
   1001 		ire_refrele(ire);
   1002 		return (0);
   1003 	}
   1004 
   1005 	ASSERT(ill != NULL);
   1006 	phy_if = (phy_if_t)ill->ill_phyint->phyint_hook_ifindex;
   1007 	ire_refrele(ire);
   1008 
   1009 	return (phy_if);
   1010 }
   1011 
   1012 /*
   1013  * Determine if checksumming is being used for the given packet.
   1014  *
   1015  * Return value: