1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2008 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 /* Copyright (c) 1990 Mentat Inc. */ 26 27 #pragma ident "%Z%%M% %I% %E% SMI" 28 29 /* 30 * This file contains the interface control functions for IP. 31 */ 32 33 #include <sys/types.h> 34 #include <sys/stream.h> 35 #include <sys/dlpi.h> 36 #include <sys/stropts.h> 37 #include <sys/strsun.h> 38 #include <sys/sysmacros.h> 39 #include <sys/strlog.h> 40 #include <sys/ddi.h> 41 #include <sys/sunddi.h> 42 #include <sys/cmn_err.h> 43 #include <sys/kstat.h> 44 #include <sys/debug.h> 45 #include <sys/zone.h> 46 #include <sys/sunldi.h> 47 #include <sys/file.h> 48 #include <sys/bitmap.h> 49 50 #include <sys/kmem.h> 51 #include <sys/systm.h> 52 #include <sys/param.h> 53 #include <sys/socket.h> 54 #include <sys/isa_defs.h> 55 #include <net/if.h> 56 #include <net/if_arp.h> 57 #include <net/if_types.h> 58 #include <net/if_dl.h> 59 #include <net/route.h> 60 #include <sys/sockio.h> 61 #include <netinet/in.h> 62 #include <netinet/ip6.h> 63 #include <netinet/icmp6.h> 64 #include <netinet/igmp_var.h> 65 #include <sys/strsun.h> 66 #include <sys/policy.h> 67 #include <sys/ethernet.h> 68 69 #include <inet/common.h> /* for various inet/mi.h and inet/nd.h needs */ 70 #include <inet/mi.h> 71 #include <inet/nd.h> 72 #include <inet/arp.h> 73 #include <inet/mib2.h> 74 #include <inet/ip.h> 75 #include <inet/ip6.h> 76 #include <inet/ip6_asp.h> 77 #include <inet/tcp.h> 78 #include <inet/ip_multi.h> 79 #include <inet/ip_ire.h> 80 #include <inet/ip_ftable.h> 81 #include <inet/ip_rts.h> 82 #include <inet/ip_ndp.h> 83 #include <inet/ip_if.h> 84 #include <inet/ip_impl.h> 85 #include <inet/tun.h> 86 #include <inet/sctp_ip.h> 87 #include <inet/ip_netinfo.h> 88 #include <inet/mib2.h> 89 90 #include <net/pfkeyv2.h> 91 #include <inet/ipsec_info.h> 92 #include <inet/sadb.h> 93 #include <inet/ipsec_impl.h> 94 #include <sys/iphada.h> 95 96 97 #include <netinet/igmp.h> 98 #include <inet/ip_listutils.h> 99 #include <inet/ipclassifier.h> 100 #include <sys/mac.h> 101 102 #include <sys/systeminfo.h> 103 #include <sys/bootconf.h> 104 105 #include <sys/tsol/tndb.h> 106 #include <sys/tsol/tnet.h> 107 108 /* The character which tells where the ill_name ends */ 109 #define IPIF_SEPARATOR_CHAR ':' 110 111 /* IP ioctl function table entry */ 112 typedef struct ipft_s { 113 int ipft_cmd; 114 pfi_t ipft_pfi; 115 int ipft_min_size; 116 int ipft_flags; 117 } ipft_t; 118 #define IPFT_F_NO_REPLY 0x1 /* IP ioctl does not expect any reply */ 119 #define IPFT_F_SELF_REPLY 0x2 /* ioctl callee does the ioctl reply */ 120 121 typedef struct ip_sock_ar_s { 122 union { 123 area_t ip_sock_area; 124 ared_t ip_sock_ared; 125 areq_t ip_sock_areq; 126 } ip_sock_ar_u; 127 queue_t *ip_sock_ar_q; 128 } ip_sock_ar_t; 129 130 static int nd_ill_forward_get(queue_t *, mblk_t *, caddr_t, cred_t *); 131 static int nd_ill_forward_set(queue_t *q, mblk_t *mp, 132 char *value, caddr_t cp, cred_t *ioc_cr); 133 134 static boolean_t ill_is_quiescent(ill_t *); 135 static boolean_t ip_addr_ok_v4(ipaddr_t addr, ipaddr_t subnet_mask); 136 static ip_m_t *ip_m_lookup(t_uscalar_t mac_type); 137 static int ip_sioctl_addr_tail(ipif_t *ipif, sin_t *sin, queue_t *q, 138 mblk_t *mp, boolean_t need_up); 139 static int ip_sioctl_dstaddr_tail(ipif_t *ipif, sin_t *sin, queue_t *q, 140 mblk_t *mp, boolean_t need_up); 141 static int ip_sioctl_slifzone_tail(ipif_t *ipif, zoneid_t zoneid, 142 queue_t *q, mblk_t *mp, boolean_t need_up); 143 static int ip_sioctl_flags_tail(ipif_t *ipif, uint64_t flags, queue_t *q, 144 mblk_t *mp, boolean_t need_up); 145 static int ip_sioctl_netmask_tail(ipif_t *ipif, sin_t *sin, queue_t *q, 146 mblk_t *mp); 147 static int ip_sioctl_subnet_tail(ipif_t *ipif, in6_addr_t, in6_addr_t, 148 queue_t *q, mblk_t *mp, boolean_t need_up); 149 static int ip_sioctl_plink_ipmod(ipsq_t *ipsq, queue_t *q, mblk_t *mp, 150 int ioccmd, struct linkblk *li, boolean_t doconsist); 151 static ipaddr_t ip_subnet_mask(ipaddr_t addr, ipif_t **, ip_stack_t *); 152 static void ip_wput_ioctl(queue_t *q, mblk_t *mp); 153 static void ipsq_flush(ill_t *ill); 154 155 static int ip_sioctl_token_tail(ipif_t *ipif, sin6_t *sin6, int addrlen, 156 queue_t *q, mblk_t *mp, boolean_t need_up); 157 static void ipsq_delete(ipsq_t *); 158 159 static ipif_t *ipif_allocate(ill_t *ill, int id, uint_t ire_type, 160 boolean_t initialize); 161 static void ipif_check_bcast_ires(ipif_t *test_ipif); 162 static ire_t **ipif_create_bcast_ires(ipif_t *ipif, ire_t **irep); 163 static boolean_t ipif_comp_multi(ipif_t *old_ipif, ipif_t *new_ipif, 164 boolean_t isv6); 165 static void ipif_down_delete_ire(ire_t *ire, char *ipif); 166 static void ipif_delete_cache_ire(ire_t *, char *); 167 static int ipif_logical_down(ipif_t *ipif, queue_t *q, mblk_t *mp); 168 static void ipif_free(ipif_t *ipif); 169 static void ipif_free_tail(ipif_t *ipif); 170 static void ipif_mtu_change(ire_t *ire, char *ipif_arg); 171 static void ipif_multicast_down(ipif_t *ipif); 172 static void ipif_recreate_interface_routes(ipif_t *old_ipif, ipif_t *ipif); 173 static void ipif_set_default(ipif_t *ipif); 174 static int ipif_set_values(queue_t *q, mblk_t *mp, 175 char *interf_name, uint_t *ppa); 176 static int ipif_set_values_tail(ill_t *ill, ipif_t *ipif, mblk_t *mp, 177 queue_t *q); 178 static ipif_t *ipif_lookup_on_name(char *name, size_t namelen, 179 boolean_t do_alloc, boolean_t *exists, boolean_t isv6, zoneid_t zoneid, 180 queue_t *q, mblk_t *mp, ipsq_func_t func, int *error, ip_stack_t *); 181 static int ipif_up(ipif_t *ipif, queue_t *q, mblk_t *mp); 182 static void ipif_update_other_ipifs(ipif_t *old_ipif, ill_group_t *illgrp); 183 184 static int ill_alloc_ppa(ill_if_t *, ill_t *); 185 static int ill_arp_off(ill_t *ill); 186 static int ill_arp_on(ill_t *ill); 187 static void ill_delete_interface_type(ill_if_t *); 188 static int ill_dl_up(ill_t *ill, ipif_t *ipif, mblk_t *mp, queue_t *q); 189 static void ill_dl_down(ill_t *ill); 190 static void ill_down(ill_t *ill); 191 static void ill_downi(ire_t *ire, char *ill_arg); 192 static void ill_free_mib(ill_t *ill); 193 static void ill_glist_delete(ill_t *); 194 static boolean_t ill_has_usable_ipif(ill_t *); 195 static int ill_lock_ipsq_ills(ipsq_t *sq, ill_t **list, int); 196 static void ill_nominate_bcast_rcv(ill_group_t *illgrp); 197 static void ill_phyint_free(ill_t *ill); 198 static void ill_phyint_reinit(ill_t *ill); 199 static void ill_set_nce_router_flags(ill_t *, boolean_t); 200 static void ill_set_phys_addr_tail(ipsq_t *, queue_t *, mblk_t *, void *); 201 static void ill_signal_ipsq_ills(ipsq_t *, boolean_t); 202 static boolean_t ill_split_ipsq(ipsq_t *cur_sq); 203 static void ill_stq_cache_delete(ire_t *, char *); 204 205 static boolean_t ip_ether_v6intfid(uint_t, uint8_t *, in6_addr_t *); 206 static boolean_t ip_nodef_v6intfid(uint_t, uint8_t *, in6_addr_t *); 207 static boolean_t ip_ether_v6mapinfo(uint_t, uint8_t *, uint8_t *, uint32_t *, 208 in6_addr_t *); 209 static boolean_t ip_ether_v4mapinfo(uint_t, uint8_t *, uint8_t *, uint32_t *, 210 ipaddr_t *); 211 static boolean_t ip_ib_v6intfid(uint_t, uint8_t *, in6_addr_t *); 212 static boolean_t ip_ib_v6mapinfo(uint_t, uint8_t *, uint8_t *, uint32_t *, 213 in6_addr_t *); 214 static boolean_t ip_ib_v4mapinfo(uint_t, uint8_t *, uint8_t *, uint32_t *, 215 ipaddr_t *); 216 217 static void ipif_save_ire(ipif_t *, ire_t *); 218 static void ipif_remove_ire(ipif_t *, ire_t *); 219 static void ip_cgtp_bcast_add(ire_t *, ire_t *, ip_stack_t *); 220 static void ip_cgtp_bcast_delete(ire_t *, ip_stack_t *); 221 222 /* 223 * Per-ill IPsec capabilities management. 224 */ 225 static ill_ipsec_capab_t *ill_ipsec_capab_alloc(void); 226 static void ill_ipsec_capab_free(ill_ipsec_capab_t *); 227 static void ill_ipsec_capab_add(ill_t *, uint_t, boolean_t); 228 static void ill_ipsec_capab_delete(ill_t *, uint_t); 229 static boolean_t ill_ipsec_capab_resize_algparm(ill_ipsec_capab_t *, int); 230 static void ill_capability_proto(ill_t *, int, mblk_t *); 231 static void ill_capability_dispatch(ill_t *, mblk_t *, dl_capability_sub_t *, 232 boolean_t); 233 static void ill_capability_id_ack(ill_t *, mblk_t *, dl_capability_sub_t *); 234 static void ill_capability_mdt_ack(ill_t *, mblk_t *, dl_capability_sub_t *); 235 static void ill_capability_mdt_reset(ill_t *, mblk_t **); 236 static void ill_capability_ipsec_ack(ill_t *, mblk_t *, dl_capability_sub_t *); 237 static void ill_capability_ipsec_reset(ill_t *, mblk_t **); 238 static void ill_capability_hcksum_ack(ill_t *, mblk_t *, dl_capability_sub_t *); 239 static void ill_capability_hcksum_reset(ill_t *, mblk_t **); 240 static void ill_capability_zerocopy_ack(ill_t *, mblk_t *, 241 dl_capability_sub_t *); 242 static void ill_capability_zerocopy_reset(ill_t *, mblk_t **); 243 static void ill_capability_lso_ack(ill_t *, mblk_t *, dl_capability_sub_t *); 244 static void ill_capability_lso_reset(ill_t *, mblk_t **); 245 static void ill_capability_dls_ack(ill_t *, mblk_t *, dl_capability_sub_t *); 246 static mac_resource_handle_t ill_ring_add(void *, mac_resource_t *); 247 static void ill_capability_dls_reset(ill_t *, mblk_t **); 248 static void ill_capability_dls_disable(ill_t *); 249 250 static void illgrp_cache_delete(ire_t *, char *); 251 static void illgrp_delete(ill_t *ill); 252 static void illgrp_reset_schednext(ill_t *ill); 253 254 static ill_t *ill_prev_usesrc(ill_t *); 255 static int ill_relink_usesrc_ills(ill_t *, ill_t *, uint_t); 256 static void ill_disband_usesrc_group(ill_t *); 257 258 static void conn_cleanup_stale_ire(conn_t *, caddr_t); 259 260 #ifdef DEBUG 261 static void ill_trace_cleanup(const ill_t *); 262 static void ipif_trace_cleanup(const ipif_t *); 263 #endif 264 265 /* 266 * if we go over the memory footprint limit more than once in this msec 267 * interval, we'll start pruning aggressively. 268 */ 269 int ip_min_frag_prune_time = 0; 270 271 /* 272 * max # of IPsec algorithms supported. Limited to 1 byte by PF_KEY 273 * and the IPsec DOI 274 */ 275 #define MAX_IPSEC_ALGS 256 276 277 #define BITSPERBYTE 8 278 #define BITS(type) (BITSPERBYTE * (long)sizeof (type)) 279 280 #define IPSEC_ALG_ENABLE(algs, algid) \ 281 ((algs)[(algid) / BITS(ipsec_capab_elem_t)] |= \ 282 (1 << ((algid) % BITS(ipsec_capab_elem_t)))) 283 284 #define IPSEC_ALG_IS_ENABLED(algid, algs) \ 285 ((algs)[(algid) / BITS(ipsec_capab_elem_t)] & \ 286 (1 << ((algid) % BITS(ipsec_capab_elem_t)))) 287 288 typedef uint8_t ipsec_capab_elem_t; 289 290 /* 291 * Per-algorithm parameters. Note that at present, only encryption 292 * algorithms have variable keysize (IKE does not provide a way to negotiate 293 * auth algorithm keysize). 294 * 295 * All sizes here are in bits. 296 */ 297 typedef struct 298 { 299 uint16_t minkeylen; 300 uint16_t maxkeylen; 301 } ipsec_capab_algparm_t; 302 303 /* 304 * Per-ill capabilities. 305 */ 306 struct ill_ipsec_capab_s { 307 ipsec_capab_elem_t *encr_hw_algs; 308 ipsec_capab_elem_t *auth_hw_algs; 309 uint32_t algs_size; /* size of _hw_algs in bytes */ 310 /* algorithm key lengths */ 311 ipsec_capab_algparm_t *encr_algparm; 312 uint32_t encr_algparm_size; 313 uint32_t encr_algparm_end; 314 }; 315 316 /* 317 * The field values are larger than strictly necessary for simple 318 * AR_ENTRY_ADDs but the padding lets us accomodate the socket ioctls. 319 */ 320 static area_t ip_area_template = { 321 AR_ENTRY_ADD, /* area_cmd */ 322 sizeof (ip_sock_ar_t) + (IP_ADDR_LEN*2) + sizeof (struct sockaddr_dl), 323 /* area_name_offset */ 324 /* area_name_length temporarily holds this structure length */ 325 sizeof (area_t), /* area_name_length */ 326 IP_ARP_PROTO_TYPE, /* area_proto */ 327 sizeof (ip_sock_ar_t), /* area_proto_addr_offset */ 328 IP_ADDR_LEN, /* area_proto_addr_length */ 329 sizeof (ip_sock_ar_t) + IP_ADDR_LEN, 330 /* area_proto_mask_offset */ 331 0, /* area_flags */ 332 sizeof (ip_sock_ar_t) + IP_ADDR_LEN + IP_ADDR_LEN, 333 /* area_hw_addr_offset */ 334 /* Zero length hw_addr_length means 'use your idea of the address' */ 335 0 /* area_hw_addr_length */ 336 }; 337 338 /* 339 * AR_ENTRY_ADD/DELETE templates have been added for IPv6 external resolver 340 * support 341 */ 342 static area_t ip6_area_template = { 343 AR_ENTRY_ADD, /* area_cmd */ 344 sizeof (ip_sock_ar_t) + (IPV6_ADDR_LEN*2) + sizeof (sin6_t), 345 /* area_name_offset */ 346 /* area_name_length temporarily holds this structure length */ 347 sizeof (area_t), /* area_name_length */ 348 IP_ARP_PROTO_TYPE, /* area_proto */ 349 sizeof (ip_sock_ar_t), /* area_proto_addr_offset */ 350 IPV6_ADDR_LEN, /* area_proto_addr_length */ 351 sizeof (ip_sock_ar_t) + IPV6_ADDR_LEN, 352 /* area_proto_mask_offset */ 353 0, /* area_flags */ 354 sizeof (ip_sock_ar_t) + IPV6_ADDR_LEN + IPV6_ADDR_LEN, 355 /* area_hw_addr_offset */ 356 /* Zero length hw_addr_length means 'use your idea of the address' */ 357 0 /* area_hw_addr_length */ 358 }; 359 360 static ared_t ip_ared_template = { 361 AR_ENTRY_DELETE, 362 sizeof (ared_t) + IP_ADDR_LEN, 363 sizeof (ared_t), 364 IP_ARP_PROTO_TYPE, 365 sizeof (ared_t), 366 IP_ADDR_LEN 367 }; 368 369 static ared_t ip6_ared_template = { 370 AR_ENTRY_DELETE, 371 sizeof (ared_t) + IPV6_ADDR_LEN, 372 sizeof (ared_t), 373 IP_ARP_PROTO_TYPE, 374 sizeof (ared_t), 375 IPV6_ADDR_LEN 376 }; 377 378 /* 379 * A template for an IPv6 AR_ENTRY_QUERY template has not been created, as 380 * as the areq doesn't include an IP address in ill_dl_up() (the only place a 381 * areq is used). 382 */ 383 static areq_t ip_areq_template = { 384 AR_ENTRY_QUERY, /* cmd */ 385 sizeof (areq_t)+(2*IP_ADDR_LEN), /* name offset */ 386 sizeof (areq_t), /* name len (filled by ill_arp_alloc) */ 387 IP_ARP_PROTO_TYPE, /* protocol, from arps perspective */ 388 sizeof (areq_t), /* target addr offset */ 389 IP_ADDR_LEN, /* target addr_length */ 390 0, /* flags */ 391 sizeof (areq_t) + IP_ADDR_LEN, /* sender addr offset */ 392 IP_ADDR_LEN, /* sender addr length */ 393 AR_EQ_DEFAULT_XMIT_COUNT, /* xmit_count */ 394 AR_EQ_DEFAULT_XMIT_INTERVAL, /* (re)xmit_interval in milliseconds */ 395 AR_EQ_DEFAULT_MAX_BUFFERED /* max # of requests to buffer */ 396 /* anything else filled in by the code */ 397 }; 398 399 static arc_t ip_aru_template = { 400 AR_INTERFACE_UP, 401 sizeof (arc_t), /* Name offset */ 402 sizeof (arc_t) /* Name length (set by ill_arp_alloc) */ 403 }; 404 405 static arc_t ip_ard_template = { 406 AR_INTERFACE_DOWN, 407 sizeof (arc_t), /* Name offset */ 408 sizeof (arc_t) /* Name length (set by ill_arp_alloc) */ 409 }; 410 411 static arc_t ip_aron_template = { 412 AR_INTERFACE_ON, 413 sizeof (arc_t), /* Name offset */ 414 sizeof (arc_t) /* Name length (set by ill_arp_alloc) */ 415 }; 416 417 static arc_t ip_aroff_template = { 418 AR_INTERFACE_OFF, 419 sizeof (arc_t), /* Name offset */ 420 sizeof (arc_t) /* Name length (set by ill_arp_alloc) */ 421 }; 422 423 424 static arma_t ip_arma_multi_template = { 425 AR_MAPPING_ADD, 426 sizeof (arma_t) + 3*IP_ADDR_LEN + IP_MAX_HW_LEN, 427 /* Name offset */ 428 sizeof (arma_t), /* Name length (set by ill_arp_alloc) */ 429 IP_ARP_PROTO_TYPE, 430 sizeof (arma_t), /* proto_addr_offset */ 431 IP_ADDR_LEN, /* proto_addr_length */ 432 sizeof (arma_t) + IP_ADDR_LEN, /* proto_mask_offset */ 433 sizeof (arma_t) + 2*IP_ADDR_LEN, /* proto_extract_mask_offset */ 434 ACE_F_PERMANENT | ACE_F_MAPPING, /* flags */ 435 sizeof (arma_t) + 3*IP_ADDR_LEN, /* hw_addr_offset */ 436 IP_MAX_HW_LEN, /* hw_addr_length */ 437 0, /* hw_mapping_start */ 438 }; 439 440 static ipft_t ip_ioctl_ftbl[] = { 441 { IP_IOC_IRE_DELETE, ip_ire_delete, sizeof (ipid_t), 0 }, 442 { IP_IOC_IRE_DELETE_NO_REPLY, ip_ire_delete, sizeof (ipid_t), 443 IPFT_F_NO_REPLY }, 444 { IP_IOC_IRE_ADVISE_NO_REPLY, ip_ire_advise, sizeof (ipic_t), 445 IPFT_F_NO_REPLY }, 446 { IP_IOC_RTS_REQUEST, ip_rts_request, 0, IPFT_F_SELF_REPLY }, 447 { 0 } 448 }; 449 450 /* Simple ICMP IP Header Template */ 451 static ipha_t icmp_ipha = { 452 IP_SIMPLE_HDR_VERSION, 0, 0, 0, 0, 0, IPPROTO_ICMP 453 }; 454 455 /* Flag descriptors for ip_ipif_report */ 456 static nv_t ipif_nv_tbl[] = { 457 { IPIF_UP, "UP" }, 458 { IPIF_BROADCAST, "BROADCAST" }, 459 { ILLF_DEBUG, "DEBUG" }, 460 { PHYI_LOOPBACK, "LOOPBACK" }, 461 { IPIF_POINTOPOINT, "POINTOPOINT" }, 462 { ILLF_NOTRAILERS, "NOTRAILERS" }, 463 { PHYI_RUNNING, "RUNNING" }, 464 { ILLF_NOARP, "NOARP" }, 465 { PHYI_PROMISC, "PROMISC" }, 466 { PHYI_ALLMULTI, "ALLMULTI" }, 467 { PHYI_INTELLIGENT, "INTELLIGENT" }, 468 { ILLF_MULTICAST, "MULTICAST" }, 469 { PHYI_MULTI_BCAST, "MULTI_BCAST" }, 470 { IPIF_UNNUMBERED, "UNNUMBERED" }, 471 { IPIF_DHCPRUNNING, "DHCP" }, 472 { IPIF_PRIVATE, "PRIVATE" }, 473 { IPIF_NOXMIT, "NOXMIT" }, 474 { IPIF_NOLOCAL, "NOLOCAL" }, 475 { IPIF_DEPRECATED, "DEPRECATED" }, 476 { IPIF_PREFERRED, "PREFERRED" }, 477 { IPIF_TEMPORARY, "TEMPORARY" }, 478 { IPIF_ADDRCONF, "ADDRCONF" }, 479 { PHYI_VIRTUAL, "VIRTUAL" }, 480 { ILLF_ROUTER, "ROUTER" }, 481 { ILLF_NONUD, "NONUD" }, 482 { IPIF_ANYCAST, "ANYCAST" }, 483 { ILLF_NORTEXCH, "NORTEXCH" }, 484 { ILLF_IPV4, "IPV4" }, 485 { ILLF_IPV6, "IPV6" }, 486 { IPIF_NOFAILOVER, "NOFAILOVER" }, 487 { PHYI_FAILED, "FAILED" }, 488 { PHYI_STANDBY, "STANDBY" }, 489 { PHYI_INACTIVE, "INACTIVE" }, 490 { PHYI_OFFLINE, "OFFLINE" }, 491 }; 492 493 static uchar_t ip_six_byte_all_ones[] = { 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF }; 494 495 static ip_m_t ip_m_tbl[] = { 496 { DL_ETHER, IFT_ETHER, ip_ether_v4mapinfo, ip_ether_v6mapinfo, 497 ip_ether_v6intfid }, 498 { DL_CSMACD, IFT_ISO88023, ip_ether_v4mapinfo, ip_ether_v6mapinfo, 499 ip_nodef_v6intfid }, 500 { DL_TPB, IFT_ISO88024, ip_ether_v4mapinfo, ip_ether_v6mapinfo, 501 ip_nodef_v6intfid }, 502 { DL_TPR, IFT_ISO88025, ip_ether_v4mapinfo, ip_ether_v6mapinfo, 503 ip_nodef_v6intfid }, 504 { DL_FDDI, IFT_FDDI, ip_ether_v4mapinfo, ip_ether_v6mapinfo, 505 ip_ether_v6intfid }, 506 { DL_IB, IFT_IB, ip_ib_v4mapinfo, ip_ib_v6mapinfo, 507 ip_ib_v6intfid }, 508 { SUNW_DL_VNI, IFT_OTHER, NULL, NULL, NULL}, 509 { DL_OTHER, IFT_OTHER, ip_ether_v4mapinfo, ip_ether_v6mapinfo, 510 ip_nodef_v6intfid } 511 }; 512 513 static ill_t ill_null; /* Empty ILL for init. */ 514 char ipif_loopback_name[] = "lo0"; 515 static char *ipv4_forward_suffix = ":ip_forwarding"; 516 static char *ipv6_forward_suffix = ":ip6_forwarding"; 517 static sin6_t sin6_null; /* Zero address for quick clears */ 518 static sin_t sin_null; /* Zero address for quick clears */ 519 520 /* When set search for unused ipif_seqid */ 521 static ipif_t ipif_zero; 522 523 /* 524 * ppa arena is created after these many 525 * interfaces have been plumbed. 526 */ 527 uint_t ill_no_arena = 12; /* Setable in /etc/system */ 528 529 /* 530 * Enable soft rings if ip_squeue_soft_ring or ip_squeue_fanout 531 * is set and ip_soft_rings_cnt > 0. ip_squeue_soft_ring is 532 * set through platform specific code (Niagara/Ontario). 533 */ 534 #define SOFT_RINGS_ENABLED() (ip_soft_rings_cnt ? \ 535 (ip_squeue_soft_ring || ip_squeue_fanout) : B_FALSE) 536 537 #define ILL_CAPAB_DLS (ILL_CAPAB_SOFT_RING | ILL_CAPAB_POLL) 538 539 static uint_t 540 ipif_rand(ip_stack_t *ipst) 541 { 542 ipst->ips_ipif_src_random = ipst->ips_ipif_src_random * 1103515245 + 543 12345; 544 return ((ipst->ips_ipif_src_random >> 16) & 0x7fff); 545 } 546 547 /* 548 * Allocate per-interface mibs. 549 * Returns true if ok. False otherwise. 550 * ipsq may not yet be allocated (loopback case ). 551 */ 552 static boolean_t 553 ill_allocate_mibs(ill_t *ill) 554 { 555 /* Already allocated? */ 556 if (ill->ill_ip_mib != NULL) { 557 if (ill->ill_isv6) 558 ASSERT(ill->ill_icmp6_mib != NULL); 559 return (B_TRUE); 560 } 561 562 ill->ill_ip_mib = kmem_zalloc(sizeof (*ill->ill_ip_mib), 563 KM_NOSLEEP); 564 if (ill->ill_ip_mib == NULL) { 565 return (B_FALSE); 566 } 567 568 /* Setup static information */ 569 SET_MIB(ill->ill_ip_mib->ipIfStatsEntrySize, 570 sizeof (mib2_ipIfStatsEntry_t)); 571 if (ill->ill_isv6) { 572 ill->ill_ip_mib->ipIfStatsIPVersion = MIB2_INETADDRESSTYPE_ipv6; 573 SET_MIB(ill->ill_ip_mib->ipIfStatsAddrEntrySize, 574 sizeof (mib2_ipv6AddrEntry_t)); 575 SET_MIB(ill->ill_ip_mib->ipIfStatsRouteEntrySize, 576 sizeof (mib2_ipv6RouteEntry_t)); 577 SET_MIB(ill->ill_ip_mib->ipIfStatsNetToMediaEntrySize, 578 sizeof (mib2_ipv6NetToMediaEntry_t)); 579 SET_MIB(ill->ill_ip_mib->ipIfStatsMemberEntrySize, 580 sizeof (ipv6_member_t)); 581 SET_MIB(ill->ill_ip_mib->ipIfStatsGroupSourceEntrySize, 582 sizeof (ipv6_grpsrc_t)); 583 } else { 584 ill->ill_ip_mib->ipIfStatsIPVersion = MIB2_INETADDRESSTYPE_ipv4; 585 SET_MIB(ill->ill_ip_mib->ipIfStatsAddrEntrySize, 586 sizeof (mib2_ipAddrEntry_t)); 587 SET_MIB(ill->ill_ip_mib->ipIfStatsRouteEntrySize, 588 sizeof (mib2_ipRouteEntry_t)); 589 SET_MIB(ill->ill_ip_mib->ipIfStatsNetToMediaEntrySize, 590 sizeof (mib2_ipNetToMediaEntry_t)); 591 SET_MIB(ill->ill_ip_mib->ipIfStatsMemberEntrySize, 592 sizeof (ip_member_t)); 593 SET_MIB(ill->ill_ip_mib->ipIfStatsGroupSourceEntrySize, 594 sizeof (ip_grpsrc_t)); 595 596 /* 597 * For a v4 ill, we are done at this point, because per ill 598 * icmp mibs are only used for v6. 599 */ 600 return (B_TRUE); 601 } 602 603 ill->ill_icmp6_mib = kmem_zalloc(sizeof (*ill->ill_icmp6_mib), 604 KM_NOSLEEP); 605 if (ill->ill_icmp6_mib == NULL) { 606 kmem_free(ill->ill_ip_mib, sizeof (*ill->ill_ip_mib)); 607 ill->ill_ip_mib = NULL; 608 return (B_FALSE); 609 } 610 /* static icmp info */ 611 ill->ill_icmp6_mib->ipv6IfIcmpEntrySize = 612 sizeof (mib2_ipv6IfIcmpEntry_t); 613 /* 614 * The ipIfStatsIfindex and ipv6IfIcmpIndex will be assigned later 615 * after the phyint merge occurs in ipif_set_values -> ill_glist_insert 616 * -> ill_phyint_reinit 617 */ 618 return (B_TRUE); 619 } 620 621 /* 622 * Common code for preparation of ARP commands. Two points to remember: 623 * 1) The ill_name is tacked on at the end of the allocated space so 624 * the templates name_offset field must contain the total space 625 * to allocate less the name length. 626 * 627 * 2) The templates name_length field should contain the *template* 628 * length. We use it as a parameter to bcopy() and then write 629 * the real ill_name_length into the name_length field of the copy. 630 * (Always called as writer.) 631 */ 632 mblk_t * 633 ill_arp_alloc(ill_t *ill, uchar_t *template, caddr_t addr) 634 { 635 arc_t *arc = (arc_t *)template; 636 char *cp; 637 int len; 638 mblk_t *mp; 639 uint_t name_length = ill->ill_name_length; 640 uint_t template_len = arc->arc_name_length; 641 642 len = arc->arc_name_offset + name_length; 643 mp = allocb(len, BPRI_HI); 644 if (mp == NULL) 645 return (NULL); 646 cp = (char *)mp->b_rptr; 647 mp->b_wptr = (uchar_t *)&cp[len]; 648 if (template_len) 649 bcopy(template, cp, template_len); 650 if (len > template_len) 651 bzero(&cp[template_len], len - template_len); 652 mp->b_datap->db_type = M_PROTO; 653 654 arc = (arc_t *)cp; 655 arc->arc_name_length = name_length; 656 cp = (char *)arc + arc->arc_name_offset; 657 bcopy(ill->ill_name, cp, name_length); 658 659 if (addr) { 660 area_t *area = (area_t *)mp->b_rptr; 661 662 cp = (char *)area + area->area_proto_addr_offset; 663 bcopy(addr, cp, area->area_proto_addr_length); 664 if (area->area_cmd == AR_ENTRY_ADD) { 665 cp = (char *)area; 666 len = area->area_proto_addr_length; 667 if (area->area_proto_mask_offset) 668 cp += area->area_proto_mask_offset; 669 else 670 cp += area->area_proto_addr_offset + len; 671 while (len-- > 0) 672 *cp++ = (char)~0; 673 } 674 } 675 return (mp); 676 } 677 678 mblk_t * 679 ipif_area_alloc(ipif_t *ipif) 680 { 681 return (ill_arp_alloc(ipif->ipif_ill, (uchar_t *)&ip_area_template, 682 (char *)&ipif->ipif_lcl_addr)); 683 } 684 685 mblk_t * 686 ipif_ared_alloc(ipif_t *ipif) 687 { 688 return (ill_arp_alloc(ipif->ipif_ill, (uchar_t *)&ip_ared_template, 689 (char *)&ipif->ipif_lcl_addr)); 690 } 691 692 mblk_t * 693 ill_ared_alloc(ill_t *ill, ipaddr_t addr) 694 { 695 return (ill_arp_alloc(ill, (uchar_t *)&ip_ared_template, 696 (char *)&addr)); 697 } 698 699 /* 700 * Completely vaporize a lower level tap and all associated interfaces. 701 * ill_delete is called only out of ip_close when the device control 702 * stream is being closed. 703 */ 704 void 705 ill_delete(ill_t *ill) 706 { 707 ipif_t *ipif; 708 ill_t *prev_ill; 709 ip_stack_t *ipst = ill->ill_ipst; 710 711 /* 712 * ill_delete may be forcibly entering the ipsq. The previous 713 * ioctl may not have completed and may need to be aborted. 714 * ipsq_flush takes care of it. If we don't need to enter the 715 * the ipsq forcibly, the 2nd invocation of ipsq_flush in 716 * ill_delete_tail is sufficient. 717 */ 718 ipsq_flush(ill); 719 720 /* 721 * Nuke all interfaces. ipif_free will take down the interface, 722 * remove it from the list, and free the data structure. 723 * Walk down the ipif list and remove the logical interfaces 724 * first before removing the main ipif. We can't unplumb 725 * zeroth interface first in the case of IPv6 as reset_conn_ill 726 * -> ip_ll_delmulti_v6 de-references ill_ipif for checking 727 * POINTOPOINT. 728 * 729 * If ill_ipif was not properly initialized (i.e low on memory), 730 * then no interfaces to clean up. In this case just clean up the 731 * ill. 732 */ 733 for (ipif = ill->ill_ipif; ipif != NULL; ipif = ipif->ipif_next) 734 ipif_free(ipif); 735 736 /* 737 * Used only by ill_arp_on and ill_arp_off, which are writers. 738 * So nobody can be using this mp now. Free the mp allocated for 739 * honoring ILLF_NOARP 740 */ 741 freemsg(ill->ill_arp_on_mp); 742 ill->ill_arp_on_mp = NULL; 743 744 /* Clean up msgs on pending upcalls for mrouted */ 745 reset_mrt_ill(ill); 746 747 /* 748 * ipif_free -> reset_conn_ipif will remove all multicast 749 * references for IPv4. For IPv6, we need to do it here as 750 * it points only at ills. 751 */ 752 reset_conn_ill(ill); 753 754 /* 755 * ill_down will arrange to blow off any IRE's dependent on this 756 * ILL, and shut down fragmentation reassembly. 757 */ 758 ill_down(ill); 759 760 /* Let SCTP know, so that it can remove this from its list. */ 761 sctp_update_ill(ill, SCTP_ILL_REMOVE); 762 763 /* 764 * If an address on this ILL is being used as a source address then 765 * clear out the pointers in other ILLs that point to this ILL. 766 */ 767 rw_enter(&ipst->ips_ill_g_usesrc_lock, RW_WRITER); 768 if (ill->ill_usesrc_grp_next != NULL) { 769 if (ill->ill_usesrc_ifindex == 0) { /* usesrc ILL ? */ 770 ill_disband_usesrc_group(ill); 771 } else { /* consumer of the usesrc ILL */ 772 prev_ill = ill_prev_usesrc(ill); 773 prev_ill->ill_usesrc_grp_next = 774 ill->ill_usesrc_grp_next; 775 } 776 } 777 rw_exit(&ipst->ips_ill_g_usesrc_lock); 778 } 779 780 static void 781 ipif_non_duplicate(ipif_t *ipif) 782 { 783 ill_t *ill = ipif->ipif_ill; 784 mutex_enter(&ill->ill_lock); 785 if (ipif->ipif_flags & IPIF_DUPLICATE) { 786 ipif->ipif_flags &= ~IPIF_DUPLICATE; 787 ASSERT(ill->ill_ipif_dup_count > 0); 788 ill->ill_ipif_dup_count--; 789 } 790 mutex_exit(&ill->ill_lock); 791 } 792 793 /* 794 * ill_delete_tail is called from ip_modclose after all references 795 * to the closing ill are gone. The wait is done in ip_modclose 796 */ 797 void 798 ill_delete_tail(ill_t *ill) 799 { 800 mblk_t **mpp; 801 ipif_t *ipif; 802 ip_stack_t *ipst = ill->ill_ipst; 803 804 for (ipif = ill->ill_ipif; ipif != NULL; ipif = ipif->ipif_next) { 805 ipif_non_duplicate(ipif); 806 ipif_down_tail(ipif); 807 } 808 809 ASSERT(ill->ill_ipif_dup_count == 0 && 810 ill->ill_arp_down_mp == NULL && 811 ill->ill_arp_del_mapping_mp == NULL); 812 813 /* 814 * If polling capability is enabled (which signifies direct 815 * upcall into IP and driver has ill saved as a handle), 816 * we need to make sure that unbind has completed before we 817 * let the ill disappear and driver no longer has any reference 818 * to this ill. 819 */ 820 mutex_enter(&ill->ill_lock); 821 while (ill->ill_state_flags & ILL_DL_UNBIND_IN_PROGRESS) 822 cv_wait(&ill->ill_cv, &ill->ill_lock); 823 mutex_exit(&ill->ill_lock); 824 825 /* 826 * Clean up polling and soft ring capabilities 827 */ 828 if (ill->ill_capabilities & (ILL_CAPAB_POLL|ILL_CAPAB_SOFT_RING)) 829 ill_capability_dls_disable(ill); 830 831 if (ill->ill_net_type != IRE_LOOPBACK) 832 qprocsoff(ill->ill_rq); 833 834 /* 835 * We do an ipsq_flush once again now. New messages could have 836 * landed up from below (M_ERROR or M_HANGUP). Similarly ioctls 837 * could also have landed up if an ioctl thread had looked up 838 * the ill before we set the ILL_CONDEMNED flag, but not yet 839 * enqueued the ioctl when we did the ipsq_flush last time. 840 */ 841 ipsq_flush(ill); 842 843 /* 844 * Free capabilities. 845 */ 846 if (ill->ill_ipsec_capab_ah != NULL) { 847 ill_ipsec_capab_delete(ill, DL_CAPAB_IPSEC_AH); 848 ill_ipsec_capab_free(ill->ill_ipsec_capab_ah); 849 ill->ill_ipsec_capab_ah = NULL; 850 } 851 852 if (ill->ill_ipsec_capab_esp != NULL) { 853 ill_ipsec_capab_delete(ill, DL_CAPAB_IPSEC_ESP); 854 ill_ipsec_capab_free(ill->ill_ipsec_capab_esp); 855 ill->ill_ipsec_capab_esp = NULL; 856 } 857 858 if (ill->ill_mdt_capab != NULL) { 859 kmem_free(ill->ill_mdt_capab, sizeof (ill_mdt_capab_t)); 860 ill->ill_mdt_capab = NULL; 861 } 862 863 if (ill->ill_hcksum_capab != NULL) { 864 kmem_free(ill->ill_hcksum_capab, sizeof (ill_hcksum_capab_t)); 865 ill->ill_hcksum_capab = NULL; 866 } 867 868 if (ill->ill_zerocopy_capab != NULL) { 869 kmem_free(ill->ill_zerocopy_capab, 870 sizeof (ill_zerocopy_capab_t)); 871 ill->ill_zerocopy_capab = NULL; 872 } 873 874 if (ill->ill_lso_capab != NULL) { 875 kmem_free(ill->ill_lso_capab, sizeof (ill_lso_capab_t)); 876 ill->ill_lso_capab = NULL; 877 } 878 879 if (ill->ill_dls_capab != NULL) { 880 CONN_DEC_REF(ill->ill_dls_capab->ill_unbind_conn); 881 ill->ill_dls_capab->ill_unbind_conn = NULL; 882 kmem_free(ill->ill_dls_capab, 883 sizeof (ill_dls_capab_t) + 884 (sizeof (ill_rx_ring_t) * ILL_MAX_RINGS)); 885 ill->ill_dls_capab = NULL; 886 } 887 888 ASSERT(!(ill->ill_capabilities & ILL_CAPAB_POLL)); 889 890 while (ill->ill_ipif != NULL) 891 ipif_free_tail(ill->ill_ipif); 892 893 /* 894 * We have removed all references to ilm from conn and the ones joined 895 * within the kernel. 896 * 897 * We don't walk conns, mrts and ires because 898 * 899 * 1) reset_conn_ill and reset_mrt_ill cleans up conns and mrts. 900 * 2) ill_down ->ill_downi walks all the ires and cleans up 901 * ill references. 902 */ 903 ASSERT(ilm_walk_ill(ill) == 0); 904 /* 905 * Take us out of the list of ILLs. ill_glist_delete -> ill_phyint_free 906 * could free the phyint. No more reference to the phyint after this 907 * point. 908 */ 909 (void) ill_glist_delete(ill); 910 911 rw_enter(&ipst->ips_ip_g_nd_lock, RW_WRITER); 912 if (ill->ill_ndd_name != NULL) 913 nd_unload(&ipst->ips_ip_g_nd, ill->ill_ndd_name); 914 rw_exit(&ipst->ips_ip_g_nd_lock); 915 916 917 if (ill->ill_frag_ptr != NULL) { 918 uint_t count; 919 920 for (count = 0; count < ILL_FRAG_HASH_TBL_COUNT; count++) { 921 mutex_destroy(&ill->ill_frag_hash_tbl[count].ipfb_lock); 922 } 923 mi_free(ill->ill_frag_ptr); 924 ill->ill_frag_ptr = NULL; 925 ill->ill_frag_hash_tbl = NULL; 926 } 927 928 freemsg(ill->ill_nd_lla_mp); 929 /* Free all retained control messages. */ 930 mpp = &ill->ill_first_mp_to_free; 931 do { 932 while (mpp[0]) { 933 mblk_t *mp; 934 mblk_t *mp1; 935 936 mp = mpp[0]; 937 mpp[0] = mp->b_next; 938 for (mp1 = mp; mp1 != NULL; mp1 = mp1->b_cont) { 939 mp1->b_next = NULL; 940 mp1->b_prev =