1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 22 /* 23 * Copyright 2008 Sun Microsystems, Inc. All rights reserved. 24 * Use is subject to license terms. 25 */ 26 /* Copyright (c) 1990 Mentat Inc. 
*/ 27 28 #include <sys/types.h> 29 #include <sys/stream.h> 30 #include <sys/dlpi.h> 31 #include <sys/stropts.h> 32 #include <sys/sysmacros.h> 33 #include <sys/strsubr.h> 34 #include <sys/strlog.h> 35 #include <sys/strsun.h> 36 #include <sys/zone.h> 37 #define _SUN_TPI_VERSION 2 38 #include <sys/tihdr.h> 39 #include <sys/xti_inet.h> 40 #include <sys/ddi.h> 41 #include <sys/sunddi.h> 42 #include <sys/cmn_err.h> 43 #include <sys/debug.h> 44 #include <sys/kobj.h> 45 #include <sys/modctl.h> 46 #include <sys/atomic.h> 47 #include <sys/policy.h> 48 #include <sys/priv.h> 49 50 #include <sys/systm.h> 51 #include <sys/param.h> 52 #include <sys/kmem.h> 53 #include <sys/sdt.h> 54 #include <sys/socket.h> 55 #include <sys/vtrace.h> 56 #include <sys/isa_defs.h> 57 #include <sys/mac.h> 58 #include <net/if.h> 59 #include <net/if_arp.h> 60 #include <net/route.h> 61 #include <sys/sockio.h> 62 #include <netinet/in.h> 63 #include <net/if_dl.h> 64 65 #include <inet/common.h> 66 #include <inet/mi.h> 67 #include <inet/mib2.h> 68 #include <inet/nd.h> 69 #include <inet/arp.h> 70 #include <inet/snmpcom.h> 71 #include <inet/optcom.h> 72 #include <inet/kstatcom.h> 73 74 #include <netinet/igmp_var.h> 75 #include <netinet/ip6.h> 76 #include <netinet/icmp6.h> 77 #include <netinet/sctp.h> 78 79 #include <inet/ip.h> 80 #include <inet/ip_impl.h> 81 #include <inet/ip6.h> 82 #include <inet/ip6_asp.h> 83 #include <inet/tcp.h> 84 #include <inet/tcp_impl.h> 85 #include <inet/ip_multi.h> 86 #include <inet/ip_if.h> 87 #include <inet/ip_ire.h> 88 #include <inet/ip_ftable.h> 89 #include <inet/ip_rts.h> 90 #include <inet/ip_ndp.h> 91 #include <inet/ip_listutils.h> 92 #include <netinet/igmp.h> 93 #include <netinet/ip_mroute.h> 94 #include <inet/ipp_common.h> 95 96 #include <net/pfkeyv2.h> 97 #include <inet/ipsec_info.h> 98 #include <inet/sadb.h> 99 #include <inet/ipsec_impl.h> 100 #include <sys/iphada.h> 101 #include <inet/tun.h> 102 #include <inet/ipdrop.h> 103 #include <inet/ip_netinfo.h> 104 105 #include 
<sys/ethernet.h> 106 #include <net/if_types.h> 107 #include <sys/cpuvar.h> 108 109 #include <ipp/ipp.h> 110 #include <ipp/ipp_impl.h> 111 #include <ipp/ipgpc/ipgpc.h> 112 113 #include <sys/multidata.h> 114 #include <sys/pattr.h> 115 116 #include <inet/ipclassifier.h> 117 #include <inet/sctp_ip.h> 118 #include <inet/sctp/sctp_impl.h> 119 #include <inet/udp_impl.h> 120 #include <inet/rawip_impl.h> 121 #include <inet/rts_impl.h> 122 #include <sys/sunddi.h> 123 124 #include <sys/tsol/label.h> 125 #include <sys/tsol/tnet.h> 126 127 #include <rpc/pmap_prot.h> 128 129 /* 130 * Values for squeue switch: 131 * IP_SQUEUE_ENTER_NODRAIN: squeue_enter_nodrain 132 * IP_SQUEUE_ENTER: squeue_enter 133 * IP_SQUEUE_FILL: squeue_fill 134 */ 135 int ip_squeue_enter = 2; /* Settable in /etc/system */ 136 137 squeue_func_t ip_input_proc; 138 #define SET_BPREV_FLAG(x) ((mblk_t *)(uintptr_t)(x)) 139 140 /* 141 * Settable in /etc/system 142 */ 143 int ip_poll_normal_ms = 100; 144 int ip_poll_normal_ticks = 0; 145 int ip_modclose_ackwait_ms = 3000; 146 147 /* 148 * It would be nice to have these present only in DEBUG systems, but the 149 * current design of the global symbol checking logic requires them to be 150 * unconditionally present. 151 */ 152 uint_t ip_thread_data; /* TSD key for debug support */ 153 krwlock_t ip_thread_rwlock; 154 list_t ip_thread_list; 155 156 /* 157 * Structure to represent a linked list of msgblks. Used by ip_snmp_ functions. 158 */ 159 160 struct listptr_s { 161 mblk_t *lp_head; /* pointer to the head of the list */ 162 mblk_t *lp_tail; /* pointer to the tail of the list */ 163 }; 164 165 typedef struct listptr_s listptr_t; 166 167 /* 168 * This is used by ip_snmp_get_mib2_ip_route_media and 169 * ip_snmp_get_mib2_ip6_route_media to carry the lists of return data. 
170 */ 171 typedef struct iproutedata_s { 172 uint_t ird_idx; 173 listptr_t ird_route; /* ipRouteEntryTable */ 174 listptr_t ird_netmedia; /* ipNetToMediaEntryTable */ 175 listptr_t ird_attrs; /* ipRouteAttributeTable */ 176 } iproutedata_t; 177 178 /* 179 * Cluster specific hooks. These should be NULL when not booted as a cluster. 180 */ 181 182 /* 183 * Hook functions to enable cluster networking 184 * On non-clustered systems these vectors must always be NULL. 185 * 186 * Hook function to check whether the specified IP address is a shared IP 187 * address in the cluster 188 * 189 */ 190 int (*cl_inet_isclusterwide)(uint8_t protocol, 191 sa_family_t addr_family, uint8_t *laddrp) = NULL; 192 193 /* 194 * Hook function to generate cluster wide ip fragment identifier 195 */ 196 uint32_t (*cl_inet_ipident)(uint8_t protocol, sa_family_t addr_family, 197 uint8_t *laddrp, uint8_t *faddrp) = NULL; 198 199 /* 200 * Hook function to generate cluster wide SPI. 201 */ 202 void (*cl_inet_getspi)(uint8_t, uint8_t *, size_t) = NULL; 203 204 /* 205 * Hook function to verify if the SPI is already utilized. 206 */ 207 208 int (*cl_inet_checkspi)(uint8_t, uint32_t) = NULL; 209 210 /* 211 * Hook function to delete the SPI from the cluster wide repository. 212 */ 213 214 void (*cl_inet_deletespi)(uint8_t, uint32_t) = NULL; 215 216 /* 217 * Hook function to inform the cluster when a packet is received on an IDLE SA 218 */ 219 220 void (*cl_inet_idlesa)(uint8_t, uint32_t, sa_family_t, in6_addr_t, 221 in6_addr_t) = NULL; 222 223 /* 224 * Synchronization notes: 225 * 226 * IP is a fully D_MP STREAMS module/driver. Thus it does not depend on any 227 * MT level protection given by STREAMS. IP uses a combination of its own 228 * internal serialization mechanism and standard Solaris locking techniques. 229 * The internal serialization is per phyint (no IPMP) or per IPMP group. 
230 * This is used to serialize plumbing operations, IPMP operations, certain 231 * multicast operations, most set ioctls, igmp/mld timers etc. 232 * 233 * Plumbing is a long sequence of operations involving message 234 * exchanges between IP, ARP and device drivers. Many set ioctls are typically 235 * involved in plumbing operations. A natural model is to serialize these 236 * ioctls one per ill. For example plumbing of hme0 and qfe0 can go on in 237 * parallel without any interference. But various set ioctls on hme0 are best 238 * serialized. However if the system uses IPMP, the operations are easier if 239 * they are serialized on a per IPMP group basis since IPMP operations 240 * happen across ill's of a group. Thus the lowest common denominator is to 241 * serialize most set ioctls, multicast join/leave operations, IPMP operations, 242 * igmp/mld timer operations, and processing of DLPI control messages received 243 * from drivers on a per IPMP group basis. If the system does not employ 244 * IPMP the serialization is on a per phyint basis. This serialization is 245 * provided by the ipsq_t and primitives operating on this. Details can 246 * be found in ip_if.c above the core primitives operating on ipsq_t. 247 * 248 * Lookups of an ipif or ill by a thread return a refheld ipif / ill. 249 * Similarly, lookup of an ire by a thread also returns a refheld ire. 250 * In addition ipif's and ill's referenced by the ire are also indirectly 251 * refheld. Thus no ipif or ill can vanish nor can critical parameters like 252 * the ipif's address or netmask change as long as an ipif is refheld 253 * directly or indirectly. For example an SIOCLIFADDR ioctl that changes the 254 * address of an ipif has to go through the ipsq_t. This ensures that only 255 * 1 such exclusive operation proceeds at any time on the ipif. It then 256 * deletes all ires associated with this ipif, and waits for all refcnts 257 * associated with this ipif to come down to zero. 
The address is changed 258 * only after the ipif has been quiesced. Then the ipif is brought up again. 259 * More details are described above the comment in ip_sioctl_flags. 260 * 261 * Packet processing is based mostly on IREs and are fully multi-threaded 262 * using standard Solaris MT techniques. 263 * 264 * There are explicit locks in IP to handle: 265 * - The ip_g_head list maintained by mi_open_link() and friends. 266 * 267 * - The reassembly data structures (one lock per hash bucket) 268 * 269 * - conn_lock is meant to protect conn_t fields. The fields actually 270 * protected by conn_lock are documented in the conn_t definition. 271 * 272 * - ire_lock to protect some of the fields of the ire, IRE tables 273 * (one lock per hash bucket). Refer to ip_ire.c for details. 274 * 275 * - ndp_g_lock and nce_lock for protecting NCEs. 276 * 277 * - ill_lock protects fields of the ill and ipif. Details in ip.h 278 * 279 * - ill_g_lock: This is a global reader/writer lock. Protects the following 280 * * The AVL tree based global multi list of all ills. 281 * * The linked list of all ipifs of an ill 282 * * The <ill-ipsq> mapping 283 * * The ipsq->ipsq_phyint_list threaded by phyint_ipsq_next 284 * * The illgroup list threaded by ill_group_next. 285 * * <ill-phyint> association 286 * Insertion/deletion of an ill in the system, insertion/deletion of an ipif 287 * into an ill, changing the <ill-ipsq> mapping of an ill, insertion/deletion 288 * of an ill into the illgrp list, changing the <ill-phyint> assoc of an ill 289 * will all have to hold the ill_g_lock as writer for the actual duration 290 * of the insertion/deletion/change. More details about the <ill-ipsq> mapping 291 * may be found in the IPMP section. 292 * 293 * - ill_lock: This is a per ill mutex. 294 * It protects some members of the ill and is documented below. 295 * It also protects the <ill-ipsq> mapping 296 * It also protects the illgroup list threaded by ill_group_next. 
297 * It also protects the <ill-phyint> assoc. 298 * It also protects the list of ipifs hanging off the ill. 299 * 300 * - ipsq_lock: This is a per ipsq_t mutex lock. 301 * This protects all the other members of the ipsq struct except 302 * ipsq_refs and ipsq_phyint_list which are protected by ill_g_lock 303 * 304 * - illgrp_lock: This is a per ill_group mutex lock. 305 * The only thing it protects is the illgrp_ill_schednext member of ill_group 306 * which dictates which is the next ill in an ill_group that is to be chosen 307 * for sending outgoing packets, through creation of an IRE_CACHE that 308 * references this ill. 309 * 310 * - phyint_lock: This is a per phyint mutex lock. Protects just the 311 * phyint_flags 312 * 313 * - ip_g_nd_lock: This is a global reader/writer lock. 314 * Any call to nd_load to load a new parameter to the ND table must hold the 315 * lock as writer. ND_GET/ND_SET routines that read the ND table hold the lock 316 * as reader. 317 * 318 * - ip_addr_avail_lock: This is used to ensure the uniqueness of IP addresses. 319 * This lock is held in ipif_up_done and the ipif is marked IPIF_UP and the 320 * uniqueness check also done atomically. 321 * 322 * - ipsec_capab_ills_lock: This readers/writer lock protects the global 323 * lists of IPsec capable ills (ipsec_capab_ills_{ah,esp}). It is taken 324 * as a writer when adding or deleting elements from these lists, and 325 * as a reader when walking these lists to send a SADB update to the 326 * IPsec capable ills. 327 * 328 * - ill_g_usesrc_lock: This readers/writer lock protects the usesrc 329 * group list linked by ill_usesrc_grp_next. It also protects the 330 * ill_usesrc_ifindex field. It is taken as a writer when a member of the 331 * group is being added or deleted. This lock is taken as a reader when 332 * walking the list/group(eg: to get the number of members in a usesrc group). 
333 * Note, it is only necessary to take this lock if the ill_usesrc_grp_next 334 * field is changing state i.e. from NULL to non-NULL or vice-versa. For 335 * example, it is not necessary to take this lock in the initial portion 336 * of ip_sioctl_slifusesrc or at all in ip_sioctl_groupname and 337 * ip_sioctl_flags since these operations are executed exclusively and 338 * that ensures that the "usesrc group state" cannot change. The "usesrc 339 * group state" change can happen only in the latter part of 340 * ip_sioctl_slifusesrc and in ill_delete. 341 * 342 * Changing <ill-phyint>, <ill-ipsq>, <ill-illgroup> associations. 343 * 344 * To change the <ill-phyint> association, the ill_g_lock must be held 345 * as writer, and the ill_locks of both the v4 and v6 instance of the ill 346 * must be held. 347 * 348 * To change the <ill-ipsq> association the ill_g_lock must be held as writer 349 * and the ill_lock of the ill in question must be held. 350 * 351 * To change the <ill-illgroup> association the ill_g_lock must be held as 352 * writer and the ill_lock of the ill in question must be held. 353 * 354 * To add or delete an ipif from the list of ipifs hanging off the ill, 355 * ill_g_lock (writer) and ill_lock must be held and the thread must be 356 * a writer on the associated ipsq. 357 * 358 * To add or delete an ill to the system, the ill_g_lock must be held as 359 * writer and the thread must be a writer on the associated ipsq. 360 * 361 * To add or delete an ilm to an ill, the ill_lock must be held and the thread 362 * must be a writer on the associated ipsq. 363 * 364 * Lock hierarchy 365 * 366 * Some lock hierarchy scenarios are listed below. 
367 * 368 * ill_g_lock -> conn_lock -> ill_lock -> ipsq_lock 369 * ill_g_lock -> illgrp_lock -> ill_lock 370 * ill_g_lock -> ill_lock(s) -> phyint_lock 371 * ill_g_lock -> ndp_g_lock -> ill_lock -> nce_lock 372 * ill_g_lock -> ip_addr_avail_lock 373 * conn_lock -> irb_lock -> ill_lock -> ire_lock 374 * ill_g_lock -> ip_g_nd_lock 375 * 376 * When more than 1 ill lock is needed to be held, all ill lock addresses 377 * are sorted on address and locked starting from highest addressed lock 378 * downward. 379 * 380 * IPsec scenarios 381 * 382 * ipsa_lock -> ill_g_lock -> ill_lock 383 * ipsec_capab_ills_lock -> ill_g_lock -> ill_lock 384 * ipsec_capab_ills_lock -> ipsa_lock 385 * ill_g_usesrc_lock -> ill_g_lock -> ill_lock 386 * 387 * Trusted Solaris scenarios 388 * 389 * igsa_lock -> gcgrp_rwlock -> gcgrp_lock 390 * igsa_lock -> gcdb_lock 391 * gcgrp_rwlock -> ire_lock 392 * gcgrp_rwlock -> gcdb_lock 393 * 394 * 395 * Routing/forwarding table locking notes: 396 * 397 * Lock acquisition order: Radix tree lock, irb_lock. 398 * Requirements: 399 * i. Walker must not hold any locks during the walker callback. 400 * ii Walker must not see a truncated tree during the walk because of any node 401 * deletion. 402 * iii Existing code assumes ire_bucket is valid if it is non-null and is used 403 * in many places in the code to walk the irb list. Thus even if all the 404 * ires in a bucket have been deleted, we still can't free the radix node 405 * until the ires have actually been inactive'd (freed). 406 * 407 * Tree traversal - Need to hold the global tree lock in read mode. 408 * Before dropping the global tree lock, need to either increment the ire_refcnt 409 * to ensure that the radix node can't be deleted. 410 * 411 * Tree add - Need to hold the global tree lock in write mode to add a 412 * radix node. To prevent the node from being deleted, increment the 413 * irb_refcnt, after the node is added to the tree. 
The ire itself is 414 * added later while holding the irb_lock, but not the tree lock. 415 * 416 * Tree delete - Need to hold the global tree lock and irb_lock in write mode. 417 * All associated ires must be inactive (i.e. freed), and irb_refcnt 418 * must be zero. 419 * 420 * Walker - Increment irb_refcnt before calling the walker callback. Hold the 421 * global tree lock (read mode) for traversal. 422 * 423 * IPsec notes : 424 * 425 * IP interacts with the IPsec code (AH/ESP) by tagging an M_CTL message 426 * in front of the actual packet. For outbound datagrams, the M_CTL 427 * contains an ipsec_out_t (defined in ipsec_info.h), which has the 428 * information used by the IPsec code for applying the right level of 429 * protection. The information initialized by IP in the ipsec_out_t 430 * is determined by the per-socket policy or global policy in the system. 431 * For inbound datagrams, the M_CTL contains an ipsec_in_t (defined in 432 * ipsec_info.h) which starts out with nothing in it. It gets filled 433 * with the right information if it goes through the AH/ESP code, which 434 * happens if the incoming packet is secure. The information initialized 435 * by AH/ESP is later used by IP (during fanouts to ULP) to see whether 436 * the policy requirements needed by per-socket policy or global policy 437 * are met or not. 438 * 439 * If there is both per-socket policy (set using setsockopt) and there 440 * is also global policy match for the 5 tuples of the socket, 441 * ipsec_override_policy() makes the decision of which one to use. 442 * 443 * For fully connected sockets i.e. dst, src [addr, port] is known, 444 * conn_policy_cached is set indicating that policy has been cached. 445 * conn_in_enforce_policy may or may not be set depending on whether 446 * there is a global policy match or per-socket policy match. 447 * Policy inheriting happens in ip_bind during the ipa_conn_t bind. 
448 * Once the right policy is set on the conn_t, policy cannot change for 449 * this socket. This makes life simpler for TCP (UDP ?) where 450 * re-transmissions go out with the same policy. For symmetry, policy 451 * is cached for fully connected UDP sockets also. Thus if policy is cached, 452 * it also implies that policy is latched i.e policy cannot change 453 * on these sockets. As we have the right policy on the conn, we don't 454 * have to lookup global policy for every outbound and inbound datagram 455 * and thus serving as an optimization. Note that a global policy change 456 * does not affect fully connected sockets if they have policy. If fully 457 * connected sockets did not have any policy associated with it, global 458 * policy change may affect them. 459 * 460 * IP Flow control notes: 461 * 462 * Non-TCP streams are flow controlled by IP. On the send side, if the packet 463 * cannot be sent down to the driver by IP, because of a canput failure, IP 464 * does a putq on the conn_wq. This will cause ip_wsrv to run on the conn_wq. 465 * ip_wsrv in turn, inserts the conn in a list of conn's that need to be drained 466 * when the flowcontrol condition subsides. Ultimately STREAMS backenables the 467 * ip_wsrv on the IP module, which in turn does a qenable of the conn_wq of the 468 * first conn in the list of conn's to be drained. ip_wsrv on this conn drains 469 * the queued messages, and removes the conn from the drain list, if all 470 * messages were drained. It also qenables the next conn in the drain list to 471 * continue the drain process. 472 * 473 * In reality the drain list is not a single list, but a configurable number 474 * of lists. The ip_wsrv on the IP module, qenables the first conn in each 475 * list. If the ip_wsrv of the next qenabled conn does not run, because the 476 * stream closes, ip_close takes responsibility to qenable the next conn in 477 * the drain list. 
The directly called ip_wput path always does a putq, if 478 * it cannot putnext. Thus synchronization problems are handled between 479 * ip_wsrv and ip_close. conn_drain_insert and conn_drain_tail are the only 480 * functions that manipulate this drain list. Furthermore conn_drain_insert 481 * is called only from ip_wsrv, and there can be only 1 instance of ip_wsrv 482 * running on a queue at any time. conn_drain_tail can be simultaneously called 483 * from both ip_wsrv and ip_close. 484 * 485 * IPQoS notes: 486 * 487 * IPQoS Policies are applied to packets using IPPF (IP Policy framework) 488 * and IPQoS modules. IPPF includes hooks in IP at different control points 489 * (callout positions) which direct packets to IPQoS modules for policy 490 * processing. Policies, if present, are global. 491 * 492 * The callout positions are located in the following paths: 493 * o local_in (packets destined for this host) 494 * o local_out (packets originating from this host) 495 * o fwd_in (packets forwarded by this m/c - inbound) 496 * o fwd_out (packets forwarded by this m/c - outbound) 497 * Hooks at these callout points can be enabled/disabled using the ndd variable 498 * ip_policy_mask (a bit mask with the 4 LSB indicating the callout positions). 499 * By default all the callout positions are enabled. 500 * 501 * Outbound (local_out) 502 * Hooks are placed in ip_wput_ire and ipsec_out_process. 503 * 504 * Inbound (local_in) 505 * Hooks are placed in ip_proto_input, icmp_inbound, ip_fanout_proto and 506 * TCP and UDP fanout routines. 507 * 508 * Forwarding (in and out) 509 * Hooks are placed in ip_rput_forward. 510 * 511 * IP Policy Framework processing (IPPF processing) 512 * Policy processing for a packet is initiated by ip_process, which ascertains 513 * that the classifier (ipgpc) is loaded and configured, failing which the 514 * packet resumes normal processing in IP. 
If the classifier is present, the 515 * packet is acted upon by one or more IPQoS modules (action instances), per 516 * filters configured in ipgpc and resumes normal IP processing thereafter. 517 * An action instance can drop a packet in the course of its processing. 518 * 519 * A boolean variable, ip_policy, is used in all the fanout routines that can 520 * invoke ip_process for a packet. This variable indicates if the packet should 521 * be sent for policy processing. The variable is set to B_TRUE by default, 522 * i.e. when the routines are invoked in the normal ip processing path for a 523 * packet. The two exceptions are ip_wput_local and icmp_inbound_error_fanout; 524 * ip_policy is set to B_FALSE for all the routines called in these two 525 * functions because, in the former case, we don't process loopback traffic 526 * currently while in the latter, the packets have already been processed in 527 * icmp_inbound. 528 * 529 * Zones notes: 530 * 531 * The partitioning rules for networking are as follows: 532 * 1) Packets coming from a zone must have a source address belonging to that 533 * zone. 534 * 2) Packets coming from a zone can only be sent on a physical interface on 535 * which the zone has an IP address. 536 * 3) Between two zones on the same machine, packet delivery is only allowed if 537 * there's a matching route for the destination and zone in the forwarding 538 * table. 539 * 4) The TCP and UDP port spaces are per-zone; that is, two processes in 540 * different zones can bind to the same port with the wildcard address 541 * (INADDR_ANY). 542 * 543 * The granularity of interface partitioning is at the logical interface level. 544 * Therefore, every zone has its own IP addresses, and incoming packets can be 545 * attributed to a zone unambiguously. A logical interface is placed into a zone 546 * using the SIOCSLIFZONE ioctl; this sets the ipif_zoneid field in the ipif_t 547 * structure. 
Rule (1) is implemented by modifying the source address selection 548 * algorithm so that the list of eligible addresses is filtered based on the 549 * sending process's zone. 550 * 551 * The Internet Routing Entries (IREs) are either exclusive to a zone or shared 552 * across all zones, depending on their type. Here is the break-up: 553 * 554 * IRE type Shared/exclusive 555 * -------- ---------------- 556 * IRE_BROADCAST Exclusive 557 * IRE_DEFAULT (default routes) Shared (*) 558 * IRE_LOCAL Exclusive (x) 559 * IRE_LOOPBACK Exclusive 560 * IRE_PREFIX (net routes) Shared (*) 561 * IRE_CACHE Exclusive 562 * IRE_IF_NORESOLVER (interface routes) Exclusive 563 * IRE_IF_RESOLVER (interface routes) Exclusive 564 * IRE_HOST (host routes) Shared (*) 565 * 566 * (*) A zone can only use a default or off-subnet route if the gateway is 567 * directly reachable from the zone, that is, if the gateway's address matches 568 * one of the zone's logical interfaces. 569 * 570 * (x) IRE_LOCAL are handled a bit differently, since for all other entries 571 * in ire_ctable and IRE_INTERFACE, ire_src_addr is what can be used as source 572 * when sending packets using the IRE. For IRE_LOCAL ire_src_addr is the IP 573 * address of the zone itself (the destination). Since IRE_LOCAL is used 574 * for communication between zones, ip_wput_ire has special logic to set 575 * the right source address when sending using an IRE_LOCAL. 576 * 577 * Furthermore, when ip_restrict_interzone_loopback is set (the default), 578 * ire_cache_lookup restricts loopback using an IRE_LOCAL 579 * between zones to the case when L2 would have conceptually looped the packet 580 * back, i.e. the loopback which is required since neither Ethernet drivers 581 * nor Ethernet hardware loops them back. This is the case when the normal 582 * routes (ignoring IREs with different zoneids) would send out the packet on 583 * the same ill (or ill group) as the ill with which the IRE_LOCAL is 584 * associated. 
585 * 586 * Multiple zones can share a common broadcast address; typically all zones 587 * share the 255.255.255.255 address. Incoming as well as locally originated 588 * broadcast packets must be dispatched to all the zones on the broadcast 589 * network. For directed broadcasts (e.g. 10.16.72.255) this is not trivial 590 * since some zones may not be on the 10.16.72/24 network. To handle this, each 591 * zone has its own set of IRE_BROADCAST entries; then, broadcast packets are 592 * sent to every zone that has an IRE_BROADCAST entry for the destination 593 * address on the input ill, see conn_wantpacket(). 594 * 595 * Applications in different zones can join the same multicast group address. 596 * For IPv4, group memberships are per-logical interface, so they're already 597 * inherently part of a zone. For IPv6, group memberships are per-physical 598 * interface, so we distinguish IPv6 group memberships based on group address, 599 * interface and zoneid. In both cases, received multicast packets are sent to 600 * every zone for which a group membership entry exists. On IPv6 we need to 601 * check that the target zone still has an address on the receiving physical 602 * interface; it could have been removed since the application issued the 603 * IPV6_JOIN_GROUP. 604 */ 605 606 /* 607 * Squeue Fanout flags: 608 * 0: No fanout. 609 * 1: Fanout across all squeues. The initial value below (0) is no fanout. 610 */ 611 boolean_t ip_squeue_fanout = 0; 612 613 /* 614 * Maximum dups allowed per packet. 
615 */ 616 uint_t ip_max_frag_dups = 10; 617 /* Non-zero when the header's version/length field equals IP_SIMPLE_HDR_VERSION */ 618 #define IS_SIMPLE_IPH(ipha) \ 619 ((ipha)->ipha_version_and_hdr_length == IP_SIMPLE_HDR_VERSION) 620 621 /* RFC1122 Conformance */ 622 #define IP_FORWARD_DEFAULT IP_FORWARD_NEVER 623 /* Name-length limit for an ill; aliased to LIFNAMSIZ */ 624 #define ILL_MAX_NAMELEN LIFNAMSIZ 625 626 static int conn_set_held_ipif(conn_t *, ipif_t **, ipif_t *); 627 628 static int ip_open(queue_t *q, dev_t *devp, int flag, int sflag, 629 cred_t *credp, boolean_t isv6); 630 static mblk_t *ip_wput_attach_llhdr(mblk_t *, ire_t *, ip_proc_t, uint32_t, 631 ipha_t **); 632 633 static void icmp_frag_needed(queue_t *, mblk_t *, int, zoneid_t, 634 ip_stack_t *); 635 static void icmp_inbound(queue_t *, mblk_t *, boolean_t, ill_t *, int, 636 uint32_t, boolean_t, boolean_t, ill_t *, zoneid_t); 637 static ipaddr_t icmp_get_nexthop_addr(ipha_t *, ill_t *, zoneid_t, mblk_t *mp); 638 static boolean_t icmp_inbound_too_big(icmph_t *, ipha_t *, ill_t *, zoneid_t, 639 mblk_t *, int, ip_stack_t *); 640 static void icmp_inbound_error_fanout(queue_t *, ill_t *, mblk_t *, 641 icmph_t *, ipha_t *, int, int, boolean_t, boolean_t, 642 ill_t *, zoneid_t); 643 static void icmp_options_update(ipha_t *); 644 static void icmp_param_problem(queue_t *, mblk_t *, uint8_t, zoneid_t, 645 ip_stack_t *); 646 static void icmp_pkt(queue_t *, mblk_t *, void *, size_t, boolean_t, 647 zoneid_t zoneid, ip_stack_t *); 648 static mblk_t *icmp_pkt_err_ok(mblk_t *, ip_stack_t *); 649 static void icmp_redirect(ill_t *, mblk_t *); 650 static void icmp_send_redirect(queue_t *, mblk_t *, ipaddr_t, 651 ip_stack_t *); 652 653 static void ip_arp_news(queue_t *, mblk_t *); 654 static boolean_t ip_bind_insert_ire(mblk_t *, ire_t *, iulp_t *, 655 ip_stack_t *); 656 mblk_t *ip_dlpi_alloc(size_t, t_uscalar_t); 657 char *ip_dot_addr(ipaddr_t, char *); 658 mblk_t *ip_carve_mp(mblk_t **, ssize_t); 659 int ip_close(queue_t *, int); 660 static char *ip_dot_saddr(uchar_t *, char *); 661 static void ip_fanout_proto(queue_t *, mblk_t *, ill_t *, ipha_t *, 
uint_t, 662 boolean_t, boolean_t, ill_t *, zoneid_t); 663 static void ip_fanout_tcp(queue_t *, mblk_t *, ill_t *, ipha_t *, uint_t, 664 boolean_t, boolean_t, zoneid_t); 665 static void ip_fanout_udp(queue_t *, mblk_t *, ill_t *, ipha_t *, uint32_t, 666 boolean_t, uint_t, boolean_t, boolean_t, ill_t *, zoneid_t); 667 static void ip_lrput(queue_t *, mblk_t *); 668 ipaddr_t ip_net_mask(ipaddr_t); 669 void ip_newroute(queue_t *, mblk_t *, ipaddr_t, conn_t *, zoneid_t, 670 ip_stack_t *); 671 static void ip_newroute_ipif(queue_t *, mblk_t *, ipif_t *, ipaddr_t, 672 conn_t *, uint32_t, zoneid_t, ip_opt_info_t *); 673 char *ip_nv_lookup(nv_t *, int); 674 static boolean_t ip_check_for_ipsec_opt(queue_t *, mblk_t *); 675 static int ip_param_get(queue_t *, mblk_t *, caddr_t, cred_t *); 676 static int ip_param_generic_get(queue_t *, mblk_t *, caddr_t, cred_t *); 677 static boolean_t ip_param_register(IDP *ndp, ipparam_t *, size_t, 678 ipndp_t *, size_t); 679 static int ip_param_set(queue_t *, mblk_t *, char *, caddr_t, cred_t *); 680 void ip_rput(queue_t *, mblk_t *); 681 static void ip_rput_dlpi_writer(ipsq_t *dummy_sq, queue_t *q, mblk_t *mp, 682 void *dummy_arg); 683 void ip_rput_forward(ire_t *, ipha_t *, mblk_t *, ill_t *); 684 static int ip_rput_forward_options(mblk_t *, ipha_t *, ire_t *, 685 ip_stack_t *); 686 static boolean_t ip_rput_local_options(queue_t *, mblk_t *, ipha_t *, 687 ire_t *, ip_stack_t *); 688 static boolean_t ip_rput_multimblk_ipoptions(queue_t *, ill_t *, 689 mblk_t *, ipha_t **, ipaddr_t *, ip_stack_t *); 690 static int ip_rput_options(queue_t *, mblk_t *, ipha_t *, ipaddr_t *, 691 ip_stack_t *); 692 static boolean_t ip_rput_fragment(queue_t *, mblk_t **, ipha_t *, uint32_t *, 693 uint16_t *); 694 int ip_snmp_get(queue_t *, mblk_t *, int); 695 static mblk_t *ip_snmp_get_mib2_ip(queue_t *, mblk_t *, 696 mib2_ipIfStatsEntry_t *, ip_stack_t *); 697 static mblk_t *ip_snmp_get_mib2_ip_traffic_stats(queue_t *, mblk_t *, 698 ip_stack_t *); 699 static 
mblk_t *ip_snmp_get_mib2_ip6(queue_t *, mblk_t *, ip_stack_t *); 700 static mblk_t *ip_snmp_get_mib2_icmp(queue_t *, mblk_t *, ip_stack_t *ipst); 701 static mblk_t *ip_snmp_get_mib2_icmp6(queue_t *, mblk_t *, ip_stack_t *ipst); 702 static mblk_t *ip_snmp_get_mib2_igmp(queue_t *, mblk_t *, ip_stack_t *ipst); 703 static mblk_t *ip_snmp_get_mib2_multi(queue_t *, mblk_t *, ip_stack_t *ipst); 704 static mblk_t *ip_snmp_get_mib2_ip_addr(queue_t *, mblk_t *, 705 ip_stack_t *ipst); 706 static mblk_t *ip_snmp_get_mib2_ip6_addr(queue_t *, mblk_t *, 707 ip_stack_t *ipst); 708 static mblk_t *ip_snmp_get_mib2_ip_group_src(queue_t *, mblk_t *, 709 ip_stack_t *ipst); 710 static mblk_t *ip_snmp_get_mib2_ip6_group_src(queue_t *, mblk_t *, 711 ip_stack_t *ipst); 712 static mblk_t *ip_snmp_get_mib2_ip_group_mem(queue_t *, mblk_t *, 713 ip_stack_t *ipst); 714 static mblk_t *ip_snmp_get_mib2_ip6_group_mem(queue_t *, mblk_t *, 715 ip_stack_t *ipst); 716 static mblk_t *ip_snmp_get_mib2_virt_multi(queue_t *, mblk_t *, 717 ip_stack_t *ipst); 718 static mblk_t *ip_snmp_get_mib2_multi_rtable(queue_t *, mblk_t *, 719 ip_stack_t *ipst); 720 static mblk_t *ip_snmp_get_mib2_ip_route_media(queue_t *, mblk_t *, 721 ip_stack_t *ipst); 722 static mblk_t *ip_snmp_get_mib2_ip6_route_media(queue_t *, mblk_t *, 723 ip_stack_t *ipst); 724 static void ip_snmp_get2_v4(ire_t *, iproutedata_t *); 725 static void ip_snmp_get2_v6_route(ire_t *, iproutedata_t *); 726 static int ip_snmp_get2_v6_media(nce_t *, iproutedata_t *); 727 int ip_snmp_set(queue_t *, int, int, uchar_t *, int); 728 static boolean_t ip_source_routed(ipha_t *, ip_stack_t *); 729 static boolean_t ip_source_route_included(ipha_t *); 730 static void ip_trash_ire_reclaim_stack(ip_stack_t *); 731 732 static void ip_wput_frag(ire_t *, mblk_t *, ip_pkt_t, uint32_t, uint32_t, 733 zoneid_t, ip_stack_t *); 734 static mblk_t *ip_wput_frag_copyhdr(uchar_t *, int, int, ip_stack_t *); 735 static void ip_wput_local_options(ipha_t *, ip_stack_t *); 736 
static int ip_wput_options(queue_t *, mblk_t *, ipha_t *, boolean_t, 737 zoneid_t, ip_stack_t *); 738 739 static void conn_drain_init(ip_stack_t *); 740 static void conn_drain_fini(ip_stack_t *); 741 static void conn_drain_tail(conn_t *connp, boolean_t closing); 742 743 static void conn_walk_drain(ip_stack_t *); 744 static void conn_walk_fanout_table(connf_t *, uint_t, pfv_t, void *, 745 zoneid_t); 746 747 static void *ip_stack_init(netstackid_t stackid, netstack_t *ns); 748 static void ip_stack_shutdown(netstackid_t stackid, void *arg); 749 static void ip_stack_fini(netstackid_t stackid, void *arg); 750 751 static boolean_t conn_wantpacket(conn_t *, ill_t *, ipha_t *, int, 752 zoneid_t); 753 static void ip_arp_done(ipsq_t *dummy_sq, queue_t *q, mblk_t *mp, 754 void *dummy_arg); 755 756 static int ip_forward_set(queue_t *, mblk_t *, char *, caddr_t, cred_t *); 757 758 static int ip_multirt_apply_membership(int (*fn)(conn_t *, boolean_t, 759 ipaddr_t, ipaddr_t, uint_t *, mcast_record_t, ipaddr_t, mblk_t *), ire_t *, 760 conn_t *, boolean_t, ipaddr_t, mcast_record_t, ipaddr_t, mblk_t *); 761 static void ip_multirt_bad_mtu(ire_t *, uint32_t); 762 763 static int ip_cgtp_filter_get(queue_t *, mblk_t *, caddr_t, cred_t *); 764 static int ip_cgtp_filter_set(queue_t *, mblk_t *, char *, 765 caddr_t, cred_t *); 766 extern int ip_squeue_bind_set(queue_t *q, mblk_t *mp, char *value, 767 caddr_t cp, cred_t *cr); 768 extern int ip_squeue_profile_set(queue_t *, mblk_t *, char *, caddr_t, 769 cred_t *); 770 static int ip_input_proc_set(queue_t *q, mblk_t *mp, char *value, 771 caddr_t cp, cred_t *cr); 772 static int ip_int_set(queue_t *, mblk_t *, char *, caddr_t, 773 cred_t *); 774 static int ipmp_hook_emulation_set(queue_t *, mblk_t *, char *, caddr_t, 775 cred_t *); 776 static squeue_func_t ip_squeue_switch(int); 777 778 static void *ip_kstat_init(netstackid_t, ip_stack_t *); 779 static void ip_kstat_fini(netstackid_t, kstat_t *); 780 static int ip_kstat_update(kstat_t *kp, int 
rw); 781 static void *icmp_kstat_init(netstackid_t); 782 static void icmp_kstat_fini(netstackid_t, kstat_t *); 783 static int icmp_kstat_update(kstat_t *kp, int rw); 784 static void *ip_kstat2_init(netstackid_t, ip_stat_t *); 785 static void ip_kstat2_fini(netstackid_t, kstat_t *); 786 787 static int ip_conn_report(queue_t *, mblk_t *, caddr_t, cred_t *); 788 789 static mblk_t *ip_tcp_input(mblk_t *, ipha_t *, ill_t *, boolean_t, 790 ire_t *, mblk_t *, uint_t, queue_t *, ill_rx_ring_t *); 791 792 static void ip_rput_process_forward(queue_t *, mblk_t *, ire_t *, 793 ipha_t *, ill_t *, boolean_t); 794 ipaddr_t ip_g_all_ones = IP_HOST_MASK; 795 796 /* How long, in seconds, we allow frags to hang around. */ 797 #define IP_FRAG_TIMEOUT 60 798 799 /* 800 * Threshold which determines whether MDT should be used when 801 * generating IP fragments; payload size must be greater than 802 * this threshold for MDT to take place. 803 */ 804 #define IP_WPUT_FRAG_MDT_MIN 32768 805 806 /* Setable in /etc/system only */ 807 int ip_wput_frag_mdt_min = IP_WPUT_FRAG_MDT_MIN; 808 809 static long ip_rput_pullups; 810 int dohwcksum = 1; /* use h/w cksum if supported by the hardware */ 811 812 vmem_t *ip_minor_arena_sa; /* for minor nos. from INET_MIN_DEV+2 thru 2^^18-1 */ 813 vmem_t *ip_minor_arena_la; /* for minor nos. from 2^^18 thru 2^^32-1 */ 814 815 int ip_debug; 816 817 #ifdef DEBUG 818 uint32_t ipsechw_debug = 0; 819 #endif 820 821 /* 822 * Multirouting/CGTP stuff 823 */ 824 int ip_cgtp_filter_rev = CGTP_FILTER_REV; /* CGTP hooks version */ 825 826 /* 827 * XXX following really should only be in a header. Would need more 828 * header and .c clean up first. 829 */ 830 extern optdb_obj_t ip_opt_obj; 831 832 ulong_t ip_squeue_enter_unbound = 0; 833 834 /* 835 * Named Dispatch Parameter Table. 836 * All of these are alterable, within the min/max values given, at run time. 
837 */ 838 static ipparam_t lcl_param_arr[] = { 839 /* min max value name */ 840 { 0, 1, 0, "ip_respond_to_address_mask_broadcast"}, 841 { 0, 1, 1, "ip_respond_to_echo_broadcast"}, 842 { 0, 1, 1, "ip_respond_to_echo_multicast"}, 843 { 0, 1, 0, "ip_respond_to_timestamp"}, 844 { 0, 1, 0, "ip_respond_to_timestamp_broadcast"}, 845 { 0, 1, 1, "ip_send_redirects"}, 846 { 0, 1, 0, "ip_forward_directed_broadcasts"}, 847 { 0, 10, 0, "ip_mrtdebug"}, 848 { 5000, 999999999, 60000, "ip_ire_timer_interval" }, 849 { 60000, 999999999, 1200000, "ip_ire_arp_interval" }, 850 { 60000, 999999999, 60000, "ip_ire_redirect_interval" }, 851 { 1, 255, 255, "ip_def_ttl" }, 852 { 0, 1, 0, "ip_forward_src_routed"}, 853 { 0, 256, 32, "ip_wroff_extra" }, 854 { 5000, 999999999, 600000, "ip_ire_pathmtu_interval" }, 855 { 8, 65536, 64, "ip_icmp_return_data_bytes" }, 856 { 0, 1, 1, "ip_path_mtu_discovery" }, 857 { 0, 240, 30, "ip_ignore_delete_time" }, 858 { 0, 1, 0, "ip_ignore_redirect" }, 859 { 0, 1, 1, "ip_output_queue" }, 860 { 1, 254, 1, "ip_broadcast_ttl" }, 861 { 0, 99999, 100, "ip_icmp_err_interval" }, 862 { 1, 99999, 10, "ip_icmp_err_burst" }, 863 { 0, 999999999, 1000000, "ip_reass_queue_bytes" }, 864 { 0, 1, 0, "ip_strict_dst_multihoming" }, 865 { 1, MAX_ADDRS_PER_IF, 256, "ip_addrs_per_if"}, 866 { 0, 1, 0, "ipsec_override_persocket_policy" }, 867 { 0, 1, 1, "icmp_accept_clear_messages" }, 868 { 0, 1, 1, "igmp_accept_clear_messages" }, 869 { 2, 999999999, ND_DELAY_FIRST_PROBE_TIME, 870 "ip_ndp_delay_first_probe_time"}, 871 { 1, 999999999, ND_MAX_UNICAST_SOLICIT, 872 "ip_ndp_max_unicast_solicit"}, 873 { 1, 255, IPV6_MAX_HOPS, "ip6_def_hops" }, 874 { 8, IPV6_MIN_MTU, IPV6_MIN_MTU, "ip6_icmp_return_data_bytes" }, 875 { 0, 1, 0, "ip6_forward_src_routed"}, 876 { 0, 1, 1, "ip6_respond_to_echo_multicast"}, 877 { 0, 1, 1, "ip6_send_redirects"}, 878 { 0, 1, 0, "ip6_ignore_redirect" }, 879 { 0, 1, 0, "ip6_strict_dst_multihoming" }, 880 881 { 1, 8, 3, "ip_ire_reclaim_fraction" }, 882 883 { 0, 999999, 
1000, "ipsec_policy_log_interval" }, 884 885 { 0, 1, 1, "pim_accept_clear_messages" }, 886 { 1000, 20000, 2000, "ip_ndp_unsolicit_interval" }, 887 { 1, 20, 3, "ip_ndp_unsolicit_count" }, 888 { 0, 1, 1, "ip6_ignore_home_address_opt" }, 889 { 0, 15, 0, "ip_policy_mask" }, 890 { 1000, 60000, 1000, "ip_multirt_resolution_interval" }, 891 { 0, 255, 1, "ip_multirt_ttl" }, 892 { 0, 1, 1, "ip_multidata_outbound" }, 893 { 0, 3600000, 300000, "ip_ndp_defense_interval" }, 894 { 0, 999999, 60*60*24, "ip_max_temp_idle" }, 895 { 0, 1000, 1, "ip_max_temp_defend" }, 896 { 0, 1000, 3, "ip_max_defend" }, 897 { 0, 999999, 30, "ip_defend_interval" }, 898 { 0, 3600000, 300000, "ip_dup_recovery" }, 899 { 0, 1, 1, "ip_restrict_interzone_loopback" }, 900 { 0, 1, 1, "ip_lso_outbound" }, 901 { IGMP_V1_ROUTER, IGMP_V3_ROUTER, IGMP_V3_ROUTER, "igmp_max_version" }, 902 { MLD_V1_ROUTER, MLD_V2_ROUTER, MLD_V2_ROUTER, "mld_max_version" }, 903 #ifdef DEBUG 904 { 0, 1, 0, "ip6_drop_inbound_icmpv6" }, 905 #else 906 { 0, 0, 0, "" }, 907 #endif 908 }; 909 910 /* 911 * Extended NDP table 912 * The addresses for the first two are filled in to be ips_ip_g_forward 913 * and ips_ipv6_forward at init time. 
914 */ 915 static ipndp_t lcl_ndp_arr[] = { 916 /* getf setf data name */ 917 #define IPNDP_IP_FORWARDING_OFFSET 0 918 { ip_param_generic_get, ip_forward_set, NULL, 919 "ip_forwarding" }, 920 #define IPNDP_IP6_FORWARDING_OFFSET 1 921 { ip_param_generic_get, ip_forward_set, NULL, 922 "ip6_forwarding" }, 923 { ip_ill_report, NULL, NULL, 924 "ip_ill_status" }, 925 { ip_ipif_report, NULL, NULL, 926 "ip_ipif_status" }, 927 { ip_conn_report, NULL, NULL, 928 "ip_conn_status" }, 929 { nd_get_long, nd_set_long, (caddr_t)&ip_rput_pullups, 930 "ip_rput_pullups" }, 931 { ip_srcid_report, NULL, NULL, 932 "ip_srcid_status" }, 933 { ip_param_generic_get, ip_squeue_profile_set, 934 (caddr_t)&ip_squeue_profile, "ip_squeue_profile" }, 935 { ip_param_generic_get, ip_squeue_bind_set, 936 (caddr_t)&ip_squeue_bind, "ip_squeue_bind" }, 937 { ip_param_generic_get, ip_input_proc_set, 938 (caddr_t)&ip_squeue_enter, "ip_squeue_enter" }, 939 { ip_param_generic_get, ip_int_set, 940 (caddr_t)&ip_squeue_fanout, "ip_squeue_fanout" }, 941 #define IPNDP_CGTP_FILTER_OFFSET 11 942 { ip_cgtp_filter_get, ip_cgtp_filter_set, NULL, 943 "ip_cgtp_filter" }, 944 { ip_param_generic_get, ip_int_set, 945 (caddr_t)&ip_soft_rings_cnt, "ip_soft_rings_cnt" }, 946 #define IPNDP_IPMP_HOOK_OFFSET 13 947 { ip_param_generic_get, ipmp_hook_emulation_set, NULL, 948 "ipmp_hook_emulation" }, 949 { ip_param_generic_get, ip_int_set, (caddr_t)&ip_debug, 950 "ip_debug" }, 951 }; 952 953 /* 954 * Table of IP ioctls encoding the various properties of the ioctl and 955 * indexed based on the last byte of the ioctl command. Occasionally there 956 * is a clash, and there is more than 1 ioctl with the same last byte. 957 * In such a case 1 ioctl is encoded in the ndx table and the remaining 958 * ioctls are encoded in the misc table. An entry in the ndx table is 959 * retrieved by indexing on the last byte of the ioctl command and comparing 960 * the ioctl command with the value in the ndx table. 
In the event of a 961 * mismatch the misc table is then searched sequentially for the desired 962 * ioctl command. 963 * 964 * Entry: <command> <copyin_size> <flags> <cmd_type> <function> <restart_func> 965 */ 966 ip_ioctl_cmd_t ip_ndx_ioctl_table[] = { 967 /* 000 */ { IPI_DONTCARE, 0, 0, 0, NULL, NULL }, 968 /* 001 */ { IPI_DONTCARE, 0, 0, 0, NULL, NULL }, 969 /* 002 */ { IPI_DONTCARE, 0, 0, 0, NULL, NULL }, 970 /* 003 */ { IPI_DONTCARE, 0, 0, 0, NULL, NULL }, 971 /* 004 */ { IPI_DONTCARE, 0, 0, 0, NULL, NULL }, 972 /* 005 */ { IPI_DONTCARE, 0, 0, 0, NULL, NULL }, 973 /* 006 */ { IPI_DONTCARE, 0, 0, 0, NULL, NULL }, 974 /* 007 */ { IPI_DONTCARE, 0, 0, 0, NULL, NULL }, 975 /* 008 */ { IPI_DONTCARE, 0, 0, 0, NULL, NULL }, 976 /* 009 */ { IPI_DONTCARE, 0, 0, 0, NULL, NULL }, 977 978 /* 010 */ { SIOCADDRT, sizeof (struct rtentry), IPI_PRIV, 979 MISC_CMD, ip_siocaddrt, NULL }, 980 /* 011 */ { SIOCDELRT, sizeof (struct rtentry), IPI_PRIV, 981 MISC_CMD, ip_siocdelrt, NULL }, 982 983 /* 012 */ { SIOCSIFADDR, sizeof (struct ifreq), IPI_PRIV | IPI_WR, 984 IF_CMD, ip_sioctl_addr, ip_sioctl_addr_restart }, 985 /* 013 */ { SIOCGIFADDR, sizeof (struct ifreq), IPI_GET_CMD | IPI_REPL, 986 IF_CMD, ip_sioctl_get_addr, NULL }, 987 988 /* 014 */ { SIOCSIFDSTADDR, sizeof (struct ifreq), IPI_PRIV | IPI_WR, 989 IF_CMD, ip_sioctl_dstaddr, ip_sioctl_dstaddr_restart }, 990 /* 015 */ { SIOCGIFDSTADDR, sizeof (struct ifreq), 991 IPI_GET_CMD | IPI_REPL, 992 IF_CMD, ip_sioctl_get_dstaddr, NULL }, 993 994 /* 016 */ { SIOCSIFFLAGS, sizeof (struct ifreq), 995 IPI_PRIV | IPI_WR |