1 0 stevel /* 2 0 stevel * CDDL HEADER START 3 0 stevel * 4 0 stevel * The contents of this file are subject to the terms of the 5 1503 ericheng * Common Development and Distribution License (the "License"). 6 1503 ericheng * You may not use this file except in compliance with the License. 7 0 stevel * 8 0 stevel * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 0 stevel * or http://www.opensolaris.org/os/licensing. 10 0 stevel * See the License for the specific language governing permissions 11 0 stevel * and limitations under the License. 12 0 stevel * 13 0 stevel * When distributing Covered Code, include this CDDL HEADER in each 14 0 stevel * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 0 stevel * If applicable, add the following below this CDDL HEADER, with the 16 0 stevel * fields enclosed by brackets "[]" replaced with your own identifying 17 0 stevel * information: Portions Copyright [yyyy] [name of copyright owner] 18 0 stevel * 19 0 stevel * CDDL HEADER END 20 0 stevel */ 21 0 stevel /* 22 8485 Peter * Copyright 2009 Sun Microsystems, Inc. All rights reserved. 23 0 stevel * Use is subject to license terms. 24 0 stevel */ 25 0 stevel 26 0 stevel /* 27 0 stevel * IP PACKET CLASSIFIER 28 0 stevel * 29 0 stevel * The IP packet classifier provides mapping between IP packets and persistent 30 0 stevel * connection state for connection-oriented protocols. It also provides 31 0 stevel * interface for managing connection states. 
32 0 stevel * 33 0 stevel * The connection state is kept in conn_t data structure and contains, among 34 0 stevel * other things: 35 0 stevel * 36 0 stevel * o local/remote address and ports 37 0 stevel * o Transport protocol 38 0 stevel * o squeue for the connection (for TCP only) 39 0 stevel * o reference counter 40 0 stevel * o Connection state 41 0 stevel * o hash table linkage 42 0 stevel * o interface/ire information 43 0 stevel * o credentials 44 0 stevel * o ipsec policy 45 0 stevel * o send and receive functions. 46 0 stevel * o mutex lock. 47 0 stevel * 48 0 stevel * Connections use a reference counting scheme. They are freed when the 49 0 stevel * reference counter drops to zero. A reference is incremented when connection 50 0 stevel * is placed in a list or table, when incoming packet for the connection arrives 51 0 stevel * and when connection is processed via squeue (squeue processing may be 52 0 stevel * asynchronous and the reference protects the connection from being destroyed 53 0 stevel * before its processing is finished). 54 0 stevel * 55 11042 Erik * conn_recv is used to pass up packets to the ULP. 56 11042 Erik * For TCP conn_recv changes. It is tcp_input_listener_unbound initially for 57 11042 Erik * a listener, and changes to tcp_input_listener as the listener has picked a 58 11042 Erik * good squeue. For other cases it is set to tcp_input_data. 59 11042 Erik * 60 11042 Erik * conn_recvicmp is used to pass up ICMP errors to the ULP. 
61 0 stevel * 62 0 stevel * Classifier uses several hash tables: 63 0 stevel * 64 0 stevel * ipcl_conn_fanout: contains all TCP connections in CONNECTED state 65 0 stevel * ipcl_bind_fanout: contains all connections in BOUND state 66 0 stevel * ipcl_proto_fanout: IPv4 protocol fanout 67 0 stevel * ipcl_proto_fanout_v6: IPv6 protocol fanout 68 0 stevel * ipcl_udp_fanout: contains all UDP connections 69 10616 Sebastien * ipcl_iptun_fanout: contains all IP tunnel connections 70 0 stevel * ipcl_globalhash_fanout: contains all connections 71 0 stevel * 72 0 stevel * The ipcl_globalhash_fanout is used for any walkers (like snmp and Clustering) 73 0 stevel * which need to view all existing connections. 74 0 stevel * 75 0 stevel * All tables are protected by per-bucket locks. When both per-bucket lock and 76 0 stevel * connection lock need to be held, the per-bucket lock should be acquired 77 0 stevel * first, followed by the connection lock. 78 0 stevel * 79 0 stevel * All functions doing search in one of these tables increment a reference 80 0 stevel * counter on the connection found (if any). This reference should be dropped 81 0 stevel * when the caller has finished processing the connection. 82 0 stevel * 83 0 stevel * 84 0 stevel * INTERFACES: 85 0 stevel * =========== 86 0 stevel * 87 0 stevel * Connection Lookup: 88 0 stevel * ------------------ 89 0 stevel * 90 11042 Erik * conn_t *ipcl_classify_v4(mp, protocol, hdr_len, ira, ip_stack) 91 11042 Erik * conn_t *ipcl_classify_v6(mp, protocol, hdr_len, ira, ip_stack) 92 0 stevel * 93 0 stevel * Finds connection for an incoming IPv4 or IPv6 packet. Returns NULL if 94 0 stevel * it can't find any associated connection. If the connection is found, its 95 0 stevel * reference counter is incremented. 96 0 stevel * 97 0 stevel * mp: mblock, containing packet header. The full header should fit 98 0 stevel * into a single mblock. It should also contain at least full IP 99 0 stevel * and TCP or UDP header. 
 *
 *	protocol:	Either IPPROTO_TCP or IPPROTO_UDP.
 *
 *	hdr_len:	The size of IP header. It is used to find TCP or UDP
 *			header in the packet.
 *
 *	ira->ira_zoneid: The zone in which the returned connection must be; the
 *			zoneid corresponding to the ire_zoneid on the IRE
 *			located for the packet's destination address.
 *
 *	ira->ira_flags:	Contains the IRAF_TX_MAC_EXEMPTABLE and
 *			IRAF_TX_SHARED_ADDR flags
 *
 *	For TCP connections, the lookup order is as follows:
 *		5-tuple {src, dst, protocol, local port, remote port}
 *			lookup in ipcl_conn_fanout table.
 *		3-tuple {dst, remote port, protocol} lookup in
 *			ipcl_bind_fanout table.
 *
 *	For UDP connections, a 5-tuple {src, dst, protocol, local port,
 *	remote port} lookup is done on ipcl_udp_fanout. Note that
 *	these interfaces do not handle cases where a packet belongs
 *	to multiple UDP clients, which is handled in IP itself.
 *
 * If the destination IRE is ALL_ZONES (indicated by zoneid), then we must
 * determine which actual zone gets the segment.  This is used only in a
 * labeled environment.  The matching rules are:
 *
 *	- If it's not a multilevel port, then the label on the packet selects
 *	  the zone.  Unlabeled packets are delivered to the global zone.
 *
 *	- If it's a multilevel port, then only the zone registered to receive
 *	  packets on that port matches.
 *
 * Also, in a labeled environment, packet labels need to be checked.  For fully
 * bound TCP connections, we can assume that the packet label was checked
 * during connection establishment, and doesn't need to be checked on each
 * packet.  For others, though, we need to check for strict equality or, for
 * multilevel ports, membership in the range or set.  This part currently does
 * a tnrh lookup on each packet, but could be optimized to use cached results
 * if that were necessary.  (SCTP doesn't come through here, but if it did,
 * we would apply the same rules as TCP.)
 *
 * An implication of the above is that fully-bound TCP sockets must always use
 * distinct 4-tuples; they can't be discriminated by label alone.
 *
 * Note that we cannot trust labels on packets sent to fully-bound UDP sockets,
 * as there's no connection set-up handshake and no shared state.
 *
 * Labels on looped-back packets within a single zone do not need to be
 * checked, as all processes in the same zone have the same label.
 *
 * Finally, for unlabeled packets received by a labeled system, special rules
 * apply.  We consider only the MLP if there is one.  Otherwise, we prefer a
 * socket in the zone whose label matches the default label of the sender, if
 * any.  In any event, the receiving socket must have SO_MAC_EXEMPT set and the
 * receiver's label must dominate the sender's default label.
 *
 * conn_t *ipcl_tcp_lookup_reversed_ipv4(ipha_t *, tcpha_t *, int, ip_stack);
 * conn_t *ipcl_tcp_lookup_reversed_ipv6(ip6_t *, tcpha_t *, int, uint_t,
 *					 ip_stack);
 *
 *	Lookup routine to find an exact match for {src, dst, local port,
 *	remote port} for TCP connections in ipcl_conn_fanout.
 *	The address and ports are read from the IP and TCP header
 *	respectively.
 *
 * conn_t	*ipcl_lookup_listener_v4(lport, laddr, protocol,
 *					 zoneid, ip_stack);
 * conn_t	*ipcl_lookup_listener_v6(lport, laddr, protocol, ifindex,
 *					 zoneid, ip_stack);
 *
 *	Lookup routine to find a listener with the tuple {lport, laddr,
 *	protocol} in the ipcl_bind_fanout table. For IPv6, an additional
 *	parameter interface index is also compared.
 *
 * void ipcl_walk(func, arg, ip_stack)
 *
 *	Apply 'func' to every connection available. The 'func' is called as
 *	(*func)(connp, arg). The walk is non-atomic so connections may be
 *	created and destroyed during the walk. The CONN_CONDEMNED and
 *	CONN_INCIPIENT flags ensure that connections which are newly created
 *	or being destroyed are not selected by the walker.
 *
 * Table Updates
 * -------------
 *
 * int ipcl_conn_insert(connp);
 * int ipcl_conn_insert_v4(connp);
 * int ipcl_conn_insert_v6(connp);
 *
 *	Insert 'connp' in the ipcl_conn_fanout.
 *	Arguments :
 *		connp		conn_t to be inserted
 *
 *	Return value :
 *		0		if connp was inserted
 *		EADDRINUSE	if the connection with the same tuple
 *				already exists.
 *
 * int ipcl_bind_insert(connp);
 * int ipcl_bind_insert_v4(connp);
 * int ipcl_bind_insert_v6(connp);
 *
 *	Insert 'connp' in ipcl_bind_fanout.
 *	Arguments :
 *		connp		conn_t to be inserted
 *
 *
 * void ipcl_hash_remove(connp);
 *
 *	Removes the 'connp' from the connection fanout table.
 *
 * Connection Creation/Destruction
 * -------------------------------
 *
 * conn_t *ipcl_conn_create(type, sleep, netstack_t *)
 *
 *	Creates a new conn based on the type flag, inserts it into
 *	globalhash table.
 *
 *	type:	This flag determines the type of conn_t which needs to be
 *		created i.e., which kmem_cache it comes from.
 *		IPCL_TCPCONN	indicates a TCP connection
 *		IPCL_SCTPCONN	indicates a SCTP connection
 *		IPCL_UDPCONN	indicates a UDP conn_t.
 *		IPCL_RAWIPCONN	indicates a RAWIP/ICMP conn_t.
 *		IPCL_RTSCONN	indicates a RTS conn_t.
 *		IPCL_IPCCONN	indicates all other connections.
 *
 * void ipcl_conn_destroy(connp)
 *
 *	Destroys the connection state, removes it from the global
 *	connection hash table and frees its memory.
 */

#include <sys/types.h>
#include <sys/stream.h>
#include <sys/stropts.h>
#include <sys/sysmacros.h>
#include <sys/strsubr.h>
#include <sys/strsun.h>
#define	_SUN_TPI_VERSION 2
#include <sys/ddi.h>
#include <sys/cmn_err.h>
#include <sys/debug.h>

#include <sys/systm.h>
#include <sys/param.h>
#include <sys/kmem.h>
#include <sys/isa_defs.h>
#include <inet/common.h>
#include <netinet/ip6.h>
#include <netinet/icmp6.h>

#include <inet/ip.h>
#include <inet/ip_if.h>
#include <inet/ip_ire.h>
#include <inet/ip6.h>
#include <inet/ip_ndp.h>
#include <inet/ip_impl.h>
#include <inet/udp_impl.h>
#include <inet/sctp_ip.h>
#include <inet/sctp/sctp_impl.h>
#include <inet/rawip_impl.h>
#include <inet/rts_impl.h>
#include <inet/iptun/iptun_impl.h>

#include <sys/cpuvar.h>

#include <inet/ipclassifier.h>
#include <inet/tcp.h>
#include <inet/ipsec_impl.h>

#include <sys/tsol/tnet.h>
#include <sys/sockio.h>

/*
 * Old value for compatibility.  Settable in /etc/system.
 * ipcl_init() consults it only when ipcl_conn_hash_size is zero.
 */
uint_t tcp_conn_hash_size = 0;

/*
 * New value.  Zero means choose automatically.  Settable in /etc/system.
 *
 * When both this and tcp_conn_hash_size are zero, ipcl_init() computes the
 * conn fanout size as (freemem * PAGESIZE) / ipcl_conn_hash_memfactor and
 * caps the result at ipcl_conn_hash_maxsize.
 */
uint_t ipcl_conn_hash_size = 0;
uint_t ipcl_conn_hash_memfactor = 8192;
uint_t ipcl_conn_hash_maxsize = 82500;

/* bind/udp fanout table size */
uint_t ipcl_bind_fanout_size = 512;
uint_t ipcl_udp_fanout_size = 16384;

/* Raw socket fanout size.  Must be a power of 2. */
uint_t ipcl_raw_fanout_size = 256;

/*
 * The IPCL_IPTUN_HASH() function works best with a prime table size.  We
 * expect that most large deployments would have hundreds of tunnels, and
 * thousands in the extreme case.
 */
uint_t ipcl_iptun_fanout_size = 6143;

/*
 * Power of 2^N Primes useful for hashing for N of 0-28,
 * these primes are the nearest prime <= 2^N - 2^(N-2).
 *
 * The final 0 entry acts as an end marker: ipcl_init() treats a selected
 * value of 0 as "out of range" and falls back to entry 16.
 */

#define	P2Ps() {0, 0, 0, 5, 11, 23, 47, 89, 191, 383, 761, 1531, 3067,	\
		6143, 12281, 24571, 49139, 98299, 196597, 393209,	\
		786431, 1572853, 3145721, 6291449, 12582893, 25165813,	\
		50331599, 100663291, 201326557, 0}

/*
 * wrapper structure to ensure that conn and what follows it (tcp_t, etc)
 * are aligned on cache lines.
311 0 stevel */ 312 5240 nordmark typedef union itc_s { 313 5240 nordmark conn_t itc_conn; 314 5240 nordmark char itcu_filler[CACHE_ALIGN(conn_s)]; 315 0 stevel } itc_t; 316 0 stevel 317 5240 nordmark struct kmem_cache *tcp_conn_cache; 318 5240 nordmark struct kmem_cache *ip_conn_cache; 319 0 stevel extern struct kmem_cache *sctp_conn_cache; 320 0 stevel extern struct kmem_cache *tcp_sack_info_cache; 321 5240 nordmark struct kmem_cache *udp_conn_cache; 322 5240 nordmark struct kmem_cache *rawip_conn_cache; 323 5240 nordmark struct kmem_cache *rts_conn_cache; 324 0 stevel 325 0 stevel extern void tcp_timermp_free(tcp_t *); 326 0 stevel extern mblk_t *tcp_timermp_alloc(int); 327 0 stevel 328 5240 nordmark static int ip_conn_constructor(void *, void *, int); 329 5240 nordmark static void ip_conn_destructor(void *, void *); 330 5240 nordmark 331 5240 nordmark static int tcp_conn_constructor(void *, void *, int); 332 5240 nordmark static void tcp_conn_destructor(void *, void *); 333 5240 nordmark 334 5240 nordmark static int udp_conn_constructor(void *, void *, int); 335 5240 nordmark static void udp_conn_destructor(void *, void *); 336 5240 nordmark 337 5240 nordmark static int rawip_conn_constructor(void *, void *, int); 338 5240 nordmark static void rawip_conn_destructor(void *, void *); 339 5240 nordmark 340 5240 nordmark static int rts_conn_constructor(void *, void *, int); 341 5240 nordmark static void rts_conn_destructor(void *, void *); 342 0 stevel 343 0 stevel /* 344 3448 dh155122 * Global (for all stack instances) init routine 345 0 stevel */ 346 0 stevel void 347 3448 dh155122 ipcl_g_init(void) 348 0 stevel { 349 5240 nordmark ip_conn_cache = kmem_cache_create("ip_conn_cache", 350 0 stevel sizeof (conn_t), CACHE_ALIGN_SIZE, 351 5240 nordmark ip_conn_constructor, ip_conn_destructor, 352 5240 nordmark NULL, NULL, NULL, 0); 353 0 stevel 354 5240 nordmark tcp_conn_cache = kmem_cache_create("tcp_conn_cache", 355 5240 nordmark sizeof (itc_t) + sizeof (tcp_t), 
CACHE_ALIGN_SIZE, 356 5240 nordmark tcp_conn_constructor, tcp_conn_destructor, 357 5240 nordmark NULL, NULL, NULL, 0); 358 5240 nordmark 359 5240 nordmark udp_conn_cache = kmem_cache_create("udp_conn_cache", 360 5240 nordmark sizeof (itc_t) + sizeof (udp_t), CACHE_ALIGN_SIZE, 361 5240 nordmark udp_conn_constructor, udp_conn_destructor, 362 5240 nordmark NULL, NULL, NULL, 0); 363 5240 nordmark 364 5240 nordmark rawip_conn_cache = kmem_cache_create("rawip_conn_cache", 365 5240 nordmark sizeof (itc_t) + sizeof (icmp_t), CACHE_ALIGN_SIZE, 366 5240 nordmark rawip_conn_constructor, rawip_conn_destructor, 367 5240 nordmark NULL, NULL, NULL, 0); 368 5240 nordmark 369 5240 nordmark rts_conn_cache = kmem_cache_create("rts_conn_cache", 370 5240 nordmark sizeof (itc_t) + sizeof (rts_t), CACHE_ALIGN_SIZE, 371 5240 nordmark rts_conn_constructor, rts_conn_destructor, 372 0 stevel NULL, NULL, NULL, 0); 373 3448 dh155122 } 374 3448 dh155122 375 3448 dh155122 /* 376 3448 dh155122 * ipclassifier intialization routine, sets up hash tables. 
377 3448 dh155122 */ 378 3448 dh155122 void 379 3448 dh155122 ipcl_init(ip_stack_t *ipst) 380 3448 dh155122 { 381 3448 dh155122 int i; 382 3448 dh155122 int sizes[] = P2Ps(); 383 0 stevel 384 0 stevel /* 385 3448 dh155122 * Calculate size of conn fanout table from /etc/system settings 386 0 stevel */ 387 0 stevel if (ipcl_conn_hash_size != 0) { 388 3448 dh155122 ipst->ips_ipcl_conn_fanout_size = ipcl_conn_hash_size; 389 0 stevel } else if (tcp_conn_hash_size != 0) { 390 3448 dh155122 ipst->ips_ipcl_conn_fanout_size = tcp_conn_hash_size; 391 0 stevel } else { 392 0 stevel extern pgcnt_t freemem; 393 0 stevel 394 3448 dh155122 ipst->ips_ipcl_conn_fanout_size = 395 0 stevel (freemem * PAGESIZE) / ipcl_conn_hash_memfactor; 396 0 stevel 397 3448 dh155122 if (ipst->ips_ipcl_conn_fanout_size > ipcl_conn_hash_maxsize) { 398 3448 dh155122 ipst->ips_ipcl_conn_fanout_size = 399 3448 dh155122 ipcl_conn_hash_maxsize; 400 3448 dh155122 } 401 0 stevel } 402 0 stevel 403 0 stevel for (i = 9; i < sizeof (sizes) / sizeof (*sizes) - 1; i++) { 404 3448 dh155122 if (sizes[i] >= ipst->ips_ipcl_conn_fanout_size) { 405 0 stevel break; 406 0 stevel } 407 0 stevel } 408 3448 dh155122 if ((ipst->ips_ipcl_conn_fanout_size = sizes[i]) == 0) { 409 0 stevel /* Out of range, use the 2^16 value */ 410 3448 dh155122 ipst->ips_ipcl_conn_fanout_size = sizes[16]; 411 0 stevel } 412 0 stevel 413 3448 dh155122 /* Take values from /etc/system */ 414 3448 dh155122 ipst->ips_ipcl_bind_fanout_size = ipcl_bind_fanout_size; 415 3448 dh155122 ipst->ips_ipcl_udp_fanout_size = ipcl_udp_fanout_size; 416 3448 dh155122 ipst->ips_ipcl_raw_fanout_size = ipcl_raw_fanout_size; 417 10616 Sebastien ipst->ips_ipcl_iptun_fanout_size = ipcl_iptun_fanout_size; 418 3448 dh155122 419 3448 dh155122 ASSERT(ipst->ips_ipcl_conn_fanout == NULL); 420 3448 dh155122 421 3448 dh155122 ipst->ips_ipcl_conn_fanout = kmem_zalloc( 422 3448 dh155122 ipst->ips_ipcl_conn_fanout_size * sizeof (connf_t), KM_SLEEP); 423 3448 dh155122 424 3448 
dh155122 for (i = 0; i < ipst->ips_ipcl_conn_fanout_size; i++) { 425 3448 dh155122 mutex_init(&ipst->ips_ipcl_conn_fanout[i].connf_lock, NULL, 426 0 stevel MUTEX_DEFAULT, NULL); 427 0 stevel } 428 0 stevel 429 3448 dh155122 ipst->ips_ipcl_bind_fanout = kmem_zalloc( 430 3448 dh155122 ipst->ips_ipcl_bind_fanout_size * sizeof (connf_t), KM_SLEEP); 431 0 stevel 432 3448 dh155122 for (i = 0; i < ipst->ips_ipcl_bind_fanout_size; i++) { 433 3448 dh155122 mutex_init(&ipst->ips_ipcl_bind_fanout[i].connf_lock, NULL, 434 0 stevel MUTEX_DEFAULT, NULL); 435 0 stevel } 436 0 stevel 437 11042 Erik ipst->ips_ipcl_proto_fanout_v4 = kmem_zalloc(IPPROTO_MAX * 438 3448 dh155122 sizeof (connf_t), KM_SLEEP); 439 3448 dh155122 for (i = 0; i < IPPROTO_MAX; i++) { 440 11042 Erik mutex_init(&ipst->ips_ipcl_proto_fanout_v4[i].connf_lock, NULL, 441 0 stevel MUTEX_DEFAULT, NULL); 442 0 stevel } 443 0 stevel 444 3448 dh155122 ipst->ips_ipcl_proto_fanout_v6 = kmem_zalloc(IPPROTO_MAX * 445 3448 dh155122 sizeof (connf_t), KM_SLEEP); 446 3448 dh155122 for (i = 0; i < IPPROTO_MAX; i++) { 447 3448 dh155122 mutex_init(&ipst->ips_ipcl_proto_fanout_v6[i].connf_lock, NULL, 448 0 stevel MUTEX_DEFAULT, NULL); 449 0 stevel } 450 0 stevel 451 3448 dh155122 ipst->ips_rts_clients = kmem_zalloc(sizeof (connf_t), KM_SLEEP); 452 3448 dh155122 mutex_init(&ipst->ips_rts_clients->connf_lock, 453 3448 dh155122 NULL, MUTEX_DEFAULT, NULL); 454 0 stevel 455 3448 dh155122 ipst->ips_ipcl_udp_fanout = kmem_zalloc( 456 3448 dh155122 ipst->ips_ipcl_udp_fanout_size * sizeof (connf_t), KM_SLEEP); 457 3448 dh155122 for (i = 0; i < ipst->ips_ipcl_udp_fanout_size; i++) { 458 3448 dh155122 mutex_init(&ipst->ips_ipcl_udp_fanout[i].connf_lock, NULL, 459 10616 Sebastien MUTEX_DEFAULT, NULL); 460 10616 Sebastien } 461 10616 Sebastien 462 10616 Sebastien ipst->ips_ipcl_iptun_fanout = kmem_zalloc( 463 10616 Sebastien ipst->ips_ipcl_iptun_fanout_size * sizeof (connf_t), KM_SLEEP); 464 10616 Sebastien for (i = 0; i < 
ipst->ips_ipcl_iptun_fanout_size; i++) { 465 10616 Sebastien mutex_init(&ipst->ips_ipcl_iptun_fanout[i].connf_lock, NULL, 466 0 stevel MUTEX_DEFAULT, NULL); 467 0 stevel } 468 0 stevel 469 3448 dh155122 ipst->ips_ipcl_raw_fanout = kmem_zalloc( 470 3448 dh155122 ipst->ips_ipcl_raw_fanout_size * sizeof (connf_t), KM_SLEEP); 471 3448 dh155122 for (i = 0; i < ipst->ips_ipcl_raw_fanout_size; i++) { 472 3448 dh155122 mutex_init(&ipst->ips_ipcl_raw_fanout[i].connf_lock, NULL, 473 3448 dh155122 MUTEX_DEFAULT, NULL); 474 3448 dh155122 } 475 0 stevel 476 3448 dh155122 ipst->ips_ipcl_globalhash_fanout = kmem_zalloc( 477 3448 dh155122 sizeof (connf_t) * CONN_G_HASH_SIZE, KM_SLEEP); 478 0 stevel for (i = 0; i < CONN_G_HASH_SIZE; i++) { 479 3448 dh155122 mutex_init(&ipst->ips_ipcl_globalhash_fanout[i].connf_lock, 480 3448 dh155122 NULL, MUTEX_DEFAULT, NULL); 481 0 stevel } 482 0 stevel } 483 0 stevel 484 0 stevel void 485 3448 dh155122 ipcl_g_destroy(void) 486 3448 dh155122 { 487 5240 nordmark kmem_cache_destroy(ip_conn_cache); 488 5240 nordmark kmem_cache_destroy(tcp_conn_cache); 489 5240 nordmark kmem_cache_destroy(udp_conn_cache); 490 5240 nordmark kmem_cache_destroy(rawip_conn_cache); 491 5240 nordmark kmem_cache_destroy(rts_conn_cache); 492 3448 dh155122 } 493 3448 dh155122 494 3448 dh155122 /* 495 3448 dh155122 * All user-level and kernel use of the stack must be gone 496 3448 dh155122 * by now. 
497 3448 dh155122 */ 498 3448 dh155122 void 499 3448 dh155122 ipcl_destroy(ip_stack_t *ipst) 500 0 stevel { 501 0 stevel int i; 502 0 stevel 503 3448 dh155122 for (i = 0; i < ipst->ips_ipcl_conn_fanout_size; i++) { 504 3448 dh155122 ASSERT(ipst->ips_ipcl_conn_fanout[i].connf_head == NULL); 505 3448 dh155122 mutex_destroy(&ipst->ips_ipcl_conn_fanout[i].connf_lock); 506 3448 dh155122 } 507 3448 dh155122 kmem_free(ipst->ips_ipcl_conn_fanout, ipst->ips_ipcl_conn_fanout_size * 508 3448 dh155122 sizeof (connf_t)); 509 3448 dh155122 ipst->ips_ipcl_conn_fanout = NULL; 510 0 stevel 511 3448 dh155122 for (i = 0; i < ipst->ips_ipcl_bind_fanout_size; i++) { 512 3448 dh155122 ASSERT(ipst->ips_ipcl_bind_fanout[i].connf_head == NULL); 513 3448 dh155122 mutex_destroy(&ipst->ips_ipcl_bind_fanout[i].connf_lock); 514 3448 dh155122 } 515 3448 dh155122 kmem_free(ipst->ips_ipcl_bind_fanout, ipst->ips_ipcl_bind_fanout_size * 516 3448 dh155122 sizeof (connf_t)); 517 3448 dh155122 ipst->ips_ipcl_bind_fanout = NULL; 518 0 stevel 519 3448 dh155122 for (i = 0; i < IPPROTO_MAX; i++) { 520 11042 Erik ASSERT(ipst->ips_ipcl_proto_fanout_v4[i].connf_head == NULL); 521 11042 Erik mutex_destroy(&ipst->ips_ipcl_proto_fanout_v4[i].connf_lock); 522 3448 dh155122 } 523 11042 Erik kmem_free(ipst->ips_ipcl_proto_fanout_v4, 524 11042 Erik IPPROTO_MAX * sizeof (connf_t)); 525 11042 Erik ipst->ips_ipcl_proto_fanout_v4 = NULL; 526 0 stevel 527 3448 dh155122 for (i = 0; i < IPPROTO_MAX; i++) { 528 3448 dh155122 ASSERT(ipst->ips_ipcl_proto_fanout_v6[i].connf_head == NULL); 529 3448 dh155122 mutex_destroy(&ipst->ips_ipcl_proto_fanout_v6[i].connf_lock); 530 3448 dh155122 } 531 3448 dh155122 kmem_free(ipst->ips_ipcl_proto_fanout_v6, 532 3448 dh155122 IPPROTO_MAX * sizeof (connf_t)); 533 3448 dh155122 ipst->ips_ipcl_proto_fanout_v6 = NULL; 534 3448 dh155122 535 3448 dh155122 for (i = 0; i < ipst->ips_ipcl_udp_fanout_size; i++) { 536 3448 dh155122 ASSERT(ipst->ips_ipcl_udp_fanout[i].connf_head == NULL); 537 3448 
dh155122 mutex_destroy(&ipst->ips_ipcl_udp_fanout[i].connf_lock); 538 3448 dh155122 } 539 3448 dh155122 kmem_free(ipst->ips_ipcl_udp_fanout, ipst->ips_ipcl_udp_fanout_size * 540 3448 dh155122 sizeof (connf_t)); 541 3448 dh155122 ipst->ips_ipcl_udp_fanout = NULL; 542 10616 Sebastien 543 10616 Sebastien for (i = 0; i < ipst->ips_ipcl_iptun_fanout_size; i++) { 544 10616 Sebastien ASSERT(ipst->ips_ipcl_iptun_fanout[i].connf_head == NULL); 545 10616 Sebastien mutex_destroy(&ipst->ips_ipcl_iptun_fanout[i].connf_lock); 546 10616 Sebastien } 547 10616 Sebastien kmem_free(ipst->ips_ipcl_iptun_fanout, 548 10616 Sebastien ipst->ips_ipcl_iptun_fanout_size * sizeof (connf_t)); 549 10616 Sebastien ipst->ips_ipcl_iptun_fanout = NULL; 550 3448 dh155122 551 3448 dh155122 for (i = 0; i < ipst->ips_ipcl_raw_fanout_size; i++) { 552 3448 dh155122 ASSERT(ipst->ips_ipcl_raw_fanout[i].connf_head == NULL); 553 3448 dh155122 mutex_destroy(&ipst->ips_ipcl_raw_fanout[i].connf_lock); 554 3448 dh155122 } 555 3448 dh155122 kmem_free(ipst->ips_ipcl_raw_fanout, ipst->ips_ipcl_raw_fanout_size * 556 3448 dh155122 sizeof (connf_t)); 557 3448 dh155122 ipst->ips_ipcl_raw_fanout = NULL; 558 3448 dh155122 559 3448 dh155122 for (i = 0; i < CONN_G_HASH_SIZE; i++) { 560 3448 dh155122 ASSERT(ipst->ips_ipcl_globalhash_fanout[i].connf_head == NULL); 561 3448 dh155122 mutex_destroy(&ipst->ips_ipcl_globalhash_fanout[i].connf_lock); 562 3448 dh155122 } 563 3448 dh155122 kmem_free(ipst->ips_ipcl_globalhash_fanout, 564 3448 dh155122 sizeof (connf_t) * CONN_G_HASH_SIZE); 565 3448 dh155122 ipst->ips_ipcl_globalhash_fanout = NULL; 566 3448 dh155122 567 3448 dh155122 ASSERT(ipst->ips_rts_clients->connf_head == NULL); 568 3448 dh155122 mutex_destroy(&ipst->ips_rts_clients->connf_lock); 569 3448 dh155122 kmem_free(ipst->ips_rts_clients, sizeof (connf_t)); 570 3448 dh155122 ipst->ips_rts_clients = NULL; 571 0 stevel } 572 0 stevel 573 0 stevel /* 574 0 stevel * conn creation routine. 
initialize the conn, sets the reference 575 0 stevel * and inserts it in the global hash table. 576 0 stevel */ 577 0 stevel conn_t * 578 3448 dh155122 ipcl_conn_create(uint32_t type, int sleep, netstack_t *ns) 579 0 stevel { 580 0 stevel conn_t *connp; 581 5240 nordmark struct kmem_cache *conn_cache; 582 0 stevel 583 0 stevel switch (type) { 584 0 stevel case IPCL_SCTPCONN: 585 0 stevel if ((connp = kmem_cache_alloc(sctp_conn_cache, sleep)) == NULL) 586 0 stevel return (NULL); 587 4691 kcpoon sctp_conn_init(connp); 588 3448 dh155122 netstack_hold(ns); 589 3448 dh155122 connp->conn_netstack = ns; 590 11042 Erik connp->conn_ixa->ixa_ipst = ns->netstack_ip; 591 11042 Erik ipcl_globalhash_insert(connp); 592 5240 nordmark return (connp); 593 5240 nordmark 594 5240 nordmark case IPCL_TCPCONN: 595 5240 nordmark conn_cache = tcp_conn_cache; 596 0 stevel break; 597 5240 nordmark 598 5240 nordmark case IPCL_UDPCONN: 599 5240 nordmark conn_cache = udp_conn_cache; 600 5240 nordmark break; 601 5240 nordmark 602 5240 nordmark case IPCL_RAWIPCONN: 603 5240 nordmark conn_cache = rawip_conn_cache; 604 5240 nordmark break; 605 5240 nordmark 606 5240 nordmark case IPCL_RTSCONN: 607 5240 nordmark conn_cache = rts_conn_cache; 608 5240 nordmark break; 609 5240 nordmark 610 0 stevel case IPCL_IPCCONN: 611 5240 nordmark conn_cache = ip_conn_cache; 612 0 stevel break; 613 5240 nordmark 614 741 masputra default: 615 741 masputra connp = NULL; 616 741 masputra ASSERT(0); 617 0 stevel } 618 0 stevel 619 5240 nordmark if ((connp = kmem_cache_alloc(conn_cache, sleep)) == NULL) 620 5240 nordmark return (NULL); 621 5240 nordmark 622 5240 nordmark connp->conn_ref = 1; 623 5240 nordmark netstack_hold(ns); 624 5240 nordmark connp->conn_netstack = ns; 625 11042 Erik connp->conn_ixa->ixa_ipst = ns->netstack_ip; 626 5240 nordmark ipcl_globalhash_insert(connp); 627 0 stevel return (connp); 628 0 stevel } 629 0 stevel 630 0 stevel void 631 0 stevel ipcl_conn_destroy(conn_t *connp) 632 0 stevel { 633 0 
stevel mblk_t *mp; 634 3448 dh155122 netstack_t *ns = connp->conn_netstack; 635 0 stevel 636 0 stevel ASSERT(!MUTEX_HELD(&connp->conn_lock)); 637 0 stevel ASSERT(connp->conn_ref == 0); 638 7502 aruna 639 7502 aruna DTRACE_PROBE1(conn__destroy, conn_t *, connp); 640 1676 jpk 641 1676 jpk if (connp->conn_cred != NULL) { 642 1676 jpk crfree(connp->conn_cred); 643 1676 jpk connp->conn_cred = NULL; 644 1676 jpk } 645 1676 jpk 646 11042 Erik if (connp->conn_ht_iphc != NULL) { 647 11042 Erik kmem_free(connp->conn_ht_iphc, connp->conn_ht_iphc_allocated); 648 11042 Erik connp->conn_ht_iphc = NULL; 649 11042 Erik connp->conn_ht_iphc_allocated = 0; 650 11042 Erik connp->conn_ht_iphc_len = 0; 651 11042 Erik connp->conn_ht_ulp = NULL; 652 11042 Erik connp->conn_ht_ulp_len = 0; 653 11042 Erik } 654 11042 Erik ip_pkt_free(&connp->conn_xmit_ipp); 655 11042 Erik 656 0 stevel ipcl_globalhash_remove(connp); 657 0 stevel 658 11042 Erik if (connp->conn_latch != NULL) { 659 11042 Erik IPLATCH_REFRELE(connp->conn_latch); 660 11042 Erik connp->conn_latch = NULL; 661 11042 Erik } 662 11042 Erik if (connp->conn_latch_in_policy != NULL) { 663 11042 Erik IPPOL_REFRELE(connp->conn_latch_in_policy); 664 11042 Erik connp->conn_latch_in_policy = NULL; 665 11042 Erik } 666 11042 Erik if (connp->conn_latch_in_action != NULL) { 667 11042 Erik IPACT_REFRELE(connp->conn_latch_in_action); 668 11042 Erik connp->conn_latch_in_action = NULL; 669 11042 Erik } 670 11042 Erik if (connp->conn_policy != NULL) { 671 11042 Erik IPPH_REFRELE(connp->conn_policy, ns); 672 11042 Erik connp->conn_policy = NULL; 673 11042 Erik } 674 11042 Erik 675 11042 Erik if (connp->conn_ipsec_opt_mp != NULL) { 676 11042 Erik freemsg(connp->conn_ipsec_opt_mp); 677 11042 Erik connp->conn_ipsec_opt_mp = NULL; 678 11042 Erik } 679 11042 Erik 680 0 stevel if (connp->conn_flags & IPCL_TCPCONN) { 681 11042 Erik tcp_t *tcp = connp->conn_tcp; 682 741 masputra 683 0 stevel tcp_free(tcp); 684 0 stevel mp = tcp->tcp_timercache; 685 11042 Erik 
686 11042 Erik tcp->tcp_tcps = NULL; 687 0 stevel 688 0 stevel if (tcp->tcp_sack_info != NULL) { 689 0 stevel bzero(tcp->tcp_sack_info, sizeof (tcp_sack_info_t)); 690 0 stevel kmem_cache_free(tcp_sack_info_cache, 691 0 stevel tcp->tcp_sack_info); 692 0 stevel } 693 8014 Kacheong 694 8014 Kacheong /* 695 8014 Kacheong * tcp_rsrv_mp can be NULL if tcp_get_conn() fails to allocate 696 8014 Kacheong * the mblk. 697 8014 Kacheong */ 698 8014 Kacheong if (tcp->tcp_rsrv_mp != NULL) { 699 8014 Kacheong freeb(tcp->tcp_rsrv_mp); 700 8014 Kacheong tcp->tcp_rsrv_mp = NULL; 701 8014 Kacheong mutex_destroy(&tcp->tcp_rsrv_mp_lock); 702 8014 Kacheong } 703 0 stevel 704 11042 Erik ipcl_conn_cleanup(connp); 705 11042 Erik connp->conn_flags = IPCL_TCPCONN; 706 3448 dh155122 if (ns != NULL) { 707 3448 dh155122 ASSERT(tcp->tcp_tcps == NULL); 708 3448 dh155122 connp->conn_netstack = NULL; 709 11042 Erik connp->conn_ixa->ixa_ipst = NULL; 710 3448 dh155122 netstack_rele(ns); 711 3448 dh155122 } 712 5240 nordmark 713 5240 nordmark bzero(tcp, sizeof (tcp_t)); 714 5240 nordmark 715 5240 nordmark tcp->tcp_timercache = mp; 716 5240 nordmark tcp->tcp_connp = connp; 717 5240 nordmark kmem_cache_free(tcp_conn_cache, connp); 718 5240 nordmark return; 719 5240 nordmark } 720 5240 nordmark 721 5240 nordmark if (connp->conn_flags & IPCL_SCTPCONN) { 722 3448 dh155122 ASSERT(ns != NULL); 723 0 stevel sctp_free(connp); 724 5240 nordmark return; 725 5240 nordmark } 726 5240 nordmark 727 11042 Erik ipcl_conn_cleanup(connp); 728 5240 nordmark if (ns != NULL) { 729 5240 nordmark connp->conn_netstack = NULL; 730 11042 Erik connp->conn_ixa->ixa_ipst = NULL; 731 5240 nordmark netstack_rele(ns); 732 5240 nordmark } 733 5240 nordmark 734 5240 nordmark /* leave conn_priv aka conn_udp, conn_icmp, etc in place. 
*/ 735 5240 nordmark if (connp->conn_flags & IPCL_UDPCONN) { 736 5240 nordmark connp->conn_flags = IPCL_UDPCONN; 737 5240 nordmark kmem_cache_free(udp_conn_cache, connp); 738 5240 nordmark } else if (connp->conn_flags & IPCL_RAWIPCONN) { 739 5240 nordmark connp->conn_flags = IPCL_RAWIPCONN; 740 11042 Erik connp->conn_proto = IPPROTO_ICMP; 741 11042 Erik connp->conn_ixa->ixa_protocol = connp->conn_proto; 742 5240 nordmark kmem_cache_free(rawip_conn_cache, connp); 743 5240 nordmark } else if (connp->conn_flags & IPCL_RTSCONN) { 744 5240 nordmark connp->conn_flags = IPCL_RTSCONN; 745 5240 nordmark kmem_cache_free(rts_conn_cache, connp); 746 0 stevel } else { 747 5240 nordmark connp->conn_flags = IPCL_IPCCONN; 748 5240 nordmark ASSERT(connp->conn_flags & IPCL_IPCCONN); 749 5240 nordmark ASSERT(connp->conn_priv == NULL); 750 5240 nordmark kmem_cache_free(ip_conn_cache, connp); 751 0 stevel } 752 0 stevel } 753 0 stevel 754 0 stevel /* 755 0 stevel * Running in cluster mode - deregister listener information 756 0 stevel */ 757 0 stevel static void 758 0 stevel ipcl_conn_unlisten(conn_t *connp) 759 0 stevel { 760 0 stevel ASSERT((connp->conn_flags & IPCL_CL_LISTENER) != 0); 761 0 stevel ASSERT(connp->conn_lport != 0); 762 0 stevel 763 0 stevel if (cl_inet_unlisten != NULL) { 764 0 stevel sa_family_t addr_family; 765 0 stevel uint8_t *laddrp; 766 0 stevel 767 11042 Erik if (connp->conn_ipversion == IPV6_VERSION) { 768 0 stevel addr_family = AF_INET6; 769 11042 Erik laddrp = (uint8_t *)&connp->conn_bound_addr_v6; 770 0 stevel } else { 771 0 stevel addr_family = AF_INET; 772 11042 Erik laddrp = (uint8_t *)&connp->conn_bound_addr_v4; 773 0 stevel } 774 8392 Huafeng (*cl_inet_unlisten)(connp->conn_netstack->netstack_stackid, 775 8392 Huafeng IPPROTO_TCP, addr_family, laddrp, connp->conn_lport, NULL); 776 0 stevel } 777 0 stevel connp->conn_flags &= ~IPCL_CL_LISTENER; 778 0 stevel } 779 0 stevel 780 0 stevel /* 781 0 stevel * We set the IPCL_REMOVED flag (instead of clearing 
the flag indicating 782 0 stevel * which table the conn belonged to). So for debugging we can see which hash 783 0 stevel * table this connection was in. 784 0 stevel */ 785 0 stevel #define IPCL_HASH_REMOVE(connp) { \ 786 0 stevel connf_t *connfp = (connp)->conn_fanout; \ 787 0 stevel ASSERT(!MUTEX_HELD(&((connp)->conn_lock))); \ 788 0 stevel if (connfp != NULL) { \ 789 0 stevel mutex_enter(&connfp->connf_lock); \ 790 0 stevel if ((connp)->conn_next != NULL) \ 791 0 stevel (connp)->conn_next->conn_prev = \ 792 0 stevel (connp)->conn_prev; \ 793 0 stevel if ((connp)->conn_prev != NULL) \ 794 0 stevel (connp)->conn_prev->conn_next = \ 795 0 stevel (connp)->conn_next; \ 796 0 stevel else \ 797 0 stevel connfp->connf_head = (connp)->conn_next; \ 798 0 stevel (connp)->conn_fanout = NULL; \ 799 0 stevel (connp)->conn_next = NULL; \ 800 0 stevel (connp)->conn_prev = NULL; \ 801 0 stevel (connp)->conn_flags |= IPCL_REMOVED; \ 802 0 stevel if (((connp)->conn_flags & IPCL_CL_LISTENER) != 0) \ 803 0 stevel ipcl_conn_unlisten((connp)); \ 804 0 stevel CONN_DEC_REF((connp)); \ 805 0 stevel mutex_exit(&connfp->connf_lock); \ 806 0 stevel } \ 807 0 stevel } 808 0 stevel 809 0 stevel void 810 0 stevel ipcl_hash_remove(conn_t *connp) 811 0 stevel { 812 11042 Erik uint8_t protocol = connp->conn_proto; 813 11042 Erik 814 0 stevel IPCL_HASH_REMOVE(connp); 815 11042 Erik if (protocol == IPPROTO_RSVP) 816 11042 Erik ill_set_inputfn_all(connp->conn_netstack->netstack_ip); 817 0 stevel } 818 0 stevel 819 0 stevel /* 820 0 stevel * The whole purpose of this function is allow removal of 821 0 stevel * a conn_t from the connected hash for timewait reclaim. 822 0 stevel * This is essentially a TW reclaim fastpath where timewait 823 0 stevel * collector checks under fanout lock (so no one else can 824 0 stevel * get access to the conn_t) that refcnt is 2 i.e. one for 825 0 stevel * TCP and one for the classifier hash list. 
If ref count 826 0 stevel * is indeed 2, we can just remove the conn under lock and 827 0 stevel * avoid cleaning up the conn under squeue. This gives us 828 0 stevel * improved performance. 829 0 stevel */ 830 0 stevel void 831 0 stevel ipcl_hash_remove_locked(conn_t *connp, connf_t *connfp) 832 0 stevel { 833 0 stevel ASSERT(MUTEX_HELD(&connfp->connf_lock)); 834 0 stevel ASSERT(MUTEX_HELD(&connp->conn_lock)); 835 0 stevel ASSERT((connp->conn_flags & IPCL_CL_LISTENER) == 0); 836 0 stevel 837 0 stevel if ((connp)->conn_next != NULL) { 838 4691 kcpoon (connp)->conn_next->conn_prev = (connp)->conn_prev; 839 0 stevel } 840 0 stevel if ((connp)->conn_prev != NULL) { 841 4691 kcpoon (connp)->conn_prev->conn_next = (connp)->conn_next; 842 0 stevel } else { 843 0 stevel connfp->connf_head = (connp)->conn_next; 844 0 stevel } 845 0 stevel (connp)->conn_fanout = NULL; 846 0 stevel (connp)->conn_next = NULL; 847 0 stevel (connp)->conn_prev = NULL; 848 0 stevel (connp)->conn_flags |= IPCL_REMOVED; 849 0 stevel ASSERT((connp)->conn_ref == 2); 850 0 stevel (connp)->conn_ref--; 851 0 stevel } 852 0 stevel 853 0 stevel #define IPCL_HASH_INSERT_CONNECTED_LOCKED(connfp, connp) { \ 854 0 stevel ASSERT((connp)->conn_fanout == NULL); \ 855 0 stevel ASSERT((connp)->conn_next == NULL); \ 856 0 stevel ASSERT((connp)->conn_prev == NULL); \ 857 0 stevel if ((connfp)->connf_head != NULL) { \ 858 0 stevel (connfp)->connf_head->conn_prev = (connp); \ 859 0 stevel (connp)->conn_next = (connfp)->connf_head; \ 860 0 stevel } \ 861 0 stevel (connp)->conn_fanout = (connfp); \ 862 0 stevel (connfp)->connf_head = (connp); \ 863 0 stevel (connp)->conn_flags = ((connp)->conn_flags & ~IPCL_REMOVED) | \ 864 0 stevel IPCL_CONNECTED; \ 865 0 stevel CONN_INC_REF(connp); \ 866 0 stevel } 867 0 stevel 868 0 stevel #define IPCL_HASH_INSERT_CONNECTED(connfp, connp) { \ 869 0 stevel IPCL_HASH_REMOVE((connp)); \ 870 0 stevel mutex_enter(&(connfp)->connf_lock); \ 871 0 stevel 
IPCL_HASH_INSERT_CONNECTED_LOCKED(connfp, connp); \ 872 0 stevel mutex_exit(&(connfp)->connf_lock); \ 873 0 stevel } 874 0 stevel 875 0 stevel #define IPCL_HASH_INSERT_BOUND(connfp, connp) { \ 876 0 stevel conn_t *pconnp = NULL, *nconnp; \ 877 0 stevel IPCL_HASH_REMOVE((connp)); \ 878 0 stevel mutex_enter(&(connfp)->connf_lock); \ 879 0 stevel nconnp = (connfp)->connf_head; \ 880 153 ethindra while (nconnp != NULL && \ 881 11042 Erik !_IPCL_V4_MATCH_ANY(nconnp->conn_laddr_v6)) { \ 882 153 ethindra pconnp = nconnp; \ 883 153 ethindra nconnp = nconnp->conn_next; \ 884 0 stevel } \ 885 0 stevel if (pconnp != NULL) { \ 886 0 stevel pconnp->conn_next = (connp); \ 887 0 stevel (connp)->conn_prev = pconnp; \ 888 0 stevel } else { \ 889 0 stevel (connfp)->connf_head = (connp); \ 890 0 stevel } \ 891 0 stevel if (nconnp != NULL) { \ 892 0 stevel (connp)->conn_next = nconnp; \ 893 0 stevel nconnp->conn_prev = (connp); \ 894 0 stevel } \ 895 0 stevel (connp)->conn_fanout = (connfp); \ 896 0 stevel (connp)->conn_flags = ((connp)->conn_flags & ~IPCL_REMOVED) | \ 897 0 stevel IPCL_BOUND; \ 898 0 stevel CONN_INC_REF(connp); \ 899 0 stevel mutex_exit(&(connfp)->connf_lock); \ 900 0 stevel } 901 0 stevel 902 0 stevel #define IPCL_HASH_INSERT_WILDCARD(connfp, connp) { \ 903 0 stevel conn_t **list, *prev, *next; \ 904 0 stevel boolean_t isv4mapped = \ 905 11042 Erik IN6_IS_ADDR_V4MAPPED(&(connp)->conn_laddr_v6); \ 906 0 stevel IPCL_HASH_REMOVE((connp)); \ 907 0 stevel mutex_enter(&(connfp)->connf_lock); \ 908 0 stevel list = &(connfp)->connf_head; \ 909 0 stevel prev = NULL; \ 910 0 stevel while ((next = *list) != NULL) { \ 911 0 stevel if (isv4mapped && \ 912 11042 Erik IN6_IS_ADDR_UNSPECIFIED(&next->conn_laddr_v6) && \ 913 0 stevel connp->conn_zoneid == next->conn_zoneid) { \ 914 0 stevel (connp)->conn_next = next; \ 915 0 stevel if (prev != NULL) \ 916 0 stevel prev = next->conn_prev; \ 917 0 stevel next->conn_prev = (connp); \ 918 0 stevel break; \ 919 0 stevel } \ 920 0 stevel 
list = &next->conn_next; \ 921 0 stevel prev = next; \ 922 0 stevel } \ 923 0 stevel (connp)->conn_prev = prev; \ 924 0 stevel *list = (connp); \ 925 0 stevel (connp)->conn_fanout = (connfp); \ 926 0 stevel (connp)->conn_flags = ((connp)->conn_flags & ~IPCL_REMOVED) | \ 927 0 stevel IPCL_BOUND; \ 928 0 stevel CONN_INC_REF((connp)); \ 929 0 stevel mutex_exit(&(connfp)->connf_lock); \ 930 0 stevel } 931 0 stevel 932 0 stevel void 933 0 stevel ipcl_hash_insert_wildcard(connf_t *connfp, conn_t *connp) 934 0 stevel { 935 0 stevel IPCL_HASH_INSERT_WILDCARD(connfp, connp); 936 0 stevel } 937 0 stevel 938 0 stevel /* 939 10616 Sebastien * Because the classifier is used to classify inbound packets, the destination 940 10616 Sebastien * address is meant to be our local tunnel address (tunnel source), and the 941 10616 Sebastien * source the remote tunnel address (tunnel destination). 942 11042 Erik * 943 11042 Erik * Note that conn_proto can't be used for fanout since the upper protocol 944 11042 Erik * can be both 41 and 4 when IPv6 and IPv4 are over the same tunnel. 
945 10616 Sebastien */ 946 10616 Sebastien conn_t * 947 10616 Sebastien ipcl_iptun_classify_v4(ipaddr_t *src, ipaddr_t *dst, ip_stack_t *ipst) 948 10616 Sebastien { 949 10616 Sebastien connf_t *connfp; 950 10616 Sebastien conn_t *connp; 951 10616 Sebastien 952 10616 Sebastien /* first look for IPv4 tunnel links */ 953 10616 Sebastien connfp = &ipst->ips_ipcl_iptun_fanout[IPCL_IPTUN_HASH(*dst, *src)]; 954 10616 Sebastien mutex_enter(&connfp->connf_lock); 955 10616 Sebastien for (connp = connfp->connf_head; connp != NULL; 956 10616 Sebastien connp = connp->conn_next) { 957 10616 Sebastien if (IPCL_IPTUN_MATCH(connp, *dst, *src)) 958 10616 Sebastien break; 959 10616 Sebastien } 960 10616 Sebastien if (connp != NULL) 961 10616 Sebastien goto done; 962 10616 Sebastien 963 10616 Sebastien mutex_exit(&connfp->connf_lock); 964 10616 Sebastien 965 10616 Sebastien /* We didn't find an IPv4 tunnel, try a 6to4 tunnel */ 966 10616 Sebastien connfp = &ipst->ips_ipcl_iptun_fanout[IPCL_IPTUN_HASH(*dst, 967 10616 Sebastien INADDR_ANY)]; 968 10616 Sebastien mutex_enter(&connfp->connf_lock); 969 10616 Sebastien for (connp = connfp->connf_head; connp != NULL; 970 10616 Sebastien connp = connp->conn_next) { 971 10616 Sebastien if (IPCL_IPTUN_MATCH(connp, *dst, INADDR_ANY)) 972 10616 Sebastien break; 973 10616 Sebastien } 974 10616 Sebastien done: 975 10616 Sebastien if (connp != NULL) 976 10616 Sebastien CONN_INC_REF(connp); 977 10616 Sebastien mutex_exit(&connfp->connf_lock); 978 10616 Sebastien return (connp); 979 10616 Sebastien } 980 10616 Sebastien 981 10616 Sebastien conn_t * 982 10616 Sebastien ipcl_iptun_classify_v6(in6_addr_t *src, in6_addr_t *dst, ip_stack_t *ipst) 983 10616 Sebastien { 984 10616 Sebastien connf_t *connfp; 985 10616 Sebastien conn_t *connp; 986 10616 Sebastien 987 10616 Sebastien /* Look for an IPv6 tunnel link */ 988 10616 Sebastien connfp = &ipst->ips_ipcl_iptun_fanout[IPCL_IPTUN_HASH_V6(dst, src)]; 989 10616 Sebastien mutex_enter(&connfp->connf_lock); 990 
10616 Sebastien for (connp = connfp->connf_head; connp != NULL; 991 10616 Sebastien connp = connp->conn_next) { 992 10616 Sebastien if (IPCL_IPTUN_MATCH_V6(connp, dst, src)) { 993 10616 Sebastien CONN_INC_REF(connp); 994 10616 Sebastien break; 995 10616 Sebastien } 996 10616 Sebastien } 997 10616 Sebastien mutex_exit(&connfp->connf_lock); 998 10616 Sebastien return (connp); 999 10616 Sebastien } 1000 10616 Sebastien 1001 10616 Sebastien /* 1002 0 stevel * This function is used only for inserting SCTP raw socket now. 1003 0 stevel * This may change later. 1004 0 stevel * 1005 0 stevel * Note that only one raw socket can be bound to a port. The param 1006 0 stevel * lport is in network byte order. 1007 0 stevel */ 1008 0 stevel static int 1009 0 stevel ipcl_sctp_hash_insert(conn_t *connp, in_port_t lport) 1010 0 stevel { 1011 0 stevel connf_t *connfp; 1012 0 stevel conn_t *oconnp; 1013 3448 dh155122 ip_stack_t *ipst = connp->conn_netstack->netstack_ip; 1014 0 stevel 1015 3448 dh155122 connfp = &ipst->ips_ipcl_raw_fanout[IPCL_RAW_HASH(ntohs(lport), ipst)]; 1016 0 stevel 1017 0 stevel /* Check for existing raw socket already bound to the port. 
*/ 1018 0 stevel mutex_enter(&connfp->connf_lock); 1019 0 stevel for (oconnp = connfp->connf_head; oconnp != NULL; 1020 409 kcpoon oconnp = oconnp->conn_next) { 1021 0 stevel if (oconnp->conn_lport == lport && 1022 0 stevel oconnp->conn_zoneid == connp->conn_zoneid && 1023 11042 Erik oconnp->conn_family == connp->conn_family && 1024 11042 Erik ((IN6_IS_ADDR_UNSPECIFIED(&connp->conn_laddr_v6) || 1025 11042 Erik IN6_IS_ADDR_UNSPECIFIED(&oconnp->conn_laddr_v6) || 1026 11042 Erik IN6_IS_ADDR_V4MAPPED_ANY(&connp->conn_laddr_v6) || 1027 11042 Erik IN6_IS_ADDR_V4MAPPED_ANY(&oconnp->conn_laddr_v6)) || 1028 11042 Erik IN6_ARE_ADDR_EQUAL(&oconnp->conn_laddr_v6, 1029 11042 Erik &connp->conn_laddr_v6))) { 1030 0 stevel break; 1031 0 stevel } 1032 0 stevel } 1033 0 stevel mutex_exit(&connfp->connf_lock); 1034 0 stevel if (oconnp != NULL) 1035 0 stevel return (EADDRNOTAVAIL); 1036 0 stevel 1037 11042 Erik if (IN6_IS_ADDR_UNSPECIFIED(&connp->conn_faddr_v6) || 1038 11042 Erik IN6_IS_ADDR_V4MAPPED_ANY(&connp->conn_faddr_v6)) { 1039 11042 Erik if (IN6_IS_ADDR_UNSPECIFIED(&connp->conn_laddr_v6) || 1040 11042 Erik IN6_IS_ADDR_V4MAPPED_ANY(&connp->conn_laddr_v6)) { 1041 0 stevel IPCL_HASH_INSERT_WILDCARD(connfp, connp); 1042 0 stevel } else { 1043 0 stevel IPCL_HASH_INSERT_BOUND(connfp, connp); 1044 0 stevel } 1045 0 stevel } else { 1046 0 stevel IPCL_HASH_INSERT_CONNECTED(connfp, connp); 1047 0 stevel } 1048 10616 Sebastien return (0); 1049 10616 Sebastien } 1050 10616 Sebastien 1051 10616 Sebastien static int 1052 11042 Erik ipcl_iptun_hash_insert(conn_t *connp, ip_stack_t *ipst) 1053 10616 Sebastien { 1054 10616 Sebastien connf_t *connfp; 1055 10616 Sebastien conn_t *tconnp; 1056 11042 Erik ipaddr_t laddr = connp->conn_laddr_v4; 1057 11042 Erik ipaddr_t faddr = connp->conn_faddr_v4; 1058 10616 Sebastien 1059 11042 Erik connfp = &ipst->ips_ipcl_iptun_fanout[IPCL_IPTUN_HASH(laddr, faddr)]; 1060 10616 Sebastien mutex_enter(&connfp->connf_lock); 1061 10616 Sebastien for (tconnp = 
connfp->connf_head; tconnp != NULL; 1062 10616 Sebastien tconnp = tconnp->conn_next) { 1063 11042 Erik if (IPCL_IPTUN_MATCH(tconnp, laddr, faddr)) { 1064 10616 Sebastien /* A tunnel is already bound to these addresses. */ 1065 10616 Sebastien mutex_exit(&connfp->connf_lock); 1066 10616 Sebastien return (EADDRINUSE); 1067 10616 Sebastien } 1068 10616 Sebastien } 1069 10616 Sebastien IPCL_HASH_INSERT_CONNECTED_LOCKED(connfp, connp); 1070 10616 Sebastien mutex_exit(&connfp->connf_lock); 1071 10616 Sebastien return (0); 1072 10616 Sebastien } 1073 10616 Sebastien 1074 10616 Sebastien static int 1075 11042 Erik ipcl_iptun_hash_insert_v6(conn_t *connp, ip_stack_t *ipst) 1076 10616 Sebastien { 1077 10616 Sebastien connf_t *connfp; 1078 10616 Sebastien conn_t *tconnp; 1079 11042 Erik in6_addr_t *laddr = &connp->conn_laddr_v6; 1080 11042 Erik in6_addr_t *faddr = &connp->conn_faddr_v6; 1081 10616 Sebastien 1082 11042 Erik connfp = &ipst->ips_ipcl_iptun_fanout[IPCL_IPTUN_HASH_V6(laddr, faddr)]; 1083 10616 Sebastien mutex_enter(&connfp->connf_lock); 1084 10616 Sebastien for (tconnp = connfp->connf_head; tconnp != NULL; 1085 10616 Sebastien tconnp = tconnp->conn_next) { 1086 11042 Erik if (IPCL_IPTUN_MATCH_V6(tconnp, laddr, faddr)) { 1087 10616 Sebastien /* A tunnel is already bound to these addresses. */ 1088 10616 Sebastien mutex_exit(&connfp->connf_lock); 1089 10616 Sebastien return (EADDRINUSE); 1090 10616 Sebastien } 1091 10616 Sebastien } 1092 10616 Sebastien IPCL_HASH_INSERT_CONNECTED_LOCKED(connfp, connp); 1093 10616 Sebastien mutex_exit(&connfp->connf_lock); 1094 0 stevel return (0); 1095 0 stevel } 1096 0 stevel 1097 0 stevel /* 1098 1676 jpk * Check for a MAC exemption conflict on a labeled system. Note that for 1099 1676 jpk * protocols that use port numbers (UDP, TCP, SCTP), we do this check up in the 1100 1676 jpk * transport layer. This check is for binding all other protocols. 1101 1676 jpk * 1102 1676 jpk * Returns true if there's a conflict. 
1103 1676 jpk */ 1104 1676 jpk static boolean_t 1105 3448 dh155122 check_exempt_conflict_v4(conn_t *connp, ip_stack_t *ipst) 1106 1676 jpk { 1107 1676 jpk connf_t *connfp; 1108 1676 jpk conn_t *tconn; 1109 1676 jpk 1110 11042 Erik connfp = &ipst->ips_ipcl_proto_fanout_v4[connp->conn_proto]; 1111 1676 jpk mutex_enter(&connfp->connf_lock); 1112 1676 jpk for (tconn = connfp->connf_head; tconn != NULL; 1113 1676 jpk tconn = tconn->conn_next) { 1114 1676 jpk /* We don't allow v4 fallback for v6 raw socket */ 1115 11042 Erik if (connp->conn_family != tconn->conn_family) 1116 1676 jpk continue; 1117 1676 jpk /* If neither is exempt, then there's no conflict */ 1118 10934 sommerfeld if ((connp->conn_mac_mode == CONN_MAC_DEFAULT) && 1119 10934 sommerfeld (tconn->conn_mac_mode == CONN_MAC_DEFAULT)) 1120 1676 jpk continue; 1121 9710 Ken /* We are only concerned about sockets for a different zone */ 1122 9710 Ken if (connp->conn_zoneid == tconn->conn_zoneid) 1123 9710 Ken continue; 1124 1676 jpk /* If both are bound to different specific addrs, ok */ 1125 11042 Erik if (connp->conn_laddr_v4 != INADDR_ANY && 1126 11042 Erik tconn->conn_laddr_v4 != INADDR_ANY && 1127 11042 Erik connp->conn_laddr_v4 != tconn->conn_laddr_v4) 1128 1676 jpk continue; 1129 1676 jpk /* These two conflict; fail */ 1130 1676 jpk break; 1131 1676 jpk } 1132 1676 jpk mutex_exit(&connfp->connf_lock); 1133 1676 jpk return (tconn != NULL); 1134 1676 jpk } 1135 1676 jpk 1136 1676 jpk static boolean_t 1137 3448 dh155122 check_exempt_conflict_v6(conn_t *connp, ip_stack_t *ipst) 1138 1676 jpk { 1139 1676 jpk connf_t *connfp; 1140 1676 jpk conn_t *tconn; 1141 1676 jpk 1142 11042 Erik connfp = &ipst->ips_ipcl_proto_fanout_v6[connp->conn_proto]; 1143 1676 jpk mutex_enter(&connfp->connf_lock); 1144 1676 jpk for (tconn = connfp->connf_head; tconn != NULL; 1145 1676 jpk tconn = tconn->conn_next) { 1146 1676 jpk /* We don't allow v4 fallback for v6 raw socket */ 1147 11042 Erik if (connp->conn_family != 
tconn->conn_family) 1148 1676 jpk continue; 1149 1676 jpk /* If neither is exempt, then there's no conflict */ 1150 10934 sommerfeld if ((connp->conn_mac_mode == CONN_MAC_DEFAULT) && 1151 10934 sommerfeld (tconn->conn_mac_mode == CONN_MAC_DEFAULT)) 1152 9710 Ken continue; 1153 9710 Ken /* We are only concerned about sockets for a different zone */ 1154 9710 Ken if (connp->conn_zoneid == tconn->conn_zoneid) 1155 1676 jpk continue; 1156 1676 jpk /* If both are bound to different addrs, ok */ 1157 11042 Erik if (!IN6_IS_ADDR_UNSPECIFIED(&connp->conn_laddr_v6) && 1158 11042 Erik !IN6_IS_ADDR_UNSPECIFIED(&tconn->conn_laddr_v6) && 1159 11042 Erik !IN6_ARE_ADDR_EQUAL(&connp->conn_laddr_v6, 1160 11042 Erik &tconn->conn_laddr_v6)) 1161 1676 jpk continue; 1162 1676 jpk /* These two conflict; fail */ 1163 1676 jpk break; 1164 1676 jpk } 1165 1676 jpk mutex_exit(&connfp->connf_lock); 1166 1676 jpk return (tconn != NULL); 1167 1676 jpk } 1168 1676 jpk 1169 1676 jpk /* 1170 0 stevel * (v4, v6) bind hash insertion routines 1171 11042 Erik * The caller has already setup the conn (conn_proto, conn_laddr_v6, conn_lport) 1172 0 stevel */ 1173 11042 Erik 1174 0 stevel int 1175 11042 Erik ipcl_bind_insert(conn_t *connp) 1176 11042 Erik { 1177 11042 Erik if (connp->conn_ipversion == IPV6_VERSION) 1178 11042 Erik return (ipcl_bind_insert_v6(connp)); 1179 11042 Erik else 1180 11042 Erik return (ipcl_bind_insert_v4(connp)); 1181 11042 Erik } 1182 11042 Erik 1183 11042 Erik int 1184 11042 Erik ipcl_bind_insert_v4(conn_t *connp) 1185 0 stevel { 1186 0 stevel connf_t *connfp; 1187 0 stevel int ret = 0; 1188 3448 dh155122 ip_stack_t *ipst = connp->conn_netstack->netstack_ip; 1189 11042 Erik uint16_t lport = connp->conn_lport; 1190 11042 Erik uint8_t protocol = connp->conn_proto; 1191 0 stevel 1192 10616 Sebastien if (IPCL_IS_IPTUN(connp)) 1193 11042 Erik return (ipcl_iptun_hash_insert(connp, ipst)); 1194 10616 Sebastien 1195 0 stevel switch (protocol) { 1196 1676 jpk default: 1197 3448 
dh155122 if (is_system_labeled() && 1198 3448 dh155122 check_exempt_conflict_v4(connp, ipst)) 1199 1676 jpk return (EADDRINUSE); 1200 1676 jpk /* FALLTHROUGH */ 1201 0 stevel case IPPROTO_UDP: 1202 0 stevel if (protocol == IPPROTO_UDP) { 1203 3448 dh155122 connfp = &ipst->ips_ipcl_udp_fanout[ 1204 3448 dh155122 IPCL_UDP_HASH(lport, ipst)]; 1205 0 stevel } else { 1206 11042 Erik connfp = &ipst->ips_ipcl_proto_fanout_v4[protocol]; 1207 0 stevel } 1208 0 stevel 1209 11042 Erik if (connp->conn_faddr_v4 != INADDR_ANY) { 1210 0 stevel IPCL_HASH_INSERT_CONNECTED(connfp, connp); 1211 11042 Erik } else if (connp->conn_laddr_v4 != INADDR_ANY) { 1212 0 stevel IPCL_HASH_INSERT_BOUND(connfp, connp); 1213 0 stevel } else { 1214 0 stevel IPCL_HASH_INSERT_WILDCARD(connfp, connp); 1215 0 stevel } 1216 11042 Erik if (protocol == IPPROTO_RSVP) 1217 11042 Erik ill_set_inputfn_all(ipst); 1218 0 stevel break; 1219 0 stevel 1220 0 stevel case IPPROTO_TCP: 1221 0 stevel /* Insert it in the Bind Hash */ 1222 1676 jpk ASSERT(connp->conn_zoneid != ALL_ZONES); 1223 3448 dh155122 connfp = &ipst->ips_ipcl_bind_fanout[ 1224 3448 dh155122 IPCL_BIND_HASH(lport, ipst)]; 1225 11042 Erik if (connp->conn_laddr_v4 != INADDR_ANY) { 1226 0 stevel IPCL_HASH_INSERT_BOUND(connfp, connp); 1227 0 stevel } else { 1228 0 stevel IPCL_HASH_INSERT_WILDCARD(connfp, connp); 1229 0 stevel } 1230 0 stevel if (cl_inet_listen != NULL) { 1231 11042 Erik ASSERT(connp->conn_ipversion == IPV4_VERSION); 1232 0 stevel connp->conn_flags |= IPCL_CL_LISTENER; 1233 8392 Huafeng (*cl_inet_listen)( 1234 8392 Huafeng connp->conn_netstack->netstack_stackid, 1235 8392 Huafeng IPPROTO_TCP, AF_INET, 1236 11042 Erik (uint8_t *)&connp->conn_bound_addr_v4, lport, NULL); 1237 0 stevel } 1238 0 stevel break; 1239 0 stevel 1240 0 stevel case IPPROTO_SCTP: 1241 0 stevel ret = ipcl_sctp_hash_insert(connp, lport); 1242 0 stevel break; 1243 0 stevel } 1244 0 stevel 1245 0 stevel return (ret); 1246 0 stevel } 1247 0 stevel 1248 0 stevel int 1249 
11042 Erik ipcl_bind_insert_v6(conn_t *connp) 1250 0 stevel { 1251 10616 Sebastien connf_t *connfp; 1252 10616 Sebastien int ret = 0; 1253 3448 dh155122 ip_stack_t *ipst = connp->conn_netstack->netstack_ip; 1254 11042 Erik uint16_t lport = connp->conn_lport; 1255 11042 Erik uint8_t protocol = connp->conn_proto; 1256 10616 Sebastien 1257 10616 Sebastien if (IPCL_IS_IPTUN(connp)) { 1258 11042 Erik return (ipcl_iptun_hash_insert_v6(connp, ipst)); 1259 10616 Sebastien } 1260 0 stevel 1261 0 stevel switch (protocol) { 1262 1676 jpk default: 1263 3448 dh155122 if (is_system_labeled() && 1264 3448 dh155122 check_exempt_conflict_v6(connp, ipst)) 1265 1676 jpk return (EADDRINUSE); 1266 1676 jpk /* FALLTHROUGH */ 1267 0 stevel case IPPROTO_UDP: 1268 0 stevel if (protocol == IPPROTO_UDP) { 1269 3448 dh155122 connfp = &ipst->ips_ipcl_udp_fanout[ 1270 3448 dh155122 IPCL_UDP_HASH(lport, ipst)]; 1271 0 stevel } else { 1272 3448 dh155122 connfp = &ipst->ips_ipcl_proto_fanout_v6[protocol]; 1273 0 stevel } 1274 0 stevel 1275 11042 Erik if (!IN6_IS_ADDR_UNSPECIFIED(&connp->conn_faddr_v6)) { 1276 0 stevel IPCL_HASH_INSERT_CONNECTED(connfp, connp); 1277 11042 Erik } else if (!IN6_IS_ADDR_UNSPECIFIED(&connp->conn_laddr_v6)) { 1278 0 stevel IPCL_HASH_INSERT_BOUND(connfp, connp); 1279 0 stevel } else { 1280 0 stevel IPCL_HASH_INSERT_WILDCARD(connfp, connp); 1281 0 stevel } 1282 0 stevel break; 1283 0 stevel 1284 0 stevel case IPPROTO_TCP: 1285 0 stevel /* Insert it in the Bind Hash */ 1286 1676 jpk ASSERT(connp->conn_zoneid != ALL_ZONES); 1287 3448 dh155122 connfp = &ipst->ips_ipcl_bind_fanout[ 1288 3448 dh155122 IPCL_BIND_HASH(lport, ipst)]; 1289 11042 Erik if (!IN6_IS_ADDR_UNSPECIFIED(&connp->conn_laddr_v6)) { 1290 0 stevel IPCL_HASH_INSERT_BOUND(connfp, connp); 1291 0 stevel } else { 1292 0 stevel IPCL_HASH_INSERT_WILDCARD(connfp, connp); 1293 0 stevel } 1294 0 stevel if (cl_inet_listen != NULL) { 1295 0 stevel sa_family_t addr_family; 1296 0 stevel uint8_t *laddrp; 1297 0 stevel 1298 
11042 Erik if (connp->conn_ipversion == IPV6_VERSION) { 1299 0 stevel addr_family = AF_INET6; 1300 0 stevel laddrp = 1301 11042 Erik (uint8_t *)&connp->conn_bound_addr_v6; 1302 0 stevel } else { 1303 0 stevel addr_family = AF_INET; 1304 11042 Erik laddrp = (uint8_t *)&connp->conn_bound_addr_v4; 1305 0 stevel } 1306 0 stevel connp->conn_flags |= IPCL_CL_LISTENER; 1307 8392 Huafeng (*cl_inet_listen)( 1308 8392 Huafeng connp->conn_netstack->netstack_stackid, 1309 8392 Huafeng IPPROTO_TCP, addr_family, laddrp, lport, NULL); 1310 0 stevel } 1311 0 stevel break; 1312 0 stevel 1313 0 stevel case IPPROTO_SCTP: 1314 0 stevel ret = ipcl_sctp_hash_insert(connp, lport); 1315 0 stevel break; 1316 0 stevel } 1317 0 stevel 1318 0 stevel return (ret); 1319 0 stevel } 1320 0 stevel 1321 0 stevel /* 1322 0 stevel * ipcl_conn_hash insertion routines. 1323 11042 Erik * The caller has already set conn_proto and the addresses/ports in the conn_t. 1324 0 stevel */ 1325 11042 Erik 1326 0 stevel int 1327 11042 Erik ipcl_conn_insert(conn_t *connp) 1328 11042 Erik { 1329 11042 Erik if (connp->conn_ipversion == IPV6_VERSION) 1330 11042 Erik return (ipcl_conn_insert_v6(connp)); 1331 11042 Erik else 1332 11042 Erik return (ipcl_conn_insert_v4(connp)); 1333 11042 Erik } 1334 11042 Erik 1335 11042 Erik int 1336 11042 Erik ipcl_conn_insert_v4(conn_t *connp) 1337 0 stevel { 1338 0 stevel connf_t *connfp; 1339 0 stevel conn_t *tconnp; 1340 0 stevel int ret = 0; 1341 3448 dh155122 ip_stack_t *ipst = connp->conn_netstack->netstack_ip; 1342 11042 Erik uint16_t lport = connp->conn_lport; 1343 11042 Erik uint8_t protocol = connp->conn_proto; 1344 10616 Sebastien 1345 10616 Sebastien if (IPCL_IS_IPTUN(connp)) 1346 11042 Erik return (ipcl_iptun_hash_insert(connp, ipst)); 1347 0 stevel 1348 0 stevel switch (protocol) { 1349 0 stevel case IPPROTO_TCP: 1350 8432 Jonathan /* 1351 11042 Erik * For TCP, we check whether the connection tuple already 1352 8432 Jonathan * exists before allowing the connection to 
proceed. We 1353 8432 Jonathan * also allow indexing on the zoneid. This is to allow 1354 8432 Jonathan * multiple shared stack zones to have the same tcp 1355 8432 Jonathan * connection tuple. In practice this only happens for 1356 8432 Jonathan * INADDR_LOOPBACK as it's the only local address which 1357 8432 Jonathan * doesn't have to be unique. 1358 8432 Jonathan */ 1359 3448 dh155122 connfp = &ipst->ips_ipcl_conn_fanout[ 1360 11042 Erik IPCL_CONN_HASH(connp->conn_faddr_v4, 1361 3448 dh155122 connp->conn_ports, ipst)]; 1362 0 stevel mutex_enter(&connfp->connf_lock); 1363 0 stevel for (tconnp = connfp->connf_head; tconnp != NULL; 1364 0 stevel tconnp = tconnp->conn_next) { 1365 11042 Erik if (IPCL_CONN_MATCH(tconnp, connp->conn_proto, 1366 11042 Erik connp->conn_faddr_v4, connp->conn_laddr_v4, 1367 11042 Erik connp->conn_ports) && 1368 11042 Erik IPCL_ZONE_MATCH(tconnp, connp->conn_zoneid)) { 1369 0 stevel /* Already have a conn. bail out */ 1370 0 stevel mutex_exit(&connfp->connf_lock); 1371 0 stevel return (EADDRINUSE); 1372 0 stevel } 1373 0 stevel } 1374 0 stevel if (connp->conn_fanout != NULL) { 1375 0 stevel /* 1376 0 stevel * Probably a XTI/TLI application trying to do a 1377 0 stevel * rebind. Let it happen. 1378 0 stevel */ 1379 0 stevel mutex_exit(&connfp->connf_lock); 1380 0 stevel IPCL_HASH_REMOVE(connp); 1381 0 stevel mutex_enter(&connfp->connf_lock); 1382 0 stevel } 1383 3104 jprakash 1384 3104 jprakash ASSERT(connp->conn_recv != NULL); 1385 11042 Erik ASSERT(connp->conn_recvicmp != NULL); 1386 3104 jprakash 1387 0 stevel IPCL_HASH_INSERT_CONNECTED_LOCKED(connfp, connp); 1388 0 stevel mutex_exit(&connfp->connf_lock); 1389 0 stevel break; 1390 0 stevel 1391 0 stevel case IPPROTO_SCTP: 1392 409 kcpoon /* 1393 409 kcpoon * The raw socket may have already been bound, remove it 1394 409 kcpoon * from the hash first. 
1395 409 kcpoon */ 1396 409 kcpoon IPCL_HASH_REMOVE(connp); 1397 0 stevel ret = ipcl_sctp_hash_insert(connp, lport); 1398 0 stevel break; 1399 0 stevel 1400 1676 jpk default: 1401 1676 jpk /* 1402 1676 jpk * Check for conflicts among MAC exempt bindings. For 1403 1676 jpk * transports with port numbers, this is done by the upper 1404 1676 jpk * level per-transport binding logic. For all others, it's 1405 1676 jpk * done here. 1406 1676 jpk */ 1407 3448 dh155122 if (is_system_labeled() && 1408 3448 dh155122 check_exempt_conflict_v4(connp, ipst)) 1409 1676 jpk return (EADDRINUSE); 1410 1676 jpk /* FALLTHROUGH */ 1411 1676 jpk 1412 0 stevel case IPPROTO_UDP: 1413 0 stevel if (protocol == IPPROTO_UDP) { 1414 3448 dh155122 connfp = &ipst->ips_ipcl_udp_fanout[ 1415 11042 Erik IPCL_UDP_HASH(lport, ipst)]; 1416 0 stevel } else { 1417 11042 Erik connfp = &ipst->ips_ipcl_proto_fanout_v4[protocol]; 1418 0 stevel } 1419 0 stevel 1420 11042 Erik if (connp->conn_faddr_v4 != INADDR_ANY) { 1421 0 stevel IPCL_HASH_INSERT_CONNECTED(connfp, connp); 1422 11042 Erik } else if (connp->conn_laddr_v4 != INADDR_ANY) { 1423 0 stevel IPCL_HASH_INSERT_BOUND(connfp, connp); 1424 0 stevel } else { 1425 0 stevel IPCL_HASH_INSERT_WILDCARD(connfp, connp); 1426 0 stevel } 1427 0 stevel break; 1428 0 stevel } 1429 0 stevel 1430 0 stevel return (ret); 1431 0 stevel } 1432 0 stevel 1433 0 stevel int 1434 11042 Erik ipcl_conn_insert_v6(conn_t *connp) 1435 0 stevel { 1436 0 stevel connf_t *connfp; 1437 0 stevel conn_t *tconnp; 1438 0 stevel int ret = 0; 1439 3448 dh155122 ip_stack_t *ipst = connp->conn_netstack->netstack_ip; 1440 11042 Erik uint16_t lport = connp->conn_lport; 1441 11042 Erik uint8_t protocol = connp->conn_proto; 1442 11042 Erik uint_t ifindex = connp->conn_bound_if; 1443 10616 Sebastien 1444 10616 Sebastien if (IPCL_IS_IPTUN(connp)) 1445 11042 Erik return (ipcl_iptun_hash_insert_v6(connp, ipst)); 1446 0 stevel 1447 0 stevel switch (protocol) { 1448 0 stevel case IPPROTO_TCP: 1449 8432 
Jonathan 1450 8432 Jonathan /* 1451 8432 Jonathan * For tcp, we check whether the connection tuple already 1452 8432 Jonathan * exists before allowing the connection to proceed. We 1453 8432 Jonathan * also allow indexing on the zoneid. This is to allow 1454 8432 Jonathan * multiple shared stack zones to have the same tcp 1455 8432 Jonathan * connection tuple. In practice this only happens for 1456 8432 Jonathan * ipv6_loopback as it's the only local address which 1457 8432 Jonathan * doesn't have to be unique. 1458 8432 Jonathan */ 1459 3448 dh155122 connfp = &ipst->ips_ipcl_conn_fanout[ 1460 11042 Erik IPCL_CONN_HASH_V6(connp->conn_faddr_v6, connp->conn_ports, 1461 3448 dh155122 ipst)]; 1462 0 stevel mutex_enter(&connfp->connf_lock); 1463 0 stevel for (tconnp = connfp->connf_head; tconnp != NULL; 1464 0 stevel tconnp = tconnp->conn_next) { 1465 11042 Erik /* NOTE: need to match zoneid. Bug in onnv-gate */ 1466 11042 Erik if (IPCL_CONN_MATCH_V6(tconnp, connp->conn_proto, 1467 11042 Erik connp->conn_faddr_v6, connp->conn_laddr_v6, 1468 0 stevel connp->conn_ports) && 1469 11042 Erik (tconnp->conn_bound_if == 0 || 1470 11042 Erik tconnp->conn_bound_if == ifindex) && 1471 11042 Erik IPCL_ZONE_MATCH(tconnp, connp->conn_zoneid)) { 1472 0 stevel /* Already have a conn. bail out */ 1473 0 stevel mutex_exit(&connfp->connf_lock); 1474 0 stevel return (EADDRINUSE); 1475 0 stevel } 1476 0 stevel } 1477 0 stevel if (connp->conn_fanout != NULL) { 1478 0 stevel /* 1479 0 stevel * Probably a XTI/TLI application trying to do a 1480 0 stevel * rebind. Let it happen. 
1481 0 stevel */ 1482 0 stevel mutex_exit(&connfp->connf_lock); 1483 0 stevel IPCL_HASH_REMOVE(connp); 1484 0 stevel mutex_enter(&connfp->connf_lock); 1485 0 stevel } 1486 0 stevel IPCL_HASH_INSERT_CONNECTED_LOCKED(connfp, connp); 1487 0 stevel mutex_exit(&connfp->connf_lock); 1488 0 stevel break; 1489 0 stevel 1490 0 stevel case IPPROTO_SCTP: 1491 409 kcpoon IPCL_HASH_REMOVE(connp); 1492 0 stevel ret = ipcl_sctp_hash_insert(connp, lport); 1493 0 stevel break; 1494 0 stevel 1495 1676 jpk default: 1496 3448 dh155122 if (is_system_labeled() && 1497 3448 dh155122 check_exempt_conflict_v6(connp, ipst)) 1498 1676 jpk return (EADDRINUSE); 1499 1676 jpk /* FALLTHROUGH */ 1500 0 stevel case IPPROTO_UDP: 1501 0 stevel if (protocol == IPPROTO_UDP) { 1502 3448 dh155122 connfp = &ipst->ips_ipcl_udp_fanout[ 1503 11042 Erik IPCL_UDP_HASH(lport, ipst)]; 1504 0 stevel } else { 1505 3448 dh155122 connfp = &ipst->ips_ipcl_proto_fanout_v6[protocol]; 1506 0 stevel } 1507 0 stevel 1508 11042 Erik if (!IN6_IS_ADDR_UNSPECIFIED(&connp->conn_faddr_v6)) { 1509 0 stevel IPCL_HASH_INSERT_CONNECTED(connfp, connp); 1510 11042 Erik } else if (!IN6_IS_ADDR_UNSPECIFIED(&connp->conn_laddr_v6)) { 1511 0 stevel IPCL_HASH_INSERT_BOUND(connfp, connp); 1512 0 stevel } else { 1513 0 stevel IPCL_HASH_INSERT_WILDCARD(connfp, connp); 1514 0 stevel } 1515 0 stevel break; 1516 0 stevel } 1517 0 stevel 1518 0 stevel return (ret); 1519 0 stevel } 1520 0 stevel 1521 0 stevel /* 1522 0 stevel * v4 packet classifying function. looks up the fanout table to 1523 0 stevel * find the conn, the packet belongs to. returns the conn with 1524 0 stevel * the reference held, null otherwise. 1525 1676 jpk * 1526 1676 jpk * If zoneid is ALL_ZONES, then the search rules described in the "Connection 1527 1676 jpk * Lookup" comment block are applied. Labels are also checked as described 1528 1676 jpk * above. If the packet is from the inside (looped back), and is from the same 1529 1676 jpk * zone, then label checks are omitted. 
1530 0 stevel */ 1531 0 stevel conn_t * 1532 11042 Erik ipcl_classify_v4(mblk_t *mp, uint8_t protocol, uint_t hdr_len, 1533 11042 Erik ip_recv_attr_t *ira, ip_stack_t *ipst) 1534 0 stevel { 1535 0 stevel ipha_t *ipha; 1536 0 stevel connf_t *connfp, *bind_connfp; 1537 0 stevel uint16_t lport; 1538 0 stevel uint16_t fport; 1539 0 stevel uint32_t ports; 1540 0 stevel conn_t *connp; 1541 0 stevel uint16_t *up; 1542 11042 Erik zoneid_t zoneid = ira->ira_zoneid; 1543 0 stevel 1544 0 stevel ipha = (ipha_t *)mp->b_rptr; 1545 0 stevel up = (uint16_t *)((uchar_t *)ipha + hdr_len + TCP_PORTS_OFFSET); 1546 0 stevel 1547 0 stevel switch (protocol) { 1548 0 stevel case IPPROTO_TCP: 1549 0 stevel ports = *(uint32_t *)up; 1550 0 stevel connfp = 1551 3448 dh155122 &ipst->ips_ipcl_conn_fanout[IPCL_CONN_HASH(ipha->ipha_src, 1552 3448 dh155122 ports, ipst)]; 1553 0 stevel mutex_enter(&connfp->connf_lock); 1554 0 stevel for (connp = connfp->connf_head; connp != NULL; 1555 0 stevel connp = connp->conn_next) { 1556 11042 Erik if (IPCL_CONN_MATCH(connp, protocol, 1557 11042 Erik ipha->ipha_src, ipha->ipha_dst, ports) && 1558 11042 Erik (connp->conn_zoneid == zoneid || 1559 11042 Erik connp->conn_allzones || 1560 11042 Erik ((connp->conn_mac_mode != CONN_MAC_DEFAULT) && 1561 11042 Erik (ira->ira_flags & IRAF_TX_MAC_EXEMPTABLE) && 1562 11042 Erik (ira->ira_flags & IRAF_TX_SHARED_ADDR)))) 1563 0 stevel break; 1564 11042 Erik } 1565 11042 Erik 1566 11042 Erik if (connp != NULL) { 1567 11042 Erik /* 1568 11042 Erik * We have a fully-bound TCP connection. 1569 11042 Erik * 1570 11042 Erik * For labeled systems, there's no need to check the 1571 11042 Erik * label here. It's known to be good as we checked 1572 11042 Erik * before allowing the connection to become bound. 
1573 11042 Erik */ 1574 11042 Erik CONN_INC_REF(connp); 1575 11042 Erik mutex_exit(&connfp->connf_lock); 1576 11042 Erik return (connp); 1577 11042 Erik } 1578 11042 Erik 1579 11042 Erik mutex_exit(&connfp->connf_lock); 1580 11042 Erik lport = up[1]; 1581 11042 Erik bind_connfp = 1582 11042 Erik &ipst->ips_ipcl_bind_fanout[IPCL_BIND_HASH(lport, ipst)]; 1583 11042 Erik mutex_enter(&bind_connfp->connf_lock); 1584 11042 Erik for (connp = bind_connfp->connf_head; connp != NULL; 1585 11042 Erik connp = connp->conn_next) { 1586 11042 Erik if (IPCL_BIND_MATCH(connp, protocol, ipha->ipha_dst, 1587 11042 Erik lport) && 1588 11042 Erik (connp->conn_zoneid == zoneid || 1589 11042 Erik connp->conn_allzones || 1590 11042 Erik ((connp->conn_mac_mode != CONN_MAC_DEFAULT) && 1591 11042 Erik (ira->ira_flags & IRAF_TX_MAC_EXEMPTABLE) && 1592 11042 Erik (ira->ira_flags & IRAF_TX_SHARED_ADDR)))) 1593 11042 Erik break; 1594 11042 Erik } 1595 11042 Erik 1596 11042 Erik /* 1597 11042 Erik * If the matching connection is SLP on a private address, then 1598 11042 Erik * the label on the packet must match the local zone's label. 1599 11042 Erik * Otherwise, it must be in the label range defined by tnrh. 1600 11042 Erik * This is ensured by tsol_receive_local. 1601 11042 Erik * 1602 11042 Erik * Note that we don't check tsol_receive_local for 1603 11042 Erik * the connected case. 
1604 11042 Erik */ 1605 11042 Erik if (connp != NULL && (ira->ira_flags & IRAF_SYSTEM_LABELED) && 1606 11042 Erik !tsol_receive_local(mp, &ipha->ipha_dst, IPV4_VERSION, 1607 11042 Erik ira, connp)) { 1608 11042 Erik DTRACE_PROBE3(tx__ip__log__info__classify__tcp, 1609 11042 Erik char *, "connp(1) could not receive mp(2)", 1610 11042 Erik conn_t *, connp, mblk_t *, mp); 1611 11042 Erik connp = NULL; 1612 11042 Erik } 1613 11042 Erik 1614 11042 Erik if (connp != NULL) { 1615 11042 Erik /* Have a listener at least */ 1616 11042 Erik CONN_INC_REF(connp); 1617 11042 Erik mutex_exit(&bind_connfp->connf_lock); 1618 11042 Erik return (connp); 1619 11042 Erik } 1620 11042 Erik 1621 11042 Erik mutex_exit(&bind_connfp->connf_lock); 1622 11042 Erik break; 1623 11042 Erik 1624 11042 Erik case IPPROTO_UDP: 1625 11042 Erik lport = up[1]; 1626 11042 Erik fport = up[0]; 1627 11042 Erik connfp = &ipst->ips_ipcl_udp_fanout[IPCL_UDP_HASH(lport, ipst)]; 1628 11042 Erik mutex_enter(&connfp->connf_lock); 1629 11042 Erik for (connp = connfp->connf_head; connp != NULL; 1630 11042 Erik connp = connp->conn_next) { 1631 11042 Erik if (IPCL_UDP_MATCH(connp, lport, ipha->ipha_dst, 1632 11042 Erik fport, ipha->ipha_src) && 1633 11042 Erik (connp->conn_zoneid == zoneid || 1634 11042 Erik connp->conn_allzones || 1635 11042 Erik ((connp->conn_mac_mode != CONN_MAC_DEFAULT) && 1636 11042 Erik (ira->ira_flags & IRAF_TX_MAC_EXEMPTABLE)))) 1637 11042 Erik break; 1638 11042 Erik } 1639 11042 Erik 1640 11042 Erik if (connp != NULL && (ira->ira_flags & IRAF_SYSTEM_LABELED) && 1641 11042 Erik !tsol_receive_local(mp, &ipha->ipha_dst, IPV4_VERSION, 1642 11042 Erik ira, connp)) { 1643 11042 Erik DTRACE_PROBE3(tx__ip__log__info__classify__udp, 1644 11042 Erik char *, "connp(1) could not receive mp(2)", 1645 11042 Erik conn_t *, connp, mblk_t *, mp); 1646 11042 Erik connp = NULL; 1647 11042 Erik } 1648 11042 Erik 1649 11042 Erik if (connp != NULL) { 1650 11042 Erik CONN_INC_REF(connp); 1651 11042 Erik 
mutex_exit(&connfp->connf_lock); 1652 11042 Erik return (connp); 1653 11042 Erik } 1654 11042 Erik 1655 11042 Erik /* 1656 11042 Erik * We shouldn't come here for multicast/broadcast packets 1657 11042 Erik */ 1658 11042 Erik mutex_exit(&connfp->connf_lock); 1659 11042 Erik 1660 11042 Erik break; 1661 11042 Erik 1662 11042 Erik case IPPROTO_ENCAP: 1663 11042 Erik case IPPROTO_IPV6: 1664 11042 Erik return (ipcl_iptun_classify_v4(&ipha->ipha_src, 1665 11042 Erik &ipha->ipha_dst, ipst)); 1666 11042 Erik } 1667 11042 Erik 1668 11042 Erik return (NULL); 1669 11042 Erik } 1670 11042 Erik 1671 11042 Erik conn_t * 1672 11042 Erik ipcl_classify_v6(mblk_t *mp, uint8_t protocol, uint_t hdr_len, 1673 11042 Erik ip_recv_attr_t *ira, ip_stack_t *ipst) 1674 11042 Erik { 1675 11042 Erik ip6_t *ip6h; 1676 11042 Erik connf_t *connfp, *bind_connfp; 1677 11042 Erik uint16_t lport; 1678 11042 Erik uint16_t fport; 1679 11042 Erik tcpha_t *tcpha; 1680 11042 Erik uint32_t ports; 1681 11042 Erik conn_t *connp; 1682 11042 Erik uint16_t *up; 1683 11042 Erik zoneid_t zoneid = ira->ira_zoneid; 1684 11042 Erik 1685 11042 Erik ip6h = (ip6_t *)mp->b_rptr; 1686 11042 Erik 1687 11042 Erik switch (protocol) { 1688 11042 Erik case IPPROTO_TCP: 1689 11042 Erik tcpha = (tcpha_t *)&mp->b_rptr[hdr_len]; 1690 11042 Erik up = &tcpha->tha_lport; 1691 11042 Erik ports = *(uint32_t *)up; 1692 11042 Erik 1693 11042 Erik connfp = 1694 11042 Erik &ipst->ips_ipcl_conn_fanout[IPCL_CONN_HASH_V6(ip6h->ip6_src, 1695 11042 Erik ports, ipst)]; 1696 11042 Erik mutex_enter(&connfp->connf_lock); 1697 11042 Erik for (connp = connfp->connf_head; connp != NULL; 1698 11042 Erik connp = connp->conn_next) { 1699 11042 Erik if (IPCL_CONN_MATCH_V6(connp, protocol, 1700 11042 Erik ip6h->ip6_src, ip6h->ip6_dst, ports) && 1701 11042 Erik (connp->conn_zoneid == zoneid || 1702 11042 Erik connp->conn_allzones || 1703 11042 Erik ((connp->conn_mac_mode != CONN_MAC_DEFAULT) && 1704 11042 Erik (ira->ira_flags & IRAF_TX_MAC_EXEMPTABLE) && 
1705 11042 Erik (ira->ira_flags & IRAF_TX_SHARED_ADDR)))) 1706 11042 Erik break; 1707 0 stevel } 1708 0 stevel 1709 0 stevel if (connp != NULL) { 1710 1676 jpk /* 1711 1676 jpk * We have a fully-bound TCP connection. 1712 1676 jpk * 1713 1676 jpk * For labeled systems, there's no need to check the 1714 1676 jpk * label here. It's known to be good as we checked 1715 1676 jpk * before allowing the connection to become bound. 1716 1676 jpk */ 1717 0 stevel CONN_INC_REF(connp); 1718 0 stevel mutex_exit(&connfp->connf_lock); 1719 0 stevel return (connp); 1720 0 stevel } 1721 0 stevel 1722 0 stevel mutex_exit(&connfp->connf_lock); 1723 0 stevel 1724 0 stevel lport = up[1]; 1725 3448 dh155122 bind_connfp = 1726 3448 dh155122 &ipst->ips_ipcl_bind_fanout[IPCL_BIND_HASH(lport, ipst)]; 1727 0 stevel mutex_enter(&bind_connfp->connf_lock); 1728 0 stevel for (connp = bind_connfp->connf_head; connp != NULL; 1729 0 stevel connp = connp->conn_next) { 1730 0 stevel if (IPCL_BIND_MATCH_V6(connp, protocol, 1731 0 stevel ip6h->ip6_dst, lport) && 1732 11042 Erik (connp->conn_zoneid == zoneid || 1733 11042 Erik connp->conn_allzones || 1734 11042 Erik ((connp->conn_mac_mode != CONN_MAC_DEFAULT) && 1735 11042 Erik (ira->ira_flags & IRAF_TX_MAC_EXEMPTABLE) && 1736 11042 Erik (ira->ira_flags & IRAF_TX_SHARED_ADDR)))) 1737 0 stevel break; 1738 1676 jpk } 1739 1676 jpk 1740 11042 Erik if (connp != NULL && (ira->ira_flags & IRAF_SYSTEM_LABELED) && 1741 1676 jpk !tsol_receive_local(mp, &ip6h->ip6_dst, IPV6_VERSION, 1742 11042 Erik ira, connp)) { 1743 1676 jpk DTRACE_PROBE3(tx__ip__log__info__classify__tcp6, 1744 1676 jpk char *, "connp(1) could not receive mp(2)", 1745 1676 jpk conn_t *, connp, mblk_t *, mp); 1746 1676 jpk connp = NULL; 1747 0 stevel } 1748 0 stevel 1749 0 stevel if (connp != NULL) { 1750 0 stevel /* Have a listner at least */ 1751 0 stevel CONN_INC_REF(connp); 1752 0 stevel mutex_exit(&bind_connfp->connf_lock); 1753 0 stevel return (connp); 1754 0 stevel } 1755 0 stevel 1756 0 
stevel mutex_exit(&bind_connfp->connf_lock); 1757 0 stevel break; 1758 0 stevel 1759 0 stevel case IPPROTO_UDP: 1760 0 stevel up = (uint16_t *)&mp->b_rptr[hdr_len]; 1761 0 stevel lport = up[1]; 1762 0 stevel fport = up[0]; 1763 3448 dh155122 connfp = &ipst->ips_ipcl_udp_fanout[IPCL_UDP_HASH(lport, ipst)]; 1764 0 stevel mutex_enter(&connfp->connf_lock); 1765 0 stevel for (connp = connfp->connf_head; connp != NULL; 1766 0 stevel connp = connp->conn_next) { 1767 0 stevel if (IPCL_UDP_MATCH_V6(connp, lport, ip6h->ip6_dst, 1768 0 stevel fport, ip6h->ip6_src) && 1769 11042 Erik (connp->conn_zoneid == zoneid || 1770 11042 Erik connp->conn_allzones || 1771 11042 Erik ((connp->conn_mac_mode != CONN_MAC_DEFAULT) && 1772 11042 Erik (ira->ira_flags & IRAF_TX_MAC_EXEMPTABLE) && 1773 11042 Erik (ira->ira_flags & IRAF_TX_SHARED_ADDR)))) 1774 0 stevel break; 1775 1676 jpk } 1776 1676 jpk 1777 11042 Erik if (connp != NULL && (ira->ira_flags & IRAF_SYSTEM_LABELED) && 1778 1676 jpk !tsol_receive_local(mp, &ip6h->ip6_dst, IPV6_VERSION, 1779 11042 Erik ira, connp)) { 1780 1676 jpk DTRACE_PROBE3(tx__ip__log__info__classify__udp6, 1781 1676 jpk char *, "connp(1) could not receive mp(2)", 1782 1676 jpk conn_t *, connp, mblk_t *, mp); 1783 1676 jpk connp = NULL; 1784 0 stevel } 1785 0 stevel 1786 0 stevel if (connp != NULL) { 1787 0 stevel CONN_INC_REF(connp); 1788 0 stevel mutex_exit(&connfp->connf_lock); 1789 0 stevel return (connp); 1790 0 stevel } 1791 0 stevel 1792 0 stevel /* 1793 0 stevel * We shouldn't come here for multicast/broadcast packets 1794 0 stevel */ 1795 0 stevel mutex_exit(&connfp->connf_lock); 1796 0 stevel break; 1797 10616 Sebastien case IPPROTO_ENCAP: 1798 10616 Sebastien case IPPROTO_IPV6: 1799 10616 Sebastien return (ipcl_iptun_classify_v6(&ip6h->ip6_src, 1800 10616 Sebastien &ip6h->ip6_dst, ipst)); 1801 0 stevel } 1802 0 stevel 1803 0 stevel return (NULL); 1804 0 stevel } 1805 0 stevel 1806 0 stevel /* 1807 0 stevel * wrapper around ipcl_classify_(v4,v6) 
routines. 1808 0 stevel */ 1809 0 stevel conn_t * 1810 11042 Erik ipcl_classify(mblk_t *mp, ip_recv_attr_t *ira, ip_stack_t *ipst) 1811 0 stevel { 1812 11042 Erik if (ira->ira_flags & IRAF_IS_IPV4) { 1813 11042 Erik return (ipcl_classify_v4(mp, ira->ira_protocol, 1814 11042 Erik ira->ira_ip_hdr_length, ira, ipst)); 1815 11042 Erik } else { 1816 11042 Erik return (ipcl_classify_v6(mp, ira->ira_protocol, 1817 11042 Erik ira->ira_ip_hdr_length, ira, ipst)); 1818 0 stevel } 1819 0 stevel } 1820 0 stevel 1821 11042 Erik /* 1822 11042 Erik * Only used to classify SCTP RAW sockets 1823 11042 Erik */ 1824 0 stevel conn_t * 1825 11042 Erik ipcl_classify_raw(mblk_t *mp, uint8_t protocol, uint32_t ports, 1826 11042 Erik ipha_t *ipha, ip6_t *ip6h, ip_recv_attr_t *ira, ip_stack_t *ipst) 1827 0 stevel { 1828 1676 jpk connf_t *connfp; 1829 0 stevel conn_t *connp; 1830 0 stevel in_port_t lport; 1831 11042 Erik int ipversion; 1832 1676 jpk const void *dst; 1833 11042 Erik zoneid_t zoneid = ira->ira_zoneid; 1834 0 stevel 1835 0 stevel lport = ((uint16_t *)&ports)[1]; 1836 11042 Erik if (ira->ira_flags & IRAF_IS_IPV4) { 1837 11042 Erik dst = (const void *)&ipha->ipha_dst; 1838 11042 Erik ipversion = IPV4_VERSION; 1839 11042 Erik } else { 1840 11042 Erik dst = (const void *)&ip6h->ip6_dst; 1841 11042 Erik ipversion = IPV6_VERSION; 1842 1676 jpk } 1843 1676 jpk 1844 3448 dh155122 connfp = &ipst->ips_ipcl_raw_fanout[IPCL_RAW_HASH(ntohs(lport), ipst)]; 1845 0 stevel mutex_enter(&connfp->connf_lock); 1846 0 stevel for (connp = connfp->connf_head; connp != NULL; 1847 0 stevel connp = connp->conn_next) { 1848 0 stevel /* We don't allow v4 fallback for v6 raw socket. 
*/ 1849 11042 Erik if (ipversion != connp->conn_ipversion) 1850 0 stevel continue; 1851 11042 Erik if (!IN6_IS_ADDR_UNSPECIFIED(&connp->conn_faddr_v6) && 1852 11042 Erik !IN6_IS_ADDR_V4MAPPED_ANY(&connp->conn_faddr_v6)) { 1853 11042 Erik if (ipversion == IPV4_VERSION) { 1854 1676 jpk if (!IPCL_CONN_MATCH(connp, protocol, 1855 11042 Erik ipha->ipha_src, ipha->ipha_dst, ports)) 1856 1676 jpk continue; 1857 0 stevel } else { 1858 1676 jpk if (!IPCL_CONN_MATCH_V6(connp, protocol, 1859 11042 Erik ip6h->ip6_src, ip6h->ip6_dst, ports)) 1860 1676 jpk continue; 1861 0 stevel } 1862 0 stevel } else { 1863 11042 Erik if (ipversion == IPV4_VERSION) { 1864 1676 jpk if (!IPCL_BIND_MATCH(connp, protocol, 1865 11042 Erik ipha->ipha_dst, lport)) 1866 1676 jpk continue; 1867 0 stevel } else { 1868 1676 jpk if (!IPCL_BIND_MATCH_V6(connp, protocol, 1869 11042 Erik ip6h->ip6_dst, lport)) 1870 1676 jpk continue; 1871 0 stevel } 1872 0 stevel } 1873 1676 jpk 1874 11042 Erik if (connp->conn_zoneid == zoneid || 1875 11042 Erik connp->conn_allzones || 1876 11042 Erik ((connp->conn_mac_mode != CONN_MAC_DEFAULT) && 1877 11042 Erik (ira->ira_flags & IRAF_TX_MAC_EXEMPTABLE) && 1878 11042 Erik (ira->ira_flags & IRAF_TX_SHARED_ADDR))) 1879 1676 jpk break; 1880 1676 jpk } 1881 11042 Erik 1882 11042 Erik if (connp != NULL && (ira->ira_flags & IRAF_SYSTEM_LABELED) && 1883 11042 Erik !tsol_receive_local(mp, dst, ipversion, ira, connp)) { 1884 1676 jpk DTRACE_PROBE3(tx__ip__log__info__classify__rawip, 1885 1676 jpk char *, "connp(1) could not receive mp(2)", 1886 1676 jpk conn_t *, connp, mblk_t *, mp); 1887 1676 jpk connp = NULL; 1888 0 stevel } 1889 409 kcpoon 1890 409 kcpoon if (connp != NULL) 1891 409 kcpoon goto found; 1892 409 kcpoon mutex_exit(&connfp->connf_lock); 1893 409 kcpoon 1894 11042 Erik /* Try to look for a wildcard SCTP RAW socket match. 
*/ 1895 3448 dh155122 connfp = &ipst->ips_ipcl_raw_fanout[IPCL_RAW_HASH(0, ipst)]; 1896 409 kcpoon mutex_enter(&connfp->connf_lock); 1897 409 kcpoon for (connp = connfp->connf_head; connp != NULL; 1898 409 kcpoon connp = connp->conn_next) { 1899 409 kcpoon /* We don't allow v4 fallback for v6 raw socket. */ 1900 11042 Erik if (ipversion != connp->conn_ipversion) 1901 409 kcpoon continue; 1902 11042 Erik if (!IPCL_ZONE_MATCH(connp, zoneid)) 1903 11042 Erik continue; 1904 11042 Erik 1905 11042 Erik if (ipversion == IPV4_VERSION) { 1906 11042 Erik if (IPCL_RAW_MATCH(connp, protocol, ipha->ipha_dst)) 1907 409 kcpoon break; 1908 409 kcpoon } else { 1909 11042 Erik if (IPCL_RAW_MATCH_V6(connp, protocol, ip6h->ip6_dst)) { 1910 409 kcpoon break; 1911 409 kcpoon } 1912 409 kcpoon } 1913 0 stevel } 1914 409 kcpoon 1915 409 kcpoon if (connp != NULL) 1916 409 kcpoon goto found; 1917 409 kcpoon 1918 0 stevel mutex_exit(&connfp->connf_lock); 1919 0 stevel return (NULL); 1920 409 kcpoon 1921 409 kcpoon found: 1922 409 kcpoon ASSERT(connp != NULL); 1923 409 kcpoon CONN_INC_REF(connp); 1924 409 kcpoon mutex_exit(&connfp->connf_lock); 1925 409 kcpoon return (connp); 1926 0 stevel } 1927 0 stevel 1928 0 stevel /* ARGSUSED */ 1929 0 stevel static int 1930 5240 nordmark tcp_conn_constructor(void *buf, void *cdrarg, int kmflags) 1931 0 stevel { 1932 0 stevel itc_t *itc = (itc_t *)buf; 1933 0 stevel conn_t *connp = &itc->itc_conn; 1934 5240 nordmark tcp_t *tcp = (tcp_t *)&itc[1]; 1935 5240 nordmark 1936 5240 nordmark bzero(connp, sizeof (conn_t)); 1937 5240 nordmark bzero(tcp, sizeof (tcp_t)); 1938 5240 nordmark 1939 5240 nordmark mutex_init(&connp->conn_lock, NULL, MUTEX_DEFAULT, NULL); 1940 5240 nordmark cv_init(&connp->conn_cv, NULL, CV_DEFAULT, NULL); 1941 8348 Eric cv_init(&connp->conn_sq_cv, NULL, CV_DEFAULT, NULL); 1942 11042 Erik tcp->tcp_timercache = tcp_timermp_alloc(kmflags); 1943 11042 Erik if (tcp->tcp_timercache == NULL) 1944 11042 Erik return (ENOMEM); 1945 0 stevel 
connp->conn_tcp = tcp; 1946 0 stevel connp->conn_flags = IPCL_TCPCONN; 1947 11042 Erik connp->conn_proto = IPPROTO_TCP; 1948 0 stevel tcp->tcp_connp = connp; 1949 11042 Erik rw_init(&connp->conn_ilg_lock, NULL, RW_DEFAULT, NULL); 1950 11042 Erik 1951 11042 Erik connp->conn_ixa = kmem_zalloc(sizeof (ip_xmit_attr_t), kmflags); 1952 11042 Erik if (connp->conn_ixa == NULL) { 1953 11042 Erik tcp_timermp_free(tcp); 1954 11042 Erik return (ENOMEM); 1955 11042 Erik } 1956 11042 Erik connp->conn_ixa->ixa_refcnt = 1; 1957 11042 Erik connp->conn_ixa->ixa_protocol = connp->conn_proto; 1958 11042 Erik connp->conn_ixa->ixa_xmit_hint = CONN_TO_XMIT_HINT(connp); 1959 0 stevel return (0); 1960 0 stevel } 1961 0 stevel 1962 0 stevel /* ARGSUSED */ 1963 0 stevel static void 1964 5240 nordmark tcp_conn_destructor(void *buf, void *cdrarg) 1965 0 stevel { 1966 5240 nordmark itc_t *itc = (itc_t *)buf; 1967 5240 nordmark conn_t *connp = &itc->itc_conn; 1968 5240 nordmark tcp_t *tcp = (tcp_t *)&itc[1]; 1969 5240 nordmark 1970 5240 nordmark ASSERT(connp->conn_flags & IPCL_TCPCONN); 1971 5240 nordmark ASSERT(tcp->tcp_connp == connp); 1972 5240 nordmark ASSERT(connp->conn_tcp == tcp); 1973 5240 nordmark tcp_timermp_free(tcp); 1974 5240 nordmark mutex_destroy(&connp->conn_lock); 1975 5240 nordmark cv_destroy(&connp->conn_cv); 1976 8348 Eric cv_destroy(&connp->conn_sq_cv); 1977 11042 Erik rw_destroy(&connp->conn_ilg_lock); 1978 11042 Erik 1979 11042 Erik /* Can be NULL if constructor failed */ 1980 11042 Erik if (connp->conn_ixa != NULL) { 1981 11042 Erik ASSERT(connp->conn_ixa->ixa_refcnt == 1); 1982 11042 Erik ASSERT(connp->conn_ixa->ixa_ire == NULL); 1983 11042 Erik ASSERT(connp->conn_ixa->ixa_nce == NULL); 1984 11042 Erik ixa_refrele(connp->conn_ixa); 1985 11042 Erik } 1986 5240 nordmark } 1987 5240 nordmark 1988 5240 nordmark /* ARGSUSED */ 1989 5240 nordmark static int 1990 5240 nordmark ip_conn_constructor(void *buf, void *cdrarg, int kmflags) 1991 5240 nordmark { 1992 5240 nordmark 
itc_t *itc = (itc_t *)buf; 1993 5240 nordmark conn_t *connp = &itc->itc_conn; 1994 5240 nordmark 1995 5240 nordmark bzero(connp, sizeof (conn_t)); 1996 5240 nordmark mutex_init(&connp->conn_lock, NULL, MUTEX_DEFAULT, NULL); 1997 5240 nordmark cv_init(&connp->conn_cv, NULL, CV_DEFAULT, NULL); 1998 5240 nordmark connp->conn_flags = IPCL_IPCCONN; 1999 11042 Erik rw_init(&connp->conn_ilg_lock, NULL, RW_DEFAULT, NULL); 2000 5240 nordmark 2001 11042 Erik connp->conn_ixa = kmem_zalloc(sizeof (ip_xmit_attr_t), kmflags); 2002 11042 Erik if (connp->conn_ixa == NULL) 2003 11042 Erik return (ENOMEM); 2004 11042 Erik connp->conn_ixa->ixa_refcnt = 1; 2005 11042 Erik connp->conn_ixa->ixa_xmit_hint = CONN_TO_XMIT_HINT(connp); 2006 5240 nordmark return (0); 2007 5240 nordmark } 2008 5240 nordmark 2009 5240 nordmark /* ARGSUSED */ 2010 5240 nordmark static void 2011 5240 nordmark ip_conn_destructor(void *buf, void *cdrarg) 2012 5240 nordmark { 2013 5240 nordmark itc_t *itc = (itc_t *)buf; 2014 5240 nordmark conn_t *connp = &itc->itc_conn; 2015 5240 nordmark 2016 5240 nordmark ASSERT(connp->conn_flags & IPCL_IPCCONN); 2017 5240 nordmark ASSERT(connp->conn_priv == NULL); 2018 5240 nordmark mutex_destroy(&connp->conn_lock); 2019 5240 nordmark cv_destroy(&connp->conn_cv); 2020 11042 Erik rw_destroy(&connp->conn_ilg_lock); 2021 11042 Erik 2022 11042 Erik /* Can be NULL if constructor failed */ 2023 11042 Erik if (connp->conn_ixa != NULL) { 2024 11042 Erik ASSERT(connp->conn_ixa->ixa_refcnt == 1); 2025 11042 Erik ASSERT(connp->conn_ixa->ixa_ire == NULL); 2026 11042 Erik ASSERT(connp->conn_ixa->ixa_nce == NULL); 2027 11042 Erik ixa_refrele(connp->conn_ixa); 2028 11042 Erik } 2029 5240 nordmark } 2030 5240 nordmark 2031 5240 nordmark /* ARGSUSED */ 2032 5240 nordmark static int 2033 5240 nordmark udp_conn_constructor(void *buf, void *cdrarg, int kmflags) 2034 5240 nordmark { 2035 5240 nordmark itc_t *itc = (itc_t *)buf; 2036 5240 nordmark conn_t *connp = &itc->itc_conn; 2037 5240 nordmark 
udp_t *udp = (udp_t *)&itc[1]; 2038 5240 nordmark 2039 5240 nordmark bzero(connp, sizeof (conn_t)); 2040 5240 nordmark bzero(udp, sizeof (udp_t)); 2041 5240 nordmark 2042 5240 nordmark mutex_init(&connp->conn_lock, NULL, MUTEX_DEFAULT, NULL); 2043 5240 nordmark cv_init(&connp->conn_cv, NULL, CV_DEFAULT, NULL); 2044 5240 nordmark connp->conn_udp = udp; 2045 5240 nordmark connp->conn_flags = IPCL_UDPCONN; 2046 11042 Erik connp->conn_proto = IPPROTO_UDP; 2047 5240 nordmark udp->udp_connp = connp; 2048 11042 Erik rw_init(&connp->conn_ilg_lock, NULL, RW_DEFAULT, NULL); 2049 11042 Erik connp->conn_ixa = kmem_zalloc(sizeof (ip_xmit_attr_t), kmflags); 2050 11042 Erik if (connp->conn_ixa == NULL) 2051 11042 Erik return (ENOMEM); 2052 11042 Erik connp->conn_ixa->ixa_refcnt = 1; 2053 11042 Erik connp->conn_ixa->ixa_protocol = connp->conn_proto; 2054 11042 Erik connp->conn_ixa->ixa_xmit_hint = CONN_TO_XMIT_HINT(connp); 2055 5240 nordmark return (0); 2056 5240 nordmark } 2057 5240 nordmark 2058 5240 nordmark /* ARGSUSED */ 2059 5240 nordmark static void 2060 5240 nordmark udp_conn_destructor(void *buf, void *cdrarg) 2061 5240 nordmark { 2062 5240 nordmark itc_t *itc = (itc_t *)buf; 2063 5240 nordmark conn_t *connp = &itc->itc_conn; 2064 5240 nordmark udp_t *udp = (udp_t *)&itc[1]; 2065 5240 nordmark 2066 5240 nordmark ASSERT(connp->conn_flags & IPCL_UDPCONN); 2067 5240 nordmark ASSERT(udp->udp_connp == connp); 2068 5240 nordmark ASSERT(connp->conn_udp == udp); 2069 5240 nordmark mutex_destroy(&connp->conn_lock); 2070 5240 nordmark cv_destroy(&connp->conn_cv); 2071 11042 Erik rw_destroy(&connp->conn_ilg_lock); 2072 11042 Erik 2073 11042 Erik /* Can be NULL if constructor failed */ 2074 11042 Erik if (connp->conn_ixa != NULL) { 2075 11042 Erik ASSERT(connp->conn_ixa->ixa_refcnt == 1); 2076 11042 Erik ASSERT(connp->conn_ixa->ixa_ire == NULL); 2077 11042 Erik ASSERT(connp->conn_ixa->ixa_nce == NULL); 2078 11042 Erik ixa_refrele(connp->conn_ixa); 2079 11042 Erik } 2080 5240 nordmark 
} 2081 5240 nordmark 2082 5240 nordmark /* ARGSUSED */ 2083 5240 nordmark static int 2084 5240 nordmark rawip_conn_constructor(void *buf, void *cdrarg, int kmflags) 2085 5240 nordmark { 2086 5240 nordmark itc_t *itc = (itc_t *)buf; 2087 5240 nordmark conn_t *connp = &itc->itc_conn; 2088 5240 nordmark icmp_t *icmp = (icmp_t *)&itc[1]; 2089 5240 nordmark 2090 5240 nordmark bzero(connp, sizeof (conn_t)); 2091 5240 nordmark bzero(icmp, sizeof (icmp_t)); 2092 5240 nordmark 2093 5240 nordmark mutex_init(&connp->conn_lock, NULL, MUTEX_DEFAULT, NULL); 2094 5240 nordmark cv_init(&connp->conn_cv, NULL, CV_DEFAULT, NULL); 2095 5240 nordmark connp->conn_icmp = icmp; 2096 5240 nordmark connp->conn_flags = IPCL_RAWIPCONN; 2097 11042 Erik connp->conn_proto = IPPROTO_ICMP; 2098 5240 nordmark icmp->icmp_connp = connp; 2099 11042 Erik rw_init(&connp->conn_ilg_lock, NULL, RW_DEFAULT, NULL); 2100 11042 Erik connp->conn_ixa = kmem_zalloc(sizeof (ip_xmit_attr_t), kmflags); 2101 11042 Erik if (connp->conn_ixa == NULL) 2102 11042 Erik return (ENOMEM); 2103 11042 Erik connp->conn_ixa->ixa_refcnt = 1; 2104 11042 Erik connp->conn_ixa->ixa_protocol = connp->conn_proto; 2105 11042 Erik connp->conn_ixa->ixa_xmit_hint = CONN_TO_XMIT_HINT(connp); 2106 5240 nordmark return (0); 2107 5240 nordmark } 2108 5240 nordmark 2109 5240 nordmark /* ARGSUSED */ 2110 5240 nordmark static void 2111 5240 nordmark rawip_conn_destructor(void *buf, void *cdrarg) 2112 5240 nordmark { 2113 5240 nordmark itc_t *itc = (itc_t *)buf; 2114 5240 nordmark conn_t *connp = &itc->itc_conn; 2115 5240 nordmark icmp_t *icmp = (icmp_t *)&itc[1]; 2116 5240 nordmark 2117 5240 nordmark ASSERT(connp->conn_flags & IPCL_RAWIPCONN); 2118 5240 nordmark ASSERT(icmp->icmp_connp == connp); 2119 5240 nordmark ASSERT(connp->conn_icmp == icmp); 2120 5240 nordmark mutex_destroy(&connp->conn_lock); 2121 5240 nordmark cv_destroy(&connp->conn_cv); 2122 11042 Erik rw_destroy(&connp->conn_ilg_lock); 2123 11042 Erik 2124 11042 Erik /* Can be NULL if 
constructor failed */ 2125 11042 Erik if (connp->conn_ixa != NULL) { 2126 11042 Erik ASSERT(connp->conn_ixa->ixa_refcnt == 1); 2127 11042 Erik ASSERT(connp->conn_ixa->ixa_ire == NULL); 2128 11042 Erik ASSERT(connp->conn_ixa->ixa_nce == NULL); 2129 11042 Erik ixa_refrele(connp->conn_ixa); 2130 11042 Erik } 2131 5240 nordmark } 2132 5240 nordmark 2133 5240 nordmark /* ARGSUSED */ 2134 5240 nordmark static int 2135 5240 nordmark rts_conn_constructor(void *buf, void *cdrarg, int kmflags) 2136 5240 nordmark { 2137 5240 nordmark itc_t *itc = (itc_t *)buf; 2138 5240 nordmark conn_t *connp = &itc->itc_conn; 2139 5240 nordmark rts_t *rts = (rts_t *)&itc[1]; 2140 5240 nordmark 2141 5240 nordmark bzero(connp, sizeof (conn_t)); 2142 5240 nordmark bzero(rts, sizeof (rts_t)); 2143 5240 nordmark 2144 5240 nordmark mutex_init(&connp->conn_lock, NULL, MUTEX_DEFAULT, NULL); 2145 5240 nordmark cv_init(&connp->conn_cv, NULL, CV_DEFAULT, NULL); 2146 5240 nordmark connp->conn_rts = rts; 2147 5240 nordmark connp->conn_flags = IPCL_RTSCONN; 2148 5240 nordmark rts->rts_connp = connp; 2149 11042 Erik rw_init(&connp->conn_ilg_lock, NULL, RW_DEFAULT, NULL); 2150 11042 Erik connp->conn_ixa = kmem_zalloc(sizeof (ip_xmit_attr_t), kmflags); 2151 11042 Erik if (connp->conn_ixa == NULL) 2152 11042 Erik return (ENOMEM); 2153 11042 Erik connp->conn_ixa->ixa_refcnt = 1; 2154 11042 Erik connp->conn_ixa->ixa_xmit_hint = CONN_TO_XMIT_HINT(connp); 2155 5240 nordmark return (0); 2156 5240 nordmark } 2157 5240 nordmark 2158 5240 nordmark /* ARGSUSED */ 2159 5240 nordmark static void 2160 5240 nordmark rts_conn_destructor(void *buf, void *cdrarg) 2161 5240 nordmark { 2162 5240 nordmark itc_t *itc = (itc_t *)buf; 2163 5240 nordmark conn_t *connp = &itc->itc_conn; 2164 5240 nordmark rts_t *rts = (rts_t *)&itc[1]; 2165 5240 nordmark 2166 5240 nordmark ASSERT(connp->conn_flags & IPCL_RTSCONN); 2167 5240 nordmark ASSERT(rts->rts_connp == connp); 2168 5240 nordmark ASSERT(connp->conn_rts == rts); 2169 5240 
nordmark mutex_destroy(&connp->conn_lock); 2170 5240 nordmark cv_destroy(&connp->conn_cv); 2171 11042 Erik rw_destroy(&connp->conn_ilg_lock); 2172 11042 Erik 2173 11042 Erik /* Can be NULL if constructor failed */ 2174 11042 Erik if (connp->conn_ixa != NULL) { 2175 11042 Erik ASSERT(connp->conn_ixa->ixa_refcnt == 1); 2176 11042 Erik ASSERT(connp->conn_ixa->ixa_ire == NULL); 2177 11042 Erik ASSERT(connp->conn_ixa->ixa_nce == NULL); 2178 11042 Erik ixa_refrele(connp->conn_ixa); 2179 11042 Erik } 2180 5240 nordmark } 2181 8348 Eric 2182 5240 nordmark /* 2183 5240 nordmark * Called as part of ipcl_conn_destroy to assert and clear any pointers 2184 5240 nordmark * in the conn_t. 2185 11042 Erik * 2186 11042 Erik * Below we list all the pointers in the conn_t as a documentation aid. 2187 11042 Erik * The ones that we can not ASSERT to be NULL are #ifdef'ed out. 2188 11042 Erik * If you add any pointers to the conn_t please add an ASSERT here 2189 11042 Erik * and #ifdef it out if it can't be actually asserted to be NULL. 2190 11042 Erik * In any case, we bzero most of the conn_t at the end of the function. 
2191 5240 nordmark */ 2192 5240 nordmark void 2193 5240 nordmark ipcl_conn_cleanup(conn_t *connp) 2194 5240 nordmark { 2195 11042 Erik ip_xmit_attr_t *ixa; 2196 11042 Erik 2197 5240 nordmark ASSERT(connp->conn_latch == NULL); 2198 11042 Erik ASSERT(connp->conn_latch_in_policy == NULL); 2199 11042 Erik ASSERT(connp->conn_latch_in_action == NULL); 2200 5240 nordmark #ifdef notdef 2201 5240 nordmark ASSERT(connp->conn_rq == NULL); 2202 5240 nordmark ASSERT(connp->conn_wq == NULL); 2203 5240 nordmark #endif 2204 5240 nordmark ASSERT(connp->conn_cred == NULL); 2205 5240 nordmark ASSERT(connp->conn_g_fanout == NULL); 2206 5240 nordmark ASSERT(connp->conn_g_next == NULL); 2207 5240 nordmark ASSERT(connp->conn_g_prev == NULL); 2208 5240 nordmark ASSERT(connp->conn_policy == NULL); 2209 5240 nordmark ASSERT(connp->conn_fanout == NULL); 2210 5240 nordmark ASSERT(connp->conn_next == NULL); 2211 5240 nordmark ASSERT(connp->conn_prev == NULL); 2212 5240 nordmark ASSERT(connp->conn_oper_pending_ill == NULL); 2213 5240 nordmark ASSERT(connp->conn_ilg == NULL); 2214 5240 nordmark ASSERT(connp->conn_drain_next == NULL); 2215 5240 nordmark ASSERT(connp->conn_drain_prev == NULL); 2216 5277 nordmark #ifdef notdef 2217 5277 nordmark /* conn_idl is not cleared when removed from idl list */ 2218 5240 nordmark ASSERT(connp->conn_idl == NULL); 2219 5277 nordmark #endif 2220 5240 nordmark ASSERT(connp->conn_ipsec_opt_mp == NULL); 2221 11042 Erik #ifdef notdef 2222 11042 Erik /* conn_netstack is cleared by the caller; needed by ixa_cleanup */ 2223 5240 nordmark ASSERT(connp->conn_netstack == NULL); 2224 11042 Erik #endif 2225 5240 nordmark 2226 8348 Eric ASSERT(connp->conn_helper_info == NULL); 2227 11042 Erik ASSERT(connp->conn_ixa != NULL); 2228 11042 Erik ixa = connp->conn_ixa; 2229 11042 Erik ASSERT(ixa->ixa_refcnt == 1); 2230 11042 Erik /* Need to preserve ixa_protocol */ 2231 11042 Erik ixa_cleanup(ixa); 2232 11042 Erik ixa->ixa_flags = 0; 2233 11042 Erik 2234 5240 nordmark /* Clear 
out the conn_t fields that are not preserved */ 2235 5240 nordmark bzero(&connp->conn_start_clr, 2236 5240 nordmark sizeof (conn_t) - 2237 5240 nordmark ((uchar_t *)&connp->conn_start_clr - (uchar_t *)connp)); 2238 0 stevel } 2239 0 stevel 2240 0 stevel /* 2241 0 stevel * All conns are inserted in a global multi-list for the benefit of 2242 0 stevel * walkers. The walk is guaranteed to walk all open conns at the time 2243 0 stevel * of the start of the walk exactly once. This property is needed to 2244 0 stevel * achieve some cleanups during unplumb of interfaces. This is achieved 2245 0 stevel * as follows. 2246 0 stevel * 2247 0 stevel * ipcl_conn_create and ipcl_conn_destroy are the only functions that 2248 0 stevel * call the insert and delete functions below at creation and deletion 2249 0 stevel * time respectively. The conn never moves or changes its position in this 2250 0 stevel * multi-list during its lifetime. CONN_CONDEMNED ensures that the refcnt 2251 0 stevel * won't increase due to walkers, once the conn deletion has started. Note 2252 0 stevel * that we can't remove the conn from the global list and then wait for 2253 0 stevel * the refcnt to drop to zero, since walkers would then see a truncated 2254 0 stevel * list. CONN_INCIPIENT ensures that walkers don't start looking at 2255 0 stevel * conns until ip_open is ready to make them globally visible. 2256 0 stevel * The global round robin multi-list locks are held only to get the 2257 0 stevel * next member/insertion/deletion and contention should be negligible 2258 0 stevel * if the multi-list is much greater than the number of cpus. 2259 0 stevel */ 2260 0 stevel void 2261 0 stevel ipcl_globalhash_insert(conn_t *connp) 2262 0 stevel { 2263 0 stevel int index; 2264 3448 dh155122 struct connf_s *connfp; 2265 3448 dh155122 ip_stack_t *ipst = connp->conn_netstack->netstack_ip; 2266 0 stevel 2267 0 stevel /* 2268 0 stevel * No need for atomic here. 
Approximate even distribution 2269 0 stevel * in the global lists is sufficient. 2270 0 stevel */ 2271 3448 dh155122 ipst->ips_conn_g_index++; 2272 3448 dh155122 index = ipst->ips_conn_g_index & (CONN_G_HASH_SIZE - 1); 2273 0 stevel 2274 0 stevel connp->conn_g_prev = NULL; 2275 0 stevel /* 2276 0 stevel * Mark as INCIPIENT, so that walkers will ignore this 2277 0 stevel * for now, till ip_open is ready to make it visible globally. 2278 0 stevel */ 2279 0 stevel connp->conn_state_flags |= CONN_INCIPIENT; 2280 0 stevel 2281 3448 dh155122 connfp = &ipst->ips_ipcl_globalhash_fanout[index]; 2282 0 stevel /* Insert at the head of the list */ 2283 3448 dh155122 mutex_enter(&connfp->connf_lock); 2284 3448 dh155122 connp->conn_g_next = connfp->connf_head; 2285 0 stevel if (connp->conn_g_next != NULL) 2286 0 stevel connp->conn_g_next->conn_g_prev = connp; 2287 3448 dh155122 connfp->connf_head = connp; 2288 0 stevel 2289 0 stevel /* The fanout bucket this conn points to */ 2290 3448 dh155122 connp->conn_g_fanout = connfp; 2291 0 stevel 2292 3448 dh155122 mutex_exit(&connfp->connf_lock); 2293 0 stevel } 2294 0 stevel 2295 0 stevel void 2296 0 stevel ipcl_globalhash_remove(conn_t *connp) 2297 0 stevel { 2298 3448 dh155122 struct connf_s *connfp; 2299 3448 dh155122 2300 0 stevel /* 2301 0 stevel * We were never inserted in the global multi list. 2302 0 stevel * IPCL_NONE variety is never inserted in the global multilist 2303 0 stevel * since it is presumed to not need any cleanup and is transient. 
2304 0 stevel */ 2305 0 stevel if (connp->conn_g_fanout == NULL) 2306 0 stevel return; 2307 0 stevel 2308 3448 dh155122 connfp = connp->conn_g_fanout; 2309 3448 dh155122 mutex_enter(&connfp->connf_lock); 2310 0 stevel if (connp->conn_g_prev != NULL) 2311 0 stevel connp->conn_g_prev->conn_g_next = connp->conn_g_next; 2312 0 stevel else 2313 3448 dh155122 connfp->connf_head = connp->conn_g_next; 2314 0 stevel if (connp->conn_g_next != NULL) 2315 0 stevel connp->conn_g_next->conn_g_prev = connp->conn_g_prev; 2316 3448 dh155122 mutex_exit(&connfp->connf_lock); 2317 0 stevel 2318 0 stevel /* Better to stumble on a null pointer than to corrupt memory */ 2319 0 stevel connp->conn_g_next = NULL; 2320 0 stevel connp->conn_g_prev = NULL; 2321 5240 nordmark connp->conn_g_fanout = NULL; 2322 0 stevel } 2323 0 stevel 2324 0 stevel /* 2325 0 stevel * Walk the list of all conn_t's in the system, calling the function provided 2326 11042 Erik * With the specified argument for each. 2327 0 stevel * Applies to both IPv4 and IPv6. 2328 0 stevel * 2329 11042 Erik * CONNs may hold pointers to ills (conn_dhcpinit_ill and 2330 11042 Erik * conn_oper_pending_ill). To guard against stale pointers 2331 0 stevel * ipcl_walk() is called to cleanup the conn_t's, typically when an interface is 2332 0 stevel * unplumbed or removed. New conn_t's that are created while we are walking 2333 0 stevel * may be missed by this walk, because they are not necessarily inserted 2334 0 stevel * at the tail of the list. They are new conn_t's and thus don't have any 2335 0 stevel * stale pointers. The CONN_CLOSING flag ensures that no new reference 2336 0 stevel * is created to the struct that is going away. 
2337 0 stevel */ 2338 0 stevel void 2339 3448 dh155122 ipcl_walk(pfv_t func, void *arg, ip_stack_t *ipst) 2340 0 stevel { 2341 0 stevel int i; 2342 0 stevel conn_t *connp; 2343 0 stevel conn_t *prev_connp; 2344 0 stevel 2345 0 stevel for (i = 0; i < CONN_G_HASH_SIZE; i++) { 2346 3448 dh155122 mutex_enter(&ipst->ips_ipcl_globalhash_fanout[i].connf_lock); 2347 0 stevel prev_connp = NULL; 2348 3448 dh155122 connp = ipst->ips_ipcl_globalhash_fanout[i].connf_head; 2349 0 stevel while (connp != NULL) { 2350 0 stevel mutex_enter(&connp->conn_lock); 2351 0 stevel if (connp->conn_state_flags & 2352 0 stevel (CONN_CONDEMNED | CONN_INCIPIENT)) { 2353 0 stevel mutex_exit(&connp->conn_lock); 2354 0 stevel connp = connp->conn_g_next; 2355 0 stevel continue; 2356 0 stevel } 2357 0 stevel CONN_INC_REF_LOCKED(connp); 2358 0 stevel mutex_exit(&connp->conn_lock); 2359 3448 dh155122 mutex_exit( 2360 3448 dh155122 &ipst->ips_ipcl_globalhash_fanout[i].connf_lock); 2361 0 stevel (*func)(connp, arg); 2362 0 stevel if (prev_connp != NULL) 2363 0 stevel CONN_DEC_REF(prev_connp); 2364 3448 dh155122 mutex_enter( 2365 3448 dh155122 &ipst->ips_ipcl_globalhash_fanout[i].connf_lock); 2366 0 stevel prev_connp = connp; 2367 0 stevel connp = connp->conn_g_next; 2368 0 stevel } 2369 3448 dh155122 mutex_exit(&ipst->ips_ipcl_globalhash_fanout[i].connf_lock); 2370 0 stevel if (prev_connp != NULL) 2371 0 stevel CONN_DEC_REF(prev_connp); 2372 0 stevel } 2373 0 stevel } 2374 0 stevel 2375 0 stevel /* 2376 0 stevel * Search for a peer TCP/IPv4 loopback conn by doing a reverse lookup on 2377 0 stevel * the {src, dst, lport, fport} quadruplet. Returns with conn reference 2378 0 stevel * held; caller must call CONN_DEC_REF. Only checks for connected entries 2379 2323 ethindra * (peer tcp in ESTABLISHED state). 
2380 0 stevel */ 2381 0 stevel conn_t * 2382 11042 Erik ipcl_conn_tcp_lookup_reversed_ipv4(conn_t *connp, ipha_t *ipha, tcpha_t *tcpha, 2383 3448 dh155122 ip_stack_t *ipst) 2384 0 stevel { 2385 0 stevel uint32_t ports; 2386 0 stevel uint16_t *pports = (uint16_t *)&ports; 2387 0 stevel connf_t *connfp; 2388 0 stevel conn_t *tconnp; 2389 0 stevel boolean_t zone_chk; 2390 0 stevel 2391 0 stevel /* 2392 0 stevel * If either the source of destination address is loopback, then 2393 0 stevel * both endpoints must be in the same Zone. Otherwise, both of 2394 0 stevel * the addresses are system-wide unique (tcp is in ESTABLISHED 2395 0 stevel * state) and the endpoints may reside in different Zones. 2396 0 stevel */ 2397 0 stevel zone_chk = (ipha->ipha_src == htonl(INADDR_LOOPBACK) || 2398 0 stevel ipha->ipha_dst == htonl(INADDR_LOOPBACK)); 2399 0 stevel 2400 11042 Erik pports[0] = tcpha->tha_fport; 2401 11042 Erik pports[1] = tcpha->tha_lport; 2402 0 stevel 2403 3448 dh155122 connfp = &ipst->ips_ipcl_conn_fanout[IPCL_CONN_HASH(ipha->ipha_dst, 2404 3448 dh155122 ports, ipst)]; 2405 0 stevel 2406 0 stevel mutex_enter(&connfp->connf_lock); 2407 0 stevel for (tconnp = connfp->connf_head; tconnp != NULL; 2408 0 stevel tconnp = tconnp->conn_next) { 2409 0 stevel 2410 0 stevel if (IPCL_CONN_MATCH(tconnp, IPPROTO_TCP, 2411 0 stevel ipha->ipha_dst, ipha->ipha_src, ports) && 2412 2323 ethindra tconnp->conn_tcp->tcp_state == TCPS_ESTABLISHED && 2413 0 stevel (!zone_chk || tconnp->conn_zoneid == connp->conn_zoneid)) { 2414 0 stevel 2415 0 stevel ASSERT(tconnp != connp); 2416 0 stevel CONN_INC_REF(tconnp); 2417 0 stevel mutex_exit(&connfp->connf_lock); 2418 0 stevel return (tconnp); 2419 0 stevel } 2420 0 stevel } 2421 0 stevel mutex_exit(&connfp->connf_lock); 2422 0 stevel return (NULL); 2423 0 stevel } 2424 0 stevel 2425 0 stevel /* 2426 0 stevel * Search for a peer TCP/IPv6 loopback conn by doing a reverse lookup on 2427 0 stevel * the {src, dst, lport, fport} quadruplet. 
Returns with conn reference
 * held; caller must call CONN_DEC_REF.  Only checks for connected entries
 * (peer tcp in ESTABLISHED state).
 */
conn_t *
ipcl_conn_tcp_lookup_reversed_ipv6(conn_t *connp, ip6_t *ip6h, tcpha_t *tcpha,
    ip_stack_t *ipst)
{
	uint32_t	ports;
	uint16_t	*pports;
	connf_t		*connfp;
	conn_t		*peer;
	boolean_t	zone_chk;

	/*
	 * If either the source or destination address is loopback, then
	 * both endpoints must be in the same Zone.  Otherwise, both of
	 * the addresses are system-wide unique (tcp is in ESTABLISHED
	 * state) and the endpoints may reside in different Zones.  We
	 * don't do Zone check for link local address(es) because the
	 * current Zone implementation treats each link local address as
	 * being unique per system node, i.e. they belong to global Zone.
	 */
	zone_chk = IN6_IS_ADDR_LOOPBACK(&ip6h->ip6_src);
	if (!zone_chk)
		zone_chk = IN6_IS_ADDR_LOOPBACK(&ip6h->ip6_dst);

	/* Build the port pair in {fport, lport} order, as read from tcpha */
	pports = (uint16_t *)&ports;
	pports[0] = tcpha->tha_fport;
	pports[1] = tcpha->tha_lport;

	connfp = &ipst->ips_ipcl_conn_fanout[IPCL_CONN_HASH_V6(ip6h->ip6_dst,
	    ports, ipst)];

	mutex_enter(&connfp->connf_lock);
	for (peer = connfp->connf_head; peer != NULL;
	    peer = peer->conn_next) {
		/* We skip conn_bound_if check here as this is loopback tcp */
		if (!(IPCL_CONN_MATCH_V6(peer, IPPROTO_TCP,
		    ip6h->ip6_dst, ip6h->ip6_src, ports)))
			continue;
		if (peer->conn_tcp->tcp_state != TCPS_ESTABLISHED)
			continue;
		if (zone_chk && peer->conn_zoneid != connp->conn_zoneid)
			continue;

		ASSERT(peer != connp);
		/* Take the reference while the bucket lock is still held */
		CONN_INC_REF(peer);
		break;
	}
	mutex_exit(&connfp->connf_lock);

	/* peer is NULL here when the bucket held no matching conn */
	return (peer);
}

/*
 * Find an exact {src, dst, lport, fport} match for a bounced datagram.
 * Returns with conn reference held. Caller must call CONN_DEC_REF.
 * Only checks for connected entries i.e. no INADDR_ANY checks.
2483 0 stevel */ 2484 0 stevel conn_t * 2485 11042 Erik ipcl_tcp_lookup_reversed_ipv4(ipha_t *ipha, tcpha_t *tcpha, int min_state, 2486 3448 dh155122 ip_stack_t *ipst) 2487 0 stevel { 2488 0 stevel uint32_t ports; 2489 0 stevel uint16_t *pports; 2490 0 stevel connf_t *connfp; 2491 0 stevel conn_t *tconnp; 2492 0 stevel 2493 0 stevel pports = (uint16_t *)&ports; 2494 11042 Erik pports[0] = tcpha->tha_fport; 2495 11042 Erik pports[1] = tcpha->tha_lport; 2496 0 stevel 2497 3448 dh155122 connfp = &ipst->ips_ipcl_conn_fanout[IPCL_CONN_HASH(ipha->ipha_dst, 2498 4691 kcpoon ports, ipst)]; 2499 0 stevel 2500 0 stevel mutex_enter(&connfp->connf_lock); 2501 0 stevel for (tconnp = connfp->connf_head; tconnp != NULL; 2502 0 stevel tconnp = tconnp->conn_next) { 2503 0 stevel 2504 0 stevel if (IPCL_CONN_MATCH(tconnp, IPPROTO_TCP, 2505 0 stevel ipha->ipha_dst, ipha->ipha_src, ports) && 2506 0 stevel tconnp->conn_tcp->tcp_state >= min_state) { 2507 0 stevel 2508 0 stevel CONN_INC_REF(tconnp); 2509 0 stevel mutex_exit(&connfp->connf_lock); 2510 0 stevel return (tconnp); 2511 0 stevel } 2512 0 stevel } 2513 0 stevel mutex_exit(&connfp->connf_lock); 2514 0 stevel return (NULL); 2515 0 stevel } 2516 0 stevel 2517 0 stevel /* 2518 0 stevel * Find an exact {src, dst, lport, fport} match for a bounced datagram. 2519 0 stevel * Returns with conn reference held. Caller must call CONN_DEC_REF. 2520 0 stevel * Only checks for connected entries i.e. no INADDR_ANY checks. 2521 0 stevel * Match on ifindex in addition to addresses. 
2522 0 stevel */ 2523 0 stevel conn_t * 2524 0 stevel ipcl_tcp_lookup_reversed_ipv6(ip6_t *ip6h, tcpha_t *tcpha, int min_state, 2525 3448 dh155122 uint_t ifindex, ip_stack_t *ipst) 2526 0 stevel { 2527 0 stevel tcp_t *tcp; 2528 0 stevel uint32_t ports; 2529 0 stevel uint16_t *pports; 2530 0 stevel connf_t *connfp; 2531 0 stevel conn_t *tconnp; 2532 0 stevel 2533 0 stevel pports = (uint16_t *)&ports; 2534 0 stevel pports[0] = tcpha->tha_fport; 2535 0 stevel pports[1] = tcpha->tha_lport; 2536 0 stevel 2537 3448 dh155122 connfp = &ipst->ips_ipcl_conn_fanout[IPCL_CONN_HASH_V6(ip6h->ip6_dst, 2538 4691 kcpoon ports, ipst)]; 2539 0 stevel 2540 0 stevel mutex_enter(&connfp->connf_lock); 2541 0 stevel for (tconnp = connfp->connf_head; tconnp != NULL; 2542 0 stevel tconnp = tconnp->conn_next) { 2543 0 stevel 2544 0 stevel tcp = tconnp->conn_tcp; 2545 0 stevel if (IPCL_CONN_MATCH_V6(tconnp, IPPROTO_TCP, 2546 0 stevel ip6h->ip6_dst, ip6h->ip6_src, ports) && 2547 0 stevel tcp->tcp_state >= min_state && 2548 11042 Erik (tconnp->conn_bound_if == 0 || 2549 11042 Erik tconnp->conn_bound_if == ifindex)) { 2550 0 stevel 2551 0 stevel CONN_INC_REF(tconnp); 2552 0 stevel mutex_exit(&connfp->connf_lock); 2553 0 stevel return (tconnp); 2554 0 stevel } 2555 0 stevel } 2556 0 stevel mutex_exit(&connfp->connf_lock); 2557 0 stevel return (NULL); 2558 0 stevel } 2559 0 stevel 2560 0 stevel /* 2561 1676 jpk * Finds a TCP/IPv4 listening connection; called by tcp_disconnect to locate 2562 1676 jpk * a listener when changing state. 2563 0 stevel */ 2564 0 stevel conn_t * 2565 3448 dh155122 ipcl_lookup_listener_v4(uint16_t lport, ipaddr_t laddr, zoneid_t zoneid, 2566 3448 dh155122 ip_stack_t *ipst) 2567 0 stevel { 2568 0 stevel connf_t *bind_connfp; 2569 0 stevel conn_t *connp; 2570 0 stevel tcp_t *tcp; 2571 0 stevel 2572 0 stevel /* 2573 0 stevel * Avoid false matches for packets sent to an IP destination of 2574 0 stevel * all zeros. 
2575 0 stevel */ 2576 0 stevel if (laddr == 0) 2577 0 stevel return (NULL); 2578 1676 jpk 2579 1676 jpk ASSERT(zoneid != ALL_ZONES); 2580 0 stevel 2581 3448 dh155122 bind_connfp = &ipst->ips_ipcl_bind_fanout[IPCL_BIND_HASH(lport, ipst)]; 2582 0 stevel mutex_enter(&bind_connfp->connf_lock); 2583 0 stevel for (connp = bind_connfp->connf_head; connp != NULL; 2584 0 stevel connp = connp->conn_next) { 2585 0 stevel tcp = connp->conn_tcp; 2586 0 stevel if (IPCL_BIND_MATCH(connp, IPPROTO_TCP, laddr, lport) && 2587 2263 sommerfe IPCL_ZONE_MATCH(connp, zoneid) && 2588 0 stevel (tcp->tcp_listener == NULL)) { 2589 0 stevel CONN_INC_REF(connp); 2590 0 stevel mutex_exit(&bind_connfp->connf_lock); 2591 0 stevel return (connp); 2592 0 stevel } 2593 0 stevel } 2594 0 stevel mutex_exit(&bind_connfp->connf_lock); 2595 0 stevel return (NULL); 2596 0 stevel } 2597 0 stevel 2598 1676 jpk /* 2599 1676 jpk * Finds a TCP/IPv6 listening connection; called by tcp_disconnect to locate 2600 1676 jpk * a listener when changing state. 2601 1676 jpk */ 2602 0 stevel conn_t * 2603 0 stevel ipcl_lookup_listener_v6(uint16_t lport, in6_addr_t *laddr, uint_t ifindex, 2604 3448 dh155122 zoneid_t zoneid, ip_stack_t *ipst) 2605 0 stevel { 2606 0 stevel connf_t *bind_connfp; 2607 0 stevel conn_t *connp = NULL; 2608 0 stevel tcp_t *tcp; 2609 0 stevel 2610 0 stevel /* 2611 0 stevel * Avoid false matches for packets sent to an IP destination of 2612 0 stevel * all zeros. 
2613 0 stevel */ 2614 0 stevel if (IN6_IS_ADDR_UNSPECIFIED(laddr)) 2615 0 stevel return (NULL); 2616 0 stevel 2617 1676 jpk ASSERT(zoneid != ALL_ZONES); 2618 0 stevel 2619 3448 dh155122 bind_connfp = &ipst->ips_ipcl_bind_fanout[IPCL_BIND_HASH(lport, ipst)]; 2620 0 stevel mutex_enter(&bind_connfp->connf_lock); 2621 0 stevel for (connp = bind_connfp->connf_head; connp != NULL; 2622 0 stevel connp = connp->conn_next) { 2623 0 stevel tcp = connp->conn_tcp; 2624 0 stevel if (IPCL_BIND_MATCH_V6(connp, IPPROTO_TCP, *laddr, lport) && 2625 2263 sommerfe IPCL_ZONE_MATCH(connp, zoneid) && 2626 11042 Erik (connp->conn_bound_if == 0 || 2627 11042 Erik connp->conn_bound_if == ifindex) && 2628 0 stevel tcp->tcp_listener == NULL) { 2629 0 stevel CONN_INC_REF(connp); 2630 0 stevel mutex_exit(&bind_connfp->connf_lock); 2631 0 stevel return (connp); 2632 0 stevel } 2633 0 stevel } 2634 0 stevel mutex_exit(&bind_connfp->connf_lock); 2635 0 stevel return (NULL); 2636 0 stevel } 2637 0 stevel 2638 741 masputra /* 2639 741 masputra * ipcl_get_next_conn 2640 741 masputra * get the next entry in the conn global list 2641 741 masputra * and put a reference on the next_conn. 2642 741 masputra * decrement the reference on the current conn. 2643 741 masputra * 2644 741 masputra * This is an iterator based walker function that also provides for 2645 741 masputra * some selection by the caller. It walks through the conn_hash bucket 2646 741 masputra * searching for the next valid connp in the list, and selects connections 2647 741 masputra * that are neither closed nor condemned. It also REFHOLDS the conn 2648 741 masputra * thus ensuring that the conn exists when the caller uses the conn. 
2649 741 masputra */ 2650 741 masputra conn_t * 2651 741 masputra ipcl_get_next_conn(connf_t *connfp, conn_t *connp, uint32_t conn_flags) 2652 741 masputra { 2653 741 masputra conn_t *next_connp; 2654 741 masputra 2655 741 masputra if (connfp == NULL) 2656 741 masputra return (NULL); 2657 741 masputra 2658 741 masputra mutex_enter(&connfp->connf_lock); 2659 741 masputra 2660 741 masputra next_connp = (connp == NULL) ? 2661 741 masputra connfp->connf_head : connp->conn_g_next; 2662 741 masputra 2663 741 masputra while (next_connp != NULL) { 2664 741 masputra mutex_enter(&next_connp->conn_lock); 2665 741 masputra if (!(next_connp->conn_flags & conn_flags) || 2666 741 masputra (next_connp->conn_state_flags & 2667 741 masputra (CONN_CONDEMNED | CONN_INCIPIENT))) { 2668 741 masputra /* 2669 741 masputra * This conn has been condemned or 2670 741 masputra * is closing, or the flags don't match 2671 741 masputra */ 2672 741 masputra mutex_exit(&next_connp->conn_lock); 2673 741 masputra next_connp = next_connp->conn_g_next; 2674 741 masputra continue; 2675 741 masputra } 2676 741 masputra CONN_INC_REF_LOCKED(next_connp); 2677 741 masputra mutex_exit(&next_connp->conn_lock); 2678 741 masputra break; 2679 741 masputra } 2680 741 masputra 2681 741 masputra mutex_exit(&connfp->connf_lock); 2682 741 masputra 2683 741 masputra if (connp != NULL) 2684 741 masputra CONN_DEC_REF(connp); 2685 741 masputra 2686 741 masputra return (next_connp); 2687 741 masputra } 2688 741 masputra 2689 0 stevel #ifdef CONN_DEBUG 2690 0 stevel /* 2691 0 stevel * Trace of the last NBUF refhold/refrele 2692 0 stevel */ 2693 0 stevel int 2694 0 stevel conn_trace_ref(conn_t *connp) 2695 0 stevel { 2696 0 stevel int last; 2697 0 stevel conn_trace_t *ctb; 2698 0 stevel 2699 0 stevel ASSERT(MUTEX_HELD(&connp->conn_lock)); 2700 0 stevel last = connp->conn_trace_last; 2701 0 stevel last++; 2702 0 stevel if (last == CONN_TRACE_MAX) 2703 0 stevel last = 0; 2704 0 stevel 2705 0 stevel ctb = 
&connp->conn_trace_buf[last]; 2706 5023 carlsonj ctb->ctb_depth = getpcstack(ctb->ctb_stack, CONN_STACK_DEPTH); 2707 0 stevel connp->conn_trace_last = last; 2708 0 stevel return (1); 2709 0 stevel } 2710 0 stevel 2711 0 stevel int 2712 0 stevel conn_untrace_ref(conn_t *connp) 2713 0 stevel { 2714 0 stevel int last; 2715 0 stevel conn_trace_t *ctb; 2716 0 stevel 2717 0 stevel ASSERT(MUTEX_HELD(&connp->conn_lock)); 2718 0 stevel last = connp->conn_trace_last; 2719 0 stevel last++; 2720 0 stevel if (last == CONN_TRACE_MAX) 2721 0 stevel last = 0; 2722 0 stevel 2723 0 stevel ctb = &connp->conn_trace_buf[last]; 2724 5023 carlsonj ctb->ctb_depth = getpcstack(ctb->ctb_stack, CONN_STACK_DEPTH); 2725 0 stevel connp->conn_trace_last = last; 2726 0 stevel return (1); 2727 0 stevel } 2728 0 stevel #endif 2729