1 0 stevel /* 2 0 stevel * CDDL HEADER START 3 0 stevel * 4 0 stevel * The contents of this file are subject to the terms of the 5 1503 ericheng * Common Development and Distribution License (the "License"). 6 1503 ericheng * You may not use this file except in compliance with the License. 7 0 stevel * 8 0 stevel * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 0 stevel * or http://www.opensolaris.org/os/licensing. 10 0 stevel * See the License for the specific language governing permissions 11 0 stevel * and limitations under the License. 12 0 stevel * 13 0 stevel * When distributing Covered Code, include this CDDL HEADER in each 14 0 stevel * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 0 stevel * If applicable, add the following below this CDDL HEADER, with the 16 0 stevel * fields enclosed by brackets "[]" replaced with your own identifying 17 0 stevel * information: Portions Copyright [yyyy] [name of copyright owner] 18 0 stevel * 19 0 stevel * CDDL HEADER END 20 0 stevel */ 21 0 stevel /* 22 9210 Thirumalai * Copyright 2009 Sun Microsystems, Inc. All rights reserved. 23 0 stevel * Use is subject to license terms. 24 0 stevel */ 25 0 stevel 26 0 stevel /* 27 0 stevel * IP interface to squeues. 28 0 stevel * 29 8275 Eric * IP uses squeues to force serialization of packets, both incoming and 30 8275 Eric * outgoing. Each squeue is associated with a connection instance (conn_t) 31 8275 Eric * above, and a soft ring (if enabled) below. Each CPU will have a default 32 8275 Eric * squeue for outbound connections, and each soft ring of an interface will 33 8275 Eric * have an squeue to which it sends incoming packets. squeues are never 34 8275 Eric * destroyed, and if they become unused they are kept around against future 35 8275 Eric * needs. 36 0 stevel * 37 8275 Eric * IP organizes its squeues using squeue sets (squeue_set_t). 
For each CPU 38 8275 Eric * in the system there will be one squeue set, all of whose squeues will be 39 8275 Eric * bound to that CPU, plus one additional set known as the unbound set. Sets 40 8275 Eric * associated with CPUs will have one default squeue, for outbound 41 8275 Eric * connections, and a linked list of squeues used by various NICs for inbound 42 8275 Eric * packets. The unbound set also has a linked list of squeues, but no default 43 8275 Eric * squeue. 44 8275 Eric * 45 8275 Eric * When a CPU goes offline its squeue set is destroyed, and all its squeues 46 8275 Eric * are moved to the unbound set. When a CPU comes online, a new squeue set is 47 8275 Eric * created and the default set is searched for a default squeue formerly bound 48 8275 Eric * to this CPU. If no default squeue is found, a new one is created. 49 8275 Eric * 50 8275 Eric * Two fields of the squeue_t, namely sq_next and sq_set, are owned by IP 51 8275 Eric * and not the squeue code. squeue.c will not touch them, and we can modify 52 8275 Eric * them without holding the squeue lock because of the guarantee that squeues 53 8275 Eric * are never destroyed. ip_squeue locks must be held, however. 54 8275 Eric * 55 8275 Eric * All the squeue sets are protected by a single lock, the sqset_lock. This 56 8275 Eric * is also used to protect the sq_next and sq_set fields of an squeue_t. 57 8275 Eric * 58 8275 Eric * The lock order is: cpu_lock --> ill_lock --> sqset_lock --> sq_lock 59 0 stevel * 60 0 stevel * There are two modes of associating connection with squeues. The first mode 61 0 stevel * associates each connection with the CPU that creates the connection (either 62 0 stevel * during open time or during accept time). The second mode associates each 63 0 stevel * connection with a random CPU, effectively distributing load over all CPUs 64 0 stevel * and all squeues in the system. The mode is controlled by the 65 0 stevel * ip_squeue_fanout variable. 
 *
 * NOTE: The fact that there is an association between each connection and
 * its squeue, and between each squeue and a CPU, does not mean that each
 * connection is always processed on this CPU and on this CPU only. Any thread
 * calling squeue_enter() may process the connection on whatever CPU it is
 * scheduled. The squeue to CPU binding is only relevant for the worker thread.
 *
 * INTERFACE:
 *
 * squeue_t *ip_squeue_get(ill_rx_ring_t *)
 *
 * Returns the squeue associated with an ill receive ring. If the ring is
 * not bound to a CPU, and we're currently servicing the interrupt which
 * generated the packet, then bind the squeue to CPU.
 *
 *
 * DR Notes
 * ========
 *
 * ip_squeue_init() registers a call-back function with the CPU DR
 * subsystem using register_cpu_setup_func(). The call-back function does two
 * things:
 *
 * o When the CPU is going off-line or unconfigured, the worker thread is
 *	unbound from the CPU. This allows the CPU unconfig code to move it to
 *	another CPU.
 *
 * o When the CPU is going online, it creates a new squeue for this CPU if
 *	necessary and binds the squeue worker thread to this CPU.
 *
 * TUNABLES:
 *
 * ip_squeue_fanout: used when TCP calls IP_SQUEUE_GET(). If 1, then
 * pick the default squeue from a random CPU, otherwise use our CPU's default
 * squeue.
 *
 * ip_squeue_fanout can be accessed and changed using ndd on /dev/tcp or
 * /dev/ip.
 *
 * ip_squeue_worker_wait: global value for the sq_wait field for all squeues
 * created.
This is the time squeue code waits before waking up the worker 107 8275 Eric * thread after queuing a request. 108 0 stevel */ 109 0 stevel 110 0 stevel #include <sys/types.h> 111 0 stevel #include <sys/debug.h> 112 0 stevel #include <sys/kmem.h> 113 0 stevel #include <sys/cpuvar.h> 114 0 stevel #include <sys/cmn_err.h> 115 0 stevel 116 0 stevel #include <inet/common.h> 117 0 stevel #include <inet/ip.h> 118 8275 Eric #include <netinet/ip6.h> 119 0 stevel #include <inet/ip_if.h> 120 8275 Eric #include <inet/ip_ire.h> 121 0 stevel #include <inet/nd.h> 122 0 stevel #include <inet/ipclassifier.h> 123 0 stevel #include <sys/types.h> 124 0 stevel #include <sys/conf.h> 125 0 stevel #include <sys/sunddi.h> 126 2546 carlsonj #include <sys/dlpi.h> 127 0 stevel #include <sys/squeue_impl.h> 128 8275 Eric #include <sys/tihdr.h> 129 8275 Eric #include <inet/udp_impl.h> 130 8275 Eric #include <sys/strsubr.h> 131 8275 Eric #include <sys/zone.h> 132 8275 Eric #include <sys/dld.h> 133 8130 George #include <sys/atomic.h> 134 0 stevel 135 0 stevel /* 136 8275 Eric * List of all created squeue sets. The list and its size are protected by 137 8275 Eric * sqset_lock. 138 0 stevel */ 139 8275 Eric static squeue_set_t **sqset_global_list; /* list 0 is the unbound list */ 140 8275 Eric static uint_t sqset_global_size; 141 8275 Eric kmutex_t sqset_lock; 142 1184 krgopi 143 0 stevel static void (*ip_squeue_create_callback)(squeue_t *) = NULL; 144 0 stevel 145 0 stevel /* 146 0 stevel * ip_squeue_worker_wait: global value for the sq_wait field for all squeues 147 0 stevel * created. This is the time squeue code waits before waking up the worker 148 0 stevel * thread after queuing a request. 
149 0 stevel */ 150 0 stevel uint_t ip_squeue_worker_wait = 10; 151 0 stevel 152 8275 Eric static squeue_t *ip_squeue_create(pri_t); 153 8275 Eric static squeue_set_t *ip_squeue_set_create(processorid_t); 154 0 stevel static int ip_squeue_cpu_setup(cpu_setup_t, int, void *); 155 8275 Eric static void ip_squeue_set_move(squeue_t *, squeue_set_t *); 156 8275 Eric static void ip_squeue_set_destroy(cpu_t *); 157 4360 meem static void ip_squeue_clean(void *, mblk_t *, void *); 158 0 stevel 159 0 stevel #define CPU_ISON(c) (c != NULL && CPU_ACTIVE(c) && (c->cpu_flags & CPU_EXISTS)) 160 0 stevel 161 8275 Eric static squeue_t * 162 8275 Eric ip_squeue_create(pri_t pri) 163 8275 Eric { 164 8275 Eric squeue_t *sqp; 165 8275 Eric 166 8275 Eric sqp = squeue_create(ip_squeue_worker_wait, pri); 167 8275 Eric ASSERT(sqp != NULL); 168 8275 Eric if (ip_squeue_create_callback != NULL) 169 8275 Eric ip_squeue_create_callback(sqp); 170 8275 Eric return (sqp); 171 8275 Eric } 172 8275 Eric 173 0 stevel /* 174 8275 Eric * Create a new squeue_set. If id == -1, then we're creating the unbound set, 175 8275 Eric * which should only happen once when we are first initialized. Otherwise id 176 8275 Eric * is the id of the CPU that needs a set, either because we are initializing 177 8275 Eric * or because the CPU has come online. 178 8275 Eric * 179 8275 Eric * If id != -1, then we need at a minimum to provide a default squeue for the 180 8275 Eric * new set. We search the unbound set for candidates, and if none are found we 181 8275 Eric * create a new one. 
182 0 stevel */ 183 0 stevel static squeue_set_t * 184 8275 Eric ip_squeue_set_create(processorid_t id) 185 0 stevel { 186 0 stevel squeue_set_t *sqs; 187 8275 Eric squeue_set_t *src = sqset_global_list[0]; 188 8275 Eric squeue_t **lastsqp, *sq; 189 8275 Eric squeue_t **defaultq_lastp = NULL; 190 0 stevel 191 8275 Eric sqs = kmem_zalloc(sizeof (squeue_set_t), KM_SLEEP); 192 8275 Eric sqs->sqs_cpuid = id; 193 0 stevel 194 8275 Eric if (id == -1) { 195 8275 Eric ASSERT(sqset_global_size == 0); 196 8275 Eric sqset_global_list[0] = sqs; 197 8275 Eric sqset_global_size = 1; 198 8275 Eric return (sqs); 199 0 stevel } 200 0 stevel 201 8275 Eric /* 202 8275 Eric * When we create an squeue set id != -1, we need to give it a 203 8275 Eric * default squeue, in order to support fanout of conns across 204 8275 Eric * CPUs. Try to find a former default squeue that matches this 205 8275 Eric * cpu id on the unbound squeue set. If no such squeue is found, 206 9979 Thirumalai * find some non-default TCP squeue that is free. If still no such 207 8275 Eric * candidate is found, create a new squeue. 208 8275 Eric */ 209 0 stevel 210 8275 Eric ASSERT(MUTEX_HELD(&cpu_lock)); 211 8275 Eric mutex_enter(&sqset_lock); 212 8275 Eric lastsqp = &src->sqs_head; 213 0 stevel 214 8275 Eric while (*lastsqp) { 215 8275 Eric if ((*lastsqp)->sq_bind == id && 216 8275 Eric (*lastsqp)->sq_state & SQS_DEFAULT) { 217 9979 Thirumalai /* 218 9979 Thirumalai * Exact match. 
Former default squeue of cpu 'id' 219 9979 Thirumalai */ 220 9979 Thirumalai ASSERT(!((*lastsqp)->sq_state & SQS_ILL_BOUND)); 221 8275 Eric defaultq_lastp = lastsqp; 222 8275 Eric break; 223 8275 Eric } 224 8275 Eric if (defaultq_lastp == NULL && 225 9979 Thirumalai !((*lastsqp)->sq_state & (SQS_ILL_BOUND | SQS_DEFAULT))) { 226 9979 Thirumalai /* 227 9979 Thirumalai * A free non-default TCP squeue 228 9979 Thirumalai */ 229 8275 Eric defaultq_lastp = lastsqp; 230 8275 Eric } 231 8275 Eric lastsqp = &(*lastsqp)->sq_next; 232 9979 Thirumalai } 233 0 stevel 234 9979 Thirumalai if (defaultq_lastp != NULL) { 235 8275 Eric /* Remove from src set and set SQS_DEFAULT */ 236 8275 Eric sq = *defaultq_lastp; 237 8275 Eric *defaultq_lastp = sq->sq_next; 238 8275 Eric sq->sq_next = NULL; 239 8275 Eric if (!(sq->sq_state & SQS_DEFAULT)) { 240 8275 Eric mutex_enter(&sq->sq_lock); 241 8275 Eric sq->sq_state |= SQS_DEFAULT; 242 8275 Eric mutex_exit(&sq->sq_lock); 243 8275 Eric } 244 8275 Eric } else { 245 8275 Eric sq = ip_squeue_create(SQUEUE_DEFAULT_PRIORITY); 246 8275 Eric sq->sq_state |= SQS_DEFAULT; 247 0 stevel } 248 0 stevel 249 8275 Eric sq->sq_set = sqs; 250 8275 Eric sqs->sqs_default = sq; 251 8275 Eric squeue_bind(sq, id); /* this locks squeue mutex */ 252 0 stevel 253 8275 Eric ASSERT(sqset_global_size <= NCPU); 254 0 stevel sqset_global_list[sqset_global_size++] = sqs; 255 8275 Eric mutex_exit(&sqset_lock); 256 0 stevel return (sqs); 257 8275 Eric } 258 8275 Eric 259 8275 Eric /* 260 8275 Eric * Called by ill_ring_add() to find an squeue to associate with a new ring. 
261 8275 Eric */ 262 8275 Eric 263 8275 Eric squeue_t * 264 8275 Eric ip_squeue_getfree(pri_t pri) 265 8275 Eric { 266 8275 Eric squeue_set_t *sqs = sqset_global_list[0]; 267 8275 Eric squeue_t *sq; 268 8275 Eric 269 8275 Eric mutex_enter(&sqset_lock); 270 8275 Eric for (sq = sqs->sqs_head; sq != NULL; sq = sq->sq_next) { 271 8275 Eric /* 272 9979 Thirumalai * Select a non-default TCP squeue that is free i.e. not 273 9979 Thirumalai * bound to any ill. 274 8275 Eric */ 275 8275 Eric if (!(sq->sq_state & (SQS_DEFAULT | SQS_ILL_BOUND))) 276 8275 Eric break; 277 8275 Eric } 278 8275 Eric 279 8275 Eric if (sq == NULL) { 280 8275 Eric sq = ip_squeue_create(pri); 281 8275 Eric sq->sq_set = sqs; 282 8275 Eric sq->sq_next = sqs->sqs_head; 283 8275 Eric sqs->sqs_head = sq; 284 8275 Eric } 285 8275 Eric 286 8275 Eric ASSERT(!(sq->sq_state & (SQS_POLL_THR_CONTROL | SQS_WORKER_THR_CONTROL | 287 8275 Eric SQS_POLL_CLEANUP_DONE | SQS_POLL_QUIESCE_DONE | 288 8275 Eric SQS_POLL_THR_QUIESCED))); 289 8275 Eric 290 8275 Eric mutex_enter(&sq->sq_lock); 291 8275 Eric sq->sq_state |= SQS_ILL_BOUND; 292 8275 Eric mutex_exit(&sq->sq_lock); 293 8275 Eric mutex_exit(&sqset_lock); 294 8275 Eric 295 8275 Eric if (sq->sq_priority != pri) { 296 8275 Eric thread_lock(sq->sq_worker); 297 8275 Eric (void) thread_change_pri(sq->sq_worker, pri, 0); 298 8275 Eric thread_unlock(sq->sq_worker); 299 8275 Eric 300 8275 Eric thread_lock(sq->sq_poll_thr); 301 8275 Eric (void) thread_change_pri(sq->sq_poll_thr, pri, 0); 302 8275 Eric thread_unlock(sq->sq_poll_thr); 303 8275 Eric 304 8275 Eric sq->sq_priority = pri; 305 8275 Eric } 306 8275 Eric return (sq); 307 0 stevel } 308 0 stevel 309 0 stevel /* 310 0 stevel * Initialize IP squeues. 
311 0 stevel */ 312 0 stevel void 313 0 stevel ip_squeue_init(void (*callback)(squeue_t *)) 314 0 stevel { 315 0 stevel int i; 316 8275 Eric squeue_set_t *sqs; 317 0 stevel 318 0 stevel ASSERT(sqset_global_list == NULL); 319 0 stevel 320 0 stevel ip_squeue_create_callback = callback; 321 0 stevel squeue_init(); 322 8275 Eric mutex_init(&sqset_lock, NULL, MUTEX_DEFAULT, NULL); 323 0 stevel sqset_global_list = 324 8275 Eric kmem_zalloc(sizeof (squeue_set_t *) * (NCPU+1), KM_SLEEP); 325 0 stevel sqset_global_size = 0; 326 8275 Eric /* 327 8275 Eric * We are called at system boot time and we don't 328 8275 Eric * expect memory allocation failure. 329 8275 Eric */ 330 8275 Eric sqs = ip_squeue_set_create(-1); 331 8275 Eric ASSERT(sqs != NULL); 332 8275 Eric 333 0 stevel mutex_enter(&cpu_lock); 334 0 stevel /* Create squeue for each active CPU available */ 335 0 stevel for (i = 0; i < NCPU; i++) { 336 8275 Eric cpu_t *cp = cpu_get(i); 337 0 stevel if (CPU_ISON(cp) && cp->cpu_squeue_set == NULL) { 338 8275 Eric /* 339 8275 Eric * We are called at system boot time and we don't 340 8275 Eric * expect memory allocation failure then 341 8275 Eric */ 342 8275 Eric cp->cpu_squeue_set = ip_squeue_set_create(cp->cpu_id); 343 8275 Eric ASSERT(cp->cpu_squeue_set != NULL); 344 0 stevel } 345 0 stevel } 346 0 stevel 347 0 stevel register_cpu_setup_func(ip_squeue_cpu_setup, NULL); 348 0 stevel mutex_exit(&cpu_lock); 349 0 stevel } 350 0 stevel 351 0 stevel /* 352 8275 Eric * Get a default squeue, either from the current CPU or a CPU derived by hash 353 8275 Eric * from the index argument, depending upon the setting of ip_squeue_fanout. 354 0 stevel */ 355 0 stevel squeue_t * 356 0 stevel ip_squeue_random(uint_t index) 357 0 stevel { 358 8275 Eric squeue_set_t *sqs = NULL; 359 8275 Eric squeue_t *sq; 360 0 stevel 361 8275 Eric /* 362 8275 Eric * The minimum value of sqset_global_size is 2, one for the unbound 363 8275 Eric * squeue set and another for the squeue set of the zeroth CPU. 
364 8275 Eric * Even though the value could be changing, it can never go below 2, 365 8275 Eric * so the assert does not need the lock protection. 366 8275 Eric */ 367 8275 Eric ASSERT(sqset_global_size > 1); 368 8275 Eric 369 8275 Eric /* Protect against changes to sqset_global_list */ 370 8275 Eric mutex_enter(&sqset_lock); 371 8275 Eric 372 8275 Eric if (!ip_squeue_fanout) 373 8275 Eric sqs = CPU->cpu_squeue_set; 374 8275 Eric 375 8275 Eric /* 376 8275 Eric * sqset_global_list[0] corresponds to the unbound squeue set. 377 8275 Eric * The computation below picks a set other than the unbound set. 378 8275 Eric */ 379 8275 Eric if (sqs == NULL) 380 8275 Eric sqs = sqset_global_list[(index % (sqset_global_size - 1)) + 1]; 381 8275 Eric sq = sqs->sqs_default; 382 8275 Eric 383 8275 Eric mutex_exit(&sqset_lock); 384 8275 Eric ASSERT(sq); 385 8275 Eric return (sq); 386 0 stevel } 387 0 stevel 388 8275 Eric /* 389 8275 Eric * Move squeue from its current set to newset. Not used for default squeues. 390 8275 Eric * Bind or unbind the worker thread as appropriate. 
391 8275 Eric */ 392 8275 Eric 393 4360 meem static void 394 8275 Eric ip_squeue_set_move(squeue_t *sq, squeue_set_t *newset) 395 0 stevel { 396 8275 Eric squeue_set_t *set; 397 8275 Eric squeue_t **lastsqp; 398 8275 Eric processorid_t cpuid = newset->sqs_cpuid; 399 0 stevel 400 8275 Eric ASSERT(!(sq->sq_state & SQS_DEFAULT)); 401 8275 Eric ASSERT(!MUTEX_HELD(&sq->sq_lock)); 402 8275 Eric ASSERT(MUTEX_HELD(&sqset_lock)); 403 0 stevel 404 8275 Eric set = sq->sq_set; 405 8275 Eric if (set == newset) 406 8275 Eric return; 407 8275 Eric 408 8275 Eric lastsqp = &set->sqs_head; 409 8275 Eric while (*lastsqp != sq) 410 8275 Eric lastsqp = &(*lastsqp)->sq_next; 411 8275 Eric 412 8275 Eric *lastsqp = sq->sq_next; 413 8275 Eric sq->sq_next = newset->sqs_head; 414 8275 Eric newset->sqs_head = sq; 415 8275 Eric sq->sq_set = newset; 416 8275 Eric if (cpuid == -1) 417 8275 Eric squeue_unbind(sq); 418 8275 Eric else 419 8275 Eric squeue_bind(sq, cpuid); 420 8275 Eric } 421 8275 Eric 422 8275 Eric /* 423 8275 Eric * Move squeue from its current set to cpuid's set and bind to cpuid. 424 8275 Eric */ 425 8275 Eric 426 8275 Eric int 427 8275 Eric ip_squeue_cpu_move(squeue_t *sq, processorid_t cpuid) 428 8275 Eric { 429 8275 Eric cpu_t *cpu; 430 8275 Eric squeue_set_t *set; 431 8275 Eric 432 8275 Eric if (sq->sq_state & SQS_DEFAULT) 433 8275 Eric return (-1); 434 8275 Eric 435 8275 Eric ASSERT(MUTEX_HELD(&cpu_lock)); 436 8275 Eric 437 8275 Eric cpu = cpu_get(cpuid); 438 8275 Eric if (!CPU_ISON(cpu)) 439 8275 Eric return (-1); 440 8275 Eric 441 8275 Eric mutex_enter(&sqset_lock); 442 8275 Eric set = cpu->cpu_squeue_set; 443 8275 Eric if (set != NULL) 444 8275 Eric ip_squeue_set_move(sq, set); 445 8275 Eric mutex_exit(&sqset_lock); 446 8275 Eric return ((set == NULL) ? -1 : 0); 447 8275 Eric } 448 8275 Eric 449 8275 Eric /* 450 8275 Eric * The mac layer is calling, asking us to move an squeue to a 451 8275 Eric * new CPU. This routine is called with cpu_lock held. 
452 8275 Eric */ 453 8275 Eric void 454 8275 Eric ip_squeue_bind_ring(ill_t *ill, ill_rx_ring_t *rx_ring, processorid_t cpuid) 455 8275 Eric { 456 8275 Eric ASSERT(ILL_MAC_PERIM_HELD(ill)); 457 8275 Eric ASSERT(rx_ring->rr_ill == ill); 458 8275 Eric 459 8275 Eric mutex_enter(&ill->ill_lock); 460 8275 Eric if (rx_ring->rr_ring_state == RR_FREE || 461 8275 Eric rx_ring->rr_ring_state == RR_FREE_INPROG) { 462 8275 Eric mutex_exit(&ill->ill_lock); 463 0 stevel return; 464 0 stevel } 465 0 stevel 466 8275 Eric if (ip_squeue_cpu_move(rx_ring->rr_sqp, cpuid) != -1) 467 8275 Eric rx_ring->rr_ring_state = RR_SQUEUE_BOUND; 468 8275 Eric 469 8275 Eric mutex_exit(&ill->ill_lock); 470 8275 Eric } 471 8275 Eric 472 8275 Eric void * 473 8275 Eric ip_squeue_add_ring(ill_t *ill, void *mrp) 474 8275 Eric { 475 8275 Eric mac_rx_fifo_t *mrfp = (mac_rx_fifo_t *)mrp; 476 8275 Eric ill_rx_ring_t *rx_ring, *ring_tbl; 477 8275 Eric int ip_rx_index; 478 8275 Eric squeue_t *sq = NULL; 479 8275 Eric pri_t pri; 480 8275 Eric 481 8275 Eric ASSERT(ILL_MAC_PERIM_HELD(ill)); 482 8275 Eric ASSERT(mrfp->mrf_type == MAC_RX_FIFO); 483 8275 Eric ASSERT(ill->ill_dld_capab != NULL); 484 8275 Eric 485 8275 Eric ring_tbl = ill->ill_dld_capab->idc_poll.idp_ring_tbl; 486 8275 Eric 487 8275 Eric mutex_enter(&ill->ill_lock); 488 8275 Eric for (ip_rx_index = 0; ip_rx_index < ILL_MAX_RINGS; ip_rx_index++) { 489 8275 Eric rx_ring = &ring_tbl[ip_rx_index]; 490 8275 Eric if (rx_ring->rr_ring_state == RR_FREE) 491 8275 Eric break; 492 8275 Eric } 493 8275 Eric 494 8275 Eric if (ip_rx_index == ILL_MAX_RINGS) { 495 8275 Eric /* 496 8275 Eric * We ran out of ILL_MAX_RINGS worth rx_ring structures. If 497 8275 Eric * we have devices which can overwhelm this limit, 498 8275 Eric * ILL_MAX_RING should be made configurable. 
Meanwhile it 499 8275 Eric * cause no panic because driver will pass ip_input a NULL 500 8275 Eric * handle which will make IP allocate the default squeue and 501 8275 Eric * Polling mode will not be used for this ring. 502 8275 Eric */ 503 8275 Eric cmn_err(CE_NOTE, 504 8275 Eric "Reached maximum number of receiving rings (%d) for %s\n", 505 8275 Eric ILL_MAX_RINGS, ill->ill_name); 506 8275 Eric mutex_exit(&ill->ill_lock); 507 8275 Eric return (NULL); 508 8275 Eric } 509 8275 Eric 510 8275 Eric bzero(rx_ring, sizeof (ill_rx_ring_t)); 511 8275 Eric rx_ring->rr_rx = (ip_mac_rx_t)mrfp->mrf_receive; 512 8275 Eric /* XXX: Hard code it to tcp accept for now */ 513 8275 Eric rx_ring->rr_ip_accept = (ip_accept_t)ip_accept_tcp; 514 8275 Eric 515 8275 Eric rx_ring->rr_intr_handle = mrfp->mrf_intr_handle; 516 8275 Eric rx_ring->rr_intr_enable = (ip_mac_intr_enable_t)mrfp->mrf_intr_enable; 517 8275 Eric rx_ring->rr_intr_disable = 518 8275 Eric (ip_mac_intr_disable_t)mrfp->mrf_intr_disable; 519 8275 Eric rx_ring->rr_rx_handle = mrfp->mrf_rx_arg; 520 8275 Eric rx_ring->rr_ill = ill; 521 8275 Eric 522 8275 Eric pri = mrfp->mrf_flow_priority; 523 8275 Eric 524 8275 Eric sq = ip_squeue_getfree(pri); 525 8275 Eric 526 8275 Eric mutex_enter(&sq->sq_lock); 527 8275 Eric sq->sq_rx_ring = rx_ring; 528 8275 Eric rx_ring->rr_sqp = sq; 529 8275 Eric 530 8275 Eric sq->sq_state |= SQS_POLL_CAPAB; 531 8275 Eric 532 8275 Eric rx_ring->rr_ring_state = RR_SQUEUE_UNBOUND; 533 8275 Eric sq->sq_ill = ill; 534 8275 Eric mutex_exit(&sq->sq_lock); 535 8275 Eric mutex_exit(&ill->ill_lock); 536 8275 Eric 537 8275 Eric DTRACE_PROBE4(ill__ring__add, char *, ill->ill_name, ill_t *, ill, int, 538 8275 Eric ip_rx_index, void *, mrfp->mrf_rx_arg); 539 8275 Eric 540 8275 Eric /* Assign the squeue to the specified CPU as well */ 541 8275 Eric mutex_enter(&cpu_lock); 542 8275 Eric (void) ip_squeue_bind_ring(ill, rx_ring, mrfp->mrf_cpu_id); 543 8275 Eric mutex_exit(&cpu_lock); 544 8275 Eric 545 8275 Eric return 
(rx_ring); 546 8275 Eric } 547 8275 Eric 548 8275 Eric /* 549 8275 Eric * sanitize the squeue etc. Some of the processing 550 8275 Eric * needs to be done from inside the perimeter. 551 8275 Eric */ 552 8275 Eric void 553 8275 Eric ip_squeue_clean_ring(ill_t *ill, ill_rx_ring_t *rx_ring) 554 8275 Eric { 555 8275 Eric squeue_t *sqp; 556 8275 Eric 557 8275 Eric ASSERT(ILL_MAC_PERIM_HELD(ill)); 558 8275 Eric ASSERT(rx_ring != NULL); 559 8275 Eric 560 8275 Eric /* Just clean one squeue */ 561 8275 Eric mutex_enter(&ill->ill_lock); 562 8275 Eric if (rx_ring->rr_ring_state == RR_FREE) { 563 8275 Eric mutex_exit(&ill->ill_lock); 564 8275 Eric return; 565 8275 Eric } 566 8275 Eric rx_ring->rr_ring_state = RR_FREE_INPROG; 567 8275 Eric sqp = rx_ring->rr_sqp; 568 8275 Eric 569 0 stevel mutex_enter(&sqp->sq_lock); 570 8275 Eric sqp->sq_state |= SQS_POLL_CLEANUP; 571 8275 Eric cv_signal(&sqp->sq_worker_cv); 572 8275 Eric mutex_exit(&ill->ill_lock); 573 8275 Eric while (!(sqp->sq_state & SQS_POLL_CLEANUP_DONE)) 574 8275 Eric cv_wait(&sqp->sq_ctrlop_done_cv, &sqp->sq_lock); 575 9979 Thirumalai sqp->sq_state &= ~SQS_POLL_CLEANUP_DONE; 576 8275 Eric 577 8275 Eric ASSERT(!(sqp->sq_state & (SQS_POLL_THR_CONTROL | 578 8275 Eric SQS_WORKER_THR_CONTROL | SQS_POLL_QUIESCE_DONE | 579 8275 Eric SQS_POLL_THR_QUIESCED))); 580 8275 Eric 581 8275 Eric cv_signal(&sqp->sq_worker_cv); 582 0 stevel mutex_exit(&sqp->sq_lock); 583 0 stevel 584 8275 Eric /* 585 9979 Thirumalai * Move the squeue to sqset_global_list[0] which holds the set of 586 9979 Thirumalai * squeues not bound to any cpu. Note that the squeue is still 587 9979 Thirumalai * considered bound to an ill as long as SQS_ILL_BOUND is set. 
588 8275 Eric */ 589 8275 Eric mutex_enter(&sqset_lock); 590 8275 Eric ip_squeue_set_move(sqp, sqset_global_list[0]); 591 8275 Eric mutex_exit(&sqset_lock); 592 9979 Thirumalai 593 9979 Thirumalai /* 594 9979 Thirumalai * CPU going offline can also trigger a move of the squeue to the 595 9979 Thirumalai * unbound set sqset_global_list[0]. However the squeue won't be 596 9979 Thirumalai * recycled for the next use as long as the SQS_ILL_BOUND flag 597 9979 Thirumalai * is set. Hence we clear the SQS_ILL_BOUND flag only towards the 598 9979 Thirumalai * end after the move. 599 9979 Thirumalai */ 600 9979 Thirumalai mutex_enter(&sqp->sq_lock); 601 9979 Thirumalai sqp->sq_state &= ~SQS_ILL_BOUND; 602 9979 Thirumalai mutex_exit(&sqp->sq_lock); 603 0 stevel 604 0 stevel mutex_enter(&ill->ill_lock); 605 8275 Eric rx_ring->rr_ring_state = RR_FREE; 606 0 stevel mutex_exit(&ill->ill_lock); 607 4360 meem } 608 4360 meem 609 4360 meem /* 610 8275 Eric * Stop the squeue from polling. This needs to be done 611 8275 Eric * from inside the perimeter. 612 4360 meem */ 613 8275 Eric void 614 8275 Eric ip_squeue_quiesce_ring(ill_t *ill, ill_rx_ring_t *rx_ring) 615 4360 meem { 616 4360 meem squeue_t *sqp; 617 4360 meem 618 8275 Eric ASSERT(ILL_MAC_PERIM_HELD(ill)); 619 4360 meem ASSERT(rx_ring != NULL); 620 4360 meem 621 8275 Eric sqp = rx_ring->rr_sqp; 622 8275 Eric mutex_enter(&sqp->sq_lock); 623 8275 Eric sqp->sq_state |= SQS_POLL_QUIESCE; 624 8275 Eric cv_signal(&sqp->sq_worker_cv); 625 8275 Eric while (!(sqp->sq_state & SQS_POLL_QUIESCE_DONE)) 626 8275 Eric cv_wait(&sqp->sq_ctrlop_done_cv, &sqp->sq_lock); 627 8275 Eric 628 8275 Eric mutex_exit(&sqp->sq_lock); 629 8275 Eric } 630 8275 Eric 631 8275 Eric /* 632 8275 Eric * Restart polling etc. Needs to be inside the perimeter to 633 8275 Eric * prevent races. 
634 8275 Eric */ 635 8275 Eric void 636 8275 Eric ip_squeue_restart_ring(ill_t *ill, ill_rx_ring_t *rx_ring) 637 8275 Eric { 638 8275 Eric squeue_t *sqp; 639 8275 Eric 640 8275 Eric ASSERT(ILL_MAC_PERIM_HELD(ill)); 641 8275 Eric ASSERT(rx_ring != NULL); 642 8275 Eric 643 8275 Eric sqp = rx_ring->rr_sqp; 644 8275 Eric mutex_enter(&sqp->sq_lock); 645 4360 meem /* 646 8275 Eric * Handle change in number of rings between the quiesce and 647 8275 Eric * restart operations by checking for a previous quiesce before 648 8275 Eric * attempting a restart. 649 4360 meem */ 650 8275 Eric if (!(sqp->sq_state & SQS_POLL_QUIESCE_DONE)) { 651 8275 Eric mutex_exit(&sqp->sq_lock); 652 4360 meem return; 653 4360 meem } 654 8275 Eric sqp->sq_state |= SQS_POLL_RESTART; 655 8275 Eric cv_signal(&sqp->sq_worker_cv); 656 8275 Eric while (!(sqp->sq_state & SQS_POLL_RESTART_DONE)) 657 8275 Eric cv_wait(&sqp->sq_ctrlop_done_cv, &sqp->sq_lock); 658 8275 Eric sqp->sq_state &= ~SQS_POLL_RESTART_DONE; 659 8275 Eric mutex_exit(&sqp->sq_lock); 660 4360 meem } 661 4360 meem 662 8275 Eric /* 663 8275 Eric * sanitize all squeues associated with the ill. 664 8275 Eric */ 665 4360 meem void 666 4360 meem ip_squeue_clean_all(ill_t *ill) 667 4360 meem { 668 4360 meem int idx; 669 8275 Eric ill_rx_ring_t *rx_ring; 670 4360 meem 671 4360 meem for (idx = 0; idx < ILL_MAX_RINGS; idx++) { 672 8275 Eric rx_ring = &ill->ill_dld_capab->idc_poll.idp_ring_tbl[idx]; 673 8275 Eric ip_squeue_clean_ring(ill, rx_ring); 674 4360 meem } 675 1184 krgopi } 676 1184 krgopi 677 1184 krgopi /* 678 8275 Eric * Used by IP to get the squeue associated with a ring. If the squeue isn't 679 8275 Eric * yet bound to a CPU, and we're being called directly from the NIC's 680 8275 Eric * interrupt, then we know what CPU we want to assign the squeue to, so 681 8275 Eric * dispatch that task to a taskq. 
682 0 stevel */ 683 0 stevel squeue_t * 684 0 stevel ip_squeue_get(ill_rx_ring_t *ill_rx_ring) 685 0 stevel { 686 0 stevel squeue_t *sqp; 687 0 stevel 688 8275 Eric if ((ill_rx_ring == NULL) || ((sqp = ill_rx_ring->rr_sqp) == NULL)) 689 11066 rafael return (IP_SQUEUE_GET(CPU_PSEUDO_RANDOM())); 690 0 stevel 691 8275 Eric return (sqp); 692 0 stevel } 693 0 stevel 694 0 stevel /* 695 8275 Eric * Called when a CPU goes offline. It's squeue_set_t is destroyed, and all 696 8275 Eric * squeues are unboudn and moved to the unbound set. 697 0 stevel */ 698 8275 Eric static void 699 8275 Eric ip_squeue_set_destroy(cpu_t *cpu) 700 8275 Eric { 701 8275 Eric int i; 702 8275 Eric squeue_t *sqp, *lastsqp = NULL; 703 8275 Eric squeue_set_t *sqs, *unbound = sqset_global_list[0]; 704 0 stevel 705 8275 Eric mutex_enter(&sqset_lock); 706 8275 Eric if ((sqs = cpu->cpu_squeue_set) == NULL) { 707 8275 Eric mutex_exit(&sqset_lock); 708 8275 Eric return; 709 0 stevel } 710 0 stevel 711 8275 Eric /* Move all squeues to unbound set */ 712 0 stevel 713 8275 Eric for (sqp = sqs->sqs_head; sqp; lastsqp = sqp, sqp = sqp->sq_next) { 714 8275 Eric squeue_unbind(sqp); 715 8275 Eric sqp->sq_set = unbound; 716 8275 Eric } 717 8275 Eric if (sqs->sqs_head) { 718 8275 Eric lastsqp->sq_next = unbound->sqs_head; 719 8275 Eric unbound->sqs_head = sqs->sqs_head; 720 8275 Eric } 721 0 stevel 722 8275 Eric /* Also move default squeue to unbound set */ 723 0 stevel 724 8275 Eric sqp = sqs->sqs_default; 725 9979 Thirumalai ASSERT(sqp != NULL); 726 8275 Eric ASSERT((sqp->sq_state & (SQS_DEFAULT|SQS_ILL_BOUND)) == SQS_DEFAULT); 727 0 stevel 728 8275 Eric sqp->sq_next = unbound->sqs_head; 729 8275 Eric unbound->sqs_head = sqp; 730 8275 Eric squeue_unbind(sqp); 731 8275 Eric sqp->sq_set = unbound; 732 0 stevel 733 8275 Eric for (i = 1; i < sqset_global_size; i++) 734 8275 Eric if (sqset_global_list[i] == sqs) 735 8275 Eric break; 736 0 stevel 737 8275 Eric ASSERT(i < sqset_global_size); 738 8275 Eric 
sqset_global_list[i] = sqset_global_list[sqset_global_size - 1]; 739 8275 Eric sqset_global_list[sqset_global_size - 1] = NULL; 740 8275 Eric sqset_global_size--; 741 8275 Eric 742 8275 Eric mutex_exit(&sqset_lock); 743 8275 Eric kmem_free(sqs, sizeof (*sqs)); 744 0 stevel } 745 0 stevel 746 0 stevel /* 747 0 stevel * Reconfiguration callback 748 0 stevel */ 749 0 stevel /* ARGSUSED */ 750 0 stevel static int 751 0 stevel ip_squeue_cpu_setup(cpu_setup_t what, int id, void *arg) 752 0 stevel { 753 8275 Eric cpu_t *cp = cpu_get(id); 754 0 stevel 755 0 stevel ASSERT(MUTEX_HELD(&cpu_lock)); 756 0 stevel switch (what) { 757 405 akolb case CPU_CONFIG: 758 0 stevel case CPU_ON: 759 0 stevel case CPU_INIT: 760 0 stevel case CPU_CPUPART_IN: 761 9210 Thirumalai if (CPU_ISON(cp) && cp->cpu_squeue_set == NULL) 762 8275 Eric cp->cpu_squeue_set = ip_squeue_set_create(cp->cpu_id); 763 0 stevel break; 764 0 stevel case CPU_UNCONFIG: 765 0 stevel case CPU_OFF: 766 0 stevel case CPU_CPUPART_OUT: 767 0 stevel if (cp->cpu_squeue_set != NULL) { 768 8275 Eric ip_squeue_set_destroy(cp); 769 8275 Eric cp->cpu_squeue_set = NULL; 770 0 stevel } 771 0 stevel break; 772 0 stevel default: 773 0 stevel break; 774 0 stevel } 775 0 stevel return (0); 776 0 stevel } 777