Home | History | Annotate | Download | only in os
      1 /*
      2  * CDDL HEADER START
      3  *
      4  * The contents of this file are subject to the terms of the
      5  * Common Development and Distribution License (the "License").
      6  * You may not use this file except in compliance with the License.
      7  *
      8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
      9  * or http://www.opensolaris.org/os/licensing.
     10  * See the License for the specific language governing permissions
     11  * and limitations under the License.
     12  *
     13  * When distributing Covered Code, include this CDDL HEADER in each
     14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
     15  * If applicable, add the following below this CDDL HEADER, with the
     16  * fields enclosed by brackets "[]" replaced with your own identifying
     17  * information: Portions Copyright [yyyy] [name of copyright owner]
     18  *
     19  * CDDL HEADER END
     20  */
     21 
     22 /*
     23  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
     24  * Use is subject to license terms.
     25  */
     26 
     27 #include <sys/param.h>
     28 #include <sys/sysmacros.h>
     29 #include <sys/vm.h>
     30 #include <sys/proc.h>
     31 #include <sys/tuneable.h>
     32 #include <sys/systm.h>
     33 #include <sys/cmn_err.h>
     34 #include <sys/debug.h>
     35 #include <sys/sdt.h>
     36 #include <sys/mutex.h>
     37 #include <sys/bitmap.h>
     38 #include <sys/atomic.h>
     39 #include <sys/kobj.h>
     40 #include <sys/disp.h>
     41 #include <vm/seg_kmem.h>
     42 #include <sys/zone.h>
     43 #include <sys/netstack.h>
     44 
     45 /*
     46  * What we use so that the zones framework can tell us about new zones,
     47  * which we use to create new stacks.
     48  */
     49 static zone_key_t netstack_zone_key;
     50 
     51 static int	netstack_initialized = 0;
     52 
     53 /*
     54  * Track the registered netstacks.
     55  * The global lock protects
     56  * - ns_reg
     57  * - the list starting at netstack_head and following the netstack_next
     58  *   pointers.
     59  */
     60 static kmutex_t netstack_g_lock;
     61 
     62 /*
     63  * Registry of netstacks with their create/shutdown/destory functions.
     64  */
     65 static struct netstack_registry	ns_reg[NS_MAX];
     66 
     67 /*
     68  * Global list of existing stacks.  We use this when a new zone with
     69  * an exclusive IP instance is created.
     70  *
     71  * Note that in some cases a netstack_t needs to stay around after the zone
     72  * has gone away. This is because there might be outstanding references
     73  * (from TCP TIME_WAIT connections, IPsec state, etc). The netstack_t data
     74  * structure and all the foo_stack_t's hanging off of it will be cleaned up
     75  * when the last reference to it is dropped.
     76  * However, the same zone might be rebooted. That is handled using the
     77  * assumption that the zones framework picks a new zoneid each time a zone
     78  * is (re)booted. We assert for that condition in netstack_zone_create().
     79  * Thus the old netstack_t can take its time for things to time out.
     80  */
     81 static netstack_t *netstack_head;
     82 
     83 /*
     84  * To support kstat_create_netstack() using kstat_zone_add we need
     85  * to track both
     86  *  - all zoneids that use the global/shared stack
     87  *  - all kstats that have been added for the shared stack
     88  */
     89 struct shared_zone_list {
     90 	struct shared_zone_list *sz_next;
     91 	zoneid_t		sz_zoneid;
     92 };
     93 
     94 struct shared_kstat_list {
     95 	struct shared_kstat_list *sk_next;
     96 	kstat_t			 *sk_kstat;
     97 };
     98 
     99 static kmutex_t netstack_shared_lock;	/* protects the following two */
    100 static struct shared_zone_list	*netstack_shared_zones;
    101 static struct shared_kstat_list	*netstack_shared_kstats;
    102 
    103 static void	*netstack_zone_create(zoneid_t zoneid);
    104 static void	netstack_zone_shutdown(zoneid_t zoneid, void *arg);
    105 static void	netstack_zone_destroy(zoneid_t zoneid, void *arg);
    106 
    107 static void	netstack_shared_zone_add(zoneid_t zoneid);
    108 static void	netstack_shared_zone_remove(zoneid_t zoneid);
    109 static void	netstack_shared_kstat_add(kstat_t *ks);
    110 static void	netstack_shared_kstat_remove(kstat_t *ks);
    111 
    112 typedef boolean_t applyfn_t(kmutex_t *, netstack_t *, int);
    113 
    114 static void	apply_all_netstacks(int, applyfn_t *);
    115 static void	apply_all_modules(netstack_t *, applyfn_t *);
    116 static void	apply_all_modules_reverse(netstack_t *, applyfn_t *);
    117 static boolean_t netstack_apply_create(kmutex_t *, netstack_t *, int);
    118 static boolean_t netstack_apply_shutdown(kmutex_t *, netstack_t *, int);
    119 static boolean_t netstack_apply_destroy(kmutex_t *, netstack_t *, int);
    120 static boolean_t wait_for_zone_creator(netstack_t *, kmutex_t *);
    121 static boolean_t wait_for_nms_inprogress(netstack_t *, nm_state_t *,
    122     kmutex_t *);
    123 
    124 void
    125 netstack_init(void)
    126 {
    127 	mutex_init(&netstack_g_lock, NULL, MUTEX_DEFAULT, NULL);
    128 	mutex_init(&netstack_shared_lock, NULL, MUTEX_DEFAULT, NULL);
    129 
    130 	netstack_initialized = 1;
    131 
    132 	/*
    133 	 * We want to be informed each time a zone is created or
    134 	 * destroyed in the kernel, so we can maintain the
    135 	 * stack instance information.
    136 	 */
    137 	zone_key_create(&netstack_zone_key, netstack_zone_create,
    138 	    netstack_zone_shutdown, netstack_zone_destroy);
    139 }
    140 
    141 /*
    142  * Register a new module with the framework.
    143  * This registers interest in changes to the set of netstacks.
    144  * The createfn and destroyfn are required, but the shutdownfn can be
    145  * NULL.
    146  * Note that due to the current zsd implementation, when the create
    147  * function is called the zone isn't fully present, thus functions
    148  * like zone_find_by_* will fail, hence the create function can not
    149  * use many zones kernel functions including zcmn_err().
    150  */
    151 void
    152 netstack_register(int moduleid,
    153     void *(*module_create)(netstackid_t, netstack_t *),
    154     void (*module_shutdown)(netstackid_t, void *),
    155     void (*module_destroy)(netstackid_t, void *))
    156 {
    157 	netstack_t *ns;
    158 
    159 	ASSERT(netstack_initialized);
    160 	ASSERT(moduleid >= 0 && moduleid < NS_MAX);
    161 	ASSERT(module_create != NULL);
    162 
    163 	/*
    164 	 * Make instances created after this point in time run the create
    165 	 * callback.
    166 	 */
    167 	mutex_enter(&netstack_g_lock);
    168 	ASSERT(ns_reg[moduleid].nr_create == NULL);
    169 	ASSERT(ns_reg[moduleid].nr_flags == 0);
    170 	ns_reg[moduleid].nr_create = module_create;
    171 	ns_reg[moduleid].nr_shutdown = module_shutdown;
    172 	ns_reg[moduleid].nr_destroy = module_destroy;
    173 	ns_reg[moduleid].nr_flags = NRF_REGISTERED;
    174 
    175 	/*
    176 	 * Determine the set of stacks that exist before we drop the lock.
    177 	 * Set NSS_CREATE_NEEDED for each of those.
    178 	 * netstacks which have been deleted will have NSS_CREATE_COMPLETED
    179 	 * set, but check NSF_CLOSING to be sure.
    180 	 */
    181 	for (ns = netstack_head; ns != NULL; ns = ns->netstack_next) {
    182 		nm_state_t *nms = &ns->netstack_m_state[moduleid];
    183 
    184 		mutex_enter(&ns->netstack_lock);
    185 		if (!(ns->netstack_flags & NSF_CLOSING) &&
    186 		    (nms->nms_flags & NSS_CREATE_ALL) == 0) {
    187 			nms->nms_flags |= NSS_CREATE_NEEDED;
    188 			DTRACE_PROBE2(netstack__create__needed,
    189 			    netstack_t *, ns, int, moduleid);
    190 		}
    191 		mutex_exit(&ns->netstack_lock);
    192 	}
    193 	mutex_exit(&netstack_g_lock);
    194 
    195 	/*
    196 	 * At this point in time a new instance can be created or an instance
    197 	 * can be destroyed, or some other module can register or unregister.
    198 	 * Make sure we either run all the create functions for this moduleid
    199 	 * or we wait for any other creators for this moduleid.
    200 	 */
    201 	apply_all_netstacks(moduleid, netstack_apply_create);
    202 }
    203 
    204 void
    205 netstack_unregister(int moduleid)
    206 {
    207 	netstack_t *ns;
    208 
    209 	ASSERT(moduleid >= 0 && moduleid < NS_MAX);
    210 
    211 	ASSERT(ns_reg[moduleid].nr_create != NULL);
    212 	ASSERT(ns_reg[moduleid].nr_flags & NRF_REGISTERED);
    213 
    214 	mutex_enter(&netstack_g_lock);
    215 	/*
    216 	 * Determine the set of stacks that exist before we drop the lock.
    217 	 * Set NSS_SHUTDOWN_NEEDED and NSS_DESTROY_NEEDED for each of those.
    218 	 * That ensures that when we return all the callbacks for existing
    219 	 * instances have completed. And since we set NRF_DYING no new
    220 	 * instances can use this module.
    221 	 */
    222 	for (ns = netstack_head; ns != NULL; ns = ns->netstack_next) {
    223 		nm_state_t *nms = &ns->netstack_m_state[moduleid];
    224 
    225 		mutex_enter(&ns->netstack_lock);
    226 		if (ns_reg[moduleid].nr_shutdown != NULL &&
    227 		    (nms->nms_flags & NSS_CREATE_COMPLETED) &&
    228 		    (nms->nms_flags & NSS_SHUTDOWN_ALL) == 0) {
    229 			nms->nms_flags |= NSS_SHUTDOWN_NEEDED;
    230 			DTRACE_PROBE2(netstack__shutdown__needed,
    231 			    netstack_t *, ns, int, moduleid);
    232 		}
    233 		if ((ns_reg[moduleid].nr_flags & NRF_REGISTERED) &&
    234 		    ns_reg[moduleid].nr_destroy != NULL &&
    235 		    (nms->nms_flags & NSS_CREATE_COMPLETED) &&
    236 		    (nms->nms_flags & NSS_DESTROY_ALL) == 0) {
    237 			nms->nms_flags |= NSS_DESTROY_NEEDED;
    238 			DTRACE_PROBE2(netstack__destroy__needed,
    239 			    netstack_t *, ns, int, moduleid);
    240 		}
    241 		mutex_exit(&ns->netstack_lock);
    242 	}
    243 	/*
    244 	 * Prevent any new netstack from calling the registered create
    245 	 * function, while keeping the function pointers in place until the
    246 	 * shutdown and destroy callbacks are complete.
    247 	 */
    248 	ns_reg[moduleid].nr_flags |= NRF_DYING;
    249 	mutex_exit(&netstack_g_lock);
    250 
    251 	apply_all_netstacks(moduleid, netstack_apply_shutdown);
    252 	apply_all_netstacks(moduleid, netstack_apply_destroy);
    253 
    254 	/*
    255 	 * Clear the nms_flags so that we can handle this module
    256 	 * being loaded again.
    257 	 * Also remove the registered functions.
    258 	 */
    259 	mutex_enter(&netstack_g_lock);
    260 	ASSERT(ns_reg[moduleid].nr_flags & NRF_REGISTERED);
    261 	ASSERT(ns_reg[moduleid].nr_flags & NRF_DYING);
    262 	for (ns = netstack_head; ns != NULL; ns = ns->netstack_next) {
    263 		nm_state_t *nms = &ns->netstack_m_state[moduleid];
    264 
    265 		mutex_enter(&ns->netstack_lock);
    266 		if (nms->nms_flags & NSS_DESTROY_COMPLETED) {
    267 			nms->nms_flags = 0;
    268 			DTRACE_PROBE2(netstack__destroy__done,
    269 			    netstack_t *, ns, int, moduleid);
    270 		}
    271 		mutex_exit(&ns->netstack_lock);
    272 	}
    273 
    274 	ns_reg[moduleid].nr_create = NULL;
    275 	ns_reg[moduleid].nr_shutdown = NULL;
    276 	ns_reg[moduleid].nr_destroy = NULL;
    277 	ns_reg[moduleid].nr_flags = 0;
    278 	mutex_exit(&netstack_g_lock);
    279 }
    280 
    281 /*
    282  * Lookup and/or allocate a netstack for this zone.
    283  */
    284 static void *
    285 netstack_zone_create(zoneid_t zoneid)
    286 {
    287 	netstackid_t stackid;
    288 	netstack_t *ns;
    289 	netstack_t **nsp;
    290 	zone_t	*zone;
    291 	int i;
    292 
    293 	ASSERT(netstack_initialized);
    294 
    295 	zone = zone_find_by_id_nolock(zoneid);
    296 	ASSERT(zone != NULL);
    297 
    298 	if (zone->zone_flags & ZF_NET_EXCL) {
    299 		stackid = zoneid;
    300 	} else {
    301 		/* Look for the stack instance for the global */
    302 		stackid = GLOBAL_NETSTACKID;
    303 	}
    304 
    305 	/* Allocate even if it isn't needed; simplifies locking */
    306 	ns = (netstack_t *)kmem_zalloc(sizeof (netstack_t), KM_SLEEP);
    307 
    308 	/* Look if there is a matching stack instance */
    309 	mutex_enter(&netstack_g_lock);
    310 	for (nsp = &netstack_head; *nsp != NULL;
    311 	    nsp = &((*nsp)->netstack_next)) {
    312 		if ((*nsp)->netstack_stackid == stackid) {
    313 			/*
    314 			 * Should never find a pre-existing exclusive stack
    315 			 */
    316 			ASSERT(stackid == GLOBAL_NETSTACKID);
    317 			kmem_free(ns, sizeof (netstack_t));
    318 			ns = *nsp;
    319 			mutex_enter(&ns->netstack_lock);
    320 			ns->netstack_numzones++;
    321 			mutex_exit(&ns->netstack_lock);
    322 			mutex_exit(&netstack_g_lock);
    323 			DTRACE_PROBE1(netstack__inc__numzones,
    324 			    netstack_t *, ns);
    325 			/* Record that we have a new shared stack zone */
    326 			netstack_shared_zone_add(zoneid);
    327 			zone->zone_netstack = ns;
    328 			return (ns);
    329 		}
    330 	}
    331 	/* Not found */
    332 	mutex_init(&ns->netstack_lock, NULL, MUTEX_DEFAULT, NULL);
    333 	cv_init(&ns->netstack_cv, NULL, CV_DEFAULT, NULL);
    334 	ns->netstack_stackid = zoneid;
    335 	ns->netstack_numzones = 1;
    336 	ns->netstack_refcnt = 1; /* Decremented by netstack_zone_destroy */
    337 	ns->netstack_flags = NSF_UNINIT;
    338 	*nsp = ns;
    339 	zone->zone_netstack = ns;
    340 
    341 	mutex_enter(&ns->netstack_lock);
    342 	/*
    343 	 * Mark this netstack as having a CREATE running so
    344 	 * any netstack_register/netstack_unregister waits for
    345 	 * the existing create callbacks to complete in moduleid order
    346 	 */
    347 	ns->netstack_flags |= NSF_ZONE_CREATE;
    348 
    349 	/*
    350 	 * Determine the set of module create functions that need to be
    351 	 * called before we drop the lock.
    352 	 * Set NSS_CREATE_NEEDED for each of those.
    353 	 * Skip any with NRF_DYING set, since those are in the process of
    354 	 * going away, by checking for flags being exactly NRF_REGISTERED.
    355 	 */
    356 	for (i = 0; i < NS_MAX; i++) {
    357 		nm_state_t *nms = &ns->netstack_m_state[i];
    358 
    359 		cv_init(&nms->nms_cv, NULL, CV_DEFAULT, NULL);
    360 
    361 		if ((ns_reg[i].nr_flags == NRF_REGISTERED) &&
    362 		    (nms->nms_flags & NSS_CREATE_ALL) == 0) {
    363 			nms->nms_flags |= NSS_CREATE_NEEDED;
    364 			DTRACE_PROBE2(netstack__create__needed,
    365 			    netstack_t *, ns, int, i);
    366 		}
    367 	}
    368 	mutex_exit(&ns->netstack_lock);
    369 	mutex_exit(&netstack_g_lock);
    370 
    371 	apply_all_modules(ns, netstack_apply_create);
    372 
    373 	/* Tell any waiting netstack_register/netstack_unregister to proceed */
    374 	mutex_enter(&ns->netstack_lock);
    375 	ns->netstack_flags &= ~NSF_UNINIT;
    376 	ASSERT(ns->netstack_flags & NSF_ZONE_CREATE);
    377 	ns->netstack_flags &= ~NSF_ZONE_CREATE;
    378 	cv_broadcast(&ns->netstack_cv);
    379 	mutex_exit(&ns->netstack_lock);
    380 
    381 	return (ns);
    382 }
    383 
    384 /* ARGSUSED */
    385 static void
    386 netstack_zone_shutdown(zoneid_t zoneid, void *arg)
    387 {
    388 	netstack_t *ns = (netstack_t *)arg;
    389 	int i;
    390 
    391 	ASSERT(arg != NULL);
    392 
    393 	mutex_enter(&ns->netstack_lock);
    394 	ASSERT(ns->netstack_numzones > 0);
    395 	if (ns->netstack_numzones != 1) {
    396 		/* Stack instance being used by other zone */
    397 		mutex_exit(&ns->netstack_lock);
    398 		ASSERT(ns->netstack_stackid == GLOBAL_NETSTACKID);
    399 		return;
    400 	}
    401 	mutex_exit(&ns->netstack_lock);
    402 
    403 	mutex_enter(&netstack_g_lock);
    404 	mutex_enter(&ns->netstack_lock);
    405 	/*
    406 	 * Mark this netstack as having a SHUTDOWN running so
    407 	 * any netstack_register/netstack_unregister waits for
    408 	 * the existing create callbacks to complete in moduleid order
    409 	 */
    410 	ASSERT(!(ns->netstack_flags & NSF_ZONE_INPROGRESS));
    411 	ns->netstack_flags |= NSF_ZONE_SHUTDOWN;
    412 
    413 	/*
    414 	 * Determine the set of stacks that exist before we drop the lock.
    415 	 * Set NSS_SHUTDOWN_NEEDED for each of those.
    416 	 */
    417 	for (i = 0; i < NS_MAX; i++) {
    418 		nm_state_t *nms = &ns->netstack_m_state[i];
    419 
    420 		if ((ns_reg[i].nr_flags & NRF_REGISTERED) &&
    421 		    ns_reg[i].nr_shutdown != NULL &&
    422 		    (nms->nms_flags & NSS_CREATE_COMPLETED) &&
    423 		    (nms->nms_flags & NSS_SHUTDOWN_ALL) == 0) {
    424 			nms->nms_flags |= NSS_SHUTDOWN_NEEDED;
    425 			DTRACE_PROBE2(netstack__shutdown__needed,
    426 			    netstack_t *, ns, int, i);
    427 		}
    428 	}
    429 	mutex_exit(&ns->netstack_lock);
    430 	mutex_exit(&netstack_g_lock);
    431 
    432 	/*
    433 	 * Call the shutdown function for all registered modules for this
    434 	 * netstack.
    435 	 */
    436 	apply_all_modules_reverse(ns, netstack_apply_shutdown);
    437 
    438 	/* Tell any waiting netstack_register/netstack_unregister to proceed */
    439 	mutex_enter(&ns->netstack_lock);
    440 	ASSERT(ns->netstack_flags & NSF_ZONE_SHUTDOWN);
    441 	ns->netstack_flags &= ~NSF_ZONE_SHUTDOWN;
    442 	cv_broadcast(&ns->netstack_cv);
    443 	mutex_exit(&ns->netstack_lock);
    444 }
    445 
    446 /*
    447  * Common routine to release a zone.
    448  * If this was the last zone using the stack instance then prepare to
    449  * have the refcnt dropping to zero free the zone.
    450  */
    451 /* ARGSUSED */
    452 static void
    453 netstack_zone_destroy(zoneid_t zoneid, void *arg)
    454 {
    455 	netstack_t *ns = (netstack_t *)arg;
    456 
    457 	ASSERT(arg != NULL);
    458 
    459 	mutex_enter(&ns->netstack_lock);
    460 	ASSERT(ns->netstack_numzones > 0);
    461 	ns->netstack_numzones--;
    462 	if (ns->netstack_numzones != 0) {
    463 		/* Stack instance being used by other zone */
    464 		mutex_exit(&ns->netstack_lock);
    465 		ASSERT(ns->netstack_stackid == GLOBAL_NETSTACKID);
    466 		/* Record that we a shared stack zone has gone away */
    467 		netstack_shared_zone_remove(zoneid);
    468 		return;
    469 	}
    470 	/*
    471 	 * Set CLOSING so that netstack_find_by will not find it.
    472 	 */
    473 	ns->netstack_flags |= NSF_CLOSING;
    474 	mutex_exit(&ns->netstack_lock);
    475 	DTRACE_PROBE1(netstack__dec__numzones, netstack_t *, ns);
    476 	/* No other thread can call zone_destroy for this stack */
    477 
    478 	/*
    479 	 * Decrease refcnt to account for the one in netstack_zone_init()
    480 	 */
    481 	netstack_rele(ns);
    482 }
    483 
    484 /*
    485  * Called when the reference count drops to zero.
    486  * Call the destroy functions for each registered module.
    487  */
    488 static void
    489 netstack_stack_inactive(netstack_t *ns)
    490 {
    491 	int i;
    492 
    493 	mutex_enter(&netstack_g_lock);
    494 	mutex_enter(&ns->netstack_lock);
    495 	/*
    496 	 * Mark this netstack as having a DESTROY running so
    497 	 * any netstack_register/netstack_unregister waits for
    498 	 * the existing destroy callbacks to complete in reverse moduleid order
    499 	 */
    500 	ASSERT(!(ns->netstack_flags & NSF_ZONE_INPROGRESS));
    501 	ns->netstack_flags |= NSF_ZONE_DESTROY;
    502 	/*
    503 	 * If the shutdown callback wasn't called earlier (e.g., if this is
    504 	 * a netstack shared between multiple zones), then we schedule it now.
    505 	 *
    506 	 * Determine the set of stacks that exist before we drop the lock.
    507 	 * Set NSS_DESTROY_NEEDED for each of those. That
    508 	 * ensures that when we return all the callbacks for existing
    509 	 * instances have completed.
    510 	 */
    511 	for (i = 0; i < NS_MAX; i++) {
    512 		nm_state_t *nms = &ns->netstack_m_state[i];
    513 
    514 		if ((ns_reg[i].nr_flags & NRF_REGISTERED) &&
    515 		    ns_reg[i].nr_shutdown != NULL &&
    516 		    (nms->nms_flags & NSS_CREATE_COMPLETED) &&
    517 		    (nms->nms_flags & NSS_SHUTDOWN_ALL) == 0) {
    518 			nms->nms_flags |= NSS_SHUTDOWN_NEEDED;
    519 			DTRACE_PROBE2(netstack__shutdown__needed,
    520 			    netstack_t *, ns, int, i);
    521 		}
    522 
    523 		if ((ns_reg[i].nr_flags & NRF_REGISTERED) &&
    524 		    ns_reg[i].nr_destroy != NULL &&
    525 		    (nms->nms_flags & NSS_CREATE_COMPLETED) &&
    526 		    (nms->nms_flags & NSS_DESTROY_ALL) == 0) {
    527 			nms->nms_flags |= NSS_DESTROY_NEEDED;
    528 			DTRACE_PROBE2(netstack__destroy__needed,
    529 			    netstack_t *, ns, int, i);
    530 		}
    531 	}
    532 	mutex_exit(&ns->netstack_lock);
    533 	mutex_exit(&netstack_g_lock);
    534 
    535 	/*
    536 	 * Call the shutdown and destroy functions for all registered modules
    537 	 * for this netstack.
    538 	 *
    539 	 * Since there are some ordering dependencies between the modules we
    540 	 * tear them down in the reverse order of what was used to create them.
    541 	 *
    542 	 * Since a netstack_t is never reused (when a zone is rebooted it gets
    543 	 * a new zoneid == netstackid i.e. a new netstack_t is allocated) we
    544 	 * leave nms_flags the way it is i.e. with NSS_DESTROY_COMPLETED set.
    545 	 * That is different than in the netstack_unregister() case.
    546 	 */
    547 	apply_all_modules_reverse(ns, netstack_apply_shutdown);
    548 	apply_all_modules_reverse(ns, netstack_apply_destroy);
    549 
    550 	/* Tell any waiting netstack_register/netstack_unregister to proceed */
    551 	mutex_enter(&ns->netstack_lock);
    552 	ASSERT(ns->netstack_flags & NSF_ZONE_DESTROY);
    553 	ns->netstack_flags &= ~NSF_ZONE_DESTROY;
    554 	cv_broadcast(&ns->netstack_cv);
    555 	mutex_exit(&ns->netstack_lock);
    556 }
    557 
    558 /*
    559  * Apply a function to all netstacks for a particular moduleid.
    560  *
    561  * If there is any zone activity (due to a zone being created, shutdown,
    562  * or destroyed) we wait for that to complete before we proceed. This ensures
    563  * that the moduleids are processed in order when a zone is created or
    564  * destroyed.
    565  *
    566  * The applyfn has to drop netstack_g_lock if it does some work.
    567  * In that case we don't follow netstack_next,
    568  * even if it is possible to do so without any hazards. This is
    569  * because we want the design to allow for the list of netstacks threaded
    570  * by netstack_next to change in any arbitrary way during the time the
    571  * lock was dropped.
    572  *
    573  * It is safe to restart the loop at netstack_head since the applyfn
    574  * changes netstack_m_state as it processes things, so a subsequent
    575  * pass through will have no effect in applyfn, hence the loop will terminate
    576  * in at worst O(N^2).
    577  */
    578 static void
    579 apply_all_netstacks(int moduleid, applyfn_t *applyfn)
    580 {
    581 	netstack_t *ns;
    582 
    583 	mutex_enter(&netstack_g_lock);
    584 	ns = netstack_head;
    585 	while (ns != NULL) {
    586 		if (wait_for_zone_creator(ns, &netstack_g_lock)) {
    587 			/* Lock dropped - restart at head */
    588 			ns = netstack_head;
    589 		} else if ((applyfn)(&netstack_g_lock, ns, moduleid)) {
    590 			/* Lock dropped - restart at head */
    591 			ns = netstack_head;
    592 		} else {
    593 			ns = ns->netstack_next;
    594 		}
    595 	}
    596 	mutex_exit(&netstack_g_lock);
    597 }
    598 
    599 /*
    600  * Apply a function to all moduleids for a particular netstack.
    601  *
    602  * Since the netstack linkage doesn't matter in this case we can
    603  * ignore whether the function drops the lock.
    604  */
    605 static void
    606 apply_all_modules(netstack_t *ns, applyfn_t *applyfn)
    607 {
    608 	int i;
    609 
    610 	mutex_enter(&netstack_g_lock);
    611 	for (i = 0; i < NS_MAX; i++) {
    612 		/*
    613 		 * We don't care whether the lock was dropped
    614 		 * since we are not iterating over netstack_head.
    615 		 */
    616 		(void) (applyfn)(&netstack_g_lock, ns, i);
    617 	}
    618 	mutex_exit(&netstack_g_lock);
    619 }
    620 
    621 /* Like the above but in reverse moduleid order */
    622 static void
    623 apply_all_modules_reverse(netstack_t *ns, applyfn_t *applyfn)
    624 {
    625 	int i;
    626 
    627 	mutex_enter(&netstack_g_lock);
    628 	for (i = NS_MAX-1; i >= 0; i--) {
    629 		/*
    630 		 * We don't care whether the lock was dropped
    631 		 * since we are not iterating over netstack_head.
    632 		 */
    633 		(void) (applyfn)(&netstack_g_lock, ns, i);
    634 	}
    635 	mutex_exit(&netstack_g_lock);
    636 }
    637 
    638 /*
    639  * Call the create function for the ns and moduleid if CREATE_NEEDED
    640  * is set.
    641  * If some other thread gets here first and sets *_INPROGRESS, then
    642  * we wait for that thread to complete so that we can ensure that
    643  * all the callbacks are done when we've looped over all netstacks/moduleids.
    644  *
    645  * When we call the create function, we temporarily drop the netstack_lock
    646  * held by the caller, and return true to tell the caller it needs to
    647  * re-evalute the state.
    648  */
    649 static boolean_t
    650 netstack_apply_create(kmutex_t *lockp, netstack_t *ns, int moduleid)
    651 {
    652 	void *result;
    653 	netstackid_t stackid;
    654 	nm_state_t *nms = &ns->netstack_m_state[moduleid];
    655 	boolean_t dropped = B_FALSE;
    656 
    657 	ASSERT(MUTEX_HELD(lockp));
    658 	mutex_enter(&ns->netstack_lock);
    659 
    660 	if (wait_for_nms_inprogress(ns, nms, lockp))
    661 		dropped = B_TRUE;
    662 
    663 	if (nms->nms_flags & NSS_CREATE_NEEDED) {
    664 		nms->nms_flags &= ~NSS_CREATE_NEEDED;
    665 		nms->nms_flags |= NSS_CREATE_INPROGRESS;
    666 		DTRACE_PROBE2(netstack__create__inprogress,
    667 		    netstack_t *, ns, int, moduleid);
    668 		mutex_exit(&ns->netstack_lock);
    669 		mutex_exit(lockp);
    670 		dropped = B_TRUE;
    671 
    672 		ASSERT(ns_reg[moduleid].nr_create != NULL);
    673 		stackid = ns->netstack_stackid;
    674 		DTRACE_PROBE2(netstack__create__start,
    675 		    netstackid_t, stackid,
    676 		    netstack_t *, ns);
    677 		result = (ns_reg[moduleid].nr_create)(stackid, ns);
    678 		DTRACE_PROBE2(netstack__create__end,
    679 		    void *, result, netstack_t *, ns);
    680 
    681 		ASSERT(result != NULL);
    682 		mutex_enter(lockp);
    683 		mutex_enter(&ns->netstack_lock);
    684 		ns->netstack_modules[moduleid] = result;
    685 		nms->nms_flags &= ~NSS_CREATE_INPROGRESS;
    686 		nms->nms_flags |= NSS_CREATE_COMPLETED;
    687 		cv_broadcast(&nms->nms_cv);
    688 		DTRACE_PROBE2(netstack__create__completed,
    689 		    netstack_t *, ns, int, moduleid);
    690 		mutex_exit(&ns->netstack_lock);
    691 		return (dropped);
    692 	} else {
    693 		mutex_exit(&ns->netstack_lock);
    694 		return (dropped);
    695 	}
    696 }
    697 
    698 /*
    699  * Call the shutdown function for the ns and moduleid if SHUTDOWN_NEEDED
    700  * is set.
    701  * If some other thread gets here first and sets *_INPROGRESS, then
    702  * we wait for that thread to complete so that we can ensure that
    703  * all the callbacks are done when we've looped over all netstacks/moduleids.
    704  *
    705  * When we call the shutdown function, we temporarily drop the netstack_lock
    706  * held by the caller, and return true to tell the caller it needs to
    707  * re-evalute the state.
    708  */
    709 static boolean_t
    710 netstack_apply_shutdown(kmutex_t *lockp, netstack_t *ns, int moduleid)
    711 {
    712 	netstackid_t stackid;
    713 	void * netstack_module;
    714 	nm_state_t *nms = &ns->netstack_m_state[moduleid];
    715 	boolean_t dropped = B_FALSE;
    716 
    717 	ASSERT(MUTEX_HELD(lockp));
    718 	mutex_enter(&ns->netstack_lock);
    719 
    720 	if (wait_for_nms_inprogress(ns, nms, lockp))
    721 		dropped = B_TRUE;
    722 
    723 	if (nms->nms_flags & NSS_SHUTDOWN_NEEDED) {
    724 		nms->nms_flags &= ~NSS_SHUTDOWN_NEEDED;
    725 		nms->nms_flags |= NSS_SHUTDOWN_INPROGRESS;
    726 		DTRACE_PROBE2(netstack__shutdown__inprogress,
    727 		    netstack_t *, ns, int, moduleid);
    728 		mutex_exit(&ns->netstack_lock);
    729 		mutex_exit(lockp);
    730 		dropped = B_TRUE;
    731 
    732 		ASSERT(ns_reg[moduleid].nr_shutdown != NULL);
    733 		stackid = ns->netstack_stackid;
    734 		netstack_module = ns->netstack_modules[moduleid];
    735 		DTRACE_PROBE2(netstack__shutdown__start,
    736 		    netstackid_t, stackid,
    737 		    void *, netstack_module);
    738 		(ns_reg[moduleid].nr_shutdown)(stackid, netstack_module);
    739 		DTRACE_PROBE1(netstack__shutdown__end,
    740 		    netstack_t *, ns);
    741 
    742 		mutex_enter(lockp);
    743 		mutex_enter(&ns->netstack_lock);
    744 		nms->nms_flags &= ~NSS_SHUTDOWN_INPROGRESS;
    745 		nms->nms_flags |= NSS_SHUTDOWN_COMPLETED;
    746 		cv_broadcast(&nms->nms_cv);
    747 		DTRACE_PROBE2(netstack__shutdown__completed,
    748 		    netstack_t *, ns, int, moduleid);
    749 		mutex_exit(&ns->netstack_lock);
    750 		return (dropped);
    751 	} else {
    752 		mutex_exit(&ns->netstack_lock);
    753 		return (dropped);
    754 	}
    755 }
    756 
    757 /*
    758  * Call the destroy function for the ns and moduleid if DESTROY_NEEDED
    759  * is set.
    760  * If some other thread gets here first and sets *_INPROGRESS, then
    761  * we wait for that thread to complete so that we can ensure that
    762  * all the callbacks are done when we've looped over all netstacks/moduleids.
    763  *
    764  * When we call the destroy function, we temporarily drop the netstack_lock
    765  * held by the caller, and return true to tell the caller it needs to
    766  * re-evalute the state.
    767  */
    768 static boolean_t
    769 netstack_apply_destroy(kmutex_t *lockp, netstack_t *ns, int moduleid)
    770 {
    771 	netstackid_t stackid;
    772 	void * netstack_module;
    773 	nm_state_t *nms = &ns->netstack_m_state[moduleid];
    774 	boolean_t dropped = B_FALSE;
    775 
    776 	ASSERT(MUTEX_HELD(lockp));
    777 	mutex_enter(&ns->netstack_lock);
    778 
    779 	if (wait_for_nms_inprogress(ns, nms, lockp))
    780 		dropped = B_TRUE;
    781 
    782 	if (nms->nms_flags & NSS_DESTROY_NEEDED) {
    783 		nms->nms_flags &= ~NSS_DESTROY_NEEDED;
    784 		nms->nms_flags |= NSS_DESTROY_INPROGRESS;
    785 		DTRACE_PROBE2(netstack__destroy__inprogress,
    786 		    netstack_t *, ns, int, moduleid);
    787 		mutex_exit(&ns->netstack_lock);
    788 		mutex_exit(lockp);
    789 		dropped = B_TRUE;
    790 
    791 		ASSERT(ns_reg[moduleid].nr_destroy != NULL);
    792 		stackid = ns->netstack_stackid;
    793 		netstack_module = ns->netstack_modules[moduleid];
    794 		DTRACE_PROBE2(netstack__destroy__start,
    795 		    netstackid_t, stackid,
    796 		    void *, netstack_module);
    797 		(ns_reg[moduleid].nr_destroy)(stackid, netstack_module);
    798 		DTRACE_PROBE1(netstack__destroy__end,
    799 		    netstack_t *, ns);
    800 
    801 		mutex_enter(lockp);
    802 		mutex_enter(&ns->netstack_lock);
    803 		ns->netstack_modules[moduleid] = NULL;
    804 		nms->nms_flags &= ~NSS_DESTROY_INPROGRESS;
    805 		nms->nms_flags |= NSS_DESTROY_COMPLETED;
    806 		cv_broadcast(&nms->nms_cv);
    807 		DTRACE_PROBE2(netstack__destroy__completed,
    808 		    netstack_t *, ns, int, moduleid);
    809 		mutex_exit(&ns->netstack_lock);
    810 		return (dropped);
    811 	} else {
    812 		mutex_exit(&ns->netstack_lock);
    813 		return (dropped);
    814 	}
    815 }
    816 
    817 /*
    818  * If somebody  is creating the netstack (due to a new zone being created)
    819  * then we wait for them to complete. This ensures that any additional
    820  * netstack_register() doesn't cause the create functions to run out of
    821  * order.
    822  * Note that we do not need such a global wait in the case of the shutdown
    823  * and destroy callbacks, since in that case it is sufficient for both
    824  * threads to set NEEDED and wait for INPROGRESS to ensure ordering.
    825  * Returns true if lockp was temporarily dropped while waiting.
    826  */
    827 static boolean_t
    828 wait_for_zone_creator(netstack_t *ns, kmutex_t *lockp)
    829 {
    830 	boolean_t dropped = B_FALSE;
    831 
    832 	mutex_enter(&ns->netstack_lock);
    833 	while (ns->netstack_flags & NSF_ZONE_CREATE) {
    834 		DTRACE_PROBE1(netstack__wait__zone__inprogress,
    835 		    netstack_t *, ns);
    836 		if (lockp != NULL) {
    837 			dropped = B_TRUE;
    838 			mutex_exit(lockp);
    839 		}
    840 		cv_wait(&ns->netstack_cv, &ns->netstack_lock);
    841 		if (lockp != NULL) {
    842 			/* First drop netstack_lock to preserve order */
    843 			mutex_exit(&ns->netstack_lock);
    844 			mutex_enter(lockp);
    845 			mutex_enter(&ns->netstack_lock);
    846 		}
    847 	}
    848 	mutex_exit(&ns->netstack_lock);
    849 	return (dropped);
    850 }
    851 
    852 /*
    853  * Wait for any INPROGRESS flag to be cleared for the netstack/moduleid
    854  * combination.
    855  * Returns true if lockp was temporarily dropped while waiting.
    856  */
    857 static boolean_t
    858 wait_for_nms_inprogress(netstack_t *ns, nm_state_t *nms, kmutex_t *lockp)
    859 {
    860 	boolean_t dropped = B_FALSE;
    861 
    862 	while (nms->nms_flags & NSS_ALL_INPROGRESS) {
    863 		DTRACE_PROBE2(netstack__wait__nms__inprogress,
    864 		    netstack_t *, ns, nm_state_t *, nms);
    865 		if (lockp != NULL) {
    866 			dropped = B_TRUE;
    867 			mutex_exit(lockp);
    868 		}
    869 		cv_wait(&nms->nms_cv, &ns->netstack_lock);
    870 		if (lockp != NULL) {
    871 			/* First drop netstack_lock to preserve order */
    872 			mutex_exit(&ns->netstack_lock);
    873 			mutex_enter(lockp);
    874 			mutex_enter(&ns->netstack_lock);
    875 		}
    876 	}
    877 	return (dropped);
    878 }
    879 
    880 /*
    881  * Get the stack instance used in caller's zone.
    882  * Increases the reference count, caller must do a netstack_rele.
    883  * It can't be called after zone_destroy() has started.
    884  */
    885 netstack_t *
    886 netstack_get_current(void)
    887 {
    888 	netstack_t *ns;
    889 
    890 	ns = curproc->p_zone->zone_netstack;
    891 	ASSERT(ns != NULL);
    892 	if (ns->netstack_flags & (NSF_UNINIT|NSF_CLOSING))
    893 		return (NULL);
    894 
    895 	netstack_hold(ns);
    896 
    897 	return (ns);
    898 }
    899 
    900 /*
    901  * Find a stack instance given the cred.
    902  * This is used by the modules to potentially allow for a future when
    903  * something other than the zoneid is used to determine the stack.
    904  */
    905 netstack_t *
    906 netstack_find_by_cred(const cred_t *cr)
    907 {
    908 	zoneid_t zoneid = crgetzoneid(cr);
    909 
    910 	/* Handle the case when cr_zone is NULL */
    911 	if (zoneid == (zoneid_t)-1)
    912 		zoneid = GLOBAL_ZONEID;
    913 
    914 	/* For performance ... */
    915 	if (curproc->p_zone->zone_id == zoneid)
    916 		return (netstack_get_current());
    917 	else
    918 		return (netstack_find_by_zoneid(zoneid));
    919 }
    920 
    921 /*
    922  * Find a stack instance given the zoneid.
    923  * Increases the reference count if found; caller must do a
    924  * netstack_rele().
    925  *
    926  * If there is no exact match then assume the shared stack instance
    927  * matches.
    928  *
    929  * Skip the unitialized ones.
    930  */
    931 netstack_t *
    932 netstack_find_by_zoneid(zoneid_t zoneid)
    933 {
    934 	netstack_t *ns;
    935 	zone_t *zone;
    936 
    937 	zone = zone_find_by_id(zoneid);
    938 
    939 	if (zone == NULL)
    940 		return (NULL);
    941 
    942 	ns = zone->zone_netstack;
    943 	ASSERT(ns != NULL);
    944 	if (ns->netstack_flags & (NSF_UNINIT|NSF_CLOSING))
    945 		ns = NULL;
    946 	else
    947 		netstack_hold(ns);
    948 
    949 	zone_rele(zone);
    950 	return (ns);
    951 }
    952 
    953 /*
    954  * Find a stack instance given the zoneid. Can only be called from
    955  * the create callback. See the comments in zone_find_by_id_nolock why
    956  * that limitation exists.
    957  *
    958  * Increases the reference count if found; caller must do a
    959  * netstack_rele().
    960  *
    961  * If there is no exact match then assume the shared stack instance
    962  * matches.
    963  *
    964  * Skip the unitialized ones.
    965  */
    966 netstack_t *
    967 netstack_find_by_zoneid_nolock(zoneid_t zoneid)
    968 {
    969 	netstack_t *ns;
    970 	zone_t *zone;
    971 
    972 	zone = zone_find_by_id_nolock(zoneid);
    973 
    974 	if (zone == NULL)
    975 		return (NULL);
    976 
    977 	ns = zone->zone_netstack;
    978 	ASSERT(ns != NULL);
    979 
    980 	if (ns->netstack_flags & (NSF_UNINIT|NSF_CLOSING))
    981 		ns = NULL;
    982 	else
    983 		netstack_hold(ns);
    984 
    985 	/* zone_find_by_id_nolock does not have a hold on the zone */
    986 	return (ns);
    987 }
    988 
    989 /*
    990  * Find a stack instance given the stackid with exact match?
    991  * Increases the reference count if found; caller must do a
    992  * netstack_rele().
    993  *
    994  * Skip the unitialized ones.
    995  */
    996 netstack_t *
    997 netstack_find_by_stackid(netstackid_t stackid)
    998 {
    999 	netstack_t *ns;
   1000 
   1001 	mutex_enter(&netstack_g_lock);
   1002 	for (ns = netstack_head; ns != NULL; ns = ns->netstack_next) {
   1003 		mutex_enter(&ns->netstack_lock);
   1004 		if (ns->netstack_stackid == stackid &&
   1005 		    !(ns->netstack_flags & (NSF_UNINIT|NSF_CLOSING))) {
   1006 			mutex_exit(&ns->netstack_lock);
   1007 			netstack_hold(ns);
   1008 			mutex_exit(&netstack_g_lock);
   1009 			return (ns);
   1010 		}
   1011 		mutex_exit(&ns->netstack_lock);
   1012 	}
   1013 	mutex_exit(&netstack_g_lock);
   1014 	return (NULL);
   1015 }
   1016 
   1017 void
   1018 netstack_rele(netstack_t *ns)
   1019 {
   1020 	netstack_t **nsp;
   1021 	boolean_t found;
   1022 	int refcnt, numzones;
   1023 	int i;
   1024 
   1025 	mutex_enter(&ns->netstack_lock);
   1026 	ASSERT(ns->netstack_refcnt > 0);
   1027 	ns->netstack_refcnt--;
   1028 	/*
   1029 	 * As we drop the lock additional netstack_rele()s can come in
   1030 	 * and decrement the refcnt to zero and free the netstack_t.
   1031 	 * Store pointers in local variables and if we were not the last
   1032 	 * then don't reference the netstack_t after that.
   1033 	 */
   1034 	refcnt = ns->netstack_refcnt;
   1035 	numzones = ns->netstack_numzones;
   1036 	DTRACE_PROBE1(netstack__dec__ref, netstack_t *, ns);
   1037 	mutex_exit(&ns->netstack_lock);
   1038 
   1039 	if (refcnt == 0 && numzones == 0) {
   1040 		/*
   1041 		 * Time to call the destroy functions and free up
   1042 		 * the structure
   1043 		 */
   1044 		netstack_stack_inactive(ns);
   1045 
   1046 		/* Make sure nothing increased the references */
   1047 		ASSERT(ns->netstack_refcnt == 0);
   1048 		ASSERT(ns->netstack_numzones == 0);
   1049 
   1050 		/* Finally remove from list of netstacks */
   1051 		mutex_enter(&netstack_g_lock);
   1052 		found = B_FALSE;
   1053 		for (nsp = &netstack_head; *nsp != NULL;
   1054 		    nsp = &(*nsp)->netstack_next) {
   1055 			if (*nsp == ns) {
   1056 				*nsp = ns->netstack_next;
   1057 				ns->netstack_next = NULL;
   1058 				found = B_TRUE;
   1059 				break;
   1060 			}
   1061 		}
   1062 		ASSERT(found);
   1063 		mutex_exit(&netstack_g_lock);
   1064 
   1065 		/* Make sure nothing increased the references */
   1066 		ASSERT(ns->netstack_refcnt == 0);
   1067 		ASSERT(ns->netstack_numzones == 0);
   1068 
   1069 		ASSERT(ns->netstack_flags & NSF_CLOSING);
   1070 
   1071 		for (i = 0; i < NS_MAX; i++) {
   1072 			nm_state_t *nms = &ns->netstack_m_state[i];
   1073 
   1074 			cv_destroy(&nms->nms_cv);
   1075 		}
   1076 		mutex_destroy(&ns->netstack_lock);
   1077 		cv_destroy(&ns->netstack_cv);
   1078 		kmem_free(ns, sizeof (*ns));
   1079 	}
   1080 }
   1081 
   1082 void
   1083 netstack_hold(netstack_t *ns)
   1084 {
   1085 	mutex_enter(&ns->netstack_lock);
   1086 	ns->netstack_refcnt++;
   1087 	ASSERT(ns->netstack_refcnt > 0);
   1088 	mutex_exit(&ns->netstack_lock);
   1089 	DTRACE_PROBE1(netstack__inc__ref, netstack_t *, ns);
   1090 }
   1091 
   1092 /*
   1093  * To support kstat_create_netstack() using kstat_zone_add we need
   1094  * to track both
   1095  *  - all zoneids that use the global/shared stack
   1096  *  - all kstats that have been added for the shared stack
   1097  */
   1098 kstat_t *
   1099 kstat_create_netstack(char *ks_module, int ks_instance, char *ks_name,
   1100     char *ks_class, uchar_t ks_type, uint_t ks_ndata, uchar_t ks_flags,
   1101     netstackid_t ks_netstackid)
   1102 {
   1103 	kstat_t *ks;
   1104 
   1105 	if (ks_netstackid == GLOBAL_NETSTACKID) {
   1106 		ks = kstat_create_zone(ks_module, ks_instance, ks_name,
   1107 		    ks_class, ks_type, ks_ndata, ks_flags, GLOBAL_ZONEID);
   1108 		if (ks != NULL)
   1109 			netstack_shared_kstat_add(ks);
   1110 		return (ks);
   1111 	} else {
   1112 		zoneid_t zoneid = ks_netstackid;
   1113 
   1114 		return (kstat_create_zone(ks_module, ks_instance, ks_name,
   1115 		    ks_class, ks_type, ks_ndata, ks_flags, zoneid));
   1116 	}
   1117 }
   1118 
   1119 void
   1120 kstat_delete_netstack(kstat_t *ks, netstackid_t ks_netstackid)
   1121 {
   1122 	if (ks_netstackid == GLOBAL_NETSTACKID) {
   1123 		netstack_shared_kstat_remove(ks);
   1124 	}
   1125 	kstat_delete(ks);
   1126 }
   1127 
   1128 static void
   1129 netstack_shared_zone_add(zoneid_t zoneid)
   1130 {
   1131 	struct shared_zone_list *sz;
   1132 	struct shared_kstat_list *sk;
   1133 
   1134 	sz = (struct shared_zone_list *)kmem_zalloc(sizeof (*sz), KM_SLEEP);
   1135 	sz->sz_zoneid = zoneid;
   1136 
   1137 	/* Insert in list */
   1138 	mutex_enter(&netstack_shared_lock);
   1139 	sz->sz_next = netstack_shared_zones;
   1140 	netstack_shared_zones = sz;
   1141 
   1142 	/*
   1143 	 * Perform kstat_zone_add for each existing shared stack kstat.
   1144 	 * Note: Holds netstack_shared_lock lock across kstat_zone_add.
   1145 	 */
   1146 	for (sk = netstack_shared_kstats; sk != NULL; sk = sk->sk_next) {
   1147 		kstat_zone_add(sk->sk_kstat, zoneid);
   1148 	}
   1149 	mutex_exit(&netstack_shared_lock);
   1150 }
   1151 
   1152 static void
   1153 netstack_shared_zone_remove(zoneid_t zoneid)
   1154 {
   1155 	struct shared_zone_list **szp, *sz;
   1156 	struct shared_kstat_list *sk;
   1157 
   1158 	/* Find in list */
   1159 	mutex_enter(&netstack_shared_lock);
   1160 	sz = NULL;
   1161 	for (szp = &netstack_shared_zones; *szp != NULL;
   1162 	    szp = &((*szp)->sz_next)) {
   1163 		if ((*szp)->sz_zoneid == zoneid) {
   1164 			sz = *szp;
   1165 			break;
   1166 		}
   1167 	}
   1168 	/* We must find it */
   1169 	ASSERT(sz != NULL);
   1170 	*szp = sz->sz_next;
   1171 	sz->sz_next = NULL;
   1172 
   1173 	/*
   1174 	 * Perform kstat_zone_remove for each existing shared stack kstat.
   1175 	 * Note: Holds netstack_shared_lock lock across kstat_zone_remove.
   1176 	 */
   1177 	for (sk = netstack_shared_kstats; sk != NULL; sk = sk->sk_next) {
   1178 		kstat_zone_remove(sk->sk_kstat, zoneid);
   1179 	}
   1180 	mutex_exit(&netstack_shared_lock);
   1181 
   1182 	kmem_free(sz, sizeof (*sz));
   1183 }
   1184 
   1185 static void
   1186 netstack_shared_kstat_add(kstat_t *ks)
   1187 {
   1188 	struct shared_zone_list *sz;
   1189 	struct shared_kstat_list *sk;
   1190 
   1191 	sk = (struct shared_kstat_list *)kmem_zalloc(sizeof (*sk), KM_SLEEP);
   1192 	sk->sk_kstat = ks;
   1193 
   1194 	/* Insert in list */
   1195 	mutex_enter(&netstack_shared_lock);
   1196 	sk->sk_next = netstack_shared_kstats;
   1197 	netstack_shared_kstats = sk;
   1198 
   1199 	/*
   1200 	 * Perform kstat_zone_add for each existing shared stack zone.
   1201 	 * Note: Holds netstack_shared_lock lock across kstat_zone_add.
   1202 	 */
   1203 	for (sz = netstack_shared_zones; sz != NULL; sz = sz->sz_next) {
   1204 		kstat_zone_add(ks, sz->sz_zoneid);
   1205 	}
   1206 	mutex_exit(&netstack_shared_lock);
   1207 }
   1208 
   1209 static void
   1210 netstack_shared_kstat_remove(kstat_t *ks)
   1211 {
   1212 	struct shared_zone_list *sz;
   1213 	struct shared_kstat_list **skp, *sk;
   1214 
   1215 	/* Find in list */
   1216 	mutex_enter(&netstack_shared_lock);
   1217 	sk = NULL;
   1218 	for (skp = &netstack_shared_kstats; *skp != NULL;
   1219 	    skp = &((*skp)->sk_next)) {
   1220 		if ((*skp)->sk_kstat == ks) {
   1221 			sk = *skp;
   1222 			break;
   1223 		}
   1224 	}
   1225 	/* Must find it */
   1226 	ASSERT(sk != NULL);
   1227 	*skp = sk->sk_next;
   1228 	sk->sk_next = NULL;
   1229 
   1230 	/*
   1231 	 * Perform kstat_zone_remove for each existing shared stack kstat.
   1232 	 * Note: Holds netstack_shared_lock lock across kstat_zone_remove.
   1233 	 */
   1234 	for (sz = netstack_shared_zones; sz != NULL; sz = sz->sz_next) {
   1235 		kstat_zone_remove(ks, sz->sz_zoneid);
   1236 	}
   1237 	mutex_exit(&netstack_shared_lock);
   1238 	kmem_free(sk, sizeof (*sk));
   1239 }
   1240 
   1241 /*
   1242  * If a zoneid is part of the shared zone, return true
   1243  */
   1244 static boolean_t
   1245 netstack_find_shared_zoneid(zoneid_t zoneid)
   1246 {
   1247 	struct shared_zone_list *sz;
   1248 
   1249 	mutex_enter(&netstack_shared_lock);
   1250 	for (sz = netstack_shared_zones; sz != NULL; sz = sz->sz_next) {
   1251 		if (sz->sz_zoneid == zoneid) {
   1252 			mutex_exit(&netstack_shared_lock);
   1253 			return (B_TRUE);
   1254 		}
   1255 	}
   1256 	mutex_exit(&netstack_shared_lock);
   1257 	return (B_FALSE);
   1258 }
   1259 
   1260 /*
   1261  * Hide the fact that zoneids and netstackids are allocated from
   1262  * the same space in the current implementation.
   1263  * We currently do not check that the stackid/zoneids are valid, since there
   1264  * is no need for that. But this should only be done for ids that are
   1265  * valid.
   1266  */
   1267 zoneid_t
   1268 netstackid_to_zoneid(netstackid_t stackid)
   1269 {
   1270 	return (stackid);
   1271 }
   1272 
   1273 netstackid_t
   1274 zoneid_to_netstackid(zoneid_t zoneid)
   1275 {
   1276 	if (netstack_find_shared_zoneid(zoneid))
   1277 		return (GLOBAL_ZONEID);
   1278 	else
   1279 		return (zoneid);
   1280 }
   1281 
   1282 zoneid_t
   1283 netstack_get_zoneid(netstack_t *ns)
   1284 {
   1285 	return (netstackid_to_zoneid(ns->netstack_stackid));
   1286 }
   1287 
   1288 /*
   1289  * Simplistic support for walking all the handles.
   1290  * Example usage:
   1291  *	netstack_handle_t nh;
   1292  *	netstack_t *ns;
   1293  *
   1294  *	netstack_next_init(&nh);
   1295  *	while ((ns = netstack_next(&nh)) != NULL) {
   1296  *		do something;
   1297  *		netstack_rele(ns);
   1298  *	}
   1299  *	netstack_next_fini(&nh);
   1300  */
   1301 void
   1302 netstack_next_init(netstack_handle_t *handle)
   1303 {
   1304 	*handle = 0;
   1305 }
   1306 
   1307 /* ARGSUSED */
   1308 void
   1309 netstack_next_fini(netstack_handle_t *handle)
   1310 {
   1311 }
   1312 
   1313 netstack_t *
   1314 netstack_next(netstack_handle_t *handle)
   1315 {
   1316 	netstack_t *ns;
   1317 	int i, end;
   1318 
   1319 	end = *handle;
   1320 	/* Walk skipping *handle number of instances */
   1321 
   1322 	/* Look if there is a matching stack instance */
   1323 	mutex_enter(&netstack_g_lock);
   1324 	ns = netstack_head;
   1325 	for (i = 0; i < end; i++) {
   1326 		if (ns == NULL)
   1327 			break;
   1328 		ns = ns->netstack_next;
   1329 	}
   1330 	/* skip those with that aren't really here */
   1331 	while (ns != NULL) {
   1332 		mutex_enter(&ns->netstack_lock);
   1333 		if ((ns->netstack_flags & (NSF_UNINIT|NSF_CLOSING)) == 0) {
   1334 			mutex_exit(&ns->netstack_lock);
   1335 			break;
   1336 		}
   1337 		mutex_exit(&ns->netstack_lock);
   1338 		end++;
   1339 		ns = ns->netstack_next;
   1340 	}
   1341 	if (ns != NULL) {
   1342 		*handle = end + 1;
   1343 		netstack_hold(ns);
   1344 	}
   1345 	mutex_exit(&netstack_g_lock);
   1346 	return (ns);
   1347 }
   1348