Home | History | Annotate | Download | only in io
      1 /*
      2  * CDDL HEADER START
      3  *
      4  * The contents of this file are subject to the terms of the
      5  * Common Development and Distribution License (the "License").
      6  * You may not use this file except in compliance with the License.
      7  *
      8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
      9  * or http://www.opensolaris.org/os/licensing.
     10  * See the License for the specific language governing permissions
     11  * and limitations under the License.
     12  *
     13  * When distributing Covered Code, include this CDDL HEADER in each
     14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
     15  * If applicable, add the following below this CDDL HEADER, with the
     16  * fields enclosed by brackets "[]" replaced with your own identifying
     17  * information: Portions Copyright [yyyy] [name of copyright owner]
     18  *
     19  * CDDL HEADER END
     20  */
     21 
     22 /*
     23  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
     24  * Use is subject to license terms.
     25  */
     26 
     27 /*
     28  * This module implements a STREAMS driver that provides layer-two (Ethernet)
     29  * bridging functionality.  The STREAMS interface is used to provide
     30  * observability (snoop/wireshark) and control, but not for interface plumbing.
     31  */
     32 
     33 #include <sys/types.h>
     34 #include <sys/bitmap.h>
     35 #include <sys/cmn_err.h>
     36 #include <sys/conf.h>
     37 #include <sys/ddi.h>
     38 #include <sys/errno.h>
     39 #include <sys/kstat.h>
     40 #include <sys/modctl.h>
     41 #include <sys/note.h>
     42 #include <sys/param.h>
     43 #include <sys/policy.h>
     44 #include <sys/sdt.h>
     45 #include <sys/stat.h>
     46 #include <sys/stream.h>
     47 #include <sys/stropts.h>
     48 #include <sys/strsun.h>
     49 #include <sys/sunddi.h>
     50 #include <sys/sysmacros.h>
     51 #include <sys/systm.h>
     52 #include <sys/time.h>
     53 #include <sys/dlpi.h>
     54 #include <sys/dls.h>
     55 #include <sys/mac_ether.h>
     56 #include <sys/mac_provider.h>
     57 #include <sys/mac_client_priv.h>
     58 #include <sys/mac_impl.h>
     59 #include <sys/vlan.h>
     60 #include <net/bridge.h>
     61 #include <net/bridge_impl.h>
     62 #include <net/trill.h>
     63 #include <sys/dld_ioc.h>
     64 
     65 /*
     66  * Locks and reference counts: object lifetime and design.
     67  *
     68  * bridge_mac_t
     69  *   Bridge mac (snoop) instances are in bmac_list, which is protected by
     70  *   bmac_rwlock.  They're allocated by bmac_alloc and freed by bridge_timer().
     71  *   Every bridge_inst_t has a single bridge_mac_t, but when bridge_inst_t goes
     72  *   away, the bridge_mac_t remains until either all of the users go away
     73  *   (detected by a timer) or until the instance is picked up again by the same
     74  *   bridge starting back up.
     75  *
     76  * bridge_inst_t
     77  *   Bridge instances are in inst_list, which is protected by inst_lock.
     78  *   They're allocated by inst_alloc() and freed by inst_free().  After
     79  *   allocation, an instance is placed in inst_list, and the reference count is
     80  *   incremented to represent this.  That reference is decremented when the
     81  *   BIF_SHUTDOWN flag is set, and no new increments may occur.  When the last
     82  *   reference is freed, the instance is removed from the list.
     83  *
     84  *   Bridge instances have lists of links and an AVL tree of forwarding
     85  *   entries.  Each of these structures holds one reference on the bridge
     86  *   instance.  These lists and tree are protected by bi_rwlock.
     87  *
     88  * bridge_stream_t
     89  *   Bridge streams are allocated by stream_alloc() and freed by stream_free().
     90  *   These streams are created when "bridged" opens /dev/bridgectl, and are
     91  *   used to create new bridge instances (via BRIOC_NEWBRIDGE) and control the
     92  *   links on the bridge.  When a stream closes, the bridge instance created is
     93  *   destroyed.  There's at most one bridge instance for a given control
     94  *   stream.
     95  *
     96  * bridge_link_t
     97  *   Links are allocated by bridge_add_link() and freed by link_free().  The
     98  *   bi_links list holds a reference to the link.  When the BLF_DELETED flag is
     99  *   set, that reference is dropped.  The link isn't removed from the list
    100  *   until the last reference drops.  Each forwarding entry that uses a given
    101  *   link holds a reference, as does each thread transmitting a packet via the
    102  *   link.  The MAC layer calls in via bridge_ref_cb() to hold a reference on
    103  *   a link when transmitting.
    104  *
    105  *   It's important that once BLF_DELETED is set, there's no way for the
    106  *   reference count to increase again.  If it can, then the link may be
    107  *   double-freed.  The BLF_FREED flag is intended for use with assertions to
    108  *   guard against this in testing.
    109  *
    110  * bridge_fwd_t
    111  *   Bridge forwarding entries are allocated by bridge_recv_cb() and freed by
    112  *   fwd_free().  The bi_fwd AVL tree holds one reference to the entry.  Unlike
    113  *   other data structures, the reference is dropped when the entry is removed
    114  *   from the tree by fwd_delete(), and the BFF_INTREE flag is removed.  Each
    115  *   thread that's forwarding a packet to a known destination holds a reference
    116  *   to a forwarding entry.
    117  *
    118  * TRILL notes:
    119  *
    120  *   The TRILL module does all of its I/O through bridging.  It uses references
    121  *   on the bridge_inst_t and bridge_link_t structures, and has seven entry
    122  *   points and four callbacks.  One entry point is for setting the callbacks
    123  *   (bridge_trill_register_cb).  There are four entry points for taking bridge
    124  *   and link references (bridge_trill_{br,ln}{ref,unref}).  The final two
    125  *   entry points are for decapsulated packets from TRILL (bridge_trill_decaps)
    126  *   that need to be bridged locally, and for TRILL-encapsulated output packets
    127  *   (bridge_trill_output).
    128  *
    129  *   The four callbacks comprise two notification functions for bridges and
    130  *   links being deleted, one function for raw received TRILL packets, and one
    131  *   for bridge output to non-local TRILL destinations (tunnel entry).
    132  */
    133 
/*
 * Ethernet reserved multicast addresses for TRILL; used also in TRILL module.
 * all_isis_rbridges and bridge_group_address have external linkage, while
 * all_esadi_rbridges is private to this file.
 */
const uint8_t all_isis_rbridges[] = ALL_ISIS_RBRIDGES;
static const uint8_t all_esadi_rbridges[] = ALL_ESADI_RBRIDGES;
const uint8_t bridge_group_address[] = BRIDGE_GROUP_ADDRESS;

/*
 * Name tables for named kstats.  inst_kstats_list is passed to kstat_setup()
 * when a bridge instance is created; link_kstats_list is presumably the
 * per-link counterpart (TODO confirm against its user).
 */
static const char *inst_kstats_list[] = { KSINST_NAMES };
static const char *link_kstats_list[] = { KSLINK_NAMES };
    143 
    144 #define	KREF(p, m, vn)	p->m.vn.value.ui64
    145 #define	KINCR(p, m, vn)	++KREF(p, m, vn)
    146 #define	KDECR(p, m, vn)	--KREF(p, m, vn)
    147 
    148 #define	KIPINCR(p, vn)	KINCR(p, bi_kstats, vn)
    149 #define	KIPDECR(p, vn)	KDECR(p, bi_kstats, vn)
    150 #define	KLPINCR(p, vn)	KINCR(p, bl_kstats, vn)
    151 
    152 #define	KIINCR(vn)	KIPINCR(bip, vn)
    153 #define	KIDECR(vn)	KIPDECR(bip, vn)
    154 #define	KLINCR(vn)	KLPINCR(blp, vn)
    155 
    156 #define	Dim(x)		(sizeof (x) / sizeof (*(x)))
    157 
    158 /* Amount of overhead added when encapsulating with VLAN headers */
    159 #define	VLAN_INCR	(sizeof (struct ether_vlan_header) -	\
    160 			sizeof (struct ether_header))
    161 
/* Driver-wide handles: devinfo node, major number and taskq */
static dev_info_t *bridge_dev_info;	/* used as m_dip for mac_register() */
static major_t bridge_major;		/* used to build bi_dev */
static ddi_taskq_t *bridge_taskq;

/*
 * These are the bridge instance management data structures.  The mutex lock
 * protects the list of bridge instances.  A reference count is then used on
 * each instance to determine when to free it.  We use mac_minor_hold() to
 * allocate minor_t values, which are used both for self-cloning /dev/net/
 * device nodes as well as client streams.  Minor node 0 is reserved for the
 * allocation control node.
 */
static list_t inst_list;
static kcondvar_t inst_cv;		/* Allows us to wait for shutdown */
static kmutex_t inst_lock;

/* List of bridge_mac_t structures and the lock protecting it and bm_inst */
static krwlock_t bmac_rwlock;
static list_t bmac_list;

/* Wait for taskq entries that use STREAMS */
static kcondvar_t stream_ref_cv;
static kmutex_t stream_ref_lock;

/* Periodic timer state; the timer is armed when the first bmac is added */
static timeout_id_t bridge_timerid;
static clock_t bridge_scan_interval;	/* delay passed to timeout() */
static clock_t bridge_fwd_age;

static bridge_inst_t *bridge_find_name(const char *);
static void bridge_timer(void *);
static void bridge_unref(bridge_inst_t *);

/* All-zero Ethernet address; used as the source address of the bridge mac */
static const uint8_t zero_addr[ETHERADDRL] = { 0 };

/* Global TRILL linkage */
static trill_recv_pkt_t trill_recv_fn;
static trill_encap_pkt_t trill_encap_fn;
static trill_br_dstr_t trill_brdstr_fn;
static trill_ln_dstr_t trill_lndstr_fn;
    200 
/*
 * special settings to accommodate DLD flow control; see dld_str.c
 *
 * This module_info is shared by the read-side and write-side qinit
 * structures used for DLD streams.
 */
static struct module_info bridge_dld_modinfo = {
	0,			/* mi_idnum */
	BRIDGE_DEV_NAME,	/* mi_idname */
	0,			/* mi_minpsz */
	INFPSZ,			/* mi_maxpsz */
	1,			/* mi_hiwat */
	0			/* mi_lowat */
};
    210 
/*
 * Read-side queue initialization for DLD streams: only the open and close
 * entry points are supplied (by DLD); there are no put or service routines.
 */
static struct qinit bridge_dld_rinit = {
	NULL,			/* qi_putp */
	NULL,			/* qi_srvp */
	dld_open,		/* qi_qopen */
	dld_close,		/* qi_qclose */
	NULL,			/* qi_qadmin */
	&bridge_dld_modinfo,	/* qi_minfo */
	NULL			/* qi_mstat */
};
    220 
/*
 * Write-side queue initialization for DLD streams: put and service routines
 * are DLD's dld_wput/dld_wsrv, cast to the generic qinit function type.
 */
static struct qinit bridge_dld_winit = {
	(int (*)())dld_wput,	/* qi_putp */
	(int (*)())dld_wsrv,	/* qi_srvp */
	NULL,			/* qi_qopen */
	NULL,			/* qi_qclose */
	NULL,			/* qi_qadmin */
	&bridge_dld_modinfo,	/* qi_minfo */
	NULL			/* qi_mstat */
};
    230 
static int bridge_ioc_listfwd(void *, intptr_t, int, cred_t *, int *);

/*
 * GLDv3 control ioctls used by Bridging
 *
 * BRIDGE_IOC_LISTFWD copies a bridge_listfwd_t in and back out
 * (DLDCOPYINOUT) and is handled by bridge_ioc_listfwd().
 */
static dld_ioc_info_t bridge_ioc_list[] = {
	{BRIDGE_IOC_LISTFWD, DLDCOPYINOUT, sizeof (bridge_listfwd_t),
	    bridge_ioc_listfwd, NULL},
};
    238 
    239 /*
    240  * Given a bridge mac pointer, get a ref-held pointer to the corresponding
    241  * bridge instance, if any.  We must hold the global bmac_rwlock so that
    242  * bm_inst doesn't slide out from under us.
    243  */
    244 static bridge_inst_t *
    245 mac_to_inst(const bridge_mac_t *bmp)
    246 {
    247 	bridge_inst_t *bip;
    248 
    249 	rw_enter(&bmac_rwlock, RW_READER);
    250 	if ((bip = bmp->bm_inst) != NULL)
    251 		atomic_inc_uint(&bip->bi_refs);
    252 	rw_exit(&bmac_rwlock);
    253 	return (bip);
    254 }
    255 
/*
 * Mark a link as SDU-failed (failed == B_TRUE) or clear that mark
 * (failed == B_FALSE), and queue a notification for the bridge daemon.
 *
 * Nothing happens if the link is already in the requested state.  Otherwise
 * BLF_SDUFAIL is updated, the bridge mac's link state is recomputed when this
 * link was the only usable one, the link's own clients are told the
 * appropriate link state, and a bridge_ctl_t message describing the change is
 * pushed onto the head of *mlist (chained through b_next) for the caller to
 * send later.
 *
 * The function walks bi_links; the caller visible in this file holds
 * bi_rwlock as writer across the call.
 */
static void
link_sdu_fail(bridge_link_t *blp, boolean_t failed, mblk_t **mlist)
{
	mblk_t *mp;
	bridge_ctl_t *bcp;
	bridge_link_t *blcmp;
	bridge_inst_t *bip;
	bridge_mac_t *bmp;

	/* Flip BLF_SDUFAIL; bail out early if there is no state change */
	if (failed) {
		if (blp->bl_flags & BLF_SDUFAIL)
			return;
		blp->bl_flags |= BLF_SDUFAIL;
	} else {
		if (!(blp->bl_flags & BLF_SDUFAIL))
			return;
		blp->bl_flags &= ~BLF_SDUFAIL;
	}

	/*
	 * If this link is otherwise up, then check if there are any other
	 * non-failed non-down links.  If not, then we control the state of the
	 * whole bridge.
	 */
	bip = blp->bl_inst;
	bmp = bip->bi_mac;
	if (blp->bl_linkstate != LINK_STATE_DOWN) {
		for (blcmp = list_head(&bip->bi_links); blcmp != NULL;
		    blcmp = list_next(&bip->bi_links, blcmp)) {
			if (blp != blcmp &&
			    !(blcmp->bl_flags & (BLF_DELETED|BLF_SDUFAIL)) &&
			    blcmp->bl_linkstate != LINK_STATE_DOWN)
				break;
		}
		/* blcmp == NULL means no other usable link was found */
		if (blcmp == NULL) {
			bmp->bm_linkstate = failed ? LINK_STATE_DOWN :
			    LINK_STATE_UP;
			mac_link_redo(bmp->bm_mh, bmp->bm_linkstate);
		}
	}

	/*
	 * If we're becoming failed, then the link's current true state needs
	 * to be reflected upwards to this link's clients.  If we're becoming
	 * unfailed, then we get the state of the bridge instead on all
	 * clients.
	 */
	if (failed) {
		if (bmp->bm_linkstate != blp->bl_linkstate)
			mac_link_redo(blp->bl_mh, blp->bl_linkstate);
	} else {
		mac_link_redo(blp->bl_mh, bmp->bm_linkstate);
	}

	/* get the current mblk we're going to send up */
	if ((mp = blp->bl_lfailmp) == NULL &&
	    (mp = allocb(sizeof (bridge_ctl_t), BPRI_MED)) == NULL)
		return;

	/* get a new one for next time */
	blp->bl_lfailmp = allocb(sizeof (bridge_ctl_t), BPRI_MED);

	/* if none for next time, then report only failures */
	if (blp->bl_lfailmp == NULL && !failed) {
		/* keep the mblk in reserve instead of spending it now */
		blp->bl_lfailmp = mp;
		return;
	}

	/* Fill in the control message and push it on the caller's list */
	/* LINTED: alignment */
	bcp = (bridge_ctl_t *)mp->b_rptr;
	bcp->bc_linkid = blp->bl_linkid;
	bcp->bc_failed = failed;
	mp->b_wptr = (uchar_t *)(bcp + 1);
	mp->b_next = *mlist;
	*mlist = mp;
}
    332 
    333 /*
    334  * Send control messages (link SDU changes) using the stream to the
    335  * bridge instance daemon.
    336  */
    337 static void
    338 send_up_messages(bridge_inst_t *bip, mblk_t *mp)
    339 {
    340 	mblk_t *mnext;
    341 	queue_t *rq;
    342 
    343 	rq = bip->bi_control->bs_wq;
    344 	rq = OTHERQ(rq);
    345 	while (mp != NULL) {
    346 		mnext = mp->b_next;
    347 		mp->b_next = NULL;
    348 		putnext(rq, mp);
    349 		mp = mnext;
    350 	}
    351 }
    352 
/*
 * MAC getstat callback.  All arguments are ignored and every request fails
 * with ENOTSUP.
 */
/* ARGSUSED */
static int
bridge_m_getstat(void *arg, uint_t stat, uint64_t *val)
{
	return (ENOTSUP);
}
    359 
    360 static int
    361 bridge_m_start(void *arg)
    362 {
    363 	bridge_mac_t *bmp = arg;
    364 
    365 	bmp->bm_flags |= BMF_STARTED;
    366 	return (0);
    367 }
    368 
    369 static void
    370 bridge_m_stop(void *arg)
    371 {
    372 	bridge_mac_t *bmp = arg;
    373 
    374 	bmp->bm_flags &= ~BMF_STARTED;
    375 }
    376 
/*
 * MAC setpromisc callback.  The request is accepted and ignored; this always
 * returns 0.
 */
/* ARGSUSED */
static int
bridge_m_setpromisc(void *arg, boolean_t on)
{
	return (0);
}
    383 
/*
 * MAC multicast add/remove callback.  The request is accepted and ignored;
 * this always returns 0.
 */
/* ARGSUSED */
static int
bridge_m_multicst(void *arg, boolean_t add, const uint8_t *mca)
{
	return (0);
}
    390 
/*
 * MAC unicast address callback.  Changing the address is not supported; this
 * always returns ENOTSUP.
 */
/* ARGSUSED */
static int
bridge_m_unicst(void *arg, const uint8_t *macaddr)
{
	return (ENOTSUP);
}
    397 
/*
 * MAC transmit callback.  Nothing is transmitted on the bridge node itself:
 * the whole message chain is freed and NULL is returned, so no packets are
 * handed back to the caller.
 */
static mblk_t *
bridge_m_tx(void *arg, mblk_t *mp)
{
	_NOTE(ARGUNUSED(arg));
	freemsgchain(mp);
	return (NULL);
}
    405 
    406 /* ARGSUSED */
    407 static int
    408 bridge_ioc_listfwd(void *karg, intptr_t arg, int mode, cred_t *cred, int *rvalp)
    409 {
    410 	bridge_listfwd_t *blf = karg;
    411 	bridge_inst_t *bip;
    412 	bridge_fwd_t *bfp, match;
    413 	avl_index_t where;
    414 
    415 	bip = bridge_find_name(blf->blf_name);
    416 	if (bip == NULL)
    417 		return (ENOENT);
    418 
    419 	bcopy(blf->blf_dest, match.bf_dest, ETHERADDRL);
    420 	match.bf_flags |= BFF_VLANLOCAL;
    421 	rw_enter(&bip->bi_rwlock, RW_READER);
    422 	if ((bfp = avl_find(&bip->bi_fwd, &match, &where)) == NULL)
    423 		bfp = avl_nearest(&bip->bi_fwd, where, AVL_AFTER);
    424 	else
    425 		bfp = AVL_NEXT(&bip->bi_fwd, bfp);
    426 	if (bfp == NULL) {
    427 		bzero(blf, sizeof (*blf));
    428 	} else {
    429 		bcopy(bfp->bf_dest, blf->blf_dest, ETHERADDRL);
    430 		blf->blf_trill_nick = bfp->bf_trill_nick;
    431 		blf->blf_ms_age =
    432 		    drv_hztousec(ddi_get_lbolt() - bfp->bf_lastheard) / 1000;
    433 		blf->blf_is_local =
    434 		    (bfp->bf_flags & BFF_LOCALADDR) != 0;
    435 		blf->blf_linkid = bfp->bf_links[0]->bl_linkid;
    436 	}
    437 	rw_exit(&bip->bi_rwlock);
    438 	bridge_unref(bip);
    439 	return (0);
    440 }
    441 
    442 static int
    443 bridge_m_setprop(void *arg, const char *pr_name, mac_prop_id_t pr_num,
    444     uint_t pr_valsize, const void *pr_val)
    445 {
    446 	bridge_mac_t *bmp = arg;
    447 	bridge_inst_t *bip;
    448 	bridge_link_t *blp;
    449 	int err;
    450 	uint_t maxsdu;
    451 	mblk_t *mlist;
    452 
    453 	_NOTE(ARGUNUSED(pr_name));
    454 	switch (pr_num) {
    455 	case MAC_PROP_MTU:
    456 		if (pr_valsize < sizeof (bmp->bm_maxsdu)) {
    457 			err = EINVAL;
    458 			break;
    459 		}
    460 		(void) bcopy(pr_val, &maxsdu, sizeof (maxsdu));
    461 		if (maxsdu == bmp->bm_maxsdu) {
    462 			err = 0;
    463 		} else if ((bip = mac_to_inst(bmp)) == NULL) {
    464 			err = ENXIO;
    465 		} else {
    466 			rw_enter(&bip->bi_rwlock, RW_WRITER);
    467 			mlist = NULL;
    468 			for (blp = list_head(&bip->bi_links); blp != NULL;
    469 			    blp = list_next(&bip->bi_links, blp)) {
    470 				if (blp->bl_flags & BLF_DELETED)
    471 					continue;
    472 				if (blp->bl_maxsdu == maxsdu)
    473 					link_sdu_fail(blp, B_FALSE, &mlist);
    474 				else if (blp->bl_maxsdu == bmp->bm_maxsdu)
    475 					link_sdu_fail(blp, B_TRUE, &mlist);
    476 			}
    477 			rw_exit(&bip->bi_rwlock);
    478 			bmp->bm_maxsdu = maxsdu;
    479 			(void) mac_maxsdu_update(bmp->bm_mh, maxsdu);
    480 			send_up_messages(bip, mlist);
    481 			bridge_unref(bip);
    482 			err = 0;
    483 		}
    484 		break;
    485 
    486 	default:
    487 		err = ENOTSUP;
    488 		break;
    489 	}
    490 	return (err);
    491 }
    492 
    493 static int
    494 bridge_m_getprop(void *arg, const char *pr_name, mac_prop_id_t pr_num,
    495     uint_t pr_flags, uint_t pr_valsize, void *pr_val, uint_t *perm)
    496 {
    497 	bridge_mac_t *bmp = arg;
    498 	int err = 0;
    499 
    500 	_NOTE(ARGUNUSED(pr_name));
    501 	switch (pr_num) {
    502 	case MAC_PROP_MTU: {
    503 		mac_propval_range_t range;
    504 
    505 		if (!(pr_flags & MAC_PROP_POSSIBLE))
    506 			return (ENOTSUP);
    507 		if (pr_valsize < sizeof (mac_propval_range_t))
    508 			return (EINVAL);
    509 		range.mpr_count = 1;
    510 		range.mpr_type = MAC_PROPVAL_UINT32;
    511 		range.range_uint32[0].mpur_min =
    512 		    range.range_uint32[0].mpur_max = bmp->bm_maxsdu;
    513 		bcopy(&range, pr_val, sizeof (range));
    514 		*perm = MAC_PROP_PERM_RW;
    515 		break;
    516 	}
    517 	case MAC_PROP_STATUS:
    518 		if (pr_valsize < sizeof (bmp->bm_linkstate)) {
    519 			err = EINVAL;
    520 		} else {
    521 			bcopy(&bmp->bm_linkstate, pr_val,
    522 			    sizeof (&bmp->bm_linkstate));
    523 			*perm = MAC_PROP_PERM_READ;
    524 		}
    525 		break;
    526 
    527 	default:
    528 		err = ENOTSUP;
    529 		break;
    530 	}
    531 	return (err);
    532 }
    533 
/*
 * MAC callback vector registered for each bridge mac by bmac_alloc().  The
 * first member is a flag word naming MC_SETPROP and MC_GETPROP (presumably
 * marking which optional callbacks are present -- confirm against the
 * mac_callbacks_t definition); ioctl, getcapab, open and close are left NULL.
 */
static mac_callbacks_t bridge_m_callbacks = {
	MC_SETPROP | MC_GETPROP,
	bridge_m_getstat,
	bridge_m_start,
	bridge_m_stop,
	bridge_m_setpromisc,
	bridge_m_multicst,
	bridge_m_unicst,
	bridge_m_tx,
	NULL,	/* ioctl */
	NULL,	/* getcapab */
	NULL,	/* open */
	NULL,	/* close */
	bridge_m_setprop,
	bridge_m_getprop
};
    550 
    551 /*
    552  * Create kstats from a list.
    553  */
    554 static kstat_t *
    555 kstat_setup(kstat_named_t *knt, const char **names, int nstat,
    556     const char *unitname)
    557 {
    558 	kstat_t *ksp;
    559 	int i;
    560 
    561 	for (i = 0; i < nstat; i++)
    562 		kstat_named_init(&knt[i], names[i], KSTAT_DATA_UINT64);
    563 
    564 	ksp = kstat_create_zone(BRIDGE_DEV_NAME, 0, unitname, "net",
    565 	    KSTAT_TYPE_NAMED, nstat, KSTAT_FLAG_VIRTUAL, GLOBAL_ZONEID);
    566 	if (ksp != NULL) {
    567 		ksp->ks_data = knt;
    568 		kstat_install(ksp);
    569 	}
    570 	return (ksp);
    571 }
    572 
/*
 * Find an existing bridge_mac_t structure or allocate a new one for the given
 * bridge instance.  This creates the mac driver instance that snoop can use.
 *
 * On success 0 is returned and *bmacp points to the bridge mac, whose bm_inst
 * now refers to bip.  On failure *bmacp is NULL and an errno value is
 * returned: EINVAL if mac_alloc() fails, or the error from mac_register().
 *
 * bmac_rwlock is held as writer across the search, the registration and the
 * list insertion.
 */
static int
bmac_alloc(bridge_inst_t *bip, bridge_mac_t **bmacp)
{
	bridge_mac_t *bmp, *bnew;
	mac_register_t *mac;
	int err;

	*bmacp = NULL;
	if ((mac = mac_alloc(MAC_VERSION)) == NULL)
		return (EINVAL);

	/* Allocate up front so no sleeping allocation occurs under the lock */
	bnew = kmem_zalloc(sizeof (*bnew), KM_SLEEP);

	rw_enter(&bmac_rwlock, RW_WRITER);
	for (bmp = list_head(&bmac_list); bmp != NULL;
	    bmp = list_next(&bmac_list, bmp)) {
		if (strcmp(bip->bi_name, bmp->bm_name) == 0) {
			/* Reuse the lingering mac left by a previous instance */
			ASSERT(bmp->bm_inst == NULL);
			bmp->bm_inst = bip;
			rw_exit(&bmac_rwlock);
			kmem_free(bnew, sizeof (*bnew));
			mac_free(mac);
			*bmacp = bmp;
			return (0);
		}
	}

	mac->m_type_ident = MAC_PLUGIN_IDENT_ETHER;
	mac->m_driver = bnew;
	mac->m_dip = bridge_dev_info;
	mac->m_instance = (uint_t)-1;
	mac->m_src_addr = (uint8_t *)zero_addr;
	mac->m_callbacks = &bridge_m_callbacks;

	/*
	 * Note that the SDU limits are irrelevant, as nobody transmits on the
	 * bridge node itself.  It's mainly for monitoring but we allow
	 * setting the bridge MTU for quick transition of all links part of the
	 * bridge to a new MTU.
	 */
	mac->m_min_sdu = 1;
	mac->m_max_sdu = 1500;
	err = mac_register(mac, &bnew->bm_mh);
	/* The registration template is no longer needed either way */
	mac_free(mac);
	if (err != 0) {
		rw_exit(&bmac_rwlock);
		kmem_free(bnew, sizeof (*bnew));
		return (err);
	}

	bnew->bm_inst = bip;
	(void) strcpy(bnew->bm_name, bip->bi_name);
	/* The first bridge mac on the list arms the periodic timer */
	if (list_is_empty(&bmac_list)) {
		bridge_timerid = timeout(bridge_timer, NULL,
		    bridge_scan_interval);
	}
	list_insert_tail(&bmac_list, bnew);
	rw_exit(&bmac_rwlock);

	/*
	 * Mark the MAC as unable to go "active" so that only passive clients
	 * (such as snoop) can bind to it.
	 */
	mac_no_active(bnew->bm_mh);
	*bmacp = bnew;
	return (0);
}
    644 
    645 /*
    646  * Disconnect the given bridge_mac_t from its bridge instance.  The bridge
    647  * instance is going away.  The mac instance can't go away until the clients
    648  * are gone (see bridge_timer).
    649  */
    650 static void
    651 bmac_disconnect(bridge_mac_t *bmp)
    652 {
    653 	bridge_inst_t *bip;
    654 
    655 	bmp->bm_linkstate = LINK_STATE_DOWN;
    656 	mac_link_redo(bmp->bm_mh, LINK_STATE_DOWN);
    657 
    658 	rw_enter(&bmac_rwlock, RW_READER);
    659 	bip = bmp->bm_inst;
    660 	bip->bi_mac = NULL;
    661 	bmp->bm_inst = NULL;
    662 	rw_exit(&bmac_rwlock);
    663 }
    664 
    665 /* This is used by the avl trees to sort forwarding table entries */
    666 static int
    667 fwd_compare(const void *addr1, const void *addr2)
    668 {
    669 	const bridge_fwd_t *fwd1 = addr1;
    670 	const bridge_fwd_t *fwd2 = addr2;
    671 	int diff = memcmp(fwd1->bf_dest, fwd2->bf_dest, ETHERADDRL);
    672 
    673 	if (diff != 0)
    674 		return (diff > 0 ? 1 : -1);
    675 
    676 	if ((fwd1->bf_flags ^ fwd2->bf_flags) & BFF_VLANLOCAL) {
    677 		if (fwd1->bf_vlanid > fwd2->bf_vlanid)
    678 			return (1);
    679 		else if (fwd1->bf_vlanid < fwd2->bf_vlanid)
    680 			return (-1);
    681 	}
    682 	return (0);
    683 }
    684 
/*
 * Tear down and free a bridge instance allocated by inst_alloc().  The
 * instance must already be disconnected from its bridge mac (bi_mac NULL).
 * Destroys the lock, link list, condition variable and forwarding tree,
 * deletes the instance kstat if one was created, and frees the memory.
 */
static void
inst_free(bridge_inst_t *bip)
{
	ASSERT(bip->bi_mac == NULL);
	rw_destroy(&bip->bi_rwlock);
	list_destroy(&bip->bi_links);
	cv_destroy(&bip->bi_linkwait);
	avl_destroy(&bip->bi_fwd);
	/* bi_ksp stays NULL if kstat_setup() failed or was never called */
	if (bip->bi_ksp != NULL)
		kstat_delete(bip->bi_ksp);
	kmem_free(bip, sizeof (*bip));
}
    697 
    698 static bridge_inst_t *
    699 inst_alloc(const char *bridge)
    700 {
    701 	bridge_inst_t *bip;
    702 
    703 	bip = kmem_zalloc(sizeof (*bip), KM_SLEEP);
    704 	bip->bi_refs = 1;
    705 	(void) strcpy(bip->bi_name, bridge);
    706 	rw_init(&bip->bi_rwlock, NULL, RW_DRIVER, NULL);
    707 	list_create(&bip->bi_links, sizeof (bridge_link_t),
    708 	    offsetof(bridge_link_t, bl_node));
    709 	cv_init(&bip->bi_linkwait, NULL, CV_DRIVER, NULL);
    710 	avl_create(&bip->bi_fwd, fwd_compare, sizeof (bridge_fwd_t),
    711 	    offsetof(bridge_fwd_t, bf_node));
    712 	return (bip);
    713 }
    714 
    715 static bridge_inst_t *
    716 bridge_find_name(const char *bridge)
    717 {
    718 	bridge_inst_t *bip;
    719 
    720 	mutex_enter(&inst_lock);
    721 	for (bip = list_head(&inst_list); bip != NULL;
    722 	    bip = list_next(&inst_list, bip)) {
    723 		if (!(bip->bi_flags & BIF_SHUTDOWN) &&
    724 		    strcmp(bridge, bip->bi_name) == 0) {
    725 			atomic_inc_uint(&bip->bi_refs);
    726 			break;
    727 		}
    728 	}
    729 	mutex_exit(&inst_lock);
    730 
    731 	return (bip);
    732 }
    733 
/*
 * Create a new bridge instance named "bridge" and attach a bridge mac to it.
 *
 * If an instance with the same name exists but is shutting down, this waits
 * on inst_cv until it is gone and retries.  If a live instance with that name
 * exists, EEXIST is returned.  Otherwise the new instance is put on
 * inst_list, its kstats are created, a bridge mac is found or allocated, and
 * (unless the mac already has BMF_DLS set) a DLS devnet is created for
 * "linkid" in the zone taken from "cred".
 *
 * On success returns 0 with *bipc set to the new instance.  On failure *bipc
 * is NULL, the partially constructed instance is marked BIF_SHUTDOWN and
 * released, and an errno value is returned.
 */
static int
bridge_create(datalink_id_t linkid, const char *bridge, bridge_inst_t **bipc,
    cred_t *cred)
{
	bridge_inst_t *bip, *bipnew;
	bridge_mac_t *bmp = NULL;
	int err;

	*bipc = NULL;
	/* Allocate before taking inst_lock; discarded below if not needed */
	bipnew = inst_alloc(bridge);

	mutex_enter(&inst_lock);
lookup_retry:
	for (bip = list_head(&inst_list); bip != NULL;
	    bip = list_next(&inst_list, bip)) {
		if (strcmp(bridge, bip->bi_name) == 0)
			break;
	}

	/* This should not take long; if it does, we've got a design problem */
	if (bip != NULL && (bip->bi_flags & BIF_SHUTDOWN)) {
		cv_wait(&inst_cv, &inst_lock);
		goto lookup_retry;
	}

	if (bip == NULL) {
		/* No name clash: publish the new instance */
		bip = bipnew;
		bipnew = NULL;
		list_insert_tail(&inst_list, bip);
	}

	mutex_exit(&inst_lock);
	/* bipnew still set means an existing live instance owns the name */
	if (bipnew != NULL) {
		inst_free(bipnew);
		return (EEXIST);
	}

	bip->bi_ksp = kstat_setup((kstat_named_t *)&bip->bi_kstats,
	    inst_kstats_list, Dim(inst_kstats_list), bip->bi_name);

	err = bmac_alloc(bip, &bmp);
	if ((bip->bi_mac = bmp) == NULL)
		goto fail_create;

	/*
	 * bm_inst is set, so the timer cannot yank the DLS rug from under us.
	 * No extra locking is needed here.
	 */
	if (!(bmp->bm_flags & BMF_DLS)) {
		err = dls_devnet_create(bmp->bm_mh, linkid, crgetzoneid(cred));
		if (err != 0)
			goto fail_create;
		bmp->bm_flags |= BMF_DLS;
	}

	bip->bi_dev = makedevice(bridge_major, mac_minor(bmp->bm_mh));
	*bipc = bip;
	return (0);

fail_create:
	ASSERT(bip->bi_trilldata == NULL);
	/* Drop the list's reference; bridge_unref() asserts BIF_SHUTDOWN */
	bip->bi_flags |= BIF_SHUTDOWN;
	bridge_unref(bip);
	return (err);
}
    799 
    800 static void
    801 bridge_unref(bridge_inst_t *bip)
    802 {
    803 	if (atomic_dec_uint_nv(&bip->bi_refs) == 0) {
    804 		ASSERT(bip->bi_flags & BIF_SHUTDOWN);
    805 		/* free up mac for reuse before leaving global list */
    806 		if (bip->bi_mac != NULL)
    807 			bmac_disconnect(bip->bi_mac);
    808 		mutex_enter(&inst_lock);
    809 		list_remove(&inst_list, bip);
    810 		cv_broadcast(&inst_cv);
    811 		mutex_exit(&inst_lock);
    812 		inst_free(bip);
    813 	}
    814 }
    815 
    816 /*
    817  * Stream instances are used only for allocating bridges and serving as a
    818  * control node.  They serve no data-handling function.
    819  */
    820 static bridge_stream_t *
    821 stream_alloc(void)
    822 {
    823 	bridge_stream_t *bsp;
    824 	minor_t mn;
    825 
    826 	if ((mn = mac_minor_hold(B_FALSE)) == 0)
    827 		return (NULL);
    828 	bsp = kmem_zalloc(sizeof (*bsp), KM_SLEEP);
    829 	bsp->bs_minor = mn;
    830 	return (bsp);
    831 }
    832 
/*
 * Free a stream structure obtained from stream_alloc(): give back its minor
 * number and release the memory.
 */
static void
stream_free(bridge_stream_t *bsp)
{
	mac_minor_rele(bsp->bs_minor);
	kmem_free(bsp, sizeof (*bsp));
}
    839 
/*
 * Reference hold/release functions for STREAMS-related taskq
 *
 * stream_ref() takes a hold by incrementing bs_taskq_cnt under
 * stream_ref_lock.
 */
static void
stream_ref(bridge_stream_t *bsp)
{
	mutex_enter(&stream_ref_lock);
	bsp->bs_taskq_cnt++;
	mutex_exit(&stream_ref_lock);
}
    848 
    849 static void
    850 stream_unref(bridge_stream_t *bsp)
    851 {
    852 	mutex_enter(&stream_ref_lock);
    853 	if (--bsp->bs_taskq_cnt == 0)
    854 		cv_broadcast(&stream_ref_cv);
    855 	mutex_exit(&stream_ref_lock);
    856 }
    857 
/*
 * Free a bridge link and drop the reference it holds on its bridge instance.
 * BLF_FREED is set (and asserted not to be set already) to catch double
 * frees.  The link's kstat and its reserved SDU-failure mblk are released if
 * present.
 */
static void
link_free(bridge_link_t *blp)
{
	/* Save the instance pointer; blp is freed before it is needed */
	bridge_inst_t *bip = blp->bl_inst;

	ASSERT(!(blp->bl_flags & BLF_FREED));
	blp->bl_flags |= BLF_FREED;
	if (blp->bl_ksp != NULL)
		kstat_delete(blp->bl_ksp);
	if (blp->bl_lfailmp != NULL)
		freeb(blp->bl_lfailmp);
	cv_destroy(&blp->bl_trillwait);
	mutex_destroy(&blp->bl_trilllock);
	kmem_free(blp, sizeof (*blp));
	/* Don't unreference the bridge until the MAC is closed */
	bridge_unref(bip);
}
    875 
    876 static void
    877 link_unref(bridge_link_t *blp)
    878 {
    879 	if (atomic_dec_uint_nv(&blp->bl_refs) == 0) {
    880 		bridge_inst_t *bip = blp->bl_inst;
    881 
    882 		ASSERT(blp->bl_flags & BLF_DELETED);
    883 		rw_enter(&bip->bi_rwlock, RW_WRITER);
    884 		list_remove(&bip->bi_links, blp);
    885 		rw_exit(&bip->bi_rwlock);
    886 		if (bip->bi_trilldata != NULL && list_is_empty(&bip->bi_links))
    887 			cv_broadcast(&bip->bi_linkwait);
    888 		link_free(blp);
    889 	}
    890 }
    891 
    892 static bridge_fwd_t *
    893 fwd_alloc(const uint8_t *addr, uint_t nlinks, uint16_t nick)
    894 {
    895 	bridge_fwd_t *bfp;
    896 
    897 	bfp = kmem_zalloc(sizeof (*bfp) + (nlinks * sizeof (bridge_link_t *)),
    898 	    KM_NOSLEEP);
    899 	if (bfp != NULL) {
    900 		bcopy(addr, bfp->bf_dest, ETHERADDRL);
    901 		bfp->bf_lastheard = ddi_get_lbolt();
    902 		bfp->bf_maxlinks = nlinks;
    903 		bfp->bf_links = (bridge_link_t **)(bfp + 1);
    904 		bfp->bf_trill_nick = nick;
    905 	}
    906 	return (bfp);
    907 }
    908 
    909 static bridge_fwd_t *
    910 fwd_find(bridge_inst_t *bip, const uint8_t *addr, uint16_t vlanid)
    911 {
    912 	bridge_fwd_t *bfp, *vbfp;
    913 	bridge_fwd_t match;
    914 
    915 	bcopy(addr, match.bf_dest, ETHERADDRL);
    916 	match.bf_flags = 0;
    917 	rw_enter(&bip->bi_rwlock, RW_READER);
    918 	if ((bfp = avl_find(&bip->bi_fwd, &match, NULL)) != NULL) {
    919 		if (bfp->bf_vlanid != vlanid && bfp->bf_vcnt > 0) {
    920 			match.bf_vlanid = vlanid;
    921 			match.bf_flags = BFF_VLANLOCAL;
    922 			vbfp = avl_find(&bip->bi_fwd, &match, NULL);
    923 			if (vbfp != NULL)
    924 				bfp = vbfp;
    925 		}
    926 		atomic_inc_uint(&bfp->bf_refs);
    927 	}
    928 	rw_exit(&bip->bi_rwlock);
    929 	return (bfp);
    930 }
    931 
    932 static void
    933 fwd_free(bridge_fwd_t *bfp)
    934 {
    935 	uint_t i;
    936 	bridge_inst_t *bip = bfp->bf_links[0]->bl_inst;
    937 
    938 	KIDECR(bki_count);
    939 	for (i = 0; i < bfp->bf_nlinks; i++)
    940 		link_unref(bfp->bf_links[i]);
    941 	kmem_free(bfp,
    942 	    sizeof (*bfp) + bfp->bf_maxlinks * sizeof (bridge_link_t *));
    943 }
    944 
    945 static void
    946 fwd_unref(bridge_fwd_t *bfp)
    947 {
    948 	if (atomic_dec_uint_nv(&bfp->bf_refs) == 0) {
    949 		ASSERT(!(bfp->bf_flags & BFF_INTREE));
    950 		fwd_free(bfp);
    951 	}
    952 }
    953 
    954 static void
    955 fwd_delete(bridge_fwd_t *bfp)
    956 {
    957 	bridge_inst_t *bip;
    958 	bridge_fwd_t *bfpzero;
    959 
    960 	if (bfp->bf_flags & BFF_INTREE) {
    961 		ASSERT(bfp->bf_nlinks > 0);
    962 		bip = bfp->bf_links[0]->bl_inst;
    963 		rw_enter(&bip->bi_rwlock, RW_WRITER);
    964 		/* Another thread could beat us to this */
    965 		if (bfp->bf_flags & BFF_INTREE) {
    966 			avl_remove(&bip->bi_fwd, bfp);
    967 			bfp->bf_flags &= ~BFF_INTREE;
    968 			if (bfp->bf_flags & BFF_VLANLOCAL) {
    969 				bfp->bf_flags &= ~BFF_VLANLOCAL;
    970 				bfpzero = avl_find(&bip->bi_fwd, bfp, NULL);
    971 				if (bfpzero != NULL && bfpzero->bf_vcnt > 0)
    972 					bfpzero->bf_vcnt--;
    973 			}
    974 			rw_exit(&bip->bi_rwlock);
    975 			fwd_unref(bfp);		/* no longer in avl tree */
    976 		} else {
    977 			rw_exit(&bip->bi_rwlock);
    978 		}
    979 	}
    980 }
    981 
    982 static boolean_t
    983 fwd_insert(bridge_inst_t *bip, bridge_fwd_t *bfp)
    984 {
    985 	avl_index_t idx;
    986 	boolean_t retv;
    987 
    988 	rw_enter(&bip->bi_rwlock, RW_WRITER);
    989 	if (!(bip->bi_flags & BIF_SHUTDOWN) &&
    990 	    avl_numnodes(&bip->bi_fwd) < bip->bi_tablemax &&
    991 	    avl_find(&bip->bi_fwd, bfp, &idx) == NULL) {
    992 		avl_insert(&bip->bi_fwd, bfp, idx);
    993 		bfp->bf_flags |= BFF_INTREE;
    994 		atomic_inc_uint(&bfp->bf_refs);	/* avl entry */
    995 		retv = B_TRUE;
    996 	} else {
    997 		retv = B_FALSE;
    998 	}
    999 	rw_exit(&bip->bi_rwlock);
   1000 	return (retv);
   1001 }
   1002 
   1003 static void
   1004 fwd_update_local(bridge_link_t *blp, const uint8_t *oldaddr,
   1005     const uint8_t *newaddr)
   1006 {
   1007 	bridge_inst_t *bip = blp->bl_inst;
   1008 	bridge_fwd_t *bfp, *bfnew;
   1009 	bridge_fwd_t match;
   1010 	avl_index_t idx;
   1011 	boolean_t drop_ref = B_FALSE;
   1012 
   1013 	if (bcmp(oldaddr, newaddr, ETHERADDRL) == 0)
   1014 		return;
   1015 
   1016 	if (bcmp(oldaddr, zero_addr, ETHERADDRL) == 0)
   1017 		goto no_old_addr;
   1018 
   1019 	/*
   1020 	 * Find the previous entry, and remove our link from it.
   1021 	 */
   1022 	bcopy(oldaddr, match.bf_dest, ETHERADDRL);
   1023 	rw_enter(&bip->bi_rwlock, RW_WRITER);
   1024 	if ((bfp = avl_find(&bip->bi_fwd, &match, NULL)) != NULL) {
   1025 		int i;
   1026 
   1027 		/*
   1028 		 * See if we're in the list, and remove if so.
   1029 		 */
   1030 		for (i = 0; i < bfp->bf_nlinks; i++) {
   1031 			if (bfp->bf_links[i] == blp) {
   1032 				/*
   1033 				 * We assume writes are atomic, so no special
   1034 				 * MT handling is needed.  The list length is
   1035 				 * decremented first, and then we remove
   1036 				 * entries.
   1037 				 */
   1038 				bfp->bf_nlinks--;
   1039 				for (; i < bfp->bf_nlinks; i++)
   1040 					bfp->bf_links[i] = bfp->bf_links[i + 1];
   1041 				drop_ref = B_TRUE;
   1042 				break;
   1043 			}
   1044 		}
   1045 		/* If no more links, then remove and free up */
   1046 		if (bfp->bf_nlinks == 0) {
   1047 			avl_remove(&bip->bi_fwd, bfp);
   1048 			bfp->bf_flags &= ~BFF_INTREE;
   1049 		} else {
   1050 			bfp = NULL;
   1051 		}
   1052 	}
   1053 	rw_exit(&bip->bi_rwlock);
   1054 	if (bfp != NULL)
   1055 		fwd_unref(bfp);		/* no longer in avl tree */
   1056 
   1057 	/*
   1058 	 * Now get the new link address and add this link to the list.  The
   1059 	 * list should be of length 1 unless the user has configured multiple
   1060 	 * NICs with the same address.  (That's an incorrect configuration, but
   1061 	 * we support it anyway.)
   1062 	 */
   1063 no_old_addr:
   1064 	bfp = NULL;
   1065 	if ((bip->bi_flags & BIF_SHUTDOWN) ||
   1066 	    bcmp(newaddr, zero_addr, ETHERADDRL) == 0)
   1067 		goto no_new_addr;
   1068 
   1069 	bcopy(newaddr, match.bf_dest, ETHERADDRL);
   1070 	rw_enter(&bip->bi_rwlock, RW_WRITER);
   1071 	if ((bfp = avl_find(&bip->bi_fwd, &match, &idx)) == NULL) {
   1072 		bfnew = fwd_alloc(newaddr, 1, RBRIDGE_NICKNAME_NONE);
   1073 		if (bfnew != NULL)
   1074 			KIINCR(bki_count);
   1075 	} else if (bfp->bf_nlinks < bfp->bf_maxlinks) {
   1076 		/* special case: link fits in existing entry */
   1077 		bfnew = bfp;
   1078 	} else {
   1079 		bfnew = fwd_alloc(newaddr, bfp->bf_nlinks + 1,
   1080 		    RBRIDGE_NICKNAME_NONE);
   1081 		if (bfnew != NULL) {
   1082 			KIINCR(bki_count);
   1083 			avl_remove(&bip->bi_fwd, bfp);
   1084 			bfp->bf_flags &= ~BFF_INTREE;
   1085 			bfnew->bf_nlinks = bfp->bf_nlinks;
   1086 			bcopy(bfp->bf_links, bfnew->bf_links,
   1087 			    bfp->bf_nlinks * sizeof (bfp));
   1088 			/* reset the idx value due to removal above */
   1089 			(void) avl_find(&bip->bi_fwd, &match, &idx);
   1090 		}
   1091 	}
   1092 
   1093 	if (bfnew != NULL) {
   1094 		bfnew->bf_links[bfnew->bf_nlinks++] = blp;
   1095 		if (drop_ref)
   1096 			drop_ref = B_FALSE;
   1097 		else
   1098 			atomic_inc_uint(&blp->bl_refs);	/* bf_links entry */
   1099 
   1100 		if (bfnew != bfp) {
   1101 			/* local addresses are not subject to table limits */
   1102 			avl_insert(&bip->bi_fwd, bfnew, idx);
   1103 			bfnew->bf_flags |= (BFF_INTREE | BFF_LOCALADDR);
   1104 			atomic_inc_uint(&bfnew->bf_refs);	/* avl entry */
   1105 		}
   1106 	}
   1107 	rw_exit(&bip->bi_rwlock);
   1108 
   1109 no_new_addr:
   1110 	/*
   1111 	 * If we found an existing entry and we replaced it with a new one,
   1112 	 * then drop the table reference from the old one.  We removed it from
   1113 	 * the AVL tree above.
   1114 	 */
   1115 	if (bfnew != NULL && bfp != NULL && bfnew != bfp)
   1116 		fwd_unref(bfp);
   1117 
   1118 	/* Account for removed entry. */
   1119 	if (drop_ref)
   1120 		link_unref(blp);
   1121 }
   1122 
   1123 static void
   1124 bridge_new_unicst(bridge_link_t *blp)
   1125 {
   1126 	uint8_t new_mac[ETHERADDRL];
   1127 
   1128 	mac_unicast_primary_get(blp->bl_mh, new_mac);
   1129 	fwd_update_local(blp, blp->bl_local_mac, new_mac);
   1130 	bcopy(new_mac, blp->bl_local_mac, ETHERADDRL);
   1131 }
   1132 
   1133 /*
   1134  * We must shut down a link prior to freeing it, and doing that requires
   1135  * blocking to wait for running MAC threads while holding a reference.  This is
   1136  * run from a taskq to accomplish proper link shutdown followed by reference
   1137  * drop.
   1138  */
   1139 static void
   1140 link_shutdown(void *arg)
   1141 {
   1142 	bridge_link_t *blp = arg;
   1143 	mac_handle_t mh = blp->bl_mh;
   1144 	bridge_inst_t *bip;
   1145 	bridge_fwd_t *bfp, *bfnext;
   1146 	avl_tree_t fwd_scavenge;
   1147 	int i;
   1148 
   1149 	/*
   1150 	 * This link is being destroyed.  Notify TRILL now that it's no longer
   1151 	 * possible to send packets.  Data packets may still arrive until TRILL
   1152 	 * calls bridge_trill_lnunref.
   1153 	 */
   1154 	if (blp->bl_trilldata != NULL)
   1155 		trill_lndstr_fn(blp->bl_trilldata, blp);
   1156 
   1157 	if (blp->bl_flags & BLF_PROM_ADDED)
   1158 		(void) mac_promisc_remove(blp->bl_mphp);
   1159 
   1160 	if (blp->bl_flags & BLF_SET_BRIDGE)
   1161 		mac_bridge_clear(mh, (mac_handle_t)blp);
   1162 
   1163 	if (blp->bl_flags & BLF_MARGIN_ADDED) {
   1164 		(void) mac_notify_remove(blp->bl_mnh, B_TRUE);
   1165 		(void) mac_margin_remove(mh, blp->bl_margin);
   1166 	}
   1167 
   1168 	/* Tell the clients the real link state when we leave */
   1169 	mac_link_redo(blp->bl_mh,
   1170 	    mac_stat_get(blp->bl_mh, MAC_STAT_LOWLINK_STATE));
   1171 
   1172 	/* Destroy all of the forwarding entries related to this link */
   1173 	avl_create(&fwd_scavenge, fwd_compare, sizeof (bridge_fwd_t),
   1174 	    offsetof(bridge_fwd_t, bf_node));
   1175 	bip = blp->bl_inst;
   1176 	rw_enter(&bip->bi_rwlock, RW_WRITER);
   1177 	bfnext = avl_first(&bip->bi_fwd);
   1178 	while ((bfp = bfnext) != NULL) {
   1179 		bfnext = AVL_NEXT(&bip->bi_fwd, bfp);
   1180 		for (i = 0; i < bfp->bf_nlinks; i++) {
   1181 			if (bfp->bf_links[i] == blp)
   1182 				break;
   1183 		}
   1184 		if (i >= bfp->bf_nlinks)
   1185 			continue;
   1186 		if (bfp->bf_nlinks > 1) {
   1187 			/* note that this can't be the last reference */
   1188 			link_unref(blp);
   1189 			bfp->bf_nlinks--;
   1190 			for (; i < bfp->bf_nlinks; i++)
   1191 				bfp->bf_links[i] = bfp->bf_links[i + 1];
   1192 		} else {
   1193 			ASSERT(bfp->bf_flags & BFF_INTREE);
   1194 			avl_remove(&bip->bi_fwd, bfp);
   1195 			bfp->bf_flags &= ~BFF_INTREE;
   1196 			avl_add(&fwd_scavenge, bfp);
   1197 		}
   1198 	}
   1199 	rw_exit(&bip->bi_rwlock);
   1200 	bfnext = avl_first(&fwd_scavenge);
   1201 	while ((bfp = bfnext) != NULL) {
   1202 		bfnext = AVL_NEXT(&fwd_scavenge, bfp);
   1203 		avl_remove(&fwd_scavenge, bfp);
   1204 		fwd_unref(bfp);
   1205 	}
   1206 	avl_destroy(&fwd_scavenge);
   1207 
   1208 	if (blp->bl_flags & BLF_CLIENT_OPEN)
   1209 		mac_client_close(blp->bl_mch, 0);
   1210 
   1211 	mac_close(mh);
   1212 
   1213 	/*
   1214 	 * We are now completely removed from the active list, so drop the
   1215 	 * reference (see bridge_add_link).
   1216 	 */
   1217 	link_unref(blp);
   1218 }
   1219 
   1220 static void
   1221 shutdown_inst(bridge_inst_t *bip)
   1222 {
   1223 	bridge_link_t *blp, *blnext;
   1224 	bridge_fwd_t *bfp;
   1225 
   1226 	mutex_enter(&inst_lock);
   1227 	if (bip->bi_flags & BIF_SHUTDOWN) {
   1228 		mutex_exit(&inst_lock);
   1229 		return;
   1230 	}
   1231 
   1232 	/*
   1233 	 * Once on the inst_list, the bridge instance must not leave that list
   1234 	 * without having the shutdown flag set first.  When the shutdown flag
   1235 	 * is set, we own the list reference, so we must drop it before
   1236 	 * returning.
   1237 	 */
   1238 	bip->bi_flags |= BIF_SHUTDOWN;
   1239 	mutex_exit(&inst_lock);
   1240 
   1241 	bip->bi_control = NULL;
   1242 
   1243 	rw_enter(&bip->bi_rwlock, RW_READER);
   1244 	blnext = list_head(&bip->bi_links);
   1245 	while ((blp = blnext) != NULL) {
   1246 		blnext = list_next(&bip->bi_links, blp);
   1247 		if (!(blp->bl_flags & BLF_DELETED)) {
   1248 			blp->bl_flags |= BLF_DELETED;
   1249 			(void) ddi_taskq_dispatch(bridge_taskq, link_shutdown,
   1250 			    blp, DDI_SLEEP);
   1251 		}
   1252 	}
   1253 	while ((bfp = avl_first(&bip->bi_fwd)) != NULL) {
   1254 		atomic_inc_uint(&bfp->bf_refs);
   1255 		rw_exit(&bip->bi_rwlock);
   1256 		fwd_delete(bfp);
   1257 		fwd_unref(bfp);
   1258 		rw_enter(&bip->bi_rwlock, RW_READER);
   1259 	}
   1260 	rw_exit(&bip->bi_rwlock);
   1261 
   1262 	/*
   1263 	 * This bridge is being destroyed.  Notify TRILL once all of the
   1264 	 * links are all gone.
   1265 	 */
   1266 	mutex_enter(&inst_lock);
   1267 	while (bip->bi_trilldata != NULL && !list_is_empty(&bip->bi_links))
   1268 		cv_wait(&bip->bi_linkwait, &inst_lock);
   1269 	mutex_exit(&inst_lock);
   1270 	if (bip->bi_trilldata != NULL)
   1271 		trill_brdstr_fn(bip->bi_trilldata, bip);
   1272 
   1273 	bridge_unref(bip);
   1274 }
   1275 
   1276 /*
   1277  * This is called once by the TRILL module when it starts up.  It just sets the
   1278  * global TRILL callback function pointers -- data transmit/receive and bridge
   1279  * and link destroy notification.  There's only one TRILL module, so only one
   1280  * registration is needed.
   1281  *
   1282  * TRILL should call this function with NULL pointers before unloading.  It
   1283  * must not do so before dropping all references to bridges and links.  We
   1284  * assert that this is true on debug builds.
   1285  */
   1286 void
   1287 bridge_trill_register_cb(trill_recv_pkt_t recv_fn, trill_encap_pkt_t encap_fn,
   1288     trill_br_dstr_t brdstr_fn, trill_ln_dstr_t lndstr_fn)
   1289 {
   1290 #ifdef DEBUG
   1291 	if (recv_fn == NULL && trill_recv_fn != NULL) {
   1292 		bridge_inst_t *bip;
   1293 		bridge_link_t *blp;
   1294 
   1295 		mutex_enter(&inst_lock);
   1296 		for (bip = list_head(&inst_list); bip != NULL;
   1297 		    bip = list_next(&inst_list, bip)) {
   1298 			ASSERT(bip->bi_trilldata == NULL);
   1299 			rw_enter(&bip->bi_rwlock, RW_READER);
   1300 			for (blp = list_head(&bip->bi_links); blp != NULL;
   1301 			    blp = list_next(&bip->bi_links, blp)) {
   1302 				ASSERT(blp->bl_trilldata == NULL);
   1303 			}
   1304 			rw_exit(&bip->bi_rwlock);
   1305 		}
   1306 		mutex_exit(&inst_lock);
   1307 	}
   1308 #endif
   1309 	trill_recv_fn = recv_fn;
   1310 	trill_encap_fn = encap_fn;
   1311 	trill_brdstr_fn = brdstr_fn;
   1312 	trill_lndstr_fn = lndstr_fn;
   1313 }
   1314 
   1315 /*
   1316  * This registers the TRILL instance pointer with a bridge.  Before this
   1317  * pointer is set, the forwarding, TRILL receive, and bridge destructor
   1318  * functions won't be called.
   1319  *
   1320  * TRILL holds a reference on a bridge with this call.  It must free the
   1321  * reference by calling the unregister function below.
   1322  */
   1323 bridge_inst_t *
   1324 bridge_trill_brref(const char *bname, void *ptr)
   1325 {
   1326 	char bridge[MAXLINKNAMELEN];
   1327 	bridge_inst_t *bip;
   1328 
   1329 	(void) snprintf(bridge, MAXLINKNAMELEN, "%s0", bname);
   1330 	bip = bridge_find_name(bridge);
   1331 	if (bip != NULL) {
   1332 		ASSERT(bip->bi_trilldata == NULL && ptr != NULL);
   1333 		bip->bi_trilldata = ptr;
   1334 	}
   1335 	return (bip);
   1336 }
   1337 
   1338 void
   1339 bridge_trill_brunref(bridge_inst_t *bip)
   1340 {
   1341 	ASSERT(bip->bi_trilldata != NULL);
   1342 	bip->bi_trilldata = NULL;
   1343 	bridge_unref(bip);
   1344 }
   1345 
   1346 /*
   1347  * TRILL calls this function when referencing a particular link on a bridge.
   1348  *
   1349  * It holds a reference on the link, so TRILL must clear out the reference when
   1350  * it's done with the link (on unbinding).
   1351  */
   1352 bridge_link_t *
   1353 bridge_trill_lnref(bridge_inst_t *bip, datalink_id_t linkid, void *ptr)
   1354 {
   1355 	bridge_link_t *blp;
   1356 
   1357 	ASSERT(ptr != NULL);
   1358 	rw_enter(&bip->bi_rwlock, RW_READER);
   1359 	for (blp = list_head(&bip->bi_links); blp != NULL;
   1360 	    blp = list_next(&bip->bi_links, blp)) {
   1361 		if (!(blp->bl_flags & BLF_DELETED) &&
   1362 		    blp->bl_linkid == linkid && blp->bl_trilldata == NULL) {
   1363 			blp->bl_trilldata = ptr;
   1364 			blp->bl_flags &= ~BLF_TRILLACTIVE;
   1365 			(void) memset(blp->bl_afs, 0, sizeof (blp->bl_afs));
   1366 			atomic_inc_uint(&blp->bl_refs);
   1367 			break;
   1368 		}
   1369 	}
   1370 	rw_exit(&bip->bi_rwlock);
   1371 	return (blp);
   1372 }
   1373 
   1374 void
   1375 bridge_trill_lnunref(bridge_link_t *blp)
   1376 {
   1377 	mutex_enter(&blp->bl_trilllock);
   1378 	ASSERT(blp->bl_trilldata != NULL);
   1379 	blp->bl_trilldata = NULL;
   1380 	blp->bl_flags &= ~BLF_TRILLACTIVE;
   1381 	while (blp->bl_trillthreads > 0)
   1382 		cv_wait(&blp->bl_trillwait, &blp->bl_trilllock);
   1383 	mutex_exit(&blp->bl_trilllock);
   1384 	(void) memset(blp->bl_afs, 0xff, sizeof (blp->bl_afs));
   1385 	link_unref(blp);
   1386 }
   1387 
   1388 /*
   1389  * This periodic timer performs three functions:
   1390  *  1. It scans the list of learned forwarding entries, and removes ones that
   1391  *     haven't been heard from in a while.  The time limit is backed down if
   1392  *     we're above the configured table limit.
   1393  *  2. It walks the links and decays away the bl_learns counter.
   1394  *  3. It scans the observability node entries looking for ones that can be
   1395  *     freed up.
   1396  */
   1397 /* ARGSUSED */
   1398 static void
   1399 bridge_timer(void *arg)
   1400 {
   1401 	bridge_inst_t *bip;
   1402 	bridge_fwd_t *bfp, *bfnext;
   1403 	bridge_mac_t *bmp, *bmnext;
   1404 	bridge_link_t *blp;
   1405 	int err;
   1406 	datalink_id_t tmpid;
   1407 	avl_tree_t fwd_scavenge;
   1408 	clock_t age_limit;
   1409 	uint32_t ldecay;
   1410 
   1411 	avl_create(&fwd_scavenge, fwd_compare, sizeof (bridge_fwd_t),
   1412 	    offsetof(bridge_fwd_t, bf_node));
   1413 	mutex_enter(&inst_lock);
   1414 	for (bip = list_head(&inst_list); bip != NULL;
   1415 	    bip = list_next(&inst_list, bip)) {
   1416 		if (bip->bi_flags & BIF_SHUTDOWN)
   1417 			continue;
   1418 		rw_enter(&bip->bi_rwlock, RW_WRITER);
   1419 		/* compute scaled maximum age based on table limit */
   1420 		if (avl_numnodes(&bip->bi_fwd) > bip->bi_tablemax)
   1421 			bip->bi_tshift++;
   1422 		else
   1423 			bip->bi_tshift = 0;
   1424 		if ((age_limit = bridge_fwd_age >> bip->bi_tshift) == 0) {
   1425 			if (bip->bi_tshift != 0)
   1426 				bip->bi_tshift--;
   1427 			age_limit = 1;
   1428 		}
   1429 		bfnext = avl_first(&bip->bi_fwd);
   1430 		while ((bfp = bfnext) != NULL) {
   1431 			bfnext = AVL_NEXT(&bip->bi_fwd, bfp);
   1432 			if (!(bfp->bf_flags & BFF_LOCALADDR) &&
   1433 			    (ddi_get_lbolt() - bfp->bf_lastheard) > age_limit) {
   1434 				ASSERT(bfp->bf_flags & BFF_INTREE);
   1435 				avl_remove(&bip->bi_fwd, bfp);
   1436 				bfp->bf_flags &= ~BFF_INTREE;
   1437 				avl_add(&fwd_scavenge, bfp);
   1438 			}
   1439 		}
   1440 		for (blp = list_head(&bip->bi_links); blp != NULL;
   1441 		    blp = list_next(&bip->bi_links, blp)) {
   1442 			ldecay = mac_get_ldecay(blp->bl_mh);
   1443 			if (ldecay >= blp->bl_learns)
   1444 				blp->bl_learns = 0;
   1445 			else
   1446 				atomic_add_int(&blp->bl_learns, -(int)ldecay);
   1447 		}
   1448 		rw_exit(&bip->bi_rwlock);
   1449 		bfnext = avl_first(&fwd_scavenge);
   1450 		while ((bfp = bfnext) != NULL) {
   1451 			bfnext = AVL_NEXT(&fwd_scavenge, bfp);
   1452 			avl_remove(&fwd_scavenge, bfp);
   1453 			KIINCR(bki_expire);
   1454 			fwd_unref(bfp);	/* drop tree reference */
   1455 		}
   1456 	}
   1457 	mutex_exit(&inst_lock);
   1458 	avl_destroy(&fwd_scavenge);
   1459 
   1460 	/*
   1461 	 * Scan the bridge_mac_t entries and try to free up the ones that are
   1462 	 * no longer active.  This must be done by polling, as neither DLS nor
   1463 	 * MAC provides a driver any sort of positive control over clients.
   1464 	 */
   1465 	rw_enter(&bmac_rwlock, RW_WRITER);
   1466 	bmnext = list_head(&bmac_list);
   1467 	while ((bmp = bmnext) != NULL) {
   1468 		bmnext = list_next(&bmac_list, bmp);
   1469 
   1470 		/* ignore active bridges */
   1471 		if (bmp->bm_inst != NULL)
   1472 			continue;
   1473 
   1474 		if (bmp->bm_flags & BMF_DLS) {
   1475 			err = dls_devnet_destroy(bmp->bm_mh, &tmpid, B_FALSE);
   1476 			ASSERT(err == 0 || err == EBUSY);
   1477 			if (err == 0)
   1478 				bmp->bm_flags &= ~BMF_DLS;
   1479 		}
   1480 
   1481 		if (!(bmp->bm_flags & BMF_DLS)) {
   1482 			err = mac_unregister(bmp->bm_mh);
   1483 			ASSERT(err == 0 || err == EBUSY);
   1484 			if (err == 0) {
   1485 				list_remove(&bmac_list, bmp);
   1486 				kmem_free(bmp, sizeof (*bmp));
   1487 			}
   1488 		}
   1489 	}
   1490 	if (list_is_empty(&bmac_list)) {
   1491 		bridge_timerid = 0;
   1492 	} else {
   1493 		bridge_timerid = timeout(bridge_timer, NULL,
   1494 		    bridge_scan_interval);
   1495 	}
   1496 	rw_exit(&bmac_rwlock);
   1497 }
   1498 
   1499 static int
   1500 bridge_open(queue_t *rq, dev_t *devp, int oflag, int sflag, cred_t *credp)
   1501 {
   1502 	bridge_stream_t	*bsp;
   1503 
   1504 	if (rq->q_ptr != NULL)
   1505 		return (0);
   1506 
   1507 	if (sflag & MODOPEN)
   1508 		return (EINVAL);
   1509 
   1510 	/*
   1511 	 * Check the minor node number being opened.  This tells us which
   1512 	 * bridge instance the user wants.
   1513 	 */
   1514 	if (getminor(*devp) != 0) {
   1515 		/*
   1516 		 * This is a regular DLPI stream for snoop or the like.
   1517 		 * Redirect it through DLD.
   1518 		 */
   1519 		rq->q_qinfo = &bridge_dld_rinit;
   1520 		OTHERQ(rq)->q_qinfo = &bridge_dld_winit;
   1521 		return (dld_open(rq, devp, oflag, sflag, credp));
   1522 	} else {
   1523 		/*
   1524 		 * Allocate the bridge control stream structure.
   1525 		 */
   1526 		if ((bsp = stream_alloc()) == NULL)
   1527 			return (ENOSR);
   1528 		rq->q_ptr = WR(rq)->q_ptr = (caddr_t)bsp;
   1529 		bsp->bs_wq = WR(rq);
   1530 		*devp = makedevice(getmajor(*devp), bsp->bs_minor);
   1531 		qprocson(rq);
   1532 		return (0);
   1533 	}
   1534 }
   1535 
   1536 /*
   1537  * This is used only for bridge control streams.  DLPI goes through dld
   1538  * instead.
   1539  */
   1540 static int
   1541 bridge_close(queue_t *rq)
   1542 {
   1543 	bridge_stream_t	*bsp = rq->q_ptr;
   1544 	bridge_inst_t *bip;
   1545 
   1546 	/*
   1547 	 * Wait for any stray taskq (add/delete link) entries related to this
   1548 	 * stream to leave the system.
   1549 	 */
   1550 	mutex_enter(&stream_ref_lock);
   1551 	while (bsp->bs_taskq_cnt != 0)
   1552 		cv_wait(&stream_ref_cv, &stream_ref_lock);
   1553 	mutex_exit(&stream_ref_lock);
   1554 
   1555 	qprocsoff(rq);
   1556 	if ((bip = bsp->bs_inst) != NULL)
   1557 		shutdown_inst(bip);
   1558 	rq->q_ptr = WR(rq)->q_ptr = NULL;
   1559 	stream_free(bsp);
   1560 	if (bip != NULL)
   1561 		bridge_unref(bip);
   1562 
   1563 	return (0);
   1564 }
   1565 
   1566 static void
   1567 bridge_learn(bridge_link_t *blp, const uint8_t *saddr, uint16_t ingress_nick,
   1568     uint16_t vlanid)
   1569 {
   1570 	bridge_inst_t *bip = blp->bl_inst;
   1571 	bridge_fwd_t *bfp, *bfpnew;
   1572 	int i;
   1573 	boolean_t replaced = B_FALSE;
   1574 
   1575 	/* Ignore multi-destination address used as source; it's nonsense. */
   1576 	if (*saddr & 1)
   1577 		return;
   1578 
   1579 	/*
   1580 	 * If the source is known, then check whether it belongs on this link.
   1581 	 * If not, and this isn't a fixed local address, then we've detected a
   1582 	 * move.  If it's not known, learn it.
   1583 	 */
   1584 	if ((bfp = fwd_find(bip, saddr, vlanid)) != NULL) {
   1585 		/*
   1586 		 * If the packet has a fixed local source address, then there's
   1587 		 * nothing we can learn.  We must quit.  If this was a received
   1588 		 * packet, then the sender has stolen our address, but there's
   1589 		 * nothing we can do.  If it's a transmitted packet, then
   1590 		 * that's the normal case.
   1591 		 */
   1592 		if (bfp->bf_flags & BFF_LOCALADDR) {
   1593 			fwd_unref(bfp);
   1594 			return;
   1595 		}
   1596 
   1597 		/*
   1598 		 * Check if the link (and TRILL sender, if any) being used is
   1599 		 * among the ones registered for this address.  If so, then
   1600 		 * this is information that we already know.
   1601 		 */
   1602 		if (bfp->bf_trill_nick == ingress_nick) {
   1603 			for (i = 0; i < bfp->bf_nlinks; i++) {
   1604 				if (bfp->bf_links[i] == blp) {
   1605 					bfp->bf_lastheard = ddi_get_lbolt();
   1606 					fwd_unref(bfp);
   1607 					return;
   1608 				}
   1609 			}
   1610 		}
   1611 	}
   1612 
   1613 	/*
   1614 	 * Note that we intentionally "unlearn" things that appear to be under
   1615 	 * attack on this link.  The forwarding cache is a negative thing for
   1616 	 * security -- it disables reachability as a performance optimization
   1617 	 * -- so leaving out entries optimizes for success and defends against
   1618 	 * the attack.  Thus, the bare increment without a check in the delete
   1619 	 * code above is right.  (And it's ok if we skid over the limit a
   1620 	 * little, so there's no syncronization needed on the test.)
   1621 	 */
   1622 	if (blp->bl_learns >= mac_get_llimit(blp->bl_mh)) {
   1623 		if (bfp != NULL) {
   1624 			if (bfp->bf_vcnt == 0)
   1625 				fwd_delete(bfp);
   1626 			fwd_unref(bfp);
   1627 		}
   1628 		return;
   1629 	}
   1630 
   1631 	atomic_inc_uint(&blp->bl_learns);
   1632 
   1633 	if ((bfpnew = fwd_alloc(saddr, 1, ingress_nick)) == NULL) {
   1634 		if (bfp != NULL)
   1635 			fwd_unref(bfp);
   1636 		return;
   1637 	}
   1638 	KIINCR(bki_count);
   1639 
   1640 	if (bfp != NULL) {
   1641 		/*
   1642 		 * If this is a new destination for the same VLAN, then delete
   1643 		 * so that we can update.  If it's a different VLAN, then we're
   1644 		 * not going to delete the original.  Split off instead into an
   1645 		 * IVL entry.
   1646 		 */
   1647 		if (bfp->bf_vlanid == vlanid) {
   1648 			/* save the count of IVL duplicates */
   1649 			bfpnew->bf_vcnt = bfp->bf_vcnt;
   1650 
   1651 			/* entry deletes count as learning events */
   1652 			atomic_inc_uint(&blp->bl_learns);
   1653 
   1654 			/* destroy and create anew; node moved */
   1655 			fwd_delete(bfp);
   1656 			replaced = B_TRUE;
   1657 			KIINCR(bki_moved);
   1658 		} else {
   1659 			bfp->bf_vcnt++;
   1660 			bfpnew->bf_flags |= BFF_VLANLOCAL;
   1661 		}
   1662 		fwd_unref(bfp);
   1663 	}
   1664 	bfpnew->bf_links[0] = blp;
   1665 	bfpnew->bf_nlinks = 1;
   1666 	atomic_inc_uint(&blp->bl_refs);	/* bf_links entry */
   1667 	if (!fwd_insert(bip, bfpnew))
   1668 		fwd_free(bfpnew);
   1669 	else if (!replaced)
   1670 		KIINCR(bki_source);
   1671 }
   1672 
   1673 /*
   1674  * Process the VLAN headers for output on a given link.  There are several
   1675  * cases (noting that we don't map VLANs):
   1676  *   1. The input packet is good as it is; either
   1677  *	a. It has no tag, and output has same PVID
   1678  *	b. It has a non-zero priority-only tag for PVID, and b_band is same
   1679  *	c. It has a tag with VLAN different from PVID, and b_band is same
   1680  *   2. The tag must change: non-zero b_band is different from tag priority
   1681  *   3. The packet has a tag and should not (VLAN same as PVID, b_band zero)
   1682  *   4. The packet has no tag and needs one:
   1683  *      a. VLAN ID same as PVID, but b_band is non-zero
   1684  *      b. VLAN ID different from PVID
   1685  * We exclude case 1 first, then modify the packet.  Note that output packets
   1686  * get a priority set by the mblk, not by the header, because QoS in bridging
   1687  * requires priority recalculation at each node.
   1688  *
   1689  * The passed-in tci is the "impossible" value 0xFFFF when no tag is present.
   1690  */
   1691 static mblk_t *
   1692 reform_vlan_header(mblk_t *mp, uint16_t vlanid, uint16_t tci, uint16_t pvid)
   1693 {
   1694 	boolean_t source_has_tag = (tci != 0xFFFF);
   1695 	mblk_t *mpcopy;
   1696 	size_t mlen, minlen;
   1697 	struct ether_vlan_header *evh;
   1698 	int pri;
   1699 
   1700 	/* This helps centralize error handling in the caller. */
   1701 	if (mp == NULL)
   1702 		return (mp);
   1703 
   1704 	/* No forwarded packet can have hardware checksum enabled */
   1705 	DB_CKSUMFLAGS(mp) = 0;
   1706 
   1707 	/* Get the no-modification cases out of the way first */
   1708 	if (!source_has_tag && vlanid == pvid)		/* 1a */
   1709 		return (mp);
   1710 
   1711 	pri = VLAN_PRI(tci);
   1712 	if (source_has_tag && mp->b_band == pri) {
   1713 		if (vlanid != pvid)			/* 1c */
   1714 			return (mp);
   1715 		if (pri != 0 && VLAN_ID(tci) == 0)	/* 1b */
   1716 			return (mp);
   1717 	}
   1718 
   1719 	/*
   1720 	 * We now know that we must modify the packet.  Prepare for that.  Note
   1721 	 * that if a tag is present, the caller has already done a pullup for
   1722 	 * the VLAN header, so we're good to go.
   1723 	 */
   1724 	if (MBLKL(mp) < sizeof (struct ether_header)) {
   1725 		mpcopy = msgpullup(mp, sizeof (struct ether_header));
   1726 		if (mpcopy == NULL) {
   1727 			freemsg(mp);
   1728 			return (NULL);
   1729 		}
   1730 		mp = mpcopy;
   1731 	}
   1732 	if (DB_REF(mp) > 1 || !IS_P2ALIGNED(mp->b_rptr, sizeof (uint16_t)) ||
   1733 	    (!source_has_tag && MBLKTAIL(mp) < VLAN_INCR)) {
   1734 		minlen = mlen = MBLKL(mp);
   1735 		if (!source_has_tag)
   1736 			minlen += VLAN_INCR;
   1737 		ASSERT(minlen >= sizeof (struct ether_vlan_header));
   1738 		/*
   1739 		 * We're willing to copy some data to avoid fragmentation, but
   1740 		 * not a lot.
   1741 		 */
   1742 		if (minlen > 256)
   1743 			minlen = sizeof (struct ether_vlan_header);
   1744 		mpcopy = allocb(minlen, BPRI_MED);
   1745 		if (mpcopy == NULL) {
   1746 			freemsg(mp);
   1747 			return (NULL);
   1748 		}
   1749 		if (mlen <= minlen) {
   1750 			/* We toss the first mblk when we can. */
   1751 			bcopy(mp->b_rptr, mpcopy->b_rptr, mlen);
   1752 			mpcopy->b_wptr += mlen;
   1753 			mpcopy->b_cont = mp->b_cont;
   1754 			freeb(mp);
   1755 		} else {
   1756 			/* If not, then just copy what we need */
   1757 			if (!source_has_tag)
   1758 				minlen = sizeof (struct ether_header);
   1759 			bcopy(mp->b_rptr, mpcopy->b_rptr, minlen);
   1760 			mpcopy->b_wptr += minlen;
   1761 			mpcopy->b_cont = mp;
   1762 			mp->b_rptr += minlen;
   1763 		}
   1764 		mp = mpcopy;
   1765 	}
   1766 
   1767 	/* LINTED: pointer alignment */
   1768 	evh = (struct ether_vlan_header *)mp->b_rptr;
   1769 	if (source_has_tag) {
   1770 		if (mp->b_band == 0 && vlanid == pvid) {	/* 3 */
   1771 			evh->ether_tpid = evh->ether_type;
   1772 			mlen = MBLKL(mp);
   1773 			if (mlen > sizeof (struct ether_vlan_header))
   1774 				ovbcopy(mp->b_rptr +
   1775 				    sizeof (struct ether_vlan_header),
   1776 				    mp->b_rptr + sizeof (struct ether_header),
   1777 				    mlen - sizeof (struct ether_vlan_header));
   1778 			mp->b_wptr -= VLAN_INCR;
   1779 		} else {					/* 2 */
   1780 			if (vlanid == pvid)
   1781 				vlanid = VLAN_ID_NONE;
   1782 			tci = VLAN_TCI(mp->b_band, ETHER_CFI, vlanid);
   1783 			evh->ether_tci = htons(tci);
   1784 		}
   1785 	} else {
   1786 		/* case 4: no header present, but one is needed */
   1787 		mlen = MBLKL(mp);
   1788 		if (mlen > sizeof (struct ether_header))
   1789 			ovbcopy(mp->b_rptr + sizeof (struct ether_header),
   1790 			    mp->b_rptr + sizeof (struct ether_vlan_header),
   1791 			    mlen - sizeof (struct ether_header));
   1792 		mp->b_wptr += VLAN_INCR;
   1793 		ASSERT(mp->b_wptr <= DB_LIM(mp));
   1794 		if (vlanid == pvid)
   1795 			vlanid = VLAN_ID_NONE;
   1796 		tci = VLAN_TCI(mp->b_band, ETHER_CFI, vlanid);
   1797 		evh->ether_type = evh->ether_tpid;
   1798 		evh->ether_tpid = htons(ETHERTYPE_VLAN);
   1799 		evh->ether_tci = htons(tci);
   1800 	}
   1801 	return (mp);
   1802 }
   1803 
   1804 /* Record VLAN information and strip header if requested . */
   1805 static void
   1806 update_header(mblk_t *mp, mac_header_info_t *hdr_info, boolean_t striphdr)
   1807 {
   1808 	if (hdr_info->mhi_bindsap == ETHERTYPE_VLAN) {
   1809 		struct ether_vlan_header *evhp;
   1810 		uint16_t ether_type;
   1811 
   1812 		/* LINTED: alignment */
   1813 		evhp = (struct ether_vlan_header *)mp->b_rptr;
   1814 		hdr_info->mhi_istagged = B_TRUE;
   1815 		hdr_info->mhi_tci = ntohs(evhp->ether_tci);
   1816 		if (striphdr) {
   1817 			/*
   1818 			 * For VLAN tagged frames update the ether_type
   1819 			 * in hdr_info before stripping the header.
   1820 			 */
   1821 			ether_type = ntohs(evhp->ether_type);
   1822 			hdr_info->mhi_origsap = ether_type;
   1823 			hdr_info->mhi_bindsap = (ether_type > ETHERMTU) ?
   1824 			    ether_type : DLS_SAP_LLC;
   1825 			mp->b_rptr = (uchar_t *)(evhp + 1);
   1826 		}
   1827 	} else {
   1828 		hdr_info->mhi_istagged = B_FALSE;
   1829 		hdr_info->mhi_tci = VLAN_ID_NONE;
   1830 		if (striphdr)
   1831 			mp->b_rptr += sizeof (struct ether_header);
   1832 	}
   1833 }
   1834 
   1835 /*
   1836  * Return B_TRUE if we're allowed to send on this link with the given VLAN ID.
   1837  */
   1838 static boolean_t
   1839 bridge_can_send(bridge_link_t *blp, uint16_t vlanid)
   1840 {
   1841 	ASSERT(vlanid != VLAN_ID_NONE);
   1842 	if (blp->bl_flags & BLF_DELETED)
   1843 		return (B_FALSE);
   1844 	if (blp->bl_trilldata == NULL && blp->bl_state != BLS_FORWARDING)
   1845 		return (B_FALSE);
   1846 	return (BRIDGE_VLAN_ISSET(blp, vlanid) && BRIDGE_AF_ISSET(blp, vlanid));
   1847 }
   1848 
   1849 /*
   1850  * This function scans the bridge forwarding tables in order to forward a given
   1851  * packet.  If the packet either doesn't need forwarding (the current link is
   1852  * correct) or the current link needs a copy as well, then the packet is
   1853  * returned to the caller.
   1854  *
   1855  * If a packet has been decapsulated from TRILL, then it must *NOT* reenter a
   1856  * TRILL tunnel.  If the destination points there, then drop instead.
   1857  */
   1858 static mblk_t *
   1859 bridge_forward(bridge_link_t *blp, mac_header_info_t *hdr_info, mblk_t *mp,
   1860     uint16_t vlanid, uint16_t tci, boolean_t from_trill, boolean_t is_xmit)
   1861 {
   1862 	mblk_t *mpsend, *mpcopy;
   1863 	bridge_inst_t *bip = blp->bl_inst;
   1864 	bridge_link_t *blpsend, *blpnext;
   1865 	bridge_fwd_t *bfp;
   1866 	uint_t i;
   1867 	boolean_t selfseen = B_FALSE;
   1868 	void *tdp;
   1869 	const uint8_t *daddr = hdr_info->mhi_daddr;
   1870 
   1871 	/*
   1872 	 * Check for the IEEE "reserved" multicast addresses.  Messages sent to
   1873 	 * these addresses are used for link-local control (STP and pause), and
   1874 	 * are never forwarded or redirected.
   1875 	 */
   1876 	if (daddr[0] == 1 && daddr[1] == 0x80 && daddr[2] == 0xc2 &&
   1877 	    daddr[3] == 0 && daddr[4] == 0 && (daddr[5] & 0xf0) == 0) {
   1878 		if (from_trill) {
   1879 			freemsg(mp);
   1880 			mp = NULL;
   1881 		}
   1882 		return (mp);
   1883 	}
   1884 
   1885 	if ((bfp = fwd_find(bip, daddr, vlanid)) != NULL) {
   1886 
   1887 		/*
   1888 		 * If trill indicates a destination for this node, then it's
   1889 		 * clearly not intended for local delivery.  We must tell TRILL
   1890 		 * to encapsulate, as long as we didn't just decapsulate it.
   1891 		 */
   1892 		if (bfp->bf_trill_nick != RBRIDGE_NICKNAME_NONE) {
   1893 			/*
   1894 			 * Error case: can't reencapsulate if the protocols are
   1895 			 * working correctly.
   1896 			 */
   1897 			if (from_trill) {
   1898 				freemsg(mp);
   1899 				return (NULL);
   1900 			}
   1901 			mutex_enter(&blp->bl_trilllock);
   1902 			if ((tdp = blp->bl_trilldata) != NULL) {
   1903 				blp->bl_trillthreads++;
   1904 				mutex_exit(&blp->bl_trilllock);
   1905 				update_header(mp, hdr_info, B_FALSE);
   1906 				if (is_xmit)
   1907 					mp = mac_fix_cksum(mp);
   1908 				/* all trill data frames have Inner.VLAN */
   1909 				mp = reform_vlan_header(mp, vlanid, tci, 0);
   1910 				if (mp == NULL) {
   1911 					KIINCR(bki_drops);
   1912 					fwd_unref(bfp);
   1913 					return (NULL);
   1914 				}
   1915 				trill_encap_fn(tdp, blp, hdr_info, mp,
   1916 				    bfp->bf_trill_nick);
   1917 				mutex_enter(&blp->bl_trilllock);
   1918 				if (--blp->bl_trillthreads == 0 &&
   1919 				    blp->bl_trilldata == NULL)
   1920 					cv_broadcast(&blp->bl_trillwait);
   1921 			}
   1922 			mutex_exit(&blp->bl_trilllock);
   1923 
   1924 			/* if TRILL has been disabled, then kill this stray */
   1925 			if (tdp == NULL) {
   1926 				freemsg(mp);
   1927 				fwd_delete(bfp);
   1928 			}
   1929 			fwd_unref(bfp);
   1930 			return (NULL);
   1931 		}
   1932 
   1933 		/* find first link we can send on */
   1934 		for (i = 0; i < bfp->bf_nlinks; i++) {
   1935 			blpsend = bfp->bf_links[i];
   1936 			if (blpsend == blp)
   1937 				selfseen = B_TRUE;
   1938 			else if (bridge_can_send(blpsend, vlanid))
   1939 				break;
   1940 		}
   1941 
   1942 		while (i < bfp->bf_nlinks) {
   1943 			blpsend = bfp->bf_links[i];
   1944 			for (i++; i < bfp->bf_nlinks; i++) {
   1945 				blpnext = bfp->bf_links[i];
   1946 				if (blpnext == blp)
   1947 					selfseen = B_TRUE;
   1948 				else if (bridge_can_send(blpnext, vlanid))
   1949 					break;
   1950 			}
   1951 			if (i == bfp->bf_nlinks && !selfseen) {
   1952 				mpsend = mp;
   1953 				mp = NULL;
   1954 			} else {
   1955 				mpsend = copymsg(mp);
   1956 			}
   1957 
   1958 			if (!from_trill && is_xmit)
   1959 				mpsend = mac_fix_cksum(mpsend);
   1960 
   1961 			mpsend = reform_vlan_header(mpsend, vlanid, tci,
   1962 			    blpsend->bl_pvid);
   1963 			if (mpsend == NULL) {
   1964 				KIINCR(bki_drops);
   1965 				continue;
   1966 			}
   1967 
   1968 			KIINCR(bki_forwards);
   1969 			/*
   1970 			 * No need to bump up the link reference count, as
   1971 			 * the forwarding entry itself holds a reference to
   1972 			 * the link.
   1973 			 */
   1974 			if (bfp->bf_flags & BFF_LOCALADDR) {
   1975 				mac_rx_common(blpsend->bl_mh, NULL, mpsend);
   1976 			} else {
   1977 				KLPINCR(blpsend, bkl_xmit);
   1978 				MAC_RING_TX(blpsend->bl_mh, NULL, mpsend,
   1979 				    mpsend);
   1980 				freemsg(mpsend);
   1981 			}
   1982 		}
   1983 		/*
   1984 		 * Handle a special case: if we're transmitting to the original
   1985 		 * link, then check whether the localaddr flag is set.  If it
   1986 		 * is, then receive instead.  This doesn't happen with ordinary
   1987 		 * bridging, but does happen often with TRILL decapsulation.
   1988 		 */
   1989 		if (mp != NULL && is_xmit && (bfp->bf_flags & BFF_LOCALADDR)) {
   1990 			mac_rx_common(blp->bl_mh, NULL, mp);
   1991 			mp = NULL;
   1992 		}
   1993 		fwd_unref(bfp);
   1994 	} else {
   1995 		/*
   1996 		 * TRILL has two cases to handle.  If the packet is off the
   1997 		 * wire (not from TRILL), then we need to send up into the
   1998 		 * TRILL module to have the distribution tree computed.  If the
   1999 		 * packet is from TRILL (decapsulated), then we're part of the
   2000 		 * distribution tree, and we need to copy the packet on member
   2001 		 * interfaces.
   2002 		 *
   2003 		 * Thus, the from TRILL case is identical to the STP case.
   2004 		 */
   2005 		if (!from_trill && blp->bl_trilldata != NULL) {
   2006 			mutex_enter(&blp->bl_trilllock);
   2007 			if ((tdp = blp->bl_trilldata) != NULL) {
   2008 				blp->bl_trillthreads++;
   2009 				mutex_exit(&blp->bl_trilllock);
   2010 				if ((mpsend = copymsg(mp)) != NULL) {
   2011 					update_header(mpsend,
   2012 					    hdr_info, B_FALSE);
   2013 					/*
   2014 					 * all trill data frames have
   2015 					 * Inner.VLAN
   2016 					 */
   2017 					mpsend = reform_vlan_header(mpsend,
   2018 					    vlanid, tci, 0);
   2019 					if (mpsend == NULL) {
   2020 						KIINCR(bki_drops);
   2021 					} else {
   2022 						trill_encap_fn(tdp, blp,
   2023 						    hdr_info, mpsend,
   2024 						    RBRIDGE_NICKNAME_NONE);
   2025 					}
   2026 				}
   2027 				mutex_enter(&blp->bl_trilllock);
   2028 				if (--blp->bl_trillthreads == 0 &&
   2029 				    blp->bl_trilldata == NULL)
   2030 					cv_broadcast(&blp->bl_trillwait);
   2031 			}
   2032 			mutex_exit(&blp->bl_trilllock);
   2033 		}
   2034 
   2035 		/*
   2036 		 * This is an unknown destination, so flood.
   2037 		 */
   2038 		rw_enter(&bip->bi_rwlock, RW_READER);
   2039 		for (blpnext = list_head(&bip->bi_links); blpnext != NULL;
   2040 		    blpnext = list_next(&bip->bi_links, blpnext)) {
   2041 			if (blpnext == blp)
   2042 				selfseen = B_TRUE;
   2043 			else if (bridge_can_send(blpnext, vlanid))
   2044 				break;
   2045 		}
   2046 		if (blpnext != NULL)
   2047 			atomic_inc_uint(&blpnext->bl_refs);
   2048 		rw_exit(&bip->bi_rwlock);
   2049 		while ((blpsend = blpnext) != NULL) {
   2050 			rw_enter(&bip->bi_rwlock, RW_READER);
   2051 			for (blpnext = list_next(&bip->bi_links, blpsend);
   2052 			    blpnext != NULL;
   2053 			    blpnext = list_next(&bip->bi_links, blpnext)) {
   2054 				if (blpnext == blp)
   2055 					selfseen = B_TRUE;
   2056 				else if (bridge_can_send(blpnext, vlanid))
   2057 					break;
   2058 			}
   2059 			if (blpnext != NULL)
   2060 				atomic_inc_uint(&blpnext->bl_refs);
   2061 			rw_exit(&bip->bi_rwlock);
   2062 			if (blpnext == NULL && !selfseen) {
   2063 				mpsend = mp;
   2064 				mp = NULL;
   2065 			} else {
   2066 				mpsend = copymsg(mp);
   2067 			}
   2068 
   2069 			if (!from_trill && is_xmit)
   2070 				mpsend = mac_fix_cksum(mpsend);
   2071 
   2072 			mpsend = reform_vlan_header(mpsend, vlanid, tci,
   2073 			    blpsend->bl_pvid);
   2074 			if (mpsend == NULL) {
   2075 				KIINCR(bki_drops);
   2076 				continue;
   2077 			}
   2078 
   2079 			if (hdr_info->mhi_dsttype == MAC_ADDRTYPE_UNICAST)
   2080 				KIINCR(bki_unknown);
   2081 			else
   2082 				KIINCR(bki_mbcast);
   2083 			KLPINCR(blpsend, bkl_xmit);
   2084 			if ((mpcopy = copymsg(mpsend)) != NULL)
   2085 				mac_rx_common(blpsend->bl_mh, NULL, mpcopy);
   2086 			MAC_RING_TX(blpsend->bl_mh, NULL, mpsend, mpsend);
   2087 			freemsg(mpsend);
   2088 			link_unref(blpsend);
   2089 		}
   2090 	}
   2091 
   2092 	/*
   2093 	 * At this point, if np is non-NULL, it means that the caller needs to
   2094 	 * continue on the selected link.
   2095 	 */
   2096 	return (mp);
   2097 }
   2098 
   2099 /*
   2100  * Extract and validate the VLAN information for a given packet.  This checks
   2101  * conformance with the rules for use of the PVID on the link, and for the
   2102  * allowed (configured) VLAN set.
   2103  *
   2104  * Returns B_TRUE if the packet passes, B_FALSE if it fails.
   2105  */
   2106 static boolean_t
   2107 bridge_get_vlan(bridge_link_t *blp, mac_header_info_t *hdr_info, mblk_t *mp,
   2108     uint16_t *vlanidp, uint16_t *tcip)
   2109 {
   2110 	uint16_t tci, vlanid;
   2111 
   2112 	if (hdr_info->mhi_bindsap == ETHERTYPE_VLAN) {
   2113 		ptrdiff_t tpos = offsetof(struct ether_vlan_header, ether_tci);
   2114 		ptrdiff_t mlen;
   2115 
   2116 		/*
   2117 		 * Extract the VLAN ID information, regardless of alignment,
   2118 		 * and without a pullup.  This isn't attractive, but we do this
   2119 		 * to avoid having to deal with the pointers stashed in
   2120 		 * hdr_info moving around or having the caller deal with a new
   2121 		 * mblk_t pointer.
   2122 		 */
   2123 		while (mp != NULL) {
   2124 			mlen = MBLKL(mp);
   2125 			if (mlen > tpos && mlen > 0)
   2126 				break;
   2127 			tpos -= mlen;
   2128 			mp = mp->b_cont;
   2129 		}
   2130 		if (mp == NULL)
   2131 			return (B_FALSE);
   2132 		tci = mp->b_rptr[tpos] << 8;
   2133 		if (++tpos >= mlen) {
   2134 			do {
   2135 				mp = mp->b_cont;
   2136 			} while (mp != NULL && MBLKL(mp) == 0);
   2137 			if (mp == NULL)
   2138 				return (B_FALSE);
   2139 			tpos = 0;
   2140 		}
   2141 		tci |= mp->b_rptr[tpos];
   2142 
   2143 		vlanid = VLAN_ID(tci);
   2144 		if (VLAN_CFI(tci) != ETHER_CFI || vlanid > VLAN_ID_MAX)
   2145 			return (B_FALSE);
   2146 		if (vlanid == VLAN_ID_NONE || vlanid == blp->bl_pvid)
   2147 			goto input_no_vlan;
   2148 		if (!BRIDGE_VLAN_ISSET(blp, vlanid))
   2149 			return (B_FALSE);
   2150 	} else {
   2151 		tci = 0xFFFF;
   2152 input_no_vlan:
   2153 		/*
   2154 		 * If PVID is set to zero, then untagged traffic is not
   2155 		 * supported here.  Do not learn or forward.
   2156 		 */
   2157 		if ((vlanid = blp->bl_pvid) == VLAN_ID_NONE)
   2158 			return (B_FALSE);
   2159 	}
   2160 
   2161 	*tcip = tci;
   2162 	*vlanidp = vlanid;
   2163 	return (B_TRUE);
   2164 }
   2165 
   2166 /*
   2167  * Handle MAC notifications.
   2168  */
   2169 static void
   2170 bridge_notify_cb(void *arg, mac_notify_type_t note_type)
   2171 {
   2172 	bridge_link_t *blp = arg;
   2173 
   2174 	switch (note_type) {
   2175 	case MAC_NOTE_UNICST:
   2176 		bridge_new_unicst(blp);
   2177 		break;
   2178 
   2179 	case MAC_NOTE_SDU_SIZE: {
   2180 		uint_t maxsdu;
   2181 		bridge_inst_t *bip = blp->bl_inst;
   2182 		bridge_mac_t *bmp = bip->bi_mac;
   2183 		boolean_t notify = B_FALSE;
   2184 		mblk_t *mlist = NULL;
   2185 
   2186 		mac_sdu_get(blp->bl_mh, NULL, &maxsdu);
   2187 		rw_enter(&bip->bi_rwlock, RW_READER);
   2188 		if (list_prev(&bip->bi_links, blp) == NULL &&
   2189 		    list_next(&bip->bi_links, blp) == NULL) {
   2190 			notify = (maxsdu != bmp->bm_maxsdu);
   2191 			bmp->bm_maxsdu = maxsdu;
   2192 		}
   2193 		blp->bl_maxsdu = maxsdu;
   2194 		if (maxsdu != bmp->bm_maxsdu)
   2195 			link_sdu_fail(blp, B_TRUE, &mlist);
   2196 		else if (notify)
   2197 			(void) mac_maxsdu_update(bmp->bm_mh, maxsdu);
   2198 		rw_exit(&bip->bi_rwlock);
   2199 		send_up_messages(bip, mlist);
   2200 		break;
   2201 	}
   2202 	}
   2203 }
   2204 
/*
 * This is called by the MAC layer.  As with the transmit side, we're right in
 * the data path for all I/O on this port, so if we don't need to forward this
 * packet anywhere, we have to send it upwards via mac_rx_common.
 *
 * Arguments:
 *	mh	actually the bridge_link_t for the receiving link
 *	rsrc	MAC resource handle, passed through to mac_rx_common and
 *		trill_recv_fn
 *	mpnext	chain of received packets linked through b_next
 *
 * Processing happens in up to three stages: TRILL input is pulled out of the
 * chain (when TRILL data is attached to the link), the link state is checked
 * to see whether bridging applies at all, and then each remaining packet is
 * learned from and forwarded individually.
 */
static void
bridge_recv_cb(mac_handle_t mh, mac_resource_handle_t rsrc, mblk_t *mpnext)
{
	mblk_t *mp, *mpcopy;
	bridge_link_t *blp = (bridge_link_t *)mh;
	bridge_inst_t *bip = blp->bl_inst;
	bridge_mac_t *bmp = bip->bi_mac;
	mac_header_info_t hdr_info;
	uint16_t vlanid, tci;
	boolean_t trillmode = B_FALSE;

	KIINCR(bki_recv);
	KLINCR(bkl_recv);

	/*
	 * Regardless of state, check for inbound TRILL packets when TRILL is
	 * active.  These are pulled out of band and sent for TRILL handling.
	 */
	if (blp->bl_trilldata != NULL) {
		void *tdp;
		mblk_t *newhead;
		mblk_t *tail = NULL;

		/*
		 * Re-check bl_trilldata under the lock, and count ourselves in
		 * bl_trillthreads for as long as we use tdp without the lock.
		 */
		mutex_enter(&blp->bl_trilllock);
		if ((tdp = blp->bl_trilldata) != NULL) {
			blp->bl_trillthreads++;
			mutex_exit(&blp->bl_trilllock);
			trillmode = B_TRUE;
			/*
			 * newhead tracks the first packet left on the chain;
			 * tail is the most recent packet left on the chain.
			 */
			newhead = mpnext;
			while ((mp = mpnext) != NULL) {
				boolean_t raw_isis, bridge_group;

				mpnext = mp->b_next;

				/*
				 * If the header isn't readable, then leave on
				 * the list and continue.
				 */
				if (mac_header_info(blp->bl_mh, mp,
				    &hdr_info) != 0) {
					tail = mp;
					continue;
				}

				/*
				 * The TRILL document specifies that, on
				 * Ethernet alone, IS-IS packets arrive with
				 * LLC rather than Ethertype, and using a
				 * specific destination address.  We must check
				 * for that here.  Also, we need to give BPDUs
				 * to TRILL for processing.
				 */
				raw_isis = bridge_group = B_FALSE;
				if (hdr_info.mhi_dsttype ==
				    MAC_ADDRTYPE_MULTICAST) {
					if (memcmp(hdr_info.mhi_daddr,
					    all_isis_rbridges, ETHERADDRL) == 0)
						raw_isis = B_TRUE;
					else if (memcmp(hdr_info.mhi_daddr,
					    bridge_group_address, ETHERADDRL) ==
					    0)
						bridge_group = B_TRUE;
				}
				/*
				 * Not IS-IS, not a BPDU, and not TRILL
				 * Ethertype (either directly or inside a VLAN
				 * tag): leave it on the chain for bridging.
				 */
				if (!raw_isis && !bridge_group &&
				    hdr_info.mhi_bindsap != ETHERTYPE_TRILL &&
				    (hdr_info.mhi_bindsap != ETHERTYPE_VLAN ||
				    /* LINTED: alignment */
				    ((struct ether_vlan_header *)mp->b_rptr)->
				    ether_type != htons(ETHERTYPE_TRILL))) {
					tail = mp;
					continue;
				}

				/*
				 * We've got TRILL input.  Remove from the list
				 * and send up through the TRILL module.  (Send
				 * a copy through promiscuous receive just to
				 * support snooping on TRILL.  Order isn't
				 * preserved strictly, but that doesn't matter
				 * here.)
				 */
				if (tail != NULL)
					tail->b_next = mpnext;
				mp->b_next = NULL;
				if (mp == newhead)
					newhead = mpnext;
				mac_trill_snoop(blp->bl_mh, mp);
				update_header(mp, &hdr_info, B_TRUE);
				/*
				 * On raw IS-IS and BPDU frames, we have to
				 * make sure that the length is trimmed
				 * properly.  We use origsap in order to cope
				 * with jumbograms for IS-IS.  (Regular mac
				 * can't.)
				 */
				if (raw_isis || bridge_group) {
					size_t msglen = msgdsize(mp);

					if (msglen > hdr_info.mhi_origsap) {
						/* negative count: trim tail */
						(void) adjmsg(mp,
						    hdr_info.mhi_origsap -
						    msglen);
					} else if (msglen <
					    hdr_info.mhi_origsap) {
						/* shorter than claimed: drop */
						freemsg(mp);
						continue;
					}
				}
				trill_recv_fn(tdp, blp, rsrc, mp, &hdr_info);
			}
			/* continue below with whatever TRILL didn't take */
			mpnext = newhead;
			mutex_enter(&blp->bl_trilllock);
			if (--blp->bl_trillthreads == 0 &&
			    blp->bl_trilldata == NULL)
				cv_broadcast(&blp->bl_trillwait);
		}
		mutex_exit(&blp->bl_trilllock);
		/* every packet was TRILL input; nothing left to bridge */
		if (mpnext == NULL)
			return;
	}

	/*
	 * If this is a TRILL RBridge, then just check whether this link is
	 * used at all for forwarding.  If not, then we're done.
	 */
	if (trillmode) {
		if (!(blp->bl_flags & BLF_TRILLACTIVE) ||
		    (blp->bl_flags & BLF_SDUFAIL)) {
			mac_rx_common(blp->bl_mh, rsrc, mpnext);
			return;
		}
	} else {
		/*
		 * For regular (STP) bridges, if we're in blocking or listening
		 * state, then do nothing.  We don't learn or forward until
		 * told to do so.
		 */
		if (blp->bl_state == BLS_BLOCKLISTEN) {
			mac_rx_common(blp->bl_mh, rsrc, mpnext);
			return;
		}
	}

	/*
	 * Send a copy of the message chain up to the observability node users.
	 * For TRILL, we must obey the VLAN AF rules, so we go packet-by-
	 * packet.
	 */
	if (!trillmode && blp->bl_state == BLS_FORWARDING &&
	    (bmp->bm_flags & BMF_STARTED) &&
	    (mp = copymsgchain(mpnext)) != NULL) {
		mac_rx(bmp->bm_mh, NULL, mp);
	}

	/*
	 * We must be in learning or forwarding state, or using TRILL on a link
	 * with one or more VLANs active.  For each packet in the list, process
	 * the source address, and then attempt to forward.
	 */
	while ((mp = mpnext) != NULL) {
		/* detach this packet from the chain before handling it */
		mpnext = mp->b_next;
		mp->b_next = NULL;

		/*
		 * If we can't decode the header or if the header specifies a
		 * multicast source address (impossible!), then don't bother
		 * learning or forwarding, but go ahead and forward up the
		 * stack for subsequent processing.
		 */
		if (mac_header_info(blp->bl_mh, mp, &hdr_info) != 0 ||
		    (hdr_info.mhi_saddr[0] & 1) != 0) {
			KIINCR(bki_drops);
			KLINCR(bkl_drops);
			mac_rx_common(blp->bl_mh, rsrc, mp);
			continue;
		}

		/*
		 * Extract and validate the VLAN ID for this packet.
		 */
		if (!bridge_get_vlan(blp, &hdr_info, mp, &vlanid, &tci) ||
		    !BRIDGE_AF_ISSET(blp, vlanid)) {
			mac_rx_common(blp->bl_mh, rsrc, mp);
			continue;
		}

		if (trillmode) {
			/*
			 * Special test required by TRILL document: must
			 * discard frames with outer address set to ESADI.
			 */
			if (memcmp(hdr_info.mhi_daddr, all_esadi_rbridges,
			    ETHERADDRL) == 0) {
				mac_rx_common(blp->bl_mh, rsrc, mp);
				continue;
			}

			/*
			 * If we're in TRILL mode, then the call above to get
			 * the VLAN ID has also checked that we're the
			 * appointed forwarder, so report that we're handling
			 * this packet to any observability node users.
			 */
			if ((bmp->bm_flags & BMF_STARTED) &&
			    (mpcopy = copymsg(mp)) != NULL)
				mac_rx(bmp->bm_mh, NULL, mpcopy);
		}

		/*
		 * First process the source address and learn from it.  For
		 * TRILL, we learn only if we're the appointed forwarder.
		 */
		bridge_learn(blp, hdr_info.mhi_saddr, RBRIDGE_NICKNAME_NONE,
		    vlanid);

		/*
		 * Now check whether we're forwarding and look up the
		 * destination.  If we can forward, do so.
		 */
		if (trillmode || blp->bl_state == BLS_FORWARDING) {
			mp = bridge_forward(blp, &hdr_info, mp, vlanid, tci,
			    B_FALSE, B_FALSE);
		}
		/* whatever bridge_forward left us goes up the local stack */
		if (mp != NULL)
			mac_rx_common(blp->bl_mh, rsrc, mp);
	}
}
   2437 
   2438 
   2439 /* ARGSUSED */
   2440 static mblk_t *
   2441 bridge_xmit_cb(mac_handle_t mh, mac_ring_handle_t rh, mblk_t *mpnext)
   2442 {
   2443 	bridge_link_t *blp = (bridge_link_t *)mh;
   2444 	bridge_inst_t *bip = blp->bl_inst;
   2445 	bridge_mac_t *bmp = bip->bi_mac;
   2446 	mac_header_info_t hdr_info;
   2447 	uint16_t vlanid, tci;
   2448 	mblk_t *mp, *mpcopy;
   2449 	boolean_t trillmode;
   2450 
   2451 	trillmode = blp->bl_trilldata != NULL;
   2452 
   2453 	/*
   2454 	 * If we're using STP and we're in blocking or listening state, or if
   2455 	 * we're using TRILL and no VLANs are active, then behave as though the
   2456 	 * bridge isn't here at all, and send on the local link alone.
   2457 	 */
   2458 	if ((!trillmode && blp->bl_state == BLS_BLOCKLISTEN) ||
   2459 	    (trillmode &&
   2460 	    (!(blp->bl_flags & BLF_TRILLACTIVE) ||
   2461 	    (blp->bl_flags & BLF_SDUFAIL)))) {
   2462 		KIINCR(bki_sent);
   2463 		KLINCR(bkl_xmit);
   2464 		MAC_RING_TX(blp->bl_mh, rh, mpnext, mp);
   2465 		return (mp);
   2466 	}
   2467 
   2468 	/*
   2469 	 * Send a copy of the message up to the observability node users.
   2470 	 * TRILL needs to check on a packet-by-packet basis.
   2471 	 */
   2472 	if (!trillmode && blp->bl_state == BLS_FORWARDING &&
   2473 	    (bmp->bm_flags & BMF_STARTED) &&
   2474 	    (mp = copymsgchain(mpnext)) != NULL) {
   2475 		mac_rx(bmp->bm_mh, NULL, mp);
   2476 	}
   2477 
   2478 	while ((mp = mpnext) != NULL) {
   2479 		mpnext = mp->b_next;
   2480 		mp->b_next = NULL;
   2481 
   2482 		if (mac_header_info(blp->bl_mh, mp, &hdr_info) != 0) {
   2483 			freemsg(mp);
   2484 			continue;
   2485 		}
   2486 
   2487 		/*
   2488 		 * Extract and validate the VLAN ID for this packet.
   2489 		 */
   2490 		if (!bridge_get_vlan(blp, &hdr_info, mp, &vlanid, &tci) ||
   2491 		    !BRIDGE_AF_ISSET(blp, vlanid)) {
   2492 			freemsg(mp);
   2493 			continue;
   2494 		}
   2495 
   2496 		/*
   2497 		 * If we're using TRILL, then we've now validated that we're
   2498 		 * the forwarder for this VLAN, so go ahead and let
   2499 		 * observability node users know about the packet.
   2500 		 */
   2501 		if (trillmode && (bmp->bm_flags & BMF_STARTED) &&
   2502 		    (mpcopy = copymsg(mp)) != NULL) {
   2503 			mac_rx(bmp->bm_mh, NULL, mpcopy);
   2504 		}
   2505 
   2506 		/*
   2507 		 * We have to learn from our own transmitted packets, because
   2508 		 * there may be a Solaris DLPI raw sender (who can specify his
   2509 		 * own source address) using promiscuous mode for receive.  The
   2510 		 * mac layer information won't (and can't) tell us everything
   2511 		 * we need to know.
   2512 		 */
   2513 		bridge_learn(blp, hdr_info.mhi_saddr, RBRIDGE_NICKNAME_NONE,
   2514 		    vlanid);
   2515 
   2516 		/* attempt forwarding */
   2517 		if (trillmode || blp->bl_state == BLS_FORWARDING) {
   2518 			mp = bridge_forward(blp, &hdr_info, mp, vlanid, tci,
   2519 			    B_FALSE, B_TRUE);
   2520 		}
   2521 		if (mp != NULL) {
   2522 			MAC_RING_TX(blp->bl_mh, rh, mp, mp);
   2523 			if (mp == NULL) {
   2524 				KIINCR(bki_sent);
   2525 				KLINCR(bkl_xmit);
   2526 			}
   2527 		}
   2528 		/*
   2529 		 * If we get stuck, then stop.  Don't let the user's output
   2530 		 * packets get out of order.  (More importantly: don't try to
   2531 		 * bridge the same packet multiple times if flow control is
   2532 		 * asserted.)
   2533 		 */
   2534 		if (mp != NULL) {
   2535 			mp->b_next = mpnext;
   2536 			break;
   2537 		}
   2538 	}
   2539 	return (mp);
   2540 }
   2541 
   2542 /*
   2543  * This is called by TRILL when it decapsulates an packet, and we must forward
   2544  * locally.  On failure, we just drop.
   2545  *
   2546  * Note that the ingress_nick reported by TRILL must not represent this local
   2547  * node.
   2548  */
   2549 void
   2550 bridge_trill_decaps(bridge_link_t *blp, mblk_t *mp, uint16_t ingress_nick)
   2551 {
   2552 	mac_header_info_t hdr_info;
   2553 	uint16_t vlanid, tci;
   2554 	bridge_inst_t *bip = blp->bl_inst;	/* used by macros */
   2555 	mblk_t *mpcopy;
   2556 
   2557 	if (mac_header_info(blp->bl_mh, mp, &hdr_info) != 0) {
   2558 		freemsg(mp);
   2559 		return;
   2560 	}
   2561 
   2562 	/* Extract VLAN ID for this packet. */
   2563 	if (hdr_info.mhi_bindsap == ETHERTYPE_VLAN) {
   2564 		struct ether_vlan_header *evhp;
   2565 
   2566 		/* LINTED: alignment */
   2567 		evhp = (struct ether_vlan_header *)mp->b_rptr;
   2568 		tci = ntohs(evhp->ether_tci);
   2569 		vlanid = VLAN_ID(tci);
   2570 	} else {
   2571 		/* Inner VLAN headers are required in TRILL data packets */
   2572 		DTRACE_PROBE3(bridge__trill__decaps__novlan, bridge_link_t *,
   2573 		    blp, mblk_t *, mp, uint16_t, ingress_nick);
   2574 		freemsg(mp);
   2575 		return;
   2576 	}
   2577 
   2578 	/* Learn the location of this sender in the RBridge network */
   2579 	bridge_learn(blp, hdr_info.mhi_saddr, ingress_nick, vlanid);
   2580 
   2581 	/* attempt forwarding */
   2582 	mp = bridge_forward(blp, &hdr_info, mp, vlanid, tci, B_TRUE, B_TRUE);
   2583 	if (mp != NULL) {
   2584 		if (bridge_can_send(blp, vlanid)) {
   2585 			/* Deliver a copy locally as well */
   2586 			if ((mpcopy = copymsg(mp)) != NULL)
   2587 				mac_rx_common(blp->bl_mh, NULL, mpcopy);
   2588 			MAC_RING_TX(blp->bl_mh, NULL, mp, mp);
   2589 		}
   2590 		if (mp == NULL) {
   2591 			KIINCR(bki_sent);
   2592 			KLINCR(bkl_xmit);
   2593 		} else {
   2594 			freemsg(mp);
   2595 		}
   2596 	}
   2597 }
   2598 
   2599 /*
   2600  * This function is used by TRILL _only_ to transmit TRILL-encapsulated
   2601  * packets.  It sends on a single underlying link and does not bridge.
   2602  */
   2603 mblk_t *
   2604 bridge_trill_output(bridge_link_t *blp, mblk_t *mp)
   2605 {
   2606 	bridge_inst_t *bip = blp->bl_inst;	/* used by macros */
   2607 
   2608 	mac_trill_snoop(blp->bl_mh, mp);
   2609 	MAC_RING_TX(blp->bl_mh, NULL, mp, mp);
   2610 	if (mp == NULL) {
   2611 		KIINCR(bki_sent);
   2612 		KLINCR(bkl_xmit);
   2613 	}
   2614 	return (mp);
   2615 }
   2616 
   2617 /*
   2618  * Set the "appointed forwarder" flag array for this link.  TRILL controls
   2619  * forwarding on a VLAN basis.  The "trillactive" flag is an optimization for
   2620  * the forwarder.
   2621  */
   2622 void
   2623 bridge_trill_setvlans(bridge_link_t *blp, const uint8_t *arr)
   2624 {
   2625 	int i;
   2626 	uint_t newflags = 0;
   2627 
   2628 	for (i = 0; i < BRIDGE_VLAN_ARR_SIZE; i++) {
   2629 		if ((blp->bl_afs[i] = arr[i]) != 0)
   2630 			newflags = BLF_TRILLACTIVE;
   2631 	}
   2632 	blp->bl_flags = (blp->bl_flags & ~BLF_TRILLACTIVE) | newflags;
   2633 }
   2634 
   2635 void
   2636 bridge_trill_flush(bridge_link_t *blp, uint16_t vlan, boolean_t dotrill)
   2637 {
   2638 	bridge_inst_t *bip = blp->bl_inst;
   2639 	bridge_fwd_t *bfp, *bfnext;
   2640 	avl_tree_t fwd_scavenge;
   2641 	int i;
   2642 
   2643 	_NOTE(ARGUNUSED(vlan));
   2644 
   2645 	avl_create(&fwd_scavenge, fwd_compare, sizeof (bridge_fwd_t),
   2646 	    offsetof(bridge_fwd_t, bf_node));
   2647 	rw_enter(&bip->bi_rwlock, RW_WRITER);
   2648 	bfnext = avl_first(&bip->bi_fwd);
   2649 	while ((bfp = bfnext) != NULL) {
   2650 		bfnext = AVL_NEXT(&bip->bi_fwd, bfp);
   2651 		if (bfp->bf_flags & BFF_LOCALADDR)
   2652 			continue;
   2653 		if (dotrill) {
   2654 			/* port doesn't matter if we're flushing TRILL */
   2655 			if (bfp->bf_trill_nick == RBRIDGE_NICKNAME_NONE)
   2656 				continue;
   2657 		} else {
   2658 			if (bfp->bf_trill_nick != RBRIDGE_NICKNAME_NONE)
   2659 				continue;
   2660 			for (i = 0; i < bfp->bf_nlinks; i++) {
   2661 				if (bfp->bf_links[i] == blp)
   2662 					break;
   2663 			}
   2664 			if (i >= bfp->bf_nlinks)
   2665 				continue;
   2666 		}
   2667 		ASSERT(bfp->bf_flags & BFF_INTREE);
   2668 		avl_remove(&bip->bi_fwd, bfp);
   2669 		bfp->bf_flags &= ~BFF_INTREE;
   2670 		avl_add(&fwd_scavenge, bfp);
   2671 	}
   2672 	rw_exit(&bip->bi_rwlock);
   2673 	bfnext = avl_first(&fwd_scavenge);
   2674 	while ((bfp = bfnext) != NULL) {
   2675 		bfnext = AVL_NEXT(&fwd_scavenge, bfp);
   2676 		avl_remove(&fwd_scavenge, bfp);
   2677 		fwd_unref(bfp);
   2678 	}
   2679 	avl_destroy(&fwd_scavenge);
   2680 }
   2681 
   2682 /*
   2683  * Let the mac module take or drop a reference to a bridge link.  When this is
   2684  * called, the mac module is holding the mi_bridge_lock, so the link cannot be
   2685  * in the process of entering or leaving a bridge.
   2686  */
   2687 static void
   2688 bridge_ref_cb(mac_handle_t mh, boolean_t hold)
   2689 {
   2690 	bridge_link_t *blp = (bridge_link_t *)mh;
   2691 
   2692 	if (hold)
   2693 		atomic_inc_uint(&blp->bl_refs);
   2694 	else
   2695 		link_unref(blp);
   2696 }
   2697 
   2698 /*
   2699  * Handle link state changes reported by the mac layer.  This acts as a filter
   2700  * for link state changes: if a link is reporting down, but there are other
   2701  * links still up on the bridge, then the state is changed to "up."  When the
   2702  * last link goes down, all are marked down, and when the first link goes up,
   2703  * all are marked up.  (Recursion is avoided by the use of the "redo" function.)
   2704  *
   2705  * We treat unknown as equivalent to "up."
   2706  */
   2707 static link_state_t
   2708 bridge_ls_cb(mac_handle_t mh, link_state_t newls)
   2709 {
   2710 	bridge_link_t *blp = (bridge_link_t *)mh;
   2711 	bridge_link_t *blcmp;
   2712 	bridge_inst_t *bip;
   2713 	bridge_mac_t *bmp;
   2714 
   2715 	if (newls != LINK_STATE_DOWN && blp->bl_linkstate != LINK_STATE_DOWN ||
   2716 	    (blp->bl_flags & (BLF_DELETED|BLF_SDUFAIL))) {
   2717 		blp->bl_linkstate = newls;
   2718 		return (newls);
   2719 	}
   2720 
   2721 	/*
   2722 	 * Scan first to see if there are any other non-down links.  If there
   2723 	 * are, then we're done.  Otherwise, if all others are down, then the
   2724 	 * state of this link is the state of the bridge.
   2725 	 */
   2726 	bip = blp->bl_inst;
   2727 	rw_enter(&bip->bi_rwlock, RW_WRITER);
   2728 	for (blcmp = list_head(&bip->bi_links); blcmp != NULL;
   2729 	    blcmp = list_next(&bip->bi_links, blcmp)) {
   2730 		if (blcmp != blp &&
   2731 		    !(blcmp->bl_flags & (BLF_DELETED|BLF_SDUFAIL)) &&
   2732 		    blcmp->bl_linkstate != LINK_STATE_DOWN)
   2733 			break;
   2734 	}
   2735 
   2736 	if (blcmp != NULL) {
   2737 		/*
   2738 		 * If there are other links that are considered up, then tell
   2739 		 * the caller that the link is actually still up, regardless of
   2740 		 * this link's underlying state.
   2741 		 */
   2742 		blp->bl_linkstate = newls;
   2743 		newls = LINK_STATE_UP;
   2744 	} else if (blp->bl_linkstate != newls) {
   2745 		/*
   2746 		 * If we've found no other 'up' links, and this link has
   2747 		 * changed state, then report the new state of the bridge to
   2748 		 * all other clients.
   2749 		 */
   2750 		blp->bl_linkstate = newls;
   2751 		for (blcmp = list_head(&bip->bi_links); blcmp != NULL;
   2752 		    blcmp = list_next(&bip->bi_links, blcmp)) {
   2753 			if (blcmp != blp && !(blcmp->bl_flags & BLF_DELETED))
   2754 				mac_link_redo(blcmp->bl_mh, newls);
   2755 		}
   2756 		bmp = bip->bi_mac;
   2757 		if ((bmp->bm_linkstate = newls) != LINK_STATE_DOWN)
   2758 			bmp->bm_linkstate = LINK_STATE_UP;
   2759 		mac_link_redo(bmp->bm_mh, bmp->bm_linkstate);
   2760 	}
   2761 	rw_exit(&bip->bi_rwlock);
   2762 	return (newls);
   2763 }
   2764 
   2765 static void
   2766 bridge_add_link(void *arg)
   2767 {
   2768 	mblk_t *mp = arg;
   2769 	bridge_stream_t *bsp;
   2770 	bridge_inst_t *bip, *bipt;
   2771 	bridge_mac_t *bmp;
   2772 	datalink_id_t linkid;
   2773 	int err;
   2774 	mac_handle_t mh;
   2775 	uint_t maxsdu;
   2776 	bridge_link_t *blp = NULL, *blpt;
   2777 	const mac_info_t *mip;
   2778 	boolean_t macopen = B_FALSE;
   2779 	char linkname[MAXLINKNAMELEN];
   2780 	char kstatname[KSTAT_STRLEN];
   2781 	int i;
   2782 	link_state_t linkstate;
   2783 	mblk_t *mlist;
   2784 
   2785 	bsp = (bridge_stream_t *)mp->b_next;
   2786 	mp->b_next = NULL;
   2787 	bip = bsp->bs_inst;
   2788 	/* LINTED: alignment */
   2789 	linkid = *(datalink_id_t *)mp->b_cont->b_rptr;
   2790 
   2791 	/*
   2792 	 * First make sure that there is no other bridge that has this link.
   2793 	 * We don't want to overlap operations from two bridges; the MAC layer
   2794 	 * supports only one bridge on a given MAC at a time.
   2795 	 *
   2796 	 * We rely on the fact that there's just one taskq thread for the
   2797 	 * bridging module: once we've checked for a duplicate, we can drop the
   2798 	 * lock, because no other thread could possibly be adding another link
   2799 	 * until we're done.
   2800 	 */
   2801 	mutex_enter(&inst_lock);
   2802 	for (bipt = list_head(&inst_list); bipt != NULL;
   2803 	    bipt = list_next(&inst_list, bipt)) {
   2804 		rw_enter(&bipt->bi_rwlock, RW_READER);
   2805 		for (blpt = list_head(&bipt->bi_links); blpt != NULL;
   2806 		    blpt = list_next(&bipt->bi_links, blpt)) {
   2807 			if (linkid == blpt->bl_linkid)
   2808 				break;
   2809 		}
   2810 		rw_exit(&bipt->bi_rwlock);
   2811 		if (blpt != NULL)
   2812 			break;
   2813 	}
   2814 	mutex_exit(&inst_lock);
   2815 	if (bipt != NULL) {
   2816 		err = EBUSY;
   2817 		goto fail;
   2818 	}
   2819 
   2820 	if ((err = mac_open_by_linkid(linkid, &mh)) != 0)
   2821 		goto fail;
   2822 	macopen = B_TRUE;
   2823 
   2824 	/* we bridge only Ethernet */
   2825 	mip = mac_info(mh);
   2826 	if (mip->mi_media != DL_ETHER) {
   2827 		err = ENOTSUP;
   2828 		goto fail;
   2829 	}
   2830 
   2831 	/*
   2832 	 * Get the current maximum SDU on this interface.  If there are other
   2833 	 * links on the bridge, then this one must match, or it errors out.
   2834 	 * Otherwise, the first link becomes the standard for the new bridge.
   2835 	 */
   2836 	mac_sdu_get(mh, NULL, &maxsdu);
   2837 	bmp = bip->bi_mac;
   2838 	if (list_is_empty(&bip->bi_links)) {
   2839 		bmp->bm_maxsdu = maxsdu;
   2840 		(void) mac_maxsdu_update(bmp->bm_mh, maxsdu);
   2841 	}
   2842 
   2843 	/* figure the kstat name; also used as the mac client name */
   2844 	i = MBLKL(mp->b_cont) - sizeof (datalink_id_t);
   2845 	if (i < 0 || i >= MAXLINKNAMELEN)
   2846 		i = MAXLINKNAMELEN - 1;
   2847 	bcopy(mp->b_cont->b_rptr + sizeof (datalink_id_t), linkname, i);
   2848 	linkname[i] = '\0';
   2849 	(void) snprintf(kstatname, sizeof (kstatname), "%s-%s", bip->bi_name,
   2850 	    linkname);
   2851 
   2852 	if ((blp = kmem_zalloc(sizeof (*blp), KM_NOSLEEP)) == NULL) {
   2853 		err = ENOMEM;
   2854 		goto fail;
   2855 	}
   2856 	blp->bl_lfailmp = allocb(sizeof (bridge_ctl_t), BPRI_MED);
   2857 	if (blp->bl_lfailmp == NULL) {
   2858 		kmem_free(blp, sizeof (*blp));
   2859 		err = ENOMEM;
   2860 		goto fail;
   2861 	}
   2862 
   2863 	atomic_inc_uint(&bip->bi_refs);
   2864 	blp->bl_inst = bip;
   2865 	blp->bl_mh = mh;
   2866 	blp->bl_linkid = linkid;
   2867 	blp->bl_maxsdu = maxsdu;
   2868 	cv_init(&blp->bl_trillwait, NULL, CV_DRIVER, NULL);
   2869 	mutex_init(&blp->bl_trilllock, NULL, MUTEX_DRIVER, NULL);
   2870 	(void) memset(blp->bl_afs, 0xff, sizeof (blp->bl_afs));
   2871 
   2872 	err = mac_client_open(mh, &blp->bl_mch, kstatname, 0);
   2873 	if (err != 0)
   2874 		goto fail;
   2875 	blp->bl_flags |= BLF_CLIENT_OPEN;
   2876 
   2877 	err = mac_margin_add(mh, &blp->bl_margin, B_TRUE);
   2878 	if (err != 0)
   2879 		goto fail;
   2880 	blp->bl_flags |= BLF_MARGIN_ADDED;
   2881 
   2882 	blp->bl_mnh = mac_notify_add(mh, bridge_notify_cb, blp);
   2883 
   2884 	err = mac_bridge_set(mh, (mac_handle_t)blp);
   2885 	if (err != 0)
   2886 		goto fail;
   2887 	blp->bl_flags |= BLF_SET_BRIDGE;
   2888 
   2889 	err = mac_promisc_add(blp->bl_mch, MAC_CLIENT_PROMISC_ALL, NULL,
   2890 	    blp, &blp->bl_mphp, MAC_PROMISC_FLAGS_NO_TX_LOOP);
   2891 	if (err != 0)
   2892 		goto fail;
   2893 	blp->bl_flags |= BLF_PROM_ADDED;
   2894 
   2895 	bridge_new_unicst(blp);
   2896 
   2897 	blp->bl_ksp = kstat_setup((kstat_named_t *)&blp->bl_kstats,
   2898 	    link_kstats_list, Dim(link_kstats_list), kstatname);
   2899 
   2900 	/*
   2901 	 * The link holds a reference to the bridge instance, so that the
   2902 	 * instance can't go away before the link is freed.  The insertion into
   2903 	 * bi_links holds a reference on the link.  When marking as removed
   2904 	 * from bi_links (BLF_DELETED), drop the reference on the link.  When
   2905 	 * freeing the link, drop the reference on the instance.
   2906 	 */
   2907 	rw_enter(&bip->bi_rwlock, RW_WRITER);
   2908 	list_insert_tail(&bip->bi_links, blp);
   2909 	atomic_inc_uint(&blp->bl_refs);
   2910 
   2911 	/*
   2912 	 * If the new link is no good on this bridge, then let the daemon know
   2913 	 * about the problem.
   2914 	 */
   2915 	mlist = NULL;
   2916 	if (maxsdu != bmp->bm_maxsdu)
   2917 		link_sdu_fail(blp, B_TRUE, &mlist);
   2918 	rw_exit(&bip->bi_rwlock);
   2919 	send_up_messages(bip, mlist);
   2920 
   2921 	/*
   2922 	 * Trigger a link state update so that if this link is the first one
   2923 	 * "up" in the bridge, then we notify everyone.  This triggers a trip
   2924 	 * through bridge_ls_cb.
   2925 	 */
   2926 	linkstate = mac_stat_get(mh, MAC_STAT_LOWLINK_STATE);
   2927 	blp->bl_linkstate = LINK_STATE_DOWN;
   2928 	mac_link_update(mh, linkstate);
   2929 
   2930 	/*
   2931 	 * We now need to report back to the stream that invoked us, and then
   2932 	 * drop the reference on the stream that we're holding.
   2933 	 */
   2934 	miocack(bsp->bs_wq, mp, 0, 0);
   2935 	stream_unref(bsp);
   2936 	return;
   2937 
   2938 fail:
   2939 	if (blp == NULL) {
   2940 		if (macopen)
   2941 			mac_close(mh);
   2942 	} else {
   2943 		link_shutdown(blp);
   2944 		link_free(blp);
   2945 	}
   2946 	miocnak(bsp->bs_wq, mp, 0, err);
   2947 	stream_unref(bsp);
   2948 }
   2949 
   2950 static void
   2951 bridge_rem_link(void *arg)
   2952 {
   2953 	mblk_t *mp = arg;
   2954 	bridge_stream_t *bsp;
   2955 	bridge_inst_t *bip;
   2956 	bridge_mac_t *bmp;
   2957 	datalink_id_t linkid;
   2958 	bridge_link_t *blp, *blsave;
   2959 	boolean_t found;
   2960 	mblk_t *mlist;
   2961 
   2962 	bsp = (bridge_stream_t *)mp->b_next;
   2963 	mp->b_next = NULL;
   2964 	bip = bsp->bs_inst;
   2965 	/* LINTED: alignment */
   2966 	linkid = *(datalink_id_t *)mp->b_cont->b_rptr;
   2967 
   2968 	/*
   2969 	 * We become reader here so that we can loop over the other links and
   2970 	 * deliver link up/down notification.
   2971 	 */
   2972 	rw_enter(&bip->bi_rwlock, RW_READER);
   2973 	found = B_FALSE;
   2974 	for (blp = list_head(&bip->bi_links); blp != NULL;
   2975 	    blp = list_next(&bip->bi_links, blp)) {
   2976 		if (blp->bl_linkid == linkid &&
   2977 		    !(blp->bl_flags & BLF_DELETED)) {
   2978 			blp->bl_flags |= BLF_DELETED;
   2979 			(void) ddi_taskq_dispatch(bridge_taskq, link_shutdown,
   2980 			    blp, DDI_SLEEP);
   2981 			found = B_TRUE;
   2982 			break;
   2983 		}
   2984 	}
   2985 
   2986 	/*
   2987 	 * Check if this link is up and the remainder of the links are all
   2988 	 * down.
   2989 	 */
   2990 	if (blp != NULL && blp->bl_linkstate != LINK_STATE_DOWN) {
   2991 		for (blp = list_head(&bip->bi_links); blp != NULL;
   2992 		    blp = list_next(&bip->bi_links, blp)) {
   2993 			if (blp->bl_linkstate != LINK_STATE_DOWN &&
   2994 			    !(blp->bl_flags & (BLF_DELETED|BLF_SDUFAIL)))
   2995 				break;
   2996 		}
   2997 		if (blp == NULL) {
   2998 			for (blp = list_head(&bip->bi_links); blp != NULL;
   2999 			    blp = list_next(&bip->bi_links, blp)) {
   3000 				if (!(blp->bl_flags & BLF_DELETED))
   3001 					mac_link_redo(blp->bl_mh,
   3002 					    LINK_STATE_DOWN);
   3003 			}
   3004 			bmp = bip->bi_mac;
   3005 			bmp->bm_linkstate = LINK_STATE_DOWN;
   3006 			mac_link_redo(bmp->bm_mh, LINK_STATE_DOWN);
   3007 		}
   3008 	}
   3009 
   3010 	/*
   3011 	 * Check if there's just one working link left on the bridge.  If so,
   3012 	 * then that link is now authoritative for bridge MTU.
   3013 	 */
   3014 	blsave = NULL;
   3015 	for (blp = list_head(&bip->bi_links); blp != NULL;
   3016 	    blp = list_next(&bip->bi_links, blp)) {
   3017 		if (!(blp->bl_flags & BLF_DELETED)) {
   3018 			if (blsave == NULL)
   3019 				blsave = blp;
   3020 			else
   3021 				break;
   3022 		}
   3023 	}
   3024 	mlist = NULL;
   3025 	bmp = bip->bi_mac;
   3026 	if (blsave != NULL && blp == NULL &&
   3027 	    blsave->bl_maxsdu != bmp->bm_maxsdu) {
   3028 		bmp->bm_maxsdu = blsave->bl_maxsdu;
   3029 		(void) mac_maxsdu_update(bmp->bm_mh, blsave->bl_maxsdu);
   3030 		link_sdu_fail(blsave, B_FALSE, &mlist);
   3031 	}
   3032 	rw_exit(&bip->bi_rwlock);
   3033 	send_up_messages(bip, mlist);
   3034 
   3035 	if (found)
   3036 		miocack(bsp->bs_wq, mp, 0, 0);
   3037 	else
   3038 		miocnak(bsp->bs_wq, mp, 0, ENOENT);
   3039 	stream_unref(bsp);
   3040 }
   3041 
   3042 /*
   3043  * This function intentionally returns with bi_rwlock held; it is intended for
   3044  * quick checks and updates.
   3045  */
   3046 static bridge_link_t *
   3047 enter_link(bridge_inst_t *bip, datalink_id_t linkid)
   3048 {
   3049 	bridge_link_t *blp;
   3050 
   3051 	rw_enter(&bip->bi_rwlock, RW_READER);
   3052 	for (blp = list_head(&bip->bi_links); blp != NULL;
   3053 	    blp = list_next(&bip->bi_links, blp)) {
   3054 		if (blp->bl_linkid == linkid && !(blp->bl_flags & BLF_DELETED))
   3055 			break;
   3056 	}
   3057 	return (blp);
   3058 }
   3059 
   3060 static void
   3061 bridge_ioctl(queue_t *wq, mblk_t *mp)
   3062 {
   3063 	bridge_stream_t *bsp = wq->q_ptr;
   3064 	bridge_inst_t *bip;
   3065 	struct iocblk *iop;
   3066 	int rc = EINVAL;
   3067 	int len = 0;
   3068 	bridge_link_t *blp;
   3069 	cred_t *cr;
   3070 
   3071 	/* LINTED: alignment */
   3072 	iop = (struct iocblk *)mp->b_rptr;
   3073 
   3074 	/*
   3075 	 * For now, all of the bridge ioctls are privileged.
   3076 	 */
   3077 	if ((cr = msg_getcred(mp, NULL)) == NULL)
   3078 		cr = iop->ioc_cr;
   3079 	if (cr != NULL && secpolicy_net_config(cr, B_FALSE) != 0) {
   3080 		miocnak(wq, mp, 0, EPERM);
   3081 		return;
   3082 	}
   3083 
   3084 	switch (iop->ioc_cmd) {
   3085 	case BRIOC_NEWBRIDGE: {
   3086 		bridge_newbridge_t *bnb;
   3087 
   3088 		if (bsp->bs_inst != NULL ||
   3089 		    (rc = miocpullup(mp, sizeof (bridge_newbridge_t))) != 0)
   3090 			break;
   3091 		/* LINTED: alignment */
   3092 		bnb = (bridge_newbridge_t *)mp->b_cont->b_rptr;
   3093 		bnb->bnb_name[MAXNAMELEN-1] = '\0';
   3094 		rc = bridge_create(bnb->bnb_linkid, bnb->bnb_name, &bip, cr);
   3095 		if (rc != 0)
   3096 			break;
   3097 
   3098 		rw_enter(&bip->bi_rwlock, RW_WRITER);
   3099 		if (bip->bi_control != NULL) {
   3100 			rw_exit(&bip->bi_rwlock);
   3101 			bridge_unref(bip);
   3102 			rc = EBUSY;
   3103 		} else {
   3104 			atomic_inc_uint(&bip->bi_refs);
   3105 			bsp->bs_inst = bip;	/* stream holds reference */
   3106 			bip->bi_control = bsp;
   3107 			rw_exit(&bip->bi_rwlock);
   3108 			rc = 0;
   3109 		}
   3110 		break;
   3111 	}
   3112 
   3113 	case BRIOC_ADDLINK:
   3114 		if ((bip = bsp->bs_inst) == NULL ||
   3115 		    (rc = miocpullup(mp, sizeof (datalink_id_t))) != 0)
   3116 			break;
   3117 		/*
   3118 		 * We cannot perform the action in this thread, because we're
   3119 		 * not in process context, and we may already be holding
   3120 		 * MAC-related locks.  Place the request on taskq.
   3121 		 */
   3122 		mp->b_next = (mblk_t *)bsp;
   3123 		stream_ref(bsp);
   3124 		(void) ddi_taskq_dispatch(bridge_taskq, bridge_add_link, mp,
   3125 		    DDI_SLEEP);
   3126 		return;
   3127 
   3128 	case BRIOC_REMLINK:
   3129 		if ((bip = bsp->bs_inst) == NULL ||
   3130 		    (rc = miocpullup(mp, sizeof (datalink_id_t))) != 0)
   3131 			break;
   3132 		/*
   3133 		 * We cannot perform the action in this thread, because we're
   3134 		 * not in process context, and we may already be holding
   3135 		 * MAC-related locks.  Place the request on taskq.
   3136 		 */
   3137 		mp->b_next = (mblk_t *)bsp;
   3138 		stream_ref(bsp);
   3139 		(void) ddi_taskq_dispatch(bridge_taskq, bridge_rem_link, mp,
   3140 		    DDI_SLEEP);
   3141 		return;
   3142 
   3143 	case BRIOC_SETSTATE: {
   3144 		bridge_setstate_t *bss;
   3145 
   3146 		if ((bip = bsp->bs_inst) == NULL ||
   3147 		    (rc = miocpullup(mp, sizeof (*bss))) != 0)
   3148 			break;
   3149 		/* LINTED: alignment */
   3150 		bss = (bridge_setstate_t *)mp->b_cont->b_rptr;
   3151 		if ((blp = enter_link(bip, bss->bss_linkid)) == NULL) {
   3152 			rc = ENOENT;
   3153 		} else {
   3154 			rc = 0;
   3155 			blp->bl_state = bss->bss_state;
   3156 		}
   3157 		rw_exit(&bip->bi_rwlock);
   3158 		break;
   3159 	}
   3160 
   3161 	case BRIOC_SETPVID: {
   3162 		bridge_setpvid_t *bsv;
   3163 
   3164 		if ((bip = bsp->bs_inst) == NULL ||
   3165 		    (rc = miocpullup(mp, sizeof (*bsv))) != 0)
   3166 			break;
   3167 		/* LINTED: alignment */
   3168 		bsv = (bridge_setpvid_t *)mp->b_cont->b_rptr;
   3169 		if (bsv->bsv_vlan > VLAN_ID_MAX)
   3170 			break;
   3171 		if ((blp = enter_link(bip, bsv->bsv_linkid)) == NULL) {
   3172 			rc = ENOENT;
   3173 		} else if (blp->bl_pvid == bsv->bsv_vlan) {
   3174 			rc = 0;
   3175 		} else {
   3176 			rc = 0;
   3177 			BRIDGE_VLAN_CLR(blp, blp->bl_pvid);
   3178 			blp->bl_pvid = bsv->bsv_vlan;
   3179 			if (blp->bl_pvid != 0)
   3180 				BRIDGE_VLAN_SET(blp, blp->bl_pvid);
   3181 		}
   3182 		rw_exit(&bip->bi_rwlock);
   3183 		break;
   3184 	}
   3185 
   3186 	case BRIOC_VLANENAB: {
   3187 		bridge_vlanenab_t *bve;
   3188 
   3189 		if ((bip = bsp->bs_inst) == NULL ||
   3190 		    (rc = miocpullup(mp, sizeof (*bve))) != 0)
   3191 			break;
   3192 		/* LINTED: alignment */
   3193 		bve = (bridge_vlanenab_t *)mp->b_cont->b_rptr;
   3194 		if (bve->bve_vlan > VLAN_ID_MAX)
   3195 			break;
   3196 		if ((blp = enter_link(bip, bve->bve_linkid)) == NULL) {
   3197 			rc = ENOENT;
   3198 		} else {
   3199 			rc = 0;
   3200 			/* special case: vlan 0 means "all" */
   3201 			if (bve->bve_vlan == 0) {
   3202 				(void) memset(blp->bl_vlans,
   3203 				    bve->bve_onoff ? ~0 : 0,
   3204 				    sizeof (blp->bl_vlans));
   3205 				BRIDGE_VLAN_CLR(blp, 0);
   3206 				if (blp->bl_pvid != 0)
   3207 					BRIDGE_VLAN_SET(blp, blp->bl_pvid);
   3208 			} else if (bve->bve_vlan == blp->bl_pvid) {
   3209 				rc = EINVAL;
   3210 			} else if (bve->bve_onoff) {
   3211 				BRIDGE_VLAN_SET(blp, bve->bve_vlan);
   3212 			} else {
   3213 				BRIDGE_VLAN_CLR(blp, bve->bve_vlan);
   3214 			}
   3215 		}
   3216 		rw_exit(&bip->bi_rwlock);
   3217 		break;
   3218 	}
   3219 
   3220 	case BRIOC_FLUSHFWD: {
   3221 		bridge_flushfwd_t *bff;
   3222 		bridge_fwd_t *bfp, *bfnext;
   3223 		avl_tree_t fwd_scavenge;
   3224 		int i;
   3225 
   3226 		if ((bip = bsp->bs_inst) == NULL ||
   3227 		    (rc = miocpullup(mp, sizeof (*bff))) != 0)
   3228 			break;
   3229 		/* LINTED: alignment */
   3230 		bff = (bridge_flushfwd_t *)mp->b_cont->b_rptr;
   3231 		rw_enter(&bip->bi_rwlock, RW_WRITER);
   3232 		/* This case means "all" */
   3233 		if (bff->bff_linkid == DATALINK_INVALID_LINKID) {
   3234 			blp = NULL;
   3235 		} else {
   3236 			for (blp = list_head(&bip->bi_links); blp != NULL;
   3237 			    blp = list_next(&bip->bi_links, blp)) {
   3238 				if (blp->bl_linkid == bff->bff_linkid &&
   3239 				    !(blp->bl_flags & BLF_DELETED))
   3240 					break;
   3241 			}
   3242 			if (blp == NULL) {
   3243 				rc = ENOENT;
   3244 				rw_exit(&bip->bi_rwlock);
   3245 				break;
   3246 			}
   3247 		}
   3248 		avl_create(&fwd_scavenge, fwd_compare, sizeof (bridge_fwd_t),
   3249 		    offsetof(bridge_fwd_t, bf_node));
   3250 		bfnext = avl_first(&bip->bi_fwd);
   3251 		while ((bfp = bfnext) != NULL) {
   3252 			bfnext = AVL_NEXT(&bip->bi_fwd, bfp);
   3253 			if (bfp->bf_flags & BFF_LOCALADDR)
   3254 				continue;
   3255 			if (blp != NULL) {
   3256 				for (i = 0; i < bfp->bf_maxlinks; i++) {
   3257 					if (bfp->bf_links[i] == blp)
   3258 						break;
   3259 				}
   3260 				/*
   3261 				 * If the link is there and we're excluding,
   3262 				 * then skip.  If the link is not there and
   3263 				 * we're doing only that link, then skip.
   3264 				 */
   3265 				if ((i < bfp->bf_maxlinks) == bff->bff_exclude)
   3266 					continue;
   3267 			}
   3268 			ASSERT(bfp->bf_flags & BFF_INTREE);
   3269 			avl_remove(&bip->bi_fwd, bfp);
   3270 			bfp->bf_flags &= ~BFF_INTREE;
   3271 			avl_add(&fwd_scavenge, bfp);
   3272 		}
   3273 		rw_exit(&bip->bi_rwlock);
   3274 		bfnext = avl_first(&fwd_scavenge);
   3275 		while ((bfp = bfnext) != NULL) {
   3276 			bfnext = AVL_NEXT(&fwd_scavenge, bfp);
   3277 			avl_remove(&fwd_scavenge, bfp);
   3278 			fwd_unref(bfp);	/* drop tree reference */
   3279 		}
   3280 		avl_destroy(&fwd_scavenge);
   3281 		break;
   3282 	}
   3283 
   3284 	case BRIOC_TABLEMAX:
   3285 		if ((bip = bsp->bs_inst) == NULL ||
   3286 		    (rc = miocpullup(mp, sizeof (uint32_t))) != 0)
   3287 			break;
   3288 		/* LINTED: alignment */
   3289 		bip->bi_tablemax = *(uint32_t *)mp->b_cont->b_rptr;
   3290 		break;
   3291 	}
   3292 
   3293 	if (rc == 0)
   3294 		miocack(wq, mp, len, 0);
   3295 	else
   3296 		miocnak(wq, mp, 0, rc);
   3297 }
   3298 
   3299 static void
   3300 bridge_wput(queue_t *wq, mblk_t *mp)
   3301 {
   3302 	switch (DB_TYPE(mp)) {
   3303 	case M_IOCTL:
   3304 		bridge_ioctl(wq, mp);
   3305 		break;
   3306 	case M_FLUSH:
   3307 		if (*mp->b_rptr & FLUSHW)
   3308 			*mp->b_rptr &= ~FLUSHW;
   3309 		if (*mp->b_rptr & FLUSHR)
   3310 			qreply(wq, mp);
   3311 		else
   3312 			freemsg(mp);
   3313 		break;
   3314 	default:
   3315 		freemsg(mp);
   3316 		break;
   3317 	}
   3318 }
   3319 
   3320 /*
   3321  * This function allocates the main data structures for the bridge driver and
   3322  * connects us into devfs.
   3323  */
   3324 static void
   3325 bridge_inst_init(void)
   3326 {
   3327 	bridge_scan_interval = 5 * drv_usectohz(1000000);
   3328 	bridge_fwd_age = 25 * drv_usectohz(1000000);
   3329 
   3330 	rw_init(&bmac_rwlock, NULL, RW_DRIVER, NULL);
   3331 	list_create(&bmac_list, sizeof (bridge_mac_t),
   3332 	    offsetof(bridge_mac_t, bm_node));
   3333 	list_create(&inst_list, sizeof (bridge_inst_t),
   3334 	    offsetof(bridge_inst_t, bi_node));
   3335 	cv_init(&inst_cv, NULL, CV_DRIVER, NULL);
   3336 	mutex_init(&inst_lock, NULL, MUTEX_DRIVER, NULL);
   3337 	cv_init(&stream_ref_cv, NULL, CV_DRIVER, NULL);
   3338 	mutex_init(&stream_ref_lock, NULL, MUTEX_DRIVER, NULL);
   3339 
   3340 	mac_bridge_vectors(bridge_xmit_cb, bridge_recv_cb, bridge_ref_cb,
   3341 	    bridge_ls_cb);
   3342 }
   3343 
   3344 /*
   3345  * This function disconnects from devfs and destroys all data structures in
   3346  * preparation for unload.  It's assumed that there are no active bridge
   3347  * references left at this point.
   3348  */
   3349 static void
   3350 bridge_inst_fini(void)
   3351 {
   3352 	mac_bridge_vectors(NULL, NULL, NULL, NULL);
   3353 	if (bridge_timerid != 0)
   3354 		(void) untimeout(bridge_timerid);
   3355 	rw_destroy(&bmac_rwlock);
   3356 	list_destroy(&bmac_list);
   3357 	list_destroy(&inst_list);
   3358 	cv_destroy(&inst_cv);
   3359 	mutex_destroy(&inst_lock);
   3360 	cv_destroy(&stream_ref_cv);
   3361 	mutex_destroy(&stream_ref_lock);
   3362 }
   3363 
   3364 /*
   3365  * bridge_attach()
   3366  *
   3367  * Description:
   3368  *    Attach bridge driver to the system.
   3369  */
   3370 static int
   3371 bridge_attach(dev_info_t *dip, ddi_attach_cmd_t cmd)
   3372 {
   3373 	if (cmd != DDI_ATTACH)
   3374 		return (DDI_FAILURE);
   3375 
   3376 	if (ddi_create_minor_node(dip, BRIDGE_CTL, S_IFCHR, 0, DDI_PSEUDO,
   3377 	    CLONE_DEV) == DDI_FAILURE) {
   3378 		return (DDI_FAILURE);
   3379 	}
   3380 
   3381 	if (dld_ioc_register(BRIDGE_IOC, bridge_ioc_list,
   3382 	    DLDIOCCNT(bridge_ioc_list)) != 0) {
   3383 		ddi_remove_minor_node(dip, BRIDGE_CTL);
   3384 		return (DDI_FAILURE);
   3385 	}
   3386 
   3387 	bridge_dev_info = dip;
   3388 	bridge_major = ddi_driver_major(dip);
   3389 	bridge_taskq = ddi_taskq_create(dip, BRIDGE_DEV_NAME, 1,
   3390 	    TASKQ_DEFAULTPRI, 0);
   3391 	return (DDI_SUCCESS);
   3392 }
   3393 
   3394 /*
   3395  * bridge_detach()
   3396  *
   3397  * Description:
   3398  *    Detach an interface to the system.
   3399  */
   3400 static int
   3401 bridge_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
   3402 {
   3403 	if (cmd != DDI_DETACH)
   3404 		return (DDI_FAILURE);
   3405 
   3406 	ddi_remove_minor_node(dip, NULL);
   3407 	ddi_taskq_destroy(bridge_taskq);
   3408 	bridge_dev_info = NULL;
   3409 	return (DDI_SUCCESS);
   3410 }
   3411 
   3412 /*
   3413  * bridge_info()
   3414  *
   3415  * Description:
   3416  *    Translate "dev_t" to a pointer to the associated "dev_info_t".
   3417  */
   3418 /* ARGSUSED */
   3419 static int
   3420 bridge_info(dev_info_t *dip, ddi_info_cmd_t infocmd, void *arg,
   3421 	void **result)
   3422 {
   3423 	int	rc;
   3424 
   3425 	switch (infocmd) {
   3426 	case DDI_INFO_DEVT2DEVINFO:
   3427 		if (bridge_dev_info == NULL) {
   3428 			rc = DDI_FAILURE;
   3429 		} else {
   3430 			*result = (void *)bridge_dev_info;
   3431 			rc = DDI_SUCCESS;
   3432 		}
   3433 		break;
   3434 	case DDI_INFO_DEVT2INSTANCE:
   3435 		*result = NULL;
   3436 		rc = DDI_SUCCESS;
   3437 		break;
   3438 	default:
   3439 		rc = DDI_FAILURE;
   3440 		break;
   3441 	}
   3442 	return (rc);
   3443 }
   3444 
/*
 * STREAMS module information for the bridge driver.  Both the read-side
 * (bridge_rinit) and write-side (bridge_winit) queue initializers point here.
 */
static struct module_info bridge_modinfo = {
	2105,			/* mi_idnum */
	BRIDGE_DEV_NAME,	/* mi_idname */
	0,			/* mi_minpsz */
	16384,			/* mi_maxpsz */
	65536,			/* mi_hiwat */
	128			/* mi_lowat */
};
   3453 
/*
 * Read-side queue initialization.  Only the open and close entry points are
 * supplied here; there is no read-side put or service procedure.
 */
static struct qinit bridge_rinit = {
	NULL,			/* qi_putp */
	NULL,			/* qi_srvp */
	bridge_open,		/* qi_qopen */
	bridge_close,		/* qi_qclose */
	NULL,			/* qi_qadmin */
	&bridge_modinfo,	/* qi_minfo */
	NULL			/* qi_mstat */
};
   3463 
/*
 * Write-side queue initialization.  The only entry point is the put
 * procedure, bridge_wput (which returns void, hence the cast); there is no
 * write-side service procedure.
 */
static struct qinit bridge_winit = {
	(int (*)())bridge_wput, /* qi_putp */
	NULL,			/* qi_srvp */
	NULL,			/* qi_qopen */
	NULL,			/* qi_qclose */
	NULL,			/* qi_qadmin */
	&bridge_modinfo,	/* qi_minfo */
	NULL			/* qi_mstat */
};
   3473 
/*
 * STREAMS table tying together the read- and write-side queue initializers;
 * referenced by the DDI_DEFINE_STREAM_OPS() invocation below.
 */
static struct streamtab bridge_tab = {
	&bridge_rinit,	/* st_rdinit */
	&bridge_winit	/* st_wrinit */
};
   3478 
/*
 * Define bridge_ops, the driver operations structure, wiring in
 * bridge_attach, bridge_detach, bridge_info and the bridge_tab STREAMS table.
 *
 * No STREAMS perimeters; we do all our own locking
 */
DDI_DEFINE_STREAM_OPS(bridge_ops, nulldev, nulldev, bridge_attach,
    bridge_detach, nodev, bridge_info, D_NEW | D_MP, &bridge_tab,
    ddi_quiesce_not_supported);
   3483 
/*
 * Loadable-driver linkage: the driver module operations, a description
 * string, and this driver's bridge_ops.
 */
static struct modldrv modldrv = {
	&mod_driverops,
	"bridging driver",
	&bridge_ops
};
   3489 
/*
 * Module linkage passed to mod_install(), mod_remove() and mod_info() by
 * _init(), _fini() and _info() below.  It carries the single modldrv entry
 * followed by a terminating NULL.
 */
static struct modlinkage modlinkage = {
	MODREV_1,
	(void *)&modldrv,
	NULL
};
   3495 
   3496 int
   3497 _init(void)
   3498 {
   3499 	int retv;
   3500 
   3501 	mac_init_ops(NULL, BRIDGE_DEV_NAME);
   3502 	bridge_inst_init();
   3503 	if ((retv = mod_install(&modlinkage)) != 0)
   3504 		bridge_inst_fini();
   3505 	return (retv);
   3506 }
   3507 
   3508 int
   3509 _fini(void)
   3510 {
   3511 	int retv;
   3512 
   3513 	rw_enter(&bmac_rwlock, RW_READER);
   3514 	retv = list_is_empty(&bmac_list) ? 0 : EBUSY;
   3515 	rw_exit(&bmac_rwlock);
   3516 	if (retv == 0 &&
   3517 	    (retv = mod_remove(&modlinkage)) == 0)
   3518 		bridge_inst_fini();
   3519 	return (retv);
   3520 }
   3521 
   3522 int
   3523 _info(struct modinfo *modinfop)
   3524 {
   3525 	return (mod_info(&modlinkage, modinfop));
   3526 }
   3527