Home | History | Annotate | Download | only in sppptun
      1 /*
      2  * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
      3  * Use is subject to license terms.
      4  */
      5 
      6 #include <sys/types.h>
      7 #include <sys/debug.h>
      8 #include <sys/param.h>
      9 #include <sys/stat.h>
     10 #include <sys/systm.h>
     11 #include <sys/socket.h>
     12 #include <sys/stream.h>
     13 #include <sys/stropts.h>
     14 #include <sys/errno.h>
     15 #include <sys/time.h>
     16 #include <sys/cmn_err.h>
     17 #include <sys/sdt.h>
     18 #include <sys/conf.h>
     19 #include <sys/dlpi.h>
     20 #include <sys/ddi.h>
     21 #include <sys/kstat.h>
     22 #include <sys/strsun.h>
     23 #include <sys/bitmap.h>
     24 #include <sys/sysmacros.h>
     25 #include <sys/note.h>
     26 #include <sys/policy.h>
     27 #include <net/ppp_defs.h>
     28 #include <net/pppio.h>
     29 #include <net/sppptun.h>
     30 #include <net/pppoe.h>
     31 #include <netinet/in.h>
     32 
     33 #include "s_common.h"
     34 #include "sppptun_mod.h"
     35 #include "sppptun_impl.h"
     36 
     37 #define	NTUN_INITIAL 16			/* Initial number of sppptun slots */
     38 #define	NTUN_PERCENT 5			/* Percent of memory to use */
     39 
     40 /*
     41  * This is used to tag official Solaris sources.  Please do not define
     42  * "INTERNAL_BUILD" when building this software outside of Sun
     43  * Microsystems.
     44  */
     45 #ifdef INTERNAL_BUILD
     46 /* MODINFO is limited to 32 characters. */
     47 const char sppptun_driver_description[] = "PPP 4.0 tunnel driver";
     48 const char sppptun_module_description[] = "PPP 4.0 tunnel module";
     49 #else
     50 const char sppptun_driver_description[] = "ANU PPP tundrv";
     51 const char sppptun_module_description[] = "ANU PPP tunmod";
     52 
     53 /* LINTED */
     54 static const char buildtime[] = "Built " __DATE__ " at " __TIME__
     55 #ifdef DEBUG
     56 " DEBUG"
     57 #endif
     58 "\n";
     59 #endif
     60 
     61 /*
     62  * Tunable values; these are similar to the values used in ptms_conf.c.
     63  * Override these settings via /etc/system.
     64  */
     65 uint_t	sppptun_cnt = 0;		/* Minimum number of tunnels */
     66 size_t	sppptun_max_pty = 0;		/* Maximum number of tunnels */
     67 uint_t	sppptun_init_cnt = NTUN_INITIAL; /* Initial number of tunnel slots */
     68 uint_t	sppptun_pctofmem = NTUN_PERCENT; /* Percent of memory to use */
     69 
     70 typedef struct ether_dest_s {
     71 	ether_addr_t addr;
     72 	ushort_t type;
     73 } ether_dest_t;
     74 
     75 /* Allows unaligned access. */
     76 #define	GETLONG(x)	(((x)[0]<<24)|((x)[1]<<16)|((x)[2]<<8)|(x)[3])
     77 
     78 static const char *tll_kstats_list[] = { TLL_KSTATS_NAMES };
     79 static const char *tcl_kstats_list[] = { TCL_KSTATS_NAMES };
     80 
     81 #define	KREF(p, m, vn)	p->m.vn.value.ui64
     82 #define	KINCR(p, m, vn)	++KREF(p, m, vn)
     83 #define	KDECR(p, m, vn)	--KREF(p, m, vn)
     84 
     85 #define	KLINCR(vn)	KINCR(tll, tll_kstats, vn)
     86 #define	KLDECR(vn)	KDECR(tll, tll_kstats, vn)
     87 
     88 #define	KCINCR(vn)	KINCR(tcl, tcl_kstats, vn)
     89 #define	KCDECR(vn)	KDECR(tcl, tcl_kstats, vn)
     90 
     91 static int	sppptun_open(queue_t *, dev_t *, int, int, cred_t *);
     92 static int	sppptun_close(queue_t *);
     93 static void	sppptun_urput(queue_t *, mblk_t *);
     94 static void	sppptun_uwput(queue_t *, mblk_t *);
     95 static int	sppptun_ursrv(queue_t *);
     96 static int	sppptun_uwsrv(queue_t *);
     97 static void	sppptun_lrput(queue_t *, mblk_t *);
     98 static void	sppptun_lwput(queue_t *, mblk_t *);
     99 
    100 /*
    101  * This is the hash table of clients.  Clients are the programs that
    102  * open /dev/sppptun as a device.  There may be a large number of
    103  * these; one per tunneled PPP session.
    104  *
    105  * Note: slots are offset from minor node value by 1 because
    106  * vmem_alloc returns 0 for failure.
    107  *
    108  * The tcl_slots array entries are modified only when exclusive on
    109  * both inner and outer perimeters.  This ensures that threads on
    110  * shared perimeters always view this as unchanging memory with no
    111  * need to lock around accesses.  (Specifically, the tcl_slots array
    112  * is modified by entry to sppptun_open, sppptun_close, and _fini.)
    113  */
    114 static tuncl_t **tcl_slots = NULL;	/* Slots for tuncl_t */
    115 static size_t tcl_nslots = 0;		/* Size of slot array */
    116 static size_t tcl_minormax = 0;		/* Maximum number of tunnels */
    117 static size_t tcl_inuse = 0;		/* # of tunnels currently allocated */
    118 static krwlock_t tcl_rwlock;
    119 static struct kmem_cache *tcl_cache = NULL;	/* tunnel cache */
    120 static vmem_t *tcl_minor_arena = NULL; /* Arena for device minors */
    121 
    122 /*
    123  * This is the simple list of lower layers.  For PPPoE, there is one
    124  * of these per Ethernet interface.  Lower layers are established by
    125  * "plumbing" -- using I_PLINK to connect the tunnel multiplexor to
    126  * the physical interface.
    127  */
    128 static struct qelem tunll_list;
    129 static int tunll_index;
    130 
    131 /* Test value; if all zeroes, then address hasn't been set yet. */
    132 static const ether_addr_t zero_mac_addr = { 0, 0, 0, 0, 0, 0 };
    133 
    134 #define	MIN_SET_FASTPATH_UNITDATAREQ_SIZE	\
    135 	(sizeof (dl_unitdata_req_t) + 4)
    136 
    137 #define	TUN_MI_ID	2104	/* officially allocated module ID */
    138 #define	TUN_MI_MINPSZ	(0)
    139 #define	TUN_MI_MAXPSZ	(PPP_MAXMTU)
    140 #define	TUN_MI_HIWAT	(PPP_MTU * 8)
    141 #define	TUN_MI_LOWAT	(128)
    142 
    143 static struct module_info sppptun_modinfo = {
    144 	TUN_MI_ID,		/* mi_idnum */
    145 	PPP_TUN_NAME,		/* mi_idname */
    146 	TUN_MI_MINPSZ,		/* mi_minpsz */
    147 	TUN_MI_MAXPSZ,		/* mi_maxpsz */
    148 	TUN_MI_HIWAT,		/* mi_hiwat */
    149 	TUN_MI_LOWAT		/* mi_lowat */
    150 };
    151 
    152 static struct qinit sppptun_urinit = {
    153 	(int (*)())sppptun_urput, /* qi_putp */
    154 	sppptun_ursrv,		/* qi_srvp */
    155 	sppptun_open,		/* qi_qopen */
    156 	sppptun_close,		/* qi_qclose */
    157 	NULL,			/* qi_qadmin */
    158 	&sppptun_modinfo,	/* qi_minfo */
    159 	NULL			/* qi_mstat */
    160 };
    161 
    162 static struct qinit sppptun_uwinit = {
    163 	(int (*)())sppptun_uwput, /* qi_putp */
    164 	sppptun_uwsrv,		/* qi_srvp */
    165 	NULL,			/* qi_qopen */
    166 	NULL,			/* qi_qclose */
    167 	NULL,			/* qi_qadmin */
    168 	&sppptun_modinfo,	/* qi_minfo */
    169 	NULL			/* qi_mstat */
    170 };
    171 
    172 static struct qinit sppptun_lrinit = {
    173 	(int (*)())sppptun_lrput, /* qi_putp */
    174 	NULL,			/* qi_srvp */
    175 	NULL,			/* qi_qopen */
    176 	NULL,			/* qi_qclose */
    177 	NULL,			/* qi_qadmin */
    178 	&sppptun_modinfo,	/* qi_minfo */
    179 	NULL			/* qi_mstat */
    180 };
    181 
    182 static struct qinit sppptun_lwinit = {
    183 	(int (*)())sppptun_lwput, /* qi_putp */
    184 	NULL,			/* qi_srvp */
    185 	NULL,			/* qi_qopen */
    186 	NULL,			/* qi_qclose */
    187 	NULL,			/* qi_qadmin */
    188 	&sppptun_modinfo,	/* qi_minfo */
    189 	NULL			/* qi_mstat */
    190 };
    191 
    192 /*
    193  * This is referenced in sppptun_mod.c.
    194  */
    195 struct streamtab sppptun_tab = {
    196 	&sppptun_urinit,	/* st_rdinit */
    197 	&sppptun_uwinit,	/* st_wrinit */
    198 	&sppptun_lrinit,	/* st_muxrinit */
    199 	&sppptun_lwinit		/* st_muxwrinit */
    200 };
    201 
    202 /*
    203  * Allocate another slot table twice as large as the original one
    204  * (limited to global maximum).  Migrate all tunnels to the new slot
    205  * table and free the original one.  Assumes we're exclusive on both
    206  * inner and outer perimeters, and thus there are no other users of
    207  * the tcl_slots array.
    208  */
    209 static minor_t
    210 tcl_grow(void)
    211 {
    212 	minor_t old_size = tcl_nslots;
    213 	minor_t new_size = 2 * old_size;
    214 	tuncl_t **tcl_old = tcl_slots;
    215 	tuncl_t **tcl_new;
    216 	void  *vaddr;			/* vmem_add return value */
    217 
    218 	ASSERT(RW_LOCK_HELD(&tcl_rwlock));
    219 
    220 	/* Allocate new ptms array */
    221 	tcl_new = kmem_zalloc(new_size * sizeof (tuncl_t *), KM_NOSLEEP);
    222 	if (tcl_new == NULL)
    223 		return ((minor_t)0);
    224 
    225 	/* Increase clone index space */
    226 	vaddr = vmem_add(tcl_minor_arena, (void*)((uintptr_t)old_size + 1),
    227 	    new_size - old_size, VM_NOSLEEP);
    228 
    229 	if (vaddr == NULL) {
    230 		kmem_free(tcl_new, new_size * sizeof (tuncl_t *));
    231 		return ((minor_t)0);
    232 	}
    233 
    234 	/* Migrate tuncl_t entries to a new location */
    235 	tcl_nslots = new_size;
    236 	bcopy(tcl_old, tcl_new, old_size * sizeof (tuncl_t *));
    237 	tcl_slots = tcl_new;
    238 	kmem_free(tcl_old, old_size * sizeof (tuncl_t *));
    239 
    240 	/* Allocate minor number and return it */
    241 	return ((minor_t)(uintptr_t)vmem_alloc(tcl_minor_arena, 1, VM_NOSLEEP));
    242 }
    243 
    244 /*
    245  * Allocate new minor number and tunnel client entry.  Returns the new
    246  * entry or NULL if no memory or maximum number of entries reached.
    247  * Assumes we're exclusive on both inner and outer perimeters, and
    248  * thus there are no other users of the tcl_slots array.
    249  */
    250 static tuncl_t *
    251 tuncl_alloc(int wantminor)
    252 {
    253 	minor_t dminor;
    254 	tuncl_t *tcl = NULL;
    255 
    256 	rw_enter(&tcl_rwlock, RW_WRITER);
    257 
    258 	ASSERT(tcl_slots != NULL);
    259 
    260 	/*
    261 	 * Always try to allocate new pty when sppptun_cnt minimum
    262 	 * limit is not achieved. If it is achieved, the maximum is
    263 	 * determined by either user-specified value (if it is
    264 	 * non-zero) or our memory estimations - whatever is less.
    265 	 */
    266 	if (tcl_inuse >= sppptun_cnt) {
    267 		/*
    268 		 * When system achieved required minimum of tunnels,
    269 		 * check for the denial of service limits.
    270 		 *
    271 		 * Get user-imposed maximum, if configured, or
    272 		 * calculated memory constraint.
    273 		 */
    274 		size_t user_max = (sppptun_max_pty == 0 ? tcl_minormax :
    275 		    min(sppptun_max_pty, tcl_minormax));
    276 
    277 		/* Do not try to allocate more than allowed */
    278 		if (tcl_inuse >= user_max) {
    279 			rw_exit(&tcl_rwlock);
    280 			return (NULL);
    281 		}
    282 	}
    283 	tcl_inuse++;
    284 
    285 	/*
    286 	 * Allocate new minor number. If this fails, all slots are
    287 	 * busy and we need to grow the hash.
    288 	 */
    289 	if (wantminor <= 0) {
    290 		dminor = (minor_t)(uintptr_t)vmem_alloc(tcl_minor_arena, 1,
    291 		    VM_NOSLEEP);
    292 		if (dminor == 0) {
    293 			/* Grow the cache and retry allocation */
    294 			dminor = tcl_grow();
    295 		}
    296 	} else {
    297 		dminor = (minor_t)(uintptr_t)vmem_xalloc(tcl_minor_arena, 1,
    298 		    0, 0, 0, (void *)(uintptr_t)wantminor,
    299 		    (void *)((uintptr_t)wantminor+1), VM_NOSLEEP);
    300 		if (dminor != 0 && dminor != wantminor) {
    301 			vmem_free(tcl_minor_arena, (void *)(uintptr_t)dminor,
    302 			    1);
    303 			dminor = 0;
    304 		}
    305 	}
    306 
    307 	if (dminor == 0) {
    308 		/* Not enough memory now */
    309 		tcl_inuse--;
    310 		rw_exit(&tcl_rwlock);
    311 		return (NULL);
    312 	}
    313 
    314 	tcl = kmem_cache_alloc(tcl_cache, KM_NOSLEEP);
    315 	if (tcl == NULL) {
    316 		/* Not enough memory - this entry can't be used now. */
    317 		vmem_free(tcl_minor_arena, (void *)(uintptr_t)dminor, 1);
    318 		tcl_inuse--;
    319 	} else {
    320 		bzero(tcl, sizeof (*tcl));
    321 		tcl->tcl_lsessid = dminor;
    322 		ASSERT(tcl_slots[dminor - 1] == NULL);
    323 		tcl_slots[dminor - 1] = tcl;
    324 	}
    325 
    326 	rw_exit(&tcl_rwlock);
    327 	return (tcl);
    328 }
    329 
    330 /*
    331  * This routine frees an upper level (client) stream by removing it
    332  * from the minor number pool and freeing the state structure storage.
    333  * Assumes we're exclusive on both inner and outer perimeters, and
    334  * thus there are no other concurrent users of the tcl_slots array or
    335  * of any entry in that array.
    336  */
    337 static void
    338 tuncl_free(tuncl_t *tcl)
    339 {
    340 	rw_enter(&tcl_rwlock, RW_WRITER);
    341 	ASSERT(tcl->tcl_lsessid <= tcl_nslots);
    342 	ASSERT(tcl_slots[tcl->tcl_lsessid - 1] == tcl);
    343 	ASSERT(tcl_inuse > 0);
    344 	tcl_inuse--;
    345 	tcl_slots[tcl->tcl_lsessid - 1] = NULL;
    346 
    347 	if (tcl->tcl_ksp != NULL) {
    348 		kstat_delete(tcl->tcl_ksp);
    349 		tcl->tcl_ksp = NULL;
    350 	}
    351 
    352 	/* Return minor number to the pool of minors */
    353 	vmem_free(tcl_minor_arena, (void *)(uintptr_t)tcl->tcl_lsessid, 1);
    354 
    355 	/* Return tuncl_t to the cache */
    356 	kmem_cache_free(tcl_cache, tcl);
    357 	rw_exit(&tcl_rwlock);
    358 }
    359 
    360 /*
    361  * Get tuncl_t structure by minor number.  Returns NULL when minor is
    362  * out of range.  Note that lookup of tcl pointers (and use of those
    363  * pointers) is safe because modification is done only when exclusive
    364  * on both inner and outer perimeters.
    365  */
    366 static tuncl_t *
    367 tcl_by_minor(minor_t dminor)
    368 {
    369 	tuncl_t *tcl = NULL;
    370 
    371 	if ((dminor >= 1) && (dminor <= tcl_nslots) && tcl_slots != NULL) {
    372 		tcl = tcl_slots[dminor - 1];
    373 	}
    374 
    375 	return (tcl);
    376 }
    377 
    378 /*
    379  * Set up kstats for upper or lower stream.
    380  */
    381 static kstat_t *
    382 kstat_setup(kstat_named_t *knt, const char **names, int nstat,
    383     const char *modname, int unitnum)
    384 {
    385 	kstat_t *ksp;
    386 	char unitname[KSTAT_STRLEN];
    387 	int i;
    388 
    389 	for (i = 0; i < nstat; i++) {
    390 		kstat_set_string(knt[i].name, names[i]);
    391 		knt[i].data_type = KSTAT_DATA_UINT64;
    392 	}
    393 	(void) sprintf(unitname, "%s" "%d", modname, unitnum);
    394 	ksp = kstat_create(modname, unitnum, unitname, "net",
    395 	    KSTAT_TYPE_NAMED, nstat, KSTAT_FLAG_VIRTUAL);
    396 	if (ksp != NULL) {
    397 		ksp->ks_data = (void *)knt;
    398 		kstat_install(ksp);
    399 	}
    400 	return (ksp);
    401 }
    402 
    403 /*
    404  * sppptun_open()
    405  *
    406  * MT-Perimeters:
    407  *    exclusive inner, exclusive outer.
    408  *
    409  * Description:
    410  *    Common open procedure for module and driver.
    411  */
    412 static int
    413 sppptun_open(queue_t *q, dev_t *devp, int oflag, int sflag, cred_t *credp)
    414 {
    415 	_NOTE(ARGUNUSED(oflag))
    416 
    417 	/* Allow a re-open */
    418 	if (q->q_ptr != NULL)
    419 		return (0);
    420 
    421 	/* In the off chance that we're on our way out, just return error */
    422 	if (tcl_slots == NULL)
    423 		return (EINVAL);
    424 
    425 	if (sflag & MODOPEN) {
    426 		tunll_t *tll;
    427 		char *cp;
    428 
    429 		/* ordinary users have no need to push this module */
    430 		if (secpolicy_net_config(credp, B_FALSE) != 0)
    431 			return (EPERM);
    432 
    433 		tll = kmem_zalloc(sizeof (tunll_t), KM_SLEEP);
    434 
    435 		tll->tll_index = tunll_index++;
    436 
    437 		tll->tll_wq = WR(q);
    438 
    439 		/* Insert at end of list */
    440 		insque(&tll->tll_next, tunll_list.q_back);
    441 		q->q_ptr = WR(q)->q_ptr = tll;
    442 
    443 		tll->tll_style = PTS_PPPOE;
    444 		tll->tll_alen = sizeof (tll->tll_lcladdr.pta_pppoe);
    445 
    446 		tll->tll_ksp = kstat_setup((kstat_named_t *)&tll->tll_kstats,
    447 		    tll_kstats_list, Dim(tll_kstats_list), "tll",
    448 		    tll->tll_index);
    449 
    450 		/*
    451 		 * Find the name of the driver somewhere beneath us.
    452 		 * Note that we have no driver under us until after
    453 		 * qprocson().
    454 		 */
    455 		qprocson(q);
    456 		for (q = WR(q); q->q_next != NULL; q = q->q_next)
    457 			;
    458 		cp = NULL;
    459 		if (q->q_qinfo != NULL && q->q_qinfo->qi_minfo != NULL)
    460 			cp = q->q_qinfo->qi_minfo->mi_idname;
    461 		if (cp != NULL && *cp == '\0')
    462 			cp = NULL;
    463 
    464 		/* Set initial name; user should overwrite. */
    465 		if (cp == NULL)
    466 			(void) snprintf(tll->tll_name, sizeof (tll->tll_name),
    467 			    PPP_TUN_NAME "%d", tll->tll_index);
    468 		else
    469 			(void) snprintf(tll->tll_name, sizeof (tll->tll_name),
    470 			    "%s:tun%d", cp, tll->tll_index);
    471 	} else {
    472 		tuncl_t	*tcl;
    473 
    474 		ASSERT(devp != NULL);
    475 		if (sflag & CLONEOPEN) {
    476 			tcl = tuncl_alloc(-1);
    477 		} else {
    478 			minor_t mn;
    479 
    480 			/*
    481 			 * Support of non-clone open (ie, mknod with
    482 			 * defined minor number) is supported for
    483 			 * testing purposes so that 'arbitrary' minor
    484 			 * numbers can be used.
    485 			 */
    486 			mn = getminor(*devp);
    487 			if (mn == 0 || (tcl = tcl_by_minor(mn)) != NULL) {
    488 				return (EPERM);
    489 			}
    490 			tcl = tuncl_alloc(mn);
    491 		}
    492 		if (tcl == NULL)
    493 			return (ENOSR);
    494 		tcl->tcl_rq = q;		/* save read queue pointer */
    495 		tcl->tcl_flags |= TCLF_ISCLIENT;	/* sanity check */
    496 
    497 		q->q_ptr = WR(q)->q_ptr = (caddr_t)tcl;
    498 		*devp = makedevice(getmajor(*devp), tcl->tcl_lsessid);
    499 
    500 		tcl->tcl_ksp = kstat_setup((kstat_named_t *)&tcl->tcl_kstats,
    501 		    tcl_kstats_list, Dim(tcl_kstats_list), "tcl",
    502 		    tcl->tcl_lsessid);
    503 
    504 		qprocson(q);
    505 	}
    506 	return (0);
    507 }
    508 
    509 /*
    510  * Create an appropriate control message for this client event.
    511  */
    512 static mblk_t *
    513 make_control(tuncl_t *tclabout, tunll_t *tllabout, int action, tuncl_t *tclto)
    514 {
    515 	struct ppptun_control *ptc;
    516 	mblk_t *mp = allocb(sizeof (*ptc), BPRI_HI);
    517 
    518 	if (mp != NULL) {
    519 		MTYPE(mp) = M_PROTO;
    520 		ptc = (struct ppptun_control *)mp->b_wptr;
    521 		mp->b_wptr += sizeof (*ptc);
    522 		if (tclabout != NULL) {
    523 			ptc->ptc_rsessid = tclabout->tcl_rsessid;
    524 			ptc->ptc_address = tclabout->tcl_address;
    525 		} else {
    526 			bzero(ptc, sizeof (*ptc));
    527 		}
    528 		ptc->ptc_discrim = tclto->tcl_ctlval;
    529 		ptc->ptc_action = action;
    530 		(void) strncpy(ptc->ptc_name, tllabout->tll_name,
    531 		    sizeof (ptc->ptc_name));
    532 	}
    533 	return (mp);
    534 }
    535 
    536 /*
    537  * Send an appropriate control message up this client session.
    538  */
    539 static void
    540 send_control(tuncl_t *tclabout, tunll_t *tllabout, int action, tuncl_t *tcl)
    541 {
    542 	mblk_t *mp;
    543 
    544 	if (tcl->tcl_rq != NULL) {
    545 		mp = make_control(tclabout, tllabout, action, tcl);
    546 		if (mp != NULL) {
    547 			KCINCR(cks_octrl_spec);
    548 			putnext(tcl->tcl_rq, mp);
    549 		}
    550 	}
    551 }
    552 
    553 /*
    554  * If a lower stream is being unplumbed, then the upper streams
    555  * connected to this lower stream must be disconnected.  This routine
    556  * accomplishes this by sending M_HANGUP to data streams and M_PROTO
    557  * messages to control streams.  This is called by vmem_walk, and
    558  * handles a span of minor node numbers.
    559  *
    560  * No need to update lks_clients here; the lower stream is on its way
    561  * out.
    562  */
    563 static void
    564 tclvm_remove_tll(void *arg, void *firstv, size_t numv)
    565 {
    566 	tunll_t *tll = (tunll_t *)arg;
    567 	int minorn = (int)(uintptr_t)firstv;
    568 	int minormax = minorn + numv;
    569 	tuncl_t *tcl;
    570 	mblk_t *mp;
    571 
    572 	while (minorn < minormax) {
    573 		tcl = tcl_slots[minorn - 1];
    574 		ASSERT(tcl != NULL);
    575 		if (tcl->tcl_data_tll == tll && tcl->tcl_rq != NULL) {
    576 			tcl->tcl_data_tll = NULL;
    577 			mp = allocb(0, BPRI_HI);
    578 			if (mp != NULL) {
    579 				MTYPE(mp) = M_HANGUP;
    580 				putnext(tcl->tcl_rq, mp);
    581 				if (tcl->tcl_ctrl_tll == tll)
    582 					tcl->tcl_ctrl_tll = NULL;
    583 			}
    584 		}
    585 		if (tcl->tcl_ctrl_tll == tll) {
    586 			send_control(tcl, tll, PTCA_UNPLUMB, tcl);
    587 			tcl->tcl_ctrl_tll = NULL;
    588 		}
    589 		minorn++;
    590 	}
    591 }
    592 
    593 /*
    594  * sppptun_close()
    595  *
    596  * MT-Perimeters:
    597  *    exclusive inner, exclusive outer.
    598  *
    599  * Description:
    600  *    Common close procedure for module and driver.
    601  */
    602 static int
    603 sppptun_close(queue_t *q)
    604 {
    605 	int err;
    606 	void *qptr;
    607 	tunll_t *tll;
    608 	tuncl_t *tcl;
    609 
    610 	qptr = q->q_ptr;
    611 
    612 	err = 0;
    613 	tll = qptr;
    614 	if (!(tll->tll_flags & TLLF_NOTLOWER)) {
    615 		/* q_next is set on modules */
    616 		ASSERT(WR(q)->q_next != NULL);
    617 
    618 		/* unlink any clients using this lower layer. */
    619 		vmem_walk(tcl_minor_arena, VMEM_ALLOC, tclvm_remove_tll, tll);
    620 
    621 		/* tell daemon that this has been removed. */
    622 		if ((tcl = tll->tll_defcl) != NULL)
    623 			send_control(NULL, tll, PTCA_UNPLUMB, tcl);
    624 
    625 		tll->tll_flags |= TLLF_CLOSING;
    626 		while (!(tll->tll_flags & TLLF_CLOSE_DONE)) {
    627 			qenable(tll->tll_wq);
    628 			qwait(tll->tll_wq);
    629 		}
    630 		tll->tll_error = 0;
    631 		while (!(tll->tll_flags & TLLF_SHUTDOWN_DONE)) {
    632 			if (!qwait_sig(tll->tll_wq))
    633 				break;
    634 		}
    635 
    636 		qprocsoff(q);
    637 		q->q_ptr = WR(q)->q_ptr = NULL;
    638 		tll->tll_wq = NULL;
    639 		remque(&tll->tll_next);
    640 		err = tll->tll_error;
    641 		if (tll->tll_ksp != NULL)
    642 			kstat_delete(tll->tll_ksp);
    643 		kmem_free(tll, sizeof (*tll));
    644 	} else {
    645 		tcl = qptr;
    646 
    647 		/* devices are end of line; no q_next. */
    648 		ASSERT(WR(q)->q_next == NULL);
    649 
    650 		qprocsoff(q);
    651 		DTRACE_PROBE1(sppptun__client__close, tuncl_t *, tcl);
    652 		tcl->tcl_rq = NULL;
    653 		q->q_ptr = WR(q)->q_ptr = NULL;
    654 
    655 		tll = TO_TLL(tunll_list.q_forw);
    656 		while (tll != TO_TLL(&tunll_list)) {
    657 			if (tll->tll_defcl == tcl)
    658 				tll->tll_defcl = NULL;
    659 			if (tll->tll_lastcl == tcl)
    660 				tll->tll_lastcl = NULL;
    661 			tll = TO_TLL(tll->tll_next);
    662 		}
    663 		/*
    664 		 * If this was a normal session, then tell the daemon.
    665 		 */
    666 		if (!(tcl->tcl_flags & TCLF_DAEMON) &&
    667 		    (tll = tcl->tcl_ctrl_tll) != NULL &&
    668 		    tll->tll_defcl != NULL) {
    669 			send_control(tcl, tll, PTCA_DISCONNECT,
    670 			    tll->tll_defcl);
    671 		}
    672 
    673 		/* Update statistics for references being dropped. */
    674 		if ((tll = tcl->tcl_data_tll) != NULL) {
    675 			KLDECR(lks_clients);
    676 		}
    677 		if ((tll = tcl->tcl_ctrl_tll) != NULL) {
    678 			KLDECR(lks_clients);
    679 		}
    680 
    681 		tuncl_free(tcl);
    682 	}
    683 
    684 	return (err);
    685 }
    686 
    687 /*
    688  * Allocate and initialize a DLPI or TPI template of the specified
    689  * length.
    690  */
    691 static mblk_t *
    692 pi_alloc(size_t len, int prim)
    693 {
    694 	mblk_t	*mp;
    695 
    696 	mp = allocb(len, BPRI_MED);
    697 	if (mp != NULL) {
    698 		MTYPE(mp) = M_PROTO;
    699 		mp->b_wptr = mp->b_rptr + len;
    700 		bzero(mp->b_rptr, len);
    701 		*(int *)mp->b_rptr = prim;
    702 	}
    703 	return (mp);
    704 }
    705 
    706 #define	dlpi_alloc(l, p)	pi_alloc((l), (p))
    707 
    708 /*
    709  * Prepend some room to an mblk.  Try to reuse the existing buffer, if
    710  * at all possible, rather than allocating a new one.  (Fast-path
    711  * output should be able to use this.)
    712  *
    713  * (XXX why isn't this a library function ...?)
    714  */
    715 static mblk_t *
    716 prependb(mblk_t *mp, size_t len, size_t align)
    717 {
    718 	mblk_t *newmp;
    719 
    720 
    721 	if (align == 0)
    722 		align = 8;
    723 	if (DB_REF(mp) > 1 || mp->b_datap->db_base+len > mp->b_rptr ||
    724 	    ((uint_t)((uintptr_t)mp->b_rptr - len) % align) != 0) {
    725 		if ((newmp = allocb(len, BPRI_LO)) == NULL) {
    726 			freemsg(mp);
    727 			return (NULL);
    728 		}
    729 		newmp->b_wptr = newmp->b_rptr + len;
    730 		newmp->b_cont = mp;
    731 		return (newmp);
    732 	}
    733 	mp->b_rptr -= len;
    734 	return (mp);
    735 }
    736 
    737 /*
    738  * sppptun_outpkt()
    739  *
    740  * MT-Perimeters:
    741  *	shared inner, shared outer (if called from sppptun_uwput),
    742  *	exclusive inner, shared outer (if called from sppptun_uwsrv).
    743  *
    744  * Description:
    745  *    Called from sppptun_uwput or sppptun_uwsrv when processing a
    746  *    M_DATA, M_PROTO, or M_PCPROTO message.  For all cases, it tries
    747  *    to prepare the data to be sent to the module below this driver
    748  *    if there is a lower stream linked underneath.  If no lower
    749  *    stream exists, then the data will be discarded and an ENXIO
    750  *    error returned.
    751  *
    752  * Returns:
    753  *	pointer to queue if caller should do putnext, otherwise
    754  *	*mpp != NULL if message should be enqueued, otherwise
    755  *	*mpp == NULL if message is gone.
    756  */
    757 static queue_t *
    758 sppptun_outpkt(queue_t *q, mblk_t **mpp)
    759 {
    760 	mblk_t *mp;
    761 	tuncl_t *tcl;
    762 	tunll_t *tll;
    763 	mblk_t *encmb;
    764 	mblk_t *datamb;
    765 	dl_unitdata_req_t *dur;
    766 	queue_t *lowerq;
    767 	poep_t *poep;
    768 	int len;
    769 	ether_dest_t *edestp;
    770 	enum { luNone, luCopy, luSend } loopup;
    771 	boolean_t isdata;
    772 	struct ppptun_control *ptc;
    773 
    774 	mp = *mpp;
    775 	tcl = q->q_ptr;
    776 
    777 	*mpp = NULL;
    778 	if (!(tcl->tcl_flags & TCLF_ISCLIENT)) {
    779 		merror(q, mp, EINVAL);
    780 		return (NULL);
    781 	}
    782 
    783 	isdata = (MTYPE(mp) == M_DATA);
    784 	if (isdata) {
    785 		tll = tcl->tcl_data_tll;
    786 		ptc = NULL;
    787 	} else {
    788 		/*
    789 		 * If data are unaligned or otherwise unsuitable, then
    790 		 * discard.
    791 		 */
    792 		if (MBLKL(mp) != sizeof (*ptc) || DB_REF(mp) > 1 ||
    793 		    !IS_P2ALIGNED(mp->b_rptr, sizeof (ptc))) {
    794 			KCINCR(cks_octrl_drop);
    795 			DTRACE_PROBE2(sppptun__bad__control, tuncl_t *, tcl,
    796 			    mblk_t *, mp);
    797 			merror(q, mp, EINVAL);
    798 			return (NULL);
    799 		}
    800 		ptc = (struct ppptun_control *)mp->b_rptr;
    801 
    802 		/* Set stream discriminator value if not yet set. */
    803 		if (tcl->tcl_ctlval == 0)
    804 			tcl->tcl_ctlval = ptc->ptc_discrim;
    805 
    806 		/* If this is a test message, then reply to caller. */
    807 		if (ptc->ptc_action == PTCA_TEST) {
    808 			DTRACE_PROBE2(sppptun__test, tuncl_t *, tcl,
    809 			    struct ppptun_control *, ptc);
    810 			if (mp->b_cont != NULL) {
    811 				freemsg(mp->b_cont);
    812 				mp->b_cont = NULL;
    813 			}
    814 			ptc->ptc_discrim = tcl->tcl_ctlval;
    815 			putnext(RD(q), mp);
    816 			return (NULL);
    817 		}
    818 
    819 		/* If this one isn't for us, then discard it */
    820 		if (tcl->tcl_ctlval != ptc->ptc_discrim) {
    821 			DTRACE_PROBE2(sppptun__bad__discrim, tuncl_t *, tcl,
    822 			    struct ppptun_control *, ptc);
    823 			freemsg(mp);
    824 			return (NULL);
    825 		}
    826 
    827 		/* Don't allow empty control packets. */
    828 		if (mp->b_cont == NULL) {
    829 			KCINCR(cks_octrl_drop);
    830 			merror(q, mp, EINVAL);
    831 			return (NULL);
    832 		}
    833 		tll = tcl->tcl_ctrl_tll;
    834 	}
    835 
    836 	if (tll == NULL || (lowerq = tll->tll_wq) == NULL) {
    837 		DTRACE_PROBE3(sppptun__cannot__send, tuncl_t *, tcl,
    838 		    tunll_t *, tll, mblk_t *, mp);
    839 		merror(q, mp, ENXIO);
    840 		if (isdata) {
    841 			tcl->tcl_stats.ppp_oerrors++;
    842 		} else {
    843 			KCINCR(cks_octrl_drop);
    844 		}
    845 		return (NULL);
    846 	}
    847 
    848 	/*
    849 	 * If so, then try to send it down.  The lower queue is only
    850 	 * ever detached while holding an exclusive lock on the whole
    851 	 * driver, so we can be confident that the lower queue is
    852 	 * still there.
    853 	 */
    854 	if (!bcanputnext(lowerq, mp->b_band)) {
    855 		DTRACE_PROBE3(sppptun__flow__control, tuncl_t *, tcl,
    856 		    tunll_t *, tll, mblk_t *, mp);
    857 		*mpp = mp;
    858 		return (NULL);
    859 	}
    860 
    861 	/*
    862 	 * Note: DLPI and TPI expect that the first buffer contains
    863 	 * the control (unitdata-req) header, destination address, and
    864 	 * nothing else.  Any protocol headers must go in the next
    865 	 * buffer.
    866 	 */
    867 	loopup = luNone;
    868 	encmb = NULL;
    869 	if (isdata) {
    870 		if (tll->tll_alen != 0 &&
    871 		    bcmp(&tcl->tcl_address, &tll->tll_lcladdr,
    872 		    tll->tll_alen) == 0)
    873 			loopup = luSend;
    874 		switch (tll->tll_style) {
    875 		case PTS_PPPOE:
    876 			/* Strip address and control fields if present. */
    877 			if (mp->b_rptr[0] == 0xFF) {
    878 				if (MBLKL(mp) < 3) {
    879 					encmb = msgpullup(mp, 3);
    880 					freemsg(mp);
    881 					if ((mp = encmb) == NULL)
    882 						break;
    883 				}
    884 				mp->b_rptr += 2;
    885 			}
    886 			/* Broadcasting data is probably not a good idea. */
    887 			if (tcl->tcl_address.pta_pppoe.ptma_mac[0] & 1)
    888 				break;
    889 			encmb = dlpi_alloc(sizeof (*dur) + sizeof (*edestp),
    890 			    DL_UNITDATA_REQ);
    891 			if (encmb == NULL)
    892 				break;
    893 
    894 			dur = (dl_unitdata_req_t *)encmb->b_rptr;
    895 			dur->dl_dest_addr_length = sizeof (*edestp);
    896 			dur->dl_dest_addr_offset = sizeof (*dur);
    897 			edestp = (ether_dest_t *)(dur + 1);
    898 			ether_copy(tcl->tcl_address.pta_pppoe.ptma_mac,
    899 			    edestp->addr);
    900 			/* DLPI SAPs are in host byte order! */
    901 			edestp->type = ETHERTYPE_PPPOES;
    902 
    903 			/* Make sure the protocol field isn't compressed. */
    904 			len = (*mp->b_rptr & 1);
    905 			mp = prependb(mp, sizeof (*poep) + len, POE_HDR_ALIGN);
    906 			if (mp == NULL)
    907 				break;
    908 			poep = (poep_t *)mp->b_rptr;
    909 			poep->poep_version_type = POE_VERSION;
    910 			poep->poep_code = POECODE_DATA;
    911 			poep->poep_session_id = htons(tcl->tcl_rsessid);
    912 			poep->poep_length = htons(msgsize(mp) -
    913 			    sizeof (*poep));
    914 			if (len > 0)
    915 				*(char *)(poep + 1) = '\0';
    916 			break;
    917 
    918 		default:
    919 			ASSERT(0);
    920 		}
    921 	} else {
    922 		/*
    923 		 * Control side encapsulation.
    924 		 */
    925 		if (bcmp(&ptc->ptc_address, &tll->tll_lcladdr, tll->tll_alen)
    926 		    == 0)
    927 			loopup = luSend;
    928 		datamb = mp->b_cont;
    929 		switch (tll->tll_style) {
    930 		case PTS_PPPOE:
    931 			/*
    932 			 * Don't allow a loopback session to establish
    933 			 * itself.  PPPoE is broken; it uses only one
    934 			 * session ID for both data directions, so the
    935 			 * loopback data path can simply never work.
    936 			 */
    937 			if (loopup == luSend &&
    938 			    ((poep_t *)datamb->b_rptr)->poep_code ==
    939 			    POECODE_PADR)
    940 				break;
    941 			encmb = dlpi_alloc(sizeof (*dur) + sizeof (*edestp),
    942 			    DL_UNITDATA_REQ);
    943 			if (encmb == NULL)
    944 				break;
    945 			dur = (dl_unitdata_req_t *)encmb->b_rptr;
    946 			dur->dl_dest_addr_length = sizeof (*edestp);
    947 			dur->dl_dest_addr_offset = sizeof (*dur);
    948 
    949 			edestp = (ether_dest_t *)(dur + 1);
    950 			/* DLPI SAPs are in host byte order! */
    951 			edestp->type = ETHERTYPE_PPPOED;
    952 
    953 			/*
    954 			 * If destination isn't set yet, then we have to
    955 			 * allow anything at all.  Otherwise, force use
    956 			 * of configured peer address.
    957 			 */
    958 			if (bcmp(tcl->tcl_address.pta_pppoe.ptma_mac,
    959 			    zero_mac_addr, sizeof (zero_mac_addr)) == 0 ||
    960 			    (tcl->tcl_flags & TCLF_DAEMON)) {
    961 				ether_copy(ptc->ptc_address.pta_pppoe.ptma_mac,
    962 				    edestp->addr);
    963 			} else {
    964 				ether_copy(tcl->tcl_address.pta_pppoe.ptma_mac,
    965 				    edestp->addr);
    966 			}
    967 			/* Reflect multicast/broadcast back up. */
    968 			if (edestp->addr[0] & 1)
    969 				loopup = luCopy;
    970 			break;
    971 
    972 		case PTS_PPTP:
    973 			/*
    974 			 * PPTP's control side is actually done over
    975 			 * separate TCP connections.
    976 			 */
    977 		default:
    978 			ASSERT(0);
    979 		}
    980 		freeb(mp);
    981 		mp = datamb;
    982 	}
    983 	if (mp == NULL || encmb == NULL) {
    984 		DTRACE_PROBE1(sppptun__output__failure, tuncl_t *, tcl);
    985 		freemsg(mp);
    986 		freemsg(encmb);
    987 		if (isdata) {
    988 			tcl->tcl_stats.ppp_oerrors++;
    989 		} else {
    990 			KCINCR(cks_octrl_drop);
    991 			KLINCR(lks_octrl_drop);
    992 		}
    993 		lowerq = NULL;
    994 	} else {
    995 		if (isdata) {
    996 			tcl->tcl_stats.ppp_obytes += msgsize(mp);
    997 			tcl->tcl_stats.ppp_opackets++;
    998 		} else {
    999 			KCINCR(cks_octrls);
   1000 			KLINCR(lks_octrls);
   1001 		}
   1002 		if (encmb != mp)
   1003 			encmb->b_cont = mp;
   1004 		switch (loopup) {
   1005 		case luNone:
   1006 			*mpp = encmb;
   1007 			break;
   1008 		case luCopy:
   1009 			mp = copymsg(encmb);
   1010 			if (mp != NULL)
   1011 				sppptun_urput(RD(lowerq), mp);
   1012 			*mpp = encmb;
   1013 			break;
   1014 		case luSend:
   1015 			sppptun_urput(RD(lowerq), encmb);
   1016 			lowerq = NULL;
   1017 			break;
   1018 		}
   1019 	}
   1020 	return (lowerq);
   1021 }
   1022 
   1023 /*
   1024  * Enqueue a message to be sent when the lower stream is closed.  This
   1025  * is done so that we're guaranteed that we always have the necessary
   1026  * resources to properly detach ourselves from the system.  (If we
   1027  * waited until the close was done to allocate these messages, then
   1028  * the message allocation could fail, and we'd be unable to properly
   1029  * detach.)
   1030  */
   1031 static void
   1032 save_for_close(tunll_t *tll, mblk_t *mp)
   1033 {
   1034 	mblk_t *onc;
   1035 
   1036 	if ((onc = tll->tll_onclose) == NULL)
   1037 		tll->tll_onclose = mp;
   1038 	else {
   1039 		while (onc->b_next != NULL)
   1040 			onc = onc->b_next;
   1041 		onc->b_next = mp;
   1042 	}
   1043 }
   1044 
   1045 /*
   1046  * Given the lower stream name, locate the state structure.  Note that
   1047  * lookup of tcl pointers (and use of those pointers) is safe because
   1048  *