Home | History | Annotate | Download | only in inet
      1 /*
      2  * CDDL HEADER START
      3  *
      4  * The contents of this file are subject to the terms of the
      5  * Common Development and Distribution License (the "License").
      6  * You may not use this file except in compliance with the License.
      7  *
      8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
      9  * or http://www.opensolaris.org/os/licensing.
     10  * See the License for the specific language governing permissions
     11  * and limitations under the License.
     12  *
     13  * When distributing Covered Code, include this CDDL HEADER in each
     14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
     15  * If applicable, add the following below this CDDL HEADER, with the
     16  * fields enclosed by brackets "[]" replaced with your own identifying
     17  * information: Portions Copyright [yyyy] [name of copyright owner]
     18  *
     19  * CDDL HEADER END
     20  */
     21 /*
     22  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
     23  * Use is subject to license terms.
     24  */
     25 /* Copyright (c) 1990 Mentat Inc. */
     26 
     27 /*
     28  * This file contains common code for handling Options Management requests.
     29  */
     30 
     31 #include <sys/types.h>
     32 #include <sys/stream.h>
     33 #include <sys/stropts.h>
     34 #include <sys/strsubr.h>
     35 #include <sys/errno.h>
     36 #define	_SUN_TPI_VERSION 2
     37 #include <sys/tihdr.h>
     38 #include <sys/socket.h>
     39 #include <sys/socketvar.h>
     40 #include <sys/ddi.h>
     41 #include <sys/debug.h>		/* for ASSERT */
     42 #include <sys/policy.h>
     43 
     44 #include <inet/common.h>
     45 #include <inet/mi.h>
     46 #include <inet/nd.h>
     47 #include <netinet/ip6.h>
     48 #include <inet/ip.h>
     49 #include <inet/mib2.h>
     50 #include <netinet/in.h>
     51 #include "optcom.h"
     52 
     53 #include <inet/optcom.h>
     54 #include <inet/ipclassifier.h>
     55 #include <inet/proto_set.h>
     56 
     57 /*
     58  * Function prototypes
     59  */
     60 static t_scalar_t process_topthdrs_first_pass(mblk_t *, cred_t *, optdb_obj_t *,
     61     size_t *);
     62 static t_scalar_t do_options_second_pass(queue_t *q, mblk_t *reqmp,
     63     mblk_t *ack_mp, cred_t *, optdb_obj_t *dbobjp,
     64     t_uscalar_t *worst_statusp);
     65 static t_uscalar_t get_worst_status(t_uscalar_t, t_uscalar_t);
     66 static int do_opt_default(queue_t *, struct T_opthdr *, uchar_t **,
     67     t_uscalar_t *, cred_t *, optdb_obj_t *);
     68 static void do_opt_current(queue_t *, struct T_opthdr *, uchar_t **,
     69     t_uscalar_t *, cred_t *cr, optdb_obj_t *);
     70 static void do_opt_check_or_negotiate(queue_t *q, struct T_opthdr *reqopt,
     71     uint_t optset_context, uchar_t **resptrp, t_uscalar_t *worst_statusp,
     72     cred_t *, optdb_obj_t *dbobjp);
     73 static boolean_t opt_level_valid(t_uscalar_t, optlevel_t *, uint_t);
     74 static size_t opt_level_allopts_lengths(t_uscalar_t, opdes_t *, uint_t);
     75 static boolean_t opt_length_ok(opdes_t *, t_uscalar_t optlen);
     76 static t_uscalar_t optcom_max_optbuf_len(opdes_t *, uint_t);
     77 static boolean_t opt_bloated_maxsize(opdes_t *);
     78 
     79 /* Common code for sending back a T_ERROR_ACK. */
     80 void
     81 optcom_err_ack(queue_t *q, mblk_t *mp, t_scalar_t t_error, int sys_error)
     82 {
     83 	if ((mp = mi_tpi_err_ack_alloc(mp, t_error, sys_error)) != NULL)
     84 		qreply(q, mp);
     85 }
     86 
     87 /*
     88  * The option management routines svr4_optcom_req() and tpi_optcom_req() use
     89  * callback functions as arguments. Here are the interfaces expected
     90  * of the callback functions:
     91  *
     92  *
     93  * (1) deffn(q, optlevel, optname, optvalp)
     94  *
     95  *	- Function only called when default value comes from protocol
     96  *	 specific code and not the option database table (indicated by
     97  *	  OP_DEF_FN property in option database.)
     98  *	- Error return is -1. Valid returns are >=0.
     99  *	- When valid, the return value represents the length used for storing
    100  *		the default value of the option.
    101  *      - Error return implies the called routine did not recognize this
    102  *              option. Something downstream could so input is left unchanged
    103  *              in request buffer.
    104  *
    105  * (2) getfn(q, optlevel, optname, optvalp)
    106  *
    107  *	- Error return is -1. Valid returns are >=0.
    108  *	- When valid, the return value represents the length used for storing
    109  *		the actual value of the option.
    110  *      - Error return implies the called routine did not recognize this
    111  *              option. Something downstream could so input is left unchanged
    112  *              in request buffer.
    113  *
    114  * (3) setfn(q, optset_context, optlevel, optname, inlen, invalp,
    115  *	outlenp, outvalp, attrp, cr);
    116  *
    117  *	- OK return is 0, Error code is returned as a non-zero argument.
    118  *      - If negative it is ignored by svr4_optcom_req(). If positive, error
    119  *        is returned. A negative return implies that option, while handled on
    120  *	  this stack is not handled at this level and will be handled further
    121  *	  downstream.
    122  *	- Both negative and positive errors are treated as errors in an
    123  *	  identical manner by tpi_optcom_req(). The errors affect the "status"
    124  *	  field of each option's T_opthdr. If successful, an appropriate success
    125  *	  result is carried. If error, it is instantiated to "failure" at the
    126  *	  topmost level and left unchanged at other levels. (This "failure" can
    127  *	  turn to a success at another level).
    128  *	- optset_context passed for tpi_optcom_req(). It is interpreted as:
    129  *        - SETFN_OPTCOM_CHECKONLY
    130  *		semantics are to pretend to set the value and report
    131  *		back if it would be successful.
    132  *		This is used with T_CHECK semantics in XTI
    133  *        - SETFN_OPTCOM_NEGOTIATE
    134  *		set the value. Call from option management primitive
    135  *		T_OPTMGMT_REQ when T_NEGOTIATE flags is used.
    136  *	  - SETFN_UD_NEGOTIATE
    137  *		option request came riding on UNITDATA primitive; most often
    138  *		has "this datagram" semantics to influence properties
    139  *		affecting an outgoing datagram or associated with a received
    140  *		datagram
    141  *		[ Note: XTI permits this use outside of "this datagram"
    142  *		semantics also and permits setting "management related"
    143  *		options in this	context and its test suite enforces it ]
    144  *	  - SETFN_CONN_NEGOTIATE
    145  *		option request came riding on CONN_REQ/RES primitive and
    146  *		most often has "this connection" (negotiation during
    147  *		"connection establishment") semantics.
    148  *		[ Note: XTI permits use of these outside of "this connection"
    149  *		semantics and permits "management related" options in this
    150  *		context and its test suite enforces it. ]
    151  *
    152  *	- inlen, invalp is the option length,value requested to be set.
    153  *	- outlenp, outvalp represent return parameters which contain the
    154  *	  value set and it might be different from one passed on input.
    155  *	- attrp points to a data structure that's used by v6 modules to
    156  *	  store ancillary data options or sticky options.
    157  *	- cr points to the caller's credentials
    158  *	- the caller might pass same buffers for input and output and the
    159  *	  routine should protect against this case by not updating output
    160  *	  buffers until it is done referencing input buffers and any other
    161  *	  issues (e.g. not use bcopy() if we do not trust what it does).
    162  *      - If option is not known, it returns error. We randomly pick EINVAL.
    163  *        It can however get called with options that are handled downstream
    164  *        or upstream so for svr4_optcom_req(), it does not return error for
    165  *        negative return values.
    166  *
    167  */
    168 
    169 /*
    170  * Upper Level Protocols call this routine when they receive
    171  * a T_SVR4_OPTMGMT_REQ message.  They supply callback functions
    172  * for setting a new value for a single options, getting the
    173  * current value for a single option, and checking for support
    174  * of a single option.  svr4_optcom_req validates the option management
    175  * buffer passed in, and calls the appropriate routines to do the
    176  * job requested.
    177  * XXX Code below needs some restructuring after we have some more
    178  * macros to support 'struct opthdr' in the headers.
    179  */
    180 void
    181 svr4_optcom_req(queue_t *q, mblk_t *mp, cred_t *cr, optdb_obj_t *dbobjp)
    182 {
    183 	pfi_t	deffn = dbobjp->odb_deffn;
    184 	pfi_t	getfn = dbobjp->odb_getfn;
    185 	opt_set_fn setfn = dbobjp->odb_setfn;
    186 	opdes_t	*opt_arr = dbobjp->odb_opt_des_arr;
    187 	uint_t opt_arr_cnt = dbobjp->odb_opt_arr_cnt;
    188 	t_uscalar_t max_optbuf_len;
    189 	int len;
    190 	mblk_t	*mp1 = NULL;
    191 	struct opthdr *next_opt;
    192 	struct opthdr *opt;
    193 	struct opthdr *opt1;
    194 	struct opthdr *opt_end;
    195 	struct opthdr *opt_start;
    196 	opdes_t	*optd;
    197 	struct T_optmgmt_ack *toa;
    198 	struct T_optmgmt_req *tor;
    199 	int error;
    200 
    201 	tor = (struct T_optmgmt_req *)mp->b_rptr;
    202 	/* Verify message integrity. */
    203 	if (mp->b_wptr - mp->b_rptr < sizeof (struct T_optmgmt_req))
    204 		goto bad_opt;
    205 	/* Verify MGMT_flags legal */
    206 	switch (tor->MGMT_flags) {
    207 	case T_DEFAULT:
    208 	case T_NEGOTIATE:
    209 	case T_CURRENT:
    210 	case T_CHECK:
    211 		/* OK - legal request flags */
    212 		break;
    213 	default:
    214 		optcom_err_ack(q, mp, TBADFLAG, 0);
    215 		return;
    216 	}
    217 	if (tor->MGMT_flags == T_DEFAULT) {
    218 		/* Is it a request for default option settings? */
    219 
    220 		/*
    221 		 * Note: XXX TLI and TPI specification was unclear about
    222 		 * semantics of T_DEFAULT and the following historical note
    223 		 * and its interpretation is incorrect (it implies a request
    224 		 * for default values of only the identified options not all.
    225 		 * The semantics have been explained better in XTI spec.)
    226 		 * However, we do not modify (comment or code) here to keep
    227 		 * compatibility.
    228 		 * We can rethink this if it ever becomes an issue.
    229 		 * ----historical comment start------
    230 		 * As we understand it, the input buffer is meaningless
    231 		 * so we ditch the message.  A T_DEFAULT request is a
    232 		 * request to obtain a buffer containing defaults for
    233 		 * all supported options, so we allocate a maximum length
    234 		 * reply.
    235 		 * ----historical comment end -------
    236 		 */
    237 		/* T_DEFAULT not passed down */
    238 		freemsg(mp);
    239 		max_optbuf_len = optcom_max_optbuf_len(opt_arr,
    240 		    opt_arr_cnt);
    241 		mp = allocb(max_optbuf_len, BPRI_MED);
    242 		if (!mp) {
    243 no_mem:;
    244 			optcom_err_ack(q, mp, TSYSERR, ENOMEM);
    245 			return;
    246 		}
    247 
    248 		/* Initialize the T_optmgmt_ack header. */
    249 		toa = (struct T_optmgmt_ack *)mp->b_rptr;
    250 		bzero((char *)toa, max_optbuf_len);
    251 		toa->PRIM_type = T_OPTMGMT_ACK;
    252 		toa->OPT_offset = (t_scalar_t)sizeof (struct T_optmgmt_ack);
    253 		/* TODO: Is T_DEFAULT the right thing to put in MGMT_flags? */
    254 		toa->MGMT_flags = T_DEFAULT;
    255 
    256 		/* Now walk the table of options passed in */
    257 		opt = (struct opthdr *)&toa[1];
    258 		for (optd = opt_arr; optd < &opt_arr[opt_arr_cnt]; optd++) {
    259 			/*
    260 			 * All the options in the table of options passed
    261 			 * in are by definition supported by the protocol
    262 			 * calling this function.
    263 			 */
    264 			if (!OA_READ_PERMISSION(optd, cr))
    265 				continue;
    266 			opt->level = optd->opdes_level;
    267 			opt->name = optd->opdes_name;
    268 			if (!(optd->opdes_props & OP_DEF_FN) ||
    269 			    ((len = (*deffn)(q, opt->level,
    270 			    opt->name, (uchar_t *)&opt[1])) < 0)) {
    271 				/*
    272 				 * Fill length and value from table.
    273 				 *
    274 				 * Default value not instantiated from function
    275 				 * (or the protocol specific function failed it;
    276 				 * In this interpretation of T_DEFAULT, this is
    277 				 * the best we can do)
    278 				 */
    279 				switch (optd->opdes_size) {
    280 				/*
    281 				 * Since options are guaranteed aligned only
    282 				 * on a 4 byte boundary (t_scalar_t) any
    283 				 * option that is greater in size will default
    284 				 * to the bcopy below
    285 				 */
    286 				case sizeof (int32_t):
    287 					*(int32_t *)&opt[1] =
    288 					    (int32_t)optd->opdes_default;
    289 					break;
    290 				case sizeof (int16_t):
    291 					*(int16_t *)&opt[1] =
    292 					    (int16_t)optd->opdes_default;
    293 					break;
    294 				case sizeof (int8_t):
    295 					*(int8_t *)&opt[1] =
    296 					    (int8_t)optd->opdes_default;
    297 					break;
    298 				default:
    299 					/*
    300 					 * other length but still assume
    301 					 * fixed - use bcopy
    302 					 */
    303 					bcopy(optd->opdes_defbuf,
    304 					    &opt[1], optd->opdes_size);
    305 					break;
    306 				}
    307 				opt->len = optd->opdes_size;
    308 			}
    309 			else
    310 				opt->len = (t_uscalar_t)len;
    311 			opt = (struct opthdr *)((char *)&opt[1] +
    312 			    _TPI_ALIGN_OPT(opt->len));
    313 		}
    314 
    315 		/* Now record the final length. */
    316 		toa->OPT_length = (t_scalar_t)((char *)opt - (char *)&toa[1]);
    317 		mp->b_wptr = (uchar_t *)opt;
    318 		mp->b_datap->db_type = M_PCPROTO;
    319 		/* Ship it back. */
    320 		qreply(q, mp);
    321 		return;
    322 	}
    323 	/* T_DEFAULT processing complete - no more T_DEFAULT */
    324 
    325 	/*
    326 	 * For T_NEGOTIATE, T_CURRENT, and T_CHECK requests, we make a
    327 	 * pass through the input buffer validating the details and
    328 	 * making sure each option is supported by the protocol.
    329 	 */
    330 	if ((opt_start = (struct opthdr *)mi_offset_param(mp,
    331 	    tor->OPT_offset, tor->OPT_length)) == NULL)
    332 		goto bad_opt;
    333 	if (!__TPI_OPT_ISALIGNED(opt_start))
    334 		goto bad_opt;
    335 
    336 	opt_end = (struct opthdr *)((uchar_t *)opt_start +
    337 	    tor->OPT_length);
    338 
    339 	for (opt = opt_start; opt < opt_end; opt = next_opt) {
    340 		/*
    341 		 * Verify we have room to reference the option header
    342 		 * fields in the option buffer.
    343 		 */
    344 		if ((uchar_t *)opt + sizeof (struct opthdr) >
    345 		    (uchar_t *)opt_end)
    346 			goto bad_opt;
    347 		/*
    348 		 * We now compute pointer to next option in buffer 'next_opt'
    349 		 * The next_opt computation above below 'opt->len' initialized
    350 		 * by application which cannot be trusted. The usual value
    351 		 * too large will be captured by the loop termination condition
    352 		 * above. We check for the following which it will miss.
    353 		 * 	-pointer space wraparound arithmetic overflow
    354 		 *	-last option in buffer with 'opt->len' being too large
    355 		 *	 (only reason 'next_opt' should equal or exceed
    356 		 *	 'opt_end' for last option is roundup unless length is
    357 		 *	 too-large/invalid)
    358 		 */
    359 		next_opt = (struct opthdr *)((uchar_t *)&opt[1] +
    360 		    _TPI_ALIGN_OPT(opt->len));
    361 
    362 		if ((uchar_t *)next_opt < (uchar_t *)&opt[1] ||
    363 		    ((next_opt >= opt_end) &&
    364 		    (((uchar_t *)next_opt - (uchar_t *)opt_end) >=
    365 		    __TPI_ALIGN_SIZE)))
    366 			goto bad_opt;
    367 
    368 		/* sanity check */
    369 		if (opt->name == T_ALLOPT)
    370 			goto bad_opt;
    371 
    372 		error = proto_opt_check(opt->level, opt->name, opt->len, NULL,
    373 		    opt_arr, opt_arr_cnt,
    374 		    tor->MGMT_flags == T_NEGOTIATE, tor->MGMT_flags == T_CHECK,
    375 		    cr);
    376 		if (error < 0) {
    377 			optcom_err_ack(q, mp, -error, 0);
    378 			return;
    379 		} else if (error > 0) {
    380 			optcom_err_ack(q, mp, TSYSERR, error);
    381 			return;
    382 		}
    383 	} /* end for loop scanning option buffer */
    384 
    385 	/* Now complete the operation as required. */
    386 	switch (tor->MGMT_flags) {
    387 	case T_CHECK:
    388 		/*
    389 		 * Historically used same as T_CURRENT (which was added to
    390 		 * standard later). Code retained for compatibility.
    391 		 */
    392 		/* FALLTHROUGH */
    393 	case T_CURRENT:
    394 		/*
    395 		 * Allocate a maximum size reply.  Perhaps we are supposed to
    396 		 * assume that the input buffer includes space for the answers
    397 		 * as well as the opthdrs, but we don't know that for sure.
    398 		 * So, instead, we create a new output buffer, using the
    399 		 * input buffer only as a list of options.
    400 		 */
    401 		max_optbuf_len = optcom_max_optbuf_len(opt_arr,
    402 		    opt_arr_cnt);
    403 		mp1 = allocb_tmpl(max_optbuf_len, mp);
    404 		if (!mp1)
    405 			goto no_mem;
    406 		/* Initialize the header. */
    407 		mp1->b_datap->db_type = M_PCPROTO;
    408 		mp1->b_wptr = &mp1->b_rptr[sizeof (struct T_optmgmt_ack)];
    409 		toa = (struct T_optmgmt_ack *)mp1->b_rptr;
    410 		toa->OPT_offset = (t_scalar_t)sizeof (struct T_optmgmt_ack);
    411 		toa->MGMT_flags = tor->MGMT_flags;
    412 		/*
    413 		 * Walk through the input buffer again, this time adding
    414 		 * entries to the output buffer for each option requested.
    415 		 * Note, sanity of option header, last option etc, verified
    416 		 * in first pass.
    417 		 */
    418 		opt1 = (struct opthdr *)&toa[1];
    419 
    420 		for (opt = opt_start; opt < opt_end; opt = next_opt) {
    421 
    422 			next_opt = (struct opthdr *)((uchar_t *)&opt[1] +
    423 			    _TPI_ALIGN_OPT(opt->len));
    424 
    425 			opt1->name = opt->name;
    426 			opt1->level = opt->level;
    427 			len = (*getfn)(q, opt->level,
    428 			    opt->name, (uchar_t *)&opt1[1]);
    429 			/*
    430 			 * Failure means option is not recognized. Copy input
    431 			 * buffer as is
    432 			 */
    433 			if (len < 0) {
    434 				opt1->len = opt->len;
    435 				bcopy(&opt[1], &opt1[1], opt->len);
    436 			} else {
    437 				opt1->len = (t_uscalar_t)len;
    438 			}
    439 			opt1 = (struct opthdr *)((uchar_t *)&opt1[1] +
    440 			    _TPI_ALIGN_OPT(opt1->len));
    441 		} /* end for loop */
    442 
    443 		/* Record the final length. */
    444 		toa->OPT_length = (t_scalar_t)((uchar_t *)opt1 -
    445 		    (uchar_t *)&toa[1]);
    446 		mp1->b_wptr = (uchar_t *)opt1;
    447 		/* Ditch the input buffer. */
    448 		freemsg(mp);
    449 		mp = mp1;
    450 		break;
    451 
    452 	case T_NEGOTIATE:
    453 		/*
    454 		 * Here we are expecting that the response buffer is exactly
    455 		 * the same size as the input buffer.  We pass each opthdr
    456 		 * to the protocol's set function.  If the protocol doesn't
    457 		 * like it, it can update the value in it return argument.
    458 		 */
    459 		/*
    460 		 * Pass each negotiated option through the protocol set
    461 		 * function.
    462 		 * Note: sanity check on option header values done in first
    463 		 * pass and not repeated here.
    464 		 */
    465 		toa = (struct T_optmgmt_ack *)tor;
    466 
    467 		for (opt = opt_start; opt < opt_end; opt = next_opt) {
    468 			int error;
    469 
    470 			next_opt = (struct opthdr *)((uchar_t *)&opt[1] +
    471 			    _TPI_ALIGN_OPT(opt->len));
    472 
    473 			error = (*setfn)(q, SETFN_OPTCOM_NEGOTIATE,
    474 			    opt->level, opt->name,
    475 			    opt->len, (uchar_t *)&opt[1],
    476 			    &opt->len, (uchar_t *)&opt[1], NULL, cr);
    477 			/*
    478 			 * Treat positive "errors" as real.
    479 			 * Note: negative errors are to be treated as
    480 			 * non-fatal by svr4_optcom_req() and are
    481 			 * returned by setfn() when it is passed an
    482 			 * option it does not handle. Since the option
    483 			 * passed proto_opt_lookup(), it is implied that
    484 			 * it is valid but was either handled upstream
    485 			 * or will be handled downstream.
    486 			 */
    487 			if (error > 0) {
    488 				optcom_err_ack(q, mp, TSYSERR, error);
    489 				return;
    490 			}
    491 			/*
    492 			 * error < 0 means option is not recognized.
    493 			 */
    494 		}
    495 		break;
    496 	default:
    497 		optcom_err_ack(q, mp, TBADFLAG, 0);
    498 		return;
    499 	}
    500 
    501 	/* Set common fields in the header. */
    502 	toa->MGMT_flags = T_SUCCESS;
    503 	mp->b_datap->db_type = M_PCPROTO;
    504 	toa->PRIM_type = T_OPTMGMT_ACK;
    505 	qreply(q, mp);
    506 	return;
    507 bad_opt:;
    508 	optcom_err_ack(q, mp, TBADOPT, 0);
    509 }
    510 
    511 /*
    512  * New optcom_req inspired by TPI/XTI semantics
    513  */
    514 void
    515 tpi_optcom_req(queue_t *q, mblk_t *mp, cred_t *cr, optdb_obj_t *dbobjp)
    516 {
    517 	t_scalar_t t_error;
    518 	mblk_t *toa_mp;
    519 	size_t toa_len;
    520 	struct T_optmgmt_ack *toa;
    521 	struct T_optmgmt_req *tor =
    522 	    (struct T_optmgmt_req *)mp->b_rptr;
    523 	t_uscalar_t worst_status;
    524 
    525 	/* Verify message integrity. */
    526 	if ((mp->b_wptr - mp->b_rptr) < sizeof (struct T_optmgmt_req)) {
    527 		optcom_err_ack(q, mp, TBADOPT, 0);
    528 		return;
    529 	}
    530 
    531 	/* Verify MGMT_flags legal */
    532 	switch (tor->MGMT_flags) {
    533 	case T_DEFAULT:
    534 	case T_NEGOTIATE:
    535 	case T_CURRENT:
    536 	case T_CHECK:
    537 		/* OK - legal request flags */
    538 		break;
    539 	default:
    540 		optcom_err_ack(q, mp, TBADFLAG, 0);
    541 		return;
    542 	}
    543 
    544 	/*
    545 	 * In this design, there are two passes required on the input buffer
    546 	 * mostly to accomodate variable length options and "T_ALLOPT" option
    547 	 * which has the semantics "all options of the specified level".
    548 	 *
    549 	 * For T_DEFAULT, T_NEGOTIATE, T_CURRENT, and T_CHECK requests, we make
    550 	 * a pass through the input buffer validating the details and making
    551 	 * sure each option is supported by the protocol. We also determine the
    552 	 * length of the option buffer to return. (Variable length options and
    553 	 * T_ALLOPT mean that length can be different for output buffer).
    554 	 */
    555 
    556 	toa_len = 0;		/* initial value */
    557 
    558 	/*
    559 	 * First pass, we do the following
    560 	 *	- estimate cumulative length needed for results
    561 	 *	- set "status" field based on permissions, option header check
    562 	 *	  etc.
    563 	 */
    564 	if ((t_error = process_topthdrs_first_pass(mp, cr, dbobjp,
    565 	    &toa_len)) != 0) {
    566 		optcom_err_ack(q, mp, t_error, 0);
    567 		return;
    568 	}
    569 
    570 	/*
    571 	 * A validation phase of the input buffer is done. We have also
    572 	 * obtained the length requirement and and other details about the
    573 	 * input and we liked input buffer so far.  We make another scan
    574 	 * through the input now and generate the output necessary to complete
    575 	 * the operation.
    576 	 */
    577 
    578 	toa_mp = allocb_tmpl(toa_len, mp);
    579 	if (!toa_mp) {
    580 		optcom_err_ack(q, mp, TSYSERR, ENOMEM);
    581 		return;
    582 	}
    583 
    584 	/*
    585 	 * Set initial values for generating output.
    586 	 */
    587 	worst_status = T_SUCCESS; /* initial value */
    588 
    589 	/*
    590 	 * This routine makes another pass through the option buffer this
    591 	 * time acting on the request based on "status" result in the
    592 	 * first pass. It also performs "expansion" of T_ALLOPT into
    593 	 * all options of a certain level and acts on each for this request.
    594 	 */
    595 	if ((t_error = do_options_second_pass(q, mp, toa_mp, cr, dbobjp,
    596 	    &worst_status)) != 0) {
    597 		freemsg(toa_mp);
    598 		optcom_err_ack(q, mp, t_error, 0);
    599 		return;
    600 	}
    601 
    602 	/*
    603 	 * Following code relies on the coincidence that T_optmgmt_req
    604 	 * and T_optmgmt_ack are identical in binary representation
    605 	 */
    606 	toa = (struct T_optmgmt_ack *)toa_mp->b_rptr;
    607 	toa->OPT_length = (t_scalar_t)(toa_mp->b_wptr - (toa_mp->b_rptr +
    608 	    sizeof (struct T_optmgmt_ack)));
    609 	toa->OPT_offset = (t_scalar_t)sizeof (struct T_optmgmt_ack);
    610 
    611 	toa->MGMT_flags = tor->MGMT_flags;
    612 
    613 	freemsg(mp);		/* free input mblk */
    614 
    615 	toa->PRIM_type = T_OPTMGMT_ACK;
    616 	toa_mp->b_datap->db_type = M_PCPROTO;
    617 	toa->MGMT_flags |= worst_status; /* XXX "worst" or "OR" TPI ? */
    618 	qreply(q, toa_mp);
    619 }
    620 
    621 
    622 /*
    623  * Following routine makes a pass through option buffer in mp and performs the
    624  * following tasks.
    625  *	- estimate cumulative length needed for results
    626  *	- set "status" field based on permissions, option header check
    627  *	  etc.
    628  */
    629 
    630 static t_scalar_t
    631 process_topthdrs_first_pass(mblk_t *mp, cred_t *cr, optdb_obj_t *dbobjp,
    632     size_t *toa_lenp)
    633 {
    634 	opdes_t	*opt_arr = dbobjp->odb_opt_des_arr;
    635 	uint_t opt_arr_cnt = dbobjp->odb_opt_arr_cnt;
    636 	optlevel_t *valid_level_arr = dbobjp->odb_valid_levels_arr;
    637 	uint_t valid_level_arr_cnt = dbobjp->odb_valid_levels_arr_cnt;
    638 	struct T_opthdr *opt;
    639 	struct T_opthdr *opt_start, *opt_end;
    640 	opdes_t	*optd;
    641 	size_t allopt_len;
    642 	struct T_optmgmt_req *tor =
    643 	    (struct T_optmgmt_req *)mp->b_rptr;
    644 
    645 	*toa_lenp = sizeof (struct T_optmgmt_ack); /* initial value */
    646 
    647 	if ((opt_start = (struct T_opthdr *)
    648 	    mi_offset_param(mp, tor->OPT_offset, tor->OPT_length)) == NULL) {
    649 		return (TBADOPT);
    650 	}
    651 	if (!__TPI_TOPT_ISALIGNED(opt_start))
    652 		return (TBADOPT);
    653 
    654 	opt_end = (struct T_opthdr *)((uchar_t *)opt_start + tor->OPT_length);
    655 
    656 	for (opt = opt_start; opt && (opt < opt_end);
    657 	    opt = _TPI_TOPT_NEXTHDR(opt_start, tor->OPT_length, opt)) {
    658 		/*
    659 		 * Validate the option for length and alignment
    660 		 * before accessing anything in it.
    661 		 */
    662 		if (!(_TPI_TOPT_VALID(opt, opt_start, opt_end)))
    663 			return (TBADOPT);
    664 
    665 		/* Find the option in the opt_arr. */
    666 		if (opt->name != T_ALLOPT) {
    667 			optd = proto_opt_lookup(opt->level, opt->name,
    668 			    opt_arr, opt_arr_cnt);
    669 			if (optd == NULL) {
    670 				/*
    671 				 * Option not found
    672 				 *
    673 				 * Verify if level is "valid" or not.
    674 				 * Note: This check is required by XTI
    675 				 *
    676 				 * TPI provider always initializes
    677 				 * the "not supported" (or whatever) status
    678 				 * for the options. Other levels leave status
    679 				 * unchanged if they do not understand an
    680 				 * option.
    681 				 */
    682 				if (!opt_level_valid(opt->level,
    683 				    valid_level_arr, valid_level_arr_cnt))
    684 					return (TBADOPT);
    685 				/*
    686 				 * level is valid - initialize
    687 				 * option as not supported
    688 				 */
    689 				opt->status = T_NOTSUPPORT;
    690 				*toa_lenp += _TPI_ALIGN_TOPT(opt->len);
    691 				continue;
    692 			}
    693 		} else {
    694 			/*
    695 			 * Handle T_ALLOPT case as a special case.
    696 			 * Note: T_ALLOPT does not mean anything
    697 			 * for T_CHECK operation.
    698 			 */
    699 			allopt_len = 0;
    700 			if (tor->MGMT_flags == T_CHECK ||
    701 			    ((allopt_len = opt_level_allopts_lengths(opt->level,
    702 			    opt_arr, opt_arr_cnt)) == 0)) {
    703 				/*
    704 				 * This is confusing but correct !
    705 				 * It is not valid to to use T_ALLOPT with
    706 				 * T_CHECK flag.
    707 				 *
    708 				 * opt_level_allopts_lengths() is used to verify
    709 				 * that "level" associated with the T_ALLOPT is
    710 				 * supported.
    711 				 *
    712 				 */
    713 				opt->status = T_FAILURE;
    714 				*toa_lenp += _TPI_ALIGN_TOPT(opt->len);
    715 				continue;
    716 			}
    717 			ASSERT(allopt_len != 0); /* remove ? */
    718 
    719 			*toa_lenp += allopt_len;
    720 			opt->status = T_SUCCESS;
    721 			continue;
    722 		}
    723 
    724 		/* Additional checks dependent on operation. */
    725 		switch (tor->MGMT_flags) {
    726 		case T_DEFAULT:
    727 		case T_CURRENT:
    728 
    729 			/*
    730 			 * The proto_opt_lookup() routine call above approved of
    731 			 * this option so we can work on the status for it
    732 			 * based on the permissions for the operation. (This
    733 			 * can override any status for it set at higher levels)
    734 			 * We assume this override is OK since chkfn at this
    735 			 * level approved of this option.
    736 			 *
    737 			 * T_CURRENT semantics:
    738 			 * The read access is required. Else option
    739 			 * status is T_NOTSUPPORT.
    740 			 *
    741 			 * T_DEFAULT semantics:
    742 			 * Note: specification is not clear on this but we
    743 			 * interpret T_DEFAULT semantics such that access to
    744 			 * read value is required for access even the default
    745 			 * value. Otherwise the option status is T_NOTSUPPORT.
    746 			 */
    747 			if (!OA_READ_PERMISSION(optd, cr)) {
    748 				opt->status = T_NOTSUPPORT;
    749 				*toa_lenp += _TPI_ALIGN_TOPT(opt->len);
    750 				/* skip to next */
    751 				continue;
    752 			}
    753 
    754 			/*
    755 			 * T_DEFAULT/T_CURRENT semantics:
    756 			 * We know that read access is set. If no other access
    757 			 * is set, then status is T_READONLY.
    758 			 */
    759 			if (OA_READONLY_PERMISSION(optd, cr))
    760 				opt->status = T_READONLY;
    761 			else
    762 				opt->status = T_SUCCESS;
    763 			/*
    764 			 * Option passes all checks. Make room for it in the
    765 			 * ack. Note: size stored in table does not include
    766 			 * space for option header.
    767 			 */
    768 			*toa_lenp += sizeof (struct T_opthdr) +
    769 			    _TPI_ALIGN_TOPT(optd->opdes_size);
    770 			break;
    771 
    772 		case T_CHECK:
    773 		case T_NEGOTIATE:
    774 
    775 			/*
    776 			 * T_NEGOTIATE semantics:
    777 			 * If for fixed length option value on input is not the
    778 			 * same as value supplied, then status is T_FAILURE.
    779 			 *
    780 			 * T_CHECK semantics:
    781 			 * If value is supplied, semantics same as T_NEGOTIATE.
    782 			 * It is however ok not to supply a value with T_CHECK.
    783 			 */
    784 
    785 			if (tor->MGMT_flags == T_NEGOTIATE ||
    786 			    (opt->len != sizeof (struct T_opthdr))) {
    787 				/*
    788 				 * Implies "value" is specified in T_CHECK or
    789 				 * it is a T_NEGOTIATE request.
    790 				 * Verify size.
    791 				 * Note: This can override anything about this
    792 				 * option request done at a higher level.
    793 				 */
    794 				if (opt->len < sizeof (struct T_opthdr) ||
    795 				    !opt_length_ok(optd,
    796 				    opt->len - sizeof (struct T_opthdr))) {
    797 					/* bad size */
    798 					*toa_lenp += _TPI_ALIGN_TOPT(opt->len);
    799 					opt->status = T_FAILURE;
    800 					continue;
    801 				}
    802 			}
    803 			/*
    804 			 * The proto_opt_lookup()  routine above() approved of
    805 			 * this option so we can work on the status for it based
    806 			 * on the permissions for the operation. (This can
    807 			 * override anything set at a higher level).
    808 			 *
    809 			 * T_CHECK/T_NEGOTIATE semantics:
    810 			 * Set status to T_READONLY if read is the only access
    811 			 * permitted
    812 			 */
    813 			if (OA_READONLY_PERMISSION(optd, cr)) {
    814 				opt->status = T_READONLY;
    815 				*toa_lenp += _TPI_ALIGN_TOPT(opt->len);
    816 				/* skip to next */
    817 				continue;
    818 			}
    819 
    820 			/*
    821 			 * T_CHECK/T_NEGOTIATE semantics:
    822 			 * If write (or execute) access is not set, then status
    823 			 * is T_NOTSUPPORT.
    824 			 */
    825 			if (!OA_WRITE_OR_EXECUTE(optd, cr)) {
    826 				opt->status = T_NOTSUPPORT;
    827 				*toa_lenp += _TPI_ALIGN_TOPT(opt->len);
    828 				/* skip to next option */
    829 				continue;
    830 			}
    831 			/*
    832 			 * Option passes all checks. Make room for it in the
    833 			 * ack and set success in status.
    834 			 * Note: size stored in table does not include header
    835 			 * length.
    836 			 */
    837 			opt->status = T_SUCCESS;
    838 			*toa_lenp += sizeof (struct T_opthdr) +
    839 			    _TPI_ALIGN_TOPT(optd->opdes_size);
    840 			break;
    841 
    842 		default:
    843 			return (TBADFLAG);
    844 		}
    845 	} /* for loop scanning input buffer */
    846 
    847 	return (0);		/* OK return */
    848 }
    849 
    850 /*
    851  * This routine makes another pass through the option buffer this
    852  * time acting on the request based on "status" result in the
    853  * first pass. It also performs "expansion" of T_ALLOPT into
    854  * all options of a certain level and acts on each for this request.
    855  */
    856 static t_scalar_t
    857 do_options_second_pass(queue_t *q, mblk_t *reqmp, mblk_t *ack_mp, cred_t *cr,
    858     optdb_obj_t *dbobjp, t_uscalar_t *worst_statusp)
    859 {
    860 	int failed_option;
    861 	struct T_opthdr *opt;
    862 	struct T_opthdr *opt_start, *opt_end;
    863 	uchar_t *optr;
    864 	uint_t optset_context;
    865 	struct T_optmgmt_req *tor = (struct T_optmgmt_req *)reqmp->b_rptr;
    866 
    867 	optr = (uchar_t *)ack_mp->b_rptr +
    868 	    sizeof (struct T_optmgmt_ack); /* assumed int32_t aligned */
    869 
    870 	/*
    871 	 * Set initial values for scanning input
    872 	 */
    873 	opt_start = (struct T_opthdr *)mi_offset_param(reqmp,
    874 	    tor->OPT_offset, tor->OPT_length);
    875 	if (opt_start == NULL)
    876 		return (TBADOPT);
    877 	opt_end = (struct T_opthdr *)((uchar_t *)opt_start + tor->OPT_length);
    878 	ASSERT(__TPI_TOPT_ISALIGNED(opt_start)); /* verified in first pass */
    879 
    880 	for (opt = opt_start; opt && (opt < opt_end);
    881 	    opt = _TPI_TOPT_NEXTHDR(opt_start, tor->OPT_length, opt)) {
    882 
    883 		/* verified in first pass */
    884 		ASSERT(_TPI_TOPT_VALID(opt, opt_start, opt_end));
    885 
    886 		/*
    887 		 * If the first pass in process_topthdrs_first_pass()
    888 		 * has marked the option as a failure case for the MGMT_flags
    889 		 * semantics then there is not much to do.
    890 		 *
    891 		 * Note: For all practical purposes, T_READONLY status is
    892 		 * a "success" for T_DEFAULT/T_CURRENT and "failure" for
    893 		 * T_CHECK/T_NEGOTIATE
    894 		 */
    895 		failed_option =
    896 		    (opt->status == T_NOTSUPPORT) ||
    897 		    (opt->status == T_FAILURE) ||
    898 		    ((tor->MGMT_flags & (T_NEGOTIATE|T_CHECK)) &&
    899 		    (opt->status == T_READONLY));
    900 
    901 		if (failed_option) {
    902 			/*
    903 			 * According to T_DEFAULT/T_CURRENT semantics, the
    904 			 * input values, even if present, are to be ignored.
    905 			 * Note: Specification is not clear on this, but we
    906 			 * interpret that even though we ignore the values, we
    907 			 * can return them as is. So we process them similar to
    908 			 * T_CHECK/T_NEGOTIATE case which has the semantics to
    909 			 * return the values as is. XXX If interpretation is
    910 			 * ever determined incorrect fill in appropriate code
    911 			 * here to treat T_DEFAULT/T_CURRENT differently.
    912 			 *
    913 			 * According to T_CHECK/T_NEGOTIATE semantics,
    914 			 * in the case of T_NOTSUPPORT/T_FAILURE/T_READONLY,
    915 			 * the semantics are to return the "value" part of
    916 			 * option untouched. So here we copy the option
    917 			 * head including value part if any to output.
    918 			 */
    919 
    920 			bcopy(opt, optr, opt->len);
    921 			optr += _TPI_ALIGN_TOPT(opt->len);
    922 
    923 			*worst_statusp = get_worst_status(opt->status,
    924 			    *worst_statusp);
    925 
    926 			/* skip to process next option in buffer */
    927 			continue;
    928 
    929 		} /* end if "failed option" */
    930 		/*
    931 		 * The status is T_SUCCESS or T_READONLY
    932 		 * We process the value part here
    933 		 */
    934 		ASSERT(opt->status == T_SUCCESS || opt->status == T_READONLY);
    935 		switch (tor->MGMT_flags) {
    936 		case T_DEFAULT:
    937 			/*
    938 			 * We fill default value from table or protocol specific
    939 			 * function. If this call fails, we pass input through.
    940 			 */
    941 			if (do_opt_default(q, opt, &optr, worst_statusp,
    942 			    cr, dbobjp) < 0) {
    943 				opt->status = T_FAILURE;
    944 				bcopy(opt, optr, opt->len);
    945 				optr += _TPI_ALIGN_TOPT(opt->len);
    946 				*worst_statusp = get_worst_status(opt->status,
    947 				    *worst_statusp);
    948 			}
    949 			break;
    950 
    951 		case T_CURRENT:
    952 
    953 			do_opt_current(q, opt, &optr, worst_statusp, cr,
    954 			    dbobjp);
    955 			break;
    956 
    957 		case T_CHECK:
    958 		case T_NEGOTIATE:
    959 			if (tor->MGMT_flags == T_CHECK)
    960 				optset_context = SETFN_OPTCOM_CHECKONLY;
    961 			else	/* T_NEGOTIATE */
    962 				optset_context = SETFN_OPTCOM_NEGOTIATE;
    963 			do_opt_check_or_negotiate(q, opt, optset_context,
    964 			    &optr, worst_statusp, cr, dbobjp);
    965 			break;
    966 		default:
    967 			return (TBADFLAG);
    968 		}
    969 	} /* end for loop scanning option buffer */
    970 
    971 	ack_mp->b_wptr = optr;
    972 	ASSERT(ack_mp->b_wptr <= ack_mp->b_datap->db_lim);
    973 
    974 	return (0);		/* OK return */
    975 }
    976 
    977 
    978 static t_uscalar_t
    979 get_worst_status(t_uscalar_t status, t_uscalar_t current_worst_status)
    980 {
    981 	/*
    982 	 * Return the "worst" among the arguments "status" and
    983 	 * "current_worst_status".
    984 	 *
    985 	 * Note: Tracking "worst_status" can be made a bit simpler
    986 	 * if we use the property that status codes are bitwise
    987 	 * distinct.
    988 	 *
    989 	 * The pecking order is
    990 	 *
    991 	 * T_SUCCESS ..... best
    992 	 * T_PARTSUCCESS
    993 	 * T_FAILURE
    994 	 * T_READONLY
    995 	 * T_NOTSUPPORT... worst
    996 	 */
    997 	if (status == current_worst_status)
    998 		return (current_worst_status);
    999 	switch (current_worst_status) {
   1000 	case T_SUCCESS:
   1001 		if (status == T_PARTSUCCESS)
   1002 			return (T_PARTSUCCESS);
   1003 		/* FALLTHROUGH */
   1004 	case T_PARTSUCCESS:
   1005 		if (status == T_FAILURE)
   1006 			return (T_FAILURE);
   1007 		/* FALLTHROUGH */
   1008 	case T_FAILURE:
   1009 		if (status == T_READONLY)
   1010 			return (T_READONLY);
   1011 		/* FALLTHROUGH */
   1012 	case T_READONLY:
   1013 		if (status == T_NOTSUPPORT)
   1014 			return (T_NOTSUPPORT);
   1015 		/* FALLTHROUGH */
   1016 	case T_NOTSUPPORT:
   1017 	default:
   1018 		return (current_worst_status);
   1019 	}
   1020 }
   1021 
   1022 static int
   1023 do_opt_default(queue_t *q, struct T_opthdr *reqopt, uchar_t **resptrp,
   1024     t_uscalar_t *worst_statusp, cred_t *cr, optdb_obj_t *dbobjp)
   1025 {
   1026 	pfi_t	deffn = dbobjp->odb_deffn;
   1027 	opdes_t	*opt_arr = dbobjp->odb_opt_des_arr;
   1028 	uint_t opt_arr_cnt = dbobjp->odb_opt_arr_cnt;
   1029 
   1030 	struct T_opthdr *topth;
   1031 	opdes_t *optd;
   1032 
   1033 	if (reqopt->name != T_ALLOPT) {
   1034 		/*
   1035 		 * lookup the option in the table and fill default value
   1036 		 */
   1037 		optd = proto_opt_lookup(reqopt->level, reqopt->name,
   1038 		    opt_arr, opt_arr_cnt);
   1039 
   1040 		/* Calling routine should have verified it it exists */
   1041 		ASSERT(optd != NULL);
   1042 
   1043 		topth = (struct T_opthdr *)(*resptrp);
   1044 		topth->level = reqopt->level;
   1045 		topth->name = reqopt->name;
   1046 		topth->status = reqopt->status;
   1047 
   1048 		*worst_statusp = get_worst_status(reqopt->status,
   1049 		    *worst_statusp);
   1050 
   1051 		if (optd->opdes_props & OP_NODEFAULT) {
   1052 			/* header only, no default "value" part */
   1053 			topth->len = sizeof (struct T_opthdr);
   1054 			*resptrp += sizeof (struct T_opthdr);
   1055 		} else {
   1056 			int deflen;
   1057 
   1058 			if (optd->opdes_props & OP_DEF_FN) {
   1059 				deflen = (*deffn)(q, reqopt->level,
   1060 				    reqopt->name, _TPI_TOPT_DATA(topth));
   1061 				if (deflen >= 0) {
   1062 					topth->len = (t_uscalar_t)
   1063 					    (sizeof (struct T_opthdr) + deflen);
   1064 				} else {
   1065 					/*
   1066 					 * return error, this should 'pass
   1067 					 * through' the option and maybe some
   1068 					 * other level will fill it in or
   1069 					 * already did.
   1070 					 * (No change in 'resptrp' upto here)
   1071 					 */
   1072 					return (-1);
   1073 				}
   1074 			} else {
   1075 				/* fill length and value part */
   1076 				switch (optd->opdes_size) {
   1077 				/*
   1078 				 * Since options are guaranteed aligned only
   1079 				 * on a 4 byte boundary (t_scalar_t) any
   1080 				 * option that is greater in size will default
   1081 				 * to the bcopy below
   1082 				 */
   1083 				case sizeof (int32_t):
   1084 					*(int32_t *)_TPI_TOPT_DATA(topth) =
   1085 					    (int32_t)optd->opdes_default;
   1086 					break;
   1087 				case sizeof (int16_t):
   1088 					*(int16_t *)_TPI_TOPT_DATA(topth) =
   1089 					    (int16_t)optd->opdes_default;
   1090 					break;
   1091 				case sizeof (int8_t):
   1092 					*(int8_t *)_TPI_TOPT_DATA(topth) =
   1093 					    (int8_t)optd->opdes_default;
   1094 					break;
   1095 				default:
   1096 					/*
   1097 					 * other length but still assume
   1098 					 * fixed - use bcopy
   1099 					 */
   1100 					bcopy(optd->opdes_defbuf,
   1101 					    _TPI_TOPT_DATA(topth),
   1102 					    optd->opdes_size);
   1103 					break;
   1104 				}
   1105 				topth->len = (t_uscalar_t)(optd->opdes_size +
   1106 				    sizeof (struct T_opthdr));
   1107 			}
   1108 			*resptrp += _TPI_ALIGN_TOPT(topth->len);
   1109 		}
   1110 		return (0);	/* OK return */
   1111 	}
   1112 
   1113 	/*
   1114 	 * T_ALLOPT processing
   1115 	 *
   1116 	 * lookup and stuff default values of all the options of the
   1117 	 * level specified
   1118 	 */
   1119 	for (optd = opt_arr; optd < &opt_arr[opt_arr_cnt]; optd++) {
   1120 		if (reqopt->level != optd->opdes_level)
   1121 			continue;
   1122 		/*
   1123 		 *
   1124 		 * T_DEFAULT semantics:
   1125 		 * XXX: we interpret T_DEFAULT semantics such that access to
   1126 		 * read value is required for access even the default value.
   1127 		 * Else option is ignored for T_ALLOPT request.
   1128 		 */
   1129 		if (!OA_READ_PERMISSION(optd, cr))
   1130 			/* skip this one */
   1131 			continue;
   1132 
   1133 		/*
   1134 		 * Found option of same level as T_ALLOPT request
   1135 		 * that we can return.
   1136 		 */
   1137 
   1138 		topth = (struct T_opthdr *)(*resptrp);
   1139 		topth->level = optd->opdes_level;
   1140 		topth->name = optd->opdes_name;
   1141 
   1142 		/*
   1143 		 * T_DEFAULT semantics:
   1144 		 * We know that read access is set. If no other access is set,
   1145 		 * then status is T_READONLY
   1146 		 */
   1147 		if (OA_READONLY_PERMISSION(optd, cr)) {
   1148 			topth->status = T_READONLY;
   1149 			*worst_statusp = get_worst_status(T_READONLY,
   1150 			    *worst_statusp);
   1151 		} else {
   1152 			topth->status = T_SUCCESS;
   1153 			/*
   1154 			 * Note: *worst_statusp has to be T_SUCCESS or
   1155 			 * worse so no need to adjust
   1156 			 */
   1157 		}
   1158 
   1159 		if (optd->opdes_props & OP_NODEFAULT) {
   1160 			/* header only, no value part */
   1161 			topth->len = sizeof (struct T_opthdr);
   1162 			*resptrp += sizeof (struct T_opthdr);
   1163 		} else {
   1164 			int deflen;
   1165 
   1166 			if (optd->opdes_props & OP_DEF_FN) {
   1167 				deflen = (*deffn)(q, reqopt->level,
   1168 				    reqopt->name, _TPI_TOPT_DATA(topth));
   1169 				if (deflen >= 0) {
   1170 					topth->len = (t_uscalar_t)(deflen +
   1171 					    sizeof (struct T_opthdr));
   1172 				} else {
   1173 					/*
   1174 					 * deffn failed.
   1175 					 * return just the header as T_ALLOPT
   1176 					 * expansion.
   1177 					 * Some other level deffn may
   1178 					 * supply value part.
   1179 					 */
   1180 					topth->len = sizeof (struct T_opthdr);
   1181 					topth->status = T_FAILURE;
   1182 					*worst_statusp =
   1183 					    get_worst_status(T_FAILURE,
   1184 					    *worst_statusp);
   1185 				}
   1186 			} else {
   1187 				/*
   1188 				 * fill length and value part from
   1189 				 * table
   1190 				 */
   1191 				switch (optd->opdes_size) {
   1192 				/*
   1193 				 * Since options are guaranteed aligned only
   1194 				 * on a 4 byte boundary (t_scalar_t) any
   1195 				 * option that is greater in size will default
   1196 				 * to the bcopy below
   1197 				 */
   1198 				case sizeof (int32_t):
   1199 					*(int32_t *)_TPI_TOPT_DATA(topth) =
   1200 					    (int32_t)optd->opdes_default;
   1201 					break;
   1202 				case sizeof (int16_t):
   1203 					*(int16_t *)_TPI_TOPT_DATA(topth) =
   1204 					    (int16_t)optd->opdes_default;
   1205 					break;
   1206 				case sizeof (int8_t):
   1207 					*(int8_t *)_TPI_TOPT_DATA(topth) =
   1208 					    (int8_t)optd->opdes_default;
   1209 					break;
   1210 				default:
   1211 					/*
   1212 					 * other length but still assume
   1213 					 * fixed - use bcopy
   1214 					 */
   1215 					bcopy(optd->opdes_defbuf,
   1216 					    _TPI_TOPT_DATA(topth),
   1217 					    optd->opdes_size);
   1218 				}
   1219 				topth->len = (t_uscalar_t)(optd->opdes_size +
   1220 				    sizeof (struct T_opthdr));
   1221 			}
   1222 			*resptrp += _TPI_ALIGN_TOPT(topth->len);
   1223 		}
   1224 	}
   1225 	return (0);
   1226 }
   1227 
   1228 static void
   1229 do_opt_current(queue_t *q, struct T_opthdr *reqopt, uchar_t **resptrp,
   1230     t_uscalar_t *worst_statusp, cred_t *cr, optdb_obj_t *dbobjp)
   1231 {
   1232 	pfi_t	getfn = dbobjp->odb_getfn;
   1233 	opdes_t	*opt_arr = dbobjp->odb_opt_des_arr;
   1234 	uint_t opt_arr_cnt = dbobjp->odb_opt_arr_cnt;
   1235 	struct T_opthdr *topth;
   1236 	opdes_t *optd;
   1237 	int optlen;
   1238 	uchar_t *initptr = *resptrp;
   1239 
   1240 	/*
   1241 	 * We call getfn to get the current value of an option. The call may
   1242 	 * fail in which case we copy the values from the input buffer. Maybe
   1243 	 * something downstream will fill it in or something upstream did.
   1244 	 */
   1245 
   1246 	if (reqopt->name != T_ALLOPT) {
   1247 		topth = (struct T_opthdr *)*resptrp;
   1248 		*resptrp += sizeof (struct T_opthdr);
   1249 		optlen = (*getfn)(q, reqopt->level, reqopt->name, *resptrp);
   1250 		if (optlen >= 0) {
   1251 			topth->len = (t_uscalar_t)(optlen +
   1252 			    sizeof (struct T_opthdr));
   1253 			topth->level = reqopt->level;
   1254 			topth->name = reqopt->name;
   1255 			topth->status = reqopt->status;
   1256 			*resptrp += _TPI_ALIGN_TOPT(optlen);
   1257 			*worst_statusp = get_worst_status(topth->status,
   1258 			    *worst_statusp);
   1259 		} else {
   1260 			/* failed - reset "*resptrp" pointer */
   1261 			*resptrp -= sizeof (struct T_opthdr);
   1262 		}
   1263 	} else {		/* T_ALLOPT processing */
   1264 		/* scan and get all options */
   1265 		for (optd = opt_arr; optd < &opt_arr[opt_arr_cnt]; optd++) {
   1266 			/* skip other levels */
   1267 			if (reqopt->level != optd->opdes_level)
   1268 				continue;
   1269 
   1270 			if (!OA_READ_PERMISSION(optd, cr))
   1271 				/* skip this one */
   1272 				continue;
   1273 
   1274 			topth = (struct T_opthdr *)*resptrp;
   1275 			*resptrp += sizeof (struct T_opthdr);
   1276 
   1277 			/* get option of this level */
   1278 			optlen = (*getfn)(q, reqopt->level, optd->opdes_name,
   1279 			    *resptrp);
   1280 			if (optlen >= 0) {
   1281 				/* success */
   1282 				topth->len = (t_uscalar_t)(optlen +
   1283 				    sizeof (struct T_opthdr));
   1284 				topth->level = reqopt->level;
   1285 				topth->name = optd->opdes_name;
   1286 				if (OA_READONLY_PERMISSION(optd, cr))
   1287 					topth->status = T_READONLY;
   1288 				else
   1289 					topth->status = T_SUCCESS;
   1290 				*resptrp += _TPI_ALIGN_TOPT(optlen);
   1291 			} else {
   1292 				/*
   1293 				 * failed, return as T_FAILURE and null value
   1294 				 * part. Maybe something downstream will
   1295 				 * handle this one and fill in a value. Here
   1296 				 * it is just part of T_ALLOPT expansion.
   1297 				 */
   1298 				topth->len = sizeof (struct T_opthdr);
   1299 				topth->level = reqopt->level;
   1300 				topth->name = optd->opdes_name;
   1301 				topth->status = T_FAILURE;
   1302 			}
   1303 			*worst_statusp = get_worst_status(topth->status,
   1304 			    *worst_statusp);
   1305 		} /* end for loop */
   1306 	}
   1307 	if (*resptrp == initptr) {
   1308 		/*
   1309 		 * getfn failed and does not want to handle this option.
   1310 		 */
   1311 		reqopt->status = T_FAILURE;
   1312 		bcopy(reqopt, *resptrp, reqopt->len);
   1313 		*resptrp += _TPI_ALIGN_TOPT(reqopt->len);
   1314 		*worst_statusp = get_worst_status(reqopt->status,
   1315 		    *worst_statusp);
   1316 	}
   1317 }
   1318 
   1319 static void
   1320 do_opt_check_or_negotiate(queue_t *q, struct T_opthdr *reqopt,
   1321     uint_t optset_context, uchar_t **resptrp, t_uscalar_t *worst_statusp,
   1322     cred_t *cr, optdb_obj_t *dbobjp)
   1323 {
   1324 	pfi_t	deffn = dbobjp->odb_deffn;
   1325 	opt_set_fn setfn = dbobjp->odb_setfn;
   1326 	opdes_t	*opt_arr = dbobjp->odb_opt_des_arr;
   1327 	uint_t opt_arr_cnt = dbobjp->odb_opt_arr_cnt;
   1328 	struct T_opthdr *topth;
   1329 	opdes_t *optd;
   1330 	int error;
   1331 	t_uscalar_t optlen;
   1332 	t_scalar_t optsize;
   1333 	uchar_t *initptr = *resptrp;
   1334 
   1335 	ASSERT(reqopt->status == T_SUCCESS);
   1336 
   1337 	if (reqopt->name != T_ALLOPT) {
   1338 		topth = (struct T_opthdr *)*resptrp;
   1339 		*resptrp += sizeof (struct T_opthdr);
   1340 		error = (*setfn)(q, optset_context, reqopt->level, reqopt->name,
   1341 		    reqopt->len - sizeof (struct T_opthdr),
   1342 		    _TPI_TOPT_DATA(reqopt), &optlen, _TPI_TOPT_DATA(topth),
   1343 		    NULL, cr);
   1344 		if (error) {
   1345 			/* failed - reset "*resptrp" */
   1346 			*resptrp -= sizeof (struct T_opthdr);
   1347 		} else {
   1348 			/*
   1349 			 * success - "value" already filled in setfn()
   1350 			 */
   1351 			topth->len = (t_uscalar_t)(optlen +
   1352 			    sizeof (struct T_opthdr));
   1353 			topth->level = reqopt->level;
   1354 			topth->name = reqopt->name;
   1355 			topth->status = reqopt->status;
   1356 			*resptrp += _TPI_ALIGN_TOPT(optlen);
   1357 			*worst_statusp = get_worst_status(topth->status,
   1358 			    *worst_statusp);
   1359 		}
   1360 	} else {		/* T_ALLOPT processing */
   1361 		/* only for T_NEGOTIATE case */
   1362 		ASSERT(optset_context == SETFN_OPTCOM_NEGOTIATE);
   1363 
   1364 		/* scan and set all options to default value */
   1365 		for (optd = opt_arr; optd < &opt_arr[opt_arr_cnt]; optd++) {
   1366 
   1367 			/* skip other levels */
   1368 			if (reqopt->level != optd->opdes_level)
   1369 				continue;
   1370 
   1371 			if (OA_EXECUTE_PERMISSION(optd, cr) ||
   1372 			    OA_NO_PERMISSION(optd, cr)) {
   1373 				/*
   1374 				 * skip this one too. Does not make sense to
   1375 				 * set anything to default value for "execute"
   1376 				 * options.
   1377 				 */
   1378 				continue;
   1379 			}
   1380 
   1381 			if (OA_READONLY_PERMISSION(optd, cr)) {
   1382 				/*
   1383 				 * Return with T_READONLY status (and no value
   1384 				 * part). Note: spec is not clear but
   1385 				 * XTI test suite needs this.
   1386 				 */
   1387 				topth = (struct T_opthdr *)*resptrp;
   1388 				topth->len = sizeof (struct T_opthdr);
   1389 				*resptrp += topth->len;
   1390 				topth->level = reqopt->level;
   1391 				topth->name = optd->opdes_name;
   1392 				topth->status = T_READONLY;
   1393 				*worst_statusp = get_worst_status(topth->status,
   1394 				    *worst_statusp);
   1395 				continue;
   1396 			}
   1397 
   1398 			/*
   1399 			 * It is not read only or execute type
   1400 			 * the it must have write permission
   1401 			 */
   1402 			ASSERT(OA_WRITE_PERMISSION(optd, cr));
   1403 
   1404 			topth = (struct T_opthdr *)*resptrp;
   1405 			*resptrp += sizeof (struct T_opthdr);
   1406 
   1407 			topth->len = sizeof (struct T_opthdr);
   1408 			topth->level = reqopt->level;
   1409 			topth->name = optd->opdes_name;
   1410 			if (optd->opdes_props & OP_NODEFAULT) {
   1411 				/*
   1412 				 * Option of "no default value" so it does not
   1413 				 * make sense to try to set it. We just return
   1414 				 * header with status of T_SUCCESS
   1415 				 * XXX should this be failure ?
   1416 				 */
   1417 				topth->status = T_SUCCESS;
   1418 				continue; /* skip setting */
   1419 			}
   1420 			if (optd->opdes_props & OP_DEF_FN) {
   1421 				if ((optd->opdes_props & OP_VARLEN) ||
   1422 				    ((optsize = (*deffn)(q, reqopt->level,
   1423 				    optd->opdes_name,
   1424 				    (uchar_t *)optd->opdes_defbuf)) < 0)) {
   1425 					/* XXX - skip these too */
   1426 					topth->status = T_SUCCESS;
   1427 					continue; /* skip setting */
   1428 				}
   1429 			} else {
   1430 				optsize = optd->opdes_size;
   1431 			}
   1432 
   1433 
   1434 			/* set option of this level */
   1435 			error = (*setfn)(q, SETFN_OPTCOM_NEGOTIATE,
   1436 			    reqopt->level, optd->opdes_name, optsize,
   1437 			    (uchar_t *)optd->opdes_defbuf, &optlen,
   1438 			    _TPI_TOPT_DATA(topth), NULL, cr);
   1439 			if (error) {
   1440 				/*
   1441 				 * failed, return as T_FAILURE and null value
   1442 				 * part. Maybe something downstream will
   1443 				 * handle this one and fill in a value. Here
   1444 				 * it is just part of T_ALLOPT expansion.
   1445 				 */
   1446 				topth->status = T_FAILURE;
   1447 				*worst_statusp = get_worst_status(topth->status,
   1448 				    *worst_statusp);
   1449 			} else {
   1450 				/* success */
   1451 				topth->len += optlen;
   1452 				topth->status = T_SUCCESS;
   1453 				*resptrp += _TPI_ALIGN_TOPT(optlen);
   1454 			}
   1455 		} /* end for loop */
   1456 		/* END T_ALLOPT */
   1457 	}
   1458 
   1459 	if (*resptrp == initptr) {
   1460 		/*
   1461 		 * setfn failed and does not want to handle this option.
   1462 		 */
   1463 		reqopt->status = T_FAILURE;
   1464 		bcopy(reqopt, *resptrp, reqopt->len);
   1465 		*resptrp += _TPI_ALIGN_TOPT(reqopt->len);
   1466 		*worst_statusp = get_worst_status(reqopt->status,
   1467 		    *worst_statusp);
   1468 	}
   1469 }
   1470 
   1471 /*
   1472  * The following routines process options buffer passed with
   1473  * T_CONN_REQ, T_CONN_RES and T_UNITDATA_REQ.
   1474  * This routine does the consistency check applied to the
   1475  * sanity of formatting of multiple options packed in the
   1476  * buffer.
   1477  *
   1478  * XTI brain damage alert:
   1479  * XTI interface adopts the notion of an option being an
   1480  * "absolute requirement" from OSI transport service (but applies
   1481  * it to all transports including Internet transports).
   1482  * The main effect of that is action on failure to "negotiate" a
   1483  * requested option to the exact requested value
   1484  *
   1485  *          - if the option is an "absolute requirement", the primitive
   1486  *            is aborted (e.g T_DISCON_REQ or T_UDERR generated)
   1487  *          - if the option is NOT an "absolute requirement" it can
   1488  *            just be ignored.
   1489  *
   1490  * We would not support "negotiating" of options on connection
   1491  * primitives for Internet transports. However just in case we
   1492  * forced to in order to pass strange test suites, the design here
   1493  * tries to support these notions.
   1494  *
   1495  * tpi_optcom_buf(q, mp, opt_lenp, opt_offset, cred, dbobjp, thisdg_attrs,
   1496  *	*is_absreq_failurep)
   1497  *
   1498  * - Verify the option buffer, if formatted badly, return error 1
   1499  *
   1500  * - If it is a "permissions" failure (read-only), return error 2
   1501  *
   1502  * - Else, process the option "in place", the following can happen,
   1503  *	     - if a "privileged" option, mark it as "ignored".
   1504  *	     - if "not supported", mark "ignored"
   1505  *	     - if "supported" attempt negotiation and fill result in
   1506  *	       the outcome
   1507  *			- if "absolute requirement", set "*is_absreq_failurep"
   1508  *			- if NOT an "absolute requirement", then our
   1509  *			  interpretation is to mark it as ignored if
   1510  *			  negotiation fails (Spec allows partial success
   1511  *			  as in OSI protocols but not failure)
   1512  *
   1513  *   Then delete "ignored" options from option buffer and return success.
   1514  *
   1515  */
   1516 int
   1517 tpi_optcom_buf(queue_t *q, mblk_t *mp, t_scalar_t *opt_lenp,
   1518     t_scalar_t opt_offset, cred_t *cr, optdb_obj_t *dbobjp,
   1519     void *thisdg_attrs, int *is_absreq_failurep)
   1520 {
   1521 	opt_set_fn setfn = dbobjp->odb_setfn;
   1522 	opdes_t *opt_arr = dbobjp->odb_opt_des_arr;
   1523 	uint_t opt_arr_cnt = dbobjp->odb_opt_arr_cnt;
   1524 	struct T_opthdr *opt, *opt_start, *opt_end;
   1525 	mblk_t  *copy_mp_head;
   1526 	uchar_t *optr, *init_optr;
   1527 	opdes_t *optd;
   1528 	uint_t optset_context;
   1529 	t_uscalar_t olen;
   1530 	int error = 0;
   1531 
   1532 	ASSERT((uchar_t *)opt_lenp > mp->b_rptr &&
   1533 	    (uchar_t *)opt_lenp < mp->b_wptr);
   1534 
   1535 	copy_mp_head = NULL;
   1536 	*is_absreq_failurep = 0;
   1537 	switch (((union T_primitives *)mp->b_rptr)->type) {
   1538 	case T_CONN_REQ:
   1539 	case T_CONN_RES:
   1540 		optset_context = SETFN_CONN_NEGOTIATE;
   1541 		break;
   1542 	case T_UNITDATA_REQ:
   1543 		optset_context = SETFN_UD_NEGOTIATE;
   1544 		break;
   1545 	default:
   1546 		/*
   1547 		 * should never get here, all possible TPI primitives
   1548 		 * where this can be called from should be accounted
   1549 		 * for in the cases above
   1550 		 */
   1551 		return (EINVAL);
   1552 	}
   1553 
   1554 	if ((opt_start = (struct T_opthdr *)
   1555 	    mi_offset_param(mp, opt_offset, *opt_lenp)) == NULL) {
   1556 		error = ENOPROTOOPT;
   1557 		goto error_ret;
   1558 	}
   1559 	if (!__TPI_TOPT_ISALIGNED(opt_start)) {
   1560 		error = ENOPROTOOPT;
   1561 		goto error_ret;
   1562 	}
   1563 
   1564 	opt_end = (struct T_opthdr *)((uchar_t *)opt_start
   1565 	    + *opt_lenp);
   1566 
   1567 	if ((copy_mp_head = copyb(mp)) == (mblk_t *)NULL) {
   1568 		error = ENOMEM;
   1569 		goto error_ret;
   1570 	}
   1571 
   1572 	init_optr = optr = (uchar_t *)&copy_mp_head->b_rptr[opt_offset];
   1573 
   1574 	for (opt = opt_start; opt && (opt < opt_end);
   1575 	    opt = _TPI_TOPT_NEXTHDR(opt_start, *opt_lenp, opt)) {
   1576 		/*
   1577 		 * Validate the option for length and alignment
   1578 		 * before accessing anything in it
   1579 		 */
   1580 		if (!_TPI_TOPT_VALID(opt, opt_start, opt_end)) {
   1581 			error = ENOPROTOOPT;
   1582 			goto error_ret;
   1583 		}
   1584 
   1585 		/* Find the option in the opt_arr. */
   1586 		optd = proto_opt_lookup(opt->level, opt->name,
   1587 		    opt_arr, opt_arr_cnt);
   1588 
   1589 		if (optd == NULL) {
   1590 			/*
   1591 			 * Option not found
   1592 			 */
   1593 			opt->status = T_NOTSUPPORT;
   1594 			continue;
   1595 		}
   1596 
   1597 		/*
   1598 		 * Weird but as in XTI spec.
   1599 		 * Sec 6.3.6 "Privileged and ReadOnly Options"
   1600 		 * Permission problems (e.g.readonly) fail with bad access
   1601 		 * BUT "privileged" option request from those NOT PRIVILEGED
   1602 		 * are to be merely "ignored".
   1603 		 * XXX Prevents "probing" of privileged options ?
   1604 		 */
   1605 		if (OA_READONLY_PERMISSION(optd, cr)) {
   1606 			error = EACCES;
   1607 			goto error_ret;
   1608 		}
   1609 		if (OA_MATCHED_PRIV(optd, cr)) {
   1610 			/*
   1611 			 * For privileged options, we DO perform
   1612 			 * access checks as is common sense
   1613 			 */
   1614 			if (!OA_WX_ANYPRIV(optd)) {
   1615 				error = EACCES;
   1616 				goto error_ret;
   1617 			}
   1618 		} else {
   1619 			/*
   1620 			 * For non privileged, we fail instead following
   1621 			 * "ignore" semantics dictated by XTI spec for
   1622 			 * permissions problems.
   1623 			 * Sec 6.3.6 "Privileged and ReadOnly Options"
   1624 			 * XXX Should we do "ignore" semantics ?
   1625 			 */
   1626 			if (!OA_WX_NOPRIV(optd)) { /* nopriv */
   1627 				opt->status = T_FAILURE;
   1628 				continue;
   1629 			}
   1630 		}
   1631 		/*
   1632 		 *
   1633 		 * If the negotiation fails, for options that
   1634 		 * are "absolute requirement", it is a fatal error.
   1635 		 * For options that are NOT "absolute requirements",
   1636 		 * and the value fails to negotiate, the XTI spec
   1637 		 * only considers the possibility of partial success
   1638 		 * (T_PARTSUCCES - not likely for Internet protocols).
   1639 		 * The spec is in denial about complete failure
   1640 		 * (T_FAILURE) to negotiate for options that are
   1641 		 * carried on T_CONN_REQ/T_CONN_RES/T_UNITDATA
   1642 		 * We interpret the T_FAILURE to negotiate an option
   1643 		 * that is NOT an absolute requirement that it is safe
   1644 		 * to ignore it.
   1645 		 */
   1646 
   1647 		/* verify length */
   1648 		if (opt->len < (t_uscalar_t)sizeof (struct T_opthdr) ||
   1649 		    !opt_length_ok(optd, opt->len - sizeof (struct T_opthdr))) {
   1650 			/* bad size */
   1651 			if ((optd->opdes_props & OP_NOT_ABSREQ) == 0) {
   1652 				/* option is absolute requirement */
   1653 				*is_absreq_failurep = 1;
   1654 				error = EINVAL;
   1655 				goto error_ret;
   1656 			}
   1657 			opt->status = T_FAILURE;
   1658 			continue;
   1659 		}
   1660 
   1661 		/*
   1662 		 * verified generic attributes. Now call set function.
   1663 		 * Note: We assume the following to simplify code.
   1664 		 * XXX If this is found not to be valid, this routine
   1665 		 * will need to be rewritten. At this point it would
   1666 		 * be premature to introduce more complexity than is
   1667 		 * needed.
   1668 		 * Assumption: For variable length options, we assume
   1669 		 * that the value returned will be same or less length
   1670 		 * (size does not increase). This makes it OK to pass the
   1671 		 * same space for output as it is on input.
   1672 		 */
   1673 
   1674 		error = (*setfn)(q, optset_context, opt->level, opt->name,
   1675 		    opt->len - (t_uscalar_t)sizeof (struct T_opthdr),
   1676 		    _TPI_TOPT_DATA(opt), &olen, _TPI_TOPT_DATA(opt),
   1677 		    thisdg_attrs, cr);
   1678 
   1679 		if (olen > (int)(opt->len - sizeof (struct T_opthdr))) {
   1680 			/*
   1681 			 * Space on output more than space on input. Should
   1682 			 * not happen and we consider it a bug/error.
   1683 			 * More of a restriction than an error in our
   1684 			 * implementation. Will see if we can live with this
   1685 			 * otherwise code will get more hairy with multiple
   1686 			 * passes.
   1687 			 */
   1688 			error = EINVAL;
   1689 			goto error_ret;
   1690 		}
   1691 		if (error != 0) {
   1692 			if ((optd->opdes_props & OP_NOT_ABSREQ) == 0) {
   1693 				/* option is absolute requirement. */
   1694 				*is_absreq_failurep = 1;
   1695 				goto error_ret;
   1696 			}
   1697 			/*
   1698 			 * failed - but option "not an absolute
   1699 			 * requirement"
   1700 			 */
   1701 			opt->status = T_FAILURE;
   1702 			continue;
   1703 		}
   1704 		/*
   1705 		 * Fill in the only possible successful result
   1706 		 * (Note: TPI allows for T_PARTSUCCESS - partial
   1707 		 * sucess result code which is relevant in OSI world
   1708 		 * and not possible in Internet code)
   1709 		 */
   1710 		opt->status = T_SUCCESS;
   1711 
   1712 		/*
   1713 		 * Add T_SUCCESS result code options to the "output" options.
   1714 		 * No T_FAILURES or T_NOTSUPPORT here as they are to be
   1715 		 * ignored.
   1716 		 * This code assumes output option buffer will
   1717 		 * be <= input option buffer.
   1718 		 *
   1719 		 * Copy option header+value
   1720 		 */
   1721 		bcopy(opt, optr, opt->len);
   1722 		optr +=  _TPI_ALIGN_TOPT(opt->len);
   1723 	}
   1724 	/*
   1725 	 * Overwrite the input mblk option buffer now with the output
   1726 	 * and update length, and contents in original mbl
   1727 	 * (offset remains unchanged).
   1728 	 */
   1729 	*opt_lenp = (t_scalar_t)(optr - init_optr);
   1730 	if (*opt_lenp > 0) {
   1731 		bcopy(init_optr, opt_start, *opt_lenp);
   1732 	}
   1733 
   1734 error_ret:
   1735 	if (copy_mp_head != NULL)
   1736 		freeb(copy_mp_head);
   1737 	return (error);
   1738 }
   1739 
   1740 static boolean_t
   1741 opt_level_valid(t_uscalar_t level, optlevel_t *valid_level_arr,
   1742     uint_t valid_level_arr_cnt)
   1743 {
   1744 	optlevel_t		*olp;
   1745 
   1746 	for (olp = valid_level_arr;
   1747 	    olp < &valid_level_arr[valid_level_arr_cnt];
   1748 	    olp++) {
   1749 		if (level == (uint_t)(*olp))
   1750 			return (B_TRUE);
   1751 	}
   1752 	return (B_FALSE);
   1753 }
   1754 
   1755 
   1756 /*
   1757  * Compute largest possible size for an option buffer containing
   1758  * all options in one buffer.
   1759  *
   1760  * XXX TBD, investigate use of opt_bloated_maxsize() to avoid
   1761  *     wastefully large buffer allocation.
   1762  */
   1763 static size_t
   1764 opt_level_allopts_lengths(t_uscalar_t level, opdes_t *opt_arr,
   1765     uint_t opt_arr_cnt)
   1766 {
   1767 	opdes_t		*optd;
   1768 	size_t allopt_len = 0;	/* 0 implies no option at this level */
   1769 
   1770 	/*
   1771 	 * Scan opt_arr computing aggregate length
   1772 	 * requirement for storing values of all
   1773 	 * options.
   1774 	 * Note: we do not filter for permissions
   1775 	 * etc. This will be >= the real aggregate
   1776 	 * length required (upper bound).
   1777 	 */
   1778 
   1779 	for (optd = opt_arr; optd < &opt_arr[opt_arr_cnt];
   1780 	    optd++) {
   1781 		if (level == optd->opdes_level) {
   1782 			allopt_len += sizeof (struct T_opthdr) +
   1783 			    _TPI_ALIGN_TOPT(optd->opdes_size);
   1784 		}
   1785 	}
   1786 	return (allopt_len);	/* 0 implies level not found */
   1787 }
   1788 
   1789 /*
   1790  * Compute largest possible size for an option buffer containing
   1791  * all options in one buffer - a (theoretical?) worst case scenario
   1792  * for certain cases.
   1793  */
   1794 t_uscalar_t
   1795 optcom_max_optbuf_len(opdes_t *opt_arr, uint_t opt_arr_cnt)
   1796 {
   1797 	t_uscalar_t max_optbuf_len = sizeof (struct T_info_ack);
   1798 	opdes_t		*optd;
   1799 
   1800 	for (optd = opt_arr; optd < &opt_arr[opt_arr_cnt]; optd++) {
   1801 		max_optbuf_len += (t_uscalar_t)sizeof (struct T_opthdr) +
   1802 		    (t_uscalar_t)_TPI_ALIGN_TOPT(optd->opdes_size);
   1803 	}
   1804 	return (max_optbuf_len);
   1805 }
   1806 
   1807 /*
   1808  * Compute largest possible size for OPT_size for a transport.
   1809  * Heuristic used is to add all but certain extremely large
   1810  * size options; this is done by calling opt_bloated_maxsize().
   1811  * It affects user level allocations in TLI/XTI code using t_alloc()
   1812  * and other TLI/XTI implementation instance strucutures.
   1813  * The large size options excluded are presumed to be
   1814  * never accessed through the (theoretical?) worst case code paths
   1815  * through TLI/XTI as they are currently IPv6 specific options.
   1816  */
   1817 
   1818 t_uscalar_t
   1819 optcom_max_optsize(opdes_t *opt_arr, uint_t opt_arr_cnt)
   1820 {
   1821 	t_uscalar_t max_optbuf_len = sizeof (struct T_info_ack);
   1822 	opdes_t		*optd;
   1823 
   1824 	for (optd = opt_arr; optd < &opt_arr[opt_arr_cnt]; optd++) {
   1825 		if (!opt_bloated_maxsize(optd)) {
   1826 			max_optbuf_len +=
   1827 			    (t_uscalar_t)sizeof (struct T_opthdr) +
   1828 			    (t_uscalar_t)_TPI_ALIGN_TOPT(optd->opdes_size);
   1829 		}
   1830 	}
   1831 	return (max_optbuf_len);
   1832 }
   1833 
   1834 /*
   1835  * The theoretical model used in optcom_max_optsize() and
   1836  * opt_level_allopts_lengths() accounts for the worst case of all
   1837  * possible options for the theoretical cases and results in wasteful
   1838  * memory allocations for certain theoretically correct usage scenarios.
   1839  * In practice, the "features" they support are rarely, if ever,
   1840  * used and even then only by test suites for those features (VSU, VST).
   1841  * However, they result in large allocations due to the increased transport
   1842  * T_INFO_ACK OPT_size field affecting t_alloc() users and TLI/XTI library
   1843  * instance data structures for applications.
   1844  *
   1845  * The following routine opt_bloated_maxsize() supports a hack that avoids
   1846  * paying the tax for the bloated options by excluding them and pretending
   1847  * they don't exist for certain features without affecting features that
   1848  * do use them.
   1849  *
   1850  * XXX Currently implemented only for optcom_max_optsize()
   1851  *     (to reduce risk late in release).
   1852  *     TBD for future, investigate use in optcom_level_allopts_lengths() and
   1853  *     all the instances of T_ALLOPT processing to exclude "bloated options".
   1854  *     Will not affect VSU/VST tests as they do not test with IPPROTO_IPV6
   1855  *     level options which are the only ones that fit the "bloated maxsize"
   1856  *     option profile now.
   1857  */
   1858 static boolean_t
   1859 opt_bloated_maxsize(opdes_t *optd)
   1860 {
   1861 	if (optd->opdes_level != IPPROTO_IPV6)
   1862 		return (B_FALSE);
   1863 	switch (optd->opdes_name) {
   1864 	case IPV6_HOPOPTS:
   1865 	case IPV6_DSTOPTS:
   1866 	case IPV6_RTHDRDSTOPTS:
   1867 	case IPV6_RTHDR:
   1868 	case IPV6_PATHMTU:
   1869 		return (B_TRUE);
   1870 	default:
   1871 		break;
   1872 	}
   1873 	return (B_FALSE);
   1874 }
   1875 
   1876 /*
   1877  * optlen is the length of the option content
   1878  * Caller should check the optlen is at least sizeof (struct T_opthdr)
   1879  */
   1880 static boolean_t
   1881 opt_length_ok(opdes_t *optd, t_uscalar_t optlen)
   1882 {
   1883 	/*
   1884 	 * Verify length.
   1885 	 * Value specified should match length of fixed length option or be
   1886 	 * less than maxlen of variable length option.
   1887 	 */
   1888 	if (optd->opdes_props & OP_VARLEN) {
   1889 		if (optlen <= optd->opdes_size)
   1890 			return (B_TRUE);
   1891 	} else {
   1892 		/* fixed length option */
   1893 		if (optlen == optd->opdes_size)
   1894 			return (B_TRUE);
   1895 	}
   1896 	return (B_FALSE);
   1897 }
   1898 
   1899 /*
   1900  * This routine manages the allocation and free of the space for
   1901  * an extension header or option. Returns failure if memory
   1902  * can not be allocated.
   1903  */
   1904 int
   1905 optcom_pkt_set(uchar_t *invalp, uint_t inlen,
   1906     uchar_t **optbufp, uint_t *optlenp)
   1907 {
   1908 	uchar_t *optbuf;
   1909 	uchar_t	*optp;
   1910 
   1911 	if (inlen == *optlenp) {
   1912 		/* Unchanged length - no need to reallocate */
   1913 		optp = *optbufp;
   1914 		bcopy(invalp, optp, inlen);
   1915 		return (0);
   1916 	}
   1917 	if (inlen > 0) {
   1918 		/* Allocate new buffer before free */
   1919 		optbuf = kmem_alloc(inlen, KM_NOSLEEP);
   1920 		if (optbuf == NULL)
   1921 			return (ENOMEM);
   1922 	} else {
   1923 		optbuf = NULL;
   1924 	}
   1925 
   1926 	/* Free old buffer */
   1927 	if (*optlenp != 0)
   1928 		kmem_free(*optbufp, *optlenp);
   1929 
   1930 	if (inlen > 0)
   1931 		bcopy(invalp, optbuf, inlen);
   1932 
   1933 	*optbufp = optbuf;
   1934 	*optlenp = inlen;
   1935 	return (0);
   1936 }
   1937 
   1938 int
   1939 process_auxiliary_options(conn_t *connp, void *control, t_uscalar_t controllen,
   1940     void *optbuf, optdb_obj_t *dbobjp, int (*opt_set_fn)(conn_t *,
   1941     uint_t, int, int, uint_t, uchar_t *, uint_t *, uchar_t *, void *, cred_t *),
   1942     cred_t *cr)
   1943 {
   1944 	struct cmsghdr *cmsg;
   1945 	opdes_t *optd;
   1946 	t_uscalar_t outlen;
   1947 	int error = EOPNOTSUPP;
   1948 	t_uscalar_t len;
   1949 	uint_t opt_arr_cnt = dbobjp->odb_opt_arr_cnt;
   1950 	opdes_t *opt_arr = dbobjp->odb_opt_des_arr;
   1951 
   1952 	for (cmsg = (struct cmsghdr *)control;
   1953 	    CMSG_VALID(cmsg, control, (uintptr_t)control + controllen);
   1954 	    cmsg = CMSG_NEXT(cmsg)) {
   1955 
   1956 		len = (t_uscalar_t)CMSG_CONTENTLEN(cmsg);
   1957 		/* Find the option in the opt_arr. */
   1958 		optd = proto_opt_lookup(cmsg->cmsg_level, cmsg->cmsg_type,
   1959 		    opt_arr, opt_arr_cnt);
   1960 		if (optd == NULL) {
   1961 			return (EINVAL);
   1962 		}
   1963 		if (OA_READONLY_PERMISSION(optd, cr)) {
   1964 			return (EACCES);
   1965 		}
   1966 		if (OA_MATCHED_PRIV(optd, cr)) {
   1967 			/*
   1968 			 * For privileged options, we DO perform
   1969 			 * access checks as is common sense
   1970 			 */
   1971 			if (!OA_WX_ANYPRIV(optd)) {
   1972 				return (EACCES);
   1973 			}
   1974 		} else {
   1975 			/*
   1976 			 * For non privileged, we fail instead following
   1977 			 * "ignore" semantics dictated by XTI spec for
   1978 			 * permissions problems.
   1979 			 */
   1980 			if (!OA_WX_NOPRIV(optd)) { /* nopriv */
   1981 				return (EACCES);
   1982 			}
   1983 		}
   1984 		error = opt_set_fn(connp, SETFN_UD_NEGOTIATE, optd->opdes_level,
   1985 		    optd->opdes_name, len, (uchar_t *)CMSG_CONTENT(cmsg),
   1986 		    &outlen, (uchar_t *)CMSG_CONTENT(cmsg), optbuf, cr);
   1987 		if (error > 0) {
   1988 			return (error);
   1989 		} else if (outlen > len) {
   1990 			return (EINVAL);
   1991 		} else {
   1992 			/*
   1993 			 * error can be -ve if the protocol wants to
   1994 			 * pass the option to IP. We donot pass auxiliary
   1995 			 * options to IP.
   1996 			 */
   1997 			error = 0;
   1998 		}
   1999 	}
   2000 	return (error);
   2001 }
   2002