Home | History | Annotate | Download | only in ip
      1      0      stevel /*
      2      0      stevel  * CDDL HEADER START
      3      0      stevel  *
      4      0      stevel  * The contents of this file are subject to the terms of the
      5   1289     ja97890  * Common Development and Distribution License (the "License").
      6   1289     ja97890  * You may not use this file except in compliance with the License.
      7      0      stevel  *
      8      0      stevel  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
      9      0      stevel  * or http://www.opensolaris.org/os/licensing.
     10      0      stevel  * See the License for the specific language governing permissions
     11      0      stevel  * and limitations under the License.
     12      0      stevel  *
     13      0      stevel  * When distributing Covered Code, include this CDDL HEADER in each
     14      0      stevel  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
     15      0      stevel  * If applicable, add the following below this CDDL HEADER, with the
     16      0      stevel  * fields enclosed by brackets "[]" replaced with your own identifying
     17      0      stevel  * information: Portions Copyright [yyyy] [name of copyright owner]
     18      0      stevel  *
     19      0      stevel  * CDDL HEADER END
     20      0      stevel  */
     21      0      stevel /*
     22   8477         Rao  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
     23      0      stevel  * Use is subject to license terms.
     24      0      stevel  */
     25      0      stevel /* Copyright (c) 1990 Mentat Inc. */
     26      0      stevel 
     27      0      stevel #include <sys/types.h>
     28      0      stevel #include <sys/stream.h>
     29      0      stevel #include <sys/stropts.h>
     30      0      stevel #include <sys/strlog.h>
     31      0      stevel #include <sys/strsun.h>
     32      0      stevel #define	_SUN_TPI_VERSION 2
     33      0      stevel #include <sys/tihdr.h>
     34      0      stevel #include <sys/timod.h>
     35      0      stevel #include <sys/ddi.h>
     36      0      stevel #include <sys/sunddi.h>
     37   1676         jpk #include <sys/strsubr.h>
     38  11042        Erik #include <sys/suntpi.h>
     39  11042        Erik #include <sys/xti_inet.h>
     40      0      stevel #include <sys/cmn_err.h>
     41      0      stevel #include <sys/kmem.h>
     42  11042        Erik #include <sys/cred_impl.h>
     43      0      stevel #include <sys/policy.h>
     44   1676         jpk #include <sys/priv.h>
     45  11042        Erik #include <sys/ucred.h>
     46      0      stevel #include <sys/zone.h>
     47      0      stevel 
     48   8348        Eric #include <sys/sockio.h>
     49      0      stevel #include <sys/socket.h>
     50   8348        Eric #include <sys/socketvar.h>
     51  11042        Erik #include <sys/vtrace.h>
     52  11042        Erik #include <sys/sdt.h>
     53  11042        Erik #include <sys/debug.h>
     54      0      stevel #include <sys/isa_defs.h>
     55  11042        Erik #include <sys/random.h>
     56      0      stevel #include <netinet/in.h>
     57      0      stevel #include <netinet/ip6.h>
     58      0      stevel #include <netinet/icmp6.h>
     59  11042        Erik #include <netinet/udp.h>
     60  11042        Erik 
     61      0      stevel #include <inet/common.h>
     62      0      stevel #include <inet/ip.h>
     63  11042        Erik #include <inet/ip_impl.h>
     64  11042        Erik #include <inet/ipsec_impl.h>
     65      0      stevel #include <inet/ip6.h>
     66  11042        Erik #include <inet/ip_ire.h>
     67  11042        Erik #include <inet/ip_if.h>
     68  11042        Erik #include <inet/ip_multi.h>
     69  11042        Erik #include <inet/ip_ndp.h>
     70   8348        Eric #include <inet/proto_set.h>
     71  11042        Erik #include <inet/mib2.h>
     72      0      stevel #include <inet/nd.h>
     73      0      stevel #include <inet/optcom.h>
     74      0      stevel #include <inet/snmpcom.h>
     75      0      stevel #include <inet/kstatcom.h>
     76      0      stevel #include <inet/ipclassifier.h>
     77   1676         jpk 
     78   1676         jpk #include <sys/tsol/label.h>
     79   1676         jpk #include <sys/tsol/tnet.h>
     80   3318     rshoaib 
     81  11042        Erik #include <inet/rawip_impl.h>
     82  11042        Erik 
     83   8348        Eric #include <sys/disp.h>
     84      0      stevel 
     85      0      stevel /*
     86      0      stevel  * Synchronization notes:
     87      0      stevel  *
     88  11042        Erik  * RAWIP is MT and uses the usual kernel synchronization primitives. We use
     89  11042        Erik  * conn_lock to protect the icmp_t.
     90   5240    nordmark  *
     91   5240    nordmark  * Plumbing notes:
     92   5240    nordmark  * ICMP is always a device driver. For compatibility with mibopen() code
     93   5240    nordmark  * it is possible to I_PUSH "icmp", but that results in pushing a passthrough
     94   5240    nordmark  * dummy module.
     95      0      stevel  */
     96      0      stevel 
     97      0      stevel static void	icmp_addr_req(queue_t *q, mblk_t *mp);
     98   8348        Eric static void	icmp_tpi_bind(queue_t *q, mblk_t *mp);
     99  11042        Erik static void	icmp_bind_proto(icmp_t *icmp);
    100  11042        Erik static int	icmp_build_hdr_template(conn_t *, const in6_addr_t *,
    101  11042        Erik     const in6_addr_t *, uint32_t);
    102      0      stevel static void	icmp_capability_req(queue_t *q, mblk_t *mp);
    103   8348        Eric static int	icmp_close(queue_t *q, int flags);
    104  11042        Erik static void	icmp_close_free(conn_t *);
    105   8348        Eric static void	icmp_tpi_connect(queue_t *q, mblk_t *mp);
    106   8348        Eric static void	icmp_tpi_disconnect(queue_t *q, mblk_t *mp);
    107      0      stevel static void	icmp_err_ack(queue_t *q, mblk_t *mp, t_scalar_t t_error,
    108  11042        Erik     int sys_error);
    109      0      stevel static void	icmp_err_ack_prim(queue_t *q, mblk_t *mp, t_scalar_t primitive,
    110  11042        Erik     t_scalar_t tlierr, int sys_error);
    111  11042        Erik static void	icmp_icmp_input(void *arg1, mblk_t *mp, void *arg2,
    112  11042        Erik     ip_recv_attr_t *);
    113  11042        Erik static void	icmp_icmp_error_ipv6(conn_t *connp, mblk_t *mp,
    114  11042        Erik     ip_recv_attr_t *);
    115      0      stevel static void	icmp_info_req(queue_t *q, mblk_t *mp);
    116  11042        Erik static void	icmp_input(void *, mblk_t *, void *, ip_recv_attr_t *);
    117   8348        Eric static conn_t 	*icmp_open(int family, cred_t *credp, int *err, int flags);
    118   5240    nordmark static int	icmp_openv4(queue_t *q, dev_t *devp, int flag, int sflag,
    119   5240    nordmark 		    cred_t *credp);
    120   5240    nordmark static int	icmp_openv6(queue_t *q, dev_t *devp, int flag, int sflag,
    121   5240    nordmark 		    cred_t *credp);
    122      0      stevel static boolean_t icmp_opt_allow_udr_set(t_scalar_t level, t_scalar_t name);
    123   8348        Eric int		icmp_opt_set(conn_t *connp, uint_t optset_context,
    124      0      stevel 		    int level, int name, uint_t inlen,
    125      0      stevel 		    uchar_t *invalp, uint_t *outlenp, uchar_t *outvalp,
    126   8348        Eric 		    void *thisdg_attrs, cred_t *cr);
    127   8348        Eric int		icmp_opt_get(conn_t *connp, int level, int name,
    128      0      stevel 		    uchar_t *ptr);
    129  11042        Erik static int	icmp_output_newdst(conn_t *connp, mblk_t *data_mp, sin_t *sin,
    130  11042        Erik 		    sin6_t *sin6, cred_t *cr, pid_t pid, ip_xmit_attr_t *ixa);
    131      0      stevel static int	icmp_param_get(queue_t *q, mblk_t *mp, caddr_t cp, cred_t *cr);
    132   3448    dh155122 static boolean_t icmp_param_register(IDP *ndp, icmpparam_t *icmppa, int cnt);
    133      0      stevel static int	icmp_param_set(queue_t *q, mblk_t *mp, char *value,
    134      0      stevel 		    caddr_t cp, cred_t *cr);
    135  11042        Erik static mblk_t	*icmp_prepend_hdr(conn_t *, ip_xmit_attr_t *, const ip_pkt_t *,
    136  11042        Erik     const in6_addr_t *, const in6_addr_t *, uint32_t, mblk_t *, int *);
    137  11042        Erik static mblk_t	*icmp_prepend_header_template(conn_t *, ip_xmit_attr_t *,
    138  11042        Erik     mblk_t *, const in6_addr_t *, uint32_t, int *);
    139      0      stevel static int	icmp_snmp_set(queue_t *q, t_scalar_t level, t_scalar_t name,
    140      0      stevel 		    uchar_t *ptr, int len);
    141      0      stevel static void	icmp_ud_err(queue_t *q, mblk_t *mp, t_scalar_t err);
    142   8348        Eric static void	icmp_tpi_unbind(queue_t *q, mblk_t *mp);
    143      0      stevel static void	icmp_wput(queue_t *q, mblk_t *mp);
    144   8348        Eric static void	icmp_wput_fallback(queue_t *q, mblk_t *mp);
    145      0      stevel static void	icmp_wput_other(queue_t *q, mblk_t *mp);
    146      0      stevel static void	icmp_wput_iocdata(queue_t *q, mblk_t *mp);
    147      0      stevel static void	icmp_wput_restricted(queue_t *q, mblk_t *mp);
    148  11042        Erik static void	icmp_ulp_recv(conn_t *, mblk_t *, uint_t);
    149      0      stevel 
    150   3448    dh155122 static void	*rawip_stack_init(netstackid_t stackid, netstack_t *ns);
    151   3448    dh155122 static void	rawip_stack_fini(netstackid_t stackid, void *arg);
    152   3448    dh155122 
    153   3448    dh155122 static void	*rawip_kstat_init(netstackid_t stackid);
    154   3448    dh155122 static void	rawip_kstat_fini(netstackid_t stackid, kstat_t *ksp);
    155      0      stevel static int	rawip_kstat_update(kstat_t *kp, int rw);
    156   8348        Eric static void	rawip_stack_shutdown(netstackid_t stackid, void *arg);
    157  11042        Erik 
    158  11042        Erik /* Common routines for TPI and socket module */
    159  11042        Erik static conn_t	*rawip_do_open(int, cred_t *, int *, int);
    160  11042        Erik static void	rawip_do_close(conn_t *);
    161  11042        Erik static int	rawip_do_bind(conn_t *, struct sockaddr *, socklen_t);
    162  11042        Erik static int	rawip_do_unbind(conn_t *);
    163  11042        Erik static int	rawip_do_connect(conn_t *, const struct sockaddr *, socklen_t,
    164  11042        Erik     cred_t *, pid_t);
    165   8348        Eric 
    166   8348        Eric int		rawip_getsockname(sock_lower_handle_t, struct sockaddr *,
    167   8348        Eric 		    socklen_t *, cred_t *);
    168   8348        Eric int		rawip_getpeername(sock_lower_handle_t, struct sockaddr *,
    169   8348        Eric 		    socklen_t *, cred_t *);
    170      0      stevel 
    171   5240    nordmark static struct module_info icmp_mod_info =  {
    172      0      stevel 	5707, "icmp", 1, INFPSZ, 512, 128
    173      0      stevel };
    174      0      stevel 
    175   5240    nordmark /*
    176   5240    nordmark  * Entry points for ICMP as a device.
    177   5240    nordmark  * We have separate open functions for the /dev/icmp and /dev/icmp6 devices.
    178   5240    nordmark  */
    179   5240    nordmark static struct qinit icmprinitv4 = {
    180   5240    nordmark 	NULL, NULL, icmp_openv4, icmp_close, NULL, &icmp_mod_info
    181   5240    nordmark };
    182   5240    nordmark 
    183   5240    nordmark static struct qinit icmprinitv6 = {
    184   5240    nordmark 	NULL, NULL, icmp_openv6, icmp_close, NULL, &icmp_mod_info
    185   5240    nordmark };
    186   5240    nordmark 
    187   5240    nordmark static struct qinit icmpwinit = {
    188  11042        Erik 	(pfi_t)icmp_wput, (pfi_t)ip_wsrv, NULL, NULL, NULL, &icmp_mod_info
    189   8348        Eric };
    190   8348        Eric 
    191   8348        Eric /* ICMP entry point during fallback */
    192   8348        Eric static struct qinit icmp_fallback_sock_winit = {
    193   8348        Eric 	(pfi_t)icmp_wput_fallback, NULL, NULL, NULL, NULL, &icmp_mod_info
    194   5240    nordmark };
    195   5240    nordmark 
    196   5240    nordmark /* For AF_INET aka /dev/icmp */
    197   5240    nordmark struct streamtab icmpinfov4 = {
    198   5240    nordmark 	&icmprinitv4, &icmpwinit
    199   5240    nordmark };
    200   5240    nordmark 
    201   5240    nordmark /* For AF_INET6 aka /dev/icmp6 */
    202   5240    nordmark struct streamtab icmpinfov6 = {
    203   5240    nordmark 	&icmprinitv6, &icmpwinit
    204      0      stevel };
    205      0      stevel 
    206      0      stevel static sin_t	sin_null;	/* Zero address for quick clears */
    207      0      stevel static sin6_t	sin6_null;	/* Zero address for quick clears */
    208      0      stevel 
    209      0      stevel /* Default structure copied into T_INFO_ACK messages */
    210      0      stevel static struct T_info_ack icmp_g_t_info_ack = {
    211      0      stevel 	T_INFO_ACK,
    212      0      stevel 	IP_MAXPACKET,	 /* TSDU_size.  icmp allows maximum size messages. */
    213      0      stevel 	T_INVALID,	/* ETSDU_size.  icmp does not support expedited data. */
    214      0      stevel 	T_INVALID,	/* CDATA_size. icmp does not support connect data. */
    215      0      stevel 	T_INVALID,	/* DDATA_size. icmp does not support disconnect data. */
    216      0      stevel 	0,		/* ADDR_size - filled in later. */
    217      0      stevel 	0,		/* OPT_size - not initialized here */
    218      0      stevel 	IP_MAXPACKET,	/* TIDU_size.  icmp allows maximum size messages. */
    219      0      stevel 	T_CLTS,		/* SERV_type.  icmp supports connection-less. */
    220      0      stevel 	TS_UNBND,	/* CURRENT_state.  This is set from icmp_state. */
    221      0      stevel 	(XPG4_1|SENDZERO) /* PROVIDER_flag */
    222      0      stevel };
    223      0      stevel 
    224      0      stevel /*
    225   3448    dh155122  * Table of ND variables supported by icmp.  These are loaded into is_nd
    226   3448    dh155122  * when the stack instance is created.
    227      0      stevel  * All of these are alterable, within the min/max values given, at run time.
    228      0      stevel  */
    229      0      stevel static icmpparam_t	icmp_param_arr[] = {
    230      0      stevel 	/* min	max	value	name */
    231      0      stevel 	{ 0,	128,	32,	"icmp_wroff_extra" },
    232      0      stevel 	{ 1,	255,	255,	"icmp_ipv4_ttl" },
    233      0      stevel 	{ 0, IPV6_MAX_HOPS, IPV6_DEFAULT_HOPS,	"icmp_ipv6_hoplimit"},
    234      0      stevel 	{ 0,	1,	1,	"icmp_bsd_compat" },
    235      0      stevel 	{ 4096,	65536,	8192,	"icmp_xmit_hiwat"},
    236      0      stevel 	{ 0,	65536,	1024,	"icmp_xmit_lowat"},
    237      0      stevel 	{ 4096,	65536,	8192,	"icmp_recv_hiwat"},
    238      0      stevel 	{ 65536, 1024*1024*1024, 256*1024,	"icmp_max_buf"},
    239  11042        Erik 	{ 0,	1,	0,	"icmp_pmtu_discovery" },
    240  11042        Erik 	{ 0,	1,	0,	"icmp_sendto_ignerr" },
    241      0      stevel };
    242   3448    dh155122 #define	is_wroff_extra			is_param_arr[0].icmp_param_value
    243   3448    dh155122 #define	is_ipv4_ttl			is_param_arr[1].icmp_param_value
    244   3448    dh155122 #define	is_ipv6_hoplimit		is_param_arr[2].icmp_param_value
    245   3448    dh155122 #define	is_bsd_compat			is_param_arr[3].icmp_param_value
    246   3448    dh155122 #define	is_xmit_hiwat			is_param_arr[4].icmp_param_value
    247   3448    dh155122 #define	is_xmit_lowat			is_param_arr[5].icmp_param_value
    248   3448    dh155122 #define	is_recv_hiwat			is_param_arr[6].icmp_param_value
    249   3448    dh155122 #define	is_max_buf			is_param_arr[7].icmp_param_value
    250  11042        Erik #define	is_pmtu_discovery		is_param_arr[8].icmp_param_value
    251  11042        Erik #define	is_sendto_ignerr		is_param_arr[9].icmp_param_value
    252  11042        Erik 
    253  11042        Erik typedef union T_primitives *t_primp_t;
    254   8348        Eric 
    255      0      stevel /*
    256      0      stevel  * This routine is called to handle each O_T_BIND_REQ/T_BIND_REQ message
    257      0      stevel  * passed to icmp_wput.
    258  11042        Erik  * It calls IP to verify the local IP address, and calls IP to insert
    259  11042        Erik  * the conn_t in the fanout table.
    260  11042        Erik  * If everything is ok it then sends the T_BIND_ACK back up.
    261      0      stevel  */
    262      0      stevel static void
    263   8348        Eric icmp_tpi_bind(queue_t *q, mblk_t *mp)
    264   8348        Eric {
    265   8348        Eric 	int	error;
    266   8348        Eric 	struct sockaddr *sa;
    267   8348        Eric 	struct T_bind_req *tbr;
    268   8348        Eric 	socklen_t	len;
    269      0      stevel 	sin_t	*sin;
    270      0      stevel 	sin6_t	*sin6;
    271   8348        Eric 	icmp_t		*icmp;
    272   8348        Eric 	conn_t	*connp = Q_TO_CONN(q);
    273   8348        Eric 	mblk_t *mp1;
    274   8778        Erik 	cred_t *cr;
    275   8778        Erik 
    276   8778        Erik 	/*
    277   8778        Erik 	 * All Solaris components should pass a db_credp
    278   8778        Erik 	 * for this TPI message, hence we ASSERT.
    279   8778        Erik 	 * But in case there is some other M_PROTO that looks
    280   8778        Erik 	 * like a TPI message sent by some other kernel
    281   8778        Erik 	 * component, we check and return an error.
    282   8778        Erik 	 */
    283   8778        Erik 	cr = msg_getcred(mp, NULL);
    284   8778        Erik 	ASSERT(cr != NULL);
    285   8778        Erik 	if (cr == NULL) {
    286   8778        Erik 		icmp_err_ack(q, mp, TSYSERR, EINVAL);
    287   8778        Erik 		return;
    288   8778        Erik 	}
    289   5240    nordmark 
    290   5240    nordmark 	icmp = connp->conn_icmp;
    291      0      stevel 	if ((mp->b_wptr - mp->b_rptr) < sizeof (*tbr)) {
    292      0      stevel 		(void) mi_strlog(q, 1, SL_ERROR|SL_TRACE,
    293      0      stevel 		    "icmp_bind: bad req, len %u",
    294      0      stevel 		    (uint_t)(mp->b_wptr - mp->b_rptr));
    295      0      stevel 		icmp_err_ack(q, mp, TPROTO, 0);
    296      0      stevel 		return;
    297      0      stevel 	}
    298   8348        Eric 
    299      0      stevel 	if (icmp->icmp_state != TS_UNBND) {
    300      0      stevel 		(void) mi_strlog(q, 1, SL_ERROR|SL_TRACE,
    301  11042        Erik 		    "icmp_bind: bad state, %u", icmp->icmp_state);
    302      0      stevel 		icmp_err_ack(q, mp, TOUTSTATE, 0);
    303      0      stevel 		return;
    304      0      stevel 	}
    305   8348        Eric 
    306      0      stevel 	/*
    307      0      stevel 	 * Reallocate the message to make sure we have enough room for an
    308  11042        Erik 	 * address.
    309  11042        Erik 	 */
    310  11042        Erik 	mp1 = reallocb(mp, sizeof (struct T_bind_ack) + sizeof (sin6_t), 1);
    311  11042        Erik 	if (mp1 == NULL) {
    312      0      stevel 		icmp_err_ack(q, mp, TSYSERR, ENOMEM);
    313      0      stevel 		return;
    314      0      stevel 	}
    315      0      stevel 	mp = mp1;
    316   8348        Eric 
    317   8348        Eric 	/* Reset the message type in preparation for shipping it back. */
    318   8348        Eric 	DB_TYPE(mp) = M_PCPROTO;
    319      0      stevel 	tbr = (struct T_bind_req *)mp->b_rptr;
    320   8348        Eric 	len = tbr->ADDR_length;
    321   8348        Eric 	switch (len) {
    322   8348        Eric 	case 0:	/* request for a generic port */
    323      0      stevel 		tbr->ADDR_offset = sizeof (struct T_bind_req);
    324  11042        Erik 		if (connp->conn_family == AF_INET) {
    325      0      stevel 			tbr->ADDR_length = sizeof (sin_t);
    326      0      stevel 			sin = (sin_t *)&tbr[1];
    327      0      stevel 			*sin = sin_null;
    328      0      stevel 			sin->sin_family = AF_INET;
    329      0      stevel 			mp->b_wptr = (uchar_t *)&sin[1];
    330   8348        Eric 			sa = (struct sockaddr *)sin;
    331   8348        Eric 			len = sizeof (sin_t);
    332      0      stevel 		} else {
    333  11042        Erik 			ASSERT(connp->conn_family == AF_INET6);
    334      0      stevel 			tbr->ADDR_length = sizeof (sin6_t);
    335      0      stevel 			sin6 = (sin6_t *)&tbr[1];
    336      0      stevel 			*sin6 = sin6_null;
    337      0      stevel 			sin6->sin6_family = AF_INET6;
    338      0      stevel 			mp->b_wptr = (uchar_t *)&sin6[1];
    339   8348        Eric 			sa = (struct sockaddr *)sin6;
    340   8348        Eric 			len = sizeof (sin6_t);
    341   8348        Eric 		}
    342   8348        Eric 		break;
    343   8348        Eric 
    344   8348        Eric 	case sizeof (sin_t):	/* Complete IPv4 address */
    345   8348        Eric 		sa = (struct sockaddr *)mi_offset_param(mp, tbr->ADDR_offset,
    346      0      stevel 		    sizeof (sin_t));
    347   8348        Eric 		break;
    348   8348        Eric 
    349   8348        Eric 	case sizeof (sin6_t):	/* Complete IPv6 address */
    350   8348        Eric 		sa = (struct sockaddr *)mi_offset_param(mp,
    351   8348        Eric 		    tbr->ADDR_offset, sizeof (sin6_t));
    352   8348        Eric 		break;
    353   8348        Eric 
    354      0      stevel 	default:
    355      0      stevel 		(void) mi_strlog(q, 1, SL_ERROR|SL_TRACE,
    356  11042        Erik 		    "icmp_bind: bad ADDR_length %u", tbr->ADDR_length);
    357      0      stevel 		icmp_err_ack(q, mp, TBADADDR, 0);
    358      0      stevel 		return;
    359      0      stevel 	}
    360   5240    nordmark 
    361   8348        Eric 	error = rawip_do_bind(connp, sa, len);
    362   8348        Eric 	if (error != 0) {
    363   8348        Eric 		if (error > 0) {
    364   8348        Eric 			icmp_err_ack(q, mp, TSYSERR, error);
    365   8348        Eric 		} else {
    366   8348        Eric 			icmp_err_ack(q, mp, -error, 0);
    367   8348        Eric 		}
    368   8348        Eric 	} else {
    369   8348        Eric 		tbr->PRIM_type = T_BIND_ACK;
    370   8348        Eric 		qreply(q, mp);
    371   8348        Eric 	}
    372   8348        Eric }
    373   8348        Eric 
    374   8348        Eric static int
    375   8348        Eric rawip_do_bind(conn_t *connp, struct sockaddr *sa, socklen_t len)
    376   8348        Eric {
    377   8348        Eric 	sin_t		*sin;
    378   8348        Eric 	sin6_t		*sin6;
    379  11042        Erik 	icmp_t		*icmp = connp->conn_icmp;
    380  11042        Erik 	int		error = 0;
    381  11042        Erik 	ip_laddr_t	laddr_type = IPVL_UNICAST_UP;	/* INADDR_ANY */
    382  11042        Erik 	in_port_t	lport;		/* Network byte order */
    383  11042        Erik 	ipaddr_t	v4src;		/* Set if AF_INET */
    384  11042        Erik 	in6_addr_t	v6src;
    385  11042        Erik 	uint_t		scopeid = 0;
    386  11042        Erik 	zoneid_t	zoneid = IPCL_ZONEID(connp);
    387  11042        Erik 	ip_stack_t	*ipst = connp->conn_netstack->netstack_ip;
    388   8348        Eric 
    389   8348        Eric 	if (sa == NULL || !OK_32PTR((char *)sa)) {
    390   8348        Eric 		return (EINVAL);
    391   8348        Eric 	}
    392   8348        Eric 
    393   8348        Eric 	switch (len) {
    394   8348        Eric 	case sizeof (sin_t):    /* Complete IPv4 address */
    395   8348        Eric 		sin = (sin_t *)sa;
    396   8348        Eric 		if (sin->sin_family != AF_INET ||
    397  11042        Erik 		    connp->conn_family != AF_INET) {
    398   8348        Eric 			/* TSYSERR, EAFNOSUPPORT */
    399  11042        Erik 			return (EAFNOSUPPORT);
    400  11042        Erik 		}
    401  11042        Erik 		v4src = sin->sin_addr.s_addr;
    402  11042        Erik 		IN6_IPADDR_TO_V4MAPPED(v4src, &v6src);
    403  11042        Erik 		if (v4src != INADDR_ANY) {
    404  11042        Erik 			laddr_type = ip_laddr_verify_v4(v4src, zoneid, ipst,
    405  11042        Erik 			    B_TRUE);
    406  11042        Erik 		}
    407  11042        Erik 		lport = sin->sin_port;
    408   8348        Eric 		break;
    409   8348        Eric 	case sizeof (sin6_t): /* Complete IPv6 address */
    410   8348        Eric 		sin6 = (sin6_t *)sa;
    411   8348        Eric 		if (sin6->sin6_family != AF_INET6 ||
    412  11042        Erik 		    connp->conn_family != AF_INET6) {
    413   8348        Eric 			/* TSYSERR, EAFNOSUPPORT */
    414  11042        Erik 			return (EAFNOSUPPORT);
    415   8348        Eric 		}
    416   8348        Eric 		/* No support for mapped addresses on raw sockets */
    417   8348        Eric 		if (IN6_IS_ADDR_V4MAPPED(&sin6->sin6_addr)) {
    418   8348        Eric 			/* TSYSERR, EADDRNOTAVAIL */
    419  11042        Erik 			return (EADDRNOTAVAIL);
    420  11042        Erik 		}
    421  11042        Erik 		v6src = sin6->sin6_addr;
    422  11042        Erik 		if (!IN6_IS_ADDR_UNSPECIFIED(&v6src)) {
    423  11042        Erik 			if (IN6_IS_ADDR_LINKSCOPE(&v6src))
    424  11042        Erik 				scopeid = sin6->sin6_scope_id;
    425  11042        Erik 			laddr_type = ip_laddr_verify_v6(&v6src, zoneid, ipst,
    426  11042        Erik 			    B_TRUE, scopeid);
    427  11042        Erik 		}
    428  11042        Erik 		lport = sin6->sin6_port;
    429   8348        Eric 		break;
    430   8348        Eric 
    431   8348        Eric 	default:
    432   8348        Eric 		/* TBADADDR */
    433  11042        Erik 		return (EADDRNOTAVAIL);
    434  11042        Erik 	}
    435  11042        Erik 
    436  11042        Erik 	/* Is the local address a valid unicast, multicast, or broadcast? */
    437  11042        Erik 	if (laddr_type == IPVL_BAD)
    438  11042        Erik 		return (EADDRNOTAVAIL);
    439  11042        Erik 
    440  11042        Erik 	/*
    441  11042        Erik 	 * The state must be TS_UNBND.
    442  11042        Erik 	 */
    443  11042        Erik 	mutex_enter(&connp->conn_lock);
    444  11042        Erik 	if (icmp->icmp_state != TS_UNBND) {
    445  11042        Erik 		mutex_exit(&connp->conn_lock);
    446  11042        Erik 		return (-TOUTSTATE);
    447  11042        Erik 	}
    448   5240    nordmark 
    449      0      stevel 	/*
    450      0      stevel 	 * Copy the source address into our icmp structure.  This address
    451      0      stevel 	 * may still be zero; if so, ip will fill in the correct address
    452      0      stevel 	 * each time an outbound packet is passed to it.
    453   5240    nordmark 	 * If we are binding to a broadcast or multicast address then
    454  11042        Erik 	 * we just set the conn_bound_addr since we don't want to use
    455  11042        Erik 	 * that as the source address when sending.
    456  11042        Erik 	 */
    457  11042        Erik 	connp->conn_bound_addr_v6 = v6src;
    458  11042        Erik 	connp->conn_laddr_v6 = v6src;
    459  11042        Erik 	if (scopeid != 0) {
    460  11042        Erik 		connp->conn_ixa->ixa_flags |= IXAF_SCOPEID_SET;
    461  11042        Erik 		connp->conn_ixa->ixa_scopeid = scopeid;
    462  11042        Erik 		connp->conn_incoming_ifindex = scopeid;
    463  11042        Erik 	} else {
    464  11042        Erik 		connp->conn_ixa->ixa_flags &= ~IXAF_SCOPEID_SET;
    465  11042        Erik 		connp->conn_incoming_ifindex = connp->conn_bound_if;
    466  11042        Erik 	}
    467  11042        Erik 
    468  11042        Erik 	switch (laddr_type) {
    469  11042        Erik 	case IPVL_UNICAST_UP:
    470  11042        Erik 	case IPVL_UNICAST_DOWN:
    471  11042        Erik 		connp->conn_saddr_v6 = v6src;
    472  11042        Erik 		connp->conn_mcbc_bind = B_FALSE;
    473  11042        Erik 		break;
    474  11042        Erik 	case IPVL_MCAST:
    475  11042        Erik 	case IPVL_BCAST:
    476  11042        Erik 		/* ip_set_destination will pick a source address later */
    477  11042        Erik 		connp->conn_saddr_v6 = ipv6_all_zeros;
    478  11042        Erik 		connp->conn_mcbc_bind = B_TRUE;
    479  11042        Erik 		break;
    480  11042        Erik 	}
    481  11042        Erik 
    482  11042        Erik 	/* Any errors after this point should use late_error */
    483  11042        Erik 
    484  11042        Erik 	/*
    485  11042        Erik 	 * Use sin_port/sin6_port since applications like psh use SOCK_RAW
    486  11042        Erik 	 * with IPPROTO_TCP.
    487  11042        Erik 	 */
    488  11042        Erik 	connp->conn_lport = lport;
    489  11042        Erik 	connp->conn_fport = 0;
    490  11042        Erik 
    491  11042        Erik 	if (connp->conn_family == AF_INET) {
    492  11042        Erik 		ASSERT(connp->conn_ipversion == IPV4_VERSION);
    493  11042        Erik 	} else {
    494  11042        Erik 		ASSERT(connp->conn_ipversion == IPV6_VERSION);
    495  11042        Erik 	}
    496  11042        Erik 
    497  11042        Erik 	icmp->icmp_state = TS_IDLE;
    498  11042        Erik 
    499  11042        Erik 	/*
    500  11042        Erik 	 * We create an initial header template here to make a subsequent
    501  11042        Erik 	 * sendto have a starting point. Since conn_last_dst is zero the
    502  11042        Erik 	 * first sendto will always follow the 'dst changed' code path.
    503  11042        Erik 	 * Note that we defer massaging options and the related checksum
    504  11042        Erik 	 * adjustment until we have a destination address.
    505  11042        Erik 	 */
    506  11042        Erik 	error = icmp_build_hdr_template(connp, &connp->conn_saddr_v6,
    507  11042        Erik 	    &connp->conn_faddr_v6, connp->conn_flowinfo);
    508  11042        Erik 	if (error != 0) {
    509  11042        Erik 		mutex_exit(&connp->conn_lock);
    510  11042        Erik 		goto late_error;
    511  11042        Erik 	}
    512  11042        Erik 	/* Just in case */
    513  11042        Erik 	connp->conn_faddr_v6 = ipv6_all_zeros;
    514  11042        Erik 	connp->conn_v6lastdst = ipv6_all_zeros;
    515  11042        Erik 	mutex_exit(&connp->conn_lock);
    516  11042        Erik 
    517  11042        Erik 	error = ip_laddr_fanout_insert(connp);
    518  11042        Erik 	if (error != 0)
    519  11042        Erik 		goto late_error;
    520  11042        Erik 
    521  11042        Erik 	/* Bind succeeded */
    522  11042        Erik 	return (0);
    523  11042        Erik 
    524  11042        Erik late_error:
    525  11042        Erik 	mutex_enter(&connp->conn_lock);
    526  11042        Erik 	connp->conn_saddr_v6 = ipv6_all_zeros;
    527  11042        Erik 	connp->conn_bound_addr_v6 = ipv6_all_zeros;
    528  11042        Erik 	connp->conn_laddr_v6 = ipv6_all_zeros;
    529  11042        Erik 	if (scopeid != 0) {
    530  11042        Erik 		connp->conn_ixa->ixa_flags &= ~IXAF_SCOPEID_SET;
    531  11042        Erik 		connp->conn_incoming_ifindex = connp->conn_bound_if;
    532  11042        Erik 	}
    533  11042        Erik 	icmp->icmp_state = TS_UNBND;
    534  11042        Erik 	connp->conn_v6lastdst = ipv6_all_zeros;
    535  11042        Erik 	connp->conn_lport = 0;
    536  11042        Erik 
    537  11042        Erik 	/* Restore the header that was built above - different source address */
    538  11042        Erik 	(void) icmp_build_hdr_template(connp, &connp->conn_saddr_v6,
    539  11042        Erik 	    &connp->conn_faddr_v6, connp->conn_flowinfo);
    540  11042        Erik 	mutex_exit(&connp->conn_lock);
    541  11042        Erik 	return (error);
    542  11042        Erik }
    543  11042        Erik 
    544  11042        Erik /*
    545  11042        Erik  * Tell IP to just bind to the protocol.
    546  11042        Erik  */
    547  11042        Erik static void
    548  11042        Erik icmp_bind_proto(icmp_t *icmp)
    549  11042        Erik {
    550  11042        Erik 	conn_t	*connp = icmp->icmp_connp;
    551  11042        Erik 
    552  11042        Erik 	mutex_enter(&connp->conn_lock);
    553  11042        Erik 	connp->conn_saddr_v6 = ipv6_all_zeros;
    554  11042        Erik 	connp->conn_laddr_v6 = ipv6_all_zeros;
    555  11042        Erik 	connp->conn_faddr_v6 = ipv6_all_zeros;
    556  11042        Erik 	connp->conn_v6lastdst = ipv6_all_zeros;
    557  11042        Erik 	mutex_exit(&connp->conn_lock);
    558  11042        Erik 
    559  11042        Erik 	(void) ip_laddr_fanout_insert(connp);
    560  11042        Erik }
    561  11042        Erik 
    562  11042        Erik /*
    563  11042        Erik  * This routine handles each T_CONN_REQ message passed to icmp.  It
    564  11042        Erik  * associates a default destination address with the stream.
    565  11042        Erik  *
    566  11042        Erik  * After various error checks are completed, icmp_connect() lays
    567  11042        Erik  * the target address and port into the composite header template.
    568  11042        Erik  * Then we ask IP for information, including a source address if we didn't
    569  11042        Erik  * already have one. Finally we send up the T_OK_ACK reply message.
    570  11042        Erik  */
    571   8348        Eric static void
    572   8348        Eric icmp_tpi_connect(queue_t *q, mblk_t *mp)
    573   8348        Eric {
    574   8348        Eric 	conn_t	*connp = Q_TO_CONN(q);
    575      0      stevel 	struct T_conn_req	*tcr;
    576   8348        Eric 	struct sockaddr *sa;
    577   8348        Eric 	socklen_t len;
    578   8348        Eric 	int error;
    579   8778        Erik 	cred_t *cr;
    580  11042        Erik 	pid_t pid;
    581   8778        Erik 	/*
    582   8778        Erik 	 * All Solaris components should pass a db_credp
    583   8778        Erik 	 * for this TPI message, hence we ASSERT.
    584   8778        Erik 	 * But in case there is some other M_PROTO that looks
    585   8778        Erik 	 * like a TPI message sent by some other kernel
    586   8778        Erik 	 * component, we check and return an error.
    587   8778        Erik 	 */
    588  11042        Erik 	cr = msg_getcred(mp, &pid);
    589   8778        Erik 	ASSERT(cr != NULL);
    590   8778        Erik 	if (cr == NULL) {
    591   8778        Erik 		icmp_err_ack(q, mp, TSYSERR, EINVAL);
    592   8778        Erik 		return;
    593   8778        Erik 	}
    594   5240    nordmark 
    595      0      stevel 	tcr = (struct T_conn_req *)mp->b_rptr;
    596      0      stevel 	/* Sanity checks */
    597   5240    nordmark 	if ((mp->b_wptr - mp->b_rptr) < sizeof (struct T_conn_req)) {
    598      0      stevel 		icmp_err_ack(q, mp, TPROTO, 0);
    599      0      stevel 		return;
    600      0      stevel 	}
    601      0      stevel 
    602      0      stevel 	if (tcr->OPT_length != 0) {
    603      0      stevel 		icmp_err_ack(q, mp, TBADOPT, 0);
    604      0      stevel 		return;
    605      0      stevel 	}
    606   5240    nordmark 
    607   8348        Eric 	len = tcr->DEST_length;
    608   8348        Eric 
    609   8348        Eric 	switch (len) {
    610      0      stevel 	default:
    611      0      stevel 		icmp_err_ack(q, mp, TBADADDR, 0);
    612      0      stevel 		return;
    613      0      stevel 	case sizeof (sin_t):
    614   8348        Eric 		sa = (struct sockaddr *)mi_offset_param(mp, tcr->DEST_offset,
    615      0      stevel 		    sizeof (sin_t));
    616   8348        Eric 		break;
    617   8348        Eric 	case sizeof (sin6_t):
    618   8348        Eric 		sa = (struct sockaddr *)mi_offset_param(mp,
    619   8348        Eric 		    tcr->DEST_offset, sizeof (sin6_t));
    620   8348        Eric 		break;
    621   8348        Eric 	}
    622   8348        Eric 
    623  11042        Erik 	error = proto_verify_ip_addr(connp->conn_family, sa, len);
    624   8348        Eric 	if (error != 0) {
    625   8348        Eric 		icmp_err_ack(q, mp, TSYSERR, error);
    626   8348        Eric 		return;
    627   8348        Eric 	}
    628   8348        Eric 
    629  11042        Erik 	error = rawip_do_connect(connp, sa, len, cr, pid);
    630   8348        Eric 	if (error != 0) {
    631   8348        Eric 		if (error < 0) {
    632   8348        Eric 			icmp_err_ack(q, mp, -error, 0);
    633   8348        Eric 		} else {
    634   8348        Eric 			icmp_err_ack(q, mp, 0, error);
    635   8348        Eric 		}
    636   8348        Eric 	} else {
    637   8348        Eric 		mblk_t *mp1;
    638   8348        Eric 
    639   8348        Eric 		/*
    640   8348        Eric 		 * We have to send a connection confirmation to
    641   8348        Eric 		 * keep TLI happy.
    642   8348        Eric 		 */
    643  11042        Erik 		if (connp->conn_family == AF_INET) {
    644   8348        Eric 			mp1 = mi_tpi_conn_con(NULL, (char *)sa,
    645   8348        Eric 			    sizeof (sin_t), NULL, 0);
    646   8348        Eric 		} else {
    647  11042        Erik 			ASSERT(connp->conn_family == AF_INET6);
    648   8348        Eric 			mp1 = mi_tpi_conn_con(NULL, (char *)sa,
    649   8348        Eric 			    sizeof (sin6_t), NULL, 0);
    650   8348        Eric 		}
    651   8348        Eric 		if (mp1 == NULL) {
    652   8348        Eric 			icmp_err_ack(q, mp, TSYSERR, ENOMEM);
    653   8348        Eric 			return;
    654   8348        Eric 		}
    655   8348        Eric 
    656   8348        Eric 		/*
    657   8348        Eric 		 * Send ok_ack for T_CONN_REQ
    658   8348        Eric 		 */
    659   8348        Eric 		mp = mi_tpi_ok_ack_alloc(mp);
    660   8348        Eric 		if (mp == NULL) {
    661   8348        Eric 			/* Unable to reuse the T_CONN_REQ for the ack. */
    662   8348        Eric 			icmp_err_ack_prim(q, mp1, T_CONN_REQ, TSYSERR, ENOMEM);
    663   8348        Eric 			return;
    664   8348        Eric 		}
    665   8348        Eric 		putnext(connp->conn_rq, mp);
    666   8348        Eric 		putnext(connp->conn_rq, mp1);
    667   8348        Eric 	}
    668   8348        Eric }
    669   8348        Eric 
    670   8348        Eric static int
    671   8778        Erik rawip_do_connect(conn_t *connp, const struct sockaddr *sa, socklen_t len,
    672  11042        Erik     cred_t *cr, pid_t pid)
    673  11042        Erik {
    674  11042        Erik 	icmp_t		*icmp;
    675  11042        Erik 	sin_t		*sin;
    676  11042        Erik 	sin6_t		*sin6;
    677  11042        Erik 	int		error;
    678  11042        Erik 	uint16_t 	dstport;
    679   8348        Eric 	ipaddr_t	v4dst;
    680   8348        Eric 	in6_addr_t	v6dst;
    681  11042        Erik 	uint32_t	flowinfo;
    682  11042        Erik 	ip_xmit_attr_t	*ixa;
    683  11042        Erik 	uint_t		scopeid = 0;
    684  11042        Erik 	uint_t		srcid = 0;
    685  11042        Erik 	in6_addr_t	v6src = connp->conn_saddr_v6;
    686   8348        Eric 
    687   8348        Eric 	icmp = connp->conn_icmp;
    688   8348        Eric 
    689   8348        Eric 	if (sa == NULL || !OK_32PTR((char *)sa)) {
    690   8348        Eric 		return (EINVAL);
    691   8348        Eric 	}
    692   8348        Eric 
    693   8348        Eric 	ASSERT(sa != NULL && len != 0);
    694   8348        Eric 
    695  11042        Erik 	/*
    696  11042        Erik 	 * Determine packet type based on type of address passed in
    697  11042        Erik 	 * the request should contain an IPv4 or IPv6 address.
    698  11042        Erik 	 * Make sure that address family matches the type of
    699  11042        Erik 	 * family of the address passed down.
    700  11042        Erik 	 */
    701   8348        Eric 	switch (len) {
    702   8348        Eric 	case sizeof (sin_t):
    703   8348        Eric 		sin = (sin_t *)sa;
    704   8348        Eric 
    705      0      stevel 		v4dst = sin->sin_addr.s_addr;
    706  11042        Erik 		dstport = sin->sin_port;
    707  11042        Erik 		IN6_IPADDR_TO_V4MAPPED(v4dst, &v6dst);
    708  11042        Erik 		ASSERT(connp->conn_ipversion == IPV4_VERSION);
    709  11042        Erik 		break;
    710  11042        Erik 
    711  11042        Erik 	case sizeof (sin6_t):
    712  11042        Erik 		sin6 = (sin6_t *)sa;
    713  11042        Erik 
    714  11042        Erik 		/* No support for mapped addresses on raw sockets */
    715  11042        Erik 		if (IN6_IS_ADDR_V4MAPPED(&sin6->sin6_addr)) {
    716  11042        Erik 			return (EADDRNOTAVAIL);
    717  11042        Erik 		}
    718  11042        Erik 		v6dst = sin6->sin6_addr;
    719  11042        Erik 		dstport = sin6->sin6_port;
    720  11042        Erik 		ASSERT(connp->conn_ipversion == IPV6_VERSION);
    721  11042        Erik 		flowinfo = sin6->sin6_flowinfo;
    722  11042        Erik 		if (IN6_IS_ADDR_LINKLOCAL(&sin6->sin6_addr))
    723  11042        Erik 			scopeid = sin6->sin6_scope_id;
    724  11042        Erik 		srcid = sin6->__sin6_src_id;
    725  11042        Erik 		if (srcid != 0 && IN6_IS_ADDR_UNSPECIFIED(&v6src)) {
    726  11042        Erik 			ip_srcid_find_id(srcid, &v6src, IPCL_ZONEID(connp),
    727  11042        Erik 			    connp->conn_netstack);
    728  11042        Erik 		}
    729  11042        Erik 		break;
    730  11042        Erik 	}
    731  11042        Erik 
    732  11042        Erik 	/*
    733  11042        Erik 	 * If there is a different thread using conn_ixa then we get a new
    734  11042        Erik 	 * copy and cut the old one loose from conn_ixa. Otherwise we use
    735  11042        Erik 	 * conn_ixa and prevent any other thread from using/changing it.
    736  11042        Erik 	 * Once connect() is done other threads can use conn_ixa since the
    737  11042        Erik 	 * refcnt will be back at one.
    738  11042        Erik 	 */
    739  11042        Erik 	ixa = conn_get_ixa(connp, B_TRUE);
    740  11042        Erik 	if (ixa == NULL)
    741  11042        Erik 		return (ENOMEM);
    742  11042        Erik 
    743  11042        Erik 	ASSERT(ixa->ixa_refcnt >= 2);
    744  11042        Erik 	ASSERT(ixa == connp->conn_ixa);
    745  11042        Erik 
    746  11042        Erik 	mutex_enter(&connp->conn_lock);
    747  11042        Erik 	/*
    748  11042        Erik 	 * This icmp_t must have bound already before doing a connect.
    749  11042        Erik 	 * Reject if a connect is in progress (we drop conn_lock during
    750  11042        Erik 	 * rawip_do_connect).
    751  11042        Erik 	 */
    752  11042        Erik 	if (icmp->icmp_state == TS_UNBND || icmp->icmp_state == TS_WCON_CREQ) {
    753  11042        Erik 		mutex_exit(&connp->conn_lock);
    754  11042        Erik 		ixa_refrele(ixa);
    755  11042        Erik 		return (-TOUTSTATE);
    756  11042        Erik 	}
    757  11042        Erik 
    758  11042        Erik 	if (icmp->icmp_state == TS_DATA_XFER) {
    759  11042        Erik 		/* Already connected - clear out state */
    760  11042        Erik 		if (connp->conn_mcbc_bind)
    761  11042        Erik 			connp->conn_saddr_v6 = ipv6_all_zeros;
    762  11042        Erik 		else
    763  11042        Erik 			connp->conn_saddr_v6 = connp->conn_bound_addr_v6;
    764  11042        Erik 		connp->conn_laddr_v6 = connp->conn_bound_addr_v6;
    765  11042        Erik 		connp->conn_faddr_v6 = ipv6_all_zeros;
    766  11042        Erik 		icmp->icmp_state = TS_IDLE;
    767  11042        Erik 	}
    768  11042        Erik 
    769  11042        Erik 	/*
    770  11042        Erik 	 * Use sin_port/sin6_port since applications like psh use SOCK_RAW
    771  11042        Erik 	 * with IPPROTO_TCP.
    772  11042        Erik 	 */
    773  11042        Erik 	connp->conn_fport = dstport;
    774  11042        Erik 	if (connp->conn_ipversion == IPV4_VERSION) {
    775   8348        Eric 		/*
    776   8348        Eric 		 * Interpret a zero destination to mean loopback.
    777   8348        Eric 		 * Update the T_CONN_REQ (sin/sin6) since it is used to
    778   8348        Eric 		 * generate the T_CONN_CON.
    779   8348        Eric 		 */
    780   8348        Eric 		if (v4dst == INADDR_ANY) {
    781   8348        Eric 			v4dst = htonl(INADDR_LOOPBACK);
    782  11042        Erik 			IN6_IPADDR_TO_V4MAPPED(v4dst, &v6dst);
    783  11042        Erik 			ASSERT(connp->conn_family == AF_INET);
    784  11042        Erik 			sin->sin_addr.s_addr = v4dst;
    785  11042        Erik 		}
    786  11042        Erik 		connp->conn_faddr_v6 = v6dst;
    787  11042        Erik 		connp->conn_flowinfo = 0;
    788  11042        Erik 	} else {
    789  11042        Erik 		ASSERT(connp->conn_ipversion == IPV6_VERSION);
    790      0      stevel 		/*
    791      0      stevel 		 * Interpret a zero destination to mean loopback.
    792      0      stevel 		 * Update the T_CONN_REQ (sin/sin6) since it is used to
    793      0      stevel 		 * generate the T_CONN_CON.
    794      0      stevel 		 */
    795  11042        Erik 		if (IN6_IS_ADDR_UNSPECIFIED(&v6dst)) {
    796  11042        Erik 			v6dst = ipv6_loopback;
    797  11042        Erik 			sin6->sin6_addr = v6dst;
    798  11042        Erik 		}
    799  11042        Erik 		connp->conn_faddr_v6 = v6dst;
    800  11042        Erik 		connp->conn_flowinfo = flowinfo;
    801  11042        Erik 	}
    802  11042        Erik 
    803  11042        Erik 	ixa->ixa_cred = cr;
    804  11042        Erik 	ixa->ixa_cpid = pid;
    805  11042        Erik 	if (is_system_labeled()) {
    806  11042        Erik 		/* We need to restart with a label based on the cred */
    807  11042        Erik 		ip_xmit_attr_restore_tsl(ixa, ixa->ixa_cred);
    808  11042        Erik 	}
    809  11042        Erik 
    810  11042        Erik 	if (scopeid != 0) {
    811  11042        Erik 		ixa->ixa_flags |= IXAF_SCOPEID_SET;
    812  11042        Erik 		ixa->ixa_scopeid = scopeid;
    813  11042        Erik 		connp->conn_incoming_ifindex = scopeid;
    814  11042        Erik 	} else {
    815  11042        Erik 		ixa->ixa_flags &= ~IXAF_SCOPEID_SET;
    816  11042        Erik 		connp->conn_incoming_ifindex = connp->conn_bound_if;
    817  11042        Erik 	}
    818  11042        Erik 
    819  11042        Erik 	/*
    820  11042        Erik 	 * conn_connect will drop conn_lock and reacquire it.
    821  11042        Erik 	 * To prevent a send* from messing with this icmp_t while the lock
    822  11042        Erik 	 * is dropped we set icmp_state and clear conn_v6lastdst.
    823  11042        Erik 	 * That will make all send* fail with EISCONN.
    824  11042        Erik 	 */
    825  11042        Erik 	connp->conn_v6lastdst = ipv6_all_zeros;
    826  11042        Erik 	icmp->icmp_state = TS_WCON_CREQ;
    827  11042        Erik 
    828  11042        Erik 	error = conn_connect(connp, NULL, IPDF_ALLOW_MCBC);
    829  11042        Erik 	mutex_exit(&connp->conn_lock);
    830  11042        Erik 	if (error != 0)
    831  11042        Erik 		goto connect_failed;
    832  11042        Erik 
    833  11042        Erik 	/*
    834  11042        Erik 	 * The addresses have been verified. Time to insert in
    835  11042        Erik 	 * the correct fanout list.
    836  11042        Erik 	 */
    837  11042        Erik 	error = ipcl_conn_insert(connp);
    838  11042        Erik 	if (error != 0)
    839  11042        Erik 		goto connect_failed;
    840  11042        Erik 
    841  11042        Erik 	mutex_enter(&connp->conn_lock);
    842  11042        Erik 	error = icmp_build_hdr_template(connp, &connp->conn_saddr_v6,
    843  11042        Erik 	    &connp->conn_faddr_v6, connp->conn_flowinfo);
    844  11042        Erik 	if (error != 0) {
    845  11042        Erik 		mutex_exit(&connp->conn_lock);
    846  11042        Erik 		goto connect_failed;
    847   5240    nordmark 	}
    848   5240    nordmark 
    849      0      stevel 	icmp->icmp_state = TS_DATA_XFER;
    850  11042        Erik 	/* Record this as the "last" send even though we haven't sent any */
    851  11042        Erik 	connp->conn_v6lastdst = connp->conn_faddr_v6;
    852  11042        Erik 	connp->conn_lastipversion = connp->conn_ipversion;
    853  11042        Erik 	connp->conn_lastdstport = connp->conn_fport;
    854  11042        Erik 	connp->conn_lastflowinfo = connp->conn_flowinfo;
    855  11042        Erik 	connp->conn_lastscopeid = scopeid;
    856  11042        Erik 	connp->conn_lastsrcid = srcid;
    857  11042        Erik 	/* Also remember a source to use together with lastdst */
    858  11042        Erik 	connp->conn_v6lastsrc = v6src;
    859  11042        Erik 	mutex_exit(&connp->conn_lock);
    860  11042        Erik 
    861  11042        Erik 	ixa_refrele(ixa);
    862  11042        Erik 	return (0);
    863  11042        Erik 
    864  11042        Erik connect_failed:
    865  11042        Erik 	if (ixa != NULL)
    866  11042        Erik 		ixa_refrele(ixa);
    867  11042        Erik 	mutex_enter(&connp->conn_lock);
    868  11042        Erik 	icmp->icmp_state = TS_IDLE;
    869  11042        Erik 	/* In case the source address was set above */
    870  11042        Erik 	if (connp->conn_mcbc_bind)
    871  11042        Erik 		connp->conn_saddr_v6 = ipv6_all_zeros;
    872  11042        Erik 	else
    873  11042        Erik 		connp->conn_saddr_v6 = connp->conn_bound_addr_v6;
    874  11042        Erik 	connp->conn_laddr_v6 = connp->conn_bound_addr_v6;
    875  11042        Erik 	connp->conn_faddr_v6 = ipv6_all_zeros;
    876  11042        Erik 	connp->conn_v6lastdst = ipv6_all_zeros;
    877  11042        Erik 	connp->conn_flowinfo = 0;
    878  11042        Erik 
    879  11042        Erik 	(void) icmp_build_hdr_template(connp, &connp->conn_saddr_v6,
    880  11042        Erik 	    &connp->conn_faddr_v6, connp->conn_flowinfo);
    881  11042        Erik 	mutex_exit(&connp->conn_lock);
    882  11042        Erik 	return (error);
    883  11042        Erik }
    884  11042        Erik 
    885  11042        Erik static void
    886   8348        Eric rawip_do_close(conn_t *connp)
    887   8348        Eric {
    888   5240    nordmark 	ASSERT(connp != NULL && IPCL_IS_RAWIP(connp));
    889   5240    nordmark 
    890   5240    nordmark 	ip_quiesce_conn(connp);
    891   5240    nordmark 
    892   8348        Eric 	if (!IPCL_IS_NONSTR(connp)) {
    893   8348        Eric 		qprocsoff(connp->conn_rq);
    894   8348        Eric 	}
    895   8348        Eric 
    896   5240    nordmark 	icmp_close_free(connp);
    897   5240    nordmark 
    898   5240    nordmark 	/*
    899   5240    nordmark 	 * Now we are truly single threaded on this stream, and can
    900   5240    nordmark 	 * delete the things hanging off the connp, and finally the connp.
    901   5240    nordmark 	 * We removed this connp from the fanout list, it cannot be
    902   5240    nordmark 	 * accessed thru the fanouts, and we already waited for the
    903   5240    nordmark 	 * conn_ref to drop to 0. We are already in close, so
    904   5240    nordmark 	 * there cannot be any other thread from the top. qprocsoff
    905   5240    nordmark 	 * has completed, and service has completed or won't run in
    906   5240    nordmark 	 * future.
    907   5240    nordmark 	 */
    908   5240    nordmark 	ASSERT(connp->conn_ref == 1);
    909   5240    nordmark 
    910   8348        Eric 	if (!IPCL_IS_NONSTR(connp)) {
    911   8348        Eric 		inet_minor_free(connp->conn_minor_arena, connp->conn_dev);
    912   8348        Eric 	} else {
    913   8477         Rao 		ip_free_helper_stream(connp);
    914   8348        Eric 	}
    915   5240    nordmark 
    916   5240    nordmark 	connp->conn_ref--;
    917   5240    nordmark 	ipcl_conn_destroy(connp);
    918   8348        Eric }
    919   8348        Eric 
    920   8348        Eric static int
    921   8348        Eric icmp_close(queue_t *q, int flags)
    922   8348        Eric {
    923   8348        Eric 	conn_t  *connp;
    924   8348        Eric 
    925   8348        Eric 	if (flags & SO_FALLBACK) {
    926   8348        Eric 		/*
    927   8348        Eric 		 * stream is being closed while in fallback
    928   8348        Eric 		 * simply free the resources that were allocated
    929   8348        Eric 		 */
    930   8348        Eric 		inet_minor_free(WR(q)->q_ptr, (dev_t)(RD(q)->q_ptr));
    931   8348        Eric 		qprocsoff(q);
    932   8348        Eric 		goto done;
    933   8348        Eric 	}
    934   8348        Eric 
    935   8348        Eric 	connp = Q_TO_CONN(q);
    936   8348        Eric 	(void) rawip_do_close(connp);
    937   8348        Eric done:
    938   5240    nordmark 	q->q_ptr = WR(q)->q_ptr = NULL;
    939   5240    nordmark 	return (0);
    940      0      stevel }
    941      0      stevel 
    942  11042        Erik static void
    943  11042        Erik icmp_close_free(conn_t *connp)
    944  11042        Erik {
    945  11042        Erik 	icmp_t *icmp = connp->conn_icmp;
    946  11042        Erik 
    947  11042        Erik 	if (icmp->icmp_filter != NULL) {
    948  11042        Erik 		kmem_free(icmp->icmp_filter, sizeof (icmp6_filter_t));
    949  11042        Erik 		icmp->icmp_filter = NULL;
    950  11042        Erik 	}
    951  11042        Erik 
    952  11042        Erik 	/*
    953  11042        Erik 	 * Clear any fields which the kmem_cache constructor clears.
    954  11042        Erik 	 * Only icmp_connp needs to be preserved.
    955  11042        Erik 	 * TBD: We should make this more efficient to avoid clearing
    956  11042        Erik 	 * everything.
    957  11042        Erik 	 */
    958  11042        Erik 	ASSERT(icmp->icmp_connp == connp);
    959  11042        Erik 	bzero(icmp, sizeof (icmp_t));
    960  11042        Erik 	icmp->icmp_connp = connp;
    961  11042        Erik }
    962  11042        Erik 
    963      0      stevel /*
    964      0      stevel  * This routine handles each T_DISCON_REQ message passed to icmp
    965      0      stevel  * as an indicating that ICMP is no longer connected. This results
    966  11042        Erik  * in telling IP to restore the binding to just the local address.
    967   8348        Eric  */
    968   8348        Eric static int
    969   8348        Eric icmp_do_disconnect(conn_t *connp)
    970   8348        Eric {
    971  11042        Erik 	icmp_t	*icmp = connp->conn_icmp;
    972  11042        Erik 	int	error;
    973  11042        Erik 
    974  11042        Erik 	mutex_enter(&connp->conn_lock);
    975  11042        Erik 	if (icmp->icmp_state != TS_DATA_XFER) {
    976  11042        Erik 		mutex_exit(&connp->conn_lock);
    977   8348        Eric 		return (-TOUTSTATE);
    978      0      stevel 	}
    979  11042        Erik 	if (connp->conn_mcbc_bind)
    980  11042        Erik 		connp->conn_saddr_v6 = ipv6_all_zeros;
    981  11042        Erik 	else
    982  11042        Erik 		connp->conn_saddr_v6 = connp->conn_bound_addr_v6;
    983  11042        Erik 	connp->conn_laddr_v6 = connp->conn_bound_addr_v6;
    984  11042        Erik 	connp->conn_faddr_v6 = ipv6_all_zeros;
    985      0      stevel 	icmp->icmp_state = TS_IDLE;
    986      0      stevel 
    987  11042        Erik 	connp->conn_v6lastdst = ipv6_all_zeros;
    988  11042        Erik 	error = icmp_build_hdr_template(connp, &connp->conn_saddr_v6,
    989  11042        Erik 	    &connp->conn_faddr_v6, connp->conn_flowinfo);
    990  11042        Erik 	mutex_exit(&connp->conn_lock);
    991  11042        Erik 	if (error != 0)
    992  11042        Erik 		return (error);
    993  11042        Erik 
    994  11042        Erik 	/*
    995  11042        Erik 	 * Tell IP to remove the full binding and revert
    996  11042        Erik 	 * to the local address binding.
    997  11042        Erik 	 */
    998  11042        Erik 	return (ip_laddr_fanout_insert(connp));
    999   8348        Eric }
   1000   8348        Eric 
   1001   8348        Eric static void
   1002   8348        Eric icmp_tpi_disconnect(queue_t *q, mblk_t *mp)
   1003   8348        Eric {
   1004   8348        Eric 	conn_t	*connp = Q_TO_CONN(q);
   1005   8348        Eric 	int	error;
   1006   8348        Eric 
   1007   8348        Eric 	/*
   1008   8348        Eric 	 * Allocate the largest primitive we need to send back
   1009   8348        Eric 	 * T_error_ack is > than T_ok_ack
   1010   8348        Eric 	 */
   1011   8348        Eric 	mp = reallocb(mp, sizeof (struct T_error_ack), 1);
   1012   8348        Eric 	if (mp == NULL) {
   1013   8348        Eric 		/* Unable to reuse the T_DISCON_REQ for the ack. */
   1014   8348        Eric 		icmp_err_ack_prim(q, mp, T_DISCON_REQ, TSYSERR, ENOMEM);
   1015   8348        Eric 		return;
   1016   8348        Eric 	}
   1017   8348        Eric 
   1018   8348        Eric 	error = icmp_do_disconnect(connp);
   1019   8348        Eric 
   1020   8348        Eric 	if (error != 0) {
   1021   8348        Eric 		if (error > 0) {
   1022   8348        Eric 			icmp_err_ack(q, mp, 0, error);
   1023   8348        Eric 		} else {
   1024   8348        Eric 			icmp_err_ack(q, mp, -error, 0);
   1025   8348        Eric 		}
   1026   8348        Eric 	} else {
   1027   8348        Eric 		mp = mi_tpi_ok_ack_alloc(mp);
   1028   8348        Eric 		ASSERT(mp != NULL);
   1029   8348        Eric 		qreply(q, mp);
   1030   8348        Eric 	}
   1031   8348        Eric }
   1032   8348        Eric 
   1033   8348        Eric static int
   1034   8348        Eric icmp_disconnect(conn_t *connp)
   1035   8348        Eric {
   1036   8348        Eric 	int	error;
   1037  11042        Erik 
   1038  11042        Erik 	connp->conn_dgram_errind = B_FALSE;
   1039   8348        Eric 
   1040   8348        Eric 	error = icmp_do_disconnect(connp);
   1041   8348        Eric 
   1042   8348        Eric 	if (error < 0)
   1043   8348        Eric 		error = proto_tlitosyserr(-error);
   1044   8348        Eric 	return (error);
   1045      0      stevel }
   1046      0      stevel 
   1047      0      stevel /* This routine creates a T_ERROR_ACK message and passes it upstream. */
   1048      0      stevel static void
   1049      0      stevel icmp_err_ack(queue_t *q, mblk_t *mp, t_scalar_t t_error, int sys_error)
   1050      0      stevel {
   1051      0      stevel 	if ((mp = mi_tpi_err_ack_alloc(mp, t_error, sys_error)) != NULL)
   1052      0      stevel 		qreply(q, mp);
   1053      0      stevel }
   1054      0      stevel 
   1055      0      stevel /* Shorthand to generate and send TPI error acks to our client */
   1056      0      stevel static void
   1057      0      stevel icmp_err_ack_prim(queue_t *q, mblk_t *mp, t_scalar_t primitive,
   1058      0      stevel     t_scalar_t t_error, int sys_error)
   1059      0      stevel {
   1060      0      stevel 	struct T_error_ack	*teackp;
   1061      0      stevel 
   1062      0      stevel 	if ((mp = tpi_ack_alloc(mp, sizeof (struct T_error_ack),
   1063      0      stevel 	    M_PCPROTO, T_ERROR_ACK)) != NULL) {
   1064      0      stevel 		teackp = (struct T_error_ack *)mp->b_rptr;
   1065      0      stevel 		teackp->ERROR_prim = primitive;
   1066      0      stevel 		teackp->TLI_error = t_error;
   1067      0      stevel 		teackp->UNIX_error = sys_error;
   1068      0      stevel 		qreply(q, mp);
   1069      0      stevel 	}
   1070      0      stevel }
   1071      0      stevel 
   1072      0      stevel /*
   1073  11042        Erik  * icmp_icmp_input is called as conn_recvicmp to process ICMP messages.
   1074  11042        Erik  * Generates the appropriate T_UDERROR_IND for permanent (non-transient) errors.
   1075  11042        Erik  * Assumes that IP has pulled up everything up to and including the ICMP header.
   1076  11042        Erik  */
   1077  11042        Erik /* ARGSUSED2 */
   1078  11042        Erik static void
   1079  11042        Erik icmp_icmp_input(void *arg1, mblk_t *mp, void *arg2, ip_recv_attr_t *ira)
   1080  11042        Erik {
   1081  11042        Erik 	conn_t		*connp = (conn_t *)arg1;
   1082  11042        Erik 	icmp_t		*icmp = connp->conn_icmp;
   1083  11042        Erik 	icmph_t		*icmph;
   1084  11042        Erik 	ipha_t		*ipha;
   1085  11042        Erik 	int		iph_hdr_length;
   1086  11042        Erik 	sin_t		sin;
   1087  11042        Erik 	mblk_t		*mp1;
   1088  11042        Erik 	int		error = 0;
   1089      0      stevel 
   1090      0      stevel 	ipha = (ipha_t *)mp->b_rptr;
   1091   5240    nordmark 
   1092   5240    nordmark 	ASSERT(OK_32PTR(mp->b_rptr));
   1093      0      stevel 
   1094      0      stevel 	if (IPH_HDR_VERSION(ipha) != IPV4_VERSION) {
   1095      0      stevel 		ASSERT(IPH_HDR_VERSION(ipha) == IPV6_VERSION);
   1096  11042        Erik 		icmp_icmp_error_ipv6(connp, mp, ira);
   1097  11042        Erik 		return;
   1098  11042        Erik 	}
   1099  11042        Erik 	ASSERT(IPH_HDR_VERSION(ipha) == IPV4_VERSION);
   1100      0      stevel 
   1101   5240    nordmark 	/* Skip past the outer IP and ICMP headers */
   1102  11042        Erik 	ASSERT(IPH_HDR_LENGTH(ipha) == ira->ira_ip_hdr_length);
   1103  11042        Erik 	iph_hdr_length = ira->ira_ip_hdr_length;
   1104  11042        Erik 	icmph = (icmph_t *)&mp->b_rptr[iph_hdr_length];
   1105  11042        Erik 	ipha = (ipha_t *)&icmph[1];	/* Inner IP header */
   1106  11042        Erik 
   1107      0      stevel 	iph_hdr_length = IPH_HDR_LENGTH(ipha);
   1108      0      stevel 
   1109      0      stevel 	switch (icmph->icmph_type) {
   1110      0      stevel 	case ICMP_DEST_UNREACHABLE:
   1111      0      stevel 		switch (icmph->icmph_code) {
   1112  11042        Erik 		case ICMP_FRAGMENTATION_NEEDED: {
   1113  11042        Erik 			ipha_t		*ipha;
   1114  11042        Erik 			ip_xmit_attr_t	*ixa;
   1115      0      stevel 			/*
   1116      0      stevel 			 * IP has already adjusted the path MTU.
   1117  11042        Erik 			 * But we need to adjust DF for IPv4.
   1118  11042        Erik 			 */
   1119  11042        Erik 			if (connp->conn_ipversion != IPV4_VERSION)
   1120  11042        Erik 				break;
   1121  11042        Erik 
   1122  11042        Erik 			ixa = conn_get_ixa(connp, B_FALSE);
   1123  11042        Erik 			if (ixa == NULL || ixa->ixa_ire == NULL) {
   1124  11042        Erik 				/*
   1125  11042        Erik 				 * Some other thread holds conn_ixa. We will
   1126  11042        Erik 				 * redo this on the next ICMP too big.
   1127  11042        Erik 				 */
   1128  11042        Erik 				if (ixa != NULL)
   1129  11042        Erik 					ixa_refrele(ixa);
   1130  11042        Erik 				break;
   1131  11042        Erik 			}
   1132  11042        Erik 			(void) ip_get_pmtu(ixa);
   1133  11042        Erik 
   1134  11042        Erik 			mutex_enter(&connp->conn_lock);
   1135  11042        Erik 			ipha = (ipha_t *)connp->conn_ht_iphc;
   1136  11042        Erik 			if (ixa->ixa_flags & IXAF_PMTU_IPV4_DF) {
   1137  11042        Erik 				ipha->ipha_fragment_offset_and_flags |=
   1138  11042        Erik 				    IPH_DF_HTONS;
   1139  11042        Erik 			} else {
   1140  11042        Erik 				ipha->ipha_fragment_offset_and_flags &=
   1141  11042        Erik 				    ~IPH_DF_HTONS;
   1142  11042        Erik 			}
   1143  11042        Erik 			mutex_exit(&connp->conn_lock);
   1144  11042        Erik 			ixa_refrele(ixa);
   1145  11042        Erik 			break;
   1146  11042        Erik 		}
   1147      0      stevel 		case ICMP_PORT_UNREACHABLE:
   1148      0      stevel 		case ICMP_PROTOCOL_UNREACHABLE:
   1149      0      stevel 			error = ECONNREFUSED;
   1150      0      stevel 			break;
   1151      0      stevel 		default:
   1152      0      stevel 			/* Transient errors */
   1153      0      stevel 			break;
   1154      0      stevel 		}
   1155      0      stevel 		break;
   1156      0      stevel 	default:
   1157      0      stevel 		/* Transient errors */
   1158      0      stevel 		break;
   1159      0      stevel 	}
   1160      0      stevel 	if (error == 0) {
   1161   5240    nordmark 		freemsg(mp);
   1162   5240    nordmark 		return;
   1163   5240    nordmark 	}
   1164   5240    nordmark 
   1165   5240    nordmark 	/*
   1166   5240    nordmark 	 * Deliver T_UDERROR_IND when the application has asked for it.
   1167   5240    nordmark 	 * The socket layer enables this automatically when connected.
   1168   5240    nordmark 	 */
   1169  11042        Erik 	if (!connp->conn_dgram_errind) {
   1170      0      stevel 		freemsg(mp);
   1171      0      stevel 		return;
   1172      0      stevel 	}
   1173      0      stevel 
   1174   8348        Eric 	sin = sin_null;
   1175   8348        Eric 	sin.sin_family = AF_INET;
   1176   8348        Eric 	sin.sin_addr.s_addr = ipha->ipha_dst;
   1177   8963      Anders 
   1178   8348        Eric 	if (IPCL_IS_NONSTR(connp)) {
   1179  11042        Erik 		mutex_enter(&connp->conn_lock);
   1180   8348        Eric 		if (icmp->icmp_state == TS_DATA_XFER) {
   1181  11042        Erik 			if (sin.sin_addr.s_addr == connp->conn_faddr_v4) {
   1182  11042        Erik 				mutex_exit(&connp->conn_lock);
   1183   8348        Eric 				(*connp->conn_upcalls->su_set_error)
   1184   8348        Eric 				    (connp->conn_upper_handle, error);
   1185   8348        Eric 				goto done;
   1186   8348        Eric 			}
   1187   8348        Eric 		} else {
   1188   8348        Eric 			icmp->icmp_delayed_error = error;
   1189   8348        Eric 			*((sin_t *)&icmp->icmp_delayed_addr) = sin;
   1190   8348        Eric 		}
   1191  11042        Erik 		mutex_exit(&connp->conn_lock);
   1192  11042        Erik 	} else {
   1193  11042        Erik 		mp1 = mi_tpi_uderror_ind((char *)&sin, sizeof (sin_t), NULL, 0,
   1194  11042        Erik 		    error);
   1195   8348        Eric 		if (mp1 != NULL)
   1196   8348        Eric 			putnext(connp->conn_rq, mp1);
   1197   8348        Eric 	}
   1198   8348        Eric done:
   1199      0      stevel 	freemsg(mp);
   1200      0      stevel }
   1201      0      stevel 
   1202      0      stevel /*
   1203  11042        Erik  * icmp_icmp_error_ipv6 is called by icmp_icmp_error to process ICMP for IPv6.
   1204  11042        Erik  * Generates the appropriate T_UDERROR_IND for permanent (non-transient) errors.
   1205  11042        Erik  * Assumes that IP has pulled up all the extension headers as well as the
   1206  11042        Erik  * ICMPv6 header.
   1207  11042        Erik  */
   1208  11042        Erik static void
   1209  11042        Erik icmp_icmp_error_ipv6(conn_t *connp, mblk_t *mp, ip_recv_attr_t *ira)
   1210      0      stevel {
   1211      0      stevel 	icmp6_t		*icmp6;
   1212      0      stevel 	ip6_t		*ip6h, *outer_ip6h;
   1213      0      stevel 	uint16_t	iph_hdr_length;
   1214      0      stevel 	uint8_t		*nexthdrp;
   1215      0      stevel 	sin6_t		sin6;
   1216      0      stevel 	mblk_t		*mp1;
   1217      0      stevel 	int		error = 0;
   1218   8348        Eric 	icmp_t		*icmp = connp->conn_icmp;
   1219      0      stevel 
   1220      0      stevel 	outer_ip6h = (ip6_t *)mp->b_rptr;
   1221  11042        Erik #ifdef DEBUG
   1222      0      stevel 	if (outer_ip6h->ip6_nxt != IPPROTO_ICMPV6)
   1223      0      stevel 		iph_hdr_length = ip_hdr_length_v6(mp, outer_ip6h);
   1224      0      stevel 	else
   1225      0      stevel 		iph_hdr_length = IPV6_HDR_LEN;
   1226  11042        Erik 	ASSERT(iph_hdr_length == ira->ira_ip_hdr_length);
   1227  11042        Erik #endif
   1228  11042        Erik 	/* Skip past the outer IP and ICMP headers */
   1229  11042        Erik 	iph_hdr_length = ira->ira_ip_hdr_length;
   1230      0      stevel 	icmp6 = (icmp6_t *)&mp->b_rptr[iph_hdr_length];
   1231  11042        Erik 
   1232  11042        Erik 	ip6h = (ip6_t *)&icmp6[1];	/* Inner IP header */
   1233      0      stevel 	if (!ip_hdr_length_nexthdr_v6(mp, ip6h, &iph_hdr_length, &nexthdrp)) {
   1234      0      stevel 		freemsg(mp);
   1235      0      stevel 		return;
   1236      0      stevel 	}
   1237   5240    nordmark 
   1238      0      stevel 	switch (icmp6->icmp6_type) {
   1239      0      stevel 	case ICMP6_DST_UNREACH:
   1240      0      stevel 		switch (icmp6->icmp6_code) {
   1241      0      stevel 		case ICMP6_DST_UNREACH_NOPORT:
   1242      0      stevel 			error = ECONNREFUSED;
   1243      0      stevel 			break;
   1244      0      stevel 		case ICMP6_DST_UNREACH_ADMIN:
   1245      0      stevel 		case ICMP6_DST_UNREACH_NOROUTE:
   1246      0      stevel 		case ICMP6_DST_UNREACH_BEYONDSCOPE:
   1247      0      stevel 		case ICMP6_DST_UNREACH_ADDR:
   1248      0      stevel 			/* Transient errors */
   1249      0      stevel 			break;
   1250      0      stevel 		default:
   1251      0      stevel 			break;
   1252      0      stevel 		}
   1253      0      stevel 		break;
   1254      0      stevel 	case ICMP6_PACKET_TOO_BIG: {
   1255      0      stevel 		struct T_unitdata_ind	*tudi;
   1256      0      stevel 		struct T_opthdr		*toh;
   1257      0      stevel 		size_t			udi_size;
   1258      0      stevel 		mblk_t			*newmp;
   1259      0      stevel 		t_scalar_t		opt_length = sizeof (struct T_opthdr) +
   1260      0      stevel 		    sizeof (struct ip6_mtuinfo);
   1261      0      stevel 		sin6_t			*sin6;
   1262      0      stevel 		struct ip6_mtuinfo	*mtuinfo;
   1263      0      stevel 
   1264      0      stevel 		/*
   1265      0      stevel 		 * If the application has requested to receive path mtu
   1266      0      stevel 		 * information, send up an empty message containing an
   1267      0      stevel 		 * IPV6_PATHMTU ancillary data item.
   1268      0      stevel 		 */
   1269  11042        Erik 		if (!connp->conn_ipv6_recvpathmtu)
   1270      0      stevel 			break;
   1271      0      stevel 
   1272      0      stevel 		udi_size = sizeof (struct T_unitdata_ind) + sizeof (sin6_t) +
   1273      0      stevel 		    opt_length;
   1274      0      stevel 		if ((newmp = allocb(udi_size, BPRI_MED)) == NULL) {
   1275   5240    nordmark 			BUMP_MIB(&icmp->icmp_is->is_rawip_mib, rawipInErrors);
   1276      0      stevel 			break;
   1277      0      stevel 		}
   1278      0      stevel 
   1279      0      stevel 		/*
   1280      0      stevel 		 * newmp->b_cont is left to NULL on purpose.  This is an
   1281      0      stevel 		 * empty message containing only ancillary data.
   1282      0      stevel 		 */
   1283      0      stevel 		newmp->b_datap->db_type = M_PROTO;
   1284      0      stevel 		tudi = (struct T_unitdata_ind *)newmp->b_rptr;
   1285      0      stevel 		newmp->b_wptr = (uchar_t *)tudi + udi_size;
   1286      0      stevel 		tudi->PRIM_type = T_UNITDATA_IND;
   1287      0      stevel 		tudi->SRC_length = sizeof (sin6_t);
   1288      0      stevel 		tudi->SRC_offset = sizeof (struct T_unitdata_ind);
   1289      0      stevel 		tudi->OPT_offset = tudi->SRC_offset + sizeof (sin6_t);
   1290      0      stevel 		tudi->OPT_length = opt_length;
   1291      0      stevel 
   1292      0      stevel 		sin6 = (sin6_t *)&tudi[1];
   1293      0      stevel 		bzero(sin6, sizeof (sin6_t));
   1294      0      stevel 		sin6->sin6_family = AF_INET6;
   1295  11042        Erik 		sin6->sin6_addr = connp->conn_faddr_v6;
   1296      0      stevel 
   1297      0      stevel 		toh = (struct T_opthdr *)&sin6[1];
   1298      0      stevel 		toh->level = IPPROTO_IPV6;
   1299      0      stevel 		toh->name = IPV6_PATHMTU;
   1300      0      stevel 		toh->len = opt_length;
   1301      0      stevel 		toh->status = 0;
   1302      0      stevel 
   1303      0      stevel 		mtuinfo = (struct ip6_mtuinfo *)&toh[1];
   1304      0      stevel 		bzero(mtuinfo, sizeof (struct ip6_mtuinfo));
   1305      0      stevel 		mtuinfo->ip6m_addr.sin6_family = AF_INET6;
   1306      0      stevel 		mtuinfo->ip6m_addr.sin6_addr = ip6h->ip6_dst;
   1307      0      stevel 		mtuinfo->ip6m_mtu = icmp6->icmp6_mtu;
   1308      0      stevel 		/*
   1309      0      stevel 		 * We've consumed everything we need from the original
   1310      0      stevel 		 * message.  Free it, then send our empty message.
   1311      0      stevel 		 */
   1312      0      stevel 		freemsg(mp);
   1313  11042        Erik 		icmp_ulp_recv(connp, newmp, msgdsize(newmp));
   1314      0      stevel 		return;
   1315      0      stevel 	}
   1316      0      stevel 	case ICMP6_TIME_EXCEEDED:
   1317      0      stevel 		/* Transient errors */
   1318      0      stevel 		break;
   1319      0      stevel 	case ICMP6_PARAM_PROB:
   1320      0      stevel 		/* If this corresponds to an ICMP_PROTOCOL_UNREACHABLE */
   1321      0      stevel 		if (icmp6->icmp6_code == ICMP6_PARAMPROB_NEXTHEADER &&
   1322      0      stevel 		    (uchar_t *)ip6h + icmp6->icmp6_pptr ==
   1323      0      stevel 		    (uchar_t *)nexthdrp) {
   1324      0      stevel 			error = ECONNREFUSED;
   1325      0      stevel 			break;
   1326      0      stevel 		}
   1327      0      stevel 		break;
   1328      0      stevel 	}
   1329      0      stevel 	if (error == 0) {
   1330      0      stevel 		freemsg(mp);
   1331      0      stevel 		return;
   1332      0      stevel 	}
   1333      0      stevel 
   1334   5240    nordmark 	/*
   1335   5240    nordmark 	 * Deliver T_UDERROR_IND when the application has asked for it.
   1336   5240    nordmark 	 * The socket layer enables this automatically when connected.
   1337   5240    nordmark 	 */
   1338  11042        Erik 	if (!connp->conn_dgram_errind) {
   1339   5240    nordmark 		freemsg(mp);
   1340   5240    nordmark 		return;
   1341   5240    nordmark 	}
   1342   5240    nordmark 
   1343      0      stevel 	sin6 = sin6_null;
   1344      0      stevel 	sin6.sin6_family = AF_INET6;
   1345      0      stevel 	sin6.sin6_addr = ip6h->ip6_dst;
   1346      0      stevel 	sin6.sin6_flowinfo = ip6h->ip6_vcf & ~IPV6_VERS_AND_FLOW_MASK;
   1347   8348        Eric 	if (IPCL_IS_NONSTR(connp)) {
   1348  11042        Erik 		mutex_enter(&connp->conn_lock);
   1349   8348        Eric 		if (icmp->icmp_state == TS_DATA_XFER) {
   1350   8348        Eric 			if (IN6_ARE_ADDR_EQUAL(&sin6.sin6_addr,
   1351  11042        Erik 			    &connp->conn_faddr_v6)) {
   1352  11042        Erik 				mutex_exit(&connp->conn_lock);
   1353   8348        Eric 				(*connp->conn_upcalls->su_set_error)
   1354   8348        Eric 				    (connp->conn_upper_handle, error);
   1355   8348        Eric 				goto done;
   1356   8348        Eric 			}
   1357   8348        Eric 		} else {
   1358   8348        Eric 			icmp->icmp_delayed_error = error;
   1359   8348        Eric 			*((sin6_t *)&icmp->icmp_delayed_addr) = sin6;
   1360   8348        Eric 		}
   1361  11042        Erik 		mutex_exit(&connp->conn_lock);
   1362   8348        Eric 	} else {
   1363   8348        Eric 		mp1 = mi_tpi_uderror_ind((char *)&sin6, sizeof (sin6_t),
   1364   8348        Eric 		    NULL, 0, error);
   1365   8348        Eric 		if (mp1 != NULL)
   1366   8348        Eric 			putnext(connp->conn_rq, mp1);
   1367   8348        Eric 	}
   1368   8348        Eric done:
   1369      0      stevel 	freemsg(mp);
   1370      0      stevel }
   1371      0      stevel 
   1372      0      stevel /*
   1373      0      stevel  * This routine responds to T_ADDR_REQ messages.  It is called by icmp_wput.
   1374      0      stevel  * The local address is filled in if endpoint is bound. The remote address
   1375      0      stevel  * is filled in if remote address has been precified ("connected endpoint")
   1376      0      stevel  * (The concept of connected CLTS sockets is alien to published TPI
   1377      0      stevel  *  but we support it anyway).
   1378      0      stevel  */
   1379      0      stevel static void
   1380      0      stevel icmp_addr_req(queue_t *q, mblk_t *mp)
   1381      0      stevel {
   1382  11042        Erik 	struct sockaddr *sa;
   1383      0      stevel 	mblk_t	*ackmp;
   1384      0      stevel 	struct T_addr_ack *taa;
   1385  11042        Erik 	icmp_t	*icmp = Q_TO_ICMP(q);
   1386  11042        Erik 	conn_t	*connp = icmp->icmp_connp;
   1387  11042        Erik 	uint_t	addrlen;
   1388      0      stevel 
   1389      0      stevel 	/* Make it large enough for worst case */
   1390      0      stevel 	ackmp = reallocb(mp, sizeof (struct T_addr_ack) +
   1391      0      stevel 	    2 * sizeof (sin6_t), 1);
   1392      0      stevel 	if (ackmp == NULL) {
   1393      0      stevel 		icmp_err_ack(q, mp, TSYSERR, ENOMEM);
   1394      0      stevel 		return;
   1395      0      stevel 	}
   1396      0      stevel 	taa = (struct T_addr_ack *)ackmp->b_rptr;
   1397      0      stevel 
   1398      0      stevel 	bzero(taa, sizeof (struct T_addr_ack));
   1399      0      stevel 	ackmp->b_wptr = (uchar_t *)&taa[1];
   1400      0      stevel 
   1401      0      stevel 	taa->PRIM_type = T_ADDR_ACK;
   1402      0      stevel 	ackmp->b_datap->db_type = M_PCPROTO;
   1403  11042        Erik 
   1404  11042        Erik 	if (connp->conn_family == AF_INET)
   1405  11042        Erik 		addrlen = sizeof (sin_t);
   1406  11042        Erik 	else
   1407  11042        Erik 		addrlen = sizeof (sin6_t);
   1408  11042        Erik 
   1409  11042        Erik 	mutex_enter(&connp->conn_lock);
   1410      0      stevel 	/*
   1411      0      stevel 	 * Note: Following code assumes 32 bit alignment of basic
   1412      0      stevel 	 * data structures like sin_t and struct T_addr_ack.
   1413      0      stevel 	 */
   1414      0      stevel 	if (icmp->icmp_state != TS_UNBND) {
   1415      0      stevel 		/*
   1416  11042        Erik 		 * Fill in local address first
   1417      0      stevel 		 */
   1418      0      stevel 		taa->LOCADDR_offset = sizeof (*taa);
   1419  11042        Erik 		taa->LOCADDR_length = addrlen;
   1420  11042        Erik 		sa = (struct sockaddr *)&taa[1];
   1421  11042        Erik 		(void) conn_getsockname(connp, sa, &addrlen);
   1422  11042        Erik 		ackmp->b_wptr += addrlen;
   1423  11042        Erik 	}
   1424  11042        Erik 	if (icmp->icmp_state == TS_DATA_XFER) {
   1425  11042        Erik 		/*
   1426  11042        Erik 		 * connected, fill remote address too
   1427  11042        Erik 		 */
   1428  11042        Erik 		taa->REMADDR_length = addrlen;
   1429  11042        Erik 		/* assumed 32-bit alignment */
   1430  11042        Erik 		taa->REMADDR_offset = taa->LOCADDR_offset + taa->LOCADDR_length;
   1431  11042        Erik 		sa = (struct sockaddr *)(ackmp->b_rptr + taa->REMADDR_offset);
   1432  11042        Erik 		(void) conn_getpeername(connp, sa, &addrlen);
   1433  11042        Erik 		ackmp->b_wptr += addrlen;
   1434  11042        Erik 	}
   1435  11042        Erik 	mutex_exit(&connp->conn_lock);
   1436      0      stevel 	ASSERT(ackmp->b_wptr <= ackmp->b_datap->db_lim);
   1437      0      stevel 	qreply(q, ackmp);
   1438      0      stevel }
   1439      0      stevel 
   1440      0      stevel static void
   1441      0      stevel icmp_copy_info(struct T_info_ack *tap, icmp_t *icmp)
   1442      0      stevel {
   1443  11042        Erik 	conn_t		*connp = icmp->icmp_connp;
   1444  11042        Erik 
   1445      0      stevel 	*tap = icmp_g_t_info_ack;
   1446      0      stevel 
   1447  11042        Erik 	if (connp->conn_family == AF_INET6)
   1448      0      stevel 		tap->ADDR_size = sizeof (sin6_t);
   1449      0      stevel 	else
   1450      0      stevel 		tap->ADDR_size = sizeof (sin_t);
   1451      0      stevel 	tap->CURRENT_state = icmp->icmp_state;
   1452      0      stevel 	tap->OPT_size = icmp_max_optsize;
   1453      0      stevel }
   1454      0      stevel 
   1455   8348        Eric static void
   1456   8348        Eric icmp_do_capability_ack(icmp_t *icmp, struct T_capability_ack *tcap,
   1457   8348        Eric     t_uscalar_t cap_bits1)
   1458   8348        Eric {
   1459   8348        Eric 	tcap->CAP_bits1 = 0;
   1460   8348        Eric 
   1461   8348        Eric 	if (cap_bits1 & TC1_INFO) {
   1462   8348        Eric 		icmp_copy_info(&tcap->INFO_ack, icmp);
   1463   8348        Eric 		tcap->CAP_bits1 |= TC1_INFO;
   1464   8348        Eric 	}
   1465   8348        Eric }
   1466   8348        Eric 
   1467      0      stevel /*
   1468      0      stevel  * This routine responds to T_CAPABILITY_REQ messages.  It is called by
   1469      0      stevel  * icmp_wput.  Much of the T_CAPABILITY_ACK information is copied from
   1470      0      stevel  * icmp_g_t_info_ack.  The current state of the stream is copied from
   1471      0      stevel  * icmp_state.
   1472      0      stevel  */
   1473      0      stevel static void
   1474      0      stevel icmp_capability_req(queue_t *q, mblk_t *mp)
   1475      0      stevel {
   1476   5240    nordmark 	icmp_t			*icmp = Q_TO_ICMP(q);
   1477      0      stevel 	t_uscalar_t		cap_bits1;
   1478      0      stevel 	struct T_capability_ack	*tcap;
   1479      0      stevel 
   1480      0      stevel 	cap_bits1 = ((struct T_capability_req *)mp->b_rptr)->CAP_bits1;
   1481      0      stevel 
   1482      0      stevel 	mp = tpi_ack_alloc(mp, sizeof (struct T_capability_ack),
   1483   5240    nordmark 	    mp->b_datap->db_type, T_CAPABILITY_ACK);
   1484      0      stevel 	if (!mp)
   1485      0      stevel 		return;
   1486      0      stevel 
   1487      0      stevel 	tcap = (struct T_capability_ack *)mp->b_rptr;
   1488   8348        Eric 
   1489   8348        Eric 	icmp_do_capability_ack(icmp, tcap, cap_bits1);
   1490      0      stevel 
   1491      0      stevel 	qreply(q, mp);
   1492      0      stevel }
   1493      0      stevel 
   1494      0      stevel /*
   1495      0      stevel  * This routine responds to T_INFO_REQ messages.  It is called by icmp_wput.
   1496      0      stevel  * Most of the T_INFO_ACK information is copied from icmp_g_t_info_ack.
   1497      0      stevel  * The current state of the stream is copied from icmp_state.
   1498      0      stevel  */
   1499      0      stevel static void
   1500      0      stevel icmp_info_req(queue_t *q, mblk_t *mp)
   1501      0      stevel {
   1502   5240    nordmark 	icmp_t	*icmp = Q_TO_ICMP(q);
   1503      0      stevel 
   1504  11042        Erik 	/* Create a T_INFO_ACK message. */
   1505      0      stevel 	mp = tpi_ack_alloc(mp, sizeof (struct T_info_ack), M_PCPROTO,
   1506      0      stevel 	    T_INFO_ACK);
   1507      0      stevel 	if (!mp)
   1508      0      stevel 		return;
   1509      0      stevel 	icmp_copy_info((struct T_info_ack *)mp->b_rptr, icmp);
   1510      0      stevel 	qreply(q, mp);
   1511      0      stevel }
   1512      0      stevel 
   1513   5240    nordmark static int
   1514   8348        Eric icmp_tpi_open(queue_t *q, dev_t *devp, int flag, int sflag, cred_t *credp,
   1515   8348        Eric     int family)
   1516   8348        Eric {
   1517   8348        Eric 	conn_t *connp;
   1518   8348        Eric 	dev_t	conn_dev;
   1519  11042        Erik 	int	error;
   1520   8348        Eric 
   1521   8348        Eric 	/* If the stream is already open, return immediately. */
   1522   8348        Eric 	if (q->q_ptr != NULL)
   1523   8348        Eric 		return (0);
   1524   8348        Eric 
   1525   8348        Eric 	if (sflag == MODOPEN)
   1526   8348        Eric 		return (EINVAL);
   1527   8348        Eric 
   1528   8348        Eric 	/*
   1529   8348        Eric 	 * Since ICMP is not used so heavily, allocating from the small
   1530   8348        Eric 	 * arena should be sufficient.
   1531   8348        Eric 	 */
   1532   8348        Eric 	if ((conn_dev = inet_minor_alloc(ip_minor_arena_sa)) == 0) {
   1533   8348        Eric 		return (EBUSY);
   1534   8348        Eric 	}
   1535   8348        Eric 
   1536   8348        Eric 	if (flag & SO_FALLBACK) {
   1537   8348        Eric 		/*
   1538   8348        Eric 		 * Non streams socket needs a stream to fallback to
   1539   8348        Eric 		 */
   1540   8348        Eric 		RD(q)->q_ptr = (void *)conn_dev;
   1541   8348        Eric 		WR(q)->q_qinfo = &icmp_fallback_sock_winit;
   1542   8348        Eric 		WR(q)->q_ptr = (void *)ip_minor_arena_sa;
   1543   8348        Eric 		qprocson(q);
   1544   8348        Eric 		return (0);
   1545   8348        Eric 	}
   1546   8348        Eric 
   1547  11042        Erik 	connp = rawip_do_open(family, credp, &error, KM_SLEEP);
   1548   8348        Eric 	if (connp == NULL) {
   1549  11042        Erik 		ASSERT(error != 0);
   1550   8348        Eric 		inet_minor_free(ip_minor_arena_sa, connp->conn_dev);
   1551   8348        Eric 		return (error);
   1552   8348        Eric 	}
   1553   8348        Eric 
   1554   8348        Eric 	*devp = makedevice(getemajor(*devp), (minor_t)conn_dev);
   1555   8348        Eric 	connp->conn_dev = conn_dev;
   1556   8348        Eric 	connp->conn_minor_arena = ip_minor_arena_sa;
   1557   8348        Eric 
   1558   8348        Eric 	/*
   1559   8348        Eric 	 * Initialize the icmp_t structure for this stream.
   1560   8348        Eric 	 */
   1561   8348        Eric 	q->q_ptr = connp;
   1562   8348        Eric 	WR(q)->q_ptr = connp;
   1563   8348        Eric 	connp->conn_rq = q;
   1564   8348        Eric 	connp->conn_wq = WR(q);
   1565   8348        Eric 
   1566  11042        Erik 	WR(q)->q_hiwat = connp->conn_sndbuf;
   1567  11042        Erik 	WR(q)->q_lowat = connp->conn_sndlowat;
   1568   8348        Eric 
   1569   8348        Eric 	qprocson(q);
   1570   8348        Eric 
   1571   8348        Eric 	/* Set the Stream head write offset. */
   1572  11042        Erik 	(void) proto_set_tx_wroff(q, connp, connp->conn_wroff);
   1573  11042        Erik 	(void) proto_set_rx_hiwat(connp->conn_rq, connp, connp->conn_rcvbuf);
   1574   8348        Eric 
   1575   8348        Eric 	mutex_enter(&connp->conn_lock);
   1576   8348        Eric 	connp->conn_state_flags &= ~CONN_INCIPIENT;
   1577   8348        Eric 	mutex_exit(&connp->conn_lock);
   1578   8348        Eric 
   1579  11042        Erik 	icmp_bind_proto(connp->conn_icmp);
   1580  11042        Erik 
   1581  11042        Erik 	return (0);
   1582  11042        Erik }
   1583  11042        Erik 
   1584  11042        Erik /* For /dev/icmp aka AF_INET open */
   1585   8348        Eric static int
   1586   5240    nordmark icmp_openv4(queue_t *q, dev_t *devp, int flag, int sflag, cred_t *credp)
   1587   5240    nordmark {
   1588   8348        Eric 	return (icmp_tpi_open(q, devp, flag, sflag, credp, AF_INET));
   1589   5240    nordmark }
   1590   5240    nordmark 
   1591   5240    nordmark /* For /dev/icmp6 aka AF_INET6 open */
   1592   5240    nordmark static int
   1593   5240    nordmark icmp_openv6(queue_t *q, dev_t *devp, int flag, int sflag, cred_t *credp)
   1594   5240    nordmark {
   1595   8348        Eric 	return (icmp_tpi_open(q, devp, flag, sflag, credp, AF_INET6));
   1596   1676         jpk }
   1597   1676         jpk 
   1598      0      stevel /*
   1599      0      stevel  * This is the open routine for icmp.  It allocates a icmp_t structure for
   1600      0      stevel  * the stream and, on the first open of the module, creates an ND table.
   1601      0      stevel  */
   1602   8348        Eric static conn_t *
   1603  11042        Erik rawip_do_open(int family, cred_t *credp, int *err, int flags)
   1604   8348        Eric {
   1605      0      stevel 	icmp_t	*icmp;
   1606   5240    nordmark 	conn_t *connp;
   1607   5240    nordmark 	zoneid_t zoneid;
   1608   3448    dh155122 	netstack_t *ns;
   1609   3448    dh155122 	icmp_stack_t *is;
   1610  11042        Erik 	int len;
   1611   8348        Eric 	boolean_t isv6 = B_FALSE;
   1612   8348        Eric 
   1613   8348        Eric 	*err = secpolicy_net_icmpaccess(credp);
   1614   8348        Eric 	if (*err != 0)
   1615   8348        Eric 		return (NULL);
   1616   8348        Eric 
   1617   8348        Eric 	if (family == AF_INET6)
   1618   8348        Eric 		isv6 = B_TRUE;
   1619  11042        Erik 
   1620   3448    dh155122 	ns = netstack_find_by_cred(credp);
   1621   3448    dh155122 	ASSERT(ns != NULL);
   1622   3448    dh155122 	is = ns->netstack_icmp;
   1623   3448    dh155122 	ASSERT(is != NULL);
   1624   3448    dh155122 
   1625   3448    dh155122 	/*
   1626   3448    dh155122 	 * For exclusive stacks we set the zoneid to zero
   1627   3448    dh155122 	 * to make ICMP operate as if in the global zone.
   1628   3448    dh155122 	 */
   1629   5240    nordmark 	if (ns->netstack_stackid != GLOBAL_NETSTACKID)
   1630   3448    dh155122 		zoneid = GLOBAL_ZONEID;
   1631   3448    dh155122 	else
   1632   3448    dh155122 		zoneid = crgetzoneid(credp);
   1633   3448    dh155122 
   1634   8348        Eric 	ASSERT(flags == KM_SLEEP || flags == KM_NOSLEEP);
   1635   8348        Eric 
   1636   8348        Eric 	connp = ipcl_conn_create(IPCL_RAWIPCONN, flags, ns);
   1637   8348        Eric 	icmp = connp->conn_icmp;
   1638   5240    nordmark 
   1639   5240    nordmark 	/*
   1640   5240    nordmark 	 * ipcl_conn_create did a netstack_hold. Undo the hold that was
   1641   5240    nordmark 	 * done by netstack_find_by_cred()
   1642   5240    nordmark 	 */
   1643   5240    nordmark 	netstack_rele(ns);
   1644   5240    nordmark 
   1645  11042        Erik 	/*
   1646  11042        Erik 	 * Since this conn_t/icmp_t is not yet visible to anybody else we don't
   1647  11042        Erik 	 * need to lock anything.
   1648  11042        Erik 	 */
   1649  11042        Erik 	ASSERT(connp->conn_proto == IPPROTO_ICMP);
   1650   5240    nordmark 	ASSERT(connp->conn_icmp == icmp);
   1651   5240    nordmark 	ASSERT(icmp->icmp_connp == connp);
   1652      0      stevel 
   1653      0      stevel 	/* Set the initial state of the stream and the privilege status. */
   1654      0      stevel 	icmp->icmp_state = TS_UNBND;
   1655  11042        Erik 	connp->conn_ixa->ixa_flags |= IXAF_VERIFY_SOURCE;
   1656   5240    nordmark 	if (isv6) {
   1657  11042        Erik 		connp->conn_family = AF_INET6;
   1658  11042        Erik 		connp->conn_ipversion = IPV6_VERSION;
   1659  11042        Erik 		connp->conn_ixa->ixa_flags &= ~IXAF_IS_IPV4;
   1660  11042        Erik 		connp->conn_proto = IPPROTO_ICMPV6;
   1661      0      stevel 		/* May be changed by a SO_PROTOTYPE socket option. */
   1662  11042        Erik 		connp->conn_proto = IPPROTO_ICMPV6;
   1663  11042        Erik 		connp->conn_ixa->ixa_protocol = connp->conn_proto;
   1664  11042        Erik 		connp->conn_ixa->ixa_raw_cksum_offset = 2;
   1665  11042        Erik 		connp->conn_default_ttl = is->is_ipv6_hoplimit;
   1666  11042        Erik 		len = sizeof (ip6_t);
   1667  11042        Erik 	} else {
   1668  11042        Erik 		connp->conn_family = AF_INET;
   1669  11042        Erik 		connp->conn_ipversion = IPV4_VERSION;
   1670  11042        Erik 		connp->conn_ixa->ixa_flags |= IXAF_IS_IPV4;
   1671      0      stevel 		/* May be changed by a SO_PROTOTYPE socket option. */
   1672  11042        Erik 		connp->conn_proto = IPPROTO_ICMP;
   1673  11042        Erik 		connp->conn_ixa->ixa_protocol = connp->conn_proto;
   1674  11042        Erik 		connp->conn_default_ttl = is->is_ipv4_ttl;
   1675  11042        Erik 		len = sizeof (ipha_t);
   1676  11042        Erik 	}
   1677  11042        Erik 	connp->conn_xmit_ipp.ipp_unicast_hops = connp->conn_default_ttl;
   1678  11042        Erik 
   1679  11042        Erik 	connp->conn_ixa->ixa_multicast_ttl = IP_DEFAULT_MULTICAST_TTL;
   1680  11042        Erik 
   1681  11042        Erik 	/*
   1682  11042        Erik 	 * For the socket of protocol IPPROTO_RAW or when IP_HDRINCL is set,
   1683  11042        Erik 	 * the checksum is provided in the pre-built packet. We clear
   1684  11042        Erik 	 * IXAF_SET_ULP_CKSUM to tell IP that the application has sent a
   1685  11042        Erik 	 * complete IP header and not to compute the transport checksum.
   1686  11042        Erik 	 */
   1687  11042        Erik 	connp->conn_ixa->ixa_flags |= IXAF_MULTICAST_LOOP | IXAF_SET_ULP_CKSUM;
   1688  11042        Erik 	/* conn_allzones can not be set this early, hence no IPCL_ZONEID */
   1689  11042        Erik 	connp->conn_ixa->ixa_zoneid = zoneid;
   1690  11042        Erik 
   1691   5240    nordmark 	connp->conn_zoneid = zoneid;
   1692   5240    nordmark 
   1693   5240    nordmark 	/*
   1694   5240    nordmark 	 * If the caller has the process-wide flag set, then default to MAC
   1695   5240    nordmark 	 * exempt mode.  This allows read-down to unlabeled hosts.
   1696   5240    nordmark 	 */
   1697   5240    nordmark 	if (getpflags(NET_MAC_AWARE, credp) != 0)
   1698  10934  sommerfeld 		connp->conn_mac_mode = CONN_MAC_AWARE;
   1699   5240    nordmark 
   1700  11042        Erik 	connp->conn_zone_is_global = (crgetzoneid(credp) == GLOBAL_ZONEID);
   1701   5240    nordmark 
   1702   5240    nordmark 	icmp->icmp_is = is;
   1703   5240    nordmark 
   1704  11042        Erik 	connp->conn_rcvbuf = is->is_recv_hiwat;
   1705  11042        Erik 	connp->conn_sndbuf = is->is_xmit_hiwat;
   1706  11042        Erik 	connp->conn_sndlowat = is->is_xmit_lowat;
   1707  11042        Erik 	connp->conn_rcvlowat = icmp_mod_info.mi_lowat;
   1708  11042        Erik 
   1709  11042        Erik 	connp->conn_wroff = len + is->is_wroff_extra;
   1710  11042        Erik 	connp->conn_so_type = SOCK_RAW;
   1711  11042        Erik 
   1712   5240    nordmark 	connp->conn_recv = icmp_input;
   1713  11042        Erik 	connp->conn_recvicmp = icmp_icmp_input;
   1714   5240    nordmark 	crhold(credp);
   1715   5240    nordmark 	connp->conn_cred = credp;
   1716  11042        Erik 	connp->conn_cpid = curproc->p_pid;
   1717  11066      rafael 	connp->conn_open_time = ddi_get_lbolt64();
   1718  11042        Erik 	/* Cache things in ixa without an extra refhold */
   1719  11042        Erik 	connp->conn_ixa->ixa_cred = connp->conn_cred;
   1720  11042        Erik 	connp->conn_ixa->ixa_cpid = connp->conn_cpid;
   1721  11042        Erik 	if (is_system_labeled())
   1722  11042        Erik 		connp->conn_ixa->ixa_tsl = crgetlabel(connp->conn_cred);
   1723   8348        Eric 
   1724   8348        Eric 	connp->conn_flow_cntrld = B_FALSE;
   1725  11042        Erik 
   1726  11042        Erik 	if (is->is_pmtu_discovery)
   1727  11042        Erik 		connp->conn_ixa->ixa_flags |= IXAF_PMTU_DISCOVERY;
   1728  11042        Erik 
   1729   8348        Eric 	return (connp);
   1730      0      stevel }
   1731      0      stevel 
   1732      0      stevel /*
   1733      0      stevel  * Which ICMP options OK to set through T_UNITDATA_REQ...
   1734      0      stevel  */
   1735      0      stevel /* ARGSUSED */
   1736      0      stevel static boolean_t
   1737      0      stevel icmp_opt_allow_udr_set(t_scalar_t level, t_scalar_t name)
   1738      0      stevel {
   1739      0      stevel 	return (B_TRUE);
   1740      0      stevel }
   1741      0      stevel 
   1742      0      stevel /*
   1743      0      stevel  * This routine gets default values of certain options whose default
   1744      0      stevel  * values are maintained by protcol specific code
   1745      0      stevel  */
   1746  11042        Erik int
   1747  11042        Erik icmp_opt_default(queue_t *q, t_scalar_t level, t_scalar_t name, uchar_t *ptr)
   1748      0      stevel {
   1749   5240    nordmark 	icmp_t *icmp = Q_TO_ICMP(q);
   1750   3448    dh155122 	icmp_stack_t *is = icmp->icmp_is;
   1751      0      stevel 	int *i1 = (int *)ptr;
   1752      0      stevel 
   1753      0      stevel 	switch (level) {
   1754      0      stevel 	case IPPROTO_IP:
   1755      0      stevel 		switch (name) {
   1756      0      stevel 		case IP_MULTICAST_TTL:
   1757      0      stevel 			*ptr = (uchar_t)IP_DEFAULT_MULTICAST_TTL;
   1758      0      stevel 			return (sizeof (uchar_t));
   1759      0      stevel 		case IP_MULTICAST_LOOP:
   1760      0      stevel 			*ptr = (uchar_t)IP_DEFAULT_MULTICAST_LOOP;
   1761      0      stevel 			return (sizeof (uchar_t));
   1762      0      stevel 		}
   1763      0      stevel 		break;
   1764      0      stevel 	case IPPROTO_IPV6:
   1765      0      stevel 		switch (name) {
   1766      0      stevel 		case IPV6_MULTICAST_HOPS:
   1767      0      stevel 			*i1 = IP_DEFAULT_MULTICAST_TTL;
   1768      0      stevel 			return (sizeof (int));
   1769      0      stevel 		case IPV6_MULTICAST_LOOP:
   1770      0      stevel 			*i1 = IP_DEFAULT_MULTICAST_LOOP;
   1771      0      stevel 			return (sizeof (int));
   1772      0      stevel 		case IPV6_UNICAST_HOPS:
   1773   3448    dh155122 			*i1 = is->is_ipv6_hoplimit;
   1774      0      stevel 			return (sizeof (int));
   1775      0      stevel 		}
   1776      0      stevel 		break;
   1777      0      stevel 	case IPPROTO_ICMPV6:
   1778      0      stevel 		switch (name) {
   1779      0      stevel 		case ICMP6_FILTER:
   1780      0      stevel 			/* Make it look like "pass all" */
   1781      0      stevel 			ICMP6_FILTER_SETPASSALL((icmp6_filter_t *)ptr);
   1782      0      stevel 			return (sizeof (icmp6_filter_t));
   1783      0      stevel 		}
   1784      0      stevel 		break;
   1785      0      stevel 	}
   1786      0      stevel 	return (-1);
   1787      0      stevel }
   1788      0      stevel 
   1789      0      stevel /*
   1790      0      stevel  * This routine retrieves the current status of socket options.
   1791  11042        Erik  * It returns the size of the option retrieved, or -1.
   1792      0      stevel  */
   1793      0      stevel int
   1794   8348        Eric icmp_opt_get(conn_t *connp, int level, int name, uchar_t *ptr)
   1795   8348        Eric {
   1796   8348        Eric 	icmp_t		*icmp = connp->conn_icmp;
   1797   8348        Eric 	int		*i1 = (int *)ptr;
   1798  11042        Erik 	conn_opt_arg_t	coas;
   1799  11042        Erik 	int		retval;
   1800  11042        Erik 
   1801  11042        Erik 	coas.coa_connp = connp;
   1802  11042        Erik 	coas.coa_ixa = connp->conn_ixa;
   1803  11042        Erik 	coas.coa_ipp = &connp->conn_xmit_ipp;
   1804  11042        Erik 	coas.coa_ancillary = B_FALSE;
   1805  11042        Erik 	coas.coa_changed = 0;
   1806  11042        Erik 
   1807  11042        Erik 	/*
   1808  11042        Erik 	 * We assume that the optcom framework has checked for the set
   1809  11042        Erik 	 * of levels and names that are supported, hence we don't worry
   1810  11042        Erik 	 * about rejecting based on that.
   1811  11042        Erik 	 * First check for ICMP specific handling, then pass to common routine.
   1812  11042        Erik 	 */
   1813      0      stevel 	switch (level) {
   1814      0      stevel 	case IPPROTO_IP:
   1815      0      stevel 		/*
   1816      0      stevel 		 * Only allow IPv4 option processing on IPv4 sockets.
   1817      0      stevel 		 */
   1818  11042        Erik 		if (connp->conn_family != AF_INET)
   1819  11042        Erik 			return (-1);
   1820      0      stevel 
   1821      0      stevel 		switch (name) {
   1822      0      stevel 		case IP_OPTIONS:
   1823      0      stevel 		case T_IP_OPTIONS:
   1824      0      stevel 			/* Options are passed up with each packet */
   1825  11042        Erik 			return (0);
   1826      0      stevel 		case IP_HDRINCL:
   1827  11042        Erik 			mutex_enter(&connp->conn_lock);
   1828      0      stevel 			*i1 = (int)icmp->icmp_hdrincl;
   1829  11042        Erik 			mutex_exit(&connp->conn_lock);
   1830  11042        Erik 			return (sizeof (int));
   1831  11042        Erik 		}
   1832  11042        Erik 		break;
   1833  11042        Erik 
   1834      0      stevel 	case IPPROTO_IPV6:
   1835      0      stevel 		/*
   1836      0      stevel 		 * Only allow IPv6 option processing on native IPv6 sockets.
   1837      0      stevel 		 */
   1838  11042        Erik 		if (connp->conn_family != AF_INET6)
   1839  11042        Erik 			return (-1);
   1840  11042        Erik 
   1841  11042        Erik 		switch (name) {
   1842      0      stevel 		case IPV6_CHECKSUM:
   1843      0      stevel 			/*
   1844      0      stevel 			 * Return offset or -1 if no checksum offset.
   1845      0      stevel 			 * Does not apply to IPPROTO_ICMPV6
   1846      0      stevel 			 */
   1847  11042        Erik 			if (connp->conn_proto == IPPROTO_ICMPV6)
   1848  11042        Erik 				return (-1);
   1849  11042        Erik 
   1850  11042        Erik 			mutex_enter(&connp->conn_lock);
   1851  11042        Erik 			if (connp->conn_ixa->ixa_flags & IXAF_SET_RAW_CKSUM)
   1852  11042        Erik 				*i1 = connp->conn_ixa->ixa_raw_cksum_offset;
   1853  11042        Erik 			else
   1854      0      stevel 				*i1 = -1;
   1855  11042        Erik 			mutex_exit(&connp->conn_lock);
   1856  11042        Erik 			return (sizeof (int));
   1857  11042        Erik 		}
   1858  11042        Erik 		break;
   1859  11042        Erik 
   1860      0      stevel 	case IPPROTO_ICMPV6:
   1861      0      stevel 		/*
   1862      0      stevel 		 * Only allow IPv6 option processing on native IPv6 sockets.
   1863      0      stevel 		 */
   1864  11042        Erik 		if (connp->conn_family != AF_INET6)
   1865  11042        Erik 			return (-1);
   1866  11042        Erik 
   1867  11042        Erik 		if (connp->conn_proto != IPPROTO_ICMPV6)
   1868  11042        Erik 			return (-1);
   1869      0      stevel 
   1870      0      stevel 		switch (name) {
   1871      0      stevel 		case ICMP6_FILTER:
   1872  11042        Erik 			mutex_enter(&connp->conn_lock);
   1873      0      stevel 			if (icmp->icmp_filter == NULL) {
   1874      0      stevel 				/* Make it look like "pass all" */
   1875      0      stevel 				ICMP6_FILTER_SETPASSALL((icmp6_filter_t *)ptr);
   1876      0      stevel 			} else {
   1877      0      stevel 				(void) bcopy(icmp->icmp_filter, ptr,
   1878      0      stevel 				    sizeof (icmp6_filter_t));
   1879      0      stevel 			}
   1880  11042        Erik 			mutex_exit(&connp->conn_lock);
   1881  11042        Erik 			return (sizeof (icmp6_filter_t));
   1882  11042        Erik 		}
   1883  11042        Erik 	}
   1884  11042        Erik 	mutex_enter(&connp->conn_lock);
   1885  11042        Erik 	retval = conn_opt_get(&coas, level, name, ptr);
   1886  11042        Erik 	mutex_exit(&connp->conn_lock);
   1887  11042        Erik 	return (retval);
   1888      0      stevel }
   1889      0      stevel 
   1890   5240    nordmark /*
   1891   5240    nordmark  * This routine retrieves the current status of socket options.
   1892  11042        Erik  * It returns the size of the option retrieved, or -1.
   1893   5240    nordmark  */
   1894   5240    nordmark int
   1895   8348        Eric icmp_tpi_opt_get(queue_t *q, int level, int name, uchar_t *ptr)
   1896   8348        Eric {
   1897  11042        Erik 	conn_t		*connp = Q_TO_CONN(q);
   1898  11042        Erik 	int 		err;
   1899  11042        Erik 
   1900   8348        Eric 	err = icmp_opt_get(connp, level, name, ptr);
   1901   5240    nordmark 	return (err);
   1902   5240    nordmark }
   1903   5240    nordmark 
   1904  11042        Erik /*
   1905  11042        Erik  * This routine sets socket options.
   1906  11042        Erik  */
   1907  11042        Erik int
   1908  11042        Erik icmp_do_opt_set(conn_opt_arg_t *coa, int level, int name,
   1909  11042        Erik     uint_t inlen, uchar_t *invalp, cred_t *cr, boolean_t checkonly)
   1910  11042        Erik {
   1911  11042        Erik 	conn_t		*connp = coa->coa_connp;
   1912  11042        Erik 	ip_xmit_attr_t	*ixa = coa->coa_ixa;
   1913  11042        Erik 	icmp_t		*icmp = connp->conn_icmp;
   1914  11042        Erik 	icmp_stack_t	*is = icmp->icmp_is;
   1915  11042        Erik 	int		*i1 = (int *)invalp;
   1916  11042        Erik 	boolean_t	onoff = (*i1 == 0) ? 0 : 1;
   1917  11042        Erik 	int		error;
   1918  11042        Erik 
   1919  11042        Erik 	ASSERT(MUTEX_NOT_HELD(&coa->coa_connp->conn_lock));
   1920  11042        Erik 
   1921      0      stevel 	/*
   1922      0      stevel 	 * For fixed length options, no sanity check
   1923      0      stevel 	 * of passed in length is done. It is assumed *_optcom_req()
   1924      0      stevel 	 * routines do the right thing.
   1925      0      stevel 	 */
   1926  11042        Erik 
   1927      0      stevel 	switch (level) {
   1928      0      stevel 	case SOL_SOCKET:
   1929      0      stevel 		switch (name) {
   1930      0      stevel 		case SO_PROTOTYPE:
   1931      0      stevel 			if ((*i1 & 0xFF) != IPPROTO_ICMP &&
   1932      0      stevel 			    (*i1 & 0xFF) != IPPROTO_ICMPV6 &&
   1933      0      stevel 			    secpolicy_net_rawaccess(cr) != 0) {
   1934      0      stevel 				return (EACCES);
   1935      0      stevel 			}
   1936  11042        Erik 			if (checkonly)
   1937  11042        Erik 				break;
   1938  11042        Erik 
   1939  11042        Erik 			mutex_enter(&connp->conn_lock);
   1940  11042        Erik 			connp->conn_proto = *i1 & 0xFF;
   1941  11042        Erik 			ixa->ixa_protocol = connp->conn_proto;
   1942  11042        Erik 			if ((connp->conn_proto == IPPROTO_RAW ||
   1943  11042        Erik 			    connp->conn_proto == IPPROTO_IGMP) &&
   1944  11042        Erik 			    connp->conn_family == AF_INET) {
   1945      0      stevel 				icmp->icmp_hdrincl = 1;
   1946  11042        Erik 				ixa->ixa_flags &= ~IXAF_SET_ULP_CKSUM;
   1947  11042        Erik 			} else if (connp->conn_proto == IPPROTO_UDP ||
   1948  11042        Erik 			    connp->conn_proto == IPPROTO_TCP ||
   1949  11042        Erik 			    connp->conn_proto == IPPROTO_SCTP) {
   1950  11042        Erik 				/* Used by test applications like psh */
   1951      0      stevel 				icmp->icmp_hdrincl = 0;
   1952  11042        Erik 				ixa->ixa_flags &= ~IXAF_SET_ULP_CKSUM;
   1953  11042        Erik 			} else {
   1954  11042        Erik 				icmp->icmp_hdrincl = 0;
   1955  11042        Erik 				ixa->ixa_flags |= IXAF_SET_ULP_CKSUM;
   1956  11042        Erik 			}
   1957  11042        Erik 
   1958  11042        Erik 			if (connp->conn_family == AF_INET6 &&
   1959  11042        Erik 			    connp->conn_proto == IPPROTO_ICMPV6) {
   1960      0      stevel 				/* Set offset for icmp6_cksum */
   1961  11042        Erik 				ixa->ixa_flags &= ~IXAF_SET_RAW_CKSUM;
   1962  11042        Erik 				ixa->ixa_raw_cksum_offset = 2;
   1963  11042        Erik 			}
   1964      0      stevel 			if (icmp->icmp_filter != NULL &&
   1965  11042        Erik 			    connp->conn_proto != IPPROTO_ICMPV6) {
   1966      0      stevel 				kmem_free(icmp->icmp_filter,
   1967      0      stevel 				    sizeof (icmp6_filter_t));
   1968      0      stevel 				icmp->icmp_filter = NULL;
   1969      0      stevel 			}
   1970  11042        Erik 			mutex_exit(&connp->conn_lock);
   1971  11042        Erik 
   1972  11042        Erik 			coa->coa_changed |= COA_HEADER_CHANGED;
   1973    409      kcpoon 			/*
   1974    409      kcpoon 			 * For SCTP, we don't use icmp_bind_proto() for
   1975  11042        Erik 			 * raw socket binding.
   1976  11042        Erik 			 */
   1977  11042        Erik 			if (connp->conn_proto == IPPROTO_SCTP)
   1978    409      kcpoon 				return (0);
   1979    409      kcpoon 
   1980  11042        Erik 			coa->coa_changed |= COA_ICMP_BIND_NEEDED;
   1981  11042        Erik 			return (0);
   1982      0      stevel 
   1983      0      stevel 		case SO_SNDBUF:
   1984   3448    dh155122 			if (*i1 > is->is_max_buf) {
   1985      0      stevel 				return (ENOBUFS);
   1986      0      stevel 			}
   1987      0      stevel 			break;
   1988      0      stevel 		case SO_RCVBUF:
   1989   3448    dh155122 			if (*i1 > is->is_max_buf) {
   1990      0      stevel 				return (ENOBUFS);
   1991      0      stevel 			}
   1992  11042        Erik 			break;
   1993  11042        Erik 		}
   1994  11042        Erik 		break;
   1995  11042        Erik 
   1996  11042        Erik 	case IPPROTO_IP:
   1997  11042        Erik 		/*
   1998  11042        Erik 		 * Only allow IPv4 option processing on IPv4 sockets.
   1999  11042        Erik 		 */
   2000  11042        Erik 		if (connp->conn_family != AF_INET)
   2001  11042        Erik 			return (EINVAL);
   2002  11042        Erik 
   2003  11042        Erik 		switch (name) {
   2004  11042        Erik 		case IP_HDRINCL:
   2005      0      stevel 			if (!checkonly) {
   2006  11042        Erik 				mutex_enter(&connp->conn_lock);
   2007      0      stevel 				icmp->icmp_hdrincl = onoff;
   2008  11042        Erik 				if (onoff)
   2009  11042        Erik 					ixa->ixa_flags &= ~IXAF_SET_ULP_CKSUM;
   2010  11042        Erik 				else
   2011  11042        Erik 					ixa->ixa_flags |= IXAF_SET_ULP_CKSUM;
   2012  11042        Erik 				mutex_exit(&connp->conn_lock);
   2013  11042        Erik 			}
   2014  11042        Erik 			break;
   2015  11042        Erik 		}
   2016  11042        Erik 		break;
   2017  11042        Erik 
   2018  11042        Erik 	case IPPROTO_IPV6:
   2019  11042        Erik 		if (connp->conn_family != AF_INET6)
   2020  11042        Erik 			return (EINVAL);
   2021  11042        Erik 
   2022  11042        Erik 		switch (name) {
   2023      0      stevel 		case IPV6_CHECKSUM:
   2024      0      stevel 			/*
   2025      0      stevel 			 * Integer offset into the user data of where the
   2026      0      stevel 			 * checksum is located.
   2027      0      stevel 			 * Offset of -1 disables option.
   2028      0      stevel 			 * Does not apply to IPPROTO_ICMPV6.
   2029      0      stevel 			 */
   2030  11042        Erik 			if (connp->conn_proto == IPPROTO_ICMPV6 ||
   2031  11042        Erik 			    coa->coa_ancillary) {
   2032      0      stevel 				return (EINVAL);
   2033      0      stevel 			}
   2034      0      stevel 			if ((*i1 != -1) && ((*i1 < 0) || (*i1 & 0x1) != 0)) {
   2035      0      stevel 				/* Negative or not 16 bit aligned offset */
   2036      0      stevel 				return (EINVAL);
   2037      0      stevel 			}
   2038      0      stevel 			if (checkonly)
   2039      0      stevel 				break;
   2040      0      stevel 
   2041  11042        Erik 			mutex_enter(&connp->conn_lock);
   2042      0      stevel 			if (*i1 == -1) {
   2043  11042        Erik 				ixa->ixa_flags &= ~IXAF_SET_RAW_CKSUM;
   2044  11042        Erik 				ixa->ixa_raw_cksum_offset = 0;
   2045  11042        Erik 				ixa->ixa_flags &= ~IXAF_SET_ULP_CKSUM;
   2046  11042        Erik 			} else {
   2047  11042        Erik 				ixa->ixa_flags |= IXAF_SET_RAW_CKSUM;
   2048  11042        Erik 				ixa->ixa_raw_cksum_offset = *i1;
   2049  11042        Erik 				ixa->ixa_flags |= IXAF_SET_ULP_CKSUM;
   2050  11042        Erik 			}
   2051  11042        Erik 			mutex_exit(&connp->conn_lock);
   2052  11042        Erik 			break;
   2053  11042        Erik 		}
   2054  11042        Erik 		break;
   2055  11042        Erik 
   2056  11042        Erik 	case IPPROTO_ICMPV6:
   2057  11042        Erik 		/*
   2058  11042        Erik 		 * Only allow IPv6 option processing on IPv6 sockets.
   2059  11042        Erik 		 */
   2060  11042        Erik 		if (connp->conn_family != AF_INET6)
   2061  11042        Erik 			return (EINVAL);
   2062  11042        Erik 		if (connp->conn_proto != IPPROTO_ICMPV6)
   2063  11042        Erik 			return (EINVAL);
   2064  11042        Erik 
   2065  11042        Erik 		switch (name) {
   2066  11042        Erik 		case ICMP6_FILTER:
   2067  11042        Erik 			if (checkonly)
   2068  11042        Erik 				break;
   2069  11042        Erik 
   2070  11042        Erik 			if ((inlen != 0) &&
   2071  11042        Erik 			    (inlen != sizeof (icmp6_filter_t)))
   2072   8348        Eric 				return (EINVAL);
   2073  11042        Erik 
   2074  11042        Erik 			mutex_enter(&connp->conn_lock);
   2075      0      stevel 			if (inlen == 0) {
   2076  11042        Erik 				if (icmp->icmp_filter != NULL) {
   2077  11042        Erik 					kmem_free(icmp->icmp_filter,
   2078  11042        Erik 					    sizeof (icmp6_filter_t));
   2079  11042        Erik 					icmp->icmp_filter = NULL;
   2080  11042        Erik 				}
   2081  11042        Erik 			} else {
   2082  11042        Erik 				if (icmp->icmp_filter == NULL) {
   2083  11042        Erik 					icmp->icmp_filter = kmem_alloc(
   2084  11042        Erik 					    sizeof (icmp6_filter_t),
   2085  11042        Erik 					    KM_NOSLEEP);
   2086  11042        Erik 					if (icmp->icmp_filter == NULL) {
   2087  11042        Erik 						mutex_exit(&connp->conn_lock);
   2088  11042        Erik 						return (ENOBUFS);
   2089      0      stevel 					}
   2090  11042        Erik 				}
   2091  11042        Erik 				(void) bcopy(invalp, icmp->icmp_filter, inlen);
   2092  11042        Erik 			}
   2093  11042        Erik 			mutex_exit(&connp->conn_lock);
   2094  11042        Erik 			break;
   2095  11042        Erik 		}
   2096  11042        Erik 		break;
   2097  11042        Erik 	}
   2098  11042        Erik 	error = conn_opt_set(coa, level, name, inlen, invalp,
   2099  11042        Erik 	    checkonly, cr);
   2100  11042        Erik 	return (error);
   2101  11042        Erik }
   2102  11042        Erik 
   2103  11042        Erik /*
   2104  11042        Erik  * This routine sets socket options.
   2105  11042        Erik  */
   2106   5240    nordmark int
   2107   8348        Eric icmp_opt_set(conn_t *connp, uint_t optset_context, int level, int name,
   2108   8348        Eric     uint_t inlen, uchar_t *invalp, uint_t *outlenp, uchar_t *outvalp,
   2109   8348        Eric     void *thisdg_attrs, cred_t *cr)
   2110   8348        Eric {
   2111  11042        Erik 	icmp_t		*icmp = connp->conn_icmp;
   2112  11042        Erik 	int		err;
   2113  11042        Erik 	conn_opt_arg_t	coas, *coa;
   2114  11042        Erik 	boolean_t	checkonly;
   2115  11042        Erik 	icmp_stack_t	*is = icmp->icmp_is;
   2116  11042        Erik 
   2117   8348        Eric 	switch (optset_context) {
   2118   8348        Eric 	case SETFN_OPTCOM_CHECKONLY:
   2119   8348        Eric 		checkonly = B_TRUE;
   2120   8348        Eric 		/*
   2121   8348        Eric 		 * Note: Implies T_CHECK semantics for T_OPTCOM_REQ
   2122   8348        Eric 		 * inlen != 0 implies value supplied and
   2123   8348        Eric 		 * 	we have to "pretend" to set it.
   2124   8348        Eric 		 * inlen == 0 implies that there is no
   2125   8348        Eric 		 * 	value part in T_CHECK request and just validation
   2126   8348        Eric 		 * done elsewhere should be enough, we just return here.
   2127   8348        Eric 		 */
   2128   8348        Eric 		if (inlen == 0) {
   2129   8348        Eric 			*outlenp = 0;
   2130  11042        Erik 			return (0);
   2131   8348        Eric 		}
   2132   8348        Eric 		break;
   2133   8348        Eric 	case SETFN_OPTCOM_NEGOTIATE:
   2134   8348        Eric 		checkonly = B_FALSE;
   2135   8348        Eric 		break;
   2136   8348        Eric 	case SETFN_UD_NEGOTIATE:
   2137   8348        Eric 	case SETFN_CONN_NEGOTIATE:
   2138   8348        Eric 		checkonly = B_FALSE;
   2139   8348        Eric 		/*
   2140   8348        Eric 		 * Negotiating local and "association-related" options
   2141   8348        Eric 		 * through T_UNITDATA_REQ.
   2142   8348        Eric 		 *
   2143   8348        Eric 		 * Following routine can filter out ones we do not
   2144   8348        Eric 		 * want to be "set" this way.
   2145   8348        Eric 		 */
   2146   8348        Eric 		if (!icmp_opt_allow_udr_set(level, name)) {
   2147   8348        Eric 			*outlenp = 0;
   2148  11042        Erik 			return (EINVAL);
   2149   8348        Eric 		}
   2150   8348        Eric 		break;
   2151   8348        Eric 	default:
   2152   8348        Eric 		/*
   2153   8348        Eric 		 * We should never get here
   2154   8348        Eric 		 */
   2155   8348        Eric 		*outlenp = 0;
   2156  11042        Erik 		return (EINVAL);
   2157   8348        Eric 	}
   2158   8348        Eric 
   2159   8348        Eric 	ASSERT((optset_context != SETFN_OPTCOM_CHECKONLY) ||
   2160   8348        Eric 	    (optset_context == SETFN_OPTCOM_CHECKONLY && inlen != 0));
   2161  11042        Erik 
   2162  11042        Erik 	if (thisdg_attrs != NULL) {
   2163  11042        Erik 		/* Options from T_UNITDATA_REQ */
   2164  11042        Erik 		coa = (conn_opt_arg_t *)thisdg_attrs;
   2165  11042        Erik 		ASSERT(coa->coa_connp == connp);
   2166  11042        Erik 		ASSERT(coa->coa_ixa != NULL);
   2167  11042        Erik 		ASSERT(coa->coa_ipp != NULL);
   2168  11042        Erik 		ASSERT(coa->coa_ancillary);
   2169  11042        Erik 	} else {
   2170  11042        Erik 		coa = &coas;
   2171  11042        Erik 		coas.coa_connp = connp;
   2172  11042        Erik 		/* Get a reference on conn_ixa to prevent concurrent mods */
   2173  11042        Erik 		coas.coa_ixa = conn_get_ixa(connp, B_TRUE);
   2174  11042        Erik 		if (coas.coa_ixa == NULL) {
   2175  11042        Erik 			*outlenp = 0;
   2176  11042        Erik 			return (ENOMEM);
   2177  11042        Erik 		}
   2178  11042        Erik 		coas.coa_ipp = &connp->conn_xmit_ipp;
   2179  11042        Erik 		coas.coa_ancillary = B_FALSE;
   2180  11042        Erik 		coas.coa_changed = 0;
   2181  11042        Erik 	}
   2182  11042        Erik 
   2183  11042        Erik 	err = icmp_do_opt_set(coa, level, name, inlen, invalp,
   2184  11042        Erik 	    cr, checkonly);
   2185  11042        Erik 	if (err != 0) {
   2186  11042        Erik errout:
   2187  11042        Erik 		if (!coa->coa_ancillary)
   2188  11042        Erik 			ixa_refrele(coa->coa_ixa);
   2189  11042        Erik 		*outlenp = 0;
   2190  11042        Erik 		return (err);
   2191  11042        Erik 	}
   2192  11042        Erik 
   2193  11042        Erik 	/*
   2194  11042        Erik 	 * Common case of OK return with outval same as inval.
   2195  11042        Erik 	 */
   2196  11042        Erik 	if (invalp != outvalp) {
   2197  11042        Erik 		/* don't trust bcopy for identical src/dst */
   2198  11042        Erik 		(void) bcopy(invalp, outvalp, inlen);
   2199  11042        Erik 	}
   2200  11042        Erik 	*outlenp = inlen;
   2201  11042        Erik 
   2202  11042        Erik 	/*
   2203  11042        Erik 	 * If this was not ancillary data, then we rebuild the headers,
   2204  11042        Erik 	 * update the IRE/NCE, and IPsec as needed.
   2205  11042        Erik 	 * Since the label depends on the destination we go through
   2206  11042        Erik 	 * ip_set_destination first.
   2207  11042        Erik 	 */
   2208  11042        Erik 	if (coa->coa_ancillary) {
   2209  11042        Erik 		return (0);
   2210  11042        Erik 	}
   2211  11042        Erik 
   2212  11042        Erik 	if (coa->coa_changed & COA_ROUTE_CHANGED) {
   2213  11042        Erik 		in6_addr_t saddr, faddr, nexthop;
   2214  11042        Erik 		in_port_t fport;
   2215  11042        Erik 
   2216  11042        Erik 		/*
   2217  11042        Erik 		 * We clear lastdst to make sure we pick up the change
   2218  11042        Erik 		 * next time sending.
   2219  11042        Erik 		 * If we are connected we re-cache the information.
   2220  11042        Erik 		 * We ignore errors to preserve BSD behavior.
   2221  11042        Erik 		 * Note that we don't redo IPsec policy lookup here
   2222  11042        Erik 		 * since the final destination (or source) didn't change.
   2223  11042        Erik 		 */
   2224  11042        Erik 		mutex_enter(&connp->conn_lock);
   2225  11042        Erik 		connp->conn_v6lastdst = ipv6_all_zeros;
   2226  11042        Erik 
   2227  11042        Erik 		ip_attr_nexthop(coa->coa_ipp, coa->coa_ixa,
   2228  11042        Erik 		    &connp->conn_faddr_v6, &nexthop);
   2229  11042        Erik 		saddr = connp->conn_saddr_v6;
   2230  11042        Erik 		faddr = connp->conn_faddr_v6;
   2231  11042        Erik 		fport = connp->conn_fport;
   2232  11042        Erik 		mutex_exit(&connp->conn_lock);
   2233  11042        Erik 
   2234  11042        Erik 		if (!IN6_IS_ADDR_UNSPECIFIED(&faddr) &&
   2235  11042        Erik 		    !IN6_IS_ADDR_V4MAPPED_ANY(&faddr)) {
   2236  11042        Erik 			(void) ip_attr_connect(connp, coa->coa_ixa,
   2237  11042        Erik 			    &saddr, &faddr, &nexthop, fport, NULL, NULL,
   2238  11042        Erik 			    IPDF_ALLOW_MCBC | IPDF_VERIFY_DST);
   2239  11042        Erik 		}
   2240  11042        Erik 	}
   2241  11042        Erik 
   2242  11042        Erik 	ixa_refrele(coa->coa_ixa);
   2243  11042        Erik 
   2244  11042        Erik 	if (coa->coa_changed & COA_HEADER_CHANGED) {
   2245  11042        Erik 		/*
   2246  11042        Erik 		 * Rebuild the header template if we are connected.
   2247  11042        Erik 		 * Otherwise clear conn_v6lastdst so we rebuild the header
   2248  11042        Erik 		 * in the data path.
   2249  11042        Erik 		 */
   2250  11042        Erik 		mutex_enter(&connp->conn_lock);
   2251  11042        Erik 		if (!IN6_IS_ADDR_UNSPECIFIED(&connp->conn_faddr_v6) &&
   2252  11042        Erik 		    !IN6_IS_ADDR_V4MAPPED_ANY(&connp->conn_faddr_v6)) {
   2253  11042        Erik 			err = icmp_build_hdr_template(connp,
   2254  11042        Erik 			    &connp->conn_saddr_v6, &connp->conn_faddr_v6,
   2255  11042        Erik 			    connp->conn_flowinfo);
   2256  11042        Erik 			if (err != 0) {
   2257  11042        Erik 				mutex_exit(&connp->conn_lock);
   2258  11042        Erik 				return (err);
   2259  11042        Erik 			}
   2260  11042        Erik 		} else {
   2261  11042        Erik 			connp->conn_v6lastdst = ipv6_all_zeros;
   2262  11042        Erik 		}
   2263  11042        Erik 		mutex_exit(&connp->conn_lock);
   2264  11042        Erik 	}
   2265  11042        Erik 	if (coa->coa_changed & COA_RCVBUF_CHANGED) {
   2266  11042        Erik 		(void) proto_set_rx_hiwat(connp->conn_rq, connp,
   2267  11042        Erik 		    connp->conn_rcvbuf);
   2268  11042        Erik 	}
   2269  11042        Erik 	if ((coa->coa_changed & COA_SNDBUF_CHANGED) && !IPCL_IS_NONSTR(connp)) {
   2270  11042        Erik 		connp->conn_wq->q_hiwat = connp->conn_sndbuf;
   2271  11042        Erik 	}
   2272  11042        Erik 	if (coa->coa_changed & COA_WROFF_CHANGED) {
   2273  11042        Erik 		/* Increase wroff if needed */
   2274  11042        Erik 		uint_t wroff;
   2275  11042        Erik 
   2276  11042        Erik 		mutex_enter(&connp->conn_lock);
   2277  11042        Erik 		wroff = connp->conn_ht_iphc_allocated + is->is_wroff_extra;
   2278  11042        Erik 		if (wroff > connp->conn_wroff) {
   2279  11042        Erik 			connp->conn_wroff = wroff;
   2280  11042        Erik 			mutex_exit(&connp->conn_lock);
   2281  11042        Erik 			(void) proto_set_tx_wroff(connp->conn_rq, connp, wroff);
   2282  11042        Erik 		} else {
   2283  11042        Erik 			mutex_exit(&connp->conn_lock);
   2284  11042        Erik 		}
   2285  11042        Erik 	}
   2286  11042        Erik 	if (coa->coa_changed & COA_ICMP_BIND_NEEDED) {
   2287  11042        Erik 		icmp_bind_proto(icmp);
   2288  11042        Erik 	}
   2289  11042        Erik 	return (err);
   2290   8348        Eric }
   2291   8348        Eric 
   2292   8348        Eric /* This routine sets socket options. */
   2293   8348        Eric int
   2294   8348        Eric icmp_tpi_opt_set(queue_t *q, uint_t optset_context, int level, int name,
   2295   5240    nordmark     uint_t inlen, uchar_t *invalp, uint_t *outlenp, uchar_t *outvalp,
   2296  11042        Erik     void *thisdg_attrs, cred_t *cr)
   2297  11042        Erik {
   2298  11042        Erik 	conn_t	*connp = Q_TO_CONN(q);
   2299  11042        Erik 	int error;
   2300  11042        Erik 
   2301   8348        Eric 	error = icmp_opt_set(connp, optset_context, level, name, inlen, invalp,
   2302   8348        Eric 	    outlenp, outvalp, thisdg_attrs, cr);
   2303  11042        Erik 	return (error);
   2304  11042        Erik }
   2305  11042        Erik 
   2306  11042        Erik /*
   2307  11042        Erik  * Setup IP headers.
   2308  11042        Erik  *
   2309  11042        Erik  * Note that IP_HDRINCL has ipha_protocol that is different than conn_proto,
   2310  11042        Erik  * but icmp_output_hdrincl restores ipha_protocol once we return.
   2311  11042        Erik  */
   2312  11042        Erik mblk_t *
   2313  11042        Erik icmp_prepend_hdr(conn_t *connp, ip_xmit_attr_t *ixa, const ip_pkt_t *ipp,
   2314  11042        Erik     const in6_addr_t *v6src, const in6_addr_t *v6dst, uint32_t flowinfo,
   2315  11042        Erik     mblk_t *data_mp, int *errorp)
   2316  11042        Erik {
   2317  11042        Erik 	mblk_t		*mp;
   2318  11042        Erik 	icmp_stack_t	*is = connp->conn_netstack->netstack_icmp;
   2319  11042        Erik 	uint_t		data_len;
   2320  11042        Erik 	uint32_t	cksum;
   2321  11042        Erik 
   2322  11042        Erik 	data_len = msgdsize(data_mp);
   2323  11042        Erik 	mp = conn_prepend_hdr(ixa, ipp, v6src, v6dst, connp->conn_proto,
   2324  11042        Erik 	    flowinfo, 0, data_mp, data_len, is->is_wroff_extra, &cksum, errorp);
   2325  11042        Erik 	if (mp == NULL) {
   2326  11042        Erik 		ASSERT(*errorp != 0);
   2327  11042        Erik 		return (NULL);
   2328  11042        Erik 	}
   2329  11042        Erik 
   2330  11042        Erik 	ixa->ixa_pktlen = data_len + ixa->ixa_ip_hdr_length;
   2331  11042        Erik 
   2332  11042        Erik 	/*
   2333  11042        Erik 	 * If there was a routing option/header then conn_prepend_hdr
   2334  11042        Erik 	 * has massaged it and placed the pseudo-header checksum difference
   2335  11042        Erik 	 * in the cksum argument.
   2336  11042        Erik 	 *
   2337  11042        Erik 	 * Prepare for ICMPv6 checksum done in IP.
   2338  11042        Erik 	 *
   2339  11042        Erik 	 * We make it easy for IP to include our pseudo header
   2340  11042        Erik 	 * by putting our length (and any routing header adjustment)
   2341  11042        Erik 	 * in the ICMPv6 checksum field.
   2342  11042        Erik 	 * The IP source, destination, and length have already been set by
   2343  11042        Erik 	 * conn_prepend_hdr.
   2344  11042        Erik 	 */
   2345  11042        Erik 	cksum += data_len;
   2346  11042        Erik 	cksum = (cksum >> 16) + (cksum & 0xFFFF);
   2347  11042        Erik 	ASSERT(cksum < 0x10000);
   2348  11042        Erik 
   2349  11042        Erik 	if (ixa->ixa_flags & IXAF_IS_IPV4) {
   2350  11042        Erik 		ipha_t	*ipha = (ipha_t *)mp->b_rptr;
   2351  11042        Erik 
   2352  11042        Erik 		ASSERT(ntohs(ipha->ipha_length) == ixa->ixa_pktlen);
   2353  11042        Erik 	} else {
   2354  11042        Erik 		ip6_t	*ip6h = (ip6_t *)mp->b_rptr;
   2355  11042        Erik 		uint_t	cksum_offset = 0;
   2356  11042        Erik 
   2357  11042        Erik 		ASSERT(ntohs(ip6h->ip6_plen) + IPV6_HDR_LEN == ixa->ixa_pktlen);
   2358  11042        Erik 
   2359  11042        Erik 		if (ixa->ixa_flags & IXAF_SET_ULP_CKSUM) {
   2360  11042        Erik 			if (connp->conn_proto == IPPROTO_ICMPV6) {
   2361  11042        Erik 				cksum_offset = ixa->ixa_ip_hdr_length +
   2362  11042        Erik 				    offsetof(icmp6_t, icmp6_cksum);
   2363  11042        Erik 			} else if (ixa->ixa_flags & IXAF_SET_RAW_CKSUM) {
   2364  11042        Erik 				cksum_offset = ixa->ixa_ip_hdr_length +
   2365  11042        Erik 				    ixa->ixa_raw_cksum_offset;
   2366  11042        Erik 			}
   2367  11042        Erik 		}
   2368  11042        Erik 		if (cksum_offset != 0) {
   2369  11042        Erik 			uint16_t *ptr;
   2370  11042        Erik 
   2371  11042        Erik 			/* Make sure the checksum fits in the first mblk */
   2372  11042        Erik 			if (cksum_offset + sizeof (short) > MBLKL(mp)) {
   2373  11042        Erik 				mblk_t *mp1;
   2374  11042        Erik 
   2375  11042        Erik 				mp1 = msgpullup(mp,
   2376  11042        Erik 				    cksum_offset + sizeof (short));
   2377  11042        Erik 				freemsg(mp);
   2378  11042        Erik 				if (mp1 == NULL) {
   2379  11042        Erik 					*errorp = ENOMEM;
   2380  11042        Erik 					return (NULL);
   2381  11042        Erik 				}
   2382  11042        Erik 				mp = mp1;
   2383  11042        Erik 				ip6h = (ip6_t *)mp->b_rptr;
   2384  11042        Erik 			}
   2385  11042        Erik 			ptr = (uint16_t *)(mp->b_rptr + cksum_offset);
   2386  11042        Erik 			*ptr = htons(cksum);
   2387  11042        Erik 		}
   2388  11042        Erik 	}
   2389  11042        Erik 
   2390  11042        Erik 	/* Note that we don't try to update wroff due to ancillary data */
   2391  11042        Erik 	return (mp);
   2392  11042        Erik }
   2393  11042        Erik 
   2394  11042        Erik static int
   2395  11042        Erik icmp_build_hdr_template(conn_t *connp, const in6_addr_t *v6src,
   2396  11042        Erik     const in6_addr_t *v6dst, uint32_t flowinfo)
   2397  11042        Erik {
   2398  11042        Erik 	int		error;
   2399  11042        Erik 
   2400  11042        Erik 	ASSERT(MUTEX_HELD(&connp->conn_lock));
   2401  11042        Erik 	/*
   2402  11042        Erik 	 * We clear lastdst to make sure we don't use the lastdst path
   2403  11042        Erik 	 * next time sending since we might not have set v6dst yet.
   2404  11042        Erik 	 */
   2405  11042        Erik 	connp->conn_v6lastdst = ipv6_all_zeros;
   2406  11042        Erik 
   2407  11042        Erik 	error = conn_build_hdr_template(connp, 0, 0, v6src, v6dst, flowinfo);
   2408  11042        Erik 	if (error != 0)
   2409  11042        Erik 		return (error);
   2410  11042        Erik 
   2411  11042        Erik 	/*
   2412  11042        Erik 	 * Any routing header/option has been massaged. The checksum difference
   2413  11042        Erik 	 * is stored in conn_sum.
   2414  11042        Erik 	 */
   2415      0      stevel 	return (0);
   2416      0      stevel }
   2417      0      stevel 
   2418      0      stevel /*
   2419      0      stevel  * This routine retrieves the value of an ND variable in a icmpparam_t
   2420      0      stevel  * structure.  It is called through nd_getset when a user reads the
   2421      0      stevel  * variable.
   2422      0      stevel  */
   2423      0      stevel /* ARGSUSED */
   2424      0      stevel static int
   2425      0      stevel icmp_param_get(queue_t *q, mblk_t *mp, caddr_t cp, cred_t *cr)
   2426      0      stevel {
   2427      0      stevel 	icmpparam_t	*icmppa = (icmpparam_t *)cp;
   2428      0      stevel 
   2429      0      stevel 	(void) mi_mpprintf(mp, "%d", icmppa->icmp_param_value);
   2430      0      stevel 	return (0);
   2431      0      stevel }
   2432      0      stevel 
   2433      0      stevel /*
   2434      0      stevel  * Walk through the param array specified registering each element with the
   2435      0      stevel  * named dispatch (ND) handler.
   2436      0      stevel  */
   2437      0      stevel static boolean_t
   2438   3448    dh155122 icmp_param_register(IDP *ndp, icmpparam_t *icmppa, int cnt)
   2439      0      stevel {
   2440      0      stevel 	for (; cnt-- > 0; icmppa++) {
   2441      0      stevel 		if (icmppa->icmp_param_name && icmppa->icmp_param_name[0]) {
   2442   3448    dh155122 			if (!nd_load(ndp, icmppa->icmp_param_name,
   2443      0      stevel 			    icmp_param_get, icmp_param_set,
   2444      0      stevel 			    (caddr_t)icmppa)) {
   2445   3448    dh155122 				nd_free(ndp);
   2446      0      stevel 				return (B_FALSE);
   2447      0      stevel 			}
   2448      0      stevel 		}
   2449      0      stevel 	}
   2450      0      stevel 	return (B_TRUE);
   2451      0      stevel }
   2452      0      stevel 
   2453      0      stevel /* This routine sets an ND variable in a icmpparam_t structure. */
   2454      0      stevel /* ARGSUSED */
   2455      0      stevel static int
   2456      0      stevel icmp_param_set(queue_t *q, mblk_t *mp, char *value, caddr_t cp, cred_t *cr)
   2457      0      stevel {
   2458      0      stevel 	long		new_value;
   2459      0      stevel 	icmpparam_t	*icmppa = (icmpparam_t *)cp;
   2460      0      stevel 
   2461      0      stevel 	/*
   2462      0      stevel 	 * Fail the request if the new value does not lie within the
   2463      0      stevel 	 * required bounds.
   2464      0      stevel 	 */
   2465      0      stevel 	if (ddi_strtol(value, NULL, 10, &new_value) != 0 ||
   2466      0      stevel 	    new_value < icmppa->icmp_param_min ||
   2467      0      stevel 	    new_value > icmppa->icmp_param_max) {
   2468      0      stevel 		return (EINVAL);
   2469      0      stevel 	}
   2470      0      stevel 	/* Set the new value */
   2471      0      stevel 	icmppa->icmp_param_value = new_value;
   2472      0      stevel 	return (0);
   2473      0      stevel }
   2474   8963      Anders 
   2475   8963      Anders static mblk_t *
   2476   8348        Eric icmp_queue_fallback(icmp_t *icmp, mblk_t *mp)
   2477   8348        Eric {
   2478   8348        Eric 	ASSERT(MUTEX_HELD(&icmp->icmp_recv_lock));
   2479   8348        Eric 	if (IPCL_IS_NONSTR(icmp->icmp_connp)) {
   2480   8348        Eric 		/*
   2481   8348        Eric 		 * fallback has started but messages have not been moved yet
   2482   8348        Eric 		 */
   2483   8348        Eric 		if (icmp->icmp_fallback_queue_head == NULL) {
   2484   8348        Eric 			ASSERT(icmp->icmp_fallback_queue_tail == NULL);
   2485   8348        Eric 			icmp->icmp_fallback_queue_head = mp;
   2486   8348        Eric 			icmp->icmp_fallback_queue_tail = mp;
   2487   8348        Eric 		} else {
   2488   8348        Eric 			ASSERT(icmp->icmp_fallback_queue_tail != NULL);
   2489   8348        Eric 			icmp->icmp_fallback_queue_tail->b_next = mp;
   2490   8348        Eric 			icmp->icmp_fallback_queue_tail = mp;
   2491   8348        Eric 		}
   2492   8963      Anders 		return (NULL);
   2493   8963      Anders 	} else {
   2494   8963      Anders 		/*
   2495   8963      Anders 		 * Fallback completed, let the caller putnext() the mblk.
   2496   8963      Anders 		 */
   2497   8963      Anders 		return (mp);
   2498   8963      Anders 	}
   2499   8963      Anders }
   2500   8963      Anders 
   2501   8963      Anders /*
   2502   8963      Anders  * Deliver data to ULP. In case we have a socket, and it's falling back to
   2503   8963      Anders  * TPI, then we'll queue the mp for later processing.
   2504   8963      Anders  */
   2505   8963      Anders static void
   2506  11042        Erik icmp_ulp_recv(conn_t *connp, mblk_t *mp, uint_t len)
   2507  11042        Erik {
   2508   8963      Anders 	if (IPCL_IS_NONSTR(connp)) {
   2509   8963      Anders 		icmp_t *icmp = connp->conn_icmp;
   2510   8963      Anders 		int error;
   2511   8963      Anders 
   2512  11042        Erik 		ASSERT(len == msgdsize(mp));
   2513   8963      Anders 		if ((*connp->conn_upcalls->su_recv)
   2514  11042        Erik 		    (connp->conn_upper_handle, mp, len, 0, &error, NULL) < 0) {
   2515   8963      Anders 			mutex_enter(&icmp->icmp_recv_lock);
   2516   8963      Anders 			if (error == ENOSPC) {
   2517   8963      Anders 				/*
   2518   8963      Anders 				 * let's confirm while holding the lock
   2519   8963      Anders 				 */
   2520   8963      Anders 				if ((*connp->conn_upcalls->su_recv)
   2521   8963      Anders 				    (connp->conn_upper_handle, NULL, 0, 0,
   2522   8963      Anders 				    &error, NULL) < 0) {
   2523   8963      Anders 					ASSERT(error == ENOSPC);
   2524   8963      Anders 					if (error == ENOSPC) {
   2525   8963      Anders 						connp->conn_flow_cntrld =
   2526   8963      Anders 						    B_TRUE;
   2527   8963      Anders 					}
   2528   8963      Anders 				}
   2529   8963      Anders 				mutex_exit(&icmp->icmp_recv_lock);
   2530   8963      Anders 			} else {
   2531   8963      Anders 				ASSERT(error == EOPNOTSUPP);
   2532   8963      Anders 				mp = icmp_queue_fallback(icmp, mp);
   2533   8963      Anders 				mutex_exit(&icmp->icmp_recv_lock);
   2534   8963      Anders 				if (mp != NULL)
   2535   8963      Anders 					putnext(connp->conn_rq, mp);
   2536   8963      Anders 			}
   2537   8963      Anders 		}
   2538   8963      Anders 		ASSERT(MUTEX_NOT_HELD(&icmp->icmp_recv_lock));
   2539   8963      Anders 	} else {
   2540   8963      Anders 		putnext(connp->conn_rq, mp);
   2541   8348        Eric 	}
   2542   8348        Eric }
   2543   8348        Eric 
   2544  11042        Erik /*
   2545  11042        Erik  * This is the inbound data path.
   2546  11042        Erik  * IP has already pulled up the IP headers and verified alignment
   2547  11042        Erik  * etc.
   2548  11042        Erik  */
   2549  11042        Erik /* ARGSUSED2 */
   2550  11042        Erik static void
   2551  11042        Erik icmp_input(void *arg1, mblk_t *mp, void *arg2, ip_recv_attr_t *ira)
   2552  11042        Erik {
   2553  11042        Erik 	conn_t			*connp = (conn_t *)arg1;
   2554      0      stevel 	struct T_unitdata_ind	*tudi;
   2555  11042        Erik 	uchar_t			*rptr;		/* Pointer to IP header */
   2556  11042        Erik 	int			ip_hdr_length;
   2557  11042        Erik 	int			udi_size;	/* Size of T_unitdata_ind */
   2558  11042        Erik 	int			pkt_len;
   2559   5240    nordmark 	icmp_t			*icmp;
   2560  11042        Erik 	ip_pkt_t		ipps;
   2561  11042        Erik 	ip6_t			*ip6h;
   2562  11042        Erik 	mblk_t			*mp1;
   2563  11042        Erik 	crb_t			recv_ancillary;
   2564   5240    nordmark 	icmp_stack_t		*is;
   2565      0      stevel 	sin_t			*sin;
   2566      0      stevel 	sin6_t			*sin6;
   2567      0      stevel 	ipha_t			*ipha;
   2568      0      stevel 
   2569   5240    nordmark 	ASSERT(connp->conn_flags & IPCL_RAWIPCONN);
   2570   5240    nordmark 
   2571   5240    nordmark 	icmp = connp->conn_icmp;
   2572   5240    nordmark 	is = icmp->icmp_is;
   2573   5240    nordmark 	rptr = mp->b_rptr;
   2574  11042        Erik 
   2575  11042        Erik 	ASSERT(DB_TYPE(mp) == M_DATA);
   2576   5240    nordmark 	ASSERT(OK_32PTR(rptr));
   2577  11042        Erik 	ASSERT(ira->ira_pktlen == msgdsize(mp));
   2578  11042        Erik 	pkt_len = ira->ira_pktlen;
   2579  11042        Erik 
   2580  11042        Erik 	/*
   2581  11042        Erik 	 * Get a snapshot of these and allow other threads to change
   2582  11042        Erik 	 * them after that. We need the same recv_ancillary when determining
   2583  11042        Erik 	 * the size as when adding the ancillary data items.
   2584  11042        Erik 	 */
   2585  11042        Erik 	mutex_enter(&connp->conn_lock);
   2586  11042        Erik 	recv_ancillary = connp->conn_recv_ancillary;
   2587  11042        Erik 	mutex_exit(&connp->conn_lock);
   2588  11042        Erik 
   2589  11042        Erik 	ip_hdr_length = ira->ira_ip_hdr_length;
   2590  11042        Erik 	ASSERT(MBLKL(mp) >= ip_hdr_length);	/* IP did a pullup */
   2591  11042        Erik 
   2592  11042        Erik 	/* Initialize regardless of IP version */
   2593  11042        Erik 	ipps.ipp_fields = 0;
   2594  11042        Erik 
   2595  11042        Erik 	if (ira->ira_flags & IRAF_IS_IPV4) {
   2596  11042        Erik 		ASSERT(IPH_HDR_VERSION(rptr) == IPV4_VERSION);
   2597  11042        Erik 		ASSERT(MBLKL(mp) >= sizeof (ipha_t));
   2598  11042        Erik 		ASSERT(ira->ira_ip_hdr_length == IPH_HDR_LENGTH(rptr));
   2599  11042        Erik 
   2600  11042        Erik 		ipha = (ipha_t *)mp->b_rptr;
   2601  11042        Erik 		if (recv_ancillary.crb_all != 0)
   2602  11042        Erik 			(void) ip_find_hdr_v4(ipha, &ipps, B_FALSE);
   2603  11042        Erik 
   2604  11042        Erik 		/*
   2605  11042        Erik 		 * BSD for some reason adjusts ipha_length to exclude the
   2606  11042        Erik 		 * IP header length. We do the same.
   2607  11042        Erik 		 */
   2608   3448    dh155122 		if (is->is_bsd_compat) {
   2609      0      stevel 			ushort_t len;
   2610  11042        Erik 
   2611      0      stevel 			len = ntohs(ipha->ipha_length);
   2612      0      stevel 			if (mp->b_datap->db_ref > 1) {
   2613      0      stevel 				/*
   2614      0      stevel 				 * Allocate a new IP header so that we can
   2615      0      stevel 				 * modify ipha_length.
   2616      0      stevel 				 */
   2617      0      stevel 				mblk_t	*mp1;
   2618      0      stevel 
   2619  11042        Erik 				mp1 = allocb(ip_hdr_length, BPRI_MED);
   2620  11042        Erik 				if (mp1 == NULL) {
   2621      0      stevel 					freemsg(mp);
   2622   5240    nordmark 					BUMP_MIB(&is->is_rawip_mib,
   2623   3448    dh155122 					    rawipInErrors);
   2624      0      stevel 					return;
   2625      0      stevel 				}
   2626  11042        Erik 				bcopy(rptr, mp1->b_rptr, ip_hdr_length);
   2627  11042        Erik 				mp->b_rptr = rptr + ip_hdr_length;
   2628      0      stevel 				rptr = mp1->b_rptr;
   2629      0      stevel 				ipha = (ipha_t *)rptr;
   2630      0      stevel 				mp1->b_cont = mp;
   2631  11042        Erik 				mp1->b_wptr = rptr + ip_hdr_length;
   2632      0      stevel 				mp = mp1;
   2633      0      stevel 			}
   2634  11042        Erik 			len -= ip_hdr_length;
   2635      0      stevel 			ipha->ipha_length = htons(len);
   2636      0      stevel 		}
   2637  11042        Erik 
   2638  11042        Erik 		/*
   2639  11042        Erik 		 * For RAW sockets we not pass ICMP/IPv4 packets to AF_INET6
   2640  11042        Erik 		 * sockets. This is ensured by icmp_bind and the IP fanout code.
   2641  11042        Erik 		 */
   2642  11042        Erik 		ASSERT(connp->conn_family == AF_INET);
   2643  11042        Erik 
   2644  11042        Erik 		/*
   2645  11042        Erik 		 * This is the inbound data path.  Packets are passed upstream
   2646  11042        Erik 		 * as T_UNITDATA_IND messages with full IPv4 headers still
   2647  11042        Erik 		 * attached.
   2648  11042        Erik 		 */
   2649  11042        Erik 
   2650  11042        Erik 		/*
   2651  11042        Erik 		 * Normally only send up the source address.
   2652  11042        Erik 		 * If any ancillary data items are wanted we add those.
   2653  11042        Erik 		 */
   2654  11042        Erik 		udi_size = sizeof (struct T_unitdata_ind) + sizeof (sin_t);
   2655  11042        Erik 		if (recv_ancillary.crb_all != 0) {
   2656  11042        Erik 			udi_size += conn_recvancillary_size(connp,
   2657  11042        Erik 			    recv_ancillary, ira, mp, &ipps);
   2658  11042        Erik 		}
   2659  11042        Erik 
   2660  11042        Erik 		/* Allocate a message block for the T_UNITDATA_IND structure. */
   2661      0      stevel 		mp1 = allocb(udi_size, BPRI_MED);
   2662      0      stevel 		if (mp1 == NULL) {
   2663      0      stevel 			freemsg(mp);
   2664   5240    nordmark 			BUMP_MIB(&is->is_rawip_mib, rawipInErrors);
   2665      0      stevel 			return;
   2666      0      stevel 		}
   2667      0      stevel 		mp1->b_cont = mp;
   2668  11042        Erik 		tudi = (struct T_unitdata_ind *)mp1->b_rptr;
   2669  11042        Erik 		mp1->b_datap->db_type = M_PROTO;
   2670  11042        Erik 		mp1->b_wptr = (uchar_t *)tudi + udi_size;
   2671      0      stevel 		tudi->PRIM_type = T_UNITDATA_IND;
   2672      0      stevel 		tudi->SRC_length = sizeof (sin_t);
   2673      0      stevel 		tudi->SRC_offset = sizeof (struct T_unitdata_ind);
   2674      0      stevel 		sin = (sin_t *)&tudi[1];
   2675      0      stevel 		*sin = sin_null;
   2676      0      stevel 		sin->sin_family = AF_INET;
   2677      0      stevel 		sin->sin_addr.s_addr = ipha->ipha_src;
   2678  11042        Erik 		*(uint32_t *)&sin->sin_zero[0] = 0;
   2679  11042        Erik 		*(uint32_t *)&sin->sin_zero[4] = 0;
   2680      0      stevel 		tudi->OPT_offset =  sizeof (struct T_unitdata_ind) +
   2681      0      stevel 		    sizeof (sin_t);
   2682      0      stevel 		udi_size -= (sizeof (struct T_unitdata_ind) + sizeof (sin_t));
   2683      0      stevel 		tudi->OPT_length = udi_size;
   2684      0      stevel 
   2685      0      stevel 		/*
   2686  11042        Erik 		 * Add options if IP_RECVIF etc is set
   2687      0      stevel 		 */
   2688      0      stevel 		if (udi_size != 0) {
   2689  11042        Erik 			conn_recvancillary_add(connp, recv_ancillary, ira,
   2690  11042        Erik 			    &ipps, (uchar_t *)&sin[1], udi_size);
   2691  11042        Erik 		}
   2692   8348        Eric 		goto deliver;
   2693      0      stevel 	}
   2694      0      stevel 
   2695  11042        Erik 	ASSERT(IPH_HDR_VERSION(rptr) == IPV6_VERSION);
   2696  11042        Erik 	/*
   2697  11042        Erik 	 * IPv6 packets can only be received by applications
   2698  11042        Erik 	 * that are prepared to receive IPv6 addresses.
   2699  11042        Erik 	 * The IP fanout must ensure this.
   2700  11042        Erik 	 */
   2701  11042        Erik 	ASSERT(connp->conn_family == AF_INET6);
   2702  11042        Erik 
   2703  11042        Erik 	/*
   2704  11042        Erik 	 * Handle IPv6 packets. We don't pass up the IP headers with the
   2705  11042        Erik 	 * payload for IPv6.
   2706  11042        Erik 	 */
   2707      0      stevel 
   2708      0      stevel 	ip6h = (ip6_t *)rptr;
   2709  11042        Erik 	if (recv_ancillary.crb_all != 0) {
   2710  11042        Erik 		/*
   2711  11042        Erik 		 * Call on ip_find_hdr_v6 which gets individual lenghts of
   2712  11042        Erik 		 * extension headers (and pointers to them).
   2713  11042        Erik 		 */
   2714  11042        Erik 		uint8_t		nexthdr;
   2715  11042        Erik 
   2716  11042        Erik 		/* We don't care about the length or nextheader. */
   2717  11042        Erik 		(void) ip_find_hdr_v6(mp, ip6h, B_TRUE, &ipps, &nexthdr);
   2718  11042        Erik 
   2719  11042        Erik 		/*
   2720  11042        Erik 		 * We do not pass up hop-by-hop options or any other
   2721  11042        Erik 		 * extension header as part of the packet. Applications
   2722  11042        Erik 		 * that want to see them have to specify IPV6_RECV* socket
   2723  11042        Erik 		 * options. And conn_recvancillary_size/add explicitly
   2724  11042        Erik 		 * drops the TX option from IPV6_HOPOPTS as it does for UDP.
   2725  11042        Erik 		 *
   2726  11042        Erik 		 * If we had multilevel ICMP sockets, then we'd want to
   2727  11042        Erik 		 * modify conn_recvancillary_size/add to
   2728  11042        Erik 		 * allow the user to see the label.
   2729  11042        Erik 		 */
   2730  11042        Erik 	}
   2731  11042        Erik 
   2732      0      stevel 	/*
   2733      0      stevel 	 * Check a filter for ICMPv6 types if needed.
   2734      0      stevel 	 * Verify raw checksums if needed.
   2735      0      stevel 	 */
   2736  11042        Erik 	mutex_enter(&connp->conn_lock);
   2737  11042        Erik 	if (icmp->icmp_filter != NULL) {
   2738  11042        Erik 		int type;
   2739  11042        Erik 
   2740  11042        Erik 		/* Assumes that IP has done the pullupmsg */
   2741  11042        Erik 		type = mp->b_rptr[ip_hdr_length];
   2742  11042        Erik 
   2743  11042        Erik 		ASSERT(mp->b_rptr + ip_hdr_length <= mp->b_wptr);
   2744  11042        Erik 		if (ICMP6_FILTER_WILLBLOCK(type, icmp->icmp_filter)) {
   2745  11042        Erik 			mutex_exit(&connp->conn_lock);
   2746  11042        Erik 			freemsg(mp);
   2747  11042        Erik 			return;
   2748  11042        Erik 		}
   2749  11042        Erik 	}
   2750  11042        Erik 	if (connp->conn_ixa->ixa_flags & IXAF_SET_RAW_CKSUM) {
   2751  11042        Erik 		/* Checksum */
   2752  11042        Erik 		uint16_t	*up;
   2753  11042        Erik 		uint32_t	sum;
   2754  11042        Erik 		int		remlen;
   2755  11042        Erik 
   2756  11042        Erik 		up = (uint16_t *)&ip6h->ip6_src;
   2757  11042        Erik 
   2758  11042        Erik 		remlen = msgdsize(mp) - ip_hdr_length;
   2759  11042        Erik 		sum = htons(connp->conn_proto + remlen)
   2760  11042        Erik 		    + up[0] + up[1] + up[2] + up[3]
   2761  11042        Erik 		    + up[4] + up[5] + up[6] + up[7]
   2762  11042        Erik 		    + up[8] + up[9] + up[10] + up[11]
   2763  11042        Erik 		    + up[12] + up[13] + up[14] + up[15];
   2764  11042        Erik 		sum = (sum & 0xffff) + (sum >> 16);
   2765  11042        Erik 		sum = IP_CSUM(mp, ip_hdr_length, sum);
   2766  11042        Erik 		if (sum != 0) {
   2767  11042        Erik 			/* IPv6 RAW checksum failed */
   2768  11042        Erik 			ip0dbg(("icmp_rput: RAW checksum failed %x\n", sum));
   2769  11042        Erik 			mutex_exit(&connp->conn_lock);
   2770  11042        Erik 			freemsg(mp);
   2771  11042        Erik 			BUMP_MIB(&is->is_rawip_mib, rawipInCksumErrs);
   2772  11042        Erik 			return;
   2773  11042        Erik 		}
   2774  11042        Erik 	}
   2775  11042        Erik 	mutex_exit(&connp->conn_lock);
   2776      0      stevel 
   2777      0      stevel 	udi_size = sizeof (struct T_unitdata_ind) + sizeof (sin6_t);
   2778      0      stevel 
   2779  11042        Erik 	if (recv_ancillary.crb_all != 0) {
   2780  11042        Erik 		udi_size += conn_recvancillary_size(connp,
   2781  11042        Erik 		    recv_ancillary, ira, mp, &ipps);
   2782   5401    nordmark 	}
   2783   5401    nordmark 
   2784      0      stevel 	mp1 = allocb(udi_size, BPRI_MED);
   2785      0      stevel 	if (mp1 == NULL) {
   2786      0      stevel 		freemsg(mp);
   2787   5240    nordmark 		BUMP_MIB(&is->is_rawip_mib, rawipInErrors);
   2788      0      stevel 		return;
   2789      0      stevel 	}
   2790      0      stevel 	mp1->b_cont = mp;
   2791  11042        Erik 	mp1->b_datap->db_type = M_PROTO;
   2792  11042        Erik 	tudi = (struct T_unitdata_ind *)mp1->b_rptr;
   2793  11042        Erik 	mp1->b_wptr = (uchar_t *)tudi + udi_size;
   2794      0      stevel 	tudi->PRIM_type = T_UNITDATA_IND;
   2795      0      stevel 	tudi->SRC_length = sizeof (sin6_t);
   2796      0      stevel 	tudi->SRC_offset = sizeof (struct T_unitdata_ind);
   2797      0      stevel 	tudi->OPT_offset = sizeof (struct T_unitdata_ind) + sizeof (sin6_t);
   2798      0      stevel 	udi_size -= (sizeof (struct T_unitdata_ind) + sizeof (sin6_t));
   2799      0      stevel 	tudi->OPT_length = udi_size;
   2800      0      stevel 	sin6 = (sin6_t *)&tudi[1];
   2801  11042        Erik 	*sin6 = sin6_null;
   2802      0      stevel 	sin6->sin6_port = 0;
   2803      0      stevel 	sin6->sin6_family = AF_INET6;
   2804      0      stevel 
   2805      0      stevel 	sin6->sin6_addr = ip6h->ip6_src;
   2806      0      stevel 	/* No sin6_flowinfo per API */
   2807      0      stevel 	sin6->sin6_flowinfo = 0;
   2808  11042        Erik 	/* For link-scope pass up scope id */
   2809  11042        Erik 	if (IN6_IS_ADDR_LINKSCOPE(&ip6h->ip6_src))
   2810  11042        Erik 		sin6->sin6_scope_id = ira->ira_ruifindex;
   2811      0      stevel 	else
   2812      0      stevel 		sin6->sin6_scope_id = 0;
   2813      0      stevel 	sin6->__sin6_src_id = ip_srcid_find_addr(&ip6h->ip6_dst,
   2814  11042        Erik 	    IPCL_ZONEID(connp), is->is_netstack);
   2815      0      stevel 
   2816      0      stevel 	if (udi_size != 0) {
   2817  11042        Erik 		conn_recvancillary_add(connp, recv_ancillary, ira,
   2818  11042        Erik 		    &ipps, (uchar_t *)&sin6[1], udi_size);
   2819  11042        Erik 	}
   2820  11042        Erik 
   2821  11042        Erik 	/* Skip all the IPv6 headers per API */
   2822  11042        Erik 	mp->b_rptr += ip_hdr_length;
   2823  11042        Erik 	pkt_len -= ip_hdr_length;
   2824  11042        Erik 
   2825  11042        Erik deliver:
   2826   5240    nordmark 	BUMP_MIB(&is->is_rawip_mib, rawipInDatagrams);
   2827  11042        Erik 	icmp_ulp_recv(connp, mp1, pkt_len);
   2828  11042        Erik }
   2829  11042        Erik 
   2830  11042        Erik /*
   2831  11042        Erik  * return SNMP stuff in buffer in mpdata. We don't hold any lock and report
   2832  11042        Erik  * information that can be changing beneath us.
   2833      0      stevel  */
   2834   5240    nordmark mblk_t *
   2835      0      stevel icmp_snmp_get(queue_t *q, mblk_t *mpctl)
   2836      0      stevel {
   2837      0      stevel 	mblk_t			*mpdata;
   2838      0      stevel 	struct opthdr		*optp;
   2839   5240    nordmark 	conn_t			*connp = Q_TO_CONN(q);
   2840   5240    nordmark 	icmp_stack_t		*is = connp->conn_netstack->netstack_icmp;
   2841   5240    nordmark 	mblk_t			*mp2ctl;
   2842   5240    nordmark 
   2843   5240    nordmark 	/*
   2844   5240    nordmark 	 * make a copy of the original message
   2845   5240    nordmark 	 */
   2846   5240    nordmark 	mp2ctl = copymsg(mpctl);
   2847      0      stevel 
   2848      0      stevel 	if (mpctl == NULL ||
   2849      0      stevel 	    (mpdata = mpctl->b_cont) == NULL) {
   2850   5240    nordmark 		freemsg(mpctl);
   2851   5240    nordmark 		freemsg(mp2ctl);
   2852      0      stevel 		return (0);
   2853      0      stevel 	}
   2854      0      stevel 
   2855      0      stevel 	/* fixed length structure for IPv4 and IPv6 counters */
   2856      0      stevel 	optp = (struct opthdr *)&mpctl->b_rptr[sizeof (struct T_optmgmt_ack)];
   2857      0      stevel 	optp->level = EXPER_RAWIP;
   2858      0      stevel 	optp->name = 0;
   2859   5240    nordmark 	(void) snmp_append_data(mpdata, (char *)&is->is_rawip_mib,
   2860   5240    nordmark 	    sizeof (is->is_rawip_mib));
   2861      0      stevel 	optp->len = msgdsize(mpdata);
   2862      0      stevel 	qreply(q, mpctl);
   2863      0      stevel 
   2864   5240    nordmark 	return (mp2ctl);
   2865      0      stevel }
   2866      0      stevel 
   2867      0      stevel /*
   2868      0      stevel  * Return 0 if invalid set request, 1 otherwise, including non-rawip requests.
   2869      0      stevel  * TODO:  If this ever actually tries to set anything, it needs to be
   2870      0      stevel  * to do the appropriate locking.
   2871      0      stevel  */
   2872      0      stevel /* ARGSUSED */
   2873   5240    nordmark int
   2874      0      stevel icmp_snmp_set(queue_t *q, t_scalar_t level, t_scalar_t name,
   2875      0      stevel     uchar_t *ptr, int len)
   2876      0      stevel {
   2877      0      stevel 	switch (level) {
   2878      0      stevel 	case EXPER_RAWIP:
   2879      0      stevel 		return (0);
   2880      0      stevel 	default:
   2881      0      stevel 		return (1);
   2882      0      stevel 	}
   2883      0      stevel }
   2884      0      stevel 
   2885      0      stevel /*
   2886      0      stevel  * This routine creates a T_UDERROR_IND message and passes it upstream.
   2887      0      stevel  * The address and options are copied from the T_UNITDATA_REQ message
   2888      0      stevel  * passed in mp.  This message is freed.
   2889      0      stevel  */
   2890      0      stevel static void
   2891      0      stevel icmp_ud_err(queue_t *q, mblk_t *mp, t_scalar_t err)
   2892      0      stevel {
   2893  11042        Erik 	struct T_unitdata_req *tudr;
   2894      0      stevel 	mblk_t	*mp1;
   2895  11042        Erik 	uchar_t *destaddr;
   2896  11042        Erik 	t_scalar_t destlen;
   2897  11042        Erik 	uchar_t	*optaddr;
   2898  11042        Erik 	t_scalar_t optlen;
   2899  11042        Erik 
   2900  11042        Erik 	if ((mp->b_wptr < mp->b_rptr) ||
   2901  11042        Erik 	    (MBLKL(mp)) < sizeof (struct T_unitdata_req)) {
   2902  11042        Erik 		goto done;
   2903  11042        Erik 	}
   2904  11042        Erik 	tudr = (struct T_unitdata_req *)mp->b_rptr;
   2905  11042        Erik 	destaddr = mp->b_rptr + tudr->DEST_offset;
   2906  11042        Erik 	if (destaddr < mp->b_rptr || destaddr >= mp->b_wptr ||
   2907  11042        Erik 	    destaddr + tudr->DEST_length < mp->b_rptr ||
   2908  11042        Erik 	    destaddr + tudr->DEST_length > mp->b_wptr) {
   2909  11042        Erik 		goto done;
   2910  11042        Erik 	}
   2911  11042        Erik 	optaddr = mp->b_rptr + tudr->OPT_offset;
   2912  11042        Erik 	if (optaddr < mp->b_rptr || optaddr >= mp->b_wptr ||
   2913  11042        Erik 	    optaddr + tudr->OPT_length < mp->b_rptr ||
   2914  11042        Erik 	    optaddr + tudr->OPT_length > mp->b_wptr) {
   2915  11042        Erik 		goto done;
   2916  11042        Erik 	}
   2917  11042        Erik 	destlen = tudr->DEST_length;
   2918  11042        Erik 	optlen = tudr->OPT_length;
   2919  11042        Erik 
   2920  11042        Erik 	mp1 = mi_tpi_uderror_ind((char *)destaddr, destlen,
   2921  11042        Erik 	    (char *)optaddr, optlen, err);
   2922  11042        Erik 	if (mp1 != NULL)
   2923      0      stevel 		qreply(q, mp1);
   2924  11042        Erik 
   2925  11042        Erik done:
   2926      0      stevel 	freemsg(mp);
   2927      0      stevel }
   2928      0      stevel 
   2929   8348        Eric static int
   2930   8348        Eric rawip_do_unbind(conn_t *connp)
   2931   8348        Eric {
   2932  11042        Erik 	icmp_t	*icmp = connp->conn_icmp;
   2933  11042        Erik 
   2934  11042        Erik 	mutex_enter(&connp->conn_lock);
   2935      0      stevel 	/* If a bind has not been done, we can't unbind. */
   2936  11042        Erik 	if (icmp->icmp_state == TS_UNBND) {
   2937  11042        Erik 		mutex_exit(&connp->conn_lock);
   2938   8348        Eric 		return (-TOUTSTATE);
   2939      0      stevel 	}
   2940  11042        Erik 	connp->conn_saddr_v6 = ipv6_all_zeros;
   2941  11042        Erik 	connp->conn_bound_addr_v6 = ipv6_all_zeros;
   2942  11042        Erik 	connp->conn_laddr_v6 = ipv6_all_zeros;
   2943  11042        Erik 	connp->conn_mcbc_bind = B_FALSE;
   2944  11042        Erik 	connp->conn_lport = 0;
   2945  11042        Erik 	connp->conn_fport = 0;
   2946  11042        Erik 	/* In case we were also connected */
   2947  11042        Erik 	connp->conn_faddr_v6 = ipv6_all_zeros;
   2948  11042        Erik 	connp->conn_v6lastdst = ipv6_all_zeros;
   2949  11042        Erik 
   2950  11042        Erik 	icmp->icmp_state = TS_UNBND;
   2951  11042        Erik 
   2952  11042        Erik 	(void) icmp_build_hdr_template(connp, &connp->conn_saddr_v6,
   2953  11042        Erik 	    &connp->conn_faddr_v6, connp->conn_flowinfo);
   2954  11042        Erik 	mutex_exit(&connp->conn_lock);
   2955   8348        Eric 
   2956   8348        Eric 	ip_unbind(connp);
   2957   8348        Eric 	return (0);
   2958   8348        Eric }
   2959   8348        Eric 
   2960   8348        Eric /*
   2961   8348        Eric  * This routine is called by icmp_wput to handle T_UNBIND_REQ messages.
   2962   8348        Eric  * After some error checking, the message is passed downstream to ip.
   2963   8348        Eric  */
   2964   8348        Eric static void
   2965   8348        Eric icmp_tpi_unbind(queue_t *q, mblk_t *mp)
   2966   8348        Eric {
   2967   8348        Eric 	conn_t	*connp = Q_TO_CONN(q);
   2968   8348        Eric 	int	error;
   2969   8348        Eric 
   2970   8348        Eric 	ASSERT(mp->b_cont == NULL);
   2971   8348        Eric 	error = rawip_do_unbind(connp);
   2972   8348        Eric 	if (error) {
   2973   8348        Eric 		if (error < 0) {
   2974   8348        Eric 			icmp_err_ack(q, mp, -error, 0);
   2975   8348        Eric 		} else {
   2976   8348        Eric 			icmp_err_ack(q, mp, 0, error);
   2977   8348        Eric 		}
   2978   8348        Eric 		return;
   2979   8348        Eric 	}
   2980   8348        Eric 
   2981   8348        Eric 	/*
   2982   8348        Eric 	 * Convert mp into a T_OK_ACK
   2983   8348        Eric 	 */
   2984   8348        Eric 
   2985   8348        Eric 	mp = mi_tpi_ok_ack_alloc(mp);
   2986   8348        Eric 
   2987   8348        Eric 	/*
   2988   8348        Eric 	 * should not happen in practice... T_OK_ACK is smaller than the
   2989   8348        Eric 	 * original message.
   2990   8348        Eric 	 */
   2991   8348        Eric 	ASSERT(mp != NULL);
   2992   8348        Eric 	ASSERT(((struct T_ok_ack *)mp->b_rptr)->PRIM_type == T_OK_ACK);
   2993   5240    nordmark 	qreply(q, mp);
   2994      0      stevel }
   2995   8348        Eric 
   2996      0      stevel /*
   2997