1 0 stevel /* 2 0 stevel * CDDL HEADER START 3 0 stevel * 4 0 stevel * The contents of this file are subject to the terms of the 5 1289 ja97890 * Common Development and Distribution License (the "License"). 6 1289 ja97890 * You may not use this file except in compliance with the License. 7 0 stevel * 8 0 stevel * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 0 stevel * or http://www.opensolaris.org/os/licensing. 10 0 stevel * See the License for the specific language governing permissions 11 0 stevel * and limitations under the License. 12 0 stevel * 13 0 stevel * When distributing Covered Code, include this CDDL HEADER in each 14 0 stevel * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 0 stevel * If applicable, add the following below this CDDL HEADER, with the 16 0 stevel * fields enclosed by brackets "[]" replaced with your own identifying 17 0 stevel * information: Portions Copyright [yyyy] [name of copyright owner] 18 0 stevel * 19 0 stevel * CDDL HEADER END 20 0 stevel */ 21 0 stevel /* 22 8477 Rao * Copyright 2009 Sun Microsystems, Inc. All rights reserved. 23 0 stevel * Use is subject to license terms. 24 0 stevel */ 25 0 stevel /* Copyright (c) 1990 Mentat Inc. 
*/ 26 0 stevel 27 0 stevel #include <sys/types.h> 28 0 stevel #include <sys/stream.h> 29 0 stevel #include <sys/stropts.h> 30 0 stevel #include <sys/strlog.h> 31 0 stevel #include <sys/strsun.h> 32 0 stevel #define _SUN_TPI_VERSION 2 33 0 stevel #include <sys/tihdr.h> 34 0 stevel #include <sys/timod.h> 35 0 stevel #include <sys/ddi.h> 36 0 stevel #include <sys/sunddi.h> 37 1676 jpk #include <sys/strsubr.h> 38 11042 Erik #include <sys/suntpi.h> 39 11042 Erik #include <sys/xti_inet.h> 40 0 stevel #include <sys/cmn_err.h> 41 0 stevel #include <sys/kmem.h> 42 11042 Erik #include <sys/cred_impl.h> 43 0 stevel #include <sys/policy.h> 44 1676 jpk #include <sys/priv.h> 45 11042 Erik #include <sys/ucred.h> 46 0 stevel #include <sys/zone.h> 47 0 stevel 48 8348 Eric #include <sys/sockio.h> 49 0 stevel #include <sys/socket.h> 50 8348 Eric #include <sys/socketvar.h> 51 11042 Erik #include <sys/vtrace.h> 52 11042 Erik #include <sys/sdt.h> 53 11042 Erik #include <sys/debug.h> 54 0 stevel #include <sys/isa_defs.h> 55 11042 Erik #include <sys/random.h> 56 0 stevel #include <netinet/in.h> 57 0 stevel #include <netinet/ip6.h> 58 0 stevel #include <netinet/icmp6.h> 59 11042 Erik #include <netinet/udp.h> 60 11042 Erik 61 0 stevel #include <inet/common.h> 62 0 stevel #include <inet/ip.h> 63 11042 Erik #include <inet/ip_impl.h> 64 11042 Erik #include <inet/ipsec_impl.h> 65 0 stevel #include <inet/ip6.h> 66 11042 Erik #include <inet/ip_ire.h> 67 11042 Erik #include <inet/ip_if.h> 68 11042 Erik #include <inet/ip_multi.h> 69 11042 Erik #include <inet/ip_ndp.h> 70 8348 Eric #include <inet/proto_set.h> 71 11042 Erik #include <inet/mib2.h> 72 0 stevel #include <inet/nd.h> 73 0 stevel #include <inet/optcom.h> 74 0 stevel #include <inet/snmpcom.h> 75 0 stevel #include <inet/kstatcom.h> 76 0 stevel #include <inet/ipclassifier.h> 77 1676 jpk 78 1676 jpk #include <sys/tsol/label.h> 79 1676 jpk #include <sys/tsol/tnet.h> 80 3318 rshoaib 81 11042 Erik #include <inet/rawip_impl.h> 82 11042 Erik 83 
8348 Eric #include <sys/disp.h> 84 0 stevel 85 0 stevel /* 86 0 stevel * Synchronization notes: 87 0 stevel * 88 11042 Erik * RAWIP is MT and uses the usual kernel synchronization primitives. We use 89 11042 Erik * conn_lock to protect the icmp_t. 90 5240 nordmark * 91 5240 nordmark * Plumbing notes: 92 5240 nordmark * ICMP is always a device driver. For compatibility with mibopen() code 93 5240 nordmark * it is possible to I_PUSH "icmp", but that results in pushing a passthrough 94 5240 nordmark * dummy module. 95 0 stevel */ 96 0 stevel 97 0 stevel static void icmp_addr_req(queue_t *q, mblk_t *mp); 98 8348 Eric static void icmp_tpi_bind(queue_t *q, mblk_t *mp); 99 11042 Erik static void icmp_bind_proto(icmp_t *icmp); 100 11042 Erik static int icmp_build_hdr_template(conn_t *, const in6_addr_t *, 101 11042 Erik const in6_addr_t *, uint32_t); 102 0 stevel static void icmp_capability_req(queue_t *q, mblk_t *mp); 103 8348 Eric static int icmp_close(queue_t *q, int flags); 104 11042 Erik static void icmp_close_free(conn_t *); 105 8348 Eric static void icmp_tpi_connect(queue_t *q, mblk_t *mp); 106 8348 Eric static void icmp_tpi_disconnect(queue_t *q, mblk_t *mp); 107 0 stevel static void icmp_err_ack(queue_t *q, mblk_t *mp, t_scalar_t t_error, 108 11042 Erik int sys_error); 109 0 stevel static void icmp_err_ack_prim(queue_t *q, mblk_t *mp, t_scalar_t primitive, 110 11042 Erik t_scalar_t tlierr, int sys_error); 111 11042 Erik static void icmp_icmp_input(void *arg1, mblk_t *mp, void *arg2, 112 11042 Erik ip_recv_attr_t *); 113 11042 Erik static void icmp_icmp_error_ipv6(conn_t *connp, mblk_t *mp, 114 11042 Erik ip_recv_attr_t *); 115 0 stevel static void icmp_info_req(queue_t *q, mblk_t *mp); 116 11042 Erik static void icmp_input(void *, mblk_t *, void *, ip_recv_attr_t *); 117 8348 Eric static conn_t *icmp_open(int family, cred_t *credp, int *err, int flags); 118 5240 nordmark static int icmp_openv4(queue_t *q, dev_t *devp, int flag, int sflag, 119 5240 nordmark cred_t 
*credp); 120 5240 nordmark static int icmp_openv6(queue_t *q, dev_t *devp, int flag, int sflag, 121 5240 nordmark cred_t *credp); 122 0 stevel static boolean_t icmp_opt_allow_udr_set(t_scalar_t level, t_scalar_t name); 123 8348 Eric int icmp_opt_set(conn_t *connp, uint_t optset_context, 124 0 stevel int level, int name, uint_t inlen, 125 0 stevel uchar_t *invalp, uint_t *outlenp, uchar_t *outvalp, 126 8348 Eric void *thisdg_attrs, cred_t *cr); 127 8348 Eric int icmp_opt_get(conn_t *connp, int level, int name, 128 0 stevel uchar_t *ptr); 129 11042 Erik static int icmp_output_newdst(conn_t *connp, mblk_t *data_mp, sin_t *sin, 130 11042 Erik sin6_t *sin6, cred_t *cr, pid_t pid, ip_xmit_attr_t *ixa); 131 0 stevel static int icmp_param_get(queue_t *q, mblk_t *mp, caddr_t cp, cred_t *cr); 132 3448 dh155122 static boolean_t icmp_param_register(IDP *ndp, icmpparam_t *icmppa, int cnt); 133 0 stevel static int icmp_param_set(queue_t *q, mblk_t *mp, char *value, 134 0 stevel caddr_t cp, cred_t *cr); 135 11042 Erik static mblk_t *icmp_prepend_hdr(conn_t *, ip_xmit_attr_t *, const ip_pkt_t *, 136 11042 Erik const in6_addr_t *, const in6_addr_t *, uint32_t, mblk_t *, int *); 137 11042 Erik static mblk_t *icmp_prepend_header_template(conn_t *, ip_xmit_attr_t *, 138 11042 Erik mblk_t *, const in6_addr_t *, uint32_t, int *); 139 0 stevel static int icmp_snmp_set(queue_t *q, t_scalar_t level, t_scalar_t name, 140 0 stevel uchar_t *ptr, int len); 141 0 stevel static void icmp_ud_err(queue_t *q, mblk_t *mp, t_scalar_t err); 142 8348 Eric static void icmp_tpi_unbind(queue_t *q, mblk_t *mp); 143 0 stevel static void icmp_wput(queue_t *q, mblk_t *mp); 144 8348 Eric static void icmp_wput_fallback(queue_t *q, mblk_t *mp); 145 0 stevel static void icmp_wput_other(queue_t *q, mblk_t *mp); 146 0 stevel static void icmp_wput_iocdata(queue_t *q, mblk_t *mp); 147 0 stevel static void icmp_wput_restricted(queue_t *q, mblk_t *mp); 148 11042 Erik static void icmp_ulp_recv(conn_t *, mblk_t *, 
uint_t); 149 0 stevel 150 3448 dh155122 static void *rawip_stack_init(netstackid_t stackid, netstack_t *ns); 151 3448 dh155122 static void rawip_stack_fini(netstackid_t stackid, void *arg); 152 3448 dh155122 153 3448 dh155122 static void *rawip_kstat_init(netstackid_t stackid); 154 3448 dh155122 static void rawip_kstat_fini(netstackid_t stackid, kstat_t *ksp); 155 0 stevel static int rawip_kstat_update(kstat_t *kp, int rw); 156 8348 Eric static void rawip_stack_shutdown(netstackid_t stackid, void *arg); 157 11042 Erik 158 11042 Erik /* Common routines for TPI and socket module */ 159 11042 Erik static conn_t *rawip_do_open(int, cred_t *, int *, int); 160 11042 Erik static void rawip_do_close(conn_t *); 161 11042 Erik static int rawip_do_bind(conn_t *, struct sockaddr *, socklen_t); 162 11042 Erik static int rawip_do_unbind(conn_t *); 163 11042 Erik static int rawip_do_connect(conn_t *, const struct sockaddr *, socklen_t, 164 11042 Erik cred_t *, pid_t); 165 8348 Eric 166 8348 Eric int rawip_getsockname(sock_lower_handle_t, struct sockaddr *, 167 8348 Eric socklen_t *, cred_t *); 168 8348 Eric int rawip_getpeername(sock_lower_handle_t, struct sockaddr *, 169 8348 Eric socklen_t *, cred_t *); 170 0 stevel 171 5240 nordmark static struct module_info icmp_mod_info = { 172 0 stevel 5707, "icmp", 1, INFPSZ, 512, 128 173 0 stevel }; 174 0 stevel 175 5240 nordmark /* 176 5240 nordmark * Entry points for ICMP as a device. 177 5240 nordmark * We have separate open functions for the /dev/icmp and /dev/icmp6 devices. 
178 5240 nordmark */ 179 5240 nordmark static struct qinit icmprinitv4 = { 180 5240 nordmark NULL, NULL, icmp_openv4, icmp_close, NULL, &icmp_mod_info 181 5240 nordmark }; 182 5240 nordmark 183 5240 nordmark static struct qinit icmprinitv6 = { 184 5240 nordmark NULL, NULL, icmp_openv6, icmp_close, NULL, &icmp_mod_info 185 5240 nordmark }; 186 5240 nordmark 187 5240 nordmark static struct qinit icmpwinit = { 188 11042 Erik (pfi_t)icmp_wput, (pfi_t)ip_wsrv, NULL, NULL, NULL, &icmp_mod_info 189 8348 Eric }; 190 8348 Eric 191 8348 Eric /* ICMP entry point during fallback */ 192 8348 Eric static struct qinit icmp_fallback_sock_winit = { 193 8348 Eric (pfi_t)icmp_wput_fallback, NULL, NULL, NULL, NULL, &icmp_mod_info 194 5240 nordmark }; 195 5240 nordmark 196 5240 nordmark /* For AF_INET aka /dev/icmp */ 197 5240 nordmark struct streamtab icmpinfov4 = { 198 5240 nordmark &icmprinitv4, &icmpwinit 199 5240 nordmark }; 200 5240 nordmark 201 5240 nordmark /* For AF_INET6 aka /dev/icmp6 */ 202 5240 nordmark struct streamtab icmpinfov6 = { 203 5240 nordmark &icmprinitv6, &icmpwinit 204 0 stevel }; 205 0 stevel 206 0 stevel static sin_t sin_null; /* Zero address for quick clears */ 207 0 stevel static sin6_t sin6_null; /* Zero address for quick clears */ 208 0 stevel 209 0 stevel /* Default structure copied into T_INFO_ACK messages */ 210 0 stevel static struct T_info_ack icmp_g_t_info_ack = { 211 0 stevel T_INFO_ACK, 212 0 stevel IP_MAXPACKET, /* TSDU_size. icmp allows maximum size messages. */ 213 0 stevel T_INVALID, /* ETSDU_size. icmp does not support expedited data. */ 214 0 stevel T_INVALID, /* CDATA_size. icmp does not support connect data. */ 215 0 stevel T_INVALID, /* DDATA_size. icmp does not support disconnect data. */ 216 0 stevel 0, /* ADDR_size - filled in later. */ 217 0 stevel 0, /* OPT_size - not initialized here */ 218 0 stevel IP_MAXPACKET, /* TIDU_size. icmp allows maximum size messages. */ 219 0 stevel T_CLTS, /* SERV_type. icmp supports connection-less. 
*/ 220 0 stevel TS_UNBND, /* CURRENT_state. This is set from icmp_state. */ 221 0 stevel (XPG4_1|SENDZERO) /* PROVIDER_flag */ 222 0 stevel }; 223 0 stevel 224 0 stevel /* 225 3448 dh155122 * Table of ND variables supported by icmp. These are loaded into is_nd 226 3448 dh155122 * when the stack instance is created. 227 0 stevel * All of these are alterable, within the min/max values given, at run time. 228 0 stevel */ 229 0 stevel static icmpparam_t icmp_param_arr[] = { 230 0 stevel /* min max value name */ 231 0 stevel { 0, 128, 32, "icmp_wroff_extra" }, 232 0 stevel { 1, 255, 255, "icmp_ipv4_ttl" }, 233 0 stevel { 0, IPV6_MAX_HOPS, IPV6_DEFAULT_HOPS, "icmp_ipv6_hoplimit"}, 234 0 stevel { 0, 1, 1, "icmp_bsd_compat" }, 235 0 stevel { 4096, 65536, 8192, "icmp_xmit_hiwat"}, 236 0 stevel { 0, 65536, 1024, "icmp_xmit_lowat"}, 237 0 stevel { 4096, 65536, 8192, "icmp_recv_hiwat"}, 238 0 stevel { 65536, 1024*1024*1024, 256*1024, "icmp_max_buf"}, 239 11042 Erik { 0, 1, 0, "icmp_pmtu_discovery" }, 240 11042 Erik { 0, 1, 0, "icmp_sendto_ignerr" }, 241 0 stevel }; 242 3448 dh155122 #define is_wroff_extra is_param_arr[0].icmp_param_value 243 3448 dh155122 #define is_ipv4_ttl is_param_arr[1].icmp_param_value 244 3448 dh155122 #define is_ipv6_hoplimit is_param_arr[2].icmp_param_value 245 3448 dh155122 #define is_bsd_compat is_param_arr[3].icmp_param_value 246 3448 dh155122 #define is_xmit_hiwat is_param_arr[4].icmp_param_value 247 3448 dh155122 #define is_xmit_lowat is_param_arr[5].icmp_param_value 248 3448 dh155122 #define is_recv_hiwat is_param_arr[6].icmp_param_value 249 3448 dh155122 #define is_max_buf is_param_arr[7].icmp_param_value 250 11042 Erik #define is_pmtu_discovery is_param_arr[8].icmp_param_value 251 11042 Erik #define is_sendto_ignerr is_param_arr[9].icmp_param_value 252 11042 Erik 253 11042 Erik typedef union T_primitives *t_primp_t; 254 8348 Eric 255 0 stevel /* 256 0 stevel * This routine is called to handle each O_T_BIND_REQ/T_BIND_REQ message 257 0 stevel * 
passed to icmp_wput. 258 11042 Erik * It calls IP to verify the local IP address, and calls IP to insert 259 11042 Erik * the conn_t in the fanout table. 260 11042 Erik * If everything is ok it then sends the T_BIND_ACK back up. 261 0 stevel */ 262 0 stevel static void 263 8348 Eric icmp_tpi_bind(queue_t *q, mblk_t *mp) 264 8348 Eric { 265 8348 Eric int error; 266 8348 Eric struct sockaddr *sa; 267 8348 Eric struct T_bind_req *tbr; 268 8348 Eric socklen_t len; 269 0 stevel sin_t *sin; 270 0 stevel sin6_t *sin6; 271 8348 Eric icmp_t *icmp; 272 8348 Eric conn_t *connp = Q_TO_CONN(q); 273 8348 Eric mblk_t *mp1; 274 8778 Erik cred_t *cr; 275 8778 Erik 276 8778 Erik /* 277 8778 Erik * All Solaris components should pass a db_credp 278 8778 Erik * for this TPI message, hence we ASSERT. 279 8778 Erik * But in case there is some other M_PROTO that looks 280 8778 Erik * like a TPI message sent by some other kernel 281 8778 Erik * component, we check and return an error. 282 8778 Erik */ 283 8778 Erik cr = msg_getcred(mp, NULL); 284 8778 Erik ASSERT(cr != NULL); 285 8778 Erik if (cr == NULL) { 286 8778 Erik icmp_err_ack(q, mp, TSYSERR, EINVAL); 287 8778 Erik return; 288 8778 Erik } 289 5240 nordmark 290 5240 nordmark icmp = connp->conn_icmp; 291 0 stevel if ((mp->b_wptr - mp->b_rptr) < sizeof (*tbr)) { 292 0 stevel (void) mi_strlog(q, 1, SL_ERROR|SL_TRACE, 293 0 stevel "icmp_bind: bad req, len %u", 294 0 stevel (uint_t)(mp->b_wptr - mp->b_rptr)); 295 0 stevel icmp_err_ack(q, mp, TPROTO, 0); 296 0 stevel return; 297 0 stevel } 298 8348 Eric 299 0 stevel if (icmp->icmp_state != TS_UNBND) { 300 0 stevel (void) mi_strlog(q, 1, SL_ERROR|SL_TRACE, 301 11042 Erik "icmp_bind: bad state, %u", icmp->icmp_state); 302 0 stevel icmp_err_ack(q, mp, TOUTSTATE, 0); 303 0 stevel return; 304 0 stevel } 305 8348 Eric 306 0 stevel /* 307 0 stevel * Reallocate the message to make sure we have enough room for an 308 11042 Erik * address. 
309 11042 Erik */ 310 11042 Erik mp1 = reallocb(mp, sizeof (struct T_bind_ack) + sizeof (sin6_t), 1); 311 11042 Erik if (mp1 == NULL) { 312 0 stevel icmp_err_ack(q, mp, TSYSERR, ENOMEM); 313 0 stevel return; 314 0 stevel } 315 0 stevel mp = mp1; 316 8348 Eric 317 8348 Eric /* Reset the message type in preparation for shipping it back. */ 318 8348 Eric DB_TYPE(mp) = M_PCPROTO; 319 0 stevel tbr = (struct T_bind_req *)mp->b_rptr; 320 8348 Eric len = tbr->ADDR_length; 321 8348 Eric switch (len) { 322 8348 Eric case 0: /* request for a generic port */ 323 0 stevel tbr->ADDR_offset = sizeof (struct T_bind_req); 324 11042 Erik if (connp->conn_family == AF_INET) { 325 0 stevel tbr->ADDR_length = sizeof (sin_t); 326 0 stevel sin = (sin_t *)&tbr[1]; 327 0 stevel *sin = sin_null; 328 0 stevel sin->sin_family = AF_INET; 329 0 stevel mp->b_wptr = (uchar_t *)&sin[1]; 330 8348 Eric sa = (struct sockaddr *)sin; 331 8348 Eric len = sizeof (sin_t); 332 0 stevel } else { 333 11042 Erik ASSERT(connp->conn_family == AF_INET6); 334 0 stevel tbr->ADDR_length = sizeof (sin6_t); 335 0 stevel sin6 = (sin6_t *)&tbr[1]; 336 0 stevel *sin6 = sin6_null; 337 0 stevel sin6->sin6_family = AF_INET6; 338 0 stevel mp->b_wptr = (uchar_t *)&sin6[1]; 339 8348 Eric sa = (struct sockaddr *)sin6; 340 8348 Eric len = sizeof (sin6_t); 341 8348 Eric } 342 8348 Eric break; 343 8348 Eric 344 8348 Eric case sizeof (sin_t): /* Complete IPv4 address */ 345 8348 Eric sa = (struct sockaddr *)mi_offset_param(mp, tbr->ADDR_offset, 346 0 stevel sizeof (sin_t)); 347 8348 Eric break; 348 8348 Eric 349 8348 Eric case sizeof (sin6_t): /* Complete IPv6 address */ 350 8348 Eric sa = (struct sockaddr *)mi_offset_param(mp, 351 8348 Eric tbr->ADDR_offset, sizeof (sin6_t)); 352 8348 Eric break; 353 8348 Eric 354 0 stevel default: 355 0 stevel (void) mi_strlog(q, 1, SL_ERROR|SL_TRACE, 356 11042 Erik "icmp_bind: bad ADDR_length %u", tbr->ADDR_length); 357 0 stevel icmp_err_ack(q, mp, TBADADDR, 0); 358 0 stevel return; 359 0 stevel 
} 360 5240 nordmark 361 8348 Eric error = rawip_do_bind(connp, sa, len); 362 8348 Eric if (error != 0) { 363 8348 Eric if (error > 0) { 364 8348 Eric icmp_err_ack(q, mp, TSYSERR, error); 365 8348 Eric } else { 366 8348 Eric icmp_err_ack(q, mp, -error, 0); 367 8348 Eric } 368 8348 Eric } else { 369 8348 Eric tbr->PRIM_type = T_BIND_ACK; 370 8348 Eric qreply(q, mp); 371 8348 Eric } 372 8348 Eric } 373 8348 Eric 374 8348 Eric static int 375 8348 Eric rawip_do_bind(conn_t *connp, struct sockaddr *sa, socklen_t len) 376 8348 Eric { 377 8348 Eric sin_t *sin; 378 8348 Eric sin6_t *sin6; 379 11042 Erik icmp_t *icmp = connp->conn_icmp; 380 11042 Erik int error = 0; 381 11042 Erik ip_laddr_t laddr_type = IPVL_UNICAST_UP; /* INADDR_ANY */ 382 11042 Erik in_port_t lport; /* Network byte order */ 383 11042 Erik ipaddr_t v4src; /* Set if AF_INET */ 384 11042 Erik in6_addr_t v6src; 385 11042 Erik uint_t scopeid = 0; 386 11042 Erik zoneid_t zoneid = IPCL_ZONEID(connp); 387 11042 Erik ip_stack_t *ipst = connp->conn_netstack->netstack_ip; 388 8348 Eric 389 8348 Eric if (sa == NULL || !OK_32PTR((char *)sa)) { 390 8348 Eric return (EINVAL); 391 8348 Eric } 392 8348 Eric 393 8348 Eric switch (len) { 394 8348 Eric case sizeof (sin_t): /* Complete IPv4 address */ 395 8348 Eric sin = (sin_t *)sa; 396 8348 Eric if (sin->sin_family != AF_INET || 397 11042 Erik connp->conn_family != AF_INET) { 398 8348 Eric /* TSYSERR, EAFNOSUPPORT */ 399 11042 Erik return (EAFNOSUPPORT); 400 11042 Erik } 401 11042 Erik v4src = sin->sin_addr.s_addr; 402 11042 Erik IN6_IPADDR_TO_V4MAPPED(v4src, &v6src); 403 11042 Erik if (v4src != INADDR_ANY) { 404 11042 Erik laddr_type = ip_laddr_verify_v4(v4src, zoneid, ipst, 405 11042 Erik B_TRUE); 406 11042 Erik } 407 11042 Erik lport = sin->sin_port; 408 8348 Eric break; 409 8348 Eric case sizeof (sin6_t): /* Complete IPv6 address */ 410 8348 Eric sin6 = (sin6_t *)sa; 411 8348 Eric if (sin6->sin6_family != AF_INET6 || 412 11042 Erik connp->conn_family != AF_INET6) { 413 
8348 Eric /* TSYSERR, EAFNOSUPPORT */ 414 11042 Erik return (EAFNOSUPPORT); 415 8348 Eric } 416 8348 Eric /* No support for mapped addresses on raw sockets */ 417 8348 Eric if (IN6_IS_ADDR_V4MAPPED(&sin6->sin6_addr)) { 418 8348 Eric /* TSYSERR, EADDRNOTAVAIL */ 419 11042 Erik return (EADDRNOTAVAIL); 420 11042 Erik } 421 11042 Erik v6src = sin6->sin6_addr; 422 11042 Erik if (!IN6_IS_ADDR_UNSPECIFIED(&v6src)) { 423 11042 Erik if (IN6_IS_ADDR_LINKSCOPE(&v6src)) 424 11042 Erik scopeid = sin6->sin6_scope_id; 425 11042 Erik laddr_type = ip_laddr_verify_v6(&v6src, zoneid, ipst, 426 11042 Erik B_TRUE, scopeid); 427 11042 Erik } 428 11042 Erik lport = sin6->sin6_port; 429 8348 Eric break; 430 8348 Eric 431 8348 Eric default: 432 8348 Eric /* TBADADDR */ 433 11042 Erik return (EADDRNOTAVAIL); 434 11042 Erik } 435 11042 Erik 436 11042 Erik /* Is the local address a valid unicast, multicast, or broadcast? */ 437 11042 Erik if (laddr_type == IPVL_BAD) 438 11042 Erik return (EADDRNOTAVAIL); 439 11042 Erik 440 11042 Erik /* 441 11042 Erik * The state must be TS_UNBND. 442 11042 Erik */ 443 11042 Erik mutex_enter(&connp->conn_lock); 444 11042 Erik if (icmp->icmp_state != TS_UNBND) { 445 11042 Erik mutex_exit(&connp->conn_lock); 446 11042 Erik return (-TOUTSTATE); 447 11042 Erik } 448 5240 nordmark 449 0 stevel /* 450 0 stevel * Copy the source address into our icmp structure. This address 451 0 stevel * may still be zero; if so, ip will fill in the correct address 452 0 stevel * each time an outbound packet is passed to it. 453 5240 nordmark * If we are binding to a broadcast or multicast address then 454 11042 Erik * we just set the conn_bound_addr since we don't want to use 455 11042 Erik * that as the source address when sending. 
456 11042 Erik */ 457 11042 Erik connp->conn_bound_addr_v6 = v6src; 458 11042 Erik connp->conn_laddr_v6 = v6src; 459 11042 Erik if (scopeid != 0) { 460 11042 Erik connp->conn_ixa->ixa_flags |= IXAF_SCOPEID_SET; 461 11042 Erik connp->conn_ixa->ixa_scopeid = scopeid; 462 11042 Erik connp->conn_incoming_ifindex = scopeid; 463 11042 Erik } else { 464 11042 Erik connp->conn_ixa->ixa_flags &= ~IXAF_SCOPEID_SET; 465 11042 Erik connp->conn_incoming_ifindex = connp->conn_bound_if; 466 11042 Erik } 467 11042 Erik 468 11042 Erik switch (laddr_type) { 469 11042 Erik case IPVL_UNICAST_UP: 470 11042 Erik case IPVL_UNICAST_DOWN: 471 11042 Erik connp->conn_saddr_v6 = v6src; 472 11042 Erik connp->conn_mcbc_bind = B_FALSE; 473 11042 Erik break; 474 11042 Erik case IPVL_MCAST: 475 11042 Erik case IPVL_BCAST: 476 11042 Erik /* ip_set_destination will pick a source address later */ 477 11042 Erik connp->conn_saddr_v6 = ipv6_all_zeros; 478 11042 Erik connp->conn_mcbc_bind = B_TRUE; 479 11042 Erik break; 480 11042 Erik } 481 11042 Erik 482 11042 Erik /* Any errors after this point should use late_error */ 483 11042 Erik 484 11042 Erik /* 485 11042 Erik * Use sin_port/sin6_port since applications like psh use SOCK_RAW 486 11042 Erik * with IPPROTO_TCP. 487 11042 Erik */ 488 11042 Erik connp->conn_lport = lport; 489 11042 Erik connp->conn_fport = 0; 490 11042 Erik 491 11042 Erik if (connp->conn_family == AF_INET) { 492 11042 Erik ASSERT(connp->conn_ipversion == IPV4_VERSION); 493 11042 Erik } else { 494 11042 Erik ASSERT(connp->conn_ipversion == IPV6_VERSION); 495 11042 Erik } 496 11042 Erik 497 11042 Erik icmp->icmp_state = TS_IDLE; 498 11042 Erik 499 11042 Erik /* 500 11042 Erik * We create an initial header template here to make a subsequent 501 11042 Erik * sendto have a starting point. Since conn_last_dst is zero the 502 11042 Erik * first sendto will always follow the 'dst changed' code path. 
503 11042 Erik * Note that we defer massaging options and the related checksum 504 11042 Erik * adjustment until we have a destination address. 505 11042 Erik */ 506 11042 Erik error = icmp_build_hdr_template(connp, &connp->conn_saddr_v6, 507 11042 Erik &connp->conn_faddr_v6, connp->conn_flowinfo); 508 11042 Erik if (error != 0) { 509 11042 Erik mutex_exit(&connp->conn_lock); 510 11042 Erik goto late_error; 511 11042 Erik } 512 11042 Erik /* Just in case */ 513 11042 Erik connp->conn_faddr_v6 = ipv6_all_zeros; 514 11042 Erik connp->conn_v6lastdst = ipv6_all_zeros; 515 11042 Erik mutex_exit(&connp->conn_lock); 516 11042 Erik 517 11042 Erik error = ip_laddr_fanout_insert(connp); 518 11042 Erik if (error != 0) 519 11042 Erik goto late_error; 520 11042 Erik 521 11042 Erik /* Bind succeeded */ 522 11042 Erik return (0); 523 11042 Erik 524 11042 Erik late_error: 525 11042 Erik mutex_enter(&connp->conn_lock); 526 11042 Erik connp->conn_saddr_v6 = ipv6_all_zeros; 527 11042 Erik connp->conn_bound_addr_v6 = ipv6_all_zeros; 528 11042 Erik connp->conn_laddr_v6 = ipv6_all_zeros; 529 11042 Erik if (scopeid != 0) { 530 11042 Erik connp->conn_ixa->ixa_flags &= ~IXAF_SCOPEID_SET; 531 11042 Erik connp->conn_incoming_ifindex = connp->conn_bound_if; 532 11042 Erik } 533 11042 Erik icmp->icmp_state = TS_UNBND; 534 11042 Erik connp->conn_v6lastdst = ipv6_all_zeros; 535 11042 Erik connp->conn_lport = 0; 536 11042 Erik 537 11042 Erik /* Restore the header that was built above - different source address */ 538 11042 Erik (void) icmp_build_hdr_template(connp, &connp->conn_saddr_v6, 539 11042 Erik &connp->conn_faddr_v6, connp->conn_flowinfo); 540 11042 Erik mutex_exit(&connp->conn_lock); 541 11042 Erik return (error); 542 11042 Erik } 543 11042 Erik 544 11042 Erik /* 545 11042 Erik * Tell IP to just bind to the protocol. 
546 11042 Erik */ 547 11042 Erik static void 548 11042 Erik icmp_bind_proto(icmp_t *icmp) 549 11042 Erik { 550 11042 Erik conn_t *connp = icmp->icmp_connp; 551 11042 Erik 552 11042 Erik mutex_enter(&connp->conn_lock); 553 11042 Erik connp->conn_saddr_v6 = ipv6_all_zeros; 554 11042 Erik connp->conn_laddr_v6 = ipv6_all_zeros; 555 11042 Erik connp->conn_faddr_v6 = ipv6_all_zeros; 556 11042 Erik connp->conn_v6lastdst = ipv6_all_zeros; 557 11042 Erik mutex_exit(&connp->conn_lock); 558 11042 Erik 559 11042 Erik (void) ip_laddr_fanout_insert(connp); 560 11042 Erik } 561 11042 Erik 562 11042 Erik /* 563 11042 Erik * This routine handles each T_CONN_REQ message passed to icmp. It 564 11042 Erik * associates a default destination address with the stream. 565 11042 Erik * 566 11042 Erik * After various error checks are completed, icmp_connect() lays 567 11042 Erik * the target address and port into the composite header template. 568 11042 Erik * Then we ask IP for information, including a source address if we didn't 569 11042 Erik * already have one. Finally we send up the T_OK_ACK reply message. 570 11042 Erik */ 571 8348 Eric static void 572 8348 Eric icmp_tpi_connect(queue_t *q, mblk_t *mp) 573 8348 Eric { 574 8348 Eric conn_t *connp = Q_TO_CONN(q); 575 0 stevel struct T_conn_req *tcr; 576 8348 Eric struct sockaddr *sa; 577 8348 Eric socklen_t len; 578 8348 Eric int error; 579 8778 Erik cred_t *cr; 580 11042 Erik pid_t pid; 581 8778 Erik /* 582 8778 Erik * All Solaris components should pass a db_credp 583 8778 Erik * for this TPI message, hence we ASSERT. 584 8778 Erik * But in case there is some other M_PROTO that looks 585 8778 Erik * like a TPI message sent by some other kernel 586 8778 Erik * component, we check and return an error. 
587 8778 Erik */ 588 11042 Erik cr = msg_getcred(mp, &pid); 589 8778 Erik ASSERT(cr != NULL); 590 8778 Erik if (cr == NULL) { 591 8778 Erik icmp_err_ack(q, mp, TSYSERR, EINVAL); 592 8778 Erik return; 593 8778 Erik } 594 5240 nordmark 595 0 stevel tcr = (struct T_conn_req *)mp->b_rptr; 596 0 stevel /* Sanity checks */ 597 5240 nordmark if ((mp->b_wptr - mp->b_rptr) < sizeof (struct T_conn_req)) { 598 0 stevel icmp_err_ack(q, mp, TPROTO, 0); 599 0 stevel return; 600 0 stevel } 601 0 stevel 602 0 stevel if (tcr->OPT_length != 0) { 603 0 stevel icmp_err_ack(q, mp, TBADOPT, 0); 604 0 stevel return; 605 0 stevel } 606 5240 nordmark 607 8348 Eric len = tcr->DEST_length; 608 8348 Eric 609 8348 Eric switch (len) { 610 0 stevel default: 611 0 stevel icmp_err_ack(q, mp, TBADADDR, 0); 612 0 stevel return; 613 0 stevel case sizeof (sin_t): 614 8348 Eric sa = (struct sockaddr *)mi_offset_param(mp, tcr->DEST_offset, 615 0 stevel sizeof (sin_t)); 616 8348 Eric break; 617 8348 Eric case sizeof (sin6_t): 618 8348 Eric sa = (struct sockaddr *)mi_offset_param(mp, 619 8348 Eric tcr->DEST_offset, sizeof (sin6_t)); 620 8348 Eric break; 621 8348 Eric } 622 8348 Eric 623 11042 Erik error = proto_verify_ip_addr(connp->conn_family, sa, len); 624 8348 Eric if (error != 0) { 625 8348 Eric icmp_err_ack(q, mp, TSYSERR, error); 626 8348 Eric return; 627 8348 Eric } 628 8348 Eric 629 11042 Erik error = rawip_do_connect(connp, sa, len, cr, pid); 630 8348 Eric if (error != 0) { 631 8348 Eric if (error < 0) { 632 8348 Eric icmp_err_ack(q, mp, -error, 0); 633 8348 Eric } else { 634 8348 Eric icmp_err_ack(q, mp, 0, error); 635 8348 Eric } 636 8348 Eric } else { 637 8348 Eric mblk_t *mp1; 638 8348 Eric 639 8348 Eric /* 640 8348 Eric * We have to send a connection confirmation to 641 8348 Eric * keep TLI happy. 
642 8348 Eric */ 643 11042 Erik if (connp->conn_family == AF_INET) { 644 8348 Eric mp1 = mi_tpi_conn_con(NULL, (char *)sa, 645 8348 Eric sizeof (sin_t), NULL, 0); 646 8348 Eric } else { 647 11042 Erik ASSERT(connp->conn_family == AF_INET6); 648 8348 Eric mp1 = mi_tpi_conn_con(NULL, (char *)sa, 649 8348 Eric sizeof (sin6_t), NULL, 0); 650 8348 Eric } 651 8348 Eric if (mp1 == NULL) { 652 8348 Eric icmp_err_ack(q, mp, TSYSERR, ENOMEM); 653 8348 Eric return; 654 8348 Eric } 655 8348 Eric 656 8348 Eric /* 657 8348 Eric * Send ok_ack for T_CONN_REQ 658 8348 Eric */ 659 8348 Eric mp = mi_tpi_ok_ack_alloc(mp); 660 8348 Eric if (mp == NULL) { 661 8348 Eric /* Unable to reuse the T_CONN_REQ for the ack. */ 662 8348 Eric icmp_err_ack_prim(q, mp1, T_CONN_REQ, TSYSERR, ENOMEM); 663 8348 Eric return; 664 8348 Eric } 665 8348 Eric putnext(connp->conn_rq, mp); 666 8348 Eric putnext(connp->conn_rq, mp1); 667 8348 Eric } 668 8348 Eric } 669 8348 Eric 670 8348 Eric static int 671 8778 Erik rawip_do_connect(conn_t *connp, const struct sockaddr *sa, socklen_t len, 672 11042 Erik cred_t *cr, pid_t pid) 673 11042 Erik { 674 11042 Erik icmp_t *icmp; 675 11042 Erik sin_t *sin; 676 11042 Erik sin6_t *sin6; 677 11042 Erik int error; 678 11042 Erik uint16_t dstport; 679 8348 Eric ipaddr_t v4dst; 680 8348 Eric in6_addr_t v6dst; 681 11042 Erik uint32_t flowinfo; 682 11042 Erik ip_xmit_attr_t *ixa; 683 11042 Erik uint_t scopeid = 0; 684 11042 Erik uint_t srcid = 0; 685 11042 Erik in6_addr_t v6src = connp->conn_saddr_v6; 686 8348 Eric 687 8348 Eric icmp = connp->conn_icmp; 688 8348 Eric 689 8348 Eric if (sa == NULL || !OK_32PTR((char *)sa)) { 690 8348 Eric return (EINVAL); 691 8348 Eric } 692 8348 Eric 693 8348 Eric ASSERT(sa != NULL && len != 0); 694 8348 Eric 695 11042 Erik /* 696 11042 Erik * Determine packet type based on type of address passed in 697 11042 Erik * the request should contain an IPv4 or IPv6 address. 
698 11042 Erik * Make sure that address family matches the type of 699 11042 Erik * family of the address passed down. 700 11042 Erik */ 701 8348 Eric switch (len) { 702 8348 Eric case sizeof (sin_t): 703 8348 Eric sin = (sin_t *)sa; 704 8348 Eric 705 0 stevel v4dst = sin->sin_addr.s_addr; 706 11042 Erik dstport = sin->sin_port; 707 11042 Erik IN6_IPADDR_TO_V4MAPPED(v4dst, &v6dst); 708 11042 Erik ASSERT(connp->conn_ipversion == IPV4_VERSION); 709 11042 Erik break; 710 11042 Erik 711 11042 Erik case sizeof (sin6_t): 712 11042 Erik sin6 = (sin6_t *)sa; 713 11042 Erik 714 11042 Erik /* No support for mapped addresses on raw sockets */ 715 11042 Erik if (IN6_IS_ADDR_V4MAPPED(&sin6->sin6_addr)) { 716 11042 Erik return (EADDRNOTAVAIL); 717 11042 Erik } 718 11042 Erik v6dst = sin6->sin6_addr; 719 11042 Erik dstport = sin6->sin6_port; 720 11042 Erik ASSERT(connp->conn_ipversion == IPV6_VERSION); 721 11042 Erik flowinfo = sin6->sin6_flowinfo; 722 11042 Erik if (IN6_IS_ADDR_LINKLOCAL(&sin6->sin6_addr)) 723 11042 Erik scopeid = sin6->sin6_scope_id; 724 11042 Erik srcid = sin6->__sin6_src_id; 725 11042 Erik if (srcid != 0 && IN6_IS_ADDR_UNSPECIFIED(&v6src)) { 726 11042 Erik ip_srcid_find_id(srcid, &v6src, IPCL_ZONEID(connp), 727 11042 Erik connp->conn_netstack); 728 11042 Erik } 729 11042 Erik break; 730 11042 Erik } 731 11042 Erik 732 11042 Erik /* 733 11042 Erik * If there is a different thread using conn_ixa then we get a new 734 11042 Erik * copy and cut the old one loose from conn_ixa. Otherwise we use 735 11042 Erik * conn_ixa and prevent any other thread from using/changing it. 736 11042 Erik * Once connect() is done other threads can use conn_ixa since the 737 11042 Erik * refcnt will be back at one. 
738 11042 Erik */ 739 11042 Erik ixa = conn_get_ixa(connp, B_TRUE); 740 11042 Erik if (ixa == NULL) 741 11042 Erik return (ENOMEM); 742 11042 Erik 743 11042 Erik ASSERT(ixa->ixa_refcnt >= 2); 744 11042 Erik ASSERT(ixa == connp->conn_ixa); 745 11042 Erik 746 11042 Erik mutex_enter(&connp->conn_lock); 747 11042 Erik /* 748 11042 Erik * This icmp_t must have bound already before doing a connect. 749 11042 Erik * Reject if a connect is in progress (we drop conn_lock during 750 11042 Erik * rawip_do_connect). 751 11042 Erik */ 752 11042 Erik if (icmp->icmp_state == TS_UNBND || icmp->icmp_state == TS_WCON_CREQ) { 753 11042 Erik mutex_exit(&connp->conn_lock); 754 11042 Erik ixa_refrele(ixa); 755 11042 Erik return (-TOUTSTATE); 756 11042 Erik } 757 11042 Erik 758 11042 Erik if (icmp->icmp_state == TS_DATA_XFER) { 759 11042 Erik /* Already connected - clear out state */ 760 11042 Erik if (connp->conn_mcbc_bind) 761 11042 Erik connp->conn_saddr_v6 = ipv6_all_zeros; 762 11042 Erik else 763 11042 Erik connp->conn_saddr_v6 = connp->conn_bound_addr_v6; 764 11042 Erik connp->conn_laddr_v6 = connp->conn_bound_addr_v6; 765 11042 Erik connp->conn_faddr_v6 = ipv6_all_zeros; 766 11042 Erik icmp->icmp_state = TS_IDLE; 767 11042 Erik } 768 11042 Erik 769 11042 Erik /* 770 11042 Erik * Use sin_port/sin6_port since applications like psh use SOCK_RAW 771 11042 Erik * with IPPROTO_TCP. 772 11042 Erik */ 773 11042 Erik connp->conn_fport = dstport; 774 11042 Erik if (connp->conn_ipversion == IPV4_VERSION) { 775 8348 Eric /* 776 8348 Eric * Interpret a zero destination to mean loopback. 777 8348 Eric * Update the T_CONN_REQ (sin/sin6) since it is used to 778 8348 Eric * generate the T_CONN_CON. 
779 8348 Eric */ 780 8348 Eric if (v4dst == INADDR_ANY) { 781 8348 Eric v4dst = htonl(INADDR_LOOPBACK); 782 11042 Erik IN6_IPADDR_TO_V4MAPPED(v4dst, &v6dst); 783 11042 Erik ASSERT(connp->conn_family == AF_INET); 784 11042 Erik sin->sin_addr.s_addr = v4dst; 785 11042 Erik } 786 11042 Erik connp->conn_faddr_v6 = v6dst; 787 11042 Erik connp->conn_flowinfo = 0; 788 11042 Erik } else { 789 11042 Erik ASSERT(connp->conn_ipversion == IPV6_VERSION); 790 0 stevel /* 791 0 stevel * Interpret a zero destination to mean loopback. 792 0 stevel * Update the T_CONN_REQ (sin/sin6) since it is used to 793 0 stevel * generate the T_CONN_CON. 794 0 stevel */ 795 11042 Erik if (IN6_IS_ADDR_UNSPECIFIED(&v6dst)) { 796 11042 Erik v6dst = ipv6_loopback; 797 11042 Erik sin6->sin6_addr = v6dst; 798 11042 Erik } 799 11042 Erik connp->conn_faddr_v6 = v6dst; 800 11042 Erik connp->conn_flowinfo = flowinfo; 801 11042 Erik } 802 11042 Erik 803 11042 Erik ixa->ixa_cred = cr; 804 11042 Erik ixa->ixa_cpid = pid; 805 11042 Erik if (is_system_labeled()) { 806 11042 Erik /* We need to restart with a label based on the cred */ 807 11042 Erik ip_xmit_attr_restore_tsl(ixa, ixa->ixa_cred); 808 11042 Erik } 809 11042 Erik 810 11042 Erik if (scopeid != 0) { 811 11042 Erik ixa->ixa_flags |= IXAF_SCOPEID_SET; 812 11042 Erik ixa->ixa_scopeid = scopeid; 813 11042 Erik connp->conn_incoming_ifindex = scopeid; 814 11042 Erik } else { 815 11042 Erik ixa->ixa_flags &= ~IXAF_SCOPEID_SET; 816 11042 Erik connp->conn_incoming_ifindex = connp->conn_bound_if; 817 11042 Erik } 818 11042 Erik 819 11042 Erik /* 820 11042 Erik * conn_connect will drop conn_lock and reacquire it. 821 11042 Erik * To prevent a send* from messing with this icmp_t while the lock 822 11042 Erik * is dropped we set icmp_state and clear conn_v6lastdst. 823 11042 Erik * That will make all send* fail with EISCONN. 
824 11042 Erik */ 825 11042 Erik connp->conn_v6lastdst = ipv6_all_zeros; 826 11042 Erik icmp->icmp_state = TS_WCON_CREQ; 827 11042 Erik 828 11042 Erik error = conn_connect(connp, NULL, IPDF_ALLOW_MCBC); 829 11042 Erik mutex_exit(&connp->conn_lock); 830 11042 Erik if (error != 0) 831 11042 Erik goto connect_failed; 832 11042 Erik 833 11042 Erik /* 834 11042 Erik * The addresses have been verified. Time to insert in 835 11042 Erik * the correct fanout list. 836 11042 Erik */ 837 11042 Erik error = ipcl_conn_insert(connp); 838 11042 Erik if (error != 0) 839 11042 Erik goto connect_failed; 840 11042 Erik 841 11042 Erik mutex_enter(&connp->conn_lock); 842 11042 Erik error = icmp_build_hdr_template(connp, &connp->conn_saddr_v6, 843 11042 Erik &connp->conn_faddr_v6, connp->conn_flowinfo); 844 11042 Erik if (error != 0) { 845 11042 Erik mutex_exit(&connp->conn_lock); 846 11042 Erik goto connect_failed; 847 5240 nordmark } 848 5240 nordmark 849 0 stevel icmp->icmp_state = TS_DATA_XFER; 850 11042 Erik /* Record this as the "last" send even though we haven't sent any */ 851 11042 Erik connp->conn_v6lastdst = connp->conn_faddr_v6; 852 11042 Erik connp->conn_lastipversion = connp->conn_ipversion; 853 11042 Erik connp->conn_lastdstport = connp->conn_fport; 854 11042 Erik connp->conn_lastflowinfo = connp->conn_flowinfo; 855 11042 Erik connp->conn_lastscopeid = scopeid; 856 11042 Erik connp->conn_lastsrcid = srcid; 857 11042 Erik /* Also remember a source to use together with lastdst */ 858 11042 Erik connp->conn_v6lastsrc = v6src; 859 11042 Erik mutex_exit(&connp->conn_lock); 860 11042 Erik 861 11042 Erik ixa_refrele(ixa); 862 11042 Erik return (0); 863 11042 Erik 864 11042 Erik connect_failed: 865 11042 Erik if (ixa != NULL) 866 11042 Erik ixa_refrele(ixa); 867 11042 Erik mutex_enter(&connp->conn_lock); 868 11042 Erik icmp->icmp_state = TS_IDLE; 869 11042 Erik /* In case the source address was set above */ 870 11042 Erik if (connp->conn_mcbc_bind) 871 11042 Erik 
connp->conn_saddr_v6 = ipv6_all_zeros; 872 11042 Erik else 873 11042 Erik connp->conn_saddr_v6 = connp->conn_bound_addr_v6; 874 11042 Erik connp->conn_laddr_v6 = connp->conn_bound_addr_v6; 875 11042 Erik connp->conn_faddr_v6 = ipv6_all_zeros; 876 11042 Erik connp->conn_v6lastdst = ipv6_all_zeros; 877 11042 Erik connp->conn_flowinfo = 0; 878 11042 Erik 879 11042 Erik (void) icmp_build_hdr_template(connp, &connp->conn_saddr_v6, 880 11042 Erik &connp->conn_faddr_v6, connp->conn_flowinfo); 881 11042 Erik mutex_exit(&connp->conn_lock); 882 11042 Erik return (error); 883 11042 Erik } 884 11042 Erik 885 11042 Erik static void 886 8348 Eric rawip_do_close(conn_t *connp) 887 8348 Eric { 888 5240 nordmark ASSERT(connp != NULL && IPCL_IS_RAWIP(connp)); 889 5240 nordmark 890 5240 nordmark ip_quiesce_conn(connp); 891 5240 nordmark 892 8348 Eric if (!IPCL_IS_NONSTR(connp)) { 893 8348 Eric qprocsoff(connp->conn_rq); 894 8348 Eric } 895 8348 Eric 896 5240 nordmark icmp_close_free(connp); 897 5240 nordmark 898 5240 nordmark /* 899 5240 nordmark * Now we are truly single threaded on this stream, and can 900 5240 nordmark * delete the things hanging off the connp, and finally the connp. 901 5240 nordmark * We removed this connp from the fanout list, it cannot be 902 5240 nordmark * accessed thru the fanouts, and we already waited for the 903 5240 nordmark * conn_ref to drop to 0. We are already in close, so 904 5240 nordmark * there cannot be any other thread from the top. qprocsoff 905 5240 nordmark * has completed, and service has completed or won't run in 906 5240 nordmark * future. 
907 5240 nordmark */ 908 5240 nordmark ASSERT(connp->conn_ref == 1); 909 5240 nordmark 910 8348 Eric if (!IPCL_IS_NONSTR(connp)) { 911 8348 Eric inet_minor_free(connp->conn_minor_arena, connp->conn_dev); 912 8348 Eric } else { 913 8477 Rao ip_free_helper_stream(connp); 914 8348 Eric } 915 5240 nordmark 916 5240 nordmark connp->conn_ref--; 917 5240 nordmark ipcl_conn_destroy(connp); 918 8348 Eric } 919 8348 Eric 920 8348 Eric static int 921 8348 Eric icmp_close(queue_t *q, int flags) 922 8348 Eric { 923 8348 Eric conn_t *connp; 924 8348 Eric 925 8348 Eric if (flags & SO_FALLBACK) { 926 8348 Eric /* 927 8348 Eric * stream is being closed while in fallback 928 8348 Eric * simply free the resources that were allocated 929 8348 Eric */ 930 8348 Eric inet_minor_free(WR(q)->q_ptr, (dev_t)(RD(q)->q_ptr)); 931 8348 Eric qprocsoff(q); 932 8348 Eric goto done; 933 8348 Eric } 934 8348 Eric 935 8348 Eric connp = Q_TO_CONN(q); 936 8348 Eric (void) rawip_do_close(connp); 937 8348 Eric done: 938 5240 nordmark q->q_ptr = WR(q)->q_ptr = NULL; 939 5240 nordmark return (0); 940 0 stevel } 941 0 stevel 942 11042 Erik static void 943 11042 Erik icmp_close_free(conn_t *connp) 944 11042 Erik { 945 11042 Erik icmp_t *icmp = connp->conn_icmp; 946 11042 Erik 947 11042 Erik if (icmp->icmp_filter != NULL) { 948 11042 Erik kmem_free(icmp->icmp_filter, sizeof (icmp6_filter_t)); 949 11042 Erik icmp->icmp_filter = NULL; 950 11042 Erik } 951 11042 Erik 952 11042 Erik /* 953 11042 Erik * Clear any fields which the kmem_cache constructor clears. 954 11042 Erik * Only icmp_connp needs to be preserved. 955 11042 Erik * TBD: We should make this more efficient to avoid clearing 956 11042 Erik * everything. 
957 11042 Erik */ 958 11042 Erik ASSERT(icmp->icmp_connp == connp); 959 11042 Erik bzero(icmp, sizeof (icmp_t)); 960 11042 Erik icmp->icmp_connp = connp; 961 11042 Erik } 962 11042 Erik 963 0 stevel /* 964 0 stevel * This routine handles each T_DISCON_REQ message passed to icmp 965 0 stevel * as an indicating that ICMP is no longer connected. This results 966 11042 Erik * in telling IP to restore the binding to just the local address. 967 8348 Eric */ 968 8348 Eric static int 969 8348 Eric icmp_do_disconnect(conn_t *connp) 970 8348 Eric { 971 11042 Erik icmp_t *icmp = connp->conn_icmp; 972 11042 Erik int error; 973 11042 Erik 974 11042 Erik mutex_enter(&connp->conn_lock); 975 11042 Erik if (icmp->icmp_state != TS_DATA_XFER) { 976 11042 Erik mutex_exit(&connp->conn_lock); 977 8348 Eric return (-TOUTSTATE); 978 0 stevel } 979 11042 Erik if (connp->conn_mcbc_bind) 980 11042 Erik connp->conn_saddr_v6 = ipv6_all_zeros; 981 11042 Erik else 982 11042 Erik connp->conn_saddr_v6 = connp->conn_bound_addr_v6; 983 11042 Erik connp->conn_laddr_v6 = connp->conn_bound_addr_v6; 984 11042 Erik connp->conn_faddr_v6 = ipv6_all_zeros; 985 0 stevel icmp->icmp_state = TS_IDLE; 986 0 stevel 987 11042 Erik connp->conn_v6lastdst = ipv6_all_zeros; 988 11042 Erik error = icmp_build_hdr_template(connp, &connp->conn_saddr_v6, 989 11042 Erik &connp->conn_faddr_v6, connp->conn_flowinfo); 990 11042 Erik mutex_exit(&connp->conn_lock); 991 11042 Erik if (error != 0) 992 11042 Erik return (error); 993 11042 Erik 994 11042 Erik /* 995 11042 Erik * Tell IP to remove the full binding and revert 996 11042 Erik * to the local address binding. 
997 11042 Erik */ 998 11042 Erik return (ip_laddr_fanout_insert(connp)); 999 8348 Eric } 1000 8348 Eric 1001 8348 Eric static void 1002 8348 Eric icmp_tpi_disconnect(queue_t *q, mblk_t *mp) 1003 8348 Eric { 1004 8348 Eric conn_t *connp = Q_TO_CONN(q); 1005 8348 Eric int error; 1006 8348 Eric 1007 8348 Eric /* 1008 8348 Eric * Allocate the largest primitive we need to send back 1009 8348 Eric * T_error_ack is > than T_ok_ack 1010 8348 Eric */ 1011 8348 Eric mp = reallocb(mp, sizeof (struct T_error_ack), 1); 1012 8348 Eric if (mp == NULL) { 1013 8348 Eric /* Unable to reuse the T_DISCON_REQ for the ack. */ 1014 8348 Eric icmp_err_ack_prim(q, mp, T_DISCON_REQ, TSYSERR, ENOMEM); 1015 8348 Eric return; 1016 8348 Eric } 1017 8348 Eric 1018 8348 Eric error = icmp_do_disconnect(connp); 1019 8348 Eric 1020 8348 Eric if (error != 0) { 1021 8348 Eric if (error > 0) { 1022 8348 Eric icmp_err_ack(q, mp, 0, error); 1023 8348 Eric } else { 1024 8348 Eric icmp_err_ack(q, mp, -error, 0); 1025 8348 Eric } 1026 8348 Eric } else { 1027 8348 Eric mp = mi_tpi_ok_ack_alloc(mp); 1028 8348 Eric ASSERT(mp != NULL); 1029 8348 Eric qreply(q, mp); 1030 8348 Eric } 1031 8348 Eric } 1032 8348 Eric 1033 8348 Eric static int 1034 8348 Eric icmp_disconnect(conn_t *connp) 1035 8348 Eric { 1036 8348 Eric int error; 1037 11042 Erik 1038 11042 Erik connp->conn_dgram_errind = B_FALSE; 1039 8348 Eric 1040 8348 Eric error = icmp_do_disconnect(connp); 1041 8348 Eric 1042 8348 Eric if (error < 0) 1043 8348 Eric error = proto_tlitosyserr(-error); 1044 8348 Eric return (error); 1045 0 stevel } 1046 0 stevel 1047 0 stevel /* This routine creates a T_ERROR_ACK message and passes it upstream. 
*/ 1048 0 stevel static void 1049 0 stevel icmp_err_ack(queue_t *q, mblk_t *mp, t_scalar_t t_error, int sys_error) 1050 0 stevel { 1051 0 stevel if ((mp = mi_tpi_err_ack_alloc(mp, t_error, sys_error)) != NULL) 1052 0 stevel qreply(q, mp); 1053 0 stevel } 1054 0 stevel 1055 0 stevel /* Shorthand to generate and send TPI error acks to our client */ 1056 0 stevel static void 1057 0 stevel icmp_err_ack_prim(queue_t *q, mblk_t *mp, t_scalar_t primitive, 1058 0 stevel t_scalar_t t_error, int sys_error) 1059 0 stevel { 1060 0 stevel struct T_error_ack *teackp; 1061 0 stevel 1062 0 stevel if ((mp = tpi_ack_alloc(mp, sizeof (struct T_error_ack), 1063 0 stevel M_PCPROTO, T_ERROR_ACK)) != NULL) { 1064 0 stevel teackp = (struct T_error_ack *)mp->b_rptr; 1065 0 stevel teackp->ERROR_prim = primitive; 1066 0 stevel teackp->TLI_error = t_error; 1067 0 stevel teackp->UNIX_error = sys_error; 1068 0 stevel qreply(q, mp); 1069 0 stevel } 1070 0 stevel } 1071 0 stevel 1072 0 stevel /* 1073 11042 Erik * icmp_icmp_input is called as conn_recvicmp to process ICMP messages. 1074 11042 Erik * Generates the appropriate T_UDERROR_IND for permanent (non-transient) errors. 1075 11042 Erik * Assumes that IP has pulled up everything up to and including the ICMP header. 
1076 11042 Erik */ 1077 11042 Erik /* ARGSUSED2 */ 1078 11042 Erik static void 1079 11042 Erik icmp_icmp_input(void *arg1, mblk_t *mp, void *arg2, ip_recv_attr_t *ira) 1080 11042 Erik { 1081 11042 Erik conn_t *connp = (conn_t *)arg1; 1082 11042 Erik icmp_t *icmp = connp->conn_icmp; 1083 11042 Erik icmph_t *icmph; 1084 11042 Erik ipha_t *ipha; 1085 11042 Erik int iph_hdr_length; 1086 11042 Erik sin_t sin; 1087 11042 Erik mblk_t *mp1; 1088 11042 Erik int error = 0; 1089 0 stevel 1090 0 stevel ipha = (ipha_t *)mp->b_rptr; 1091 5240 nordmark 1092 5240 nordmark ASSERT(OK_32PTR(mp->b_rptr)); 1093 0 stevel 1094 0 stevel if (IPH_HDR_VERSION(ipha) != IPV4_VERSION) { 1095 0 stevel ASSERT(IPH_HDR_VERSION(ipha) == IPV6_VERSION); 1096 11042 Erik icmp_icmp_error_ipv6(connp, mp, ira); 1097 11042 Erik return; 1098 11042 Erik } 1099 11042 Erik ASSERT(IPH_HDR_VERSION(ipha) == IPV4_VERSION); 1100 0 stevel 1101 5240 nordmark /* Skip past the outer IP and ICMP headers */ 1102 11042 Erik ASSERT(IPH_HDR_LENGTH(ipha) == ira->ira_ip_hdr_length); 1103 11042 Erik iph_hdr_length = ira->ira_ip_hdr_length; 1104 11042 Erik icmph = (icmph_t *)&mp->b_rptr[iph_hdr_length]; 1105 11042 Erik ipha = (ipha_t *)&icmph[1]; /* Inner IP header */ 1106 11042 Erik 1107 0 stevel iph_hdr_length = IPH_HDR_LENGTH(ipha); 1108 0 stevel 1109 0 stevel switch (icmph->icmph_type) { 1110 0 stevel case ICMP_DEST_UNREACHABLE: 1111 0 stevel switch (icmph->icmph_code) { 1112 11042 Erik case ICMP_FRAGMENTATION_NEEDED: { 1113 11042 Erik ipha_t *ipha; 1114 11042 Erik ip_xmit_attr_t *ixa; 1115 0 stevel /* 1116 0 stevel * IP has already adjusted the path MTU. 1117 11042 Erik * But we need to adjust DF for IPv4. 1118 11042 Erik */ 1119 11042 Erik if (connp->conn_ipversion != IPV4_VERSION) 1120 11042 Erik break; 1121 11042 Erik 1122 11042 Erik ixa = conn_get_ixa(connp, B_FALSE); 1123 11042 Erik if (ixa == NULL || ixa->ixa_ire == NULL) { 1124 11042 Erik /* 1125 11042 Erik * Some other thread holds conn_ixa. 
We will 1126 11042 Erik * redo this on the next ICMP too big. 1127 11042 Erik */ 1128 11042 Erik if (ixa != NULL) 1129 11042 Erik ixa_refrele(ixa); 1130 11042 Erik break; 1131 11042 Erik } 1132 11042 Erik (void) ip_get_pmtu(ixa); 1133 11042 Erik 1134 11042 Erik mutex_enter(&connp->conn_lock); 1135 11042 Erik ipha = (ipha_t *)connp->conn_ht_iphc; 1136 11042 Erik if (ixa->ixa_flags & IXAF_PMTU_IPV4_DF) { 1137 11042 Erik ipha->ipha_fragment_offset_and_flags |= 1138 11042 Erik IPH_DF_HTONS; 1139 11042 Erik } else { 1140 11042 Erik ipha->ipha_fragment_offset_and_flags &= 1141 11042 Erik ~IPH_DF_HTONS; 1142 11042 Erik } 1143 11042 Erik mutex_exit(&connp->conn_lock); 1144 11042 Erik ixa_refrele(ixa); 1145 11042 Erik break; 1146 11042 Erik } 1147 0 stevel case ICMP_PORT_UNREACHABLE: 1148 0 stevel case ICMP_PROTOCOL_UNREACHABLE: 1149 0 stevel error = ECONNREFUSED; 1150 0 stevel break; 1151 0 stevel default: 1152 0 stevel /* Transient errors */ 1153 0 stevel break; 1154 0 stevel } 1155 0 stevel break; 1156 0 stevel default: 1157 0 stevel /* Transient errors */ 1158 0 stevel break; 1159 0 stevel } 1160 0 stevel if (error == 0) { 1161 5240 nordmark freemsg(mp); 1162 5240 nordmark return; 1163 5240 nordmark } 1164 5240 nordmark 1165 5240 nordmark /* 1166 5240 nordmark * Deliver T_UDERROR_IND when the application has asked for it. 1167 5240 nordmark * The socket layer enables this automatically when connected. 
1168 5240 nordmark */ 1169 11042 Erik if (!connp->conn_dgram_errind) { 1170 0 stevel freemsg(mp); 1171 0 stevel return; 1172 0 stevel } 1173 0 stevel 1174 8348 Eric sin = sin_null; 1175 8348 Eric sin.sin_family = AF_INET; 1176 8348 Eric sin.sin_addr.s_addr = ipha->ipha_dst; 1177 8963 Anders 1178 8348 Eric if (IPCL_IS_NONSTR(connp)) { 1179 11042 Erik mutex_enter(&connp->conn_lock); 1180 8348 Eric if (icmp->icmp_state == TS_DATA_XFER) { 1181 11042 Erik if (sin.sin_addr.s_addr == connp->conn_faddr_v4) { 1182 11042 Erik mutex_exit(&connp->conn_lock); 1183 8348 Eric (*connp->conn_upcalls->su_set_error) 1184 8348 Eric (connp->conn_upper_handle, error); 1185 8348 Eric goto done; 1186 8348 Eric } 1187 8348 Eric } else { 1188 8348 Eric icmp->icmp_delayed_error = error; 1189 8348 Eric *((sin_t *)&icmp->icmp_delayed_addr) = sin; 1190 8348 Eric } 1191 11042 Erik mutex_exit(&connp->conn_lock); 1192 11042 Erik } else { 1193 11042 Erik mp1 = mi_tpi_uderror_ind((char *)&sin, sizeof (sin_t), NULL, 0, 1194 11042 Erik error); 1195 8348 Eric if (mp1 != NULL) 1196 8348 Eric putnext(connp->conn_rq, mp1); 1197 8348 Eric } 1198 8348 Eric done: 1199 0 stevel freemsg(mp); 1200 0 stevel } 1201 0 stevel 1202 0 stevel /* 1203 11042 Erik * icmp_icmp_error_ipv6 is called by icmp_icmp_error to process ICMP for IPv6. 1204 11042 Erik * Generates the appropriate T_UDERROR_IND for permanent (non-transient) errors. 1205 11042 Erik * Assumes that IP has pulled up all the extension headers as well as the 1206 11042 Erik * ICMPv6 header. 
1207 11042 Erik */ 1208 11042 Erik static void 1209 11042 Erik icmp_icmp_error_ipv6(conn_t *connp, mblk_t *mp, ip_recv_attr_t *ira) 1210 0 stevel { 1211 0 stevel icmp6_t *icmp6; 1212 0 stevel ip6_t *ip6h, *outer_ip6h; 1213 0 stevel uint16_t iph_hdr_length; 1214 0 stevel uint8_t *nexthdrp; 1215 0 stevel sin6_t sin6; 1216 0 stevel mblk_t *mp1; 1217 0 stevel int error = 0; 1218 8348 Eric icmp_t *icmp = connp->conn_icmp; 1219 0 stevel 1220 0 stevel outer_ip6h = (ip6_t *)mp->b_rptr; 1221 11042 Erik #ifdef DEBUG 1222 0 stevel if (outer_ip6h->ip6_nxt != IPPROTO_ICMPV6) 1223 0 stevel iph_hdr_length = ip_hdr_length_v6(mp, outer_ip6h); 1224 0 stevel else 1225 0 stevel iph_hdr_length = IPV6_HDR_LEN; 1226 11042 Erik ASSERT(iph_hdr_length == ira->ira_ip_hdr_length); 1227 11042 Erik #endif 1228 11042 Erik /* Skip past the outer IP and ICMP headers */ 1229 11042 Erik iph_hdr_length = ira->ira_ip_hdr_length; 1230 0 stevel icmp6 = (icmp6_t *)&mp->b_rptr[iph_hdr_length]; 1231 11042 Erik 1232 11042 Erik ip6h = (ip6_t *)&icmp6[1]; /* Inner IP header */ 1233 0 stevel if (!ip_hdr_length_nexthdr_v6(mp, ip6h, &iph_hdr_length, &nexthdrp)) { 1234 0 stevel freemsg(mp); 1235 0 stevel return; 1236 0 stevel } 1237 5240 nordmark 1238 0 stevel switch (icmp6->icmp6_type) { 1239 0 stevel case ICMP6_DST_UNREACH: 1240 0 stevel switch (icmp6->icmp6_code) { 1241 0 stevel case ICMP6_DST_UNREACH_NOPORT: 1242 0 stevel error = ECONNREFUSED; 1243 0 stevel break; 1244 0 stevel case ICMP6_DST_UNREACH_ADMIN: 1245 0 stevel case ICMP6_DST_UNREACH_NOROUTE: 1246 0 stevel case ICMP6_DST_UNREACH_BEYONDSCOPE: 1247 0 stevel case ICMP6_DST_UNREACH_ADDR: 1248 0 stevel /* Transient errors */ 1249 0 stevel break; 1250 0 stevel default: 1251 0 stevel break; 1252 0 stevel } 1253 0 stevel break; 1254 0 stevel case ICMP6_PACKET_TOO_BIG: { 1255 0 stevel struct T_unitdata_ind *tudi; 1256 0 stevel struct T_opthdr *toh; 1257 0 stevel size_t udi_size; 1258 0 stevel mblk_t *newmp; 1259 0 stevel t_scalar_t opt_length = sizeof 
(struct T_opthdr) + 1260 0 stevel sizeof (struct ip6_mtuinfo); 1261 0 stevel sin6_t *sin6; 1262 0 stevel struct ip6_mtuinfo *mtuinfo; 1263 0 stevel 1264 0 stevel /* 1265 0 stevel * If the application has requested to receive path mtu 1266 0 stevel * information, send up an empty message containing an 1267 0 stevel * IPV6_PATHMTU ancillary data item. 1268 0 stevel */ 1269 11042 Erik if (!connp->conn_ipv6_recvpathmtu) 1270 0 stevel break; 1271 0 stevel 1272 0 stevel udi_size = sizeof (struct T_unitdata_ind) + sizeof (sin6_t) + 1273 0 stevel opt_length; 1274 0 stevel if ((newmp = allocb(udi_size, BPRI_MED)) == NULL) { 1275 5240 nordmark BUMP_MIB(&icmp->icmp_is->is_rawip_mib, rawipInErrors); 1276 0 stevel break; 1277 0 stevel } 1278 0 stevel 1279 0 stevel /* 1280 0 stevel * newmp->b_cont is left to NULL on purpose. This is an 1281 0 stevel * empty message containing only ancillary data. 1282 0 stevel */ 1283 0 stevel newmp->b_datap->db_type = M_PROTO; 1284 0 stevel tudi = (struct T_unitdata_ind *)newmp->b_rptr; 1285 0 stevel newmp->b_wptr = (uchar_t *)tudi + udi_size; 1286 0 stevel tudi->PRIM_type = T_UNITDATA_IND; 1287 0 stevel tudi->SRC_length = sizeof (sin6_t); 1288 0 stevel tudi->SRC_offset = sizeof (struct T_unitdata_ind); 1289 0 stevel tudi->OPT_offset = tudi->SRC_offset + sizeof (sin6_t); 1290 0 stevel tudi->OPT_length = opt_length; 1291 0 stevel 1292 0 stevel sin6 = (sin6_t *)&tudi[1]; 1293 0 stevel bzero(sin6, sizeof (sin6_t)); 1294 0 stevel sin6->sin6_family = AF_INET6; 1295 11042 Erik sin6->sin6_addr = connp->conn_faddr_v6; 1296 0 stevel 1297 0 stevel toh = (struct T_opthdr *)&sin6[1]; 1298 0 stevel toh->level = IPPROTO_IPV6; 1299 0 stevel toh->name = IPV6_PATHMTU; 1300 0 stevel toh->len = opt_length; 1301 0 stevel toh->status = 0; 1302 0 stevel 1303 0 stevel mtuinfo = (struct ip6_mtuinfo *)&toh[1]; 1304 0 stevel bzero(mtuinfo, sizeof (struct ip6_mtuinfo)); 1305 0 stevel mtuinfo->ip6m_addr.sin6_family = AF_INET6; 1306 0 stevel mtuinfo->ip6m_addr.sin6_addr = 
ip6h->ip6_dst; 1307 0 stevel mtuinfo->ip6m_mtu = icmp6->icmp6_mtu; 1308 0 stevel /* 1309 0 stevel * We've consumed everything we need from the original 1310 0 stevel * message. Free it, then send our empty message. 1311 0 stevel */ 1312 0 stevel freemsg(mp); 1313 11042 Erik icmp_ulp_recv(connp, newmp, msgdsize(newmp)); 1314 0 stevel return; 1315 0 stevel } 1316 0 stevel case ICMP6_TIME_EXCEEDED: 1317 0 stevel /* Transient errors */ 1318 0 stevel break; 1319 0 stevel case ICMP6_PARAM_PROB: 1320 0 stevel /* If this corresponds to an ICMP_PROTOCOL_UNREACHABLE */ 1321 0 stevel if (icmp6->icmp6_code == ICMP6_PARAMPROB_NEXTHEADER && 1322 0 stevel (uchar_t *)ip6h + icmp6->icmp6_pptr == 1323 0 stevel (uchar_t *)nexthdrp) { 1324 0 stevel error = ECONNREFUSED; 1325 0 stevel break; 1326 0 stevel } 1327 0 stevel break; 1328 0 stevel } 1329 0 stevel if (error == 0) { 1330 0 stevel freemsg(mp); 1331 0 stevel return; 1332 0 stevel } 1333 0 stevel 1334 5240 nordmark /* 1335 5240 nordmark * Deliver T_UDERROR_IND when the application has asked for it. 1336 5240 nordmark * The socket layer enables this automatically when connected. 
1337 5240 nordmark */ 1338 11042 Erik if (!connp->conn_dgram_errind) { 1339 5240 nordmark freemsg(mp); 1340 5240 nordmark return; 1341 5240 nordmark } 1342 5240 nordmark 1343 0 stevel sin6 = sin6_null; 1344 0 stevel sin6.sin6_family = AF_INET6; 1345 0 stevel sin6.sin6_addr = ip6h->ip6_dst; 1346 0 stevel sin6.sin6_flowinfo = ip6h->ip6_vcf & ~IPV6_VERS_AND_FLOW_MASK; 1347 8348 Eric if (IPCL_IS_NONSTR(connp)) { 1348 11042 Erik mutex_enter(&connp->conn_lock); 1349 8348 Eric if (icmp->icmp_state == TS_DATA_XFER) { 1350 8348 Eric if (IN6_ARE_ADDR_EQUAL(&sin6.sin6_addr, 1351 11042 Erik &connp->conn_faddr_v6)) { 1352 11042 Erik mutex_exit(&connp->conn_lock); 1353 8348 Eric (*connp->conn_upcalls->su_set_error) 1354 8348 Eric (connp->conn_upper_handle, error); 1355 8348 Eric goto done; 1356 8348 Eric } 1357 8348 Eric } else { 1358 8348 Eric icmp->icmp_delayed_error = error; 1359 8348 Eric *((sin6_t *)&icmp->icmp_delayed_addr) = sin6; 1360 8348 Eric } 1361 11042 Erik mutex_exit(&connp->conn_lock); 1362 8348 Eric } else { 1363 8348 Eric mp1 = mi_tpi_uderror_ind((char *)&sin6, sizeof (sin6_t), 1364 8348 Eric NULL, 0, error); 1365 8348 Eric if (mp1 != NULL) 1366 8348 Eric putnext(connp->conn_rq, mp1); 1367 8348 Eric } 1368 8348 Eric done: 1369 0 stevel freemsg(mp); 1370 0 stevel } 1371 0 stevel 1372 0 stevel /* 1373 0 stevel * This routine responds to T_ADDR_REQ messages. It is called by icmp_wput. 1374 0 stevel * The local address is filled in if endpoint is bound. The remote address 1375 0 stevel * is filled in if remote address has been precified ("connected endpoint") 1376 0 stevel * (The concept of connected CLTS sockets is alien to published TPI 1377 0 stevel * but we support it anyway). 
1378 0 stevel */ 1379 0 stevel static void 1380 0 stevel icmp_addr_req(queue_t *q, mblk_t *mp) 1381 0 stevel { 1382 11042 Erik struct sockaddr *sa; 1383 0 stevel mblk_t *ackmp; 1384 0 stevel struct T_addr_ack *taa; 1385 11042 Erik icmp_t *icmp = Q_TO_ICMP(q); 1386 11042 Erik conn_t *connp = icmp->icmp_connp; 1387 11042 Erik uint_t addrlen; 1388 0 stevel 1389 0 stevel /* Make it large enough for worst case */ 1390 0 stevel ackmp = reallocb(mp, sizeof (struct T_addr_ack) + 1391 0 stevel 2 * sizeof (sin6_t), 1); 1392 0 stevel if (ackmp == NULL) { 1393 0 stevel icmp_err_ack(q, mp, TSYSERR, ENOMEM); 1394 0 stevel return; 1395 0 stevel } 1396 0 stevel taa = (struct T_addr_ack *)ackmp->b_rptr; 1397 0 stevel 1398 0 stevel bzero(taa, sizeof (struct T_addr_ack)); 1399 0 stevel ackmp->b_wptr = (uchar_t *)&taa[1]; 1400 0 stevel 1401 0 stevel taa->PRIM_type = T_ADDR_ACK; 1402 0 stevel ackmp->b_datap->db_type = M_PCPROTO; 1403 11042 Erik 1404 11042 Erik if (connp->conn_family == AF_INET) 1405 11042 Erik addrlen = sizeof (sin_t); 1406 11042 Erik else 1407 11042 Erik addrlen = sizeof (sin6_t); 1408 11042 Erik 1409 11042 Erik mutex_enter(&connp->conn_lock); 1410 0 stevel /* 1411 0 stevel * Note: Following code assumes 32 bit alignment of basic 1412 0 stevel * data structures like sin_t and struct T_addr_ack. 
1413 0 stevel */ 1414 0 stevel if (icmp->icmp_state != TS_UNBND) { 1415 0 stevel /* 1416 11042 Erik * Fill in local address first 1417 0 stevel */ 1418 0 stevel taa->LOCADDR_offset = sizeof (*taa); 1419 11042 Erik taa->LOCADDR_length = addrlen; 1420 11042 Erik sa = (struct sockaddr *)&taa[1]; 1421 11042 Erik (void) conn_getsockname(connp, sa, &addrlen); 1422 11042 Erik ackmp->b_wptr += addrlen; 1423 11042 Erik } 1424 11042 Erik if (icmp->icmp_state == TS_DATA_XFER) { 1425 11042 Erik /* 1426 11042 Erik * connected, fill remote address too 1427 11042 Erik */ 1428 11042 Erik taa->REMADDR_length = addrlen; 1429 11042 Erik /* assumed 32-bit alignment */ 1430 11042 Erik taa->REMADDR_offset = taa->LOCADDR_offset + taa->LOCADDR_length; 1431 11042 Erik sa = (struct sockaddr *)(ackmp->b_rptr + taa->REMADDR_offset); 1432 11042 Erik (void) conn_getpeername(connp, sa, &addrlen); 1433 11042 Erik ackmp->b_wptr += addrlen; 1434 11042 Erik } 1435 11042 Erik mutex_exit(&connp->conn_lock); 1436 0 stevel ASSERT(ackmp->b_wptr <= ackmp->b_datap->db_lim); 1437 0 stevel qreply(q, ackmp); 1438 0 stevel } 1439 0 stevel 1440 0 stevel static void 1441 0 stevel icmp_copy_info(struct T_info_ack *tap, icmp_t *icmp) 1442 0 stevel { 1443 11042 Erik conn_t *connp = icmp->icmp_connp; 1444 11042 Erik 1445 0 stevel *tap = icmp_g_t_info_ack; 1446 0 stevel 1447 11042 Erik if (connp->conn_family == AF_INET6) 1448 0 stevel tap->ADDR_size = sizeof (sin6_t); 1449 0 stevel else 1450 0 stevel tap->ADDR_size = sizeof (sin_t); 1451 0 stevel tap->CURRENT_state = icmp->icmp_state; 1452 0 stevel tap->OPT_size = icmp_max_optsize; 1453 0 stevel } 1454 0 stevel 1455 8348 Eric static void 1456 8348 Eric icmp_do_capability_ack(icmp_t *icmp, struct T_capability_ack *tcap, 1457 8348 Eric t_uscalar_t cap_bits1) 1458 8348 Eric { 1459 8348 Eric tcap->CAP_bits1 = 0; 1460 8348 Eric 1461 8348 Eric if (cap_bits1 & TC1_INFO) { 1462 8348 Eric icmp_copy_info(&tcap->INFO_ack, icmp); 1463 8348 Eric tcap->CAP_bits1 |= TC1_INFO; 1464 
8348 Eric } 1465 8348 Eric } 1466 8348 Eric 1467 0 stevel /* 1468 0 stevel * This routine responds to T_CAPABILITY_REQ messages. It is called by 1469 0 stevel * icmp_wput. Much of the T_CAPABILITY_ACK information is copied from 1470 0 stevel * icmp_g_t_info_ack. The current state of the stream is copied from 1471 0 stevel * icmp_state. 1472 0 stevel */ 1473 0 stevel static void 1474 0 stevel icmp_capability_req(queue_t *q, mblk_t *mp) 1475 0 stevel { 1476 5240 nordmark icmp_t *icmp = Q_TO_ICMP(q); 1477 0 stevel t_uscalar_t cap_bits1; 1478 0 stevel struct T_capability_ack *tcap; 1479 0 stevel 1480 0 stevel cap_bits1 = ((struct T_capability_req *)mp->b_rptr)->CAP_bits1; 1481 0 stevel 1482 0 stevel mp = tpi_ack_alloc(mp, sizeof (struct T_capability_ack), 1483 5240 nordmark mp->b_datap->db_type, T_CAPABILITY_ACK); 1484 0 stevel if (!mp) 1485 0 stevel return; 1486 0 stevel 1487 0 stevel tcap = (struct T_capability_ack *)mp->b_rptr; 1488 8348 Eric 1489 8348 Eric icmp_do_capability_ack(icmp, tcap, cap_bits1); 1490 0 stevel 1491 0 stevel qreply(q, mp); 1492 0 stevel } 1493 0 stevel 1494 0 stevel /* 1495 0 stevel * This routine responds to T_INFO_REQ messages. It is called by icmp_wput. 1496 0 stevel * Most of the T_INFO_ACK information is copied from icmp_g_t_info_ack. 1497 0 stevel * The current state of the stream is copied from icmp_state. 1498 0 stevel */ 1499 0 stevel static void 1500 0 stevel icmp_info_req(queue_t *q, mblk_t *mp) 1501 0 stevel { 1502 5240 nordmark icmp_t *icmp = Q_TO_ICMP(q); 1503 0 stevel 1504 11042 Erik /* Create a T_INFO_ACK message. 
*/ 1505 0 stevel mp = tpi_ack_alloc(mp, sizeof (struct T_info_ack), M_PCPROTO, 1506 0 stevel T_INFO_ACK); 1507 0 stevel if (!mp) 1508 0 stevel return; 1509 0 stevel icmp_copy_info((struct T_info_ack *)mp->b_rptr, icmp); 1510 0 stevel qreply(q, mp); 1511 0 stevel } 1512 0 stevel 1513 5240 nordmark static int 1514 8348 Eric icmp_tpi_open(queue_t *q, dev_t *devp, int flag, int sflag, cred_t *credp, 1515 8348 Eric int family) 1516 8348 Eric { 1517 8348 Eric conn_t *connp; 1518 8348 Eric dev_t conn_dev; 1519 11042 Erik int error; 1520 8348 Eric 1521 8348 Eric /* If the stream is already open, return immediately. */ 1522 8348 Eric if (q->q_ptr != NULL) 1523 8348 Eric return (0); 1524 8348 Eric 1525 8348 Eric if (sflag == MODOPEN) 1526 8348 Eric return (EINVAL); 1527 8348 Eric 1528 8348 Eric /* 1529 8348 Eric * Since ICMP is not used so heavily, allocating from the small 1530 8348 Eric * arena should be sufficient. 1531 8348 Eric */ 1532 8348 Eric if ((conn_dev = inet_minor_alloc(ip_minor_arena_sa)) == 0) { 1533 8348 Eric return (EBUSY); 1534 8348 Eric } 1535 8348 Eric 1536 8348 Eric if (flag & SO_FALLBACK) { 1537 8348 Eric /* 1538 8348 Eric * Non streams socket needs a stream to fallback to 1539 8348 Eric */ 1540 8348 Eric RD(q)->q_ptr = (void *)conn_dev; 1541 8348 Eric WR(q)->q_qinfo = &icmp_fallback_sock_winit; 1542 8348 Eric WR(q)->q_ptr = (void *)ip_minor_arena_sa; 1543 8348 Eric qprocson(q); 1544 8348 Eric return (0); 1545 8348 Eric } 1546 8348 Eric 1547 11042 Erik connp = rawip_do_open(family, credp, &error, KM_SLEEP); 1548 8348 Eric if (connp == NULL) { 1549 11042 Erik ASSERT(error != 0); 1550 8348 Eric inet_minor_free(ip_minor_arena_sa, connp->conn_dev); 1551 8348 Eric return (error); 1552 8348 Eric } 1553 8348 Eric 1554 8348 Eric *devp = makedevice(getemajor(*devp), (minor_t)conn_dev); 1555 8348 Eric connp->conn_dev = conn_dev; 1556 8348 Eric connp->conn_minor_arena = ip_minor_arena_sa; 1557 8348 Eric 1558 8348 Eric /* 1559 8348 Eric * Initialize the icmp_t 
structure for this stream. 1560 8348 Eric */ 1561 8348 Eric q->q_ptr = connp; 1562 8348 Eric WR(q)->q_ptr = connp; 1563 8348 Eric connp->conn_rq = q; 1564 8348 Eric connp->conn_wq = WR(q); 1565 8348 Eric 1566 11042 Erik WR(q)->q_hiwat = connp->conn_sndbuf; 1567 11042 Erik WR(q)->q_lowat = connp->conn_sndlowat; 1568 8348 Eric 1569 8348 Eric qprocson(q); 1570 8348 Eric 1571 8348 Eric /* Set the Stream head write offset. */ 1572 11042 Erik (void) proto_set_tx_wroff(q, connp, connp->conn_wroff); 1573 11042 Erik (void) proto_set_rx_hiwat(connp->conn_rq, connp, connp->conn_rcvbuf); 1574 8348 Eric 1575 8348 Eric mutex_enter(&connp->conn_lock); 1576 8348 Eric connp->conn_state_flags &= ~CONN_INCIPIENT; 1577 8348 Eric mutex_exit(&connp->conn_lock); 1578 8348 Eric 1579 11042 Erik icmp_bind_proto(connp->conn_icmp); 1580 11042 Erik 1581 11042 Erik return (0); 1582 11042 Erik } 1583 11042 Erik 1584 11042 Erik /* For /dev/icmp aka AF_INET open */ 1585 8348 Eric static int 1586 5240 nordmark icmp_openv4(queue_t *q, dev_t *devp, int flag, int sflag, cred_t *credp) 1587 5240 nordmark { 1588 8348 Eric return (icmp_tpi_open(q, devp, flag, sflag, credp, AF_INET)); 1589 5240 nordmark } 1590 5240 nordmark 1591 5240 nordmark /* For /dev/icmp6 aka AF_INET6 open */ 1592 5240 nordmark static int 1593 5240 nordmark icmp_openv6(queue_t *q, dev_t *devp, int flag, int sflag, cred_t *credp) 1594 5240 nordmark { 1595 8348 Eric return (icmp_tpi_open(q, devp, flag, sflag, credp, AF_INET6)); 1596 1676 jpk } 1597 1676 jpk 1598 0 stevel /* 1599 0 stevel * This is the open routine for icmp. It allocates a icmp_t structure for 1600 0 stevel * the stream and, on the first open of the module, creates an ND table. 
1601 0 stevel */ 1602 8348 Eric static conn_t * 1603 11042 Erik rawip_do_open(int family, cred_t *credp, int *err, int flags) 1604 8348 Eric { 1605 0 stevel icmp_t *icmp; 1606 5240 nordmark conn_t *connp; 1607 5240 nordmark zoneid_t zoneid; 1608 3448 dh155122 netstack_t *ns; 1609 3448 dh155122 icmp_stack_t *is; 1610 11042 Erik int len; 1611 8348 Eric boolean_t isv6 = B_FALSE; 1612 8348 Eric 1613 8348 Eric *err = secpolicy_net_icmpaccess(credp); 1614 8348 Eric if (*err != 0) 1615 8348 Eric return (NULL); 1616 8348 Eric 1617 8348 Eric if (family == AF_INET6) 1618 8348 Eric isv6 = B_TRUE; 1619 11042 Erik 1620 3448 dh155122 ns = netstack_find_by_cred(credp); 1621 3448 dh155122 ASSERT(ns != NULL); 1622 3448 dh155122 is = ns->netstack_icmp; 1623 3448 dh155122 ASSERT(is != NULL); 1624 3448 dh155122 1625 3448 dh155122 /* 1626 3448 dh155122 * For exclusive stacks we set the zoneid to zero 1627 3448 dh155122 * to make ICMP operate as if in the global zone. 1628 3448 dh155122 */ 1629 5240 nordmark if (ns->netstack_stackid != GLOBAL_NETSTACKID) 1630 3448 dh155122 zoneid = GLOBAL_ZONEID; 1631 3448 dh155122 else 1632 3448 dh155122 zoneid = crgetzoneid(credp); 1633 3448 dh155122 1634 8348 Eric ASSERT(flags == KM_SLEEP || flags == KM_NOSLEEP); 1635 8348 Eric 1636 8348 Eric connp = ipcl_conn_create(IPCL_RAWIPCONN, flags, ns); 1637 8348 Eric icmp = connp->conn_icmp; 1638 5240 nordmark 1639 5240 nordmark /* 1640 5240 nordmark * ipcl_conn_create did a netstack_hold. Undo the hold that was 1641 5240 nordmark * done by netstack_find_by_cred() 1642 5240 nordmark */ 1643 5240 nordmark netstack_rele(ns); 1644 5240 nordmark 1645 11042 Erik /* 1646 11042 Erik * Since this conn_t/icmp_t is not yet visible to anybody else we don't 1647 11042 Erik * need to lock anything. 
1648 11042 Erik */ 1649 11042 Erik ASSERT(connp->conn_proto == IPPROTO_ICMP); 1650 5240 nordmark ASSERT(connp->conn_icmp == icmp); 1651 5240 nordmark ASSERT(icmp->icmp_connp == connp); 1652 0 stevel 1653 0 stevel /* Set the initial state of the stream and the privilege status. */ 1654 0 stevel icmp->icmp_state = TS_UNBND; 1655 11042 Erik connp->conn_ixa->ixa_flags |= IXAF_VERIFY_SOURCE; 1656 5240 nordmark if (isv6) { 1657 11042 Erik connp->conn_family = AF_INET6; 1658 11042 Erik connp->conn_ipversion = IPV6_VERSION; 1659 11042 Erik connp->conn_ixa->ixa_flags &= ~IXAF_IS_IPV4; 1660 11042 Erik connp->conn_proto = IPPROTO_ICMPV6; 1661 0 stevel /* May be changed by a SO_PROTOTYPE socket option. */ 1662 11042 Erik connp->conn_proto = IPPROTO_ICMPV6; 1663 11042 Erik connp->conn_ixa->ixa_protocol = connp->conn_proto; 1664 11042 Erik connp->conn_ixa->ixa_raw_cksum_offset = 2; 1665 11042 Erik connp->conn_default_ttl = is->is_ipv6_hoplimit; 1666 11042 Erik len = sizeof (ip6_t); 1667 11042 Erik } else { 1668 11042 Erik connp->conn_family = AF_INET; 1669 11042 Erik connp->conn_ipversion = IPV4_VERSION; 1670 11042 Erik connp->conn_ixa->ixa_flags |= IXAF_IS_IPV4; 1671 0 stevel /* May be changed by a SO_PROTOTYPE socket option. */ 1672 11042 Erik connp->conn_proto = IPPROTO_ICMP; 1673 11042 Erik connp->conn_ixa->ixa_protocol = connp->conn_proto; 1674 11042 Erik connp->conn_default_ttl = is->is_ipv4_ttl; 1675 11042 Erik len = sizeof (ipha_t); 1676 11042 Erik } 1677 11042 Erik connp->conn_xmit_ipp.ipp_unicast_hops = connp->conn_default_ttl; 1678 11042 Erik 1679 11042 Erik connp->conn_ixa->ixa_multicast_ttl = IP_DEFAULT_MULTICAST_TTL; 1680 11042 Erik 1681 11042 Erik /* 1682 11042 Erik * For the socket of protocol IPPROTO_RAW or when IP_HDRINCL is set, 1683 11042 Erik * the checksum is provided in the pre-built packet. 
We clear 1684 11042 Erik * IXAF_SET_ULP_CKSUM to tell IP that the application has sent a 1685 11042 Erik * complete IP header and not to compute the transport checksum. 1686 11042 Erik */ 1687 11042 Erik connp->conn_ixa->ixa_flags |= IXAF_MULTICAST_LOOP | IXAF_SET_ULP_CKSUM; 1688 11042 Erik /* conn_allzones can not be set this early, hence no IPCL_ZONEID */ 1689 11042 Erik connp->conn_ixa->ixa_zoneid = zoneid; 1690 11042 Erik 1691 5240 nordmark connp->conn_zoneid = zoneid; 1692 5240 nordmark 1693 5240 nordmark /* 1694 5240 nordmark * If the caller has the process-wide flag set, then default to MAC 1695 5240 nordmark * exempt mode. This allows read-down to unlabeled hosts. 1696 5240 nordmark */ 1697 5240 nordmark if (getpflags(NET_MAC_AWARE, credp) != 0) 1698 10934 sommerfeld connp->conn_mac_mode = CONN_MAC_AWARE; 1699 5240 nordmark 1700 11042 Erik connp->conn_zone_is_global = (crgetzoneid(credp) == GLOBAL_ZONEID); 1701 5240 nordmark 1702 5240 nordmark icmp->icmp_is = is; 1703 5240 nordmark 1704 11042 Erik connp->conn_rcvbuf = is->is_recv_hiwat; 1705 11042 Erik connp->conn_sndbuf = is->is_xmit_hiwat; 1706 11042 Erik connp->conn_sndlowat = is->is_xmit_lowat; 1707 11042 Erik connp->conn_rcvlowat = icmp_mod_info.mi_lowat; 1708 11042 Erik 1709 11042 Erik connp->conn_wroff = len + is->is_wroff_extra; 1710 11042 Erik connp->conn_so_type = SOCK_RAW; 1711 11042 Erik 1712 5240 nordmark connp->conn_recv = icmp_input; 1713 11042 Erik connp->conn_recvicmp = icmp_icmp_input; 1714 5240 nordmark crhold(credp); 1715 5240 nordmark connp->conn_cred = credp; 1716 11042 Erik connp->conn_cpid = curproc->p_pid; 1717 11066 rafael connp->conn_open_time = ddi_get_lbolt64(); 1718 11042 Erik /* Cache things in ixa without an extra refhold */ 1719 11042 Erik connp->conn_ixa->ixa_cred = connp->conn_cred; 1720 11042 Erik connp->conn_ixa->ixa_cpid = connp->conn_cpid; 1721 11042 Erik if (is_system_labeled()) 1722 11042 Erik connp->conn_ixa->ixa_tsl = crgetlabel(connp->conn_cred); 1723 8348 Eric 
1724 8348 Eric connp->conn_flow_cntrld = B_FALSE; 1725 11042 Erik 1726 11042 Erik if (is->is_pmtu_discovery) 1727 11042 Erik connp->conn_ixa->ixa_flags |= IXAF_PMTU_DISCOVERY; 1728 11042 Erik 1729 8348 Eric return (connp); 1730 0 stevel } 1731 0 stevel 1732 0 stevel /* 1733 0 stevel * Which ICMP options OK to set through T_UNITDATA_REQ... 1734 0 stevel */ 1735 0 stevel /* ARGSUSED */ 1736 0 stevel static boolean_t 1737 0 stevel icmp_opt_allow_udr_set(t_scalar_t level, t_scalar_t name) 1738 0 stevel { 1739 0 stevel return (B_TRUE); 1740 0 stevel } 1741 0 stevel 1742 0 stevel /* 1743 0 stevel * This routine gets default values of certain options whose default 1744 0 stevel * values are maintained by protcol specific code 1745 0 stevel */ 1746 11042 Erik int 1747 11042 Erik icmp_opt_default(queue_t *q, t_scalar_t level, t_scalar_t name, uchar_t *ptr) 1748 0 stevel { 1749 5240 nordmark icmp_t *icmp = Q_TO_ICMP(q); 1750 3448 dh155122 icmp_stack_t *is = icmp->icmp_is; 1751 0 stevel int *i1 = (int *)ptr; 1752 0 stevel 1753 0 stevel switch (level) { 1754 0 stevel case IPPROTO_IP: 1755 0 stevel switch (name) { 1756 0 stevel case IP_MULTICAST_TTL: 1757 0 stevel *ptr = (uchar_t)IP_DEFAULT_MULTICAST_TTL; 1758 0 stevel return (sizeof (uchar_t)); 1759 0 stevel case IP_MULTICAST_LOOP: 1760 0 stevel *ptr = (uchar_t)IP_DEFAULT_MULTICAST_LOOP; 1761 0 stevel return (sizeof (uchar_t)); 1762 0 stevel } 1763 0 stevel break; 1764 0 stevel case IPPROTO_IPV6: 1765 0 stevel switch (name) { 1766 0 stevel case IPV6_MULTICAST_HOPS: 1767 0 stevel *i1 = IP_DEFAULT_MULTICAST_TTL; 1768 0 stevel return (sizeof (int)); 1769 0 stevel case IPV6_MULTICAST_LOOP: 1770 0 stevel *i1 = IP_DEFAULT_MULTICAST_LOOP; 1771 0 stevel return (sizeof (int)); 1772 0 stevel case IPV6_UNICAST_HOPS: 1773 3448 dh155122 *i1 = is->is_ipv6_hoplimit; 1774 0 stevel return (sizeof (int)); 1775 0 stevel } 1776 0 stevel break; 1777 0 stevel case IPPROTO_ICMPV6: 1778 0 stevel switch (name) { 1779 0 stevel case ICMP6_FILTER: 1780 0 
stevel /* Make it look like "pass all" */ 1781 0 stevel ICMP6_FILTER_SETPASSALL((icmp6_filter_t *)ptr); 1782 0 stevel return (sizeof (icmp6_filter_t)); 1783 0 stevel } 1784 0 stevel break; 1785 0 stevel } 1786 0 stevel return (-1); 1787 0 stevel } 1788 0 stevel 1789 0 stevel /* 1790 0 stevel * This routine retrieves the current status of socket options. 1791 11042 Erik * It returns the size of the option retrieved, or -1. 1792 0 stevel */ 1793 0 stevel int 1794 8348 Eric icmp_opt_get(conn_t *connp, int level, int name, uchar_t *ptr) 1795 8348 Eric { 1796 8348 Eric icmp_t *icmp = connp->conn_icmp; 1797 8348 Eric int *i1 = (int *)ptr; 1798 11042 Erik conn_opt_arg_t coas; 1799 11042 Erik int retval; 1800 11042 Erik 1801 11042 Erik coas.coa_connp = connp; 1802 11042 Erik coas.coa_ixa = connp->conn_ixa; 1803 11042 Erik coas.coa_ipp = &connp->conn_xmit_ipp; 1804 11042 Erik coas.coa_ancillary = B_FALSE; 1805 11042 Erik coas.coa_changed = 0; 1806 11042 Erik 1807 11042 Erik /* 1808 11042 Erik * We assume that the optcom framework has checked for the set 1809 11042 Erik * of levels and names that are supported, hence we don't worry 1810 11042 Erik * about rejecting based on that. 1811 11042 Erik * First check for ICMP specific handling, then pass to common routine. 1812 11042 Erik */ 1813 0 stevel switch (level) { 1814 0 stevel case IPPROTO_IP: 1815 0 stevel /* 1816 0 stevel * Only allow IPv4 option processing on IPv4 sockets. 
1817 0 stevel */ 1818 11042 Erik if (connp->conn_family != AF_INET) 1819 11042 Erik return (-1); 1820 0 stevel 1821 0 stevel switch (name) { 1822 0 stevel case IP_OPTIONS: 1823 0 stevel case T_IP_OPTIONS: 1824 0 stevel /* Options are passed up with each packet */ 1825 11042 Erik return (0); 1826 0 stevel case IP_HDRINCL: 1827 11042 Erik mutex_enter(&connp->conn_lock); 1828 0 stevel *i1 = (int)icmp->icmp_hdrincl; 1829 11042 Erik mutex_exit(&connp->conn_lock); 1830 11042 Erik return (sizeof (int)); 1831 11042 Erik } 1832 11042 Erik break; 1833 11042 Erik 1834 0 stevel case IPPROTO_IPV6: 1835 0 stevel /* 1836 0 stevel * Only allow IPv6 option processing on native IPv6 sockets. 1837 0 stevel */ 1838 11042 Erik if (connp->conn_family != AF_INET6) 1839 11042 Erik return (-1); 1840 11042 Erik 1841 11042 Erik switch (name) { 1842 0 stevel case IPV6_CHECKSUM: 1843 0 stevel /* 1844 0 stevel * Return offset or -1 if no checksum offset. 1845 0 stevel * Does not apply to IPPROTO_ICMPV6 1846 0 stevel */ 1847 11042 Erik if (connp->conn_proto == IPPROTO_ICMPV6) 1848 11042 Erik return (-1); 1849 11042 Erik 1850 11042 Erik mutex_enter(&connp->conn_lock); 1851 11042 Erik if (connp->conn_ixa->ixa_flags & IXAF_SET_RAW_CKSUM) 1852 11042 Erik *i1 = connp->conn_ixa->ixa_raw_cksum_offset; 1853 11042 Erik else 1854 0 stevel *i1 = -1; 1855 11042 Erik mutex_exit(&connp->conn_lock); 1856 11042 Erik return (sizeof (int)); 1857 11042 Erik } 1858 11042 Erik break; 1859 11042 Erik 1860 0 stevel case IPPROTO_ICMPV6: 1861 0 stevel /* 1862 0 stevel * Only allow IPv6 option processing on native IPv6 sockets. 
1863 0 stevel */ 1864 11042 Erik if (connp->conn_family != AF_INET6) 1865 11042 Erik return (-1); 1866 11042 Erik 1867 11042 Erik if (connp->conn_proto != IPPROTO_ICMPV6) 1868 11042 Erik return (-1); 1869 0 stevel 1870 0 stevel switch (name) { 1871 0 stevel case ICMP6_FILTER: 1872 11042 Erik mutex_enter(&connp->conn_lock); 1873 0 stevel if (icmp->icmp_filter == NULL) { 1874 0 stevel /* Make it look like "pass all" */ 1875 0 stevel ICMP6_FILTER_SETPASSALL((icmp6_filter_t *)ptr); 1876 0 stevel } else { 1877 0 stevel (void) bcopy(icmp->icmp_filter, ptr, 1878 0 stevel sizeof (icmp6_filter_t)); 1879 0 stevel } 1880 11042 Erik mutex_exit(&connp->conn_lock); 1881 11042 Erik return (sizeof (icmp6_filter_t)); 1882 11042 Erik } 1883 11042 Erik } 1884 11042 Erik mutex_enter(&connp->conn_lock); 1885 11042 Erik retval = conn_opt_get(&coas, level, name, ptr); 1886 11042 Erik mutex_exit(&connp->conn_lock); 1887 11042 Erik return (retval); 1888 0 stevel } 1889 0 stevel 1890 5240 nordmark /* 1891 5240 nordmark * This routine retrieves the current status of socket options. 1892 11042 Erik * It returns the size of the option retrieved, or -1. 1893 5240 nordmark */ 1894 5240 nordmark int 1895 8348 Eric icmp_tpi_opt_get(queue_t *q, int level, int name, uchar_t *ptr) 1896 8348 Eric { 1897 11042 Erik conn_t *connp = Q_TO_CONN(q); 1898 11042 Erik int err; 1899 11042 Erik 1900 8348 Eric err = icmp_opt_get(connp, level, name, ptr); 1901 5240 nordmark return (err); 1902 5240 nordmark } 1903 5240 nordmark 1904 11042 Erik /* 1905 11042 Erik * This routine sets socket options. 
*/
/*
 * icmp_do_opt_set: apply (or, when checkonly is set, only validate) the
 * ICMP-specific part of a socket option, then hand the option to
 * conn_opt_set() for common processing.
 *
 * coa       - option argument block; supplies the conn, the transmit
 *             attributes (coa_ixa) and collects COA_* change flags.
 * level/name - option being set.
 * inlen/invalp - option value; read through an int pointer for the
 *             integer-valued options.
 * cr        - credentials used for the raw-access privilege check.
 * checkonly - validate without modifying state.
 *
 * Returns 0 or an errno value (EACCES, EINVAL, ENOBUFS, or whatever
 * conn_opt_set() returns). Must be called without conn_lock held.
 */
int
icmp_do_opt_set(conn_opt_arg_t *coa, int level, int name,
    uint_t inlen, uchar_t *invalp, cred_t *cr, boolean_t checkonly)
{
	conn_t		*connp = coa->coa_connp;
	ip_xmit_attr_t	*ixa = coa->coa_ixa;
	icmp_t		*icmp = connp->conn_icmp;
	icmp_stack_t	*is = icmp->icmp_is;
	int		*i1 = (int *)invalp;
	/* NOTE(review): reads *invalp unconditionally, even when inlen is 0 */
	boolean_t	onoff = (*i1 == 0) ? 0 : 1;
	int		error;

	ASSERT(MUTEX_NOT_HELD(&coa->coa_connp->conn_lock));

	/*
	 * For fixed length options, no sanity check
	 * of passed in length is done. It is assumed *_optcom_req()
	 * routines do the right thing.
	 */

	switch (level) {
	case SOL_SOCKET:
		switch (name) {
		case SO_PROTOTYPE:
			/*
			 * Protocols other than ICMP/ICMPv6 require the
			 * raw-access privilege.
			 */
			if ((*i1 & 0xFF) != IPPROTO_ICMP &&
			    (*i1 & 0xFF) != IPPROTO_ICMPV6 &&
			    secpolicy_net_rawaccess(cr) != 0) {
				return (EACCES);
			}
			if (checkonly)
				break;

			mutex_enter(&connp->conn_lock);
			connp->conn_proto = *i1 & 0xFF;
			ixa->ixa_protocol = connp->conn_proto;
			/*
			 * Pick header-include mode and whether IP should
			 * compute the transport checksum for this protocol.
			 */
			if ((connp->conn_proto == IPPROTO_RAW ||
			    connp->conn_proto == IPPROTO_IGMP) &&
			    connp->conn_family == AF_INET) {
				icmp->icmp_hdrincl = 1;
				ixa->ixa_flags &= ~IXAF_SET_ULP_CKSUM;
			} else if (connp->conn_proto == IPPROTO_UDP ||
			    connp->conn_proto == IPPROTO_TCP ||
			    connp->conn_proto == IPPROTO_SCTP) {
				/* Used by test applications like psh */
				icmp->icmp_hdrincl = 0;
				ixa->ixa_flags &= ~IXAF_SET_ULP_CKSUM;
			} else {
				icmp->icmp_hdrincl = 0;
				ixa->ixa_flags |= IXAF_SET_ULP_CKSUM;
			}

			if (connp->conn_family == AF_INET6 &&
			    connp->conn_proto == IPPROTO_ICMPV6) {
				/* Set offset for icmp6_cksum */
				ixa->ixa_flags &= ~IXAF_SET_RAW_CKSUM;
				ixa->ixa_raw_cksum_offset = 2;
			}
			/* An ICMPv6 filter is dropped for other protocols. */
			if (icmp->icmp_filter != NULL &&
			    connp->conn_proto != IPPROTO_ICMPV6) {
				kmem_free(icmp->icmp_filter,
				    sizeof (icmp6_filter_t));
				icmp->icmp_filter = NULL;
			}
			mutex_exit(&connp->conn_lock);

			coa->coa_changed |= COA_HEADER_CHANGED;
			/*
			 * For SCTP, we don't use icmp_bind_proto() for
			 * raw socket binding.
			 */
			if (connp->conn_proto == IPPROTO_SCTP)
				return (0);

			coa->coa_changed |= COA_ICMP_BIND_NEEDED;
			/* Returns here; conn_opt_set() is not called. */
			return (0);

		case SO_SNDBUF:
			/* Bounded by the per-stack is_max_buf value. */
			if (*i1 > is->is_max_buf) {
				return (ENOBUFS);
			}
			break;
		case SO_RCVBUF:
			if (*i1 > is->is_max_buf) {
				return (ENOBUFS);
			}
			break;
		}
		break;

	case IPPROTO_IP:
		/*
		 * Only allow IPv4 option processing on IPv4 sockets.
		 */
		if (connp->conn_family != AF_INET)
			return (EINVAL);

		switch (name) {
		case IP_HDRINCL:
			if (!checkonly) {
				mutex_enter(&connp->conn_lock);
				icmp->icmp_hdrincl = onoff;
				/*
				 * With the header supplied by the caller, IP
				 * must not compute the transport checksum.
				 */
				if (onoff)
					ixa->ixa_flags &= ~IXAF_SET_ULP_CKSUM;
				else
					ixa->ixa_flags |= IXAF_SET_ULP_CKSUM;
				mutex_exit(&connp->conn_lock);
			}
			break;
		}
		break;

	case IPPROTO_IPV6:
		/* Only allow IPv6 option processing on IPv6 sockets. */
		if (connp->conn_family != AF_INET6)
			return (EINVAL);

		switch (name) {
		case IPV6_CHECKSUM:
			/*
			 * Integer offset into the user data of where the
			 * checksum is located.
			 * Offset of -1 disables option.
			 * Does not apply to IPPROTO_ICMPV6.
			 */
			if (connp->conn_proto == IPPROTO_ICMPV6 ||
			    coa->coa_ancillary) {
				return (EINVAL);
			}
			if ((*i1 != -1) && ((*i1 < 0) || (*i1 & 0x1) != 0)) {
				/* Negative or not 16 bit aligned offset */
				return (EINVAL);
			}
			if (checkonly)
				break;

			mutex_enter(&connp->conn_lock);
			if (*i1 == -1) {
				ixa->ixa_flags &= ~IXAF_SET_RAW_CKSUM;
				ixa->ixa_raw_cksum_offset = 0;
				ixa->ixa_flags &= ~IXAF_SET_ULP_CKSUM;
			} else {
				ixa->ixa_flags |= IXAF_SET_RAW_CKSUM;
				ixa->ixa_raw_cksum_offset = *i1;
				ixa->ixa_flags |= IXAF_SET_ULP_CKSUM;
			}
			mutex_exit(&connp->conn_lock);
			break;
		}
		break;

	case IPPROTO_ICMPV6:
		/*
		 * Only allow IPv6 option processing on IPv6 sockets.
		 */
		if (connp->conn_family != AF_INET6)
			return (EINVAL);
		if (connp->conn_proto != IPPROTO_ICMPV6)
			return (EINVAL);

		switch (name) {
		case ICMP6_FILTER:
			/*
			 * NOTE(review): in checkonly mode the length check
			 * below is skipped - confirm that is intended.
			 */
			if (checkonly)
				break;

			if ((inlen != 0) &&
			    (inlen != sizeof (icmp6_filter_t)))
				return (EINVAL);

			mutex_enter(&connp->conn_lock);
			if (inlen == 0) {
				/* Zero length removes any installed filter. */
				if (icmp->icmp_filter != NULL) {
					kmem_free(icmp->icmp_filter,
					    sizeof (icmp6_filter_t));
					icmp->icmp_filter = NULL;
				}
			} else {
				/*
				 * Allocate on first use. KM_NOSLEEP because
				 * conn_lock is held across the allocation.
				 */
				if (icmp->icmp_filter == NULL) {
					icmp->icmp_filter = kmem_alloc(
					    sizeof (icmp6_filter_t),
					    KM_NOSLEEP);
					if (icmp->icmp_filter == NULL) {
						mutex_exit(&connp->conn_lock);
						return (ENOBUFS);
					}
				}
				(void) bcopy(invalp, icmp->icmp_filter, inlen);
			}
			mutex_exit(&connp->conn_lock);
			break;
		}
		break;
	}
	/* Every path that did not return above continues to common code. */
	error = conn_opt_set(coa, level, name, inlen, invalp,
	    checkonly, cr);
	return (error);
}

/*
 * This routine sets socket options.
*/
/*
 * icmp_opt_set: option-set entry point for a raw IP conn.
 *
 * optset_context selects check-only versus negotiate semantics. The
 * option is applied through icmp_do_opt_set(); on success the input value
 * is copied to outvalp and *outlenp is set to inlen. For non-ancillary
 * sets, the side effects recorded in coa_changed (route, header template,
 * buffer sizes, write offset, protocol bind) are then carried out.
 *
 * thisdg_attrs, when non-NULL, is the caller's conn_opt_arg_t for
 * per-datagram options; otherwise a local one is built around a
 * reference on the conn's transmit attributes.
 *
 * Returns 0 or an errno value; *outlenp is set to 0 on the error paths
 * that precede the copy-out.
 */
int
icmp_opt_set(conn_t *connp, uint_t optset_context, int level, int name,
    uint_t inlen, uchar_t *invalp, uint_t *outlenp, uchar_t *outvalp,
    void *thisdg_attrs, cred_t *cr)
{
	icmp_t		*icmp = connp->conn_icmp;
	int		err;
	conn_opt_arg_t	coas, *coa;
	boolean_t	checkonly;
	icmp_stack_t	*is = icmp->icmp_is;

	switch (optset_context) {
	case SETFN_OPTCOM_CHECKONLY:
		checkonly = B_TRUE;
		/*
		 * Note: Implies T_CHECK semantics for T_OPTCOM_REQ
		 * inlen != 0 implies value supplied and
		 * 	we have to "pretend" to set it.
		 * inlen == 0 implies that there is no
		 * 	value part in T_CHECK request and just validation
		 * done elsewhere should be enough, we just return here.
		 */
		if (inlen == 0) {
			*outlenp = 0;
			return (0);
		}
		break;
	case SETFN_OPTCOM_NEGOTIATE:
		checkonly = B_FALSE;
		break;
	case SETFN_UD_NEGOTIATE:
	case SETFN_CONN_NEGOTIATE:
		checkonly = B_FALSE;
		/*
		 * Negotiating local and "association-related" options
		 * through T_UNITDATA_REQ.
		 *
		 * Following routine can filter out ones we do not
		 * want to be "set" this way.
		 */
		if (!icmp_opt_allow_udr_set(level, name)) {
			*outlenp = 0;
			return (EINVAL);
		}
		break;
	default:
		/*
		 * We should never get here
		 */
		*outlenp = 0;
		return (EINVAL);
	}

	ASSERT((optset_context != SETFN_OPTCOM_CHECKONLY) ||
	    (optset_context == SETFN_OPTCOM_CHECKONLY && inlen != 0));

	if (thisdg_attrs != NULL) {
		/* Options from T_UNITDATA_REQ */
		coa = (conn_opt_arg_t *)thisdg_attrs;
		ASSERT(coa->coa_connp == connp);
		ASSERT(coa->coa_ixa != NULL);
		ASSERT(coa->coa_ipp != NULL);
		ASSERT(coa->coa_ancillary);
	} else {
		coa = &coas;
		coas.coa_connp = connp;
		/* Get a reference on conn_ixa to prevent concurrent mods */
		coas.coa_ixa = conn_get_ixa(connp, B_TRUE);
		if (coas.coa_ixa == NULL) {
			*outlenp = 0;
			return (ENOMEM);
		}
		coas.coa_ipp = &connp->conn_xmit_ipp;
		coas.coa_ancillary = B_FALSE;
		coas.coa_changed = 0;
	}

	err = icmp_do_opt_set(coa, level, name, inlen, invalp,
	    cr, checkonly);
	if (err != 0) {
		/*
		 * NOTE(review): no goto to this label is visible in this
		 * function - confirm whether it is still needed.
		 */
errout:
		/* Only the locally obtained ixa reference is ours to drop. */
		if (!coa->coa_ancillary)
			ixa_refrele(coa->coa_ixa);
		*outlenp = 0;
		return (err);
	}

	/*
	 * Common case of OK return with outval same as inval.
	 */
	if (invalp != outvalp) {
		/* don't trust bcopy for identical src/dst */
		(void) bcopy(invalp, outvalp, inlen);
	}
	*outlenp = inlen;

	/*
	 * If this was not ancillary data, then we rebuild the headers,
	 * update the IRE/NCE, and IPsec as needed.
	 * Since the label depends on the destination we go through
	 * ip_set_destination first.
	 */
	if (coa->coa_ancillary) {
		return (0);
	}

	if (coa->coa_changed & COA_ROUTE_CHANGED) {
		in6_addr_t saddr, faddr, nexthop;
		in_port_t fport;

		/*
		 * We clear lastdst to make sure we pick up the change
		 * next time sending.
		 * If we are connected we re-cache the information.
		 * We ignore errors to preserve BSD behavior.
		 * Note that we don't redo IPsec policy lookup here
		 * since the final destination (or source) didn't change.
		 */
		mutex_enter(&connp->conn_lock);
		connp->conn_v6lastdst = ipv6_all_zeros;

		ip_attr_nexthop(coa->coa_ipp, coa->coa_ixa,
		    &connp->conn_faddr_v6, &nexthop);
		/* Snapshot the addresses so the lock can be dropped. */
		saddr = connp->conn_saddr_v6;
		faddr = connp->conn_faddr_v6;
		fport = connp->conn_fport;
		mutex_exit(&connp->conn_lock);

		if (!IN6_IS_ADDR_UNSPECIFIED(&faddr) &&
		    !IN6_IS_ADDR_V4MAPPED_ANY(&faddr)) {
			(void) ip_attr_connect(connp, coa->coa_ixa,
			    &saddr, &faddr, &nexthop, fport, NULL, NULL,
			    IPDF_ALLOW_MCBC | IPDF_VERIFY_DST);
		}
	}

	/* Done with the transmit attributes; drop the reference taken above. */
	ixa_refrele(coa->coa_ixa);

	if (coa->coa_changed & COA_HEADER_CHANGED) {
		/*
		 * Rebuild the header template if we are connected.
		 * Otherwise clear conn_v6lastdst so we rebuild the header
		 * in the data path.
		 */
		mutex_enter(&connp->conn_lock);
		if (!IN6_IS_ADDR_UNSPECIFIED(&connp->conn_faddr_v6) &&
		    !IN6_IS_ADDR_V4MAPPED_ANY(&connp->conn_faddr_v6)) {
			err = icmp_build_hdr_template(connp,
			    &connp->conn_saddr_v6, &connp->conn_faddr_v6,
			    connp->conn_flowinfo);
			if (err != 0) {
				/*
				 * NOTE(review): returns with *outlenp already
				 * set to inlen and the remaining COA_* work
				 * skipped - confirm that is intended.
				 */
				mutex_exit(&connp->conn_lock);
				return (err);
			}
		} else {
			connp->conn_v6lastdst = ipv6_all_zeros;
		}
		mutex_exit(&connp->conn_lock);
	}
	if (coa->coa_changed & COA_RCVBUF_CHANGED) {
		(void) proto_set_rx_hiwat(connp->conn_rq, connp,
		    connp->conn_rcvbuf);
	}
	/* The write queue high-water mark is only updated for STREAMS conns. */
	if ((coa->coa_changed & COA_SNDBUF_CHANGED) &&
	    !IPCL_IS_NONSTR(connp)) {
		connp->conn_wq->q_hiwat = connp->conn_sndbuf;
	}
	if (coa->coa_changed & COA_WROFF_CHANGED) {
		/* Increase wroff if needed */
		uint_t wroff;

		mutex_enter(&connp->conn_lock);
		wroff = connp->conn_ht_iphc_allocated + is->is_wroff_extra;
		if (wroff > connp->conn_wroff) {
			connp->conn_wroff = wroff;
			/* Drop the lock before calling proto_set_tx_wroff(). */
			mutex_exit(&connp->conn_lock);
			(void) proto_set_tx_wroff(connp->conn_rq, connp, wroff);
		} else {
			mutex_exit(&connp->conn_lock);
		}
	}
	if (coa->coa_changed & COA_ICMP_BIND_NEEDED) {
		icmp_bind_proto(icmp);
	}
	return (err);
}

/* This routine sets socket options.
*/ 2293 8348 Eric int 2294 8348 Eric icmp_tpi_opt_set(queue_t *q, uint_t optset_context, int level, int name, 2295 5240 nordmark uint_t inlen, uchar_t *invalp, uint_t *outlenp, uchar_t *outvalp, 2296 11042 Erik void *thisdg_attrs, cred_t *cr) 2297 11042 Erik { 2298 11042 Erik conn_t *connp = Q_TO_CONN(q); 2299 11042 Erik int error; 2300 11042 Erik 2301 8348 Eric error = icmp_opt_set(connp, optset_context, level, name, inlen, invalp, 2302 8348 Eric outlenp, outvalp, thisdg_attrs, cr); 2303 11042 Erik return (error); 2304 11042 Erik } 2305 11042 Erik 2306 11042 Erik /* 2307 11042 Erik * Setup IP headers. 2308 11042 Erik * 2309 11042 Erik * Note that IP_HDRINCL has ipha_protocol that is different than conn_proto, 2310 11042 Erik * but icmp_output_hdrincl restores ipha_protocol once we return. 2311 11042 Erik */ 2312 11042 Erik mblk_t * 2313 11042 Erik icmp_prepend_hdr(conn_t *connp, ip_xmit_attr_t *ixa, const ip_pkt_t *ipp, 2314 11042 Erik const in6_addr_t *v6src, const in6_addr_t *v6dst, uint32_t flowinfo, 2315 11042 Erik mblk_t *data_mp, int *errorp) 2316 11042 Erik { 2317 11042 Erik mblk_t *mp; 2318 11042 Erik icmp_stack_t *is = connp->conn_netstack->netstack_icmp; 2319 11042 Erik uint_t data_len; 2320 11042 Erik uint32_t cksum; 2321 11042 Erik 2322 11042 Erik data_len = msgdsize(data_mp); 2323 11042 Erik mp = conn_prepend_hdr(ixa, ipp, v6src, v6dst, connp->conn_proto, 2324 11042 Erik flowinfo, 0, data_mp, data_len, is->is_wroff_extra, &cksum, errorp); 2325 11042 Erik if (mp == NULL) { 2326 11042 Erik ASSERT(*errorp != 0); 2327 11042 Erik return (NULL); 2328 11042 Erik } 2329 11042 Erik 2330 11042 Erik ixa->ixa_pktlen = data_len + ixa->ixa_ip_hdr_length; 2331 11042 Erik 2332 11042 Erik /* 2333 11042 Erik * If there was a routing option/header then conn_prepend_hdr 2334 11042 Erik * has massaged it and placed the pseudo-header checksum difference 2335 11042 Erik * in the cksum argument. 2336 11042 Erik * 2337 11042 Erik * Prepare for ICMPv6 checksum done in IP. 
2338 11042 Erik * 2339 11042 Erik * We make it easy for IP to include our pseudo header 2340 11042 Erik * by putting our length (and any routing header adjustment) 2341 11042 Erik * in the ICMPv6 checksum field. 2342 11042 Erik * The IP source, destination, and length have already been set by 2343 11042 Erik * conn_prepend_hdr. 2344 11042 Erik */ 2345 11042 Erik cksum += data_len; 2346 11042 Erik cksum = (cksum >> 16) + (cksum & 0xFFFF); 2347 11042 Erik ASSERT(cksum < 0x10000); 2348 11042 Erik 2349 11042 Erik if (ixa->ixa_flags & IXAF_IS_IPV4) { 2350 11042 Erik ipha_t *ipha = (ipha_t *)mp->b_rptr; 2351 11042 Erik 2352 11042 Erik ASSERT(ntohs(ipha->ipha_length) == ixa->ixa_pktlen); 2353 11042 Erik } else { 2354 11042 Erik ip6_t *ip6h = (ip6_t *)mp->b_rptr; 2355 11042 Erik uint_t cksum_offset = 0; 2356 11042 Erik 2357 11042 Erik ASSERT(ntohs(ip6h->ip6_plen) + IPV6_HDR_LEN == ixa->ixa_pktlen); 2358 11042 Erik 2359 11042 Erik if (ixa->ixa_flags & IXAF_SET_ULP_CKSUM) { 2360 11042 Erik if (connp->conn_proto == IPPROTO_ICMPV6) { 2361 11042 Erik cksum_offset = ixa->ixa_ip_hdr_length + 2362 11042 Erik offsetof(icmp6_t, icmp6_cksum); 2363 11042 Erik } else if (ixa->ixa_flags & IXAF_SET_RAW_CKSUM) { 2364 11042 Erik cksum_offset = ixa->ixa_ip_hdr_length + 2365 11042 Erik ixa->ixa_raw_cksum_offset; 2366 11042 Erik } 2367 11042 Erik } 2368 11042 Erik if (cksum_offset != 0) { 2369 11042 Erik uint16_t *ptr; 2370 11042 Erik 2371 11042 Erik /* Make sure the checksum fits in the first mblk */ 2372 11042 Erik if (cksum_offset + sizeof (short) > MBLKL(mp)) { 2373 11042 Erik mblk_t *mp1; 2374 11042 Erik 2375 11042 Erik mp1 = msgpullup(mp, 2376 11042 Erik cksum_offset + sizeof (short)); 2377 11042 Erik freemsg(mp); 2378 11042 Erik if (mp1 == NULL) { 2379 11042 Erik *errorp = ENOMEM; 2380 11042 Erik return (NULL); 2381 11042 Erik } 2382 11042 Erik mp = mp1; 2383 11042 Erik ip6h = (ip6_t *)mp->b_rptr; 2384 11042 Erik } 2385 11042 Erik ptr = (uint16_t *)(mp->b_rptr + cksum_offset); 2386 
11042 Erik *ptr = htons(cksum); 2387 11042 Erik } 2388 11042 Erik } 2389 11042 Erik 2390 11042 Erik /* Note that we don't try to update wroff due to ancillary data */ 2391 11042 Erik return (mp); 2392 11042 Erik } 2393 11042 Erik 2394 11042 Erik static int 2395 11042 Erik icmp_build_hdr_template(conn_t *connp, const in6_addr_t *v6src, 2396 11042 Erik const in6_addr_t *v6dst, uint32_t flowinfo) 2397 11042 Erik { 2398 11042 Erik int error; 2399 11042 Erik 2400 11042 Erik ASSERT(MUTEX_HELD(&connp->conn_lock)); 2401 11042 Erik /* 2402 11042 Erik * We clear lastdst to make sure we don't use the lastdst path 2403 11042 Erik * next time sending since we might not have set v6dst yet. 2404 11042 Erik */ 2405 11042 Erik connp->conn_v6lastdst = ipv6_all_zeros; 2406 11042 Erik 2407 11042 Erik error = conn_build_hdr_template(connp, 0, 0, v6src, v6dst, flowinfo); 2408 11042 Erik if (error != 0) 2409 11042 Erik return (error); 2410 11042 Erik 2411 11042 Erik /* 2412 11042 Erik * Any routing header/option has been massaged. The checksum difference 2413 11042 Erik * is stored in conn_sum. 2414 11042 Erik */ 2415 0 stevel return (0); 2416 0 stevel } 2417 0 stevel 2418 0 stevel /* 2419 0 stevel * This routine retrieves the value of an ND variable in a icmpparam_t 2420 0 stevel * structure. It is called through nd_getset when a user reads the 2421 0 stevel * variable. 2422 0 stevel */ 2423 0 stevel /* ARGSUSED */ 2424 0 stevel static int 2425 0 stevel icmp_param_get(queue_t *q, mblk_t *mp, caddr_t cp, cred_t *cr) 2426 0 stevel { 2427 0 stevel icmpparam_t *icmppa = (icmpparam_t *)cp; 2428 0 stevel 2429 0 stevel (void) mi_mpprintf(mp, "%d", icmppa->icmp_param_value); 2430 0 stevel return (0); 2431 0 stevel } 2432 0 stevel 2433 0 stevel /* 2434 0 stevel * Walk through the param array specified registering each element with the 2435 0 stevel * named dispatch (ND) handler. 
2436 0 stevel */ 2437 0 stevel static boolean_t 2438 3448 dh155122 icmp_param_register(IDP *ndp, icmpparam_t *icmppa, int cnt) 2439 0 stevel { 2440 0 stevel for (; cnt-- > 0; icmppa++) { 2441 0 stevel if (icmppa->icmp_param_name && icmppa->icmp_param_name[0]) { 2442 3448 dh155122 if (!nd_load(ndp, icmppa->icmp_param_name, 2443 0 stevel icmp_param_get, icmp_param_set, 2444 0 stevel (caddr_t)icmppa)) { 2445 3448 dh155122 nd_free(ndp); 2446 0 stevel return (B_FALSE); 2447 0 stevel } 2448 0 stevel } 2449 0 stevel } 2450 0 stevel return (B_TRUE); 2451 0 stevel } 2452 0 stevel 2453 0 stevel /* This routine sets an ND variable in a icmpparam_t structure. */ 2454 0 stevel /* ARGSUSED */ 2455 0 stevel static int 2456 0 stevel icmp_param_set(queue_t *q, mblk_t *mp, char *value, caddr_t cp, cred_t *cr) 2457 0 stevel { 2458 0 stevel long new_value; 2459 0 stevel icmpparam_t *icmppa = (icmpparam_t *)cp; 2460 0 stevel 2461 0 stevel /* 2462 0 stevel * Fail the request if the new value does not lie within the 2463 0 stevel * required bounds. 
2464 0 stevel */ 2465 0 stevel if (ddi_strtol(value, NULL, 10, &new_value) != 0 || 2466 0 stevel new_value < icmppa->icmp_param_min || 2467 0 stevel new_value > icmppa->icmp_param_max) { 2468 0 stevel return (EINVAL); 2469 0 stevel } 2470 0 stevel /* Set the new value */ 2471 0 stevel icmppa->icmp_param_value = new_value; 2472 0 stevel return (0); 2473 0 stevel } 2474 8963 Anders 2475 8963 Anders static mblk_t * 2476 8348 Eric icmp_queue_fallback(icmp_t *icmp, mblk_t *mp) 2477 8348 Eric { 2478 8348 Eric ASSERT(MUTEX_HELD(&icmp->icmp_recv_lock)); 2479 8348 Eric if (IPCL_IS_NONSTR(icmp->icmp_connp)) { 2480 8348 Eric /* 2481 8348 Eric * fallback has started but messages have not been moved yet 2482 8348 Eric */ 2483 8348 Eric if (icmp->icmp_fallback_queue_head == NULL) { 2484 8348 Eric ASSERT(icmp->icmp_fallback_queue_tail == NULL); 2485 8348 Eric icmp->icmp_fallback_queue_head = mp; 2486 8348 Eric icmp->icmp_fallback_queue_tail = mp; 2487 8348 Eric } else { 2488 8348 Eric ASSERT(icmp->icmp_fallback_queue_tail != NULL); 2489 8348 Eric icmp->icmp_fallback_queue_tail->b_next = mp; 2490 8348 Eric icmp->icmp_fallback_queue_tail = mp; 2491 8348 Eric } 2492 8963 Anders return (NULL); 2493 8963 Anders } else { 2494 8963 Anders /* 2495 8963 Anders * Fallback completed, let the caller putnext() the mblk. 2496 8963 Anders */ 2497 8963 Anders return (mp); 2498 8963 Anders } 2499 8963 Anders } 2500 8963 Anders 2501 8963 Anders /* 2502 8963 Anders * Deliver data to ULP. In case we have a socket, and it's falling back to 2503 8963 Anders * TPI, then we'll queue the mp for later processing. 
2504 8963 Anders */ 2505 8963 Anders static void 2506 11042 Erik icmp_ulp_recv(conn_t *connp, mblk_t *mp, uint_t len) 2507 11042 Erik { 2508 8963 Anders if (IPCL_IS_NONSTR(connp)) { 2509 8963 Anders icmp_t *icmp = connp->conn_icmp; 2510 8963 Anders int error; 2511 8963 Anders 2512 11042 Erik ASSERT(len == msgdsize(mp)); 2513 8963 Anders if ((*connp->conn_upcalls->su_recv) 2514 11042 Erik (connp->conn_upper_handle, mp, len, 0, &error, NULL) < 0) { 2515 8963 Anders mutex_enter(&icmp->icmp_recv_lock); 2516 8963 Anders if (error == ENOSPC) { 2517 8963 Anders /* 2518 8963 Anders * let's confirm while holding the lock 2519 8963 Anders */ 2520 8963 Anders if ((*connp->conn_upcalls->su_recv) 2521 8963 Anders (connp->conn_upper_handle, NULL, 0, 0, 2522 8963 Anders &error, NULL) < 0) { 2523 8963 Anders ASSERT(error == ENOSPC); 2524 8963 Anders if (error == ENOSPC) { 2525 8963 Anders connp->conn_flow_cntrld = 2526 8963 Anders B_TRUE; 2527 8963 Anders } 2528 8963 Anders } 2529 8963 Anders mutex_exit(&icmp->icmp_recv_lock); 2530 8963 Anders } else { 2531 8963 Anders ASSERT(error == EOPNOTSUPP); 2532 8963 Anders mp = icmp_queue_fallback(icmp, mp); 2533 8963 Anders mutex_exit(&icmp->icmp_recv_lock); 2534 8963 Anders if (mp != NULL) 2535 8963 Anders putnext(connp->conn_rq, mp); 2536 8963 Anders } 2537 8963 Anders } 2538 8963 Anders ASSERT(MUTEX_NOT_HELD(&icmp->icmp_recv_lock)); 2539 8963 Anders } else { 2540 8963 Anders putnext(connp->conn_rq, mp); 2541 8348 Eric } 2542 8348 Eric } 2543 8348 Eric 2544 11042 Erik /* 2545 11042 Erik * This is the inbound data path. 2546 11042 Erik * IP has already pulled up the IP headers and verified alignment 2547 11042 Erik * etc. 
2548 11042 Erik */ 2549 11042 Erik /* ARGSUSED2 */ 2550 11042 Erik static void 2551 11042 Erik icmp_input(void *arg1, mblk_t *mp, void *arg2, ip_recv_attr_t *ira) 2552 11042 Erik { 2553 11042 Erik conn_t *connp = (conn_t *)arg1; 2554 0 stevel struct T_unitdata_ind *tudi; 2555 11042 Erik uchar_t *rptr; /* Pointer to IP header */ 2556 11042 Erik int ip_hdr_length; 2557 11042 Erik int udi_size; /* Size of T_unitdata_ind */ 2558 11042 Erik int pkt_len; 2559 5240 nordmark icmp_t *icmp; 2560 11042 Erik ip_pkt_t ipps; 2561 11042 Erik ip6_t *ip6h; 2562 11042 Erik mblk_t *mp1; 2563 11042 Erik crb_t recv_ancillary; 2564 5240 nordmark icmp_stack_t *is; 2565 0 stevel sin_t *sin; 2566 0 stevel sin6_t *sin6; 2567 0 stevel ipha_t *ipha; 2568 0 stevel 2569 5240 nordmark ASSERT(connp->conn_flags & IPCL_RAWIPCONN); 2570 5240 nordmark 2571 5240 nordmark icmp = connp->conn_icmp; 2572 5240 nordmark is = icmp->icmp_is; 2573 5240 nordmark rptr = mp->b_rptr; 2574 11042 Erik 2575 11042 Erik ASSERT(DB_TYPE(mp) == M_DATA); 2576 5240 nordmark ASSERT(OK_32PTR(rptr)); 2577 11042 Erik ASSERT(ira->ira_pktlen == msgdsize(mp)); 2578 11042 Erik pkt_len = ira->ira_pktlen; 2579 11042 Erik 2580 11042 Erik /* 2581 11042 Erik * Get a snapshot of these and allow other threads to change 2582 11042 Erik * them after that. We need the same recv_ancillary when determining 2583 11042 Erik * the size as when adding the ancillary data items. 
2584 11042 Erik */ 2585 11042 Erik mutex_enter(&connp->conn_lock); 2586 11042 Erik recv_ancillary = connp->conn_recv_ancillary; 2587 11042 Erik mutex_exit(&connp->conn_lock); 2588 11042 Erik 2589 11042 Erik ip_hdr_length = ira->ira_ip_hdr_length; 2590 11042 Erik ASSERT(MBLKL(mp) >= ip_hdr_length); /* IP did a pullup */ 2591 11042 Erik 2592 11042 Erik /* Initialize regardless of IP version */ 2593 11042 Erik ipps.ipp_fields = 0; 2594 11042 Erik 2595 11042 Erik if (ira->ira_flags & IRAF_IS_IPV4) { 2596 11042 Erik ASSERT(IPH_HDR_VERSION(rptr) == IPV4_VERSION); 2597 11042 Erik ASSERT(MBLKL(mp) >= sizeof (ipha_t)); 2598 11042 Erik ASSERT(ira->ira_ip_hdr_length == IPH_HDR_LENGTH(rptr)); 2599 11042 Erik 2600 11042 Erik ipha = (ipha_t *)mp->b_rptr; 2601 11042 Erik if (recv_ancillary.crb_all != 0) 2602 11042 Erik (void) ip_find_hdr_v4(ipha, &ipps, B_FALSE); 2603 11042 Erik 2604 11042 Erik /* 2605 11042 Erik * BSD for some reason adjusts ipha_length to exclude the 2606 11042 Erik * IP header length. We do the same. 2607 11042 Erik */ 2608 3448 dh155122 if (is->is_bsd_compat) { 2609 0 stevel ushort_t len; 2610 11042 Erik 2611 0 stevel len = ntohs(ipha->ipha_length); 2612 0 stevel if (mp->b_datap->db_ref > 1) { 2613 0 stevel /* 2614 0 stevel * Allocate a new IP header so that we can 2615 0 stevel * modify ipha_length. 
2616 0 stevel */ 2617 0 stevel mblk_t *mp1; 2618 0 stevel 2619 11042 Erik mp1 = allocb(ip_hdr_length, BPRI_MED); 2620 11042 Erik if (mp1 == NULL) { 2621 0 stevel freemsg(mp); 2622 5240 nordmark BUMP_MIB(&is->is_rawip_mib, 2623 3448 dh155122 rawipInErrors); 2624 0 stevel return; 2625 0 stevel } 2626 11042 Erik bcopy(rptr, mp1->b_rptr, ip_hdr_length); 2627 11042 Erik mp->b_rptr = rptr + ip_hdr_length; 2628 0 stevel rptr = mp1->b_rptr; 2629 0 stevel ipha = (ipha_t *)rptr; 2630 0 stevel mp1->b_cont = mp; 2631 11042 Erik mp1->b_wptr = rptr + ip_hdr_length; 2632 0 stevel mp = mp1; 2633 0 stevel } 2634 11042 Erik len -= ip_hdr_length; 2635 0 stevel ipha->ipha_length = htons(len); 2636 0 stevel } 2637 11042 Erik 2638 11042 Erik /* 2639 11042 Erik * For RAW sockets we not pass ICMP/IPv4 packets to AF_INET6 2640 11042 Erik * sockets. This is ensured by icmp_bind and the IP fanout code. 2641 11042 Erik */ 2642 11042 Erik ASSERT(connp->conn_family == AF_INET); 2643 11042 Erik 2644 11042 Erik /* 2645 11042 Erik * This is the inbound data path. Packets are passed upstream 2646 11042 Erik * as T_UNITDATA_IND messages with full IPv4 headers still 2647 11042 Erik * attached. 2648 11042 Erik */ 2649 11042 Erik 2650 11042 Erik /* 2651 11042 Erik * Normally only send up the source address. 2652 11042 Erik * If any ancillary data items are wanted we add those. 2653 11042 Erik */ 2654 11042 Erik udi_size = sizeof (struct T_unitdata_ind) + sizeof (sin_t); 2655 11042 Erik if (recv_ancillary.crb_all != 0) { 2656 11042 Erik udi_size += conn_recvancillary_size(connp, 2657 11042 Erik recv_ancillary, ira, mp, &ipps); 2658 11042 Erik } 2659 11042 Erik 2660 11042 Erik /* Allocate a message block for the T_UNITDATA_IND structure. 
*/ 2661 0 stevel mp1 = allocb(udi_size, BPRI_MED); 2662 0 stevel if (mp1 == NULL) { 2663 0 stevel freemsg(mp); 2664 5240 nordmark BUMP_MIB(&is->is_rawip_mib, rawipInErrors); 2665 0 stevel return; 2666 0 stevel } 2667 0 stevel mp1->b_cont = mp; 2668 11042 Erik tudi = (struct T_unitdata_ind *)mp1->b_rptr; 2669 11042 Erik mp1->b_datap->db_type = M_PROTO; 2670 11042 Erik mp1->b_wptr = (uchar_t *)tudi + udi_size; 2671 0 stevel tudi->PRIM_type = T_UNITDATA_IND; 2672 0 stevel tudi->SRC_length = sizeof (sin_t); 2673 0 stevel tudi->SRC_offset = sizeof (struct T_unitdata_ind); 2674 0 stevel sin = (sin_t *)&tudi[1]; 2675 0 stevel *sin = sin_null; 2676 0 stevel sin->sin_family = AF_INET; 2677 0 stevel sin->sin_addr.s_addr = ipha->ipha_src; 2678 11042 Erik *(uint32_t *)&sin->sin_zero[0] = 0; 2679 11042 Erik *(uint32_t *)&sin->sin_zero[4] = 0; 2680 0 stevel tudi->OPT_offset = sizeof (struct T_unitdata_ind) + 2681 0 stevel sizeof (sin_t); 2682 0 stevel udi_size -= (sizeof (struct T_unitdata_ind) + sizeof (sin_t)); 2683 0 stevel tudi->OPT_length = udi_size; 2684 0 stevel 2685 0 stevel /* 2686 11042 Erik * Add options if IP_RECVIF etc is set 2687 0 stevel */ 2688 0 stevel if (udi_size != 0) { 2689 11042 Erik conn_recvancillary_add(connp, recv_ancillary, ira, 2690 11042 Erik &ipps, (uchar_t *)&sin[1], udi_size); 2691 11042 Erik } 2692 8348 Eric goto deliver; 2693 0 stevel } 2694 0 stevel 2695 11042 Erik ASSERT(IPH_HDR_VERSION(rptr) == IPV6_VERSION); 2696 11042 Erik /* 2697 11042 Erik * IPv6 packets can only be received by applications 2698 11042 Erik * that are prepared to receive IPv6 addresses. 2699 11042 Erik * The IP fanout must ensure this. 2700 11042 Erik */ 2701 11042 Erik ASSERT(connp->conn_family == AF_INET6); 2702 11042 Erik 2703 11042 Erik /* 2704 11042 Erik * Handle IPv6 packets. We don't pass up the IP headers with the 2705 11042 Erik * payload for IPv6. 
2706 11042 Erik */ 2707 0 stevel 2708 0 stevel ip6h = (ip6_t *)rptr; 2709 11042 Erik if (recv_ancillary.crb_all != 0) { 2710 11042 Erik /* 2711 11042 Erik * Call on ip_find_hdr_v6 which gets individual lenghts of 2712 11042 Erik * extension headers (and pointers to them). 2713 11042 Erik */ 2714 11042 Erik uint8_t nexthdr; 2715 11042 Erik 2716 11042 Erik /* We don't care about the length or nextheader. */ 2717 11042 Erik (void) ip_find_hdr_v6(mp, ip6h, B_TRUE, &ipps, &nexthdr); 2718 11042 Erik 2719 11042 Erik /* 2720 11042 Erik * We do not pass up hop-by-hop options or any other 2721 11042 Erik * extension header as part of the packet. Applications 2722 11042 Erik * that want to see them have to specify IPV6_RECV* socket 2723 11042 Erik * options. And conn_recvancillary_size/add explicitly 2724 11042 Erik * drops the TX option from IPV6_HOPOPTS as it does for UDP. 2725 11042 Erik * 2726 11042 Erik * If we had multilevel ICMP sockets, then we'd want to 2727 11042 Erik * modify conn_recvancillary_size/add to 2728 11042 Erik * allow the user to see the label. 2729 11042 Erik */ 2730 11042 Erik } 2731 11042 Erik 2732 0 stevel /* 2733 0 stevel * Check a filter for ICMPv6 types if needed. 2734 0 stevel * Verify raw checksums if needed. 
2735 0 stevel */ 2736 11042 Erik mutex_enter(&connp->conn_lock); 2737 11042 Erik if (icmp->icmp_filter != NULL) { 2738 11042 Erik int type; 2739 11042 Erik 2740 11042 Erik /* Assumes that IP has done the pullupmsg */ 2741 11042 Erik type = mp->b_rptr[ip_hdr_length]; 2742 11042 Erik 2743 11042 Erik ASSERT(mp->b_rptr + ip_hdr_length <= mp->b_wptr); 2744 11042 Erik if (ICMP6_FILTER_WILLBLOCK(type, icmp->icmp_filter)) { 2745 11042 Erik mutex_exit(&connp->conn_lock); 2746 11042 Erik freemsg(mp); 2747 11042 Erik return; 2748 11042 Erik } 2749 11042 Erik } 2750 11042 Erik if (connp->conn_ixa->ixa_flags & IXAF_SET_RAW_CKSUM) { 2751 11042 Erik /* Checksum */ 2752 11042 Erik uint16_t *up; 2753 11042 Erik uint32_t sum; 2754 11042 Erik int remlen; 2755 11042 Erik 2756 11042 Erik up = (uint16_t *)&ip6h->ip6_src; 2757 11042 Erik 2758 11042 Erik remlen = msgdsize(mp) - ip_hdr_length; 2759 11042 Erik sum = htons(connp->conn_proto + remlen) 2760 11042 Erik + up[0] + up[1] + up[2] + up[3] 2761 11042 Erik + up[4] + up[5] + up[6] + up[7] 2762 11042 Erik + up[8] + up[9] + up[10] + up[11] 2763 11042 Erik + up[12] + up[13] + up[14] + up[15]; 2764 11042 Erik sum = (sum & 0xffff) + (sum >> 16); 2765 11042 Erik sum = IP_CSUM(mp, ip_hdr_length, sum); 2766 11042 Erik if (sum != 0) { 2767 11042 Erik /* IPv6 RAW checksum failed */ 2768 11042 Erik ip0dbg(("icmp_rput: RAW checksum failed %x\n", sum)); 2769 11042 Erik mutex_exit(&connp->conn_lock); 2770 11042 Erik freemsg(mp); 2771 11042 Erik BUMP_MIB(&is->is_rawip_mib, rawipInCksumErrs); 2772 11042 Erik return; 2773 11042 Erik } 2774 11042 Erik } 2775 11042 Erik mutex_exit(&connp->conn_lock); 2776 0 stevel 2777 0 stevel udi_size = sizeof (struct T_unitdata_ind) + sizeof (sin6_t); 2778 0 stevel 2779 11042 Erik if (recv_ancillary.crb_all != 0) { 2780 11042 Erik udi_size += conn_recvancillary_size(connp, 2781 11042 Erik recv_ancillary, ira, mp, &ipps); 2782 5401 nordmark } 2783 5401 nordmark 2784 0 stevel mp1 = allocb(udi_size, BPRI_MED); 2785 0 
stevel if (mp1 == NULL) { 2786 0 stevel freemsg(mp); 2787 5240 nordmark BUMP_MIB(&is->is_rawip_mib, rawipInErrors); 2788 0 stevel return; 2789 0 stevel } 2790 0 stevel mp1->b_cont = mp; 2791 11042 Erik mp1->b_datap->db_type = M_PROTO; 2792 11042 Erik tudi = (struct T_unitdata_ind *)mp1->b_rptr; 2793 11042 Erik mp1->b_wptr = (uchar_t *)tudi + udi_size; 2794 0 stevel tudi->PRIM_type = T_UNITDATA_IND; 2795 0 stevel tudi->SRC_length = sizeof (sin6_t); 2796 0 stevel tudi->SRC_offset = sizeof (struct T_unitdata_ind); 2797 0 stevel tudi->OPT_offset = sizeof (struct T_unitdata_ind) + sizeof (sin6_t); 2798 0 stevel udi_size -= (sizeof (struct T_unitdata_ind) + sizeof (sin6_t)); 2799 0 stevel tudi->OPT_length = udi_size; 2800 0 stevel sin6 = (sin6_t *)&tudi[1]; 2801 11042 Erik *sin6 = sin6_null; 2802 0 stevel sin6->sin6_port = 0; 2803 0 stevel sin6->sin6_family = AF_INET6; 2804 0 stevel 2805 0 stevel sin6->sin6_addr = ip6h->ip6_src; 2806 0 stevel /* No sin6_flowinfo per API */ 2807 0 stevel sin6->sin6_flowinfo = 0; 2808 11042 Erik /* For link-scope pass up scope id */ 2809 11042 Erik if (IN6_IS_ADDR_LINKSCOPE(&ip6h->ip6_src)) 2810 11042 Erik sin6->sin6_scope_id = ira->ira_ruifindex; 2811 0 stevel else 2812 0 stevel sin6->sin6_scope_id = 0; 2813 0 stevel sin6->__sin6_src_id = ip_srcid_find_addr(&ip6h->ip6_dst, 2814 11042 Erik IPCL_ZONEID(connp), is->is_netstack); 2815 0 stevel 2816 0 stevel if (udi_size != 0) { 2817 11042 Erik conn_recvancillary_add(connp, recv_ancillary, ira, 2818 11042 Erik &ipps, (uchar_t *)&sin6[1], udi_size); 2819 11042 Erik } 2820 11042 Erik 2821 11042 Erik /* Skip all the IPv6 headers per API */ 2822 11042 Erik mp->b_rptr += ip_hdr_length; 2823 11042 Erik pkt_len -= ip_hdr_length; 2824 11042 Erik 2825 11042 Erik deliver: 2826 5240 nordmark BUMP_MIB(&is->is_rawip_mib, rawipInDatagrams); 2827 11042 Erik icmp_ulp_recv(connp, mp1, pkt_len); 2828 11042 Erik } 2829 11042 Erik 2830 11042 Erik /* 2831 11042 Erik * return SNMP stuff in buffer in mpdata. 
We don't hold any lock and report 2832 11042 Erik * information that can be changing beneath us. 2833 0 stevel */ 2834 5240 nordmark mblk_t * 2835 0 stevel icmp_snmp_get(queue_t *q, mblk_t *mpctl) 2836 0 stevel { 2837 0 stevel mblk_t *mpdata; 2838 0 stevel struct opthdr *optp; 2839 5240 nordmark conn_t *connp = Q_TO_CONN(q); 2840 5240 nordmark icmp_stack_t *is = connp->conn_netstack->netstack_icmp; 2841 5240 nordmark mblk_t *mp2ctl; 2842 5240 nordmark 2843 5240 nordmark /* 2844 5240 nordmark * make a copy of the original message 2845 5240 nordmark */ 2846 5240 nordmark mp2ctl = copymsg(mpctl); 2847 0 stevel 2848 0 stevel if (mpctl == NULL || 2849 0 stevel (mpdata = mpctl->b_cont) == NULL) { 2850 5240 nordmark freemsg(mpctl); 2851 5240 nordmark freemsg(mp2ctl); 2852 0 stevel return (0); 2853 0 stevel } 2854 0 stevel 2855 0 stevel /* fixed length structure for IPv4 and IPv6 counters */ 2856 0 stevel optp = (struct opthdr *)&mpctl->b_rptr[sizeof (struct T_optmgmt_ack)]; 2857 0 stevel optp->level = EXPER_RAWIP; 2858 0 stevel optp->name = 0; 2859 5240 nordmark (void) snmp_append_data(mpdata, (char *)&is->is_rawip_mib, 2860 5240 nordmark sizeof (is->is_rawip_mib)); 2861 0 stevel optp->len = msgdsize(mpdata); 2862 0 stevel qreply(q, mpctl); 2863 0 stevel 2864 5240 nordmark return (mp2ctl); 2865 0 stevel } 2866 0 stevel 2867 0 stevel /* 2868 0 stevel * Return 0 if invalid set request, 1 otherwise, including non-rawip requests. 2869 0 stevel * TODO: If this ever actually tries to set anything, it needs to be 2870 0 stevel * to do the appropriate locking. 
2871 0 stevel */ 2872 0 stevel /* ARGSUSED */ 2873 5240 nordmark int 2874 0 stevel icmp_snmp_set(queue_t *q, t_scalar_t level, t_scalar_t name, 2875 0 stevel uchar_t *ptr, int len) 2876 0 stevel { 2877 0 stevel switch (level) { 2878 0 stevel case EXPER_RAWIP: 2879 0 stevel return (0); 2880 0 stevel default: 2881 0 stevel return (1); 2882 0 stevel } 2883 0 stevel } 2884 0 stevel 2885 0 stevel /* 2886 0 stevel * This routine creates a T_UDERROR_IND message and passes it upstream. 2887 0 stevel * The address and options are copied from the T_UNITDATA_REQ message 2888 0 stevel * passed in mp. This message is freed. 2889 0 stevel */ 2890 0 stevel static void 2891 0 stevel icmp_ud_err(queue_t *q, mblk_t *mp, t_scalar_t err) 2892 0 stevel { 2893 11042 Erik struct T_unitdata_req *tudr; 2894 0 stevel mblk_t *mp1; 2895 11042 Erik uchar_t *destaddr; 2896 11042 Erik t_scalar_t destlen; 2897 11042 Erik uchar_t *optaddr; 2898 11042 Erik t_scalar_t optlen; 2899 11042 Erik 2900 11042 Erik if ((mp->b_wptr < mp->b_rptr) || 2901 11042 Erik (MBLKL(mp)) < sizeof (struct T_unitdata_req)) { 2902 11042 Erik goto done; 2903 11042 Erik } 2904 11042 Erik tudr = (struct T_unitdata_req *)mp->b_rptr; 2905 11042 Erik destaddr = mp->b_rptr + tudr->DEST_offset; 2906 11042 Erik if (destaddr < mp->b_rptr || destaddr >= mp->b_wptr || 2907 11042 Erik destaddr + tudr->DEST_length < mp->b_rptr || 2908 11042 Erik destaddr + tudr->DEST_length > mp->b_wptr) { 2909 11042 Erik goto done; 2910 11042 Erik } 2911 11042 Erik optaddr = mp->b_rptr + tudr->OPT_offset; 2912 11042 Erik if (optaddr < mp->b_rptr || optaddr >= mp->b_wptr || 2913 11042 Erik optaddr + tudr->OPT_length < mp->b_rptr || 2914 11042 Erik optaddr + tudr->OPT_length > mp->b_wptr) { 2915 11042 Erik goto done; 2916 11042 Erik } 2917 11042 Erik destlen = tudr->DEST_length; 2918 11042 Erik optlen = tudr->OPT_length; 2919 11042 Erik 2920 11042 Erik mp1 = mi_tpi_uderror_ind((char *)destaddr, destlen, 2921 11042 Erik (char *)optaddr, optlen, err); 2922 
11042 Erik if (mp1 != NULL) 2923 0 stevel qreply(q, mp1); 2924 11042 Erik 2925 11042 Erik done: 2926 0 stevel freemsg(mp); 2927 0 stevel } 2928 0 stevel 2929 8348 Eric static int 2930 8348 Eric rawip_do_unbind(conn_t *connp) 2931 8348 Eric { 2932 11042 Erik icmp_t *icmp = connp->conn_icmp; 2933 11042 Erik 2934 11042 Erik mutex_enter(&connp->conn_lock); 2935 0 stevel /* If a bind has not been done, we can't unbind. */ 2936 11042 Erik if (icmp->icmp_state == TS_UNBND) { 2937 11042 Erik mutex_exit(&connp->conn_lock); 2938 8348 Eric return (-TOUTSTATE); 2939 0 stevel } 2940 11042 Erik connp->conn_saddr_v6 = ipv6_all_zeros; 2941 11042 Erik connp->conn_bound_addr_v6 = ipv6_all_zeros; 2942 11042 Erik connp->conn_laddr_v6 = ipv6_all_zeros; 2943 11042 Erik connp->conn_mcbc_bind = B_FALSE; 2944 11042 Erik connp->conn_lport = 0; 2945 11042 Erik connp->conn_fport = 0; 2946 11042 Erik /* In case we were also connected */ 2947 11042 Erik connp->conn_faddr_v6 = ipv6_all_zeros; 2948 11042 Erik connp->conn_v6lastdst = ipv6_all_zeros; 2949 11042 Erik 2950 11042 Erik icmp->icmp_state = TS_UNBND; 2951 11042 Erik 2952 11042 Erik (void) icmp_build_hdr_template(connp, &connp->conn_saddr_v6, 2953 11042 Erik &connp->conn_faddr_v6, connp->conn_flowinfo); 2954 11042 Erik mutex_exit(&connp->conn_lock); 2955 8348 Eric 2956 8348 Eric ip_unbind(connp); 2957 8348 Eric return (0); 2958 8348 Eric } 2959 8348 Eric 2960 8348 Eric /* 2961 8348 Eric * This routine is called by icmp_wput to handle T_UNBIND_REQ messages. 2962 8348 Eric * After some error checking, the message is passed downstream to ip. 
2963 8348 Eric */ 2964 8348 Eric static void 2965 8348 Eric icmp_tpi_unbind(queue_t *q, mblk_t *mp) 2966 8348 Eric { 2967 8348 Eric conn_t *connp = Q_TO_CONN(q); 2968 8348 Eric int error; 2969 8348 Eric 2970 8348 Eric ASSERT(mp->b_cont == NULL); 2971 8348 Eric error = rawip_do_unbind(connp); 2972 8348 Eric if (error) { 2973 8348 Eric if (error < 0) { 2974 8348 Eric icmp_err_ack(q, mp, -error, 0); 2975 8348 Eric } else { 2976 8348 Eric icmp_err_ack(q, mp, 0, error); 2977 8348 Eric } 2978 8348 Eric return; 2979 8348 Eric } 2980 8348 Eric 2981 8348 Eric /* 2982 8348 Eric * Convert mp into a T_OK_ACK 2983 8348 Eric */ 2984 8348 Eric 2985 8348 Eric mp = mi_tpi_ok_ack_alloc(mp); 2986 8348 Eric 2987 8348 Eric /* 2988 8348 Eric * should not happen in practice... T_OK_ACK is smaller than the 2989 8348 Eric * original message. 2990 8348 Eric */ 2991 8348 Eric ASSERT(mp != NULL); 2992 8348 Eric ASSERT(((struct T_ok_ack *)mp->b_rptr)->PRIM_type == T_OK_ACK); 2993 5240 nordmark qreply(q, mp); 2994 0 stevel } 2995 8348 Eric 2996 0 stevel /* 2997