1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2008 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 26 /* 27 * Tunnel driver 28 * This module acts like a driver/DLPI provider as viewed from the top 29 * and a stream head/TPI user from the bottom 30 * Implements the logic for IP (IPv4 or IPv6) encapsulation 31 * within IP (IPv4 or IPv6) 32 */ 33 34 #pragma ident "%Z%%M% %I% %E% SMI" 35 36 #include <sys/types.h> 37 #include <sys/stream.h> 38 #include <sys/dlpi.h> 39 #include <sys/stropts.h> 40 #include <sys/strlog.h> 41 #include <sys/tihdr.h> 42 #include <sys/tiuser.h> 43 #include <sys/ddi.h> 44 #include <sys/sunddi.h> 45 #include <sys/ethernet.h> 46 #include <sys/cmn_err.h> 47 #include <sys/debug.h> 48 #include <sys/kmem.h> 49 #include <sys/netstack.h> 50 51 #include <sys/systm.h> 52 #include <sys/param.h> 53 #include <sys/socket.h> 54 #include <sys/vtrace.h> 55 #include <sys/isa_defs.h> 56 #include <net/if.h> 57 #include <net/if_arp.h> 58 #include <net/route.h> 59 #include <sys/sockio.h> 60 #include <netinet/in.h> 61 62 #include <inet/common.h> 63 #include <inet/mi.h> 64 #include <inet/mib2.h> 65 #include 
<inet/nd.h> 66 #include <inet/arp.h> 67 #include <inet/snmpcom.h> 68 69 #include <netinet/igmp_var.h> 70 71 #include <netinet/ip6.h> 72 #include <netinet/icmp6.h> 73 #include <inet/ip.h> 74 #include <inet/ip6.h> 75 #include <net/if_dl.h> 76 #include <inet/ip_if.h> 77 #include <sys/strsun.h> 78 #include <sys/strsubr.h> 79 #include <inet/ipsec_impl.h> 80 #include <inet/ipdrop.h> 81 #include <inet/tun.h> 82 #include <inet/ipsec_impl.h> 83 84 85 #include <sys/conf.h> 86 #include <sys/errno.h> 87 #include <sys/modctl.h> 88 #include <sys/stat.h> 89 90 #include <inet/ip_ire.h> /* for ire_route_lookup_v6 */ 91 92 static void tun_cancel_rec_evs(queue_t *, eventid_t *); 93 static void tun_bufcall_handler(void *); 94 static boolean_t tun_icmp_message_v4(queue_t *, ipha_t *, icmph_t *, mblk_t *); 95 static boolean_t tun_icmp_too_big_v4(queue_t *, ipha_t *, uint16_t, mblk_t *); 96 static boolean_t tun_icmp_message_v6(queue_t *, ip6_t *, icmp6_t *, uint8_t, 97 mblk_t *); 98 static boolean_t tun_icmp_too_big_v6(queue_t *, ip6_t *, uint32_t, uint8_t, 99 mblk_t *); 100 static void tun_sendokack(queue_t *, mblk_t *, t_uscalar_t); 101 static void tun_sendsdusize(queue_t *); 102 static void tun_senderrack(queue_t *, mblk_t *, t_uscalar_t, t_uscalar_t, 103 t_uscalar_t); 104 static int tun_fastpath(queue_t *, mblk_t *); 105 static int tun_ioctl(queue_t *, mblk_t *); 106 static void tun_timeout_handler(void *); 107 static int tun_rproc(queue_t *, mblk_t *); 108 static int tun_wproc_mdata(queue_t *, mblk_t *); 109 static int tun_wproc(queue_t *, mblk_t *); 110 static int tun_rdata(queue_t *, mblk_t *, mblk_t *, tun_t *, uint_t); 111 static int tun_rdata_v4(queue_t *, mblk_t *, mblk_t *, tun_t *); 112 static int tun_rdata_v6(queue_t *, mblk_t *, mblk_t *, tun_t *); 113 static int tun_set_sec_simple(tun_t *, ipsec_req_t *); 114 static void tun_send_ire_req(queue_t *); 115 static uint32_t tun_update_link_mtu(queue_t *, uint32_t, boolean_t); 116 static mblk_t *tun_realloc_mblk(queue_t *, 
mblk_t *, size_t, mblk_t *, 117 boolean_t); 118 static void tun_recover(queue_t *, mblk_t *, size_t); 119 static void tun_rem_ppa_list(tun_t *); 120 static void tun_rem_tun_byaddr_list(tun_t *); 121 static void tun_rput_icmp_err_v4(queue_t *, mblk_t *, mblk_t *); 122 static void icmp_ricmp_err_v4_v4(queue_t *, mblk_t *, mblk_t *); 123 static void icmp_ricmp_err_v6_v4(queue_t *, mblk_t *, mblk_t *); 124 static void icmp_ricmp_err_v4_v6(queue_t *, mblk_t *, mblk_t *, icmp6_t *); 125 static void icmp_ricmp_err_v6_v6(queue_t *, mblk_t *, mblk_t *, icmp6_t *); 126 static void tun_rput_icmp_err_v6(queue_t *, mblk_t *, mblk_t *); 127 static int tun_rput_tpi(queue_t *, mblk_t *); 128 static int tun_send_bind_req(queue_t *); 129 static void tun_statinit(tun_stats_t *, char *, netstackid_t); 130 static int tun_stat_kstat_update(kstat_t *, int); 131 static void tun_wdata_v4(queue_t *, mblk_t *); 132 static void tun_wdata_v6(queue_t *, mblk_t *); 133 static char *tun_who(queue_t *, char *); 134 static int tun_wput_dlpi(queue_t *, mblk_t *); 135 static int tun_wputnext_v6(queue_t *, mblk_t *); 136 static int tun_wputnext_v4(queue_t *, mblk_t *); 137 static boolean_t tun_limit_value_v6(queue_t *, mblk_t *, ip6_t *, int *); 138 static void tun_freemsg_chain(mblk_t *, uint64_t *); 139 static void *tun_stack_init(netstackid_t, netstack_t *); 140 static void tun_stack_fini(netstackid_t, void *); 141 142 /* module's defined constants, globals and data structures */ 143 144 #define IP "ip" 145 #define IP6 "ip6" 146 static major_t IP_MAJ; 147 static major_t IP6_MAJ; 148 149 #define TUN_DEBUG 150 #define TUN_LINK_EXTRA_OFF 32 151 152 #define IPV6V4_DEF_TTL 60 153 #define IPV6V4_DEF_ENCAP 60 154 155 #define TUN_WHO_BUF 60 156 157 158 #ifdef TUN_DEBUG 159 /* levels of debugging verbosity */ 160 #define TUN0DBG 0x00 /* crucial */ 161 #define TUN1DBG 0x01 /* informational */ 162 #define TUN2DBG 0x02 /* verbose */ 163 #define TUN3DBG 0x04 /* very verbose */ 164 165 /* 166 * Global variable 
storing debugging level for all tunnels. By default 167 * all crucial messages will be printed. Value can be masked to exclusively 168 * print certain debug levels and not others. 169 */ 170 int8_t tun_debug = TUN0DBG; 171 172 #define TUN_LEVEL(dbg, lvl) ((dbg & lvl) == lvl) 173 174 #define tun0dbg(a) printf a 175 #define tun1dbg(a) if (TUN_LEVEL(tun_debug, TUN1DBG)) printf a 176 #define tun2dbg(a) if (TUN_LEVEL(tun_debug, TUN2DBG)) printf a 177 #define tun3dbg(a) if (TUN_LEVEL(tun_debug, TUN3DBG)) printf a 178 #else 179 #define tun0dbg(a) /* */ 180 #define tun1dbg(a) /* */ 181 #define tun2dbg(a) /* */ 182 #define tun3dbg(a) /* */ 183 #endif /* TUN_DEBUG */ 184 185 #define TUN_RECOVER_WAIT (1*hz) 186 187 /* canned DL_INFO_ACK - adjusted based on tunnel type */ 188 dl_info_ack_t infoack = { 189 DL_INFO_ACK, /* dl_primitive */ 190 4196, /* dl_max_sdu */ 191 0, /* dl_min_sdu */ 192 0, /* dl_addr_length */ 193 DL_IPV4, /* dl_mac_type */ 194 0, /* dl_reserved */ 195 DL_UNATTACHED, /* dl_current_state */ 196 0, /* dl_sap_length */ 197 DL_CLDLS, /* dl_service_mode */ 198 0, /* dl_qos_length */ 199 0, /* dl_qos_offset */ 200 0, /* dl_qos_range_length */ 201 0, /* dl_qos_range_offset */ 202 DL_STYLE2, /* dl_provider_style */ 203 0, /* dl_addr_offset */ 204 DL_VERSION_2, /* dl_version */ 205 0, /* dl_brdcast_addr_length */ 206 0, /* dl_brdcst_addr_offset */ 207 0 /* dl_grow */ 208 }; 209 210 /* 211 * canned DL_BIND_ACK - IP doesn't use any of this info. 212 */ 213 dl_bind_ack_t bindack = { 214 DL_BIND_ACK, /* dl_primitive */ 215 0, /* dl_sap */ 216 0, /* dl_addr_length */ 217 0, /* dl_addr_offset */ 218 0, /* dl_max_conind */ 219 0 /* dl_xidtest_flg */ 220 }; 221 222 223 /* 224 * Canned IPv6 destination options header containing Tunnel 225 * Encapsulation Limit option. 
226 */ 227 static struct tun_encap_limit tun_limit_init_upper_v4 = { 228 { IPPROTO_ENCAP, 0 }, 229 IP6OPT_TUNNEL_LIMIT, 230 1, 231 IPV6_DEFAULT_ENCAPLIMIT, /* filled in with actual value later */ 232 IP6OPT_PADN, 233 1, 234 0 235 }; 236 static struct tun_encap_limit tun_limit_init_upper_v6 = { 237 { IPPROTO_IPV6, 0 }, 238 IP6OPT_TUNNEL_LIMIT, 239 1, 240 IPV6_DEFAULT_ENCAPLIMIT, /* filled in with actual value later */ 241 IP6OPT_PADN, 242 1, 243 0 244 }; 245 246 static tun_stats_t *tun_add_stat(queue_t *); 247 248 static void tun_add_byaddr(tun_t *); 249 static ipsec_tun_pol_t *itp_get_byaddr_fn(uint32_t *, uint32_t *, int, 250 netstack_t *); 251 252 /* Setable in /etc/system */ 253 static boolean_t tun_do_fastpath = B_TRUE; 254 255 /* streams linkages */ 256 static struct module_info info = { 257 TUN_MODID, /* module id number */ 258 TUN_NAME, /* module name */ 259 1, /* min packet size accepted */ 260 INFPSZ, /* max packet size accepted */ 261 65536, /* hi-water mark */ 262 1024 /* lo-water mark */ 263 }; 264 265 static struct qinit tunrinit = { 266 (pfi_t)tun_rput, /* read side put procedure */ 267 (pfi_t)tun_rsrv, /* read side service procedure */ 268 tun_open, /* open procedure */ 269 tun_close, /* close procedure */ 270 NULL, /* for future use */ 271 &info, /* module information structure */ 272 NULL /* module statistics structure */ 273 }; 274 275 static struct qinit tunwinit = { 276 (pfi_t)tun_wput, /* write side put procedure */ 277 (pfi_t)tun_wsrv, /* write side service procedure */ 278 NULL, 279 NULL, 280 NULL, 281 &info, 282 NULL 283 }; 284 285 struct streamtab tuninfo = { 286 &tunrinit, /* read side queue init */ 287 &tunwinit, /* write side queue init */ 288 NULL, /* mux read side init */ 289 NULL /* mux write side init */ 290 }; 291 292 static struct fmodsw tun_fmodsw = { 293 TUN_NAME, 294 &tuninfo, 295 (D_MP | D_MTQPAIR | D_MTPUTSHARED) 296 }; 297 298 static struct modlstrmod modlstrmod = { 299 &mod_strmodops, 300 "configured tunneling module", 301 
&tun_fmodsw 302 }; 303 304 static struct modlinkage modlinkage = { 305 MODREV_1, 306 &modlstrmod, 307 NULL 308 }; 309 310 int 311 _init(void) 312 { 313 int rc; 314 315 IP_MAJ = ddi_name_to_major(IP); 316 IP6_MAJ = ddi_name_to_major(IP6); 317 318 /* 319 * We want to be informed each time a stack is created or 320 * destroyed in the kernel, so we can maintain the 321 * set of tun_stack_t's. 322 */ 323 netstack_register(NS_TUN, tun_stack_init, NULL, tun_stack_fini); 324 325 rc = mod_install(&modlinkage); 326 if (rc != 0) 327 netstack_unregister(NS_TUN); 328 329 return (rc); 330 } 331 332 int 333 _fini(void) 334 { 335 int error; 336 337 error = mod_remove(&modlinkage); 338 if (error == 0) 339 netstack_unregister(NS_TUN); 340 341 return (error); 342 } 343 344 int 345 _info(struct modinfo *modinfop) 346 { 347 return (mod_info(&modlinkage, modinfop)); 348 } 349 350 /* 351 * this module is meant to be pushed on an instance of IP and 352 * have an instance of IP pushed on top of it. 353 */ 354 355 /* ARGSUSED */ 356 int 357 tun_open(queue_t *q, dev_t *devp, int flag, int sflag, cred_t *credp) 358 { 359 tun_t *atp; 360 mblk_t *hello; 361 ipsec_info_t *ii; 362 netstack_t *ns; 363 zoneid_t zoneid; 364 365 if (q->q_ptr != NULL) { 366 /* re-open of an already open instance */ 367 return (0); 368 } 369 370 if (sflag != MODOPEN) { 371 return (EINVAL); 372 } 373 374 tun1dbg(("tun_open\n")); 375 376 ns = netstack_find_by_cred(credp); 377 ASSERT(ns != NULL); 378 379 /* 380 * For exclusive stacks we set the zoneid to zero 381 * to make IP operate as if in the global zone. 
382 */ 383 if (ns->netstack_stackid != GLOBAL_NETSTACKID) 384 zoneid = GLOBAL_ZONEID; 385 else 386 zoneid = crgetzoneid(credp); 387 388 hello = allocb(sizeof (ipsec_info_t), BPRI_HI); 389 if (hello == NULL) { 390 netstack_rele(ns); 391 return (ENOMEM); 392 } 393 394 /* allocate per-instance structure */ 395 atp = kmem_zalloc(sizeof (tun_t), KM_SLEEP); 396 397 atp->tun_state = DL_UNATTACHED; 398 atp->tun_dev = *devp; 399 atp->tun_zoneid = zoneid; 400 atp->tun_netstack = ns; 401 402 /* 403 * Based on the lower version of IP, initialize stuff that 404 * won't change 405 */ 406 if (getmajor(*devp) == IP_MAJ) { 407 ipha_t *ipha; 408 409 atp->tun_flags = TUN_L_V4 | TUN_HOP_LIM; 410 atp->tun_hop_limit = IPV6V4_DEF_TTL; 411 412 /* 413 * The tunnel MTU is recalculated when we know more 414 * about the tunnel destination. 415 */ 416 atp->tun_mtu = IP_MAXPACKET - sizeof (ipha_t); 417 ipha = &atp->tun_ipha; 418 ipha->ipha_version_and_hdr_length = IP_SIMPLE_HDR_VERSION; 419 ipha->ipha_type_of_service = 0; 420 ipha->ipha_ident = 0; /* to be filled in by IP */ 421 ipha->ipha_fragment_offset_and_flags = htons(IPH_DF); 422 ipha->ipha_ttl = atp->tun_hop_limit; 423 ipha->ipha_hdr_checksum = 0; /* to be filled in by IP */ 424 } else if (getmajor(*devp) == IP6_MAJ) { 425 atp->tun_flags = TUN_L_V6 | TUN_HOP_LIM | TUN_ENCAP_LIM; 426 atp->tun_hop_limit = IPV6_DEFAULT_HOPS; 427 atp->tun_encap_lim = IPV6_DEFAULT_ENCAPLIMIT; 428 atp->tun_mtu = IP_MAXPACKET - sizeof (ip6_t) - 429 IPV6_TUN_ENCAP_OPT_LEN; 430 atp->tun_ip6h.ip6_vcf = IPV6_DEFAULT_VERS_AND_FLOW; 431 atp->tun_ip6h.ip6_hops = IPV6_DEFAULT_HOPS; 432 } else { 433 netstack_rele(ns); 434 kmem_free(atp, sizeof (tun_t)); 435 return (ENXIO); 436 } 437 438 atp->tun_extra_offset = TUN_LINK_EXTRA_OFF; 439 mutex_init(&atp->tun_lock, NULL, MUTEX_DEFAULT, NULL); 440 441 /* 442 * If this is the automatic tunneling module, atun, verify that the 443 * lower protocol is IPv4 and set TUN_AUTOMATIC. 
Since we don't do 444 * automatic tunneling over IPv6, trying to run over IPv6 is an error, 445 * so free memory and return an error. 446 */ 447 if (q->q_qinfo->qi_minfo->mi_idnum == ATUN_MODID) { 448 if (atp->tun_flags & TUN_L_V4) { 449 atp->tun_flags |= TUN_AUTOMATIC; 450 atp->tun_mtu = ATUN_MTU; 451 } else { 452 /* Error. */ 453 netstack_rele(ns); 454 kmem_free(atp, sizeof (tun_t)); 455 return (ENXIO); 456 } 457 } else if (q->q_qinfo->qi_minfo->mi_idnum == TUN6TO4_MODID) { 458 /* 459 * Set 6to4 flag if this is the 6to4tun module and make 460 * the same checks mentioned above. 461 */ 462 if (atp->tun_flags & TUN_L_V4) { 463 atp->tun_flags |= TUN_6TO4; 464 atp->tun_mtu = ATUN_MTU; 465 } else { 466 /* Error. */ 467 netstack_rele(ns); 468 kmem_free(atp, sizeof (tun_t)); 469 return (ENXIO); 470 } 471 } 472 473 q->q_ptr = WR(q)->q_ptr = atp; 474 atp->tun_wq = WR(q); 475 mutex_enter(&ns->netstack_tun->tuns_global_lock); 476 tun_add_byaddr(atp); 477 mutex_exit(&ns->netstack_tun->tuns_global_lock); 478 ii = (ipsec_info_t *)hello->b_rptr; 479 hello->b_wptr = hello->b_rptr + sizeof (*ii); 480 hello->b_datap->db_type = M_CTL; 481 ii->ipsec_info_type = TUN_HELLO; 482 ii->ipsec_info_len = sizeof (*ii); 483 qprocson(q); 484 putnext(WR(q), hello); 485 return (0); 486 } 487 488 /* ARGSUSED */ 489 int 490 tun_close(queue_t *q, int flag, cred_t *cred_p) 491 { 492 tun_t *atp = (tun_t *)q->q_ptr; 493 netstack_t *ns; 494 tun_stack_t *tuns; 495 496 ASSERT(atp != NULL); 497 498 ns = atp->tun_netstack; 499 tuns = ns->netstack_tun; 500 501 /* Cancel outstanding qtimeouts() or qbufcalls() */ 502 tun_cancel_rec_evs(q, &atp->tun_events); 503 504 qprocsoff(q); 505 506 /* NOTE: tun_rem_ppa_list() may unlink tun_itp from its AVL tree. */ 507 if (atp->tun_stats != NULL) 508 tun_rem_ppa_list(atp); 509 510 if (atp->tun_itp != NULL) { 511 /* In brackets because of ITP_REFRELE's brackets. 
*/ 512 ITP_REFRELE(atp->tun_itp, ns); 513 } 514 515 netstack_rele(ns); 516 517 mutex_destroy(&atp->tun_lock); 518 519 /* remove tun_t from global list */ 520 mutex_enter(&tuns->tuns_global_lock); 521 tun_rem_tun_byaddr_list(atp); 522 mutex_exit(&tuns->tuns_global_lock); 523 524 /* free per-instance struct */ 525 kmem_free(atp, sizeof (tun_t)); 526 527 q->q_ptr = WR(q)->q_ptr = NULL; 528 529 return (0); 530 } 531 532 533 /* 534 * Cancel bufcall and timer requests 535 * Don't need to hold lock. protected by perimeter 536 */ 537 static void 538 tun_cancel_rec_evs(queue_t *q, eventid_t *evs) 539 { 540 if (evs->ev_rbufcid != 0) { 541 qunbufcall(RD(q), evs->ev_rbufcid); 542 evs->ev_rbufcid = 0; 543 } 544 if (evs->ev_wbufcid != 0) { 545 qunbufcall(WR(q), evs->ev_wbufcid); 546 evs->ev_wbufcid = 0; 547 } 548 if (evs->ev_rtimoutid != 0) { 549 (void) quntimeout(RD(q), evs->ev_rtimoutid); 550 evs->ev_rtimoutid = 0; 551 } 552 if (evs->ev_wtimoutid != 0) { 553 (void) quntimeout(WR(q), evs->ev_wtimoutid); 554 evs->ev_wtimoutid = 0; 555 } 556 } 557 558 /* 559 * Called by bufcall() when memory becomes available 560 * Don't need to hold lock. protected by perimeter 561 */ 562 static void 563 tun_bufcall_handler(void *arg) 564 { 565 queue_t *q = arg; 566 tun_t *atp = (tun_t *)q->q_ptr; 567 eventid_t *evs; 568 569 ASSERT(atp); 570 571 evs = &atp->tun_events; 572 if ((q->q_flag & QREADR) != 0) { 573 ASSERT(evs->ev_rbufcid); 574 evs->ev_rbufcid = 0; 575 } else { 576 ASSERT(evs->ev_wbufcid); 577 evs->ev_wbufcid = 0; 578 } 579 enableok(q); 580 qenable(q); 581 } 582 583 /* 584 * Called by timeout (if we couldn't do a bufcall) 585 * Don't need to hold lock. 
protected by perimeter 586 */ 587 static void 588 tun_timeout_handler(void *arg) 589 { 590 queue_t *q = arg; 591 tun_t *atp = (tun_t *)q->q_ptr; 592 eventid_t *evs; 593 594 ASSERT(atp); 595 evs = &atp->tun_events; 596 597 if (q->q_flag & QREADR) { 598 ASSERT(evs->ev_rtimoutid); 599 evs->ev_rtimoutid = 0; 600 } else { 601 ASSERT(evs->ev_wtimoutid); 602 evs->ev_wtimoutid = 0; 603 } 604 enableok(q); 605 qenable(q); 606 } 607 608 /* 609 * This routine is called when a message buffer can not 610 * be allocated. M_PCPROT message are converted to M_PROTO, but 611 * other than that, the mblk passed in must not be a high 612 * priority message (putting a hight priority message back on 613 * the queue is a bad idea) 614 * Side effect: the queue is disabled 615 * (timeout or bufcall handler will re-enable the queue) 616 * tun_cancel_rec_evs() must be called in close to cancel all 617 * outstanding requests. 618 */ 619 static void 620 tun_recover(queue_t *q, mblk_t *mp, size_t size) 621 { 622 tun_t *atp = (tun_t *)q->q_ptr; 623 timeout_id_t tid; 624 bufcall_id_t bid; 625 eventid_t *evs = &atp->tun_events; 626 627 ASSERT(mp != NULL); 628 629 /* 630 * To avoid re-enabling the queue, change the high priority 631 * M_PCPROTO message to a M_PROTO before putting it on the queue 632 */ 633 if (mp->b_datap->db_type == M_PCPROTO) 634 mp->b_datap->db_type = M_PROTO; 635 636 ASSERT(mp->b_datap->db_type < QPCTL); 637 638 (void) putbq(q, mp); 639 640 /* 641 * Make sure there is at most one outstanding request per queue. 
642 */ 643 if (q->q_flag & QREADR) { 644 if (evs->ev_rtimoutid || evs->ev_rbufcid) 645 return; 646 } else { 647 if (evs->ev_wtimoutid || evs->ev_wbufcid) 648 return; 649 } 650 651 noenable(q); 652 /* 653 * locking is needed here because this routine may be called 654 * with two puts() running 655 */ 656 mutex_enter(&atp->tun_lock); 657 if (!(bid = qbufcall(q, size, BPRI_MED, tun_bufcall_handler, q))) { 658 tid = qtimeout(q, tun_timeout_handler, q, TUN_RECOVER_WAIT); 659 if (q->q_flag & QREADR) 660 evs->ev_rtimoutid = tid; 661 else 662 evs->ev_wtimoutid = tid; 663 } else { 664 if (q->q_flag & QREADR) 665 evs->ev_rbufcid = bid; 666 else 667 evs->ev_wbufcid = bid; 668 } 669 mutex_exit(&atp->tun_lock); 670 } 671 672 /* 673 * tun_realloc_mblk(q, mp, size, orig_mp, copy) 674 * 675 * q - pointer to a queue_t, must not be NULL 676 * mp - pointer to an mblk to copy, can be NULL 677 * size - Number of bytes being (re)allocated 678 * orig_mp - pointer to the original mblk_t which will be passed to 679 * tun_recover if the memory (re)allocation fails. This is done 680 * so that the message can be rescheduled on the queue. 681 * orig_mp must be NULL if the original mblk_t is a high priority 682 * message of type other then M_PCPROTO. 683 * copy - a boolean to specify wheater the contents of mp should be copied 684 * into the new mblk_t returned by this function. 685 * 686 * note: this routine will adjust the b_rptr and b_wptr of the 687 * mblk. Returns an mblk able to hold the requested size or 688 * NULL if allocation failed. If copy is true, original 689 * contents, if any, will be copied to new mblk 690 */ 691 static mblk_t * 692 tun_realloc_mblk(queue_t *q, mblk_t *mp, size_t size, mblk_t *orig_mp, 693 boolean_t copy) 694 { 695 /* 696 * If we are passed in an mblk.. 
check to make sure that 697 * it is big enough and we are the only users of the mblk 698 * If not, then try and allocate one 699 */ 700 if (mp == NULL || mp->b_datap->db_lim - mp->b_datap->db_base < size || 701 mp->b_datap->db_ref > 1) { 702 size_t asize; 703 mblk_t *newmp; 704 705 /* allocate at least as much as we had -- don't shrink */ 706 if (mp != NULL) { 707 asize = MAX(size, 708 mp->b_datap->db_lim - mp->b_datap->db_base); 709 } else { 710 asize = size; 711 } 712 newmp = allocb(asize, BPRI_HI); 713 714 if (newmp == NULL) { 715 /* 716 * Reschedule the mblk via bufcall or timeout 717 * if orig_mp is non-NULL 718 */ 719 if (orig_mp != NULL) { 720 tun_recover(q, orig_mp, asize); 721 } 722 tun1dbg(("tun_realloc_mblk: couldn't allocate" \ 723 " dl_ok_ack mblk\n")); 724 return (NULL); 725 } 726 if (mp != NULL) { 727 if (copy) 728 bcopy(mp->b_rptr, newmp->b_rptr, 729 mp->b_wptr - mp->b_rptr); 730 newmp->b_datap->db_type = mp->b_datap->db_type; 731 freemsg(mp); 732 } 733 mp = newmp; 734 } else { 735 if (mp->b_rptr != mp->b_datap->db_base) { 736 if (copy) 737 bcopy(mp->b_rptr, mp->b_datap->db_base, 738 mp->b_wptr - mp->b_rptr); 739 mp->b_rptr = mp->b_datap->db_base; 740 } 741 } 742 mp->b_wptr = mp->b_rptr + size; 743 return (mp); 744 } 745 746 747 /* send a DL_OK_ACK back upstream */ 748 static void 749 tun_sendokack(queue_t *q, mblk_t *mp, t_uscalar_t prim) 750 { 751 dl_ok_ack_t *dlok; 752 753 if ((mp = tun_realloc_mblk(q, mp, sizeof (dl_ok_ack_t), mp, 754 B_FALSE)) == NULL) { 755 return; 756 } 757 dlok = (dl_ok_ack_t *)mp->b_rptr; 758 dlok->dl_primitive = DL_OK_ACK; 759 dlok->dl_correct_primitive = prim; 760 mp->b_datap->db_type = M_PCPROTO; 761 qreply(q, mp); 762 } 763 764 /* 765 * Send a DL_NOTIFY_IND message with DL_NOTE_SDU_SIZE up to notify IP of a 766 * link MTU change. 
767 */ 768 static void 769 tun_sendsdusize(queue_t *q) 770 { 771 tun_t *atp = (tun_t *)q->q_ptr; 772 mblk_t *mp = NULL; 773 dl_notify_ind_t *notify; 774 775 if (!(atp->tun_notifications & DL_NOTE_SDU_SIZE)) 776 return; 777 778 if ((mp = tun_realloc_mblk(q, NULL, DL_NOTIFY_IND_SIZE, NULL, 779 B_FALSE)) == NULL) { 780 return; 781 } 782 mp->b_datap->db_type = M_PROTO; 783 notify = (dl_notify_ind_t *)mp->b_rptr; 784 notify->dl_primitive = DL_NOTIFY_IND; 785 notify->dl_notification = DL_NOTE_SDU_SIZE; 786 notify->dl_data = atp->tun_mtu; 787 notify->dl_addr_length = 0; 788 notify->dl_addr_offset = 0; 789 790 tun1dbg(("tun_sendsdusize: notifying ip of new mtu: %d", atp->tun_mtu)); 791 792 /* 793 * We send this notification to the upper IP instance who is using 794 * us as a device. 795 */ 796 putnext(RD(q), mp); 797 } 798 799 /* send a DL_ERROR_ACK back upstream */ 800 static void 801 tun_senderrack(queue_t *q, mblk_t *mp, t_uscalar_t prim, t_uscalar_t dl_err, 802 t_uscalar_t error) 803 { 804 dl_error_ack_t *dl_err_ack; 805 806 if ((mp = tun_realloc_mblk(q, mp, sizeof (dl_error_ack_t), mp, 807 B_FALSE)) == NULL) { 808 return; 809 } 810 811 dl_err_ack = (dl_error_ack_t *)mp->b_rptr; 812 dl_err_ack->dl_error_primitive = prim; 813 dl_err_ack->dl_primitive = DL_ERROR_ACK; 814 dl_err_ack->dl_errno = dl_err; 815 dl_err_ack->dl_unix_errno = error; 816 mp->b_datap->db_type = M_PCPROTO; 817 qreply(q, mp); 818 } 819 820 /* 821 * Free all messages in an mblk chain and optionally collect 822 * byte-counter stats. 
Caller responsible for per-packet stats 823 */ 824 static void 825 tun_freemsg_chain(mblk_t *mp, uint64_t *bytecount) 826 { 827 mblk_t *mpnext; 828 while (mp != NULL) { 829 ASSERT(mp->b_prev == NULL); 830 mpnext = mp->b_next; 831 mp->b_next = NULL; 832 if (bytecount != NULL) 833 atomic_add_64(bytecount, (int64_t)msgdsize(mp)); 834 freemsg(mp); 835 mp = mpnext; 836 } 837 } 838 839 /* 840 * Send all messages in a chain of mblk chains and optionally collect 841 * byte-counter stats. Caller responsible for per-packet stats, and insuring 842 * mp is always non-NULL. 843 * 844 * This is a macro so we can save stack. Assume the caller function 845 * has local-variable "nmp" as a placeholder. Define two versions, one with 846 * byte-counting stats and one without. 847 */ 848 #define TUN_PUTMSG_CHAIN_STATS(q, mp, nmp, bytecount) \ 849 (nmp) = NULL; \ 850 ASSERT((mp) != NULL); \ 851 do { \ 852 if ((nmp) != NULL) \ 853 putnext(q, (nmp)); \ 854 ASSERT((mp)->b_prev == NULL); \ 855 (nmp) = (mp); \ 856 (mp) = (mp)->b_next; \ 857 (nmp)->b_next = NULL; \ 858 atomic_add_64(bytecount, (int64_t)msgdsize(nmp)); \ 859 } while ((mp) != NULL); \ 860 \ 861 putnext((q), (nmp)) /* trailing semicolon provided by instantiator. */ 862 863 #define TUN_PUTMSG_CHAIN(q, mp, nmp) \ 864 (nmp) = NULL; \ 865 ASSERT((mp) != NULL); \ 866 do { \ 867 if ((nmp) != NULL) \ 868 putnext(q, (nmp)); \ 869 ASSERT((mp)->b_prev == NULL); \ 870 (nmp) = (mp); \ 871 (mp) = (mp)->b_next; \ 872 (nmp)->b_next = NULL; \ 873 } while ((mp) != NULL); \ 874 \ 875 putnext((q), (nmp)) /* trailing semicolon provided by instantiator. */ 876 877 /* 878 * Macro that not only checks tun_itp, but also sees if one got loaded 879 * via ipsecconf(1m)/PF_POLICY behind our backs. Note the sleazy update of 880 * (tun)->tun_itp_gen so we don't lose races with other possible updates via 881 * PF_POLICY. 
*/
/*
 * Evaluates to non-zero when (tun)->tun_itp is non-NULL, either because it
 * already was or because the tunnel's generation was behind
 * ipss->ipsec_tunnel_policy_gen and get_tunnel_policy() then returned a
 * policy for (tun)->tun_lifname.  Side effects: may store into
 * (tun)->tun_itp_gen and (tun)->tun_itp.
 * NOTE(review): the ipss argument is not parenthesized in the expansion, so
 * pass a plain identifier.
 */
#define	tun_policy_present(tun, ns, ipss)				\
	(((tun)->tun_itp != NULL) ||					\
	(((tun)->tun_itp_gen < ipss->ipsec_tunnel_policy_gen) &&	\
	((tun)->tun_itp_gen = ipss->ipsec_tunnel_policy_gen) &&		\
	(((tun)->tun_itp = get_tunnel_policy((tun)->tun_lifname, ns))	\
	!= NULL)))

/*
 * Search tuns_byaddr_list for occurrence of tun_t with matching
 * inner addresses.  This function does not take into account
 * prefixes.  Possibly we could generalize this function in the
 * future with V6_MASK_EQ() and pass in an all 1's prefix for IP
 * address matches.
 * Returns NULL on no match.
 * This function is not directly called - it's assigned into itp_get_byaddr().
 *
 * lin, fin - local and foreign addresses; read as struct in_addr when af is
 *            AF_INET and as in6_addr_t otherwise
 * af       - address family of lin/fin
 * ns       - netstack whose tunnel list is searched
 *
 * On a match the policy is returned with a reference taken by
 * ITP_REFHOLD().
 */
static ipsec_tun_pol_t *
itp_get_byaddr_fn(uint32_t *lin, uint32_t *fin, int af, netstack_t *ns)
{
	tun_t *tun_list;
	uint_t index;
	in6_addr_t lmapped, fmapped, *laddr, *faddr;
	ipsec_stack_t *ipss = ns->netstack_ipsec;
	tun_stack_t *tuns = ns->netstack_tun;

	/* Normalize IPv4 input to v4-mapped form so one comparison suffices. */
	if (af == AF_INET) {
		laddr = &lmapped;
		faddr = &fmapped;
		IN6_INADDR_TO_V4MAPPED((struct in_addr *)lin, laddr);
		IN6_INADDR_TO_V4MAPPED((struct in_addr *)fin, faddr);
	} else {
		laddr = (in6_addr_t *)lin;
		faddr = (in6_addr_t *)fin;
	}

	/* The hash bucket is selected by the foreign address only. */
	index = TUN_BYADDR_LIST_HASH(*faddr);

	/*
	 * it's ok to grab global lock while holding tun_lock/perimeter
	 */
	mutex_enter(&tuns->tuns_global_lock);

	/*
	 * walk through list of tun_t looking for a match of
	 * inner addresses.  Addresses are inserted with
	 * IN6_IPADDR_TO_V4MAPPED(), so v6 matching works for
	 * all cases.
	 */
	for (tun_list = tuns->tuns_byaddr_list[index]; tun_list;
	    tun_list = tun_list->tun_next) {
		if (IN6_ARE_ADDR_EQUAL(&tun_list->tun_laddr, laddr) &&
		    IN6_ARE_ADDR_EQUAL(&tun_list->tun_faddr, faddr)) {
			ipsec_tun_pol_t *itp;

			/* Address match without policy: keep searching. */
			if (!tun_policy_present(tun_list, ns, ipss)) {
				tun1dbg(("itp_get_byaddr: No IPsec policy on "
				    "matching tun_t instance %p/%s\n",
				    (void *)tun_list, tun_list->tun_lifname));
				continue;
			}
			tun1dbg(("itp_get_byaddr: Found matching tun_t %p with "
			    "IPsec policy\n", (void *)tun_list));
			/*
			 * Lock hand-off: itp_lock is taken before the global
			 * lock is dropped, and the reference is taken while
			 * itp_lock is still held.
			 */
			mutex_enter(&tun_list->tun_itp->itp_lock);
			itp = tun_list->tun_itp;
			mutex_exit(&tuns->tuns_global_lock);
			ITP_REFHOLD(itp);
			mutex_exit(&itp->itp_lock);
			tun1dbg(("itp_get_byaddr: Found itp %p \n",
			    (void *)itp));
			return (itp);
		}
	}

	/* didn't find one, return zilch */

	tun1dbg(("itp_get_byaddr: No matching tunnel instances with policy\n"));
	mutex_exit(&tuns->tuns_global_lock);
	return (NULL);
}

/*
 * Search tuns_byaddr_list for occurrence of tun_t, same upper and lower stream,
 * and same type (6to4 vs automatic vs configured)
 * If none is found, insert this tun entry.
 *
 * The caller must hold tuns_global_lock and atp must not already be linked
 * (both are asserted).  The bucket is chosen by hashing atp->tun_faddr.
 */
static void
tun_add_byaddr(tun_t *atp)
{
	tun_t *tun_list;
	t_uscalar_t ppa = atp->tun_ppa;
	uint_t mask = atp->tun_flags & (TUN_LOWER_MASK | TUN_UPPER_MASK);
	uint_t tun_type = (atp->tun_flags & (TUN_AUTOMATIC | TUN_6TO4));
	uint_t index = TUN_BYADDR_LIST_HASH(atp->tun_faddr);
	tun_stack_t *tuns = atp->tun_netstack->netstack_tun;

	tun1dbg(("tun_add_byaddr: index = %d\n", index));

	ASSERT(MUTEX_HELD(&tuns->tuns_global_lock));
	ASSERT(atp->tun_next == NULL);

	/*
	 * walk through list of tun_t looking for a match of
	 * ppa, same upper and lower stream and same tunnel type
	 * (automatic or configured).
	 * There shouldn't be all that many tunnels, so a sequential
	 * search of the bucket should be fine.
	 */
	for (tun_list = tuns->tuns_byaddr_list[index]; tun_list;
	    tun_list = tun_list->tun_next) {
		if (tun_list->tun_ppa == ppa &&
		    ((tun_list->tun_flags & (TUN_LOWER_MASK |
		    TUN_UPPER_MASK)) == mask) &&
		    ((tun_list->tun_flags & (TUN_AUTOMATIC | TUN_6TO4)) ==
		    tun_type)) {
			/*
			 * NOTE(review): the value printed under the
			 * "tun_stats" label is the matching tun_t pointer.
			 */
			tun1dbg(("tun_add_byaddr: tun 0x%p Found ppa %d " \
			    "tun_stats 0x%p\n", (void *)atp, ppa,
			    (void *)tun_list));
			tun1dbg(("tun_add_byaddr: Nothing to do."));
			/* Collision, do nothing. */
			return;
		}
	}

	/* didn't find one, throw it in the global list */

	/*
	 * Insert at the head of the bucket.  tun_ptpn points at whatever
	 * pointer refers to this entry (the bucket slot or the previous
	 * entry's tun_next), so the old head's back pointer is redirected
	 * to atp->tun_next.
	 */
	atp->tun_next = tuns->tuns_byaddr_list[index];
	atp->tun_ptpn = &(tuns->tuns_byaddr_list[index]);
	if (tuns->tuns_byaddr_list[index] != NULL)
		tuns->tuns_byaddr_list[index]->tun_ptpn = &(atp->tun_next);
	tuns->tuns_byaddr_list[index] = atp;
}

/*
 * Search tuns_ppa_list for occurrence of tun_ppa, same lower stream,
 * and same type (6to4 vs automatic vs configured)
 * If none is found, insert this tun entry and create a new kstat for
 * the entry.
 * This is needed so that multiple tunnels with the same interface
 * name (e.g. ip.tun0 under IPv4 and ip.tun0 under IPv6) can share the
 * same kstats. (they share the same tun_stat and kstat)
 * Don't need to hold tun_lock if we are coming is as qwriter()
 */
static tun_stats_t *
tun_add_stat(queue_t *q)
{
	tun_t *atp = (tun_t *)q->q_ptr;
	tun_stats_t *tun_list;
	tun_stats_t *tun_stat;
	t_uscalar_t ppa = atp->tun_ppa;
	uint_t lower = atp->tun_flags & TUN_LOWER_MASK;
	uint_t tun_type = (atp->tun_flags & (TUN_AUTOMATIC | TUN_6TO4));
	uint_t index = TUN_LIST_HASH(ppa);
	tun_stack_t *tuns = atp->