1 0 stevel /* 2 0 stevel * CDDL HEADER START 3 0 stevel * 4 0 stevel * The contents of this file are subject to the terms of the 5 1676 jpk * Common Development and Distribution License (the "License"). 6 1676 jpk * You may not use this file except in compliance with the License. 7 0 stevel * 8 0 stevel * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 0 stevel * or http://www.opensolaris.org/os/licensing. 10 0 stevel * See the License for the specific language governing permissions 11 0 stevel * and limitations under the License. 12 0 stevel * 13 0 stevel * When distributing Covered Code, include this CDDL HEADER in each 14 0 stevel * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 0 stevel * If applicable, add the following below this CDDL HEADER, with the 16 0 stevel * fields enclosed by brackets "[]" replaced with your own identifying 17 0 stevel * information: Portions Copyright [yyyy] [name of copyright owner] 18 0 stevel * 19 0 stevel * CDDL HEADER END 20 0 stevel */ 21 0 stevel /* 22 11042 Erik * Copyright 2009 Sun Microsystems, Inc. All rights reserved. 23 11042 Erik * Use is subject to license terms. 24 0 stevel */ 25 0 stevel /* Copyright (c) 1990 Mentat Inc. */ 26 0 stevel 27 0 stevel /* 28 0 stevel * Procedures for the kernel part of DVMRP, 29 0 stevel * a Distance-Vector Multicast Routing Protocol. 30 0 stevel * (See RFC-1075) 31 0 stevel * Written by David Waitzman, BBN Labs, August 1988. 32 0 stevel * Modified by Steve Deering, Stanford, February 1989. 33 0 stevel * Modified by Mark J. 
Steiglitz, Stanford, May, 1991 34 0 stevel * Modified by Van Jacobson, LBL, January 1993 35 0 stevel * Modified by Ajit Thyagarajan, PARC, August 1993 36 0 stevel * Modified by Bill Fenner, PARC, April 1995 37 0 stevel * 38 0 stevel * MROUTING 3.5 39 0 stevel */ 40 0 stevel 41 0 stevel /* 42 0 stevel * TODO 43 0 stevel * - function pointer field in vif, void *vif_sendit() 44 0 stevel */ 45 0 stevel 46 0 stevel #include <sys/types.h> 47 0 stevel #include <sys/stream.h> 48 0 stevel #include <sys/stropts.h> 49 0 stevel #include <sys/strlog.h> 50 0 stevel #include <sys/systm.h> 51 0 stevel #include <sys/ddi.h> 52 0 stevel #include <sys/cmn_err.h> 53 0 stevel #include <sys/zone.h> 54 0 stevel 55 0 stevel #include <sys/param.h> 56 0 stevel #include <sys/socket.h> 57 0 stevel #include <sys/vtrace.h> 58 0 stevel #include <sys/debug.h> 59 0 stevel #include <net/if.h> 60 0 stevel #include <sys/sockio.h> 61 0 stevel #include <netinet/in.h> 62 0 stevel #include <net/if_dl.h> 63 0 stevel 64 11042 Erik #include <inet/ipsec_impl.h> 65 0 stevel #include <inet/common.h> 66 0 stevel #include <inet/mi.h> 67 0 stevel #include <inet/nd.h> 68 0 stevel #include <inet/mib2.h> 69 0 stevel #include <netinet/ip6.h> 70 0 stevel #include <inet/ip.h> 71 0 stevel #include <inet/snmpcom.h> 72 0 stevel 73 0 stevel #include <netinet/igmp.h> 74 0 stevel #include <netinet/igmp_var.h> 75 0 stevel #include <netinet/udp.h> 76 0 stevel #include <netinet/ip_mroute.h> 77 0 stevel #include <inet/ip_multi.h> 78 0 stevel #include <inet/ip_ire.h> 79 11042 Erik #include <inet/ip_ndp.h> 80 0 stevel #include <inet/ip_if.h> 81 0 stevel #include <inet/ipclassifier.h> 82 0 stevel 83 0 stevel #include <netinet/pim.h> 84 0 stevel 85 0 stevel 86 0 stevel /* 87 0 stevel * MT Design: 88 0 stevel * 89 0 stevel * There are three main data structures viftable, mfctable and tbftable that 90 0 stevel * need to be protected against MT races. 91 0 stevel * 92 0 stevel * vitable is a fixed length array of vif structs. 
There is no lock to protect 93 0 stevel * the whole array, instead each struct is protected by its own individual lock. 94 0 stevel * The value of v_marks in conjunction with the value of v_refcnt determines the 95 0 stevel * current state of a vif structure. One special state that needs mention 96 0 stevel * is when the vif is marked VIF_MARK_NOTINUSE but refcnt != 0. This indicates 97 0 stevel * that vif is being initialized. 98 0 stevel * Each structure is freed when the refcnt goes down to zero. If a delete comes 99 11042 Erik * in when the refcnt is > 1, the vif structure is marked VIF_MARK_CONDEMNED 100 0 stevel * which prevents the struct from further use. When the refcnt goes to zero 101 0 stevel * the struct is freed and is marked VIF_MARK_NOTINUSE. 102 0 stevel * vif struct stores a pointer to the ipif in v_ipif, to prevent ipif/ill 103 0 stevel * from going away a refhold is put on the ipif before using it. see 104 0 stevel * lock_good_vif() and unlock_good_vif(). 105 0 stevel * 106 0 stevel * VIF_REFHOLD and VIF_REFRELE macros have been provided to manipulate refcnts 107 0 stevel * of the vif struct. 108 0 stevel * 109 0 stevel * tbftable is also a fixed length array of tbf structs and is only accessed 110 0 stevel * via v_tbf. It is protected by its own lock tbf_lock. 111 0 stevel * 112 0 stevel * Lock Ordering is 113 0 stevel * v_lock --> tbf_lock 114 0 stevel * v_lock --> ill_lock 115 0 stevel * 116 0 stevel * mfctable is a fixed size hash table of mfc bucket structs (struct mfcb). 117 0 stevel * Each mfc bucket struct (struct mfcb) maintains a refcnt for each walker, 118 0 stevel * it also maintains a state. These fields are protected by a lock (mfcb_lock). 119 0 stevel * mfc structs only maintain a state and have no refcnt. mfc_mutex is used to 120 0 stevel * protect the struct elements. 121 0 stevel * 122 0 stevel * mfc structs are dynamically allocated and are singly linked 123 0 stevel * at the head of the chain. 
When an mfc structure is to be deleted 124 0 stevel * it is marked condemned and so is the state in the bucket struct. 125 0 stevel * When the last walker of the hash bucket exits all the mfc structs 126 0 stevel * marked condemned are freed. 127 0 stevel * 128 0 stevel * Locking Hierarchy: 129 0 stevel * The bucket lock should be acquired before the mfc struct lock. 130 0 stevel * MFCB_REFHOLD and MFCB_REFRELE macros are provided for locking 131 0 stevel * operations on the bucket struct. 132 0 stevel * 133 0 stevel * last_encap_lock and numvifs_mutex should be acquired after 134 0 stevel * acquiring vif or mfc locks. These locks protect some global variables. 135 0 stevel * 136 0 stevel * The statistics are not currently protected by a lock 137 0 stevel * causing the stats to be approximate, not exact. 138 0 stevel */ 139 0 stevel 140 0 stevel #define NO_VIF MAXVIFS /* from mrouted, no route for src */ 141 0 stevel 142 0 stevel /* 143 0 stevel * Timeouts: 144 0 stevel * Upcall timeouts - BSD uses boolean_t mfc->expire and 145 0 stevel * nexpire[MFCTBLSIZE], the number of times expire has been called. 146 0 stevel * SunOS 5.x uses mfc->timeout for each mfc. 147 0 stevel * Some Unixes are limited in the number of simultaneous timeouts 148 0 stevel * that can be run, SunOS 5.x does not have this restriction. 149 0 stevel */ 150 0 stevel 151 0 stevel /* 152 0 stevel * In BSD, EXPIRE_TIMEOUT is how often expire_upcalls() is called and 153 0 stevel * UPCALL_EXPIRE is the number of timeouts before a particular upcall 154 0 stevel * expires. 
Thus the time till expiration is EXPIRE_TIMEOUT * UPCALL_EXPIRE
 */
/*
 * NOTE(review): with the values below the product is 6 * (hz/4), i.e. about
 * 1.5 seconds if hz is the number of clock ticks per second -- confirm.
 */
#define	EXPIRE_TIMEOUT	(hz/4)	/* 4x / second */
#define	UPCALL_EXPIRE	6	/* number of timeouts */

/*
 * Hash function for a source, group entry
 *
 * XORs each address with itself shifted right by 10 and by 20 bits, combines
 * the two results and passes the value to MFCHASHMOD (defined outside this
 * part of the file).  The result is used as an index into the ips_mfcs
 * bucket array.  Both arguments are expanded three times, so they must not
 * have side effects.
 */
#define	MFCHASH(a, g) MFCHASHMOD(((a) >> 20) ^ ((a) >> 10) ^ (a) ^ \
	((g) >> 20) ^ ((g) >> 10) ^ (g))

#define	TBF_REPROCESS	(hz / 100)	/* 100x /second */

/* Identify PIM packet that came on a Register interface */
#define	PIM_REGISTER_MARKER	0xffffffff

/* Function declarations */
/* All of the following are private to this file (static linkage). */
static int	add_mfc(struct mfcctl *, ip_stack_t *);
static int	add_vif(struct vifctl *, conn_t *, ip_stack_t *);
static int	del_mfc(struct mfcctl *, ip_stack_t *);
static int	del_vif(vifi_t *, ip_stack_t *);
static void	del_vifp(struct vif *);
static void	encap_send(ipha_t *, mblk_t *, struct vif *, ipaddr_t);
static void	expire_upcalls(void *);
static void	fill_route(struct mfc *, struct mfcctl *, ip_stack_t *);
static void	free_queue(struct mfc *);
static int	get_assert(uchar_t *, ip_stack_t *);
static int	get_lsg_cnt(struct sioc_lsg_req *, ip_stack_t *);
static int	get_sg_cnt(struct sioc_sg_req *, ip_stack_t *);
static int	get_version(uchar_t *);
static int	get_vif_cnt(struct sioc_vif_req *, ip_stack_t *);
static int	ip_mdq(mblk_t *, ipha_t *, ill_t *,
		    ipaddr_t, struct mfc *);
static int	ip_mrouter_init(conn_t *, uchar_t *, int, ip_stack_t *);
static void	phyint_send(ipha_t *, mblk_t *, struct vif *, ipaddr_t);
static int	register_mforward(mblk_t *, ip_recv_attr_t *);
static void	register_send(ipha_t *, mblk_t *, struct vif *, ipaddr_t);
static int	set_assert(int *, ip_stack_t *);

/*
 * Token Bucket Filter functions
 */
static int	priority(struct vif *, ipha_t *);
static void	tbf_control(struct vif *, mblk_t *, ipha_t *);
static int	tbf_dq_sel(struct vif *, ipha_t *);
static void	tbf_process_q(struct vif *);
static void	tbf_queue(struct vif *, mblk_t *);
static void	tbf_reprocess_q(void *);
static void	tbf_send_packet(struct vif *, mblk_t *);
static void	tbf_update_tokens(struct vif *);
static void	release_mfc(struct mfcb *);

static boolean_t is_mrouter_off(ip_stack_t *);
/*
 * Encapsulation packets
 */

/* TTL value placed in the prototype encapsulation header below. */
#define	ENCAP_TTL	64

/* prototype IP hdr for encapsulated packets */
/*
 * The initializer is positional, so it depends on the member order of
 * ipha_t (declared elsewhere); members after the checksum are not listed
 * and are therefore zero-initialized by the language rules.
 */
static ipha_t multicast_encap_iphdr = {
	IP_SIMPLE_HDR_VERSION,
	0,				/* tos */
	sizeof (ipha_t),		/* total length */
	0,				/* id */
	0,				/* frag offset */
	ENCAP_TTL, IPPROTO_ENCAP,
	0,				/* checksum */
};

/*
 * Rate limit for assert notification messages, in nsec.
226 0 stevel */ 227 0 stevel #define ASSERT_MSG_TIME 3000000000 228 0 stevel 229 0 stevel 230 0 stevel #define VIF_REFHOLD(vifp) { \ 231 0 stevel mutex_enter(&(vifp)->v_lock); \ 232 0 stevel (vifp)->v_refcnt++; \ 233 0 stevel mutex_exit(&(vifp)->v_lock); \ 234 0 stevel } 235 0 stevel 236 0 stevel #define VIF_REFRELE_LOCKED(vifp) { \ 237 0 stevel (vifp)->v_refcnt--; \ 238 0 stevel if ((vifp)->v_refcnt == 0 && \ 239 0 stevel ((vifp)->v_marks & VIF_MARK_CONDEMNED)) { \ 240 0 stevel del_vifp(vifp); \ 241 0 stevel } else { \ 242 0 stevel mutex_exit(&(vifp)->v_lock); \ 243 0 stevel } \ 244 0 stevel } 245 0 stevel 246 0 stevel #define VIF_REFRELE(vifp) { \ 247 0 stevel mutex_enter(&(vifp)->v_lock); \ 248 0 stevel (vifp)->v_refcnt--; \ 249 0 stevel if ((vifp)->v_refcnt == 0 && \ 250 0 stevel ((vifp)->v_marks & VIF_MARK_CONDEMNED)) { \ 251 0 stevel del_vifp(vifp); \ 252 0 stevel } else { \ 253 0 stevel mutex_exit(&(vifp)->v_lock); \ 254 0 stevel } \ 255 0 stevel } 256 0 stevel 257 0 stevel #define MFCB_REFHOLD(mfcb) { \ 258 0 stevel mutex_enter(&(mfcb)->mfcb_lock); \ 259 0 stevel (mfcb)->mfcb_refcnt++; \ 260 0 stevel ASSERT((mfcb)->mfcb_refcnt != 0); \ 261 0 stevel mutex_exit(&(mfcb)->mfcb_lock); \ 262 0 stevel } 263 0 stevel 264 0 stevel #define MFCB_REFRELE(mfcb) { \ 265 0 stevel mutex_enter(&(mfcb)->mfcb_lock); \ 266 0 stevel ASSERT((mfcb)->mfcb_refcnt != 0); \ 267 0 stevel if (--(mfcb)->mfcb_refcnt == 0 && \ 268 0 stevel ((mfcb)->mfcb_marks & MFCB_MARK_CONDEMNED)) { \ 269 0 stevel release_mfc(mfcb); \ 270 0 stevel } \ 271 0 stevel mutex_exit(&(mfcb)->mfcb_lock); \ 272 0 stevel } 273 0 stevel 274 0 stevel /* 275 0 stevel * MFCFIND: 276 0 stevel * Find a route for a given origin IP address and multicast group address. 277 0 stevel * Skip entries with pending upcalls. 278 0 stevel * Type of service parameter to be added in the future! 
279 0 stevel */ 280 0 stevel #define MFCFIND(mfcbp, o, g, rt) { \ 281 0 stevel struct mfc *_mb_rt = NULL; \ 282 0 stevel rt = NULL; \ 283 0 stevel _mb_rt = mfcbp->mfcb_mfc; \ 284 0 stevel while (_mb_rt) { \ 285 0 stevel if ((_mb_rt->mfc_origin.s_addr == o) && \ 286 0 stevel (_mb_rt->mfc_mcastgrp.s_addr == g) && \ 287 0 stevel (_mb_rt->mfc_rte == NULL) && \ 288 0 stevel (!(_mb_rt->mfc_marks & MFCB_MARK_CONDEMNED))) { \ 289 0 stevel rt = _mb_rt; \ 290 0 stevel break; \ 291 0 stevel } \ 292 0 stevel _mb_rt = _mb_rt->mfc_next; \ 293 0 stevel } \ 294 0 stevel } 295 0 stevel 296 0 stevel /* 297 0 stevel * BSD uses timeval with sec and usec. In SunOS 5.x uniqtime() and gethrtime() 298 0 stevel * are inefficient. We use gethrestime() which returns a timespec_t with 299 0 stevel * sec and nsec, the resolution is machine dependent. 300 0 stevel * The following 2 macros have been changed to use nsec instead of usec. 301 0 stevel */ 302 0 stevel /* 303 0 stevel * Macros to compute elapsed time efficiently. 304 0 stevel * Borrowed from Van Jacobson's scheduling code. 305 0 stevel * Delta should be a hrtime_t. 306 0 stevel */ 307 0 stevel #define TV_DELTA(a, b, delta) { \ 308 0 stevel int xxs; \ 309 0 stevel \ 310 0 stevel delta = (a).tv_nsec - (b).tv_nsec; \ 311 0 stevel if ((xxs = (a).tv_sec - (b).tv_sec) != 0) { \ 312 0 stevel switch (xxs) { \ 313 0 stevel case 2: \ 314 0 stevel delta += 1000000000; \ 315 0 stevel /*FALLTHROUGH*/ \ 316 0 stevel case 1: \ 317 0 stevel delta += 1000000000; \ 318 0 stevel break; \ 319 0 stevel default: \ 320 0 stevel delta += (1000000000 * xxs); \ 321 0 stevel } \ 322 0 stevel } \ 323 0 stevel } 324 0 stevel 325 0 stevel #define TV_LT(a, b) (((a).tv_nsec < (b).tv_nsec && \ 326 0 stevel (a).tv_sec <= (b).tv_sec) || (a).tv_sec < (b).tv_sec) 327 0 stevel 328 0 stevel /* 329 0 stevel * Handle MRT setsockopt commands to modify the multicast routing tables. 
330 0 stevel */ 331 0 stevel int 332 11042 Erik ip_mrouter_set(int cmd, conn_t *connp, int checkonly, uchar_t *data, 333 11042 Erik int datalen) 334 0 stevel { 335 5240 nordmark ip_stack_t *ipst = connp->conn_netstack->netstack_ip; 336 3448 dh155122 337 3448 dh155122 mutex_enter(&ipst->ips_ip_g_mrouter_mutex); 338 5240 nordmark if (cmd != MRT_INIT && connp != ipst->ips_ip_g_mrouter) { 339 3448 dh155122 mutex_exit(&ipst->ips_ip_g_mrouter_mutex); 340 0 stevel return (EACCES); 341 0 stevel } 342 3448 dh155122 mutex_exit(&ipst->ips_ip_g_mrouter_mutex); 343 0 stevel 344 0 stevel if (checkonly) { 345 0 stevel /* 346 0 stevel * do not do operation, just pretend to - new T_CHECK 347 0 stevel * Note: Even routines further on can probably fail but 348 0 stevel * this T_CHECK stuff is only to please XTI so it not 349 0 stevel * necessary to be perfect. 350 0 stevel */ 351 0 stevel switch (cmd) { 352 0 stevel case MRT_INIT: 353 0 stevel case MRT_DONE: 354 0 stevel case MRT_ADD_VIF: 355 0 stevel case MRT_DEL_VIF: 356 0 stevel case MRT_ADD_MFC: 357 0 stevel case MRT_DEL_MFC: 358 0 stevel case MRT_ASSERT: 359 5240 nordmark return (0); 360 0 stevel default: 361 5240 nordmark return (EOPNOTSUPP); 362 0 stevel } 363 0 stevel } 364 0 stevel 365 0 stevel /* 366 0 stevel * make sure no command is issued after multicast routing has been 367 0 stevel * turned off. 
368 0 stevel */ 369 0 stevel if (cmd != MRT_INIT && cmd != MRT_DONE) { 370 3448 dh155122 if (is_mrouter_off(ipst)) 371 0 stevel return (EINVAL); 372 0 stevel } 373 0 stevel 374 0 stevel switch (cmd) { 375 5240 nordmark case MRT_INIT: return (ip_mrouter_init(connp, data, datalen, ipst)); 376 11042 Erik case MRT_DONE: return (ip_mrouter_done(ipst)); 377 11042 Erik case MRT_ADD_VIF: return (add_vif((struct vifctl *)data, connp, ipst)); 378 11042 Erik case MRT_DEL_VIF: return (del_vif((vifi_t *)data, ipst)); 379 3448 dh155122 case MRT_ADD_MFC: return (add_mfc((struct mfcctl *)data, ipst)); 380 3448 dh155122 case MRT_DEL_MFC: return (del_mfc((struct mfcctl *)data, ipst)); 381 3448 dh155122 case MRT_ASSERT: return (set_assert((int *)data, ipst)); 382 0 stevel default: return (EOPNOTSUPP); 383 0 stevel } 384 0 stevel } 385 0 stevel 386 0 stevel /* 387 0 stevel * Handle MRT getsockopt commands 388 0 stevel */ 389 0 stevel int 390 11042 Erik ip_mrouter_get(int cmd, conn_t *connp, uchar_t *data) 391 0 stevel { 392 5240 nordmark ip_stack_t *ipst = connp->conn_netstack->netstack_ip; 393 3448 dh155122 394 5240 nordmark if (connp != ipst->ips_ip_g_mrouter) 395 0 stevel return (EACCES); 396 0 stevel 397 0 stevel switch (cmd) { 398 0 stevel case MRT_VERSION: return (get_version((uchar_t *)data)); 399 3448 dh155122 case MRT_ASSERT: return (get_assert((uchar_t *)data, ipst)); 400 0 stevel default: return (EOPNOTSUPP); 401 0 stevel } 402 0 stevel } 403 0 stevel 404 0 stevel /* 405 0 stevel * Handle ioctl commands to obtain information from the cache. 406 0 stevel * Called with shared access to IP. These are read_only ioctls. 
407 0 stevel */ 408 0 stevel /* ARGSUSED */ 409 0 stevel int 410 0 stevel mrt_ioctl(ipif_t *ipif, sin_t *sin, queue_t *q, mblk_t *mp, 411 0 stevel ip_ioctl_cmd_t *ipip, void *if_req) 412 0 stevel { 413 0 stevel mblk_t *mp1; 414 0 stevel struct iocblk *iocp = (struct iocblk *)mp->b_rptr; 415 5240 nordmark conn_t *connp = Q_TO_CONN(q); 416 5240 nordmark ip_stack_t *ipst = connp->conn_netstack->netstack_ip; 417 0 stevel 418 0 stevel /* Existence verified in ip_wput_nondata */ 419 0 stevel mp1 = mp->b_cont->b_cont; 420 0 stevel 421 0 stevel switch (iocp->ioc_cmd) { 422 0 stevel case (SIOCGETVIFCNT): 423 3448 dh155122 return (get_vif_cnt((struct sioc_vif_req *)mp1->b_rptr, ipst)); 424 0 stevel case (SIOCGETSGCNT): 425 3448 dh155122 return (get_sg_cnt((struct sioc_sg_req *)mp1->b_rptr, ipst)); 426 0 stevel case (SIOCGETLSGCNT): 427 3448 dh155122 return (get_lsg_cnt((struct sioc_lsg_req *)mp1->b_rptr, ipst)); 428 0 stevel default: 429 0 stevel return (EINVAL); 430 0 stevel } 431 0 stevel } 432 0 stevel 433 0 stevel /* 434 0 stevel * Returns the packet, byte, rpf-failure count for the source, group provided. 
435 0 stevel */ 436 0 stevel static int 437 3448 dh155122 get_sg_cnt(struct sioc_sg_req *req, ip_stack_t *ipst) 438 0 stevel { 439 0 stevel struct mfc *rt; 440 0 stevel struct mfcb *mfcbp; 441 0 stevel 442 3448 dh155122 mfcbp = &ipst->ips_mfcs[MFCHASH(req->src.s_addr, req->grp.s_addr)]; 443 0 stevel MFCB_REFHOLD(mfcbp); 444 0 stevel MFCFIND(mfcbp, req->src.s_addr, req->grp.s_addr, rt); 445 0 stevel 446 0 stevel if (rt != NULL) { 447 0 stevel mutex_enter(&rt->mfc_mutex); 448 0 stevel req->pktcnt = rt->mfc_pkt_cnt; 449 0 stevel req->bytecnt = rt->mfc_byte_cnt; 450 0 stevel req->wrong_if = rt->mfc_wrong_if; 451 0 stevel mutex_exit(&rt->mfc_mutex); 452 0 stevel } else 453 0 stevel req->pktcnt = req->bytecnt = req->wrong_if = 0xffffffffU; 454 0 stevel 455 0 stevel MFCB_REFRELE(mfcbp); 456 0 stevel return (0); 457 0 stevel } 458 0 stevel 459 0 stevel /* 460 0 stevel * Returns the packet, byte, rpf-failure count for the source, group provided. 461 0 stevel * Uses larger counters and IPv6 addresses. 462 0 stevel */ 463 0 stevel /* ARGSUSED XXX until implemented */ 464 0 stevel static int 465 3448 dh155122 get_lsg_cnt(struct sioc_lsg_req *req, ip_stack_t *ipst) 466 0 stevel { 467 0 stevel /* XXX TODO SIOCGETLSGCNT */ 468 0 stevel return (ENXIO); 469 0 stevel } 470 0 stevel 471 0 stevel /* 472 0 stevel * Returns the input and output packet and byte counts on the vif provided. 473 0 stevel */ 474 0 stevel static int 475 3448 dh155122 get_vif_cnt(struct sioc_vif_req *req, ip_stack_t *ipst) 476 0 stevel { 477 0 stevel vifi_t vifi = req->vifi; 478 0 stevel 479 3448 dh155122 if (vifi >= ipst->ips_numvifs) 480 0 stevel return (EINVAL); 481 0 stevel 482 0 stevel /* 483 0 stevel * No locks here, an approximation is fine. 
484 0 stevel */ 485 3448 dh155122 req->icount = ipst->ips_vifs[vifi].v_pkt_in; 486 3448 dh155122 req->ocount = ipst->ips_vifs[vifi].v_pkt_out; 487 3448 dh155122 req->ibytes = ipst->ips_vifs[vifi].v_bytes_in; 488 3448 dh155122 req->obytes = ipst->ips_vifs[vifi].v_bytes_out; 489 0 stevel 490 0 stevel return (0); 491 0 stevel } 492 0 stevel 493 0 stevel static int 494 0 stevel get_version(uchar_t *data) 495 0 stevel { 496 0 stevel int *v = (int *)data; 497 0 stevel 498 0 stevel *v = 0x0305; /* XXX !!!! */ 499 0 stevel 500 0 stevel return (0); 501 0 stevel } 502 0 stevel 503 0 stevel /* 504 0 stevel * Set PIM assert processing global. 505 0 stevel */ 506 0 stevel static int 507 3448 dh155122 set_assert(int *i, ip_stack_t *ipst) 508 0 stevel { 509 0 stevel if ((*i != 1) && (*i != 0)) 510 0 stevel return (EINVAL); 511 0 stevel 512 3448 dh155122 ipst->ips_pim_assert = *i; 513 0 stevel 514 0 stevel return (0); 515 0 stevel } 516 0 stevel 517 0 stevel /* 518 0 stevel * Get PIM assert processing global. 519 0 stevel */ 520 0 stevel static int 521 3448 dh155122 get_assert(uchar_t *data, ip_stack_t *ipst) 522 0 stevel { 523 0 stevel int *i = (int *)data; 524 0 stevel 525 3448 dh155122 *i = ipst->ips_pim_assert; 526 0 stevel 527 0 stevel return (0); 528 0 stevel } 529 0 stevel 530 0 stevel /* 531 0 stevel * Enable multicast routing. 
532 0 stevel */ 533 0 stevel static int 534 5240 nordmark ip_mrouter_init(conn_t *connp, uchar_t *data, int datalen, ip_stack_t *ipst) 535 0 stevel { 536 0 stevel int *v; 537 0 stevel 538 0 stevel if (data == NULL || (datalen != sizeof (int))) 539 0 stevel return (ENOPROTOOPT); 540 0 stevel 541 0 stevel v = (int *)data; 542 0 stevel if (*v != 1) 543 0 stevel return (ENOPROTOOPT); 544 0 stevel 545 3448 dh155122 mutex_enter(&ipst->ips_ip_g_mrouter_mutex); 546 3448 dh155122 if (ipst->ips_ip_g_mrouter != NULL) { 547 3448 dh155122 mutex_exit(&ipst->ips_ip_g_mrouter_mutex); 548 0 stevel return (EADDRINUSE); 549 0 stevel } 550 0 stevel 551 5240 nordmark /* 552 5240 nordmark * MRT_INIT should only be allowed for RAW sockets, but we double 553 5240 nordmark * check. 554 5240 nordmark */ 555 5240 nordmark if (!IPCL_IS_RAWIP(connp)) { 556 5240 nordmark mutex_exit(&ipst->ips_ip_g_mrouter_mutex); 557 5240 nordmark return (EINVAL); 558 5240 nordmark } 559 5240 nordmark 560 5240 nordmark ipst->ips_ip_g_mrouter = connp; 561 0 stevel connp->conn_multi_router = 1; 562 0 stevel /* In order for tunnels to work we have to turn ip_g_forward on */ 563 3448 dh155122 if (!WE_ARE_FORWARDING(ipst)) { 564 3448 dh155122 if (ipst->ips_ip_mrtdebug > 1) { 565 5240 nordmark (void) mi_strlog(connp->conn_rq, 1, SL_TRACE, 566 0 stevel "ip_mrouter_init: turning on forwarding"); 567 0 stevel } 568 3448 dh155122 ipst->ips_saved_ip_g_forward = ipst->ips_ip_g_forward; 569 3448 dh155122 ipst->ips_ip_g_forward = IP_FORWARD_ALWAYS; 570 0 stevel } 571 0 stevel 572 3448 dh155122 mutex_exit(&ipst->ips_ip_g_mrouter_mutex); 573 0 stevel return (0); 574 3448 dh155122 } 575 3448 dh155122 576 3448 dh155122 void 577 3448 dh155122 ip_mrouter_stack_init(ip_stack_t *ipst) 578 3448 dh155122 { 579 3448 dh155122 mutex_init(&ipst->ips_ip_g_mrouter_mutex, NULL, MUTEX_DEFAULT, NULL); 580 3448 dh155122 581 3448 dh155122 ipst->ips_vifs = kmem_zalloc(sizeof (struct vif) * (MAXVIFS+1), 582 3448 dh155122 KM_SLEEP); 583 3448 
dh155122 ipst->ips_mrtstat = kmem_zalloc(sizeof (struct mrtstat), KM_SLEEP); 584 3448 dh155122 /* 585 3448 dh155122 * mfctable: 586 3448 dh155122 * Includes all mfcs, including waiting upcalls. 587 3448 dh155122 * Multiple mfcs per bucket. 588 3448 dh155122 */ 589 3448 dh155122 ipst->ips_mfcs = kmem_zalloc(sizeof (struct mfcb) * MFCTBLSIZ, 590 3448 dh155122 KM_SLEEP); 591 3448 dh155122 /* 592 3448 dh155122 * Define the token bucket filter structures. 593 3448 dh155122 * tbftable -> each vif has one of these for storing info. 594 3448 dh155122 */ 595 3448 dh155122 ipst->ips_tbfs = kmem_zalloc(sizeof (struct tbf) * MAXVIFS, KM_SLEEP); 596 3448 dh155122 597 3448 dh155122 mutex_init(&ipst->ips_last_encap_lock, NULL, MUTEX_DEFAULT, NULL); 598 3448 dh155122 599 3448 dh155122 ipst->ips_mrtstat->mrts_vifctlSize = sizeof (struct vifctl); 600 3448 dh155122 ipst->ips_mrtstat->mrts_mfcctlSize = sizeof (struct mfcctl); 601 0 stevel } 602 0 stevel 603 0 stevel /* 604 0 stevel * Disable multicast routing. 605 0 stevel * Didn't use global timeout_val (BSD version), instead check the mfctable. 
606 0 stevel */ 607 0 stevel int 608 11042 Erik ip_mrouter_done(ip_stack_t *ipst) 609 0 stevel { 610 5240 nordmark conn_t *mrouter; 611 0 stevel vifi_t vifi; 612 0 stevel struct mfc *mfc_rt; 613 0 stevel int i; 614 0 stevel 615 3448 dh155122 mutex_enter(&ipst->ips_ip_g_mrouter_mutex); 616 3448 dh155122 if (ipst->ips_ip_g_mrouter == NULL) { 617 3448 dh155122 mutex_exit(&ipst->ips_ip_g_mrouter_mutex); 618 0 stevel return (EINVAL); 619 0 stevel } 620 0 stevel 621 5240 nordmark mrouter = ipst->ips_ip_g_mrouter; 622 0 stevel 623 3448 dh155122 if (ipst->ips_saved_ip_g_forward != -1) { 624 3448 dh155122 if (ipst->ips_ip_mrtdebug > 1) { 625 5240 nordmark (void) mi_strlog(mrouter->conn_rq, 1, SL_TRACE, 626 0 stevel "ip_mrouter_done: turning off forwarding"); 627 0 stevel } 628 3448 dh155122 ipst->ips_ip_g_forward = ipst->ips_saved_ip_g_forward; 629 3448 dh155122 ipst->ips_saved_ip_g_forward = -1; 630 0 stevel } 631 0 stevel 632 0 stevel /* 633 0 stevel * Always clear cache when vifs change. 634 3448 dh155122 * No need to get ipst->ips_last_encap_lock since we are running as 635 3448 dh155122 * a writer. 636 0 stevel */ 637 3448 dh155122 mutex_enter(&ipst->ips_last_encap_lock); 638 3448 dh155122 ipst->ips_last_encap_src = 0; 639 3448 dh155122 ipst->ips_last_encap_vif = NULL; 640 3448 dh155122 mutex_exit(&ipst->ips_last_encap_lock); 641 5240 nordmark mrouter->conn_multi_router = 0; 642 0 stevel 643 3448 dh155122 mutex_exit(&ipst->ips_ip_g_mrouter_mutex); 644 0 stevel 645 0 stevel /* 646 0 stevel * For each phyint in use, 647 0 stevel * disable promiscuous reception of all IP multicasts. 648 0 stevel */ 649 0 stevel for (vifi = 0; vifi < MAXVIFS; vifi++) { 650 3448 dh155122 struct vif *vifp = ipst->ips_vifs + vifi; 651 0 stevel 652 0 stevel mutex_enter(&vifp->v_lock); 653 0 stevel /* 654 0 stevel * if the vif is active mark it condemned. 
655 0 stevel */ 656 0 stevel if (vifp->v_marks & VIF_MARK_GOOD) { 657 0 stevel ASSERT(vifp->v_ipif != NULL); 658 0 stevel ipif_refhold(vifp->v_ipif); 659 0 stevel /* Phyint only */ 660 0 stevel if (!(vifp->v_flags & (VIFF_TUNNEL | VIFF_REGISTER))) { 661 0 stevel ipif_t *ipif = vifp->v_ipif; 662 11042 Erik ilm_t *ilm = vifp->v_ilm; 663 0 stevel 664 11042 Erik vifp->v_ilm = NULL; 665 11042 Erik vifp->v_marks &= ~VIF_MARK_GOOD; 666 11042 Erik vifp->v_marks |= VIF_MARK_CONDEMNED; 667 0 stevel 668 11042 Erik mutex_exit(&(vifp)->v_lock); 669 11042 Erik if (ilm != NULL) { 670 11042 Erik ill_t *ill = ipif->ipif_ill; 671 11042 Erik 672 11042 Erik (void) ip_delmulti(ilm); 673 11042 Erik ASSERT(ill->ill_mrouter_cnt > 0); 674 11042 Erik atomic_dec_32(&ill->ill_mrouter_cnt); 675 0 stevel } 676 0 stevel mutex_enter(&vifp->v_lock); 677 0 stevel } 678 10495 Erik ipif_refrele(vifp->v_ipif); 679 0 stevel /* 680 0 stevel * decreases the refcnt added in add_vif. 681 0 stevel * and release v_lock. 682 0 stevel */ 683 0 stevel VIF_REFRELE_LOCKED(vifp); 684 0 stevel } else { 685 0 stevel mutex_exit(&vifp->v_lock); 686 0 stevel continue; 687 0 stevel } 688 0 stevel } 689 0 stevel 690 3448 dh155122 mutex_enter(&ipst->ips_numvifs_mutex); 691 3448 dh155122 ipst->ips_numvifs = 0; 692 3448 dh155122 ipst->ips_pim_assert = 0; 693 3448 dh155122 ipst->ips_reg_vif_num = ALL_VIFS; 694 3448 dh155122 mutex_exit(&ipst->ips_numvifs_mutex); 695 0 stevel 696 0 stevel /* 697 0 stevel * Free upcall msgs. 698 0 stevel * Go through mfctable and stop any outstanding upcall 699 0 stevel * timeouts remaining on mfcs. 
700 0 stevel */ 701 0 stevel for (i = 0; i < MFCTBLSIZ; i++) { 702 3448 dh155122 mutex_enter(&ipst->ips_mfcs[i].mfcb_lock); 703 3448 dh155122 ipst->ips_mfcs[i].mfcb_refcnt++; 704 3448 dh155122 ipst->ips_mfcs[i].mfcb_marks |= MFCB_MARK_CONDEMNED; 705 3448 dh155122 mutex_exit(&ipst->ips_mfcs[i].mfcb_lock); 706 3448 dh155122 mfc_rt = ipst->ips_mfcs[i].mfcb_mfc; 707 0 stevel while (mfc_rt) { 708 0 stevel /* Free upcalls */ 709 0 stevel mutex_enter(&mfc_rt->mfc_mutex); 710 0 stevel if (mfc_rt->mfc_rte != NULL) { 711 0 stevel if (mfc_rt->mfc_timeout_id != 0) { 712 0 stevel /* 713 0 stevel * OK to drop the lock as we have 714 0 stevel * a refcnt on the bucket. timeout 715 0 stevel * can fire but it will see that 716 0 stevel * mfc_timeout_id == 0 and not do 717 0 stevel * anything. see expire_upcalls(). 718 0 stevel */ 719 0 stevel mfc_rt->mfc_timeout_id = 0; 720 0 stevel mutex_exit(&mfc_rt->mfc_mutex); 721 0 stevel (void) untimeout( 722 0 stevel mfc_rt->mfc_timeout_id); 723 0 stevel mfc_rt->mfc_timeout_id = 0; 724 0 stevel mutex_enter(&mfc_rt->mfc_mutex); 725 0 stevel 726 0 stevel /* 727 0 stevel * all queued upcall packets 728 0 stevel * and mblk will be freed in 729 0 stevel * release_mfc(). 
730 0 stevel */ 731 0 stevel } 732 0 stevel } 733 0 stevel 734 0 stevel mfc_rt->mfc_marks |= MFCB_MARK_CONDEMNED; 735 0 stevel 736 0 stevel mutex_exit(&mfc_rt->mfc_mutex); 737 0 stevel mfc_rt = mfc_rt->mfc_next; 738 0 stevel } 739 3448 dh155122 MFCB_REFRELE(&ipst->ips_mfcs[i]); 740 0 stevel } 741 0 stevel 742 3448 dh155122 mutex_enter(&ipst->ips_ip_g_mrouter_mutex); 743 3448 dh155122 ipst->ips_ip_g_mrouter = NULL; 744 3448 dh155122 mutex_exit(&ipst->ips_ip_g_mrouter_mutex); 745 0 stevel return (0); 746 0 stevel } 747 0 stevel 748 3448 dh155122 void 749 3448 dh155122 ip_mrouter_stack_destroy(ip_stack_t *ipst) 750 3448 dh155122 { 751 3448 dh155122 struct mfcb *mfcbp; 752 3448 dh155122 struct mfc *rt; 753 3448 dh155122 int i; 754 3448 dh155122 755 3448 dh155122 for (i = 0; i < MFCTBLSIZ; i++) { 756 3448 dh155122 mfcbp = &ipst->ips_mfcs[i]; 757 3448 dh155122 758 3448 dh155122 while ((rt = mfcbp->mfcb_mfc) != NULL) { 759 3448 dh155122 (void) printf("ip_mrouter_stack_destroy: free for %d\n", 760 3448 dh155122 i); 761 3448 dh155122 762 3448 dh155122 mfcbp->mfcb_mfc = rt->mfc_next; 763 3448 dh155122 free_queue(rt); 764 3448 dh155122 mi_free(rt); 765 3448 dh155122 } 766 3448 dh155122 } 767 3448 dh155122 kmem_free(ipst->ips_vifs, sizeof (struct vif) * (MAXVIFS+1)); 768 3448 dh155122 ipst->ips_vifs = NULL; 769 3448 dh155122 kmem_free(ipst->ips_mrtstat, sizeof (struct mrtstat)); 770 3448 dh155122 ipst->ips_mrtstat = NULL; 771 3448 dh155122 kmem_free(ipst->ips_mfcs, sizeof (struct mfcb) * MFCTBLSIZ); 772 3448 dh155122 ipst->ips_mfcs = NULL; 773 3448 dh155122 kmem_free(ipst->ips_tbfs, sizeof (struct tbf) * MAXVIFS); 774 3448 dh155122 ipst->ips_tbfs = NULL; 775 3448 dh155122 776 3448 dh155122 mutex_destroy(&ipst->ips_last_encap_lock); 777 3448 dh155122 mutex_destroy(&ipst->ips_ip_g_mrouter_mutex); 778 3448 dh155122 } 779 3448 dh155122 780 0 stevel static boolean_t 781 3448 dh155122 is_mrouter_off(ip_stack_t *ipst) 782 0 stevel { 783 5240 nordmark conn_t *mrouter; 784 0 stevel 785 
3448 dh155122 mutex_enter(&ipst->ips_ip_g_mrouter_mutex); 786 3448 dh155122 if (ipst->ips_ip_g_mrouter == NULL) { 787 3448 dh155122 mutex_exit(&ipst->ips_ip_g_mrouter_mutex); 788 0 stevel return (B_TRUE); 789 0 stevel } 790 0 stevel 791 5240 nordmark mrouter = ipst->ips_ip_g_mrouter; 792 5240 nordmark if (mrouter->conn_multi_router == 0) { 793 3448 dh155122 mutex_exit(&ipst->ips_ip_g_mrouter_mutex); 794 0 stevel return (B_TRUE); 795 0 stevel } 796 3448 dh155122 mutex_exit(&ipst->ips_ip_g_mrouter_mutex); 797 0 stevel return (B_FALSE); 798 0 stevel } 799 0 stevel 800 0 stevel static void 801 0 stevel unlock_good_vif(struct vif *vifp) 802 0 stevel { 803 0 stevel ASSERT(vifp->v_ipif != NULL); 804 0 stevel ipif_refrele(vifp->v_ipif); 805 0 stevel VIF_REFRELE(vifp); 806 0 stevel } 807 0 stevel 808 0 stevel static boolean_t 809 0 stevel lock_good_vif(struct vif *vifp) 810 0 stevel { 811 0 stevel mutex_enter(&vifp->v_lock); 812 0 stevel if (!(vifp->v_marks & VIF_MARK_GOOD)) { 813 0 stevel mutex_exit(&vifp->v_lock); 814 0 stevel return (B_FALSE); 815 0 stevel } 816 0 stevel 817 0 stevel ASSERT(vifp->v_ipif != NULL); 818 0 stevel mutex_enter(&vifp->v_ipif->ipif_ill->ill_lock); 819 0 stevel if (!IPIF_CAN_LOOKUP(vifp->v_ipif)) { 820 0 stevel mutex_exit(&vifp->v_ipif->ipif_ill->ill_lock); 821 0 stevel mutex_exit(&vifp->v_lock); 822 0 stevel return (B_FALSE); 823 0 stevel } 824 0 stevel ipif_refhold_locked(vifp->v_ipif); 825 0 stevel mutex_exit(&vifp->v_ipif->ipif_ill->ill_lock); 826 0 stevel vifp->v_refcnt++; 827 0 stevel mutex_exit(&vifp->v_lock); 828 0 stevel return (B_TRUE); 829 0 stevel } 830 0 stevel 831 0 stevel /* 832 0 stevel * Add a vif to the vif table. 
833 0 stevel */ 834 0 stevel static int 835 11042 Erik add_vif(struct vifctl *vifcp, conn_t *connp, ip_stack_t *ipst) 836 0 stevel { 837 3448 dh155122 struct vif *vifp = ipst->ips_vifs + vifcp->vifc_vifi; 838 0 stevel ipif_t *ipif; 839 11042 Erik int error = 0; 840 3448 dh155122 struct tbf *v_tbf = ipst->ips_tbfs + vifcp->vifc_vifi; 841 5240 nordmark conn_t *mrouter = ipst->ips_ip_g_mrouter; 842 11042 Erik ilm_t *ilm; 843 11042 Erik ill_t *ill; 844 0 stevel 845 0 stevel ASSERT(connp != NULL); 846 0 stevel 847 0 stevel if (vifcp->vifc_vifi >= MAXVIFS) 848 0 stevel return (EINVAL); 849 0 stevel 850 3448 dh155122 if (is_mrouter_off(ipst)) 851 0 stevel return (EINVAL); 852 0 stevel 853 0 stevel mutex_enter(&vifp->v_lock); 854 0 stevel /* 855 0 stevel * Viftable entry should be 0. 856 0 stevel * if v_marks == 0 but v_refcnt != 0 means struct is being 857 0 stevel * initialized. 858 0 stevel * 859 0 stevel * Also note that it is very unlikely that we will get a MRT_ADD_VIF 860 0 stevel * request while the delete is in progress, mrouted only sends add 861 0 stevel * requests when a new interface is added and the new interface cannot 862 0 stevel * have the same vifi as an existing interface. We make sure that 863 0 stevel * ill_delete will block till the vif is deleted by adding a refcnt 864 0 stevel * to ipif in del_vif(). 
865 0 stevel */ 866 0 stevel if (vifp->v_lcl_addr.s_addr != 0 || 867 0 stevel vifp->v_marks != 0 || 868 0 stevel vifp->v_refcnt != 0) { 869 0 stevel mutex_exit(&vifp->v_lock); 870 0 stevel return (EADDRINUSE); 871 0 stevel } 872 0 stevel 873 0 stevel /* Incoming vif should not be 0 */ 874 0 stevel if (vifcp->vifc_lcl_addr.s_addr == 0) { 875 0 stevel mutex_exit(&vifp->v_lock); 876 0 stevel return (EINVAL); 877 0 stevel } 878 0 stevel 879 0 stevel vifp->v_refcnt++; 880 0 stevel mutex_exit(&vifp->v_lock); 881 0 stevel /* Find the interface with the local address */ 882 0 stevel ipif = ipif_lookup_addr((ipaddr_t)vifcp->vifc_lcl_addr.s_addr, NULL, 883 11042 Erik IPCL_ZONEID(connp), ipst); 884 0 stevel if (ipif == NULL) { 885 0 stevel VIF_REFRELE(vifp); 886 0 stevel return (EADDRNOTAVAIL); 887 0 stevel } 888 0 stevel 889 3448 dh155122 if (ipst->ips_ip_mrtdebug > 1) { 890 5240 nordmark (void) mi_strlog(mrouter->conn_rq, 1, SL_TRACE, 891 0 stevel "add_vif: src 0x%x enter", 892 0 stevel vifcp->vifc_lcl_addr.s_addr); 893 0 stevel } 894 0 stevel 895 0 stevel mutex_enter(&vifp->v_lock); 896 0 stevel /* 897 0 stevel * Always clear cache when vifs change. 898 0 stevel * Needed to ensure that src isn't left over from before vif was added. 899 0 stevel * No need to get last_encap_lock, since we are running as a writer. 
900 0 stevel */ 901 0 stevel 902 3448 dh155122 mutex_enter(&ipst->ips_last_encap_lock); 903 3448 dh155122 ipst->ips_last_encap_src = 0; 904 3448 dh155122 ipst->ips_last_encap_vif = NULL; 905 3448 dh155122 mutex_exit(&ipst->ips_last_encap_lock); 906 0 stevel 907 0 stevel if (vifcp->vifc_flags & VIFF_TUNNEL) { 908 0 stevel if ((vifcp->vifc_flags & VIFF_SRCRT) != 0) { 909 0 stevel cmn_err(CE_WARN, 910 0 stevel "add_vif: source route tunnels not supported\n"); 911 0 stevel VIF_REFRELE_LOCKED(vifp); 912 0 stevel ipif_refrele(ipif); 913 0 stevel return (EOPNOTSUPP); 914 0 stevel } 915 0 stevel vifp->v_rmt_addr = vifcp->vifc_rmt_addr; 916 0 stevel 917 0 stevel } else { 918 0 stevel /* Phyint or Register vif */ 919 0 stevel if (vifcp->vifc_flags & VIFF_REGISTER) { 920 0 stevel /* 921 0 stevel * Note: Since all IPPROTO_IP level options (including 922 0 stevel * MRT_ADD_VIF) are done exclusively via 923 0 stevel * ip_optmgmt_writer(), a lock is not necessary to 924 0 stevel * protect reg_vif_num. 
925 0 stevel */ 926 3448 dh155122 mutex_enter(&ipst->ips_numvifs_mutex); 927 3448 dh155122 if (ipst->ips_reg_vif_num == ALL_VIFS) { 928 3448 dh155122 ipst->ips_reg_vif_num = vifcp->vifc_vifi; 929 3448 dh155122 mutex_exit(&ipst->ips_numvifs_mutex); 930 0 stevel } else { 931 3448 dh155122 mutex_exit(&ipst->ips_numvifs_mutex); 932 0 stevel VIF_REFRELE_LOCKED(vifp); 933 0 stevel ipif_refrele(ipif); 934 0 stevel return (EADDRINUSE); 935 0 stevel } 936 0 stevel } 937 0 stevel 938 0 stevel /* Make sure the interface supports multicast */ 939 0 stevel if ((ipif->ipif_ill->ill_flags & ILLF_MULTICAST) == 0) { 940 0 stevel VIF_REFRELE_LOCKED(vifp); 941 0 stevel ipif_refrele(ipif); 942 0 stevel if (vifcp->vifc_flags & VIFF_REGISTER) { 943 3448 dh155122 mutex_enter(&ipst->ips_numvifs_mutex); 944 3448 dh155122 ipst->ips_reg_vif_num = ALL_VIFS; 945 3448 dh155122 mutex_exit(&ipst->ips_numvifs_mutex); 946 0 stevel } 947 0 stevel return (EOPNOTSUPP); 948 0 stevel } 949 0 stevel /* Enable promiscuous reception of all IP mcasts from the if */ 950 0 stevel mutex_exit(&vifp->v_lock); 951 11042 Erik 952 11042 Erik ill = ipif->ipif_ill; 953 11042 Erik if (IS_UNDER_IPMP(ill)) 954 11042 Erik ill = ipmp_ill_hold_ipmp_ill(ill); 955 11042 Erik 956 11042 Erik if (ill == NULL) { 957 11042 Erik ilm = NULL; 958 11042 Erik } else { 959 11042 Erik ilm = ip_addmulti(&ipv6_all_zeros, ill, 960 11042 Erik ipif->ipif_zoneid, &error); 961 11042 Erik if (ilm != NULL) 962 11042 Erik atomic_inc_32(&ill->ill_mrouter_cnt); 963 11042 Erik if (IS_UNDER_IPMP(ipif->ipif_ill)) { 964 11042 Erik ill_refrele(ill); 965 11042 Erik ill = ipif->ipif_ill; 966 11042 Erik } 967 11042 Erik } 968 11042 Erik 969 0 stevel mutex_enter(&vifp->v_lock); 970 0 stevel /* 971 0 stevel * since we released the lock lets make sure that 972 0 stevel * ip_mrouter_done() has not been called. 
973 0 stevel */ 974 11042 Erik if (ilm == NULL || is_mrouter_off(ipst)) { 975 11042 Erik if (ilm != NULL) { 976 11042 Erik (void) ip_delmulti(ilm); 977 11042 Erik ASSERT(ill->ill_mrouter_cnt > 0); 978 11042 Erik atomic_dec_32(&ill->ill_mrouter_cnt); 979 11042 Erik } 980 0 stevel if (vifcp->vifc_flags & VIFF_REGISTER) { 981 3448 dh155122 mutex_enter(&ipst->ips_numvifs_mutex); 982 3448 dh155122 ipst->ips_reg_vif_num = ALL_VIFS; 983 3448 dh155122 mutex_exit(&ipst->ips_numvifs_mutex); 984 0 stevel } 985 0 stevel VIF_REFRELE_LOCKED(vifp); 986 0 stevel ipif_refrele(ipif); 987 0 stevel return (error?error:EINVAL); 988 0 stevel } 989 11042 Erik vifp->v_ilm = ilm; 990 0 stevel } 991 0 stevel /* Define parameters for the tbf structure */ 992 0 stevel vifp->v_tbf = v_tbf; 993 0 stevel gethrestime(&vifp->v_tbf->tbf_last_pkt_t); 994 0 stevel vifp->v_tbf->tbf_n_tok = 0; 995 0 stevel vifp->v_tbf->tbf_q_len = 0; 996 0 stevel vifp->v_tbf->tbf_max_q_len = MAXQSIZE; 997 0 stevel vifp->v_tbf->tbf_q = vifp->v_tbf->tbf_t = NULL; 998 0 stevel 999 0 stevel vifp->v_flags = vifcp->vifc_flags; 1000 0 stevel vifp->v_threshold = vifcp->vifc_threshold; 1001 0 stevel vifp->v_lcl_addr = vifcp->vifc_lcl_addr; 1002 0 stevel vifp->v_ipif = ipif; 1003 0 stevel ipif_refrele(ipif); 1004 0 stevel /* Scaling up here, allows division by 1024 in critical code. 
*/ 1005 0 stevel vifp->v_rate_limit = vifcp->vifc_rate_limit * (1024/1000); 1006 0 stevel vifp->v_timeout_id = 0; 1007 0 stevel /* initialize per vif pkt counters */ 1008 0 stevel vifp->v_pkt_in = 0; 1009 0 stevel vifp->v_pkt_out = 0; 1010 0 stevel vifp->v_bytes_in = 0; 1011 0 stevel vifp->v_bytes_out = 0; 1012 0 stevel mutex_init(&vifp->v_tbf->tbf_lock, NULL, MUTEX_DEFAULT, NULL); 1013 0 stevel 1014 0 stevel /* Adjust numvifs up, if the vifi is higher than numvifs */ 1015 3448 dh155122 mutex_enter(&ipst->ips_numvifs_mutex); 1016 3448 dh155122 if (ipst->ips_numvifs <= vifcp->vifc_vifi) 1017 3448 dh155122 ipst->ips_numvifs = vifcp->vifc_vifi + 1; 1018 3448 dh155122 mutex_exit(&ipst->ips_numvifs_mutex); 1019 0 stevel 1020 3448 dh155122 if (ipst->ips_ip_mrtdebug > 1) { 1021 5240 nordmark (void) mi_strlog(mrouter->conn_rq, 1, SL_TRACE, 1022 0 stevel "add_vif: #%d, lcladdr %x, %s %x, thresh %x, rate %d", 1023 0 stevel vifcp->vifc_vifi, 1024 0 stevel ntohl(vifcp->vifc_lcl_addr.s_addr), 1025 0 stevel (vifcp->vifc_flags & VIFF_TUNNEL) ? "rmtaddr" : "mask", 1026 0 stevel ntohl(vifcp->vifc_rmt_addr.s_addr), 1027 0 stevel vifcp->vifc_threshold, vifcp->vifc_rate_limit); 1028 0 stevel } 1029 0 stevel 1030 0 stevel vifp->v_marks = VIF_MARK_GOOD; 1031 0 stevel mutex_exit(&vifp->v_lock); 1032 0 stevel return (0); 1033 0 stevel } 1034 0 stevel 1035 0 stevel 1036 0 stevel /* Delete a vif from the vif table. 
*/ 1037 0 stevel static void 1038 0 stevel del_vifp(struct vif *vifp) 1039 0 stevel { 1040 0 stevel struct tbf *t = vifp->v_tbf; 1041 0 stevel mblk_t *mp0; 1042 0 stevel vifi_t vifi; 1043 3448 dh155122 ip_stack_t *ipst = vifp->v_ipif->ipif_ill->ill_ipst; 1044 5240 nordmark conn_t *mrouter = ipst->ips_ip_g_mrouter; 1045 0 stevel 1046 0 stevel ASSERT(vifp->v_marks & VIF_MARK_CONDEMNED); 1047 0 stevel ASSERT(t != NULL); 1048 0 stevel 1049 3448 dh155122 if (ipst->ips_ip_mrtdebug > 1) { 1050 5240 nordmark (void) mi_strlog(mrouter->conn_rq, 1, SL_TRACE, 1051 0 stevel "del_vif: src 0x%x\n", vifp->v_lcl_addr.s_addr); 1052 0 stevel } 1053 0 stevel 1054 0 stevel if (vifp->v_timeout_id != 0) { 1055 0 stevel (void) untimeout(vifp->v_timeout_id); 1056 0 stevel vifp->v_timeout_id = 0; 1057 0 stevel } 1058 0 stevel 1059 0 stevel /* 1060 0 stevel * Free packets queued at the interface. 1061 0 stevel * Mrouted takes care of cleaning up mfcs - makes calls to del_mfc. 1062 0 stevel */ 1063 0 stevel mutex_enter(&t->tbf_lock); 1064 0 stevel while (t->tbf_q != NULL) { 1065 0 stevel mp0 = t->tbf_q; 1066 0 stevel t->tbf_q = t->tbf_q->b_next; 1067 0 stevel mp0->b_prev = mp0->b_next = NULL; 1068 0 stevel freemsg(mp0); 1069 0 stevel } 1070 0 stevel mutex_exit(&t->tbf_lock); 1071 0 stevel 1072 0 stevel /* 1073 0 stevel * Always clear cache when vifs change. 1074 0 stevel * No need to get last_encap_lock since we are running as a writer. 
1075 0 stevel */ 1076 3448 dh155122 mutex_enter(&ipst->ips_last_encap_lock); 1077 3448 dh155122 if (vifp == ipst->ips_last_encap_vif) { 1078 3448 dh155122 ipst->ips_last_encap_vif = NULL; 1079 3448 dh155122 ipst->ips_last_encap_src = 0; 1080 0 stevel } 1081 3448 dh155122 mutex_exit(&ipst->ips_last_encap_lock); 1082 0 stevel 1083 0 stevel mutex_destroy(&t->tbf_lock); 1084 0 stevel 1085 0 stevel bzero(vifp->v_tbf, sizeof (*(vifp->v_tbf))); 1086 0 stevel 1087 0 stevel /* Adjust numvifs down */ 1088 3448 dh155122 mutex_enter(&ipst->ips_numvifs_mutex); 1089 3448 dh155122 for (vifi = ipst->ips_numvifs; vifi != 0; vifi--) /* vifi is unsigned */ 1090 3448 dh155122 if (ipst->ips_vifs[vifi - 1].v_lcl_addr.s_addr != 0) 1091 0 stevel break; 1092 3448 dh155122 ipst->ips_numvifs = vifi; 1093 3448 dh155122 mutex_exit(&ipst->ips_numvifs_mutex); 1094 0 stevel 1095 0 stevel bzero(vifp, sizeof (*vifp)); 1096 0 stevel } 1097 0 stevel 1098 0 stevel static int 1099 11042 Erik del_vif(vifi_t *vifip, ip_stack_t *ipst) 1100 0 stevel { 1101 3448 dh155122 struct vif *vifp = ipst->ips_vifs + *vifip; 1102 0 stevel 1103 3448 dh155122 if (*vifip >= ipst->ips_numvifs) 1104 0 stevel return (EINVAL); 1105 0 stevel 1106 0 stevel mutex_enter(&vifp->v_lock); 1107 0 stevel /* 1108 0 stevel * Not initialized 1109 0 stevel * Here we are not looking at the vif that is being initialized 1110 0 stevel * i.e vifp->v_marks == 0 and refcnt > 0. 1111 0 stevel */ 1112 0 stevel if (vifp->v_lcl_addr.s_addr == 0 || 1113 0 stevel !(vifp->v_marks & VIF_MARK_GOOD)) { 1114 0 stevel mutex_exit(&vifp->v_lock); 1115 0 stevel return (EADDRNOTAVAIL); 1116 0 stevel } 1117 0 stevel 1118 0 stevel /* Clear VIF_MARK_GOOD and set VIF_MARK_CONDEMNED. 
*/ 1119 0 stevel vifp->v_marks &= ~VIF_MARK_GOOD; 1120 0 stevel vifp->v_marks |= VIF_MARK_CONDEMNED; 1121 0 stevel 1122 0 stevel /* Phyint only */ 1123 0 stevel if (!(vifp->v_flags & (VIFF_TUNNEL | VIFF_REGISTER))) { 1124 0 stevel ipif_t *ipif = vifp->v_ipif; 1125 11042 Erik ilm_t *ilm = vifp->v_ilm; 1126 11042 Erik 1127 11042 Erik vifp->v_ilm = NULL; 1128 11042 Erik 1129 0 stevel ASSERT(ipif != NULL); 1130 0 stevel /* 1131 0 stevel * should be OK to drop the lock as we 1132 0 stevel * have marked this as CONDEMNED. 1133 0 stevel */ 1134 0 stevel mutex_exit(&(vifp)->v_lock); 1135 11042 Erik if (ilm != NULL) { 1136 11042 Erik (void) ip_delmulti(ilm); 1137 11042 Erik ASSERT(ipif->ipif_ill->ill_mrouter_cnt > 0); 1138 11042 Erik atomic_dec_32(&ipif->ipif_ill->ill_mrouter_cnt); 1139 11042 Erik } 1140 0 stevel mutex_enter(&(vifp)->v_lock); 1141 11042 Erik } 1142 11042 Erik 1143 11042 Erik if (vifp->v_flags & VIFF_REGISTER) { 1144 11042 Erik mutex_enter(&ipst->ips_numvifs_mutex); 1145 11042 Erik ipst->ips_reg_vif_num = ALL_VIFS; 1146 11042 Erik mutex_exit(&ipst->ips_numvifs_mutex); 1147 0 stevel } 1148 0 stevel 1149 0 stevel /* 1150 0 stevel * decreases the refcnt added in add_vif. 1151 0 stevel */ 1152 0 stevel VIF_REFRELE_LOCKED(vifp); 1153 0 stevel return (0); 1154 0 stevel } 1155 0 stevel 1156 0 stevel /* 1157 0 stevel * Add an mfc entry. 1158 0 stevel */ 1159 0 stevel static int 1160 3448 dh155122 add_mfc(struct mfcctl *mfccp, ip_stack_t *ipst) 1161 0 stevel { 1162 0 stevel struct mfc *rt; 1163 0 stevel struct rtdetq *rte; 1164 0 stevel ushort_t nstl; 1165 0 stevel int i; 1166 0 stevel struct mfcb *mfcbp; 1167 5240 nordmark conn_t *mrouter = ipst->ips_ip_g_mrouter; 1168 0 stevel 1169 0 stevel /* 1170 0 stevel * The value of vifi is NO_VIF (==MAXVIFS) if Mrouted 1171 0 stevel * did not have a real route for pkt. 
1172 0 stevel * We want this pkt without rt installed in the mfctable to prevent 1173 0 stevel * multiiple tries, so go ahead and put it in mfctable, it will 1174 0 stevel * be discarded later in ip_mdq() because the child is NULL. 1175 0 stevel */ 1176 0 stevel 1177 0 stevel /* Error checking, out of bounds? */ 1178 0 stevel if (mfccp->mfcc_parent > MAXVIFS) { 1179 0 stevel ip0dbg(("ADD_MFC: mfcc_parent out of range %d", 1180 0 stevel (int)mfccp->mfcc_parent)); 1181 0 stevel return (EINVAL); 1182 0 stevel } 1183 0 stevel 1184 0 stevel if ((mfccp->mfcc_parent != NO_VIF) && 1185 3448 dh155122 (ipst->ips_vifs[mfccp->mfcc_parent].v_ipif == NULL)) { 1186 0 stevel ip0dbg(("ADD_MFC: NULL ipif for parent vif %d\n", 1187 0 stevel (int)mfccp->mfcc_parent)); 1188 0 stevel return (EINVAL); 1189 0 stevel } 1190 0 stevel 1191 3448 dh155122 if (is_mrouter_off(ipst)) { 1192 0 stevel return (EINVAL); 1193 0 stevel } 1194 0 stevel 1195 3448 dh155122 mfcbp = &ipst->ips_mfcs[MFCHASH(mfccp->mfcc_origin.s_addr, 1196 0 stevel mfccp->mfcc_mcastgrp.s_addr)]; 1197 0 stevel MFCB_REFHOLD(mfcbp); 1198 0 stevel MFCFIND(mfcbp, mfccp->mfcc_origin.s_addr, 1199 0 stevel mfccp->mfcc_mcastgrp.s_addr, rt); 1200 0 stevel 1201 0 stevel /* If an entry already exists, just update the fields */ 1202 0 stevel if (rt) { 1203 3448 dh155122 if (ipst->ips_ip_mrtdebug > 1) { 1204 5240 nordmark (void) mi_strlog(mrouter->conn_rq, 1, SL_TRACE, 1205 0 stevel "add_mfc: update o %x grp %x parent %x", 1206 0 stevel ntohl(mfccp->mfcc_origin.s_addr), 1207 0 stevel ntohl(mfccp->mfcc_mcastgrp.s_addr), 1208 0 stevel mfccp->mfcc_parent); 1209 0 stevel } 1210 0 stevel mutex_enter(&rt->mfc_mutex); 1211 0 stevel rt->mfc_parent = mfccp->mfcc_parent; 1212 0 stevel 1213 3448 dh155122 mutex_enter(&ipst->ips_numvifs_mutex); 1214 3448 dh155122 for (i = 0; i < (int)ipst->ips_numvifs; i++) 1215 0 stevel rt->mfc_ttls[i] = mfccp->mfcc_ttls[i]; 1216 3448 dh155122 mutex_exit(&ipst->ips_numvifs_mutex); 1217 0 stevel 
mutex_exit(&rt->mfc_mutex); 1218 0 stevel 1219 0 stevel MFCB_REFRELE(mfcbp); 1220 0 stevel return (0); 1221 0 stevel } 1222 0 stevel 1223 0 stevel /* 1224 0 stevel * Find the entry for which the upcall was made and update. 1225 0 stevel */ 1226 0 stevel for (rt = mfcbp->mfcb_mfc, nstl = 0; rt; rt = rt->mfc_next) { 1227 0 stevel mutex_enter(&rt->mfc_mutex); 1228 0 stevel if ((rt->mfc_origin.s_addr == mfccp->mfcc_origin.s_addr) && 1229 0 stevel (rt->mfc_mcastgrp.s_addr == mfccp->mfcc_mcastgrp.s_addr) && 1230 0 stevel (rt->mfc_rte != NULL) && 1231 0 stevel !(rt->mfc_marks & MFCB_MARK_CONDEMNED)) { 1232 0 stevel if (nstl++ != 0) 1233 0 stevel cmn_err(CE_WARN, 1234 0 stevel "add_mfc: %s o %x g %x p %x", 1235 0 stevel "multiple kernel entries", 1236 0 stevel ntohl(mfccp->mfcc_origin.s_addr), 1237 0 stevel ntohl(mfccp->mfcc_mcastgrp.s_addr), 1238 0 stevel mfccp->mfcc_parent); 1239 0 stevel 1240 3448 dh155122 if (ipst->ips_ip_mrtdebug > 1) { 1241 5240 nordmark (void) mi_strlog(mrouter->conn_rq, 1, 1242 3448 dh155122 SL_TRACE, 1243 0 stevel "add_mfc: o %x g %x p %x", 1244 0 stevel ntohl(mfccp->mfcc_origin.s_addr), 1245 0 stevel ntohl(mfccp->mfcc_mcastgrp.s_addr), 1246 0 stevel mfccp->mfcc_parent); 1247 0 stevel } 1248 3448 dh155122 fill_route(rt, mfccp, ipst); 1249 0 stevel 1250 0 stevel /* 1251 0 stevel * Prevent cleanup of cache entry. 1252 0 stevel * Timer starts in ip_mforward. 1253 0 stevel */ 1254 0 stevel if (rt->mfc_timeout_id != 0) { 1255 0 stevel timeout_id_t id; 1256 0 stevel id = rt->mfc_timeout_id; 1257 0 stevel /* 1258 0 stevel * setting id to zero will avoid this 1259 0 stevel * entry from being cleaned up in 1260 0 stevel * expire_up_calls(). 1261 0 stevel */ 1262 0 stevel rt->mfc_timeout_id = 0; 1263 0 stevel /* 1264 0 stevel * dropping the lock is fine as we 1265 0 stevel * have a refhold on the bucket. 1266 0 stevel * so mfc cannot be freed. 1267 0 stevel * The timeout can fire but it will see 1268 0 stevel * that mfc_timeout_id == 0 and not cleanup. 
1269 0 stevel */ 1270 0 stevel mutex_exit(&rt->mfc_mutex); 1271 0 stevel (void) untimeout(id); 1272 0 stevel mutex_enter(&rt->mfc_mutex); 1273 0 stevel } 1274 0 stevel 1275 0 stevel /* 1276 0 stevel * Send all pkts that are queued waiting for the upcall. 1277 0 stevel * ip_mdq param tun set to 0 - 1278 0 stevel * the return value of ip_mdq() isn't used here, 1279 0 stevel * so value we send doesn't matter. 1280 0 stevel */ 1281 0 stevel while (rt->mfc_rte != NULL) { 1282 0 stevel rte = rt->mfc_rte; 1283 0 stevel rt->mfc_rte = rte->rte_next; 1284 0 stevel mutex_exit(&rt->mfc_mutex); 1285 0 stevel (void) ip_mdq(rte->mp, (ipha_t *) 1286 0 stevel rte->mp->b_rptr, rte->ill, 0, rt); 1287 0 stevel freemsg(rte->mp); 1288 0 stevel mi_free((char *)rte); 1289 0 stevel mutex_enter(&rt->mfc_mutex); 1290 0 stevel } 1291 0 stevel } 1292 0 stevel mutex_exit(&rt->mfc_mutex); 1293 0 stevel } 1294 0 stevel 1295 0 stevel 1296 0 stevel /* 1297 0 stevel * It is possible that an entry is being inserted without an upcall 1298 0 stevel */ 1299 0 stevel if (nstl == 0) { 1300 0 stevel mutex_enter(&(mfcbp->mfcb_lock)); 1301 3448 dh155122 if (ipst->ips_ip_mrtdebug > 1) { 1302 5240 nordmark (void) mi_strlog(mrouter->conn_rq, 1, SL_TRACE, 1303 0 stevel "add_mfc: no upcall o %x g %x p %x", 1304 0 stevel ntohl(mfccp->mfcc_origin.s_addr), 1305 0 stevel ntohl(mfccp->mfcc_mcastgrp.s_addr), 1306 0 stevel mfccp->mfcc_parent); 1307 0 stevel } 1308 3448 dh155122 if (is_mrouter_off(ipst)) { 1309 0 stevel mutex_exit(&mfcbp->mfcb_lock); 1310 0 stevel MFCB_REFRELE(mfcbp); 1311 0 stevel return (EINVAL); 1312 0 stevel } 1313 0 stevel 1314 0 stevel for (rt = mfcbp->mfcb_mfc; rt; rt = rt->mfc_next) { 1315 0 stevel 1316 0 stevel mutex_enter(&rt->mfc_mutex); 1317 0 stevel if ((rt->mfc_origin.s_addr == 1318 0 stevel mfccp->mfcc_origin.s_addr) && 1319 0 stevel (rt->mfc_mcastgrp.s_addr == 1320 5240 nordmark mfccp->mfcc_mcastgrp.s_addr) && 1321 5240 nordmark (!(rt->mfc_marks & MFCB_MARK_CONDEMNED))) { 1322 3448 
dh155122 fill_route(rt, mfccp, ipst); 1323 0 stevel mutex_exit(&rt->mfc_mutex); 1324 0 stevel break; 1325 0 stevel } 1326 0 stevel mutex_exit(&rt->mfc_mutex); 1327 0 stevel } 1328 0 stevel 1329 0 stevel /* No upcall, so make a new entry into mfctable */ 1330 0 stevel if (rt == NULL) { 1331 0 stevel rt = (struct mfc *)mi_zalloc(sizeof (struct mfc)); 1332 0 stevel if (rt == NULL) { 1333 0 stevel ip1dbg(("add_mfc: out of memory\n")); 1334 0 stevel mutex_exit(&mfcbp->mfcb_lock); 1335 0 stevel MFCB_REFRELE(mfcbp); 1336 0 stevel return (ENOBUFS); 1337 0 stevel } 1338 0 stevel 1339 0 stevel /* Insert new entry at head of hash chain */ 1340 0 stevel mutex_enter(&rt->mfc_mutex); 1341 3448 dh155122 fill_route(rt, mfccp, ipst); 1342 0 stevel 1343 0 stevel /* Link into table */ 1344 0 stevel rt->mfc_next = mfcbp->mfcb_mfc; 1345 0 stevel mfcbp->mfcb_mfc = rt; 1346 0 stevel mutex_exit(&rt->mfc_mutex); 1347 0 stevel } 1348 0 stevel mutex_exit(&mfcbp->mfcb_lock); 1349 0 stevel } 1350 0 stevel 1351 0 stevel MFCB_REFRELE(mfcbp); 1352 0 stevel return (0); 1353 0 stevel } 1354 0 stevel 1355 0 stevel /* 1356 0 stevel * Fills in mfc structure from mrouted mfcctl. 
1357 0 stevel */ 1358 0 stevel static void 1359 3448 dh155122 fill_route(struct mfc *rt, struct mfcctl *mfccp, ip_stack_t *ipst) 1360 0 stevel { 1361 0 stevel int i; 1362 0 stevel 1363 0 stevel rt->mfc_origin = mfccp->mfcc_origin; 1364 0 stevel rt->mfc_mcastgrp = mfccp->mfcc_mcastgrp; 1365 0 stevel rt->mfc_parent = mfccp->mfcc_parent; 1366 3448 dh155122 mutex_enter(&ipst->ips_numvifs_mutex); 1367 3448 dh155122 for (i = 0; i < (int)ipst->ips_numvifs; i++) { 1368 0 stevel rt->mfc_ttls[i] = mfccp->mfcc_ttls[i]; 1369 0 stevel } 1370 3448 dh155122 mutex_exit(&ipst->ips_numvifs_mutex); 1371 0 stevel /* Initialize pkt counters per src-grp */ 1372 0 stevel rt->mfc_pkt_cnt = 0; 1373 0 stevel rt->mfc_byte_cnt = 0; 1374 0 stevel rt->mfc_wrong_if = 0; 1375 0 stevel rt->mfc_last_assert.tv_sec = rt->mfc_last_assert.tv_nsec = 0; 1376 0 stevel 1377 0 stevel } 1378 0 stevel 1379 0 stevel static void 1380 0 stevel free_queue(struct mfc *mfcp) 1381 0 stevel { 1382 0 stevel struct rtdetq *rte0; 1383 0 stevel 1384 0 stevel /* 1385 0 stevel * Drop all queued upcall packets. 1386 0 stevel * Free the mbuf with the pkt. 1387 0 stevel */ 1388 0 stevel while ((rte0 = mfcp->mfc_rte) != NULL) { 1389 0 stevel mfcp->mfc_rte = rte0->rte_next; 1390 0 stevel freemsg(rte0->mp); 1391 0 stevel mi_free((char *)rte0); 1392 0 stevel } 1393 0 stevel } 1394 0 stevel /* 1395 0 stevel * go thorugh the hash bucket and free all the entries marked condemned. 
1396 0 stevel */ 1397 0 stevel void 1398 0 stevel release_mfc(struct mfcb *mfcbp) 1399 0 stevel { 1400 0 stevel struct mfc *current_mfcp; 1401 0 stevel struct mfc *prev_mfcp; 1402 0 stevel 1403 0 stevel prev_mfcp = current_mfcp = mfcbp->mfcb_mfc; 1404 0 stevel 1405 0 stevel while (current_mfcp != NULL) { 1406 0 stevel if (current_mfcp->mfc_marks & MFCB_MARK_CONDEMNED) { 1407 0 stevel if (current_mfcp == mfcbp->mfcb_mfc) { 1408 0 stevel mfcbp->mfcb_mfc = current_mfcp->mfc_next; 1409 0 stevel free_queue(current_mfcp); 1410 0 stevel mi_free(current_mfcp); 1411 0 stevel prev_mfcp = current_mfcp = mfcbp->mfcb_mfc; 1412 0 stevel continue; 1413 0 stevel } 1414 0 stevel ASSERT(prev_mfcp != NULL); 1415 0 stevel prev_mfcp->mfc_next = current_mfcp->mfc_next; 1416 0 stevel free_queue(current_mfcp); 1417 0 stevel mi_free(current_mfcp); 1418 0 stevel current_mfcp = NULL; 1419 0 stevel } else { 1420 0 stevel prev_mfcp = current_mfcp; 1421 0 stevel } 1422 0 stevel 1423 0 stevel current_mfcp = prev_mfcp->mfc_next; 1424 0 stevel 1425 0 stevel } 1426 0 stevel mfcbp->mfcb_marks &= ~MFCB_MARK_CONDEMNED; 1427 0 stevel ASSERT(mfcbp->mfcb_mfc != NULL || mfcbp->mfcb_marks == 0); 1428 0 stevel } 1429 0 stevel 1430 0 stevel /* 1431 0 stevel * Delete an mfc entry. 
1432 0 stevel */ 1433 0 stevel static int 1434 3448 dh155122 del_mfc(struct mfcctl *mfccp, ip_stack_t *ipst) 1435 0 stevel { 1436 0 stevel struct in_addr origin; 1437 0 stevel struct in_addr mcastgrp; 1438 5240 nordmark struct mfc *rt; 1439 5240 nordmark uint_t hash; 1440 5240 nordmark conn_t *mrouter = ipst->ips_ip_g_mrouter; 1441 0 stevel 1442 0 stevel origin = mfccp->mfcc_origin; 1443 0 stevel mcastgrp = mfccp->mfcc_mcastgrp; 1444 0 stevel hash = MFCHASH(origin.s_addr, mcastgrp.s_addr); 1445 0 stevel 1446 3448 dh155122 if (ipst->ips_ip_mrtdebug > 1) { 1447 5240 nordmark (void) mi_strlog(mrouter->conn_rq, 1, SL_TRACE, 1448 0 stevel "del_mfc: o %x g %x", 1449 0 stevel ntohl(origin.s_addr), 1450 0 stevel ntohl(mcastgrp.s_addr)); 1451 0 stevel } 1452 0 stevel 1453 3448 dh155122 MFCB_REFHOLD(&ipst->ips_mfcs[hash]); 1454 0 stevel 1455 0 stevel /* Find mfc in mfctable, finds only entries without upcalls */ 1456 3448 dh155122 for (rt = ipst->ips_mfcs[hash].mfcb_mfc; rt; rt = rt->mfc_next) { 1457 0 stevel mutex_enter(&rt->mfc_mutex); 1458 0 stevel if (origin.s_addr == rt->mfc_origin.s_addr && 1459 0 stevel mcastgrp.s_addr == rt->mfc_mcastgrp.s_addr && 1460 0 stevel rt->mfc_rte == NULL && 1461 0 stevel !(rt->mfc_marks & MFCB_MARK_CONDEMNED)) 1462 0 stevel break; 1463 0 stevel mutex_exit(&rt->mfc_mutex); 1464 0 stevel } 1465 0 stevel 1466 0 stevel /* 1467 0 stevel * Return if there was an upcall (mfc_rte != NULL, 1468 0 stevel * or rt not in mfctable. 1469 0 stevel */ 1470 0 stevel if (rt == NULL) { 1471 3448 dh155122 MFCB_REFRELE(&ipst->ips_mfcs[hash]); 1472 0 stevel return (EADDRNOTAVAIL); 1473 0 stevel } 1474 0 stevel 1475 0 stevel 1476 0 stevel /* 1477 0 stevel * no need to hold lock as we have a reference. 
1478 0 stevel */ 1479 3448 dh155122 ipst->ips_mfcs[hash].mfcb_marks |= MFCB_MARK_CONDEMNED; 1480 0 stevel /* error checking */ 1481 0 stevel if (rt->mfc_timeout_id != 0) { 1482 0 stevel ip0dbg(("del_mfc: TIMEOUT NOT 0, rte not null")); 1483 0 stevel /* 1484 0 stevel * Its ok to drop the lock, the struct cannot be freed 1485 0 stevel * since we have a ref on the hash bucket. 1486 0 stevel */ 1487 0 stevel rt->mfc_timeout_id = 0; 1488 0 stevel mutex_exit(&rt->mfc_mutex); 1489 0 stevel (void) untimeout(rt->mfc_timeout_id); 1490 0 stevel mutex_enter(&rt->mfc_mutex); 1491 0 stevel } 1492 0 stevel 1493 0 stevel ASSERT(rt->mfc_rte == NULL); 1494 0 stevel 1495 0 stevel 1496 0 stevel /* 1497 0 stevel * Delete the entry from the cache 1498 0 stevel */ 1499 0 stevel rt->mfc_marks |= MFCB_MARK_CONDEMNED; 1500 0 stevel mutex_exit(&rt->mfc_mutex); 1501 0 stevel 1502 3448 dh155122 MFCB_REFRELE(&ipst->ips_mfcs[hash]); 1503 0 stevel 1504 0 stevel return (0); 1505 0 stevel } 1506 0 stevel 1507 0 stevel #define TUNNEL_LEN 12 /* # bytes of IP option for tunnel encapsulation */ 1508 0 stevel 1509 0 stevel /* 1510 0 stevel * IP multicast forwarding function. This function assumes that the packet 1511 0 stevel * pointed to by ipha has arrived on (or is about to be sent to) the interface 1512 0 stevel * pointed to by "ill", and the packet is to be relayed to other networks 1513 0 stevel * that have members of the packet's destination IP multicast group. 1514 0 stevel * 1515 0 stevel * The packet is returned unscathed to the caller, unless it is 1516 0 stevel * erroneous, in which case a -1 value tells the caller (IP) 1517 0 stevel * to discard it. 1518 0 stevel * 1519 0 stevel * Unlike BSD, SunOS 5.x needs to return to IP info about 1520 0 stevel * whether pkt came in thru a tunnel, so it can be discarded, unless 1521 0 stevel * it's IGMP. In BSD, the ifp is bogus for tunnels, so pkt won't try 1522 0 stevel * to be delivered. 
1523 0 stevel * Return values are 0 - pkt is okay and phyint 1524 0 stevel * -1 - pkt is malformed and to be tossed 1525 0 stevel * 1 - pkt came in on tunnel 1526 0 stevel */ 1527 0 stevel int 1528 11042 Erik ip_mforward(mblk_t *mp, ip_recv_attr_t *ira) 1529 0 stevel { 1530 11042 Erik ipha_t *ipha = (ipha_t *)mp->b_rptr; 1531 11042 Erik ill_t *ill = ira->ira_ill; 1532 0 stevel struct mfc *rt; 1533 0 stevel ipaddr_t src, dst, tunnel_src = 0; 1534 0 stevel static int srctun = 0; 1535 0 stevel vifi_t vifi; 1536 0 stevel boolean_t pim_reg_packet = B_FALSE; 1537 11042 Erik struct mfcb *mfcbp; 1538 3448 dh155122 ip_stack_t *ipst = ill->ill_ipst; 1539 5240 nordmark conn_t *mrouter = ipst->ips_ip_g_mrouter; 1540 11042 Erik ill_t *rill = ira->ira_rill; 1541 11042 Erik 1542 11042 Erik ASSERT(ira->ira_pktlen == msgdsize(mp)); 1543 0 stevel 1544 3448 dh155122 if (ipst->ips_ip_mrtdebug > 1) { 1545 5240 nordmark (void) mi_strlog(mrouter->conn_rq, 1, SL_TRACE, 1546 0 stevel "ip_mforward: RECV ipha_src %x, ipha_dst %x, ill %s", 1547 0 stevel ntohl(ipha->ipha_src), ntohl(ipha->ipha_dst), 1548 0 stevel ill->ill_name); 1549 0 stevel } 1550 0 stevel 1551 0 stevel dst = ipha->ipha_dst; 1552 11042 Erik if (ira->ira_flags & IRAF_PIM_REGISTER) 1553 0 stevel pim_reg_packet = B_TRUE; 1554 11042 Erik else if (ira->ira_flags & IRAF_MROUTE_TUNNEL_SET) 1555 11042 Erik tunnel_src = ira->ira_mroute_tunnel; 1556 0 stevel 1557 0 stevel /* 1558 0 stevel * Don't forward a packet with time-to-live of zero or one, 1559 0 stevel * or a packet destined to a local-only group. 
1560 0 stevel */ 1561 0 stevel if (CLASSD(dst) && (ipha->ipha_ttl <= 1 || 1562 5240 nordmark (ipaddr_t)ntohl(dst) <= INADDR_MAX_LOCAL_GROUP)) { 1563 3448 dh155122 if (ipst->ips_ip_mrtdebug > 1) { 1564 5240 nordmark (void) mi_strlog(mrouter->conn_rq, 1, SL_TRACE, 1565 0 stevel "ip_mforward: not forwarded ttl %d," 1566 0 stevel " dst 0x%x ill %s", 1567 0 stevel ipha->ipha_ttl, ntohl(dst), ill->ill_name); 1568 0 stevel } 1569 0 stevel if (tunnel_src != 0) 1570 0 stevel return (1); 1571 0 stevel else 1572 0 stevel return (0); 1573 0 stevel } 1574 0 stevel 1575 0 stevel if ((tunnel_src != 0) || pim_reg_packet) { 1576 0 stevel /* 1577 0 stevel * Packet arrived over an encapsulated tunnel or via a PIM 1578 11042 Erik * register message. 1579 0 stevel */ 1580 3448 dh155122 if (ipst->ips_ip_mrtdebug > 1) { 1581 0 stevel if (tunnel_src != 0) { 1582 5240 nordmark (void) mi_strlog(mrouter->conn_rq, 1, 1583 3448 dh155122 SL_TRACE, 1584 0 stevel "ip_mforward: ill %s arrived via ENCAP TUN", 1585 0 stevel ill->ill_name); 1586 0 stevel } else if (pim_reg_packet) { 1587 5240 nordmark (void) mi_strlog(mrouter->conn_rq, 1, 1588 3448 dh155122 SL_TRACE, 1589 0 stevel "ip_mforward: ill %s arrived via" 1590 0 stevel " REGISTER VIF", 1591 0 stevel ill->ill_name); 1592 0 stevel } 1593 0 stevel } 1594 0 stevel } else if ((ipha->ipha_version_and_hdr_length & 0xf) < 1595 0 stevel (uint_t)(IP_SIMPLE_HDR_LENGTH + TUNNEL_LEN) >> 2 || 1596 0 stevel ((uchar_t *)(ipha + 1))[1] != IPOPT_LSRR) { 1597 0 stevel /* Packet arrived via a physical interface. */ 1598 3448 dh155122 if (ipst->ips_ip_mrtdebug > 1) { 1599 5240 nordmark (void) mi_strlog(mrouter->conn_rq, 1, SL_TRACE, 1600 0 stevel "ip_mforward: ill %s arrived via PHYINT", 1601 0 stevel ill->ill_name); 1602 0 stevel } 1603 0 stevel 1604 0 stevel } else { 1605 0 stevel /* 1606 0 stevel * Packet arrived through a SRCRT tunnel. 1607 0 stevel * Source-route tunnels are no longer supported. 1608 0 stevel * Error message printed every 1000 times. 
1609 0 stevel */ 1610 0 stevel if ((srctun++ % 1000) == 0) { 1611 0 stevel cmn_err(CE_WARN, 1612 0 stevel "ip_mforward: received source-routed pkt from %x", 1613 0 stevel ntohl(ipha->ipha_src)); 1614 0 stevel } 1615 0 stevel return (-1); 1616 0 stevel } 1617 0 stevel 1618 3448 dh155122 ipst->ips_mrtstat->mrts_fwd_in++; 1619 0 stevel src = ipha->ipha_src; 1620 0 stevel 1621 0 stevel /* Find route in cache, return NULL if not there or upcalls q'ed. */ 1622 0 stevel 1623 0 stevel /* 1624 0 stevel * Lock the mfctable against changes made by ip_mforward. 1625 0 stevel * Note that only add_mfc and del_mfc can remove entries and 1626 0 stevel * they run with exclusive access to IP. So we do not need to 1627 0 stevel * guard against the rt being deleted, so release lock after reading. 1628 0 stevel */ 1629 0 stevel 1630 3448 dh155122 if (is_mrouter_off(ipst)) 1631 0 stevel return (-1); 1632 0 stevel 1633 3448 dh155122 mfcbp = &ipst->ips_mfcs[MFCHASH(src, dst)]; 1634 0 stevel MFCB_REFHOLD(mfcbp); 1635 0 stevel MFCFIND(mfcbp, src, dst, rt); 1636 0 stevel 1637 0 stevel /* Entry exists, so forward if necessary */ 1638 0 stevel if (rt != NULL) { 1639 0 stevel int ret = 0; 1640 3448 dh155122 ipst->ips_mrtstat->mrts_mfc_hits++; 1641 0 stevel if (pim_reg_packet) { 1642 3448 dh155122 ASSERT(ipst->ips_reg_vif_num != ALL_VIFS); 1643 0 stevel ret = ip_mdq(mp, ipha, 1644 3448 dh155122 ipst->ips_vifs[ipst->ips_reg_vif_num]. 1645 3448 dh155122 v_ipif->ipif_ill, 1646 3448 dh155122 0, rt); 1647 0 stevel } else { 1648 0 stevel ret = ip_mdq(mp, ipha, ill, tunnel_src, rt); 1649 0 stevel } 1650 0 stevel 1651 0 stevel MFCB_REFRELE(mfcbp); 1652 0 stevel return (ret); 1653 0 stevel 1654 0 stevel /* 1655 0 stevel * Don't forward if we don't have a cache entry. Mrouted will 1656 0 stevel * always provide a cache entry in response to an upcall. 
1657 0 stevel */ 1658 0 stevel } else { 1659 0 stevel /* 1660 0 stevel * If we don't have a route for packet's origin, make a copy 1661 0 stevel * of the packet and send message to routing daemon. 1662 0 stevel */ 1663 0 stevel struct mfc *mfc_rt = NULL; 1664 0 stevel mblk_t *mp0 = NULL; 1665 0 stevel mblk_t *mp_copy = NULL; 1666 0 stevel struct rtdetq *rte = NULL; 1667 0 stevel struct rtdetq *rte_m, *rte1, *prev_rte; 1668 0 stevel uint_t hash; 1669 0 stevel int npkts; 1670 0 stevel boolean_t new_mfc = B_FALSE; 1671 3448 dh155122 ipst->ips_mrtstat->mrts_mfc_misses++; 1672 0 stevel /* BSD uses mrts_no_route++ */ 1673 3448 dh155122 if (ipst->ips_ip_mrtdebug > 1) { 1674 5240 nordmark (void) mi_strlog(mrouter->conn_rq, 1, SL_TRACE, 1675 0 stevel "ip_mforward: no rte ill %s src %x g %x misses %d", 1676 0 stevel ill->ill_name, ntohl(src), ntohl(dst), 1677 3448 dh155122 (int)ipst->ips_mrtstat->mrts_mfc_misses); 1678 0 stevel } 1679 0 stevel /* 1680 0 stevel * The order of the following code differs from the BSD code. 1681 0 stevel * Pre-mc3.5, the BSD code was incorrect and SunOS 5.x 1682 0 stevel * code works, so SunOS 5.x wasn't changed to conform to the 1683 0 stevel * BSD version. 1684 0 stevel */ 1685 0 stevel 1686 0 stevel /* Lock mfctable. */ 1687 0 stevel hash = MFCHASH(src, dst); 1688 3448 dh155122 mutex_enter(&(ipst->ips_mfcs[hash].mfcb_lock)); 1689 0 stevel 1690 0 stevel /* 1691 0 stevel * If we are turning off mrouted return an error 1692 0 stevel */ 1693 3448 dh155122 if (is_mrouter_off(ipst)) { 1694 0 stevel mutex_exit(&mfcbp->mfcb_lock); 1695 0 stevel MFCB_REFRELE(mfcbp); 1696 0 stevel return (-1); 1697 0 stevel } 1698 0 stevel 1699 0 stevel /* Is there an upcall waiting for this packet? 
*/ 1700 3448 dh155122 for (mfc_rt = ipst->ips_mfcs[hash].mfcb_mfc; mfc_rt; 1701 0 stevel mfc_rt = mfc_rt->mfc_next) { 1702 0 stevel mutex_enter(&mfc_rt->mfc_mutex); 1703 3448 dh155122 if (ipst->ips_ip_mrtdebug > 1) { 1704 5240 nordmark (void) mi_strlog(mrouter->conn_rq, 1, 1705 3448 dh155122 SL_TRACE, 1706 0 stevel "ip_mforward: MFCTAB hash %d o 0x%x" 1707 0 stevel " g 0x%x\n", 1708 0 stevel hash, ntohl(mfc_rt->mfc_origin.s_addr), 1709 0 stevel ntohl(mfc_rt->mfc_mcastgrp.s_addr)); 1710 0 stevel } 1711 0 stevel /* There is an upcall */ 1712 0 stevel if ((src == mfc_rt->mfc_origin.s_addr) && 1713 0 stevel (dst == mfc_rt->mfc_mcastgrp.s_addr) && 1714 0 stevel (mfc_rt->mfc_rte != NULL) && 1715 0 stevel !(mfc_rt->mfc_marks & MFCB_MARK_CONDEMNED)) { 1716 0 stevel break; 1717 0 stevel } 1718 0 stevel mutex_exit(&mfc_rt->mfc_mutex); 1719 0 stevel } 1720 0 stevel /* No upcall, so make a new entry into mfctable */ 1721 0 stevel if (mfc_rt == NULL) { 1722 0 stevel mfc_rt = (struct mfc *)mi_zalloc(sizeof (struct mfc)); 1723 0 stevel if (mfc_rt == NULL) { 1724 3448 dh155122 ipst->ips_mrtstat->mrts_fwd_drop++; 1725 0 stevel ip1dbg(("ip_mforward: out of memory " 1726 0 stevel "for mfc, mfc_rt\n")); 1727 0 stevel goto error_return; 1728 0 stevel } else 1729 0 stevel new_mfc = B_TRUE; 1730 0 stevel /* Get resources */ 1731 0 stevel /* TODO could copy header and dup rest */ 1732 0 stevel mp_copy = copymsg(mp); 1733 0 stevel if (mp_copy == NULL) { 1734 3448 dh155122 ipst->ips_mrtstat->mrts_fwd_drop++; 1735 0 stevel ip1dbg(("ip_mforward: out of memory for " 1736 0 stevel "mblk, mp_copy\n")); 1737 0 stevel goto error_return; 1738 0 stevel } 1739 0 stevel mutex_enter(&mfc_rt->mfc_mutex); 1740 0 stevel } 1741 0 stevel /* Get resources for rte, whether first rte or not first. 
*/ 1742 0 stevel /* Add this packet into rtdetq */ 1743 0 stevel rte = (struct rtdetq *)mi_zalloc(sizeof (struct rtdetq)); 1744 0 stevel if (rte == NULL) { 1745 3448 dh155122 ipst->ips_mrtstat->mrts_fwd_drop++; 1746 0 stevel mutex_exit(&mfc_rt->mfc_mutex); 1747 0 stevel ip1dbg(("ip_mforward: out of memory for" 1748 0 stevel " rtdetq, rte\n")); 1749 0 stevel goto error_return; 1750 0 stevel } 1751 0 stevel 1752 0 stevel mp0 = copymsg(mp); 1753 0 stevel if (mp0 == NULL) { 1754 3448 dh155122 ipst->ips_mrtstat->mrts_fwd_drop++; 1755 0 stevel ip1dbg(("ip_mforward: out of memory for mblk, mp0\n")); 1756 0 stevel mutex_exit(&mfc_rt->mfc_mutex); 1757 0 stevel goto error_return; 1758 0 stevel } 1759 0 stevel rte->mp = mp0; 1760 0 stevel if (pim_reg_packet) { 1761 3448 dh155122 ASSERT(ipst->ips_reg_vif_num != ALL_VIFS); 1762 3448 dh155122 rte->ill = 1763 3448 dh155122 ipst->ips_vifs[ipst->ips_reg_vif_num]. 1764 3448 dh155122 v_ipif->ipif_ill; 1765 0 stevel } else { 1766 0 stevel rte->ill = ill; 1767 0 stevel } 1768 0 stevel rte->rte_next = NULL; 1769 0 stevel 1770 0 stevel /* 1771 0 stevel * Determine if upcall q (rtdetq) has overflowed. 1772 0 stevel * mfc_rt->mfc_rte is null by mi_zalloc 1773 0 stevel * if it is the first message. 1774 0 stevel */ 1775 0 stevel for (rte_m = mfc_rt->mfc_rte, npkts = 0; rte_m; 1776 0 stevel rte_m = rte_m->rte_next) 1777 0 stevel npkts++; 1778 3448 dh155122 if (ipst->ips_ip_mrtdebug > 1) { 1779 5240 nordmark (void) mi_strlog(mrouter->conn_rq, 1, SL_TRACE, 1780 0 stevel "ip_mforward: upcalls %d\n", npkts); 1781 0 stevel } 1782 0 stevel if (npkts > MAX_UPQ) { 1783 3448 dh155122 ipst->ips_mrtstat->mrts_upq_ovflw++; 1784 0 stevel mutex_exit(&mfc_rt->mfc_mutex); 1785 0 stevel goto error_return; 1786 0 stevel } 1787 0 stevel 1788 0 stevel if (npkts == 0) { /* first upcall */ 1789 0 stevel int i = 0; 1790 0 stevel /* 1791 0 stevel * Now finish installing the new mfc! Now that we have 1792 0 stevel * resources! Insert new entry at head of hash chain. 
1793 0 stevel * Use src and dst which are ipaddr_t's. 1794 0 stevel */ 1795 0 stevel mfc_rt->mfc_origin.s_addr = src; 1796 0 stevel mfc_rt->mfc_mcastgrp.s_addr = dst; 1797 0 stevel 1798 3448 dh155122 mutex_enter(&ipst->ips_numvifs_mutex); 1799 3448 dh155122 for (i = 0; i < (int)ipst->ips_numvifs; i++) 1800 0 stevel mfc_rt->mfc_ttls[i] = 0; 1801 3448 dh155122 mutex_exit(&ipst->ips_numvifs_mutex); 1802 0 stevel mfc_rt->mfc_parent = ALL_VIFS; 1803 0 stevel 1804 0 stevel /* Link into table */ 1805 3448 dh155122 if (ipst->ips_ip_mrtdebug > 1) { 1806 5240 nordmark (void) mi_strlog(mrouter->conn_rq, 1, 1807 3448 dh155122 SL_TRACE, 1808 0 stevel "ip_mforward: NEW MFCTAB hash %d o 0x%x " 1809 0 stevel "g 0x%x\n", hash, 1810 0 stevel ntohl(mfc_rt->mfc_origin.s_addr), 1811 0 stevel ntohl(mfc_rt->mfc_mcastgrp.s_addr)); 1812 0 stevel } 1813 3448 dh155122 mfc_rt->mfc_next = ipst->ips_mfcs[hash].mfcb_mfc; 1814 3448 dh155122 ipst->ips_mfcs[hash].mfcb_mfc = mfc_rt; 1815 0 stevel mfc_rt->mfc_rte = NULL; 1816 0 stevel } 1817 0 stevel 1818 0 stevel /* Link in the upcall */ 1819 0 stevel /* First upcall */ 1820 0 stevel if (mfc_rt->mfc_rte == NULL) 1821 0 stevel mfc_rt->mfc_rte = rte; 1822 0 stevel else { 1823 0 stevel /* not the first upcall */ 1824 0 stevel prev_rte = mfc_rt->mfc_rte; 1825 0 stevel for (rte1 = mfc_rt->mfc_rte->rte_next; rte1; 1826 5240 nordmark prev_rte = rte1, rte1 = rte1->rte_next) 1827 5240 nordmark ; 1828 0 stevel prev_rte->rte_next = rte; 1829 0 stevel } 1830 0 stevel 1831 0 stevel /* 1832 0 stevel * No upcalls waiting, this is first one, so send a message to 1833 0 stevel * routing daemon to install a route into kernel table. 
1834 0 stevel */ 1835 0 stevel if (npkts == 0) { 1836 0 stevel struct igmpmsg *im; 1837 0 stevel /* ipha_protocol is 0, for upcall */ 1838 0 stevel ASSERT(mp_copy != NULL); 1839 0 stevel im = (struct igmpmsg *)mp_copy->b_rptr; 1840 0 stevel im->im_msgtype = IGMPMSG_NOCACHE; 1841 0 stevel im->im_mbz = 0; 1842 3448 dh155122 mutex_enter(&ipst->ips_numvifs_mutex); 1843 0 stevel if (pim_reg_packet) { 1844 3448 dh155122 im->im_vif = (uchar_t)ipst->ips_reg_vif_num; 1845 3448 dh155122 mutex_exit(&ipst->ips_numvifs_mutex); 1846 0 stevel } else { 1847 0 stevel /* 1848 0 stevel * XXX do we need to hold locks here ? 1849 0 stevel */ 1850 3448 dh155122 for (vifi = 0; 1851 3448 dh155122 vifi < ipst->ips_numvifs; 1852 3448 dh155122 vifi++) { 1853 3448 dh155122 if (ipst->ips_vifs[vifi].v_ipif == NULL) 1854 0 stevel continue; 1855 3448 dh155122 if (ipst->ips_vifs[vifi]. 1856 3448 dh155122 v_ipif->ipif_ill == ill) { 1857 0 stevel im->im_vif = (uchar_t)vifi; 1858 0 stevel break; 1859 0 stevel } 1860 0 stevel } 1861 3448 dh155122 mutex_exit(&ipst->ips_numvifs_mutex); 1862 3448 dh155122 ASSERT(vifi < ipst->ips_numvifs); 1863 0 stevel } 1864 0 stevel 1865 3448 dh155122 ipst->ips_mrtstat->mrts_upcalls++; 1866 0 stevel /* Timer to discard upcalls if mrouted is too slow */ 1867 0 stevel mfc_rt->mfc_timeout_id = timeout(expire_upcalls, 1868 0 stevel mfc_rt, EXPIRE_TIMEOUT * UPCALL_EXPIRE); 1869 0 stevel mutex_exit(&mfc_rt->mfc_mutex); 1870 3448 dh155122 mutex_exit(&(ipst->ips_mfcs[hash].mfcb_lock)); 1871 5240 nordmark /* Pass to RAWIP */ 1872 11042 Erik ira->ira_ill = ira->ira_rill = NULL; 1873 11042 Erik (mrouter->conn_recv)(mrouter, mp_copy, NULL, ira); 1874 11042 Erik ira->ira_ill = ill; 1875 11042 Erik ira->ira_rill = rill; 1876 0 stevel } else { 1877 0 stevel mutex_exit(&mfc_rt->mfc_mutex); 1878 3448 dh155122 mutex_exit(&(ipst->ips_mfcs[hash].mfcb_lock)); 1879 11042 Erik BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 1880 11042 Erik ip_drop_input("ip_mforward - upcall already 
waiting", 1881 11042 Erik mp_copy, ill); 1882 0 stevel freemsg(mp_copy); 1883 0 stevel } 1884 0 stevel 1885 0 stevel MFCB_REFRELE(mfcbp); 1886 0 stevel if (tunnel_src != 0) 1887 0 stevel return (1); 1888 0 stevel else 1889 0 stevel return (0); 1890 0 stevel error_return: 1891 3448 dh155122 mutex_exit(&(ipst->ips_mfcs[hash].mfcb_lock)); 1892 0 stevel MFCB_REFRELE(mfcbp); 1893 0 stevel if (mfc_rt != NULL && (new_mfc == B_TRUE)) 1894 0 stevel mi_free((char *)mfc_rt); 1895 0 stevel if (rte != NULL) 1896 0 stevel mi_free((char *)rte); 1897 11042 Erik if (mp_copy != NULL) { 1898 11042 Erik BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 1899 11042 Erik ip_drop_input("ip_mforward error", mp_copy, ill); 1900 0 stevel freemsg(mp_copy); 1901 11042 Erik } 1902 0 stevel if (mp0 != NULL) 1903 0 stevel freemsg(mp0); 1904 0 stevel return (-1); 1905 0 stevel } 1906 0 stevel } 1907 0 stevel 1908 0 stevel /* 1909 0 stevel * Clean up the mfctable cache entry if upcall is not serviced. 1910 0 stevel * SunOS 5.x has timeout per mfc, unlike BSD which has one timer. 
1911 0 stevel */ 1912 0 stevel static void 1913 0 stevel expire_upcalls(void *arg) 1914 0 stevel { 1915 0 stevel struct mfc *mfc_rt = arg; 1916 0 stevel uint_t hash; 1917 0 stevel struct mfc *prev_mfc, *mfc0; 1918 3448 dh155122 ip_stack_t *ipst; 1919 5240 nordmark conn_t *mrouter; 1920 3448 dh155122 1921 3448 dh155122 if (mfc_rt->mfc_rte == NULL || mfc_rt->mfc_rte->ill != NULL) { 1922 3448 dh155122 cmn_err(CE_WARN, "expire_upcalls: no ILL\n"); 1923 3448 dh155122 return; 1924 3448 dh155122 } 1925 3448 dh155122 ipst = mfc_rt->mfc_rte->ill->ill_ipst; 1926 5240 nordmark mrouter = ipst->ips_ip_g_mrouter; 1927 0 stevel 1928 0 stevel hash = MFCHASH(mfc_rt->mfc_origin.s_addr, mfc_rt->mfc_mcastgrp.s_addr); 1929 3448 dh155122 if (ipst->ips_ip_mrtdebug > 1) { 1930 5240 nordmark (void) mi_strlog(mrouter->conn_rq, 1, SL_TRACE, 1931 0 stevel "expire_upcalls: hash %d s %x g %x", 1932 0 stevel hash, ntohl(mfc_rt->mfc_origin.s_addr), 1933 0 stevel ntohl(mfc_rt->mfc_mcastgrp.s_addr)); 1934 0 stevel } 1935 3448 dh155122 MFCB_REFHOLD(&ipst->ips_mfcs[hash]); 1936 0 stevel mutex_enter(&mfc_rt->mfc_mutex); 1937 0 stevel /* 1938 0 stevel * if timeout has been set to zero, than the 1939 0 stevel * entry has been filled, no need to delete it. 1940 0 stevel */ 1941 0 stevel if (mfc_rt->mfc_timeout_id == 0) 1942 0 stevel goto done; 1943 3448 dh155122 ipst->ips_mrtstat->mrts_cache_cleanups++; 1944 0 stevel mfc_rt->mfc_timeout_id = 0; 1945 0 stevel 1946 0 stevel /* Determine entry to be cleaned up in cache table. 
*/ 1947 3448 dh155122 for (prev_mfc = mfc0 = ipst->ips_mfcs[hash].mfcb_mfc; mfc0; 1948 0 stevel prev_mfc = mfc0, mfc0 = mfc0->mfc_next) 1949 0 stevel if (mfc0 == mfc_rt) 1950 0 stevel break; 1951 0 stevel 1952 0 stevel /* del_mfc takes care of gone mfcs */ 1953 0 stevel ASSERT(prev_mfc != NULL); 1954 0 stevel ASSERT(mfc0 != NULL); 1955 0 stevel 1956 0 stevel /* 1957 0 stevel * Delete the entry from the cache 1958 0 stevel */ 1959 3448 dh155122 ipst->ips_mfcs[hash].mfcb_marks |= MFCB_MARK_CONDEMNED; 1960 0 stevel mfc_rt->mfc_marks |= MFCB_MARK_CONDEMNED; 1961 0 stevel 1962 0 stevel /* 1963 0 stevel * release_mfc will drop all queued upcall packets. 1964 0 stevel * and will free the mbuf with the pkt, if, timing info. 1965 0 stevel */ 1966 0 stevel done: 1967 0 stevel mutex_exit(&mfc_rt->mfc_mutex); 1968 3448 dh155122 MFCB_REFRELE(&ipst->ips_mfcs[hash]); 1969 0 stevel } 1970 0 stevel 1971 0 stevel /* 1972 0 stevel * Packet forwarding routine once entry in the cache is made. 1973 0 stevel */ 1974 0 stevel static int 1975 0 stevel ip_mdq(mblk_t *mp, ipha_t *ipha, ill_t *ill, ipaddr_t tunnel_src, 1976 0 stevel struct mfc *rt) 1977 0 stevel { 1978 0 stevel vifi_t vifi; 1979 0 stevel struct vif *vifp; 1980 0 stevel ipaddr_t dst = ipha->ipha_dst; 1981 0 stevel size_t plen = msgdsize(mp); 1982 0 stevel vifi_t num_of_vifs; 1983 3448 dh155122 ip_stack_t *ipst = ill->ill_ipst; 1984 5240 nordmark conn_t *mrouter = ipst->ips_ip_g_mrouter; 1985 11042 Erik ip_recv_attr_t iras; 1986 0 stevel 1987 3448 dh155122 if (ipst->ips_ip_mrtdebug > 1) { 1988 5240 nordmark (void) mi_strlog(mrouter->conn_rq, 1, SL_TRACE, 1989 0 stevel "ip_mdq: SEND src %x, ipha_dst %x, ill %s", 1990 0 stevel ntohl(ipha->ipha_src), ntohl(ipha->ipha_dst), 1991 0 stevel ill->ill_name); 1992 0 stevel } 1993 0 stevel 1994 0 stevel /* Macro to send packet on vif */ 1995 0 stevel #define MC_SEND(ipha, mp, vifp, dst) { \ 1996 0 stevel if ((vifp)->v_flags & VIFF_TUNNEL) \ 1997 0 stevel encap_send((ipha), (mp), (vifp), 
(dst)); \ 1998 0 stevel else if ((vifp)->v_flags & VIFF_REGISTER) \ 1999 0 stevel register_send((ipha), (mp), (vifp), (dst)); \ 2000 0 stevel else \ 2001 0 stevel phyint_send((ipha), (mp), (vifp), (dst)); \ 2002 0 stevel } 2003 0 stevel 2004 0 stevel vifi = rt->mfc_parent; 2005 0 stevel 2006 0 stevel /* 2007 0 stevel * The value of vifi is MAXVIFS if the pkt had no parent, i.e., 2008 0 stevel * Mrouted had no route. 2009 0 stevel * We wanted the route installed in the mfctable to prevent multiple 2010 0 stevel * tries, so it passed add_mfc(), but is discarded here. The v_ipif is 2011 0 stevel * NULL so we don't want to check the ill. Still needed as of Mrouted 2012 0 stevel * 3.6. 2013 0 stevel */ 2014 0 stevel if (vifi == NO_VIF) { 2015 0 stevel ip1dbg(("ip_mdq: no route for origin ill %s, vifi is NO_VIF\n", 2016 0 stevel ill->ill_name)); 2017 3448 dh155122 if (ipst->ips_ip_mrtdebug > 1) { 2018 5240 nordmark (void) mi_strlog(mrouter->conn_rq, 1, SL_TRACE, 2019 0 stevel "ip_mdq: vifi is NO_VIF ill = %s", ill->ill_name); 2020 0 stevel } 2021 0 stevel return (-1); /* drop pkt */ 2022 0 stevel } 2023 0 stevel 2024 3448 dh155122 if (!lock_good_vif(&ipst->ips_vifs[vifi])) 2025 0 stevel return (-1); 2026 0 stevel /* 2027 0 stevel * The MFC entries are not cleaned up when an ipif goes 2028 0 stevel * away thus this code has to guard against an MFC referencing 2029 0 stevel * an ipif that has been closed. Note: reset_mrt_vif_ipif 2030 0 stevel * sets the v_ipif to NULL when the ipif disappears. 
2031 0 stevel */ 2032 3448 dh155122 ASSERT(ipst->ips_vifs[vifi].v_ipif != NULL); 2033 0 stevel 2034 3448 dh155122 if (vifi >= ipst->ips_numvifs) { 2035 0 stevel cmn_err(CE_WARN, "ip_mdq: illegal vifi %d numvifs " 2036 0 stevel "%d ill %s viftable ill %s\n", 2037 3448 dh155122 (int)vifi, (int)ipst->ips_numvifs, ill->ill_name, 2038 3448 dh155122 ipst->ips_vifs[vifi].v_ipif->ipif_ill->ill_name); 2039 3448 dh155122 unlock_good_vif(&ipst->ips_vifs[vifi]); 2040 0 stevel return (-1); 2041 0 stevel } 2042 0 stevel /* 2043 0 stevel * Don't forward if it didn't arrive from the parent vif for its 2044 8485 Peter * origin. 2045 0 stevel */ 2046 11042 Erik if ((ipst->ips_vifs[vifi].v_ipif->ipif_ill != ill) || 2047 3448 dh155122 (ipst->ips_vifs[vifi].v_rmt_addr.s_addr != tunnel_src)) { 2048 0 stevel /* Came in the wrong interface */ 2049 0 stevel ip1dbg(("ip_mdq: arrived wrong if, vifi %d " 2050 0 stevel "numvifs %d ill %s viftable ill %s\n", 2051 3448 dh155122 (int)vifi, (int)ipst->ips_numvifs, ill->ill_name, 2052 11042 Erik ipst->ips_vifs[vifi].v_ipif->ipif_ill->ill_name)); 2053 3448 dh155122 if (ipst->ips_ip_mrtdebug > 1) { 2054 5240 nordmark (void) mi_strlog(mrouter->conn_rq, 1, SL_TRACE, 2055 0 stevel "ip_mdq: arrived wrong if, vifi %d ill " 2056 0 stevel "%s viftable ill %s\n", 2057 11042 Erik (int)vifi, ill->ill_name, 2058 11042 Erik ipst->ips_vifs[vifi].v_ipif->ipif_ill->ill_name); 2059 0 stevel } 2060 3448 dh155122 ipst->ips_mrtstat->mrts_wrong_if++; 2061 0 stevel rt->mfc_wrong_if++; 2062 0 stevel 2063 0 stevel /* 2064 0 stevel * If we are doing PIM assert processing and we are forwarding 2065 0 stevel * packets on this interface, and it is a broadcast medium 2066 0 stevel * interface (and not a tunnel), send a message to the routing. 2067 0 stevel * 2068 0 stevel * We use the first ipif on the list, since it's all we have. 2069 0 stevel * Chances are the ipif_flags are the same for ipifs on the ill. 
2070 0 stevel */ 2071 3448 dh155122 if (ipst->ips_pim_assert && rt->mfc_ttls[vifi] > 0 && 2072 0 stevel (ill->ill_ipif->ipif_flags & IPIF_BROADCAST) && 2073 3448 dh155122 !(ipst->ips_vifs[vifi].v_flags & VIFF_TUNNEL)) { 2074 0 stevel mblk_t *mp_copy; 2075 0 stevel struct igmpmsg *im; 2076 0 stevel 2077 0 stevel /* TODO could copy header and dup rest */ 2078 0 stevel mp_copy = copymsg(mp); 2079 0 stevel if (mp_copy == NULL) { 2080 3448 dh155122 ipst->ips_mrtstat->mrts_fwd_drop++; 2081 0 stevel ip1dbg(("ip_mdq: out of memory " 2082 0 stevel "for mblk, mp_copy\n")); 2083 3448 dh155122 unlock_good_vif(&ipst->ips_vifs[vifi]); 2084 0 stevel return (-1); 2085 0 stevel } 2086 0 stevel 2087 0 stevel im = (struct igmpmsg *)mp_copy->b_rptr; 2088 0 stevel im->im_msgtype = IGMPMSG_WRONGVIF; 2089 0 stevel im->im_mbz = 0; 2090 0 stevel im->im_vif = (ushort_t)vifi; 2091 5240 nordmark /* Pass to RAWIP */ 2092 11042 Erik 2093 11042 Erik bzero(&iras, sizeof (iras)); 2094 11042 Erik iras.ira_flags = IRAF_IS_IPV4; 2095 11042 Erik iras.ira_ip_hdr_length = 2096 11042 Erik IPH_HDR_LENGTH(mp_copy->b_rptr); 2097 11042 Erik iras.ira_pktlen = msgdsize(mp_copy); 2098 11042 Erik (mrouter->conn_recv)(mrouter, mp_copy, NULL, &iras); 2099 11042 Erik ASSERT(!(iras.ira_flags & IRAF_IPSEC_SECURE)); 2100 0 stevel } 2101 3448 dh155122 unlock_good_vif(&ipst->ips_vifs[vifi]); 2102 0 stevel if (tunnel_src != 0) 2103 0 stevel return (1); 2104 0 stevel else 2105 0 stevel return (0); 2106 0 stevel } 2107 0 stevel /* 2108 0 stevel * If I sourced this packet, it counts as output, else it was input. 
2109 0 stevel */ 2110 3448 dh155122 if (ipha->ipha_src == ipst->ips_vifs[vifi].v_lcl_addr.s_addr) { 2111 3448 dh155122 ipst->ips_vifs[vifi].v_pkt_out++; 2112 3448 dh155122 ipst->ips_vifs[vifi].v_bytes_out += plen; 2113 0 stevel } else { 2114 3448 dh155122 ipst->ips_vifs[vifi].v_pkt_in++; 2115 3448 dh155122 ipst->ips_vifs[vifi].v_bytes_in += plen; 2116 0 stevel } 2117 0 stevel mutex_enter(&rt->mfc_mutex); 2118 0 stevel rt->mfc_pkt_cnt++; 2119 0 stevel rt->mfc_byte_cnt += plen; 2120 0 stevel mutex_exit(&rt->mfc_mutex); 2121 3448 dh155122 unlock_good_vif(&ipst->ips_vifs[vifi]); 2122 0 stevel /* 2123 0 stevel * For each vif, decide if a copy of the packet should be forwarded. 2124 0 stevel * Forward if: 2125 0 stevel * - the vif threshold ttl is non-zero AND 2126 0 stevel * - the pkt ttl exceeds the vif's threshold 2127 0 stevel * A non-zero mfc_ttl indicates that the vif is part of 2128 0 stevel * the output set for the mfc entry. 2129 0 stevel */ 2130 3448 dh155122 mutex_enter(&ipst->ips_numvifs_mutex); 2131 3448 dh155122 num_of_vifs = ipst->ips_numvifs; 2132 3448 dh155122 mutex_exit(&ipst->ips_numvifs_mutex); 2133 3448 dh155122 for (vifp = ipst->ips_vifs, vifi = 0; 2134 3448 dh155122 vifi < num_of_vifs; 2135 3448 dh155122 vifp++, vifi++) { 2136 0 stevel if (!lock_good_vif(vifp)) 2137 0 stevel continue; 2138 0 stevel if ((rt->mfc_ttls[vifi] > 0) && 2139 0 stevel (ipha->ipha_ttl > rt->mfc_ttls[vifi])) { 2140 0 stevel /* 2141 0 stevel * lock_good_vif should not have succedded if 2142 0 stevel * v_ipif is null. 
2143 0 stevel */ 2144 0 stevel ASSERT(vifp->v_ipif != NULL); 2145 0 stevel vifp->v_pkt_out++; 2146 0 stevel vifp->v_bytes_out += plen; 2147 0 stevel MC_SEND(ipha, mp, vifp, dst); 2148 3448 dh155122 ipst->ips_mrtstat->mrts_fwd_out++; 2149 0 stevel } 2150 0 stevel unlock_good_vif(vifp); 2151 0 stevel } 2152 0 stevel if (tunnel_src != 0) 2153 0 stevel return (1); 2154 0 stevel else 2155 0 stevel return (0); 2156 0 stevel } 2157 0 stevel 2158 0 stevel /* 2159 0 stevel * Send the packet on physical interface. 2160 0 stevel * Caller assumes can continue to use mp on return. 2161 0 stevel */ 2162 0 stevel /* ARGSUSED */ 2163 0 stevel static void 2164 0 stevel phyint_send(ipha_t *ipha, mblk_t *mp, struct vif *vifp, ipaddr_t dst) 2165 0 stevel { 2166 0 stevel mblk_t *mp_copy; 2167 3448 dh155122 ip_stack_t *ipst = vifp->v_ipif->ipif_ill->ill_ipst; 2168 5240 nordmark conn_t *mrouter = ipst->ips_ip_g_mrouter; 2169 0 stevel 2170 0 stevel /* Make a new reference to the packet */ 2171 0 stevel mp_copy = copymsg(mp); /* TODO could copy header and dup rest */ 2172 0 stevel if (mp_copy == NULL) { 2173 3448 dh155122 ipst->ips_mrtstat->mrts_fwd_drop++; 2174 0 stevel ip1dbg(("phyint_send: out of memory for mblk, mp_copy\n")); 2175 0 stevel return; 2176 0 stevel } 2177 0 stevel if (vifp->v_rate_limit <= 0) 2178 0 stevel tbf_send_packet(vifp, mp_copy); 2179 0 stevel else { 2180 3448 dh155122 if (ipst->ips_ip_mrtdebug > 1) { 2181 5240 nordmark (void) mi_strlog(mrouter->conn_rq, 1, SL_TRACE, 2182 0 stevel "phyint_send: tbf_contr rate %d " 2183 0 stevel "vifp 0x%p mp 0x%p dst 0x%x", 2184 0 stevel vifp->v_rate_limit, (void *)vifp, (void *)mp, dst); 2185 0 stevel } 2186 0 stevel tbf_control(vifp, mp_copy, (ipha_t *)mp_copy->b_rptr); 2187 0 stevel } 2188 0 stevel } 2189 0 stevel 2190 0 stevel /* 2191 0 stevel * Send the whole packet for REGISTER encapsulation to PIM daemon 2192 0 stevel * Caller assumes it can continue to use mp on return. 
2193 0 stevel */ 2194 0 stevel /* ARGSUSED */ 2195 0 stevel static void 2196 0 stevel register_send(ipha_t *ipha, mblk_t *mp, struct vif *vifp, ipaddr_t dst) 2197 0 stevel { 2198 0 stevel struct igmpmsg *im; 2199 0 stevel mblk_t *mp_copy; 2200 0 stevel ipha_t *ipha_copy; 2201 11042 Erik ill_t *ill = vifp->v_ipif->ipif_ill; 2202 11042 Erik ip_stack_t *ipst = ill->ill_ipst; 2203 5240 nordmark conn_t *mrouter = ipst->ips_ip_g_mrouter; 2204 11042 Erik ip_recv_attr_t iras; 2205 0 stevel 2206 3448 dh155122 if (ipst->ips_ip_mrtdebug > 1) { 2207 5240 nordmark (void) mi_strlog(mrouter->conn_rq, 1, SL_TRACE, 2208 0 stevel "register_send: src %x, dst %x\n", 2209 0 stevel ntohl(ipha->ipha_src), ntohl(ipha->ipha_dst)); 2210 0 stevel } 2211 0 stevel 2212 0 stevel /* 2213 0 stevel * Copy the old packet & pullup its IP header into the new mblk_t so we 2214 0 stevel * can modify it. Try to fill the new mblk_t since if we don't the 2215 0 stevel * ethernet driver will. 2216 0 stevel */ 2217 0 stevel mp_copy = allocb(sizeof (struct igmpmsg) + sizeof (ipha_t), BPRI_MED); 2218 0 stevel if (mp_copy == NULL) { 2219 3448 dh155122 ++ipst->ips_mrtstat->mrts_pim_nomemory; 2220 3448 dh155122 if (ipst->ips_ip_mrtdebug > 3) { 2221 5240 nordmark (void) mi_strlog(mrouter->conn_rq, 1, SL_TRACE, 2222 0 stevel "register_send: allocb failure."); 2223 0 stevel } 2224 0 stevel return; 2225 0 stevel } 2226 0 stevel 2227 0 stevel /* 2228 0 stevel * Bump write pointer to account for igmpmsg being added. 2229 0 stevel */ 2230 0 stevel mp_copy->b_wptr = mp_copy->b_rptr + sizeof (struct igmpmsg); 2231 0 stevel 2232 0 stevel /* 2233 0 stevel * Chain packet to new mblk_t. 
2234 0 stevel */ 2235 0 stevel if ((mp_copy->b_cont = copymsg(mp)) == NULL) { 2236 3448 dh155122 ++ipst->ips_mrtstat->mrts_pim_nomemory; 2237 3448 dh155122 if (ipst->ips_ip_mrtdebug > 3) { 2238 5240 nordmark (void) mi_strlog(mrouter->conn_rq, 1, SL_TRACE, 2239 0 stevel "register_send: copymsg failure."); 2240 0 stevel } 2241 0 stevel freeb(mp_copy); 2242 0 stevel return; 2243 0 stevel } 2244 0 stevel 2245 0 stevel /* 2246 5240 nordmark * icmp_input() asserts that IP version field is set to an 2247 0 stevel * appropriate version. Hence, the struct igmpmsg that this really 2248 0 stevel * becomes, needs to have the correct IP version field. 2249 0 stevel */ 2250 0 stevel ipha_copy = (ipha_t *)mp_copy->b_rptr; 2251 0 stevel *ipha_copy = multicast_encap_iphdr; 2252 0 stevel 2253 0 stevel /* 2254 0 stevel * The kernel uses the struct igmpmsg header to encode the messages to 2255 0 stevel * the multicast routing daemon. Fill in the fields in the header 2256 0 stevel * starting with the message type which is IGMPMSG_WHOLEPKT 2257 0 stevel */ 2258 0 stevel im = (struct igmpmsg *)mp_copy->b_rptr; 2259 0 stevel im->im_msgtype = IGMPMSG_WHOLEPKT; 2260 0 stevel im->im_src.s_addr = ipha->ipha_src; 2261 0 stevel im->im_dst.s_addr = ipha->ipha_dst; 2262 0 stevel 2263 0 stevel /* 2264 0 stevel * Must Be Zero. This is because the struct igmpmsg is really an IP 2265 0 stevel * header with renamed fields and the multicast routing daemon uses 2266 0 stevel * an ipha_protocol (aka im_mbz) of 0 to distinguish these messages. 2267 0 stevel */ 2268 0 stevel im->im_mbz = 0; 2269 0 stevel 2270 3448 dh155122 ++ipst->ips_mrtstat->mrts_upcalls; 2271 11042 Erik if (IPCL_IS_NONSTR(mrouter) ? 
mrouter->conn_flow_cntrld : 2272 11042 Erik !canputnext(mrouter->conn_rq)) { 2273 3448 dh155122 ++ipst->ips_mrtstat->mrts_pim_regsend_drops; 2274 3448 dh155122 if (ipst->ips_ip_mrtdebug > 3) { 2275 5240 nordmark (void) mi_strlog(mrouter->conn_rq, 1, SL_TRACE, 2276 0 stevel "register_send: register upcall failure."); 2277 0 stevel } 2278 11042 Erik BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 2279 11042 Erik ip_drop_input("mrts_pim_regsend_drops", mp_copy, ill); 2280 0 stevel freemsg(mp_copy); 2281 0 stevel } else { 2282 5240 nordmark /* Pass to RAWIP */ 2283 11042 Erik bzero(&iras, sizeof (iras)); 2284 11042 Erik iras.ira_flags = IRAF_IS_IPV4; 2285 11042 Erik iras.ira_ip_hdr_length = sizeof (ipha_t); 2286 11042 Erik iras.ira_pktlen = msgdsize(mp_copy); 2287 11042 Erik (mrouter->conn_recv)(mrouter, mp_copy, NULL, &iras); 2288 11042 Erik ASSERT(!(iras.ira_flags & IRAF_IPSEC_SECURE)); 2289 0 stevel } 2290 0 stevel } 2291 0 stevel 2292 0 stevel /* 2293 0 stevel * pim_validate_cksum handles verification of the checksum in the 2294 0 stevel * pim header. For PIM Register packets, the checksum is calculated 2295 0 stevel * across the PIM header only. For all other packets, the checksum 2296 0 stevel * is for the PIM header and remainder of the packet. 2297 0 stevel * 2298 0 stevel * returns: B_TRUE, if checksum is okay. 2299 0 stevel * B_FALSE, if checksum is not valid. 
2300 0 stevel */ 2301 0 stevel static boolean_t 2302 0 stevel pim_validate_cksum(mblk_t *mp, ipha_t *ip, struct pim *pimp) 2303 0 stevel { 2304 0 stevel mblk_t *mp_dup; 2305 0 stevel 2306 0 stevel if ((mp_dup = dupmsg(mp)) == NULL) 2307 0 stevel return (B_FALSE); 2308 0 stevel 2309 0 stevel mp_dup->b_rptr += IPH_HDR_LENGTH(ip); 2310 0 stevel if (pimp->pim_type == PIM_REGISTER) 2311 0 stevel mp_dup->b_wptr = mp_dup->b_rptr + PIM_MINLEN; 2312 0 stevel if (IP_CSUM(mp_dup, 0, 0)) { 2313 0 stevel freemsg(mp_dup); 2314 0 stevel return (B_FALSE); 2315 0 stevel } 2316 0 stevel freemsg(mp_dup); 2317 0 stevel return (B_TRUE); 2318 0 stevel } 2319 0 stevel 2320 0 stevel /* 2321 11042 Erik * Process PIM protocol packets i.e. IP Protocol 103. 2322 11042 Erik * Register messages are decapsulated and sent onto multicast forwarding. 2323 11042 Erik * 2324 11042 Erik * Return NULL for a bad packet that is discarded here. 2325 11042 Erik * Return mp if the message is OK and should be handed to "raw" receivers. 2326 11042 Erik * Callers of pim_input() may need to reinitialize variables that were copied 2327 11042 Erik * from the mblk as this calls pullupmsg(). 2328 0 stevel */ 2329 11042 Erik mblk_t * 2330 11042 Erik pim_input(mblk_t *mp, ip_recv_attr_t *ira) 2331 0 stevel { 2332 0 stevel ipha_t *eip, *ip; 2333 0 stevel int iplen, pimlen, iphlen; 2334 0 stevel struct pim *pimp; /* pointer to a pim struct */ 2335 0 stevel uint32_t *reghdr; 2336 11042 Erik ill_t *ill = ira->ira_ill; 2337 3448 dh155122 ip_stack_t *ipst = ill->ill_ipst; 2338 5240 nordmark conn_t *mrouter = ipst->ips_ip_g_mrouter; 2339 0 stevel 2340 0 stevel /* 2341 0 stevel * Pullup the msg for PIM protocol processing. 
2342 0 stevel */ 2343 0 stevel if (pullupmsg(mp, -1) == 0) { 2344 3448 dh155122 ++ipst->ips_mrtstat->mrts_pim_nomemory; 2345 11042 Erik BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 2346 11042 Erik ip_drop_input("mrts_pim_nomemory", mp, ill); 2347 0 stevel freemsg(mp); 2348 11042 Erik return (NULL); 2349 0 stevel } 2350 0 stevel 2351 0 stevel ip = (ipha_t *)mp->b_rptr; 2352 0 stevel iplen = ip->ipha_length; 2353 0 stevel iphlen = IPH_HDR_LENGTH(ip); 2354 0 stevel pimlen = ntohs(iplen) - iphlen; 2355 0 stevel 2356 0 stevel /* 2357 0 stevel * Validate lengths 2358 0 stevel */ 2359 0 stevel if (pimlen < PIM_MINLEN) { 2360 3448 dh155122 ++ipst->ips_mrtstat->mrts_pim_malformed; 2361 3448 dh155122 if (ipst->ips_ip_mrtdebug > 1) { 2362 5240 nordmark (void) mi_strlog(mrouter->conn_rq, 1, SL_TRACE, 2363 0 stevel "pim_input: length not at least minlen"); 2364 0 stevel } 2365 11042 Erik BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 2366 11042 Erik ip_drop_input("mrts_pim_malformed", mp, ill); 2367 0 stevel freemsg(mp); 2368 11042 Erik return (NULL); 2369 0 stevel } 2370 0 stevel 2371 0 stevel /* 2372 0 stevel * Point to the PIM header. 2373 0 stevel */ 2374 0 stevel pimp = (struct pim *)((caddr_t)ip + iphlen); 2375 0 stevel 2376 0 stevel /* 2377 0 stevel * Check the version number. 
2378 0 stevel */ 2379 0 stevel if (pimp->pim_vers != PIM_VERSION) { 2380 3448 dh155122 ++ipst->ips_mrtstat->mrts_pim_badversion; 2381 3448 dh155122 if (ipst->ips_ip_mrtdebug > 1) { 2382 5240 nordmark (void) mi_strlog(mrouter->conn_rq, 1, SL_TRACE, 2383 0 stevel "pim_input: unknown version of PIM"); 2384 0 stevel } 2385 11042 Erik BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 2386 11042 Erik ip_drop_input("mrts_pim_badversion", mp, ill); 2387 0 stevel freemsg(mp); 2388 11042 Erik return (NULL); 2389 0 stevel } 2390 0 stevel 2391 0 stevel /* 2392 0 stevel * Validate the checksum 2393 0 stevel */ 2394 0 stevel if (!pim_validate_cksum(mp, ip, pimp)) { 2395 3448 dh155122 ++ipst->ips_mrtstat->mrts_pim_rcv_badcsum; 2396 3448 dh155122 if (ipst->ips_ip_mrtdebug > 1) { 2397 5240 nordmark (void) mi_strlog(mrouter->conn_rq, 1, SL_TRACE, 2398 0 stevel "pim_input: invalid checksum"); 2399 0 stevel } 2400 11042 Erik BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 2401 11042 Erik ip_drop_input("pim_rcv_badcsum", mp, ill); 2402 0 stevel freemsg(mp); 2403 11042 Erik return (NULL); 2404 0 stevel } 2405 0 stevel 2406 0 stevel if (pimp->pim_type != PIM_REGISTER) 2407 11042 Erik return (mp); 2408 0 stevel 2409 0 stevel reghdr = (uint32_t *)(pimp + 1); 2410 0 stevel eip = (ipha_t *)(reghdr + 1); 2411 0 stevel 2412 0 stevel /* 2413 0 stevel * check if the inner packet is destined to mcast group 2414 0 stevel */ 2415 0 stevel if (!CLASSD(eip->ipha_dst)) { 2416 3448 dh155122 ++ipst->ips_mrtstat->mrts_pim_badregisters; 2417 3448 dh155122 if (ipst->ips_ip_mrtdebug > 1) { 2418 5240 nordmark (void) mi_strlog(mrouter->conn_rq, 1, SL_TRACE, 2419 0 stevel "pim_input: Inner pkt not mcast .. 
!"); 2420 0 stevel } 2421 11042 Erik BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 2422 11042 Erik ip_drop_input("mrts_pim_badregisters", mp, ill); 2423 0 stevel freemsg(mp); 2424 11042 Erik return (NULL); 2425 0 stevel } 2426 3448 dh155122 if (ipst->ips_ip_mrtdebug > 1) { 2427 5240 nordmark (void) mi_strlog(mrouter->conn_rq, 1, SL_TRACE, 2428 0 stevel "register from %x, to %x, len %d", 2429 0 stevel ntohl(eip->ipha_src), 2430 0 stevel ntohl(eip->ipha_dst), 2431 0 stevel ntohs(eip->ipha_length)); 2432 0 stevel } 2433 0 stevel /* 2434 0 stevel * If the null register bit is not set, decapsulate 2435 0 stevel * the packet before forwarding it. 2436 11042 Erik * Avoid this in no register vif 2437 0 stevel */ 2438 11042 Erik if (!(ntohl(*reghdr) & PIM_NULL_REGISTER) && 2439 11042 Erik ipst->ips_reg_vif_num != ALL_VIFS) { 2440 0 stevel mblk_t *mp_copy; 2441 11042 Erik uint_t saved_pktlen; 2442 0 stevel 2443 0 stevel /* Copy the message */ 2444 0 stevel if ((mp_copy = copymsg(mp)) == NULL) { 2445 3448 dh155122 ++ipst->ips_mrtstat->mrts_pim_nomemory; 2446 11042 Erik BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 2447 11042 Erik ip_drop_input("mrts_pim_nomemory", mp, ill); 2448 0 stevel freemsg(mp); 2449 11042 Erik return (NULL); 2450 0 stevel } 2451 0 stevel 2452 0 stevel /* 2453 0 stevel * Decapsulate the packet and give it to 2454 0 stevel * register_mforward. 
2455 0 stevel */ 2456 11042 Erik mp_copy->b_rptr += iphlen + sizeof (pim_t) + sizeof (*reghdr); 2457 11042 Erik saved_pktlen = ira->ira_pktlen; 2458 11042 Erik ira->ira_pktlen -= iphlen + sizeof (pim_t) + sizeof (*reghdr); 2459 11042 Erik if (register_mforward(mp_copy, ira) != 0) { 2460 11042 Erik /* register_mforward already called ip_drop_input */ 2461 0 stevel freemsg(mp); 2462 11042 Erik ira->ira_pktlen = saved_pktlen; 2463 11042 Erik return (NULL); 2464 0 stevel } 2465 11042 Erik ira->ira_pktlen = saved_pktlen; 2466 0 stevel } 2467 0 stevel 2468 0 stevel /* 2469 0 stevel * Pass all valid PIM packets up to any process(es) listening on a raw 2470 0 stevel * PIM socket. For Solaris it is done right after pim_input() is 2471 0 stevel * called. 2472 0 stevel */ 2473 11042 Erik return (mp); 2474 0 stevel } 2475 0 stevel 2476 0 stevel /* 2477 0 stevel * PIM sparse mode hook. Called by pim_input after decapsulating 2478 0 stevel * the packet. Loop back the packet, as if we have received it. 2479 0 stevel * In pim_input() we have to check if the destination is a multicast address. 2480 0 stevel */ 2481 0 stevel static int 2482 11042 Erik register_mforward(mblk_t *mp, ip_recv_attr_t *ira) 2483 0 stevel { 2484 11042 Erik ire_t *ire; 2485 11042 Erik ipha_t *ipha = (ipha_t *)mp->b_rptr; 2486 11042 Erik ill_t *ill = ira->ira_ill; 2487 3448 dh155122 ip_stack_t *ipst = ill->ill_ipst; 2488 5240 nordmark conn_t *mrouter = ipst->ips_ip_g_mrouter; 2489 0 stevel 2490 3448 dh155122 ASSERT(ipst->ips_reg_vif_num <= ipst->ips_numvifs); 2491 3448 dh155122 2492 3448 dh155122 if (ipst->ips_ip_mrtdebug > 3) { 2493 5240 nordmark (void) mi_strlog(mrouter->conn_rq, 1, SL_TRACE, 2494 0 stevel "register_mforward: src %x, dst %x\n", 2495 0 stevel ntohl(ipha->ipha_src), ntohl(ipha->ipha_dst)); 2496 0 stevel } 2497 0 stevel /* 2498 0 stevel * Need to pass in to ip_mforward() the information that the 2499 11042 Erik * packet has arrived on the register_vif. 
We mark it with 2500 11042 Erik * the IRAF_PIM_REGISTER attribute. 2501 11042 Erik * pim_input verified that the (inner) destination is multicast, 2502 11042 Erik * hence we skip the generic code in ip_input. 2503 0 stevel */ 2504 11042 Erik ira->ira_flags |= IRAF_PIM_REGISTER; 2505 3448 dh155122 ++ipst->ips_mrtstat->mrts_pim_regforwards; 2506 11042 Erik 2507 11042 Erik if (!CLASSD(ipha->ipha_dst)) { 2508 11042 Erik ire = ire_route_recursive_v4(ipha->ipha_dst, 0, NULL, ALL_ZONES, 2509 11042 Erik ira->ira_tsl, MATCH_IRE_SECATTR, B_TRUE, 0, ipst, NULL, 2510 11042 Erik NULL, NULL); 2511 11042 Erik } else { 2512 11042 Erik ire = ire_multicast(ill); 2513 11042 Erik } 2514 11042 Erik ASSERT(ire != NULL); 2515 11042 Erik /* Normally this will return the IRE_MULTICAST */ 2516 11042 Erik if (ire->ire_flags & (RTF_REJECT|RTF_BLACKHOLE)) { 2517 11042 Erik BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 2518 11042 Erik ip_drop_input("mrts_pim RTF_REJECT", mp, ill); 2519 11042 Erik freemsg(mp); 2520 11042 Erik ire_refrele(ire); 2521 11042 Erik return (-1); 2522 11042 Erik } 2523 11042 Erik ASSERT(ire->ire_type & IRE_MULTICAST); 2524 11042 Erik (*ire->ire_recvfn)(ire, mp, ipha, ira); 2525 11042 Erik ire_refrele(ire); 2526 11042 Erik 2527 0 stevel return (0); 2528 0 stevel } 2529 0 stevel 2530 0 stevel /* 2531 0 stevel * Send an encapsulated packet. 2532 0 stevel * Caller assumes can continue to use mp when routine returns. 
2533 0 stevel */ 2534 0 stevel /* ARGSUSED */ 2535 0 stevel static void 2536 0 stevel encap_send(ipha_t *ipha, mblk_t *mp, struct vif *vifp, ipaddr_t dst) 2537 0 stevel { 2538 0 stevel mblk_t *mp_copy; 2539 0 stevel ipha_t *ipha_copy; 2540 0 stevel size_t len; 2541 3448 dh155122 ip_stack_t *ipst = vifp->v_ipif->ipif_ill->ill_ipst; 2542 5240 nordmark conn_t *mrouter = ipst->ips_ip_g_mrouter; 2543 0 stevel 2544 3448 dh155122 if (ipst->ips_ip_mrtdebug > 1) { 2545 5240 nordmark (void) mi_strlog(mrouter->conn_rq, 1, SL_TRACE, 2546 3448 dh155122 "encap_send: vif %ld enter", 2547 3448 dh155122 (ptrdiff_t)(vifp - ipst->ips_vifs)); 2548 0 stevel } 2549 0 stevel len = ntohs(ipha->ipha_length); 2550 0 stevel 2551 0 stevel /* 2552 0 stevel * Copy the old packet & pullup it's IP header into the 2553 0 stevel * new mbuf so we can modify it. Try to fill the new 2554 0 stevel * mbuf since if we don't the ethernet driver will. 2555 0 stevel */ 2556 0 stevel mp_copy = allocb(32 + sizeof (multicast_encap_iphdr), BPRI_MED); 2557 0 stevel if (mp_copy == NULL) 2558 0 stevel return; 2559 0 stevel mp_copy->b_rptr += 32; 2560 0 stevel mp_copy->b_wptr = mp_copy->b_rptr + sizeof (multicast_encap_iphdr); 2561 0 stevel if ((mp_copy->b_cont = copymsg(mp)) == NULL) { 2562 0 stevel freeb(mp_copy); 2563 0 stevel return; 2564 0 stevel } 2565 0 stevel 2566 0 stevel /* 2567 0 stevel * Fill in the encapsulating IP header. 2568 0 stevel * Remote tunnel dst in rmt_addr, from add_vif(). 2569 0 stevel */ 2570 0 stevel ipha_copy = (ipha_t *)mp_copy->b_rptr; 2571 0 stevel *ipha_copy = multicast_encap_iphdr; 2572 0 stevel ASSERT((len + sizeof (ipha_t)) <= IP_MAXPACKET); 2573 0 stevel ipha_copy->ipha_length = htons(len + sizeof (ipha_t)); 2574 0 stevel ipha_copy->ipha_src = vifp->v_lcl_addr.s_addr; 2575 0 stevel ipha_copy->ipha_dst = vifp->v_rmt_addr.s_addr; 2576 0 stevel ASSERT(ipha_copy->ipha_ident == 0); 2577 0 stevel 2578 0 stevel /* Turn the encapsulated IP header back into a valid one. 
*/ 2579 0 stevel ipha = (ipha_t *)mp_copy->b_cont->b_rptr; 2580 0 stevel ipha->ipha_ttl--; 2581 0 stevel ipha->ipha_hdr_checksum = 0; 2582 0 stevel ipha->ipha_hdr_checksum = ip_csum_hdr(ipha); 2583 0 stevel 2584 11042 Erik ipha_copy->ipha_ttl = ipha->ipha_ttl; 2585 11042 Erik 2586 3448 dh155122 if (ipst->ips_ip_mrtdebug > 1) { 2587 5240 nordmark (void) mi_strlog(mrouter->conn_rq, 1, SL_TRACE, 2588 0 stevel "encap_send: group 0x%x", ntohl(ipha->ipha_dst)); 2589 0 stevel } 2590 0 stevel if (vifp->v_rate_limit <= 0) 2591 0 stevel tbf_send_packet(vifp, mp_copy); 2592 0 stevel else 2593 0 stevel /* ipha is from the original header */ 2594 0 stevel tbf_control(vifp, mp_copy, ipha); 2595 0 stevel } 2596 0 stevel 2597 0 stevel /* 2598 11042 Erik * De-encapsulate a packet and feed it back through IP input if it 2599 11042 Erik * matches one of our multicast tunnels. 2600 11042 Erik * 2601 0 stevel * This routine is called whenever IP gets a packet with prototype 2602 11042 Erik * IPPROTO_ENCAP and a local destination address and the packet didn't 2603 11042 Erik * match one of our configured IP-in-IP tunnels. 
2604 0 stevel */ 2605 0 stevel void 2606 11042 Erik ip_mroute_decap(mblk_t *mp, ip_recv_attr_t *ira) 2607 0 stevel { 2608 0 stevel ipha_t *ipha = (ipha_t *)mp->b_rptr; 2609 0 stevel ipha_t *ipha_encap; 2610 0 stevel int hlen = IPH_HDR_LENGTH(ipha); 2611 11042 Erik int hlen_encap; 2612 0 stevel ipaddr_t src; 2613 0 stevel struct vif *vifp; 2614 11042 Erik ire_t *ire; 2615 11042 Erik ill_t *ill = ira->ira_ill; 2616 3448 dh155122 ip_stack_t *ipst = ill->ill_ipst; 2617 5240 nordmark conn_t *mrouter = ipst->ips_ip_g_mrouter; 2618 11042 Erik 2619 11042 Erik /* Make sure we have all of the inner header */ 2620 11042 Erik ipha_encap = (ipha_t *)((char *)ipha + hlen); 2621 11042 Erik if (mp->b_wptr - mp->b_rptr < hlen + IP_SIMPLE_HDR_LENGTH) { 2622 11042 Erik ipha = ip_pullup(mp, hlen + IP_SIMPLE_HDR_LENGTH, ira); 2623 11042 Erik if (ipha == NULL) { 2624 11042 Erik ipst->ips_mrtstat->mrts_bad_tunnel++; 2625 11042 Erik BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 2626 11042 Erik ip_drop_input("ip_mroute_decap: too short", mp, ill); 2627 11042 Erik freemsg(mp); 2628 11042 Erik return; 2629 11042 Erik } 2630 11042 Erik ipha_encap = (ipha_t *)((char *)ipha + hlen); 2631 11042 Erik } 2632 11042 Erik hlen_encap = IPH_HDR_LENGTH(ipha_encap); 2633 11042 Erik if (mp->b_wptr - mp->b_rptr < hlen + hlen_encap) { 2634 11042 Erik ipha = ip_pullup(mp, hlen + hlen_encap, ira); 2635 11042 Erik if (ipha == NULL) { 2636 11042 Erik ipst->ips_mrtstat->mrts_bad_tunnel++; 2637 11042 Erik BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 2638 11042 Erik ip_drop_input("ip_mroute_decap: too short", mp, ill); 2639 11042 Erik freemsg(mp); 2640 11042 Erik return; 2641 11042 Erik } 2642 11042 Erik ipha_encap = (ipha_t *)((char *)ipha + hlen); 2643 11042 Erik } 2644 0 stevel 2645 0 stevel /* 2646 0 stevel * Dump the packet if it's not to a multicast destination or if 2647 0 stevel * we don't have an encapsulating tunnel with the source. 
2648 0 stevel * Note: This code assumes that the remote site IP address 2649 0 stevel * uniquely identifies the tunnel (i.e., that this site has 2650 0 stevel * at most one tunnel with the remote site). 2651 0 stevel */ 2652 0 stevel if (!CLASSD(ipha_encap->ipha_dst)) { 2653 3448 dh155122 ipst->ips_mrtstat->mrts_bad_tunnel++; 2654 0 stevel ip1dbg(("ip_mroute_decap: bad tunnel\n")); 2655 11042 Erik BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 2656 11042 Erik ip_drop_input("mrts_bad_tunnel", mp, ill); 2657 0 stevel freemsg(mp); 2658 0 stevel return; 2659 0 stevel } 2660 0 stevel src = (ipaddr_t)ipha->ipha_src; 2661 3448 dh155122 mutex_enter(&ipst->ips_last_encap_lock); 2662 3448 dh155122 if (src != ipst->ips_last_encap_src) { 2663 0 stevel struct vif *vife; 2664 0 stevel 2665 3448 dh155122 vifp = ipst->ips_vifs; 2666 3448 dh155122 vife = vifp + ipst->ips_numvifs; 2667 3448 dh155122 ipst->ips_last_encap_src = src; 2668 3448 dh155122 ipst->ips_last_encap_vif = 0; 2669 0 stevel for (; vifp < vife; ++vifp) { 2670 0 stevel if (!lock_good_vif(vifp)) 2671 0 stevel continue; 2672 0 stevel if (vifp->v_rmt_addr.s_addr == src) { 2673 0 stevel if (vifp->v_flags & VIFF_TUNNEL) 2674 3448 dh155122 ipst->ips_last_encap_vif = vifp; 2675 3448 dh155122 if (ipst->ips_ip_mrtdebug > 1) { 2676 5240 nordmark (void) mi_strlog(mrouter->conn_rq, 2677 0 stevel 1, SL_TRACE, 2678 0 stevel "ip_mroute_decap: good tun " 2679 0 stevel "vif %ld with %x", 2680 3448 dh155122 (ptrdiff_t)(vifp - ipst->ips_vifs), 2681 0 stevel ntohl(src)); 2682 0 stevel } 2683 0 stevel unlock_good_vif(vifp); 2684 0 stevel break; 2685 0 stevel } 2686 0 stevel unlock_good_vif(vifp); 2687 0 stevel } 2688 0 stevel } 2689 3448 dh155122 if ((vifp = ipst->ips_last_encap_vif) == 0) { 2690 3448 dh155122 mutex_exit(&ipst->ips_last_encap_lock); 2691 3448 dh155122 ipst->ips_mrtstat->mrts_bad_tunnel++; 2692 11042 Erik BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 2693 11042 Erik ip_drop_input("mrts_bad_tunnel", mp, ill); 2694 0 
stevel freemsg(mp); 2695 0 stevel ip1dbg(("ip_mroute_decap: vif %ld no tunnel with %x\n", 2696 3448 dh155122 (ptrdiff_t)(vifp - ipst->ips_vifs), ntohl(src))); 2697 0 stevel return; 2698 0 stevel } 2699 3448 dh155122 mutex_exit(&ipst->ips_last_encap_lock); 2700 0 stevel 2701 0 stevel /* 2702 0 stevel * Need to pass in the tunnel source to ip_mforward (so that it can 2703 11042 Erik * verify that the packet arrived over the correct vif.) 2704 0 stevel */ 2705 11042 Erik ira->ira_flags |= IRAF_MROUTE_TUNNEL_SET; 2706 11042 Erik ira->ira_mroute_tunnel = src; 2707 0 stevel mp->b_rptr += hlen; 2708 11042 Erik ira->ira_pktlen -= hlen; 2709 11042 Erik ira->ira_ip_hdr_length = hlen_encap; 2710 11042 Erik 2711 11042 Erik /* 2712 11042 Erik * We don't redo any of the filtering in ill_input_full_v4 and we 2713 11042 Erik * have checked that all of ipha_encap and any IP options are 2714 11042 Erik * pulled up. Hence we call ire_recv_multicast_v4 directly. 2715 11042 Erik * However, we have to check for RSVP as in ip_input_full_v4 2716 11042 Erik * and if so we pass it to ire_recv_broadcast_v4 for local delivery 2717 11042 Erik * to the rsvpd. 
2718 11042 Erik */ 2719 11042 Erik if (ipha_encap->ipha_protocol == IPPROTO_RSVP && 2720 11042 Erik ipst->ips_ipcl_proto_fanout_v4[IPPROTO_RSVP].connf_head != NULL) { 2721 11042 Erik ire = ire_route_recursive_v4(INADDR_BROADCAST, 0, ill, 2722 11042 Erik ALL_ZONES, ira->ira_tsl, MATCH_IRE_ILL|MATCH_IRE_SECATTR, 2723 11042 Erik B_TRUE, 0, ipst, NULL, NULL, NULL); 2724 11042 Erik } else { 2725 11042 Erik ire = ire_multicast(ill); 2726 11042 Erik } 2727 11042 Erik ASSERT(ire != NULL); 2728 11042 Erik /* Normally this will return the IRE_MULTICAST or IRE_BROADCAST */ 2729 11042 Erik if (ire->ire_flags & (RTF_REJECT|RTF_BLACKHOLE)) { 2730 11042 Erik BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 2731 11042 Erik ip_drop_input("ip_mroute_decap: RTF_REJECT", mp, ill); 2732 11042 Erik freemsg(mp); 2733 11042 Erik ire_refrele(ire); 2734 11042 Erik return; 2735 11042 Erik } 2736 11042 Erik ire->ire_ib_pkt_count++; 2737 11042 Erik ASSERT(ire->ire_type & (IRE_MULTICAST|IRE_BROADCAST)); 2738 11042 Erik (*ire->ire_recvfn)(ire, mp, ipha_encap, ira); 2739 11042 Erik ire_refrele(ire); 2740 0 stevel } 2741 0 stevel 2742 0 stevel /* 2743 0 stevel * Remove all records with v_ipif == ipif. Called when an interface goes away 2744 0 stevel * (stream closed). Called as writer. 2745 0 stevel */ 2746 0 stevel void 2747 0 stevel reset_mrt_vif_ipif(ipif_t *ipif) 2748 0 stevel { 2749 0 stevel vifi_t vifi, tmp_vifi; 2750 0 stevel vifi_t num_of_vifs; 2751 3448 dh155122 ip_stack_t *ipst = ipif->ipif_ill->ill_ipst; 2752 0 stevel 2753 0 stevel /* Can't check vifi >= 0 since vifi_t is unsigned! 
*/ 2754 0 stevel 2755 3448 dh155122 mutex_enter(&ipst->ips_numvifs_mutex); 2756 3448 dh155122 num_of_vifs = ipst->ips_numvifs; 2757 3448 dh155122 mutex_exit(&ipst->ips_numvifs_mutex); 2758 0 stevel 2759 0 stevel for (vifi = num_of_vifs; vifi != 0; vifi--) { 2760 0 stevel tmp_vifi = vifi - 1; 2761 3448 dh155122 if (ipst->ips_vifs[tmp_vifi].v_ipif == ipif) { 2762 11042 Erik (void) del_vif(&tmp_vifi, ipst); 2763 0 stevel } 2764 0 stevel } 2765 0 stevel } 2766 0 stevel 2767 0 stevel /* Remove pending upcall msgs when ill goes away. Called by ill_delete. */ 2768 0 stevel void 2769 0 stevel reset_mrt_ill(ill_t *ill) 2770 0 stevel { 2771 11042 Erik struct mfc *rt; 2772 0 stevel struct rtdetq *rte; 2773 11042 Erik int i; 2774 3448 dh155122 ip_stack_t *ipst = ill->ill_ipst; 2775 5240 nordmark conn_t *mrouter = ipst->ips_ip_g_mrouter; 2776 11042 Erik timeout_id_t id; 2777 0 stevel 2778 0 stevel for (i = 0; i < MFCTBLSIZ; i++) { 2779 3448 dh155122 MFCB_REFHOLD(&ipst->ips_mfcs[i]); 2780 3448 dh155122 if ((rt = ipst->ips_mfcs[i].mfcb_mfc) != NULL) { 2781 3448 dh155122 if (ipst->ips_ip_mrtdebug > 1) { 2782 5240 nordmark (void) mi_strlog(mrouter->conn_rq, 1, 2783 3448 dh155122 SL_TRACE, 2784 0 stevel "reset_mrt_ill: mfctable [%d]", i); 2785 0 stevel } 2786 0 stevel while (rt != NULL) { 2787 0 stevel mutex_enter(&rt->mfc_mutex); 2788 0 stevel while ((rte = rt->mfc_rte) != NULL) { 2789 11042 Erik if (rte->ill == ill && 2790 11042 Erik (id = rt->mfc_timeout_id) != 0) { 2791 11042 Erik /* 2792 11042 Erik * Its ok to drop the lock, the 2793 11042 Erik * struct cannot be freed since 2794 11042 Erik * we have a ref on the hash 2795 11042 Erik * bucket. 
2796 11042 Erik */ 2797 11042 Erik mutex_exit(&rt->mfc_mutex); 2798 11042 Erik (void) untimeout(id); 2799 11042 Erik mutex_enter(&rt->mfc_mutex); 2800 11042 Erik } 2801 0 stevel if (rte->ill == ill) { 2802 3448 dh155122 if (ipst->ips_ip_mrtdebug > 1) { 2803 3448 dh155122 (void) mi_strlog( 2804 5240 nordmark mrouter->conn_rq, 2805 3448 dh155122 1, SL_TRACE, 2806 3448 dh155122 "reset_mrt_ill: " 2807 7240 rh87107 "ill 0x%p", (void *)ill); 2808 0 stevel } 2809 0 stevel rt->mfc_rte = rte->rte_next; 2810 0 stevel freemsg(rte->mp); 2811 0 stevel mi_free((char *)rte); 2812 0 stevel } 2813 0 stevel } 2814 0 stevel mutex_exit(&rt->mfc_mutex); 2815 0 stevel rt = rt->mfc_next; 2816 0 stevel } 2817 0 stevel } 2818 3448 dh155122 MFCB_REFRELE(&ipst->ips_mfcs[i]); 2819 0 stevel } 2820 0 stevel } 2821 0 stevel 2822 0 stevel /* 2823 0 stevel * Token bucket filter module. 2824 0 stevel * The ipha is for mcastgrp destination for phyint and encap. 2825 0 stevel */ 2826 0 stevel static void 2827 0 stevel tbf_control(struct vif *vifp, mblk_t *mp, ipha_t *ipha) 2828 0 stevel { 2829 0 stevel size_t p_len = msgdsize(mp); 2830 0 stevel struct tbf *t = vifp->v_tbf; 2831 0 stevel timeout_id_t id = 0; 2832 11042 Erik ill_t *ill = vifp->v_ipif->ipif_ill; 2833 11042 Erik ip_stack_t *ipst = ill->ill_ipst; 2834 5240 nordmark conn_t *mrouter = ipst->ips_ip_g_mrouter; 2835 0 stevel 2836 0 stevel /* Drop if packet is too large */ 2837 0 stevel if (p_len > MAX_BKT_SIZE) { 2838 3448 dh155122 ipst->ips_mrtstat->mrts_pkt2large++; 2839 11042 Erik BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutDiscards); 2840 11042 Erik ip_drop_output("tbf_control - too large", mp, ill); 2841 0 stevel freemsg(mp); 2842 0 stevel return; 2843 0 stevel } 2844 3448 dh155122 if (ipst->ips_ip_mrtdebug > 1) { 2845 5240 nordmark (void) mi_strlog(mrouter->conn_rq, 1, SL_TRACE, 2846 0 stevel "tbf_ctrl: SEND vif %ld, qlen %d, ipha_dst 0x%x", 2847 3448 dh155122 (ptrdiff_t)(vifp - ipst->ips_vifs), t->tbf_q_len, 2848 0 stevel 
ntohl(ipha->ipha_dst)); 2849 0 stevel } 2850 0 stevel 2851 0 stevel mutex_enter(&t->tbf_lock); 2852 0 stevel 2853 0 stevel tbf_update_tokens(vifp); 2854 0 stevel 2855 0 stevel /* 2856 0 stevel * If there are enough tokens, 2857 0 stevel * and the queue is empty, send this packet out. 2858 0 stevel */ 2859 3448 dh155122 if (ipst->ips_ip_mrtdebug > 1) { 2860 5240 nordmark (void) mi_strlog(mrouter->conn_rq, 1, SL_TRACE, 2861 0 stevel "tbf_control: vif %ld, TOKENS %d, pkt len %lu, qlen %d", 2862 3448 dh155122 (ptrdiff_t)(vifp - ipst->ips_vifs), t->tbf_n_tok, p_len, 2863 0 stevel t->tbf_q_len); 2864 0 stevel } 2865 0 stevel /* No packets are queued */ 2866 0 stevel if (t->tbf_q_len == 0) { 2867 0 stevel /* queue empty, send packet if enough tokens */ 2868 0 stevel if (p_len <= t->tbf_n_tok) { 2869 0 stevel t->tbf_n_tok -= p_len; 2870 0 stevel mutex_exit(&t->tbf_lock); 2871 0 stevel tbf_send_packet(vifp, mp); 2872 0 stevel return; 2873 0 stevel } else { 2874 0 stevel /* Queue packet and timeout till later */ 2875 0 stevel tbf_queue(vifp, mp); 2876 0 stevel ASSERT(vifp->v_timeout_id == 0); 2877 0 stevel vifp->v_timeout_id = timeout(tbf_reprocess_q, vifp, 2878 0 stevel TBF_REPROCESS); 2879 0 stevel } 2880 0 stevel } else if (t->tbf_q_len < t->tbf_max_q_len) { 2881 0 stevel /* Finite queue length, so queue pkts and process queue */ 2882 0 stevel tbf_queue(vifp, mp); 2883 0 stevel tbf_process_q(vifp); 2884 0 stevel } else { 2885 0 stevel /* Check that we have UDP header with IP header */ 2886 0 stevel size_t hdr_length = IPH_HDR_LENGTH(ipha) + 2887 5240 nordmark sizeof (struct udphdr); 2888 0 stevel 2889 0 stevel if ((mp->b_wptr - mp->b_rptr) < hdr_length) { 2890 0 stevel if (!pullupmsg(mp, hdr_length)) { 2891 11042 Erik BUMP_MIB(ill->ill_ip_mib, 2892 11042 Erik ipIfStatsOutDiscards); 2893 11042 Erik ip_drop_output("tbf_control - pullup", mp, ill); 2894 0 stevel freemsg(mp); 2895 0 stevel ip1dbg(("tbf_ctl: couldn't pullup udp hdr, " 2896 0 stevel "vif %ld src 0x%x dst 
0x%x\n", 2897 3448 dh155122 (ptrdiff_t)(vifp - ipst->ips_vifs), 2898 0 stevel ntohl(ipha->ipha_src), 2899 0 stevel ntohl(ipha->ipha_dst))); 2900 0 stevel mutex_exit(&vifp->v_tbf->tbf_lock); 2901 0 stevel return; 2902 0 stevel } else 2903 0 stevel /* Have to reassign ipha after pullupmsg */ 2904 0 stevel ipha = (ipha_t *)mp->b_rptr; 2905 0 stevel } 2906 0 stevel /* 2907 0 stevel * Queue length too much, 2908 0 stevel * try to selectively dq, or queue and process 2909 0 stevel */ 2910 0 stevel if (!tbf_dq_sel(vifp, ipha)) { 2911 3448 dh155122 ipst->ips_mrtstat->mrts_q_overflow++; 2912 11042 Erik BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutDiscards); 2913 11042 Erik ip_drop_output("mrts_q_overflow", mp, ill); 2914 0 stevel freemsg(mp); 2915 0 stevel } else { 2916 0 stevel tbf_queue(vifp, mp); 2917 0 stevel tbf_process_q(vifp); 2918 0 stevel } 2919 0 stevel } 2920 0 stevel if (t->tbf_q_len == 0) { 2921 0 stevel id = vifp->v_timeout_id; 2922 0 stevel vifp->v_timeout_id = 0; 2923 0 stevel } 2924 0 stevel mutex_exit(&vifp->v_tbf->tbf_lock); 2925 0 stevel if (id != 0) 2926 0 stevel (void) untimeout(id); 2927 0 stevel } 2928 0 stevel 2929 0 stevel /* 2930 0 stevel * Adds a packet to the tbf queue at the interface. 2931 0 stevel * The ipha is for mcastgrp destination for phyint and encap. 
2932 0 stevel */ 2933 0 stevel static void 2934 0 stevel tbf_queue(struct vif *vifp, mblk_t *mp) 2935 0 stevel { 2936 0 stevel struct tbf *t = vifp->v_tbf; 2937 3448 dh155122 ip_stack_t *ipst = vifp->v_ipif->ipif_ill->ill_ipst; 2938 5240 nordmark conn_t *mrouter = ipst->ips_ip_g_mrouter; 2939 0 stevel 2940 3448 dh155122 if (ipst->ips_ip_mrtdebug > 1) { 2941 5240 nordmark (void) mi_strlog(mrouter->conn_rq, 1, SL_TRACE, 2942 3448 dh155122 "tbf_queue: vif %ld", (ptrdiff_t)(vifp - ipst->ips_vifs)); 2943 0 stevel } 2944 0 stevel ASSERT(MUTEX_HELD(&t->tbf_lock)); 2945 0 stevel 2946 0 stevel if (t->tbf_t == NULL) { 2947 0 stevel /* Queue was empty */ 2948 0 stevel t->tbf_q = mp; 2949 0 stevel } else { 2950 0 stevel /* Insert at tail */ 2951 0 stevel t->tbf_t->b_next = mp; 2952 0 stevel } 2953 0 stevel /* set new tail pointer */ 2954 0 stevel t->tbf_t = mp; 2955 0 stevel 2956 0 stevel mp->b_next = mp->b_prev = NULL; 2957 0 stevel 2958 0 stevel t->tbf_q_len++; 2959 0 stevel } 2960 0 stevel 2961 0 stevel /* 2962 0 stevel * Process the queue at the vif interface. 2963 0 stevel * Drops the tbf_lock when sending packets. 2964 0 stevel * 2965 0 stevel * NOTE : The caller should quntimeout if the queue length is 0. 2966 0 stevel */ 2967 0 stevel static void 2968 0 stevel tbf_process_q(struct vif *vifp) 2969 0 stevel { 2970 0 stevel mblk_t *mp; 2971 0 stevel struct tbf *t = vifp->v_tbf; 2972 0 stevel size_t len; 2973 3448 dh155122 ip_stack_t *ipst = vifp->v_ipif->ipif_ill->ill_ipst; 2974 5240 nordmark conn_t *mrouter = ipst->ips_ip_g_mrouter; 2975 0 stevel 2976 3448 dh155122 if (ipst->ips_ip_mrtdebug > 1) {