Home | History | Annotate | Download | only in ip
      1      0    stevel /*
      2      0    stevel  * CDDL HEADER START
      3      0    stevel  *
      4      0    stevel  * The contents of this file are subject to the terms of the
      5   1676       jpk  * Common Development and Distribution License (the "License").
      6   1676       jpk  * You may not use this file except in compliance with the License.
      7      0    stevel  *
      8      0    stevel  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
      9      0    stevel  * or http://www.opensolaris.org/os/licensing.
     10      0    stevel  * See the License for the specific language governing permissions
     11      0    stevel  * and limitations under the License.
     12      0    stevel  *
     13      0    stevel  * When distributing Covered Code, include this CDDL HEADER in each
     14      0    stevel  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
     15      0    stevel  * If applicable, add the following below this CDDL HEADER, with the
     16      0    stevel  * fields enclosed by brackets "[]" replaced with your own identifying
     17      0    stevel  * information: Portions Copyright [yyyy] [name of copyright owner]
     18      0    stevel  *
     19      0    stevel  * CDDL HEADER END
     20      0    stevel  */
     21      0    stevel /*
     22  11042      Erik  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
     23  11042      Erik  * Use is subject to license terms.
     24      0    stevel  */
     25      0    stevel /* Copyright (c) 1990 Mentat Inc. */
     26      0    stevel 
     27      0    stevel /*
     28      0    stevel  * Procedures for the kernel part of DVMRP,
     29      0    stevel  * a Distance-Vector Multicast Routing Protocol.
     30      0    stevel  * (See RFC-1075)
     31      0    stevel  * Written by David Waitzman, BBN Labs, August 1988.
     32      0    stevel  * Modified by Steve Deering, Stanford, February 1989.
     33      0    stevel  * Modified by Mark J. Steiglitz, Stanford, May, 1991
     34      0    stevel  * Modified by Van Jacobson, LBL, January 1993
     35      0    stevel  * Modified by Ajit Thyagarajan, PARC, August 1993
     36      0    stevel  * Modified by Bill Fenner, PARC, April 1995
     37      0    stevel  *
     38      0    stevel  * MROUTING 3.5
     39      0    stevel  */
     40      0    stevel 
     41      0    stevel /*
     42      0    stevel  * TODO
     43      0    stevel  * - function pointer field in vif, void *vif_sendit()
     44      0    stevel  */
     45      0    stevel 
     46      0    stevel #include <sys/types.h>
     47      0    stevel #include <sys/stream.h>
     48      0    stevel #include <sys/stropts.h>
     49      0    stevel #include <sys/strlog.h>
     50      0    stevel #include <sys/systm.h>
     51      0    stevel #include <sys/ddi.h>
     52      0    stevel #include <sys/cmn_err.h>
     53      0    stevel #include <sys/zone.h>
     54      0    stevel 
     55      0    stevel #include <sys/param.h>
     56      0    stevel #include <sys/socket.h>
     57      0    stevel #include <sys/vtrace.h>
     58      0    stevel #include <sys/debug.h>
     59      0    stevel #include <net/if.h>
     60      0    stevel #include <sys/sockio.h>
     61      0    stevel #include <netinet/in.h>
     62      0    stevel #include <net/if_dl.h>
     63      0    stevel 
     64  11042      Erik #include <inet/ipsec_impl.h>
     65      0    stevel #include <inet/common.h>
     66      0    stevel #include <inet/mi.h>
     67      0    stevel #include <inet/nd.h>
     68      0    stevel #include <inet/mib2.h>
     69      0    stevel #include <netinet/ip6.h>
     70      0    stevel #include <inet/ip.h>
     71      0    stevel #include <inet/snmpcom.h>
     72      0    stevel 
     73      0    stevel #include <netinet/igmp.h>
     74      0    stevel #include <netinet/igmp_var.h>
     75      0    stevel #include <netinet/udp.h>
     76      0    stevel #include <netinet/ip_mroute.h>
     77      0    stevel #include <inet/ip_multi.h>
     78      0    stevel #include <inet/ip_ire.h>
     79  11042      Erik #include <inet/ip_ndp.h>
     80      0    stevel #include <inet/ip_if.h>
     81      0    stevel #include <inet/ipclassifier.h>
     82      0    stevel 
     83      0    stevel #include <netinet/pim.h>
     84      0    stevel 
     85      0    stevel 
/*
 * MT Design:
 *
 * There are three main data structures viftable, mfctable and tbftable that
 * need to be protected against MT races.
 *
 * viftable is a fixed length array of vif structs. There is no lock to
 * protect the whole array, instead each struct is protected by its own
 * individual lock.
 * The value of v_marks in conjunction with the value of v_refcnt determines
 * the current state of a vif structure. One special state that needs mention
 * is when the vif is marked VIF_MARK_NOTINUSE but refcnt != 0. This indicates
 * that vif is being initialized.
 * Each structure is freed when the refcnt goes down to zero. If a delete comes
 * in when the refcnt is > 1, the vif structure is marked VIF_MARK_CONDEMNED
 * which prevents the struct from further use.  When the refcnt goes to zero
 * the struct is freed and is marked VIF_MARK_NOTINUSE.
 * vif struct stores a pointer to the ipif in v_ipif, to prevent ipif/ill
 * from going away a refhold is put on the ipif before using it. See
 * lock_good_vif() and unlock_good_vif().
 *
 * VIF_REFHOLD and VIF_REFRELE macros have been provided to manipulate refcnts
 * of the vif struct.
 *
 * tbftable is also a fixed length array of tbf structs and is only accessed
 * via v_tbf.  It is protected by its own lock tbf_lock.
 *
 * Lock Ordering is
 * v_lock --> tbf_lock
 * v_lock --> ill_lock
 *
 * mfctable is a fixed size hash table of mfc bucket structs (struct mfcb).
 * Each mfc bucket struct (struct mfcb) maintains a refcnt for each walker,
 * it also maintains a state. These fields are protected by a lock (mfcb_lock).
 * mfc structs only maintain a state and have no refcnt. mfc_mutex is used to
 * protect the struct elements.
 *
 * mfc structs are dynamically allocated and are singly linked
 * at the head of the chain. When an mfc structure is to be deleted
 * it is marked condemned and so is the state in the bucket struct.
 * When the last walker of the hash bucket exits all the mfc structs
 * marked condemned are freed.
 *
 * Locking Hierarchy:
 * The bucket lock should be acquired before the mfc struct lock.
 * MFCB_REFHOLD and MFCB_REFRELE macros are provided for locking
 * operations on the bucket struct.
 *
 * last_encap_lock and numvifs_mutex should be acquired after
 * acquiring vif or mfc locks. These locks protect some global variables.
 *
 * The statistics are not currently protected by a lock
 * causing the stats to be approximate, not exact.
 */
    139      0    stevel 
    140      0    stevel #define	NO_VIF	MAXVIFS 	/* from mrouted, no route for src */
    141      0    stevel 
    142      0    stevel /*
    143      0    stevel  * Timeouts:
    144      0    stevel  * 	Upcall timeouts - BSD uses boolean_t mfc->expire and
    145      0    stevel  *	nexpire[MFCTBLSIZE], the number of times expire has been called.
    146      0    stevel  *	SunOS 5.x uses mfc->timeout for each mfc.
    147      0    stevel  *	Some Unixes are limited in the number of simultaneous timeouts
    148      0    stevel  * 	that can be run, SunOS 5.x does not have this restriction.
    149      0    stevel  */
    150      0    stevel 
    151      0    stevel /*
    152      0    stevel  * In BSD, EXPIRE_TIMEOUT is how often expire_upcalls() is called and
    153      0    stevel  * UPCALL_EXPIRE is the nmber of timeouts before a particular upcall
    154      0    stevel  * expires. Thus the time till expiration is EXPIRE_TIMEOUT * UPCALL_EXPIRE
    155      0    stevel  */
    156      0    stevel #define		EXPIRE_TIMEOUT	(hz/4)	/* 4x / second	*/
    157      0    stevel #define		UPCALL_EXPIRE	6	/* number of timeouts	*/
    158      0    stevel 
    159      0    stevel /*
    160      0    stevel  * Hash function for a source, group entry
    161      0    stevel  */
    162      0    stevel #define	MFCHASH(a, g) MFCHASHMOD(((a) >> 20) ^ ((a) >> 10) ^ (a) ^ \
    163      0    stevel 	((g) >> 20) ^ ((g) >> 10) ^ (g))
    164      0    stevel 
    165      0    stevel #define			TBF_REPROCESS	(hz / 100)	/* 100x /second	*/
    166      0    stevel 
    167      0    stevel /* Identify PIM packet that came on a Register interface */
    168      0    stevel #define	PIM_REGISTER_MARKER	0xffffffff
    169      0    stevel 
    170      0    stevel /* Function declarations */
    171   3448  dh155122 static int	add_mfc(struct mfcctl *, ip_stack_t *);
    172  11042      Erik static int	add_vif(struct vifctl *, conn_t *, ip_stack_t *);
    173   3448  dh155122 static int	del_mfc(struct mfcctl *, ip_stack_t *);
    174  11042      Erik static int	del_vif(vifi_t *, ip_stack_t *);
    175      0    stevel static void	del_vifp(struct vif *);
    176      0    stevel static void	encap_send(ipha_t *, mblk_t *, struct vif *, ipaddr_t);
    177      0    stevel static void	expire_upcalls(void *);
    178   3448  dh155122 static void	fill_route(struct mfc *, struct mfcctl *, ip_stack_t *);
    179   3448  dh155122 static void	free_queue(struct mfc *);
    180   3448  dh155122 static int	get_assert(uchar_t *, ip_stack_t *);
    181   3448  dh155122 static int	get_lsg_cnt(struct sioc_lsg_req *, ip_stack_t *);
    182   3448  dh155122 static int	get_sg_cnt(struct sioc_sg_req *, ip_stack_t *);
    183      0    stevel static int	get_version(uchar_t *);
    184   3448  dh155122 static int	get_vif_cnt(struct sioc_vif_req *, ip_stack_t *);
    185      0    stevel static int	ip_mdq(mblk_t *, ipha_t *, ill_t *,
    186      0    stevel 		    ipaddr_t, struct mfc *);
    187   5240  nordmark static int	ip_mrouter_init(conn_t *, uchar_t *, int, ip_stack_t *);
    188      0    stevel static void	phyint_send(ipha_t *, mblk_t *, struct vif *, ipaddr_t);
    189  11042      Erik static int	register_mforward(mblk_t *, ip_recv_attr_t *);
    190      0    stevel static void	register_send(ipha_t *, mblk_t *, struct vif *, ipaddr_t);
    191   3448  dh155122 static int	set_assert(int *, ip_stack_t *);
    192      0    stevel 
    193      0    stevel /*
    194      0    stevel  * Token Bucket Filter functions
    195      0    stevel  */
    196      0    stevel static int  priority(struct vif *, ipha_t *);
    197      0    stevel static void tbf_control(struct vif *, mblk_t *, ipha_t *);
    198      0    stevel static int  tbf_dq_sel(struct vif *, ipha_t *);
    199      0    stevel static void tbf_process_q(struct vif *);
    200      0    stevel static void tbf_queue(struct vif *, mblk_t *);
    201      0    stevel static void tbf_reprocess_q(void *);
    202      0    stevel static void tbf_send_packet(struct vif *, mblk_t *);
    203      0    stevel static void tbf_update_tokens(struct vif *);
    204      0    stevel static void release_mfc(struct mfcb *);
    205      0    stevel 
    206   3448  dh155122 static boolean_t is_mrouter_off(ip_stack_t *);
    207      0    stevel /*
    208      0    stevel  * Encapsulation packets
    209      0    stevel  */
    210      0    stevel 
    211      0    stevel #define	ENCAP_TTL	64
    212      0    stevel 
    213      0    stevel /* prototype IP hdr for encapsulated packets */
    214      0    stevel static ipha_t multicast_encap_iphdr = {
    215      0    stevel 	IP_SIMPLE_HDR_VERSION,
    216      0    stevel 	0,				/* tos */
    217      0    stevel 	sizeof (ipha_t),		/* total length */
    218      0    stevel 	0,				/* id */
    219      0    stevel 	0,				/* frag offset */
    220      0    stevel 	ENCAP_TTL, IPPROTO_ENCAP,
    221      0    stevel 	0,				/* checksum */
    222      0    stevel };
    223      0    stevel 
    224      0    stevel /*
    225      0    stevel  * Rate limit for assert notification messages, in nsec.
    226      0    stevel  */
    227      0    stevel #define	ASSERT_MSG_TIME		3000000000
    228      0    stevel 
    229      0    stevel 
    230      0    stevel #define	VIF_REFHOLD(vifp) {			\
    231      0    stevel 	mutex_enter(&(vifp)->v_lock);		\
    232      0    stevel 	(vifp)->v_refcnt++;			\
    233      0    stevel 	mutex_exit(&(vifp)->v_lock);		\
    234      0    stevel }
    235      0    stevel 
    236      0    stevel #define	VIF_REFRELE_LOCKED(vifp) {				\
    237      0    stevel 	(vifp)->v_refcnt--;					\
    238      0    stevel 	if ((vifp)->v_refcnt == 0 &&				\
    239      0    stevel 		((vifp)->v_marks & VIF_MARK_CONDEMNED)) {	\
    240      0    stevel 			del_vifp(vifp);				\
    241      0    stevel 	} else {						\
    242      0    stevel 		mutex_exit(&(vifp)->v_lock);			\
    243      0    stevel 	}							\
    244      0    stevel }
    245      0    stevel 
    246      0    stevel #define	VIF_REFRELE(vifp) {					\
    247      0    stevel 	mutex_enter(&(vifp)->v_lock);				\
    248      0    stevel 	(vifp)->v_refcnt--;					\
    249      0    stevel 	if ((vifp)->v_refcnt == 0 &&				\
    250      0    stevel 		((vifp)->v_marks & VIF_MARK_CONDEMNED)) {	\
    251      0    stevel 			del_vifp(vifp);				\
    252      0    stevel 	} else {						\
    253      0    stevel 		mutex_exit(&(vifp)->v_lock);			\
    254      0    stevel 	}							\
    255      0    stevel }
    256      0    stevel 
    257      0    stevel #define	MFCB_REFHOLD(mfcb) {				\
    258      0    stevel 	mutex_enter(&(mfcb)->mfcb_lock);		\
    259      0    stevel 	(mfcb)->mfcb_refcnt++;				\
    260      0    stevel 	ASSERT((mfcb)->mfcb_refcnt != 0);		\
    261      0    stevel 	mutex_exit(&(mfcb)->mfcb_lock);			\
    262      0    stevel }
    263      0    stevel 
    264      0    stevel #define	MFCB_REFRELE(mfcb) {					\
    265      0    stevel 	mutex_enter(&(mfcb)->mfcb_lock);			\
    266      0    stevel 	ASSERT((mfcb)->mfcb_refcnt != 0);			\
    267      0    stevel 	if (--(mfcb)->mfcb_refcnt == 0 &&			\
    268      0    stevel 		((mfcb)->mfcb_marks & MFCB_MARK_CONDEMNED)) {	\
    269      0    stevel 			release_mfc(mfcb);			\
    270      0    stevel 	}							\
    271      0    stevel 	mutex_exit(&(mfcb)->mfcb_lock);				\
    272      0    stevel }
    273      0    stevel 
    274      0    stevel /*
    275      0    stevel  * MFCFIND:
    276      0    stevel  * Find a route for a given origin IP address and multicast group address.
    277      0    stevel  * Skip entries with pending upcalls.
    278      0    stevel  * Type of service parameter to be added in the future!
    279      0    stevel  */
    280      0    stevel #define	MFCFIND(mfcbp, o, g, rt) { \
    281      0    stevel 	struct mfc *_mb_rt = NULL; \
    282      0    stevel 	rt = NULL; \
    283      0    stevel 	_mb_rt = mfcbp->mfcb_mfc; \
    284      0    stevel 	while (_mb_rt) { \
    285      0    stevel 		if ((_mb_rt->mfc_origin.s_addr == o) && \
    286      0    stevel 		    (_mb_rt->mfc_mcastgrp.s_addr == g) && \
    287      0    stevel 		    (_mb_rt->mfc_rte == NULL) && \
    288      0    stevel 		    (!(_mb_rt->mfc_marks & MFCB_MARK_CONDEMNED))) {        \
    289      0    stevel 		    rt = _mb_rt; \
    290      0    stevel 		    break; \
    291      0    stevel 		} \
    292      0    stevel 	_mb_rt = _mb_rt->mfc_next; \
    293      0    stevel 	} \
    294      0    stevel }
    295      0    stevel 
    296      0    stevel /*
    297      0    stevel  * BSD uses timeval with sec and usec. In SunOS 5.x uniqtime() and gethrtime()
    298      0    stevel  * are inefficient. We use gethrestime() which returns a timespec_t with
    299      0    stevel  * sec and nsec, the resolution is machine dependent.
    300      0    stevel  * The following 2 macros have been changed to use nsec instead of usec.
    301      0    stevel  */
    302      0    stevel /*
    303      0    stevel  * Macros to compute elapsed time efficiently.
    304      0    stevel  * Borrowed from Van Jacobson's scheduling code.
    305      0    stevel  * Delta should be a hrtime_t.
    306      0    stevel  */
    307      0    stevel #define	TV_DELTA(a, b, delta) { \
    308      0    stevel 	int xxs; \
    309      0    stevel  \
    310      0    stevel 	delta = (a).tv_nsec - (b).tv_nsec; \
    311      0    stevel 	if ((xxs = (a).tv_sec - (b).tv_sec) != 0) { \
    312      0    stevel 		switch (xxs) { \
    313      0    stevel 		case 2: \
    314      0    stevel 		    delta += 1000000000; \
    315      0    stevel 		    /*FALLTHROUGH*/ \
    316      0    stevel 		case 1: \
    317      0    stevel 		    delta += 1000000000; \
    318      0    stevel 		    break; \
    319      0    stevel 		default: \
    320      0    stevel 		    delta += (1000000000 * xxs); \
    321      0    stevel 		} \
    322      0    stevel 	} \
    323      0    stevel }
    324      0    stevel 
    325      0    stevel #define	TV_LT(a, b) (((a).tv_nsec < (b).tv_nsec && \
    326      0    stevel 	(a).tv_sec <= (b).tv_sec) || (a).tv_sec < (b).tv_sec)
    327      0    stevel 
    328      0    stevel /*
    329      0    stevel  * Handle MRT setsockopt commands to modify the multicast routing tables.
    330      0    stevel  */
    331      0    stevel int
    332  11042      Erik ip_mrouter_set(int cmd, conn_t *connp, int checkonly, uchar_t *data,
    333  11042      Erik     int datalen)
    334      0    stevel {
    335   5240  nordmark 	ip_stack_t	*ipst = connp->conn_netstack->netstack_ip;
    336   3448  dh155122 
    337   3448  dh155122 	mutex_enter(&ipst->ips_ip_g_mrouter_mutex);
    338   5240  nordmark 	if (cmd != MRT_INIT && connp != ipst->ips_ip_g_mrouter) {
    339   3448  dh155122 		mutex_exit(&ipst->ips_ip_g_mrouter_mutex);
    340      0    stevel 		return (EACCES);
    341      0    stevel 	}
    342   3448  dh155122 	mutex_exit(&ipst->ips_ip_g_mrouter_mutex);
    343      0    stevel 
    344      0    stevel 	if (checkonly) {
    345      0    stevel 		/*
    346      0    stevel 		 * do not do operation, just pretend to - new T_CHECK
    347      0    stevel 		 * Note: Even routines further on can probably fail but
    348      0    stevel 		 * this T_CHECK stuff is only to please XTI so it not
    349      0    stevel 		 * necessary to be perfect.
    350      0    stevel 		 */
    351      0    stevel 		switch (cmd) {
    352      0    stevel 		case MRT_INIT:
    353      0    stevel 		case MRT_DONE:
    354      0    stevel 		case MRT_ADD_VIF:
    355      0    stevel 		case MRT_DEL_VIF:
    356      0    stevel 		case MRT_ADD_MFC:
    357      0    stevel 		case MRT_DEL_MFC:
    358      0    stevel 		case MRT_ASSERT:
    359   5240  nordmark 			return (0);
    360      0    stevel 		default:
    361   5240  nordmark 			return (EOPNOTSUPP);
    362      0    stevel 		}
    363      0    stevel 	}
    364      0    stevel 
    365      0    stevel 	/*
    366      0    stevel 	 * make sure no command is issued after multicast routing has been
    367      0    stevel 	 * turned off.
    368      0    stevel 	 */
    369      0    stevel 	if (cmd != MRT_INIT && cmd != MRT_DONE) {
    370   3448  dh155122 		if (is_mrouter_off(ipst))
    371      0    stevel 			return (EINVAL);
    372      0    stevel 	}
    373      0    stevel 
    374      0    stevel 	switch (cmd) {
    375   5240  nordmark 	case MRT_INIT:	return (ip_mrouter_init(connp, data, datalen, ipst));
    376  11042      Erik 	case MRT_DONE:	return (ip_mrouter_done(ipst));
    377  11042      Erik 	case MRT_ADD_VIF:  return (add_vif((struct vifctl *)data, connp, ipst));
    378  11042      Erik 	case MRT_DEL_VIF:  return (del_vif((vifi_t *)data, ipst));
    379   3448  dh155122 	case MRT_ADD_MFC:  return (add_mfc((struct mfcctl *)data, ipst));
    380   3448  dh155122 	case MRT_DEL_MFC:  return (del_mfc((struct mfcctl *)data, ipst));
    381   3448  dh155122 	case MRT_ASSERT:   return (set_assert((int *)data, ipst));
    382      0    stevel 	default:	   return (EOPNOTSUPP);
    383      0    stevel 	}
    384      0    stevel }
    385      0    stevel 
    386      0    stevel /*
    387      0    stevel  * Handle MRT getsockopt commands
    388      0    stevel  */
    389      0    stevel int
    390  11042      Erik ip_mrouter_get(int cmd, conn_t *connp, uchar_t *data)
    391      0    stevel {
    392   5240  nordmark 	ip_stack_t	*ipst = connp->conn_netstack->netstack_ip;
    393   3448  dh155122 
    394   5240  nordmark 	if (connp != ipst->ips_ip_g_mrouter)
    395      0    stevel 		return (EACCES);
    396      0    stevel 
    397      0    stevel 	switch (cmd) {
    398      0    stevel 	case MRT_VERSION:	return (get_version((uchar_t *)data));
    399   3448  dh155122 	case MRT_ASSERT:	return (get_assert((uchar_t *)data, ipst));
    400      0    stevel 	default:		return (EOPNOTSUPP);
    401      0    stevel 	}
    402      0    stevel }
    403      0    stevel 
    404      0    stevel /*
    405      0    stevel  * Handle ioctl commands to obtain information from the cache.
    406      0    stevel  * Called with shared access to IP. These are read_only ioctls.
    407      0    stevel  */
    408      0    stevel /* ARGSUSED */
    409      0    stevel int
    410      0    stevel mrt_ioctl(ipif_t *ipif, sin_t *sin, queue_t *q, mblk_t *mp,
    411      0    stevel     ip_ioctl_cmd_t *ipip, void *if_req)
    412      0    stevel {
    413      0    stevel 	mblk_t	*mp1;
    414      0    stevel 	struct iocblk *iocp = (struct iocblk *)mp->b_rptr;
    415   5240  nordmark 	conn_t		*connp = Q_TO_CONN(q);
    416   5240  nordmark 	ip_stack_t	*ipst = connp->conn_netstack->netstack_ip;
    417      0    stevel 
    418      0    stevel 	/* Existence verified in ip_wput_nondata */
    419      0    stevel 	mp1 = mp->b_cont->b_cont;
    420      0    stevel 
    421      0    stevel 	switch (iocp->ioc_cmd) {
    422      0    stevel 	case (SIOCGETVIFCNT):
    423   3448  dh155122 		return (get_vif_cnt((struct sioc_vif_req *)mp1->b_rptr, ipst));
    424      0    stevel 	case (SIOCGETSGCNT):
    425   3448  dh155122 		return (get_sg_cnt((struct sioc_sg_req *)mp1->b_rptr, ipst));
    426      0    stevel 	case (SIOCGETLSGCNT):
    427   3448  dh155122 		return (get_lsg_cnt((struct sioc_lsg_req *)mp1->b_rptr, ipst));
    428      0    stevel 	default:
    429      0    stevel 		return (EINVAL);
    430      0    stevel 	}
    431      0    stevel }
    432      0    stevel 
    433      0    stevel /*
    434      0    stevel  * Returns the packet, byte, rpf-failure count for the source, group provided.
    435      0    stevel  */
    436      0    stevel static int
    437   3448  dh155122 get_sg_cnt(struct sioc_sg_req *req, ip_stack_t *ipst)
    438      0    stevel {
    439      0    stevel 	struct mfc *rt;
    440      0    stevel 	struct mfcb *mfcbp;
    441      0    stevel 
    442   3448  dh155122 	mfcbp = &ipst->ips_mfcs[MFCHASH(req->src.s_addr, req->grp.s_addr)];
    443      0    stevel 	MFCB_REFHOLD(mfcbp);
    444      0    stevel 	MFCFIND(mfcbp, req->src.s_addr, req->grp.s_addr, rt);
    445      0    stevel 
    446      0    stevel 	if (rt != NULL) {
    447      0    stevel 		mutex_enter(&rt->mfc_mutex);
    448      0    stevel 		req->pktcnt   = rt->mfc_pkt_cnt;
    449      0    stevel 		req->bytecnt  = rt->mfc_byte_cnt;
    450      0    stevel 		req->wrong_if = rt->mfc_wrong_if;
    451      0    stevel 		mutex_exit(&rt->mfc_mutex);
    452      0    stevel 	} else
    453      0    stevel 		req->pktcnt = req->bytecnt = req->wrong_if = 0xffffffffU;
    454      0    stevel 
    455      0    stevel 	MFCB_REFRELE(mfcbp);
    456      0    stevel 	return (0);
    457      0    stevel }
    458      0    stevel 
    459      0    stevel /*
    460      0    stevel  * Returns the packet, byte, rpf-failure count for the source, group provided.
    461      0    stevel  * Uses larger counters and IPv6 addresses.
    462      0    stevel  */
    463      0    stevel /* ARGSUSED XXX until implemented */
    464      0    stevel static int
    465   3448  dh155122 get_lsg_cnt(struct sioc_lsg_req *req, ip_stack_t *ipst)
    466      0    stevel {
    467      0    stevel 	/* XXX TODO SIOCGETLSGCNT */
    468      0    stevel 	return (ENXIO);
    469      0    stevel }
    470      0    stevel 
    471      0    stevel /*
    472      0    stevel  * Returns the input and output packet and byte counts on the vif provided.
    473      0    stevel  */
    474      0    stevel static int
    475   3448  dh155122 get_vif_cnt(struct sioc_vif_req *req, ip_stack_t *ipst)
    476      0    stevel {
    477      0    stevel 	vifi_t vifi = req->vifi;
    478      0    stevel 
    479   3448  dh155122 	if (vifi >= ipst->ips_numvifs)
    480      0    stevel 		return (EINVAL);
    481      0    stevel 
    482      0    stevel 	/*
    483      0    stevel 	 * No locks here, an approximation is fine.
    484      0    stevel 	 */
    485   3448  dh155122 	req->icount = ipst->ips_vifs[vifi].v_pkt_in;
    486   3448  dh155122 	req->ocount = ipst->ips_vifs[vifi].v_pkt_out;
    487   3448  dh155122 	req->ibytes = ipst->ips_vifs[vifi].v_bytes_in;
    488   3448  dh155122 	req->obytes = ipst->ips_vifs[vifi].v_bytes_out;
    489      0    stevel 
    490      0    stevel 	return (0);
    491      0    stevel }
    492      0    stevel 
    493      0    stevel static int
    494      0    stevel get_version(uchar_t *data)
    495      0    stevel {
    496      0    stevel 	int *v = (int *)data;
    497      0    stevel 
    498      0    stevel 	*v = 0x0305;	/* XXX !!!! */
    499      0    stevel 
    500      0    stevel 	return (0);
    501      0    stevel }
    502      0    stevel 
    503      0    stevel /*
    504      0    stevel  * Set PIM assert processing global.
    505      0    stevel  */
    506      0    stevel static int
    507   3448  dh155122 set_assert(int *i, ip_stack_t *ipst)
    508      0    stevel {
    509      0    stevel 	if ((*i != 1) && (*i != 0))
    510      0    stevel 		return (EINVAL);
    511      0    stevel 
    512   3448  dh155122 	ipst->ips_pim_assert = *i;
    513      0    stevel 
    514      0    stevel 	return (0);
    515      0    stevel }
    516      0    stevel 
    517      0    stevel /*
    518      0    stevel  * Get PIM assert processing global.
    519      0    stevel  */
    520      0    stevel static int
    521   3448  dh155122 get_assert(uchar_t *data, ip_stack_t *ipst)
    522      0    stevel {
    523      0    stevel 	int *i = (int *)data;
    524      0    stevel 
    525   3448  dh155122 	*i = ipst->ips_pim_assert;
    526      0    stevel 
    527      0    stevel 	return (0);
    528      0    stevel }
    529      0    stevel 
    530      0    stevel /*
    531      0    stevel  * Enable multicast routing.
    532      0    stevel  */
    533      0    stevel static int
    534   5240  nordmark ip_mrouter_init(conn_t *connp, uchar_t *data, int datalen, ip_stack_t *ipst)
    535      0    stevel {
    536      0    stevel 	int	*v;
    537      0    stevel 
    538      0    stevel 	if (data == NULL || (datalen != sizeof (int)))
    539      0    stevel 		return (ENOPROTOOPT);
    540      0    stevel 
    541      0    stevel 	v = (int *)data;
    542      0    stevel 	if (*v != 1)
    543      0    stevel 		return (ENOPROTOOPT);
    544      0    stevel 
    545   3448  dh155122 	mutex_enter(&ipst->ips_ip_g_mrouter_mutex);
    546   3448  dh155122 	if (ipst->ips_ip_g_mrouter != NULL) {
    547   3448  dh155122 		mutex_exit(&ipst->ips_ip_g_mrouter_mutex);
    548      0    stevel 		return (EADDRINUSE);
    549      0    stevel 	}
    550      0    stevel 
    551   5240  nordmark 	/*
    552   5240  nordmark 	 * MRT_INIT should only be allowed for RAW sockets, but we double
    553   5240  nordmark 	 * check.
    554   5240  nordmark 	 */
    555   5240  nordmark 	if (!IPCL_IS_RAWIP(connp)) {
    556   5240  nordmark 		mutex_exit(&ipst->ips_ip_g_mrouter_mutex);
    557   5240  nordmark 		return (EINVAL);
    558   5240  nordmark 	}
    559   5240  nordmark 
    560   5240  nordmark 	ipst->ips_ip_g_mrouter = connp;
    561      0    stevel 	connp->conn_multi_router = 1;
    562      0    stevel 	/* In order for tunnels to work we have to turn ip_g_forward on */
    563   3448  dh155122 	if (!WE_ARE_FORWARDING(ipst)) {
    564   3448  dh155122 		if (ipst->ips_ip_mrtdebug > 1) {
    565   5240  nordmark 			(void) mi_strlog(connp->conn_rq, 1, SL_TRACE,
    566      0    stevel 			    "ip_mrouter_init: turning on forwarding");
    567      0    stevel 		}
    568   3448  dh155122 		ipst->ips_saved_ip_g_forward = ipst->ips_ip_g_forward;
    569   3448  dh155122 		ipst->ips_ip_g_forward = IP_FORWARD_ALWAYS;
    570      0    stevel 	}
    571      0    stevel 
    572   3448  dh155122 	mutex_exit(&ipst->ips_ip_g_mrouter_mutex);
    573      0    stevel 	return (0);
    574   3448  dh155122 }
    575   3448  dh155122 
    576   3448  dh155122 void
    577   3448  dh155122 ip_mrouter_stack_init(ip_stack_t *ipst)
    578   3448  dh155122 {
    579   3448  dh155122 	mutex_init(&ipst->ips_ip_g_mrouter_mutex, NULL, MUTEX_DEFAULT, NULL);
    580   3448  dh155122 
    581   3448  dh155122 	ipst->ips_vifs = kmem_zalloc(sizeof (struct vif) * (MAXVIFS+1),
    582   3448  dh155122 	    KM_SLEEP);
    583   3448  dh155122 	ipst->ips_mrtstat = kmem_zalloc(sizeof (struct mrtstat), KM_SLEEP);
    584   3448  dh155122 	/*
    585   3448  dh155122 	 * mfctable:
    586   3448  dh155122 	 * Includes all mfcs, including waiting upcalls.
    587   3448  dh155122 	 * Multiple mfcs per bucket.
    588   3448  dh155122 	 */
    589   3448  dh155122 	ipst->ips_mfcs = kmem_zalloc(sizeof (struct mfcb) * MFCTBLSIZ,
    590   3448  dh155122 	    KM_SLEEP);
    591   3448  dh155122 	/*
    592   3448  dh155122 	 * Define the token bucket filter structures.
    593   3448  dh155122 	 * tbftable -> each vif has one of these for storing info.
    594   3448  dh155122 	 */
    595   3448  dh155122 	ipst->ips_tbfs = kmem_zalloc(sizeof (struct tbf) * MAXVIFS, KM_SLEEP);
    596   3448  dh155122 
    597   3448  dh155122 	mutex_init(&ipst->ips_last_encap_lock, NULL, MUTEX_DEFAULT, NULL);
    598   3448  dh155122 
    599   3448  dh155122 	ipst->ips_mrtstat->mrts_vifctlSize = sizeof (struct vifctl);
    600   3448  dh155122 	ipst->ips_mrtstat->mrts_mfcctlSize = sizeof (struct mfcctl);
    601      0    stevel }
    602      0    stevel 
    603      0    stevel /*
    604      0    stevel  * Disable multicast routing.
    605      0    stevel  * Didn't use global timeout_val (BSD version), instead check the mfctable.
    606      0    stevel  */
    607      0    stevel int
    608  11042      Erik ip_mrouter_done(ip_stack_t *ipst)
    609      0    stevel {
    610   5240  nordmark 	conn_t		*mrouter;
    611      0    stevel 	vifi_t 		vifi;
    612      0    stevel 	struct mfc	*mfc_rt;
    613      0    stevel 	int		i;
    614      0    stevel 
    615   3448  dh155122 	mutex_enter(&ipst->ips_ip_g_mrouter_mutex);
    616   3448  dh155122 	if (ipst->ips_ip_g_mrouter == NULL) {
    617   3448  dh155122 		mutex_exit(&ipst->ips_ip_g_mrouter_mutex);
    618      0    stevel 		return (EINVAL);
    619      0    stevel 	}
    620      0    stevel 
    621   5240  nordmark 	mrouter = ipst->ips_ip_g_mrouter;
    622      0    stevel 
    623   3448  dh155122 	if (ipst->ips_saved_ip_g_forward != -1) {
    624   3448  dh155122 		if (ipst->ips_ip_mrtdebug > 1) {
    625   5240  nordmark 			(void) mi_strlog(mrouter->conn_rq, 1, SL_TRACE,
    626      0    stevel 			    "ip_mrouter_done: turning off forwarding");
    627      0    stevel 		}
    628   3448  dh155122 		ipst->ips_ip_g_forward = ipst->ips_saved_ip_g_forward;
    629   3448  dh155122 		ipst->ips_saved_ip_g_forward = -1;
    630      0    stevel 	}
    631      0    stevel 
    632      0    stevel 	/*
    633      0    stevel 	 * Always clear cache when vifs change.
    634   3448  dh155122 	 * No need to get ipst->ips_last_encap_lock since we are running as
    635   3448  dh155122 	 * a writer.
    636      0    stevel 	 */
    637   3448  dh155122 	mutex_enter(&ipst->ips_last_encap_lock);
    638   3448  dh155122 	ipst->ips_last_encap_src = 0;
    639   3448  dh155122 	ipst->ips_last_encap_vif = NULL;
    640   3448  dh155122 	mutex_exit(&ipst->ips_last_encap_lock);
    641   5240  nordmark 	mrouter->conn_multi_router = 0;
    642      0    stevel 
    643   3448  dh155122 	mutex_exit(&ipst->ips_ip_g_mrouter_mutex);
    644      0    stevel 
    645      0    stevel 	/*
    646      0    stevel 	 * For each phyint in use,
    647      0    stevel 	 * disable promiscuous reception of all IP multicasts.
    648      0    stevel 	 */
    649      0    stevel 	for (vifi = 0; vifi < MAXVIFS; vifi++) {
    650   3448  dh155122 		struct vif *vifp = ipst->ips_vifs + vifi;
    651      0    stevel 
    652      0    stevel 		mutex_enter(&vifp->v_lock);
    653      0    stevel 		/*
    654      0    stevel 		 * if the vif is active mark it condemned.
    655      0    stevel 		 */
    656      0    stevel 		if (vifp->v_marks & VIF_MARK_GOOD) {
    657      0    stevel 			ASSERT(vifp->v_ipif != NULL);
    658      0    stevel 			ipif_refhold(vifp->v_ipif);
    659      0    stevel 			/* Phyint only */
    660      0    stevel 			if (!(vifp->v_flags & (VIFF_TUNNEL | VIFF_REGISTER))) {
    661      0    stevel 				ipif_t *ipif = vifp->v_ipif;
    662  11042      Erik 				ilm_t *ilm = vifp->v_ilm;
    663      0    stevel 
    664  11042      Erik 				vifp->v_ilm = NULL;
    665  11042      Erik 				vifp->v_marks &= ~VIF_MARK_GOOD;
    666  11042      Erik 				vifp->v_marks |= VIF_MARK_CONDEMNED;
    667      0    stevel 
    668  11042      Erik 				mutex_exit(&(vifp)->v_lock);
    669  11042      Erik 				if (ilm != NULL) {
    670  11042      Erik 					ill_t *ill = ipif->ipif_ill;
    671  11042      Erik 
    672  11042      Erik 					(void) ip_delmulti(ilm);
    673  11042      Erik 					ASSERT(ill->ill_mrouter_cnt > 0);
    674  11042      Erik 					atomic_dec_32(&ill->ill_mrouter_cnt);
    675      0    stevel 				}
    676      0    stevel 				mutex_enter(&vifp->v_lock);
    677      0    stevel 			}
    678  10495      Erik 			ipif_refrele(vifp->v_ipif);
    679      0    stevel 			/*
    680      0    stevel 			 * decreases the refcnt added in add_vif.
    681      0    stevel 			 * and release v_lock.
    682      0    stevel 			 */
    683      0    stevel 			VIF_REFRELE_LOCKED(vifp);
    684      0    stevel 		} else {
    685      0    stevel 			mutex_exit(&vifp->v_lock);
    686      0    stevel 			continue;
    687      0    stevel 		}
    688      0    stevel 	}
    689      0    stevel 
    690   3448  dh155122 	mutex_enter(&ipst->ips_numvifs_mutex);
    691   3448  dh155122 	ipst->ips_numvifs = 0;
    692   3448  dh155122 	ipst->ips_pim_assert = 0;
    693   3448  dh155122 	ipst->ips_reg_vif_num = ALL_VIFS;
    694   3448  dh155122 	mutex_exit(&ipst->ips_numvifs_mutex);
    695      0    stevel 
    696      0    stevel 	/*
    697      0    stevel 	 * Free upcall msgs.
    698      0    stevel 	 * Go through mfctable and stop any outstanding upcall
    699      0    stevel 	 * timeouts remaining on mfcs.
    700      0    stevel 	 */
    701      0    stevel 	for (i = 0; i < MFCTBLSIZ; i++) {
    702   3448  dh155122 		mutex_enter(&ipst->ips_mfcs[i].mfcb_lock);
    703   3448  dh155122 		ipst->ips_mfcs[i].mfcb_refcnt++;
    704   3448  dh155122 		ipst->ips_mfcs[i].mfcb_marks |= MFCB_MARK_CONDEMNED;
    705   3448  dh155122 		mutex_exit(&ipst->ips_mfcs[i].mfcb_lock);
    706   3448  dh155122 		mfc_rt = ipst->ips_mfcs[i].mfcb_mfc;
    707      0    stevel 		while (mfc_rt) {
    708      0    stevel 			/* Free upcalls */
    709      0    stevel 			mutex_enter(&mfc_rt->mfc_mutex);
    710      0    stevel 			if (mfc_rt->mfc_rte != NULL) {
    711      0    stevel 				if (mfc_rt->mfc_timeout_id != 0) {
    712      0    stevel 					/*
    713      0    stevel 					 * OK to drop the lock as we have
    714      0    stevel 					 * a refcnt on the bucket. timeout
    715      0    stevel 					 * can fire but it will see that
    716      0    stevel 					 * mfc_timeout_id == 0 and not do
    717      0    stevel 					 * anything. see expire_upcalls().
    718      0    stevel 					 */
    719      0    stevel 					mfc_rt->mfc_timeout_id = 0;
    720      0    stevel 					mutex_exit(&mfc_rt->mfc_mutex);
    721      0    stevel 					(void) untimeout(
    722      0    stevel 					    mfc_rt->mfc_timeout_id);
    723      0    stevel 						mfc_rt->mfc_timeout_id = 0;
    724      0    stevel 					mutex_enter(&mfc_rt->mfc_mutex);
    725      0    stevel 
    726      0    stevel 					/*
    727      0    stevel 					 * all queued upcall packets
    728      0    stevel 					 * and mblk will be freed in
    729      0    stevel 					 * release_mfc().
    730      0    stevel 					 */
    731      0    stevel 				}
    732      0    stevel 			}
    733      0    stevel 
    734      0    stevel 			mfc_rt->mfc_marks |= MFCB_MARK_CONDEMNED;
    735      0    stevel 
    736      0    stevel 			mutex_exit(&mfc_rt->mfc_mutex);
    737      0    stevel 			mfc_rt = mfc_rt->mfc_next;
    738      0    stevel 		}
    739   3448  dh155122 		MFCB_REFRELE(&ipst->ips_mfcs[i]);
    740      0    stevel 	}
    741      0    stevel 
    742   3448  dh155122 	mutex_enter(&ipst->ips_ip_g_mrouter_mutex);
    743   3448  dh155122 	ipst->ips_ip_g_mrouter = NULL;
    744   3448  dh155122 	mutex_exit(&ipst->ips_ip_g_mrouter_mutex);
    745      0    stevel 	return (0);
    746      0    stevel }
    747      0    stevel 
    748   3448  dh155122 void
    749   3448  dh155122 ip_mrouter_stack_destroy(ip_stack_t *ipst)
    750   3448  dh155122 {
    751   3448  dh155122 	struct mfcb *mfcbp;
    752   3448  dh155122 	struct mfc  *rt;
    753   3448  dh155122 	int i;
    754   3448  dh155122 
    755   3448  dh155122 	for (i = 0; i < MFCTBLSIZ; i++) {
    756   3448  dh155122 		mfcbp = &ipst->ips_mfcs[i];
    757   3448  dh155122 
    758   3448  dh155122 		while ((rt = mfcbp->mfcb_mfc) != NULL) {
    759   3448  dh155122 			(void) printf("ip_mrouter_stack_destroy: free for %d\n",
    760   3448  dh155122 			    i);
    761   3448  dh155122 
    762   3448  dh155122 			mfcbp->mfcb_mfc = rt->mfc_next;
    763   3448  dh155122 			free_queue(rt);
    764   3448  dh155122 			mi_free(rt);
    765   3448  dh155122 		}
    766   3448  dh155122 	}
    767   3448  dh155122 	kmem_free(ipst->ips_vifs, sizeof (struct vif) * (MAXVIFS+1));
    768   3448  dh155122 	ipst->ips_vifs = NULL;
    769   3448  dh155122 	kmem_free(ipst->ips_mrtstat, sizeof (struct mrtstat));
    770   3448  dh155122 	ipst->ips_mrtstat = NULL;
    771   3448  dh155122 	kmem_free(ipst->ips_mfcs, sizeof (struct mfcb) * MFCTBLSIZ);
    772   3448  dh155122 	ipst->ips_mfcs = NULL;
    773   3448  dh155122 	kmem_free(ipst->ips_tbfs, sizeof (struct tbf) * MAXVIFS);
    774   3448  dh155122 	ipst->ips_tbfs = NULL;
    775   3448  dh155122 
    776   3448  dh155122 	mutex_destroy(&ipst->ips_last_encap_lock);
    777   3448  dh155122 	mutex_destroy(&ipst->ips_ip_g_mrouter_mutex);
    778   3448  dh155122 }
    779   3448  dh155122 
    780      0    stevel static boolean_t
    781   3448  dh155122 is_mrouter_off(ip_stack_t *ipst)
    782      0    stevel {
    783   5240  nordmark 	conn_t	*mrouter;
    784      0    stevel 
    785   3448  dh155122 	mutex_enter(&ipst->ips_ip_g_mrouter_mutex);
    786   3448  dh155122 	if (ipst->ips_ip_g_mrouter == NULL) {
    787   3448  dh155122 		mutex_exit(&ipst->ips_ip_g_mrouter_mutex);
    788      0    stevel 		return (B_TRUE);
    789      0    stevel 	}
    790      0    stevel 
    791   5240  nordmark 	mrouter = ipst->ips_ip_g_mrouter;
    792   5240  nordmark 	if (mrouter->conn_multi_router == 0) {
    793   3448  dh155122 		mutex_exit(&ipst->ips_ip_g_mrouter_mutex);
    794      0    stevel 		return (B_TRUE);
    795      0    stevel 	}
    796   3448  dh155122 	mutex_exit(&ipst->ips_ip_g_mrouter_mutex);
    797      0    stevel 	return (B_FALSE);
    798      0    stevel }
    799      0    stevel 
    800      0    stevel static void
    801      0    stevel unlock_good_vif(struct vif *vifp)
    802      0    stevel {
    803      0    stevel 	ASSERT(vifp->v_ipif != NULL);
    804      0    stevel 	ipif_refrele(vifp->v_ipif);
    805      0    stevel 	VIF_REFRELE(vifp);
    806      0    stevel }
    807      0    stevel 
    808      0    stevel static boolean_t
    809      0    stevel lock_good_vif(struct vif *vifp)
    810      0    stevel {
    811      0    stevel 	mutex_enter(&vifp->v_lock);
    812      0    stevel 	if (!(vifp->v_marks & VIF_MARK_GOOD)) {
    813      0    stevel 		mutex_exit(&vifp->v_lock);
    814      0    stevel 		return (B_FALSE);
    815      0    stevel 	}
    816      0    stevel 
    817      0    stevel 	ASSERT(vifp->v_ipif != NULL);
    818      0    stevel 	mutex_enter(&vifp->v_ipif->ipif_ill->ill_lock);
    819      0    stevel 	if (!IPIF_CAN_LOOKUP(vifp->v_ipif)) {
    820      0    stevel 		mutex_exit(&vifp->v_ipif->ipif_ill->ill_lock);
    821      0    stevel 		mutex_exit(&vifp->v_lock);
    822      0    stevel 		return (B_FALSE);
    823      0    stevel 	}
    824      0    stevel 	ipif_refhold_locked(vifp->v_ipif);
    825      0    stevel 	mutex_exit(&vifp->v_ipif->ipif_ill->ill_lock);
    826      0    stevel 	vifp->v_refcnt++;
    827      0    stevel 	mutex_exit(&vifp->v_lock);
    828      0    stevel 	return (B_TRUE);
    829      0    stevel }
    830      0    stevel 
    831      0    stevel /*
    832      0    stevel  * Add a vif to the vif table.
    833      0    stevel  */
    834      0    stevel static int
    835  11042      Erik add_vif(struct vifctl *vifcp, conn_t *connp, ip_stack_t *ipst)
    836      0    stevel {
    837   3448  dh155122 	struct vif	*vifp = ipst->ips_vifs + vifcp->vifc_vifi;
    838      0    stevel 	ipif_t		*ipif;
    839  11042      Erik 	int		error = 0;
    840   3448  dh155122 	struct tbf	*v_tbf = ipst->ips_tbfs + vifcp->vifc_vifi;
    841   5240  nordmark 	conn_t		*mrouter = ipst->ips_ip_g_mrouter;
    842  11042      Erik 	ilm_t		*ilm;
    843  11042      Erik 	ill_t		*ill;
    844      0    stevel 
    845      0    stevel 	ASSERT(connp != NULL);
    846      0    stevel 
    847      0    stevel 	if (vifcp->vifc_vifi >= MAXVIFS)
    848      0    stevel 		return (EINVAL);
    849      0    stevel 
    850   3448  dh155122 	if (is_mrouter_off(ipst))
    851      0    stevel 		return (EINVAL);
    852      0    stevel 
    853      0    stevel 	mutex_enter(&vifp->v_lock);
    854      0    stevel 	/*
    855      0    stevel 	 * Viftable entry should be 0.
    856      0    stevel 	 * if v_marks == 0 but v_refcnt != 0 means struct is being
    857      0    stevel 	 * initialized.
    858      0    stevel 	 *
    859      0    stevel 	 * Also note that it is very unlikely that we will get a MRT_ADD_VIF
    860      0    stevel 	 * request while the delete is in progress, mrouted only sends add
    861      0    stevel 	 * requests when a new interface is added and the new interface cannot
    862      0    stevel 	 * have the same vifi as an existing interface. We make sure that
    863      0    stevel 	 * ill_delete will block till the vif is deleted by adding a refcnt
    864      0    stevel 	 * to ipif in del_vif().
    865      0    stevel 	 */
    866      0    stevel 	if (vifp->v_lcl_addr.s_addr != 0 ||
    867      0    stevel 	    vifp->v_marks != 0 ||
    868      0    stevel 	    vifp->v_refcnt != 0) {
    869      0    stevel 		mutex_exit(&vifp->v_lock);
    870      0    stevel 		return (EADDRINUSE);
    871      0    stevel 	}
    872      0    stevel 
    873      0    stevel 	/* Incoming vif should not be 0 */
    874      0    stevel 	if (vifcp->vifc_lcl_addr.s_addr == 0) {
    875      0    stevel 		mutex_exit(&vifp->v_lock);
    876      0    stevel 		return (EINVAL);
    877      0    stevel 	}
    878      0    stevel 
    879      0    stevel 	vifp->v_refcnt++;
    880      0    stevel 	mutex_exit(&vifp->v_lock);
    881      0    stevel 	/* Find the interface with the local address */
    882      0    stevel 	ipif = ipif_lookup_addr((ipaddr_t)vifcp->vifc_lcl_addr.s_addr, NULL,
    883  11042      Erik 	    IPCL_ZONEID(connp), ipst);
    884      0    stevel 	if (ipif == NULL) {
    885      0    stevel 		VIF_REFRELE(vifp);
    886      0    stevel 		return (EADDRNOTAVAIL);
    887      0    stevel 	}
    888      0    stevel 
    889   3448  dh155122 	if (ipst->ips_ip_mrtdebug > 1) {
    890   5240  nordmark 		(void) mi_strlog(mrouter->conn_rq, 1, SL_TRACE,
    891      0    stevel 		    "add_vif: src 0x%x enter",
    892      0    stevel 		    vifcp->vifc_lcl_addr.s_addr);
    893      0    stevel 	}
    894      0    stevel 
    895      0    stevel 	mutex_enter(&vifp->v_lock);
    896      0    stevel 	/*
    897      0    stevel 	 * Always clear cache when vifs change.
    898      0    stevel 	 * Needed to ensure that src isn't left over from before vif was added.
    899      0    stevel 	 * No need to get last_encap_lock, since we are running as a writer.
    900      0    stevel 	 */
    901      0    stevel 
    902   3448  dh155122 	mutex_enter(&ipst->ips_last_encap_lock);
    903   3448  dh155122 	ipst->ips_last_encap_src = 0;
    904   3448  dh155122 	ipst->ips_last_encap_vif = NULL;
    905   3448  dh155122 	mutex_exit(&ipst->ips_last_encap_lock);
    906      0    stevel 
    907      0    stevel 	if (vifcp->vifc_flags & VIFF_TUNNEL) {
    908      0    stevel 		if ((vifcp->vifc_flags & VIFF_SRCRT) != 0) {
    909      0    stevel 			cmn_err(CE_WARN,
    910      0    stevel 			    "add_vif: source route tunnels not supported\n");
    911      0    stevel 			VIF_REFRELE_LOCKED(vifp);
    912      0    stevel 			ipif_refrele(ipif);
    913      0    stevel 			return (EOPNOTSUPP);
    914      0    stevel 		}
    915      0    stevel 		vifp->v_rmt_addr  = vifcp->vifc_rmt_addr;
    916      0    stevel 
    917      0    stevel 	} else {
    918      0    stevel 		/* Phyint or Register vif */
    919      0    stevel 		if (vifcp->vifc_flags & VIFF_REGISTER) {
    920      0    stevel 			/*
    921      0    stevel 			 * Note: Since all IPPROTO_IP level options (including
    922      0    stevel 			 * MRT_ADD_VIF) are done exclusively via
    923      0    stevel 			 * ip_optmgmt_writer(), a lock is not necessary to
    924      0    stevel 			 * protect reg_vif_num.
    925      0    stevel 			 */
    926   3448  dh155122 			mutex_enter(&ipst->ips_numvifs_mutex);
    927   3448  dh155122 			if (ipst->ips_reg_vif_num == ALL_VIFS) {
    928   3448  dh155122 				ipst->ips_reg_vif_num = vifcp->vifc_vifi;
    929   3448  dh155122 				mutex_exit(&ipst->ips_numvifs_mutex);
    930      0    stevel 			} else {
    931   3448  dh155122 				mutex_exit(&ipst->ips_numvifs_mutex);
    932      0    stevel 				VIF_REFRELE_LOCKED(vifp);
    933      0    stevel 				ipif_refrele(ipif);
    934      0    stevel 				return (EADDRINUSE);
    935      0    stevel 			}
    936      0    stevel 		}
    937      0    stevel 
    938      0    stevel 		/* Make sure the interface supports multicast */
    939      0    stevel 		if ((ipif->ipif_ill->ill_flags & ILLF_MULTICAST) == 0) {
    940      0    stevel 			VIF_REFRELE_LOCKED(vifp);
    941      0    stevel 			ipif_refrele(ipif);
    942      0    stevel 			if (vifcp->vifc_flags & VIFF_REGISTER) {
    943   3448  dh155122 				mutex_enter(&ipst->ips_numvifs_mutex);
    944   3448  dh155122 				ipst->ips_reg_vif_num = ALL_VIFS;
    945   3448  dh155122 				mutex_exit(&ipst->ips_numvifs_mutex);
    946      0    stevel 			}
    947      0    stevel 			return (EOPNOTSUPP);
    948      0    stevel 		}
    949      0    stevel 		/* Enable promiscuous reception of all IP mcasts from the if */
    950      0    stevel 		mutex_exit(&vifp->v_lock);
    951  11042      Erik 
    952  11042      Erik 		ill = ipif->ipif_ill;
    953  11042      Erik 		if (IS_UNDER_IPMP(ill))
    954  11042      Erik 			ill = ipmp_ill_hold_ipmp_ill(ill);
    955  11042      Erik 
    956  11042      Erik 		if (ill == NULL) {
    957  11042      Erik 			ilm = NULL;
    958  11042      Erik 		} else {
    959  11042      Erik 			ilm = ip_addmulti(&ipv6_all_zeros, ill,
    960  11042      Erik 			    ipif->ipif_zoneid, &error);
    961  11042      Erik 			if (ilm != NULL)
    962  11042      Erik 				atomic_inc_32(&ill->ill_mrouter_cnt);
    963  11042      Erik 			if (IS_UNDER_IPMP(ipif->ipif_ill)) {
    964  11042      Erik 				ill_refrele(ill);
    965  11042      Erik 				ill = ipif->ipif_ill;
    966  11042      Erik 			}
    967  11042      Erik 		}
    968  11042      Erik 
    969      0    stevel 		mutex_enter(&vifp->v_lock);
    970      0    stevel 		/*
    971      0    stevel 		 * since we released the lock lets make sure that
    972      0    stevel 		 * ip_mrouter_done() has not been called.
    973      0    stevel 		 */
    974  11042      Erik 		if (ilm == NULL || is_mrouter_off(ipst)) {
    975  11042      Erik 			if (ilm != NULL) {
    976  11042      Erik 				(void) ip_delmulti(ilm);
    977  11042      Erik 				ASSERT(ill->ill_mrouter_cnt > 0);
    978  11042      Erik 				atomic_dec_32(&ill->ill_mrouter_cnt);
    979  11042      Erik 			}
    980      0    stevel 			if (vifcp->vifc_flags & VIFF_REGISTER) {
    981   3448  dh155122 				mutex_enter(&ipst->ips_numvifs_mutex);
    982   3448  dh155122 				ipst->ips_reg_vif_num = ALL_VIFS;
    983   3448  dh155122 				mutex_exit(&ipst->ips_numvifs_mutex);
    984      0    stevel 			}
    985      0    stevel 			VIF_REFRELE_LOCKED(vifp);
    986      0    stevel 			ipif_refrele(ipif);
    987      0    stevel 			return (error?error:EINVAL);
    988      0    stevel 		}
    989  11042      Erik 		vifp->v_ilm = ilm;
    990      0    stevel 	}
    991      0    stevel 	/* Define parameters for the tbf structure */
    992      0    stevel 	vifp->v_tbf = v_tbf;
    993      0    stevel 	gethrestime(&vifp->v_tbf->tbf_last_pkt_t);
    994      0    stevel 	vifp->v_tbf->tbf_n_tok = 0;
    995      0    stevel 	vifp->v_tbf->tbf_q_len = 0;
    996      0    stevel 	vifp->v_tbf->tbf_max_q_len = MAXQSIZE;
    997      0    stevel 	vifp->v_tbf->tbf_q = vifp->v_tbf->tbf_t = NULL;
    998      0    stevel 
    999      0    stevel 	vifp->v_flags = vifcp->vifc_flags;
   1000      0    stevel 	vifp->v_threshold = vifcp->vifc_threshold;
   1001      0    stevel 	vifp->v_lcl_addr = vifcp->vifc_lcl_addr;
   1002      0    stevel 	vifp->v_ipif = ipif;
   1003      0    stevel 	ipif_refrele(ipif);
   1004      0    stevel 	/* Scaling up here, allows division by 1024 in critical code.	*/
   1005      0    stevel 	vifp->v_rate_limit = vifcp->vifc_rate_limit * (1024/1000);
   1006      0    stevel 	vifp->v_timeout_id = 0;
   1007      0    stevel 	/* initialize per vif pkt counters */
   1008      0    stevel 	vifp->v_pkt_in = 0;
   1009      0    stevel 	vifp->v_pkt_out = 0;
   1010      0    stevel 	vifp->v_bytes_in = 0;
   1011      0    stevel 	vifp->v_bytes_out = 0;
   1012      0    stevel 	mutex_init(&vifp->v_tbf->tbf_lock, NULL, MUTEX_DEFAULT, NULL);
   1013      0    stevel 
   1014      0    stevel 	/* Adjust numvifs up, if the vifi is higher than numvifs */
   1015   3448  dh155122 	mutex_enter(&ipst->ips_numvifs_mutex);
   1016   3448  dh155122 	if (ipst->ips_numvifs <= vifcp->vifc_vifi)
   1017   3448  dh155122 		ipst->ips_numvifs = vifcp->vifc_vifi + 1;
   1018   3448  dh155122 	mutex_exit(&ipst->ips_numvifs_mutex);
   1019      0    stevel 
   1020   3448  dh155122 	if (ipst->ips_ip_mrtdebug > 1) {
   1021   5240  nordmark 		(void) mi_strlog(mrouter->conn_rq, 1, SL_TRACE,
   1022      0    stevel 		    "add_vif: #%d, lcladdr %x, %s %x, thresh %x, rate %d",
   1023      0    stevel 		    vifcp->vifc_vifi,
   1024      0    stevel 		    ntohl(vifcp->vifc_lcl_addr.s_addr),
   1025      0    stevel 		    (vifcp->vifc_flags & VIFF_TUNNEL) ? "rmtaddr" : "mask",
   1026      0    stevel 		    ntohl(vifcp->vifc_rmt_addr.s_addr),
   1027      0    stevel 		    vifcp->vifc_threshold, vifcp->vifc_rate_limit);
   1028      0    stevel 	}
   1029      0    stevel 
   1030      0    stevel 	vifp->v_marks = VIF_MARK_GOOD;
   1031      0    stevel 	mutex_exit(&vifp->v_lock);
   1032      0    stevel 	return (0);
   1033      0    stevel }
   1034      0    stevel 
   1035      0    stevel 
   1036      0    stevel /* Delete a vif from the vif table. */
   1037      0    stevel static void
   1038      0    stevel del_vifp(struct vif *vifp)
   1039      0    stevel {
   1040      0    stevel 	struct tbf	*t = vifp->v_tbf;
   1041      0    stevel 	mblk_t  *mp0;
   1042      0    stevel 	vifi_t  vifi;
   1043   3448  dh155122 	ip_stack_t	*ipst = vifp->v_ipif->ipif_ill->ill_ipst;
   1044   5240  nordmark 	conn_t		*mrouter = ipst->ips_ip_g_mrouter;
   1045      0    stevel 
   1046      0    stevel 	ASSERT(vifp->v_marks & VIF_MARK_CONDEMNED);
   1047      0    stevel 	ASSERT(t != NULL);
   1048      0    stevel 
   1049   3448  dh155122 	if (ipst->ips_ip_mrtdebug > 1) {
   1050   5240  nordmark 		(void) mi_strlog(mrouter->conn_rq, 1, SL_TRACE,
   1051      0    stevel 		    "del_vif: src 0x%x\n", vifp->v_lcl_addr.s_addr);
   1052      0    stevel 	}
   1053      0    stevel 
   1054      0    stevel 	if (vifp->v_timeout_id != 0) {
   1055      0    stevel 		(void) untimeout(vifp->v_timeout_id);
   1056      0    stevel 		vifp->v_timeout_id = 0;
   1057      0    stevel 	}
   1058      0    stevel 
   1059      0    stevel 	/*
   1060      0    stevel 	 * Free packets queued at the interface.
   1061      0    stevel 	 * Mrouted takes care of cleaning up mfcs - makes calls to del_mfc.
   1062      0    stevel 	 */
   1063      0    stevel 	mutex_enter(&t->tbf_lock);
   1064      0    stevel 	while (t->tbf_q != NULL) {
   1065      0    stevel 		mp0 = t->tbf_q;
   1066      0    stevel 		t->tbf_q = t->tbf_q->b_next;
   1067      0    stevel 		mp0->b_prev = mp0->b_next = NULL;
   1068      0    stevel 		freemsg(mp0);
   1069      0    stevel 	}
   1070      0    stevel 	mutex_exit(&t->tbf_lock);
   1071      0    stevel 
   1072      0    stevel 	/*
   1073      0    stevel 	 * Always clear cache when vifs change.
   1074      0    stevel 	 * No need to get last_encap_lock since we are running as a writer.
   1075      0    stevel 	 */
   1076   3448  dh155122 	mutex_enter(&ipst->ips_last_encap_lock);
   1077   3448  dh155122 	if (vifp == ipst->ips_last_encap_vif) {
   1078   3448  dh155122 		ipst->ips_last_encap_vif = NULL;
   1079   3448  dh155122 		ipst->ips_last_encap_src = 0;
   1080      0    stevel 	}
   1081   3448  dh155122 	mutex_exit(&ipst->ips_last_encap_lock);
   1082      0    stevel 
   1083      0    stevel 	mutex_destroy(&t->tbf_lock);
   1084      0    stevel 
   1085      0    stevel 	bzero(vifp->v_tbf, sizeof (*(vifp->v_tbf)));
   1086      0    stevel 
   1087      0    stevel 	/* Adjust numvifs down */
   1088   3448  dh155122 	mutex_enter(&ipst->ips_numvifs_mutex);
   1089   3448  dh155122 	for (vifi = ipst->ips_numvifs; vifi != 0; vifi--) /* vifi is unsigned */
   1090   3448  dh155122 		if (ipst->ips_vifs[vifi - 1].v_lcl_addr.s_addr != 0)
   1091      0    stevel 			break;
   1092   3448  dh155122 	ipst->ips_numvifs = vifi;
   1093   3448  dh155122 	mutex_exit(&ipst->ips_numvifs_mutex);
   1094      0    stevel 
   1095      0    stevel 	bzero(vifp, sizeof (*vifp));
   1096      0    stevel }
   1097      0    stevel 
   1098      0    stevel static int
   1099  11042      Erik del_vif(vifi_t *vifip, ip_stack_t *ipst)
   1100      0    stevel {
   1101   3448  dh155122 	struct vif	*vifp = ipst->ips_vifs + *vifip;
   1102      0    stevel 
   1103   3448  dh155122 	if (*vifip >= ipst->ips_numvifs)
   1104      0    stevel 		return (EINVAL);
   1105      0    stevel 
   1106      0    stevel 	mutex_enter(&vifp->v_lock);
   1107      0    stevel 	/*
   1108      0    stevel 	 * Not initialized
   1109      0    stevel 	 * Here we are not looking at the vif that is being initialized
   1110      0    stevel 	 * i.e vifp->v_marks == 0 and refcnt > 0.
   1111      0    stevel 	 */
   1112      0    stevel 	if (vifp->v_lcl_addr.s_addr == 0 ||
   1113      0    stevel 	    !(vifp->v_marks & VIF_MARK_GOOD)) {
   1114      0    stevel 		mutex_exit(&vifp->v_lock);
   1115      0    stevel 		return (EADDRNOTAVAIL);
   1116      0    stevel 	}
   1117      0    stevel 
   1118      0    stevel 	/* Clear VIF_MARK_GOOD and set VIF_MARK_CONDEMNED. */
   1119      0    stevel 	vifp->v_marks &= ~VIF_MARK_GOOD;
   1120      0    stevel 	vifp->v_marks |= VIF_MARK_CONDEMNED;
   1121      0    stevel 
   1122      0    stevel 	/* Phyint only */
   1123      0    stevel 	if (!(vifp->v_flags & (VIFF_TUNNEL | VIFF_REGISTER))) {
   1124      0    stevel 		ipif_t *ipif = vifp->v_ipif;
   1125  11042      Erik 		ilm_t *ilm = vifp->v_ilm;
   1126  11042      Erik 
   1127  11042      Erik 		vifp->v_ilm = NULL;
   1128  11042      Erik 
   1129      0    stevel 		ASSERT(ipif != NULL);
   1130      0    stevel 		/*
   1131      0    stevel 		 * should be OK to drop the lock as we
   1132      0    stevel 		 * have marked this as CONDEMNED.
   1133      0    stevel 		 */
   1134      0    stevel 		mutex_exit(&(vifp)->v_lock);
   1135  11042      Erik 		if (ilm != NULL) {
   1136  11042      Erik 			(void) ip_delmulti(ilm);
   1137  11042      Erik 			ASSERT(ipif->ipif_ill->ill_mrouter_cnt > 0);
   1138  11042      Erik 			atomic_dec_32(&ipif->ipif_ill->ill_mrouter_cnt);
   1139  11042      Erik 		}
   1140      0    stevel 		mutex_enter(&(vifp)->v_lock);
   1141  11042      Erik 	}
   1142  11042      Erik 
   1143  11042      Erik 	if (vifp->v_flags & VIFF_REGISTER) {
   1144  11042      Erik 		mutex_enter(&ipst->ips_numvifs_mutex);
   1145  11042      Erik 		ipst->ips_reg_vif_num = ALL_VIFS;
   1146  11042      Erik 		mutex_exit(&ipst->ips_numvifs_mutex);
   1147      0    stevel 	}
   1148      0    stevel 
   1149      0    stevel 	/*
   1150      0    stevel 	 * decreases the refcnt added in add_vif.
   1151      0    stevel 	 */
   1152      0    stevel 	VIF_REFRELE_LOCKED(vifp);
   1153      0    stevel 	return (0);
   1154      0    stevel }
   1155      0    stevel 
   1156      0    stevel /*
   1157      0    stevel  * Add an mfc entry.
   1158      0    stevel  */
   1159      0    stevel static int
   1160   3448  dh155122 add_mfc(struct mfcctl *mfccp, ip_stack_t *ipst)
   1161      0    stevel {
   1162      0    stevel 	struct mfc *rt;
   1163      0    stevel 	struct rtdetq *rte;
   1164      0    stevel 	ushort_t nstl;
   1165      0    stevel 	int i;
   1166      0    stevel 	struct mfcb *mfcbp;
   1167   5240  nordmark 	conn_t		*mrouter = ipst->ips_ip_g_mrouter;
   1168      0    stevel 
   1169      0    stevel 	/*
   1170      0    stevel 	 * The value of vifi is NO_VIF (==MAXVIFS) if Mrouted
   1171      0    stevel 	 * did not have a real route for pkt.
   1172      0    stevel 	 * We want this pkt without rt installed in the mfctable to prevent
   1173      0    stevel 	 * multiiple tries, so go ahead and put it in mfctable, it will
   1174      0    stevel 	 * be discarded later in ip_mdq() because the child is NULL.
   1175      0    stevel 	 */
   1176      0    stevel 
   1177      0    stevel 	/* Error checking, out of bounds? */
   1178      0    stevel 	if (mfccp->mfcc_parent > MAXVIFS) {
   1179      0    stevel 		ip0dbg(("ADD_MFC: mfcc_parent out of range %d",
   1180      0    stevel 		    (int)mfccp->mfcc_parent));
   1181      0    stevel 		return (EINVAL);
   1182      0    stevel 	}
   1183      0    stevel 
   1184      0    stevel 	if ((mfccp->mfcc_parent != NO_VIF) &&
   1185   3448  dh155122 	    (ipst->ips_vifs[mfccp->mfcc_parent].v_ipif == NULL)) {
   1186      0    stevel 		ip0dbg(("ADD_MFC: NULL ipif for parent vif %d\n",
   1187      0    stevel 		    (int)mfccp->mfcc_parent));
   1188      0    stevel 		return (EINVAL);
   1189      0    stevel 	}
   1190      0    stevel 
   1191   3448  dh155122 	if (is_mrouter_off(ipst)) {
   1192      0    stevel 		return (EINVAL);
   1193      0    stevel 	}
   1194      0    stevel 
   1195   3448  dh155122 	mfcbp = &ipst->ips_mfcs[MFCHASH(mfccp->mfcc_origin.s_addr,
   1196      0    stevel 	    mfccp->mfcc_mcastgrp.s_addr)];
   1197      0    stevel 	MFCB_REFHOLD(mfcbp);
   1198      0    stevel 	MFCFIND(mfcbp, mfccp->mfcc_origin.s_addr,
   1199      0    stevel 	    mfccp->mfcc_mcastgrp.s_addr, rt);
   1200      0    stevel 
   1201      0    stevel 	/* If an entry already exists, just update the fields */
   1202      0    stevel 	if (rt) {
   1203   3448  dh155122 		if (ipst->ips_ip_mrtdebug > 1) {
   1204   5240  nordmark 			(void) mi_strlog(mrouter->conn_rq, 1, SL_TRACE,
   1205      0    stevel 			    "add_mfc: update o %x grp %x parent %x",
   1206      0    stevel 			    ntohl(mfccp->mfcc_origin.s_addr),
   1207      0    stevel 			    ntohl(mfccp->mfcc_mcastgrp.s_addr),
   1208      0    stevel 			    mfccp->mfcc_parent);
   1209      0    stevel 		}
   1210      0    stevel 		mutex_enter(&rt->mfc_mutex);
   1211      0    stevel 		rt->mfc_parent = mfccp->mfcc_parent;
   1212      0    stevel 
   1213   3448  dh155122 		mutex_enter(&ipst->ips_numvifs_mutex);
   1214   3448  dh155122 		for (i = 0; i < (int)ipst->ips_numvifs; i++)
   1215      0    stevel 			rt->mfc_ttls[i] = mfccp->mfcc_ttls[i];
   1216   3448  dh155122 		mutex_exit(&ipst->ips_numvifs_mutex);
   1217      0    stevel 		mutex_exit(&rt->mfc_mutex);
   1218      0    stevel 
   1219      0    stevel 		MFCB_REFRELE(mfcbp);
   1220      0    stevel 		return (0);
   1221      0    stevel 	}
   1222      0    stevel 
   1223      0    stevel 	/*
   1224      0    stevel 	 * Find the entry for which the upcall was made and update.
   1225      0    stevel 	 */
   1226      0    stevel 	for (rt = mfcbp->mfcb_mfc, nstl = 0; rt; rt = rt->mfc_next) {
   1227      0    stevel 		mutex_enter(&rt->mfc_mutex);
   1228      0    stevel 		if ((rt->mfc_origin.s_addr == mfccp->mfcc_origin.s_addr) &&
   1229      0    stevel 		    (rt->mfc_mcastgrp.s_addr == mfccp->mfcc_mcastgrp.s_addr) &&
   1230      0    stevel 		    (rt->mfc_rte != NULL) &&
   1231      0    stevel 		    !(rt->mfc_marks & MFCB_MARK_CONDEMNED)) {
   1232      0    stevel 			if (nstl++ != 0)
   1233      0    stevel 				cmn_err(CE_WARN,
   1234      0    stevel 				    "add_mfc: %s o %x g %x p %x",
   1235      0    stevel 				    "multiple kernel entries",
   1236      0    stevel 				    ntohl(mfccp->mfcc_origin.s_addr),
   1237      0    stevel 				    ntohl(mfccp->mfcc_mcastgrp.s_addr),
   1238      0    stevel 				    mfccp->mfcc_parent);
   1239      0    stevel 
   1240   3448  dh155122 			if (ipst->ips_ip_mrtdebug > 1) {
   1241   5240  nordmark 				(void) mi_strlog(mrouter->conn_rq, 1,
   1242   3448  dh155122 				    SL_TRACE,
   1243      0    stevel 				    "add_mfc: o %x g %x p %x",
   1244      0    stevel 				    ntohl(mfccp->mfcc_origin.s_addr),
   1245      0    stevel 				    ntohl(mfccp->mfcc_mcastgrp.s_addr),
   1246      0    stevel 				    mfccp->mfcc_parent);
   1247      0    stevel 			}
   1248   3448  dh155122 			fill_route(rt, mfccp, ipst);
   1249      0    stevel 
   1250      0    stevel 			/*
   1251      0    stevel 			 * Prevent cleanup of cache entry.
   1252      0    stevel 			 * Timer starts in ip_mforward.
   1253      0    stevel 			 */
   1254      0    stevel 			if (rt->mfc_timeout_id != 0) {
   1255      0    stevel 				timeout_id_t id;
   1256      0    stevel 				id = rt->mfc_timeout_id;
   1257      0    stevel 				/*
   1258      0    stevel 				 * setting id to zero will avoid this
   1259      0    stevel 				 * entry from being cleaned up in
   1260      0    stevel 				 * expire_up_calls().
   1261      0    stevel 				 */
   1262      0    stevel 				rt->mfc_timeout_id = 0;
   1263      0    stevel 				/*
   1264      0    stevel 				 * dropping the lock is fine as we
   1265      0    stevel 				 * have a refhold on the bucket.
   1266      0    stevel 				 * so mfc cannot be freed.
   1267      0    stevel 				 * The timeout can fire but it will see
   1268      0    stevel 				 * that mfc_timeout_id == 0 and not cleanup.
   1269      0    stevel 				 */
   1270      0    stevel 				mutex_exit(&rt->mfc_mutex);
   1271      0    stevel 				(void) untimeout(id);
   1272      0    stevel 				mutex_enter(&rt->mfc_mutex);
   1273      0    stevel 			}
   1274      0    stevel 
   1275      0    stevel 			/*
   1276      0    stevel 			 * Send all pkts that are queued waiting for the upcall.
   1277      0    stevel 			 * ip_mdq param tun set to 0 -
   1278      0    stevel 			 * the return value of ip_mdq() isn't used here,
   1279      0    stevel 			 * so value we send doesn't matter.
   1280      0    stevel 			 */
   1281      0    stevel 			while (rt->mfc_rte != NULL) {
   1282      0    stevel 				rte = rt->mfc_rte;
   1283      0    stevel 				rt->mfc_rte = rte->rte_next;
   1284      0    stevel 				mutex_exit(&rt->mfc_mutex);
   1285      0    stevel 				(void) ip_mdq(rte->mp, (ipha_t *)
   1286      0    stevel 				    rte->mp->b_rptr, rte->ill, 0, rt);
   1287      0    stevel 				freemsg(rte->mp);
   1288      0    stevel 				mi_free((char *)rte);
   1289      0    stevel 				mutex_enter(&rt->mfc_mutex);
   1290      0    stevel 			}
   1291      0    stevel 		}
   1292      0    stevel 		mutex_exit(&rt->mfc_mutex);
   1293      0    stevel 	}
   1294      0    stevel 
   1295      0    stevel 
   1296      0    stevel 	/*
   1297      0    stevel 	 * It is possible that an entry is being inserted without an upcall
   1298      0    stevel 	 */
   1299      0    stevel 	if (nstl == 0) {
   1300      0    stevel 		mutex_enter(&(mfcbp->mfcb_lock));
   1301   3448  dh155122 		if (ipst->ips_ip_mrtdebug > 1) {
   1302   5240  nordmark 			(void) mi_strlog(mrouter->conn_rq, 1, SL_TRACE,
   1303      0    stevel 			    "add_mfc: no upcall o %x g %x p %x",
   1304      0    stevel 			    ntohl(mfccp->mfcc_origin.s_addr),
   1305      0    stevel 			    ntohl(mfccp->mfcc_mcastgrp.s_addr),
   1306      0    stevel 			    mfccp->mfcc_parent);
   1307      0    stevel 		}
   1308   3448  dh155122 		if (is_mrouter_off(ipst)) {
   1309      0    stevel 			mutex_exit(&mfcbp->mfcb_lock);
   1310      0    stevel 			MFCB_REFRELE(mfcbp);
   1311      0    stevel 			return (EINVAL);
   1312      0    stevel 		}
   1313      0    stevel 
   1314      0    stevel 		for (rt = mfcbp->mfcb_mfc; rt; rt = rt->mfc_next) {
   1315      0    stevel 
   1316      0    stevel 			mutex_enter(&rt->mfc_mutex);
   1317      0    stevel 			if ((rt->mfc_origin.s_addr ==
   1318      0    stevel 			    mfccp->mfcc_origin.s_addr) &&
   1319      0    stevel 			    (rt->mfc_mcastgrp.s_addr ==
   1320   5240  nordmark 			    mfccp->mfcc_mcastgrp.s_addr) &&
   1321   5240  nordmark 			    (!(rt->mfc_marks & MFCB_MARK_CONDEMNED))) {
   1322   3448  dh155122 				fill_route(rt, mfccp, ipst);
   1323      0    stevel 				mutex_exit(&rt->mfc_mutex);
   1324      0    stevel 				break;
   1325      0    stevel 			}
   1326      0    stevel 			mutex_exit(&rt->mfc_mutex);
   1327      0    stevel 		}
   1328      0    stevel 
   1329      0    stevel 		/* No upcall, so make a new entry into mfctable */
   1330      0    stevel 		if (rt == NULL) {
   1331      0    stevel 			rt = (struct mfc *)mi_zalloc(sizeof (struct mfc));
   1332      0    stevel 			if (rt == NULL) {
   1333      0    stevel 				ip1dbg(("add_mfc: out of memory\n"));
   1334      0    stevel 				mutex_exit(&mfcbp->mfcb_lock);
   1335      0    stevel 				MFCB_REFRELE(mfcbp);
   1336      0    stevel 				return (ENOBUFS);
   1337      0    stevel 			}
   1338      0    stevel 
   1339      0    stevel 			/* Insert new entry at head of hash chain */
   1340      0    stevel 			mutex_enter(&rt->mfc_mutex);
   1341   3448  dh155122 			fill_route(rt, mfccp, ipst);
   1342      0    stevel 
   1343      0    stevel 			/* Link into table */
   1344      0    stevel 			rt->mfc_next   = mfcbp->mfcb_mfc;
   1345      0    stevel 			mfcbp->mfcb_mfc = rt;
   1346      0    stevel 			mutex_exit(&rt->mfc_mutex);
   1347      0    stevel 		}
   1348      0    stevel 		mutex_exit(&mfcbp->mfcb_lock);
   1349      0    stevel 	}
   1350      0    stevel 
   1351      0    stevel 	MFCB_REFRELE(mfcbp);
   1352      0    stevel 	return (0);
   1353      0    stevel }
   1354      0    stevel 
   1355      0    stevel /*
   1356      0    stevel  * Fills in mfc structure from mrouted mfcctl.
   1357      0    stevel  */
   1358      0    stevel static void
   1359   3448  dh155122 fill_route(struct mfc *rt, struct mfcctl *mfccp, ip_stack_t *ipst)
   1360      0    stevel {
   1361      0    stevel 	int i;
   1362      0    stevel 
   1363      0    stevel 	rt->mfc_origin		= mfccp->mfcc_origin;
   1364      0    stevel 	rt->mfc_mcastgrp	= mfccp->mfcc_mcastgrp;
   1365      0    stevel 	rt->mfc_parent		= mfccp->mfcc_parent;
   1366   3448  dh155122 	mutex_enter(&ipst->ips_numvifs_mutex);
   1367   3448  dh155122 	for (i = 0; i < (int)ipst->ips_numvifs; i++) {
   1368      0    stevel 		rt->mfc_ttls[i] = mfccp->mfcc_ttls[i];
   1369      0    stevel 	}
   1370   3448  dh155122 	mutex_exit(&ipst->ips_numvifs_mutex);
   1371      0    stevel 	/* Initialize pkt counters per src-grp */
   1372      0    stevel 	rt->mfc_pkt_cnt	= 0;
   1373      0    stevel 	rt->mfc_byte_cnt	= 0;
   1374      0    stevel 	rt->mfc_wrong_if	= 0;
   1375      0    stevel 	rt->mfc_last_assert.tv_sec = rt->mfc_last_assert.tv_nsec = 0;
   1376      0    stevel 
   1377      0    stevel }
   1378      0    stevel 
   1379      0    stevel static void
   1380      0    stevel free_queue(struct mfc *mfcp)
   1381      0    stevel {
   1382      0    stevel 	struct rtdetq *rte0;
   1383      0    stevel 
   1384      0    stevel 	/*
   1385      0    stevel 	 * Drop all queued upcall packets.
   1386      0    stevel 	 * Free the mbuf with the pkt.
   1387      0    stevel 	 */
   1388      0    stevel 	while ((rte0 = mfcp->mfc_rte) != NULL) {
   1389      0    stevel 		mfcp->mfc_rte = rte0->rte_next;
   1390      0    stevel 		freemsg(rte0->mp);
   1391      0    stevel 		mi_free((char *)rte0);
   1392      0    stevel 	}
   1393      0    stevel }
   1394      0    stevel /*
   1395      0    stevel  * go thorugh the hash bucket and free all the entries marked condemned.
   1396      0    stevel  */
   1397      0    stevel void
   1398      0    stevel release_mfc(struct mfcb *mfcbp)
   1399      0    stevel {
   1400      0    stevel 	struct mfc *current_mfcp;
   1401      0    stevel 	struct mfc *prev_mfcp;
   1402      0    stevel 
   1403      0    stevel 	prev_mfcp = current_mfcp = mfcbp->mfcb_mfc;
   1404      0    stevel 
   1405      0    stevel 	while (current_mfcp != NULL) {
   1406      0    stevel 		if (current_mfcp->mfc_marks & MFCB_MARK_CONDEMNED) {
   1407      0    stevel 			if (current_mfcp == mfcbp->mfcb_mfc) {
   1408      0    stevel 				mfcbp->mfcb_mfc = current_mfcp->mfc_next;
   1409      0    stevel 				free_queue(current_mfcp);
   1410      0    stevel 				mi_free(current_mfcp);
   1411      0    stevel 				prev_mfcp = current_mfcp = mfcbp->mfcb_mfc;
   1412      0    stevel 				continue;
   1413      0    stevel 			}
   1414      0    stevel 			ASSERT(prev_mfcp != NULL);
   1415      0    stevel 			prev_mfcp->mfc_next = current_mfcp->mfc_next;
   1416      0    stevel 			free_queue(current_mfcp);
   1417      0    stevel 			mi_free(current_mfcp);
   1418      0    stevel 			current_mfcp = NULL;
   1419      0    stevel 		} else {
   1420      0    stevel 			prev_mfcp = current_mfcp;
   1421      0    stevel 		}
   1422      0    stevel 
   1423      0    stevel 		current_mfcp = prev_mfcp->mfc_next;
   1424      0    stevel 
   1425      0    stevel 	}
   1426      0    stevel 	mfcbp->mfcb_marks &= ~MFCB_MARK_CONDEMNED;
   1427      0    stevel 	ASSERT(mfcbp->mfcb_mfc != NULL || mfcbp->mfcb_marks == 0);
   1428      0    stevel }
   1429      0    stevel 
   1430      0    stevel /*
   1431      0    stevel  * Delete an mfc entry.
   1432      0    stevel  */
   1433      0    stevel static int
   1434   3448  dh155122 del_mfc(struct mfcctl *mfccp, ip_stack_t *ipst)
   1435      0    stevel {
   1436      0    stevel 	struct in_addr	origin;
   1437      0    stevel 	struct in_addr	mcastgrp;
   1438   5240  nordmark 	struct mfc 	*rt;
   1439   5240  nordmark 	uint_t		hash;
   1440   5240  nordmark 	conn_t		*mrouter = ipst->ips_ip_g_mrouter;
   1441      0    stevel 
   1442      0    stevel 	origin = mfccp->mfcc_origin;
   1443      0    stevel 	mcastgrp = mfccp->mfcc_mcastgrp;
   1444      0    stevel 	hash = MFCHASH(origin.s_addr, mcastgrp.s_addr);
   1445      0    stevel 
   1446   3448  dh155122 	if (ipst->ips_ip_mrtdebug > 1) {
   1447   5240  nordmark 		(void) mi_strlog(mrouter->conn_rq, 1, SL_TRACE,
   1448      0    stevel 		    "del_mfc: o %x g %x",
   1449      0    stevel 		    ntohl(origin.s_addr),
   1450      0    stevel 		    ntohl(mcastgrp.s_addr));
   1451      0    stevel 	}
   1452      0    stevel 
   1453   3448  dh155122 	MFCB_REFHOLD(&ipst->ips_mfcs[hash]);
   1454      0    stevel 
   1455      0    stevel 	/* Find mfc in mfctable, finds only entries without upcalls */
   1456   3448  dh155122 	for (rt = ipst->ips_mfcs[hash].mfcb_mfc; rt; rt = rt->mfc_next) {
   1457      0    stevel 		mutex_enter(&rt->mfc_mutex);
   1458      0    stevel 		if (origin.s_addr == rt->mfc_origin.s_addr &&
   1459      0    stevel 		    mcastgrp.s_addr == rt->mfc_mcastgrp.s_addr &&
   1460      0    stevel 		    rt->mfc_rte == NULL &&
   1461      0    stevel 		    !(rt->mfc_marks & MFCB_MARK_CONDEMNED))
   1462      0    stevel 			break;
   1463      0    stevel 		mutex_exit(&rt->mfc_mutex);
   1464      0    stevel 	}
   1465      0    stevel 
   1466      0    stevel 	/*
   1467      0    stevel 	 * Return if there was an upcall (mfc_rte != NULL,
   1468      0    stevel 	 * or rt not in mfctable.
   1469      0    stevel 	 */
   1470      0    stevel 	if (rt == NULL) {
   1471   3448  dh155122 		MFCB_REFRELE(&ipst->ips_mfcs[hash]);
   1472      0    stevel 		return (EADDRNOTAVAIL);
   1473      0    stevel 	}
   1474      0    stevel 
   1475      0    stevel 
   1476      0    stevel 	/*
   1477      0    stevel 	 * no need to hold lock as we have a reference.
   1478      0    stevel 	 */
   1479   3448  dh155122 	ipst->ips_mfcs[hash].mfcb_marks |= MFCB_MARK_CONDEMNED;
   1480      0    stevel 	/* error checking */
   1481      0    stevel 	if (rt->mfc_timeout_id != 0) {
   1482      0    stevel 		ip0dbg(("del_mfc: TIMEOUT NOT 0, rte not null"));
   1483      0    stevel 		/*
   1484      0    stevel 		 * Its ok to drop the lock,  the struct cannot be freed
   1485      0    stevel 		 * since we have a ref on the hash bucket.
   1486      0    stevel 		 */
   1487      0    stevel 		rt->mfc_timeout_id = 0;
   1488      0    stevel 		mutex_exit(&rt->mfc_mutex);
   1489      0    stevel 		(void) untimeout(rt->mfc_timeout_id);
   1490      0    stevel 		mutex_enter(&rt->mfc_mutex);
   1491      0    stevel 	}
   1492      0    stevel 
   1493      0    stevel 	ASSERT(rt->mfc_rte == NULL);
   1494      0    stevel 
   1495      0    stevel 
   1496      0    stevel 	/*
   1497      0    stevel 	 * Delete the entry from the cache
   1498      0    stevel 	 */
   1499      0    stevel 	rt->mfc_marks |= MFCB_MARK_CONDEMNED;
   1500      0    stevel 	mutex_exit(&rt->mfc_mutex);
   1501      0    stevel 
   1502   3448  dh155122 	MFCB_REFRELE(&ipst->ips_mfcs[hash]);
   1503      0    stevel 
   1504      0    stevel 	return (0);
   1505      0    stevel }
   1506      0    stevel 
   1507      0    stevel #define	TUNNEL_LEN  12  /* # bytes of IP option for tunnel encapsulation  */
   1508      0    stevel 
   1509      0    stevel /*
   1510      0    stevel  * IP multicast forwarding function. This function assumes that the packet
   1511      0    stevel  * pointed to by ipha has arrived on (or is about to be sent to) the interface
   1512      0    stevel  * pointed to by "ill", and the packet is to be relayed to other networks
   1513      0    stevel  * that have members of the packet's destination IP multicast group.
   1514      0    stevel  *
   1515      0    stevel  * The packet is returned unscathed to the caller, unless it is
   1516      0    stevel  * erroneous, in which case a -1 value tells the caller (IP)
   1517      0    stevel  * to discard it.
   1518      0    stevel  *
   1519      0    stevel  * Unlike BSD, SunOS 5.x needs to return to IP info about
   1520      0    stevel  * whether pkt came in thru a tunnel, so it can be discarded, unless
   1521      0    stevel  * it's IGMP. In BSD, the ifp is bogus for tunnels, so pkt won't try
   1522      0    stevel  * to be delivered.
   1523      0    stevel  * Return values are 0 - pkt is okay and phyint
   1524      0    stevel  *		    -1 - pkt is malformed and to be tossed
   1525      0    stevel  *                   1 - pkt came in on tunnel
   1526      0    stevel  */
   1527      0    stevel int
   1528  11042      Erik ip_mforward(mblk_t *mp, ip_recv_attr_t *ira)
   1529      0    stevel {
   1530  11042      Erik 	ipha_t		*ipha = (ipha_t *)mp->b_rptr;
   1531  11042      Erik 	ill_t		*ill = ira->ira_ill;
   1532      0    stevel 	struct mfc 	*rt;
   1533      0    stevel 	ipaddr_t	src, dst, tunnel_src = 0;
   1534      0    stevel 	static int	srctun = 0;
   1535      0    stevel 	vifi_t		vifi;
   1536      0    stevel 	boolean_t	pim_reg_packet = B_FALSE;
   1537  11042      Erik 	struct mfcb	*mfcbp;
   1538   3448  dh155122 	ip_stack_t	*ipst = ill->ill_ipst;
   1539   5240  nordmark 	conn_t		*mrouter = ipst->ips_ip_g_mrouter;
   1540  11042      Erik 	ill_t		*rill = ira->ira_rill;
   1541  11042      Erik 
   1542  11042      Erik 	ASSERT(ira->ira_pktlen == msgdsize(mp));
   1543      0    stevel 
   1544   3448  dh155122 	if (ipst->ips_ip_mrtdebug > 1) {
   1545   5240  nordmark 		(void) mi_strlog(mrouter->conn_rq, 1, SL_TRACE,
   1546      0    stevel 		    "ip_mforward: RECV ipha_src %x, ipha_dst %x, ill %s",
   1547      0    stevel 		    ntohl(ipha->ipha_src), ntohl(ipha->ipha_dst),
   1548      0    stevel 		    ill->ill_name);
   1549      0    stevel 	}
   1550      0    stevel 
   1551      0    stevel 	dst = ipha->ipha_dst;
   1552  11042      Erik 	if (ira->ira_flags & IRAF_PIM_REGISTER)
   1553      0    stevel 		pim_reg_packet = B_TRUE;
   1554  11042      Erik 	else if (ira->ira_flags & IRAF_MROUTE_TUNNEL_SET)
   1555  11042      Erik 		tunnel_src = ira->ira_mroute_tunnel;
   1556      0    stevel 
   1557      0    stevel 	/*
   1558      0    stevel 	 * Don't forward a packet with time-to-live of zero or one,
   1559      0    stevel 	 * or a packet destined to a local-only group.
   1560      0    stevel 	 */
   1561      0    stevel 	if (CLASSD(dst) && (ipha->ipha_ttl <= 1 ||
   1562   5240  nordmark 	    (ipaddr_t)ntohl(dst) <= INADDR_MAX_LOCAL_GROUP)) {
   1563   3448  dh155122 		if (ipst->ips_ip_mrtdebug > 1) {
   1564   5240  nordmark 			(void) mi_strlog(mrouter->conn_rq, 1, SL_TRACE,
   1565      0    stevel 			    "ip_mforward: not forwarded ttl %d,"
   1566      0    stevel 			    " dst 0x%x ill %s",
   1567      0    stevel 			    ipha->ipha_ttl, ntohl(dst), ill->ill_name);
   1568      0    stevel 		}
   1569      0    stevel 		if (tunnel_src != 0)
   1570      0    stevel 			return (1);
   1571      0    stevel 		else
   1572      0    stevel 			return (0);
   1573      0    stevel 	}
   1574      0    stevel 
   1575      0    stevel 	if ((tunnel_src != 0) || pim_reg_packet) {
   1576      0    stevel 		/*
   1577      0    stevel 		 * Packet arrived over an encapsulated tunnel or via a PIM
   1578  11042      Erik 		 * register message.
   1579      0    stevel 		 */
   1580   3448  dh155122 		if (ipst->ips_ip_mrtdebug > 1) {
   1581      0    stevel 			if (tunnel_src != 0) {
   1582   5240  nordmark 				(void) mi_strlog(mrouter->conn_rq, 1,
   1583   3448  dh155122 				    SL_TRACE,
   1584      0    stevel 				    "ip_mforward: ill %s arrived via ENCAP TUN",
   1585      0    stevel 				    ill->ill_name);
   1586      0    stevel 			} else if (pim_reg_packet) {
   1587   5240  nordmark 				(void) mi_strlog(mrouter->conn_rq, 1,
   1588   3448  dh155122 				    SL_TRACE,
   1589      0    stevel 				    "ip_mforward: ill %s arrived via"
   1590      0    stevel 				    "  REGISTER VIF",
   1591      0    stevel 				    ill->ill_name);
   1592      0    stevel 			}
   1593      0    stevel 		}
   1594      0    stevel 	} else if ((ipha->ipha_version_and_hdr_length & 0xf) <
   1595      0    stevel 	    (uint_t)(IP_SIMPLE_HDR_LENGTH + TUNNEL_LEN) >> 2 ||
   1596      0    stevel 	    ((uchar_t *)(ipha + 1))[1] != IPOPT_LSRR) {
   1597      0    stevel 		/* Packet arrived via a physical interface. */
   1598   3448  dh155122 		if (ipst->ips_ip_mrtdebug > 1) {
   1599   5240  nordmark 			(void) mi_strlog(mrouter->conn_rq, 1, SL_TRACE,
   1600      0    stevel 			    "ip_mforward: ill %s arrived via PHYINT",
   1601      0    stevel 			    ill->ill_name);
   1602      0    stevel 		}
   1603      0    stevel 
   1604      0    stevel 	} else {
   1605      0    stevel 		/*
   1606      0    stevel 		 * Packet arrived through a SRCRT tunnel.
   1607      0    stevel 		 * Source-route tunnels are no longer supported.
   1608      0    stevel 		 * Error message printed every 1000 times.
   1609      0    stevel 		 */
   1610      0    stevel 		if ((srctun++ % 1000) == 0) {
   1611      0    stevel 			cmn_err(CE_WARN,
   1612      0    stevel 			    "ip_mforward: received source-routed pkt from %x",
   1613      0    stevel 			    ntohl(ipha->ipha_src));
   1614      0    stevel 		}
   1615      0    stevel 		return (-1);
   1616      0    stevel 	}
   1617      0    stevel 
   1618   3448  dh155122 	ipst->ips_mrtstat->mrts_fwd_in++;
   1619      0    stevel 	src = ipha->ipha_src;
   1620      0    stevel 
   1621      0    stevel 	/* Find route in cache, return NULL if not there or upcalls q'ed. */
   1622      0    stevel 
   1623      0    stevel 	/*
   1624      0    stevel 	 * Lock the mfctable against changes made by ip_mforward.
   1625      0    stevel 	 * Note that only add_mfc and del_mfc can remove entries and
   1626      0    stevel 	 * they run with exclusive access to IP. So we do not need to
   1627      0    stevel 	 * guard against the rt being deleted, so release lock after reading.
   1628      0    stevel 	 */
   1629      0    stevel 
   1630   3448  dh155122 	if (is_mrouter_off(ipst))
   1631      0    stevel 		return (-1);
   1632      0    stevel 
   1633   3448  dh155122 	mfcbp = &ipst->ips_mfcs[MFCHASH(src, dst)];
   1634      0    stevel 	MFCB_REFHOLD(mfcbp);
   1635      0    stevel 	MFCFIND(mfcbp, src, dst, rt);
   1636      0    stevel 
   1637      0    stevel 	/* Entry exists, so forward if necessary */
   1638      0    stevel 	if (rt != NULL) {
   1639      0    stevel 		int ret = 0;
   1640   3448  dh155122 		ipst->ips_mrtstat->mrts_mfc_hits++;
   1641      0    stevel 		if (pim_reg_packet) {
   1642   3448  dh155122 			ASSERT(ipst->ips_reg_vif_num != ALL_VIFS);
   1643      0    stevel 			ret = ip_mdq(mp, ipha,
   1644   3448  dh155122 			    ipst->ips_vifs[ipst->ips_reg_vif_num].
   1645   3448  dh155122 			    v_ipif->ipif_ill,
   1646   3448  dh155122 			    0, rt);
   1647      0    stevel 		} else {
   1648      0    stevel 			ret = ip_mdq(mp, ipha, ill, tunnel_src, rt);
   1649      0    stevel 		}
   1650      0    stevel 
   1651      0    stevel 		MFCB_REFRELE(mfcbp);
   1652      0    stevel 		return (ret);
   1653      0    stevel 
   1654      0    stevel 		/*
   1655      0    stevel 		 * Don't forward if we don't have a cache entry.  Mrouted will
   1656      0    stevel 		 * always provide a cache entry in response to an upcall.
   1657      0    stevel 		 */
   1658      0    stevel 	} else {
   1659      0    stevel 		/*
   1660      0    stevel 		 * If we don't have a route for packet's origin, make a copy
   1661      0    stevel 		 * of the packet and send message to routing daemon.
   1662      0    stevel 		 */
   1663      0    stevel 		struct mfc	*mfc_rt	 = NULL;
   1664      0    stevel 		mblk_t		*mp0	 = NULL;
   1665      0    stevel 		mblk_t		*mp_copy = NULL;
   1666      0    stevel 		struct rtdetq	*rte	 = NULL;
   1667      0    stevel 		struct rtdetq	*rte_m, *rte1, *prev_rte;
   1668      0    stevel 		uint_t		hash;
   1669      0    stevel 		int		npkts;
   1670      0    stevel 		boolean_t	new_mfc = B_FALSE;
   1671   3448  dh155122 		ipst->ips_mrtstat->mrts_mfc_misses++;
   1672      0    stevel 		/* BSD uses mrts_no_route++ */
   1673   3448  dh155122 		if (ipst->ips_ip_mrtdebug > 1) {
   1674   5240  nordmark 			(void) mi_strlog(mrouter->conn_rq, 1, SL_TRACE,
   1675      0    stevel 			    "ip_mforward: no rte ill %s src %x g %x misses %d",
   1676      0    stevel 			    ill->ill_name, ntohl(src), ntohl(dst),
   1677   3448  dh155122 			    (int)ipst->ips_mrtstat->mrts_mfc_misses);
   1678      0    stevel 		}
   1679      0    stevel 		/*
   1680      0    stevel 		 * The order of the following code differs from the BSD code.
   1681      0    stevel 		 * Pre-mc3.5, the BSD code was incorrect and SunOS 5.x
   1682      0    stevel 		 * code works, so SunOS 5.x wasn't changed to conform to the
   1683      0    stevel 		 * BSD version.
   1684      0    stevel 		 */
   1685      0    stevel 
   1686      0    stevel 		/* Lock mfctable. */
   1687      0    stevel 		hash = MFCHASH(src, dst);
   1688   3448  dh155122 		mutex_enter(&(ipst->ips_mfcs[hash].mfcb_lock));
   1689      0    stevel 
   1690      0    stevel 		/*
   1691      0    stevel 		 * If we are turning off mrouted return an error
   1692      0    stevel 		 */
   1693   3448  dh155122 		if (is_mrouter_off(ipst)) {
   1694      0    stevel 			mutex_exit(&mfcbp->mfcb_lock);
   1695      0    stevel 			MFCB_REFRELE(mfcbp);
   1696      0    stevel 			return (-1);
   1697      0    stevel 		}
   1698      0    stevel 
   1699      0    stevel 		/* Is there an upcall waiting for this packet? */
   1700   3448  dh155122 		for (mfc_rt = ipst->ips_mfcs[hash].mfcb_mfc; mfc_rt;
   1701      0    stevel 		    mfc_rt = mfc_rt->mfc_next) {
   1702      0    stevel 			mutex_enter(&mfc_rt->mfc_mutex);
   1703   3448  dh155122 			if (ipst->ips_ip_mrtdebug > 1) {
   1704   5240  nordmark 				(void) mi_strlog(mrouter->conn_rq, 1,
   1705   3448  dh155122 				    SL_TRACE,
   1706      0    stevel 				    "ip_mforward: MFCTAB hash %d o 0x%x"
   1707      0    stevel 				    " g 0x%x\n",
   1708      0    stevel 				    hash, ntohl(mfc_rt->mfc_origin.s_addr),
   1709      0    stevel 				    ntohl(mfc_rt->mfc_mcastgrp.s_addr));
   1710      0    stevel 			}
   1711      0    stevel 			/* There is an upcall */
   1712      0    stevel 			if ((src == mfc_rt->mfc_origin.s_addr) &&
   1713      0    stevel 			    (dst == mfc_rt->mfc_mcastgrp.s_addr) &&
   1714      0    stevel 			    (mfc_rt->mfc_rte != NULL) &&
   1715      0    stevel 			    !(mfc_rt->mfc_marks & MFCB_MARK_CONDEMNED)) {
   1716      0    stevel 				break;
   1717      0    stevel 			}
   1718      0    stevel 			mutex_exit(&mfc_rt->mfc_mutex);
   1719      0    stevel 		}
   1720      0    stevel 		/* No upcall, so make a new entry into mfctable */
   1721      0    stevel 		if (mfc_rt == NULL) {
   1722      0    stevel 			mfc_rt = (struct mfc *)mi_zalloc(sizeof (struct mfc));
   1723      0    stevel 			if (mfc_rt == NULL) {
   1724   3448  dh155122 				ipst->ips_mrtstat->mrts_fwd_drop++;
   1725      0    stevel 				ip1dbg(("ip_mforward: out of memory "
   1726      0    stevel 				    "for mfc, mfc_rt\n"));
   1727      0    stevel 				goto error_return;
   1728      0    stevel 			} else
   1729      0    stevel 				new_mfc = B_TRUE;
   1730      0    stevel 			/* Get resources */
   1731      0    stevel 			/* TODO could copy header and dup rest */
   1732      0    stevel 			mp_copy = copymsg(mp);
   1733      0    stevel 			if (mp_copy == NULL) {
   1734   3448  dh155122 				ipst->ips_mrtstat->mrts_fwd_drop++;
   1735      0    stevel 				ip1dbg(("ip_mforward: out of memory for "
   1736      0    stevel 				    "mblk, mp_copy\n"));
   1737      0    stevel 				goto error_return;
   1738      0    stevel 			}
   1739      0    stevel 			mutex_enter(&mfc_rt->mfc_mutex);
   1740      0    stevel 		}
   1741      0    stevel 		/* Get resources for rte, whether first rte or not first. */
   1742      0    stevel 		/* Add this packet into rtdetq */
   1743      0    stevel 		rte = (struct rtdetq *)mi_zalloc(sizeof (struct rtdetq));
   1744      0    stevel 		if (rte == NULL) {
   1745   3448  dh155122 			ipst->ips_mrtstat->mrts_fwd_drop++;
   1746      0    stevel 			mutex_exit(&mfc_rt->mfc_mutex);
   1747      0    stevel 			ip1dbg(("ip_mforward: out of memory for"
   1748      0    stevel 			    " rtdetq, rte\n"));
   1749      0    stevel 			goto error_return;
   1750      0    stevel 		}
   1751      0    stevel 
   1752      0    stevel 		mp0 = copymsg(mp);
   1753      0    stevel 		if (mp0 == NULL) {
   1754   3448  dh155122 			ipst->ips_mrtstat->mrts_fwd_drop++;
   1755      0    stevel 			ip1dbg(("ip_mforward: out of memory for mblk, mp0\n"));
   1756      0    stevel 			mutex_exit(&mfc_rt->mfc_mutex);
   1757      0    stevel 			goto error_return;
   1758      0    stevel 		}
   1759      0    stevel 		rte->mp		= mp0;
   1760      0    stevel 		if (pim_reg_packet) {
   1761   3448  dh155122 			ASSERT(ipst->ips_reg_vif_num != ALL_VIFS);
   1762   3448  dh155122 			rte->ill =
   1763   3448  dh155122 			    ipst->ips_vifs[ipst->ips_reg_vif_num].
   1764   3448  dh155122 			    v_ipif->ipif_ill;
   1765      0    stevel 		} else {
   1766      0    stevel 			rte->ill = ill;
   1767      0    stevel 		}
   1768      0    stevel 		rte->rte_next	= NULL;
   1769      0    stevel 
   1770      0    stevel 		/*
   1771      0    stevel 		 * Determine if upcall q (rtdetq) has overflowed.
   1772      0    stevel 		 * mfc_rt->mfc_rte is null by mi_zalloc
   1773      0    stevel 		 * if it is the first message.
   1774      0    stevel 		 */
   1775      0    stevel 		for (rte_m = mfc_rt->mfc_rte, npkts = 0; rte_m;
   1776      0    stevel 		    rte_m = rte_m->rte_next)
   1777      0    stevel 			npkts++;
   1778   3448  dh155122 		if (ipst->ips_ip_mrtdebug > 1) {
   1779   5240  nordmark 			(void) mi_strlog(mrouter->conn_rq, 1, SL_TRACE,
   1780      0    stevel 			    "ip_mforward: upcalls %d\n", npkts);
   1781      0    stevel 		}
   1782      0    stevel 		if (npkts > MAX_UPQ) {
   1783   3448  dh155122 			ipst->ips_mrtstat->mrts_upq_ovflw++;
   1784      0    stevel 			mutex_exit(&mfc_rt->mfc_mutex);
   1785      0    stevel 			goto error_return;
   1786      0    stevel 		}
   1787      0    stevel 
   1788      0    stevel 		if (npkts == 0) {	/* first upcall */
   1789      0    stevel 			int i = 0;
   1790      0    stevel 			/*
   1791      0    stevel 			 * Now finish installing the new mfc! Now that we have
   1792      0    stevel 			 * resources!  Insert new entry at head of hash chain.
   1793      0    stevel 			 * Use src and dst which are ipaddr_t's.
   1794      0    stevel 			 */
   1795      0    stevel 			mfc_rt->mfc_origin.s_addr = src;
   1796      0    stevel 			mfc_rt->mfc_mcastgrp.s_addr = dst;
   1797      0    stevel 
   1798   3448  dh155122 			mutex_enter(&ipst->ips_numvifs_mutex);
   1799   3448  dh155122 			for (i = 0; i < (int)ipst->ips_numvifs; i++)
   1800      0    stevel 				mfc_rt->mfc_ttls[i] = 0;
   1801   3448  dh155122 			mutex_exit(&ipst->ips_numvifs_mutex);
   1802      0    stevel 			mfc_rt->mfc_parent = ALL_VIFS;
   1803      0    stevel 
   1804      0    stevel 			/* Link into table */
   1805   3448  dh155122 			if (ipst->ips_ip_mrtdebug > 1) {
   1806   5240  nordmark 				(void) mi_strlog(mrouter->conn_rq, 1,
   1807   3448  dh155122 				    SL_TRACE,
   1808      0    stevel 				    "ip_mforward: NEW MFCTAB hash %d o 0x%x "
   1809      0    stevel 				    "g 0x%x\n", hash,
   1810      0    stevel 				    ntohl(mfc_rt->mfc_origin.s_addr),
   1811      0    stevel 				    ntohl(mfc_rt->mfc_mcastgrp.s_addr));
   1812      0    stevel 			}
   1813   3448  dh155122 			mfc_rt->mfc_next = ipst->ips_mfcs[hash].mfcb_mfc;
   1814   3448  dh155122 			ipst->ips_mfcs[hash].mfcb_mfc = mfc_rt;
   1815      0    stevel 			mfc_rt->mfc_rte = NULL;
   1816      0    stevel 		}
   1817      0    stevel 
   1818      0    stevel 		/* Link in the upcall */
   1819      0    stevel 		/* First upcall */
   1820      0    stevel 		if (mfc_rt->mfc_rte == NULL)
   1821      0    stevel 			mfc_rt->mfc_rte = rte;
   1822      0    stevel 		else {
   1823      0    stevel 			/* not the first upcall */
   1824      0    stevel 			prev_rte = mfc_rt->mfc_rte;
   1825      0    stevel 			for (rte1 = mfc_rt->mfc_rte->rte_next; rte1;
   1826   5240  nordmark 			    prev_rte = rte1, rte1 = rte1->rte_next)
   1827   5240  nordmark 				;
   1828      0    stevel 			prev_rte->rte_next = rte;
   1829      0    stevel 		}
   1830      0    stevel 
   1831      0    stevel 		/*
   1832      0    stevel 		 * No upcalls waiting, this is first one, so send a message to
   1833      0    stevel 		 * routing daemon to install a route into kernel table.
   1834      0    stevel 		 */
   1835      0    stevel 		if (npkts == 0) {
   1836      0    stevel 			struct igmpmsg	*im;
   1837      0    stevel 			/* ipha_protocol is 0, for upcall */
   1838      0    stevel 			ASSERT(mp_copy != NULL);
   1839      0    stevel 			im = (struct igmpmsg *)mp_copy->b_rptr;
   1840      0    stevel 			im->im_msgtype	= IGMPMSG_NOCACHE;
   1841      0    stevel 			im->im_mbz = 0;
   1842   3448  dh155122 			mutex_enter(&ipst->ips_numvifs_mutex);
   1843      0    stevel 			if (pim_reg_packet) {
   1844   3448  dh155122 				im->im_vif = (uchar_t)ipst->ips_reg_vif_num;
   1845   3448  dh155122 				mutex_exit(&ipst->ips_numvifs_mutex);
   1846      0    stevel 			} else {
   1847      0    stevel 				/*
   1848      0    stevel 				 * XXX do we need to hold locks here ?
   1849      0    stevel 				 */
   1850   3448  dh155122 				for (vifi = 0;
   1851   3448  dh155122 				    vifi < ipst->ips_numvifs;
   1852   3448  dh155122 				    vifi++) {
   1853   3448  dh155122 					if (ipst->ips_vifs[vifi].v_ipif == NULL)
   1854      0    stevel 						continue;
   1855   3448  dh155122 					if (ipst->ips_vifs[vifi].
   1856   3448  dh155122 					    v_ipif->ipif_ill == ill) {
   1857      0    stevel 						im->im_vif = (uchar_t)vifi;
   1858      0    stevel 						break;
   1859      0    stevel 					}
   1860      0    stevel 				}
   1861   3448  dh155122 				mutex_exit(&ipst->ips_numvifs_mutex);
   1862   3448  dh155122 				ASSERT(vifi < ipst->ips_numvifs);
   1863      0    stevel 			}
   1864      0    stevel 
   1865   3448  dh155122 			ipst->ips_mrtstat->mrts_upcalls++;
   1866      0    stevel 			/* Timer to discard upcalls if mrouted is too slow */
   1867      0    stevel 			mfc_rt->mfc_timeout_id = timeout(expire_upcalls,
   1868      0    stevel 			    mfc_rt, EXPIRE_TIMEOUT * UPCALL_EXPIRE);
   1869      0    stevel 			mutex_exit(&mfc_rt->mfc_mutex);
   1870   3448  dh155122 			mutex_exit(&(ipst->ips_mfcs[hash].mfcb_lock));
   1871   5240  nordmark 			/* Pass to RAWIP */
   1872  11042      Erik 			ira->ira_ill = ira->ira_rill = NULL;
   1873  11042      Erik 			(mrouter->conn_recv)(mrouter, mp_copy, NULL, ira);
   1874  11042      Erik 			ira->ira_ill = ill;
   1875  11042      Erik 			ira->ira_rill = rill;
   1876      0    stevel 		} else {
   1877      0    stevel 			mutex_exit(&mfc_rt->mfc_mutex);
   1878   3448  dh155122 			mutex_exit(&(ipst->ips_mfcs[hash].mfcb_lock));
   1879  11042      Erik 			BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards);
   1880  11042      Erik 			ip_drop_input("ip_mforward - upcall already waiting",
   1881  11042      Erik 			    mp_copy, ill);
   1882      0    stevel 			freemsg(mp_copy);
   1883      0    stevel 		}
   1884      0    stevel 
   1885      0    stevel 		MFCB_REFRELE(mfcbp);
   1886      0    stevel 		if (tunnel_src != 0)
   1887      0    stevel 			return (1);
   1888      0    stevel 		else
   1889      0    stevel 			return (0);
   1890      0    stevel 	error_return:
   1891   3448  dh155122 		mutex_exit(&(ipst->ips_mfcs[hash].mfcb_lock));
   1892      0    stevel 		MFCB_REFRELE(mfcbp);
   1893      0    stevel 		if (mfc_rt != NULL && (new_mfc == B_TRUE))
   1894      0    stevel 			mi_free((char *)mfc_rt);
   1895      0    stevel 		if (rte != NULL)
   1896      0    stevel 			mi_free((char *)rte);
   1897  11042      Erik 		if (mp_copy != NULL) {
   1898  11042      Erik 			BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards);
   1899  11042      Erik 			ip_drop_input("ip_mforward error", mp_copy, ill);
   1900      0    stevel 			freemsg(mp_copy);
   1901  11042      Erik 		}
   1902      0    stevel 		if (mp0 != NULL)
   1903      0    stevel 			freemsg(mp0);
   1904      0    stevel 		return (-1);
   1905      0    stevel 	}
   1906      0    stevel }
   1907      0    stevel 
   1908      0    stevel /*
   1909      0    stevel  * Clean up the mfctable cache entry if upcall is not serviced.
   1910      0    stevel  * SunOS 5.x has timeout per mfc, unlike BSD which has one timer.
   1911      0    stevel  */
   1912      0    stevel static void
   1913      0    stevel expire_upcalls(void *arg)
   1914      0    stevel {
   1915      0    stevel 	struct mfc *mfc_rt = arg;
   1916      0    stevel 	uint_t hash;
   1917      0    stevel 	struct mfc *prev_mfc, *mfc0;
   1918   3448  dh155122 	ip_stack_t	*ipst;
   1919   5240  nordmark 	conn_t		*mrouter;
   1920   3448  dh155122 
   1921   3448  dh155122 	if (mfc_rt->mfc_rte == NULL || mfc_rt->mfc_rte->ill != NULL) {
   1922   3448  dh155122 		cmn_err(CE_WARN, "expire_upcalls: no ILL\n");
   1923   3448  dh155122 		return;
   1924   3448  dh155122 	}
   1925   3448  dh155122 	ipst = mfc_rt->mfc_rte->ill->ill_ipst;
   1926   5240  nordmark 	mrouter = ipst->ips_ip_g_mrouter;
   1927      0    stevel 
   1928      0    stevel 	hash = MFCHASH(mfc_rt->mfc_origin.s_addr, mfc_rt->mfc_mcastgrp.s_addr);
   1929   3448  dh155122 	if (ipst->ips_ip_mrtdebug > 1) {
   1930   5240  nordmark 		(void) mi_strlog(mrouter->conn_rq, 1, SL_TRACE,
   1931      0    stevel 		    "expire_upcalls: hash %d s %x g %x",
   1932      0    stevel 		    hash, ntohl(mfc_rt->mfc_origin.s_addr),
   1933      0    stevel 		    ntohl(mfc_rt->mfc_mcastgrp.s_addr));
   1934      0    stevel 	}
   1935   3448  dh155122 	MFCB_REFHOLD(&ipst->ips_mfcs[hash]);
   1936      0    stevel 	mutex_enter(&mfc_rt->mfc_mutex);
   1937      0    stevel 	/*
   1938      0    stevel 	 * if timeout has been set to zero, than the
   1939      0    stevel 	 * entry has been filled, no need to delete it.
   1940      0    stevel 	 */
   1941      0    stevel 	if (mfc_rt->mfc_timeout_id == 0)
   1942      0    stevel 		goto done;
   1943   3448  dh155122 	ipst->ips_mrtstat->mrts_cache_cleanups++;
   1944      0    stevel 	mfc_rt->mfc_timeout_id = 0;
   1945      0    stevel 
   1946      0    stevel 	/* Determine entry to be cleaned up in cache table. */
   1947   3448  dh155122 	for (prev_mfc = mfc0 = ipst->ips_mfcs[hash].mfcb_mfc; mfc0;
   1948      0    stevel 	    prev_mfc = mfc0, mfc0 = mfc0->mfc_next)
   1949      0    stevel 		if (mfc0 == mfc_rt)
   1950      0    stevel 			break;
   1951      0    stevel 
   1952      0    stevel 	/* del_mfc takes care of gone mfcs */
   1953      0    stevel 	ASSERT(prev_mfc != NULL);
   1954      0    stevel 	ASSERT(mfc0 != NULL);
   1955      0    stevel 
   1956      0    stevel 	/*
   1957      0    stevel 	 * Delete the entry from the cache
   1958      0    stevel 	 */
   1959   3448  dh155122 	ipst->ips_mfcs[hash].mfcb_marks |= MFCB_MARK_CONDEMNED;
   1960      0    stevel 	mfc_rt->mfc_marks |= MFCB_MARK_CONDEMNED;
   1961      0    stevel 
   1962      0    stevel 	/*
   1963      0    stevel 	 * release_mfc will drop all queued upcall packets.
   1964      0    stevel 	 * and will free the mbuf with the pkt, if, timing info.
   1965      0    stevel 	 */
   1966      0    stevel done:
   1967      0    stevel 	mutex_exit(&mfc_rt->mfc_mutex);
   1968   3448  dh155122 	MFCB_REFRELE(&ipst->ips_mfcs[hash]);
   1969      0    stevel }
   1970      0    stevel 
   1971      0    stevel /*
   1972      0    stevel  * Packet forwarding routine once entry in the cache is made.
   1973      0    stevel  */
   1974      0    stevel static int
   1975      0    stevel ip_mdq(mblk_t *mp, ipha_t *ipha, ill_t *ill, ipaddr_t tunnel_src,
   1976      0    stevel     struct mfc *rt)
   1977      0    stevel {
   1978      0    stevel 	vifi_t vifi;
   1979      0    stevel 	struct vif *vifp;
   1980      0    stevel 	ipaddr_t dst = ipha->ipha_dst;
   1981      0    stevel 	size_t  plen = msgdsize(mp);
   1982      0    stevel 	vifi_t num_of_vifs;
   1983   3448  dh155122 	ip_stack_t	*ipst = ill->ill_ipst;
   1984   5240  nordmark 	conn_t		*mrouter = ipst->ips_ip_g_mrouter;
   1985  11042      Erik 	ip_recv_attr_t	iras;
   1986      0    stevel 
   1987   3448  dh155122 	if (ipst->ips_ip_mrtdebug > 1) {
   1988   5240  nordmark 		(void) mi_strlog(mrouter->conn_rq, 1, SL_TRACE,
   1989      0    stevel 		    "ip_mdq: SEND src %x, ipha_dst %x, ill %s",
   1990      0    stevel 		    ntohl(ipha->ipha_src), ntohl(ipha->ipha_dst),
   1991      0    stevel 		    ill->ill_name);
   1992      0    stevel 	}
   1993      0    stevel 
   1994      0    stevel 	/* Macro to send packet on vif */
   1995      0    stevel #define	MC_SEND(ipha, mp, vifp, dst) { \
   1996      0    stevel 	if ((vifp)->v_flags & VIFF_TUNNEL) \
   1997      0    stevel 		encap_send((ipha), (mp), (vifp), (dst)); \
   1998      0    stevel 	else if ((vifp)->v_flags & VIFF_REGISTER) \
   1999      0    stevel 		register_send((ipha), (mp), (vifp), (dst)); \
   2000      0    stevel 	else \
   2001      0    stevel 		phyint_send((ipha), (mp), (vifp), (dst)); \
   2002      0    stevel }
   2003      0    stevel 
   2004      0    stevel 	vifi = rt->mfc_parent;
   2005      0    stevel 
   2006      0    stevel 	/*
   2007      0    stevel 	 * The value of vifi is MAXVIFS if the pkt had no parent, i.e.,
   2008      0    stevel 	 * Mrouted had no route.
   2009      0    stevel 	 * We wanted the route installed in the mfctable to prevent multiple
   2010      0    stevel 	 * tries, so it passed add_mfc(), but is discarded here. The v_ipif is
   2011      0    stevel 	 * NULL so we don't want to check the ill. Still needed as of Mrouted
   2012      0    stevel 	 * 3.6.
   2013      0    stevel 	 */
   2014      0    stevel 	if (vifi == NO_VIF) {
   2015      0    stevel 		ip1dbg(("ip_mdq: no route for origin ill %s, vifi is NO_VIF\n",
   2016      0    stevel 		    ill->ill_name));
   2017   3448  dh155122 		if (ipst->ips_ip_mrtdebug > 1) {
   2018   5240  nordmark 			(void) mi_strlog(mrouter->conn_rq, 1, SL_TRACE,
   2019      0    stevel 			    "ip_mdq: vifi is NO_VIF ill = %s", ill->ill_name);
   2020      0    stevel 		}
   2021      0    stevel 		return (-1);	/* drop pkt */
   2022      0    stevel 	}
   2023      0    stevel 
   2024   3448  dh155122 	if (!lock_good_vif(&ipst->ips_vifs[vifi]))
   2025      0    stevel 		return (-1);
   2026      0    stevel 	/*
   2027      0    stevel 	 * The MFC entries are not cleaned up when an ipif goes
   2028      0    stevel 	 * away thus this code has to guard against an MFC referencing
   2029      0    stevel 	 * an ipif that has been closed. Note: reset_mrt_vif_ipif
   2030      0    stevel 	 * sets the v_ipif to NULL when the ipif disappears.
   2031      0    stevel 	 */
   2032   3448  dh155122 	ASSERT(ipst->ips_vifs[vifi].v_ipif != NULL);
   2033      0    stevel 
   2034   3448  dh155122 	if (vifi >= ipst->ips_numvifs) {
   2035      0    stevel 		cmn_err(CE_WARN, "ip_mdq: illegal vifi %d numvifs "
   2036      0    stevel 		    "%d ill %s viftable ill %s\n",
   2037   3448  dh155122 		    (int)vifi, (int)ipst->ips_numvifs, ill->ill_name,
   2038   3448  dh155122 		    ipst->ips_vifs[vifi].v_ipif->ipif_ill->ill_name);
   2039   3448  dh155122 		unlock_good_vif(&ipst->ips_vifs[vifi]);
   2040      0    stevel 		return (-1);
   2041      0    stevel 	}
   2042      0    stevel 	/*
   2043      0    stevel 	 * Don't forward if it didn't arrive from the parent vif for its
   2044   8485     Peter 	 * origin.
   2045      0    stevel 	 */
   2046  11042      Erik 	if ((ipst->ips_vifs[vifi].v_ipif->ipif_ill != ill) ||
   2047   3448  dh155122 	    (ipst->ips_vifs[vifi].v_rmt_addr.s_addr != tunnel_src)) {
   2048      0    stevel 		/* Came in the wrong interface */
   2049      0    stevel 		ip1dbg(("ip_mdq: arrived wrong if, vifi %d "
   2050      0    stevel 			"numvifs %d ill %s viftable ill %s\n",
   2051   3448  dh155122 			(int)vifi, (int)ipst->ips_numvifs, ill->ill_name,
   2052  11042      Erik 			ipst->ips_vifs[vifi].v_ipif->ipif_ill->ill_name));
   2053   3448  dh155122 		if (ipst->ips_ip_mrtdebug > 1) {
   2054   5240  nordmark 			(void) mi_strlog(mrouter->conn_rq, 1, SL_TRACE,
   2055      0    stevel 			    "ip_mdq: arrived wrong if, vifi %d ill "
   2056      0    stevel 			    "%s viftable ill %s\n",
   2057  11042      Erik 			    (int)vifi, ill->ill_name,
   2058  11042      Erik 			    ipst->ips_vifs[vifi].v_ipif->ipif_ill->ill_name);
   2059      0    stevel 		}
   2060   3448  dh155122 		ipst->ips_mrtstat->mrts_wrong_if++;
   2061      0    stevel 		rt->mfc_wrong_if++;
   2062      0    stevel 
   2063      0    stevel 		/*
   2064      0    stevel 		 * If we are doing PIM assert processing and we are forwarding
   2065      0    stevel 		 * packets on this interface, and it is a broadcast medium
   2066      0    stevel 		 * interface (and not a tunnel), send a message to the routing.
   2067      0    stevel 		 *
   2068      0    stevel 		 * We use the first ipif on the list, since it's all we have.
   2069      0    stevel 		 * Chances are the ipif_flags are the same for ipifs on the ill.
   2070      0    stevel 		 */
   2071   3448  dh155122 		if (ipst->ips_pim_assert && rt->mfc_ttls[vifi] > 0 &&
   2072      0    stevel 		    (ill->ill_ipif->ipif_flags & IPIF_BROADCAST) &&
   2073   3448  dh155122 		    !(ipst->ips_vifs[vifi].v_flags & VIFF_TUNNEL)) {
   2074      0    stevel 			mblk_t		*mp_copy;
   2075      0    stevel 			struct igmpmsg	*im;
   2076      0    stevel 
   2077      0    stevel 			/* TODO could copy header and dup rest */
   2078      0    stevel 			mp_copy = copymsg(mp);
   2079      0    stevel 			if (mp_copy == NULL) {
   2080   3448  dh155122 				ipst->ips_mrtstat->mrts_fwd_drop++;
   2081      0    stevel 				ip1dbg(("ip_mdq: out of memory "
   2082      0    stevel 				    "for mblk, mp_copy\n"));
   2083   3448  dh155122 				unlock_good_vif(&ipst->ips_vifs[vifi]);
   2084      0    stevel 				return (-1);
   2085      0    stevel 			}
   2086      0    stevel 
   2087      0    stevel 			im = (struct igmpmsg *)mp_copy->b_rptr;
   2088      0    stevel 			im->im_msgtype = IGMPMSG_WRONGVIF;
   2089      0    stevel 			im->im_mbz = 0;
   2090      0    stevel 			im->im_vif = (ushort_t)vifi;
   2091   5240  nordmark 			/* Pass to RAWIP */
   2092  11042      Erik 
   2093  11042      Erik 			bzero(&iras, sizeof (iras));
   2094  11042      Erik 			iras.ira_flags = IRAF_IS_IPV4;
   2095  11042      Erik 			iras.ira_ip_hdr_length =
   2096  11042      Erik 			    IPH_HDR_LENGTH(mp_copy->b_rptr);
   2097  11042      Erik 			iras.ira_pktlen = msgdsize(mp_copy);
   2098  11042      Erik 			(mrouter->conn_recv)(mrouter, mp_copy, NULL, &iras);
   2099  11042      Erik 			ASSERT(!(iras.ira_flags & IRAF_IPSEC_SECURE));
   2100      0    stevel 		}
   2101   3448  dh155122 		unlock_good_vif(&ipst->ips_vifs[vifi]);
   2102      0    stevel 		if (tunnel_src != 0)
   2103      0    stevel 			return (1);
   2104      0    stevel 		else
   2105      0    stevel 			return (0);
   2106      0    stevel 	}
   2107      0    stevel 	/*
   2108      0    stevel 	 * If I sourced this packet, it counts as output, else it was input.
   2109      0    stevel 	 */
   2110   3448  dh155122 	if (ipha->ipha_src == ipst->ips_vifs[vifi].v_lcl_addr.s_addr) {
   2111   3448  dh155122 		ipst->ips_vifs[vifi].v_pkt_out++;
   2112   3448  dh155122 		ipst->ips_vifs[vifi].v_bytes_out += plen;
   2113      0    stevel 	} else {
   2114   3448  dh155122 		ipst->ips_vifs[vifi].v_pkt_in++;
   2115   3448  dh155122 		ipst->ips_vifs[vifi].v_bytes_in += plen;
   2116      0    stevel 	}
   2117      0    stevel 	mutex_enter(&rt->mfc_mutex);
   2118      0    stevel 	rt->mfc_pkt_cnt++;
   2119      0    stevel 	rt->mfc_byte_cnt += plen;
   2120      0    stevel 	mutex_exit(&rt->mfc_mutex);
   2121   3448  dh155122 	unlock_good_vif(&ipst->ips_vifs[vifi]);
   2122      0    stevel 	/*
   2123      0    stevel 	 * For each vif, decide if a copy of the packet should be forwarded.
   2124      0    stevel 	 * Forward if:
   2125      0    stevel 	 *		- the vif threshold ttl is non-zero AND
   2126      0    stevel 	 *		- the pkt ttl exceeds the vif's threshold
   2127      0    stevel 	 * A non-zero mfc_ttl indicates that the vif is part of
   2128      0    stevel 	 * the output set for the mfc entry.
   2129      0    stevel 	 */
   2130   3448  dh155122 	mutex_enter(&ipst->ips_numvifs_mutex);
   2131   3448  dh155122 	num_of_vifs = ipst->ips_numvifs;
   2132   3448  dh155122 	mutex_exit(&ipst->ips_numvifs_mutex);
   2133   3448  dh155122 	for (vifp = ipst->ips_vifs, vifi = 0;
   2134   3448  dh155122 	    vifi < num_of_vifs;
   2135   3448  dh155122 	    vifp++, vifi++) {
   2136      0    stevel 		if (!lock_good_vif(vifp))
   2137      0    stevel 			continue;
   2138      0    stevel 		if ((rt->mfc_ttls[vifi] > 0) &&
   2139      0    stevel 		    (ipha->ipha_ttl > rt->mfc_ttls[vifi])) {
   2140      0    stevel 			/*
   2141      0    stevel 			 * lock_good_vif should not have succedded if
   2142      0    stevel 			 * v_ipif is null.
   2143      0    stevel 			 */
   2144      0    stevel 			ASSERT(vifp->v_ipif != NULL);
   2145      0    stevel 			vifp->v_pkt_out++;
   2146      0    stevel 			vifp->v_bytes_out += plen;
   2147      0    stevel 			MC_SEND(ipha, mp, vifp, dst);
   2148   3448  dh155122 			ipst->ips_mrtstat->mrts_fwd_out++;
   2149      0    stevel 		}
   2150      0    stevel 		unlock_good_vif(vifp);
   2151      0    stevel 	}
   2152      0    stevel 	if (tunnel_src != 0)
   2153      0    stevel 		return (1);
   2154      0    stevel 	else
   2155      0    stevel 		return (0);
   2156      0    stevel }
   2157      0    stevel 
   2158      0    stevel /*
   2159      0    stevel  * Send the packet on physical interface.
   2160      0    stevel  * Caller assumes can continue to use mp on return.
   2161      0    stevel  */
   2162      0    stevel /* ARGSUSED */
   2163      0    stevel static void
   2164      0    stevel phyint_send(ipha_t *ipha, mblk_t *mp, struct vif *vifp, ipaddr_t dst)
   2165      0    stevel {
   2166      0    stevel 	mblk_t 	*mp_copy;
   2167   3448  dh155122 	ip_stack_t	*ipst = vifp->v_ipif->ipif_ill->ill_ipst;
   2168   5240  nordmark 	conn_t		*mrouter = ipst->ips_ip_g_mrouter;
   2169      0    stevel 
   2170      0    stevel 	/* Make a new reference to the packet */
   2171      0    stevel 	mp_copy = copymsg(mp);	/* TODO could copy header and dup rest */
   2172      0    stevel 	if (mp_copy == NULL) {
   2173   3448  dh155122 		ipst->ips_mrtstat->mrts_fwd_drop++;
   2174      0    stevel 		ip1dbg(("phyint_send: out of memory for mblk, mp_copy\n"));
   2175      0    stevel 		return;
   2176      0    stevel 	}
   2177      0    stevel 	if (vifp->v_rate_limit <= 0)
   2178      0    stevel 		tbf_send_packet(vifp, mp_copy);
   2179      0    stevel 	else  {
   2180   3448  dh155122 		if (ipst->ips_ip_mrtdebug > 1) {
   2181   5240  nordmark 			(void) mi_strlog(mrouter->conn_rq, 1, SL_TRACE,
   2182      0    stevel 			    "phyint_send: tbf_contr rate %d "
   2183      0    stevel 			    "vifp 0x%p mp 0x%p dst 0x%x",
   2184      0    stevel 			    vifp->v_rate_limit, (void *)vifp, (void *)mp, dst);
   2185      0    stevel 		}
   2186      0    stevel 		tbf_control(vifp, mp_copy, (ipha_t *)mp_copy->b_rptr);
   2187      0    stevel 	}
   2188      0    stevel }
   2189      0    stevel 
   2190      0    stevel /*
   2191      0    stevel  * Send the whole packet for REGISTER encapsulation to PIM daemon
   2192      0    stevel  * Caller assumes it can continue to use mp on return.
   2193      0    stevel  */
   2194      0    stevel /* ARGSUSED */
   2195      0    stevel static void
   2196      0    stevel register_send(ipha_t *ipha, mblk_t *mp, struct vif *vifp, ipaddr_t dst)
   2197      0    stevel {
   2198      0    stevel 	struct igmpmsg	*im;
   2199      0    stevel 	mblk_t		*mp_copy;
   2200      0    stevel 	ipha_t		*ipha_copy;
   2201  11042      Erik 	ill_t		*ill = vifp->v_ipif->ipif_ill;
   2202  11042      Erik 	ip_stack_t	*ipst = ill->ill_ipst;
   2203   5240  nordmark 	conn_t		*mrouter = ipst->ips_ip_g_mrouter;
   2204  11042      Erik 	ip_recv_attr_t	iras;
   2205      0    stevel 
   2206   3448  dh155122 	if (ipst->ips_ip_mrtdebug > 1) {
   2207   5240  nordmark 		(void) mi_strlog(mrouter->conn_rq, 1, SL_TRACE,
   2208      0    stevel 		    "register_send: src %x, dst %x\n",
   2209      0    stevel 		    ntohl(ipha->ipha_src), ntohl(ipha->ipha_dst));
   2210      0    stevel 	}
   2211      0    stevel 
   2212      0    stevel 	/*
   2213      0    stevel 	 * Copy the old packet & pullup its IP header into the new mblk_t so we
   2214      0    stevel 	 * can modify it.  Try to fill the new mblk_t since if we don't the
   2215      0    stevel 	 * ethernet driver will.
   2216      0    stevel 	 */
   2217      0    stevel 	mp_copy = allocb(sizeof (struct igmpmsg) + sizeof (ipha_t), BPRI_MED);
   2218      0    stevel 	if (mp_copy == NULL) {
   2219   3448  dh155122 		++ipst->ips_mrtstat->mrts_pim_nomemory;
   2220   3448  dh155122 		if (ipst->ips_ip_mrtdebug > 3) {
   2221   5240  nordmark 			(void) mi_strlog(mrouter->conn_rq, 1, SL_TRACE,
   2222      0    stevel 			    "register_send: allocb failure.");
   2223      0    stevel 		}
   2224      0    stevel 		return;
   2225      0    stevel 	}
   2226      0    stevel 
   2227      0    stevel 	/*
   2228      0    stevel 	 * Bump write pointer to account for igmpmsg being added.
   2229      0    stevel 	 */
   2230      0    stevel 	mp_copy->b_wptr = mp_copy->b_rptr + sizeof (struct igmpmsg);
   2231      0    stevel 
   2232      0    stevel 	/*
   2233      0    stevel 	 * Chain packet to new mblk_t.
   2234      0    stevel 	 */
   2235      0    stevel 	if ((mp_copy->b_cont = copymsg(mp)) == NULL) {
   2236   3448  dh155122 		++ipst->ips_mrtstat->mrts_pim_nomemory;
   2237   3448  dh155122 		if (ipst->ips_ip_mrtdebug > 3) {
   2238   5240  nordmark 			(void) mi_strlog(mrouter->conn_rq, 1, SL_TRACE,
   2239      0    stevel 			    "register_send: copymsg failure.");
   2240      0    stevel 		}
   2241      0    stevel 		freeb(mp_copy);
   2242      0    stevel 		return;
   2243      0    stevel 	}
   2244      0    stevel 
   2245      0    stevel 	/*
   2246   5240  nordmark 	 * icmp_input() asserts that IP version field is set to an
   2247      0    stevel 	 * appropriate version. Hence, the struct igmpmsg that this really
   2248      0    stevel 	 * becomes, needs to have the correct IP version field.
   2249      0    stevel 	 */
   2250      0    stevel 	ipha_copy = (ipha_t *)mp_copy->b_rptr;
   2251      0    stevel 	*ipha_copy = multicast_encap_iphdr;
   2252      0    stevel 
   2253      0    stevel 	/*
   2254      0    stevel 	 * The kernel uses the struct igmpmsg header to encode the messages to
   2255      0    stevel 	 * the multicast routing daemon. Fill in the fields in the header
   2256      0    stevel 	 * starting with the message type which is IGMPMSG_WHOLEPKT
   2257      0    stevel 	 */
   2258      0    stevel 	im = (struct igmpmsg *)mp_copy->b_rptr;
   2259      0    stevel 	im->im_msgtype = IGMPMSG_WHOLEPKT;
   2260      0    stevel 	im->im_src.s_addr = ipha->ipha_src;
   2261      0    stevel 	im->im_dst.s_addr = ipha->ipha_dst;
   2262      0    stevel 
   2263      0    stevel 	/*
   2264      0    stevel 	 * Must Be Zero. This is because the struct igmpmsg is really an IP
   2265      0    stevel 	 * header with renamed fields and the multicast routing daemon uses
   2266      0    stevel 	 * an ipha_protocol (aka im_mbz) of 0 to distinguish these messages.
   2267      0    stevel 	 */
   2268      0    stevel 	im->im_mbz = 0;
   2269      0    stevel 
   2270   3448  dh155122 	++ipst->ips_mrtstat->mrts_upcalls;
   2271  11042      Erik 	if (IPCL_IS_NONSTR(mrouter) ? mrouter->conn_flow_cntrld :
   2272  11042      Erik 	    !canputnext(mrouter->conn_rq)) {
   2273   3448  dh155122 		++ipst->ips_mrtstat->mrts_pim_regsend_drops;
   2274   3448  dh155122 		if (ipst->ips_ip_mrtdebug > 3) {
   2275   5240  nordmark 			(void) mi_strlog(mrouter->conn_rq, 1, SL_TRACE,
   2276      0    stevel 			    "register_send: register upcall failure.");
   2277      0    stevel 		}
   2278  11042      Erik 		BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards);
   2279  11042      Erik 		ip_drop_input("mrts_pim_regsend_drops", mp_copy, ill);
   2280      0    stevel 		freemsg(mp_copy);
   2281      0    stevel 	} else {
   2282   5240  nordmark 		/* Pass to RAWIP */
   2283  11042      Erik 		bzero(&iras, sizeof (iras));
   2284  11042      Erik 		iras.ira_flags = IRAF_IS_IPV4;
   2285  11042      Erik 		iras.ira_ip_hdr_length = sizeof (ipha_t);
   2286  11042      Erik 		iras.ira_pktlen = msgdsize(mp_copy);
   2287  11042      Erik 		(mrouter->conn_recv)(mrouter, mp_copy, NULL, &iras);
   2288  11042      Erik 		ASSERT(!(iras.ira_flags & IRAF_IPSEC_SECURE));
   2289      0    stevel 	}
   2290      0    stevel }
   2291      0    stevel 
   2292      0    stevel /*
   2293      0    stevel  * pim_validate_cksum handles verification of the checksum in the
   2294      0    stevel  * pim header.  For PIM Register packets, the checksum is calculated
   2295      0    stevel  * across the PIM header only.  For all other packets, the checksum
   2296      0    stevel  * is for the PIM header and remainder of the packet.
   2297      0    stevel  *
   2298      0    stevel  * returns: B_TRUE, if checksum is okay.
   2299      0    stevel  *          B_FALSE, if checksum is not valid.
   2300      0    stevel  */
   2301      0    stevel static boolean_t
   2302      0    stevel pim_validate_cksum(mblk_t *mp, ipha_t *ip, struct pim *pimp)
   2303      0    stevel {
   2304      0    stevel 	mblk_t *mp_dup;
   2305      0    stevel 
   2306      0    stevel 	if ((mp_dup = dupmsg(mp)) == NULL)
   2307      0    stevel 		return (B_FALSE);
   2308      0    stevel 
   2309      0    stevel 	mp_dup->b_rptr += IPH_HDR_LENGTH(ip);
   2310      0    stevel 	if (pimp->pim_type == PIM_REGISTER)
   2311      0    stevel 		mp_dup->b_wptr = mp_dup->b_rptr + PIM_MINLEN;
   2312      0    stevel 	if (IP_CSUM(mp_dup, 0, 0)) {
   2313      0    stevel 		freemsg(mp_dup);
   2314      0    stevel 		return (B_FALSE);
   2315      0    stevel 	}
   2316      0    stevel 	freemsg(mp_dup);
   2317      0    stevel 	return (B_TRUE);
   2318      0    stevel }
   2319      0    stevel 
   2320      0    stevel /*
   2321  11042      Erik  * Process PIM protocol packets i.e. IP Protocol 103.
   2322  11042      Erik  * Register messages are decapsulated and sent onto multicast forwarding.
   2323  11042      Erik  *
   2324  11042      Erik  * Return NULL for a bad packet that is discarded here.
   2325  11042      Erik  * Return mp if the message is OK and should be handed to "raw" receivers.
   2326  11042      Erik  * Callers of pim_input() may need to reinitialize variables that were copied
   2327  11042      Erik  * from the mblk as this calls pullupmsg().
   2328      0    stevel  */
   2329  11042      Erik mblk_t *
   2330  11042      Erik pim_input(mblk_t *mp, ip_recv_attr_t *ira)
   2331      0    stevel {
   2332      0    stevel 	ipha_t		*eip, *ip;
   2333      0    stevel 	int		iplen, pimlen, iphlen;
   2334      0    stevel 	struct pim	*pimp;	/* pointer to a pim struct */
   2335      0    stevel 	uint32_t	*reghdr;
   2336  11042      Erik 	ill_t		*ill = ira->ira_ill;
   2337   3448  dh155122 	ip_stack_t	*ipst = ill->ill_ipst;
   2338   5240  nordmark 	conn_t		*mrouter = ipst->ips_ip_g_mrouter;
   2339      0    stevel 
   2340      0    stevel 	/*
   2341      0    stevel 	 * Pullup the msg for PIM protocol processing.
   2342      0    stevel 	 */
   2343      0    stevel 	if (pullupmsg(mp, -1) == 0) {
   2344   3448  dh155122 		++ipst->ips_mrtstat->mrts_pim_nomemory;
   2345  11042      Erik 		BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards);
   2346  11042      Erik 		ip_drop_input("mrts_pim_nomemory", mp, ill);
   2347      0    stevel 		freemsg(mp);
   2348  11042      Erik 		return (NULL);
   2349      0    stevel 	}
   2350      0    stevel 
   2351      0    stevel 	ip = (ipha_t *)mp->b_rptr;
   2352      0    stevel 	iplen = ip->ipha_length;
   2353      0    stevel 	iphlen = IPH_HDR_LENGTH(ip);
   2354      0    stevel 	pimlen = ntohs(iplen) - iphlen;
   2355      0    stevel 
   2356      0    stevel 	/*
   2357      0    stevel 	 * Validate lengths
   2358      0    stevel 	 */
   2359      0    stevel 	if (pimlen < PIM_MINLEN) {
   2360   3448  dh155122 		++ipst->ips_mrtstat->mrts_pim_malformed;
   2361   3448  dh155122 		if (ipst->ips_ip_mrtdebug > 1) {
   2362   5240  nordmark 			(void) mi_strlog(mrouter->conn_rq, 1, SL_TRACE,
   2363      0    stevel 			    "pim_input: length not at least minlen");
   2364      0    stevel 		}
   2365  11042      Erik 		BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards);
   2366  11042      Erik 		ip_drop_input("mrts_pim_malformed", mp, ill);
   2367      0    stevel 		freemsg(mp);
   2368  11042      Erik 		return (NULL);
   2369      0    stevel 	}
   2370      0    stevel 
   2371      0    stevel 	/*
   2372      0    stevel 	 * Point to the PIM header.
   2373      0    stevel 	 */
   2374      0    stevel 	pimp = (struct pim *)((caddr_t)ip + iphlen);
   2375      0    stevel 
   2376      0    stevel 	/*
   2377      0    stevel 	 * Check the version number.
   2378      0    stevel 	 */
   2379      0    stevel 	if (pimp->pim_vers != PIM_VERSION) {
   2380   3448  dh155122 		++ipst->ips_mrtstat->mrts_pim_badversion;
   2381   3448  dh155122 		if (ipst->ips_ip_mrtdebug > 1) {
   2382   5240  nordmark 			(void) mi_strlog(mrouter->conn_rq, 1, SL_TRACE,
   2383      0    stevel 			    "pim_input: unknown version of PIM");
   2384      0    stevel 		}
   2385  11042      Erik 		BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards);
   2386  11042      Erik 		ip_drop_input("mrts_pim_badversion", mp, ill);
   2387      0    stevel 		freemsg(mp);
   2388  11042      Erik 		return (NULL);
   2389      0    stevel 	}
   2390      0    stevel 
   2391      0    stevel 	/*
   2392      0    stevel 	 * Validate the checksum
   2393      0    stevel 	 */
   2394      0    stevel 	if (!pim_validate_cksum(mp, ip, pimp)) {
   2395   3448  dh155122 		++ipst->ips_mrtstat->mrts_pim_rcv_badcsum;
   2396   3448  dh155122 		if (ipst->ips_ip_mrtdebug > 1) {
   2397   5240  nordmark 			(void) mi_strlog(mrouter->conn_rq, 1, SL_TRACE,
   2398      0    stevel 			    "pim_input: invalid checksum");
   2399      0    stevel 		}
   2400  11042      Erik 		BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards);
   2401  11042      Erik 		ip_drop_input("pim_rcv_badcsum", mp, ill);
   2402      0    stevel 		freemsg(mp);
   2403  11042      Erik 		return (NULL);
   2404      0    stevel 	}
   2405      0    stevel 
   2406      0    stevel 	if (pimp->pim_type != PIM_REGISTER)
   2407  11042      Erik 		return (mp);
   2408      0    stevel 
   2409      0    stevel 	reghdr = (uint32_t *)(pimp + 1);
   2410      0    stevel 	eip = (ipha_t *)(reghdr + 1);
   2411      0    stevel 
   2412      0    stevel 	/*
   2413      0    stevel 	 * check if the inner packet is destined to mcast group
   2414      0    stevel 	 */
   2415      0    stevel 	if (!CLASSD(eip->ipha_dst)) {
   2416   3448  dh155122 		++ipst->ips_mrtstat->mrts_pim_badregisters;
   2417   3448  dh155122 		if (ipst->ips_ip_mrtdebug > 1) {
   2418   5240  nordmark 			(void) mi_strlog(mrouter->conn_rq, 1, SL_TRACE,
   2419      0    stevel 			    "pim_input: Inner pkt not mcast .. !");
   2420      0    stevel 		}
   2421  11042      Erik 		BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards);
   2422  11042      Erik 		ip_drop_input("mrts_pim_badregisters", mp, ill);
   2423      0    stevel 		freemsg(mp);
   2424  11042      Erik 		return (NULL);
   2425      0    stevel 	}
   2426   3448  dh155122 	if (ipst->ips_ip_mrtdebug > 1) {
   2427   5240  nordmark 		(void) mi_strlog(mrouter->conn_rq, 1, SL_TRACE,
   2428      0    stevel 		    "register from %x, to %x, len %d",
   2429      0    stevel 		    ntohl(eip->ipha_src),
   2430      0    stevel 		    ntohl(eip->ipha_dst),
   2431      0    stevel 		    ntohs(eip->ipha_length));
   2432      0    stevel 	}
   2433      0    stevel 	/*
   2434      0    stevel 	 * If the null register bit is not set, decapsulate
   2435      0    stevel 	 * the packet before forwarding it.
   2436  11042      Erik 	 * Avoid this in no register vif
   2437      0    stevel 	 */
   2438  11042      Erik 	if (!(ntohl(*reghdr) & PIM_NULL_REGISTER) &&
   2439  11042      Erik 	    ipst->ips_reg_vif_num != ALL_VIFS) {
   2440      0    stevel 		mblk_t *mp_copy;
   2441  11042      Erik 		uint_t saved_pktlen;
   2442      0    stevel 
   2443      0    stevel 		/* Copy the message */
   2444      0    stevel 		if ((mp_copy = copymsg(mp)) == NULL) {
   2445   3448  dh155122 			++ipst->ips_mrtstat->mrts_pim_nomemory;
   2446  11042      Erik 			BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards);
   2447  11042      Erik 			ip_drop_input("mrts_pim_nomemory", mp, ill);
   2448      0    stevel 			freemsg(mp);
   2449  11042      Erik 			return (NULL);
   2450      0    stevel 		}
   2451      0    stevel 
   2452      0    stevel 		/*
   2453      0    stevel 		 * Decapsulate the packet and give it to
   2454      0    stevel 		 * register_mforward.
   2455      0    stevel 		 */
   2456  11042      Erik 		mp_copy->b_rptr += iphlen + sizeof (pim_t) + sizeof (*reghdr);
   2457  11042      Erik 		saved_pktlen = ira->ira_pktlen;
   2458  11042      Erik 		ira->ira_pktlen -= iphlen + sizeof (pim_t) + sizeof (*reghdr);
   2459  11042      Erik 		if (register_mforward(mp_copy, ira) != 0) {
   2460  11042      Erik 			/* register_mforward already called ip_drop_input */
   2461      0    stevel 			freemsg(mp);
   2462  11042      Erik 			ira->ira_pktlen = saved_pktlen;
   2463  11042      Erik 			return (NULL);
   2464      0    stevel 		}
   2465  11042      Erik 		ira->ira_pktlen = saved_pktlen;
   2466      0    stevel 	}
   2467      0    stevel 
   2468      0    stevel 	/*
   2469      0    stevel 	 * Pass all valid PIM packets up to any process(es) listening on a raw
   2470      0    stevel 	 * PIM socket. For Solaris it is done right after pim_input() is
   2471      0    stevel 	 * called.
   2472      0    stevel 	 */
   2473  11042      Erik 	return (mp);
   2474      0    stevel }
   2475      0    stevel 
   2476      0    stevel /*
   2477      0    stevel  * PIM sparse mode hook.  Called by pim_input after decapsulating
   2478      0    stevel  * the packet. Loop back the packet, as if we have received it.
   2479      0    stevel  * In pim_input() we have to check if the destination is a multicast address.
   2480      0    stevel  */
   2481      0    stevel static int
   2482  11042      Erik register_mforward(mblk_t *mp, ip_recv_attr_t *ira)
   2483      0    stevel {
   2484  11042      Erik 	ire_t		*ire;
   2485  11042      Erik 	ipha_t		*ipha = (ipha_t *)mp->b_rptr;
   2486  11042      Erik 	ill_t		*ill = ira->ira_ill;
   2487   3448  dh155122 	ip_stack_t	*ipst = ill->ill_ipst;
   2488   5240  nordmark 	conn_t		*mrouter = ipst->ips_ip_g_mrouter;
   2489      0    stevel 
   2490   3448  dh155122 	ASSERT(ipst->ips_reg_vif_num <= ipst->ips_numvifs);
   2491   3448  dh155122 
   2492   3448  dh155122 	if (ipst->ips_ip_mrtdebug > 3) {
   2493   5240  nordmark 		(void) mi_strlog(mrouter->conn_rq, 1, SL_TRACE,
   2494      0    stevel 		    "register_mforward: src %x, dst %x\n",
   2495      0    stevel 		    ntohl(ipha->ipha_src), ntohl(ipha->ipha_dst));
   2496      0    stevel 	}
   2497      0    stevel 	/*
   2498      0    stevel 	 * Need to pass in to ip_mforward() the information that the
   2499  11042      Erik 	 * packet has arrived on the register_vif. We mark it with
   2500  11042      Erik 	 * the IRAF_PIM_REGISTER attribute.
   2501  11042      Erik 	 * pim_input verified that the (inner) destination is multicast,
   2502  11042      Erik 	 * hence we skip the generic code in ip_input.
   2503      0    stevel 	 */
   2504  11042      Erik 	ira->ira_flags |= IRAF_PIM_REGISTER;
   2505   3448  dh155122 	++ipst->ips_mrtstat->mrts_pim_regforwards;
   2506  11042      Erik 
   2507  11042      Erik 	if (!CLASSD(ipha->ipha_dst)) {
   2508  11042      Erik 		ire = ire_route_recursive_v4(ipha->ipha_dst, 0, NULL, ALL_ZONES,
   2509  11042      Erik 		    ira->ira_tsl, MATCH_IRE_SECATTR, B_TRUE, 0, ipst, NULL,
   2510  11042      Erik 		    NULL, NULL);
   2511  11042      Erik 	} else {
   2512  11042      Erik 		ire = ire_multicast(ill);
   2513  11042      Erik 	}
   2514  11042      Erik 	ASSERT(ire != NULL);
   2515  11042      Erik 	/* Normally this will return the IRE_MULTICAST */
   2516  11042      Erik 	if (ire->ire_flags & (RTF_REJECT|RTF_BLACKHOLE)) {
   2517  11042      Erik 		BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards);
   2518  11042      Erik 		ip_drop_input("mrts_pim RTF_REJECT", mp, ill);
   2519  11042      Erik 		freemsg(mp);
   2520  11042      Erik 		ire_refrele(ire);
   2521  11042      Erik 		return (-1);
   2522  11042      Erik 	}
   2523  11042      Erik 	ASSERT(ire->ire_type & IRE_MULTICAST);
   2524  11042      Erik 	(*ire->ire_recvfn)(ire, mp, ipha, ira);
   2525  11042      Erik 	ire_refrele(ire);
   2526  11042      Erik 
   2527      0    stevel 	return (0);
   2528      0    stevel }
   2529      0    stevel 
   2530      0    stevel /*
   2531      0    stevel  * Send an encapsulated packet.
   2532      0    stevel  * Caller assumes can continue to use mp when routine returns.
   2533      0    stevel  */
   2534      0    stevel /* ARGSUSED */
   2535      0    stevel static void
   2536      0    stevel encap_send(ipha_t *ipha, mblk_t *mp, struct vif *vifp, ipaddr_t dst)
   2537      0    stevel {
   2538      0    stevel 	mblk_t 	*mp_copy;
   2539      0    stevel 	ipha_t 	*ipha_copy;
   2540      0    stevel 	size_t	len;
   2541   3448  dh155122 	ip_stack_t	*ipst = vifp->v_ipif->ipif_ill->ill_ipst;
   2542   5240  nordmark 	conn_t		*mrouter = ipst->ips_ip_g_mrouter;
   2543      0    stevel 
   2544   3448  dh155122 	if (ipst->ips_ip_mrtdebug > 1) {
   2545   5240  nordmark 		(void) mi_strlog(mrouter->conn_rq, 1, SL_TRACE,
   2546   3448  dh155122 		    "encap_send: vif %ld enter",
   2547   3448  dh155122 		    (ptrdiff_t)(vifp - ipst->ips_vifs));
   2548      0    stevel 	}
   2549      0    stevel 	len = ntohs(ipha->ipha_length);
   2550      0    stevel 
   2551      0    stevel 	/*
   2552      0    stevel 	 * Copy the old packet & pullup it's IP header into the
   2553      0    stevel 	 * new mbuf so we can modify it.  Try to fill the new
   2554      0    stevel 	 * mbuf since if we don't the ethernet driver will.
   2555      0    stevel 	 */
   2556      0    stevel 	mp_copy = allocb(32 + sizeof (multicast_encap_iphdr), BPRI_MED);
   2557      0    stevel 	if (mp_copy == NULL)
   2558      0    stevel 		return;
   2559      0    stevel 	mp_copy->b_rptr += 32;
   2560      0    stevel 	mp_copy->b_wptr = mp_copy->b_rptr + sizeof (multicast_encap_iphdr);
   2561      0    stevel 	if ((mp_copy->b_cont = copymsg(mp)) == NULL) {
   2562      0    stevel 		freeb(mp_copy);
   2563      0    stevel 		return;
   2564      0    stevel 	}
   2565      0    stevel 
   2566      0    stevel 	/*
   2567      0    stevel 	 * Fill in the encapsulating IP header.
   2568      0    stevel 	 * Remote tunnel dst in rmt_addr, from add_vif().
   2569      0    stevel 	 */
   2570      0    stevel 	ipha_copy = (ipha_t *)mp_copy->b_rptr;
   2571      0    stevel 	*ipha_copy = multicast_encap_iphdr;
   2572      0    stevel 	ASSERT((len + sizeof (ipha_t)) <= IP_MAXPACKET);
   2573      0    stevel 	ipha_copy->ipha_length = htons(len + sizeof (ipha_t));
   2574      0    stevel 	ipha_copy->ipha_src = vifp->v_lcl_addr.s_addr;
   2575      0    stevel 	ipha_copy->ipha_dst = vifp->v_rmt_addr.s_addr;
   2576      0    stevel 	ASSERT(ipha_copy->ipha_ident == 0);
   2577      0    stevel 
   2578      0    stevel 	/* Turn the encapsulated IP header back into a valid one. */
   2579      0    stevel 	ipha = (ipha_t *)mp_copy->b_cont->b_rptr;
   2580      0    stevel 	ipha->ipha_ttl--;
   2581      0    stevel 	ipha->ipha_hdr_checksum = 0;
   2582      0    stevel 	ipha->ipha_hdr_checksum = ip_csum_hdr(ipha);
   2583      0    stevel 
   2584  11042      Erik 	ipha_copy->ipha_ttl = ipha->ipha_ttl;
   2585  11042      Erik 
   2586   3448  dh155122 	if (ipst->ips_ip_mrtdebug > 1) {
   2587   5240  nordmark 		(void) mi_strlog(mrouter->conn_rq, 1, SL_TRACE,
   2588      0    stevel 		    "encap_send: group 0x%x", ntohl(ipha->ipha_dst));
   2589      0    stevel 	}
   2590      0    stevel 	if (vifp->v_rate_limit <= 0)
   2591      0    stevel 		tbf_send_packet(vifp, mp_copy);
   2592      0    stevel 	else
   2593      0    stevel 		/* ipha is from the original header */
   2594      0    stevel 		tbf_control(vifp, mp_copy, ipha);
   2595      0    stevel }
   2596      0    stevel 
   2597      0    stevel /*
   2598  11042      Erik  * De-encapsulate a packet and feed it back through IP input if it
   2599  11042      Erik  * matches one of our multicast tunnels.
   2600  11042      Erik  *
   2601      0    stevel  * This routine is called whenever IP gets a packet with prototype
   2602  11042      Erik  * IPPROTO_ENCAP and a local destination address and the packet didn't
   2603  11042      Erik  * match one of our configured IP-in-IP tunnels.
   2604      0    stevel  */
   2605      0    stevel void
   2606  11042      Erik ip_mroute_decap(mblk_t *mp, ip_recv_attr_t *ira)
   2607      0    stevel {
   2608      0    stevel 	ipha_t		*ipha = (ipha_t *)mp->b_rptr;
   2609      0    stevel 	ipha_t		*ipha_encap;
   2610      0    stevel 	int		hlen = IPH_HDR_LENGTH(ipha);
   2611  11042      Erik 	int		hlen_encap;
   2612      0    stevel 	ipaddr_t	src;
   2613      0    stevel 	struct vif	*vifp;
   2614  11042      Erik 	ire_t		*ire;
   2615  11042      Erik 	ill_t		*ill = ira->ira_ill;
   2616   3448  dh155122 	ip_stack_t	*ipst = ill->ill_ipst;
   2617   5240  nordmark 	conn_t		*mrouter = ipst->ips_ip_g_mrouter;
   2618  11042      Erik 
   2619  11042      Erik 	/* Make sure we have all of the inner header */
   2620  11042      Erik 	ipha_encap = (ipha_t *)((char *)ipha + hlen);
   2621  11042      Erik 	if (mp->b_wptr - mp->b_rptr < hlen + IP_SIMPLE_HDR_LENGTH) {
   2622  11042      Erik 		ipha = ip_pullup(mp, hlen + IP_SIMPLE_HDR_LENGTH, ira);
   2623  11042      Erik 		if (ipha == NULL) {
   2624  11042      Erik 			ipst->ips_mrtstat->mrts_bad_tunnel++;
   2625  11042      Erik 			BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards);
   2626  11042      Erik 			ip_drop_input("ip_mroute_decap: too short", mp, ill);
   2627  11042      Erik 			freemsg(mp);
   2628  11042      Erik 			return;
   2629  11042      Erik 		}
   2630  11042      Erik 		ipha_encap = (ipha_t *)((char *)ipha + hlen);
   2631  11042      Erik 	}
   2632  11042      Erik 	hlen_encap = IPH_HDR_LENGTH(ipha_encap);
   2633  11042      Erik 	if (mp->b_wptr - mp->b_rptr < hlen + hlen_encap) {
   2634  11042      Erik 		ipha = ip_pullup(mp, hlen + hlen_encap, ira);
   2635  11042      Erik 		if (ipha == NULL) {
   2636  11042      Erik 			ipst->ips_mrtstat->mrts_bad_tunnel++;
   2637  11042      Erik 			BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards);
   2638  11042      Erik 			ip_drop_input("ip_mroute_decap: too short", mp, ill);
   2639  11042      Erik 			freemsg(mp);
   2640  11042      Erik 			return;
   2641  11042      Erik 		}
   2642  11042      Erik 		ipha_encap = (ipha_t *)((char *)ipha + hlen);
   2643  11042      Erik 	}
   2644      0    stevel 
   2645      0    stevel 	/*
   2646      0    stevel 	 * Dump the packet if it's not to a multicast destination or if
   2647      0    stevel 	 * we don't have an encapsulating tunnel with the source.
   2648      0    stevel 	 * Note:  This code assumes that the remote site IP address
   2649      0    stevel 	 * uniquely identifies the tunnel (i.e., that this site has
   2650      0    stevel 	 * at most one tunnel with the remote site).
   2651      0    stevel 	 */
   2652      0    stevel 	if (!CLASSD(ipha_encap->ipha_dst)) {
   2653   3448  dh155122 		ipst->ips_mrtstat->mrts_bad_tunnel++;
   2654      0    stevel 		ip1dbg(("ip_mroute_decap: bad tunnel\n"));
   2655  11042      Erik 		BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards);
   2656  11042      Erik 		ip_drop_input("mrts_bad_tunnel", mp, ill);
   2657      0    stevel 		freemsg(mp);
   2658      0    stevel 		return;
   2659      0    stevel 	}
   2660      0    stevel 	src = (ipaddr_t)ipha->ipha_src;
   2661   3448  dh155122 	mutex_enter(&ipst->ips_last_encap_lock);
   2662   3448  dh155122 	if (src != ipst->ips_last_encap_src) {
   2663      0    stevel 		struct vif *vife;
   2664      0    stevel 
   2665   3448  dh155122 		vifp = ipst->ips_vifs;
   2666   3448  dh155122 		vife = vifp + ipst->ips_numvifs;
   2667   3448  dh155122 		ipst->ips_last_encap_src = src;
   2668   3448  dh155122 		ipst->ips_last_encap_vif = 0;
   2669      0    stevel 		for (; vifp < vife; ++vifp) {
   2670      0    stevel 			if (!lock_good_vif(vifp))
   2671      0    stevel 				continue;
   2672      0    stevel 			if (vifp->v_rmt_addr.s_addr == src) {
   2673      0    stevel 				if (vifp->v_flags & VIFF_TUNNEL)
   2674   3448  dh155122 					ipst->ips_last_encap_vif = vifp;
   2675   3448  dh155122 				if (ipst->ips_ip_mrtdebug > 1) {
   2676   5240  nordmark 					(void) mi_strlog(mrouter->conn_rq,
   2677      0    stevel 					    1, SL_TRACE,
   2678      0    stevel 					    "ip_mroute_decap: good tun "
   2679      0    stevel 					    "vif %ld with %x",
   2680   3448  dh155122 					    (ptrdiff_t)(vifp - ipst->ips_vifs),
   2681      0    stevel 					    ntohl(src));
   2682      0    stevel 				}
   2683      0    stevel 				unlock_good_vif(vifp);
   2684      0    stevel 				break;
   2685      0    stevel 			}
   2686      0    stevel 			unlock_good_vif(vifp);
   2687      0    stevel 		}
   2688      0    stevel 	}
   2689   3448  dh155122 	if ((vifp = ipst->ips_last_encap_vif) == 0) {
   2690   3448  dh155122 		mutex_exit(&ipst->ips_last_encap_lock);
   2691   3448  dh155122 		ipst->ips_mrtstat->mrts_bad_tunnel++;
   2692  11042      Erik 		BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards);
   2693  11042      Erik 		ip_drop_input("mrts_bad_tunnel", mp, ill);
   2694      0    stevel 		freemsg(mp);
   2695      0    stevel 		ip1dbg(("ip_mroute_decap: vif %ld no tunnel with %x\n",
   2696   3448  dh155122 		    (ptrdiff_t)(vifp - ipst->ips_vifs), ntohl(src)));
   2697      0    stevel 		return;
   2698      0    stevel 	}
   2699   3448  dh155122 	mutex_exit(&ipst->ips_last_encap_lock);
   2700      0    stevel 
   2701      0    stevel 	/*
   2702      0    stevel 	 * Need to pass in the tunnel source to ip_mforward (so that it can
   2703  11042      Erik 	 * verify that the packet arrived over the correct vif.)
   2704      0    stevel 	 */
   2705  11042      Erik 	ira->ira_flags |= IRAF_MROUTE_TUNNEL_SET;
   2706  11042      Erik 	ira->ira_mroute_tunnel = src;
   2707      0    stevel 	mp->b_rptr += hlen;
   2708  11042      Erik 	ira->ira_pktlen -= hlen;
   2709  11042      Erik 	ira->ira_ip_hdr_length = hlen_encap;
   2710  11042      Erik 
   2711  11042      Erik 	/*
   2712  11042      Erik 	 * We don't redo any of the filtering in ill_input_full_v4 and we
   2713  11042      Erik 	 * have checked that all of ipha_encap and any IP options are
   2714  11042      Erik 	 * pulled up. Hence we call ire_recv_multicast_v4 directly.
   2715  11042      Erik 	 * However, we have to check for RSVP as in ip_input_full_v4
   2716  11042      Erik 	 * and if so we pass it to ire_recv_broadcast_v4 for local delivery
   2717  11042      Erik 	 * to the rsvpd.
   2718  11042      Erik 	 */
   2719  11042      Erik 	if (ipha_encap->ipha_protocol == IPPROTO_RSVP &&
   2720  11042      Erik 	    ipst->ips_ipcl_proto_fanout_v4[IPPROTO_RSVP].connf_head != NULL) {
   2721  11042      Erik 		ire = ire_route_recursive_v4(INADDR_BROADCAST, 0, ill,
   2722  11042      Erik 		    ALL_ZONES, ira->ira_tsl, MATCH_IRE_ILL|MATCH_IRE_SECATTR,
   2723  11042      Erik 		    B_TRUE, 0, ipst, NULL, NULL, NULL);
   2724  11042      Erik 	} else {
   2725  11042      Erik 		ire = ire_multicast(ill);
   2726  11042      Erik 	}
   2727  11042      Erik 	ASSERT(ire != NULL);
   2728  11042      Erik 	/* Normally this will return the IRE_MULTICAST or IRE_BROADCAST */
   2729  11042      Erik 	if (ire->ire_flags & (RTF_REJECT|RTF_BLACKHOLE)) {
   2730  11042      Erik 		BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards);
   2731  11042      Erik 		ip_drop_input("ip_mroute_decap: RTF_REJECT", mp, ill);
   2732  11042      Erik 		freemsg(mp);
   2733  11042      Erik 		ire_refrele(ire);
   2734  11042      Erik 		return;
   2735  11042      Erik 	}
   2736  11042      Erik 	ire->ire_ib_pkt_count++;
   2737  11042      Erik 	ASSERT(ire->ire_type & (IRE_MULTICAST|IRE_BROADCAST));
   2738  11042      Erik 	(*ire->ire_recvfn)(ire, mp, ipha_encap, ira);
   2739  11042      Erik 	ire_refrele(ire);
   2740      0    stevel }
   2741      0    stevel 
   2742      0    stevel /*
   2743      0    stevel  * Remove all records with v_ipif == ipif.  Called when an interface goes away
   2744      0    stevel  * (stream closed).  Called as writer.
   2745      0    stevel  */
   2746      0    stevel void
   2747      0    stevel reset_mrt_vif_ipif(ipif_t *ipif)
   2748      0    stevel {
   2749      0    stevel 	vifi_t vifi, tmp_vifi;
   2750      0    stevel 	vifi_t num_of_vifs;
   2751   3448  dh155122 	ip_stack_t	*ipst = ipif->ipif_ill->ill_ipst;
   2752      0    stevel 
   2753      0    stevel 	/* Can't check vifi >= 0 since vifi_t is unsigned! */
   2754      0    stevel 
   2755   3448  dh155122 	mutex_enter(&ipst->ips_numvifs_mutex);
   2756   3448  dh155122 	num_of_vifs = ipst->ips_numvifs;
   2757   3448  dh155122 	mutex_exit(&ipst->ips_numvifs_mutex);
   2758      0    stevel 
   2759      0    stevel 	for (vifi = num_of_vifs; vifi != 0; vifi--) {
   2760      0    stevel 		tmp_vifi = vifi - 1;
   2761   3448  dh155122 		if (ipst->ips_vifs[tmp_vifi].v_ipif == ipif) {
   2762  11042      Erik 			(void) del_vif(&tmp_vifi, ipst);
   2763      0    stevel 		}
   2764      0    stevel 	}
   2765      0    stevel }
   2766      0    stevel 
   2767      0    stevel /* Remove pending upcall msgs when ill goes away.  Called by ill_delete.  */
   2768      0    stevel void
   2769      0    stevel reset_mrt_ill(ill_t *ill)
   2770      0    stevel {
   2771  11042      Erik 	struct mfc	*rt;
   2772      0    stevel 	struct rtdetq	*rte;
   2773  11042      Erik 	int		i;
   2774   3448  dh155122 	ip_stack_t	*ipst = ill->ill_ipst;
   2775   5240  nordmark 	conn_t		*mrouter = ipst->ips_ip_g_mrouter;
   2776  11042      Erik 	timeout_id_t	id;
   2777      0    stevel 
   2778      0    stevel 	for (i = 0; i < MFCTBLSIZ; i++) {
   2779   3448  dh155122 		MFCB_REFHOLD(&ipst->ips_mfcs[i]);
   2780   3448  dh155122 		if ((rt = ipst->ips_mfcs[i].mfcb_mfc) != NULL) {
   2781   3448  dh155122 			if (ipst->ips_ip_mrtdebug > 1) {
   2782   5240  nordmark 				(void) mi_strlog(mrouter->conn_rq, 1,
   2783   3448  dh155122 				    SL_TRACE,
   2784      0    stevel 				    "reset_mrt_ill: mfctable [%d]", i);
   2785      0    stevel 			}
   2786      0    stevel 			while (rt != NULL) {
   2787      0    stevel 				mutex_enter(&rt->mfc_mutex);
   2788      0    stevel 				while ((rte = rt->mfc_rte) != NULL) {
   2789  11042      Erik 					if (rte->ill == ill &&
   2790  11042      Erik 					    (id = rt->mfc_timeout_id) != 0) {
   2791  11042      Erik 						/*
   2792  11042      Erik 						 * Its ok to drop the lock,  the
   2793  11042      Erik 						 * struct cannot be freed since
   2794  11042      Erik 						 * we have a ref on the hash
   2795  11042      Erik 						 * bucket.
   2796  11042      Erik 						 */
   2797  11042      Erik 						mutex_exit(&rt->mfc_mutex);
   2798  11042      Erik 						(void) untimeout(id);
   2799  11042      Erik 						mutex_enter(&rt->mfc_mutex);
   2800  11042      Erik 					}
   2801      0    stevel 					if (rte->ill == ill) {
   2802   3448  dh155122 						if (ipst->ips_ip_mrtdebug > 1) {
   2803   3448  dh155122 						(void) mi_strlog(
   2804   5240  nordmark 						    mrouter->conn_rq,
   2805   3448  dh155122 						    1, SL_TRACE,
   2806   3448  dh155122 						    "reset_mrt_ill: "
   2807   7240   rh87107 						    "ill 0x%p", (void *)ill);
   2808      0    stevel 						}
   2809      0    stevel 						rt->mfc_rte = rte->rte_next;
   2810      0    stevel 						freemsg(rte->mp);
   2811      0    stevel 						mi_free((char *)rte);
   2812      0    stevel 					}
   2813      0    stevel 				}
   2814      0    stevel 				mutex_exit(&rt->mfc_mutex);
   2815      0    stevel 				rt = rt->mfc_next;
   2816      0    stevel 			}
   2817      0    stevel 		}
   2818   3448  dh155122 		MFCB_REFRELE(&ipst->ips_mfcs[i]);
   2819      0    stevel 	}
   2820      0    stevel }
   2821      0    stevel 
   2822      0    stevel /*
   2823      0    stevel  * Token bucket filter module.
   2824      0    stevel  * The ipha is for mcastgrp destination for phyint and encap.
   2825      0    stevel  */
   2826      0    stevel static void
   2827      0    stevel tbf_control(struct vif *vifp, mblk_t *mp, ipha_t *ipha)
   2828      0    stevel {
   2829      0    stevel 	size_t 	p_len =  msgdsize(mp);
   2830      0    stevel 	struct tbf	*t    = vifp->v_tbf;
   2831      0    stevel 	timeout_id_t id = 0;
   2832  11042      Erik 	ill_t		*ill = vifp->v_ipif->ipif_ill;
   2833  11042      Erik 	ip_stack_t	*ipst = ill->ill_ipst;
   2834   5240  nordmark 	conn_t		*mrouter = ipst->ips_ip_g_mrouter;
   2835      0    stevel 
   2836      0    stevel 	/* Drop if packet is too large */
   2837      0    stevel 	if (p_len > MAX_BKT_SIZE) {
   2838   3448  dh155122 		ipst->ips_mrtstat->mrts_pkt2large++;
   2839  11042      Erik 		BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutDiscards);
   2840  11042      Erik 		ip_drop_output("tbf_control - too large", mp, ill);
   2841      0    stevel 		freemsg(mp);
   2842      0    stevel 		return;
   2843      0    stevel 	}
   2844   3448  dh155122 	if (ipst->ips_ip_mrtdebug > 1) {
   2845   5240  nordmark 		(void) mi_strlog(mrouter->conn_rq, 1, SL_TRACE,
   2846      0    stevel 		    "tbf_ctrl: SEND vif %ld, qlen %d, ipha_dst 0x%x",
   2847   3448  dh155122 		    (ptrdiff_t)(vifp - ipst->ips_vifs), t->tbf_q_len,
   2848      0    stevel 		    ntohl(ipha->ipha_dst));
   2849      0    stevel 	}
   2850      0    stevel 
   2851      0    stevel 	mutex_enter(&t->tbf_lock);
   2852      0    stevel 
   2853      0    stevel 	tbf_update_tokens(vifp);
   2854      0    stevel 
   2855      0    stevel 	/*
   2856      0    stevel 	 * If there are enough tokens,
   2857      0    stevel 	 * and the queue is empty, send this packet out.
   2858      0    stevel 	 */
   2859   3448  dh155122 	if (ipst->ips_ip_mrtdebug > 1) {
   2860   5240  nordmark 		(void) mi_strlog(mrouter->conn_rq, 1, SL_TRACE,
   2861      0    stevel 		    "tbf_control: vif %ld, TOKENS  %d, pkt len  %lu, qlen  %d",
   2862   3448  dh155122 		    (ptrdiff_t)(vifp - ipst->ips_vifs), t->tbf_n_tok, p_len,
   2863      0    stevel 		    t->tbf_q_len);
   2864      0    stevel 	}
   2865      0    stevel 	/* No packets are queued */
   2866      0    stevel 	if (t->tbf_q_len == 0) {
   2867      0    stevel 		/* queue empty, send packet if enough tokens */
   2868      0    stevel 		if (p_len <= t->tbf_n_tok) {
   2869      0    stevel 			t->tbf_n_tok -= p_len;
   2870      0    stevel 			mutex_exit(&t->tbf_lock);
   2871      0    stevel 			tbf_send_packet(vifp, mp);
   2872      0    stevel 			return;
   2873      0    stevel 		} else {
   2874      0    stevel 			/* Queue packet and timeout till later */
   2875      0    stevel 			tbf_queue(vifp, mp);
   2876      0    stevel 			ASSERT(vifp->v_timeout_id == 0);
   2877      0    stevel 			vifp->v_timeout_id = timeout(tbf_reprocess_q, vifp,
   2878      0    stevel 			    TBF_REPROCESS);
   2879      0    stevel 		}
   2880      0    stevel 	} else if (t->tbf_q_len < t->tbf_max_q_len) {
   2881      0    stevel 		/* Finite queue length, so queue pkts and process queue */
   2882      0    stevel 		tbf_queue(vifp, mp);
   2883      0    stevel 		tbf_process_q(vifp);
   2884      0    stevel 	} else {
   2885      0    stevel 		/* Check that we have UDP header with IP header */
   2886      0    stevel 		size_t hdr_length = IPH_HDR_LENGTH(ipha) +
   2887   5240  nordmark 		    sizeof (struct udphdr);
   2888      0    stevel 
   2889      0    stevel 		if ((mp->b_wptr - mp->b_rptr) < hdr_length) {
   2890      0    stevel 			if (!pullupmsg(mp, hdr_length)) {
   2891  11042      Erik 				BUMP_MIB(ill->ill_ip_mib,
   2892  11042      Erik 				    ipIfStatsOutDiscards);
   2893  11042      Erik 				ip_drop_output("tbf_control - pullup", mp, ill);
   2894      0    stevel 				freemsg(mp);
   2895      0    stevel 				ip1dbg(("tbf_ctl: couldn't pullup udp hdr, "
   2896      0    stevel 				    "vif %ld src 0x%x dst 0x%x\n",
   2897   3448  dh155122 				    (ptrdiff_t)(vifp - ipst->ips_vifs),
   2898      0    stevel 				    ntohl(ipha->ipha_src),
   2899      0    stevel 				    ntohl(ipha->ipha_dst)));
   2900      0    stevel 				mutex_exit(&vifp->v_tbf->tbf_lock);
   2901      0    stevel 				return;
   2902      0    stevel 			} else
   2903      0    stevel 				/* Have to reassign ipha after pullupmsg */
   2904      0    stevel 				ipha = (ipha_t *)mp->b_rptr;
   2905      0    stevel 		}
   2906      0    stevel 		/*
   2907      0    stevel 		 * Queue length too much,
   2908      0    stevel 		 * try to selectively dq, or queue and process
   2909      0    stevel 		 */
   2910      0    stevel 		if (!tbf_dq_sel(vifp, ipha)) {
   2911   3448  dh155122 			ipst->ips_mrtstat->mrts_q_overflow++;
   2912  11042      Erik 			BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutDiscards);
   2913  11042      Erik 			ip_drop_output("mrts_q_overflow", mp, ill);
   2914      0    stevel 			freemsg(mp);
   2915      0    stevel 		} else {
   2916      0    stevel 			tbf_queue(vifp, mp);
   2917      0    stevel 			tbf_process_q(vifp);
   2918      0    stevel 		}
   2919      0    stevel 	}
   2920      0    stevel 	if (t->tbf_q_len == 0) {
   2921      0    stevel 		id = vifp->v_timeout_id;
   2922      0    stevel 		vifp->v_timeout_id = 0;
   2923      0    stevel 	}
   2924      0    stevel 	mutex_exit(&vifp->v_tbf->tbf_lock);
   2925      0    stevel 	if (id != 0)
   2926      0    stevel 		(void) untimeout(id);
   2927      0    stevel }
   2928      0    stevel 
   2929      0    stevel /*
   2930      0    stevel  * Adds a packet to the tbf queue at the interface.
   2931      0    stevel  * The ipha is for mcastgrp destination for phyint and encap.
   2932      0    stevel  */
   2933      0    stevel static void
   2934      0    stevel tbf_queue(struct vif *vifp, mblk_t *mp)
   2935      0    stevel {
   2936      0    stevel 	struct tbf	*t = vifp->v_tbf;
   2937   3448  dh155122 	ip_stack_t	*ipst = vifp->v_ipif->ipif_ill->ill_ipst;
   2938   5240  nordmark 	conn_t		*mrouter = ipst->ips_ip_g_mrouter;
   2939      0    stevel 
   2940   3448  dh155122 	if (ipst->ips_ip_mrtdebug > 1) {
   2941   5240  nordmark 		(void) mi_strlog(mrouter->conn_rq, 1, SL_TRACE,
   2942   3448  dh155122 		    "tbf_queue: vif %ld", (ptrdiff_t)(vifp - ipst->ips_vifs));
   2943      0    stevel 	}
   2944      0    stevel 	ASSERT(MUTEX_HELD(&t->tbf_lock));
   2945      0    stevel 
   2946      0    stevel 	if (t->tbf_t == NULL) {
   2947      0    stevel 		/* Queue was empty */
   2948      0    stevel 		t->tbf_q = mp;
   2949      0    stevel 	} else {
   2950      0    stevel 		/* Insert at tail */
   2951      0    stevel 		t->tbf_t->b_next = mp;
   2952      0    stevel 	}
   2953      0    stevel 	/* set new tail pointer */
   2954      0    stevel 	t->tbf_t = mp;
   2955      0    stevel 
   2956      0    stevel 	mp->b_next = mp->b_prev = NULL;
   2957      0    stevel 
   2958      0    stevel 	t->tbf_q_len++;
   2959      0    stevel }
   2960      0    stevel 
   2961      0    stevel /*
   2962      0    stevel  * Process the queue at the vif interface.
   2963      0    stevel  * Drops the tbf_lock when sending packets.
   2964      0    stevel  *
   2965      0    stevel  * NOTE : The caller should quntimeout if the queue length is 0.
   2966      0    stevel  */
   2967      0    stevel static void
   2968      0    stevel tbf_process_q(struct vif *vifp)
   2969      0    stevel {
   2970      0    stevel 	mblk_t	*mp;
   2971      0    stevel 	struct tbf	*t = vifp->v_tbf;
   2972      0    stevel 	size_t	len;
   2973   3448  dh155122 	ip_stack_t	*ipst = vifp->v_ipif->ipif_ill->ill_ipst;
   2974   5240  nordmark 	conn_t		*mrouter = ipst->ips_ip_g_mrouter;
   2975      0    stevel 
   2976   3448  dh155122 	if (ipst->ips_ip_mrtdebug > 1) {