Home | History | Annotate | Download | only in ip
      1      0     stevel /*
      2      0     stevel  * CDDL HEADER START
      3      0     stevel  *
      4      0     stevel  * The contents of this file are subject to the terms of the
      5   1676        jpk  * Common Development and Distribution License (the "License").
      6   1676        jpk  * You may not use this file except in compliance with the License.
      7      0     stevel  *
      8      0     stevel  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
      9      0     stevel  * or http://www.opensolaris.org/os/licensing.
     10      0     stevel  * See the License for the specific language governing permissions
     11      0     stevel  * and limitations under the License.
     12      0     stevel  *
     13      0     stevel  * When distributing Covered Code, include this CDDL HEADER in each
     14      0     stevel  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
     15      0     stevel  * If applicable, add the following below this CDDL HEADER, with the
     16      0     stevel  * fields enclosed by brackets "[]" replaced with your own identifying
     17      0     stevel  * information: Portions Copyright [yyyy] [name of copyright owner]
     18      0     stevel  *
     19      0     stevel  * CDDL HEADER END
     20      0     stevel  */
     21      0     stevel /*
     22   8485      Peter  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
     23      0     stevel  * Use is subject to license terms.
     24      0     stevel  */
     25      0     stevel 
     26      0     stevel #include <sys/types.h>
     27      0     stevel #include <sys/stream.h>
     28      0     stevel #include <sys/stropts.h>
     29   2546   carlsonj #include <sys/strsun.h>
     30      0     stevel #include <sys/sysmacros.h>
     31      0     stevel #include <sys/errno.h>
     32      0     stevel #include <sys/dlpi.h>
     33      0     stevel #include <sys/socket.h>
     34      0     stevel #include <sys/ddi.h>
     35   2546   carlsonj #include <sys/sunddi.h>
     36      0     stevel #include <sys/cmn_err.h>
     37      0     stevel #include <sys/debug.h>
     38      0     stevel #include <sys/vtrace.h>
     39      0     stevel #include <sys/kmem.h>
     40      0     stevel #include <sys/zone.h>
     41   2546   carlsonj #include <sys/ethernet.h>
     42   2546   carlsonj #include <sys/sdt.h>
     43  11042       Erik #include <sys/mac.h>
     44      0     stevel 
     45      0     stevel #include <net/if.h>
     46   2546   carlsonj #include <net/if_types.h>
     47      0     stevel #include <net/if_dl.h>
     48      0     stevel #include <net/route.h>
     49      0     stevel #include <netinet/in.h>
     50      0     stevel #include <netinet/ip6.h>
     51      0     stevel #include <netinet/icmp6.h>
     52      0     stevel 
     53      0     stevel #include <inet/common.h>
     54      0     stevel #include <inet/mi.h>
     55      0     stevel #include <inet/mib2.h>
     56      0     stevel #include <inet/nd.h>
     57      0     stevel #include <inet/ip.h>
     58   2733   nordmark #include <inet/ip_impl.h>
     59   3448   dh155122 #include <inet/ipclassifier.h>
     60      0     stevel #include <inet/ip_if.h>
     61      0     stevel #include <inet/ip_ire.h>
     62      0     stevel #include <inet/ip_rts.h>
     63      0     stevel #include <inet/ip6.h>
     64      0     stevel #include <inet/ip_ndp.h>
     65   2546   carlsonj #include <inet/sctp_ip.h>
     66  11042       Erik #include <inet/ip_arp.h>
     67   9175    Sowmini #include <inet/ip2mac_impl.h>
     68  11042       Erik 
     69  11042       Erik #define	ANNOUNCE_INTERVAL(isv6) \
     70  11042       Erik 	(isv6 ? ipst->ips_ip_ndp_unsolicit_interval : \
     71  11042       Erik 	ipst->ips_ip_arp_publish_interval)
     72  11042       Erik 
     73  11042       Erik #define	DEFENSE_INTERVAL(isv6) \
     74  11042       Erik 	(isv6 ? ipst->ips_ndp_defend_interval : \
     75  11042       Erik 	ipst->ips_arp_defend_interval)
     76  11042       Erik 
     77  11042       Erik /* Non-tunable probe interval, based on link capabilities */
     78  11042       Erik #define	ILL_PROBE_INTERVAL(ill)	((ill)->ill_note_link ? 150 : 1500)
     79  11042       Erik 
     80  11042       Erik /*
     81  11042       Erik  * The IPv4 Link Local address space is special; we do extra duplicate checking
     82  11042       Erik  * there, as the entire assignment mechanism rests on random numbers.
     83  11042       Erik  */
     84  11042       Erik #define	IS_IPV4_LL_SPACE(ptr)	(((uchar_t *)ptr)[0] == 169 && \
     85  11042       Erik 				((uchar_t *)ptr)[1] == 254)
     86  11042       Erik 
     87  11042       Erik /*
     88  11042       Erik  * NCE_EXTERNAL_FLAGS_MASK defines the set of ncec_flags that may be passed
     89  11042       Erik  * in to the ncec*add* functions.
     90  11042       Erik  *
     91  11042       Erik  * NCE_F_AUTHORITY means that we ignore any incoming adverts for that
     92  11042       Erik  * mapping (though DAD is performed for the mapping). NCE_F_PUBLISH means
     93  11042       Erik  * that we will respond to requests for the protocol address.
     94  11042       Erik  */
     95  11042       Erik #define	NCE_EXTERNAL_FLAGS_MASK \
     96  11042       Erik 	(NCE_F_MYADDR | NCE_F_ISROUTER | NCE_F_NONUD | \
     97  11042       Erik 	NCE_F_ANYCAST | NCE_F_UNSOL_ADV | NCE_F_BCAST | NCE_F_MCAST | \
     98  11042       Erik 	NCE_F_AUTHORITY | NCE_F_PUBLISH | NCE_F_STATIC)
     99      0     stevel 
    100      0     stevel /*
    101   2546   carlsonj  * Lock ordering:
    102   2546   carlsonj  *
    103  11042       Erik  *	ndp_g_lock -> ill_lock -> ncec_lock
    104   2546   carlsonj  *
    105   2546   carlsonj  * The ndp_g_lock protects the NCE hash (nce_hash_tbl, NCE_HASH_PTR) and
    106  11042       Erik  * ncec_next.  ncec_lock protects the contents of the NCE (particularly
    107  11042       Erik  * ncec_refcnt).
    108  11042       Erik  */
    109  11042       Erik 
    110  11042       Erik static	void	nce_cleanup_list(ncec_t *ncec);
    111  11042       Erik static	void 	nce_set_ll(ncec_t *ncec, uchar_t *ll_addr);
    112  11042       Erik static	ncec_t	*ncec_lookup_illgrp(ill_t *, const in6_addr_t *,
    113  11042       Erik     ncec_t *);
    114  11042       Erik static	nce_t	*nce_lookup_addr(ill_t *, const in6_addr_t *);
    115  11042       Erik static	int	nce_set_multicast_v6(ill_t *ill, const in6_addr_t *addr,
    116  11042       Erik     uint16_t ncec_flags, nce_t **newnce);
    117  11042       Erik static	int	nce_set_multicast_v4(ill_t *ill, const in_addr_t *dst,
    118  11042       Erik     uint16_t ncec_flags, nce_t **newnce);
    119  11042       Erik static	boolean_t	ndp_xmit(ill_t *ill, uint32_t operation,
    120  11042       Erik     uint8_t *hwaddr, uint_t hwaddr_len, const in6_addr_t *sender,
    121      0     stevel     const in6_addr_t *target, int flag);
    122  11042       Erik static void	ncec_refhold_locked(ncec_t *);
    123  11042       Erik static boolean_t ill_defend_rate_limit(ill_t *, ncec_t *);
    124  11042       Erik static	void	nce_queue_mp_common(ncec_t *, mblk_t *, boolean_t);
    125  11042       Erik static	int	nce_add_common(ill_t *, uchar_t *, uint_t, const in6_addr_t *,
    126  11042       Erik     uint16_t, uint16_t, nce_t **);
    127  11042       Erik static nce_t *nce_add_impl(ill_t *, ncec_t *, nce_t *, mblk_t *);
    128  11042       Erik static nce_t *nce_add(ill_t *, ncec_t *);
    129  11042       Erik static void nce_inactive(nce_t *);
    130  11042       Erik extern nce_t 	*nce_lookup(ill_t *, const in6_addr_t *);
    131  11042       Erik static nce_t *nce_ill_lookup_then_add(ill_t *, ncec_t *);
    132  11042       Erik static int	nce_add_v6(ill_t *, uchar_t *, uint_t, const in6_addr_t *,
    133  11042       Erik     uint16_t, uint16_t, nce_t **);
    134  11042       Erik static int	nce_add_v4(ill_t *, uchar_t *, uint_t, const in_addr_t *,
    135  11042       Erik     uint16_t, uint16_t, nce_t **);
    136  11042       Erik static int  nce_add_v6_postprocess(nce_t *);
    137  11042       Erik static int  nce_add_v4_postprocess(nce_t *);
    138  11042       Erik static ill_t *nce_resolve_src(ncec_t *, in6_addr_t *);
    139  11042       Erik static clock_t nce_fuzz_interval(clock_t, boolean_t);
    140  11042       Erik static void nce_resolv_ipmp_ok(ncec_t *);
    141  11042       Erik static void nce_walk_common(ill_t *, pfi_t, void *);
    142  11042       Erik static void nce_start_timer(ncec_t *, uint_t);
    143  11042       Erik static nce_t *nce_fastpath_create(ill_t *, ncec_t *);
    144  11042       Erik static void nce_fastpath_trigger(nce_t *);
    145  11042       Erik static nce_t *nce_fastpath(ncec_t *, boolean_t, nce_t *);
    146  11042       Erik 
    147  11042       Erik #ifdef DEBUG
    148  11042       Erik static void	ncec_trace_cleanup(const ncec_t *);
    149      0     stevel #endif
    150      0     stevel 
    151   3448   dh155122 #define	NCE_HASH_PTR_V4(ipst, addr)					\
    152   3448   dh155122 	(&((ipst)->ips_ndp4->nce_hash_tbl[IRE_ADDR_HASH(addr, NCE_TABLE_SIZE)]))
    153   3448   dh155122 
    154   3448   dh155122 #define	NCE_HASH_PTR_V6(ipst, addr)				 \
    155   3448   dh155122 	(&((ipst)->ips_ndp6->nce_hash_tbl[NCE_ADDR_HASH_V6(addr, \
    156   3448   dh155122 		NCE_TABLE_SIZE)]))
    157   2535   sangeeta 
    158  11042       Erik extern kmem_cache_t *ncec_cache;
    159  11042       Erik extern kmem_cache_t *nce_cache;
    160  11042       Erik 
    161  11042       Erik /*
    162  11042       Erik  * Send out a IPv6 (unicast) or IPv4 (broadcast) DAD probe
    163  11042       Erik  * If src_ill is not null, the ncec_addr is bound to src_ill. The
    164  11042       Erik  * src_ill is ignored by nce_dad for IPv4 Neighbor Cache entries where
    165  11042       Erik  * the probe is sent on the ncec_ill (in the non-IPMP case) or the
    166  11042       Erik  * IPMP cast_ill (in the IPMP case).
    167  11042       Erik  *
    168  11042       Erik  * Note that the probe interval is based on ncec->ncec_ill which
    169  11042       Erik  * may be the ipmp_ill.
    170  11042       Erik  */
    171  11042       Erik static void
    172  11042       Erik nce_dad(ncec_t *ncec, ill_t *src_ill, boolean_t send_probe)
    173  11042       Erik {
    174  11042       Erik 	boolean_t dropped;
    175  11042       Erik 	uint32_t probe_interval;
    176  11042       Erik 
    177  11042       Erik 	ASSERT(!(ncec->ncec_flags & NCE_F_MCAST));
    178  11042       Erik 	ASSERT(!(ncec->ncec_flags & NCE_F_BCAST));
    179  11042       Erik 	if (ncec->ncec_ipversion == IPV6_VERSION) {
    180  11042       Erik 		dropped = ndp_xmit(src_ill, ND_NEIGHBOR_SOLICIT,
    181  11042       Erik 		    ncec->ncec_lladdr, ncec->ncec_lladdr_length,
    182  11042       Erik 		    &ipv6_all_zeros, &ncec->ncec_addr, NDP_PROBE);
    183  11042       Erik 		probe_interval = ILL_PROBE_INTERVAL(ncec->ncec_ill);
    184  11042       Erik 	} else {
    185  11042       Erik 		/* IPv4 DAD delay the initial probe. */
    186  11042       Erik 		if (send_probe)
    187  11042       Erik 			dropped = arp_probe(ncec);
    188  11042       Erik 		else
    189  11042       Erik 			dropped = B_TRUE;
    190  11042       Erik 		probe_interval = nce_fuzz_interval(ncec->ncec_xmit_interval,
    191  11042       Erik 		    !send_probe);
    192  11042       Erik 	}
    193  11042       Erik 	if (!dropped) {
    194  11042       Erik 		mutex_enter(&ncec->ncec_lock);
    195  11042       Erik 		ncec->ncec_pcnt--;
    196  11042       Erik 		mutex_exit(&ncec->ncec_lock);
    197  11042       Erik 	}
    198  11042       Erik 	nce_restart_timer(ncec, probe_interval);
    199  11042       Erik }
    200  11042       Erik 
    201  11042       Erik /*
    202  11042       Erik  * Compute default flags to use for an advertisement of this ncec's address.
    203  11042       Erik  */
    204  11042       Erik static int
    205  11042       Erik nce_advert_flags(const ncec_t *ncec)
    206  11042       Erik {
    207  11042       Erik 	int flag = 0;
    208  11042       Erik 
    209  11042       Erik 	if (ncec->ncec_flags & NCE_F_ISROUTER)
    210  11042       Erik 		flag |= NDP_ISROUTER;
    211  11042       Erik 	if (!(ncec->ncec_flags & NCE_F_ANYCAST))
    212  11042       Erik 		flag |= NDP_ORIDE;
    213  11042       Erik 
    214  11042       Erik 	return (flag);
    215  11042       Erik }
    216   2546   carlsonj 
    217      0     stevel /*
    218      0     stevel  * NDP Cache Entry creation routine.
    219   3448   dh155122  * This routine must always be called with ndp6->ndp_g_lock held.
    220  11042       Erik  */
    221  11042       Erik int
    222  11042       Erik nce_add_v6(ill_t *ill, uchar_t *hw_addr, uint_t hw_addr_len,
    223  11042       Erik     const in6_addr_t *addr, uint16_t flags, uint16_t state, nce_t **newnce)
    224  11042       Erik {
    225  11042       Erik 	int		err;
    226      0     stevel 	nce_t		*nce;
    227  11042       Erik 
    228  11042       Erik 	ASSERT(MUTEX_HELD(&ill->ill_ipst->ips_ndp6->ndp_g_lock));
    229  11042       Erik 	ASSERT(ill != NULL && ill->ill_isv6);
    230  11042       Erik 
    231  11042       Erik 	err = nce_add_common(ill, hw_addr, hw_addr_len, addr, flags, state,
    232  11042       Erik 	    &nce);
    233  11042       Erik 	if (err != 0)
    234  11042       Erik 		return (err);
    235  11042       Erik 	ASSERT(newnce != NULL);
    236  11042       Erik 	*newnce = nce;
    237  11042       Erik 	return (err);
    238  11042       Erik }
    239  11042       Erik 
    240  11042       Erik /*
    241  11042       Erik  * Post-processing routine to be executed after nce_add_v6(). This function
    242  11042       Erik  * triggers fastpath (if appropriate) and DAD on the newly added nce entry
    243  11042       Erik  * and must be called without any locks held.
    244  11042       Erik  */
    245  11042       Erik int
    246  11042       Erik nce_add_v6_postprocess(nce_t *nce)
    247  11042       Erik {
    248  11042       Erik 	ncec_t		*ncec = nce->nce_common;
    249      0     stevel 	boolean_t	dropped = B_FALSE;
    250  11042       Erik 	uchar_t		*hw_addr = ncec->ncec_lladdr;
    251  11042       Erik 	uint_t		hw_addr_len = ncec->ncec_lladdr_length;
    252  11042       Erik 	ill_t		*ill = ncec->ncec_ill;
    253  11042       Erik 	int		err = 0;
    254  11042       Erik 	uint16_t	flags = ncec->ncec_flags;
    255  11042       Erik 	ip_stack_t	*ipst = ill->ill_ipst;
    256  11042       Erik 	boolean_t	trigger_fastpath = B_TRUE;
    257  11042       Erik 
    258  11042       Erik 	/*
    259  11042       Erik 	 * If the hw_addr is NULL, typically for ND_INCOMPLETE nces, then
    260  11042       Erik 	 * we call nce_fastpath as soon as the ncec is resolved in nce_process.
    261  11042       Erik 	 * We call nce_fastpath from nce_update if the link layer address of
    262  11042       Erik 	 * the peer changes from nce_update
    263  11042       Erik 	 */
    264  11042       Erik 	if (NCE_PUBLISH(ncec) || !NCE_ISREACHABLE(ncec) ||
    265  11042       Erik 	    (hw_addr == NULL && ill->ill_net_type != IRE_IF_NORESOLVER))
    266  11042       Erik 		trigger_fastpath = B_FALSE;
    267  11042       Erik 
    268  11042       Erik 	if (trigger_fastpath)
    269  11042       Erik 		nce_fastpath_trigger(nce);
    270  11042       Erik 	if (NCE_PUBLISH(ncec) && ncec->ncec_state == ND_PROBE) {
    271  11042       Erik 		ill_t *hwaddr_ill;
    272  11042       Erik 		/*
    273  11042       Erik 		 * Unicast entry that needs DAD.
    274  11042       Erik 		 */
    275  11042       Erik 		if (IS_IPMP(ill)) {
    276  11042       Erik 			hwaddr_ill = ipmp_illgrp_find_ill(ill->ill_grp,
    277  11042       Erik 			    hw_addr, hw_addr_len);
    278  11042       Erik 		} else {
    279  11042       Erik 			hwaddr_ill = ill;
    280  11042       Erik 		}
    281  11042       Erik 		nce_dad(ncec, hwaddr_ill, B_TRUE);
    282   2546   carlsonj 		err = EINPROGRESS;
    283   2546   carlsonj 	} else if (flags & NCE_F_UNSOL_ADV) {
    284      0     stevel 		/*
    285      0     stevel 		 * We account for the transmit below by assigning one
    286      0     stevel 		 * less than the ndd variable. Subsequent decrements
    287  11042       Erik 		 * are done in nce_timer.
    288  11042       Erik 		 */
    289  11042       Erik 		mutex_enter(&ncec->ncec_lock);
    290  11042       Erik 		ncec->ncec_unsolicit_count =
    291  11042       Erik 		    ipst->ips_ip_ndp_unsolicit_count - 1;
    292  11042       Erik 		mutex_exit(&ncec->ncec_lock);
    293  11042       Erik 		dropped = ndp_xmit(ill,
    294  11042       Erik 		    ND_NEIGHBOR_ADVERT,
    295  11042       Erik 		    hw_addr,
    296  11042       Erik 		    hw_addr_len,
    297  11042       Erik 		    &ncec->ncec_addr,	/* Source and target of the adv */
    298  11042       Erik 		    &ipv6_all_hosts_mcast, /* Destination of the packet */
    299  11042       Erik 		    nce_advert_flags(ncec));
    300  11042       Erik 		mutex_enter(&ncec->ncec_lock);
    301      0     stevel 		if (dropped)
    302  11042       Erik 			ncec->ncec_unsolicit_count++;
    303  11042       Erik 		else
    304  11042       Erik 			ncec->ncec_last_time_defended = ddi_get_lbolt();
    305  11042       Erik 		if (ncec->ncec_unsolicit_count != 0) {
    306  11042       Erik 			nce_start_timer(ncec,
    307  11042       Erik 			    ipst->ips_ip_ndp_unsolicit_interval);
    308  11042       Erik 		}
    309  11042       Erik 		mutex_exit(&ncec->ncec_lock);
    310  11042       Erik 	}
    311  11042       Erik 	return (err);
    312  11042       Erik }
    313  11042       Erik 
    314  11042       Erik /*
    315  11042       Erik  * Atomically lookup and add (if needed) Neighbor Cache information for
    316  11042       Erik  * an address.
    317  11042       Erik  *
    318  11042       Erik  * IPMP notes: the ncec for non-local (i.e., !NCE_MYADDR(ncec) addresses
    319  11042       Erik  * are always added pointing at the ipmp_ill. Thus, when the ill passed
    320  11042       Erik  * to nce_add_v6 is an under_ill (i.e., IS_UNDER_IPMP(ill)) two nce_t
    321  11042       Erik  * entries will be created, both pointing at the same ncec_t. The nce_t
    322  11042       Erik  * entries will have their nce_ill set to the ipmp_ill and the under_ill
    323  11042       Erik  * respectively, with the ncec_t having its ncec_ill pointing at the ipmp_ill.
    324  11042       Erik  * Local addresses are always created on the ill passed to nce_add_v6.
    325  11042       Erik  */
    326  11042       Erik int
    327  11042       Erik nce_lookup_then_add_v6(ill_t *ill, uchar_t *hw_addr, uint_t hw_addr_len,
    328  11042       Erik     const in6_addr_t *addr, uint16_t flags, uint16_t state, nce_t **newnce)
    329  11042       Erik {
    330  11042       Erik 	int		err = 0;
    331  11042       Erik 	ip_stack_t	*ipst = ill->ill_ipst;
    332  11042       Erik 	nce_t		*nce, *upper_nce = NULL;
    333  11042       Erik 	ill_t		*in_ill = ill;
    334  11042       Erik 	boolean_t	need_ill_refrele = B_FALSE;
    335  11042       Erik 
    336  11042       Erik 	if (flags & NCE_F_MCAST) {
    337  11042       Erik 		/*
    338  11042       Erik 		 * hw_addr will be figured out in nce_set_multicast_v6;
    339  11042       Erik 		 * caller has to select the cast_ill
    340  11042       Erik 		 */
    341  11042       Erik 		ASSERT(hw_addr == NULL);
    342  11042       Erik 		ASSERT(!IS_IPMP(ill));
    343  11042       Erik 		err = nce_set_multicast_v6(ill, addr, flags, newnce);
    344  11042       Erik 		return (err);
    345  11042       Erik 	}
    346   4714    sowmini 	ASSERT(ill->ill_isv6);
    347  11042       Erik 	if (IS_UNDER_IPMP(ill) && !(flags & NCE_F_MYADDR)) {
    348  11042       Erik 		ill = ipmp_ill_hold_ipmp_ill(ill);
    349  11042       Erik 		if (ill == NULL)
    350  11042       Erik 			return (ENXIO);
    351  11042       Erik 		need_ill_refrele = B_TRUE;
    352  11042       Erik 	}
    353  11042       Erik 
    354   3448   dh155122 	mutex_enter(&ipst->ips_ndp6->ndp_g_lock);
    355  11042       Erik 	nce = nce_lookup_addr(ill, addr);
    356      0     stevel 	if (nce == NULL) {
    357  11042       Erik 		err = nce_add_v6(ill, hw_addr, hw_addr_len, addr, flags, state,
    358  11042       Erik 		    &nce);
    359  11042       Erik 	} else {
    360      0     stevel 		err = EEXIST;
    361      0     stevel 	}
    362   3448   dh155122 	mutex_exit(&ipst->ips_ndp6->ndp_g_lock);
    363  11042       Erik 	if (err == 0)
    364  11042       Erik 		err = nce_add_v6_postprocess(nce);
    365  11042       Erik 	if (in_ill != ill && nce != NULL) {
    366  11042       Erik 		nce_t *under_nce;
    367  11042       Erik 
    368  11042       Erik 		/*
    369  11042       Erik 		 * in_ill was the under_ill. Try to create the under_nce.
    370  11042       Erik 		 * Hold the ill_g_lock to prevent changes to group membership
    371  11042       Erik 		 * until we are done.
    372  11042       Erik 		 */
    373  11042       Erik 		rw_enter(&ipst->ips_ill_g_lock, RW_READER);
    374  11042       Erik 		if (IS_IN_SAME_ILLGRP(in_ill, ill)) {
    375  11042       Erik 			under_nce = nce_fastpath_create(in_ill,
    376  11042       Erik 			    nce->nce_common);
    377  11042       Erik 			upper_nce = nce;
    378  11042       Erik 			if ((nce = under_nce) == NULL)
    379  11042       Erik 				err = EINVAL;
    380  11042       Erik 		}
    381  11042       Erik 		rw_exit(&ipst->ips_ill_g_lock);
    382  11042       Erik 		if (under_nce != NULL && NCE_ISREACHABLE(nce->nce_common))
    383  11042       Erik 			nce_fastpath_trigger(under_nce);
    384  11042       Erik 	}
    385  11042       Erik 	if (nce != NULL) {
    386  11042       Erik 		if (newnce != NULL)
    387  11042       Erik 			*newnce = nce;
    388  11042       Erik 		else
    389  11042       Erik 			nce_refrele(nce);
    390  11042       Erik 	}
    391  11042       Erik 	/* nce_refrele is deferred until the lock is dropped  */
    392  11042       Erik 	if (upper_nce != NULL)
    393  11042       Erik 		nce_refrele(upper_nce);
    394  11042       Erik 	if (need_ill_refrele)
    395  11042       Erik 		ill_refrele(ill);
    396      0     stevel 	return (err);
    397      0     stevel }
    398      0     stevel 
    399      0     stevel /*
    400      0     stevel  * Remove all the CONDEMNED nces from the appropriate hash table.
    401      0     stevel  * We create a private list of NCEs, these may have ires pointing
    402      0     stevel  * to them, so the list will be passed through to clean up dependent
    403  11042       Erik  * ires and only then we can do ncec_refrele() which can make NCE inactive.
    404  11042       Erik  */
    405  11042       Erik static void
    406  11042       Erik nce_remove(ndp_g_t *ndp, ncec_t *ncec, ncec_t **free_nce_list)
    407  11042       Erik {
    408  11042       Erik 	ncec_t *ncec1;
    409  11042       Erik 	ncec_t **ptpn;
    410      0     stevel 
    411   2535   sangeeta 	ASSERT(MUTEX_HELD(&ndp->ndp_g_lock));
    412   2535   sangeeta 	ASSERT(ndp->ndp_g_walker == 0);
    413  11042       Erik 	for (; ncec; ncec = ncec1) {
    414  11042       Erik 		ncec1 = ncec->ncec_next;
    415  11042       Erik 		mutex_enter(&ncec->ncec_lock);
    416  11042       Erik 		if (NCE_ISCONDEMNED(ncec)) {
    417  11042       Erik 			ptpn = ncec->ncec_ptpn;
    418  11042       Erik 			ncec1 = ncec->ncec_next;
    419  11042       Erik 			if (ncec1 != NULL)
    420  11042       Erik 				ncec1->ncec_ptpn = ptpn;
    421  11042       Erik 			*ptpn = ncec1;
    422  11042       Erik 			ncec->ncec_ptpn = NULL;
    423  11042       Erik 			ncec->ncec_next = NULL;
    424  11042       Erik 			ncec->ncec_next = *free_nce_list;
    425  11042       Erik 			*free_nce_list = ncec;
    426  11042       Erik 		}
    427  11042       Erik 		mutex_exit(&ncec->ncec_lock);
    428  11042       Erik 	}
    429  11042       Erik }
    430  11042       Erik 
    431  11042       Erik /*
    432  11042       Erik  * 1. Mark the entry CONDEMNED. This ensures that no new nce_lookup()
    433  11042       Erik  *    will return this NCE. Also no new timeouts will
    434  11042       Erik  *    be started (See nce_restart_timer).
    435      0     stevel  * 2. Cancel any currently running timeouts.
    436      0     stevel  * 3. If there is an ndp walker, return. The walker will do the cleanup.
    437      0     stevel  *    This ensures that walkers see a consistent list of NCEs while walking.
    438      0     stevel  * 4. Otherwise remove the NCE from the list of NCEs
    439  11042       Erik  */
    440  11042       Erik void
    441  11042       Erik ncec_delete(ncec_t *ncec)
    442  11042       Erik {
    443  11042       Erik 	ncec_t	**ptpn;
    444  11042       Erik 	ncec_t	*ncec1;
    445  11042       Erik 	int	ipversion = ncec->ncec_ipversion;
    446   3448   dh155122 	ndp_g_t *ndp;
    447  11042       Erik 	ip_stack_t	*ipst = ncec->ncec_ipst;
    448   3448   dh155122 
    449   3448   dh155122 	if (ipversion == IPV4_VERSION)
    450   3448   dh155122 		ndp = ipst->ips_ndp4;
    451   3448   dh155122 	else
    452   3448   dh155122 		ndp = ipst->ips_ndp6;
    453      0     stevel 
    454      0     stevel 	/* Serialize deletes */
    455  11042       Erik 	mutex_enter(&ncec->ncec_lock);
    456  11042       Erik 	if (NCE_ISCONDEMNED(ncec)) {
    457      0     stevel 		/* Some other thread is doing the delete */
    458  11042       Erik 		mutex_exit(&ncec->ncec_lock);
    459      0     stevel 		return;
    460      0     stevel 	}
    461      0     stevel 	/*
    462      0     stevel 	 * Caller has a refhold. Also 1 ref for being in the list. Thus
    463      0     stevel 	 * refcnt has to be >= 2
    464      0     stevel 	 */
    465  11042       Erik 	ASSERT(ncec->ncec_refcnt >= 2);
    466  11042       Erik 	ncec->ncec_flags |= NCE_F_CONDEMNED;
    467  11042       Erik 	mutex_exit(&ncec->ncec_lock);
    468  11042       Erik 
    469  11042       Erik 	/* Count how many condemned ires for kmem_cache callback */
    470  11042       Erik 	atomic_add_32(&ipst->ips_num_nce_condemned, 1);
    471  11042       Erik 	nce_fastpath_list_delete(ncec->ncec_ill, ncec, NULL);
    472      0     stevel 
    473   9175    Sowmini 	/* Complete any waiting callbacks */
    474  11042       Erik 	ncec_cb_dispatch(ncec);
    475   9175    Sowmini 
    476      0     stevel 	/*
    477      0     stevel 	 * Cancel any running timer. Timeout can't be restarted
    478  11042       Erik 	 * since CONDEMNED is set. Can't hold ncec_lock across untimeout.
    479      0     stevel 	 * Passing invalid timeout id is fine.
    480      0     stevel 	 */
    481  11042       Erik 	if (ncec->ncec_timeout_id != 0) {
    482  11042       Erik 		(void) untimeout(ncec->ncec_timeout_id);
    483  11042       Erik 		ncec->ncec_timeout_id = 0;
    484      0     stevel 	}
    485      0     stevel 
    486   2535   sangeeta 	mutex_enter(&ndp->ndp_g_lock);
    487  11042       Erik 	if (ncec->ncec_ptpn == NULL) {
    488  11042       Erik 		/*
    489  11042       Erik 		 * The last ndp walker has already removed this ncec from
    490  11042       Erik 		 * the list after we marked the ncec CONDEMNED and before
    491   2535   sangeeta 		 * we grabbed the global lock.
    492      0     stevel 		 */
    493   2535   sangeeta 		mutex_exit(&ndp->ndp_g_lock);
    494      0     stevel 		return;
    495      0     stevel 	}
    496   2535   sangeeta 	if (ndp->ndp_g_walker > 0) {
    497      0     stevel 		/*
    498      0     stevel 		 * Can't unlink. The walker will clean up
    499      0     stevel 		 */
    500   2535   sangeeta 		ndp->ndp_g_walker_cleanup = B_TRUE;
    501   2535   sangeeta 		mutex_exit(&ndp->ndp_g_lock);
    502      0     stevel 		return;
    503      0     stevel 	}
    504      0     stevel 
    505      0     stevel 	/*
    506  11042       Erik 	 * Now remove the ncec from the list. nce_restart_timer won't restart
    507      0     stevel 	 * the timer since it is marked CONDEMNED.
    508      0     stevel 	 */
    509  11042       Erik 	ptpn = ncec->ncec_ptpn;
    510  11042       Erik 	ncec1 = ncec->ncec_next;
    511  11042       Erik 	if (ncec1 != NULL)
    512  11042       Erik 		ncec1->ncec_ptpn = ptpn;
    513  11042       Erik 	*ptpn = ncec1;
    514  11042       Erik 	ncec->ncec_ptpn = NULL;
    515  11042       Erik 	ncec->ncec_next = NULL;
    516   2535   sangeeta 	mutex_exit(&ndp->ndp_g_lock);
    517      0     stevel 
    518  11042       Erik 	/* Removed from ncec_ptpn/ncec_next list */
    519  11042       Erik 	ncec_refrele_notr(ncec);
    520  11042       Erik }
    521  11042       Erik 
    522  11042       Erik void
    523  11042       Erik ncec_inactive(ncec_t *ncec)
    524      0     stevel {
    525      0     stevel 	mblk_t		**mpp;
    526  11042       Erik 	ill_t		*ill = ncec->ncec_ill;
    527  11042       Erik 	ip_stack_t	*ipst = ncec->ncec_ipst;
    528  11042       Erik 
    529  11042       Erik 	ASSERT(ncec->ncec_refcnt == 0);
    530  11042       Erik 	ASSERT(MUTEX_HELD(&ncec->ncec_lock));
    531  11042       Erik 
    532  11042       Erik 	/* Count how many condemned nces for kmem_cache callback */
    533  11042       Erik 	if (NCE_ISCONDEMNED(ncec))
    534  11042       Erik 		atomic_add_32(&ipst->ips_num_nce_condemned, -1);
    535  11042       Erik 
    536  11042       Erik 	/* Free all allocated messages */
    537  11042       Erik 	mpp = &ncec->ncec_qd_mp;
    538  11042       Erik 	while (*mpp != NULL) {
    539  11042       Erik 		mblk_t  *mp;
    540  11042       Erik 
    541  11042       Erik 		mp = *mpp;
    542  11042       Erik 		*mpp = mp->b_next;
    543  11042       Erik 
    544  11042       Erik 		inet_freemsg(mp);
    545  11042       Erik 	}
    546  11042       Erik 	/*
    547  11042       Erik 	 * must have been cleaned up in ncec_delete
    548  11042       Erik 	 */
    549  11042       Erik 	ASSERT(list_is_empty(&ncec->ncec_cb));
    550  11042       Erik 	list_destroy(&ncec->ncec_cb);
    551  11042       Erik 	/*
    552  11042       Erik 	 * free the ncec_lladdr if one was allocated in nce_add_common()
    553  11042       Erik 	 */
    554  11042       Erik 	if (ncec->ncec_lladdr_length > 0)
    555  11042       Erik 		kmem_free(ncec->ncec_lladdr, ncec->ncec_lladdr_length);
    556  11042       Erik 
    557  11042       Erik #ifdef DEBUG
    558  11042       Erik 	ncec_trace_cleanup(ncec);
    559  11042       Erik #endif
    560  11042       Erik 
    561      0     stevel 	mutex_enter(&ill->ill_lock);
    562   6255    sowmini 	DTRACE_PROBE3(ill__decr__cnt, (ill_t *), ill,
    563  11042       Erik 	    (char *), "ncec", (void *), ncec);
    564  11042       Erik 	ill->ill_ncec_cnt--;
    565  11042       Erik 	ncec->ncec_ill = NULL;
    566  11042       Erik 	/*
    567  11042       Erik 	 * If the number of ncec's associated with this ill have dropped
    568      0     stevel 	 * to zero, check whether we need to restart any operation that
    569      0     stevel 	 * is waiting for this to happen.
    570      0     stevel 	 */
    571   6255    sowmini 	if (ILL_DOWN_OK(ill)) {
    572      0     stevel 		/* ipif_ill_refrele_tail drops the ill_lock */
    573      0     stevel 		ipif_ill_refrele_tail(ill);
    574      0     stevel 	} else {
    575      0     stevel 		mutex_exit(&ill->ill_lock);
    576      0     stevel 	}
    577  11042       Erik 
    578  11042       Erik 	mutex_destroy(&ncec->ncec_lock);
    579  11042       Erik 	kmem_cache_free(ncec_cache, ncec);
    580  11042       Erik }
    581  11042       Erik 
    582  11042       Erik /*
    583  11042       Erik  * ncec_walk routine.  Delete the ncec if it is associated with the ill
    584      0     stevel  * that is going away.  Always called as a writer.
    585      0     stevel  */
    586      0     stevel void
    587  11042       Erik ncec_delete_per_ill(ncec_t *ncec, uchar_t *arg)
    588  11042       Erik {
    589  11042       Erik 	if ((ncec != NULL) && ncec->ncec_ill == (ill_t *)arg) {
    590  11042       Erik 		ncec_delete(ncec);
    591  11042       Erik 	}
    592  11042       Erik }
    593  11042       Erik 
    594  11042       Erik /*
    595  11042       Erik  * Neighbor Cache cleanup logic for a list of ncec_t entries.
    596  11042       Erik  */
    597  11042       Erik static void
    598  11042       Erik nce_cleanup_list(ncec_t *ncec)
    599  11042       Erik {
    600  11042       Erik 	ncec_t *ncec_next;
    601  11042       Erik 
    602  11042       Erik 	ASSERT(ncec != NULL);
    603  11042       Erik 	while (ncec != NULL) {
    604  11042       Erik 		ncec_next = ncec->ncec_next;
    605  11042       Erik 		ncec->ncec_next = NULL;
    606      0     stevel 
    607      0     stevel 		/*
    608      0     stevel 		 * It is possible for the last ndp walker (this thread)
    609  11042       Erik 		 * to come here after ncec_delete has marked the ncec CONDEMNED
    610  11042       Erik 		 * and before it has removed the ncec from the fastpath list
    611      0     stevel 		 * or called untimeout. So we need to do it here. It is safe
    612  11042       Erik 		 * for both ncec_delete and this thread to do it twice or
    613      0     stevel 		 * even simultaneously since each of the threads has a
    614  11042       Erik 		 * reference on the ncec.
    615  11042       Erik 		 */
    616  11042       Erik 		nce_fastpath_list_delete(ncec->ncec_ill, ncec, NULL);
    617      0     stevel 		/*
    618      0     stevel 		 * Cancel any running timer. Timeout can't be restarted
    619  11042       Erik 		 * since CONDEMNED is set. The ncec_lock can't be
    620  11042       Erik 		 * held across untimeout though passing invalid timeout
    621  11042       Erik 		 * id is fine.
    622  11042       Erik 		 */
    623  11042       Erik 		if (ncec->ncec_timeout_id != 0) {
    624  11042       Erik 			(void) untimeout(ncec->ncec_timeout_id);
    625  11042       Erik 			ncec->ncec_timeout_id = 0;
    626  11042       Erik 		}
    627  11042       Erik 		/* Removed from ncec_ptpn/ncec_next list */
    628  11042       Erik 		ncec_refrele_notr(ncec);
    629  11042       Erik 		ncec = ncec_next;
    630   2535   sangeeta 	}
    631      0     stevel }
    632      0     stevel 
    633      0     stevel /*
    634   2546   carlsonj  * Restart DAD on given NCE.  Returns B_TRUE if DAD has been restarted.
    635   2546   carlsonj  */
    636   2546   carlsonj boolean_t
    637  11042       Erik nce_restart_dad(ncec_t *ncec)
    638   2546   carlsonj {
    639   2546   carlsonj 	boolean_t started;
    640  11042       Erik 	ill_t *ill, *hwaddr_ill;
    641  11042       Erik 
    642  11042       Erik 	if (ncec == NULL)
    643   2546   carlsonj 		return (B_FALSE);
    644  11042       Erik 	ill = ncec->ncec_ill;
    645  11042       Erik 	mutex_enter(&ncec->ncec_lock);
    646  11042       Erik 	if (ncec->ncec_state == ND_PROBE) {
    647  11042       Erik 		mutex_exit(&ncec->ncec_lock);
    648   2546   carlsonj 		started = B_TRUE;
    649  11042       Erik 	} else if (ncec->ncec_state == ND_REACHABLE) {
    650  11042       Erik 		ASSERT(ncec->ncec_lladdr != NULL);
    651  11042       Erik 		ncec->ncec_state = ND_PROBE;
    652  11042       Erik 		ncec->ncec_pcnt = ND_MAX_UNICAST_SOLICIT;
    653  11042       Erik 		/*
    654  11042       Erik 		 * Slight cheat here: we don't use the initial probe delay
    655  11042       Erik 		 * for IPv4 in this obscure case.
    656  11042       Erik 		 */
    657  11042       Erik 		mutex_exit(&ncec->ncec_lock);
    658  11042       Erik 		if (IS_IPMP(ill)) {
    659  11042       Erik 			hwaddr_ill = ipmp_illgrp_find_ill(ill->ill_grp,
    660  11042       Erik 			    ncec->ncec_lladdr, ncec->ncec_lladdr_length);
    661  11042       Erik 		} else {
    662  11042       Erik 			hwaddr_ill = ill;
    663  11042       Erik 		}
    664  11042       Erik 		nce_dad(ncec, hwaddr_ill, B_TRUE);
    665   2546   carlsonj 		started = B_TRUE;
    666   2546   carlsonj 	} else {
    667  11042       Erik 		mutex_exit(&ncec->ncec_lock);
    668   2546   carlsonj 		started = B_FALSE;
    669   2546   carlsonj 	}
    670   2546   carlsonj 	return (started);
    671   2546   carlsonj }
    672   2546   carlsonj 
    673   2546   carlsonj /*
    674  11042       Erik  * IPv6 Cache entry lookup.  Try to find an ncec matching the parameters passed.
    675  11042       Erik  * If one is found, the refcnt on the ncec will be incremented.
    676  11042       Erik  */
    677  11042       Erik ncec_t *
    678  11042       Erik ncec_lookup_illgrp_v6(ill_t *ill, const in6_addr_t *addr)
    679  11042       Erik {
    680  11042       Erik 	ncec_t		*ncec;
    681  11042       Erik 	ip_stack_t	*ipst = ill->ill_ipst;
    682  11042       Erik 
    683  11042       Erik 	rw_enter(&ipst->ips_ill_g_lock, RW_READER);
    684  11042       Erik 	mutex_enter(&ipst->ips_ndp6->ndp_g_lock);
    685   3448   dh155122 
    686   3448   dh155122 	/* Get head of v6 hash table */
    687  11042       Erik 	ncec = *((ncec_t **)NCE_HASH_PTR_V6(ipst, *addr));
    688  11042       Erik 	ncec = ncec_lookup_illgrp(ill, addr, ncec);
    689  11042       Erik 	mutex_exit(&ipst->ips_ndp6->ndp_g_lock);
    690  11042       Erik 	rw_exit(&ipst->ips_ill_g_lock);
    691  11042       Erik 	return (ncec);
    692  11042       Erik }
    693  11042       Erik /*
    694  11042       Erik  * IPv4 Cache entry lookup.  Try to find an ncec matching the parameters passed.
    695  11042       Erik  * If one is found, the refcnt on the ncec will be incremented.
    696  11042       Erik  */
    697  11042       Erik ncec_t *
    698  11042       Erik ncec_lookup_illgrp_v4(ill_t *ill, const in_addr_t *addr)
    699  11042       Erik {
    700  11042       Erik 	ncec_t	*ncec = NULL;
    701   2535   sangeeta 	in6_addr_t addr6;
    702   3448   dh155122 	ip_stack_t *ipst = ill->ill_ipst;
    703   2535   sangeeta 
    704  11042       Erik 	rw_enter(&ipst->ips_ill_g_lock, RW_READER);
    705  11042       Erik 	mutex_enter(&ipst->ips_ndp4->ndp_g_lock);
    706   3448   dh155122 
    707   3448   dh155122 	/* Get head of v4 hash table */
    708  11042       Erik 	ncec = *((ncec_t **)NCE_HASH_PTR_V4(ipst, *addr));
    709   2535   sangeeta 	IN6_IPADDR_TO_V4MAPPED(*addr, &addr6);
    710  11042       Erik 	ncec = ncec_lookup_illgrp(ill, &addr6, ncec);
    711  11042       Erik 	mutex_exit(&ipst->ips_ndp4->ndp_g_lock);
    712  11042       Erik 	rw_exit(&ipst->ips_ill_g_lock);
    713  11042       Erik 	return (ncec);
    714  11042       Erik }
    715  11042       Erik 
    716  11042       Erik /*
    717  11042       Erik  * Cache entry lookup.  Try to find an ncec matching the parameters passed.
    718  11042       Erik  * If an ncec is found, increment the hold count on that ncec.
    719  11042       Erik  * The caller passes in the start of the appropriate hash table, and must
    720  11042       Erik  * be holding the appropriate global lock (ndp_g_lock). In addition, since
    721  11042       Erik  * this function matches ncec_t entries across the illgrp, the ips_ill_g_lock
    722  11042       Erik  * must be held as reader.
    723  11042       Erik  *
    724  11042       Erik  * This function always matches across the ipmp group.
    725  11042       Erik  */
    726  11042       Erik ncec_t *
    727  11042       Erik ncec_lookup_illgrp(ill_t *ill, const in6_addr_t *addr, ncec_t *ncec)
    728      0     stevel {
    729   3448   dh155122 	ndp_g_t		*ndp;
    730   3448   dh155122 	ip_stack_t	*ipst = ill->ill_ipst;
    731   3448   dh155122 
    732   3448   dh155122 	if (ill->ill_isv6)
    733   3448   dh155122 		ndp = ipst->ips_ndp6;
    734   3448   dh155122 	else
    735   3448   dh155122 		ndp = ipst->ips_ndp4;
    736      0     stevel 
    737  11042       Erik 	ASSERT(ill != NULL);
    738   2535   sangeeta 	ASSERT(MUTEX_HELD(&ndp->ndp_g_lock));
    739      0     stevel 	if (IN6_IS_ADDR_UNSPECIFIED(addr))
    740      0     stevel 		return (NULL);
    741  11042       Erik 	for (; ncec != NULL; ncec = ncec->ncec_next) {
    742  11042       Erik 		if (ncec->ncec_ill == ill ||
    743  11042       Erik 		    IS_IN_SAME_ILLGRP(ill, ncec->ncec_ill)) {
    744  11042       Erik 			if (IN6_ARE_ADDR_EQUAL(&ncec->ncec_addr, addr)) {
    745  11042       Erik 				mutex_enter(&ncec->ncec_lock);
    746  11042       Erik 				if (!NCE_ISCONDEMNED(ncec)) {
    747  11042       Erik 					ncec_refhold_locked(ncec);
    748  11042       Erik 					mutex_exit(&ncec->ncec_lock);
    749      0     stevel 					break;
    750      0     stevel 				}
    751  11042       Erik 				mutex_exit(&ncec->ncec_lock);
    752  11042       Erik 			}
    753  11042       Erik 		}
    754  11042       Erik 	}
    755  11042       Erik 	return (ncec);
    756  11042       Erik }
    757  11042       Erik 
    758  11042       Erik /*
    759  11042       Erik  * Find an nce_t on ill with nce_addr == addr. Lookup the nce_t
    760  11042       Erik  * entries for ill only, i.e., when ill is part of an ipmp group,
    761  11042       Erik  * nce_lookup_v4 will never try to match across the group.
    762  11042       Erik  */
    763  11042       Erik nce_t *
    764  11042       Erik nce_lookup_v4(ill_t *ill, const in_addr_t *addr)
    765  11042       Erik {
    766  11042       Erik 	nce_t *nce;
    767  11042       Erik 	in6_addr_t addr6;
    768  11042       Erik 	ip_stack_t *ipst = ill->ill_ipst;
    769  11042       Erik 
    770  11042       Erik 	mutex_enter(&ipst->ips_ndp4->ndp_g_lock);
    771  11042       Erik 	IN6_IPADDR_TO_V4MAPPED(*addr, &addr6);
    772  11042       Erik 	nce = nce_lookup_addr(ill, &addr6);
    773  11042       Erik 	mutex_exit(&ipst->ips_ndp4->ndp_g_lock);
    774  11042       Erik 	return (nce);
    775  11042       Erik }
    776  11042       Erik 
    777  11042       Erik /*
    778  11042       Erik  * Find an nce_t on ill with nce_addr == addr. Lookup the nce_t
    779  11042       Erik  * entries for ill only, i.e., when ill is part of an ipmp group,
    780  11042       Erik  * nce_lookup_v6 will never try to match across the group.
    781  11042       Erik  */
    782  11042       Erik nce_t *
    783  11042       Erik nce_lookup_v6(ill_t *ill, const in6_addr_t *addr6)
    784  11042       Erik {
    785  11042       Erik 	nce_t *nce;
    786  11042       Erik 	ip_stack_t *ipst = ill->ill_ipst;
    787  11042       Erik 
    788  11042       Erik 	mutex_enter(&ipst->ips_ndp6->ndp_g_lock);
    789  11042       Erik 	nce = nce_lookup_addr(ill, addr6);
    790  11042       Erik 	mutex_exit(&ipst->ips_ndp6->ndp_g_lock);
    791  11042       Erik 	return (nce);
    792  11042       Erik }
    793  11042       Erik 
    794  11042       Erik static nce_t *
    795  11042       Erik nce_lookup_addr(ill_t *ill, const in6_addr_t *addr)
    796  11042       Erik {
    797  11042       Erik 	nce_t *nce;
    798  11042       Erik 
    799  11042       Erik 	ASSERT(ill != NULL);
    800  11042       Erik #ifdef DEBUG
    801  11042       Erik 	if (ill->ill_isv6)
    802  11042       Erik 		ASSERT(MUTEX_HELD(&ill->ill_ipst->ips_ndp6->ndp_g_lock));
    803  11042       Erik 	else
    804  11042       Erik 		ASSERT(MUTEX_HELD(&ill->ill_ipst->ips_ndp4->ndp_g_lock));
    805  11042       Erik #endif
    806  11042       Erik 	mutex_enter(&ill->ill_lock);
    807  11042       Erik 	nce = nce_lookup(ill, addr);
    808  11042       Erik 	mutex_exit(&ill->ill_lock);
    809  11042       Erik 	return (nce);
    810  11042       Erik }
    811  11042       Erik 
    812  11042       Erik 
    813  11042       Erik /*
    814  11042       Erik  * Router turned to host.  We need to make sure that cached copies of the ncec
    815  11042       Erik  * are not used for forwarding packets if they were derived from the default
    816  11042       Erik  * route, and that the default route itself is removed, as  required by
    817  11042       Erik  * section 7.2.5 of RFC 2461.
    818  11042       Erik  *
    819  11042       Erik  * Note that the ncec itself probably has valid link-layer information for the
    820  11042       Erik  * nexthop, so that there is no reason to delete the ncec, as long as the
    821  11042       Erik  * ISROUTER flag is turned off.
    822  11042       Erik  */
    823  11042       Erik static void
    824  11042       Erik ncec_router_to_host(ncec_t *ncec)
    825  11042       Erik {
    826  11042       Erik 	ire_t		*ire;
    827  11042       Erik 	ip_stack_t	*ipst = ncec->ncec_ipst;
    828  11042       Erik 
    829  11042       Erik 	mutex_enter(&ncec->ncec_lock);
    830  11042       Erik 	ncec->ncec_flags &= ~NCE_F_ISROUTER;
    831  11042       Erik 	mutex_exit(&ncec->ncec_lock);
    832  11042       Erik 
    833  11042       Erik 	ire = ire_ftable_lookup_v6(&ipv6_all_zeros, &ipv6_all_zeros,
    834  11042       Erik 	    &ncec->ncec_addr, IRE_DEFAULT, ncec->ncec_ill, ALL_ZONES, NULL,
    835  11042       Erik 	    MATCH_IRE_ILL | MATCH_IRE_TYPE | MATCH_IRE_GW, 0, ipst, NULL);
    836  11042       Erik 	if (ire != NULL) {
    837  11042       Erik 		ip_rts_rtmsg(RTM_DELETE, ire, 0, ipst);
    838  11042       Erik 		ire_delete(ire);
    839  11042       Erik 		ire_refrele(ire);
    840  11042       Erik 	}
    841      0     stevel }
    842      0     stevel 
    843      0     stevel /*
    844      0     stevel  * Process passed in parameters either from an incoming packet or via
    845      0     stevel  * user ioctl.
    846      0     stevel  */
    847  11042       Erik void
    848  11042       Erik nce_process(ncec_t *ncec, uchar_t *hw_addr, uint32_t flag, boolean_t is_adv)
    849  11042       Erik {
    850  11042       Erik 	ill_t	*ill = ncec->ncec_ill;
    851  11042       Erik 	uint32_t hw_addr_len = ill->ill_phys_addr_length;
    852      0     stevel 	boolean_t ll_updated = B_FALSE;
    853      0     stevel 	boolean_t ll_changed;
    854  11042       Erik 	nce_t	*nce;
    855  11042       Erik 
    856  11042       Erik 	ASSERT(ncec->ncec_ipversion == IPV6_VERSION);
    857      0     stevel 	/*
    858      0     stevel 	 * No updates of link layer address or the neighbor state is
    859      0     stevel 	 * allowed, when the cache is in NONUD state.  This still
    860      0     stevel 	 * allows for responding to reachability solicitation.
    861      0     stevel 	 */
    862  11042       Erik 	mutex_enter(&ncec->ncec_lock);
    863  11042       Erik 	if (ncec->ncec_state == ND_INCOMPLETE) {
    864      0     stevel 		if (hw_addr == NULL) {
    865  11042       Erik 			mutex_exit(&ncec->ncec_lock);
    866      0     stevel 			return;
    867      0     stevel 		}
    868  11042       Erik 		nce_set_ll(ncec, hw_addr);
    869  11042       Erik 		/*
    870  11042       Erik 		 * Update ncec state and send the queued packets
    871      0     stevel 		 * back to ip this time ire will be added.
    872      0     stevel 		 */
    873      0     stevel 		if (flag & ND_NA_FLAG_SOLICITED) {
    874  11042       Erik 			nce_update(ncec, ND_REACHABLE, NULL);
    875  11042       Erik 		} else {
    876  11042       Erik 			nce_update(ncec, ND_STALE, NULL);
    877  11042       Erik 		}
    878  11042       Erik 		mutex_exit(&ncec->ncec_lock);
    879  11042       Erik 		nce = nce_fastpath(ncec, B_TRUE, NULL);
    880  11042       Erik 		nce_resolv_ok(ncec);
    881  11042       Erik 		if (nce != NULL)
    882  11042       Erik 			nce_refrele(nce);
    883  11042       Erik 		return;
    884  11042       Erik 	}
    885  11042       Erik 	ll_changed = nce_cmp_ll_addr(ncec, hw_addr, hw_addr_len);
    886      0     stevel 	if (!is_adv) {
    887      0     stevel 		/* If this is a SOLICITATION request only */
    888      0     stevel 		if (ll_changed)
    889  11042       Erik 			nce_update(ncec, ND_STALE, hw_addr);
    890  11042       Erik 		mutex_exit(&ncec->ncec_lock);
    891  11042       Erik 		ncec_cb_dispatch(ncec);
    892      0     stevel 		return;
    893      0     stevel 	}
    894      0     stevel 	if (!(flag & ND_NA_FLAG_OVERRIDE) && ll_changed) {
    895      0     stevel 		/* If in any other state than REACHABLE, ignore */
    896  11042       Erik 		if (ncec->ncec_state == ND_REACHABLE) {
    897  11042       Erik 			nce_update(ncec, ND_STALE, NULL);
    898  11042       Erik 		}
    899  11042       Erik 		mutex_exit(&ncec->ncec_lock);
    900  11042       Erik 		ncec_cb_dispatch(ncec);
    901      0     stevel 		return;
    902      0     stevel 	} else {
    903      0     stevel 		if (ll_changed) {
    904  11042       Erik 			nce_update(ncec, ND_UNCHANGED, hw_addr);
    905      0     stevel 			ll_updated = B_TRUE;
    906      0     stevel 		}
    907      0     stevel 		if (flag & ND_NA_FLAG_SOLICITED) {
    908  11042       Erik 			nce_update(ncec, ND_REACHABLE, NULL);
    909      0     stevel 		} else {
    910      0     stevel 			if (ll_updated) {
    911  11042       Erik 				nce_update(ncec, ND_STALE, NULL);
    912  11042       Erik 			}
    913  11042       Erik 		}
    914  11042       Erik 		mutex_exit(&ncec->ncec_lock);
    915  11042       Erik 		if (!(flag & ND_NA_FLAG_ROUTER) && (ncec->ncec_flags &
    916      0     stevel 		    NCE_F_ISROUTER)) {
    917  11042       Erik 			ncec_router_to_host(ncec);
    918  11042       Erik 		} else {
    919  11042       Erik 			ncec_cb_dispatch(ncec);
    920  11042       Erik 		}
    921  11042       Erik 	}
    922  11042       Erik }
    923  11042       Erik 
    924  11042       Erik /*
    925  11042       Erik  * Pass arg1 to the pfi supplied, along with each ncec in existence.
    926  11042       Erik  * ncec_walk() places a REFHOLD on the ncec and drops the lock when
    927      0     stevel  * walking the hash list.
    928      0     stevel  */
    929      0     stevel void
    930  11042       Erik ncec_walk_common(ndp_g_t *ndp, ill_t *ill, pfi_t pfi, void *arg1,
    931   2535   sangeeta     boolean_t trace)
    932      0     stevel {
    933  11042       Erik 	ncec_t	*ncec;
    934  11042       Erik 	ncec_t	*ncec1;
    935  11042       Erik 	ncec_t	**ncep;
    936  11042       Erik 	ncec_t	*free_nce_list = NULL;
    937      0     stevel 
    938   2535   sangeeta 	mutex_enter(&ndp->ndp_g_lock);
    939  11042       Erik 	/* Prevent ncec_delete from unlink and free of NCE */
    940   2535   sangeeta 	ndp->ndp_g_walker++;
    941   2535   sangeeta 	mutex_exit(&ndp->ndp_g_lock);
    942   2535   sangeeta 	for (ncep = ndp->nce_hash_tbl;
    943   2535   sangeeta 	    ncep < A_END(ndp->nce_hash_tbl); ncep++) {
    944  11042       Erik 		for (ncec = *ncep; ncec != NULL; ncec = ncec1) {
    945  11042       Erik 			ncec1 = ncec->ncec_next;
    946  11042       Erik 			if (ill == NULL || ncec->ncec_ill == ill) {
    947      0     stevel 				if (trace) {
    948  11042       Erik 					ncec_refhold(ncec);
    949  11042       Erik 					(*pfi)(ncec, arg1);
    950  11042       Erik 					ncec_refrele(ncec);
    951      0     stevel 				} else {
    952  11042       Erik 					ncec_refhold_notr(ncec);
    953  11042       Erik 					(*pfi)(ncec, arg1);
    954  11042       Erik 					ncec_refrele_notr(ncec);
    955  11042       Erik 				}
    956      0     stevel 			}
    957      0     stevel 		}
    958      0     stevel 	}
    959   2535   sangeeta 	mutex_enter(&ndp->ndp_g_lock);
    960   2535   sangeeta 	ndp->ndp_g_walker--;
    961   2535   sangeeta 	if (ndp->ndp_g_walker_cleanup && ndp->ndp_g_walker == 0) {
    962      0     stevel 		/* Time to delete condemned entries */
    963   2535   sangeeta 		for (ncep = ndp->nce_hash_tbl;
    964   2535   sangeeta 		    ncep < A_END(ndp->nce_hash_tbl); ncep++) {
    965  11042       Erik 			ncec = *ncep;
    966  11042       Erik 			if (ncec != NULL) {
    967  11042       Erik 				nce_remove(ndp, ncec, &free_nce_list);
    968  11042       Erik 			}
    969      0     stevel 		}
    970   2535   sangeeta 		ndp->ndp_g_walker_cleanup = B_FALSE;
    971      0     stevel 	}
    972   4714    sowmini 
    973   2535   sangeeta 	mutex_exit(&ndp->ndp_g_lock);
    974      0     stevel 
    975      0     stevel 	if (free_nce_list != NULL) {
    976  11042       Erik 		nce_cleanup_list(free_nce_list);
    977      0     stevel 	}
    978      0     stevel }
    979      0     stevel 
    980   3448   dh155122 /*
    981   3448   dh155122  * Walk everything.
    982   3448   dh155122  * Note that ill can be NULL hence can't derive the ipst from it.
    983   3448   dh155122  */
    984   3448   dh155122 void
    985  11042       Erik ncec_walk(ill_t *ill, pfi_t pfi, void *arg1, ip_stack_t *ipst)
    986  11042       Erik {
    987  11042       Erik 	ncec_walk_common(ipst->ips_ndp4, ill, pfi, arg1, B_TRUE);
    988  11042       Erik 	ncec_walk_common(ipst->ips_ndp6, ill, pfi, arg1, B_TRUE);
    989      0     stevel }
    990      0     stevel 
    991      0     stevel /*
    992      0     stevel  * For each interface an entry is added for the unspecified multicast group.
    993      0     stevel  * Here that mapping is used to form the multicast cache entry for a particular
    994      0     stevel  * multicast destination.
    995      0     stevel  */
    996      0     stevel static int
    997  11042       Erik nce_set_multicast_v6(ill_t *ill, const in6_addr_t *dst,
    998  11042       Erik     uint16_t flags, nce_t **newnce)
    999  11042       Erik {
   1000  11042       Erik 	uchar_t		*hw_addr;
   1001  11042       Erik 	int		err = 0;
   1002  11042       Erik 	ip_stack_t	*ipst = ill->ill_ipst;
   1003      0     stevel 	nce_t		*nce;
   1004      0     stevel 
   1005      0     stevel 	ASSERT(ill != NULL);
   1006   2535   sangeeta 	ASSERT(ill->ill_isv6);
   1007      0     stevel 	ASSERT(!(IN6_IS_ADDR_UNSPECIFIED(dst)));
   1008      0     stevel 
   1009   3448   dh155122 	mutex_enter(&ipst->ips_ndp6->ndp_g_lock);
   1010  11042       Erik 	nce = nce_lookup_addr(ill, dst);
   1011      0     stevel 	if (nce != NULL) {
   1012   3448   dh155122 		mutex_exit(&ipst->ips_ndp6->ndp_g_lock);
   1013  11042       Erik 		goto done;
   1014  11042       Erik 	}
   1015      0     stevel 	if (ill->ill_net_type == IRE_IF_RESOLVER) {
   1016      0     stevel 		/*
   1017      0     stevel 		 * For IRE_IF_RESOLVER a hardware mapping can be
   1018  11042       Erik 		 * generated.
   1019      0     stevel 		 */
   1020      0     stevel 		hw_addr = kmem_alloc(ill->ill_nd_lla_len, KM_NOSLEEP);
   1021      0     stevel 		if (hw_addr == NULL) {
   1022   3448   dh155122 			mutex_exit(&ipst->ips_ndp6->ndp_g_lock);
   1023      0     stevel 			return (ENOMEM);
   1024      0     stevel 		}
   1025  11042       Erik 		ip_mcast_mapping(ill, (uchar_t *)dst, hw_addr);
   1026  11042       Erik 	} else {
   1027  11077       Erik 		/* No hw_addr is needed for IRE_IF_NORESOLVER. */
   1028  11042       Erik 		hw_addr = NULL;
   1029  11042       Erik 	}
   1030  11042       Erik 	ASSERT((flags & NCE_F_MCAST) != 0);
   1031  11042       Erik 	ASSERT((flags & NCE_F_NONUD) != 0);
   1032  11042       Erik 	/* nce_state will be computed by nce_add_common() */
   1033  11042       Erik 	err = nce_add_v6(ill, hw_addr, ill->ill_phys_addr_length, dst, flags,
   1034  11042       Erik 	    ND_UNCHANGED, &nce);
   1035   3448   dh155122 	mutex_exit(&ipst->ips_ndp6->ndp_g_lock);
   1036  11042       Erik 	if (err == 0)
   1037  11042       Erik 		err = nce_add_v6_postprocess(nce);
   1038      0     stevel 	if (hw_addr != NULL)
   1039      0     stevel 		kmem_free(hw_addr, ill->ill_nd_lla_len);
   1040      0     stevel 	if (err != 0) {
   1041  11042       Erik 		ip1dbg(("nce_set_multicast_v6: create failed" "%d\n", err));
   1042      0     stevel 		return (err);
   1043      0     stevel 	}
   1044  11042       Erik done:
   1045  11042       Erik 	ASSERT(nce->nce_common->ncec_state == ND_REACHABLE);
   1046  11042       Erik 	if (newnce != NULL)
   1047  11042       Erik 		*newnce = nce;
   1048  11042       Erik 	else
   1049  11042       Erik 		nce_refrele(nce);
   1050  11042       Erik 	return (0);
   1051  11042       Erik }
   1052  11042       Erik 
   1053  11042       Erik /*
   1054  11042       Erik  * Return the link layer address, and any flags of a ncec.
   1055      0     stevel  */
   1056      0     stevel int
   1057      0     stevel ndp_query(ill_t *ill, struct lif_nd_req *lnr)
   1058      0     stevel {
   1059  11042       Erik 	ncec_t		*ncec;
   1060      0     stevel 	in6_addr_t	*addr;
   1061      0     stevel 	sin6_t		*sin6;
   1062      0     stevel 
   1063   2535   sangeeta 	ASSERT(ill != NULL && ill->ill_isv6);
   1064      0     stevel 	sin6 = (sin6_t *)&lnr->lnr_addr;
   1065      0     stevel 	addr =  &sin6->sin6_addr;
   1066      0     stevel 
   1067   8485      Peter 	/*
   1068   8485      Peter 	 * NOTE: if the ill is an IPMP interface, then match against the whole
   1069   8485      Peter 	 * illgrp.  This e.g. allows in.ndpd to retrieve the link layer
   1070   8485      Peter 	 * addresses for the data addresses on an IPMP interface even though
   1071  11042       Erik 	 * ipif_ndp_up() created them with an ncec_ill of ipif_bound_ill.
   1072  11042       Erik 	 */
   1073  11042       Erik 	ncec = ncec_lookup_illgrp_v6(ill, addr);
   1074  11042       Erik 	if (ncec == NULL)
   1075      0     stevel 		return (ESRCH);
   1076  11042       Erik 	/* If no link layer address is available yet, return ESRCH */
   1077  11042       Erik 	if (!NCE_ISREACHABLE(ncec)) {
   1078  11042       Erik 		ncec_refrele(ncec);
   1079   9658    Sowmini 		return (ESRCH);
   1080   9658    Sowmini 	}
   1081  11042       Erik 	lnr->lnr_hdw_len = ill->ill_phys_addr_length;
   1082  11042       Erik 	bcopy(ncec->ncec_lladdr, (uchar_t *)&lnr->lnr_hdw_addr,
   1083  11042       Erik 	    lnr->lnr_hdw_len);
   1084  11042       Erik 	if (ncec->ncec_flags & NCE_F_ISROUTER)
   1085      0     stevel 		lnr->lnr_flags = NDF_ISROUTER_ON;
   1086  11042       Erik 	if (ncec->ncec_flags & NCE_F_ANYCAST)
   1087      0     stevel 		lnr->lnr_flags |= NDF_ANYCAST_ON;
   1088  11042       Erik 	ncec_refrele(ncec);
   1089  11042       Erik 	return (0);
   1090  11042       Erik }
   1091  11042       Erik 
   1092  11042       Erik /*
   1093  11042       Erik  * Finish setting up the Enable/Disable multicast for the driver.
   1094  11042       Erik  */
   1095  11042       Erik mblk_t *
   1096  11042       Erik ndp_mcastreq(ill_t *ill, const in6_addr_t *v6group, uint32_t hw_addr_len,
   1097      0     stevel     uint32_t hw_addr_offset, mblk_t *mp)
   1098      0     stevel {
   1099      0     stevel 	uchar_t		*hw_addr;
   1100  11042       Erik 	ipaddr_t	v4group;
   1101  11042       Erik 	uchar_t		*addr;
   1102  11042       Erik 
   1103      0     stevel 	ASSERT(ill->ill_net_type == IRE_IF_RESOLVER);
   1104  11042       Erik 	if (IN6_IS_ADDR_V4MAPPED(v6group)) {
   1105  11042       Erik 		IN6_V4MAPPED_TO_IPADDR(v6group, v4group);
   1106  11042       Erik 
   1107  11042       Erik 		ASSERT(CLASSD(v4group));
   1108  11042       Erik 		ASSERT(!(ill->ill_isv6));
   1109  11042       Erik 
   1110  11042       Erik 		addr = (uchar_t *)&v4group;
   1111  11042       Erik 	} else {
   1112  11042       Erik 		ASSERT(IN6_IS_ADDR_MULTICAST(v6group));
   1113  11042       Erik 		ASSERT(ill->ill_isv6);
   1114  11042       Erik 
   1115  11042       Erik 		addr = (uchar_t *)v6group;
   1116  11042       Erik 	}
   1117      0     stevel 	hw_addr = mi_offset_paramc(mp, hw_addr_offset, hw_addr_len);
   1118  11042       Erik 	if (hw_addr == NULL) {
   1119  11042       Erik 		ip0dbg(("ndp_mcastreq NULL hw_addr\n"));
   1120      0     stevel 		freemsg(mp);
   1121  11042       Erik 		return (NULL);
   1122  11042       Erik 	}
   1123  11042       Erik 
   1124  11042       Erik 	ip_mcast_mapping(ill, addr, hw_addr);
   1125  11042       Erik 	return (mp);
   1126  11042       Erik }
   1127  11042       Erik 
   1128  11042       Erik void
   1129  11042       Erik ip_ndp_resolve(ncec_t *ncec)
   1130  11042       Erik {
   1131  11042       Erik 	in_addr_t	sender4 = INADDR_ANY;
   1132   9175    Sowmini 	in6_addr_t	sender6 = ipv6_all_zeros;
   1133  11042       Erik 	ill_t		*src_ill;
   1134   9175    Sowmini 	uint32_t	ms;
   1135  11042       Erik 
   1136  11042       Erik 	src_ill = nce_resolve_src(ncec, &sender6);
   1137  11042       Erik 	if (src_ill == NULL) {
   1138  11042       Erik 		/* Make sure we try again later */
   1139  11042       Erik 		ms = ncec->ncec_ill->ill_reachable_retrans_time;
   1140  11042       Erik 		nce_restart_timer(ncec, (clock_t)ms);
   1141  11042       Erik 		return;
   1142  11042       Erik 	}
   1143  11042       Erik 	if (ncec->ncec_ipversion == IPV4_VERSION)
   1144  11042       Erik 		IN6_V4MAPPED_TO_IPADDR(&sender6, sender4);
   1145  11042       Erik 	mutex_enter(&ncec->ncec_lock);
   1146  11042       Erik 	if (ncec->ncec_ipversion == IPV6_VERSION)
   1147  11042       Erik 		ms = ndp_solicit(ncec, sender6, src_ill);
   1148  11042       Erik 	else
   1149  11042       Erik 		ms = arp_request(ncec, sender4, src_ill);
   1150  11042       Erik 	mutex_exit(&ncec->ncec_lock);
   1151   9175    Sowmini 	if (ms == 0) {
   1152  11042       Erik 		if (ncec->ncec_state != ND_REACHABLE) {
   1153  11042       Erik 			if (ncec->ncec_ipversion == IPV6_VERSION)
   1154  11042       Erik 				ndp_resolv_failed(ncec);
   1155  11042       Erik 			else
   1156  11042       Erik 				arp_resolv_failed(ncec);
   1157  11042       Erik 			ASSERT((ncec->ncec_flags & NCE_F_STATIC) == 0);
   1158  11042       Erik 			nce_make_unreachable(ncec);
   1159  11042       Erik 			ncec_delete(ncec);
   1160  11042       Erik 		}
   1161  11042       Erik 	} else {
   1162  11042       Erik 		nce_restart_timer(ncec, (clock_t)ms);
   1163  11042       Erik 	}
   1164  11042       Erik done:
   1165  11042       Erik 	ill_refrele(src_ill);
   1166  11042       Erik }
   1167  11042       Erik 
   1168  11042       Erik /*
   1169  11042       Erik  * Send an IPv6 neighbor solicitation.
   1170      0     stevel  * Returns number of milliseconds after which we should either rexmit or abort.
   1171      0     stevel  * Return of zero means we should abort.
   1172  11042       Erik  * The caller holds the ncec_lock to protect ncec_qd_mp and ncec_rcnt.
   1173  11042       Erik  * The optional source address is used as a hint to ndp_solicit for
   1174  11042       Erik  * which source to use in the packet.
   1175  11042       Erik  *
   1176  11042       Erik  * NOTE: This routine drops ncec_lock (and later reacquires it) when sending
   1177      0     stevel  * the packet.
   1178      0     stevel  */
   1179      0     stevel uint32_t
   1180  11042       Erik ndp_solicit(ncec_t *ncec, in6_addr_t src, ill_t *ill)
   1181  11042       Erik {
   1182  11042       Erik 	in6_addr_t	dst;
   1183  11042       Erik 	boolean_t	dropped = B_FALSE;
   1184  11042       Erik 
   1185  11042       Erik 	ASSERT(ncec->ncec_ipversion == IPV6_VERSION);
   1186  11042       Erik 	ASSERT(MUTEX_HELD(&ncec->ncec_lock));
   1187  11042       Erik 
   1188  11042       Erik 	if (ncec->ncec_rcnt == 0)
   1189      0     stevel 		return (0);
   1190      0     stevel 
   1191  11042       Erik 	dst = ncec->ncec_addr;
   1192  11042       Erik 	ncec->ncec_rcnt--;
   1193  11042       Erik 	mutex_exit(&ncec->ncec_lock);
   1194  11042       Erik 	dropped = ndp_xmit(ill, ND_NEIGHBOR_SOLICIT, ill->ill_phys_addr,
   1195  11042       Erik 	    ill->ill_phys_addr_length, &src, &dst, 0);
   1196  11042       Erik 	mutex_enter(&ncec->ncec_lock);
   1197      0     stevel 	if (dropped)
   1198  11042       Erik 		ncec->ncec_rcnt++;
   1199  11042       Erik 	return (ncec->ncec_ill->ill_reachable_retrans_time);
   1200      0     stevel }
   1201      0     stevel 
   1202   2546   carlsonj /*
   1203   2546   carlsonj  * Attempt to recover an address on an interface that's been marked as a
   1204   2546   carlsonj  * duplicate.  Because NCEs are destroyed when the interface goes down, there's
   1205   2546   carlsonj  * no easy way to just probe the address and have the right thing happen if
   1206   2546   carlsonj  * it's no longer in use.  Instead, we just bring it up normally and allow the
   1207   2546   carlsonj  * regular interface start-up logic to probe for a remaining duplicate and take
   1208   2546   carlsonj  * us back down if necessary.
   1209   2546   carlsonj  * Neither DHCP nor temporary addresses arrive here; they're excluded by
   1210   2546   carlsonj  * ip_ndp_excl.
   1211   2546   carlsonj  */
   1212   2546   carlsonj /* ARGSUSED */
   1213  11042       Erik void
   1214  11042       Erik ip_addr_recover(ipsq_t *ipsq, queue_t *rq, mblk_t *mp, void *dummy_arg)
   1215   2546   carlsonj {
   1216   2546   carlsonj 	ill_t	*ill = rq->q_ptr;
   1217   2546   carlsonj 	ipif_t	*ipif;
   1218  11042       Erik 	in6_addr_t *addr6 = (in6_addr_t *)mp->b_rptr;
   1219  11042       Erik 	in_addr_t *addr4 = (in_addr_t *)mp->b_rptr;
   1220  11042       Erik 	boolean_t addr_equal;
   1221   2546   carlsonj 
   1222   2546   carlsonj 	for (ipif = ill->ill_ipif; ipif != NULL; ipif = ipif->ipif_next) {
   1223   2546   carlsonj 		/*
   1224   2546   carlsonj 		 * We do not support recovery of proxy ARP'd interfaces,
   1225   2546   carlsonj 		 * because the system lacks a complete proxy ARP mechanism.
   1226   2546   carlsonj 		 */
   1227  11042       Erik 		if (ill->ill_isv6) {
   1228  11042       Erik 			addr_equal = IN6_ARE_ADDR_EQUAL(&ipif->ipif_v6lcl_addr,
   1229  11042       Erik 			    addr6);
   1230  11042       Erik 		} else {
   1231  11042       Erik 			addr_equal = (ipif->ipif_lcl_addr == *addr4);
   1232  11042       Erik 		}
   1233  11042       Erik 
   1234  11042       Erik 		if ((ipif->ipif_flags & IPIF_POINTOPOINT) || !addr_equal)
   1235   2546   carlsonj 			continue;
   1236   2546   carlsonj 
   1237   2546   carlsonj 		/*
   1238   3322   carlsonj 		 * If we have already recovered or if the interface is going
   1239   3322   carlsonj 		 * away, then ignore.
   1240   2546   carlsonj 		 */
   1241   2546   carlsonj 		mutex_enter(&ill->ill_lock);
   1242   3322   carlsonj 		if (!(ipif->ipif_flags & IPIF_DUPLICATE) ||
   1243   8485      Peter 		    (ipif->ipif_state_flags & IPIF_CONDEMNED)) {
   1244   2546   carlsonj 			mutex_exit(&ill->ill_lock);
   1245   2546   carlsonj 			continue;
   1246   2546   carlsonj 		}
   1247   2546   carlsonj 
   1248   2546   carlsonj 		ipif->ipif_flags &= ~IPIF_DUPLICATE;
   1249   2546   carlsonj 		ill->ill_ipif_dup_count--;
   1250   2546   carlsonj 		mutex_exit(&ill->ill_lock);
   1251   2546   carlsonj 		ipif->ipif_was_dup = B_TRUE;
   1252   2546   carlsonj 
   1253  11042       Erik 		if (ill->ill_isv6) {
   1254  11042       Erik 			VERIFY(ipif_ndp_up(ipif, B_TRUE) != EINPROGRESS);
   1255  11042       Erik 			(void) ipif_up_done_v6(ipif);
   1256  11042       Erik 		} else {
   1257  11042       Erik 			VERIFY(ipif_arp_up(ipif, Res_act_initial, B_TRUE) !=
   1258  11042       Erik 			    EINPROGRESS);
   1259  11042       Erik 			(void) ipif_up_done(ipif);
   1260  11042       Erik 		}
   1261   2546   carlsonj 	}
   1262   2546   carlsonj 	freeb(mp);
   1263   2546   carlsonj }
   1264   2546   carlsonj 
   1265   2546   carlsonj /*
   1266   2546   carlsonj  * Attempt to recover an IPv6 interface that's been shut down as a duplicate.
   1267   2546   carlsonj  * As long as someone else holds the address, the interface will stay down.
   1268   2546   carlsonj  * When that conflict goes away, the interface is brought back up.  This is
   1269   2546   carlsonj  * done so that accidental shutdowns of addresses aren't made permanent.  Your
   1270   2546   carlsonj  * server will recover from a failure.
   1271   2546   carlsonj  *
   1272   2546   carlsonj  * For DHCP and temporary addresses, recovery is not done in the kernel.
   1273   2546   carlsonj  * Instead, it's handled by user space processes (dhcpagent and in.ndpd).
   1274   2546   carlsonj  *
   1275   2546   carlsonj  * This function is entered on a timer expiry; the ID is in ipif_recovery_id.
   1276   2546   carlsonj  */
   1277  11042       Erik void
   1278  11042       Erik ipif_dup_recovery(void *arg)
   1279   2546   carlsonj {
   1280   2546   carlsonj 	ipif_t *ipif = arg;
   1281   2546   carlsonj 
   1282   2546   carlsonj 	ipif->ipif_recovery_id = 0;
   1283   2546   carlsonj 	if (!(ipif->ipif_flags & IPIF_DUPLICATE))
   1284   2546   carlsonj 		return;
   1285   2546   carlsonj 
   1286   3322   carlsonj 	/*
   1287   3322   carlsonj 	 * No lock, because this is just an optimization.
   1288   3322   carlsonj 	 */
   1289   8485      Peter 	if (ipif->ipif_state_flags & IPIF_CONDEMNED)
   1290   3322   carlsonj 		return;
   1291   3322   carlsonj 
   1292   2546   carlsonj 	/* If the link is down, we'll retry this later */
   1293   2546   carlsonj 	if (!(ipif->ipif_ill->ill_phyint->phyint_flags & PHYI_RUNNING))
   1294   2546   carlsonj 		return;
   1295   2546   carlsonj 
   1296  11042       Erik 	ipif_do_recovery(ipif);
   1297   2546   carlsonj }
   1298   2546   carlsonj 
   1299   2546   carlsonj /*
   1300   2546   carlsonj  * Perform interface recovery by forcing the duplicate interfaces up and
   1301   2546   carlsonj  * allowing the system to determine which ones should stay up.
   1302   2546   carlsonj  *
   1303   2546   carlsonj  * Called both by recovery timer expiry and link-up notification.
   1304   2546   carlsonj  */
   1305      0     stevel void
   1306  11042       Erik ipif_do_recovery(ipif_t *ipif)
   1307   2546   carlsonj {
   1308   2546   carlsonj 	ill_t *ill = ipif->ipif_ill;
   1309   2546   carlsonj 	mblk_t *mp;
   1310   3448   dh155122 	ip_stack_t *ipst = ill->ill_ipst;
   1311  11042       Erik 	size_t mp_size;
   1312  11042       Erik 
   1313  11042       Erik 	if (ipif->ipif_isv6)
   1314  11042       Erik 		mp_size = sizeof (ipif->ipif_v6lcl_addr);
   1315  11042       Erik 	else
   1316  11042       Erik 		mp_size = sizeof (ipif->ipif_lcl_addr);
   1317  11042       Erik 	mp = allocb(mp_size, BPRI_MED);
   1318   2546   carlsonj 	if (mp == NULL) {
   1319   3322   carlsonj 		mutex_enter(&ill->ill_lock);
   1320  11042       Erik 		if (ipst->ips_ip_dup_recovery > 0 &&
   1321  11042       Erik 		    ipif->ipif_recovery_id == 0 &&
   1322   8485      Peter 		    !(ipif->ipif_state_flags & IPIF_CONDEMNED)) {
   1323  11042       Erik 			ipif->ipif_recovery_id = timeout(ipif_dup_recovery,
   1324   3448   dh155122 			    ipif, MSEC_TO_TICK(ipst->ips_ip_dup_recovery));
   1325   3322   carlsonj 		}
   1326   3322   carlsonj 		mutex_exit(&ill->ill_lock);
   1327   2546   carlsonj 	} else {
   1328   8485      Peter 		/*
   1329   8485      Peter 		 * A recovery timer may still be running if we got here from
   1330   8485      Peter 		 * ill_restart_dad(); cancel that timer.
   1331   8485      Peter 		 */
   1332   8485      Peter 		if (ipif->ipif_recovery_id != 0)
   1333   8485      Peter 			(void) untimeout(ipif->ipif_recovery_id);
   1334   8485      Peter 		ipif->ipif_recovery_id = 0;
   1335   8485      Peter 
   1336  11042       Erik 		if (ipif->ipif_isv6) {
   1337  11042       Erik 			bcopy(&ipif->ipif_v6lcl_addr, mp->b_rptr,
   1338  11042       Erik 			    sizeof (ipif->ipif_v6lcl_addr));
   1339  11042       Erik 		} else  {
   1340  11042       Erik 			bcopy(&ipif->ipif_lcl_addr, mp->b_rptr,
   1341  11042       Erik 			    sizeof (ipif->ipif_lcl_addr));
   1342  11042       Erik 		}
   1343   2546   carlsonj 		ill_refhold(ill);
   1344  11042       Erik 		qwriter_ip(ill, ill->ill_rq, mp, ip_addr_recover, NEW_OP,
   1345   4360       meem 		    B_FALSE);
   1346   2546   carlsonj 	}
   1347   2546   carlsonj }
   1348   2546   carlsonj 
   1349   2546   carlsonj /*
   1350   8485      Peter  * Find the MAC and IP addresses in an NA/NS message.
   1351   2546   carlsonj  */
   1352   8485      Peter static void
   1353  11042       Erik ip_ndp_find_addresses(mblk_t *mp, ip_recv_attr_t *ira, ill_t *ill,
   1354  11042       Erik     in6_addr_t *targp, uchar_t **haddr, uint_t *haddrlenp)
   1355  11042       Erik {
   1356   8485      Peter 	icmp6_t *icmp6 = (icmp6_t *)(mp->b_rptr + IPV6_HDR_LEN);
   1357   8485      Peter 	nd_neighbor_solicit_t *ns = (nd_neighbor_solicit_t *)icmp6;
   1358   2546   carlsonj 	uchar_t *addr;
   1359  11042       Erik 	int alen;
   1360  11042       Erik 
   1361  11042       Erik 	/* icmp_inbound_v6 ensures this */
   1362  11042       Erik 	ASSERT(ira->ira_flags & IRAF_L2SRC_SET);
   1363  11042       Erik 
   1364  11042       Erik 	addr = ira->ira_l2src;
   1365  11042       Erik 	alen = ill->ill_phys_addr_length;
   1366   2546   carlsonj 	if (alen > 0) {
   1367   2546   carlsonj 		*haddr = addr;
   1368   8485      Peter 		*haddrlenp = alen;
   1369   2546   carlsonj 	} else {
   1370   2546   carlsonj 		*haddr = NULL;
   1371   8485      Peter 		*haddrlenp = 0;
   1372   2546   carlsonj 	}
   1373   8485      Peter 
   1374   8485      Peter 	/* nd_ns_target and nd_na_target are at the same offset, so we cheat */
   1375   8485      Peter 	*targp = ns->nd_ns_target;
   1376   2546   carlsonj }
   1377   2546   carlsonj 
   1378   2546   carlsonj /*
   1379   2546   carlsonj  * This is for exclusive changes due to NDP duplicate address detection
   1380   2546   carlsonj  * failure.
   1381   2546   carlsonj  */
   1382   2546   carlsonj /* ARGSUSED */
   1383   2546   carlsonj static void
   1384   2546   carlsonj ip_ndp_excl(ipsq_t *ipsq, queue_t *rq, mblk_t *mp, void *dummy_arg)
   1385   2546   carlsonj {
   1386   2546   carlsonj 	ill_t	*ill = rq->q_ptr;
   1387   2546   carlsonj 	ipif_t	*ipif;
   1388   8485      Peter 	uchar_t	*haddr;
   1389   8485      Peter 	uint_t	haddrlen;
   1390   3448   dh155122 	ip_stack_t *ipst = ill->ill_ipst;
   1391   8485      Peter 	in6_addr_t targ;
   1392  11042       Erik 	ip_recv_attr_t iras;
   1393  11042       Erik 	mblk_t	*attrmp;
   1394  11042       Erik 
   1395  11042       Erik 	attrmp = mp;
   1396  11042       Erik 	mp = mp->b_cont;
   1397  11042       Erik 	attrmp->b_cont = NULL;
   1398  11042       Erik 	if (!ip_recv_attr_from_mblk(attrmp, &iras)) {
   1399  11042       Erik 		/* The ill or ip_stack_t disappeared on us */
   1400  11042       Erik 		BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards);
   1401  11042       Erik 		ip_drop_input("ip_recv_attr_from_mblk", mp, ill);
   1402  11042       Erik 		freemsg(mp);
   1403  11042       Erik 		ira_cleanup(&iras, B_TRUE);
   1404  11042       Erik 		return;
   1405  11042       Erik 	}
   1406  11042       Erik 
   1407  11042       Erik 	ASSERT(ill == iras.ira_rill);
   1408  11042       Erik 
   1409  11042       Erik 	ip_ndp_find_addresses(mp, &iras, ill, &targ, &haddr, &haddrlen);
   1410   8485      Peter 	if (haddr != NULL && haddrlen == ill->ill_phys_addr_length) {
   1411   2546   carlsonj 		/*
   1412   8485      Peter 		 * Ignore conflicts generated by misbehaving switches that
   1413   8485      Peter 		 * just reflect our own messages back to us.  For IPMP, we may
   1414   8485      Peter 		 * see reflections across any ill in the illgrp.
   1415  11042       Erik 		 *
   1416  11042       Erik 		 * RFC2462 and revisions tried to detect both the case
   1417  11042       Erik 		 * when a statically configured IPv6 address is a duplicate,
   1418  11042       Erik 		 * and the case when the L2 address itself is a duplicate. The
   1419  11042       Erik 		 * later is important because, with stateles address autoconf,
   1420  11042       Erik 		 * if the L2 address is a duplicate, the resulting IPv6
   1421  11042       Erik 		 * address(es) would also be duplicates. We rely on DAD of the
   1422  11042       Erik 		 * IPv6 address itself to detect the latter case.
   1423  11042       Erik 		 */
   1424  11042       Erik 		/* For an under ill_grp can change under lock */
   1425  11042       Erik 		rw_enter(&ipst->ips_ill_g_lock, RW_READER);
   1426   8485      Peter 		if (bcmp(haddr, ill->ill_phys_addr, haddrlen) == 0 ||
   1427   8485      Peter 		    IS_UNDER_IPMP(ill) &&
   1428  11042       Erik 		    ipmp_illgrp_find_ill(ill->ill_grp, haddr,
   1429  11042       Erik 		    haddrlen) != NULL) {
   1430  11042       Erik 			rw_exit(&ipst->ips_ill_g_lock);
   1431   8485      Peter 			goto ignore_conflict;
   1432  11042       Erik 		}
   1433  11042       Erik 		rw_exit(&ipst->ips_ill_g_lock);
   1434   8485      Peter 	}
   1435   8485      Peter 
   1436   8485      Peter 	/*
   1437   8485      Peter 	 * Look up the appropriate ipif.
   1438   8485      Peter 	 */
   1439  11042       Erik 	ipif = ipif_lookup_addr_v6(&targ, ill, ALL_ZONES, ipst);
   1440   8485      Peter 	if (ipif == NULL)
   1441   8485      Peter 		goto ignore_conflict;
   1442   8485      Peter 
   1443   8485      Peter 	/* Reload the ill to match the ipif */
   1444   8485      Peter 	ill = ipif->ipif_ill;
   1445   8485      Peter 
   1446   8485      Peter 	/* If it's already duplicate or ineligible, then don't do anything. */
   1447   8485      Peter 	if (ipif->ipif_flags & (IPIF_POINTOPOINT|IPIF_DUPLICATE)) {
   1448   8485      Peter 		ipif_refrele(ipif);
   1449   2546   carlsonj 		goto ignore_conflict;
   1450   2546   carlsonj 	}
   1451   4972       meem 
   1452   8485      Peter 	/*
   1453   8485      Peter 	 * If this is a failure during duplicate recovery, then don't
   1454   8485      Peter 	 * complain.  It may take a long time to recover.
   1455   8485      Peter 	 */
   1456   8485      Peter 	if (!ipif->ipif_was_dup) {
   1457   8485      Peter 		char ibuf[LIFNAMSIZ];
   1458   8485      Peter 		char hbuf[MAC_STR_LEN];
   1459   8485      Peter 		char sbuf[INET6_ADDRSTRLEN];
   1460   2546   carlsonj 
   1461   8485      Peter 		ipif_get_name(ipif, ibuf, sizeof (ibuf));
   1462   8485      Peter 		cmn_err(CE_WARN, "%s has duplicate address %s (in use by %s);"
   1463   8485      Peter 		    " disabled", ibuf,
   1464   8485      Peter 		    inet_ntop(AF_INET6, &targ, sbuf, sizeof (sbuf)),
   1465   8485      Peter 		    mac_colon_addr(haddr, haddrlen, hbuf, sizeof (hbuf)));
   1466   2546   carlsonj 	}
   1467   8485      Peter 	mutex_enter(&ill->ill_lock);
   1468   8485      Peter 	ASSERT(!(ipif->ipif_flags & IPIF_DUPLICATE));
   1469   8485      Peter 	ipif->ipif_flags |= IPIF_DUPLICATE;
   1470   8485      Peter 	ill->ill_ipif_dup_count++;
   1471   8485      Peter 	mutex_exit(&ill->ill_lock);
   1472   8485      Peter 	(void) ipif_down(ipif, NULL, NULL);
   1473  11042       Erik 	(void) ipif_down_tail(ipif);
   1474   8485      Peter 	mutex_enter(&ill->ill_lock);
   1475   8485      Peter 	if (!(ipif->ipif_flags & (IPIF_DHCPRUNNING|IPIF_TEMPORARY)) &&
   1476   8485      Peter 	    ill->ill_net_type == IRE_IF_RESOLVER &&
   1477   8485      Peter 	    !(ipif->ipif_state_flags & IPIF_CONDEMNED) &&
   1478   8485      Peter 	    ipst->ips_ip_dup_recovery > 0) {
   1479   8485      Peter 		ASSERT(ipif->ipif_recovery_id == 0);
   1480  11042       Erik 		ipif->ipif_recovery_id = timeout(ipif_dup_recovery,
   1481   8485      Peter 		    ipif, MSEC_TO_TICK(ipst->ips_ip_dup_recovery));
   1482   8485      Peter 	}
   1483   8485      Peter 	mutex_exit(&ill->ill_lock);
   1484   8485      Peter 	ipif_refrele(ipif);
   1485  11042       Erik 
   1486   2546   carlsonj ignore_conflict:
   1487   2546   carlsonj 	freemsg(mp);
   1488  11042       Erik 	ira_cleanup(&iras, B_TRUE);
   1489   2546   carlsonj }
   1490   2546   carlsonj 
   1491   2546   carlsonj /*
   1492   2546   carlsonj  * Handle failure by tearing down the ipifs with the specified address.  Note
   1493  11042       Erik  * that tearing down the ipif also means deleting the ncec through ipif_down, so
   1494  11042       Erik  * it's not possible to do recovery by just restarting the ncec timer.  Instead,
   1495   2546   carlsonj  * we start a timer on the ipif.
   1496  11042       Erik  * Caller has to free mp;
   1497  11042       Erik  */
   1498  11042       Erik static void
   1499  11042       Erik ndp_failure(mblk_t *mp, ip_recv_attr_t *ira)
   1500  11042       Erik {
   1501  11042       Erik 	const uchar_t	*haddr;
   1502  11042       Erik 	ill_t		*ill = ira->ira_rill;
   1503  11042       Erik 
   1504  11042       Erik 	/*
   1505  11042       Erik 	 * Ignore conflicts generated by misbehaving switches that just
   1506  11042       Erik 	 * reflect our own messages back to us.
   1507  11042       Erik 	 */
   1508  11042       Erik 
   1509  11042       Erik 	/* icmp_inbound_v6 ensures this */
   1510  11042       Erik 	ASSERT(ira->ira_flags & IRAF_L2SRC_SET);
   1511  11042       Erik 	haddr = ira->ira_l2src;
   1512  11042       Erik 	if (haddr != NULL &&
   1513  11042       Erik 	    bcmp(haddr, ill->ill_phys_addr, ill->ill_phys_addr_length) == 0) {
   1514  11042       Erik 		return;
   1515  11042       Erik 	}
   1516  11042       Erik 
   1517   2546   carlsonj 	if ((mp = copymsg(mp)) != NULL) {
   1518  11042       Erik 		mblk_t	*attrmp;
   1519  11042       Erik 
   1520  11042       Erik 		attrmp = ip_recv_attr_to_mblk(ira);
   1521  11042       Erik 		if (attrmp == NULL) {
   1522  11042       Erik 			BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards);
   1523  11042       Erik 			ip_drop_input("ipIfStatsInDiscards", mp, ill);
   1524   2546   carlsonj 			freemsg(mp);
   1525   2546   carlsonj 		} else {
   1526  11042       Erik 			ASSERT(attrmp->b_cont == NULL);
   1527  11042       Erik 			attrmp->b_cont = mp;
   1528  11042       Erik 			mp = attrmp;
   1529   2546   carlsonj 			ill_refhold(ill);
   1530  11042       Erik 			qwriter_ip(ill, ill->ill_rq, mp, ip_ndp_excl, NEW_OP,
   1531   4360       meem 			    B_FALSE);
   1532   2546   carlsonj 		}
   1533   2546   carlsonj 	}
   1534   2546   carlsonj }
   1535   2546   carlsonj 
   1536   2546   carlsonj /*
   1537   2546   carlsonj  * Handle a discovered conflict: some other system is advertising that it owns
   1538   2546   carlsonj  * one of our IP addresses.  We need to defend ourselves, or just shut down the
   1539   2546   carlsonj  * interface.
   1540  11042       Erik  *
   1541  11042       Erik  * Handles both IPv4 and IPv6
   1542  11042       Erik  */
   1543  11042       Erik boolean_t
   1544  11042       Erik ip_nce_conflict(mblk_t *mp, ip_recv_attr_t *ira, ncec_t *ncec)
   1545  11042       Erik {
   1546  11042       Erik 	ipif_t		*ipif;
   1547  11042       Erik 	clock_t		now;
   1548  11042       Erik 	uint_t		maxdefense;
   1549  11042       Erik 	uint_t		defs;
   1550  11042       Erik 	ill_t		*ill = ira->ira_ill;
   1551  11042       Erik 	ip_stack_t	*ipst = ill->ill_ipst;
   1552  11042       Erik 	uint32_t	elapsed;
   1553  11042       Erik 	boolean_t	isv6 = ill->ill_isv6;
   1554  11042       Erik 	ipaddr_t	ncec_addr;
   1555  11042       Erik 
   1556  11042       Erik 	if (isv6) {
   1557  11042       Erik 		ipif = ipif_lookup_addr_v6(&ncec->ncec_addr, ill, ALL_ZONES,
   1558  11042       Erik 		    ipst);
   1559  11042       Erik 	} else {
   1560  11042       Erik 		if (arp_no_defense) {
   1561  11042       Erik 			/*
   1562  11042       Erik 			 * Yes, there is a conflict, but no, we do not
   1563  11042       Erik 			 * defend ourself.
   1564  11042       Erik 			 */
   1565  11042       Erik 			return (B_TRUE);
   1566  11042       Erik 		}
   1567  11042       Erik 		IN6_V4MAPPED_TO_IPADDR(&ncec->ncec_addr, ncec_addr);
   1568  11042       Erik 		ipif = ipif_lookup_addr(ncec_addr, ill, ALL_ZONES,
   1569  11042       Erik 		    ipst);
   1570  11042       Erik 	}
   1571   2546   carlsonj 	if (ipif == NULL)
   1572  11042       Erik 		return (B_FALSE);
   1573   8485      Peter 
   1574   2546   carlsonj 	/*
   1575   2546   carlsonj 	 * First, figure out if this address is disposable.
   1576   2546   carlsonj 	 */
   1577   2546   carlsonj 	if (ipif->ipif_flags & (IPIF_DHCPRUNNING | IPIF_TEMPORARY))
   1578   3448   dh155122 		maxdefense = ipst->ips_ip_max_temp_defend;
   1579   3448   dh155122 	else
   1580   3448   dh155122 		maxdefense = ipst->ips_ip_max_defend;
   1581   2546   carlsonj 
   1582   2546   carlsonj 	/*
   1583   2546   carlsonj 	 * Now figure out how many times we've defended ourselves.  Ignore
   1584   2546   carlsonj 	 * defenses that happened long in the past.
   1585   2546   carlsonj 	 */
   1586  11042       Erik 	now = ddi_get_lbolt();
   1587  11042       Erik 	elapsed = (drv_hztousec(now - ncec->ncec_last_time_defended))/1000000;
   1588  11042       Erik 	mutex_enter(&ncec->ncec_lock);
   1589  11042       Erik 	if ((defs = ncec->ncec_defense_count) > 0 &&
   1590  11042       Erik 	    elapsed > ipst->ips_ip_defend_interval) {
   1591  11042       Erik 		/*
   1592  11042       Erik 		 * ip_defend_interval has elapsed.
   1593  11042       Erik 		 * reset the defense count.
   1594  11042       Erik 		 */
   1595  11042       Erik 		ncec->ncec_defense_count = defs = 0;
   1596  11042       Erik 	}
   1597  11042       Erik 	ncec->ncec_defense_count++;
   1598  11042       Erik 	ncec->ncec_last_time_defended = now;
   1599  11042       Erik 	mutex_exit(&ncec->ncec_lock);
   1600   2546   carlsonj 	ipif_refrele(ipif);
   1601   2546   carlsonj 
   1602   2546   carlsonj 	/*
   1603   2546   carlsonj 	 * If we've defended ourselves too many times already, then give up and
   1604  11042       Erik 	 * tear down the interface(s) using this address.
   1605  11042       Erik 	 * Otherwise, caller has to defend by sending out an announce.
   1606   2546   carlsonj 	 */
   1607   2546   carlsonj 	if (defs >= maxdefense) {
   1608  11042       Erik 		if (isv6)
   1609  11042       Erik 			ndp_failure(mp, ira);
   1610  11042       Erik 		else
   1611  11042       Erik 			arp_failure(mp, ira);
   1612  11042       Erik 	} else {
   1613  11042       Erik 		return (B_TRUE); /* caller must defend this address */
   1614  11042       Erik 	}
   1615  11042       Erik 	return (B_FALSE);
   1616  11042       Erik }
   1617  11042       Erik 
   1618  11042       Erik /*
   1619  11042       Erik  * Handle reception of Neighbor Solicitation messages.
   1620  11042       Erik  */
   1621  11042       Erik static void
   1622  11042       Erik ndp_input_solicit(mblk_t *mp, ip_recv_attr_t *ira)
   1623  11042       Erik {
   1624  11042       Erik 	ill_t		*ill = ira->ira_ill, *under_ill;
   1625      0     stevel 	nd_neighbor_solicit_t *ns;
   1626  11042       Erik 	uint32_t	hlen = ill->ill_phys_addr_length;
   1627      0     stevel 	uchar_t		*haddr = NULL;
   1628      0     stevel 	icmp6_t		*icmp_nd;
   1629      0     stevel 	ip6_t		*ip6h;
   1630  11042       Erik 	ncec_t		*our_ncec = NULL;
   1631      0     stevel 	in6_addr_t	target;
   1632      0     stevel 	in6_addr_t	src;
   1633      0     stevel 	int		len;
   1634      0     stevel 	int		flag = 0;
   1635      0     stevel 	nd_opt_hdr_t	*opt = NULL;
   1636      0     stevel 	boolean_t	bad_solicit = B_FALSE;
   1637      0     stevel 	mib2_ipv6IfIcmpEntry_t	*mib = ill->ill_icmp6_mib;
   1638  11042       Erik 	boolean_t	need_ill_refrele = B_FALSE;
   1639      0     stevel 
   1640      0     stevel 	ip6h = (ip6_t *)mp->b_rptr;
   1641      0     stevel 	icmp_nd = (icmp6_t *)(mp->b_rptr + IPV6_HDR_LEN);
   1642      0     stevel 	len = mp->b_wptr - mp->b_rptr - IPV6_HDR_LEN;
   1643      0     stevel 	src = ip6h->ip6_src;
   1644      0     stevel 	ns = (nd_neighbor_solicit_t *)icmp_nd;
   1645      0     stevel 	target = ns->nd_ns_target;
   1646      0     stevel 	if (IN6_IS_ADDR_MULTICAST(&target)) {
   1647      0     stevel 		if (ip_debug > 2) {
   1648      0     stevel 			/* ip1dbg */
   1649      0     stevel 			pr_addr_dbg("ndp_input_solicit: Target is"
   1650      0     stevel 			    " multicast! %s\n", AF_INET6, &target);
   1651      0     stevel 		}
   1652      0     stevel 		bad_solicit = B_TRUE;
   1653      0     stevel 		goto done;
   1654      0     stevel 	}
   1655      0     stevel 	if (len > sizeof (nd_neighbor_solicit_t)) {
   1656      0     stevel 		/* Options present */
   1657      0     stevel 		opt = (nd_opt_hdr_t *)&ns[1];
   1658      0     stevel 		len -= sizeof (nd_neighbor_solicit_t);
   1659      0     stevel 		if (!ndp_verify_optlen(opt, len)) {
   1660      0     stevel 			ip1dbg(("ndp_input_solicit: Bad opt len\n"));
   1661      0     stevel 			bad_solicit = B_TRUE;
   1662      0     stevel 			goto done;
   1663      0     stevel 		}
   1664      0     stevel 	}
   1665      0     stevel 	if (IN6_IS_ADDR_UNSPECIFIED(&src)) {
   1666      0     stevel 		/* Check to see if this is a valid DAD solicitation */
   1667      0     stevel 		if (!IN6_IS_ADDR_MC_SOLICITEDNODE(&ip6h->ip6_dst)) {
   1668      0     stevel 			if (ip_debug > 2) {
   1669      0     stevel 				/* ip1dbg */
   1670      0     stevel 				pr_addr_dbg("ndp_input_solicit: IPv6 "
   1671      0     stevel 				    "Destination is not solicited node "
   1672      0     stevel 				    "multicast %s\n", AF_INET6,
   1673      0     stevel 				    &ip6h->ip6_dst);
   1674      0     stevel 			}
   1675      0     stevel 			bad_solicit = B_TRUE;
   1676      0     stevel 			goto done;
   1677      0     stevel 		}
   1678      0     stevel 	}
   1679      0     stevel 
   1680   8485      Peter 	/*
   1681   8485      Peter 	 * NOTE: with IPMP, it's possible the nominated multicast ill (which
   1682   8485      Peter 	 * received this packet if it's multicast) is not the ill tied to
   1683   8485      Peter 	 * e.g. the IPMP ill's data link-local.  So we match across the illgrp
   1684   8485      Peter 	 * to ensure we find the associated NCE.
   1685   8485      Peter 	 */
   1686  11042       Erik 	our_ncec = ncec_lookup_illgrp_v6(ill, &target);
   1687  11042       Erik 	/*
   1688  11042       Erik 	 * If this is a valid Solicitation for an address we are publishing,
   1689  11042       Erik 	 * then a PUBLISH entry should exist in the cache
   1690  11042       Erik 	 */
   1691  11042       Erik 	if (our_ncec == NULL || !NCE_PUBLISH(our_ncec)) {
   1692      0     stevel 		ip1dbg(("ndp_input_solicit: Wrong target in NS?!"
   1693      0     stevel 		    "ifname=%s ", ill->ill_name));
   1694      0     stevel 		if (ip_debug > 2) {
   1695      0     stevel 			/* ip1dbg */
   1696      0     stevel 			pr_addr_dbg(" dst %s\n", AF_INET6, &target);
   1697      0     stevel 		}
   1698  11042       Erik 		if (our_ncec == NULL)
   1699  11042       Erik 			bad_solicit = B_TRUE;
   1700      0     stevel 		goto done;
   1701      0     stevel 	}
   1702      0     stevel 
   1703      0     stevel 	/* At this point we should have a verified NS per spec */
   1704      0     stevel 	if (opt != NULL) {
   1705      0     stevel 		opt = ndp_get_option(opt, len, ND_OPT_SOURCE_LINKADDR);
   1706      0     stevel 		if (opt != NULL) {
   1707      0     stevel 			haddr = (uchar_t *)&opt[1];
   1708   2546   carlsonj 			if (hlen > opt->nd_opt_len * 8 - sizeof (*opt) ||
   1709      0     stevel 			    hlen == 0) {
   1710  11042       Erik 				ip1dbg(("ndp_input_advert: bad SLLA\n"));
   1711      0     stevel 				bad_solicit = B_TRUE;
   1712      0     stevel 				goto done;
   1713      0     stevel 			}
   1714      0     stevel 		}
   1715      0     stevel 	}
   1716    980   dd193516 
   1717   2699   carlsonj 	/* If sending directly to peer, set the unicast flag */
   1718   2699   carlsonj 	if (!IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst))
   1719      0     stevel 		flag |= NDP_UNICAST;
   1720      0     stevel 
   1721      0     stevel 	/*
   1722  11042       Erik 	 * Create/update the entry for the soliciting node on the ipmp_ill.
   1723      0     stevel 	 * or respond to outstanding queries, don't if
   1724      0     stevel 	 * the source is unspecified address.
   1725      0     stevel 	 */
   1726      0     stevel 	if (!IN6_IS_ADDR_UNSPECIFIED(&src)) {
   1727   2546   carlsonj 		int	err;
   1728      0     stevel 		nce_t	*nnce;
   1729      0     stevel 
   1730   2535   sangeeta 		ASSERT(ill->ill_isv6);
   1731   2546   carlsonj 		/*
   1732   2546   carlsonj 		 * Regular solicitations *must* include the Source Link-Layer
   1733   2546   carlsonj 		 * Address option.  Ignore messages that do not.
   1734   2546   carlsonj 		 */
   1735   2546   carlsonj 		if (haddr == NULL && IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst)) {
   1736   2546   carlsonj 			ip1dbg(("ndp_input_solicit: source link-layer address "
   1737   2546   carlsonj 			    "option missing with a specified source.\n"));
   1738   2546   carlsonj 			bad_solicit = B_TRUE;
   1739   2546   carlsonj 			goto done;
   1740   2546   carlsonj 		}
   1741   2546   carlsonj 
   1742   2546   carlsonj 		/*
   1743   2546   carlsonj 		 * This is a regular solicitation.  If we're still in the
   1744   2546   carlsonj 		 * process of verifying the address, then don't respond at all
   1745   2546   carlsonj 		 * and don't keep track of the sender.
   1746   2546   carlsonj 		 */
   1747  11042       Erik 		if (our_ncec->ncec_state == ND_PROBE)
   1748   2546   carlsonj 			goto done;
   1749   2546   carlsonj 
   1750   2546   carlsonj 		/*
   1751   2546   carlsonj 		 * If the solicitation doesn't have sender hardware address
   1752   2546   carlsonj 		 * (legal for unicast solicitation), then process without
   1753   2546   carlsonj 		 * installing the return NCE.  Either we already know it, or
   1754   2546   carlsonj 		 * we'll be forced to look it up when (and if) we reply to the
   1755   2546   carlsonj 		 * packet.
   1756   2546   carlsonj 		 */
   1757   2546   carlsonj 		if (haddr == NULL)
   1758   2546   carlsonj 			goto no_source;
   1759   2546   carlsonj 
   1760  11042       Erik 		under_ill = ill;
   1761  11042       Erik 		if (IS_UNDER_IPMP(under_ill)) {
   1762  11042       Erik 			ill = ipmp_ill_hold_ipmp_ill(under_ill);
   1763  11042       Erik 			if (ill == NULL)
   1764  11042       Erik 				ill = under_ill;
   1765  11042       Erik 			else
   1766  11042       Erik 				need_ill_refrele = B_TRUE;
   1767  11042       Erik 		}
   1768  11042       Erik 		err = nce_lookup_then_add_v6(ill,
   1769  11042       Erik 		    haddr, hlen,
   1770      0     stevel 		    &src,	/* Soliciting nodes address */
   1771      0     stevel 		    0,
   1772      0     stevel 		    ND_STALE,
   1773   4714    sowmini 		    &nnce);
   1774  11042       Erik 
   1775  11042       Erik 		if (need_ill_refrele) {
   1776  11042       Erik 			ill_refrele(ill);
   1777  11042       Erik 			ill = under_ill;
   1778  11042       Erik 			need_ill_refrele =  B_FALSE;
   1779  11042       Erik 		}
   1780      0     stevel 		switch (err) {
   1781      0     stevel 		case 0:
   1782      0     stevel 			/* done with this entry */
   1783  11042       Erik 			nce_refrele(nnce);
   1784      0     stevel 			break;
   1785      0     stevel 		case EEXIST:
   1786      0     stevel 			/*
   1787   8485      Peter 			 * B_FALSE indicates this is not an advertisement.
   1788      0     stevel 			 */
   1789  11042       Erik 			nce_process(nnce->nce_common, haddr, 0, B_FALSE);
   1790  11042       Erik 			nce_refrele(nnce);
   1791      0     stevel 			break;
   1792      0     stevel 		default:
   1793      0     stevel 			ip1dbg(("ndp_input_solicit: Can't create NCE %d\n",
   1794      0     stevel 			    err));
   1795      0     stevel 			goto done;
   1796      0     stevel 		}
   1797   2546   carlsonj no_source:
   1798      0     stevel 		flag |= NDP_SOLICITED;
   1799      0     stevel 	} else {
   1800      0     stevel 		/*
   1801   2546   carlsonj 		 * No source link layer address option should be present in a
   1802   2546   carlsonj 		 * valid DAD request.
   1803   2546   carlsonj 		 */
   1804   2546   carlsonj 		if (haddr != NULL) {
   1805   2546   carlsonj 			ip1dbg(("ndp_input_solicit: source link-layer address "
   1806   2546   carlsonj 			    "option present with an unspecified source.\n"));
   1807   2546   carlsonj 			bad_solicit = B_TRUE;
   1808   2546   carlsonj 			goto done;
   1809   2546   carlsonj 		}
   1810  11042       Erik 		if (our_ncec->ncec_state == ND_PROBE) {
   1811  11042       Erik 			/*
   1812  11042       Erik 			 * Internally looped-back probes will have
   1813  11042       Erik 			 * IRAF_L2SRC_LOOPBACK set so we can ignore our own
   1814   2546   carlsonj 			 * transmissions.
   1815   2546   carlsonj 			 */
   1816  11042       Erik 			if (!(ira->ira_flags & IRAF_L2SRC_LOOPBACK)) {
   1817   2546   carlsonj 				/*
   1818   2546   carlsonj 				 * If someone else is probing our address, then
   1819   2546   carlsonj 				 * we've crossed wires.  Declare failure.
   1820   2546   carlsonj 				 */
   1821  11042       Erik 				ndp_failure(mp, ira);
   1822   2546   carlsonj 			}
   1823   2546   carlsonj 			goto done;
   1824   2546   carlsonj 		}
   1825   2546   carlsonj 		/*
   1826   2546   carlsonj 		 * This is a DAD probe.  Multicast the advertisement to the
   1827   2546   carlsonj 		 * all-nodes address.
   1828      0     stevel 		 */
   1829      0     stevel 		src = ipv6_all_hosts_mcast;
   1830      0     stevel 	}
   1831  11042       Erik 	flag |= nce_advert_flags(our_ncec);
   1832  11042       Erik 	(void) ndp_xmit(ill,
   1833  11042       Erik 	    ND_NEIGHBOR_ADVERT,
   1834  11042       Erik 	    our_ncec->ncec_lladdr,
   1835  11042       Erik 	    our_ncec->ncec_lladdr_length,
   1836  11042       Erik 	    &target,	/* Source and target of the advertisement pkt */
   1837  11042       Erik 	    &src,	/* IP Destination (source of original pkt) */
   1838  11042       Erik 	    flag);
   1839      0     stevel done:
   1840      0     stevel 	if (bad_solicit)
   1841      0     stevel 		BUMP_MIB(mib, ipv6IfIcmpInBadNeighborSolicitations);
   1842  11042       Erik 	if (our_ncec != NULL)
   1843  11042       Erik 		ncec_refrele(our_ncec);
   1844  11042       Erik }
   1845  11042       Erik 
   1846  11042       Erik /*
   1847  11042       Erik  * Handle reception of Neighbor Solicitation messages
   1848  11042       Erik  */
   1849  11042       Erik void
   1850  11042       Erik ndp_input_advert(mblk_t *mp, ip_recv_attr_t *ira)
   1851  11042       Erik {
   1852  11042       Erik 	ill_t		*ill = ira->ira_ill;
   1853      0     stevel 	nd_neighbor_advert_t *na;
   1854  11042       Erik 	uint32_t	hlen = ill->ill_phys_addr_length;
   1855      0     stevel 	uchar_t		*haddr = NULL;
   1856      0     stevel 	icmp6_t		*icmp_nd;
   1857      0     stevel 	ip6_t		*ip6h;
   1858  11042       Erik 	ncec_t		*dst_ncec = NULL;
   1859      0     stevel 	in6_addr_t	target;
   1860      0     stevel 	nd_opt_hdr_t	*opt = NULL;
   1861      0     stevel 	int		len;
   1862   8485      Peter 	ip_stack_t	*ipst = ill->ill_ipst;
   1863      0     stevel 	mib2_ipv6IfIcmpEntry_t	*mib = ill->ill_icmp6_mib;
   1864      0     stevel 
   1865      0     stevel 	ip6h = (ip6_t *)mp->b_rptr;
   1866      0     stevel 	icmp_nd = (icmp6_t *)(mp->b_rptr + IPV6_HDR_LEN);
   1867      0     stevel 	len = mp->b_wptr - mp->b_rptr - IPV6_HDR_LEN;
   1868      0     stevel 	na = (nd_neighbor_advert_t *)icmp_nd;
   1869  11042       Erik 
   1870      0     stevel 	if (IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst) &&
   1871      0     stevel 	    (na->nd_na_flags_reserved & ND_NA_FLAG_SOLICITED)) {
   1872      0     stevel 		ip1dbg(("ndp_input_advert: Target is multicast but the "
   1873      0     stevel 		    "solicited flag is not zero\n"));
   1874      0     stevel 		BUMP_MIB(mib, ipv6IfIcmpInBadNeighborAdvertisements);
   1875      0     stevel 		return;
   1876      0     stevel 	}
   1877      0     stevel 	target = na->nd_na_target;
   1878      0     stevel 	if (IN6_IS_ADDR_MULTICAST(&target)) {
   1879      0     stevel 		ip1dbg(("ndp_input_advert: Target is multicast!\n"));
   1880      0     stevel 		BUMP_MIB(mib, ipv6IfIcmpInBadNeighborAdvertisements);
   1881      0     stevel 		return;
   1882      0     stevel 	}
   1883      0     stevel 	if (len > sizeof (nd_neighbor_advert_t)) {
   1884      0     stevel 		opt = (nd_opt_hdr_t *)&na[1];
   1885      0     stevel 		if (!ndp_verify_optlen(opt,
   1886      0     stevel 		    len - sizeof (nd_neighbor_advert_t))) {
   1887   2546   carlsonj 			ip1dbg(("ndp_input_advert: cannot verify SLLA\n"));
   1888      0     stevel 			BUMP_MIB(mib, ipv6IfIcmpInBadNeighborAdvertisements);
   1889      0     stevel 			return;
   1890      0     stevel 		}
   1891      0     stevel 		/* At this point we have a verified NA per spec */
   1892      0     stevel 		len -= sizeof (nd_neighbor_advert_t);
   1893      0     stevel 		opt = ndp_get_option(opt, len, ND_OPT_TARGET_LINKADDR);
   1894      0     stevel 		if (opt != NULL) {
   1895      0     stevel 			haddr = (uchar_t *)&opt[1];
   1896   2546   carlsonj 			if (hlen > opt->nd_opt_len * 8 - sizeof (*opt) ||
   1897      0     stevel 			    hlen == 0) {
   1898   2546   carlsonj 				ip1dbg(("ndp_input_advert: bad SLLA\n"));
   1899      0     stevel 				BUMP_MIB(mib,
   1900      0     stevel 				    ipv6IfIcmpInBadNeighborAdvertisements);
   1901      0     stevel 				return;
   1902      0     stevel 			}
   1903      0     stevel 		}
   1904      0     stevel 	}
   1905      0     stevel 
   1906      0     stevel 	/*
   1907   8485      Peter 	 * NOTE: we match across the illgrp since we need to do DAD for all of
   1908   8485      Peter 	 * our local addresses, and those are spread across all the active
   1909      0     stevel 	 * ills in the group.
   1910      0     stevel 	 */
   1911  11042       Erik 	if ((dst_ncec = ncec_lookup_illgrp_v6(ill, &target)) == NULL)
   1912  11042       Erik 		return;
   1913  11042       Erik 
   1914  11042       Erik 	if (NCE_PUBLISH(dst_ncec)) {
   1915  11042       Erik 		/*
   1916  11042       Erik 		 * Someone just advertised an addresses that we publish. First,
   1917   8485      Peter 		 * check it it was us -- if so, we can safely ignore it.
   1918  11042       Erik 		 * We don't get the haddr from the ira_l2src because, in the
   1919  11042       Erik 		 * case that the packet originated from us, on an IPMP group,
   1920  11042       Erik 		 * the ira_l2src may would be the link-layer address of the
   1921  11042       Erik 		 * cast_ill used to send the packet, which may not be the same
   1922  11042       Erik 		 * as the dst_ncec->ncec_lladdr of the address.
   1923   8485      Peter 		 */
   1924   8485      Peter 		if (haddr != NULL) {
   1925  11042       Erik 			if (ira->ira_flags & IRAF_L2SRC_LOOPBACK)
   1926  11042       Erik 				goto out;
   1927  11042       Erik 
   1928  11042       Erik 			if (!nce_cmp_ll_addr(dst_ncec, haddr, hlen))
   1929  11042       Erik 				goto out;   /* from us -- no conflict */
   1930   8485      Peter 
   1931   8485      Peter 			/*
   1932   8485      Peter 			 * If we're in an IPMP group, check if this is an echo
   1933   8485      Peter 			 * from another ill in the group.  Use the double-
   1934   8485      Peter 			 * checked locking pattern to avoid grabbing
   1935   8485      Peter 			 * ill_g_lock in the non-IPMP case.
   1936   8485      Peter 			 */
   1937   8485      Peter 			if (IS_UNDER_IPMP(ill)) {
   1938   8485      Peter 				rw_enter(&ipst->ips_ill_g_lock, RW_READER);
   1939   8485      Peter 				if (IS_UNDER_IPMP(ill) && ipmp_illgrp_find_ill(
   1940   8485      Peter 				    ill->ill_grp, haddr, hlen) != NULL) {
   1941   8485      Peter 					rw_exit(&ipst->ips_ill_g_lock);
   1942   8485      Peter 					goto out;
   1943   8485      Peter 				}
   1944   8485      Peter 				rw_exit(&ipst->ips_ill_g_lock);
   1945   8485      Peter 			}
   1946      0     stevel 		}
   1947   8485      Peter 
   1948   8485      Peter 		/*
   1949   8485      Peter 		 * This appears to be a real conflict.  If we're trying to
   1950   8485      Peter 		 * configure this NCE (ND_PROBE), then shut it down.
   1951   8485      Peter 		 * Otherwise, handle the discovered conflict.
   1952  11042       Erik 		 */
   1953  11042       Erik 		if (dst_ncec->ncec_state == ND_PROBE) {
   1954  11042       Erik 			ndp_failure(mp, ira);
   1955  11042       Erik 		} else {
   1956  11042       Erik 			if (ip_nce_conflict(mp, ira, dst_ncec)) {
   1957  11042       Erik 				char hbuf[MAC_STR_LEN];
   1958  11042       Erik 				char sbuf[INET6_ADDRSTRLEN];
   1959  11042       Erik 
   1960  11042       Erik 				cmn_err(CE_WARN,
   1961  11042       Erik 				    "node '%s' is using %s on %s",
   1962  11042       Erik 				    inet_ntop(AF_INET6, &target, sbuf,
   1963  11042       Erik 				    sizeof (sbuf)),
   1964  11042       Erik 				    haddr == NULL ? "<none>" :
   1965  11042       Erik 				    mac_colon_addr(haddr, hlen, hbuf,
   1966  11042       Erik 				    sizeof (hbuf)), ill->ill_name);
   1967  11042       Erik 				/*
   1968  11042       Erik 				 * RFC 4862, Section 5.4.4 does not mandate
   1969  11042       Erik 				 * any specific behavior when an NA matches
   1970  11042       Erik 				 * a non-tentative address assigned to the
   1971  11042       Erik 				 * receiver. We make the choice of defending
   1972  11042       Erik 				 * our address, based on the assumption that
   1973  11042       Erik 				 * the sender has not detected the Duplicate.
   1974  11042       Erik 				 *
   1975  11042       Erik 				 * ncec_last_time_defended has been adjusted
   1976  11042       Erik 				 * in ip_nce_conflict()
   1977  11042       Erik 				 */
   1978  11042       Erik 				(void) ndp_announce(dst_ncec);
   1979  11042       Erik 			}
   1980  11042       Erik 		}
   1981   8485      Peter 	} else {
   1982   8485      Peter 		if (na->nd_na_flags_reserved & ND_NA_FLAG_ROUTER)
   1983  11042       Erik 			dst_ncec->ncec_flags |= NCE_F_ISROUTER;
   1984   8485      Peter 
   1985   8485      Peter 		/* B_TRUE indicates this an advertisement */
   1986  11042       Erik 		nce_process(dst_ncec, haddr, na->nd_na_flags_reserved, B_TRUE);
   1987      0     stevel 	}
   1988   8485      Peter out:
   1989  11042       Erik 	ncec_refrele(dst_ncec);
   1990      0     stevel }
   1991      0     stevel 
   1992      0     stevel /*
   1993      0     stevel  * Process NDP neighbor solicitation/advertisement messages.
   1994      0     stevel  * The checksum has already checked o.k before reaching here.
   1995  11042       Erik  * Information about the datalink header is contained in ira_l2src, but
   1996  11042       Erik  * that should be ignored for loopback packets.
   1997  11042       Erik  */
   1998  11042       Erik void
   1999  11042       Erik ndp_input(mblk_t *mp, ip_recv_attr_t *ira)
   2000  11042       Erik {
   2001  11042       Erik 	ill_t		*ill = ira->ira_rill;
   2002      0     stevel 	icmp6_t		*icmp_nd;
   2003      0     stevel 	ip6_t		*ip6h;
   2004      0     stevel 	int		len;
   2005      0     stevel 	mib2_ipv6IfIcmpEntry_t	*mib = ill->ill_icmp6_mib;
   2006  11042       Erik 	ill_t		*orig_ill = NULL;
   2007  11042       Erik 
   2008  11042       Erik 	/*
   2009  11042       Erik 	 * Since ira_ill is where the IRE_LOCAL was hosted we use ira_rill
   2010  11042       Erik 	 * and make it be the IPMP upper so avoid being confused by a packet
   2011  11042       Erik 	 * addressed to a unicast address on a different ill.
   2012  11042       Erik 	 */
   2013  11042       Erik 	if (IS_UNDER_IPMP(ill)) {
   2014  11042       Erik 		orig_ill = ill;
   2015  11042       Erik 		ill = ipmp_ill_hold_ipmp_ill(orig_ill);
   2016  11042       Erik 		if (ill == NULL) {
   2017  11042       Erik 			ill = orig_ill;
   2018  11042       Erik 			BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards);
   2019  11042       Erik 			ip_drop_input("ipIfStatsInDiscards - IPMP ill",
   2020  11042       Erik 			    mp, ill);
   2021  11042       Erik 			freemsg(mp);
   2022  11042       Erik 			return;
   2023  11042       Erik 		}
   2024  11042       Erik 		ASSERT(ill != orig_ill);
   2025  11042       Erik 		orig_ill = ira->ira_ill;
   2026  11042       Erik 		ira->ira_ill = ill;
   2027  11042       Erik 		mib = ill->ill_icmp6_mib;
   2028  11042       Erik 	}
   2029      0     stevel 	if (!pullupmsg(mp, -1)) {
   2030      0     stevel 		ip1dbg(("ndp_input: pullupmsg failed\n"));
   2031   3284   apersson 		BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards);
   2032  11042       Erik 		ip_drop_input("ipIfStatsInDiscards - pullupmsg", mp, ill);
   2033      0     stevel 		goto done;
   2034      0     stevel 	}
   2035      0     stevel 	ip6h = (ip6_t *)mp->b_rptr;
   2036      0     stevel 	if (ip6h->ip6_hops != IPV6_MAX_HOPS) {
   2037      0     stevel 		ip1dbg(("ndp_input: hoplimit != IPV6_MAX_HOPS\n"));
   2038  11042       Erik 		ip_drop_input("ipv6IfIcmpBadHoplimit", mp, ill);
   2039      0     stevel 		BUMP_MIB(mib, ipv6IfIcmpBadHoplimit);
   2040      0     stevel 		goto done;
   2041      0     stevel 	}
   2042      0     stevel 	/*
   2043      0     stevel 	 * NDP does not accept any extension headers between the
   2044      0     stevel 	 * IP header and the ICMP header since e.g. a routing
   2045      0     stevel 	 * header could be dangerous.
   2046      0     stevel 	 * This assumes that any AH or ESP headers are removed
   2047      0     stevel 	 * by ip prior to passing the packet to ndp_input.
   2048      0     stevel 	 */
   2049      0     stevel 	if (ip6h->ip6_nxt != IPPROTO_ICMPV6) {
   2050      0     stevel 		ip1dbg(("ndp_input: Wrong next header 0x%x\n",
   2051      0     stevel 		    ip6h->ip6_nxt));
   2052  11042       Erik 		ip_drop_input("Wrong next header", mp, ill);
   2053      0     stevel 		BUMP_MIB(mib, ipv6IfIcmpInErrors);
   2054      0     stevel 		goto done;
   2055      0     stevel 	}
   2056      0     stevel 	icmp_nd = (icmp6_t *)(mp->b_rptr + IPV6_HDR_LEN);
   2057      0     stevel 	ASSERT(icmp_nd->icmp6_type == ND_NEIGHBOR_SOLICIT ||
   2058      0     stevel 	    icmp_nd->icmp6_type == ND_NEIGHBOR_ADVERT);
   2059      0     stevel 	if (icmp_nd->icmp6_code != 0) {
   2060      0     stevel 		ip1dbg(("ndp_input: icmp6 code != 0 \n"));
   2061  11042       Erik 		ip_drop_input("code non-zero", mp, ill);
   2062      0     stevel 		BUMP_MIB(mib, ipv6IfIcmpInErrors);
   2063      0     stevel 		goto done;
   2064      0     stevel 	}
   2065      0     stevel 	len = mp->b_wptr - mp->b_rptr - IPV6_HDR_LEN;
   2066      0     stevel 	/*
   2067      0     stevel 	 * Make sure packet length is large enough for either
   2068      0     stevel 	 * a NS or a NA icmp packet.
   2069      0     stevel 	 */
   2070      0     stevel 	if (len <  sizeof (struct icmp6_hdr) + sizeof (struct in6_addr)) {
   2071      0     stevel 		ip1dbg(("ndp_input: packet too short\n"));
   2072  11042       Erik 		ip_drop_input("packet too short", mp, ill);
   2073      0     stevel 		BUMP_MIB(mib, ipv6IfIcmpInErrors);
   2074      0     stevel 		goto done;
   2075      0     stevel 	}
   2076      0     stevel 	if (icmp_nd->icmp6_type == ND_NEIGHBOR_SOLICIT) {
   2077  11042       Erik 		ndp_input_solicit(mp, ira);
   2078  11042       Erik 	} else {
   2079  11042       Erik 		ndp_input_advert(mp, ira);
   2080      0     stevel 	}
   2081      0     stevel done:
   2082      0     stevel 	freemsg(mp);
   2083  11042       Erik 	if (orig_ill != NULL) {
   2084  11042       Erik 		ill_refrele(ill);
   2085  11042       Erik 		ira->ira_ill = orig_ill;
   2086  11042       Erik 	}
   2087  11042       Erik }
   2088  11042       Erik 
   2089  11042       Erik /*
   2090  11042       Erik  * ndp_xmit is called to form and transmit a ND solicitation or
   2091      0     stevel  * advertisement ICMP packet.
   2092   2546   carlsonj  *
   2093   2546   carlsonj  * If the source address is unspecified and this isn't a probe (used for
   2094   2546   carlsonj  * duplicate address detection), an appropriate source address and link layer
   2095   2546   carlsonj  * address will be chosen here.  The link layer address option is included if
   2096   2546   carlsonj  * the source is specified (i.e., all non-probe packets), and omitted (per the
   2097   2546   carlsonj  * specification) otherwise.
   2098   2546   carlsonj  *
   2099      0     stevel  * It returns B_FALSE only if it does a successful put() to the
   2100      0     stevel  * corresponding ill's ill_wq otherwise returns B_TRUE.
   2101      0     stevel  */
   2102      0     stevel static boolean_t
   2103  11042       Erik ndp_xmit(ill_t *ill, uint32_t operation, uint8_t *hw_addr, uint_t hw_addr_len,
   2104   8485      Peter     const in6_addr_t *sender, const in6_addr_t *target, int flag)
   2105      0     stevel {
   2106      0     stevel 	uint32_t	len;
   2107      0     stevel 	icmp6_t 	*icmp6;
   2108      0     stevel 	mblk_t		*mp;
   2109      0     stevel 	ip6_t		*ip6h;
   2110      0     stevel 	nd_opt_hdr_t	*opt;
   2111  11042       Erik 	uint_t		plen;
   2112   3909    ja97890 	zoneid_t	zoneid = GLOBAL_ZONEID;
   2113  11042       Erik 	ill_t		*hwaddr_ill = ill;
   2114  11042       Erik 	ip_xmit_attr_t	ixas;
   2115  11042       Erik 	ip_stack_t	*ipst = ill->ill_ipst;
   2116  11042       Erik 	boolean_t	need_refrele = B_FALSE;
   2117  11042       Erik 	boolean_t	probe = B_FALSE;
   2118  11042       Erik 
   2119  11042       Erik 	if (IS_UNDER_IPMP(ill)) {
   2120  11042       Erik 		probe = ipif_lookup_testaddr_v6(ill, sender, NULL);
   2121  11042       Erik 		/*
   2122  11042       Erik 		 * We send non-probe packets on the upper IPMP interface.
   2123  11042       Erik 		 * ip_output_simple() will use cast_ill for sending any
   2124  11042       Erik 		 * multicast packets. Note that we can't follow the same
   2125  11042       Erik 		 * logic for probe packets because all interfaces in the ipmp
   2126  11042       Erik 		 * group may have failed, so that we really want to only try
   2127  11042       Erik 		 * to send the ND packet on the ill corresponding to the src
   2128  11042       Erik 		 * address.
   2129  11042       Erik 		 */
   2130  11042       Erik 		if (!probe) {
   2131  11042       Erik 			ill = ipmp_ill_hold_ipmp_ill(ill);
   2132  11042       Erik 			if (ill != NULL)
   2133  11042       Erik 				need_refrele = B_TRUE;
   2134  11042       Erik 			else
   2135  11042       Erik 				ill = hwaddr_ill;
   2136  11042       Erik 		}
   2137  11042       Erik 	}
   2138  11042       Erik 
   2139  11042       Erik 	/*
   2140  11042       Erik 	 * If we have a unspecified source(sender) address, select a
   2141  11042       Erik 	 * proper source address for the solicitation here itself so
   2142  11042       Erik 	 * that we can initialize the h/w address correctly.
   2143  11042       Erik 	 *
   2144  11042       Erik 	 * If the sender is specified then we use this address in order
   2145  11042       Erik 	 * to lookup the zoneid before calling ip_output_v6(). This is to
   2146  11042       Erik 	 * enable unicast ND_NEIGHBOR_ADVERT packets to be routed correctly
   2147  11042       Erik 	 * by IP (we cannot guarantee that the global zone has an interface
   2148  11042       Erik 	 * route to the destination).
   2149  11042       Erik 	 *
   2150  11042       Erik 	 * Note that the NA never comes here with the unspecified source
   2151  11042       Erik 	 * address.
   2152  11042       Erik 	 */
   2153  11042       Erik 
   2154  11042       Erik 	/*
   2155  11042       Erik 	 * Probes will have unspec src at this point.
   2156  11042       Erik 	 */
   2157  11042       Erik 	if (!(IN6_IS_ADDR_UNSPECIFIED(sender))) {
   2158  11042       Erik 		zoneid = ipif_lookup_addr_zoneid_v6(sender, ill, ipst);
   2159  11042       Erik 		/*
   2160  11042       Erik 		 * It's possible for ipif_lookup_addr_zoneid_v6() to return
   2161  11042       Erik 		 * ALL_ZONES if it cannot find a matching ipif for the address
   2162  11042       Erik 		 * we are trying to use. In this case we err on the side of
   2163  11042       Erik 		 * trying to send the packet by defaulting to the GLOBAL_ZONEID.
   2164  11042       Erik 		 */
   2165  11042       Erik 		if (zoneid == ALL_ZONES)
   2166  11042       Erik 			zoneid = GLOBAL_ZONEID;
   2167  11042       Erik 	}
   2168  11042       Erik 
   2169  11042       Erik 	plen = (sizeof (nd_opt_hdr_t) + hw_addr_len + 7) / 8;
   2170  11042       Erik 	len = IPV6_HDR_LEN + sizeof (nd_neighbor_advert_t) + plen * 8;
   2171      0     stevel 	mp = allocb(len,  BPRI_LO);
   2172      0     stevel 	if (mp == NULL) {
   2173  11042       Erik 		if (need_refrele)
   2174  11042       Erik 			ill_refrele(ill);
   2175      0     stevel 		return (B_TRUE);
   2176      0     stevel 	}
   2177  11042       Erik 
   2178      0     stevel 	bzero((char *)mp->b_rptr, len);
   2179      0     stevel 	mp->b_wptr = mp->b_rptr + len;
   2180      0     stevel 
   2181  11042       Erik 	bzero(&ixas, sizeof (ixas));
   2182  11042       Erik 	ixas.ixa_flags = IXAF_BASIC_SIMPLE_V6 | IXAF_NO_HW_CKSUM;
   2183  11042       Erik 
   2184  11042       Erik 	ixas.ixa_ifindex = ill->ill_phyint->phyint_ifindex;
   2185  11042       Erik 	ixas.ixa_ipst = ipst;
   2186  11042       Erik 	ixas.ixa_cred = kcred;
   2187  11042       Erik 	ixas.ixa_cpid = NOPID;
   2188  11042       Erik 	ixas.ixa_tsl = NULL;
   2189  11042       Erik 	ixas.ixa_zoneid = zoneid;
   2190  11042       Erik 
   2191  11042       Erik 	ip6h = (ip6_t *)mp->b_rptr;
   2192      0     stevel 	ip6h->ip6_vcf = IPV6_DEFAULT_VERS_AND_FLOW;
   2193  11042       Erik 	ip6h->ip6_plen = htons(len - IPV6_HDR_LEN);
   2194      0     stevel 	ip6h->ip6_nxt = IPPROTO_ICMPV6;
   2195      0     stevel 	ip6h->ip6_hops = IPV6_MAX_HOPS;
   2196  11042       Erik 	ixas.ixa_multicast_ttl = ip6h->ip6_hops;
   2197      0     stevel 	ip6h->ip6_dst = *target;
   2198      0     stevel 	icmp6 = (icmp6_t *)&ip6h[1];
   2199      0     stevel 
   2200  11042       Erik 	if (hw_addr_len != 0) {
   2201  11042       Erik 		opt = (nd_opt_hdr_t *)((uint8_t *)ip6h + IPV6_HDR_LEN +
   2202  11042       Erik 		    sizeof (nd_neighbor_advert_t));
   2203  11042       Erik 	} else {
   2204  11042       Erik 		opt = NULL;
   2205  11042       Erik 	}
   2206  11042       Erik 	if (operation == ND_NEIGHBOR_SOLICIT) {
   2207      0     stevel 		nd_neighbor_solicit_t *ns = (nd_neighbor_solicit_t *)icmp6;
   2208      0     stevel 
   2209  11042       Erik 		if (opt != NULL && !(flag & NDP_PROBE)) {
   2210  11042       Erik 			/*
   2211  11042       Erik 			 * Note that we don't send out SLLA for ND probes
   2212  11042       Erik 			 * per RFC 4862, even though we do send out the src
   2213  11042       Erik 			 * haddr for IPv4 DAD probes, even though both IPv4
   2214  11042       Erik 			 * and IPv6 go out with the unspecified/INADDR_ANY
   2215  11042       Erik 			 * src IP addr.
   2216  11042       Erik 			 */
   2217   2546   carlsonj 			opt->nd_opt_type = ND_OPT_SOURCE_LINKADDR;
   2218  11042       Erik 		}
   2219  11042       Erik 		ip6h->ip6_src = *sender;
   2220      0     stevel 		ns->nd_ns_target = *target;
   2221      0     stevel 		if (!(flag & NDP_UNICAST)) {
   2222      0     stevel 			/* Form multicast address of the target */
   2223      0     stevel 			ip6h->ip6_dst = ipv6_solicited_node_mcast;
   2224      0     stevel 			ip6h->ip6_dst.s6_addr32[3] |=
   2225      0     stevel 			    ns->nd_ns_target.s6_addr32[3];
   2226      0     stevel 		}
   2227      0     stevel 	} else {
   2228      0     stevel 		nd_neighbor_advert_t *na = (nd_neighbor_advert_t *)icmp6;
   2229      0     stevel 
   2230   2546   carlsonj 		ASSERT(!(flag & NDP_PROBE));
   2231  11042       Erik 		if (opt != NULL)
   2232  11042       Erik 			opt->nd_opt_type = ND_OPT_TARGET_LINKADDR;
   2233  11042       Erik 		ip6h->ip6_src = *sender;
   2234      0     stevel 		na->nd_na_target = *sender;
   2235      0     stevel 		if (flag & NDP_ISROUTER)
   2236      0     stevel 			na->nd_na_flags_reserved |= ND_NA_FLAG_ROUTER;
   2237      0     stevel 		if (flag & NDP_SOLICITED)
   2238      0     stevel 			na->nd_na_flags_reserved |= ND_NA_FLAG_SOLICITED;
   2239      0     stevel 		if (flag & NDP_ORIDE)
   2240      0     stevel 			na->nd_na_flags_reserved |= ND_NA_FLAG_OVERRIDE;
   2241      0     stevel 	}
   2242   2546   carlsonj 
   2243   2546   carlsonj 	if (!(flag & NDP_PROBE)) {
   2244  11042       Erik 		if (hw_addr != NULL && opt != NULL) {
   2245   2598   carlsonj 			/* Fill in link layer address and option len */
   2246  11042       Erik 			opt->nd_opt_len = (uint8_t)plen;
   2247  11042       Erik 			bcopy(hw_addr, &opt[1], hw_addr_len);
   2248  11042       Erik 		}
   2249  11042       Erik 	}
   2250  11042       Erik 	if (opt != NULL && opt->nd_opt_type == 0) {
   2251  11042       Erik 		/* If there's no link layer address option, then strip it. */
   2252  11042       Erik 		len -= plen * 8;
   2253  11042       Erik 		mp->b_wptr = mp->b_rptr + len;
   2254  11042       Erik 		ip6h->ip6_plen = htons(len - IPV6_HDR_LEN);
   2255  11042       Erik 	}
   2256  11042       Erik 
   2257  11042       Erik 	icmp6->icmp6_type = (uint8_t)operation;
   2258      0     stevel 	icmp6->icmp6_code = 0;
   2259      0     stevel 	/*
   2260      0     stevel 	 * Prepare for checksum by putting icmp length in the icmp
   2261  11042       Erik 	 * checksum field. The checksum is calculated in ip_output.c.
   2262      0     stevel 	 */
   2263      0     stevel 	icmp6->icmp6_cksum = ip6h->ip6_plen;
   2264      0     stevel 
   2265  11042       Erik 	(void) ip_output_simple(mp, &ixas);
   2266  11042       Erik 	ixa_cleanup(&ixas);
   2267  11042       Erik 	if (need_refrele)
   2268  11042       Erik 		ill_refrele(ill);
   2269   3909    ja97890 	return (B_FALSE);
   2270      0     stevel }
   2271      0     stevel 
   2272      0     stevel /*
   2273  11042       Erik  * Used to set ND_UNREACHBLE before ncec_delete sets it NCE_F_CONDEMNED.
   2274  11042       Erik  * The datapath uses this as an indication that there
   2275  11042       Erik  * is a problem (as opposed to a NCE that was just
   2276  11042       Erik  * reclaimed due to lack of memory.
   2277  11042       Erik  * Note that static ARP entries never become unreachable.
   2278  11042       Erik  */
   2279  11042       Erik void
   2280  11042       Erik nce_make_unreachable(ncec_t *ncec)
   2281  11042       Erik {
   2282  11042       Erik 	mutex_enter(&ncec->ncec_lock);
   2283  11042       Erik 	ncec->ncec_state = ND_UNREACHABLE;
   2284  11042       Erik 	mutex_exit(&ncec->ncec_lock);
   2285  11042       Erik }
   2286  11042       Erik 
   2287  11042       Erik /*
   2288  11042       Erik  * NCE retransmit timer. Common to IPv4 and IPv6.
   2289      0     stevel  * This timer goes off when:
   2290  11042       Erik  * a. It is time to retransmit a resolution for resolver.
   2291      0     stevel  * b. It is time to send reachability probes.
   2292      0     stevel  */
   2293      0     stevel void
   2294  11042       Erik nce_timer(void *arg)
   2295  11042       Erik {
   2296  11042       Erik 	ncec_t		*ncec = arg;
   2297  11042       Erik 	ill_t		*ill = ncec->ncec_ill, *src_ill;
   2298      0     stevel 	char		addrbuf[INET6_ADDRSTRLEN];
   2299      0     stevel 	boolean_t	dropped = B_FALSE;
   2300  11042       Erik 	ip_stack_t	*ipst = ncec->ncec_ipst;
   2301  11042       Erik 	boolean_t	isv6 = (ncec->ncec_ipversion == IPV6_VERSION);
   2302  11042       Erik 	in_addr_t	sender4 = INADDR_ANY;
   2303  11042       Erik 	in6_addr_t	sender6 = ipv6_all_zeros;
   2304  11042       Erik 
   2305  11042       Erik 	/*
   2306  11042       Erik 	 * The timer has to be cancelled by ncec_delete before doing the final
   2307      0     stevel 	 * refrele. So the NCE is guaranteed to exist when the timer runs
   2308      0     stevel 	 * until it clears the timeout_id. Before clearing the timeout_id
   2309  11042       Erik 	 * bump up the refcnt so that we can continue to use the ncec
   2310  11042       Erik 	 */
   2311  11042       Erik 	ASSERT(ncec != NULL);
   2312  11042       Erik 	mutex_enter(&ncec->ncec_lock);
   2313  11042       Erik 	ncec_refhold_locked(ncec);
   2314  11042       Erik 	ncec->ncec_timeout_id = 0;
   2315  11042       Erik 	mutex_exit(&ncec->ncec_lock);
   2316  11042       Erik 
   2317  11042       Erik 	src_ill = nce_resolve_src(ncec, &sender6);
   2318  11042       Erik 	/* if we could not find a sender address, return */
   2319  11042       Erik 	if (src_ill == NULL) {
   2320  11042       Erik 		if (!isv6) {
   2321  11042       Erik 			IN6_V4MAPPED_TO_IPADDR(&ncec->ncec_addr, sender4);
   2322  11042       Erik 			ip1dbg(("no src ill for %s\n", inet_ntop(AF_INET,
   2323  11042       Erik 			    &sender4, addrbuf, sizeof (addrbuf))));
   2324  11042       Erik 		} else {
   2325  11042       Erik 			ip1dbg(("no src ill for %s\n", inet_ntop(AF_INET6,
   2326  11042       Erik 			    &ncec->ncec_addr, addrbuf, sizeof (addrbuf))));
   2327  11042       Erik 		}
   2328  11042       Erik 		nce_restart_timer(ncec, ill->ill_reachable_retrans_time);
   2329  11042       Erik 		ncec_refrele(ncec);
   2330  11042       Erik 		return;
   2331  11042       Erik 	}
   2332  11042       Erik 	if (!isv6)
   2333  11042       Erik 		IN6_V4MAPPED_TO_IPADDR(&sender6, sender4);
   2334  11042       Erik 
   2335  11042       Erik 	mutex_enter(&ncec->ncec_lock);
   2336  11042       Erik 	/*
   2337  11042       Erik 	 * Check the reachability state.
   2338  11042       Erik 	 */
   2339  11042       Erik 	switch (ncec->ncec_state) {
   2340      0     stevel 	case ND_DELAY:
   2341  11042       Erik 		ASSERT(ncec->ncec_lladdr != NULL);
   2342  11042       Erik 		ncec->ncec_state = ND_PROBE;
   2343  11042       Erik 		ncec->ncec_pcnt = ND_MAX_UNICAST_SOLICIT;
   2344  11042       Erik 		if (isv6) {
   2345  11042       Erik 			mutex_exit(&ncec->ncec_lock);
   2346  11077       Erik 			dropped = ndp_xmit(src_ill, ND_NEIGHBOR_SOLICIT,
   2347  11042       Erik 			    src_ill->ill_phys_addr,
   2348  11042       Erik 			    src_ill->ill_phys_addr_length,
   2349  11042       Erik 			    &sender6, &ncec->ncec_addr,
   2350  11042       Erik 			    NDP_UNICAST);
   2351  11042       Erik 		} else {
   2352  11077       Erik 			dropped = arp_request(ncec, sender4, src_ill);
   2353  11077       Erik 			mutex_exit(&ncec->ncec_lock);
   2354  11077       Erik 		}
   2355  11077       Erik 		if (!dropped) {
   2356  11077       Erik 			mutex_enter(&ncec->ncec_lock);
   2357  11077       Erik 			ncec->ncec_pcnt--;
   2358  11042       Erik 			mutex_exit(&ncec->ncec_lock);
   2359  11042       Erik 		}
   2360      0     stevel 		if (ip_debug > 3) {
   2361      0     stevel 			/* ip2dbg */
   2362  11042       Erik 			pr_addr_dbg("nce_timer: state for %s changed "
   2363  11042       Erik 			    "to PROBE\n", AF_INET6, &ncec->ncec_addr);
   2364  11042       Erik 		}
   2365  11042       Erik 		nce_restart_timer(ncec, ill->ill_reachable_retrans_time);
   2366  11042       Erik 		break;
   2367      0     stevel 	case ND_PROBE:
   2368      0     stevel 		/* must be retransmit timer */
   2369  11042       Erik 		ASSERT(ncec->ncec_pcnt >= -1);
   2370  11042       Erik 		if (ncec->ncec_pcnt > 0) {
   2371  11042       Erik 			/*
   2372  11042       Erik 			 * As per RFC2461, the ncec gets deleted after
   2373      0     stevel 			 * MAX_UNICAST_SOLICIT unsuccessful re-transmissions.
   2374      0     stevel 			 * Note that the first unicast solicitation is sent
   2375      0     stevel 			 * during the DELAY state.
   2376      0     stevel 			 */
   2377  11042       Erik 			ip2dbg(("nce_timer: pcount=%x dst %s\n",
   2378  11042       Erik 			    ncec->ncec_pcnt,
   2379  11042       Erik 			    inet_ntop((isv6? AF_INET6 : AF_INET),
   2380  11042       Erik 			    &ncec->ncec_addr, addrbuf, sizeof (addrbuf))));
   2381  11042       Erik 			if (NCE_PUBLISH(ncec)) {
   2382  11042       Erik 				mutex_exit(&ncec->ncec_lock);
   2383  11042       Erik 				/*
   2384  11042       Erik 				 * send out a probe; note that src_ill
   2385  11042       Erik 				 * is ignored by nce_dad() for all
   2386  11042       Erik 				 * DAD message types other than IPv6
   2387  11042       Erik 				 * unicast probes
   2388  11042       Erik 				 */
   2389  11042       Erik 				nce_dad(ncec, src_ill, B_TRUE);
   2390  11042       Erik 			} else {
   2391  11042       Erik 				ASSERT(src_ill != NULL);
   2392  11042       Erik 				if (isv6) {
   2393  11042       Erik 					mutex_exit(&ncec->ncec_lock);
   2394  11077       Erik 					dropped = ndp_xmit(src_ill,
   2395  11042       Erik 					    ND_NEIGHBOR_SOLICIT,
   2396  11042       Erik 					    src_ill->ill_phys_addr,
   2397  11042       Erik 					    src_ill->ill_phys_addr_length,
   2398  11042       Erik 					    &sender6, &ncec->ncec_addr,
   2399  11042       Erik 					    NDP_UNICAST);
   2400  11042       Erik 				} else {
   2401  11042       Erik 					/*
   2402  11042       Erik 					 * since the nce is REACHABLE,
   2403  11042       Erik 					 * the ARP request will be sent out
   2404  11042       Erik 					 * as a link-layer unicast.
   2405  11042       Erik 					 */
   2406  11077       Erik 					dropped = arp_request(ncec, sender4,
   2407  11042       Erik 					    src_ill);
   2408  11077       Erik 					mutex_exit(&ncec->ncec_lock);
   2409  11077       Erik 				}
   2410  11077       Erik 				if (!dropped) {
   2411  11077       Erik 					mutex_enter(&ncec->ncec_lock);
   2412  11077       Erik 					ncec->ncec_pcnt--;
   2413  11042       Erik 					mutex_exit(&ncec->ncec_lock);
   2414  11042       Erik 				}
   2415  11042       Erik 				nce_restart_timer(ncec,
   2416  11042       Erik 				    ill->ill_reachable_retrans_time);
   2417  11042       Erik 			}
   2418  11042       Erik 		} else if (ncec->ncec_pcnt < 0) {
   2419  11042       Erik 			/* No hope, delete the ncec */
   2420  11042       Erik 			/* Tell datapath it went bad */
   2421  11042       Erik 			ncec->ncec_state = ND_UNREACHABLE;
   2422  11042       Erik 			mutex_exit(&ncec->ncec_lock);
   2423   2546   carlsonj 			if (ip_debug > 2) {
   2424   2546   carlsonj 				/* ip1dbg */
   2425  11042       Erik 				pr_addr_dbg("nce_timer: Delete NCE for"
   2426  11042       Erik 				    " dst %s\n", (isv6? AF_INET6: AF_INET),
   2427  11042       Erik 				    &ncec->ncec_addr);
   2428  11042       Erik 			}
   2429  11042       Erik 			/* if static ARP can't delete. */
   2430  11042       Erik 			if ((ncec->ncec_flags & NCE_F_STATIC) == 0)
   2431  11042       Erik 				ncec_delete(ncec);
   2432  11042       Erik 
   2433  11042       Erik 		} else if (!NCE_PUBLISH(ncec)) {
   2434  11042       Erik 			/*
   2435  11042       Erik 			 * Probe count is 0 for a dynamic entry (one that we
   2436  11042       Erik 			 * ourselves are not publishing). We should never get
   2437  11042       Erik 			 * here if NONUD was requested, hence the ASSERT below.
   2438  11042       Erik 			 */
   2439  11042       Erik 			ASSERT((ncec->ncec_flags & NCE_F_NONUD) == 0);
   2440  11042       Erik 			ip2dbg(("nce_timer: pcount=%x dst %s\n",
   2441  11042       Erik 			    ncec->ncec_pcnt, inet_ntop(AF_INET6,
   2442  11042       Erik 			    &ncec->ncec_addr, addrbuf, sizeof (addrbuf))));
   2443  11042       Erik 			ncec->ncec_pcnt--;
   2444  11042       Erik 			mutex_exit(&ncec->ncec_lock);
   2445   2546   carlsonj 			/* Wait one interval before killing */
   2446  11042       Erik 			nce_restart_timer(ncec,
   2447  11042       Erik 			    ill->ill_reachable_retrans_time);
   2448   2546   carlsonj 		} else if (ill->ill_phyint->phyint_flags & PHYI_RUNNING) {
   2449   2546   carlsonj 			ipif_t *ipif;
   2450  11042       Erik 			ipaddr_t ncec_addr;
   2451   2546   carlsonj 
   2452   2546   carlsonj 			/*
   2453   2546   carlsonj 			 * We're done probing, and we can now declare this
   2454   2546   carlsonj 			 * address to be usable.  Let IP know that it's ok to
   2455   2546   carlsonj 			 * use.
   2456   2546   carlsonj 			 */
   2457  11042       Erik 			ncec->ncec_state = ND_REACHABLE;
   2458  11042       Erik 			ncec->ncec_flags &= ~NCE_F_UNVERIFIED;
   2459  11042       Erik 			mutex_exit(&ncec->ncec_lock);
   2460  11042       Erik 			if (isv6) {
   2461  11042       Erik 				ipif = ipif_lookup_addr_exact_v6(
   2462  11042       Erik 				    &ncec->ncec_addr, ill, ipst);
   2463  11042       Erik 			} else {
   2464  11042       Erik 				IN6_V4MAPPED_TO_IPADDR(&ncec->ncec_addr,
   2465  11042       Erik 				    ncec_addr);
   2466  11042       Erik 				ipif = ipif_lookup_addr_exact(ncec_addr, ill,
   2467  11042       Erik 				    ipst);
   2468  11042       Erik 			}
   2469   2546   carlsonj 			if (ipif != NULL) {
   2470   2546   carlsonj 				if (ipif->ipif_was_dup) {
   2471   2546   carlsonj 					char ibuf[LIFNAMSIZ + 10];
   2472   2546   carlsonj 					char sbuf[INET6_ADDRSTRLEN];
   2473   2546   carlsonj 
   2474   2546   carlsonj 					ipif->ipif_was_dup = B_FALSE;
   2475   2546   carlsonj 					(void) inet_ntop(AF_INET6,
   2476   2546   carlsonj 					    &ipif->ipif_v6lcl_addr,
   2477   2546   carlsonj 					    sbuf, sizeof (sbuf));
   2478   4972       meem 					ipif_get_name(ipif, ibuf,
   2479   4972       meem 					    sizeof (ibuf));
   2480   2546   carlsonj 					cmn_err(CE_NOTE, "recovered address "
   2481   2546   carlsonj 					    "%s on %s", sbuf, ibuf);
   2482      0     stevel 				}
   2483   2546   carlsonj 				if ((ipif->ipif_flags & IPIF_UP) &&
   2484   8023       Phil 				    !ipif->ipif_addr_ready)
   2485   8023       Phil 					ipif_up_notify(ipif);
   2486   2546   carlsonj 				ipif->ipif_addr_ready = 1;
   2487   2546   carlsonj 				ipif_refrele(ipif);
   2488   2546   carlsonj 			}
   2489  11042       Erik 			if (!isv6 && arp_no_defense)
   2490  11042       Erik 				break;
   2491   2546   carlsonj 			/* Begin defending our new address */
   2492  11042       Erik 			if (ncec->ncec_unsolicit_count > 0) {
   2493  11042       Erik 				ncec->ncec_unsolicit_count--;
   2494  11042       Erik 				if (isv6) {
   2495  11042       Erik 					dropped = ndp_announce(ncec);
   2496  11042       Erik 				} else {
   2497  11042       Erik 					dropped = arp_announce(ncec);
   2498  11042       Erik 				}
   2499  11042       Erik 
   2500  11042       Erik 				if (dropped)
   2501  11042       Erik 					ncec->ncec_unsolicit_count++;
   2502  11042       Erik 				else
   2503  11042       Erik 					ncec->ncec_last_time_defended =
   2504  11042       Erik 					    ddi_get_lbolt();
   2505  11042       Erik 			}
   2506  11042       Erik 			if (ncec->ncec_unsolicit_count > 0) {
   2507  11042       Erik 				nce_restart_timer(ncec,
   2508  11042       Erik 				    ANNOUNCE_INTERVAL(isv6));
   2509  11042       Erik 			} else if (DEFENSE_INTERVAL(isv6) != 0) {
   2510  11042       Erik 				nce_restart_timer(ncec, DEFENSE_INTERVAL(isv6));
   2511      0     stevel 			}
   2512   2546   carlsonj 		} else {
   2513   2546   carlsonj 			/*
   2514   2546   carlsonj 			 * This is an address we're probing to be our own, but
   2515   2546   carlsonj 			 * the ill is down.  Wait until it comes back before
   2516   2546   carlsonj 			 * doing anything, but switch to reachable state so
   2517   2546   carlsonj 			 * that the restart will work.
   2518   2546   carlsonj 			 */
   2519  11042       Erik 			ncec->ncec_state = ND_REACHABLE;
   2520  11042       Erik 			mutex_exit(&ncec->ncec_lock);
   2521  11042       Erik 		}
   2522  11042       Erik 		break;
   2523   8485      Peter 	case ND_INCOMPLETE: {
   2524  11042       Erik 		mblk_t	*mp, *nextmp;
   2525  11042       Erik 		mblk_t	**prevmpp;
   2526  11042       Erik 
   2527  11042       Erik 		/*
   2528  11042       Erik 		 * Per case (2) in the nce_queue_mp() comments, scan ncec_qd_mp
   2529  11042       Erik 		 * for any IPMP probe packets, and toss them.  IPMP probe
   2530  11042       Erik 		 * packets will always be at the head of ncec_qd_mp, so that
   2531  11042       Erik 		 * we can stop at the first queued ND packet that is
   2532  11042       Erik 		 * not a probe packet.
   2533  11042       Erik 		 */
   2534  11042       Erik 		prevmpp = &ncec->ncec_qd_mp;
   2535  11042       Erik 		for (mp = ncec->ncec_qd_mp; mp != NULL; mp = nextmp) {
   2536   8485      Peter 			nextmp = mp->b_next;
   2537  11042       Erik 
   2538  11042       Erik 			if (IS_UNDER_IPMP(ill) && ncec->ncec_nprobes > 0) {
   2539   8485      Peter 				inet_freemsg(mp);
   2540  11042       Erik 				ncec->ncec_nprobes--;
   2541   8485      Peter 				*prevmpp = nextmp;
   2542   8485      Peter 			} else {
   2543   8485      Peter 				prevmpp = &mp->b_next;
   2544   8485      Peter 			}
   2545   8485      Peter 		}
   2546  11042       Erik 
   2547  11042       Erik 		/*
   2548  11042       Erik 		 * Must be resolver's retransmit timer.
   2549  11042       Erik 		 */
   2550  11042       Erik 		mutex_exit(&ncec->ncec_lock);
   2551  11042       Erik 		ip_ndp_resolve(ncec);
   2552      0     stevel 		break;
   2553   8485      Peter 	}
   2554   8485      Peter 	case ND_REACHABLE:
   2555  11042       Erik 		if (((ncec->ncec_flags & NCE_F_UNSOL_ADV) &&
   2556  11042       Erik 		    ncec->ncec_unsolicit_count != 0) ||
   2557  11042       Erik 		    (NCE_PUBLISH(ncec) && DEFENSE_INTERVAL(isv6) != 0)) {
   2558  11042       Erik 			if (ncec->ncec_unsolicit_count > 0) {
   2559  11042       Erik 				ncec->ncec_unsolicit_count--;
   2560  11042       Erik 				mutex_exit(&ncec->ncec_lock);
   2561  11042       Erik 				/*
   2562  11042       Erik 				 * When we get to zero announcements left,
   2563  11042       Erik 				 * switch to address defense
   2564  11042       Erik 				 */
   2565  11042       Erik 			} else {
   2566  11042       Erik 				boolean_t rate_limit;
   2567  11042       Erik 
   2568  11042       Erik 				mutex_exit(&ncec->ncec_lock);
   2569  11042       Erik 				rate_limit = ill_defend_rate_limit(ill, ncec);
   2570  11042       Erik 				if (rate_limit) {
   2571  11042       Erik 					nce_restart_timer(ncec,
   2572  11042       Erik 					    DEFENSE_INTERVAL(isv6));
   2573  11042       Erik 					break;
   2574  11042       Erik 				}
   2575  11042       Erik 			}
   2576  11042       Erik 			if (isv6) {
   2577  11042       Erik 				dropped = ndp_announce(ncec);
   2578  11042       Erik 			} else {
   2579  11042       Erik 				dropped = arp_announce(ncec);
   2580  11042       Erik 			}
   2581  11042       Erik 			mutex_enter(&ncec->ncec_lock);
   2582      0     stevel 			if (dropped) {
   2583  11042       Erik 				ncec->ncec_unsolicit_count++;
   2584  11042       Erik 			} else {
   2585  11042       Erik 				ncec->ncec_last_time_defended =
   2586  11042       Erik 				    ddi_get_lbolt();
   2587  11042       Erik 			}
   2588  11042       Erik 			mutex_exit(&ncec->ncec_lock);
   2589  11042       Erik 			if (ncec->ncec_unsolicit_count != 0) {
   2590  11042       Erik 				nce_restart_timer(ncec,
   2591  11042       Erik 				    ANNOUNCE_INTERVAL(isv6));
   2592  11042       Erik 			} else {
   2593  11042       Erik 				nce_restart_timer(ncec, DEFENSE_INTERVAL(isv6));
   2594  11042       Erik 			}
   2595  11042       Erik 		} else {
   2596  11042       Erik 			mutex_exit(&ncec->ncec_lock);
   2597  11042       Erik 		}
   2598      0     stevel 		break;
   2599      0     stevel 	default:
   2600  11042       Erik 		mutex_exit(&ncec->ncec_lock);
   2601  11042       Erik 		break;
   2602  11042       Erik 	}
   2603  11042       Erik done:
   2604  11042       Erik 	ncec_refrele(ncec);
   2605  11042       Erik 	ill_refrele(src_ill);
   2606      0     stevel }
   2607      0     stevel 
   2608      0     stevel /*
   2609      0     stevel  * Set a link layer address from the ll_addr passed in.
   2610      0     stevel  * Copy SAP from ill.
   2611      0     stevel  */
   2612      0     stevel static void
   2613  11042       Erik nce_set_ll(ncec_t *ncec, uchar_t *ll_addr)
   2614  11042       Erik {
   2615  11042       Erik 	ill_t	*ill = ncec->ncec_ill;
   2616      0     stevel 
   2617      0     stevel 	ASSERT(ll_addr != NULL);
   2618      0     stevel 	if (ill->ill_phys_addr_length > 0) {
   2619      0     stevel 		/*
   2620      0     stevel 		 * The bcopy() below used to be called for the physical address
   2621      0     stevel 		 * length rather than the link layer address length. For
   2622      0     stevel 		 * ethernet and many other media, the phys_addr and lla are
   2623      0     stevel 		 * identical.
   2624  11042       Erik 		 *
   2625  11042       Erik 		 * The phys_addr and lla may not be the same for devices that
   2626  11042       Erik 		 * support DL_IPV6_LINK_LAYER_ADDR, though there are currently
   2627  11042       Erik 		 * no known instances of these.
   2628      0     stevel 		 *
   2629      0     stevel 		 * For PPP or other interfaces with a zero length
   2630      0     stevel 		 * physical address, don't do anything here.
   2631      0     stevel 		 * The bcopy() with a zero phys_addr length was previously
   2632      0     stevel 		 * a no-op for interfaces with a zero-length physical address.
   2633      0     stevel 		 * Using the lla for them would change the way they operate.
   2634      0     stevel 		 * Doing nothing in such cases preserves expected behavior.
   2635      0     stevel 		 */
   2636  11042       Erik 		bcopy(ll_addr, ncec->ncec_lladdr, ill->ill_nd_lla_len);
   2637  11042       Erik 	}
   2638  11042       Erik }
   2639  11042       Erik 
   2640  11042       Erik boolean_t
   2641  11042       Erik nce_cmp_ll_addr(const ncec_t *ncec, const uchar_t *ll_addr,
   2642  11042       Erik     uint32_t ll_addr_len)
   2643  11042       Erik {
   2644  11042       Erik 	ASSERT(ncec->ncec_lladdr != NULL);
   2645      0     stevel 	if (ll_addr == NULL)
   2646      0     stevel 		return (B_FALSE);
   2647  11042       Erik 	if (bcmp(ll_addr, ncec->ncec_lladdr, ll_addr_len) != 0)
   2648      0     stevel 		return (B_TRUE);
   2649      0     stevel 	return (B_FALSE);
   2650      0     stevel }
   2651      0     stevel 
   2652      0     stevel /*
   2653      0     stevel  * Updates the link layer address or the reachability state of
   2654      0     stevel  * a cache entry.  Reset probe counter if needed.
   2655      0     stevel  */
   2656  11042       Erik void
   2657  11042       Erik nce_update(ncec_t *ncec, uint16_t new_state, uchar_t *new_ll_addr)
   2658  11042       Erik {
   2659  11042       Erik 	ill_t	*ill = ncec->ncec_ill;
   2660      0     stevel 	boolean_t need_stop_timer = B_FALSE;
   2661      0     stevel 	boolean_t need_fastpath_update = B_FALSE;
   2662  11042       Erik 	nce_t	*nce = NULL;
   2663  11042       Erik 	timeout_id_t tid;
   2664  11042       Erik 
   2665  11042       Erik 	ASSERT(MUTEX_HELD(&ncec->ncec_lock));
   2666      0     stevel 	/*
   2667      0     stevel 	 * If this interface does not do NUD, there is no point
   2668      0     stevel 	 * in allowing an update to the cache entry.  Although
   2669      0     stevel 	 * we will respond to NS.
   2670      0     stevel 	 * The only time we accept an update for a resolver when
   2671      0     stevel 	 * NUD is turned off is when it has just been created.
   2672      0     stevel 	 * Non-Resolvers will always be created as REACHABLE.
   2673      0     stevel 	 */
   2674      0     stevel 	if (new_state != ND_UNCHANGED) {
   2675  11042       Erik 		if ((ncec->ncec_flags & NCE_F_NONUD) &&
   2676  11042       Erik 		    (ncec->ncec_state != ND_INCOMPLETE))
   2677      0     stevel 			return;
   2678      0     stevel 		ASSERT((int16_t)new_state >= ND_STATE_VALID_MIN);
   2679      0     stevel 		ASSERT((int16_t)new_state <= ND_STATE_VALID_MAX);
   2680      0     stevel 		need_stop_timer = B_TRUE;
   2681      0     stevel 		if (new_state == ND_REACHABLE)
   2682  11066     rafael 			ncec->ncec_last = TICK_TO_MSEC(ddi_get_lbolt64());
   2683      0     stevel 		else {
   2684      0     stevel 			/* We force NUD in this case */
   2685  11042       Erik 			ncec->ncec_last = 0;
   2686  11042       Erik 		}
   2687  11042       Erik 		ncec->ncec_state = new_state;
   2688  11042       Erik 		ncec->ncec_pcnt = ND_MAX_UNICAST_SOLICIT;
   2689  11042       Erik 		ASSERT(ncec->ncec_lladdr != NULL || new_state == ND_INITIAL ||
   2690  11042       Erik 		    new_state == ND_INCOMPLETE);
   2691  11042       Erik 	}
   2692  11042       Erik 	if (need_stop_timer || (ncec->ncec_flags & NCE_F_STATIC)) {
   2693  11042       Erik 		tid = ncec->ncec_timeout_id;
   2694  11042       Erik 		ncec->ncec_timeout_id = 0;
   2695  11042       Erik 	}
   2696  11042       Erik 	/*
   2697  11042       Erik 	 * Re-trigger fastpath probe and
   2698      0     stevel 	 * overwrite the DL_UNITDATA_REQ data, noting we'll lose
   2699      0     stevel 	 * whatever packets that happens to be transmitting at the time.
   2700      0     stevel 	 */
   2701      0     stevel 	if (new_ll_addr != NULL) {
   2702  11042       Erik 		bcopy(new_ll_addr, ncec->ncec_lladdr,
   2703  11042       Erik 		    ill->ill_phys_addr_length);
   2704    741   masputra 		need_fastpath_update = B_TRUE;
   2705      0     stevel 	}
   2706  11042       Erik 	mutex_exit(&ncec->ncec_lock);
   2707  11042       Erik 	if (need_stop_timer || (ncec->ncec_flags & NCE_F_STATIC)) {
   2708  11042       Erik 		if (tid != 0)
   2709  11042       Erik 			(void) untimeout(tid);
   2710  11042       Erik 	}
   2711  11042       Erik 	if (need_fastpath_update) {
   2712  11042       Erik 		/*
   2713  11042       Erik 		 * Delete any existing existing dlur_mp and fp_mp information.
   2714  11042       Erik 		 * For IPMP interfaces, all underlying ill's must be checked
   2715  11042       Erik 		 * and purged.
   2716  11042       Erik 		 */
   2717  11042       Erik 		nce_fastpath_list_delete(ncec->ncec_ill, ncec, NULL);
   2718  11042       Erik 		/*
   2719  11042       Erik 		 * add the new dlur_mp and fp_mp
   2720  11042       Erik 		 */
   2721  11042       Erik 		nce = nce_fastpath(ncec, B_TRUE, NULL);
   2722  11042       Erik 		if (nce != NULL)
   2723  11042       Erik 			nce_refrele(nce);
   2724  11042       Erik 	}
   2725  11042       Erik 	mutex_enter(&ncec->ncec_lock);
   2726  11042       Erik }
   2727  11042       Erik 
   2728  11042       Erik static void
   2729  11042       Erik nce_queue_mp_common(ncec_t *ncec, mblk_t *mp, boolean_t head_insert)
   2730   2535   sangeeta {
   2731   2535   sangeeta 	uint_t	count = 0;
   2732   8485      Peter 	mblk_t  **mpp, *tmp;
   2733   2535   sangeeta 
   2734  11042       Erik 	ASSERT(MUTEX_HELD(&ncec->ncec_lock));
   2735  11042       Erik 
   2736  11042       Erik 	for (mpp = &ncec->ncec_qd_mp; *mpp != NULL; mpp = &(*mpp)->b_next) {
   2737  11042       Erik 		if (++count > ncec->ncec_ill->ill_max_buf) {
   2738  11042       Erik 			tmp = ncec->ncec_qd_mp->b_next;
   2739  11042       Erik 			ncec->ncec_qd_mp->b_next = NULL;
   2740  11042       Erik 			/*
   2741  11042       Erik 			 * if we never create data addrs on the under_ill
   2742  11042       Erik 			 * does this matter?
   2743  11042       Erik 			 */
   2744  11042       Erik 			BUMP_MIB(ncec->ncec_ill->ill_ip_mib,
   2745  11042       Erik 			    ipIfStatsOutDiscards);
   2746  11042       Erik 			ip_drop_output("ipIfStatsOutDiscards", ncec->ncec_qd_mp,
   2747  11042       Erik 			    ncec->ncec_ill);
   2748  11042       Erik 			freemsg(ncec->ncec_qd_mp);
   2749  11042       Erik 			ncec->ncec_qd_mp = tmp;
   2750   2535   sangeeta 		}
   2751   2535   sangeeta 	}
   2752   8485      Peter 
   2753   2535   sangeeta 	if (head_insert) {
   2754  11042       Erik 		ncec->ncec_nprobes++;
   2755  11042       Erik 		mp->b_next = ncec->ncec_qd_mp;
   2756  11042       Erik 		ncec->ncec_qd_mp = mp;
   2757   2535   sangeeta 	} else {
   2758   2535   sangeeta 		*mpp = mp;
   2759   2535   sangeeta 	}
   2760   2535   sangeeta }
   2761   2535   sangeeta 
/*
 * nce_queue_mp will queue the packet into the ncec_qd_mp. The packet will be
 * queued at the head or tail of the queue based on the input argument
 * 'head_insert'. The caller should specify this argument as B_TRUE if this
 * packet is an IPMP probe packet, in which case the following happens:
 *
 *   1. Insert it at the head of the ncec_qd_mp list.  Consider the normal
 *	(non-ipmp_probe) load-spreading case where the source address of the
 *	ND packet is not tied to ncec_ill. If the ill bound to the source
 *	address cannot receive, the response to the ND packet will not be
 *	received. However, if ND packets for ncec_ill's probes are queued
 *	behind that ND packet, those probes will also fail to be sent, and
 *	thus in.mpathd will erroneously conclude that ncec_ill has also
 *	failed.
 *
 *   2. Drop the ipmp_probe packet in the NCE timer handler (its
 *	ND_INCOMPLETE case) if the ND did not succeed on the first attempt.
 *	This ensures that ND problems do not manifest as probe RTT spikes.
 *
 * We achieve this by inserting ipmp_probe() packets at the head of the
 * ncec_qd_mp queue.
 *
 * The ncec for the probe target is created with ncec_ill set to the ipmp_ill,
 * but the caller needs to set head_insert to B_TRUE if this is a probe packet.
 *
 * The caller must hold ncec_lock.
 */
void
nce_queue_mp(ncec_t *ncec, mblk_t *mp, boolean_t head_insert)
{
	ASSERT(MUTEX_HELD(&ncec->ncec_lock));
	/* All of the queueing logic lives in the common helper. */
	nce_queue_mp_common(ncec, mp, head_insert);
}
   2792      0     stevel 
   2793      0     stevel /*
   2794      0     stevel  * Called when address resolution failed due to a timeout.
   2795      0     stevel  * Send an ICMP unreachable in response to all queued packets.
   2796      0     stevel  */
   2797      0     stevel void
   2798  11042       Erik ndp_resolv_failed(ncec_t *ncec)
   2799  11042       Erik {
   2800  11042       Erik 	mblk_t	*mp, *nxt_mp;
   2801      0     stevel 	char	buf[INET6_ADDRSTRLEN];
   2802  11042       Erik 	ill_t *ill = ncec->ncec_ill;
   2803  11042       Erik 	ip_recv_attr_t	iras;
   2804  11042       Erik 
   2805  11042       Erik 	bzero(&iras, sizeof (iras));
   2806  11042       Erik 	iras.ira_flags = 0;
   2807  11042       Erik 	/*
   2808  11042       Erik 	 * we are setting the ira_rill to the ipmp_ill (instead of
   2809  11042       Erik 	 * the actual ill on which the packet was received), but this
   2810  11042       Erik 	 * is ok because we don't actually need the real ira_rill.
   2811  11042       Erik 	 * to send the icmp unreachable to the sender.
   2812  11042       Erik 	 */
   2813  11042       Erik 	iras.ira_ill = iras.ira_rill = ill;
   2814  11042       Erik 	iras.ira_ruifindex = ill->ill_phyint->phyint_ifindex;
   2815  11042       Erik 	iras.ira_rifindex = iras.ira_ruifindex;
   2816  11042       Erik 
   2817  11042       Erik 	ip1dbg(("ndp_resolv_failed: dst %s\n",
   2818  11042       Erik 	    inet_ntop(AF_INET6, (char *)&ncec->ncec_addr, buf, sizeof (buf))));
   2819  11042       Erik 	mutex_enter(&ncec->ncec_lock);
   2820  11042       Erik 	mp = ncec->ncec_qd_mp;
   2821  11042       Erik 	ncec->ncec_qd_mp = NULL;
   2822  11042       Erik 	ncec->ncec_nprobes = 0;
   2823  11042       Erik 	mutex_exit(&ncec->ncec_lock);
   2824      0     stevel 	while (mp != NULL) {
   2825      0     stevel 		nxt_mp = mp->b_next;
   2826      0     stevel 		mp->b_next = NULL;
   2827  11042       Erik 
   2828  11042       Erik 		BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutDiscards);
   2829  11042       Erik 		ip_drop_output("ipIfStatsOutDiscards - address unreachable",
   2830  11042       Erik 		    mp, ill);
   2831  11042       Erik 		icmp_unreachable_v6(mp,
   2832  11042       Erik 		    ICMP6_DST_UNREACH_ADDR, B_FALSE, &iras);
   2833  11042       Erik 		ASSERT(!(iras.ira_flags & IRAF_IPSEC_SECURE));
   2834      0     stevel 		mp = nxt_mp;
   2835      0     stevel 	}
   2836  11042       Erik 	ncec_cb_dispatch(ncec); /* finish off waiting callbacks */
   2837  11042       Erik }
   2838  11042       Erik 
   2839  11042       Erik /*
   2840  11042       Erik  * Handle the completion of NDP and ARP resolution.
   2841  11042       Erik  */
   2842  11042       Erik void
   2843  11042       Erik nce_resolv_ok(ncec_t *ncec)
   2844  11042       Erik {
   2845  11042       Erik 	mblk_t *mp;
   2846  11042       Erik 	uint_t pkt_len;
   2847  11042       Erik 	iaflags_t ixaflags = IXAF_NO_TRACE;
   2848  11042       Erik 	nce_t *nce;
   2849  11042       Erik 	ill_t	*ill = ncec->ncec_ill;
   2850  11042       Erik 	boolean_t isv6 = (ncec->ncec_ipversion == IPV6_VERSION);
   2851  11042       Erik 	ip_stack_t *ipst = ill->ill_ipst;
   2852  11042       Erik 
   2853  11042       Erik 	if (IS_IPMP(ncec->ncec_ill)) {
   2854  11042       Erik 		nce_resolv_ipmp_ok(ncec);
   2855  11042       Erik 		return;
   2856  11042       Erik 	}
   2857  11042       Erik 	/* non IPMP case */
   2858  11042       Erik 
   2859  11042       Erik 	mutex_enter(&ncec->ncec_lock);
   2860  11042       Erik 	ASSERT(ncec->ncec_nprobes == 0);
   2861  11042       Erik 	mp = ncec->ncec_qd_mp;
   2862  11042       Erik 	ncec->ncec_qd_mp = NULL;
   2863  11042       Erik 	mutex_exit(&ncec->ncec_lock);
   2864  11042       Erik 
   2865  11042       Erik 	while (mp != NULL) {
   2866  11042       Erik 		mblk_t *nxt_mp;
   2867  11042       Erik 
   2868  11042       Erik 		if (ill->ill_isv6) {
   2869  11042       Erik 			ip6_t *ip6h = (ip6_t *)mp->b_rptr;
   2870  11042       Erik 
   2871  11042       Erik 			pkt_len = ntohs(ip6h->ip6_plen) + IPV6_HDR_LEN;
   2872  11042       Erik 		} else {
   2873  11042       Erik 			ipha_t *ipha = (ipha_t *)mp->b_rptr;
   2874  11042       Erik 
   2875  11042       Erik 			ixaflags |= IXAF_IS_IPV4;
   2876  11042       Erik 			pkt_len = ntohs(ipha->ipha_length);
   2877  11042       Erik 		}
   2878  11042       Erik 		nxt_mp = mp->b_next;
   2879  11042       Erik 		mp->b_next = NULL;
   2880  11042       Erik 		/*
   2881  11042       Erik 		 * IXAF_NO_DEV_FLOW_CTL information for TCP packets is no
   2882  11042       Erik 		 * longer available, but it's ok to drop this flag because TCP
   2883  11042       Erik 		 * has its own flow-control in effect, so TCP packets
   2884  11042       Erik 		 * are not likely to get here when flow-control is in effect.
   2885  11042       Erik 		 */
   2886  11042       Erik 		mutex_enter(&ill->ill_lock);
   2887  11042       Erik 		nce = nce_lookup(ill, &ncec->ncec_addr);
   2888  11042       Erik 		mutex_exit(&ill->ill_lock);
   2889  11042       Erik 
   2890  11042       Erik 		if (nce == NULL) {
   2891  11042       Erik 			if (isv6) {
   2892  11042       Erik 				BUMP_MIB(&ipst->ips_ip6_mib,
   2893  11042       Erik 				    ipIfStatsOutDiscards);
   2894  11042       Erik 			} else {
   2895  11042       Erik 				BUMP_MIB(&ipst->ips_ip_mib,
   2896  11042       Erik 				    ipIfStatsOutDiscards);
   2897  11042       Erik 			}
   2898  11042       Erik 			ip_drop_output("ipIfStatsOutDiscards - no nce",
   2899  11042       Erik 			    mp, NULL);
   2900  11042       Erik 			freemsg(mp);
   2901  11042       Erik 		} else {
   2902  11042       Erik 			/*
   2903  11042       Erik 			 * We don't know the zoneid, but
   2904  11042       Erik 			 * ip_xmit does not care since IXAF_NO_TRACE
   2905  11042       Erik 			 * is set. (We traced the packet the first
   2906  11042       Erik 			 * time through ip_xmit.)
   2907  11042       Erik 			 */
   2908  11042       Erik 			(void) ip_xmit(mp, nce, ixaflags, pkt_len, 0,
   2909  11042       Erik 			    ALL_ZONES, 0, NULL);
   2910  11042       Erik 			nce_refrele(nce);
   2911  11042       Erik 		}
   2912  11042       Erik 		mp = nxt_mp;
   2913  11042       Erik 	}
   2914  11042       Erik 
   2915  11042       Erik 	ncec_cb_dispatch(ncec); /* complete callbacks */
   2916  11042       Erik }
   2917  11042       Erik 
   2918  11042       Erik /*
   2919  11042       Erik  * Called by SIOCSNDP* ioctl to add/change an ncec entry
   2920      0     stevel  * and the corresponding attributes.
   2921      0     stevel  * Disallow states other than ND_REACHABLE or ND_STALE.
   2922      0     stevel  */
   2923      0     stevel int
   2924      0     stevel ndp_sioc_update(ill_t *ill, lif_nd_req_t *lnr)
   2925      0     stevel {
   2926      0     stevel 	sin6_t		*sin6;
   2927      0     stevel 	in6_addr_t	*addr;
   2928  11042       Erik 	ncec_t		*ncec;
   2929      0     stevel 	nce_t		*nce;
   2930  11042       Erik 	int		err = 0;
   2931      0     stevel 	uint16_t	new_flags = 0;
   2932      0     stevel 	uint16_t	old_flags = 0;
   2933      0     stevel 	int		inflags = lnr->lnr_flags;
   2934   3448   dh155122 	ip_stack_t	*ipst = ill->ill_ipst;
   2935  11042       Erik 	boolean_t	do_postprocess = B_FALSE;
   2936      0     stevel 
   2937   2535   sangeeta 	ASSERT(ill->ill_isv6);
   2938      0     stevel 	if ((lnr->lnr_state_create != ND_REACHABLE) &&
   2939      0     stevel 	    (lnr->lnr_state_create != ND_STALE))
   2940      0     stevel 		return (EINVAL);
   2941      0     stevel 
   2942      0     stevel 	sin6 = (sin6_t *)&lnr->lnr_addr;
   2943      0     stevel 	addr = &sin6->sin6_addr;
   2944      0     stevel 
   2945   3448   dh155122 	mutex_enter(&ipst->ips_ndp6->ndp_g_lock);
   2946  11042       Erik 	ASSERT(!IS_UNDER_IPMP(ill));
   2947  11042       Erik 	nce = nce_lookup_addr(ill, addr);
   2948      0     stevel 	if (nce != NULL)
   2949  11042       Erik 		new_flags = nce->nce_common->ncec_flags;
   2950      0     stevel 
   2951      0     stevel 	switch (inflags & (NDF_ISROUTER_ON|NDF_ISROUTER_OFF)) {
   2952      0     stevel 	case NDF_ISROUTER_ON:
   2953      0     stevel 		new_flags |= NCE_F_ISROUTER;
   2954      0     stevel 		break;
   2955      0     stevel 	case NDF_ISROUTER_OFF:
   2956      0     stevel 		new_flags &= ~NCE_F_ISROUTER;
   2957      0     stevel 		break;
   2958      0     stevel 	case (NDF_ISROUTER_OFF|NDF_ISROUTER_ON):
   2959   3448   dh155122 		mutex_exit(&ipst->ips_ndp6->ndp_g_lock);
   2960      0     stevel 		if (nce != NULL)
   2961  11042       Erik 			nce_refrele(nce);
   2962      0     stevel 		return (EINVAL);
   2963      0     stevel 	}
   2964      0     stevel 
   2965      0     stevel 	switch (inflags & (NDF_ANYCAST_ON|NDF_ANYCAST_OFF)) {
   2966      0     stevel 	case NDF_ANYCAST_ON:
   2967      0     stevel 		new_flags |= NCE_F_ANYCAST;
   2968      0     stevel 		break;
   2969      0     stevel 	case NDF_ANYCAST_OFF:
   2970      0     stevel 		new_flags &= ~NCE_F_ANYCAST;
   2971      0     stevel 		break;
   2972      0     stevel 	case (NDF_ANYCAST_OFF|NDF_ANYCAST_ON):
   2973   3448   dh155122 		mutex_exit(&ipst->ips_ndp6->ndp_g_lock);
   2974      0     stevel 		if (nce != NULL)
   2975  11042       Erik 			nce_refrele(nce);
   2976      0     stevel 		return (EINVAL);
   2977      0     stevel 	}
   2978      0     stevel 
   2979      0     stevel 	if (nce == NULL) {
   2980  11042       Erik 		err = nce_add_v6(ill,
   2981      0     stevel 		    (uchar_t *)lnr->lnr_hdw_addr,
   2982  11042       Erik 		    ill->ill_phys_addr_length,
   2983      0     stevel 		    addr,
   2984      0     stevel 		    new_flags,
   2985      0     stevel 		    lnr->lnr_state_create,
   2986   4714    sowmini 		    &nce);
   2987      0     stevel 		if (err != 0) {
   2988   3448   dh155122 			mutex_exit(&ipst->ips_ndp6->ndp_g_lock);
   2989      0     stevel 			ip1dbg(("ndp_sioc_update: Can't create NCE %d\n", err));
   2990      0     stevel 			return (err);
   2991  11042       Erik 		} else {
   2992  11042       Erik 			do_postprocess = B_TRUE;
   2993  11042       Erik 		}
   2994  11042       Erik 	}
   2995  11042       Erik 	ncec = nce->nce_common;
   2996  11042       Erik 	old_flags = ncec->ncec_flags;
   2997      0     stevel 	if (old_flags & NCE_F_ISROUTER && !(new_flags & NCE_F_ISROUTER)) {
   2998  11042       Erik 		ncec_router_to_host(ncec);
   2999   3448   dh155122 		mutex_exit(&ipst->ips_ndp6->ndp_g_lock);
   3000  11042       Erik 		if (do_postprocess)
   3001  11042       Erik 			err = nce_add_v6_postprocess(nce);
   3002  11042       Erik 		nce_refrele(nce);
   3003      0     stevel 		return (0);
   3004      0     stevel 	}
   3005   3448   dh155122 	mutex_exit(&ipst->ips_ndp6->ndp_g_lock);
   3006      0     stevel 
   3007  11042       Erik 	if (do_postprocess)
   3008  11042       Erik 		err = nce_add_v6_postprocess(nce);
   3009  11042       Erik