1 9175 Sowmini /* 2 9175 Sowmini * CDDL HEADER START 3 9175 Sowmini * 4 9175 Sowmini * The contents of this file are subject to the terms of the 5 9175 Sowmini * Common Development and Distribution License (the "License"). 6 9175 Sowmini * You may not use this file except in compliance with the License. 7 9175 Sowmini * 8 9175 Sowmini * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 9175 Sowmini * or http://www.opensolaris.org/os/licensing. 10 9175 Sowmini * See the License for the specific language governing permissions 11 9175 Sowmini * and limitations under the License. 12 9175 Sowmini * 13 9175 Sowmini * When distributing Covered Code, include this CDDL HEADER in each 14 9175 Sowmini * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 9175 Sowmini * If applicable, add the following below this CDDL HEADER, with the 16 9175 Sowmini * fields enclosed by brackets "[]" replaced with your own identifying 17 9175 Sowmini * information: Portions Copyright [yyyy] [name of copyright owner] 18 9175 Sowmini * 19 9175 Sowmini * CDDL HEADER END 20 9175 Sowmini */ 21 11042 Erik 22 9175 Sowmini /* 23 9175 Sowmini * Copyright 2009 Sun Microsystems, Inc. All rights reserved. 24 9175 Sowmini * Use is subject to license terms. 25 9175 Sowmini */ 26 9175 Sowmini 27 9175 Sowmini /* 28 9175 Sowmini * Functions to implement IP address -> link layer address (PSARC 2006/482) 29 9175 Sowmini */ 30 9175 Sowmini #include <inet/ip2mac.h> 31 9175 Sowmini #include <inet/ip2mac_impl.h> 32 9175 Sowmini #include <sys/zone.h> 33 9175 Sowmini #include <inet/ip_ndp.h> 34 9175 Sowmini #include <inet/ip_if.h> 35 9175 Sowmini #include <inet/ip6.h> 36 9175 Sowmini 37 9175 Sowmini /* 38 9175 Sowmini * dispatch pending callbacks. 39 9175 Sowmini */ 40 9175 Sowmini void 41 11042 Erik ncec_cb_dispatch(ncec_t *ncec) 42 9175 Sowmini { 43 11042 Erik ncec_cb_t *ncec_cb; 44 9175 Sowmini ip2mac_t ip2m; 45 9175 Sowmini 46 11042 Erik mutex_enter(&ncec->ncec_lock); 47 11042 Erik if (list_is_empty(&ncec->ncec_cb)) { 48 11042 Erik mutex_exit(&ncec->ncec_lock); 49 9175 Sowmini return; 50 9175 Sowmini } 51 11042 Erik ncec_ip2mac_response(&ip2m, ncec); 52 11042 Erik ncec_cb_refhold_locked(ncec); 53 9175 Sowmini /* 54 9175 Sowmini * IP does not hold internal locks like nce_lock across calls to 55 9175 Sowmini * other subsystems for fear of recursive lock entry and lock 56 9175 Sowmini * hierarchy violation. The caller may be holding locks across 57 9175 Sowmini * the call to IP. (It would be ideal if no subsystem holds locks 58 9175 Sowmini * across calls into another subsystem, especially if calls can 59 9175 Sowmini * happen in either direction). 60 9175 Sowmini */ 61 11042 Erik ncec_cb = list_head(&ncec->ncec_cb); 62 11042 Erik for (; ncec_cb != NULL; ncec_cb = list_next(&ncec->ncec_cb, ncec_cb)) { 63 11042 Erik if (ncec_cb->ncec_cb_flags & NCE_CB_DISPATCHED) 64 9175 Sowmini continue; 65 11042 Erik ncec_cb->ncec_cb_flags |= NCE_CB_DISPATCHED; 66 11042 Erik mutex_exit(&ncec->ncec_lock); 67 11042 Erik (*ncec_cb->ncec_cb_func)(&ip2m, ncec_cb->ncec_cb_arg); 68 11042 Erik mutex_enter(&ncec->ncec_lock); 69 9175 Sowmini } 70 11042 Erik ncec_cb_refrele(ncec); 71 11042 Erik mutex_exit(&ncec->ncec_lock); 72 9175 Sowmini } 73 9175 Sowmini 74 9175 Sowmini /* 75 9175 Sowmini * fill up the ip2m response fields with inforamation from the nce. 76 9175 Sowmini */ 77 9175 Sowmini void 78 11042 Erik ncec_ip2mac_response(ip2mac_t *ip2m, ncec_t *ncec) 79 9175 Sowmini { 80 11042 Erik boolean_t isv6 = (ncec->ncec_ipversion == IPV6_VERSION); 81 11042 Erik sin_t *sin; 82 9175 Sowmini sin6_t *sin6; 83 9175 Sowmini struct sockaddr_dl *sdl; 84 9175 Sowmini 85 11042 Erik ASSERT(MUTEX_HELD(&ncec->ncec_lock)); 86 9175 Sowmini bzero(ip2m, sizeof (*ip2m)); 87 11042 Erik if (NCE_ISREACHABLE(ncec) && !NCE_ISCONDEMNED(ncec)) 88 9175 Sowmini ip2m->ip2mac_err = 0; 89 9175 Sowmini else 90 9175 Sowmini ip2m->ip2mac_err = ESRCH; 91 9175 Sowmini if (isv6) { 92 9175 Sowmini sin6 = (sin6_t *)&ip2m->ip2mac_pa; 93 9175 Sowmini sin6->sin6_family = AF_INET6; 94 11042 Erik sin6->sin6_addr = ncec->ncec_addr; 95 11042 Erik } else { 96 11042 Erik sin = (sin_t *)&ip2m->ip2mac_pa; 97 11042 Erik sin->sin_family = AF_INET; 98 11042 Erik IN6_V4MAPPED_TO_INADDR(&ncec->ncec_addr, &sin->sin_addr); 99 9175 Sowmini } 100 9175 Sowmini if (ip2m->ip2mac_err == 0) { 101 9175 Sowmini sdl = &ip2m->ip2mac_ha; 102 9175 Sowmini sdl->sdl_family = AF_LINK; 103 11042 Erik sdl->sdl_type = ncec->ncec_ill->ill_type; 104 11042 Erik /* 105 11042 Erik * should we put ncec_ill->ill_name in there? why? 106 11042 Erik * likewise for the sdl_index 107 11042 Erik */ 108 9175 Sowmini sdl->sdl_nlen = 0; 109 11042 Erik sdl->sdl_alen = ncec->ncec_ill->ill_phys_addr_length; 110 11042 Erik if (ncec->ncec_lladdr != NULL) 111 11042 Erik bcopy(ncec->ncec_lladdr, LLADDR(sdl), sdl->sdl_alen); 112 9175 Sowmini } 113 9175 Sowmini } 114 9175 Sowmini 115 9175 Sowmini void 116 11042 Erik ncec_cb_refhold_locked(ncec_t *ncec) 117 9175 Sowmini { 118 11042 Erik ASSERT(MUTEX_HELD(&ncec->ncec_lock)); 119 11042 Erik ncec->ncec_cb_walker_cnt++; 120 9175 Sowmini } 121 9175 Sowmini 122 9175 Sowmini void 123 11042 Erik ncec_cb_refrele(ncec_t *ncec) 124 9175 Sowmini { 125 11042 Erik ncec_cb_t *ncec_cb, *ncec_cb_next = NULL; 126 9175 Sowmini 127 11042 Erik ASSERT(MUTEX_HELD(&ncec->ncec_lock)); 128 11042 Erik if (--ncec->ncec_cb_walker_cnt == 0) { 129 11042 Erik for (ncec_cb = list_head(&ncec->ncec_cb); ncec_cb != NULL; 130 11042 Erik ncec_cb = ncec_cb_next) { 131 9175 Sowmini 132 11042 Erik ncec_cb_next = list_next(&ncec->ncec_cb, ncec_cb); 133 11042 Erik if ((ncec_cb->ncec_cb_flags & NCE_CB_DISPATCHED) == 0) 134 9175 Sowmini continue; 135 11042 Erik list_remove(&ncec->ncec_cb, ncec_cb); 136 11042 Erik kmem_free(ncec_cb, sizeof (*ncec_cb)); 137 9175 Sowmini } 138 9175 Sowmini } 139 9175 Sowmini } 140 9175 Sowmini 141 9175 Sowmini /* 142 9175 Sowmini * add a callback to the nce, so that the callback can be invoked 143 9175 Sowmini * after address resolution succeeds/fails. 144 9175 Sowmini */ 145 9175 Sowmini static ip2mac_id_t 146 11042 Erik ncec_add_cb(ncec_t *ncec, ip2mac_callback_t *cb, void *cbarg) 147 9175 Sowmini { 148 11042 Erik ncec_cb_t *nce_cb; 149 9175 Sowmini ip2mac_id_t ip2mid = NULL; 150 9175 Sowmini 151 11042 Erik ASSERT(MUTEX_HELD(&ncec->ncec_lock)); 152 9175 Sowmini if ((nce_cb = kmem_zalloc(sizeof (*nce_cb), KM_NOSLEEP)) == NULL) 153 9175 Sowmini return (ip2mid); 154 11042 Erik nce_cb->ncec_cb_func = cb; 155 11042 Erik nce_cb->ncec_cb_arg = cbarg; 156 9175 Sowmini /* 157 11042 Erik * We identify the ncec_cb_t during cancellation by the address 158 9175 Sowmini * of the nce_cb_t itself, and, as a short-cut for eliminating 159 11042 Erik * clear mismatches, only look in the callback list of ncec's 160 9175 Sowmini * whose address is equal to the nce_cb_id. 161 9175 Sowmini */ 162 11042 Erik nce_cb->ncec_cb_id = ncec; /* no refs! just an address */ 163 11042 Erik list_insert_tail(&ncec->ncec_cb, nce_cb); 164 11042 Erik ip2mid = ncec; /* this is the id to be used in ip2mac_cancel */ 165 9175 Sowmini 166 9175 Sowmini return (nce_cb); 167 9175 Sowmini } 168 9175 Sowmini 169 9175 Sowmini /* 170 9175 Sowmini * Resolve an IP address to a link-layer address using the data-structures 171 9175 Sowmini * defined in PSARC 2006/482. If the current link-layer address for the 172 9175 Sowmini * IP address is not known, the state-machine for resolving the resolution 173 9175 Sowmini * will be triggered, and the callback function (*cb) will be invoked after 174 9175 Sowmini * the resolution completes. 175 9175 Sowmini */ 176 9175 Sowmini ip2mac_id_t 177 11042 Erik ip2mac(uint_t op, ip2mac_t *ip2m, ip2mac_callback_t *cb, void *cbarg, 178 9175 Sowmini zoneid_t zoneid) 179 9175 Sowmini { 180 11042 Erik ncec_t *ncec; 181 11042 Erik nce_t *nce = NULL; 182 9175 Sowmini boolean_t isv6; 183 9175 Sowmini ill_t *ill; 184 9175 Sowmini netstack_t *ns; 185 9175 Sowmini ip_stack_t *ipst; 186 9175 Sowmini ip2mac_id_t ip2mid = NULL; 187 11042 Erik sin_t *sin; 188 9175 Sowmini sin6_t *sin6; 189 9175 Sowmini int err; 190 9175 Sowmini uint64_t delta; 191 11042 Erik boolean_t need_resolve = B_FALSE; 192 9175 Sowmini 193 9175 Sowmini isv6 = (ip2m->ip2mac_pa.ss_family == AF_INET6); 194 9175 Sowmini 195 9175 Sowmini ns = netstack_find_by_zoneid(zoneid); 196 9175 Sowmini if (ns == NULL) { 197 9175 Sowmini ip2m->ip2mac_err = EINVAL; 198 9175 Sowmini return (NULL); 199 9175 Sowmini } 200 9175 Sowmini /* 201 9175 Sowmini * For exclusive stacks we reset the zoneid to zero 202 9175 Sowmini * since IP uses the global zoneid in the exclusive stacks. 203 9175 Sowmini */ 204 9175 Sowmini if (ns->netstack_stackid != GLOBAL_NETSTACKID) 205 9175 Sowmini zoneid = GLOBAL_ZONEID; 206 9175 Sowmini ipst = ns->netstack_ip; 207 9175 Sowmini /* 208 9175 Sowmini * find the ill from the ip2m->ip2mac_ifindex 209 9175 Sowmini */ 210 11042 Erik ill = ill_lookup_on_ifindex(ip2m->ip2mac_ifindex, isv6, ipst); 211 9175 Sowmini if (ill == NULL) { 212 9175 Sowmini ip2m->ip2mac_err = ENXIO; 213 9175 Sowmini netstack_rele(ns); 214 9175 Sowmini return (NULL); 215 9175 Sowmini } 216 9175 Sowmini if (isv6) { 217 9175 Sowmini sin6 = (sin6_t *)&ip2m->ip2mac_pa; 218 11042 Erik if (op == IP2MAC_LOOKUP) { 219 11042 Erik nce = nce_lookup_v6(ill, &sin6->sin6_addr); 220 9175 Sowmini } else { 221 11042 Erik err = nce_lookup_then_add_v6(ill, NULL, 222 11042 Erik ill->ill_phys_addr_length, 223 11042 Erik &sin6->sin6_addr, 0, ND_UNCHANGED, &nce); 224 9175 Sowmini } 225 9175 Sowmini } else { 226 11042 Erik sin = (sin_t *)&ip2m->ip2mac_pa; 227 11042 Erik if (op == IP2MAC_LOOKUP) { 228 11042 Erik nce = nce_lookup_v4(ill, &sin->sin_addr.s_addr); 229 11042 Erik } else { 230 11042 Erik err = nce_lookup_then_add_v4(ill, NULL, 231 11042 Erik ill->ill_phys_addr_length, 232 11042 Erik &sin->sin_addr.s_addr, 0, ND_UNCHANGED, &nce); 233 11042 Erik } 234 9175 Sowmini } 235 11042 Erik if (op == IP2MAC_LOOKUP) { 236 9175 Sowmini if (nce == NULL) { 237 9175 Sowmini ip2m->ip2mac_err = ESRCH; 238 9175 Sowmini goto done; 239 9175 Sowmini } 240 11042 Erik ncec = nce->nce_common; 241 11066 rafael delta = TICK_TO_MSEC(ddi_get_lbolt64()) - ncec->ncec_last; 242 11042 Erik mutex_enter(&ncec->ncec_lock); 243 11042 Erik if (NCE_ISREACHABLE(ncec) && 244 11042 Erik delta < (uint64_t)ill->ill_reachable_time) { 245 11042 Erik ncec_ip2mac_response(ip2m, ncec); 246 9175 Sowmini ip2m->ip2mac_err = 0; 247 9175 Sowmini } else { 248 9175 Sowmini ip2m->ip2mac_err = ESRCH; 249 9175 Sowmini } 250 11042 Erik mutex_exit(&ncec->ncec_lock); 251 9175 Sowmini goto done; 252 9175 Sowmini } else { 253 9175 Sowmini if (err != 0 && err != EEXIST) { 254 9175 Sowmini ip2m->ip2mac_err = err; 255 9175 Sowmini goto done; 256 9175 Sowmini } 257 9175 Sowmini } 258 11042 Erik ncec = nce->nce_common; 259 11066 rafael delta = TICK_TO_MSEC(ddi_get_lbolt64()) - ncec->ncec_last; 260 11042 Erik mutex_enter(&ncec->ncec_lock); 261 11042 Erik if (NCE_ISCONDEMNED(ncec)) { 262 9175 Sowmini ip2m->ip2mac_err = ESRCH; 263 11042 Erik } else { 264 11042 Erik if (NCE_ISREACHABLE(ncec)) { 265 11042 Erik if (NCE_MYADDR(ncec) || 266 11042 Erik delta < (uint64_t)ill->ill_reachable_time) { 267 11042 Erik ncec_ip2mac_response(ip2m, ncec); 268 11042 Erik ip2m->ip2mac_err = 0; 269 11042 Erik mutex_exit(&ncec->ncec_lock); 270 11042 Erik goto done; 271 11042 Erik } 272 9175 Sowmini /* 273 9175 Sowmini * Since we do not control the packet output 274 9175 Sowmini * path for ip2mac() callers, we need to verify 275 9175 Sowmini * if the existing information in the nce is 276 9175 Sowmini * very old, and retrigger resolution if necessary. 277 9175 Sowmini * We will not return the existing stale 278 9175 Sowmini * information until it is verified through a 279 9175 Sowmini * resolver request/response exchange. 280 9175 Sowmini * 281 9175 Sowmini * In the future, we may want to support extensions 282 9175 Sowmini * that do additional callbacks on link-layer updates, 283 9175 Sowmini * so that we can return the stale information but 284 9175 Sowmini * also update the caller if the lladdr changes. 285 9175 Sowmini */ 286 11042 Erik ncec->ncec_rcnt = ill->ill_xmit_count; 287 11042 Erik ncec->ncec_state = ND_PROBE; 288 11042 Erik need_resolve = B_TRUE; /* reachable but very old nce */ 289 11042 Erik } else if (ncec->ncec_state == ND_INITIAL) { 290 11042 Erik need_resolve = B_TRUE; /* ND_INITIAL nce */ 291 11042 Erik ncec->ncec_state = ND_INCOMPLETE; 292 9175 Sowmini } 293 11042 Erik /* 294 11042 Erik * NCE not known to be reachable in the recent past. We must 295 11042 Erik * reconfirm the information before returning it to the caller 296 11042 Erik */ 297 11042 Erik if (ncec->ncec_rcnt > 0) { 298 9175 Sowmini /* 299 11042 Erik * Still resolving this ncec, so we can queue the 300 11042 Erik * callback information in ncec->ncec_cb 301 9175 Sowmini */ 302 11042 Erik ip2mid = ncec_add_cb(ncec, cb, cbarg); 303 9175 Sowmini ip2m->ip2mac_err = EINPROGRESS; 304 9175 Sowmini } else { 305 9175 Sowmini /* 306 11042 Erik * No more retransmits allowed -- resolution failed. 307 9175 Sowmini */ 308 9175 Sowmini ip2m->ip2mac_err = ESRCH; 309 9175 Sowmini } 310 9175 Sowmini } 311 11042 Erik mutex_exit(&ncec->ncec_lock); 312 9175 Sowmini done: 313 11042 Erik /* 314 11042 Erik * if NCE_ISREACHABLE(ncec) but very old, or if it is ND_INITIAL, 315 11042 Erik * trigger resolve. 316 11042 Erik */ 317 11042 Erik if (need_resolve) 318 11042 Erik ip_ndp_resolve(ncec); 319 11042 Erik if (nce != NULL) 320 11042 Erik nce_refrele(nce); 321 9175 Sowmini netstack_rele(ns); 322 9175 Sowmini ill_refrele(ill); 323 9175 Sowmini return (ip2mid); 324 9175 Sowmini } 325 9175 Sowmini 326 9175 Sowmini /* 327 11042 Erik * data passed to ncec_walk for canceling outstanding callbacks. 328 9175 Sowmini */ 329 9175 Sowmini typedef struct ip2mac_cancel_data_s { 330 9175 Sowmini ip2mac_id_t ip2m_cancel_id; 331 9175 Sowmini int ip2m_cancel_err; 332 9175 Sowmini } ip2mac_cancel_data_t; 333 9175 Sowmini 334 9175 Sowmini /* 335 11042 Erik * callback invoked for each active ncec. If the ip2mac_id_t corresponds 336 11042 Erik * to an active nce_cb_t in the ncec's callback list, we want to remove 337 9175 Sowmini * the callback (if there are no walkers) or return EBUSY to the caller 338 9175 Sowmini */ 339 9175 Sowmini static int 340 11042 Erik ip2mac_cancel_callback(ncec_t *ncec, void *arg) 341 9175 Sowmini { 342 9175 Sowmini ip2mac_cancel_data_t *ip2m_wdata = arg; 343 11042 Erik ncec_cb_t *ip2m_nce_cb = ip2m_wdata->ip2m_cancel_id; 344 11042 Erik ncec_cb_t *ncec_cb; 345 9175 Sowmini 346 11042 Erik if (ip2m_nce_cb->ncec_cb_id != ncec) 347 9175 Sowmini return (0); 348 9175 Sowmini 349 11042 Erik mutex_enter(&ncec->ncec_lock); 350 11042 Erik if (list_is_empty(&ncec->ncec_cb)) { 351 11042 Erik mutex_exit(&ncec->ncec_lock); 352 9175 Sowmini return (0); 353 9175 Sowmini } 354 9175 Sowmini /* 355 9175 Sowmini * IP does not hold internal locks like nce_lock across calls to 356 9175 Sowmini * other subsystems for fear of recursive lock entry and lock 357 9175 Sowmini * hierarchy violation. The caller may be holding locks across 358 9175 Sowmini * the call to IP. (It would be ideal if no subsystem holds locks 359 9175 Sowmini * across calls into another subsystem, especially if calls can 360 9175 Sowmini * happen in either direction). 361 9175 Sowmini */ 362 11042 Erik ncec_cb = list_head(&ncec->ncec_cb); 363 11042 Erik for (; ncec_cb != NULL; ncec_cb = list_next(&ncec->ncec_cb, ncec_cb)) { 364 11042 Erik if (ncec_cb != ip2m_nce_cb) 365 9175 Sowmini continue; 366 9175 Sowmini /* 367 9175 Sowmini * If there are no walkers we can remove the nce_cb. 368 9175 Sowmini * Otherwise the exiting walker will clean up. 369 9175 Sowmini */ 370 11042 Erik if (ncec->ncec_cb_walker_cnt == 0) { 371 11042 Erik list_remove(&ncec->ncec_cb, ncec_cb); 372 9175 Sowmini } else { 373 9175 Sowmini ip2m_wdata->ip2m_cancel_err = EBUSY; 374 9175 Sowmini } 375 9175 Sowmini break; 376 9175 Sowmini } 377 11042 Erik mutex_exit(&ncec->ncec_lock); 378 9175 Sowmini return (0); 379 9175 Sowmini } 380 9175 Sowmini 381 9175 Sowmini /* 382 9175 Sowmini * cancel an outstanding timeout set up via ip2mac 383 9175 Sowmini */ 384 9175 Sowmini int 385 9175 Sowmini ip2mac_cancel(ip2mac_id_t ip2mid, zoneid_t zoneid) 386 9175 Sowmini { 387 9175 Sowmini netstack_t *ns; 388 9175 Sowmini ip_stack_t *ipst; 389 9175 Sowmini ip2mac_cancel_data_t ip2m_wdata; 390 9175 Sowmini 391 9175 Sowmini ns = netstack_find_by_zoneid(zoneid); 392 9175 Sowmini if (ns == NULL) { 393 9175 Sowmini ip2m_wdata.ip2m_cancel_err = EINVAL; 394 9175 Sowmini return (ip2m_wdata.ip2m_cancel_err); 395 9175 Sowmini } 396 9175 Sowmini /* 397 9175 Sowmini * For exclusive stacks we reset the zoneid to zero 398 9175 Sowmini * since IP uses the global zoneid in the exclusive stacks. 399 9175 Sowmini */ 400 9175 Sowmini if (ns->netstack_stackid != GLOBAL_NETSTACKID) 401 9175 Sowmini zoneid = GLOBAL_ZONEID; 402 9175 Sowmini ipst = ns->netstack_ip; 403 9175 Sowmini 404 9175 Sowmini ip2m_wdata.ip2m_cancel_id = ip2mid; 405 9175 Sowmini ip2m_wdata.ip2m_cancel_err = 0; 406 11042 Erik ncec_walk(NULL, ip2mac_cancel_callback, &ip2m_wdata, ipst); 407 9175 Sowmini /* 408 9175 Sowmini * We may return EBUSY if a walk to dispatch callbacks is 409 9175 Sowmini * in progress, in which case the caller needs to synchronize 410 9175 Sowmini * with the registered callback function to make sure the 411 9175 Sowmini * module does not exit when there is a callback pending. 412 9175 Sowmini */ 413 9175 Sowmini netstack_rele(ns); 414 9175 Sowmini return (ip2m_wdata.ip2m_cancel_err); 415 9175 Sowmini } 416