1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2008 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 26 #include <sys/types.h> 27 #include <sys/stream.h> 28 #include <sys/stropts.h> 29 #include <sys/errno.h> 30 #include <sys/strlog.h> 31 #include <sys/tihdr.h> 32 #include <sys/socket.h> 33 #include <sys/ddi.h> 34 #include <sys/sunddi.h> 35 #include <sys/kmem.h> 36 #include <sys/zone.h> 37 #include <sys/sysmacros.h> 38 #include <sys/cmn_err.h> 39 #include <sys/vtrace.h> 40 #include <sys/debug.h> 41 #include <sys/atomic.h> 42 #include <sys/strsun.h> 43 #include <sys/random.h> 44 #include <netinet/in.h> 45 #include <net/if.h> 46 #include <netinet/ip6.h> 47 #include <net/pfkeyv2.h> 48 49 #include <inet/common.h> 50 #include <inet/mi.h> 51 #include <inet/nd.h> 52 #include <inet/ip.h> 53 #include <inet/ip_impl.h> 54 #include <inet/ip6.h> 55 #include <inet/sadb.h> 56 #include <inet/ipsec_info.h> 57 #include <inet/ipsec_impl.h> 58 #include <inet/ipsecesp.h> 59 #include <inet/ipdrop.h> 60 #include <inet/tcp.h> 61 #include <sys/kstat.h> 62 #include <sys/policy.h> 63 #include <sys/strsun.h> 64 #include <inet/udp_impl.h> 65 
#include <sys/taskq.h>
#include <sys/note.h>

#include <sys/iphada.h>

/*
 * Table of ND variables supported by ipsecesp.  A private copy of this
 * table is made for every stack instance by ipsecesp_stack_init() and
 * loaded into that instance's ipsecesp_g_nd by ipsecesp_param_register().
 * All of these are alterable, within the min/max values given, at run time.
 */
static	ipsecespparam_t	lcl_param_arr[] = {
	/* min	max			value	name */
	{ 0,	3,			0,	"ipsecesp_debug"},
	{ 125,	32000, SADB_AGE_INTERVAL_DEFAULT, "ipsecesp_age_interval"},
	{ 1,	10,			1,	"ipsecesp_reap_delay"},
	{ 1,	SADB_MAX_REPLAY,	64,	"ipsecesp_replay_size"},
	{ 1,	300,			15,	"ipsecesp_acquire_timeout"},
	{ 1,	1800,			90,	"ipsecesp_larval_timeout"},
	/* Default lifetime values for ACQUIRE messages. */
	{ 0,	0xffffffffU,	0,	"ipsecesp_default_soft_bytes"},
	{ 0,	0xffffffffU,	0,	"ipsecesp_default_hard_bytes"},
	{ 0,	0xffffffffU,	24000,	"ipsecesp_default_soft_addtime"},
	{ 0,	0xffffffffU,	28800,	"ipsecesp_default_hard_addtime"},
	{ 0,	0xffffffffU,	0,	"ipsecesp_default_soft_usetime"},
	{ 0,	0xffffffffU,	0,	"ipsecesp_default_hard_usetime"},
	{ 0,	1,		0,	"ipsecesp_log_unknown_spi"},
	{ 0,	2,		1,	"ipsecesp_padding_check"},
	{ 0,	600,		20,	"ipsecesp_nat_keepalive_interval"},
};

/*
 * Shorthand accessors.  Each expands to a member of the per-stack parameter
 * array, so it must be used as "<stack pointer>->ipsecesp_<name>".  The
 * indices below must stay in step with the order of lcl_param_arr[].
 */
#define	ipsecesp_debug	ipsecesp_params[0].ipsecesp_param_value
#define	ipsecesp_age_interval ipsecesp_params[1].ipsecesp_param_value
#define	ipsecesp_age_int_max	ipsecesp_params[1].ipsecesp_param_max
#define	ipsecesp_reap_delay	ipsecesp_params[2].ipsecesp_param_value
#define	ipsecesp_replay_size	ipsecesp_params[3].ipsecesp_param_value
#define	ipsecesp_acquire_timeout	\
	ipsecesp_params[4].ipsecesp_param_value
#define	ipsecesp_larval_timeout	\
	ipsecesp_params[5].ipsecesp_param_value
#define	ipsecesp_default_soft_bytes	\
	ipsecesp_params[6].ipsecesp_param_value
#define	ipsecesp_default_hard_bytes	\
	ipsecesp_params[7].ipsecesp_param_value
#define	ipsecesp_default_soft_addtime	\
	ipsecesp_params[8].ipsecesp_param_value
#define	ipsecesp_default_hard_addtime	\
	ipsecesp_params[9].ipsecesp_param_value
#define	ipsecesp_default_soft_usetime	\
	ipsecesp_params[10].ipsecesp_param_value
#define	ipsecesp_default_hard_usetime	\
	ipsecesp_params[11].ipsecesp_param_value
#define	ipsecesp_log_unknown_spi	\
	ipsecesp_params[12].ipsecesp_param_value
#define	ipsecesp_padding_check	\
	ipsecesp_params[13].ipsecesp_param_value
/* For ipsecesp_nat_keepalive_interval, see ipsecesp.h. */

/*
 * Debug printing.  The second argument is a complete, parenthesized printf
 * argument list, e.g. esp1dbg(espstack, ("value %d\n", v));
 *
 * The level-gated variants are wrapped in do { } while (0) so that each
 * use is a single statement and cannot capture a following "else".
 */
#define	esp0dbg(a)	printf a
/* NOTE:  != 0 instead of > 0 so lint doesn't complain. */
#define	esp1dbg(espstack, a)					\
do {								\
	if ((espstack)->ipsecesp_debug != 0)			\
		printf a;					\
_NOTE(CONSTCOND)						\
} while (0)
#define	esp2dbg(espstack, a)					\
do {								\
	if ((espstack)->ipsecesp_debug > 1)			\
		printf a;					\
_NOTE(CONSTCOND)						\
} while (0)
#define	esp3dbg(espstack, a)					\
do {								\
	if ((espstack)->ipsecesp_debug > 2)			\
		printf a;					\
_NOTE(CONSTCOND)						\
} while (0)

static int ipsecesp_open(queue_t *, dev_t *, int, int, cred_t *);
static int ipsecesp_close(queue_t *);
static void ipsecesp_rput(queue_t *, mblk_t *);
static void ipsecesp_wput(queue_t *, mblk_t *);
static void	*ipsecesp_stack_init(netstackid_t stackid, netstack_t *ns);
static void	ipsecesp_stack_fini(netstackid_t stackid, void *arg);
static void esp_send_acquire(ipsacq_t *, mblk_t *, netstack_t *);

static void esp_prepare_udp(netstack_t *, mblk_t *, ipha_t *);
static ipsec_status_t esp_outbound_accelerated(mblk_t *, uint_t);
static ipsec_status_t esp_inbound_accelerated(mblk_t *, mblk_t *,
    boolean_t, ipsa_t *);

static boolean_t esp_register_out(uint32_t, uint32_t, uint_t,
    ipsecesp_stack_t *);
static boolean_t esp_strip_header(mblk_t *, boolean_t, uint32_t,
    kstat_named_t **, ipsecesp_stack_t *);
static ipsec_status_t esp_submit_req_inbound(mblk_t *, ipsa_t *, uint_t);
static ipsec_status_t esp_submit_req_outbound(mblk_t *, ipsa_t *, uchar_t *,
    uint_t);
/* Settable in /etc/system */
uint32_t esp_hash_size = IPSEC_DEFAULT_HASH_SIZE;

/* STREAMS module identification; info.mi_idnum is also used for logging. */
static struct module_info info = {
	5137, "ipsecesp", 0, INFPSZ, 65536, 1024
};

/* Read side: ipsecesp_rput() as the put procedure, no service procedure. */
static struct qinit rinit = {
	(pfi_t)ipsecesp_rput, NULL, ipsecesp_open, ipsecesp_close, NULL, &info,
	NULL
};

/* Write side: ipsecesp_wput() as the put procedure, no service procedure. */
static struct qinit winit = {
	(pfi_t)ipsecesp_wput, NULL, ipsecesp_open, ipsecesp_close, NULL, &info,
	NULL
};

struct streamtab ipsecespinfo = {
	&rinit, &winit, NULL, NULL
};

/* Created in ipsecesp_ddi_init(), destroyed in ipsecesp_ddi_destroy(). */
static taskq_t *esp_taskq;

/*
 * OTOH, this one is set at open/close, and I'm D_MTQPAIR for now.
 *
 * Question:	Do I need this, given that all instance's esps->esps_wq point
 *		to IP?
 *
 * Answer:	Yes, because I need to know which queue is BOUND to
 *		IPPROTO_ESP
 */

/*
 * Stats.  This may eventually become a full-blown SNMP MIB once that spec
 * stabilizes.
 */

typedef struct esp_kstats_s {
	kstat_named_t esp_stat_num_aalgs;
	kstat_named_t esp_stat_good_auth;
	kstat_named_t esp_stat_bad_auth;
	kstat_named_t esp_stat_bad_padding;
	kstat_named_t esp_stat_replay_failures;
	kstat_named_t esp_stat_replay_early_failures;
	kstat_named_t esp_stat_keysock_in;
	kstat_named_t esp_stat_out_requests;
	kstat_named_t esp_stat_acquire_requests;
	kstat_named_t esp_stat_bytes_expired;
	kstat_named_t esp_stat_out_discards;
	kstat_named_t esp_stat_in_accelerated;
	kstat_named_t esp_stat_out_accelerated;
	kstat_named_t esp_stat_noaccel;
	kstat_named_t esp_stat_crypto_sync;
	kstat_named_t esp_stat_crypto_async;
	kstat_named_t esp_stat_crypto_failures;
	kstat_named_t esp_stat_num_ealgs;
	kstat_named_t esp_stat_bad_decrypt;
	kstat_named_t esp_stat_sa_port_renumbers;
} esp_kstats_t;

/*
 * espstack->esp_kstats is equal to espstack->esp_ksp->ks_data if
 * kstat_create_netstack for espstack->esp_ksp succeeds, but when it
 * fails, it will be NULL. Note this is done for all stack instances,
 * so it *could* fail.
hence a non-NULL checking is done for 213 * ESP_BUMP_STAT and ESP_DEBUMP_STAT 214 */ 215 #define ESP_BUMP_STAT(espstack, x) \ 216 do { \ 217 if (espstack->esp_kstats != NULL) \ 218 (espstack->esp_kstats->esp_stat_ ## x).value.ui64++; \ 219 _NOTE(CONSTCOND) \ 220 } while (0) 221 222 #define ESP_DEBUMP_STAT(espstack, x) \ 223 do { \ 224 if (espstack->esp_kstats != NULL) \ 225 (espstack->esp_kstats->esp_stat_ ## x).value.ui64--; \ 226 _NOTE(CONSTCOND) \ 227 } while (0) 228 229 static int esp_kstat_update(kstat_t *, int); 230 231 static boolean_t 232 esp_kstat_init(ipsecesp_stack_t *espstack, netstackid_t stackid) 233 { 234 espstack->esp_ksp = kstat_create_netstack("ipsecesp", 0, "esp_stat", 235 "net", KSTAT_TYPE_NAMED, 236 sizeof (esp_kstats_t) / sizeof (kstat_named_t), 237 KSTAT_FLAG_PERSISTENT, stackid); 238 239 if (espstack->esp_ksp == NULL || espstack->esp_ksp->ks_data == NULL) 240 return (B_FALSE); 241 242 espstack->esp_kstats = espstack->esp_ksp->ks_data; 243 244 espstack->esp_ksp->ks_update = esp_kstat_update; 245 espstack->esp_ksp->ks_private = (void *)(uintptr_t)stackid; 246 247 #define K64 KSTAT_DATA_UINT64 248 #define KI(x) kstat_named_init(&(espstack->esp_kstats->esp_stat_##x), #x, K64) 249 250 KI(num_aalgs); 251 KI(num_ealgs); 252 KI(good_auth); 253 KI(bad_auth); 254 KI(bad_padding); 255 KI(replay_failures); 256 KI(replay_early_failures); 257 KI(keysock_in); 258 KI(out_requests); 259 KI(acquire_requests); 260 KI(bytes_expired); 261 KI(out_discards); 262 KI(in_accelerated); 263 KI(out_accelerated); 264 KI(noaccel); 265 KI(crypto_sync); 266 KI(crypto_async); 267 KI(crypto_failures); 268 KI(bad_decrypt); 269 KI(sa_port_renumbers); 270 271 #undef KI 272 #undef K64 273 274 kstat_install(espstack->esp_ksp); 275 276 return (B_TRUE); 277 } 278 279 static int 280 esp_kstat_update(kstat_t *kp, int rw) 281 { 282 esp_kstats_t *ekp; 283 netstackid_t stackid = (zoneid_t)(uintptr_t)kp->ks_private; 284 netstack_t *ns; 285 ipsec_stack_t *ipss; 286 287 if ((kp == NULL) || 
(kp->ks_data == NULL)) 288 return (EIO); 289 290 if (rw == KSTAT_WRITE) 291 return (EACCES); 292 293 ns = netstack_find_by_stackid(stackid); 294 if (ns == NULL) 295 return (-1); 296 ipss = ns->netstack_ipsec; 297 if (ipss == NULL) { 298 netstack_rele(ns); 299 return (-1); 300 } 301 ekp = (esp_kstats_t *)kp->ks_data; 302 303 mutex_enter(&ipss->ipsec_alg_lock); 304 ekp->esp_stat_num_aalgs.value.ui64 = 305 ipss->ipsec_nalgs[IPSEC_ALG_AUTH]; 306 ekp->esp_stat_num_ealgs.value.ui64 = 307 ipss->ipsec_nalgs[IPSEC_ALG_ENCR]; 308 mutex_exit(&ipss->ipsec_alg_lock); 309 310 netstack_rele(ns); 311 return (0); 312 } 313 314 #ifdef DEBUG 315 /* 316 * Debug routine, useful to see pre-encryption data. 317 */ 318 static char * 319 dump_msg(mblk_t *mp) 320 { 321 char tmp_str[3], tmp_line[256]; 322 323 while (mp != NULL) { 324 unsigned char *ptr; 325 326 printf("mblk address 0x%p, length %ld, db_ref %d " 327 "type %d, base 0x%p, lim 0x%p\n", 328 (void *) mp, (long)(mp->b_wptr - mp->b_rptr), 329 mp->b_datap->db_ref, mp->b_datap->db_type, 330 (void *)mp->b_datap->db_base, (void *)mp->b_datap->db_lim); 331 ptr = mp->b_rptr; 332 333 tmp_line[0] = '\0'; 334 while (ptr < mp->b_wptr) { 335 uint_t diff; 336 337 diff = (ptr - mp->b_rptr); 338 if (!(diff & 0x1f)) { 339 if (strlen(tmp_line) > 0) { 340 printf("bytes: %s\n", tmp_line); 341 tmp_line[0] = '\0'; 342 } 343 } 344 if (!(diff & 0x3)) 345 (void) strcat(tmp_line, " "); 346 (void) sprintf(tmp_str, "%02x", *ptr); 347 (void) strcat(tmp_line, tmp_str); 348 ptr++; 349 } 350 if (strlen(tmp_line) > 0) 351 printf("bytes: %s\n", tmp_line); 352 353 mp = mp->b_cont; 354 } 355 356 return ("\n"); 357 } 358 359 #else /* DEBUG */ 360 static char * 361 dump_msg(mblk_t *mp) 362 { 363 printf("Find value of mp %p.\n", mp); 364 return ("\n"); 365 } 366 #endif /* DEBUG */ 367 368 /* 369 * Don't have to lock age_interval, as only one thread will access it at 370 * a time, because I control the one function that does with timeout(). 
371 */ 372 static void 373 esp_ager(void *arg) 374 { 375 ipsecesp_stack_t *espstack = (ipsecesp_stack_t *)arg; 376 netstack_t *ns = espstack->ipsecesp_netstack; 377 hrtime_t begin = gethrtime(); 378 379 sadb_ager(&espstack->esp_sadb.s_v4, espstack->esp_pfkey_q, 380 espstack->esp_sadb.s_ip_q, espstack->ipsecesp_reap_delay, ns); 381 sadb_ager(&espstack->esp_sadb.s_v6, espstack->esp_pfkey_q, 382 espstack->esp_sadb.s_ip_q, espstack->ipsecesp_reap_delay, ns); 383 384 espstack->esp_event = sadb_retimeout(begin, espstack->esp_pfkey_q, 385 esp_ager, espstack, 386 &espstack->ipsecesp_age_interval, espstack->ipsecesp_age_int_max, 387 info.mi_idnum); 388 } 389 390 /* 391 * Get an ESP NDD parameter. 392 */ 393 /* ARGSUSED */ 394 static int 395 ipsecesp_param_get(q, mp, cp, cr) 396 queue_t *q; 397 mblk_t *mp; 398 caddr_t cp; 399 cred_t *cr; 400 { 401 ipsecespparam_t *ipsecesppa = (ipsecespparam_t *)cp; 402 uint_t value; 403 ipsecesp_stack_t *espstack = (ipsecesp_stack_t *)q->q_ptr; 404 405 mutex_enter(&espstack->ipsecesp_param_lock); 406 value = ipsecesppa->ipsecesp_param_value; 407 mutex_exit(&espstack->ipsecesp_param_lock); 408 409 (void) mi_mpprintf(mp, "%u", value); 410 return (0); 411 } 412 413 /* 414 * This routine sets an NDD variable in a ipsecespparam_t structure. 415 */ 416 /* ARGSUSED */ 417 static int 418 ipsecesp_param_set(q, mp, value, cp, cr) 419 queue_t *q; 420 mblk_t *mp; 421 char *value; 422 caddr_t cp; 423 cred_t *cr; 424 { 425 ulong_t new_value; 426 ipsecespparam_t *ipsecesppa = (ipsecespparam_t *)cp; 427 ipsecesp_stack_t *espstack = (ipsecesp_stack_t *)q->q_ptr; 428 429 /* 430 * Fail the request if the new value does not lie within the 431 * required bounds. 
432 */ 433 if (ddi_strtoul(value, NULL, 10, &new_value) != 0 || 434 new_value < ipsecesppa->ipsecesp_param_min || 435 new_value > ipsecesppa->ipsecesp_param_max) { 436 return (EINVAL); 437 } 438 439 /* Set the new value */ 440 mutex_enter(&espstack->ipsecesp_param_lock); 441 ipsecesppa->ipsecesp_param_value = new_value; 442 mutex_exit(&espstack->ipsecesp_param_lock); 443 return (0); 444 } 445 446 /* 447 * Using lifetime NDD variables, fill in an extended combination's 448 * lifetime information. 449 */ 450 void 451 ipsecesp_fill_defs(sadb_x_ecomb_t *ecomb, netstack_t *ns) 452 { 453 ipsecesp_stack_t *espstack = ns->netstack_ipsecesp; 454 455 ecomb->sadb_x_ecomb_soft_bytes = espstack->ipsecesp_default_soft_bytes; 456 ecomb->sadb_x_ecomb_hard_bytes = espstack->ipsecesp_default_hard_bytes; 457 ecomb->sadb_x_ecomb_soft_addtime = 458 espstack->ipsecesp_default_soft_addtime; 459 ecomb->sadb_x_ecomb_hard_addtime = 460 espstack->ipsecesp_default_hard_addtime; 461 ecomb->sadb_x_ecomb_soft_usetime = 462 espstack->ipsecesp_default_soft_usetime; 463 ecomb->sadb_x_ecomb_hard_usetime = 464 espstack->ipsecesp_default_hard_usetime; 465 } 466 467 /* 468 * Initialize things for ESP at module load time. 469 */ 470 boolean_t 471 ipsecesp_ddi_init(void) 472 { 473 esp_taskq = taskq_create("esp_taskq", 1, minclsyspri, 474 IPSEC_TASKQ_MIN, IPSEC_TASKQ_MAX, 0); 475 476 /* 477 * We want to be informed each time a stack is created or 478 * destroyed in the kernel, so we can maintain the 479 * set of ipsecesp_stack_t's. 480 */ 481 netstack_register(NS_IPSECESP, ipsecesp_stack_init, NULL, 482 ipsecesp_stack_fini); 483 484 return (B_TRUE); 485 } 486 487 /* 488 * Walk through the param array specified registering each element with the 489 * named dispatch handler. 
490 */ 491 static boolean_t 492 ipsecesp_param_register(IDP *ndp, ipsecespparam_t *espp, int cnt) 493 { 494 for (; cnt-- > 0; espp++) { 495 if (espp->ipsecesp_param_name != NULL && 496 espp->ipsecesp_param_name[0]) { 497 if (!nd_load(ndp, 498 espp->ipsecesp_param_name, 499 ipsecesp_param_get, ipsecesp_param_set, 500 (caddr_t)espp)) { 501 nd_free(ndp); 502 return (B_FALSE); 503 } 504 } 505 } 506 return (B_TRUE); 507 } 508 /* 509 * Initialize things for ESP for each stack instance 510 */ 511 static void * 512 ipsecesp_stack_init(netstackid_t stackid, netstack_t *ns) 513 { 514 ipsecesp_stack_t *espstack; 515 ipsecespparam_t *espp; 516 517 espstack = (ipsecesp_stack_t *)kmem_zalloc(sizeof (*espstack), 518 KM_SLEEP); 519 espstack->ipsecesp_netstack = ns; 520 521 espp = (ipsecespparam_t *)kmem_alloc(sizeof (lcl_param_arr), KM_SLEEP); 522 espstack->ipsecesp_params = espp; 523 bcopy(lcl_param_arr, espp, sizeof (lcl_param_arr)); 524 525 (void) ipsecesp_param_register(&espstack->ipsecesp_g_nd, espp, 526 A_CNT(lcl_param_arr)); 527 528 (void) esp_kstat_init(espstack, stackid); 529 530 espstack->esp_sadb.s_acquire_timeout = 531 &espstack->ipsecesp_acquire_timeout; 532 espstack->esp_sadb.s_acqfn = esp_send_acquire; 533 sadbp_init("ESP", &espstack->esp_sadb, SADB_SATYPE_ESP, esp_hash_size, 534 espstack->ipsecesp_netstack); 535 536 mutex_init(&espstack->ipsecesp_param_lock, NULL, MUTEX_DEFAULT, 0); 537 538 ip_drop_register(&espstack->esp_dropper, "IPsec ESP"); 539 return (espstack); 540 } 541 542 /* 543 * Destroy things for ESP at module unload time. 
544 */ 545 void 546 ipsecesp_ddi_destroy(void) 547 { 548 netstack_unregister(NS_IPSECESP); 549 taskq_destroy(esp_taskq); 550 } 551 552 /* 553 * Destroy things for ESP for one stack instance 554 */ 555 static void 556 ipsecesp_stack_fini(netstackid_t stackid, void *arg) 557 { 558 ipsecesp_stack_t *espstack = (ipsecesp_stack_t *)arg; 559 560 if (espstack->esp_pfkey_q != NULL) { 561 (void) quntimeout(espstack->esp_pfkey_q, espstack->esp_event); 562 } 563 espstack->esp_sadb.s_acqfn = NULL; 564 espstack->esp_sadb.s_acquire_timeout = NULL; 565 sadbp_destroy(&espstack->esp_sadb, espstack->ipsecesp_netstack); 566 ip_drop_unregister(&espstack->esp_dropper); 567 mutex_destroy(&espstack->ipsecesp_param_lock); 568 nd_free(&espstack->ipsecesp_g_nd); 569 570 kmem_free(espstack->ipsecesp_params, sizeof (lcl_param_arr)); 571 espstack->ipsecesp_params = NULL; 572 kstat_delete_netstack(espstack->esp_ksp, stackid); 573 espstack->esp_ksp = NULL; 574 espstack->esp_kstats = NULL; 575 kmem_free(espstack, sizeof (*espstack)); 576 } 577 578 /* 579 * ESP module open routine. 580 */ 581 /* ARGSUSED */ 582 static int 583 ipsecesp_open(queue_t *q, dev_t *devp, int flag, int sflag, cred_t *credp) 584 { 585 netstack_t *ns; 586 ipsecesp_stack_t *espstack; 587 588 if (secpolicy_ip_config(credp, B_FALSE) != 0) 589 return (EPERM); 590 591 if (q->q_ptr != NULL) 592 return (0); /* Re-open of an already open instance. */ 593 594 if (sflag != MODOPEN) 595 return (EINVAL); 596 597 ns = netstack_find_by_cred(credp); 598 ASSERT(ns != NULL); 599 espstack = ns->netstack_ipsecesp; 600 ASSERT(espstack != NULL); 601 602 /* 603 * ASSUMPTIONS (because I'm MT_OCEXCL): 604 * 605 * * I'm being pushed on top of IP for all my opens (incl. #1). 606 * * Only ipsecesp_open() can write into esp_sadb.s_ip_q. 607 * * Because of this, I can check lazily for esp_sadb.s_ip_q. 608 * 609 * If these assumptions are wrong, I'm in BIG trouble... 
610 */ 611 612 q->q_ptr = espstack; 613 WR(q)->q_ptr = q->q_ptr; 614 615 if (espstack->esp_sadb.s_ip_q == NULL) { 616 struct T_unbind_req *tur; 617 618 espstack->esp_sadb.s_ip_q = WR(q); 619 /* Allocate an unbind... */ 620 espstack->esp_ip_unbind = allocb(sizeof (struct T_unbind_req), 621 BPRI_HI); 622 623 /* 624 * Send down T_BIND_REQ to bind IPPROTO_ESP. 625 * Handle the ACK here in ESP. 626 */ 627 qprocson(q); 628 if (espstack->esp_ip_unbind == NULL || 629 !sadb_t_bind_req(espstack->esp_sadb.s_ip_q, IPPROTO_ESP)) { 630 if (espstack->esp_ip_unbind != NULL) { 631 freeb(espstack->esp_ip_unbind); 632 espstack->esp_ip_unbind = NULL; 633 } 634 q->q_ptr = NULL; 635 netstack_rele(espstack->ipsecesp_netstack); 636 return (ENOMEM); 637 } 638 639 espstack->esp_ip_unbind->b_datap->db_type = M_PROTO; 640 tur = (struct T_unbind_req *)espstack->esp_ip_unbind->b_rptr; 641 tur->PRIM_type = T_UNBIND_REQ; 642 } else { 643 qprocson(q); 644 } 645 646 /* 647 * For now, there's not much I can do. I'll be getting a message 648 * passed down to me from keysock (in my wput), and a T_BIND_ACK 649 * up from IP (in my rput). 650 */ 651 652 return (0); 653 } 654 655 /* 656 * ESP module close routine. 657 */ 658 static int 659 ipsecesp_close(queue_t *q) 660 { 661 ipsecesp_stack_t *espstack = (ipsecesp_stack_t *)q->q_ptr; 662 663 /* 664 * If esp_sadb.s_ip_q is attached to this instance, send a 665 * T_UNBIND_REQ to IP for the instance before doing 666 * a qprocsoff(). 667 */ 668 if (WR(q) == espstack->esp_sadb.s_ip_q && 669 espstack->esp_ip_unbind != NULL) { 670 putnext(WR(q), espstack->esp_ip_unbind); 671 espstack->esp_ip_unbind = NULL; 672 } 673 674 /* 675 * Clean up q_ptr, if needed. 676 */ 677 qprocsoff(q); 678 679 /* Keysock queue check is safe, because of OCEXCL perimeter. */ 680 681 if (q == espstack->esp_pfkey_q) { 682 esp1dbg(espstack, 683 ("ipsecesp_close: Ummm... keysock is closing ESP.\n")); 684 espstack->esp_pfkey_q = NULL; 685 /* Detach qtimeouts. 
*/ 686 (void) quntimeout(q, espstack->esp_event); 687 } 688 689 if (WR(q) == espstack->esp_sadb.s_ip_q) { 690 /* 691 * If the esp_sadb.s_ip_q is attached to this instance, find 692 * another. The OCEXCL outer perimeter helps us here. 693 */ 694 espstack->esp_sadb.s_ip_q = NULL; 695 696 /* 697 * Find a replacement queue for esp_sadb.s_ip_q. 698 */ 699 if (espstack->esp_pfkey_q != NULL && 700 espstack->esp_pfkey_q != RD(q)) { 701 /* 702 * See if we can use the pfkey_q. 703 */ 704 espstack->esp_sadb.s_ip_q = WR(espstack->esp_pfkey_q); 705 } 706 707 if (espstack->esp_sadb.s_ip_q == NULL || 708 !sadb_t_bind_req(espstack->esp_sadb.s_ip_q, IPPROTO_ESP)) { 709 esp1dbg(espstack, ("ipsecesp: Can't reassign ip_q.\n")); 710 espstack->esp_sadb.s_ip_q = NULL; 711 } else { 712 espstack->esp_ip_unbind = 713 allocb(sizeof (struct T_unbind_req), BPRI_HI); 714 715 if (espstack->esp_ip_unbind != NULL) { 716 struct T_unbind_req *tur; 717 718 espstack->esp_ip_unbind->b_datap->db_type = 719 M_PROTO; 720 tur = (struct T_unbind_req *) 721 espstack->esp_ip_unbind->b_rptr; 722 tur->PRIM_type = T_UNBIND_REQ; 723 } 724 /* If it's NULL, I can't do much here. */ 725 } 726 } 727 728 netstack_rele(espstack->ipsecesp_netstack); 729 return (0); 730 } 731 732 /* 733 * Add a number of bytes to what the SA has protected so far. Return 734 * B_TRUE if the SA can still protect that many bytes. 735 * 736 * Caller must REFRELE the passed-in assoc. This function must REFRELE 737 * any obtained peer SA. 738 */ 739 static boolean_t 740 esp_age_bytes(ipsa_t *assoc, uint64_t bytes, boolean_t inbound) 741 { 742 ipsa_t *inassoc, *outassoc; 743 isaf_t *bucket; 744 boolean_t inrc, outrc, isv6; 745 sadb_t *sp; 746 int outhash; 747 netstack_t *ns = assoc->ipsa_netstack; 748 ipsecesp_stack_t *espstack = ns->netstack_ipsecesp; 749 750 /* No peer? No problem! 
*/ 751 if (!assoc->ipsa_haspeer) { 752 return (sadb_age_bytes(espstack->esp_pfkey_q, assoc, bytes, 753 B_TRUE)); 754 } 755 756 /* 757 * Otherwise, we want to grab both the original assoc and its peer. 758 * There might be a race for this, but if it's a real race, two 759 * expire messages may occur. We limit this by only sending the 760 * expire message on one of the peers, we'll pick the inbound 761 * arbitrarily. 762 * 763 * If we need tight synchronization on the peer SA, then we need to 764 * reconsider. 765 */ 766 767 /* Use address length to select IPv6/IPv4 */ 768 isv6 = (assoc->ipsa_addrfam == AF_INET6); 769 sp = isv6 ? &espstack->esp_sadb.s_v6 : &espstack->esp_sadb.s_v4; 770 771 if (inbound) { 772 inassoc = assoc; 773 if (isv6) { 774 outhash = OUTBOUND_HASH_V6(sp, *((in6_addr_t *) 775 &inassoc->ipsa_dstaddr)); 776 } else { 777 outhash = OUTBOUND_HASH_V4(sp, *((ipaddr_t *) 778 &inassoc->ipsa_dstaddr)); 779 } 780 bucket = &sp->sdb_of[outhash]; 781 mutex_enter(&bucket->isaf_lock); 782 outassoc = ipsec_getassocbyspi(bucket, inassoc->ipsa_spi, 783 inassoc->ipsa_srcaddr, inassoc->ipsa_dstaddr, 784 inassoc->ipsa_addrfam); 785 mutex_exit(&bucket->isaf_lock); 786 if (outassoc == NULL) { 787 /* Q: Do we wish to set haspeer == B_FALSE? */ 788 esp0dbg(("esp_age_bytes: " 789 "can't find peer for inbound.\n")); 790 return (sadb_age_bytes(espstack->esp_pfkey_q, inassoc, 791 bytes, B_TRUE)); 792 } 793 } else { 794 outassoc = assoc; 795 bucket = INBOUND_BUCKET(sp, outassoc->ipsa_spi); 796 mutex_enter(&bucket->isaf_lock); 797 inassoc = ipsec_getassocbyspi(bucket, outassoc->ipsa_spi, 798 outassoc->ipsa_srcaddr, outassoc->ipsa_dstaddr, 799 outassoc->ipsa_addrfam); 800 mutex_exit(&bucket->isaf_lock); 801 if (inassoc == NULL) { 802 /* Q: Do we wish to set haspeer == B_FALSE? 
*/ 803 esp0dbg(("esp_age_bytes: " 804 "can't find peer for outbound.\n")); 805 return (sadb_age_bytes(espstack->esp_pfkey_q, outassoc, 806 bytes, B_TRUE)); 807 } 808 } 809 810 inrc = sadb_age_bytes(espstack->esp_pfkey_q, inassoc, bytes, B_TRUE); 811 outrc = sadb_age_bytes(espstack->esp_pfkey_q, outassoc, bytes, B_FALSE); 812 813 /* 814 * REFRELE any peer SA. 815 * 816 * Because of the multi-line macro nature of IPSA_REFRELE, keep 817 * them in { }. 818 */ 819 if (inbound) { 820 IPSA_REFRELE(outassoc); 821 } else { 822 IPSA_REFRELE(inassoc); 823 } 824 825 return (inrc && outrc); 826 } 827 828 /* 829 * Do incoming NAT-T manipulations for packet. 830 */ 831 static ipsec_status_t 832 esp_fix_natt_checksums(mblk_t *data_mp, ipsa_t *assoc) 833 { 834 ipha_t *ipha = (ipha_t *)data_mp->b_rptr; 835 tcpha_t *tcph; 836 udpha_t *udpha; 837 /* Initialize to our inbound cksum adjustment... */ 838 uint32_t sum = assoc->ipsa_inbound_cksum; 839 840 switch (ipha->ipha_protocol) { 841 case IPPROTO_TCP: 842 tcph = (tcpha_t *)(data_mp->b_rptr + 843 IPH_HDR_LENGTH(ipha)); 844 845 #define DOWN_SUM(x) (x) = ((x) & 0xFFFF) + ((x) >> 16) 846 sum += ~ntohs(tcph->tha_sum) & 0xFFFF; 847 DOWN_SUM(sum); 848 DOWN_SUM(sum); 849 tcph->tha_sum = ~htons(sum); 850 break; 851 case IPPROTO_UDP: 852 udpha = (udpha_t *)(data_mp->b_rptr + IPH_HDR_LENGTH(ipha)); 853 854 if (udpha->uha_checksum != 0) { 855 /* Adujst if the inbound one was not zero. */ 856 sum += ~ntohs(udpha->uha_checksum) & 0xFFFF; 857 DOWN_SUM(sum); 858 DOWN_SUM(sum); 859 udpha->uha_checksum = ~htons(sum); 860 if (udpha->uha_checksum == 0) 861 udpha->uha_checksum = 0xFFFF; 862 } 863 #undef DOWN_SUM 864 break; 865 case IPPROTO_IP: 866 /* 867 * This case is only an issue for self-encapsulated 868 * packets. So for now, fall through. 869 */ 870 break; 871 } 872 return (IPSEC_STATUS_SUCCESS); 873 } 874 875 876 /* 877 * Strip ESP header, check padding, and fix IP header. 878 * Returns B_TRUE on success, B_FALSE if an error occured. 
879 */ 880 static boolean_t 881 esp_strip_header(mblk_t *data_mp, boolean_t isv4, uint32_t ivlen, 882 kstat_named_t **counter, ipsecesp_stack_t *espstack) 883 { 884 ipha_t *ipha; 885 ip6_t *ip6h; 886 uint_t divpoint; 887 mblk_t *scratch; 888 uint8_t nexthdr, padlen; 889 uint8_t lastpad; 890 ipsec_stack_t *ipss = espstack->ipsecesp_netstack->netstack_ipsec; 891 uint8_t *lastbyte; 892 893 /* 894 * Strip ESP data and fix IP header. 895 * 896 * XXX In case the beginning of esp_inbound() changes to not do a 897 * pullup, this part of the code can remain unchanged. 898 */ 899 if (isv4) { 900 ASSERT((data_mp->b_wptr - data_mp->b_rptr) >= sizeof (ipha_t)); 901 ipha = (ipha_t *)data_mp->b_rptr; 902 ASSERT((data_mp->b_wptr - data_mp->b_rptr) >= sizeof (esph_t) + 903 IPH_HDR_LENGTH(ipha)); 904 divpoint = IPH_HDR_LENGTH(ipha); 905 } else { 906 ASSERT((data_mp->b_wptr - data_mp->b_rptr) >= sizeof (ip6_t)); 907 ip6h = (ip6_t *)data_mp->b_rptr; 908 divpoint = ip_hdr_length_v6(data_mp, ip6h); 909 } 910 911 scratch = data_mp; 912 while (scratch->b_cont != NULL) 913 scratch = scratch->b_cont; 914 915 ASSERT((scratch->b_wptr - scratch->b_rptr) >= 3); 916 917 /* 918 * "Next header" and padding length are the last two bytes in the 919 * ESP-protected datagram, thus the explicit - 1 and - 2. 920 * lastpad is the last byte of the padding, which can be used for 921 * a quick check to see if the padding is correct. 922 */ 923 lastbyte = scratch->b_wptr - 1; 924 nexthdr = *lastbyte--; 925 padlen = *lastbyte--; 926 927 if (isv4) { 928 /* Fix part of the IP header. */ 929 ipha->ipha_protocol = nexthdr; 930 /* 931 * Reality check the padlen. The explicit - 2 is for the 932 * padding length and the next-header bytes. 
933 */ 934 if (padlen >= ntohs(ipha->ipha_length) - sizeof (ipha_t) - 2 - 935 sizeof (esph_t) - ivlen) { 936 ESP_BUMP_STAT(espstack, bad_decrypt); 937 ipsec_rl_strlog(espstack->ipsecesp_netstack, 938 info.mi_idnum, 0, 0, 939 SL_ERROR | SL_WARN, 940 "Corrupt ESP packet (padlen too big).\n"); 941 esp1dbg(espstack, ("padlen (%d) is greater than:\n", 942 padlen)); 943 esp1dbg(espstack, ("pkt len(%d) - ip hdr - esp " 944 "hdr - ivlen(%d) = %d.\n", 945 ntohs(ipha->ipha_length), ivlen, 946 (int)(ntohs(ipha->ipha_length) - sizeof (ipha_t) - 947 2 - sizeof (esph_t) - ivlen))); 948 *counter = DROPPER(ipss, ipds_esp_bad_padlen); 949 return (B_FALSE); 950 } 951 952 /* 953 * Fix the rest of the header. The explicit - 2 is for the 954 * padding length and the next-header bytes. 955 */ 956 ipha->ipha_length = htons(ntohs(ipha->ipha_length) - padlen - 957 2 - sizeof (esph_t) - ivlen); 958 ipha->ipha_hdr_checksum = 0; 959 ipha->ipha_hdr_checksum = (uint16_t)ip_csum_hdr(ipha); 960 } else { 961 if (ip6h->ip6_nxt == IPPROTO_ESP) { 962 ip6h->ip6_nxt = nexthdr; 963 } else { 964 ip6_pkt_t ipp; 965 966 bzero(&ipp, sizeof (ipp)); 967 (void) ip_find_hdr_v6(data_mp, ip6h, &ipp, NULL); 968 if (ipp.ipp_dstopts != NULL) { 969 ipp.ipp_dstopts->ip6d_nxt = nexthdr; 970 } else if (ipp.ipp_rthdr != NULL) { 971 ipp.ipp_rthdr->ip6r_nxt = nexthdr; 972 } else if (ipp.ipp_hopopts != NULL) { 973 ipp.ipp_hopopts->ip6h_nxt = nexthdr; 974 } else { 975 /* Panic a DEBUG kernel. */ 976 ASSERT(ipp.ipp_hopopts != NULL); 977 /* Otherwise, pretend it's IP + ESP. */ 978