Home | History | Annotate | Download | only in ip
      1 /*
      2  * CDDL HEADER START
      3  *
      4  * The contents of this file are subject to the terms of the
      5  * Common Development and Distribution License (the "License").
      6  * You may not use this file except in compliance with the License.
      7  *
      8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
      9  * or http://www.opensolaris.org/os/licensing.
     10  * See the License for the specific language governing permissions
     11  * and limitations under the License.
     12  *
     13  * When distributing Covered Code, include this CDDL HEADER in each
     14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
     15  * If applicable, add the following below this CDDL HEADER, with the
     16  * fields enclosed by brackets "[]" replaced with your own identifying
     17  * information: Portions Copyright [yyyy] [name of copyright owner]
     18  *
     19  * CDDL HEADER END
     20  */
     21 /*
     22  * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
     23  * Use is subject to license terms.
     24  */
     25 
     26 #pragma ident	"%Z%%M%	%I%	%E% SMI"
     27 
     28 #include <sys/types.h>
     29 #include <sys/socket.h>
     30 #include <sys/ksynch.h>
     31 #include <sys/kmem.h>
     32 #include <sys/errno.h>
     33 #include <sys/systm.h>
     34 #include <sys/sysmacros.h>
     35 #include <sys/cmn_err.h>
     36 #include <sys/strsun.h>
     37 #include <sys/zone.h>
     38 #include <netinet/in.h>
     39 #include <inet/common.h>
     40 #include <inet/ip.h>
     41 #include <inet/ip6.h>
     42 #include <inet/ip6_asp.h>
     43 #include <inet/ip_ire.h>
     44 #include <inet/ipclassifier.h>
     45 
/*
 * Initializers for the netmasks used by the default policy table below.
 * Each initializer lists four 32-bit words.  The /16 mask has one variant
 * per byte order: 0xffff0000 on big-endian and 0x0000ffff on little-endian
 * both place the two 0xff bytes first in memory.
 */
#define	IN6ADDR_MASK128_INIT \
	{ 0xffffffffU, 0xffffffffU, 0xffffffffU, 0xffffffffU }
#define	IN6ADDR_MASK96_INIT	{ 0xffffffffU, 0xffffffffU, 0xffffffffU, 0 }
#ifdef _BIG_ENDIAN
#define	IN6ADDR_MASK16_INIT	{ 0xffff0000U, 0, 0, 0 }
#else
#define	IN6ADDR_MASK16_INIT	{ 0x0000ffffU, 0, 0, 0 }
#endif
     54 
     55 
/*
 * Built-in policy table, installed by ip6_asp_init() and restored by
 * ip6_asp_replace() when it is handed a NULL table.
 *
 * Each entry supplies a prefix, a mask, a label string and a precedence
 * value (field layout per ip6_asp_t, which is declared elsewhere).
 *
 * This table is ordered such that longest prefix matches are hit first
 * (longer prefix lengths first).  The last entry must be the "default"
 * entry (::0/0); ip6_asp_lookup() falls back to the last entry when no
 * earlier entry matches.
 *
 * The "IPv4" and "6to4" prefixes are written as 32-bit words, so they
 * have one variant per byte order; both variants lay down the same bytes
 * in memory (00 00 ff ff in the third word, and 20 02 00 00 in the first
 * word, respectively).
 */
static ip6_asp_t default_ip6_asp_table[] = {
	{ IN6ADDR_LOOPBACK_INIT,	IN6ADDR_MASK128_INIT,
	    "Loopback", 50 },
	{ IN6ADDR_ANY_INIT,		IN6ADDR_MASK96_INIT,
	    "IPv4_Compatible", 20 },
#ifdef _BIG_ENDIAN
	{ { 0, 0, 0x0000ffffU, 0 },	IN6ADDR_MASK96_INIT,
	    "IPv4", 10 },
	{ { 0x20020000U, 0, 0, 0 },	IN6ADDR_MASK16_INIT,
	    "6to4", 30 },
#else
	{ { 0, 0, 0xffff0000U, 0 },	IN6ADDR_MASK96_INIT,
	    "IPv4", 10 },
	{ { 0x00000220U, 0, 0, 0 },	IN6ADDR_MASK16_INIT,
	    "6to4", 30 },
#endif
	/* Catch-all entry: all-zero prefix and all-zero mask. */
	{ IN6ADDR_ANY_INIT,		IN6ADDR_ANY_INIT,
	    "Default", 40 }
};
     80 
/*
 * The IPv6 Default Address Selection policy table.
 * Until someone up above reconfigures the policy table, use the global
 * default.  The table needs no lock since the only way to alter it is
 * through the SIOCSIP6ADDRPOLICY which is exclusive in ip.
 *
 * NOTE(review): the code in this file nevertheless takes
 * ips_ip6_asp_lock around the update-in-progress flag
 * (ips_ip6_asp_uip), the reference count, the pending update and the
 * pending operation list, and replaces the table while holding it.
 * The "needs no lock" statement above may predate that; confirm.
 */
/* Sorted copy of a policy table; defined later in this file. */
static void ip6_asp_copy(ip6_asp_t *, ip6_asp_t *, uint_t);
/* Applies a deferred table update, if any; defined later in this file. */
static void ip6_asp_check_for_updates(ip_stack_t *);
     89 
     90 void
     91 ip6_asp_init(ip_stack_t *ipst)
     92 {
     93 	/* Initialize the table lock */
     94 	mutex_init(&ipst->ips_ip6_asp_lock, NULL, MUTEX_DEFAULT, NULL);
     95 
     96 	ipst->ips_ip6_asp_table = default_ip6_asp_table;
     97 
     98 	ipst->ips_ip6_asp_table_count =
     99 	    sizeof (default_ip6_asp_table) / sizeof (ip6_asp_t);
    100 }
    101 
    102 void
    103 ip6_asp_free(ip_stack_t *ipst)
    104 {
    105 	if (ipst->ips_ip6_asp_table != default_ip6_asp_table) {
    106 		kmem_free(ipst->ips_ip6_asp_table,
    107 		    ipst->ips_ip6_asp_table_count * sizeof (ip6_asp_t));
    108 		ipst->ips_ip6_asp_table = NULL;
    109 	}
    110 	mutex_destroy(&ipst->ips_ip6_asp_lock);
    111 }
    112 
    113 /*
    114  * Return false if the table is being updated. Else, increment the ref
    115  * count and return true.
    116  */
    117 boolean_t
    118 ip6_asp_can_lookup(ip_stack_t *ipst)
    119 {
    120 	mutex_enter(&ipst->ips_ip6_asp_lock);
    121 	if (ipst->ips_ip6_asp_uip) {
    122 		mutex_exit(&ipst->ips_ip6_asp_lock);
    123 		return (B_FALSE);
    124 	}
    125 	IP6_ASP_TABLE_REFHOLD(ipst);
    126 	mutex_exit(&ipst->ips_ip6_asp_lock);
    127 	return (B_TRUE);
    128 
    129 }
    130 
    131 void
    132 ip6_asp_pending_op(queue_t *q, mblk_t *mp, aspfunc_t func)
    133 {
    134 	conn_t	*connp = Q_TO_CONN(q);
    135 	ip_stack_t *ipst = connp->conn_netstack->netstack_ip;
    136 
    137 	ASSERT((mp->b_prev == NULL) && (mp->b_queue == NULL) &&
    138 	    (mp->b_next == NULL));
    139 	mp->b_queue = (void *)q;
    140 	mp->b_prev = (void *)func;
    141 	mp->b_next = NULL;
    142 
    143 	mutex_enter(&ipst->ips_ip6_asp_lock);
    144 	if (ipst->ips_ip6_asp_pending_ops == NULL) {
    145 		ASSERT(ipst->ips_ip6_asp_pending_ops_tail == NULL);
    146 		ipst->ips_ip6_asp_pending_ops =
    147 		    ipst->ips_ip6_asp_pending_ops_tail = mp;
    148 	} else {
    149 		ipst->ips_ip6_asp_pending_ops_tail->b_next = mp;
    150 		ipst->ips_ip6_asp_pending_ops_tail = mp;
    151 	}
    152 	mutex_exit(&ipst->ips_ip6_asp_lock);
    153 }
    154 
    155 static void
    156 ip6_asp_complete_op(ip_stack_t *ipst)
    157 {
    158 	mblk_t		*mp;
    159 	queue_t		*q;
    160 	aspfunc_t	func;
    161 
    162 	mutex_enter(&ipst->ips_ip6_asp_lock);
    163 	while (ipst->ips_ip6_asp_pending_ops != NULL) {
    164 		mp = ipst->ips_ip6_asp_pending_ops;
    165 		ipst->ips_ip6_asp_pending_ops = mp->b_next;
    166 		mp->b_next = NULL;
    167 		if (ipst->ips_ip6_asp_pending_ops == NULL)
    168 			ipst->ips_ip6_asp_pending_ops_tail = NULL;
    169 		mutex_exit(&ipst->ips_ip6_asp_lock);
    170 
    171 		q = (queue_t *)mp->b_queue;
    172 		func = (aspfunc_t)mp->b_prev;
    173 
    174 		mp->b_prev = NULL;
    175 		mp->b_queue = NULL;
    176 
    177 
    178 		(*func)(NULL, q, mp, NULL);
    179 		mutex_enter(&ipst->ips_ip6_asp_lock);
    180 	}
    181 	mutex_exit(&ipst->ips_ip6_asp_lock);
    182 }
    183 
/*
 * Decrement reference count. When it gets to 0, we check for (pending)
 * saved update to the table, if any.
 *
 * This is a function wrapper around the IP6_ASP_TABLE_REFRELE macro,
 * which is defined outside this file; the behaviour described above is
 * that of the macro (presumably it ends up in
 * ip6_asp_check_for_updates() -- confirm against the macro definition).
 */
void
ip6_asp_table_refrele(ip_stack_t *ipst)
{
	IP6_ASP_TABLE_REFRELE(ipst);
}
    193 
    194 /*
    195  * This function is guaranteed never to return a NULL pointer.  It
    196  * will always return information from one of the entries in the
    197  * asp_table (which will never be empty).  If a pointer is passed
    198  * in for the precedence, the precedence value will be set; a
    199  * pointer to the label will be returned by the function.
    200  *
    201  * Since the table is only anticipated to have five or six entries
    202  * total, the lookup algorithm hasn't been optimized to anything
    203  * better than O(n).
    204  */
    205 char *
    206 ip6_asp_lookup(const in6_addr_t *addr, uint32_t *precedence, ip_stack_t *ipst)
    207 {
    208 	ip6_asp_t *aspp;
    209 	ip6_asp_t *match = NULL;
    210 	ip6_asp_t *default_policy;
    211 
    212 	aspp = ipst->ips_ip6_asp_table;
    213 	/* The default entry must always be the last one */
    214 	default_policy = aspp + ipst->ips_ip6_asp_table_count - 1;
    215 
    216 	while (match == NULL) {
    217 		if (aspp == default_policy) {
    218 			match = aspp;
    219 		} else {
    220 			if (V6_MASK_EQ(*addr, aspp->ip6_asp_mask,
    221 			    aspp->ip6_asp_prefix))
    222 				match = aspp;
    223 			else
    224 				aspp++;
    225 		}
    226 	}
    227 
    228 	if (precedence != NULL)
    229 		*precedence = match->ip6_asp_precedence;
    230 	return (match->ip6_asp_label);
    231 }
    232 
    233 /*
    234  * If we had deferred updating the table because of outstanding references,
    235  * do it now. Note, we don't do error checking on the queued IOCTL mblk, since
    236  * ip_sioctl_ip6addrpolicy() has already done it for us.
    237  */
    238 void
    239 ip6_asp_check_for_updates(ip_stack_t *ipst)
    240 {
    241 	ip6_asp_t *table;
    242 	size_t	table_size;
    243 	mblk_t	*data_mp, *mp;
    244 	struct iocblk *iocp;
    245 
    246 	mutex_enter(&ipst->ips_ip6_asp_lock);
    247 	if (ipst->ips_ip6_asp_pending_update == NULL ||
    248 	    ipst->ips_ip6_asp_refcnt > 0) {
    249 		mutex_exit(&ipst->ips_ip6_asp_lock);
    250 		return;
    251 	}
    252 
    253 	mp = ipst->ips_ip6_asp_pending_update;
    254 	ipst->ips_ip6_asp_pending_update = NULL;
    255 	ASSERT(mp->b_prev != NULL);
    256 
    257 	ipst->ips_ip6_asp_uip = B_TRUE;
    258 
    259 	iocp = (struct iocblk *)mp->b_rptr;
    260 	data_mp = mp->b_cont;
    261 	if (data_mp == NULL) {
    262 		table = NULL;
    263 		table_size = iocp->ioc_count;
    264 	} else {
    265 		table = (ip6_asp_t *)data_mp->b_rptr;
    266 		table_size = iocp->ioc_count;
    267 	}
    268 
    269 	ip6_asp_replace(mp, table, table_size, B_TRUE, ipst,
    270 	    iocp->ioc_flag & IOC_MODELS);
    271 }
    272 
/*
 * ip6_asp_replace replaces the contents of the IPv6 address selection
 * policy table with those specified in new_table.  If new_table is NULL,
 * this indicates that the caller wishes ip to use the default policy
 * table.
 *
 * Arguments:
 *	mp		the ioctl message.  mp->b_prev holds the queue to
 *			reply on; if it is NULL the message is simply freed.
 *	new_table	the new entries, or NULL to revert to the default.
 *	new_size	size of new_table in bytes.  It must be a whole
 *			multiple of the entry size for the given data model,
 *			and the caller is responsible for making sure
 *			new_table really holds that many bytes.
 *	locked		B_TRUE when the caller already holds
 *			ips_ip6_asp_lock (as ip6_asp_check_for_updates()
 *			does, having also set ips_ip6_asp_uip).
 *	ipst		the IP stack instance whose table is replaced.
 *	datamodel	data model of the originating process; only used on
 *			kernels built with 32-bit syscall support.
 *
 * The outcome is reported in the ioctl reply (ioc_error): EINVAL for a
 * bad size, an empty table or a table whose last entry (after sorting)
 * is not the ::0/0 default; ENOMEM when allocation fails; EAGAIN when an
 * update is already queued.  If the table is currently referenced and no
 * update is queued, mp is saved as the pending update and the function
 * returns without replying.
 *
 * In all other cases, once the reply has been dealt with, operations
 * queued by ip6_asp_pending_op() are run.
 */
/*ARGSUSED5*/
void
ip6_asp_replace(mblk_t *mp, ip6_asp_t *new_table, size_t new_size,
    boolean_t locked, ip_stack_t *ipst, model_t datamodel)
{
	int			ret_val = 0;
	ip6_asp_t		*tmp_table;
	uint_t			count;
	queue_t			*q;
	struct iocblk		*iocp;
#if defined(_SYSCALL32_IMPL) && _LONG_LONG_ALIGNMENT_32 == 4
	size_t ip6_asp_size = SIZEOF_STRUCT(ip6_asp, datamodel);
#else
	const size_t ip6_asp_size = sizeof (ip6_asp_t);
#endif

	if (new_size % ip6_asp_size != 0) {
		ip1dbg(("ip6_asp_replace: invalid table size\n"));
		ret_val = EINVAL;
		/* A locked caller still needs the lock and uip flag undone. */
		if (locked)
			goto unlock_end;
		goto replace_end;
	} else {
		count = new_size / ip6_asp_size;
	}


	if (!locked)
		mutex_enter(&ipst->ips_ip6_asp_lock);
	/*
	 * Check if we are in the process of creating any IRE using the
	 * current information. If so, wait till that is done.
	 */
	if (!locked && ipst->ips_ip6_asp_refcnt > 0) {
		/* Save this request for later processing */
		if (ipst->ips_ip6_asp_pending_update == NULL) {
			ipst->ips_ip6_asp_pending_update = mp;
		} else {
			/* Let's not queue multiple requests for now */
			ip1dbg(("ip6_asp_replace: discarding request\n"));
			mutex_exit(&ipst->ips_ip6_asp_lock);
			ret_val =  EAGAIN;
			goto replace_end;
		}
		mutex_exit(&ipst->ips_ip6_asp_lock);
		/* Deferred: no reply now; mp is picked up again later. */
		return;
	}

	/* Prevent lookups till the table have been updated */
	if (!locked)
		ipst->ips_ip6_asp_uip = B_TRUE;

	ASSERT(ipst->ips_ip6_asp_refcnt == 0);

	if (new_table == NULL) {
		/*
		 * This is a special case.  The user wants to revert
		 * back to using the default table.
		 */
		if (ipst->ips_ip6_asp_table == default_ip6_asp_table)
			goto unlock_end;

		kmem_free(ipst->ips_ip6_asp_table,
		    ipst->ips_ip6_asp_table_count * sizeof (ip6_asp_t));
		ipst->ips_ip6_asp_table = default_ip6_asp_table;
		ipst->ips_ip6_asp_table_count =
		    sizeof (default_ip6_asp_table) / sizeof (ip6_asp_t);
		goto unlock_end;
	}

	if (count == 0) {
		ret_val = EINVAL;
		ip1dbg(("ip6_asp_replace: empty table\n"));
		goto unlock_end;
	}

	/* The lock is held here, hence the non-sleeping allocation. */
	if ((tmp_table = kmem_alloc(count * sizeof (ip6_asp_t), KM_NOSLEEP)) ==
	    NULL) {
		ret_val = ENOMEM;
		goto unlock_end;
	}

#if defined(_SYSCALL32_IMPL) && _LONG_LONG_ALIGNMENT_32 == 4

	/*
	 * If 'new_table' -actually- originates from a 32-bit process
	 * then the nicely aligned ip6_asp_label array will be
	 * subtly misaligned on this kernel, because the structure
	 * is 8 byte aligned in the kernel, but only 4 byte aligned in
	 * userland.  Fix it up here.
	 *
	 * XX64	See the notes in ip_sioctl_ip6addrpolicy.  Perhaps we could
	 *	do the datamodel transformation (below) there instead of here?
	 */
	if (datamodel == IOC_ILP32) {
		ip6_asp_t *dst;
		ip6_asp32_t *src;
		int i;

		if ((dst = kmem_zalloc(count * sizeof (*dst),
		    KM_NOSLEEP)) == NULL) {
			kmem_free(tmp_table, count * sizeof (ip6_asp_t));
			ret_val = ENOMEM;
			goto unlock_end;
		}

		/*
		 * Copy each element of the table from ip6_asp32_t
		 * format into ip6_asp_t format.  Fortunately, since
		 * we're just dealing with a trailing structure pad,
		 * we can do this straightforwardly with a flurry of
		 * bcopying.
		 */
		src = (void *)new_table;
		for (i = 0; i < count; i++)
			bcopy(src + i, dst + i, sizeof (*src));

		ip6_asp_copy(dst, tmp_table, count);
		kmem_free(dst, count * sizeof (*dst));
	} else
#endif
		ip6_asp_copy(new_table, tmp_table, count);

	/*
	 * Make sure the last entry is the default entry.  This is checked
	 * on the sorted copy, not on the caller's ordering.
	 */
	if (!IN6_IS_ADDR_UNSPECIFIED(&tmp_table[count - 1].ip6_asp_prefix) ||
	    !IN6_IS_ADDR_UNSPECIFIED(&tmp_table[count - 1].ip6_asp_mask)) {
		ret_val = EINVAL;
		kmem_free(tmp_table, count * sizeof (ip6_asp_t));
		ip1dbg(("ip6_asp_replace: bad table: no default entry\n"));
		goto unlock_end;
	}
	/* Release the old table unless it is the static default. */
	if (ipst->ips_ip6_asp_table != default_ip6_asp_table) {
		kmem_free(ipst->ips_ip6_asp_table,
		    ipst->ips_ip6_asp_table_count * sizeof (ip6_asp_t));
	}
	ipst->ips_ip6_asp_table = tmp_table;
	ipst->ips_ip6_asp_table_count = count;

	/*
	 * The user has changed the address selection policy table.  IPv6
	 * source address selection for existing IRE_CACHE and
	 * RTF_DYNAMIC entries used the old table, so we need to
	 * clear the cache.
	 */
	ire_walk_v6(ire_delete_cache_v6, NULL, ALL_ZONES, ipst);

unlock_end:
	/* Lookups may proceed again; reached with the lock held. */
	ipst->ips_ip6_asp_uip = B_FALSE;
	mutex_exit(&ipst->ips_ip6_asp_lock);

replace_end:
	/* Reply to the ioctl */
	q = (queue_t *)mp->b_prev;
	mp->b_prev = NULL;
	if (q == NULL) {
		/* Nobody to reply to; just drop the message. */
		freemsg(mp);
		goto check_binds;
	}
	iocp = (struct iocblk *)mp->b_rptr;
	iocp->ioc_error = ret_val;
	iocp->ioc_count = 0;
	DB_TYPE(mp) = (iocp->ioc_error == 0) ? M_IOCACK : M_IOCNAK;
	qreply(q, mp);
check_binds:
	/* Run whatever was queued by ip6_asp_pending_op() meanwhile. */
	ip6_asp_complete_op(ipst);
}
    446 
    447 /*
    448  * Copies the contents of src_table to dst_table, and sorts the
    449  * entries in decending order of prefix lengths.  It assumes that both
    450  * tables are appropriately sized to contain count entries.
    451  */
    452 static void
    453 ip6_asp_copy(ip6_asp_t *src_table, ip6_asp_t *dst_table, uint_t count)
    454 {
    455 	ip6_asp_t *src_ptr, *src_limit, *dst_ptr, *dst_limit, *dp;
    456 
    457 	dst_table[0] = src_table[0];
    458 	if (count == 1)
    459 		return;
    460 
    461 	/*
    462 	 * Sort the entries in descending order of prefix lengths.
    463 	 *
    464 	 * Note: this should be a small table.  In 99% of cases, we
    465 	 * expect the table to have 5 entries.  In the remaining 1%
    466 	 * of cases, we expect the table to have one or two more
    467 	 * entries.  It would be very rare for the table to have
    468 	 * double-digit entries.
    469 	 */
    470 	src_limit = src_table + count;
    471 	dst_limit = dst_table + 1;
    472 	for (src_ptr = src_table + 1; src_ptr != src_limit;
    473 	    src_ptr++, dst_limit++) {
    474 		for (dst_ptr = dst_table; dst_ptr < dst_limit; dst_ptr++) {
    475 			if (ip_mask_to_plen_v6(&src_ptr->ip6_asp_mask) >
    476 			    ip_mask_to_plen_v6(&dst_ptr->ip6_asp_mask)) {
    477 				/*
    478 				 * Make room to insert the source entry
    479 				 * before dst_ptr by shifting entries to
    480 				 * the right.
    481 				 */
    482 				for (dp = dst_limit - 1; dp >= dst_ptr; dp--)
    483 					*(dp + 1) = *dp;
    484 				break;
    485 			}
    486 		}
    487 		*dst_ptr = *src_ptr;
    488 	}
    489 }
    490 
    491 /*
    492  * This function copies as many entries from ip6_asp_table as will fit
    493  * into dtable.  The dtable_size parameter is the size of dtable
    494  * in bytes.  This function returns the number of entries in
    495  * ip6_asp_table, even if it's not able to fit all of the entries into
    496  * dtable.
    497  */
    498 int
    499 ip6_asp_get(ip6_asp_t *dtable, size_t dtable_size, ip_stack_t *ipst)
    500 {
    501 	uint_t dtable_count;
    502 
    503 	if (dtable != NULL) {
    504 		if (dtable_size < sizeof (ip6_asp_t))
    505 			return (-1);
    506 
    507 		dtable_count = dtable_size / sizeof (ip6_asp_t);
    508 		bcopy(ipst->ips_ip6_asp_table, dtable,
    509 		    MIN(ipst->ips_ip6_asp_table_count, dtable_count) *
    510 		    sizeof (ip6_asp_t));
    511 	}
    512 
    513 	return (ipst->ips_ip6_asp_table_count);
    514 }
    515 
    516 /*
    517  * Compare two labels.  Return B_TRUE if they are equal, B_FALSE
    518  * otherwise.
    519  */
    520 boolean_t
    521 ip6_asp_labelcmp(const char *label1, const char *label2)
    522 {
    523 	int64_t *llptr1, *llptr2;
    524 
    525 	/*
    526 	 * The common case, the two labels are actually the same string
    527 	 * from the policy table.
    528 	 */
    529 	if (label1 == label2)
    530 		return (B_TRUE);
    531 
    532 	/*
    533 	 * Since we know the labels are at most 16 bytes long, compare
    534 	 * the two strings as two 8-byte long integers.  The ip6_asp_t
    535 	 * structure guarantees that the labels are 8 byte alligned.
    536 	 */
    537 	llptr1 = (int64_t *)label1;
    538 	llptr2 = (int64_t *)label2;
    539 	if (llptr1[0] == llptr2[0] && llptr1[1] == llptr2[1])
    540 		return (B_TRUE);
    541 	return (B_FALSE);
    542 }
    543