Home | History | Annotate | Download | only in os
      1 /*
      2  * CDDL HEADER START
      3  *
      4  * The contents of this file are subject to the terms of the
      5  * Common Development and Distribution License (the "License").
      6  * You may not use this file except in compliance with the License.
      7  *
      8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
      9  * or http://www.opensolaris.org/os/licensing.
     10  * See the License for the specific language governing permissions
     11  * and limitations under the License.
     12  *
     13  * When distributing Covered Code, include this CDDL HEADER in each
     14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
     15  * If applicable, add the following below this CDDL HEADER, with the
     16  * fields enclosed by brackets "[]" replaced with your own identifying
     17  * information: Portions Copyright [yyyy] [name of copyright owner]
     18  *
     19  * CDDL HEADER END
     20  */
     21 /*
     22  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
     23  * Use is subject to license terms.
     24  */
     25 
     26 #include <sys/kmem.h>
     27 #include <sys/errno.h>
     28 #include <sys/systm.h>
     29 #include <sys/cmn_err.h>
     30 #include <sys/brand.h>
     31 #include <sys/machbrand.h>
     32 #include <sys/modctl.h>
     33 #include <sys/rwlock.h>
     34 #include <sys/zone.h>
     35 
/* The only brand interface version that brand_register() accepts. */
#define	SUPPORTED_BRAND_VERSION BRAND_VER_1
     37 
#if defined(__sparcv9)
/* sparcv9 uses system wide brand interposition hooks */
static void brand_plat_interposition_enable(void);
static void brand_plat_interposition_disable(void);

/* Machine-specific ops of the native brand; every entry is NULL. */
struct brand_mach_ops native_mach_ops  = {
		NULL, NULL
};
#else /* !__sparcv9 */
/* Machine-specific ops of the native brand; every entry is NULL. */
struct brand_mach_ops native_mach_ops  = {
		NULL, NULL, NULL, NULL, NULL, NULL
};
#endif /* !__sparcv9 */

/*
 * The "native" brand.  brand_init() assigns it to p0, brand_setbrand()
 * expects a process to carry it before being branded, and
 * brand_clearbrand() puts a process back on it.
 */
brand_t native_brand = {
		BRAND_VER_1,
		"native",
		NULL,
		&native_mach_ops
};
     58 
/*
 * Used to maintain a list of all the brands currently loaded into the
 * kernel.
 */
struct brand_list {
	int			bl_refcnt;	/* zones using this brand */
	struct brand_list	*bl_next;	/* next entry; NULL at the tail */
	brand_t			*bl_brand;	/* the registered brand */
};

/* Head of the singly linked list; brand_register() inserts at the front. */
static struct brand_list *brand_list = NULL;

/*
 * This lock protects the integrity of the brand list.  Every walk of the
 * list and every access to bl_refcnt in this file happens with it held.
 */
static kmutex_t brand_list_lock;
     75 
     76 void
     77 brand_init()
     78 {
     79 	mutex_init(&brand_list_lock, NULL, MUTEX_DEFAULT, NULL);
     80 	p0.p_brand = &native_brand;
     81 }
     82 
     83 int
     84 brand_register(brand_t *brand)
     85 {
     86 	struct brand_list *list, *scan;
     87 
     88 	if (brand == NULL)
     89 		return (EINVAL);
     90 
     91 	if (brand->b_version != SUPPORTED_BRAND_VERSION) {
     92 		if (brand->b_version < SUPPORTED_BRAND_VERSION) {
     93 			cmn_err(CE_WARN,
     94 			    "brand '%s' was built to run on older versions "
     95 			    "of Solaris.",
     96 			    brand->b_name);
     97 		} else {
     98 			cmn_err(CE_WARN,
     99 			    "brand '%s' was built to run on a newer version "
    100 			    "of Solaris.",
    101 			    brand->b_name);
    102 		}
    103 		return (EINVAL);
    104 	}
    105 
    106 	/* Sanity checks */
    107 	if (brand->b_name == NULL || brand->b_ops == NULL ||
    108 	    brand->b_ops->b_brandsys == NULL) {
    109 		cmn_err(CE_WARN, "Malformed brand");
    110 		return (EINVAL);
    111 	}
    112 
    113 	list = kmem_alloc(sizeof (struct brand_list), KM_SLEEP);
    114 
    115 	/* Add the brand to the list of loaded brands. */
    116 	mutex_enter(&brand_list_lock);
    117 
    118 	/*
    119 	 * Check to be sure we haven't already registered this brand.
    120 	 */
    121 	for (scan = brand_list; scan != NULL; scan = scan->bl_next) {
    122 		if (strcmp(brand->b_name, scan->bl_brand->b_name) == 0) {
    123 			cmn_err(CE_WARN,
    124 			    "Invalid attempt to load a second instance of "
    125 			    "brand %s", brand->b_name);
    126 			mutex_exit(&brand_list_lock);
    127 			kmem_free(list, sizeof (struct brand_list));
    128 			return (EINVAL);
    129 		}
    130 	}
    131 
    132 #if defined(__sparcv9)
    133 	/* sparcv9 uses system wide brand interposition hooks */
    134 	if (brand_list == NULL)
    135 		brand_plat_interposition_enable();
    136 #endif /* __sparcv9 */
    137 
    138 	list->bl_brand = brand;
    139 	list->bl_refcnt = 0;
    140 	list->bl_next = brand_list;
    141 	brand_list = list;
    142 
    143 	mutex_exit(&brand_list_lock);
    144 
    145 	return (0);
    146 }
    147 
    148 /*
    149  * The kernel module implementing this brand is being unloaded, so remove
    150  * it from the list of active brands.
    151  */
    152 int
    153 brand_unregister(brand_t *brand)
    154 {
    155 	struct brand_list *list, *prev;
    156 
    157 	/* Sanity checks */
    158 	if (brand == NULL || brand->b_name == NULL) {
    159 		cmn_err(CE_WARN, "Malformed brand");
    160 		return (EINVAL);
    161 	}
    162 
    163 	prev = NULL;
    164 	mutex_enter(&brand_list_lock);
    165 
    166 	for (list = brand_list; list != NULL; list = list->bl_next) {
    167 		if (list->bl_brand == brand)
    168 			break;
    169 		prev = list;
    170 	}
    171 
    172 	if (list == NULL) {
    173 		cmn_err(CE_WARN, "Brand %s wasn't registered", brand->b_name);
    174 		mutex_exit(&brand_list_lock);
    175 		return (EINVAL);
    176 	}
    177 
    178 	if (list->bl_refcnt > 0) {
    179 		cmn_err(CE_WARN, "Unregistering brand %s which is still in use",
    180 		    brand->b_name);
    181 		mutex_exit(&brand_list_lock);
    182 		return (EBUSY);
    183 	}
    184 
    185 	/* Remove brand from the list */
    186 	if (prev != NULL)
    187 		prev->bl_next = list->bl_next;
    188 	else
    189 		brand_list = list->bl_next;
    190 
    191 #if defined(__sparcv9)
    192 	/* sparcv9 uses system wide brand interposition hooks */
    193 	if (brand_list == NULL)
    194 		brand_plat_interposition_disable();
    195 #endif /* __sparcv9 */
    196 
    197 	mutex_exit(&brand_list_lock);
    198 
    199 	kmem_free(list, sizeof (struct brand_list));
    200 
    201 	return (0);
    202 }
    203 
    204 /*
    205  * Record that a zone of this brand has been instantiated.  If the kernel
    206  * module implementing this brand's functionality is not present, this
    207  * routine attempts to load the module as a side effect.
    208  */
    209 brand_t *
    210 brand_register_zone(struct brand_attr *attr)
    211 {
    212 	struct brand_list *l = NULL;
    213 	ddi_modhandle_t	hdl = NULL;
    214 	char *modname;
    215 	int err = 0;
    216 
    217 	if (is_system_labeled()) {
    218 		cmn_err(CE_WARN,
    219 		    "Branded zones are not allowed on labeled systems.");
    220 		return (NULL);
    221 	}
    222 
    223 	/*
    224 	 * We make at most two passes through this loop.  The first time
    225 	 * through, we're looking to see if this is a new user of an
    226 	 * already loaded brand.  If the brand hasn't been loaded, we
    227 	 * call ddi_modopen() to force it to be loaded and then make a
    228 	 * second pass through the list of brands.  If we don't find the
    229 	 * brand the second time through it means that the modname
    230 	 * specified in the brand_attr structure doesn't provide the brand
    231 	 * specified in the brandname field.  This would suggest a bug in
    232 	 * the brand's config.xml file.  We close the module and return
    233 	 * 'NULL' to the caller.
    234 	 */
    235 	for (;;) {
    236 		/*
    237 		 * Search list of loaded brands
    238 		 */
    239 		mutex_enter(&brand_list_lock);
    240 		for (l = brand_list; l != NULL; l = l->bl_next)
    241 			if (strcmp(attr->ba_brandname,
    242 			    l->bl_brand->b_name) == 0)
    243 				break;
    244 		if ((l != NULL) || (hdl != NULL))
    245 			break;
    246 		mutex_exit(&brand_list_lock);
    247 
    248 		/*
    249 		 * We didn't find that the requested brand has been loaded
    250 		 * yet, so we trigger the load of the appropriate kernel
    251 		 * module and search the list again.
    252 		 */
    253 		modname = kmem_alloc(MAXPATHLEN, KM_SLEEP);
    254 		(void) strcpy(modname, "brand/");
    255 		(void) strcat(modname, attr->ba_modname);
    256 		hdl = ddi_modopen(modname, KRTLD_MODE_FIRST, &err);
    257 		kmem_free(modname, MAXPATHLEN);
    258 
    259 		if (err != 0)
    260 			return (NULL);
    261 	}
    262 
    263 	/*
    264 	 * If we found the matching brand, bump its reference count.
    265 	 */
    266 	if (l != NULL)
    267 		l->bl_refcnt++;
    268 
    269 	mutex_exit(&brand_list_lock);
    270 
    271 	if (hdl != NULL)
    272 		(void) ddi_modclose(hdl);
    273 
    274 	return ((l != NULL) ? l->bl_brand : NULL);
    275 }
    276 
    277 /*
    278  * Return the number of zones currently using this brand.
    279  */
    280 int
    281 brand_zone_count(struct brand *bp)
    282 {
    283 	struct brand_list *l;
    284 	int cnt = 0;
    285 
    286 	mutex_enter(&brand_list_lock);
    287 	for (l = brand_list; l != NULL; l = l->bl_next)
    288 		if (l->bl_brand == bp) {
    289 			cnt = l->bl_refcnt;
    290 			break;
    291 		}
    292 	mutex_exit(&brand_list_lock);
    293 
    294 	return (cnt);
    295 }
    296 
    297 void
    298 brand_unregister_zone(struct brand *bp)
    299 {
    300 	struct brand_list *list;
    301 
    302 	mutex_enter(&brand_list_lock);
    303 	for (list = brand_list; list != NULL; list = list->bl_next) {
    304 		if (list->bl_brand == bp) {
    305 			ASSERT(list->bl_refcnt > 0);
    306 			list->bl_refcnt--;
    307 			break;
    308 		}
    309 	}
    310 	mutex_exit(&brand_list_lock);
    311 }
    312 
    313 void
    314 brand_setbrand(proc_t *p)
    315 {
    316 	brand_t *bp = p->p_zone->zone_brand;
    317 
    318 	ASSERT(bp != NULL);
    319 	ASSERT(p->p_brand == &native_brand);
    320 
    321 	/*
    322 	 * We should only be called from exec(), when we know the process
    323 	 * is single-threaded.
    324 	 */
    325 	ASSERT(p->p_tlist == p->p_tlist->t_forw);
    326 
    327 	p->p_brand = bp;
    328 	ASSERT(PROC_IS_BRANDED(p));
    329 	BROP(p)->b_setbrand(p);
    330 }
    331 
    332 void
    333 brand_clearbrand(proc_t *p)
    334 {
    335 	brand_t *bp = p->p_zone->zone_brand;
    336 	ASSERT(bp != NULL);
    337 
    338 	/*
    339 	 * We should only be called from exec_common() or proc_exit(),
    340 	 * when we know the process is single-threaded.
    341 	 */
    342 	ASSERT(p->p_tlist == p->p_tlist->t_forw);
    343 
    344 	ASSERT(PROC_IS_BRANDED(p));
    345 	BROP(p)->b_proc_exit(p, p->p_tlist->t_lwp);
    346 	p->p_brand = &native_brand;
    347 }
    348 
    349 #if defined(__sparcv9)
    350 /*
    351  * Currently, only sparc has system level brand syscall interposition.
    352  * On x86 we're able to enable syscall interposition on a per-cpu basis
    353  * when a branded thread is scheduled to run on a cpu.
    354  */
    355 
/*
 * Local variables needed for dynamic syscall interposition support.
 * They hold the instruction words that brand_plat_interposition_enable()
 * reads from the two patch points before overwriting them, and that
 * brand_plat_interposition_disable() writes back.
 */
static uint32_t	syscall_trap_patch_instr_orig;
static uint32_t	syscall_trap32_patch_instr_orig;

/* Trap Table syscall entry hot patch points */
extern void	syscall_trap_patch_point(void);
extern void	syscall_trap32_patch_point(void);

/* Alternate syscall entry handlers used when branded zones are running */
extern void	syscall_wrapper(void);
extern void	syscall_wrapper32(void);

/*
 * Macros used to facilitate sparcv9 instruction generation.
 *
 * DISP22(from, to) takes the byte distance from 'from' to 'to', shifts
 * it right by two and keeps the low 22 bits.  The result is OR-ed into
 * BA_A_INSTR to form the branch that is patched in at 'from'.
 */
#define	BA_A_INSTR	0x30800000	/* ba,a addr */
#define	DISP22(from, to) \
	((((uintptr_t)(to) - (uintptr_t)(from)) >> 2) & 0x3fffff)
    372 
    373 /*ARGSUSED*/
    374 static void
    375 brand_plat_interposition_enable(void)
    376 {
    377 	ASSERT(MUTEX_HELD(&brand_list_lock));
    378 
    379 	/*
    380 	 * Before we hot patch the kernel save the current instructions
    381 	 * so that we can restore them later.
    382 	 */
    383 	syscall_trap_patch_instr_orig =
    384 	    *(uint32_t *)syscall_trap_patch_point;
    385 	syscall_trap32_patch_instr_orig =
    386 	    *(uint32_t *)syscall_trap32_patch_point;
    387 
    388 	/*
    389 	 * Modify the trap table at the patch points.
    390 	 *
    391 	 * We basically replace the first instruction at the patch
    392 	 * point with a ba,a instruction that will transfer control
    393 	 * to syscall_wrapper or syscall_wrapper32 for 64-bit and
    394 	 * 32-bit syscalls respectively.  It's important to note that
    395 	 * the annul bit is set in the branch so we don't execute
    396 	 * the instruction directly following the one we're patching
    397 	 * during the branch's delay slot.
    398 	 *
    399 	 * It also doesn't matter that we're not atomically updating both
    400 	 * the 64 and 32 bit syscall paths at the same time since there's
    401 	 * no actual branded processes running on the system yet.
    402 	 */
    403 	hot_patch_kernel_text((caddr_t)syscall_trap_patch_point,
    404 	    BA_A_INSTR | DISP22(syscall_trap_patch_point, syscall_wrapper),
    405 	    4);
    406 	hot_patch_kernel_text((caddr_t)syscall_trap32_patch_point,
    407 	    BA_A_INSTR | DISP22(syscall_trap32_patch_point, syscall_wrapper32),
    408 	    4);
    409 }
    410 
    411 /*ARGSUSED*/
    412 static void
    413 brand_plat_interposition_disable(void)
    414 {
    415 	ASSERT(MUTEX_HELD(&brand_list_lock));
    416 
    417 	/*
    418 	 * Restore the original instructions at the trap table syscall
    419 	 * patch points to disable the brand syscall interposition
    420 	 * mechanism.
    421 	 */
    422 	hot_patch_kernel_text((caddr_t)syscall_trap_patch_point,
    423 	    syscall_trap_patch_instr_orig, 4);
    424 	hot_patch_kernel_text((caddr_t)syscall_trap32_patch_point,
    425 	    syscall_trap32_patch_instr_orig, 4);
    426 }
    427 #endif /* __sparcv9 */
    428