Home | History | Annotate | Download | only in syscall
      1 /*
      2  * CDDL HEADER START
      3  *
      4  * The contents of this file are subject to the terms of the
      5  * Common Development and Distribution License (the "License").
      6  * You may not use this file except in compliance with the License.
      7  *
      8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
      9  * or http://www.opensolaris.org/os/licensing.
     10  * See the License for the specific language governing permissions
     11  * and limitations under the License.
     12  *
     13  * When distributing Covered Code, include this CDDL HEADER in each
     14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
     15  * If applicable, add the following below this CDDL HEADER, with the
     16  * fields enclosed by brackets "[]" replaced with your own identifying
     17  * information: Portions Copyright [yyyy] [name of copyright owner]
     18  *
     19  * CDDL HEADER END
     20  */
     21 /*
     22  * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
     23  * Use is subject to license terms.
     24  */
     25 
     26 #pragma ident	"@(#)acctctl.c	1.8	08/01/30 SMI"
     27 
     28 #include <sys/proc.h>
     29 #include <sys/systm.h>
     30 #include <sys/param.h>
     31 #include <sys/kmem.h>
     32 #include <sys/sysmacros.h>
     33 #include <sys/types.h>
     34 #include <sys/cmn_err.h>
     35 #include <sys/user.h>
     36 #include <sys/cred.h>
     37 #include <sys/vnode.h>
     38 #include <sys/file.h>
     39 #include <sys/pathname.h>
     40 #include <sys/modctl.h>
     41 #include <sys/acctctl.h>
     42 #include <sys/bitmap.h>
     43 #include <sys/exacct.h>
     44 #include <sys/policy.h>
     45 
     46 /*
     47  * acctctl(2)
     48  *
     49  *   acctctl() provides the administrative interface to the extended accounting
     50  *   subsystem.  The process and task accounting facilities are configurable:
     51  *   resources can be individually specified for recording in the appropriate
     52  *   accounting file.
     53  *
 *   The current implementation of acctctl() requires that the process, task,
 *   flow and net accounting files be distinct across all zones.
     56  *
     57  * Locking
     58  *   Each accounting species has an ac_info_t which contains a mutex,
     59  *   used to protect the ac_info_t's contents, and to serialize access to the
     60  *   appropriate file.
     61  */
     62 
     63 static list_t exacct_globals_list;
     64 static kmutex_t exacct_globals_list_lock;
     65 
     66 static int
     67 ac_state_set(ac_info_t *info, void *buf, size_t bufsz)
     68 {
     69 	int state;
     70 
     71 	if (buf == NULL || (bufsz != sizeof (int)))
     72 		return (EINVAL);
     73 
     74 	if (copyin(buf, &state, bufsz) != 0)
     75 		return (EFAULT);
     76 
     77 	if (state != AC_ON && state != AC_OFF)
     78 		return (EINVAL);
     79 
     80 	mutex_enter(&info->ac_lock);
     81 	info->ac_state = state;
     82 	mutex_exit(&info->ac_lock);
     83 	return (0);
     84 }
     85 
     86 static int
     87 ac_state_get(ac_info_t *info, void *buf, size_t bufsz)
     88 {
     89 	if (buf == NULL || (bufsz != sizeof (int)))
     90 		return (EINVAL);
     91 
     92 	mutex_enter(&info->ac_lock);
     93 	if (copyout(&info->ac_state, buf, bufsz) != 0) {
     94 		mutex_exit(&info->ac_lock);
     95 		return (EFAULT);
     96 	}
     97 	mutex_exit(&info->ac_lock);
     98 	return (0);
     99 }
    100 
    101 static boolean_t
    102 ac_file_in_use(vnode_t *vp)
    103 {
    104 	boolean_t in_use = B_FALSE;
    105 	struct exacct_globals *acg;
    106 
    107 	if (vp == NULL)
    108 		return (B_FALSE);
    109 	mutex_enter(&exacct_globals_list_lock);
    110 	/*
    111 	 * Start off by grabbing all locks.
    112 	 */
    113 	for (acg = list_head(&exacct_globals_list); acg != NULL;
    114 	    acg = list_next(&exacct_globals_list, acg)) {
    115 		mutex_enter(&acg->ac_proc.ac_lock);
    116 		mutex_enter(&acg->ac_task.ac_lock);
    117 		mutex_enter(&acg->ac_flow.ac_lock);
    118 		mutex_enter(&acg->ac_net.ac_lock);
    119 	}
    120 
    121 	for (acg = list_head(&exacct_globals_list); !in_use && acg != NULL;
    122 	    acg = list_next(&exacct_globals_list, acg)) {
    123 		/*
    124 		 * We need to verify that we aren't already using this file for
    125 		 * accounting in any zone.
    126 		 */
    127 		if (vn_compare(acg->ac_proc.ac_vnode, vp) ||
    128 		    vn_compare(acg->ac_task.ac_vnode, vp) ||
    129 		    vn_compare(acg->ac_flow.ac_vnode, vp) ||
    130 		    vn_compare(acg->ac_net.ac_vnode, vp))
    131 			in_use = B_TRUE;
    132 	}
    133 
    134 	/*
    135 	 * Drop all locks.
    136 	 */
    137 	for (acg = list_head(&exacct_globals_list); acg != NULL;
    138 	    acg = list_next(&exacct_globals_list, acg)) {
    139 		mutex_exit(&acg->ac_proc.ac_lock);
    140 		mutex_exit(&acg->ac_task.ac_lock);
    141 		mutex_exit(&acg->ac_flow.ac_lock);
    142 		mutex_exit(&acg->ac_net.ac_lock);
    143 	}
    144 	mutex_exit(&exacct_globals_list_lock);
    145 	return (in_use);
    146 }
    147 
    148 static int
    149 ac_file_set(ac_info_t *info, void *ubuf, size_t bufsz)
    150 {
    151 	int error = 0;
    152 	void *kbuf;
    153 	void *namebuf;
    154 	int namelen;
    155 	vnode_t *vp;
    156 	void *hdr;
    157 	size_t hdrsize;
    158 
    159 	if (ubuf == NULL) {
    160 		mutex_enter(&info->ac_lock);
    161 
    162 		/*
    163 		 * Closing accounting file
    164 		 */
    165 		if (info->ac_vnode != NULL) {
    166 			error = VOP_CLOSE(info->ac_vnode, FWRITE, 1, 0,
    167 			    CRED(), NULL);
    168 			if (error) {
    169 				mutex_exit(&info->ac_lock);
    170 				return (error);
    171 			}
    172 			VN_RELE(info->ac_vnode);
    173 			info->ac_vnode = NULL;
    174 		}
    175 		if (info->ac_file != NULL) {
    176 			kmem_free(info->ac_file, strlen(info->ac_file) + 1);
    177 			info->ac_file = NULL;
    178 		}
    179 
    180 		mutex_exit(&info->ac_lock);
    181 		return (error);
    182 	}
    183 
    184 	if (bufsz < 2 || bufsz > MAXPATHLEN)
    185 		return (EINVAL);
    186 
    187 	/*
    188 	 * We have to copy in the whole buffer since we can't tell the length
    189 	 * of the string in user's address space.
    190 	 */
    191 	kbuf = kmem_zalloc(bufsz, KM_SLEEP);
    192 	if ((error = copyinstr((char *)ubuf, (char *)kbuf, bufsz, NULL)) != 0) {
    193 		kmem_free(kbuf, bufsz);
    194 		return (error);
    195 	}
    196 	if (*((char *)kbuf) != '/') {
    197 		kmem_free(kbuf, bufsz);
    198 		return (EINVAL);
    199 	}
    200 
    201 	/*
    202 	 * Now, allocate the space where we are going to save the
    203 	 * name of the accounting file and kmem_free kbuf. We have to do this
    204 	 * now because it is not good to sleep in kmem_alloc() while
    205 	 * holding ac_info's lock.
    206 	 */
    207 	namelen = strlen(kbuf) + 1;
    208 	namebuf = kmem_alloc(namelen, KM_SLEEP);
    209 	(void) strcpy(namebuf, kbuf);
    210 	kmem_free(kbuf, bufsz);
    211 
    212 	/*
    213 	 * Check if this file already exists.
    214 	 */
    215 	error = lookupname(namebuf, UIO_SYSSPACE, FOLLOW, NULLVPP, &vp);
    216 
    217 	/*
    218 	 * Check if the file is already in use.
    219 	 */
    220 	if (!error) {
    221 		if (ac_file_in_use(vp)) {
    222 			/*
    223 			 * If we're already using it then return EBUSY
    224 			 */
    225 			kmem_free(namebuf, namelen);
    226 			VN_RELE(vp);
    227 			return (EBUSY);
    228 		}
    229 		VN_RELE(vp);
    230 	}
    231 
    232 	/*
    233 	 * Now, grab info's ac_lock and try to set up everything.
    234 	 */
    235 	mutex_enter(&info->ac_lock);
    236 
    237 	if ((error = vn_open(namebuf, UIO_SYSSPACE,
    238 	    FCREAT | FWRITE | FTRUNC, 0600, &vp, CRCREAT, 0)) != 0) {
    239 		mutex_exit(&info->ac_lock);
    240 		kmem_free(namebuf, namelen);
    241 		return (error);
    242 	}
    243 
    244 	if (vp->v_type != VREG) {
    245 		VN_RELE(vp);
    246 		mutex_exit(&info->ac_lock);
    247 		kmem_free(namebuf, namelen);
    248 		return (EACCES);
    249 	}
    250 
    251 	if (info->ac_vnode != NULL) {
    252 		/*
    253 		 * Switch from an old file to a new file by swapping
    254 		 * their vnode pointers.
    255 		 */
    256 		vnode_t *oldvp;
    257 		oldvp = info->ac_vnode;
    258 		info->ac_vnode = vp;
    259 		vp = oldvp;
    260 	} else {
    261 		/*
    262 		 * Start writing accounting records to a new file.
    263 		 */
    264 		info->ac_vnode = vp;
    265 		vp = NULL;
    266 	}
    267 	if (vp) {
    268 		/*
    269 		 * We still need to close the old file.
    270 		 */
    271 		if ((error = VOP_CLOSE(vp, FWRITE, 1, 0, CRED(), NULL)) != 0) {
    272 			VN_RELE(vp);
    273 			mutex_exit(&info->ac_lock);
    274 			kmem_free(namebuf, namelen);
    275 			return (error);
    276 		}
    277 		VN_RELE(vp);
    278 		if (info->ac_file != NULL) {
    279 			kmem_free(info->ac_file,
    280 			    strlen(info->ac_file) + 1);
    281 			info->ac_file = NULL;
    282 		}
    283 	}
    284 	/*
    285 	 * Finally, point ac_file to the filename string and release the lock.
    286 	 */
    287 	info->ac_file = namebuf;
    288 	mutex_exit(&info->ac_lock);
    289 
    290 	/*
    291 	 * Create and write an exacct header to the file.
    292 	 */
    293 	hdr = exacct_create_header(&hdrsize);
    294 	error = exacct_write_header(info, hdr, hdrsize);
    295 
    296 	return (error);
    297 }
    298 
    299 static int
    300 ac_file_get(ac_info_t *info, void *buf, size_t bufsz)
    301 {
    302 	int error = 0;
    303 	vnode_t *vnode;
    304 	char *file;
    305 
    306 	mutex_enter(&info->ac_lock);
    307 	file = info->ac_file;
    308 	vnode = info->ac_vnode;
    309 
    310 	if (file == NULL || vnode == NULL) {
    311 		mutex_exit(&info->ac_lock);
    312 		return (ENOTACTIVE);
    313 	}
    314 
    315 	if (strlen(file) >= bufsz)
    316 		error = ENOMEM;
    317 	else
    318 		error = copyoutstr(file, buf, MAXPATHLEN, NULL);
    319 
    320 	mutex_exit(&info->ac_lock);
    321 	return (error);
    322 }
    323 
    324 static int
    325 ac_res_set(ac_info_t *info, void *buf, size_t bufsz, int maxres)
    326 {
    327 	ac_res_t *res;
    328 	ac_res_t *tmp;
    329 	ulong_t *maskp;
    330 	int id;
    331 	uint_t counter = 0;
    332 
    333 	/*
    334 	 * Validate that a non-zero buffer, sized within limits and to an
    335 	 * integral number of ac_res_t's has been specified.
    336 	 */
    337 	if (bufsz == 0 ||
    338 	    bufsz > sizeof (ac_res_t) * (AC_MAX_RES + 1) ||
    339 	    (bufsz / sizeof (ac_res_t)) * sizeof (ac_res_t) != bufsz)
    340 		return (EINVAL);
    341 
    342 	tmp = res = kmem_alloc(bufsz, KM_SLEEP);
    343 	if (copyin(buf, res, bufsz) != 0) {
    344 		kmem_free(res, bufsz);
    345 		return (EFAULT);
    346 	}
    347 
    348 	maskp = (ulong_t *)&info->ac_mask;
    349 
    350 	mutex_enter(&info->ac_lock);
    351 	while ((id = tmp->ar_id) != AC_NONE && counter < maxres + 1) {
    352 		if (id > maxres || id < 0) {
    353 			mutex_exit(&info->ac_lock);
    354 			kmem_free(res, bufsz);
    355 			return (EINVAL);
    356 		}
    357 		if (tmp->ar_state == AC_ON) {
    358 			BT_SET(maskp, id);
    359 		} else if (tmp->ar_state == AC_OFF) {
    360 			BT_CLEAR(maskp, id);
    361 		} else {
    362 			mutex_exit(&info->ac_lock);
    363 			kmem_free(res, bufsz);
    364 			return (EINVAL);
    365 		}
    366 		tmp++;
    367 		counter++;
    368 	}
    369 	mutex_exit(&info->ac_lock);
    370 	kmem_free(res, bufsz);
    371 	return (0);
    372 }
    373 
    374 static int
    375 ac_res_get(ac_info_t *info, void *buf, size_t bufsz, int maxres)
    376 {
    377 	int error = 0;
    378 	ac_res_t *res;
    379 	ac_res_t *tmp;
    380 	size_t ressz = sizeof (ac_res_t) * (maxres + 1);
    381 	ulong_t *maskp;
    382 	int id;
    383 
    384 	if (bufsz < ressz)
    385 		return (EINVAL);
    386 	tmp = res = kmem_alloc(ressz, KM_SLEEP);
    387 
    388 	mutex_enter(&info->ac_lock);
    389 	maskp = (ulong_t *)&info->ac_mask;
    390 	for (id = 1; id <= maxres; id++) {
    391 		tmp->ar_id = id;
    392 		tmp->ar_state = BT_TEST(maskp, id);
    393 		tmp++;
    394 	}
    395 	tmp->ar_id = AC_NONE;
    396 	tmp->ar_state = AC_OFF;
    397 	mutex_exit(&info->ac_lock);
    398 	error = copyout(res, buf, ressz);
    399 	kmem_free(res, ressz);
    400 	return (error);
    401 }
    402 
    403 /*
    404  * acctctl()
    405  *
    406  * Overview
    407  *   acctctl() is the entry point for the acctctl(2) system call.
    408  *
    409  * Return values
    410  *   On successful completion, return 0; otherwise -1 is returned and errno is
    411  *   set appropriately.
    412  *
    413  * Caller's context
    414  *   Called from the system call path.
    415  */
    416 int
    417 acctctl(int cmd, void *buf, size_t bufsz)
    418 {
    419 	int error = 0;
    420 	int mode = AC_MODE(cmd);
    421 	int option = AC_OPTION(cmd);
    422 	int maxres;
    423 	ac_info_t *info;
    424 	zone_t *zone = curproc->p_zone;
    425 	struct exacct_globals *acg;
    426 
    427 	acg = zone_getspecific(exacct_zone_key, zone);
    428 	/*
    429 	 * exacct_zone_key and associated per-zone state were initialized when
    430 	 * the module was loaded.
    431 	 */
    432 	ASSERT(exacct_zone_key != ZONE_KEY_UNINITIALIZED);
    433 	ASSERT(acg != NULL);
    434 
    435 	switch (mode) {	/* sanity check */
    436 	case AC_TASK:
    437 		info = &acg->ac_task;
    438 		maxres = AC_TASK_MAX_RES;
    439 		break;
    440 	case AC_PROC:
    441 		info = &acg->ac_proc;
    442 		maxres = AC_PROC_MAX_RES;
    443 		break;
    444 	/*
    445 	 * Flow/net accounting isn't configurable in non-global
    446 	 * zones, but we have this field on a per-zone basis for future
    447 	 * expansion as well as the ability to return default "unset"
    448 	 * values for the various AC_*_GET queries.  AC_*_SET commands
    449 	 * fail with EPERM for AC_FLOW and AC_NET in non-global zones.
    450 	 */
    451 	case AC_FLOW:
    452 		info = &acg->ac_flow;
    453 		maxres = AC_FLOW_MAX_RES;
    454 		break;
    455 	case AC_NET:
    456 		info = &acg->ac_net;
    457 		maxres = AC_NET_MAX_RES;
    458 		break;
    459 	default:
    460 		return (set_errno(EINVAL));
    461 	}
    462 
    463 	switch (option) {
    464 	case AC_STATE_SET:
    465 		if ((error = secpolicy_acct(CRED())) != 0)
    466 			break;
    467 		if ((mode == AC_FLOW || mode == AC_NET) &&
    468 		    getzoneid() != GLOBAL_ZONEID) {
    469 			error = EPERM;
    470 			break;
    471 		}
    472 		error = ac_state_set(info, buf, bufsz);
    473 		break;
    474 	case AC_STATE_GET:
    475 		error = ac_state_get(info, buf, bufsz);
    476 		break;
    477 	case AC_FILE_SET:
    478 		if ((error = secpolicy_acct(CRED())) != 0)
    479 			break;
    480 		if ((mode == AC_FLOW || mode == AC_NET) &&
    481 		    getzoneid() != GLOBAL_ZONEID) {
    482 			error = EPERM;
    483 			break;
    484 		}
    485 		error = ac_file_set(info, buf, bufsz);
    486 		break;
    487 	case AC_FILE_GET:
    488 		error = ac_file_get(info, buf, bufsz);
    489 		break;
    490 	case AC_RES_SET:
    491 		if ((error = secpolicy_acct(CRED())) != 0)
    492 			break;
    493 		if ((mode == AC_FLOW || mode == AC_NET) &&
    494 		    getzoneid() != GLOBAL_ZONEID) {
    495 			error = EPERM;
    496 			break;
    497 		}
    498 		error = ac_res_set(info, buf, bufsz, maxres);
    499 		break;
    500 	case AC_RES_GET:
    501 		error = ac_res_get(info, buf, bufsz, maxres);
    502 		break;
    503 	default:
    504 		return (set_errno(EINVAL));
    505 	}
    506 	if (error)
    507 		return (set_errno(error));
    508 	return (0);
    509 }
    510 
    511 static struct sysent ac_sysent = {
    512 	3,
    513 	SE_NOUNLOAD | SE_ARGC | SE_32RVAL1,
    514 	acctctl
    515 };
    516 
    517 static struct modlsys modlsys = {
    518 	&mod_syscallops,
    519 	"acctctl system call",
    520 	&ac_sysent
    521 };
    522 
    523 #ifdef _SYSCALL32_IMPL
    524 static struct modlsys modlsys32 = {
    525 	&mod_syscallops32,
    526 	"32-bit acctctl system call",
    527 	&ac_sysent
    528 };
    529 #endif
    530 
    531 static struct modlinkage modlinkage = {
    532 	MODREV_1,
    533 	&modlsys,
    534 #ifdef _SYSCALL32_IMPL
    535 	&modlsys32,
    536 #endif
    537 	NULL
    538 };
    539 
    540 /* ARGSUSED */
    541 static void *
    542 exacct_zone_init(zoneid_t zoneid)
    543 {
    544 	struct exacct_globals *acg;
    545 
    546 	acg = kmem_zalloc(sizeof (*acg), KM_SLEEP);
    547 	mutex_enter(&exacct_globals_list_lock);
    548 	list_insert_tail(&exacct_globals_list, acg);
    549 	mutex_exit(&exacct_globals_list_lock);
    550 	return (acg);
    551 }
    552 
    553 static void
    554 exacct_free_info(ac_info_t *info)
    555 {
    556 	mutex_enter(&info->ac_lock);
    557 	if (info->ac_vnode) {
    558 		(void) VOP_CLOSE(info->ac_vnode, FWRITE, 1, 0, kcred, NULL);
    559 		VN_RELE(info->ac_vnode);
    560 		kmem_free(info->ac_file, strlen(info->ac_file) + 1);
    561 	}
    562 	info->ac_state = AC_OFF;
    563 	info->ac_vnode = NULL;
    564 	info->ac_file = NULL;
    565 	mutex_exit(&info->ac_lock);
    566 }
    567 
    568 /* ARGSUSED */
    569 static void
    570 exacct_zone_shutdown(zoneid_t zoneid, void *data)
    571 {
    572 	struct exacct_globals *acg = data;
    573 
    574 	/*
    575 	 * The accounting files need to be closed during shutdown rather than
    576 	 * destroy, since otherwise the filesystem they reside on may fail to
    577 	 * unmount, thus causing the entire zone halt/reboot to fail.
    578 	 */
    579 	exacct_free_info(&acg->ac_proc);
    580 	exacct_free_info(&acg->ac_task);
    581 	exacct_free_info(&acg->ac_flow);
    582 	exacct_free_info(&acg->ac_net);
    583 }
    584 
    585 /* ARGSUSED */
    586 static void
    587 exacct_zone_fini(zoneid_t zoneid, void *data)
    588 {
    589 	struct exacct_globals *acg = data;
    590 
    591 	mutex_enter(&exacct_globals_list_lock);
    592 	list_remove(&exacct_globals_list, acg);
    593 	mutex_exit(&exacct_globals_list_lock);
    594 
    595 	mutex_destroy(&acg->ac_proc.ac_lock);
    596 	mutex_destroy(&acg->ac_task.ac_lock);
    597 	mutex_destroy(&acg->ac_flow.ac_lock);
    598 	mutex_destroy(&acg->ac_net.ac_lock);
    599 	kmem_free(acg, sizeof (*acg));
    600 }
    601 
    602 int
    603 _init()
    604 {
    605 	int error;
    606 
    607 	mutex_init(&exacct_globals_list_lock, NULL, MUTEX_DEFAULT, NULL);
    608 	list_create(&exacct_globals_list, sizeof (struct exacct_globals),
    609 	    offsetof(struct exacct_globals, ac_link));
    610 	zone_key_create(&exacct_zone_key, exacct_zone_init,
    611 	    exacct_zone_shutdown, exacct_zone_fini);
    612 
    613 	if ((error = mod_install(&modlinkage)) != 0) {
    614 		(void) zone_key_delete(exacct_zone_key);
    615 		exacct_zone_key = ZONE_KEY_UNINITIALIZED;
    616 		mutex_destroy(&exacct_globals_list_lock);
    617 		list_destroy(&exacct_globals_list);
    618 	}
    619 	return (error);
    620 }
    621 
    622 int
    623 _info(struct modinfo *modinfop)
    624 {
    625 	return (mod_info(&modlinkage, modinfop));
    626 }
    627 
    628 int
    629 _fini()
    630 {
    631 	return (EBUSY);
    632 }
    633