Home | History | Annotate | Download | only in disp
      1 /*
      2  * CDDL HEADER START
      3  *
      4  * The contents of this file are subject to the terms of the
      5  * Common Development and Distribution License (the "License").
      6  * You may not use this file except in compliance with the License.
      7  *
      8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
      9  * or http://www.opensolaris.org/os/licensing.
     10  * See the License for the specific language governing permissions
     11  * and limitations under the License.
     12  *
     13  * When distributing Covered Code, include this CDDL HEADER in each
     14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
     15  * If applicable, add the following below this CDDL HEADER, with the
     16  * fields enclosed by brackets "[]" replaced with your own identifying
     17  * information: Portions Copyright [yyyy] [name of copyright owner]
     18  *
     19  * CDDL HEADER END
     20  */
     21 
     22 /*
     23  * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
     24  * Use is subject to license terms.
     25  */
     26 
     27 #pragma ident	"%Z%%M%	%I%	%E% SMI"
     28 
     29 #include <sys/types.h>
     30 #include <sys/param.h>
     31 #include <sys/sysmacros.h>
     32 #include <sys/cred.h>
     33 #include <sys/proc.h>
     34 #include <sys/session.h>
     35 #include <sys/strsubr.h>
     36 #include <sys/user.h>
     37 #include <sys/priocntl.h>
     38 #include <sys/class.h>
     39 #include <sys/disp.h>
     40 #include <sys/procset.h>
     41 #include <sys/debug.h>
     42 #include <sys/kmem.h>
     43 #include <sys/errno.h>
     44 #include <sys/fx.h>
     45 #include <sys/fxpriocntl.h>
     46 #include <sys/cpuvar.h>
     47 #include <sys/systm.h>
     48 #include <sys/vtrace.h>
     49 #include <sys/schedctl.h>
     50 #include <sys/tnf_probe.h>
     51 #include <sys/sunddi.h>
     52 #include <sys/spl.h>
     53 #include <sys/modctl.h>
     54 #include <sys/policy.h>
     55 #include <sys/sdt.h>
     56 #include <sys/cpupart.h>
     57 #include <sys/cpucaps.h>
     58 
/* Class initialization routine; referenced by the sclass entry below. */
static pri_t fx_init(id_t, int, classfuncs_t **);

/*
 * Scheduling class switch entry for this module: the class name "FX"
 * followed by its initialization routine.
 */
static struct sclass csw = {
	"FX",
	fx_init,
	0
};

/* Scheduling-class module description; ties mod_schedops to csw. */
static struct modlsched modlsched = {
	&mod_schedops, "Fixed priority sched class", &csw
};

/* Linkage structure passed to mod_install() and mod_info() below. */
static struct modlinkage modlinkage = {
	MODREV_1, (void *)&modlsched, NULL
};
     74 
     75 
/*
 * control flags (kparms->fx_cflags).
 * Each bit records that the corresponding field of an fxkparms_t was
 * supplied by the caller and should be applied.
 */
#define	FX_DOUPRILIM	0x01    /* change user priority limit */
#define	FX_DOUPRI	0x02    /* change user priority */
#define	FX_DOTQ		0x04    /* change FX time quantum */


#define	FXMAXUPRI 60		/* maximum user priority setting */

/*
 * Requests for a user priority or priority limit above this value are
 * subject to a secpolicy_setpriority() check in fx_enterclass().
 */
#define	FX_MAX_UNPRIV_PRI	0	/* maximum unprivileged priority */
     87 
/*
 * The fxproc_t structures that have a registered callback vector
 * are also kept in an array of circular doubly linked lists. A hash of
 * the thread id (from ddi_get_kt_did()) determines the list on which
 * each such fxproc structure is placed. Each list has a dummy
 * "head" which is never removed, so the list is never empty.
 */
     95 
     96 #define	FX_CB_LISTS 16		/* number of lists, must be power of 2 */
     97 #define	FX_CB_LIST_HASH(ktid)	((uint_t)ktid & (FX_CB_LISTS - 1))
     98 
     99 /* Insert fxproc into callback list */
    100 #define	FX_CB_LIST_INSERT(fxpp)						\
    101 {									\
    102 	int index = FX_CB_LIST_HASH(fxpp->fx_ktid);			\
    103 	kmutex_t *lockp = &fx_cb_list_lock[index];			\
    104 	fxproc_t *headp = &fx_cb_plisthead[index];			\
    105 	mutex_enter(lockp);						\
    106 	fxpp->fx_cb_next = headp->fx_cb_next;				\
    107 	fxpp->fx_cb_prev = headp;					\
    108 	headp->fx_cb_next->fx_cb_prev = fxpp;				\
    109 	headp->fx_cb_next = fxpp;					\
    110 	mutex_exit(lockp);						\
    111 }
    112 
    113 /*
    114  * Remove thread from callback list.
    115  */
    116 #define	FX_CB_LIST_DELETE(fxpp)						\
    117 {									\
    118 	int index = FX_CB_LIST_HASH(fxpp->fx_ktid);			\
    119 	kmutex_t *lockp = &fx_cb_list_lock[index];			\
    120 	mutex_enter(lockp);						\
    121 	fxpp->fx_cb_prev->fx_cb_next = fxpp->fx_cb_next;		\
    122 	fxpp->fx_cb_next->fx_cb_prev = fxpp->fx_cb_prev;		\
    123 	mutex_exit(lockp);						\
    124 }
    125 
    126 #define	FX_HAS_CB(fxpp)	(fxpp->fx_callback != NULL)
    127 
    128 /* adjust x to be between 0 and fx_maxumdpri */
    129 
    130 #define	FX_ADJUST_PRI(pri)						\
    131 {									\
    132 	if (pri < 0)							\
    133 		pri = 0;  						\
    134 	else if (pri > fx_maxumdpri) 					\
    135 		pri = fx_maxumdpri;  					\
    136 }
    137 
    138 #define	FX_ADJUST_QUANTUM(q)						\
    139 {									\
    140 	if (q > INT_MAX)						\
    141 		q = INT_MAX;						\
    142 	else if (q <= 0)						\
    143 		q = FX_TQINF;						\
    144 }
    145 
    146 #define	FX_ISVALID(pri, quantum) \
    147 	(((pri >= 0) || (pri == FX_CB_NOCHANGE)) &&			\
    148 	    ((quantum >= 0) || (quantum == FX_NOCHANGE) ||		\
    149 		(quantum == FX_TQDEF) || (quantum == FX_TQINF)))
    150 
    151 
/* Class-wide state; the scalars below are filled in by fx_init(). */
static id_t	fx_cid;		/* fixed priority class ID */
static fxdpent_t *fx_dptbl;	/* fixed priority disp parameter table */

/* Upper bound on user-supplied priorities; see fx_getclinfo()/fx_parmsin(). */
static pri_t	fx_maxupri = FXMAXUPRI;
static pri_t	fx_maxumdpri;	/* max user mode fixed priority */

static pri_t	fx_maxglobpri;	/* maximum global priority used by fx class */
static kmutex_t	fx_dptblock;	/* protects fixed priority dispatch table */


/* One lock and one dummy list head per hash bucket (see FX_CB_LIST_HASH). */
static kmutex_t	fx_cb_list_lock[FX_CB_LISTS];	/* protects list of fxprocs */
						/* that have callbacks */
static fxproc_t	fx_cb_plisthead[FX_CB_LISTS];	/* dummy fxproc at head of */
						/* list of fxprocs with */
						/* callbacks */
    166 						/* callbacks */
    167 
/*
 * Forward declarations for the class and thread operations installed in
 * fx_classfuncs below.
 */
static int	fx_admin(caddr_t, cred_t *);
static int	fx_getclinfo(void *);
static int	fx_parmsin(void *);
static int	fx_parmsout(void *, pc_vaparms_t *);
static int	fx_vaparmsin(void *, pc_vaparms_t *);
static int	fx_vaparmsout(void *, pc_vaparms_t *);
static int	fx_getclpri(pcpri_t *);
static int	fx_alloc(void **, int);
static void	fx_free(void *);
static int	fx_enterclass(kthread_t *, id_t, void *, cred_t *, void *);
static void	fx_exitclass(void *);
static int	fx_canexit(kthread_t *, cred_t *);
static int	fx_fork(kthread_t *, kthread_t *, void *);
static void	fx_forkret(kthread_t *, kthread_t *);
static void	fx_parmsget(kthread_t *, void *);
static int	fx_parmsset(kthread_t *, void *, id_t, cred_t *);
static void	fx_stop(kthread_t *, int, int);
static void	fx_exit(kthread_t *);
static pri_t	fx_swapin(kthread_t *, int);
static pri_t	fx_swapout(kthread_t *, int);
static void	fx_trapret(kthread_t *);
static void	fx_preempt(kthread_t *);
static void	fx_setrun(kthread_t *);
static void	fx_sleep(kthread_t *);
static void	fx_tick(kthread_t *);
static void	fx_wakeup(kthread_t *);
static int	fx_donice(kthread_t *, cred_t *, int, int *);
static int	fx_doprio(kthread_t *, cred_t *, int, int *);
static pri_t	fx_globpri(kthread_t *);
static void	fx_yield(kthread_t *);
/*
 * Declared without a parameter list: fx_nullsys fills several
 * fx_classfuncs slots (active, inactive, set_process_group).
 */
static void	fx_nullsys();

/* Defined outside this file; fx_init() uses it to obtain the dispatch table. */
extern fxdpent_t *fx_getdptbl(void);

/* Internal helpers. */
static void	fx_change_priority(kthread_t *, fxproc_t *);
static fxproc_t *fx_list_lookup(kt_did_t);
static void fx_list_release(fxproc_t *);
    205 
    206 
/*
 * Operations vector for the FX class; fx_init() hands its address back to
 * the caller through clfuncspp. Entries are positional, so their order
 * must match the member order of struct classfuncs.
 */
static struct classfuncs fx_classfuncs = {
	/* class functions */
	fx_admin,
	fx_getclinfo,
	fx_parmsin,
	fx_parmsout,
	fx_vaparmsin,
	fx_vaparmsout,
	fx_getclpri,
	fx_alloc,
	fx_free,

	/* thread functions */
	fx_enterclass,
	fx_exitclass,
	fx_canexit,
	fx_fork,
	fx_forkret,
	fx_parmsget,
	fx_parmsset,
	fx_stop,
	fx_exit,
	fx_nullsys,	/* active */
	fx_nullsys,	/* inactive */
	fx_swapin,
	fx_swapout,
	fx_trapret,
	fx_preempt,
	fx_setrun,
	fx_sleep,
	fx_tick,
	fx_wakeup,
	fx_donice,
	fx_globpri,
	fx_nullsys,	/* set_process_group */
	fx_yield,
	fx_doprio,
};
    245 
    246 
    247 int
    248 _init()
    249 {
    250 	return (mod_install(&modlinkage));
    251 }
    252 
    253 int
    254 _fini()
    255 {
    256 	return (EBUSY);
    257 }
    258 
    259 int
    260 _info(struct modinfo *modinfop)
    261 {
    262 	return (mod_info(&modlinkage, modinfop));
    263 }
    264 
    265 /*
    266  * Fixed priority class initialization. Called by dispinit() at boot time.
    267  * We can ignore the clparmsz argument since we know that the smallest
    268  * possible parameter buffer is big enough for us.
    269  */
    270 /* ARGSUSED */
    271 static pri_t
    272 fx_init(id_t cid, int clparmsz, classfuncs_t **clfuncspp)
    273 {
    274 	int i;
    275 	extern pri_t fx_getmaxumdpri(void);
    276 
    277 	fx_dptbl = fx_getdptbl();
    278 	fx_maxumdpri = fx_getmaxumdpri();
    279 	fx_maxglobpri = fx_dptbl[fx_maxumdpri].fx_globpri;
    280 
    281 	fx_cid = cid;		/* Record our class ID */
    282 
    283 	/*
    284 	 * Initialize the hash table for fxprocs with callbacks
    285 	 */
    286 	for (i = 0; i < FX_CB_LISTS; i++) {
    287 		fx_cb_plisthead[i].fx_cb_next = fx_cb_plisthead[i].fx_cb_prev =
    288 		    &fx_cb_plisthead[i];
    289 	}
    290 
    291 	/*
    292 	 * We're required to return a pointer to our classfuncs
    293 	 * structure and the highest global priority value we use.
    294 	 */
    295 	*clfuncspp = &fx_classfuncs;
    296 	return (fx_maxglobpri);
    297 }
    298 
    299 /*
    300  * Get or reset the fx_dptbl values per the user's request.
    301  */
    302 static int
    303 fx_admin(caddr_t uaddr, cred_t *reqpcredp)
    304 {
    305 	fxadmin_t	fxadmin;
    306 	fxdpent_t	*tmpdpp;
    307 	int		userdpsz;
    308 	int		i;
    309 	size_t		fxdpsz;
    310 
    311 	if (get_udatamodel() == DATAMODEL_NATIVE) {
    312 		if (copyin(uaddr, &fxadmin, sizeof (fxadmin_t)))
    313 			return (EFAULT);
    314 	}
    315 #ifdef _SYSCALL32_IMPL
    316 	else {
    317 		/* get fxadmin struct from ILP32 caller */
    318 		fxadmin32_t fxadmin32;
    319 		if (copyin(uaddr, &fxadmin32, sizeof (fxadmin32_t)))
    320 			return (EFAULT);
    321 		fxadmin.fx_dpents =
    322 		    (struct fxdpent *)(uintptr_t)fxadmin32.fx_dpents;
    323 		fxadmin.fx_ndpents = fxadmin32.fx_ndpents;
    324 		fxadmin.fx_cmd = fxadmin32.fx_cmd;
    325 	}
    326 #endif /* _SYSCALL32_IMPL */
    327 
    328 	fxdpsz = (fx_maxumdpri + 1) * sizeof (fxdpent_t);
    329 
    330 	switch (fxadmin.fx_cmd) {
    331 	case FX_GETDPSIZE:
    332 		fxadmin.fx_ndpents = fx_maxumdpri + 1;
    333 
    334 		if (get_udatamodel() == DATAMODEL_NATIVE) {
    335 			if (copyout(&fxadmin, uaddr, sizeof (fxadmin_t)))
    336 				return (EFAULT);
    337 		}
    338 #ifdef _SYSCALL32_IMPL
    339 		else {
    340 			/* return fxadmin struct to ILP32 caller */
    341 			fxadmin32_t fxadmin32;
    342 			fxadmin32.fx_dpents =
    343 			    (caddr32_t)(uintptr_t)fxadmin.fx_dpents;
    344 			fxadmin32.fx_ndpents = fxadmin.fx_ndpents;
    345 			fxadmin32.fx_cmd = fxadmin.fx_cmd;
    346 			if (copyout(&fxadmin32, uaddr, sizeof (fxadmin32_t)))
    347 				return (EFAULT);
    348 		}
    349 #endif /* _SYSCALL32_IMPL */
    350 		break;
    351 
    352 	case FX_GETDPTBL:
    353 		userdpsz = MIN(fxadmin.fx_ndpents * sizeof (fxdpent_t),
    354 		    fxdpsz);
    355 		if (copyout(fx_dptbl, fxadmin.fx_dpents, userdpsz))
    356 			return (EFAULT);
    357 
    358 		fxadmin.fx_ndpents = userdpsz / sizeof (fxdpent_t);
    359 
    360 		if (get_udatamodel() == DATAMODEL_NATIVE) {
    361 			if (copyout(&fxadmin, uaddr, sizeof (fxadmin_t)))
    362 				return (EFAULT);
    363 		}
    364 #ifdef _SYSCALL32_IMPL
    365 		else {
    366 			/* return fxadmin struct to ILP32 callers */
    367 			fxadmin32_t fxadmin32;
    368 			fxadmin32.fx_dpents =
    369 			    (caddr32_t)(uintptr_t)fxadmin.fx_dpents;
    370 			fxadmin32.fx_ndpents = fxadmin.fx_ndpents;
    371 			fxadmin32.fx_cmd = fxadmin.fx_cmd;
    372 			if (copyout(&fxadmin32, uaddr, sizeof (fxadmin32_t)))
    373 				return (EFAULT);
    374 		}
    375 #endif /* _SYSCALL32_IMPL */
    376 		break;
    377 
    378 	case FX_SETDPTBL:
    379 		/*
    380 		 * We require that the requesting process has sufficient
    381 		 * privileges. We also require that the table supplied by
    382 		 * the user exactly match the current fx_dptbl in size.
    383 		 */
    384 		if (secpolicy_dispadm(reqpcredp) != 0) {
    385 			return (EPERM);
    386 		}
    387 		if (fxadmin.fx_ndpents * sizeof (fxdpent_t) != fxdpsz) {
    388 			return (EINVAL);
    389 		}
    390 
    391 		/*
    392 		 * We read the user supplied table into a temporary buffer
    393 		 * where it is validated before being copied over the
    394 		 * fx_dptbl.
    395 		 */
    396 		tmpdpp = kmem_alloc(fxdpsz, KM_SLEEP);
    397 		if (copyin(fxadmin.fx_dpents, tmpdpp, fxdpsz)) {
    398 			kmem_free(tmpdpp, fxdpsz);
    399 			return (EFAULT);
    400 		}
    401 		for (i = 0; i < fxadmin.fx_ndpents; i++) {
    402 
    403 			/*
    404 			 * Validate the user supplied values. All we are doing
    405 			 * here is verifying that the values are within their
    406 			 * allowable ranges and will not panic the system. We
    407 			 * make no attempt to ensure that the resulting
    408 			 * configuration makes sense or results in reasonable
    409 			 * performance.
    410 			 */
    411 			if (tmpdpp[i].fx_quantum <= 0 &&
    412 			    tmpdpp[i].fx_quantum != FX_TQINF) {
    413 				kmem_free(tmpdpp, fxdpsz);
    414 				return (EINVAL);
    415 			}
    416 		}
    417 
    418 		/*
    419 		 * Copy the user supplied values over the current fx_dptbl
    420 		 * values. The fx_globpri member is read-only so we don't
    421 		 * overwrite it.
    422 		 */
    423 		mutex_enter(&fx_dptblock);
    424 		for (i = 0; i < fxadmin.fx_ndpents; i++) {
    425 			fx_dptbl[i].fx_quantum = tmpdpp[i].fx_quantum;
    426 		}
    427 		mutex_exit(&fx_dptblock);
    428 		kmem_free(tmpdpp, fxdpsz);
    429 		break;
    430 
    431 	default:
    432 		return (EINVAL);
    433 	}
    434 	return (0);
    435 }
    436 
    437 /*
    438  * Allocate a fixed priority class specific thread structure and
    439  * initialize it with the parameters supplied. Also move the thread
    440  * to specified priority.
    441  */
    442 static int
    443 fx_enterclass(kthread_t *t, id_t cid, void *parmsp, cred_t *reqpcredp,
    444     void *bufp)
    445 {
    446 	fxkparms_t	*fxkparmsp = (fxkparms_t *)parmsp;
    447 	fxproc_t	*fxpp;
    448 	pri_t		reqfxupri;
    449 	pri_t		reqfxuprilim;
    450 
    451 	fxpp = (fxproc_t *)bufp;
    452 	ASSERT(fxpp != NULL);
    453 
    454 	/*
    455 	 * Initialize the fxproc structure.
    456 	 */
    457 	fxpp->fx_flags = 0;
    458 	fxpp->fx_callback = NULL;
    459 	fxpp->fx_cookie = NULL;
    460 
    461 	if (fxkparmsp == NULL) {
    462 		/*
    463 		 * Use default values.
    464 		 */
    465 		fxpp->fx_pri = fxpp->fx_uprilim = 0;
    466 		fxpp->fx_pquantum = fx_dptbl[fxpp->fx_pri].fx_quantum;
    467 		fxpp->fx_nice =  NZERO;
    468 	} else {
    469 		/*
    470 		 * Use supplied values.
    471 		 */
    472 
    473 		if ((fxkparmsp->fx_cflags & FX_DOUPRILIM) == 0) {
    474 			reqfxuprilim = 0;
    475 		} else {
    476 			if (fxkparmsp->fx_uprilim > FX_MAX_UNPRIV_PRI &&
    477 			    secpolicy_setpriority(reqpcredp) != 0)
    478 				return (EPERM);
    479 			reqfxuprilim = fxkparmsp->fx_uprilim;
    480 			FX_ADJUST_PRI(reqfxuprilim);
    481 		}
    482 
    483 		if ((fxkparmsp->fx_cflags & FX_DOUPRI) == 0) {
    484 			reqfxupri = reqfxuprilim;
    485 		} else {
    486 			if (fxkparmsp->fx_upri > FX_MAX_UNPRIV_PRI &&
    487 			    secpolicy_setpriority(reqpcredp) != 0)
    488 				return (EPERM);
    489 			/*
    490 			 * Set the user priority to the requested value
    491 			 * or the upri limit, whichever is lower.
    492 			 */
    493 			reqfxupri = fxkparmsp->fx_upri;
    494 			FX_ADJUST_PRI(reqfxupri);
    495 
    496 			if (reqfxupri > reqfxuprilim)
    497 				reqfxupri = reqfxuprilim;
    498 		}
    499 
    500 
    501 		fxpp->fx_uprilim = reqfxuprilim;
    502 		fxpp->fx_pri = reqfxupri;
    503 
    504 		fxpp->fx_nice = NZERO - (NZERO * reqfxupri) / fx_maxupri;
    505 
    506 		if (((fxkparmsp->fx_cflags & FX_DOTQ) == 0) ||
    507 		    (fxkparmsp->fx_tqntm == FX_TQDEF)) {
    508 			fxpp->fx_pquantum = fx_dptbl[fxpp->fx_pri].fx_quantum;
    509 		} else {
    510 			if (secpolicy_setpriority(reqpcredp) != 0)
    511 				return (EPERM);
    512 
    513 			if (fxkparmsp->fx_tqntm == FX_TQINF)
    514 				fxpp->fx_pquantum = FX_TQINF;
    515 			else {
    516 				fxpp->fx_pquantum = fxkparmsp->fx_tqntm;
    517 			}
    518 		}
    519 
    520 	}
    521 
    522 	fxpp->fx_timeleft = fxpp->fx_pquantum;
    523 	cpucaps_sc_init(&fxpp->fx_caps);
    524 	fxpp->fx_tp = t;
    525 
    526 	thread_lock(t);			/* get dispatcher lock on thread */
    527 	t->t_clfuncs = &(sclass[cid].cl_funcs->thread);
    528 	t->t_cid = cid;
    529 	t->t_cldata = (void *)fxpp;
    530 	t->t_schedflag &= ~TS_RUNQMATCH;
    531 	fx_change_priority(t, fxpp);
    532 	thread_unlock(t);
    533 
    534 	return (0);
    535 }
    536 
    537 /*
    538  * The thread is exiting.
    539  */
    540 static void
    541 fx_exit(kthread_t *t)
    542 {
    543 	fxproc_t *fxpp;
    544 
    545 	thread_lock(t);
    546 	fxpp = (fxproc_t *)(t->t_cldata);
    547 
    548 	/*
    549 	 * A thread could be exiting in between clock ticks, so we need to
    550 	 * calculate how much CPU time it used since it was charged last time.
    551 	 *
    552 	 * CPU caps are not enforced on exiting processes - it is usually
    553 	 * desirable to exit as soon as possible to free resources.
    554 	 */
    555 	(void) CPUCAPS_CHARGE(t, &fxpp->fx_caps, CPUCAPS_CHARGE_ONLY);
    556 
    557 	if (FX_HAS_CB(fxpp)) {
    558 		FX_CB_EXIT(FX_CALLB(fxpp), fxpp->fx_cookie);
    559 		fxpp->fx_callback = NULL;
    560 		fxpp->fx_cookie = NULL;
    561 		thread_unlock(t);
    562 		FX_CB_LIST_DELETE(fxpp);
    563 		return;
    564 	}
    565 
    566 	thread_unlock(t);
    567 }
    568 
    569 /*
    570  * Exiting the class. Free fxproc structure of thread.
    571  */
    572 static void
    573 fx_exitclass(void *procp)
    574 {
    575 	fxproc_t *fxpp = (fxproc_t *)procp;
    576 
    577 	thread_lock(fxpp->fx_tp);
    578 	if (FX_HAS_CB(fxpp)) {
    579 
    580 		FX_CB_EXIT(FX_CALLB(fxpp), fxpp->fx_cookie);
    581 
    582 		fxpp->fx_callback = NULL;
    583 		fxpp->fx_cookie = NULL;
    584 		thread_unlock(fxpp->fx_tp);
    585 		FX_CB_LIST_DELETE(fxpp);
    586 	} else
    587 		thread_unlock(fxpp->fx_tp);
    588 
    589 	kmem_free(fxpp, sizeof (fxproc_t));
    590 }
    591 
    592 /* ARGSUSED */
    593 static int
    594 fx_canexit(kthread_t *t, cred_t *cred)
    595 {
    596 	/*
    597 	 * A thread can always leave the FX class
    598 	 */
    599 	return (0);
    600 }
    601 
    602 /*
    603  * Initialize fixed-priority class specific proc structure for a child.
    604  * callbacks are not inherited upon fork.
    605  */
    606 static int
    607 fx_fork(kthread_t *t, kthread_t *ct, void *bufp)
    608 {
    609 	fxproc_t	*pfxpp;		/* ptr to parent's fxproc structure */
    610 	fxproc_t	*cfxpp;		/* ptr to child's fxproc structure */
    611 
    612 	ASSERT(MUTEX_HELD(&ttoproc(t)->p_lock));
    613 
    614 	cfxpp = (fxproc_t *)bufp;
    615 	ASSERT(cfxpp != NULL);
    616 	thread_lock(t);
    617 	pfxpp = (fxproc_t *)t->t_cldata;
    618 	/*
    619 	 * Initialize child's fxproc structure.
    620 	 */
    621 	cfxpp->fx_timeleft = cfxpp->fx_pquantum = pfxpp->fx_pquantum;
    622 	cfxpp->fx_pri = pfxpp->fx_pri;
    623 	cfxpp->fx_uprilim = pfxpp->fx_uprilim;
    624 	cfxpp->fx_nice = pfxpp->fx_nice;
    625 	cfxpp->fx_callback = NULL;
    626 	cfxpp->fx_cookie = NULL;
    627 	cfxpp->fx_flags = pfxpp->fx_flags & ~(FXBACKQ);
    628 	cpucaps_sc_init(&cfxpp->fx_caps);
    629 
    630 	cfxpp->fx_tp = ct;
    631 	ct->t_cldata = (void *)cfxpp;
    632 	thread_unlock(t);
    633 
    634 	/*
    635 	 * Link new structure into fxproc list.
    636 	 */
    637 	return (0);
    638 }
    639 
    640 
    641 /*
    642  * Child is placed at back of dispatcher queue and parent gives
    643  * up processor so that the child runs first after the fork.
    644  * This allows the child immediately execing to break the multiple
    645  * use of copy on write pages with no disk home. The parent will
    646  * get to steal them back rather than uselessly copying them.
    647  */
    648 static void
    649 fx_forkret(kthread_t *t, kthread_t *ct)
    650 {
    651 	proc_t	*pp = ttoproc(t);
    652 	proc_t	*cp = ttoproc(ct);
    653 	fxproc_t *fxpp;
    654 
    655 	ASSERT(t == curthread);
    656 	ASSERT(MUTEX_HELD(&pidlock));
    657 
    658 	/*
    659 	 * Grab the child's p_lock before dropping pidlock to ensure
    660 	 * the process does not disappear before we set it running.
    661 	 */
    662 	mutex_enter(&cp->p_lock);
    663 	mutex_exit(&pidlock);
    664 	continuelwps(cp);
    665 	mutex_exit(&cp->p_lock);
    666 
    667 	mutex_enter(&pp->p_lock);
    668 	continuelwps(pp);
    669 	mutex_exit(&pp->p_lock);
    670 
    671 	thread_lock(t);
    672 	fxpp = (fxproc_t *)(t->t_cldata);
    673 	t->t_pri = fx_dptbl[fxpp->fx_pri].fx_globpri;
    674 	ASSERT(t->t_pri >= 0 && t->t_pri <= fx_maxglobpri);
    675 	THREAD_TRANSITION(t);
    676 	fx_setrun(t);
    677 	thread_unlock(t);
    678 
    679 	swtch();
    680 }
    681 
    682 
    683 /*
    684  * Get information about the fixed-priority class into the buffer
    685  * pointed to by fxinfop. The maximum configured user priority
    686  * is the only information we supply.
    687  */
    688 static int
    689 fx_getclinfo(void *infop)
    690 {
    691 	fxinfo_t *fxinfop = (fxinfo_t *)infop;
    692 	fxinfop->fx_maxupri = fx_maxupri;
    693 	return (0);
    694 }
    695 
    696 
    697 
    698 /*
    699  * Return the user mode scheduling priority range.
    700  */
    701 static int
    702 fx_getclpri(pcpri_t *pcprip)
    703 {
    704 	pcprip->pc_clpmax = fx_maxupri;
    705 	pcprip->pc_clpmin = 0;
    706 	return (0);
    707 }
    708 
    709 
    710 static void
    711 fx_nullsys()
    712 {}
    713 
    714 
    715 /*
    716  * Get the fixed-priority parameters of the thread pointed to by
    717  * fxprocp into the buffer pointed to by fxparmsp.
    718  */
    719 static void
    720 fx_parmsget(kthread_t *t, void *parmsp)
    721 {
    722 	fxproc_t *fxpp = (fxproc_t *)t->t_cldata;
    723 	fxkparms_t *fxkparmsp = (fxkparms_t *)parmsp;
    724 
    725 	fxkparmsp->fx_upri = fxpp->fx_pri;
    726 	fxkparmsp->fx_uprilim = fxpp->fx_uprilim;
    727 	fxkparmsp->fx_tqntm = fxpp->fx_pquantum;
    728 }
    729 
    730 
    731 
    732 /*
    733  * Check the validity of the fixed-priority parameters in the buffer
    734  * pointed to by fxparmsp.
    735  */
    736 static int
    737 fx_parmsin(void *parmsp)
    738 {
    739 	fxparms_t	*fxparmsp = (fxparms_t *)parmsp;
    740 	uint_t		cflags;
    741 	longlong_t	ticks;
    742 	/*
    743 	 * Check validity of parameters.
    744 	 */
    745 
    746 	if ((fxparmsp->fx_uprilim > fx_maxupri ||
    747 	    fxparmsp->fx_uprilim < 0) &&
    748 	    fxparmsp->fx_uprilim != FX_NOCHANGE)
    749 		return (EINVAL);
    750 
    751 	if ((fxparmsp->fx_upri > fx_maxupri ||
    752 	    fxparmsp->fx_upri < 0) &&
    753 	    fxparmsp->fx_upri != FX_NOCHANGE)
    754 		return (EINVAL);
    755 
    756 	if ((fxparmsp->fx_tqsecs == 0 && fxparmsp->fx_tqnsecs == 0) ||
    757 	    fxparmsp->fx_tqnsecs >= NANOSEC)
    758 		return (EINVAL);
    759 
    760 	cflags = (fxparmsp->fx_upri != FX_NOCHANGE ? FX_DOUPRI : 0);
    761 
    762 	if (fxparmsp->fx_uprilim != FX_NOCHANGE) {
    763 		cflags |= FX_DOUPRILIM;
    764 	}
    765 
    766 	if (fxparmsp->fx_tqnsecs != FX_NOCHANGE)
    767 		cflags |= FX_DOTQ;
    768 
    769 	/*
    770 	 * convert the buffer to kernel format.
    771 	 */
    772 
    773 	if (fxparmsp->fx_tqnsecs >= 0) {
    774 		if ((ticks = SEC_TO_TICK((longlong_t)fxparmsp->fx_tqsecs) +
    775 		    NSEC_TO_TICK_ROUNDUP(fxparmsp->fx_tqnsecs)) > INT_MAX)
    776 			return (ERANGE);
    777 
    778 		((fxkparms_t *)fxparmsp)->fx_tqntm = (int)ticks;
    779 	} else {
    780 		if ((fxparmsp->fx_tqnsecs != FX_NOCHANGE) &&
    781 		    (fxparmsp->fx_tqnsecs != FX_TQINF) &&
    782 		    (fxparmsp->fx_tqnsecs != FX_TQDEF))
    783 			return (EINVAL);
    784 		((fxkparms_t *)fxparmsp)->fx_tqntm = fxparmsp->fx_tqnsecs;
    785 	}
    786 
    787 	((fxkparms_t *)fxparmsp)->fx_cflags = cflags;
    788 
    789 	return (0);
    790 }
    791 
    792 
    793 /*
    794  * Check the validity of the fixed-priority parameters in the pc_vaparms_t
    795  * structure vaparmsp and put them in the buffer pointed to by fxprmsp.
    796  * pc_vaparms_t contains (key, value) pairs of parameter.
    797  */
    798 static int
    799 fx_vaparmsin(void *prmsp, pc_vaparms_t *vaparmsp)
    800 {
    801 	uint_t		secs = 0;
    802 	uint_t		cnt;
    803 	int		nsecs = 0;
    804 	int		priflag, secflag, nsecflag, limflag;
    805 	longlong_t	ticks;
    806 	fxkparms_t	*fxprmsp = (fxkparms_t *)prmsp;
    807 	pc_vaparm_t	*vpp = &vaparmsp->pc_parms[0];
    808 
    809 
    810 	/*
    811 	 * First check the validity of parameters and convert them
    812 	 * from the user supplied format to the internal format.
    813 	 */
    814 	priflag = secflag = nsecflag = limflag = 0;
    815 
    816 	fxprmsp->fx_cflags = 0;
    817 
    818 	if (vaparmsp->pc_vaparmscnt > PC_VAPARMCNT)
    819 		return (EINVAL);
    820 
    821 	for (cnt = 0; cnt < vaparmsp->pc_vaparmscnt; cnt++, vpp++) {
    822 
    823 		switch (vpp->pc_key) {
    824 		case FX_KY_UPRILIM:
    825 			if (limflag++)
    826 				return (EINVAL);
    827 			fxprmsp->fx_cflags |= FX_DOUPRILIM;
    828 			fxprmsp->fx_uprilim = (pri_t)vpp->pc_parm;
    829 			if (fxprmsp->fx_uprilim > fx_maxupri ||
    830 			    fxprmsp->fx_uprilim < 0)
    831 				return (EINVAL);
    832 			break;
    833 
    834 		case FX_KY_UPRI:
    835 			if (priflag++)
    836 				return (EINVAL);
    837 			fxprmsp->fx_cflags |= FX_DOUPRI;
    838 			fxprmsp->fx_upri = (pri_t)vpp->pc_parm;
    839 			if (fxprmsp->fx_upri > fx_maxupri ||
    840 			    fxprmsp->fx_upri < 0)
    841 				return (EINVAL);
    842 			break;
    843 
    844 		case FX_KY_TQSECS:
    845 			if (secflag++)
    846 				return (EINVAL);
    847 			fxprmsp->fx_cflags |= FX_DOTQ;
    848 			secs = (uint_t)vpp->pc_parm;
    849 			break;
    850 
    851 		case FX_KY_TQNSECS:
    852 			if (nsecflag++)
    853 				return (EINVAL);
    854 			fxprmsp->fx_cflags |= FX_DOTQ;
    855 			nsecs = (int)vpp->pc_parm;
    856 			break;
    857 
    858 		default:
    859 			return (EINVAL);
    860 		}
    861 	}
    862 
    863 	if (vaparmsp->pc_vaparmscnt == 0) {
    864 		/*
    865 		 * Use default parameters.
    866 		 */
    867 		fxprmsp->fx_upri = 0;
    868 		fxprmsp->fx_uprilim = 0;
    869 		fxprmsp->fx_tqntm = FX_TQDEF;
    870 		fxprmsp->fx_cflags = FX_DOUPRI | FX_DOUPRILIM | FX_DOTQ;
    871 	} else if ((fxprmsp->fx_cflags & FX_DOTQ) != 0) {
    872 		if ((secs == 0 && nsecs == 0) || nsecs >= NANOSEC)
    873 			return (EINVAL);
    874 
    875 		if (nsecs >= 0) {
    876 			if ((ticks = SEC_TO_TICK((longlong_t)secs) +
    877 			    NSEC_TO_TICK_ROUNDUP(nsecs)) > INT_MAX)
    878 				return (ERANGE);
    879 
    880 			fxprmsp->fx_tqntm = (int)ticks;
    881 		} else {
    882 			if (nsecs != FX_TQINF && nsecs != FX_TQDEF)
    883 				return (EINVAL);
    884 			fxprmsp->fx_tqntm = nsecs;
    885 		}
    886 	}
    887 
    888 	return (0);
    889 }
    890 
    891 
    892 /*
    893  * Nothing to do here but return success.
    894  */
    895 /* ARGSUSED */
    896 static int
    897 fx_parmsout(void *parmsp, pc_vaparms_t *vaparmsp)
    898 {
    899 	register fxkparms_t	*fxkprmsp = (fxkparms_t *)parmsp;
    900 
    901 	if (vaparmsp != NULL)
    902 		return (0);
    903 
    904 	if (fxkprmsp->fx_tqntm < 0) {
    905 		/*
    906 		 * Quantum field set to special value (e.g. FX_TQINF)
    907 		 */
    908 		((fxparms_t *)fxkprmsp)->fx_tqnsecs = fxkprmsp->fx_tqntm;
    909 		((fxparms_t *)fxkprmsp)->fx_tqsecs = 0;
    910 
    911 	} else {
    912 		/* Convert quantum from ticks to seconds-nanoseconds */
    913 
    914 		timestruc_t ts;
    915 		TICK_TO_TIMESTRUC(fxkprmsp->fx_tqntm, &ts);
    916 		((fxparms_t *)fxkprmsp)->fx_tqsecs = ts.tv_sec;
    917 		((fxparms_t *)fxkprmsp)->fx_tqnsecs = ts.tv_nsec;
    918 	}
    919 
    920 	return (0);
    921 }
    922 
    923 
    924 /*
    925  * Copy all selected fixed-priority class parameters to the user.
    926  * The parameters are specified by a key.
    927  */
    928 static int
    929 fx_vaparmsout(void *prmsp, pc_vaparms_t *vaparmsp)
    930 {
    931 	fxkparms_t	*fxkprmsp = (fxkparms_t *)prmsp;
    932 	timestruc_t	ts;
    933 	uint_t		cnt;
    934 	uint_t		secs;
    935 	int		nsecs;
    936 	int		priflag, secflag, nsecflag, limflag;
    937 	pc_vaparm_t	*vpp = &vaparmsp->pc_parms[0];
    938 
    939 	ASSERT(MUTEX_NOT_HELD(&curproc->p_lock));
    940 
    941 	priflag = secflag = nsecflag = limflag = 0;
    942 
    943 	if (vaparmsp->pc_vaparmscnt > PC_VAPARMCNT)
    944 		return (EINVAL);
    945 
    946 	if (fxkprmsp->fx_tqntm < 0) {
    947 		/*
    948 		 * Quantum field set to special value (e.g. FX_TQINF).
    949 		 */
    950 		secs = 0;
    951 		nsecs = fxkprmsp->fx_tqntm;
    952 	} else {
    953 		/*
    954 		 * Convert quantum from ticks to seconds-nanoseconds.
    955 		 */
    956 		TICK_TO_TIMESTRUC(fxkprmsp->fx_tqntm, &ts);
    957 		secs = ts.tv_sec;
    958 		nsecs = ts.tv_nsec;
    959 	}
    960 
    961 
    962 	for (cnt = 0; cnt < vaparmsp->pc_vaparmscnt; cnt++, vpp++) {
    963 
    964 		switch (vpp->pc_key) {
    965 		case FX_KY_UPRILIM:
    966 			if (limflag++)
    967 				return (EINVAL);
    968 			if (copyout(&fxkprmsp->fx_uprilim,
    969 			    (void *)(uintptr_t)vpp->pc_parm, sizeof (pri_t)))
    970 				return (EFAULT);
    971 			break;
    972 
    973 		case FX_KY_UPRI:
    974 			if (priflag++)
    975 				return (EINVAL);
    976 			if (copyout(&fxkprmsp->fx_upri,
    977 			    (void *)(uintptr_t)vpp->pc_parm, sizeof (pri_t)))
    978 				return (EFAULT);
    979 			break;
    980 
    981 		case FX_KY_TQSECS:
    982 			if (secflag++)
    983 				return (EINVAL);
    984 			if (copyout(&secs,
    985 			    (void *)(uintptr_t)vpp->pc_parm, sizeof (uint_t)))
    986 				return (EFAULT);
    987 			break;
    988 
    989 		case FX_KY_TQNSECS:
    990 			if (nsecflag++)
    991 				return (EINVAL);
    992 			if (copyout(&nsecs,
    993 			    (void *)(uintptr_t)vpp->pc_parm, sizeof (int)))
    994 				return (EFAULT);
    995 			break;
    996 
    997 		default:
    998 			return (EINVAL);
    999 		}
   1000 	}
   1001 
   1002 	return (0);
   1003 }
   1004 
   1005 /*
   1006  * Set the scheduling parameters of the thread pointed to by fxprocp
   1007  * to those specified in the buffer pointed to by fxparmsp.
   1008  */
   1009 /* ARGSUSED */
   1010 static int
   1011 fx_parmsset(kthread_t *tx, void *parmsp, id_t reqpcid, cred_t *reqpcredp)
   1012 {
   1013 	char		nice;
   1014 	pri_t		reqfxuprilim;
   1015 	pri_t		reqfxupri;
   1016 	fxkparms_t	*fxkparmsp = (fxkparms_t *)parmsp;
   1017 	fxproc_t	*fxpp;
   1018 
   1019 
   1020 	ASSERT(MUTEX_HELD(&(ttoproc(tx))->p_lock));
   1021 
   1022 	thread_lock(tx);
   1023 	fxpp = (fxproc_t *)tx->t_cldata;
   1024 
   1025 	if ((fxkparmsp->fx_cflags & FX_DOUPRILIM) == 0)
   1026 		reqfxuprilim = fxpp->fx_uprilim;
   1027 	else
   1028 		reqfxuprilim = fxkparmsp->fx_uprilim;
   1029 
   1030 	/*
   1031 	 * Basic permissions enforced by generic kernel code
   1032 	 * for all classes require that a thread attempting
   1033 	 * to change the scheduling parameters of a target
   1034 	 * thread be privileged or have a real or effective
   1035 	 * UID matching that of the target thread. We are not
   1036 	 * called unless these basic permission checks have
   1037 	 * already passed. The fixed priority class requires in
   1038 	 * addition that the calling thread be privileged if it
   1039 	 * is attempting to raise the pri above its current
   1040 	 * value This may have been checked previously but if our
   1041 	 * caller passed us a non-NULL credential pointer we assume
   1042 	 * it hasn't and we check it here.
   1043 	 */
   1044 
   1045 	if ((reqpcredp != NULL) &&
   1046 	    (reqfxuprilim > fxpp->fx_uprilim ||
   1047 	    ((fxkparmsp->fx_cflags & FX_DOTQ) != 0)) &&
   1048 	    secpolicy_setpriority(reqpcredp) != 0) {
   1049 		thread_unlock(tx);
   1050 		return (EPERM);
   1051 	}
   1052 
   1053 	FX_ADJUST_PRI(reqfxuprilim);
   1054 
   1055 	if ((fxkparmsp->fx_cflags & FX_DOUPRI) == 0)
   1056 		reqfxupri = fxpp->fx_pri;
   1057 	else
   1058 		reqfxupri = fxkparmsp->fx_upri;
   1059 
   1060 
   1061 	/*
   1062 	 * Make sure the user priority doesn't exceed the upri limit.
   1063 	 */
   1064 	if (reqfxupri > reqfxuprilim)
   1065 		reqfxupri = reqfxuprilim;
   1066 
   1067 	/*
   1068 	 * Set fx_nice to the nice value corresponding to the user
   1069 	 * priority we are setting.  Note that setting the nice field
   1070 	 * of the parameter struct won't affect upri or nice.
   1071 	 */
   1072 
   1073 	nice = NZERO - (reqfxupri * NZERO) / fx_maxupri;
   1074 
   1075 	if (nice > NZERO)
   1076 		nice = NZERO;
   1077 
   1078 	fxpp->fx_uprilim = reqfxuprilim;
   1079 	fxpp->fx_pri = reqfxupri;
   1080 
   1081 	if (fxkparmsp->fx_tqntm == FX_TQINF)
   1082 		fxpp->fx_pquantum = FX_TQINF;
   1083 	else if (fxkparmsp->fx_tqntm == FX_TQDEF)
   1084 		fxpp->fx_pquantum = fx_dptbl[fxpp->fx_pri].fx_quantum;
   1085 	else if ((fxkparmsp->fx_cflags & FX_DOTQ) != 0)
   1086 		fxpp->fx_pquantum = fxkparmsp->fx_tqntm;
   1087 
   1088 	fxpp->fx_nice = nice;
   1089 
   1090 	fx_change_priority(tx, fxpp);
   1091 	thread_unlock(tx);
   1092 	return (0);
   1093 }
   1094 
   1095 
   1096 /*
   1097  * Return the global scheduling priority that would be assigned
   1098  * to a thread entering the fixed-priority class with the fx_upri.
   1099  */
   1100 static pri_t
   1101 fx_globpri(kthread_t *t)
   1102 {
   1103 	fxproc_t *fxpp;
   1104 
   1105 	ASSERT(MUTEX_HELD(&ttoproc(t)->p_lock));
   1106 
   1107 	fxpp = (fxproc_t *)t->t_cldata;
   1108 	return (fx_dptbl[fxpp->fx_pri].fx_globpri);
   1109 
   1110 }
   1111 
   1112 /*
   1113  * Arrange for thread to be placed in appropriate location
   1114  * on dispatcher queue.
   1115  *
   1116  * This is called with the current thread in TS_ONPROC and locked.
   1117  */
   1118 static void
   1119 fx_preempt(kthread_t *t)
   1120 {
   1121 	fxproc_t	*fxpp = (fxproc_t *)(t->t_cldata);
   1122 
   1123 	ASSERT(t == curthread);
   1