Home | History | Annotate | Download | only in vm
      1 /*
      2  * CDDL HEADER START
      3  *
      4  * The contents of this file are subject to the terms of the
      5  * Common Development and Distribution License (the "License").
      6  * You may not use this file except in compliance with the License.
      7  *
      8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
      9  * or http://www.opensolaris.org/os/licensing.
     10  * See the License for the specific language governing permissions
     11  * and limitations under the License.
     12  *
     13  * When distributing Covered Code, include this CDDL HEADER in each
     14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
     15  * If applicable, add the following below this CDDL HEADER, with the
     16  * fields enclosed by brackets "[]" replaced with your own identifying
     17  * information: Portions Copyright [yyyy] [name of copyright owner]
     18  *
     19  * CDDL HEADER END
     20  */
     21 /*
     22  * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
     23  * Use is subject to license terms.
     24  */
     25 
     26 /*	Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */
     27 /*	 All Rights Reserved   */
     28 
     29 /*
     30  * University Copyright- Copyright (c) 1982, 1986, 1988
     31  * The Regents of the University of California
     32  * All Rights Reserved
     33  *
     34  * University Acknowledgment- Portions of this document are derived from
     35  * software developed by the University of California, Berkeley, and its
     36  * contributors.
     37  */
     38 
     39 #ifndef	_VM_ANON_H
     40 #define	_VM_ANON_H
     41 
     42 #pragma ident	"@(#)anon.h	1.94	07/10/10 SMI"
     43 
     44 #include <sys/cred.h>
     45 #include <sys/zone.h>
     46 #include <vm/seg.h>
     47 #include <vm/vpage.h>
     48 
     49 #ifdef	__cplusplus
     50 extern "C" {
     51 #endif
     52 
     53 /*
     54  * VM - Anonymous pages.
     55  */
     56 
     57 typedef	unsigned long anoff_t;		/* anon offsets */
     58 
     59 /*
     60  *	Each anonymous page, either in memory or in swap, has an anon structure.
     61  * The structure (slot) provides a level of indirection between anonymous pages
     62  * and their backing store.
     63  *
     64  *	(an_vp, an_off) names the vnode of the anonymous page for this slot.
     65  *
     66  * 	(an_pvp, an_poff) names the location of the physical backing store
     67  * 	for the page this slot represents. If the name is null there is no
     68  * 	associated physical store. The physical backing store location can
     69  *	change while the slot is in use.
     70  *
     71  *	an_hash is a hash list of anon slots. The list is hashed by
     72  * 	(an_vp, an_off) of the associated anonymous page and provides a
     73  *	method of going from the name of an anonymous page to its
     74  * 	associated anon slot.
     75  *
     76  *	an_refcnt holds a reference count which is the number of separate
     77  * 	copies that will need to be created in case of copy-on-write.
     78  *	A refcnt > 0 protects the existence of the slot. The refcnt is
     79  * 	initialized to 1 when the anon slot is created in anon_alloc().
 *	If a client obtains an anon slot and allows multiple threads to
 *	share it, then it is the client's responsibility to ensure that
 *	it does not allow one thread to try to reference the slot at the
 *	same time as another is trying to decrement the last count and
 *	destroy the anon slot. E.g., the seg_vn segment type protects
 *	against this with higher level locks.
     86  */
     87 
     88 struct anon {
     89 	struct vnode *an_vp;	/* vnode of anon page */
     90 	struct vnode *an_pvp;	/* vnode of physical backing store */
     91 	anoff_t an_off;		/* offset of anon page */
     92 	anoff_t an_poff;	/* offset in vnode */
     93 	struct anon *an_hash;	/* hash table of anon slots */
     94 	int an_refcnt;		/* # of people sharing slot */
     95 };
     96 
     97 #ifdef _KERNEL
     98 /*
     99  * The swapinfo_lock protects:
    100  *		swapinfo list
    101  *		individual swapinfo structures
    102  *
    103  * The anoninfo_lock protects:
    104  *		anoninfo counters
    105  *
    106  * The anonhash_lock protects:
    107  *		anon hash lists
    108  *		anon slot fields
    109  *
    110  * Fields in the anon slot which are read-only for the life of the slot
    111  * (an_vp, an_off) do not require the anonhash_lock be held to access them.
    112  * If you access a field without the anonhash_lock held you must be holding
    113  * the slot with an_refcnt to make sure it isn't destroyed.
    114  * To write (an_pvp, an_poff) in a given slot you must also hold the
    115  * p_iolock of the anonymous page for slot.
    116  */
    117 extern kmutex_t anoninfo_lock;
    118 extern kmutex_t swapinfo_lock;
    119 extern kmutex_t anonhash_lock[];
    120 extern pad_mutex_t anon_array_lock[];
    121 extern kcondvar_t anon_array_cv[];
    122 
    123 /*
    124  * Global hash table to provide a function from (vp, off) -> ap
    125  */
    126 extern size_t anon_hash_size;
    127 extern struct anon **anon_hash;
    128 #define	ANON_HASH_SIZE	anon_hash_size
    129 #define	ANON_HASHAVELEN	4
    130 #define	ANON_HASH(VP, OFF)	\
    131 ((((uintptr_t)(VP) >> 7)  ^ ((OFF) >> PAGESHIFT)) & (ANON_HASH_SIZE - 1))
    132 
    133 #define	AH_LOCK_SIZE	64
    134 #define	AH_LOCK(vp, off) (ANON_HASH((vp), (off)) & (AH_LOCK_SIZE -1))
    135 
    136 #endif	/* _KERNEL */
    137 
    138 /*
    139  * Declaration for the Global counters to accurately
    140  * track the kernel foot print in memory.
    141  */
    142 extern  pgcnt_t segvn_pages_locked;
    143 extern  pgcnt_t pages_locked;
    144 extern  pgcnt_t pages_claimed;
    145 extern  pgcnt_t pages_useclaim;
    146 extern  pgcnt_t obp_pages;
    147 
    148 /*
    149  * Anonymous backing store accounting structure for swapctl.
    150  *
    151  * ani_max = maximum amount of swap space
    152  *	(including potentially available physical memory)
    153  * ani_free = amount of unallocated anonymous memory
    154  *	(some of which might be reserved and including
    155  *	potentially available physical memory)
    156  * ani_resv = amount of claimed (reserved) anonymous memory
    157  *
 * The swap data can be acquired more efficiently through the
 * kstats interface.
    160  * Total slots currently available for reservation =
    161  *	MAX(ani_max - ani_resv, 0) + (availrmem - swapfs_minfree)
    162  */
    163 struct anoninfo {
    164 	pgcnt_t	ani_max;
    165 	pgcnt_t	ani_free;
    166 	pgcnt_t	ani_resv;
    167 };
    168 
    169 #ifdef _SYSCALL32
    170 struct anoninfo32 {
    171 	size32_t ani_max;
    172 	size32_t ani_free;
    173 	size32_t ani_resv;
    174 };
    175 #endif /* _SYSCALL32 */
    176 
    177 /*
 * Define the NCPU pool of the ani_free counters. Update the counter
 * of the cpu on which the thread is running and in every clock intr
 * sync anoninfo.ani_free with the current total of all the NCPU entries.
    181  */
    182 
    183 typedef	struct	ani_free {
    184 	kmutex_t	ani_lock;
    185 	pgcnt_t		ani_count;
    186 	uchar_t		pad[64 - sizeof (kmutex_t) - sizeof (pgcnt_t)];
    187 			/* XXX 64 = cacheline size */
    188 } ani_free_t;
    189 
    190 #define	ANI_MAX_POOL	128
    191 extern	ani_free_t	ani_free_pool[];
    192 
    193 #define	ANI_ADD(inc)	{ \
    194 	ani_free_t	*anifp; \
    195 	int		index; \
    196 	index = (CPU->cpu_id & (ANI_MAX_POOL - 1)); \
    197 	anifp = &ani_free_pool[index]; \
    198 	mutex_enter(&anifp->ani_lock); \
    199 	anifp->ani_count += inc; \
    200 	mutex_exit(&anifp->ani_lock); \
    201 }
    202 
    203 /*
    204  * Anon array pointers are allocated in chunks. Each chunk
    205  * has PAGESIZE/sizeof(u_long *) of anon pointers.
    206  * There are two levels of arrays for anon array pointers larger
    207  * than a chunk. The first level points to anon array chunks.
    208  * The second level consists of chunks of anon pointers.
    209  *
    210  * If anon array is smaller than a chunk then the whole anon array
    211  * is created (memory is allocated for whole anon array).
    212  * If anon array is larger than a chunk only first level array is
    213  * allocated. Then other arrays (chunks) are allocated only when
    214  * they are initialized with anon pointers.
    215  */
    216 struct anon_hdr {
    217 	kmutex_t serial_lock;	/* serialize array chunk allocation */
    218 	pgcnt_t	size;		/* number of pointers to (anon) pages */
    219 	void	**array_chunk;	/* pointers to anon pointers or chunks of */
    220 				/* anon pointers */
    221 	int	flags;		/* ANON_ALLOC_FORCE force preallocation of */
    222 				/* whole anon array	*/
    223 };
    224 
    225 #ifdef	_LP64
    226 #define	ANON_PTRSHIFT	3
    227 #define	ANON_PTRMASK	~7
    228 #else
    229 #define	ANON_PTRSHIFT	2
    230 #define	ANON_PTRMASK	~3
    231 #endif
    232 
    233 #define	ANON_CHUNK_SIZE		(PAGESIZE >> ANON_PTRSHIFT)
    234 #define	ANON_CHUNK_SHIFT	(PAGESHIFT - ANON_PTRSHIFT)
    235 #define	ANON_CHUNK_OFF		(ANON_CHUNK_SIZE - 1)
    236 
    237 /*
    238  * Anon flags.
    239  */
    240 #define	ANON_SLEEP		0x0	/* ok to block */
    241 #define	ANON_NOSLEEP		0x1	/* non-blocking call */
    242 #define	ANON_ALLOC_FORCE	0x2	/* force single level anon array */
    243 #define	ANON_GROWDOWN		0x4	/* anon array should grow downward */
    244 
    245 struct kshmid;
    246 
    247 /*
    248  * The anon_map structure is used by various clients of the anon layer to
    249  * manage anonymous memory.   When anonymous memory is shared,
    250  * then the different clients sharing it will point to the
    251  * same anon_map structure.  Also, if a segment is unmapped
    252  * in the middle where an anon_map structure exists, the
    253  * newly created segment will also share the anon_map structure,
    254  * although the two segments will use different ranges of the
    255  * anon array.  When mappings are private (or shared with
    256  * a reference count of 1), an unmap operation will free up
    257  * a range of anon slots in the array given by the anon_map
 * structure.  Because of fragmentation due to this unmapping,
 * we have to store the size of the anon array in the anon_map
 * structure so that we can free everything when the reference
 * count goes to zero.
    262  *
 * A new rangelock scheme is introduced to make the anon layer scale.
 * A reader/writer lock per anon_map and an array of system-wide hash
 * locks, anon_array_lock[], are introduced to replace serial_lock and
 * anonmap lock.  The writer lock is held when we want to single-thread
 * references to the anon array pointers or to the anon_map's members,
 * whereas the reader lock and anon_array_lock are held to allow
 * multiple threads to reference different parts of the anon array.
 * A global set of condition variables, anon_array_cv, is used with
 * anon_array_lock[] to make the hold time of the locks short.
    273  *
 * szc is used to calculate the index of hash locks and cv's.  We
 * could've just used seg->s_szc if not for the possible sharing of
 * an anon_map between SYSV shared memory and ISM, so now we introduce
 * szc in the anon_map structure.  For MAP_SHARED, the amp->szc is either
 * 0 (base page size) or page_num_pagesizes() - 1, while for MAP_PRIVATE
 * the amp->szc could be anything in [0, page_num_pagesizes() - 1].
    280  */
    281 struct anon_map {
    282 	krwlock_t a_rwlock;	/* protect anon_map and anon array */
    283 	size_t	size;		/* size in bytes mapped by the anon array */
    284 	struct	anon_hdr *ahp; 	/* anon array header pointer, containing */
    285 				/* anon pointer array(s) */
    286 	size_t	swresv;		/* swap space reserved for this anon_map */
    287 	ulong_t	refcnt;		/* reference count on this structure */
    288 	ushort_t a_szc;		/* max szc among shared processes */
    289 	void	*locality;	/* lgroup locality info */
    290 	struct kshmid *a_sp;	/* kshmid if amp backs sysV, or NULL */
    291 };
    292 
    293 #ifdef _KERNEL
    294 
    295 #define	ANON_BUSY		0x1
    296 #define	ANON_ISBUSY(slot)	(*(slot) & ANON_BUSY)
    297 #define	ANON_SETBUSY(slot)	(*(slot) |= ANON_BUSY)
    298 #define	ANON_CLRBUSY(slot)	(*(slot) &= ~ANON_BUSY)
    299 
    300 #define	ANON_MAP_SHIFT		6	/* log2(sizeof (struct anon_map)) */
    301 #define	ANON_ARRAY_SHIFT	7	/* log2(ANON_LOCKSIZE) */
    302 #define	ANON_LOCKSIZE		128
    303 
    304 #define	ANON_LOCK_ENTER(lock, type)	rw_enter((lock), (type))
    305 #define	ANON_LOCK_EXIT(lock)		rw_exit((lock))
    306 
    307 #define	ANON_ARRAY_HASH(amp, idx)\
    308 	((((idx) + ((idx) >> ANON_ARRAY_SHIFT) +\
    309 	((idx) >> (ANON_ARRAY_SHIFT << 1)) +\
    310 	((idx) >> (ANON_ARRAY_SHIFT + (ANON_ARRAY_SHIFT << 1)))) ^\
    311 	((uintptr_t)(amp) >> ANON_MAP_SHIFT)) & (ANON_LOCKSIZE - 1))
    312 
    313 typedef struct anon_sync_obj {
    314 	kmutex_t	*sync_mutex;
    315 	kcondvar_t	*sync_cv;
    316 	ulong_t		*sync_data;
    317 } anon_sync_obj_t;
    318 
    319 /*
    320  * Anonymous backing store accounting structure for kernel.
    321  * ani_max = total reservable slots on physical (disk-backed) swap
    322  * ani_phys_resv = total phys slots reserved for use by clients
    323  * ani_mem_resv = total mem slots reserved for use by clients
    324  * ani_free = # unallocated physical slots + # of reserved unallocated
    325  * memory slots
    326  */
    327 
    328 /*
    329  * Initial total swap slots available for reservation
    330  */
    331 #define	TOTAL_AVAILABLE_SWAP \
    332 	(k_anoninfo.ani_max + MAX((spgcnt_t)(availrmem - swapfs_minfree), 0))
    333 
    334 /*
    335  * Swap slots currently available for reservation
    336  */
    337 #define	CURRENT_TOTAL_AVAILABLE_SWAP \
    338 	((k_anoninfo.ani_max - k_anoninfo.ani_phys_resv) +	\
    339 			MAX((spgcnt_t)(availrmem - swapfs_minfree), 0))
    340 
    341 struct k_anoninfo {
    342 	pgcnt_t	ani_max;	/* total reservable slots on phys */
    343 					/* (disk) swap */
    344 	pgcnt_t	ani_free;	/* # of unallocated phys and mem slots */
    345 	pgcnt_t	ani_phys_resv;	/* # of reserved phys (disk) slots */
    346 	pgcnt_t	ani_mem_resv;	/* # of reserved mem slots */
    347 	pgcnt_t	ani_locked_swap; /* # of swap slots locked in reserved */
    348 				/* mem swap */
    349 };
    350 
    351 extern	struct k_anoninfo k_anoninfo;
    352 
    353 extern void	anon_init(void);
    354 extern struct	anon *anon_alloc(struct vnode *, anoff_t);
    355 extern void	anon_dup(struct anon_hdr *, ulong_t,
    356 		    struct anon_hdr *, ulong_t, size_t);
    357 extern void	anon_dup_fill_holes(struct anon_hdr *, ulong_t,
    358 		    struct anon_hdr *, ulong_t, size_t, uint_t, int);
    359 extern int	anon_fill_cow_holes(struct seg *, caddr_t, struct anon_hdr *,
    360 		    ulong_t, struct vnode *, u_offset_t, size_t, uint_t,
    361 		    uint_t, struct vpage [], struct cred *);
    362 extern void	anon_free(struct anon_hdr *, ulong_t, size_t);
    363 extern void	anon_free_pages(struct anon_hdr *, ulong_t, size_t, uint_t);
    364 extern void	anon_disclaim(struct anon_map *, ulong_t, size_t);
    365 extern int	anon_getpage(struct anon **, uint_t *, struct page **,
    366 		    size_t, struct seg *, caddr_t, enum seg_rw, struct cred *);
    367 extern int	swap_getconpage(struct vnode *, u_offset_t, size_t,
    368 		    uint_t *, page_t *[], size_t, page_t *, uint_t *,
    369 		    spgcnt_t *, struct seg *, caddr_t,
    370 		    enum seg_rw, struct cred *);
    371 extern int	anon_map_getpages(struct anon_map *, ulong_t,
    372 		    uint_t, struct seg *, caddr_t, uint_t,
    373 		    uint_t *, page_t *[], uint_t *,
    374 		    struct vpage [], enum seg_rw, int, int, int, struct cred *);
    375 extern int	anon_map_privatepages(struct anon_map *, ulong_t,
    376 		    uint_t, struct seg *, caddr_t, uint_t,
    377 		    page_t *[], struct vpage [], int, int, struct cred *);
    378 extern struct	page *anon_private(struct anon **, struct seg *,
    379 		    caddr_t, uint_t, struct page *,
    380 		    int, struct cred *);
    381 extern struct	page *anon_zero(struct seg *, caddr_t,
    382 		    struct anon **, struct cred *);
    383 extern int	anon_map_createpages(struct anon_map *, ulong_t,
    384 		    size_t, struct page **,
    385 		    struct seg *, caddr_t,
    386 		    enum seg_rw, struct cred *);
    387 extern int	anon_map_demotepages(struct anon_map *, ulong_t,
    388 		    struct seg *, caddr_t, uint_t,
    389 		    struct vpage [], struct cred *);
    390 extern void	anon_shmap_free_pages(struct anon_map *, ulong_t, size_t);
    391 extern int	anon_resvmem(size_t, boolean_t, zone_t *, int);
    392 extern void	anon_unresvmem(size_t, zone_t *);
    393 extern struct	anon_map *anonmap_alloc(size_t, size_t, int);
    394 extern void	anonmap_free(struct anon_map *);
    395 extern void	anon_decref(struct anon *);
    396 extern int	non_anon(struct anon_hdr *, ulong_t, u_offset_t *, size_t *);
    397 extern pgcnt_t	anon_pages(struct anon_hdr *, ulong_t, pgcnt_t);
    398 extern int	anon_swap_adjust(pgcnt_t);
    399 extern void	anon_swap_restore(pgcnt_t);
    400 extern struct	anon_hdr *anon_create(pgcnt_t, int);
    401 extern void	anon_release(struct anon_hdr *, pgcnt_t);
    402 extern struct	anon *anon_get_ptr(struct anon_hdr *, ulong_t);
    403 extern ulong_t	*anon_get_slot(struct anon_hdr *, ulong_t);
    404 extern struct	anon *anon_get_next_ptr(struct anon_hdr *, ulong_t *);
    405 extern int	anon_set_ptr(struct anon_hdr *, ulong_t, struct anon *, int);
    406 extern int 	anon_copy_ptr(struct anon_hdr *, ulong_t,
    407 		    struct anon_hdr *, ulong_t, pgcnt_t, int);
    408 extern pgcnt_t	anon_grow(struct anon_hdr *, ulong_t *, pgcnt_t, pgcnt_t, int);
    409 extern void	anon_array_enter(struct anon_map *, ulong_t,
    410 			anon_sync_obj_t *);
    411 extern int	anon_array_try_enter(struct anon_map *, ulong_t,
    412 			anon_sync_obj_t *);
    413 extern void	anon_array_exit(anon_sync_obj_t *);
    414 
    415 /*
    416  * anon_resv checks to see if there is enough swap space to fulfill a
    417  * request and if so, reserves the appropriate anonymous memory resources.
    418  * anon_checkspace just checks to see if there is space to fulfill the request,
    419  * without taking any resources.  Both return 1 if successful and 0 if not.
    420  *
    421  * Macros are provided as anon reservation is usually charged to the zone of
    422  * the current process.  In some cases (such as anon reserved by tmpfs), a
    423  * zone pointer is needed to charge the appropriate zone.
    424  */
    425 #define	anon_unresv(size)		anon_unresvmem(size, curproc->p_zone)
    426 #define	anon_unresv_zone(size, zone)	anon_unresvmem(size, zone)
    427 #define	anon_resv(size)			\
    428 	anon_resvmem((size), 1, curproc->p_zone, 1)
    429 #define	anon_resv_zone(size, zone)	anon_resvmem((size), 1, zone, 1)
    430 #define	anon_checkspace(size, zone)	anon_resvmem((size), 0, zone, 0)
    431 #define	anon_try_resv_zone(size, zone)	anon_resvmem((size), 1, zone, 0)
    432 
    433 /*
    434  * Flags to anon_private
    435  */
    436 #define	STEAL_PAGE	0x1	/* page can be stolen */
    437 #define	LOCK_PAGE	0x2	/* page must be ``logically'' locked */
    438 
    439 /*
    440  * SEGKP ANON pages that are locked are assumed to be LWP stack pages
    441  * and thus count towards the user pages locked count.
    442  * This value is protected by the same lock as availrmem.
    443  */
    444 extern pgcnt_t anon_segkp_pages_locked;
    445 
    446 extern int anon_debug;
    447 
    448 #ifdef ANON_DEBUG
    449 
    450 #define	A_ANON	0x01
    451 #define	A_RESV	0x02
    452 #define	A_MRESV	0x04
    453 
    454 /* vararg-like debugging macro. */
    455 #define	ANON_PRINT(f, printf_args) \
    456 		if (anon_debug & f) \
    457 			printf printf_args
    458 
    459 #else	/* ANON_DEBUG */
    460 
    461 #define	ANON_PRINT(f, printf_args)
    462 
    463 #endif	/* ANON_DEBUG */
    464 
    465 #endif	/* _KERNEL */
    466 
    467 #ifdef	__cplusplus
    468 }
    469 #endif
    470 
    471 #endif	/* _VM_ANON_H */
    472