Home | History | Annotate | Download | only in vm
      1 /*
      2  * CDDL HEADER START
      3  *
      4  * The contents of this file are subject to the terms of the
      5  * Common Development and Distribution License (the "License").
      6  * You may not use this file except in compliance with the License.
      7  *
      8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
      9  * or http://www.opensolaris.org/os/licensing.
     10  * See the License for the specific language governing permissions
     11  * and limitations under the License.
     12  *
     13  * When distributing Covered Code, include this CDDL HEADER in each
     14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
     15  * If applicable, add the following below this CDDL HEADER, with the
     16  * fields enclosed by brackets "[]" replaced with your own identifying
     17  * information: Portions Copyright [yyyy] [name of copyright owner]
     18  *
     19  * CDDL HEADER END
     20  */
     21 /*
     22  * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
     23  * Use is subject to license terms.
     24  */
     25 
     26 #ifndef	_VM_VPM_H
     27 #define	_VM_VPM_H
     28 
     29 #pragma ident	"@(#)vpm.h	1.1	06/04/18 SMI"
     30 
     31 #ifdef	__cplusplus
     32 extern "C" {
     33 #endif
     34 
     35 /*
     36  * The vnode page mappings(VPM) interfaces.
     37  * "Commitment level - Consolidation private". They are subject
     38  * to change without notice. Use them at your own risk.
     39  *
     40  * At this stage these interfaces are provided only to utilize the
     41  * segkpm mappings and are enabled for solaris x64. Therefore these
     42  * interfaces have to be used under the 'vpm_enable' check as an
     43  * alternative to segmap interfaces where applicable.
     44  *
     45  * The VPM interfaces provide temporary mappings to file pages. They
     46  * return the mappings in a scatter gather list(SGL).
     47  * The SGL elements are the structure 'vmap_t'.
     48  *
     49  *	typedef struct vmap {
     50  *		caddr_t	vs_addr;        / public /
     51  *		size_t	vs_len;         / public - Currently not used /
     52  *		void	*vs_data;	/ opaque - private data /
     53  *	} vmap_t;
     54  *
     55  * An array of this structure has to be passed to the interface routines
     56  * along with the size(# of elements) of the SGL array. Depending on the
     57  * requested length and mapped chunk sizes(PAGESIZE here), the number of
     58  * valid mappings returned can be less than the actual size of the SGL array.
     59  * Always, an element in the SGL will have 'vs_addr' set to NULL which
     60  * marks the end of the valid entries in the SGL.
     61  *
     62  * The vmap_t structure members are populated with the mapped address
     63  * in 'vs_addr' and length of the mapping in 'vs_len'. Currently the
     64  * mapping length is fixed at PAGESIZE. The 'vs_data' member is private
     65  * and the caller should not access or modify it.
     66  *
     67  * Using a scatter gather list to return the mappings and length makes it
     68  * possible to provide mappings of variable length. Currently mapping length
     69  * of only 'PAGESIZE' per vmap_t is possible. Also, similar to the segmap
     70  * interfaces, on each request, the max length of 'MAXBSIZE' is supported
     71  * for now. The MAXBSIZE mappings will be returned in 1 or 2 vmap_t elements
     72  * of the SGL depending on the PAGESIZE. The scatter gather list array size
     73  * needs to be a minimum of MINVMAPS elements to accommodate MAXBSIZE.
     74  * The MAXBSIZE restriction exists because the filesystems are not capable
     75  * of handling more(disk block allocations at a time) for now.
     76  *
     77  *
     78  * Interfaces:
     79  *
     80  * int vpm_map_pages( struct vnode *vp, u_offset_t off, size_t len,
     81  *			int fetchpage, vmap_t *vml, int vmlsz,
     82  *			int *newpagecreated, enum seg_rw rw);
     83  *
     84  * This function returns mappings to vnode pages.
     85  *
     86  * It takes a vnode, offset and length and returns mappings to the  pages
     87  * covering the range [off, off +len) in the vmap_t SGL array 'vml'.
     88  * Currently these interfaces are subject to restrictions similar to the segmap
     89  * interfaces. The length passed in should satisfy the following criteria.
     90  * '(off + len)  <= ((off & PAGEMASK) + MAXBSIZE)'
     91  * The mapped address returned, in 'vs_addr', are for the page boundary.
     92  *
     93  * The 'vmlsz' is the size(# elements) of the 'vml' array.
     94  *
     95  * When the 'fetchpage' flag is set, the vnode(file) pages will be fetched
     96  * (calls VOP_GETPAGE) from the backing store(disk) if not found in the
     97  * system page cache. If 'fetchpage == 0', the vnode(file) pages for the
     98  * given offset will be just created if they are not already present in the
     99  * system page cache. The 'newpagecreated' flag is set on return if new pages
    100  * are created when 'fetchpage == 0'(requested to just create new pages).
    101  *
    102  * The 'seg_rw rw' indicates the intended operation on these mappings
    103  * (S_WRITE or S_READ).
    104  *
    105  * Currently these interfaces only return segkpm mappings. Therefore the
    106  * vnode pages that are being accessed will be locked(at least SHARED locked)
    107  * for the duration these mappings are in use. After use, the  unmap
    108  * function, vpm_unmap_pages(), has to be called and the same SGL array
    109  * needs to be passed to the unmap function.
    110  *
    111  *
    112  * void vpm_unmap_pages(vmap_t *vml, enum seg_rw rw);
    113  *
    114  * This function unmaps the pages that were mapped by vpm_map_pages.
    115  * The SGL array 'vml' has to be the one that was passed to vpm_map_pages().
    116  *
    117  *
    118  * ex:
    119  * To copy data from a kernel buffer 'buf' to the pages of vnode(file) 'vp' at
    120  * offset 'off', the following code snippet shows how to use the above two
    121  * interfaces. Here the copy length is till the MAXBSIZE boundary. This code can
    122  * be executed repeatedly, in a loop to copy more than MAXBSIZE length of data.
    123  *
    124  *	vmap_t  vml[MINVMAPS];
    125  *	int err, i, newpage, len;
    126  *	int pon;
    127  *
    128  *	pon = (off & PAGEOFFSET);
    129  *	len = MAXBSIZE - pon;
    130  *
    131  *	if (vpm_enable) {
    132  *             err = vpm_map_pages(vp, off, len, 0, vml, MINVMAPS,
    133  *				 &newpage, S_WRITE);
    134  *
    135  *		if (err)
    136  *			return;
    137  *
    138  *		for (i=0; vml[i].vs_addr != NULL; i++) {
    139  *			bcopy (buf, vml[i].vs_addr + pon,
    140  *				 PAGESIZE - pon);
    141  *			buf += (PAGESIZE - pon);
    142  *			pon = 0;
    143  *		}
    144  *
    145  *		if (newpage) {
    146  *			pon = (off & PAGEOFFSET);
    147  *			bzero(vml[i-1].vs_addr + pon, PAGESIZE - pon);
    148  *		}
    149  *
    150  *		vpm_unmap_pages(vml, S_WRITE);
    151  *	}
    152  *
    153  *
    154  *
    155  *
    156  * int vpm_data_copy(struct vnode *vp, u_offset_t off, size_t len,
    157  *		struct uio *uio, int fetchpage, int *newpagecreated,
    158  *		int zerostart, enum seg_rw rw);
    159  *
    160  * This function can be called if the need is to just transfer data to/from
    161  * the vnode pages. It takes a 'uio' structure and  calls 'uiomove()' to
    162  * do the data transfer. It can be used in the context of read and write
    163  * system calls to transfer data between a user buffer, which is specified
    164  * in the uio structure, and the vnode pages. If the data needs to be
    165  * transferred between a kernel buffer and the pages, like in the above
    166  * example, a uio structure can be set up accordingly and passed. The 'rw'
    167  * parameter will determine the direction of the data transfer.
    168  *
    169  * The 'fetchpage' and 'newpagecreated' are same as explained before.
    170  * The 'zerostart' flag when set will zero fill start of the page till the
    171  * offset 'off' in the first page. i.e  from 'off & PAGEMASK' to 'off'.
    172  * Here too the MAXBSIZE restriction mentioned above applies to the length
    173  * requested.
    174  *
    175  *
    176  * int vpm_sync_pages(struct vnode *vp, u_offset_t off,
    177  *					 size_t len, uint_t flags)
    178  *
    179  * This function can be called to flush or sync the vnode(file) pages that
    180  * have been accessed. It will call VOP_PUTPAGE().
    181  *
    182  * For the given vnode, off and len the pages covering the range
    183  * [off, off + len) are flushed. Currently it uses the same flags that
    184  * are used with segmap_release() interface. Refer to vm/seg_map.h.
    185  * (SM_DONTNEED, SM_ASYNC, SM_FREE, SM_INVAL, SM_DESTROY)
    186  *
    187  */
    188 
    189 
    190 /*
    191  * vpm cache related definitions. VPMAP_MINCACHE is 64 * 2^20 (presumably a byte count, i.e. 64MB -- confirm).
    192  */
    193 #define	VPMAP_MINCACHE		(64 * 1024 * 1024)
    194 
    195 /*
    196  * vpm caching mode: selector values naming an LRU and a random mode.
    197  */
    198 #define	VPMCACHE_LRU		0
    199 #define	VPMCACHE_RANDOM		1
    200 /*
    201  * Data structures to manage the cache of pages referenced by
    202  * the vpm interfaces. There is one vpmap struct per page in the cache.
    203  */
    204 struct vpmap {
    205 	kmutex_t	vpm_mtx;	/* protects the non-list fields */
    206 	struct vnode	*vpm_vp;	/* vnode of the cached page */
    207 	struct vpmap	*vpm_next;	/* free list linkage: next entry */
    208 	struct vpmap	*vpm_prev;	/* free list linkage: previous entry */
    209 	u_offset_t	vpm_off;	/* offset of the page */
    210 	page_t		*vpm_pp;	/* page pointer */
    211 	ushort_t	vpm_refcnt;	/* number of active references */
    212 	ushort_t	vpm_ndxflg;	/* indicates which queue */
    213 	ushort_t	vpm_free_ndx;	/* free list it belongs to */
    214 };
    215 
    216 /*
    217  * Multiple vpmap free lists are maintained so that allocations
    218  * scale with cpu count. To further reduce contention between
    219  * allocations and deallocations, each list is made up of two queues.
    220  */
    221 #define	VPM_FREEQ_PAD	64	/* number of bytes in vpmq_pad */
    222 union vpm_freeq {
    223 	struct {
    224 		struct vpmap	*vpmsq_free;	/* presumably head of this queue's vpmaps -- confirm */
    225 		kmutex_t	vpmsq_mtx;	/* presumably guards vpmsq_free -- confirm */
    226 	} vpmfq;
    227 	char vpmq_pad[VPM_FREEQ_PAD];	/* makes the union at least VPM_FREEQ_PAD bytes */
    228 };
    229 
    230 #define	vpmq_free	vpmfq.vpmsq_free	/* shorthand for the nested member */
    231 #define	vpmq_mtx	vpmfq.vpmsq_mtx	/* shorthand for the nested member */
    232 
    233 struct vpmfree {	/* holds a pair of queues; presumably one per free list -- confirm */
    234 	union vpm_freeq vpm_freeq[2];	/* alloc and release queue */
    235 	union vpm_freeq *vpm_allocq;	/* current alloc queue */
    236 	union vpm_freeq *vpm_releq;	/* current release queue */
    237 	kcondvar_t	vpm_free_cv;	/* NOTE(review): presumably waited on for a free vpmap -- confirm */
    238 	ushort_t	vpm_want;	/* NOTE(review): presumably counts such waiters -- confirm */
    239 };
    240 
    241 #define	VPMALLOCQ	0	/* presumably the vpm_freeq[] index of the alloc queue -- confirm */
    242 #define	VPMRELEQ	1	/* presumably the vpm_freeq[] index of the release queue -- confirm */
    243 
    244 /*
    245  * VPM Interface definitions.
    246  */
    247 
    248 /*
    249  * This structure is the scatter gather list (SGL) element. The page
    250  * mappings will be returned in this structure. A pointer to an
    251  * array of this structure is passed to the interface routines.
    252  */
    253 typedef struct vmap {
    254 	caddr_t	vs_addr;	/* mapped address; NULL marks the end of valid entries */
    255 	size_t	vs_len;		/* length, currently fixed at PAGESIZE */
    256 	void	*vs_data;	/* opaque - private data, callers must not touch it */
    257 } vmap_t;
    258 
    259 /*
    260  * The minimum and maximum number of array elements in the scatter
    261  * gather list.
    262  */
    263 #define	MINVMAPS   3		/* (MAXBSIZE/4096 + 1), min # of mappings */
    264 #define	MAXVMAPS   10		/* max # of elements in the scatter gather list */
    265 
    266 #ifdef _KERNEL
    267 
    268 extern int	vpm_enable;	/* the VPM interfaces are to be used only under a check of this */
    269 /*
    270  * vpm page mapping operations; arguments are described in the interface notes at the top of this file.
    271  */
    272 extern void	vpm_init(void);
    273 extern int	vpm_map_pages(struct vnode *, u_offset_t, size_t, int,
    274 		vmap_t *, int, int  *, enum seg_rw);
    275 
    276 extern void	vpm_unmap_pages(vmap_t *, enum seg_rw);
    277 extern int	vpm_sync_pages(struct vnode *, u_offset_t, size_t, uint_t);
    278 extern int	vpm_data_copy(struct vnode *, u_offset_t, size_t,
    279 		struct uio *, int, int *, int, enum seg_rw rw);
    280 #endif	/* _KERNEL */
    281 
    282 #ifdef	__cplusplus
    283 }
    284 #endif
    285 
    286 #endif	/* _VM_VPM_H */
    287