Home | History | Annotate | Download | only in nfs
      1 /*
      2  * CDDL HEADER START
      3  *
      4  * The contents of this file are subject to the terms of the
      5  * Common Development and Distribution License (the "License").
      6  * You may not use this file except in compliance with the License.
      7  *
      8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
      9  * or http://www.opensolaris.org/os/licensing.
     10  * See the License for the specific language governing permissions
     11  * and limitations under the License.
     12  *
     13  * When distributing Covered Code, include this CDDL HEADER in each
     14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
     15  * If applicable, add the following below this CDDL HEADER, with the
     16  * fields enclosed by brackets "[]" replaced with your own identifying
     17  * information: Portions Copyright [yyyy] [name of copyright owner]
     18  *
     19  * CDDL HEADER END
     20  */
     21 /*
     22  * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
     23  * Use is subject to license terms.
     24  */
     25 
     26 /*	Copyright (c) 1983, 1984, 1985, 1986, 1987, 1988, 1989 AT&T	*/
     27 /*	  All Rights Reserved  	*/
     28 
     29 #ifndef	_NFS_RNODE_H
     30 #define	_NFS_RNODE_H
     31 
     32 #include <sys/avl.h>
     33 #include <sys/list.h>
     34 #include <nfs/nfs.h>
     35 
     36 #ifdef	__cplusplus
     37 extern "C" {
     38 #endif
     39 
     40 typedef enum nfs_access_type {
     41 	NFS_ACCESS_UNKNOWN,
     42 	NFS_ACCESS_ALLOWED,
     43 	NFS_ACCESS_DENIED
     44 } nfs_access_type_t;
     45 
     46 typedef struct acache_hash {
     47 	struct acache *next;	/* next and prev must be first */
     48 	struct acache *prev;
     49 	krwlock_t lock;
     50 } acache_hash_t;
     51 
     52 typedef struct acache {
     53 	struct acache *next;	/* next and prev must be first */
     54 	struct acache *prev;
     55 	uint32_t known;
     56 	uint32_t allowed;
     57 	struct rnode *rnode;
     58 	cred_t *cred;
     59 	struct acache *list;
     60 	struct acache_hash *hashq;
     61 } acache_t;
     62 
     63 #define	NFS_FHANDLE_LEN	72
     64 
     65 typedef struct nfs_fhandle {
     66 	int fh_len;
     67 	char fh_buf[NFS_FHANDLE_LEN];
     68 } nfs_fhandle;
     69 
     70 typedef struct rddir_cache {
     71 	lloff_t _cookie;	/* cookie used to find this cache entry */
     72 	lloff_t _ncookie;	/* cookie used to find the next cache entry */
     73 	char *entries;		/* buffer containing dirent entries */
     74 	int eof;		/* EOF reached after this request */
     75 	int entlen;		/* size of dirent entries in buf */
     76 	int buflen;		/* size of the buffer used to store entries */
     77 	int flags;		/* control flags, see below */
     78 	kcondvar_t cv;		/* cv for blocking */
     79 	int error;		/* error from RPC operation */
     80 	kmutex_t lock;
     81 	uint_t count;		/* reference count */
     82 	avl_node_t tree;	/* AVL tree links */
     83 } rddir_cache;
     84 
     85 #define	nfs_cookie	_cookie._p._l
     86 #define	nfs_ncookie	_ncookie._p._l
     87 #define	nfs3_cookie	_cookie._f
     88 #define	nfs3_ncookie	_ncookie._f
     89 
     90 #define	RDDIR		0x1	/* readdir operation in progress */
     91 #define	RDDIRWAIT	0x2	/* waiting on readdir in progress */
     92 #define	RDDIRREQ	0x4	/* a new readdir is required */
     93 #define	RDDIRCACHED	0x8	/* entry is in the cache */
     94 
     95 #define	HAVE_RDDIR_CACHE(rp)	(avl_numnodes(&(rp)->r_dir) > 0)
     96 
     97 typedef struct symlink_cache {
     98 	char *contents;		/* contents of the symbolic link */
     99 	int len;		/* length of the contents */
    100 	int size;		/* size of the allocated buffer */
    101 } symlink_cache;
    102 
    103 typedef struct commit {
    104 	page_t *c_pages;	/* list of pages to commit */
    105 	offset3 c_commbase;	/* base offset to do commit from */
    106 	count3 c_commlen;	/* len to commit */
    107 	kcondvar_t c_cv;	/* condvar for waiting for commit */
    108 } commit_t;
    109 
    110 /*
    111  * The various values for the commit states.  These are stored in
    112  * the p_fsdata byte in the page struct.
    113  */
    114 #define	C_NOCOMMIT	0	/* no commit is required */
    115 #define	C_COMMIT	1	/* a commit is required so do it now */
    116 #define	C_DELAYCOMMIT	2	/* a commit is required, but can be delayed */
    117 
    118 /*
    119  * The lock manager holds state making it possible for the client
    120  * and server to be out of sync.  For example, if the response from
    121  * the server granting a lock request is lost, the server will think
    122  * the lock is granted and the client will think the lock is lost.
    123  * To deal with this, a list of processes for which the client is
    124  * not sure if the server holds a lock is attached to the rnode.
    125  * When such a process closes the rnode, an unlock request is sent
    126  * to the server to unlock the entire file.
    127  *
    128  * The list is kept as a singularly linked NULL terminated list.
    129  * Because it is  only added to under extreme error conditions, the
    130  * list shouldn't get very big.  DEBUG kernels print a console warning
    131  * when the number of entries on a list go beyond nfs_lmpl_high_water
    132  * an  arbitrary number defined in nfs_add_locking_id()
    133  */
    134 #define	RLMPL_PID	1
    135 #define	RLMPL_OWNER	2
    136 typedef struct lock_manager_pid_list {
    137 	int lmpl_type;
    138 	pid_t lmpl_pid;
    139 	union {
    140 		pid_t _pid;
    141 		struct {
    142 			int len;
    143 			char *owner;
    144 		} _own;
    145 	} un;
    146 	struct lock_manager_pid_list *lmpl_next;
    147 } lmpl_t;
    148 
    149 #define	lmpl_opid un._pid
    150 #define	lmpl_own_len un._own.len
    151 #define	lmpl_owner un._own.owner
    152 
    153 /*
    154  * A homegrown reader/writer lock implementation.  It addresses
    155  * two requirements not addressed by the system primitives.  They
    156  * are that the `enter" operation is optionally interruptible and
    157  * that that they can be re`enter'ed by writers without deadlock.
    158  */
    159 typedef struct nfs_rwlock {
    160 	int count;
    161 	int waiters;
    162 	kthread_t *owner;
    163 	kmutex_t lock;
    164 	kcondvar_t cv;
    165 } nfs_rwlock_t;
    166 
    167 /*
    168  * The format of the hash bucket used to lookup rnodes from a file handle.
    169  */
    170 typedef struct rhashq {
    171 	struct rnode *r_hashf;
    172 	struct rnode *r_hashb;
    173 	krwlock_t r_lock;
    174 } rhashq_t;
    175 
/*
 * Remote file information structure.
 *
 * The rnode is the "inode" for remote files.  It contains all the
 * information necessary to handle a remote file on the client side.
 *
 * Note on file sizes:  we keep two file sizes in the rnode: the size
 * according to the client (r_size) and the size according to the server
 * (r_attr.va_size).  They can differ because we modify r_size during a
 * write system call (nfs_rdwr), before the write request goes over the
 * wire (before the file is actually modified on the server).  If an OTW
 * request occurs before the cached data is written to the server the file
 * size returned from the server (r_attr.va_size) may not match r_size.
 * r_size is the one we use, in general.  r_attr.va_size is only used to
 * determine whether or not our cached data is valid.
 *
 * Each rnode has 3 locks associated with it (not including the rnode
 * hash table and free list locks):
 *
 *	r_rwlock:	Serializes nfs_write and nfs_setattr requests
 *			and allows nfs_read requests to proceed in parallel.
 *			Serializes reads/updates to directories.
 *
 *	r_lkserlock:	Serializes lock requests with map, write, and
 *			readahead operations.
 *
 *	r_statelock:	Protects all fields in the rnode except for
 *			those listed below.  This lock is intended
 *			to be held for relatively short periods of
 *			time (not across entire putpage operations,
 *			for example).
 *
 * The following members are protected by the mutex rpfreelist_lock:
 *	r_freef
 *	r_freeb
 *
 * The following members are protected by the hash bucket rwlock:
 *	r_hashf
 *	r_hashb
 *
 * Note: r_modaddr is only accessed when the r_statelock mutex is held.
 *	Its value is also controlled via r_rwlock.  It is assumed that
 *	there will be only 1 writer active at a time, so it is safe to
 *	set r_modaddr and release r_statelock as long as the r_rwlock
 *	writer lock is held.
 *
 * r_inmap informs nfsX_read()/write() that there is a call to nfsX_map()
 * in progress. nfsX_read()/write() check r_inmap to decide whether
 * to perform directio on the file or not. r_inmap is atomically
 * incremented in nfsX_map() before the address space routines are
 * called and atomically decremented just before nfsX_map() exits.
 * r_inmap is not protected by any lock.
 *
 * r_mapcnt tells that the rnode has mapped pages. r_inmap can be 0
 * while the rnode has mapped pages.
 *
 * 64-bit offsets: the code formerly assumed that atomic reads of
 * r_size were safe and reliable; on 32-bit architectures, this is
 * not true since an intervening bus cycle from another processor
 * could update half of the size field.  The r_statelock must now
 * be held whenever any kind of access of r_size is made.
 *
 * Lock ordering:
 *	r_rwlock > r_lkserlock > r_statelock
 */
    241 struct exportinfo;	/* defined in nfs/export.h */
    242 struct servinfo;	/* defined in nfs/nfs_clnt.h */
    243 struct failinfo;	/* defined in nfs/nfs_clnt.h */
    244 struct mntinfo;		/* defined in nfs/nfs_clnt.h */
    245 
#ifdef _KERNEL

/*
 * Client-side state for one remote file.  VTOR() obtains it from a
 * vnode's v_data and RTOV() goes back through r_vnode.  The structure
 * is only defined when _KERNEL is defined.
 *
 * Members not covered by r_statelock: r_freef/r_freeb are protected by
 * rpfreelist_lock, r_hashf/r_hashb by the hash bucket rwlock, and
 * r_inmap is updated atomically without any lock.
 */
typedef struct rnode {
	/* the hash fields must be first to match the rhashq_t */
	struct rnode	*r_hashf;	/* hash queue forward pointer */
	struct rnode	*r_hashb;	/* hash queue back pointer */
	struct rnode	*r_freef;	/* free list forward pointer */
	struct rnode	*r_freeb;	/* free list back pointer */
	rhashq_t	*r_hashq;	/* pointer to the hash bucket */
	vnode_t		*r_vnode;	/* vnode for remote file */
	nfs_rwlock_t	r_rwlock;	/* serializes write/setattr requests */
	nfs_rwlock_t	r_lkserlock;	/* serialize lock with other ops */
	kmutex_t	r_statelock;	/* protects (most of) rnode contents */
	nfs_fhandle	r_fh;		/* file handle */
	struct servinfo	*r_server;	/* current server */
	char		*r_path;	/* path to this rnode */
	u_offset_t	r_nextr;	/* next byte read offset (read-ahead) */
	cred_t		*r_cred;	/* current credentials */
	cred_t		*r_unlcred;	/* unlinked credentials */
	char		*r_unlname;	/* unlinked file name */
	vnode_t		*r_unldvp;	/* parent dir of unlinked file */
	len_t		r_size;		/* client's view of file size */
	struct vattr	r_attr;		/* cached vnode attributes */
	hrtime_t	r_attrtime;	/* time attributes become invalid */
	hrtime_t	r_mtime;	/* client time file last modified */
	long		r_mapcnt;	/* count of mmapped pages */
	uint_t		r_count;	/* # of refs not reflected in v_count */
	uint_t		r_awcount;	/* # of outstanding async writes */
	uint_t		r_gcount;	/* getattrs waiting to flush pages */
	ushort_t	r_flags;	/* flags, see below */
	short		r_error;	/* async write error */
	kcondvar_t	r_cv;		/* condvar for blocked threads */
	int		(*r_putapage)	/* address of putapage routine */
		(vnode_t *, page_t *, u_offset_t *, size_t *, int, cred_t *);
	avl_tree_t	r_dir;		/* cache of readdir responses */
	rddir_cache	*r_direof;	/* pointer to the EOF entry */
	symlink_cache	r_symlink;	/* cached readlink response */
	writeverf3	r_verf;		/* version 3 write verifier */
	u_offset_t	r_modaddr;	/* address for page in writerp */
	commit_t	r_commit;	/* commit information */
	u_offset_t	r_truncaddr;	/* base for truncate operation */
	vsecattr_t	*r_secattr;	/* cached security attributes (acls) */
	cookieverf3	r_cookieverf;	/* version 3 readdir cookie verifier */
	lmpl_t		*r_lmpl;	/* pids that may be holding locks */
	nfs3_pathconf_info *r_pathconf;	/* cached pathconf information */
	acache_t	*r_acache;	/* list of access cache entries */
	kthread_t	*r_serial;	/* id of purging thread */
	list_t		r_indelmap;	/* list of delmap callers */
	uint_t		r_inmap;	/* to serialize read/write and mmap */
} rnode_t;
#endif /* _KERNEL */
    297 
    298 /*
    299  * Flags
    300  */
    301 #define	RREADDIRPLUS	0x1	/* issue a READDIRPLUS instead of READDIR */
    302 #define	RDIRTY		0x2	/* dirty pages from write operation */
    303 #define	RSTALE		0x4	/* file handle is stale */
    304 #define	RMODINPROGRESS	0x8	/* page modification happening */
    305 #define	RTRUNCATE	0x10	/* truncating, don't commit */
    306 #define	RHAVEVERF	0x20	/* have a write verifier to compare against */
    307 #define	RCOMMIT		0x40	/* commit in progress */
    308 #define	RCOMMITWAIT	0x80	/* someone is waiting to do a commit */
    309 #define	RHASHED		0x100	/* rnode is in hash queues */
    310 #define	ROUTOFSPACE	0x200	/* an out of space error has happened */
    311 #define	RDIRECTIO	0x400	/* bypass the buffer cache */
    312 #define	RLOOKUP		0x800	/* a lookup has been performed */
    313 #define	RWRITEATTR	0x1000	/* attributes came from WRITE */
    314 #define	RINDNLCPURGE	0x2000	/* in the process of purging DNLC references */
    315 #define	RDELMAPLIST	0x4000	/* delmap callers tracking for as callback */
    316 #define	RINCACHEPURGE	0x8000	/* purging caches due to file size change */
    317 
    318 /*
    319  * Convert between vnode and rnode
    320  */
    321 #define	RTOV(rp)	((rp)->r_vnode)
    322 #define	VTOR(vp)	((rnode_t *)((vp)->v_data))
    323 
    324 #define	VTOFH(vp)	(RTOFH(VTOR(vp)))
    325 #define	RTOFH(rp)	((fhandle_t *)(&(rp)->r_fh.fh_buf))
    326 #define	VTOFH3(vp)	(RTOFH3(VTOR(vp)))
    327 #define	RTOFH3(rp)	((nfs_fh3 *)(&(rp)->r_fh))
    328 
#ifdef _KERNEL
/*
 * Kernel-only prototypes.
 *
 * nfs_async_*: each takes, as its last argument, a pointer to the
 * function that carries out the operation.  Note that the callbacks for
 * nfs_async_putapage() and nfs_async_pageio() take the offset and
 * length by value, whereas nfs_putapage(), nfs3_putapage() and the
 * rnode's r_putapage take them by pointer.
 */
extern int	nfs_async_readahead(vnode_t *, u_offset_t, caddr_t,
				struct seg *, cred_t *,
				void (*)(vnode_t *, u_offset_t,
				caddr_t, struct seg *, cred_t *));
extern int	nfs_async_putapage(vnode_t *, page_t *, u_offset_t, size_t,
				int, cred_t *, int (*)(vnode_t *, page_t *,
				u_offset_t, size_t, int, cred_t *));
extern int	nfs_async_pageio(vnode_t *, page_t *, u_offset_t, size_t,
				int, cred_t *, int (*)(vnode_t *, page_t *,
				u_offset_t, size_t, int, cred_t *));
extern void	nfs_async_readdir(vnode_t *, rddir_cache *,
				cred_t *, int (*)(vnode_t *,
				rddir_cache *, cred_t *));
extern void	nfs_async_commit(vnode_t *, page_t *, offset3, count3,
				cred_t *, void (*)(vnode_t *, page_t *,
				offset3, count3, cred_t *));
extern void	nfs_async_inactive(vnode_t *, cred_t *, void (*)(vnode_t *,
				cred_t *, caller_context_t *));
extern int	writerp(rnode_t *, caddr_t, int, struct uio *, int);
extern int	nfs_putpages(vnode_t *, u_offset_t, size_t, int, cred_t *);
extern void	nfs_invalidate_pages(vnode_t *, u_offset_t, cred_t *);
/*
 * rfs2call() and rfs3call() have the same parameter list except for the
 * status pointer type (enum nfsstat * versus nfsstat3 *).
 */
extern int	rfs2call(struct mntinfo *, rpcproc_t, xdrproc_t, caddr_t,
			xdrproc_t, caddr_t, cred_t *, int *, enum nfsstat *,
			int, struct failinfo *);
extern int	rfs3call(struct mntinfo *, rpcproc_t, xdrproc_t, caddr_t,
			xdrproc_t, caddr_t, cred_t *, int *, nfsstat3 *,
			int, struct failinfo *);
extern void	nfs_setswaplike(vnode_t *, vattr_t *);
/* Each make*node() routine returns a vnode_t pointer. */
extern vnode_t	*makenfsnode(fhandle_t *, struct nfsfattr *, struct vfs *,
			hrtime_t, cred_t *, char *, char *);
extern vnode_t	*makenfs3node_va(nfs_fh3 *, vattr_t *, struct vfs *, hrtime_t,
			cred_t *, char *, char *);
extern vnode_t	*makenfs3node(nfs_fh3 *, fattr3 *, struct vfs *, hrtime_t,
			cred_t *, char *, char *);
extern void	rp_addfree(rnode_t *, cred_t *);
extern void	rp_rmhash(rnode_t *);
extern int	check_rtable(struct vfs *);
extern void	destroy_rtable(struct vfs *, cred_t *);
extern void	rflush(struct vfs *, cred_t *);
/* nfs_access_check() reports its result as an nfs_access_type_t. */
extern nfs_access_type_t nfs_access_check(rnode_t *, uint32_t, cred_t *);
extern void	nfs_access_cache(rnode_t *rp, uint32_t, uint32_t, cred_t *);
extern int	nfs_access_purge_rp(rnode_t *);
/* These two have the signature of the rnode's r_putapage member. */
extern int	nfs_putapage(vnode_t *, page_t *, u_offset_t *, size_t *,
			int, cred_t *);
extern int	nfs3_putapage(vnode_t *, page_t *, u_offset_t *, size_t *,
			int, cred_t *);
extern void	nfs_printfhandle(nfs_fhandle *);
extern void	nfs_write_error(vnode_t *, int, cred_t *);
/* rddir_cache routines; the buffer allocators exist only under DEBUG. */
extern rddir_cache	*rddir_cache_alloc(int);
extern void		rddir_cache_hold(rddir_cache *);
extern void		rddir_cache_rele(rddir_cache *);
#ifdef DEBUG
extern char		*rddir_cache_buf_alloc(size_t, int);
extern void		rddir_cache_buf_free(void *, size_t);
#endif
/* Operations on the homegrown reader/writer lock, nfs_rwlock_t. */
extern int	nfs_rw_enter_sig(nfs_rwlock_t *, krw_t, int);
extern int	nfs_rw_tryenter(nfs_rwlock_t *, krw_t);
extern void	nfs_rw_exit(nfs_rwlock_t *);
extern int	nfs_rw_lock_held(nfs_rwlock_t *, krw_t);
extern void	nfs_rw_init(nfs_rwlock_t *, char *, krw_type_t, void *);
extern void	nfs_rw_destroy(nfs_rwlock_t *);
extern int	nfs_directio(vnode_t *, int, cred_t *);
/*
 * NOTE(review): the two compar routines presumably serve as the AVL
 * comparison functions for the r_dir tree -- confirm at the
 * avl_create() call sites.
 */
extern int	nfs3_rddir_compar(const void *, const void *);
extern int	nfs_rddir_compar(const void *, const void *);
extern struct zone *nfs_zone(void);
extern zoneid_t nfs_zoneid(void);

#endif /* _KERNEL */
    398 
    399 #ifdef	__cplusplus
    400 }
    401 #endif
    402 
    403 #endif	/* _NFS_RNODE_H */
    404