Home | History | Annotate | Download | only in sys
      1 /*
      2  * CDDL HEADER START
      3  *
      4  * The contents of this file are subject to the terms of the
      5  * Common Development and Distribution License (the "License").
      6  * You may not use this file except in compliance with the License.
      7  *
      8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
      9  * or http://www.opensolaris.org/os/licensing.
     10  * See the License for the specific language governing permissions
     11  * and limitations under the License.
     12  *
     13  * When distributing Covered Code, include this CDDL HEADER in each
     14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
     15  * If applicable, add the following below this CDDL HEADER, with the
     16  * fields enclosed by brackets "[]" replaced with your own identifying
     17  * information: Portions Copyright [yyyy] [name of copyright owner]
     18  *
     19  * CDDL HEADER END
     20  */
     21 /*
     22  * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
     23  * Use is subject to license terms.
     24  */
     25 
     26 /*	Copyright (c) 1983, 1984, 1985, 1986, 1987, 1988, 1989 AT&T	*/
     27 /*	  All Rights Reserved  	*/
     28 
     29 /*
     30  * University Copyright- Copyright (c) 1982, 1986, 1988
     31  * The Regents of the University of California
     32  * All Rights Reserved
     33  *
     34  * University Acknowledgment- Portions of this document are derived from
     35  * software developed by the University of California, Berkeley, and its
     36  * contributors.
     37  */
     38 
     39 #ifndef _SYS_DNLC_H
     40 #define	_SYS_DNLC_H
     41 
     42 #pragma ident	"%Z%%M%	%I%	%E% SMI"
     43 
     44 #ifdef	__cplusplus
     45 extern "C" {
     46 #endif
     47 
     48 #include <sys/kstat.h>
     49 
     50 /*
     51  * DNLC - Directory name lookup cache.
     52  * There are now two sorts of name caching:
     53  *
 * Standard dnlc: This original cache holds recent mappings
 *                of <directory vnode, name> to vnode.
     56  *
     57  * Directory caches: Entire large directories can be cached, subject to
     58  *		     memory availability and tunables. A directory cache
     59  *		     anchor point must be provided in the xxnode for
     60  *		     a directory.
     61  */
     62 
     63 
     64 /*
     65  * Standard dnlc
     66  * =============
     67  */
     68 
/*
 * This structure describes the elements in the cache of recent
 * names looked up.
 *
 * Note namlen is a uchar_t to conserve space
 * and alignment padding. The max length of any
 * pathname component is defined as MAXNAMELEN
 * which is 256 (including the terminating null).
 * So provided this doesn't change, we don't include the null,
 * we always use bcmp to compare strings, and we don't start
 * storing full names, then we are ok. The space savings are worth it.
 *
 * DNLCHASH() asserts that the computed name length fits in namlen.
 *
 * NOTE(review): name[1] looks like the traditional variable-length
 * trailing array idiom; presumably each entry is allocated with room
 * for the whole name - confirm against the allocator in dnlc.c.
 */
typedef struct ncache {
	struct ncache *hash_next; 	/* hash chain, MUST BE FIRST */
	struct ncache *hash_prev;	/* hash chain back link, must be second */
	struct vnode *vp;		/* vnode the name refers to */
	struct vnode *dp;		/* vnode of parent of name */
	int hash;			/* hash signature */
	uchar_t namlen;			/* length of name */
	char name[1];			/* segment name - null terminated */
} ncache_t;
     90 
/*
 * Hash table bucket structure of name cache entries for fast lookup.
 *
 * The first two members have the same names and order as the first two
 * members of ncache_t (hash_next, hash_prev), which ncache_t requires
 * to be first.
 * NOTE(review): presumably this lets a bucket act as the head of its
 * own doubly linked chain of ncache_t entries - confirm in dnlc.c.
 */
typedef struct nc_hash	{
	ncache_t *hash_next;		/* forward link of the hash chain */
	ncache_t *hash_prev;		/* backward link of the hash chain */
	kmutex_t hash_lock;		/* presumably guards this chain - confirm */
} nc_hash_t;
     99 
/*
 * Statistics on name cache
 * Not protected by locks
 */
/*
 * ncstats has been deprecated, due to the integer size of the counters
 * which can easily overflow in the dnlc.
 * It is maintained (at some expense) for compatibility.
 * The preferred interface is the kstat accessible nc_stats below, which
 * is actually shared with directory caching.
 */
struct ncstats {
	int	hits;		/* hits that we can really use */
	int	misses;		/* cache misses */
	int	enters;		/* number of enters done */
	int	dbl_enters;	/* number of enters tried when already cached */
	int	long_enter;	/* deprecated, no longer accounted */
	int	long_look;	/* deprecated, no longer accounted */
	int	move_to_front;	/* entry moved to front of hash chain */
	int	purges;		/* number of purges of cache */
};
    121 
/*
 * Name cache statistics kept as named kstats.
 * The first group of counters covers the standard dnlc, the second
 * group covers directory caching.
 */
struct nc_stats {
	kstat_named_t ncs_hits;		/* cache hits */
	kstat_named_t ncs_misses;	/* cache misses */
	kstat_named_t ncs_neg_hits;	/* negative cache hits */
	kstat_named_t ncs_enters;	/* enters */
	kstat_named_t ncs_dbl_enters;	/* enters when entry already cached */
	kstat_named_t ncs_purge_total;	/* total entries purged */
	kstat_named_t ncs_purge_all;	/* dnlc_purge() calls */
	kstat_named_t ncs_purge_vp;	/* dnlc_purge_vp() calls */
	kstat_named_t ncs_purge_vfs;	/* dnlc_purge_vfsp() calls */
	kstat_named_t ncs_purge_fs1;	/* dnlc_fs_purge1() calls */
	kstat_named_t ncs_pick_free;	/* found a free ncache */
	kstat_named_t ncs_pick_heur;	/* found ncache w/ NULL vpages */
	kstat_named_t ncs_pick_last;	/* found last ncache on chain */

	/* directory caching stats */

	kstat_named_t ncs_dir_hits;	/* dir cache hits */
	kstat_named_t ncs_dir_misses;	/* dir cache misses */
	kstat_named_t ncs_cur_dirs;	/* current # directories cached */
	kstat_named_t ncs_dir_num_ents;	/* current # entries cached */
	kstat_named_t ncs_dirs_cached;	/* total # directories cached */
	kstat_named_t ncs_dir_start_nm;	/* dir start no memory */
	kstat_named_t ncs_dir_add_nm;	/* add entry/space - no memory */
	kstat_named_t ncs_dir_addabort;	/* add entry/space - abort */
	kstat_named_t ncs_dir_add_max;	/* add entry/space - max exceeded */
	kstat_named_t ncs_dir_reme_fai;	/* remove entry fail */
	kstat_named_t ncs_dir_rems_fai;	/* remove space fail */
	kstat_named_t ncs_dir_upd_fail;	/* update space fail */
	kstat_named_t ncs_dir_finipurg;	/* fini purges */
	kstat_named_t ncs_dir_rec_last;	/* reclaim last */
	kstat_named_t ncs_dir_recl_any;	/* reclaim any */
};
    155 
    156 /*
    157  * The dnlc hashing function.
    158  * Although really a kernel macro we export it to allow validation
    159  * of ncache_t entries by mdb. Note, mdb can handle the ASSERT.
    160  *
    161  * 'hash' and 'namlen' must be l-values. A check is made to ensure
    162  * the name length fits into an unsigned char (see ncache_t).
    163  */
    164 #define	DNLCHASH(name, dvp, hash, namlen)			\
    165 	{							\
    166 		char Xc, *Xcp;					\
    167 		hash = (int)((uintptr_t)(dvp)) >> 8;		\
    168 		for (Xcp = (name); (Xc = *Xcp) != 0; Xcp++)	\
    169 			(hash) = ((hash) << 4) + (hash) + Xc;	\
    170 		ASSERT((Xcp - (name)) <= ((1 << NBBY) - 1));	\
    171 		(namlen) = Xcp - (name);			\
    172 	}
    173 
    174 #if defined(_KERNEL)
    175 
    176 #include <sys/vfs.h>
    177 #include <sys/vnode.h>
    178 
    179 extern int ncsize;		/* set in param_init() # of dnlc entries */
    180 extern vnode_t negative_cache_vnode;
    181 #define	DNLC_NO_VNODE &negative_cache_vnode
    182 
/*
 * Standard dnlc entry points.
 * NOTE(review): these prototypes do not name their parameters; see the
 * definitions in dnlc.c for the argument roles and return values.
 */
void	dnlc_init(void);
void	dnlc_enter(vnode_t *, char *, vnode_t *);
void	dnlc_update(vnode_t *, char *, vnode_t *);
vnode_t	*dnlc_lookup(vnode_t *, char *);
void	dnlc_purge(void);			/* calls counted in ncs_purge_all */
void	dnlc_purge_vp(vnode_t *);		/* calls counted in ncs_purge_vp */
int	dnlc_purge_vfsp(vfs_t *, int);
void	dnlc_remove(vnode_t *, char *);
int	dnlc_fs_purge1(struct vnodeops *);
vnode_t	*dnlc_reverse_lookup(vnode_t *, char *, size_t);
void	dnlc_reduce_cache(void *);
    194 
    195 #endif	/* defined(_KERNEL) */
    196 
    197 
    198 /*
    199  * Directory caching interfaces
    200  * ============================
    201  */
    202 
/*
 * dcfree_t describes one free space entry in a directory cache.
 *
 * Typically for large directories, the file names will be the same or
 * at least similar lengths. So there's no point in anything more elaborate
 * than a simple unordered linked list of free space entries.
 * For small directories the name length distribution doesn't really matter.
 */
typedef struct dcfree {
	uint64_t df_handle;		/* fs supplied handle */
	struct dcfree *df_next; 	/* link to next free entry in bucket */
	uint_t df_len;			/* length of free entry */
} dcfree_t;
    214 
/*
 * dcentry_t describes one named entry in a directory cache.
 * NOTE(review): de_name[1] looks like the traditional variable-length
 * trailing array idiom; presumably each entry is allocated with room
 * for the whole name - confirm in dnlc.c.
 */
typedef struct dcentry {
	uint64_t de_handle;		/* fs supplied and returned data */
	struct dcentry *de_next;	/* link to next name entry in bucket */
	int de_hash;			/* hash signature */
	uchar_t de_namelen;		/* length of name excluding null */
	char de_name[1];		/* null terminated name */
} dcentry_t;
    222 
/*
 * dircache_t is the cache for one directory. It holds a hash table of
 * named entries (dcentry_t) and a hash table of free space entries
 * (dcfree_t), and is linked on a chain of directory caches for purging.
 * dc_next and dc_prev must stay the first two members, in this order,
 * because dchead_t is required to match them.
 */
typedef struct dircache {
	struct dircache *dc_next;	/* chain - for purge purposes */
	struct dircache *dc_prev;	/* chain - for purge purposes */
	int64_t dc_actime;		/* dir access time, from lbolt64 */
	dcentry_t **dc_namehash;	/* entry hash table pointer */
	dcfree_t **dc_freehash;		/* free entry hash table pointer */
	uint_t dc_num_entries;		/* no of named entries */
	uint_t dc_num_free;		/* no of free space entries */
	uint_t dc_nhash_mask;		/* name hash table size - 1 */
	uint_t dc_fhash_mask;		/* free space hash table size - 1 */
	struct dcanchor *dc_anchor;	/* back ptr to anchor */
	boolean_t dc_complete;		/* cache complete boolean */
} dircache_t;
    236 
/*
 * dcanchor_t is the directory cache anchor point that a file system
 * provides in its node for a directory. It is initialised with
 * dnlc_dir_init() and released with dnlc_dir_fini().
 * NOTE(review): dca_dircache is a void * rather than a dircache_t *;
 * presumably it can also hold non-pointer marker values - confirm in
 * dnlc.c.
 */
typedef struct dcanchor {
	void *dca_dircache;	/* pointer to directory cache */
	kmutex_t dca_lock;		/* protects the directory cache */
} dcanchor_t;
    241 
/*
 * Head struct for doubly linked chain of dircache_t
 * The next and prev fields must match those of a dircache_t
 * (dch_next and dch_prev correspond to dc_next and dc_prev, which are
 * the first two members of dircache_t).
 */
typedef struct {
	dircache_t *dch_next;		/* next in chain */
	dircache_t *dch_prev;		/* prev in chain */
	kmutex_t dch_lock;		/* lock for the chain */
} dchead_t;
    251 
    252 
    253 #if defined(_KERNEL)
    254 
/*
 * Status returns from the directory cache interfaces.
 * The comment on each interface lists which of these it can return.
 */
typedef enum {
	DOK,		/* operation successful */
	DNOCACHE,	/* there is no cache */
	DFOUND,		/* entry found */
	DNOENT,		/* no entry found */
	DTOOBIG,	/* exceeds tunable dnlc_dir_max_size */
	DNOMEM		/* no memory */
} dcret_t;
    266 
    267 /*
    268  * dnlc_dir_start() requests that a directory be cached.
    269  * This must be called initially to enable caching on a directory.
    270  * After a successful call, directory entries and free space can be
    271  * added (see below) until the directory is marked complete.
    272  * "num_entries" is an estimate of the current number of
    273  * directory entries. The request is rejected with DNOCACHE
    274  * if num_entries falls below the tunable dnlc_dir_min_size (see
    275  * below), and rejected with DTOOBIG if it's above dnlc_dir_max_size.
    276  * Returns DOK, DNOCACHE, DTOOBIG, DNOMEM.
    277  *
    278  * Due to memory shortages, directory caches can be purged at
    279  * any time. If the last directory cache is purged due to memory
    280  * shortage, then the directory cache is marked internally
 * as "no memory". Future returns will all be DNOCACHE until
 * the next dnlc_dir_start() which will return DNOMEM once.
    283  * This memory shortage may only be transient. It's up to the
    284  * file system as to what to do about this condition, but an
    285  * attempt to immediately re-build the cache will very likely
    286  * lead to the same shortage of memory and a thrashing situation.
    287  *
    288  * It's file system policy as to when and what size directories to cache.
    289  */
dcret_t dnlc_dir_start(dcanchor_t *dcap, uint_t num_entries);

/*
 * dnlc_dir_add_entry() adds an entry (name and handle) into a
 * partial or complete cache. "handle" is a file system specific
 * quantity that is returned on calls to dnlc_dir_lookup() - see below.
 * For example, "handle" for ufs holds the inumber and a directory
 * entry offset. Returns DOK, DNOCACHE, DTOOBIG.
 */
dcret_t dnlc_dir_add_entry(dcanchor_t *dcap, char *name, uint64_t handle);

/*
 * dnlc_dir_add_space() adds free space (length and file system specific
 * handle) into a partial or complete cache. "handle" is a file
 * system specific quantity that is returned on calls to
 * dnlc_dir_rem_space_by_len(). For example, "handle" for ufs holds
 * the directory entry offset.  Returns DOK, DNOCACHE, DTOOBIG.
 */
dcret_t dnlc_dir_add_space(dcanchor_t *dcap, uint_t len, uint64_t handle);

/*
 * dnlc_dir_complete() indicates the previously partial cache is now complete.
 */
void dnlc_dir_complete(dcanchor_t *dcap);

/*
 * dnlc_dir_purge() deletes a partial or complete directory cache
 */
void dnlc_dir_purge(dcanchor_t *dcap);

/*
 * dnlc_dir_lookup() looks up a file name in a directory cache
 * and returns the file system handle specified on dnlc_dir_add_entry()
 * in "handlep". Returns DFOUND, DNOENT, DNOCACHE.
 */
dcret_t dnlc_dir_lookup(dcanchor_t *dcap, char *name, uint64_t *handlep);

/*
 * dnlc_dir_update() amends the handle for an entry in a directory cache
 * "handle" is the new file system specific handle for the file "name".
 * Returns DFOUND, DNOENT, DNOCACHE.
 */
dcret_t dnlc_dir_update(dcanchor_t *dcap, char *name, uint64_t handle);

/*
 * dnlc_dir_rem_entry() removes an entry from a directory cache.
 * Returns the handle if "handlep" non null.
 * Returns DFOUND, DNOENT, DNOCACHE.
 */
dcret_t dnlc_dir_rem_entry(dcanchor_t *dcap, char *name, uint64_t *handlep);

/*
 * dnlc_dir_rem_space_by_len() looks up and returns free space in a
 * directory cache of at least the given "len". Returns in "handlep"
 * the handle supplied when adding the free space in dnlc_dir_add_space().
 * Returns DFOUND, DNOENT, DNOCACHE.
 */
dcret_t dnlc_dir_rem_space_by_len(dcanchor_t *dcap, uint_t len,
    uint64_t *handlep);

/*
 * dnlc_dir_rem_space_by_handle() looks up and removes the free space in
 * a directory cache with the given handle. Returns DFOUND, DNOENT, DNOCACHE.
 */
dcret_t dnlc_dir_rem_space_by_handle(dcanchor_t *dcap, uint64_t handle);
    355 
    356 /*
    357  * dnlc_dir_init() initialises a directory anchor
    358  */
    359 #define	dnlc_dir_init(dcap) { \
    360 	(dcap)->dca_dircache = NULL; \
    361 	mutex_init(&(dcap)->dca_lock, NULL, MUTEX_DEFAULT, NULL); }
    362 
/*
 * dnlc_dir_fini() is called to indicate the anchor is no longer used.
 * It ensures there's no directory cache and mutex_destroys the lock
 * that dnlc_dir_init() initialised, so the anchor must not be used
 * again without another dnlc_dir_init().
 */
void dnlc_dir_fini(dcanchor_t *dcap);
    368 
    369 #endif	/* defined(_KERNEL) */
    370 
    371 #ifdef	__cplusplus
    372 }
    373 #endif
    374 
    375 #endif	/* _SYS_DNLC_H */
    376