mirror of
https://github.com/torvalds/linux.git
synced 2024-11-25 13:41:51 +00:00
8e6e2ffa65
nfsd_file_put() in one thread can race with another thread doing garbage collection (running nfsd_file_gc() -> list_lru_walk() -> nfsd_file_lru_cb()):

  * In nfsd_file_put(), nf->nf_ref is 1, so it tries to do nfsd_file_lru_add().
  * nfsd_file_lru_add() returns true (with NFSD_FILE_REFERENCED bit set).
  * The garbage collector kicks in, nfsd_file_lru_cb() clears the REFERENCED bit and returns LRU_ROTATE.
  * The garbage collector kicks in again, nfsd_file_lru_cb() now decrements nf->nf_ref to 0, runs nfsd_file_unhash(), removes it from the LRU and adds it to the dispose list [list_lru_isolate_move(lru, &nf->nf_lru, head)].
  * nfsd_file_put() detects that the NFSD_FILE_HASHED bit is cleared, so it tries to remove the 'nf' from the LRU [if (!nfsd_file_lru_remove(nf))]. The 'nf' has been added to the 'dispose' list by nfsd_file_lru_cb(), so nfsd_file_lru_remove(nf) simply treats it as part of the LRU and removes it, which leads to its removal from the 'dispose' list.
  * At this moment, 'nf' is unhashed with its nf_ref being 0, and not on the LRU. nfsd_file_put() continues its execution [if (refcount_dec_and_test(&nf->nf_ref))]; as nf->nf_ref is already 0, nf->nf_ref is set to REFCOUNT_SATURATED, and the 'nf' gets no chance of being freed.

nfsd_file_put() can also race with nfsd_file_cond_queue():

  * In nfsd_file_put(), nf->nf_ref is 1, so it tries to do nfsd_file_lru_add().
  * nfsd_file_lru_add() sets the REFERENCED bit and returns true.
  * Some userland application runs 'exportfs -f' or something like that, which triggers __nfsd_file_cache_purge() -> nfsd_file_cond_queue().
  * In nfsd_file_cond_queue(), it runs [if (!nfsd_file_unhash(nf))]; the unhash is done successfully.
  * nfsd_file_cond_queue() runs [if (!nfsd_file_get(nf))]; now nf->nf_ref goes to 2.
  * nfsd_file_cond_queue() runs [if (nfsd_file_lru_remove(nf))]; it succeeds.
  * nfsd_file_cond_queue() runs [if (refcount_sub_and_test(decrement, &nf->nf_ref))] (with "decrement" being 2), so nf->nf_ref goes to 0 and the 'nf' is added to the dispose list [list_add(&nf->nf_lru, dispose)].
  * nfsd_file_put() detects that the NFSD_FILE_HASHED bit is cleared, so it tries to remove the 'nf' from the LRU [if (!nfsd_file_lru_remove(nf))]. Although the 'nf' is not in the LRU, it is linked in the 'dispose' list, so nfsd_file_lru_remove() simply treats it as part of the LRU and removes it. This leads to its removal from the 'dispose' list!
  * Now nf->nf_ref is 0 and the 'nf' is unhashed. nfsd_file_put() continues its execution and sets nf->nf_ref to REFCOUNT_SATURATED.

As shown in the above analysis, using nf_lru for both the LRU list and the dispose list can cause the leaks. This patch adds a new list_head nf_gc in struct nfsd_file, and uses it for the dispose list. This does not fix the nfsd_file leaking issue completely.

Signed-off-by: Youzhong Yang <youzhong@gmail.com>
Reviewed-by: Jeff Layton <jlayton@kernel.org>
Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
71 lines
2.7 KiB
C
71 lines
2.7 KiB
C
#ifndef _FS_NFSD_FILECACHE_H
|
|
#define _FS_NFSD_FILECACHE_H
|
|
|
|
#include <linux/fsnotify_backend.h>
|
|
|
|
/*
|
|
* This is the fsnotify_mark container that nfsd attaches to the files that it
|
|
* is holding open. Note that we have a separate refcount here aside from the
|
|
* one in the fsnotify_mark. We only want a single fsnotify_mark attached to
|
|
* the inode, and for each nfsd_file to hold a reference to it.
|
|
*
|
|
* The fsnotify_mark is itself refcounted, but that's not sufficient to tell us
|
|
* how to put that reference. If there are still outstanding nfsd_files that
|
|
* reference the mark, then we would want to call fsnotify_put_mark on it.
|
|
* If there were not, then we'd need to call fsnotify_destroy_mark. Since we
|
|
* can't really tell the difference, we use the nfm_mark to keep track of how
|
|
* many nfsd_files hold references to the mark. When that counter goes to zero
|
|
* then we know to call fsnotify_destroy_mark on it.
|
|
*/
|
|
struct nfsd_file_mark {
|
|
struct fsnotify_mark nfm_mark;
|
|
refcount_t nfm_ref;
|
|
};
|
|
|
|
/*
|
|
* A representation of a file that has been opened by knfsd. These are hashed
|
|
* in the hashtable by inode pointer value. Note that this object doesn't
|
|
* hold a reference to the inode by itself, so the nf_inode pointer should
|
|
* never be dereferenced, only used for comparison.
|
|
*/
|
|
struct nfsd_file {
|
|
struct rhlist_head nf_rlist;
|
|
void *nf_inode;
|
|
struct file *nf_file;
|
|
const struct cred *nf_cred;
|
|
struct net *nf_net;
|
|
#define NFSD_FILE_HASHED (0)
|
|
#define NFSD_FILE_PENDING (1)
|
|
#define NFSD_FILE_REFERENCED (2)
|
|
#define NFSD_FILE_GC (3)
|
|
unsigned long nf_flags;
|
|
refcount_t nf_ref;
|
|
unsigned char nf_may;
|
|
|
|
struct nfsd_file_mark *nf_mark;
|
|
struct list_head nf_lru;
|
|
struct list_head nf_gc;
|
|
struct rcu_head nf_rcu;
|
|
ktime_t nf_birthtime;
|
|
};
|
|
|
|
int nfsd_file_cache_init(void);
|
|
void nfsd_file_cache_purge(struct net *);
|
|
void nfsd_file_cache_shutdown(void);
|
|
int nfsd_file_cache_start_net(struct net *net);
|
|
void nfsd_file_cache_shutdown_net(struct net *net);
|
|
void nfsd_file_put(struct nfsd_file *nf);
|
|
struct nfsd_file *nfsd_file_get(struct nfsd_file *nf);
|
|
void nfsd_file_close_inode_sync(struct inode *inode);
|
|
void nfsd_file_net_dispose(struct nfsd_net *nn);
|
|
bool nfsd_file_is_cached(struct inode *inode);
|
|
__be32 nfsd_file_acquire_gc(struct svc_rqst *rqstp, struct svc_fh *fhp,
|
|
unsigned int may_flags, struct nfsd_file **nfp);
|
|
__be32 nfsd_file_acquire(struct svc_rqst *rqstp, struct svc_fh *fhp,
|
|
unsigned int may_flags, struct nfsd_file **nfp);
|
|
__be32 nfsd_file_acquire_opened(struct svc_rqst *rqstp, struct svc_fh *fhp,
|
|
unsigned int may_flags, struct file *file,
|
|
struct nfsd_file **nfp);
|
|
int nfsd_file_cache_stats_show(struct seq_file *m, void *v);
|
|
#endif /* _FS_NFSD_FILECACHE_H */
|