055b24a8f2
If a file may contain unstable writes that can error out, then we want to avoid garbage collecting the struct nfsd_file that may be tracking those errors. So in the garbage collector, we try to avoid collecting files that aren't clean. Furthermore, we avoid immediately kicking off the garbage collector in the case where the reference drops to zero for the case where there is a write error that is being tracked. If the file is unhashed while an error is pending, then declare a reboot, to ensure the client resends any unstable writes. Signed-off-by: Trond Myklebust <trond.myklebust@hammerspace.com> Signed-off-by: J. Bruce Fields <bfields@redhat.com>
935 lines
24 KiB
C
935 lines
24 KiB
C
/*
|
|
* Open file cache.
|
|
*
|
|
* (c) 2015 - Jeff Layton <jeff.layton@primarydata.com>
|
|
*/
|
|
|
|
#include <linux/hash.h>
|
|
#include <linux/slab.h>
|
|
#include <linux/file.h>
|
|
#include <linux/sched.h>
|
|
#include <linux/list_lru.h>
|
|
#include <linux/fsnotify_backend.h>
|
|
#include <linux/fsnotify.h>
|
|
#include <linux/seq_file.h>
|
|
|
|
#include "vfs.h"
|
|
#include "nfsd.h"
|
|
#include "nfsfh.h"
|
|
#include "netns.h"
|
|
#include "filecache.h"
|
|
#include "trace.h"
|
|
|
|
#define NFSDDBG_FACILITY NFSDDBG_FH
|
|
|
|
/* FIXME: dynamically size this for the machine somehow? */
/* log2 of the number of hash buckets; also the width passed to hash_long() */
#define NFSD_FILE_HASH_BITS                   12
#define NFSD_FILE_HASH_SIZE                  (1 << NFSD_FILE_HASH_BITS)
/* Delay before the laundrette runs when the cache is under the threshold */
#define NFSD_LAUNDRETTE_DELAY		     (2 * HZ)

/* Bit numbers within nfsd_file_lru_flags */
#define NFSD_FILE_LRU_RESCAN		     (0)
#define NFSD_FILE_SHUTDOWN		     (1)
/* Entry count above which the laundrette is kicked without delay */
#define NFSD_FILE_LRU_THRESHOLD		     (4096UL)
/* Entry count at which a MAY_FLUSH caller waits for the laundrette */
#define NFSD_FILE_LRU_LIMIT		     (NFSD_FILE_LRU_THRESHOLD << 2)

/* We only care about NFSD_MAY_READ/WRITE for this cache */
#define NFSD_FILE_MAY_MASK	(NFSD_MAY_READ|NFSD_MAY_WRITE)
|
|
|
|
struct nfsd_fcache_bucket {
|
|
struct hlist_head nfb_head;
|
|
spinlock_t nfb_lock;
|
|
unsigned int nfb_count;
|
|
unsigned int nfb_maxcount;
|
|
};
|
|
|
|
static DEFINE_PER_CPU(unsigned long, nfsd_file_cache_hits);
|
|
|
|
static struct kmem_cache *nfsd_file_slab;
|
|
static struct kmem_cache *nfsd_file_mark_slab;
|
|
static struct nfsd_fcache_bucket *nfsd_file_hashtbl;
|
|
static struct list_lru nfsd_file_lru;
|
|
static long nfsd_file_lru_flags;
|
|
static struct fsnotify_group *nfsd_file_fsnotify_group;
|
|
static atomic_long_t nfsd_filecache_count;
|
|
static struct delayed_work nfsd_filecache_laundrette;
|
|
|
|
/*
 * Tells nfsd_file_schedule_laundrette() whether the caller is allowed to
 * wait for the laundrette to finish when the cache is at its hard limit.
 */
enum nfsd_file_laundrette_ctl {
	/* only queue the work, never wait for it */
	NFSD_FILE_LAUNDRETTE_NOFLUSH = 0,
	/* queue the work and flush it if the cache has hit the limit */
	NFSD_FILE_LAUNDRETTE_MAY_FLUSH
};
|
|
|
|
static void
|
|
nfsd_file_schedule_laundrette(enum nfsd_file_laundrette_ctl ctl)
|
|
{
|
|
long count = atomic_long_read(&nfsd_filecache_count);
|
|
|
|
if (count == 0 || test_bit(NFSD_FILE_SHUTDOWN, &nfsd_file_lru_flags))
|
|
return;
|
|
|
|
/* Be more aggressive about scanning if over the threshold */
|
|
if (count > NFSD_FILE_LRU_THRESHOLD)
|
|
mod_delayed_work(system_wq, &nfsd_filecache_laundrette, 0);
|
|
else
|
|
schedule_delayed_work(&nfsd_filecache_laundrette, NFSD_LAUNDRETTE_DELAY);
|
|
|
|
if (ctl == NFSD_FILE_LAUNDRETTE_NOFLUSH)
|
|
return;
|
|
|
|
/* ...and don't delay flushing if we're out of control */
|
|
if (count >= NFSD_FILE_LRU_LIMIT)
|
|
flush_delayed_work(&nfsd_filecache_laundrette);
|
|
}
|
|
|
|
static void
|
|
nfsd_file_slab_free(struct rcu_head *rcu)
|
|
{
|
|
struct nfsd_file *nf = container_of(rcu, struct nfsd_file, nf_rcu);
|
|
|
|
put_cred(nf->nf_cred);
|
|
kmem_cache_free(nfsd_file_slab, nf);
|
|
}
|
|
|
|
static void
|
|
nfsd_file_mark_free(struct fsnotify_mark *mark)
|
|
{
|
|
struct nfsd_file_mark *nfm = container_of(mark, struct nfsd_file_mark,
|
|
nfm_mark);
|
|
|
|
kmem_cache_free(nfsd_file_mark_slab, nfm);
|
|
}
|
|
|
|
static struct nfsd_file_mark *
|
|
nfsd_file_mark_get(struct nfsd_file_mark *nfm)
|
|
{
|
|
if (!atomic_inc_not_zero(&nfm->nfm_ref))
|
|
return NULL;
|
|
return nfm;
|
|
}
|
|
|
|
static void
|
|
nfsd_file_mark_put(struct nfsd_file_mark *nfm)
|
|
{
|
|
if (atomic_dec_and_test(&nfm->nfm_ref)) {
|
|
|
|
fsnotify_destroy_mark(&nfm->nfm_mark, nfsd_file_fsnotify_group);
|
|
fsnotify_put_mark(&nfm->nfm_mark);
|
|
}
|
|
}
|
|
|
|
static struct nfsd_file_mark *
|
|
nfsd_file_mark_find_or_create(struct nfsd_file *nf)
|
|
{
|
|
int err;
|
|
struct fsnotify_mark *mark;
|
|
struct nfsd_file_mark *nfm = NULL, *new;
|
|
struct inode *inode = nf->nf_inode;
|
|
|
|
do {
|
|
mutex_lock(&nfsd_file_fsnotify_group->mark_mutex);
|
|
mark = fsnotify_find_mark(&inode->i_fsnotify_marks,
|
|
nfsd_file_fsnotify_group);
|
|
if (mark) {
|
|
nfm = nfsd_file_mark_get(container_of(mark,
|
|
struct nfsd_file_mark,
|
|
nfm_mark));
|
|
mutex_unlock(&nfsd_file_fsnotify_group->mark_mutex);
|
|
fsnotify_put_mark(mark);
|
|
if (likely(nfm))
|
|
break;
|
|
} else
|
|
mutex_unlock(&nfsd_file_fsnotify_group->mark_mutex);
|
|
|
|
/* allocate a new nfm */
|
|
new = kmem_cache_alloc(nfsd_file_mark_slab, GFP_KERNEL);
|
|
if (!new)
|
|
return NULL;
|
|
fsnotify_init_mark(&new->nfm_mark, nfsd_file_fsnotify_group);
|
|
new->nfm_mark.mask = FS_ATTRIB|FS_DELETE_SELF;
|
|
atomic_set(&new->nfm_ref, 1);
|
|
|
|
err = fsnotify_add_inode_mark(&new->nfm_mark, inode, 0);
|
|
|
|
/*
|
|
* If the add was successful, then return the object.
|
|
* Otherwise, we need to put the reference we hold on the
|
|
* nfm_mark. The fsnotify code will take a reference and put
|
|
* it on failure, so we can't just free it directly. It's also
|
|
* not safe to call fsnotify_destroy_mark on it as the
|
|
* mark->group will be NULL. Thus, we can't let the nfm_ref
|
|
* counter drive the destruction at this point.
|
|
*/
|
|
if (likely(!err))
|
|
nfm = new;
|
|
else
|
|
fsnotify_put_mark(&new->nfm_mark);
|
|
} while (unlikely(err == -EEXIST));
|
|
|
|
return nfm;
|
|
}
|
|
|
|
static struct nfsd_file *
|
|
nfsd_file_alloc(struct inode *inode, unsigned int may, unsigned int hashval,
|
|
struct net *net)
|
|
{
|
|
struct nfsd_file *nf;
|
|
|
|
nf = kmem_cache_alloc(nfsd_file_slab, GFP_KERNEL);
|
|
if (nf) {
|
|
INIT_HLIST_NODE(&nf->nf_node);
|
|
INIT_LIST_HEAD(&nf->nf_lru);
|
|
nf->nf_file = NULL;
|
|
nf->nf_cred = get_current_cred();
|
|
nf->nf_net = net;
|
|
nf->nf_flags = 0;
|
|
nf->nf_inode = inode;
|
|
nf->nf_hashval = hashval;
|
|
atomic_set(&nf->nf_ref, 1);
|
|
nf->nf_may = may & NFSD_FILE_MAY_MASK;
|
|
if (may & NFSD_MAY_NOT_BREAK_LEASE) {
|
|
if (may & NFSD_MAY_WRITE)
|
|
__set_bit(NFSD_FILE_BREAK_WRITE, &nf->nf_flags);
|
|
if (may & NFSD_MAY_READ)
|
|
__set_bit(NFSD_FILE_BREAK_READ, &nf->nf_flags);
|
|
}
|
|
nf->nf_mark = NULL;
|
|
trace_nfsd_file_alloc(nf);
|
|
}
|
|
return nf;
|
|
}
|
|
|
|
static bool
|
|
nfsd_file_free(struct nfsd_file *nf)
|
|
{
|
|
bool flush = false;
|
|
|
|
trace_nfsd_file_put_final(nf);
|
|
if (nf->nf_mark)
|
|
nfsd_file_mark_put(nf->nf_mark);
|
|
if (nf->nf_file) {
|
|
get_file(nf->nf_file);
|
|
filp_close(nf->nf_file, NULL);
|
|
fput(nf->nf_file);
|
|
flush = true;
|
|
}
|
|
call_rcu(&nf->nf_rcu, nfsd_file_slab_free);
|
|
return flush;
|
|
}
|
|
|
|
static bool
|
|
nfsd_file_check_writeback(struct nfsd_file *nf)
|
|
{
|
|
struct file *file = nf->nf_file;
|
|
struct address_space *mapping;
|
|
|
|
if (!file || !(file->f_mode & FMODE_WRITE))
|
|
return false;
|
|
mapping = file->f_mapping;
|
|
return mapping_tagged(mapping, PAGECACHE_TAG_DIRTY) ||
|
|
mapping_tagged(mapping, PAGECACHE_TAG_WRITEBACK);
|
|
}
|
|
|
|
static int
|
|
nfsd_file_check_write_error(struct nfsd_file *nf)
|
|
{
|
|
struct file *file = nf->nf_file;
|
|
|
|
if (!file || !(file->f_mode & FMODE_WRITE))
|
|
return 0;
|
|
return filemap_check_wb_err(file->f_mapping, READ_ONCE(file->f_wb_err));
|
|
}
|
|
|
|
static bool
|
|
nfsd_file_in_use(struct nfsd_file *nf)
|
|
{
|
|
return nfsd_file_check_writeback(nf) ||
|
|
nfsd_file_check_write_error(nf);
|
|
}
|
|
|
|
static void
|
|
nfsd_file_do_unhash(struct nfsd_file *nf)
|
|
{
|
|
lockdep_assert_held(&nfsd_file_hashtbl[nf->nf_hashval].nfb_lock);
|
|
|
|
trace_nfsd_file_unhash(nf);
|
|
|
|
if (nfsd_file_check_write_error(nf))
|
|
nfsd_reset_boot_verifier(net_generic(nf->nf_net, nfsd_net_id));
|
|
--nfsd_file_hashtbl[nf->nf_hashval].nfb_count;
|
|
hlist_del_rcu(&nf->nf_node);
|
|
if (!list_empty(&nf->nf_lru))
|
|
list_lru_del(&nfsd_file_lru, &nf->nf_lru);
|
|
atomic_long_dec(&nfsd_filecache_count);
|
|
}
|
|
|
|
static bool
|
|
nfsd_file_unhash(struct nfsd_file *nf)
|
|
{
|
|
if (test_and_clear_bit(NFSD_FILE_HASHED, &nf->nf_flags)) {
|
|
nfsd_file_do_unhash(nf);
|
|
return true;
|
|
}
|
|
return false;
|
|
}
|
|
|
|
/*
|
|
* Return true if the file was unhashed.
|
|
*/
|
|
static bool
|
|
nfsd_file_unhash_and_release_locked(struct nfsd_file *nf, struct list_head *dispose)
|
|
{
|
|
lockdep_assert_held(&nfsd_file_hashtbl[nf->nf_hashval].nfb_lock);
|
|
|
|
trace_nfsd_file_unhash_and_release_locked(nf);
|
|
if (!nfsd_file_unhash(nf))
|
|
return false;
|
|
/* keep final reference for nfsd_file_lru_dispose */
|
|
if (atomic_add_unless(&nf->nf_ref, -1, 1))
|
|
return true;
|
|
|
|
list_add(&nf->nf_lru, dispose);
|
|
return true;
|
|
}
|
|
|
|
static int
|
|
nfsd_file_put_noref(struct nfsd_file *nf)
|
|
{
|
|
int count;
|
|
trace_nfsd_file_put(nf);
|
|
|
|
count = atomic_dec_return(&nf->nf_ref);
|
|
if (!count) {
|
|
WARN_ON(test_bit(NFSD_FILE_HASHED, &nf->nf_flags));
|
|
nfsd_file_free(nf);
|
|
}
|
|
return count;
|
|
}
|
|
|
|
void
|
|
nfsd_file_put(struct nfsd_file *nf)
|
|
{
|
|
bool is_hashed = test_bit(NFSD_FILE_HASHED, &nf->nf_flags) != 0;
|
|
bool unused = !nfsd_file_in_use(nf);
|
|
|
|
set_bit(NFSD_FILE_REFERENCED, &nf->nf_flags);
|
|
if (nfsd_file_put_noref(nf) == 1 && is_hashed && unused)
|
|
nfsd_file_schedule_laundrette(NFSD_FILE_LAUNDRETTE_MAY_FLUSH);
|
|
}
|
|
|
|
struct nfsd_file *
|
|
nfsd_file_get(struct nfsd_file *nf)
|
|
{
|
|
if (likely(atomic_inc_not_zero(&nf->nf_ref)))
|
|
return nf;
|
|
return NULL;
|
|
}
|
|
|
|
static void
|
|
nfsd_file_dispose_list(struct list_head *dispose)
|
|
{
|
|
struct nfsd_file *nf;
|
|
|
|
while(!list_empty(dispose)) {
|
|
nf = list_first_entry(dispose, struct nfsd_file, nf_lru);
|
|
list_del(&nf->nf_lru);
|
|
nfsd_file_put_noref(nf);
|
|
}
|
|
}
|
|
|
|
static void
|
|
nfsd_file_dispose_list_sync(struct list_head *dispose)
|
|
{
|
|
bool flush = false;
|
|
struct nfsd_file *nf;
|
|
|
|
while(!list_empty(dispose)) {
|
|
nf = list_first_entry(dispose, struct nfsd_file, nf_lru);
|
|
list_del(&nf->nf_lru);
|
|
if (!atomic_dec_and_test(&nf->nf_ref))
|
|
continue;
|
|
if (nfsd_file_free(nf))
|
|
flush = true;
|
|
}
|
|
if (flush)
|
|
flush_delayed_fput();
|
|
}
|
|
|
|
/*
|
|
* Note this can deadlock with nfsd_file_cache_purge.
|
|
*/
|
|
static enum lru_status
|
|
nfsd_file_lru_cb(struct list_head *item, struct list_lru_one *lru,
|
|
spinlock_t *lock, void *arg)
|
|
__releases(lock)
|
|
__acquires(lock)
|
|
{
|
|
struct list_head *head = arg;
|
|
struct nfsd_file *nf = list_entry(item, struct nfsd_file, nf_lru);
|
|
|
|
/*
|
|
* Do a lockless refcount check. The hashtable holds one reference, so
|
|
* we look to see if anything else has a reference, or if any have
|
|
* been put since the shrinker last ran. Those don't get unhashed and
|
|
* released.
|
|
*
|
|
* Note that in the put path, we set the flag and then decrement the
|
|
* counter. Here we check the counter and then test and clear the flag.
|
|
* That order is deliberate to ensure that we can do this locklessly.
|
|
*/
|
|
if (atomic_read(&nf->nf_ref) > 1)
|
|
goto out_skip;
|
|
|
|
/*
|
|
* Don't throw out files that are still undergoing I/O or
|
|
* that have uncleared errors pending.
|
|
*/
|
|
if (nfsd_file_check_writeback(nf))
|
|
goto out_skip;
|
|
|
|
if (test_and_clear_bit(NFSD_FILE_REFERENCED, &nf->nf_flags))
|
|
goto out_rescan;
|
|
|
|
if (!test_and_clear_bit(NFSD_FILE_HASHED, &nf->nf_flags))
|
|
goto out_skip;
|
|
|
|
list_lru_isolate_move(lru, &nf->nf_lru, head);
|
|
return LRU_REMOVED;
|
|
out_rescan:
|
|
set_bit(NFSD_FILE_LRU_RESCAN, &nfsd_file_lru_flags);
|
|
out_skip:
|
|
return LRU_SKIP;
|
|
}
|
|
|
|
static void
|
|
nfsd_file_lru_dispose(struct list_head *head)
|
|
{
|
|
while(!list_empty(head)) {
|
|
struct nfsd_file *nf = list_first_entry(head,
|
|
struct nfsd_file, nf_lru);
|
|
list_del_init(&nf->nf_lru);
|
|
spin_lock(&nfsd_file_hashtbl[nf->nf_hashval].nfb_lock);
|
|
nfsd_file_do_unhash(nf);
|
|
spin_unlock(&nfsd_file_hashtbl[nf->nf_hashval].nfb_lock);
|
|
nfsd_file_put_noref(nf);
|
|
}
|
|
}
|
|
|
|
static unsigned long
|
|
nfsd_file_lru_count(struct shrinker *s, struct shrink_control *sc)
|
|
{
|
|
return list_lru_count(&nfsd_file_lru);
|
|
}
|
|
|
|
static unsigned long
|
|
nfsd_file_lru_scan(struct shrinker *s, struct shrink_control *sc)
|
|
{
|
|
LIST_HEAD(head);
|
|
unsigned long ret;
|
|
|
|
ret = list_lru_shrink_walk(&nfsd_file_lru, sc, nfsd_file_lru_cb, &head);
|
|
nfsd_file_lru_dispose(&head);
|
|
return ret;
|
|
}
|
|
|
|
static struct shrinker nfsd_file_shrinker = {
|
|
.scan_objects = nfsd_file_lru_scan,
|
|
.count_objects = nfsd_file_lru_count,
|
|
.seeks = 1,
|
|
};
|
|
|
|
static void
|
|
__nfsd_file_close_inode(struct inode *inode, unsigned int hashval,
|
|
struct list_head *dispose)
|
|
{
|
|
struct nfsd_file *nf;
|
|
struct hlist_node *tmp;
|
|
|
|
spin_lock(&nfsd_file_hashtbl[hashval].nfb_lock);
|
|
hlist_for_each_entry_safe(nf, tmp, &nfsd_file_hashtbl[hashval].nfb_head, nf_node) {
|
|
if (inode == nf->nf_inode)
|
|
nfsd_file_unhash_and_release_locked(nf, dispose);
|
|
}
|
|
spin_unlock(&nfsd_file_hashtbl[hashval].nfb_lock);
|
|
}
|
|
|
|
/**
|
|
* nfsd_file_close_inode_sync - attempt to forcibly close a nfsd_file
|
|
* @inode: inode of the file to attempt to remove
|
|
*
|
|
* Walk the whole hash bucket, looking for any files that correspond to "inode".
|
|
* If any do, then unhash them and put the hashtable reference to them and
|
|
* destroy any that had their last reference put. Also ensure that any of the
|
|
* fputs also have their final __fput done as well.
|
|
*/
|
|
void
|
|
nfsd_file_close_inode_sync(struct inode *inode)
|
|
{
|
|
unsigned int hashval = (unsigned int)hash_long(inode->i_ino,
|
|
NFSD_FILE_HASH_BITS);
|
|
LIST_HEAD(dispose);
|
|
|
|
__nfsd_file_close_inode(inode, hashval, &dispose);
|
|
trace_nfsd_file_close_inode_sync(inode, hashval, !list_empty(&dispose));
|
|
nfsd_file_dispose_list_sync(&dispose);
|
|
}
|
|
|
|
/**
|
|
* nfsd_file_close_inode_sync - attempt to forcibly close a nfsd_file
|
|
* @inode: inode of the file to attempt to remove
|
|
*
|
|
* Walk the whole hash bucket, looking for any files that correspond to "inode".
|
|
* If any do, then unhash them and put the hashtable reference to them and
|
|
* destroy any that had their last reference put.
|
|
*/
|
|
static void
|
|
nfsd_file_close_inode(struct inode *inode)
|
|
{
|
|
unsigned int hashval = (unsigned int)hash_long(inode->i_ino,
|
|
NFSD_FILE_HASH_BITS);
|
|
LIST_HEAD(dispose);
|
|
|
|
__nfsd_file_close_inode(inode, hashval, &dispose);
|
|
trace_nfsd_file_close_inode(inode, hashval, !list_empty(&dispose));
|
|
nfsd_file_dispose_list(&dispose);
|
|
}
|
|
|
|
/**
|
|
* nfsd_file_delayed_close - close unused nfsd_files
|
|
* @work: dummy
|
|
*
|
|
* Walk the LRU list and close any entries that have not been used since
|
|
* the last scan.
|
|
*
|
|
* Note this can deadlock with nfsd_file_cache_purge.
|
|
*/
|
|
static void
|
|
nfsd_file_delayed_close(struct work_struct *work)
|
|
{
|
|
LIST_HEAD(head);
|
|
|
|
list_lru_walk(&nfsd_file_lru, nfsd_file_lru_cb, &head, LONG_MAX);
|
|
|
|
if (test_and_clear_bit(NFSD_FILE_LRU_RESCAN, &nfsd_file_lru_flags))
|
|
nfsd_file_schedule_laundrette(NFSD_FILE_LAUNDRETTE_NOFLUSH);
|
|
|
|
if (!list_empty(&head)) {
|
|
nfsd_file_lru_dispose(&head);
|
|
flush_delayed_fput();
|
|
}
|
|
}
|
|
|
|
static int
|
|
nfsd_file_lease_notifier_call(struct notifier_block *nb, unsigned long arg,
|
|
void *data)
|
|
{
|
|
struct file_lock *fl = data;
|
|
|
|
/* Only close files for F_SETLEASE leases */
|
|
if (fl->fl_flags & FL_LEASE)
|
|
nfsd_file_close_inode_sync(file_inode(fl->fl_file));
|
|
return 0;
|
|
}
|
|
|
|
static struct notifier_block nfsd_file_lease_notifier = {
|
|
.notifier_call = nfsd_file_lease_notifier_call,
|
|
};
|
|
|
|
static int
|
|
nfsd_file_fsnotify_handle_event(struct fsnotify_group *group,
|
|
struct inode *inode,
|
|
u32 mask, const void *data, int data_type,
|
|
const struct qstr *file_name, u32 cookie,
|
|
struct fsnotify_iter_info *iter_info)
|
|
{
|
|
trace_nfsd_file_fsnotify_handle_event(inode, mask);
|
|
|
|
/* Should be no marks on non-regular files */
|
|
if (!S_ISREG(inode->i_mode)) {
|
|
WARN_ON_ONCE(1);
|
|
return 0;
|
|
}
|
|
|
|
/* don't close files if this was not the last link */
|
|
if (mask & FS_ATTRIB) {
|
|
if (inode->i_nlink)
|
|
return 0;
|
|
}
|
|
|
|
nfsd_file_close_inode(inode);
|
|
return 0;
|
|
}
|
|
|
|
|
|
static const struct fsnotify_ops nfsd_file_fsnotify_ops = {
|
|
.handle_event = nfsd_file_fsnotify_handle_event,
|
|
.free_mark = nfsd_file_mark_free,
|
|
};
|
|
|
|
int
|
|
nfsd_file_cache_init(void)
|
|
{
|
|
int ret = -ENOMEM;
|
|
unsigned int i;
|
|
|
|
clear_bit(NFSD_FILE_SHUTDOWN, &nfsd_file_lru_flags);
|
|
|
|
if (nfsd_file_hashtbl)
|
|
return 0;
|
|
|
|
nfsd_file_hashtbl = kcalloc(NFSD_FILE_HASH_SIZE,
|
|
sizeof(*nfsd_file_hashtbl), GFP_KERNEL);
|
|
if (!nfsd_file_hashtbl) {
|
|
pr_err("nfsd: unable to allocate nfsd_file_hashtbl\n");
|
|
goto out_err;
|
|
}
|
|
|
|
nfsd_file_slab = kmem_cache_create("nfsd_file",
|
|
sizeof(struct nfsd_file), 0, 0, NULL);
|
|
if (!nfsd_file_slab) {
|
|
pr_err("nfsd: unable to create nfsd_file_slab\n");
|
|
goto out_err;
|
|
}
|
|
|
|
nfsd_file_mark_slab = kmem_cache_create("nfsd_file_mark",
|
|
sizeof(struct nfsd_file_mark), 0, 0, NULL);
|
|
if (!nfsd_file_mark_slab) {
|
|
pr_err("nfsd: unable to create nfsd_file_mark_slab\n");
|
|
goto out_err;
|
|
}
|
|
|
|
|
|
ret = list_lru_init(&nfsd_file_lru);
|
|
if (ret) {
|
|
pr_err("nfsd: failed to init nfsd_file_lru: %d\n", ret);
|
|
goto out_err;
|
|
}
|
|
|
|
ret = register_shrinker(&nfsd_file_shrinker);
|
|
if (ret) {
|
|
pr_err("nfsd: failed to register nfsd_file_shrinker: %d\n", ret);
|
|
goto out_lru;
|
|
}
|
|
|
|
ret = lease_register_notifier(&nfsd_file_lease_notifier);
|
|
if (ret) {
|
|
pr_err("nfsd: unable to register lease notifier: %d\n", ret);
|
|
goto out_shrinker;
|
|
}
|
|
|
|
nfsd_file_fsnotify_group = fsnotify_alloc_group(&nfsd_file_fsnotify_ops);
|
|
if (IS_ERR(nfsd_file_fsnotify_group)) {
|
|
pr_err("nfsd: unable to create fsnotify group: %ld\n",
|
|
PTR_ERR(nfsd_file_fsnotify_group));
|
|
nfsd_file_fsnotify_group = NULL;
|
|
goto out_notifier;
|
|
}
|
|
|
|
for (i = 0; i < NFSD_FILE_HASH_SIZE; i++) {
|
|
INIT_HLIST_HEAD(&nfsd_file_hashtbl[i].nfb_head);
|
|
spin_lock_init(&nfsd_file_hashtbl[i].nfb_lock);
|
|
}
|
|
|
|
INIT_DELAYED_WORK(&nfsd_filecache_laundrette, nfsd_file_delayed_close);
|
|
out:
|
|
return ret;
|
|
out_notifier:
|
|
lease_unregister_notifier(&nfsd_file_lease_notifier);
|
|
out_shrinker:
|
|
unregister_shrinker(&nfsd_file_shrinker);
|
|
out_lru:
|
|
list_lru_destroy(&nfsd_file_lru);
|
|
out_err:
|
|
kmem_cache_destroy(nfsd_file_slab);
|
|
nfsd_file_slab = NULL;
|
|
kmem_cache_destroy(nfsd_file_mark_slab);
|
|
nfsd_file_mark_slab = NULL;
|
|
kfree(nfsd_file_hashtbl);
|
|
nfsd_file_hashtbl = NULL;
|
|
goto out;
|
|
}
|
|
|
|
/*
|
|
* Note this can deadlock with nfsd_file_lru_cb.
|
|
*/
|
|
void
|
|
nfsd_file_cache_purge(struct net *net)
|
|
{
|
|
unsigned int i;
|
|
struct nfsd_file *nf;
|
|
struct hlist_node *next;
|
|
LIST_HEAD(dispose);
|
|
bool del;
|
|
|
|
if (!nfsd_file_hashtbl)
|
|
return;
|
|
|
|
for (i = 0; i < NFSD_FILE_HASH_SIZE; i++) {
|
|
struct nfsd_fcache_bucket *nfb = &nfsd_file_hashtbl[i];
|
|
|
|
spin_lock(&nfb->nfb_lock);
|
|
hlist_for_each_entry_safe(nf, next, &nfb->nfb_head, nf_node) {
|
|
if (net && nf->nf_net != net)
|
|
continue;
|
|
del = nfsd_file_unhash_and_release_locked(nf, &dispose);
|
|
|
|
/*
|
|
* Deadlock detected! Something marked this entry as
|
|
* unhased, but hasn't removed it from the hash list.
|
|
*/
|
|
WARN_ON_ONCE(!del);
|
|
}
|
|
spin_unlock(&nfb->nfb_lock);
|
|
nfsd_file_dispose_list(&dispose);
|
|
}
|
|
}
|
|
|
|
void
|
|
nfsd_file_cache_shutdown(void)
|
|
{
|
|
LIST_HEAD(dispose);
|
|
|
|
set_bit(NFSD_FILE_SHUTDOWN, &nfsd_file_lru_flags);
|
|
|
|
lease_unregister_notifier(&nfsd_file_lease_notifier);
|
|
unregister_shrinker(&nfsd_file_shrinker);
|
|
/*
|
|
* make sure all callers of nfsd_file_lru_cb are done before
|
|
* calling nfsd_file_cache_purge
|
|
*/
|
|
cancel_delayed_work_sync(&nfsd_filecache_laundrette);
|
|
nfsd_file_cache_purge(NULL);
|
|
list_lru_destroy(&nfsd_file_lru);
|
|
rcu_barrier();
|
|
fsnotify_put_group(nfsd_file_fsnotify_group);
|
|
nfsd_file_fsnotify_group = NULL;
|
|
kmem_cache_destroy(nfsd_file_slab);
|
|
nfsd_file_slab = NULL;
|
|
fsnotify_wait_marks_destroyed();
|
|
kmem_cache_destroy(nfsd_file_mark_slab);
|
|
nfsd_file_mark_slab = NULL;
|
|
kfree(nfsd_file_hashtbl);
|
|
nfsd_file_hashtbl = NULL;
|
|
}
|
|
|
|
static bool
|
|
nfsd_match_cred(const struct cred *c1, const struct cred *c2)
|
|
{
|
|
int i;
|
|
|
|
if (!uid_eq(c1->fsuid, c2->fsuid))
|
|
return false;
|
|
if (!gid_eq(c1->fsgid, c2->fsgid))
|
|
return false;
|
|
if (c1->group_info == NULL || c2->group_info == NULL)
|
|
return c1->group_info == c2->group_info;
|
|
if (c1->group_info->ngroups != c2->group_info->ngroups)
|
|
return false;
|
|
for (i = 0; i < c1->group_info->ngroups; i++) {
|
|
if (!gid_eq(c1->group_info->gid[i], c2->group_info->gid[i]))
|
|
return false;
|
|
}
|
|
return true;
|
|
}
|
|
|
|
static struct nfsd_file *
|
|
nfsd_file_find_locked(struct inode *inode, unsigned int may_flags,
|
|
unsigned int hashval, struct net *net)
|
|
{
|
|
struct nfsd_file *nf;
|
|
unsigned char need = may_flags & NFSD_FILE_MAY_MASK;
|
|
|
|
hlist_for_each_entry_rcu(nf, &nfsd_file_hashtbl[hashval].nfb_head,
|
|
nf_node) {
|
|
if ((need & nf->nf_may) != need)
|
|
continue;
|
|
if (nf->nf_inode != inode)
|
|
continue;
|
|
if (nf->nf_net != net)
|
|
continue;
|
|
if (!nfsd_match_cred(nf->nf_cred, current_cred()))
|
|
continue;
|
|
if (nfsd_file_get(nf) != NULL)
|
|
return nf;
|
|
}
|
|
return NULL;
|
|
}
|
|
|
|
/**
|
|
* nfsd_file_is_cached - are there any cached open files for this fh?
|
|
* @inode: inode of the file to check
|
|
*
|
|
* Scan the hashtable for open files that match this fh. Returns true if there
|
|
* are any, and false if not.
|
|
*/
|
|
bool
|
|
nfsd_file_is_cached(struct inode *inode)
|
|
{
|
|
bool ret = false;
|
|
struct nfsd_file *nf;
|
|
unsigned int hashval;
|
|
|
|
hashval = (unsigned int)hash_long(inode->i_ino, NFSD_FILE_HASH_BITS);
|
|
|
|
rcu_read_lock();
|
|
hlist_for_each_entry_rcu(nf, &nfsd_file_hashtbl[hashval].nfb_head,
|
|
nf_node) {
|
|
if (inode == nf->nf_inode) {
|
|
ret = true;
|
|
break;
|
|
}
|
|
}
|
|
rcu_read_unlock();
|
|
trace_nfsd_file_is_cached(inode, hashval, (int)ret);
|
|
return ret;
|
|
}
|
|
|
|
__be32
|
|
nfsd_file_acquire(struct svc_rqst *rqstp, struct svc_fh *fhp,
|
|
unsigned int may_flags, struct nfsd_file **pnf)
|
|
{
|
|
__be32 status;
|
|
struct net *net = SVC_NET(rqstp);
|
|
struct nfsd_file *nf, *new;
|
|
struct inode *inode;
|
|
unsigned int hashval;
|
|
|
|
/* FIXME: skip this if fh_dentry is already set? */
|
|
status = fh_verify(rqstp, fhp, S_IFREG,
|
|
may_flags|NFSD_MAY_OWNER_OVERRIDE);
|
|
if (status != nfs_ok)
|
|
return status;
|
|
|
|
inode = d_inode(fhp->fh_dentry);
|
|
hashval = (unsigned int)hash_long(inode->i_ino, NFSD_FILE_HASH_BITS);
|
|
retry:
|
|
rcu_read_lock();
|
|
nf = nfsd_file_find_locked(inode, may_flags, hashval, net);
|
|
rcu_read_unlock();
|
|
if (nf)
|
|
goto wait_for_construction;
|
|
|
|
new = nfsd_file_alloc(inode, may_flags, hashval, net);
|
|
if (!new) {
|
|
trace_nfsd_file_acquire(rqstp, hashval, inode, may_flags,
|
|
NULL, nfserr_jukebox);
|
|
return nfserr_jukebox;
|
|
}
|
|
|
|
spin_lock(&nfsd_file_hashtbl[hashval].nfb_lock);
|
|
nf = nfsd_file_find_locked(inode, may_flags, hashval, net);
|
|
if (nf == NULL)
|
|
goto open_file;
|
|
spin_unlock(&nfsd_file_hashtbl[hashval].nfb_lock);
|
|
nfsd_file_slab_free(&new->nf_rcu);
|
|
|
|
wait_for_construction:
|
|
wait_on_bit(&nf->nf_flags, NFSD_FILE_PENDING, TASK_UNINTERRUPTIBLE);
|
|
|
|
/* Did construction of this file fail? */
|
|
if (!test_bit(NFSD_FILE_HASHED, &nf->nf_flags)) {
|
|
nfsd_file_put_noref(nf);
|
|
goto retry;
|
|
}
|
|
|
|
this_cpu_inc(nfsd_file_cache_hits);
|
|
|
|
if (!(may_flags & NFSD_MAY_NOT_BREAK_LEASE)) {
|
|
bool write = (may_flags & NFSD_MAY_WRITE);
|
|
|
|
if (test_bit(NFSD_FILE_BREAK_READ, &nf->nf_flags) ||
|
|
(test_bit(NFSD_FILE_BREAK_WRITE, &nf->nf_flags) && write)) {
|
|
status = nfserrno(nfsd_open_break_lease(
|
|
file_inode(nf->nf_file), may_flags));
|
|
if (status == nfs_ok) {
|
|
clear_bit(NFSD_FILE_BREAK_READ, &nf->nf_flags);
|
|
if (write)
|
|
clear_bit(NFSD_FILE_BREAK_WRITE,
|
|
&nf->nf_flags);
|
|
}
|
|
}
|
|
}
|
|
out:
|
|
if (status == nfs_ok) {
|
|
*pnf = nf;
|
|
} else {
|
|
nfsd_file_put(nf);
|
|
nf = NULL;
|
|
}
|
|
|
|
trace_nfsd_file_acquire(rqstp, hashval, inode, may_flags, nf, status);
|
|
return status;
|
|
open_file:
|
|
nf = new;
|
|
/* Take reference for the hashtable */
|
|
atomic_inc(&nf->nf_ref);
|
|
__set_bit(NFSD_FILE_HASHED, &nf->nf_flags);
|
|
__set_bit(NFSD_FILE_PENDING, &nf->nf_flags);
|
|
list_lru_add(&nfsd_file_lru, &nf->nf_lru);
|
|
hlist_add_head_rcu(&nf->nf_node, &nfsd_file_hashtbl[hashval].nfb_head);
|
|
++nfsd_file_hashtbl[hashval].nfb_count;
|
|
nfsd_file_hashtbl[hashval].nfb_maxcount = max(nfsd_file_hashtbl[hashval].nfb_maxcount,
|
|
nfsd_file_hashtbl[hashval].nfb_count);
|
|
spin_unlock(&nfsd_file_hashtbl[hashval].nfb_lock);
|
|
atomic_long_inc(&nfsd_filecache_count);
|
|
|
|
nf->nf_mark = nfsd_file_mark_find_or_create(nf);
|
|
if (nf->nf_mark)
|
|
status = nfsd_open_verified(rqstp, fhp, S_IFREG,
|
|
may_flags, &nf->nf_file);
|
|
else
|
|
status = nfserr_jukebox;
|
|
/*
|
|
* If construction failed, or we raced with a call to unlink()
|
|
* then unhash.
|
|
*/
|
|
if (status != nfs_ok || inode->i_nlink == 0) {
|
|
bool do_free;
|
|
spin_lock(&nfsd_file_hashtbl[hashval].nfb_lock);
|
|
do_free = nfsd_file_unhash(nf);
|
|
spin_unlock(&nfsd_file_hashtbl[hashval].nfb_lock);
|
|
if (do_free)
|
|
nfsd_file_put_noref(nf);
|
|
}
|
|
clear_bit_unlock(NFSD_FILE_PENDING, &nf->nf_flags);
|
|
smp_mb__after_atomic();
|
|
wake_up_bit(&nf->nf_flags, NFSD_FILE_PENDING);
|
|
goto out;
|
|
}
|
|
|
|
/*
|
|
* Note that fields may be added, removed or reordered in the future. Programs
|
|
* scraping this file for info should test the labels to ensure they're
|
|
* getting the correct field.
|
|
*/
|
|
static int nfsd_file_cache_stats_show(struct seq_file *m, void *v)
|
|
{
|
|
unsigned int i, count = 0, longest = 0;
|
|
unsigned long hits = 0;
|
|
|
|
/*
|
|
* No need for spinlocks here since we're not terribly interested in
|
|
* accuracy. We do take the nfsd_mutex simply to ensure that we
|
|
* don't end up racing with server shutdown
|
|
*/
|
|
mutex_lock(&nfsd_mutex);
|
|
if (nfsd_file_hashtbl) {
|
|
for (i = 0; i < NFSD_FILE_HASH_SIZE; i++) {
|
|
count += nfsd_file_hashtbl[i].nfb_count;
|
|
longest = max(longest, nfsd_file_hashtbl[i].nfb_count);
|
|
}
|
|
}
|
|
mutex_unlock(&nfsd_mutex);
|
|
|
|
for_each_possible_cpu(i)
|
|
hits += per_cpu(nfsd_file_cache_hits, i);
|
|
|
|
seq_printf(m, "total entries: %u\n", count);
|
|
seq_printf(m, "longest chain: %u\n", longest);
|
|
seq_printf(m, "cache hits: %lu\n", hits);
|
|
return 0;
|
|
}
|
|
|
|
int nfsd_file_cache_stats_open(struct inode *inode, struct file *file)
|
|
{
|
|
return single_open(file, nfsd_file_cache_stats_show, NULL);
|
|
}
|