mirror of
https://github.com/torvalds/linux.git
synced 2024-11-30 08:01:59 +00:00
f2e70d8f2f
Before this patch, when a glock was locked, the very first holder on the queue would unlock the lockref and call the go_instantiate glops function (if one existed), unless GL_SKIP was specified. When we introduced the new node-scope concept, we allowed multiple holders to lock glocks in EX mode and share the lock. But node-scope introduced a new problem: if the first holder has GL_SKIP and the next one does NOT, since it is not the first holder on the queue, the go_instantiate op was not called. Eventually the GL_SKIP holder may call the instantiate sub-function (e.g. gfs2_rgrp_bh_get) but there was still a window of time in which another non-GL_SKIP holder assumes the instantiate function had been called by the first holder. In the case of rgrp glocks, this led to a NULL pointer dereference on the buffer_heads. This patch tries to fix the problem by introducing two new glock flags: GLF_INSTANTIATE_NEEDED, which keeps track of when the instantiate function needs to be called to "fill in" or "read in" the object before it is referenced, and GLF_INSTANTIATE_IN_PROG, which is used to determine when a process is in the process of reading in the object. Whenever a function needs to reference the object, it checks the GLF_INSTANTIATE_NEEDED flag, and if set, it sets GLF_INSTANTIATE_IN_PROG and calls the glops "go_instantiate" function. As before, the gl_lockref spin_lock is unlocked during the IO operation, which may take a relatively long amount of time to complete. While unlocked, if another process determines go_instantiate is still needed, it sees GLF_INSTANTIATE_IN_PROG is set, and waits for the go_instantiate glop operation to be completed. Once GLF_INSTANTIATE_IN_PROG is cleared, it needs to check GLF_INSTANTIATE_NEEDED again because the other process's go_instantiate operation may not have been successful. Functions that previously called the instantiate sub-functions now call directly into gfs2_instantiate so the new bits are managed properly.
Signed-off-by: Bob Peterson <rpeterso@redhat.com> Signed-off-by: Andreas Gruenbacher <agruenba@redhat.com>
360 lines
11 KiB
C
360 lines
11 KiB
C
/* SPDX-License-Identifier: GPL-2.0-only */
|
|
/*
|
|
* Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
|
|
* Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved.
|
|
*/
|
|
|
|
#ifndef __GLOCK_DOT_H__
|
|
#define __GLOCK_DOT_H__
|
|
|
|
#include <linux/sched.h>
|
|
#include <linux/parser.h>
|
|
#include "incore.h"
|
|
#include "util.h"
|
|
|
|
/*
 * Token identifiers for the hostdata option parser.  The enumerators use
 * the default numbering, so they run consecutively from 0 (Opt_jid) to
 * 4 (Opt_err).
 */
enum {
	Opt_jid,
	Opt_id,
	Opt_first,
	Opt_nodir,
	Opt_err,
};
|
|
|
|
/*
|
|
* lm_lockname types
|
|
*/
|
|
|
|
/* Lock name type codes, numbered consecutively from 0x00 to 0x09. */
#define LM_TYPE_RESERVED	0x00
#define LM_TYPE_NONDISK		0x01
#define LM_TYPE_INODE		0x02
#define LM_TYPE_RGRP		0x03
#define LM_TYPE_META		0x04
#define LM_TYPE_IOPEN		0x05
#define LM_TYPE_FLOCK		0x06
#define LM_TYPE_PLOCK		0x07
#define LM_TYPE_QUOTA		0x08
#define LM_TYPE_JOURNAL		0x09
|
|
|
|
/*
|
|
* lm_lock() states
|
|
*
|
|
* SHARED is compatible with SHARED, not with DEFERRED or EX.
|
|
* DEFERRED is compatible with DEFERRED, not with SHARED or EX.
|
|
*/
|
|
|
|
/* Lock state codes; the four values fit in two bits (0..3). */
#define LM_ST_UNLOCKED		0
#define LM_ST_EXCLUSIVE		1
#define LM_ST_DEFERRED		2
#define LM_ST_SHARED		3
|
|
|
|
/*
|
|
* lm_lock() flags
|
|
*
|
|
* LM_FLAG_TRY
|
|
* Don't wait to acquire the lock if it can't be granted immediately.
|
|
*
|
|
* LM_FLAG_TRY_1CB
|
|
* Send one blocking callback if TRY is set and the lock is not granted.
|
|
*
|
|
* LM_FLAG_NOEXP
|
|
* GFS sets this flag on lock requests it makes while doing journal recovery.
|
|
* These special requests should not be blocked due to the recovery like
|
|
* ordinary locks would be.
|
|
*
|
|
* LM_FLAG_ANY
|
|
* A SHARED request may also be granted in DEFERRED, or a DEFERRED request may
|
|
* also be granted in SHARED. The preferred state is whichever is compatible
|
|
* with other granted locks, or the specified state if no other locks exist.
|
|
*
|
|
* LM_FLAG_PRIORITY
|
|
* Override fairness considerations. Suppose a lock is held in a shared state
|
|
* and there is a pending request for the deferred state. A shared lock
|
|
* request with the priority flag would be allowed to bypass the deferred
|
|
* request and directly join the other shared lock. A shared lock request
|
|
* without the priority flag might be forced to wait until the deferred
|
|
* request had acquired and released the lock.
|
|
*
|
|
* LM_FLAG_NODE_SCOPE
|
|
* This holder agrees to share the lock within this node. In other words,
|
|
* the glock is held in EX mode according to DLM, but local holders on the
|
|
* same node can share it.
|
|
*/
|
|
|
|
/*
 * Flag bits.  The LM_FLAG_* values occupy 0x0001 through 0x0020; the GL_*
 * values continue in the same bit space from 0x0040 upwards.  Bit 0x0200
 * is not assigned in this list.
 */
#define LM_FLAG_TRY		0x0001
#define LM_FLAG_TRY_1CB		0x0002
#define LM_FLAG_NOEXP		0x0004
#define LM_FLAG_ANY		0x0008
#define LM_FLAG_PRIORITY	0x0010
#define LM_FLAG_NODE_SCOPE	0x0020
#define GL_ASYNC		0x0040
#define GL_EXACT		0x0080
#define GL_SKIP			0x0100
#define GL_NOCACHE		0x0400
|
|
|
|
/*
|
|
* lm_async_cb return flags
|
|
*
|
|
* LM_OUT_ST_MASK
|
|
* Masks the lower two bits of lock state in the returned value.
|
|
*
|
|
* LM_OUT_CANCELED
|
|
* The lock request was canceled.
|
|
*
|
|
*/
|
|
|
|
/*
 * Bits of the lm_async_cb return value, listed in ascending order:
 * the two low bits carry the lock state, 0x4 and 0x8 are status bits.
 */
#define LM_OUT_ST_MASK		0x00000003
#define LM_OUT_ERROR		0x00000004
#define LM_OUT_CANCELED		0x00000008
|
|
|
|
/*
|
|
* lm_recovery_done() messages
|
|
*/
|
|
|
|
/* Recovery-done message codes. */
#define LM_RD_GAVEUP		308
#define LM_RD_SUCCESS		309

/*
 * GLR_* code; gfs2_glock_nq_init() in this header documents its return
 * value as "0, GLR_*, or errno".
 */
#define GLR_TRYFAILED		13
|
|
|
|
/*
 * Glock hold-time constants, all of type long.  Every value except
 * GL_GLOCK_MIN_HOLD is derived from HZ.
 *
 * Each expansion carries an outer pair of parentheses so the cast can never
 * be separated from its operand by the expression the macro is used in.
 * Without them, "sizeof GL_GLOCK_MIN_HOLD" parses as "(sizeof(long))(10)"
 * and fails to compile.
 */
#define GL_GLOCK_MAX_HOLD	((long)(HZ / 5))
#define GL_GLOCK_DFT_HOLD	((long)(HZ / 5))
#define GL_GLOCK_MIN_HOLD	((long)(10))
#define GL_GLOCK_HOLD_INCR	((long)(HZ / 20))
#define GL_GLOCK_HOLD_DECR	((long)(HZ / 40))
|
|
|
|
struct lm_lockops {
|
|
const char *lm_proto_name;
|
|
int (*lm_mount) (struct gfs2_sbd *sdp, const char *table);
|
|
void (*lm_first_done) (struct gfs2_sbd *sdp);
|
|
void (*lm_recovery_result) (struct gfs2_sbd *sdp, unsigned int jid,
|
|
unsigned int result);
|
|
void (*lm_unmount) (struct gfs2_sbd *sdp);
|
|
void (*lm_withdraw) (struct gfs2_sbd *sdp);
|
|
void (*lm_put_lock) (struct gfs2_glock *gl);
|
|
int (*lm_lock) (struct gfs2_glock *gl, unsigned int req_state,
|
|
unsigned int flags);
|
|
void (*lm_cancel) (struct gfs2_glock *gl);
|
|
const match_table_t *lm_tokens;
|
|
};
|
|
|
|
/* Workqueue pointer declared here; its definition lives outside this header. */
extern struct workqueue_struct *gfs2_delete_workqueue;
|
|
static inline struct gfs2_holder *gfs2_glock_is_locked_by_me(struct gfs2_glock *gl)
|
|
{
|
|
struct gfs2_holder *gh;
|
|
struct pid *pid;
|
|
|
|
/* Look in glock's list of holders for one with current task as owner */
|
|
spin_lock(&gl->gl_lockref.lock);
|
|
pid = task_pid(current);
|
|
list_for_each_entry(gh, &gl->gl_holders, gh_list) {
|
|
if (!test_bit(HIF_HOLDER, &gh->gh_iflags))
|
|
break;
|
|
if (test_bit(HIF_MAY_DEMOTE, &gh->gh_iflags))
|
|
continue;
|
|
if (gh->gh_owner_pid == pid)
|
|
goto out;
|
|
}
|
|
gh = NULL;
|
|
out:
|
|
spin_unlock(&gl->gl_lockref.lock);
|
|
|
|
return gh;
|
|
}
|
|
|
|
static inline int gfs2_glock_is_held_excl(struct gfs2_glock *gl)
|
|
{
|
|
return gl->gl_state == LM_ST_EXCLUSIVE;
|
|
}
|
|
|
|
static inline int gfs2_glock_is_held_dfrd(struct gfs2_glock *gl)
|
|
{
|
|
return gl->gl_state == LM_ST_DEFERRED;
|
|
}
|
|
|
|
static inline int gfs2_glock_is_held_shrd(struct gfs2_glock *gl)
|
|
{
|
|
return gl->gl_state == LM_ST_SHARED;
|
|
}
|
|
|
|
static inline struct address_space *gfs2_glock2aspace(struct gfs2_glock *gl)
|
|
{
|
|
if (gl->gl_ops->go_flags & GLOF_ASPACE)
|
|
return (struct address_space *)(gl + 1);
|
|
return NULL;
|
|
}
|
|
|
|
/*
 * Glock get/hold/put entry points.  These are prototypes only; the
 * definitions are not part of this header.
 */
int gfs2_glock_get(struct gfs2_sbd *sdp, u64 number,
		   const struct gfs2_glock_operations *glops,
		   int create, struct gfs2_glock **glp);
void gfs2_glock_hold(struct gfs2_glock *gl);
void gfs2_glock_put(struct gfs2_glock *gl);
void gfs2_glock_queue_put(struct gfs2_glock *gl);
|
|
|
|
extern void __gfs2_holder_init(struct gfs2_glock *gl, unsigned int state,
|
|
u16 flags, struct gfs2_holder *gh,
|
|
unsigned long ip);
|
|
static inline void gfs2_holder_init(struct gfs2_glock *gl, unsigned int state,
|
|
u16 flags, struct gfs2_holder *gh) {
|
|
__gfs2_holder_init(gl, state, flags, gh, _RET_IP_);
|
|
}
|
|
|
|
extern void gfs2_holder_reinit(unsigned int state, u16 flags,
|
|
struct gfs2_holder *gh);
|
|
extern void gfs2_holder_uninit(struct gfs2_holder *gh);
|
|
extern int gfs2_glock_nq(struct gfs2_holder *gh);
|
|
extern int gfs2_glock_poll(struct gfs2_holder *gh);
|
|
extern int gfs2_instantiate(struct gfs2_holder *gh);
|
|
extern int gfs2_glock_wait(struct gfs2_holder *gh);
|
|
extern int gfs2_glock_async_wait(unsigned int num_gh, struct gfs2_holder *ghs);
|
|
extern void gfs2_glock_dq(struct gfs2_holder *gh);
|
|
extern void gfs2_glock_dq_wait(struct gfs2_holder *gh);
|
|
extern void gfs2_glock_dq_uninit(struct gfs2_holder *gh);
|
|
extern int gfs2_glock_nq_num(struct gfs2_sbd *sdp, u64 number,
|
|
const struct gfs2_glock_operations *glops,
|
|
unsigned int state, u16 flags,
|
|
struct gfs2_holder *gh);
|
|
extern int gfs2_glock_nq_m(unsigned int num_gh, struct gfs2_holder *ghs);
|
|
extern void gfs2_glock_dq_m(unsigned int num_gh, struct gfs2_holder *ghs);
|
|
extern void gfs2_dump_glock(struct seq_file *seq, struct gfs2_glock *gl,
|
|
bool fsid);
|
|
/*
 * GLOCK_BUG_ON - if @x is true, dump glock @gl and then call BUG().
 * @x is evaluated exactly once.
 */
#define GLOCK_BUG_ON(gl,x)					\
do {								\
	if (unlikely(x)) {					\
		gfs2_dump_glock(NULL, gl, true);		\
		BUG();						\
	}							\
} while(0)
|
|
/*
 * gfs2_glock_assert_warn - if @x is false, dump glock @gl and then hand the
 * same expression to gfs2_assert_warn().
 *
 * Note: @x appears twice in the expansion (once in the test, once in the
 * gfs2_assert_warn() call), so it should be free of side effects.
 */
#define gfs2_glock_assert_warn(gl, x)					\
do {									\
	if (unlikely(!(x))) {						\
		gfs2_dump_glock(NULL, gl, true);			\
		gfs2_assert_warn((gl)->gl_name.ln_sbd, (x));		\
	}								\
} while (0)
|
|
/*
 * gfs2_glock_assert_withdraw - if @x is false, dump glock @gl and then hand
 * the same expression to gfs2_assert_withdraw().
 *
 * Note: @x appears twice in the expansion (once in the test, once in the
 * gfs2_assert_withdraw() call), so it should be free of side effects.
 */
#define gfs2_glock_assert_withdraw(gl, x)				\
do {									\
	if (unlikely(!(x))) {						\
		gfs2_dump_glock(NULL, gl, true);			\
		gfs2_assert_withdraw((gl)->gl_name.ln_sbd, (x));	\
	}								\
} while (0)
|
|
|
|
/*
 * printf-style output helper taking a seq_file.  The __printf(2, 3)
 * annotation marks argument 2 (@fmt) as the format string and argument 3
 * as the start of the variadic arguments.
 */
__printf(2, 3)
void gfs2_print_dbg(struct seq_file *seq, const char *fmt, ...);
|
|
|
|
/**
|
|
* gfs2_glock_nq_init - initialize a holder and enqueue it on a glock
|
|
* @gl: the glock
|
|
* @state: the state we're requesting
|
|
* @flags: the modifier flags
|
|
* @gh: the holder structure
|
|
*
|
|
* Returns: 0, GLR_*, or errno
|
|
*/
|
|
|
|
static inline int gfs2_glock_nq_init(struct gfs2_glock *gl,
|
|
unsigned int state, u16 flags,
|
|
struct gfs2_holder *gh)
|
|
{
|
|
int error;
|
|
|
|
__gfs2_holder_init(gl, state, flags, gh, _RET_IP_);
|
|
|
|
error = gfs2_glock_nq(gh);
|
|
if (error)
|
|
gfs2_holder_uninit(gh);
|
|
|
|
return error;
|
|
}
|
|
|
|
/*
 * Glock callback/completion, delete-work, and glock management entry
 * points.  Prototypes only; the definitions are not part of this header.
 */
void gfs2_glock_cb(struct gfs2_glock *gl, unsigned int state);
void gfs2_glock_complete(struct gfs2_glock *gl, int ret);
bool gfs2_queue_delete_work(struct gfs2_glock *gl, unsigned long delay);
void gfs2_cancel_delete_work(struct gfs2_glock *gl);
bool gfs2_delete_work_queued(const struct gfs2_glock *gl);
void gfs2_flush_delete_work(struct gfs2_sbd *sdp);
void gfs2_gl_hash_clear(struct gfs2_sbd *sdp);
void gfs2_glock_finish_truncate(struct gfs2_inode *ip);
void gfs2_glock_thaw(struct gfs2_sbd *sdp);
void gfs2_glock_add_to_lru(struct gfs2_glock *gl);
void gfs2_glock_free(struct gfs2_glock *gl);
|
|
|
|
/* Glock subsystem init/exit (prototypes only). */
int __init gfs2_glock_init(void);
void gfs2_glock_exit(void);

/* debugfs file and registration hooks (prototypes only). */
void gfs2_create_debugfs_file(struct gfs2_sbd *sdp);
void gfs2_delete_debugfs_file(struct gfs2_sbd *sdp);
void gfs2_register_debugfs(void);
void gfs2_unregister_debugfs(void);
|
|
|
|
/* Lock operations table (struct lm_lockops); defined outside this header. */
extern const struct lm_lockops gfs2_dlm_ops;
|
|
|
|
static inline void gfs2_holder_mark_uninitialized(struct gfs2_holder *gh)
|
|
{
|
|
gh->gh_gl = NULL;
|
|
}
|
|
|
|
static inline bool gfs2_holder_initialized(struct gfs2_holder *gh)
|
|
{
|
|
return gh->gh_gl;
|
|
}
|
|
|
|
static inline bool gfs2_holder_queued(struct gfs2_holder *gh)
|
|
{
|
|
return !list_empty(&gh->gh_list);
|
|
}
|
|
|
|
/**
|
|
* glock_set_object - set the gl_object field of a glock
|
|
* @gl: the glock
|
|
* @object: the object
|
|
*/
|
|
static inline void glock_set_object(struct gfs2_glock *gl, void *object)
|
|
{
|
|
spin_lock(&gl->gl_lockref.lock);
|
|
if (gfs2_assert_warn(gl->gl_name.ln_sbd, gl->gl_object == NULL))
|
|
gfs2_dump_glock(NULL, gl, true);
|
|
gl->gl_object = object;
|
|
spin_unlock(&gl->gl_lockref.lock);
|
|
}
|
|
|
|
/**
|
|
* glock_clear_object - clear the gl_object field of a glock
|
|
* @gl: the glock
|
|
* @object: the object
|
|
*
|
|
* I'd love to similarly add this:
|
|
* else if (gfs2_assert_warn(gl->gl_sbd, gl->gl_object == object))
|
|
* gfs2_dump_glock(NULL, gl, true);
|
|
* Unfortunately, that's not possible because as soon as gfs2_delete_inode
|
|
* frees the block in the rgrp, another process can reassign it for an I_NEW
|
|
* inode in gfs2_create_inode because that calls new_inode, not gfs2_iget.
|
|
* That means gfs2_delete_inode may subsequently try to call this function
|
|
* for a glock that's already pointing to a brand new inode. If we clear the
|
|
* new inode's gl_object, we'll introduce metadata corruption. Function
|
|
* gfs2_delete_inode calls clear_inode which calls gfs2_clear_inode which also
|
|
* tries to clear gl_object, so it's more than just gfs2_delete_inode.
|
|
*
|
|
*/
|
|
static inline void glock_clear_object(struct gfs2_glock *gl, void *object)
|
|
{
|
|
spin_lock(&gl->gl_lockref.lock);
|
|
if (gl->gl_object == object)
|
|
gl->gl_object = NULL;
|
|
spin_unlock(&gl->gl_lockref.lock);
|
|
}
|
|
|
|
static inline void gfs2_holder_allow_demote(struct gfs2_holder *gh)
|
|
{
|
|
struct gfs2_glock *gl = gh->gh_gl;
|
|
|
|
spin_lock(&gl->gl_lockref.lock);
|
|
set_bit(HIF_MAY_DEMOTE, &gh->gh_iflags);
|
|
spin_unlock(&gl->gl_lockref.lock);
|
|
}
|
|
|
|
static inline void gfs2_holder_disallow_demote(struct gfs2_holder *gh)
|
|
{
|
|
struct gfs2_glock *gl = gh->gh_gl;
|
|
|
|
spin_lock(&gl->gl_lockref.lock);
|
|
clear_bit(HIF_MAY_DEMOTE, &gh->gh_iflags);
|
|
spin_unlock(&gl->gl_lockref.lock);
|
|
}
|
|
|
|
/*
 * Per-glock inode deletion bookkeeping keyed by a generation number.
 * Prototypes only; the definitions are not part of this header.
 */
void gfs2_inode_remember_delete(struct gfs2_glock *gl, u64 generation);
bool gfs2_inode_already_deleted(struct gfs2_glock *gl, u64 generation);
|
|
|
|
#endif /* __GLOCK_DOT_H__ */
|