forked from Minki/linux
e091eab028
In some cases, ocfs2_iget() reads the data of an inode that has already been deleted for some reason. That makes the system panic, so we should check whether the inode has been deleted and tell the caller that it is a bad inode. For example, ocfs2 is used as the backend of nfs, and the client is nfsv3. The issue can be reproduced by the following steps on the nfs server side, with the path ..../patha/pathb. Step 1: Process A was scheduled before calling the function fh_verify. Step 2: Process B is removing 'pathb' and has just completed the call to the function dput. The dentry of 'pathb' has then been deleted from the dcache, and all of its ancestors have been deleted as well. The link between the dentry and the inode was removed through the function hlist_del_init. The following is the call stack. dentry_iput->hlist_del_init(&dentry->d_u.d_alias) At this time, the inode is still in the dcache. Step 3: Process A calls the function ocfs2_get_dentry, which gets the inode from the dcache. The refcount of the inode is then 1. The following is the call stack. nfsd3_proc_getacl->fh_verify->exportfs_decode_fh->fh_to_dentry(ocfs2_get_dentry) Step 4: Dirty pages are flushed by bdi threads, so the inode of 'patha' is evicted and this directory is deleted. But the inode of 'pathb' can't be evicted, because its refcount is 1. Step 5: Process A keeps running and calls the function reconnect_path (in exportfs_decode_fh), which calls the ocfs2 function ocfs2_get_parent. It gets the block number of the parent directory (patha) by looking up the name "..", then reads the data from disk by that block number. But this inode has been deleted, so the system panics. Process A Process B 1. in nfsd3_proc_getacl | 2. | dput 3. fh_to_dentry(ocfs2_get_dentry) | 4. bdi flush dirty cache | 5.
ocfs2_iget | [283465.542049] OCFS2: ERROR (device sdp): ocfs2_validate_inode_block: Invalid dinode #580640: OCFS2_VALID_FL not set [283465.545490] Kernel panic - not syncing: OCFS2: (device sdp): panic forced after error [283465.546889] CPU: 5 PID: 12416 Comm: nfsd Tainted: G W 4.1.12-124.18.6.el6uek.bug28762940v3.x86_64 #2 [283465.548382] Hardware name: VMware, Inc. VMware Virtual Platform/440BX Desktop Reference Platform, BIOS 6.00 09/21/2015 [283465.549657] 0000000000000000 ffff8800a56fb7b8 ffffffff816e839c ffffffffa0514758 [283465.550392] 000000000008dc20 ffff8800a56fb838 ffffffff816e62d3 0000000000000008 [283465.551056] ffff880000000010 ffff8800a56fb848 ffff8800a56fb7e8 ffff88005df9f000 [283465.551710] Call Trace: [283465.552516] [<ffffffff816e839c>] dump_stack+0x63/0x81 [283465.553291] [<ffffffff816e62d3>] panic+0xcb/0x21b [283465.554037] [<ffffffffa04e66b0>] ocfs2_handle_error+0xf0/0xf0 [ocfs2] [283465.554882] [<ffffffffa04e7737>] __ocfs2_error+0x67/0x70 [ocfs2] [283465.555768] [<ffffffffa049c0f9>] ocfs2_validate_inode_block+0x229/0x230 [ocfs2] [283465.556683] [<ffffffffa047bcbc>] ocfs2_read_blocks+0x46c/0x7b0 [ocfs2] [283465.557408] [<ffffffffa049bed0>] ? ocfs2_inode_cache_io_unlock+0x20/0x20 [ocfs2] [283465.557973] [<ffffffffa049f0eb>] ocfs2_read_inode_block_full+0x3b/0x60 [ocfs2] [283465.558525] [<ffffffffa049f5ba>] ocfs2_iget+0x4aa/0x880 [ocfs2] [283465.559082] [<ffffffffa049146e>] ocfs2_get_parent+0x9e/0x220 [ocfs2] [283465.559622] [<ffffffff81297c05>] reconnect_path+0xb5/0x300 [283465.560156] [<ffffffff81297f46>] exportfs_decode_fh+0xf6/0x2b0 [283465.560708] [<ffffffffa062faf0>] ? nfsd_proc_getattr+0xa0/0xa0 [nfsd] [283465.561262] [<ffffffff810a8196>] ? prepare_creds+0x26/0x110 [283465.561932] [<ffffffffa0630860>] fh_verify+0x350/0x660 [nfsd] [283465.562862] [<ffffffffa0637804>] ? 
nfsd_cache_lookup+0x44/0x630 [nfsd] [283465.563697] [<ffffffffa063a8b9>] nfsd3_proc_getattr+0x69/0xf0 [nfsd] [283465.564510] [<ffffffffa062cf60>] nfsd_dispatch+0xe0/0x290 [nfsd] [283465.565358] [<ffffffffa05eb892>] ? svc_tcp_adjust_wspace+0x12/0x30 [sunrpc] [283465.566272] [<ffffffffa05ea652>] svc_process_common+0x412/0x6a0 [sunrpc] [283465.567155] [<ffffffffa05eaa03>] svc_process+0x123/0x210 [sunrpc] [283465.568020] [<ffffffffa062c90f>] nfsd+0xff/0x170 [nfsd] [283465.568962] [<ffffffffa062c810>] ? nfsd_destroy+0x80/0x80 [nfsd] [283465.570112] [<ffffffff810a622b>] kthread+0xcb/0xf0 [283465.571099] [<ffffffff810a6160>] ? kthread_create_on_node+0x180/0x180 [283465.572114] [<ffffffff816f11b8>] ret_from_fork+0x58/0x90 [283465.573156] [<ffffffff810a6160>] ? kthread_create_on_node+0x180/0x180 Link: http://lkml.kernel.org/r/1554185919-3010-1-git-send-email-sunny.s.zhang@oracle.com Signed-off-by: Shuning Zhang <sunny.s.zhang@oracle.com> Reviewed-by: Joseph Qi <jiangqi903@gmail.com> Cc: Mark Fasheh <mark@fasheh.com> Cc: Joel Becker <jlbec@evilplan.org> Cc: Junxiao Bi <junxiao.bi@oracle.com> Cc: Changwei Ge <gechangwei@live.cn> Cc: piaojun <piaojun@huawei.com> Cc: "Gang He" <ghe@suse.com> Cc: <stable@vger.kernel.org> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
300 lines
6.8 KiB
C
300 lines
6.8 KiB
C
/* -*- mode: c; c-basic-offset: 8; -*-
|
|
* vim: noexpandtab sw=8 ts=8 sts=0:
|
|
*
|
|
* export.c
|
|
*
|
|
* Functions to facilitate NFS exporting
|
|
*
|
|
* Copyright (C) 2002, 2005 Oracle. All rights reserved.
|
|
*
|
|
* This program is free software; you can redistribute it and/or
|
|
* modify it under the terms of the GNU General Public
|
|
* License as published by the Free Software Foundation; either
|
|
* version 2 of the License, or (at your option) any later version.
|
|
*
|
|
* This program is distributed in the hope that it will be useful,
|
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
|
* General Public License for more details.
|
|
*
|
|
* You should have received a copy of the GNU General Public
|
|
* License along with this program; if not, write to the
|
|
* Free Software Foundation, Inc., 59 Temple Place - Suite 330,
|
|
 * Boston, MA 02111-1307, USA.
|
|
*/
|
|
|
|
#include <linux/fs.h>
|
|
#include <linux/types.h>
|
|
|
|
#include <cluster/masklog.h>
|
|
|
|
#include "ocfs2.h"
|
|
|
|
#include "alloc.h"
|
|
#include "dir.h"
|
|
#include "dlmglue.h"
|
|
#include "dcache.h"
|
|
#include "export.h"
|
|
#include "inode.h"
|
|
|
|
#include "buffer_head_io.h"
|
|
#include "suballoc.h"
|
|
#include "ocfs2_trace.h"
|
|
|
|
struct ocfs2_inode_handle
|
|
{
|
|
u64 ih_blkno;
|
|
u32 ih_generation;
|
|
};
|
|
|
|
static struct dentry *ocfs2_get_dentry(struct super_block *sb,
|
|
struct ocfs2_inode_handle *handle)
|
|
{
|
|
struct inode *inode;
|
|
struct ocfs2_super *osb = OCFS2_SB(sb);
|
|
u64 blkno = handle->ih_blkno;
|
|
int status, set;
|
|
struct dentry *result;
|
|
|
|
trace_ocfs2_get_dentry_begin(sb, handle, (unsigned long long)blkno);
|
|
|
|
if (blkno == 0) {
|
|
result = ERR_PTR(-ESTALE);
|
|
goto bail;
|
|
}
|
|
|
|
inode = ocfs2_ilookup(sb, blkno);
|
|
/*
|
|
* If the inode exists in memory, we only need to check it's
|
|
* generation number
|
|
*/
|
|
if (inode)
|
|
goto check_gen;
|
|
|
|
/*
|
|
* This will synchronize us against ocfs2_delete_inode() on
|
|
* all nodes
|
|
*/
|
|
status = ocfs2_nfs_sync_lock(osb, 1);
|
|
if (status < 0) {
|
|
mlog(ML_ERROR, "getting nfs sync lock(EX) failed %d\n", status);
|
|
goto check_err;
|
|
}
|
|
|
|
status = ocfs2_test_inode_bit(osb, blkno, &set);
|
|
if (status < 0) {
|
|
if (status == -EINVAL) {
|
|
/*
|
|
* The blkno NFS gave us doesn't even show up
|
|
* as an inode, we return -ESTALE to be
|
|
* nice
|
|
*/
|
|
status = -ESTALE;
|
|
} else
|
|
mlog(ML_ERROR, "test inode bit failed %d\n", status);
|
|
goto unlock_nfs_sync;
|
|
}
|
|
|
|
trace_ocfs2_get_dentry_test_bit(status, set);
|
|
/* If the inode allocator bit is clear, this inode must be stale */
|
|
if (!set) {
|
|
status = -ESTALE;
|
|
goto unlock_nfs_sync;
|
|
}
|
|
|
|
inode = ocfs2_iget(osb, blkno, 0, 0);
|
|
|
|
unlock_nfs_sync:
|
|
ocfs2_nfs_sync_unlock(osb, 1);
|
|
|
|
check_err:
|
|
if (status < 0) {
|
|
if (status == -ESTALE) {
|
|
trace_ocfs2_get_dentry_stale((unsigned long long)blkno,
|
|
handle->ih_generation);
|
|
}
|
|
result = ERR_PTR(status);
|
|
goto bail;
|
|
}
|
|
|
|
if (IS_ERR(inode)) {
|
|
mlog_errno(PTR_ERR(inode));
|
|
result = ERR_CAST(inode);
|
|
goto bail;
|
|
}
|
|
|
|
check_gen:
|
|
if (handle->ih_generation != inode->i_generation) {
|
|
trace_ocfs2_get_dentry_generation((unsigned long long)blkno,
|
|
handle->ih_generation,
|
|
inode->i_generation);
|
|
iput(inode);
|
|
result = ERR_PTR(-ESTALE);
|
|
goto bail;
|
|
}
|
|
|
|
result = d_obtain_alias(inode);
|
|
if (IS_ERR(result))
|
|
mlog_errno(PTR_ERR(result));
|
|
|
|
bail:
|
|
trace_ocfs2_get_dentry_end(result);
|
|
return result;
|
|
}
|
|
|
|
static struct dentry *ocfs2_get_parent(struct dentry *child)
|
|
{
|
|
int status;
|
|
u64 blkno;
|
|
struct dentry *parent;
|
|
struct inode *dir = d_inode(child);
|
|
int set;
|
|
|
|
trace_ocfs2_get_parent(child, child->d_name.len, child->d_name.name,
|
|
(unsigned long long)OCFS2_I(dir)->ip_blkno);
|
|
|
|
status = ocfs2_nfs_sync_lock(OCFS2_SB(dir->i_sb), 1);
|
|
if (status < 0) {
|
|
mlog(ML_ERROR, "getting nfs sync lock(EX) failed %d\n", status);
|
|
parent = ERR_PTR(status);
|
|
goto bail;
|
|
}
|
|
|
|
status = ocfs2_inode_lock(dir, NULL, 0);
|
|
if (status < 0) {
|
|
if (status != -ENOENT)
|
|
mlog_errno(status);
|
|
parent = ERR_PTR(status);
|
|
goto unlock_nfs_sync;
|
|
}
|
|
|
|
status = ocfs2_lookup_ino_from_name(dir, "..", 2, &blkno);
|
|
if (status < 0) {
|
|
parent = ERR_PTR(-ENOENT);
|
|
goto bail_unlock;
|
|
}
|
|
|
|
status = ocfs2_test_inode_bit(OCFS2_SB(dir->i_sb), blkno, &set);
|
|
if (status < 0) {
|
|
if (status == -EINVAL) {
|
|
status = -ESTALE;
|
|
} else
|
|
mlog(ML_ERROR, "test inode bit failed %d\n", status);
|
|
parent = ERR_PTR(status);
|
|
goto bail_unlock;
|
|
}
|
|
|
|
trace_ocfs2_get_dentry_test_bit(status, set);
|
|
if (!set) {
|
|
status = -ESTALE;
|
|
parent = ERR_PTR(status);
|
|
goto bail_unlock;
|
|
}
|
|
|
|
parent = d_obtain_alias(ocfs2_iget(OCFS2_SB(dir->i_sb), blkno, 0, 0));
|
|
|
|
bail_unlock:
|
|
ocfs2_inode_unlock(dir, 0);
|
|
|
|
unlock_nfs_sync:
|
|
ocfs2_nfs_sync_unlock(OCFS2_SB(dir->i_sb), 1);
|
|
|
|
bail:
|
|
trace_ocfs2_get_parent_end(parent);
|
|
|
|
return parent;
|
|
}
|
|
|
|
static int ocfs2_encode_fh(struct inode *inode, u32 *fh_in, int *max_len,
|
|
struct inode *parent)
|
|
{
|
|
int len = *max_len;
|
|
int type = 1;
|
|
u64 blkno;
|
|
u32 generation;
|
|
__le32 *fh = (__force __le32 *) fh_in;
|
|
|
|
#ifdef TRACE_HOOKS_ARE_NOT_BRAINDEAD_IN_YOUR_OPINION
|
|
#error "You go ahead and fix that mess, then. Somehow"
|
|
trace_ocfs2_encode_fh_begin(dentry, dentry->d_name.len,
|
|
dentry->d_name.name,
|
|
fh, len, connectable);
|
|
#endif
|
|
|
|
if (parent && (len < 6)) {
|
|
*max_len = 6;
|
|
type = FILEID_INVALID;
|
|
goto bail;
|
|
} else if (len < 3) {
|
|
*max_len = 3;
|
|
type = FILEID_INVALID;
|
|
goto bail;
|
|
}
|
|
|
|
blkno = OCFS2_I(inode)->ip_blkno;
|
|
generation = inode->i_generation;
|
|
|
|
trace_ocfs2_encode_fh_self((unsigned long long)blkno, generation);
|
|
|
|
len = 3;
|
|
fh[0] = cpu_to_le32((u32)(blkno >> 32));
|
|
fh[1] = cpu_to_le32((u32)(blkno & 0xffffffff));
|
|
fh[2] = cpu_to_le32(generation);
|
|
|
|
if (parent) {
|
|
blkno = OCFS2_I(parent)->ip_blkno;
|
|
generation = parent->i_generation;
|
|
|
|
fh[3] = cpu_to_le32((u32)(blkno >> 32));
|
|
fh[4] = cpu_to_le32((u32)(blkno & 0xffffffff));
|
|
fh[5] = cpu_to_le32(generation);
|
|
|
|
len = 6;
|
|
type = 2;
|
|
|
|
trace_ocfs2_encode_fh_parent((unsigned long long)blkno,
|
|
generation);
|
|
}
|
|
|
|
*max_len = len;
|
|
|
|
bail:
|
|
trace_ocfs2_encode_fh_type(type);
|
|
return type;
|
|
}
|
|
|
|
static struct dentry *ocfs2_fh_to_dentry(struct super_block *sb,
|
|
struct fid *fid, int fh_len, int fh_type)
|
|
{
|
|
struct ocfs2_inode_handle handle;
|
|
|
|
if (fh_len < 3 || fh_type > 2)
|
|
return NULL;
|
|
|
|
handle.ih_blkno = (u64)le32_to_cpu(fid->raw[0]) << 32;
|
|
handle.ih_blkno |= (u64)le32_to_cpu(fid->raw[1]);
|
|
handle.ih_generation = le32_to_cpu(fid->raw[2]);
|
|
return ocfs2_get_dentry(sb, &handle);
|
|
}
|
|
|
|
static struct dentry *ocfs2_fh_to_parent(struct super_block *sb,
|
|
struct fid *fid, int fh_len, int fh_type)
|
|
{
|
|
struct ocfs2_inode_handle parent;
|
|
|
|
if (fh_type != 2 || fh_len < 6)
|
|
return NULL;
|
|
|
|
parent.ih_blkno = (u64)le32_to_cpu(fid->raw[3]) << 32;
|
|
parent.ih_blkno |= (u64)le32_to_cpu(fid->raw[4]);
|
|
parent.ih_generation = le32_to_cpu(fid->raw[5]);
|
|
return ocfs2_get_dentry(sb, &parent);
|
|
}
|
|
|
|
const struct export_operations ocfs2_export_ops = {
|
|
.encode_fh = ocfs2_encode_fh,
|
|
.fh_to_dentry = ocfs2_fh_to_dentry,
|
|
.fh_to_parent = ocfs2_fh_to_parent,
|
|
.get_parent = ocfs2_get_parent,
|
|
};
|