forked from Minki/linux
b4d91aeb6e
rdma_nl_rcv_msg() decides whether to use the .dump() callback or the
.doit() callback with the following test:
if (flags & NLM_F_DUMP) ...
NLM_F_DUMP is a two-bit mask (NLM_F_ROOT | NLM_F_MATCH), so the test
succeeds when either of the two bits is set.
When an RDMA_NL_LS message (response) is received, the bit used for
indicating an error is the same bit as NLM_F_ROOT.
NLM_F_ROOT == (0x100) == RDMA_NL_LS_F_ERR.
ibacm sends a response with the RDMA_NL_LS_F_ERR bit set if an error
occurs in the service. The current code then misinterprets that bit as
NLM_F_DUMP and tries to call the .dump() callback.
If the .dump() callback for the specified request is not available
(which is true for the RDMA_NL_LS messages) the following Oops occurs:
[ 4555.960256] BUG: unable to handle kernel NULL pointer dereference at
(null)
[ 4555.969046] IP: (null)
[ 4555.972664] PGD 10543f1067 P4D 10543f1067 PUD 1033f93067 PMD 0
[ 4555.979287] Oops: 0010 [#1] SMP
[ 4555.982809] Modules linked in: rpcrdma ib_isert iscsi_target_mod
target_core_mod ib_iser libiscsi scsi_transport_iscsi ib_ipoib rdma_ucm ib_ucm
ib_uverbs ib_umad rdma_cm ib_cm iw_cm dm_mirror dm_region_hash dm_log dm_mod
dax sb_edac x86_pkg_temp_thermal intel_powerclamp coretemp kvm irqbypass
crct10dif_pclmul crc32_pclmul ghash_clmulni_intel pcbc aesni_intel crypto_simd
glue_helper cryptd hfi1 rdmavt iTCO_wdt iTCO_vendor_support ib_core mei_me
lpc_ich pcspkr mei ioatdma sg shpchp i2c_i801 mfd_core wmi ipmi_si ipmi_devintf
ipmi_msghandler acpi_power_meter acpi_pad nfsd auth_rpcgss nfs_acl lockd grace
sunrpc ip_tables ext4 mbcache jbd2 sd_mod mgag200 drm_kms_helper syscopyarea
sysfillrect sysimgblt fb_sys_fops ttm igb ahci crc32c_intel ptp libahci
pps_core drm dca libata i2c_algo_bit i2c_core
[ 4556.061190] CPU: 54 PID: 9841 Comm: ibacm Tainted: G I
4.14.0-rc2+ #6
[ 4556.069667] Hardware name: Intel Corporation S2600WT2/S2600WT2, BIOS
SE5C610.86B.01.01.0008.021120151325 02/11/2015
[ 4556.081339] task: ffff880855f42d00 task.stack: ffffc900246b4000
[ 4556.087967] RIP: 0010: (null)
[ 4556.092166] RSP: 0018:ffffc900246b7bc8 EFLAGS: 00010246
[ 4556.098018] RAX: ffffffff81dbe9e0 RBX: ffff881058bb1000 RCX:
0000000000000000
[ 4556.105997] RDX: 0000000000001100 RSI: ffff881058bb1320 RDI:
ffff881056362000
[ 4556.113984] RBP: ffffc900246b7bf8 R08: 0000000000000ec0 R09:
0000000000001100
[ 4556.121971] R10: ffff8810573a5000 R11: 0000000000000000 R12:
ffff881056362000
[ 4556.129957] R13: 0000000000000ec0 R14: ffff881058bb1320 R15:
0000000000000ec0
[ 4556.137945] FS: 00007fe0ba5a38c0(0000) GS:ffff88105f080000(0000)
knlGS:0000000000000000
[ 4556.147000] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[ 4556.153433] CR2: 0000000000000000 CR3: 0000001056f5d003 CR4:
00000000001606e0
[ 4556.161419] Call Trace:
[ 4556.164167] ? netlink_dump+0x12c/0x290
[ 4556.168468] __netlink_dump_start+0x186/0x1f0
[ 4556.173357] rdma_nl_rcv_msg+0x193/0x1b0 [ib_core]
[ 4556.178724] rdma_nl_rcv+0xdc/0x130 [ib_core]
[ 4556.183604] netlink_unicast+0x181/0x240
[ 4556.187998] netlink_sendmsg+0x2c2/0x3b0
[ 4556.192392] sock_sendmsg+0x38/0x50
[ 4556.196299] SYSC_sendto+0x102/0x190
[ 4556.200308] ? __audit_syscall_entry+0xaf/0x100
[ 4556.205387] ? syscall_trace_enter+0x1d0/0x2b0
[ 4556.210366] ? __audit_syscall_exit+0x209/0x290
[ 4556.215442] SyS_sendto+0xe/0x10
[ 4556.219060] do_syscall_64+0x67/0x1b0
[ 4556.223165] entry_SYSCALL64_slow_path+0x25/0x25
[ 4556.228328] RIP: 0033:0x7fe0b9db2a63
[ 4556.232333] RSP: 002b:00007ffc55edc260 EFLAGS: 00000293 ORIG_RAX:
000000000000002c
[ 4556.240808] RAX: ffffffffffffffda RBX: 0000000000000010 RCX:
00007fe0b9db2a63
[ 4556.248796] RDX: 0000000000000010 RSI: 00007ffc55edc280 RDI:
000000000000000d
[ 4556.256782] RBP: 00007ffc55edc670 R08: 00007ffc55edc270 R09:
000000000000000c
[ 4556.265321] R10: 0000000000000000 R11: 0000000000000293 R12:
00007ffc55edc280
[ 4556.273846] R13: 000000000260b400 R14: 000000000000000d R15:
0000000000000001
[ 4556.282368] Code: Bad RIP value.
[ 4556.286629] RIP: (null) RSP: ffffc900246b7bc8
[ 4556.293013] CR2: 0000000000000000
[ 4556.297292] ---[ end trace 8d67abcfd10ec209 ]---
[ 4556.305465] Kernel panic - not syncing: Fatal exception
[ 4556.313786] Kernel Offset: disabled
[ 4556.321563] ---[ end Kernel panic - not syncing: Fatal exception
[ 4556.328960] ------------[ cut here ]------------
Special case RDMA_NL_LS response messages to call the appropriate
callback.
Additionally, make sure that the .dump() callback is not NULL
before calling it.
Fixes: 647c75ac59
("RDMA/netlink: Convert LS to doit callback")
Reviewed-by: Mike Marciniszyn <mike.marciniszyn@intel.com>
Reviewed-by: Kaike Wan <kaike.wan@intel.com>
Reviewed-by: Alex Estrin <alex.estrin@intel.com>
Signed-off-by: Michael J. Ruhl <michael.j.ruhl@intel.com>
Reviewed-by: Shiraz Saleem <shiraz.saleem@intel.com>
Signed-off-by: Doug Ledford <dledford@redhat.com>
316 lines
8.1 KiB
C
316 lines
8.1 KiB
C
/*
|
|
* Copyright (c) 2017 Mellanox Technologies Inc. All rights reserved.
|
|
* Copyright (c) 2010 Voltaire Inc. All rights reserved.
|
|
*
|
|
* This software is available to you under a choice of one of two
|
|
* licenses. You may choose to be licensed under the terms of the GNU
|
|
* General Public License (GPL) Version 2, available from the file
|
|
* COPYING in the main directory of this source tree, or the
|
|
* OpenIB.org BSD license below:
|
|
*
|
|
* Redistribution and use in source and binary forms, with or
|
|
* without modification, are permitted provided that the following
|
|
* conditions are met:
|
|
*
|
|
* - Redistributions of source code must retain the above
|
|
* copyright notice, this list of conditions and the following
|
|
* disclaimer.
|
|
*
|
|
* - Redistributions in binary form must reproduce the above
|
|
* copyright notice, this list of conditions and the following
|
|
* disclaimer in the documentation and/or other materials
|
|
* provided with the distribution.
|
|
*
|
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
|
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
|
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
|
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
|
|
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
|
|
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
|
|
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
* SOFTWARE.
|
|
*/
|
|
|
|
#define pr_fmt(fmt) "%s:%s: " fmt, KBUILD_MODNAME, __func__
|
|
|
|
#include <linux/export.h>
|
|
#include <net/netlink.h>
|
|
#include <net/net_namespace.h>
|
|
#include <net/sock.h>
|
|
#include <rdma/rdma_netlink.h>
|
|
#include <linux/module.h>
|
|
#include "core_priv.h"
|
|
|
|
#include "core_priv.h"
|
|
|
|
/*
 * Taken by rdma_nl_register()/rdma_nl_unregister() around updates of
 * rdma_nl_types[] and held by rdma_nl_rcv() while a received skb is
 * processed.
 */
static DEFINE_MUTEX(rdma_nl_mutex);

/* Kernel-side NETLINK_RDMA socket, created in rdma_nl_init(). */
static struct sock *nls;

/*
 * Callback table of every RDMA netlink client, indexed by client id.
 * A NULL cb_table means that no table is registered for that client.
 */
static struct {
	const struct rdma_nl_cbs *cb_table;
} rdma_nl_types[RDMA_NL_NUM_CLIENTS];
|
|
|
|
int rdma_nl_chk_listeners(unsigned int group)
|
|
{
|
|
return (netlink_has_listeners(nls, group)) ? 0 : -1;
|
|
}
|
|
EXPORT_SYMBOL(rdma_nl_chk_listeners);
|
|
|
|
static bool is_nl_msg_valid(unsigned int type, unsigned int op)
|
|
{
|
|
static const unsigned int max_num_ops[RDMA_NL_NUM_CLIENTS] = {
|
|
[RDMA_NL_RDMA_CM] = RDMA_NL_RDMA_CM_NUM_OPS,
|
|
[RDMA_NL_IWCM] = RDMA_NL_IWPM_NUM_OPS,
|
|
[RDMA_NL_LS] = RDMA_NL_LS_NUM_OPS,
|
|
[RDMA_NL_NLDEV] = RDMA_NLDEV_NUM_OPS,
|
|
};
|
|
|
|
/*
|
|
* This BUILD_BUG_ON is intended to catch addition of new
|
|
* RDMA netlink protocol without updating the array above.
|
|
*/
|
|
BUILD_BUG_ON(RDMA_NL_NUM_CLIENTS != 6);
|
|
|
|
if (type >= RDMA_NL_NUM_CLIENTS)
|
|
return false;
|
|
|
|
return (op < max_num_ops[type]) ? true : false;
|
|
}
|
|
|
|
static bool is_nl_valid(unsigned int type, unsigned int op)
|
|
{
|
|
const struct rdma_nl_cbs *cb_table;
|
|
|
|
if (!is_nl_msg_valid(type, op))
|
|
return false;
|
|
|
|
cb_table = rdma_nl_types[type].cb_table;
|
|
#ifdef CONFIG_MODULES
|
|
if (!cb_table) {
|
|
mutex_unlock(&rdma_nl_mutex);
|
|
request_module("rdma-netlink-subsys-%d", type);
|
|
mutex_lock(&rdma_nl_mutex);
|
|
cb_table = rdma_nl_types[type].cb_table;
|
|
}
|
|
#endif
|
|
|
|
if (!cb_table || (!cb_table[op].dump && !cb_table[op].doit))
|
|
return false;
|
|
return true;
|
|
}
|
|
|
|
void rdma_nl_register(unsigned int index,
|
|
const struct rdma_nl_cbs cb_table[])
|
|
{
|
|
mutex_lock(&rdma_nl_mutex);
|
|
if (!is_nl_msg_valid(index, 0)) {
|
|
/*
|
|
* All clients are not interesting in success/failure of
|
|
* this call. They want to see the print to error log and
|
|
* continue their initialization. Print warning for them,
|
|
* because it is programmer's error to be here.
|
|
*/
|
|
mutex_unlock(&rdma_nl_mutex);
|
|
WARN(true,
|
|
"The not-valid %u index was supplied to RDMA netlink\n",
|
|
index);
|
|
return;
|
|
}
|
|
|
|
if (rdma_nl_types[index].cb_table) {
|
|
mutex_unlock(&rdma_nl_mutex);
|
|
WARN(true,
|
|
"The %u index is already registered in RDMA netlink\n",
|
|
index);
|
|
return;
|
|
}
|
|
|
|
rdma_nl_types[index].cb_table = cb_table;
|
|
mutex_unlock(&rdma_nl_mutex);
|
|
}
|
|
EXPORT_SYMBOL(rdma_nl_register);
|
|
|
|
void rdma_nl_unregister(unsigned int index)
|
|
{
|
|
mutex_lock(&rdma_nl_mutex);
|
|
rdma_nl_types[index].cb_table = NULL;
|
|
mutex_unlock(&rdma_nl_mutex);
|
|
}
|
|
EXPORT_SYMBOL(rdma_nl_unregister);
|
|
|
|
void *ibnl_put_msg(struct sk_buff *skb, struct nlmsghdr **nlh, int seq,
|
|
int len, int client, int op, int flags)
|
|
{
|
|
*nlh = nlmsg_put(skb, 0, seq, RDMA_NL_GET_TYPE(client, op), len, flags);
|
|
if (!*nlh)
|
|
return NULL;
|
|
return nlmsg_data(*nlh);
|
|
}
|
|
EXPORT_SYMBOL(ibnl_put_msg);
|
|
|
|
int ibnl_put_attr(struct sk_buff *skb, struct nlmsghdr *nlh,
|
|
int len, void *data, int type)
|
|
{
|
|
if (nla_put(skb, type, len, data)) {
|
|
nlmsg_cancel(skb, nlh);
|
|
return -EMSGSIZE;
|
|
}
|
|
return 0;
|
|
}
|
|
EXPORT_SYMBOL(ibnl_put_attr);
|
|
|
|
static int rdma_nl_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh,
|
|
struct netlink_ext_ack *extack)
|
|
{
|
|
int type = nlh->nlmsg_type;
|
|
unsigned int index = RDMA_NL_GET_CLIENT(type);
|
|
unsigned int op = RDMA_NL_GET_OP(type);
|
|
const struct rdma_nl_cbs *cb_table;
|
|
|
|
if (!is_nl_valid(index, op))
|
|
return -EINVAL;
|
|
|
|
cb_table = rdma_nl_types[index].cb_table;
|
|
|
|
if ((cb_table[op].flags & RDMA_NL_ADMIN_PERM) &&
|
|
!netlink_capable(skb, CAP_NET_ADMIN))
|
|
return -EPERM;
|
|
|
|
/*
|
|
* LS responses overload the 0x100 (NLM_F_ROOT) flag. Don't
|
|
* mistakenly call the .dump() function.
|
|
*/
|
|
if (index == RDMA_NL_LS) {
|
|
if (cb_table[op].doit)
|
|
return cb_table[op].doit(skb, nlh, extack);
|
|
return -EINVAL;
|
|
}
|
|
/* FIXME: Convert IWCM to properly handle doit callbacks */
|
|
if ((nlh->nlmsg_flags & NLM_F_DUMP) || index == RDMA_NL_RDMA_CM ||
|
|
index == RDMA_NL_IWCM) {
|
|
struct netlink_dump_control c = {
|
|
.dump = cb_table[op].dump,
|
|
};
|
|
if (c.dump)
|
|
return netlink_dump_start(nls, skb, nlh, &c);
|
|
return -EINVAL;
|
|
}
|
|
|
|
if (cb_table[op].doit)
|
|
return cb_table[op].doit(skb, nlh, extack);
|
|
|
|
return 0;
|
|
}
|
|
|
|
/*
|
|
* This function is similar to netlink_rcv_skb with one exception:
|
|
* It calls to the callback for the netlink messages without NLM_F_REQUEST
|
|
* flag. These messages are intended for RDMA_NL_LS consumer, so it is allowed
|
|
* for that consumer only.
|
|
*/
|
|
static int rdma_nl_rcv_skb(struct sk_buff *skb, int (*cb)(struct sk_buff *,
|
|
struct nlmsghdr *,
|
|
struct netlink_ext_ack *))
|
|
{
|
|
struct netlink_ext_ack extack = {};
|
|
struct nlmsghdr *nlh;
|
|
int err;
|
|
|
|
while (skb->len >= nlmsg_total_size(0)) {
|
|
int msglen;
|
|
|
|
nlh = nlmsg_hdr(skb);
|
|
err = 0;
|
|
|
|
if (nlh->nlmsg_len < NLMSG_HDRLEN || skb->len < nlh->nlmsg_len)
|
|
return 0;
|
|
|
|
/*
|
|
* Generally speaking, the only requests are handled
|
|
* by the kernel, but RDMA_NL_LS is different, because it
|
|
* runs backward netlink scheme. Kernel initiates messages
|
|
* and waits for reply with data to keep pathrecord cache
|
|
* in sync.
|
|
*/
|
|
if (!(nlh->nlmsg_flags & NLM_F_REQUEST) &&
|
|
(RDMA_NL_GET_CLIENT(nlh->nlmsg_type) != RDMA_NL_LS))
|
|
goto ack;
|
|
|
|
/* Skip control messages */
|
|
if (nlh->nlmsg_type < NLMSG_MIN_TYPE)
|
|
goto ack;
|
|
|
|
err = cb(skb, nlh, &extack);
|
|
if (err == -EINTR)
|
|
goto skip;
|
|
|
|
ack:
|
|
if (nlh->nlmsg_flags & NLM_F_ACK || err)
|
|
netlink_ack(skb, nlh, err, &extack);
|
|
|
|
skip:
|
|
msglen = NLMSG_ALIGN(nlh->nlmsg_len);
|
|
if (msglen > skb->len)
|
|
msglen = skb->len;
|
|
skb_pull(skb, msglen);
|
|
}
|
|
|
|
return 0;
|
|
}
|
|
|
|
static void rdma_nl_rcv(struct sk_buff *skb)
|
|
{
|
|
mutex_lock(&rdma_nl_mutex);
|
|
rdma_nl_rcv_skb(skb, &rdma_nl_rcv_msg);
|
|
mutex_unlock(&rdma_nl_mutex);
|
|
}
|
|
|
|
/*
 * rdma_nl_unicast - send a message to one port id with MSG_DONTWAIT.
 * @skb: message to send
 * @pid: destination port id
 *
 * Returns 0 if netlink_unicast() returned a non-negative value, its
 * negative error code otherwise.
 */
int rdma_nl_unicast(struct sk_buff *skb, u32 pid)
{
	int ret = netlink_unicast(nls, skb, pid, MSG_DONTWAIT);

	if (ret < 0)
		return ret;

	return 0;
}
EXPORT_SYMBOL(rdma_nl_unicast);
|
|
|
|
/*
 * rdma_nl_unicast_wait - send a message to one port id without
 * MSG_DONTWAIT (flags of 0 are passed to netlink_unicast()).
 * @skb: message to send
 * @pid: destination port id
 *
 * Returns 0 if netlink_unicast() returned a non-negative value, its
 * negative error code otherwise.
 */
int rdma_nl_unicast_wait(struct sk_buff *skb, __u32 pid)
{
	int ret = netlink_unicast(nls, skb, pid, 0);

	if (ret < 0)
		return ret;

	return 0;
}
EXPORT_SYMBOL(rdma_nl_unicast_wait);
|
|
|
|
/*
 * rdma_nl_multicast - send a message to a group of the RDMA netlink socket.
 * @skb:   message to send
 * @group: destination group
 * @flags: allocation flags handed to nlmsg_multicast()
 *
 * Returns whatever nlmsg_multicast() returns.
 */
int rdma_nl_multicast(struct sk_buff *skb, unsigned int group, gfp_t flags)
{
	int ret;

	ret = nlmsg_multicast(nls, skb, 0, group, flags);

	return ret;
}
EXPORT_SYMBOL(rdma_nl_multicast);
|
|
|
|
/*
 * rdma_nl_init - create the kernel-side NETLINK_RDMA socket.
 *
 * The socket is created in init_net with rdma_nl_rcv() as input handler
 * and gets a send timeout of 10 * HZ.
 *
 * Returns 0 on success, -ENOMEM if the socket could not be created.
 */
int __init rdma_nl_init(void)
{
	struct netlink_kernel_cfg cfg = {
		.input	= rdma_nl_rcv,
	};

	nls = netlink_kernel_create(&init_net, NETLINK_RDMA, &cfg);
	if (nls) {
		nls->sk_sndtimeo = 10 * HZ;
		return 0;
	}

	return -ENOMEM;
}
|
|
|
|
void rdma_nl_exit(void)
|
|
{
|
|
int idx;
|
|
|
|
for (idx = 0; idx < RDMA_NL_NUM_CLIENTS; idx++)
|
|
rdma_nl_unregister(idx);
|
|
|
|
netlink_kernel_release(nls);
|
|
}
|
|
|
|
MODULE_ALIAS_NET_PF_PROTO(PF_NETLINK, NETLINK_RDMA);
|