forked from Minki/linux
RDS/IB: protect the list of IB devices
The RDS IB device list wasn't protected by any locking. Traversal in both the get_mr and FMR flushing paths could race with addition and removal. List manipulation is done with RCU primitives and is protected by the write side of a rwsem. The list traversal in the get_mr fast path is protected by an RCU read-side critical section. The FMR list traversal is more problematic because it can block while traversing the list. We protect this with the read side of the rwsem. Signed-off-by: Zach Brown <zach.brown@oracle.com>
This commit is contained in:
parent
1bde04a63d
commit
ea819867b7
29
net/rds/ib.c
29
net/rds/ib.c
@ -53,6 +53,12 @@ MODULE_PARM_DESC(fmr_message_size, " Max size of a RDMA transfer");
|
|||||||
module_param(rds_ib_retry_count, int, 0444);
|
module_param(rds_ib_retry_count, int, 0444);
|
||||||
MODULE_PARM_DESC(rds_ib_retry_count, " Number of hw retries before reporting an error");
|
MODULE_PARM_DESC(rds_ib_retry_count, " Number of hw retries before reporting an error");
|
||||||
|
|
||||||
|
/*
|
||||||
|
* we have a clumsy combination of RCU and a rwsem protecting this list
|
||||||
|
* because it is used both in the get_mr fast path and while blocking in
|
||||||
|
* the FMR flushing path.
|
||||||
|
*/
|
||||||
|
DECLARE_RWSEM(rds_ib_devices_lock);
|
||||||
struct list_head rds_ib_devices;
|
struct list_head rds_ib_devices;
|
||||||
|
|
||||||
/* NOTE: if also grabbing ibdev lock, grab this first */
|
/* NOTE: if also grabbing ibdev lock, grab this first */
|
||||||
@ -171,7 +177,10 @@ void rds_ib_add_one(struct ib_device *device)
|
|||||||
|
|
||||||
INIT_LIST_HEAD(&rds_ibdev->ipaddr_list);
|
INIT_LIST_HEAD(&rds_ibdev->ipaddr_list);
|
||||||
INIT_LIST_HEAD(&rds_ibdev->conn_list);
|
INIT_LIST_HEAD(&rds_ibdev->conn_list);
|
||||||
list_add_tail(&rds_ibdev->list, &rds_ib_devices);
|
|
||||||
|
down_write(&rds_ib_devices_lock);
|
||||||
|
list_add_tail_rcu(&rds_ibdev->list, &rds_ib_devices);
|
||||||
|
up_write(&rds_ib_devices_lock);
|
||||||
atomic_inc(&rds_ibdev->refcount);
|
atomic_inc(&rds_ibdev->refcount);
|
||||||
|
|
||||||
ib_set_client_data(device, &rds_ib_client, rds_ibdev);
|
ib_set_client_data(device, &rds_ib_client, rds_ibdev);
|
||||||
@ -230,16 +239,20 @@ void rds_ib_remove_one(struct ib_device *device)
|
|||||||
|
|
||||||
rds_ib_dev_shutdown(rds_ibdev);
|
rds_ib_dev_shutdown(rds_ibdev);
|
||||||
|
|
||||||
/*
|
/* stop connection attempts from getting a reference to this device. */
|
||||||
* prevent future connection attempts from getting a reference to this
|
|
||||||
* device and wait for currently racing connection attempts to finish
|
|
||||||
* getting their reference
|
|
||||||
*/
|
|
||||||
ib_set_client_data(device, &rds_ib_client, NULL);
|
ib_set_client_data(device, &rds_ib_client, NULL);
|
||||||
|
|
||||||
|
down_write(&rds_ib_devices_lock);
|
||||||
|
list_del_rcu(&rds_ibdev->list);
|
||||||
|
up_write(&rds_ib_devices_lock);
|
||||||
|
|
||||||
|
/*
|
||||||
|
* This synchronize rcu is waiting for readers of both the ib
|
||||||
|
* client data and the devices list to finish before we drop
|
||||||
|
* both of those references.
|
||||||
|
*/
|
||||||
synchronize_rcu();
|
synchronize_rcu();
|
||||||
rds_ib_dev_put(rds_ibdev);
|
rds_ib_dev_put(rds_ibdev);
|
||||||
|
|
||||||
list_del(&rds_ibdev->list);
|
|
||||||
rds_ib_dev_put(rds_ibdev);
|
rds_ib_dev_put(rds_ibdev);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -23,6 +23,7 @@
|
|||||||
|
|
||||||
#define RDS_IB_RECYCLE_BATCH_COUNT 32
|
#define RDS_IB_RECYCLE_BATCH_COUNT 32
|
||||||
|
|
||||||
|
extern struct rw_semaphore rds_ib_devices_lock;
|
||||||
extern struct list_head rds_ib_devices;
|
extern struct list_head rds_ib_devices;
|
||||||
|
|
||||||
/*
|
/*
|
||||||
|
@ -94,8 +94,8 @@ static struct rds_ib_device *rds_ib_get_device(__be32 ipaddr)
|
|||||||
struct rds_ib_device *rds_ibdev;
|
struct rds_ib_device *rds_ibdev;
|
||||||
struct rds_ib_ipaddr *i_ipaddr;
|
struct rds_ib_ipaddr *i_ipaddr;
|
||||||
|
|
||||||
list_for_each_entry(rds_ibdev, &rds_ib_devices, list) {
|
rcu_read_lock();
|
||||||
rcu_read_lock();
|
list_for_each_entry_rcu(rds_ibdev, &rds_ib_devices, list) {
|
||||||
list_for_each_entry_rcu(i_ipaddr, &rds_ibdev->ipaddr_list, list) {
|
list_for_each_entry_rcu(i_ipaddr, &rds_ibdev->ipaddr_list, list) {
|
||||||
if (i_ipaddr->ipaddr == ipaddr) {
|
if (i_ipaddr->ipaddr == ipaddr) {
|
||||||
atomic_inc(&rds_ibdev->refcount);
|
atomic_inc(&rds_ibdev->refcount);
|
||||||
@ -103,8 +103,8 @@ static struct rds_ib_device *rds_ib_get_device(__be32 ipaddr)
|
|||||||
return rds_ibdev;
|
return rds_ibdev;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
rcu_read_unlock();
|
|
||||||
}
|
}
|
||||||
|
rcu_read_unlock();
|
||||||
|
|
||||||
return NULL;
|
return NULL;
|
||||||
}
|
}
|
||||||
@ -761,12 +761,14 @@ void rds_ib_flush_mrs(void)
|
|||||||
{
|
{
|
||||||
struct rds_ib_device *rds_ibdev;
|
struct rds_ib_device *rds_ibdev;
|
||||||
|
|
||||||
|
down_read(&rds_ib_devices_lock);
|
||||||
list_for_each_entry(rds_ibdev, &rds_ib_devices, list) {
|
list_for_each_entry(rds_ibdev, &rds_ib_devices, list) {
|
||||||
struct rds_ib_mr_pool *pool = rds_ibdev->mr_pool;
|
struct rds_ib_mr_pool *pool = rds_ibdev->mr_pool;
|
||||||
|
|
||||||
if (pool)
|
if (pool)
|
||||||
rds_ib_flush_mr_pool(pool, 0, NULL);
|
rds_ib_flush_mr_pool(pool, 0, NULL);
|
||||||
}
|
}
|
||||||
|
up_read(&rds_ib_devices_lock);
|
||||||
}
|
}
|
||||||
|
|
||||||
void *rds_ib_get_mr(struct scatterlist *sg, unsigned long nents,
|
void *rds_ib_get_mr(struct scatterlist *sg, unsigned long nents,
|
||||||
|
Loading…
Reference in New Issue
Block a user