linux/drivers/infiniband/sw/rxe/rxe_cq.c
Andrew Boyer bfc3ae0566 IB/rxe: Disable completion upcalls when a CQ is destroyed
This prevents the stack from accessing userspace objects while they
are being torn down.

One possible sequence of events:
 - Userspace program exits
 - ib_uverbs_cleanup_ucontext() runs, calling ib_destroy_qp(),
   ib_destroy_cq(), etc. and releasing/freeing the UCQ
   - The QP still has tasklets running, so it isn't destroyed yet
   - The CQ is referenced by the QP, so the CQ isn't destroyed yet
   - The UCQ is kfree()'d anyway
 - A send work request completes
 - rxe_send_complete() calls cq->ibcq.comp_handler()
 - ib_uverbs_comp_handler() runs and crashes; the event queue is checked
   for is_closed, but it has no way to check the ib_ucq_object before
   accessing it

The reference counting on the CQ doesn't protect against this since the CQ
hasn't been destroyed yet.
There's no available interface to deregister the UCQ from the CQ, and it
didn't appear that attempting to add reference counting to the UCQ was
going to be a good way to go since this solution is much simpler.

Fixes: 8700e3e7c4 ("Soft RoCE driver")
Signed-off-by: Andrew Boyer <andrew.boyer@dell.com>
Signed-off-by: Doug Ledford <dledford@redhat.com>
2017-08-28 19:12:32 -04:00

185 lines
4.5 KiB
C

/*
* Copyright (c) 2016 Mellanox Technologies Ltd. All rights reserved.
* Copyright (c) 2015 System Fabric Works, Inc. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
* General Public License (GPL) Version 2, available from the file
* COPYING in the main directory of this source tree, or the
* OpenIB.org BSD license below:
*
* Redistribution and use in source and binary forms, with or
* without modification, are permitted provided that the following
* conditions are met:
*
* - Redistributions of source code must retain the above
* copyright notice, this list of conditions and the following
* disclaimer.
*
* - Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following
* disclaimer in the documentation and/or other materials
* provided with the distribution.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
#include "rxe.h"
#include "rxe_loc.h"
#include "rxe_queue.h"
int rxe_cq_chk_attr(struct rxe_dev *rxe, struct rxe_cq *cq,
int cqe, int comp_vector, struct ib_udata *udata)
{
int count;
if (cqe <= 0) {
pr_warn("cqe(%d) <= 0\n", cqe);
goto err1;
}
if (cqe > rxe->attr.max_cqe) {
pr_warn("cqe(%d) > max_cqe(%d)\n",
cqe, rxe->attr.max_cqe);
goto err1;
}
if (cq) {
count = queue_count(cq->queue);
if (cqe < count) {
pr_warn("cqe(%d) < current # elements in queue (%d)",
cqe, count);
goto err1;
}
}
return 0;
err1:
return -EINVAL;
}
static void rxe_send_complete(unsigned long data)
{
struct rxe_cq *cq = (struct rxe_cq *)data;
unsigned long flags;
spin_lock_irqsave(&cq->cq_lock, flags);
if (cq->is_dying) {
spin_unlock_irqrestore(&cq->cq_lock, flags);
return;
}
spin_unlock_irqrestore(&cq->cq_lock, flags);
cq->ibcq.comp_handler(&cq->ibcq, cq->ibcq.cq_context);
}
int rxe_cq_from_init(struct rxe_dev *rxe, struct rxe_cq *cq, int cqe,
int comp_vector, struct ib_ucontext *context,
struct ib_udata *udata)
{
int err;
cq->queue = rxe_queue_init(rxe, &cqe,
sizeof(struct rxe_cqe));
if (!cq->queue) {
pr_warn("unable to create cq\n");
return -ENOMEM;
}
err = do_mmap_info(rxe, udata, false, context, cq->queue->buf,
cq->queue->buf_size, &cq->queue->ip);
if (err) {
kvfree(cq->queue->buf);
kfree(cq->queue);
return err;
}
if (udata)
cq->is_user = 1;
cq->is_dying = false;
tasklet_init(&cq->comp_task, rxe_send_complete, (unsigned long)cq);
spin_lock_init(&cq->cq_lock);
cq->ibcq.cqe = cqe;
return 0;
}
int rxe_cq_resize_queue(struct rxe_cq *cq, int cqe, struct ib_udata *udata)
{
int err;
err = rxe_queue_resize(cq->queue, (unsigned int *)&cqe,
sizeof(struct rxe_cqe),
cq->queue->ip ? cq->queue->ip->context : NULL,
udata, NULL, &cq->cq_lock);
if (!err)
cq->ibcq.cqe = cqe;
return err;
}
int rxe_cq_post(struct rxe_cq *cq, struct rxe_cqe *cqe, int solicited)
{
struct ib_event ev;
unsigned long flags;
spin_lock_irqsave(&cq->cq_lock, flags);
if (unlikely(queue_full(cq->queue))) {
spin_unlock_irqrestore(&cq->cq_lock, flags);
if (cq->ibcq.event_handler) {
ev.device = cq->ibcq.device;
ev.element.cq = &cq->ibcq;
ev.event = IB_EVENT_CQ_ERR;
cq->ibcq.event_handler(&ev, cq->ibcq.cq_context);
}
return -EBUSY;
}
memcpy(producer_addr(cq->queue), cqe, sizeof(*cqe));
/* make sure all changes to the CQ are written before we update the
* producer pointer
*/
smp_wmb();
advance_producer(cq->queue);
spin_unlock_irqrestore(&cq->cq_lock, flags);
if ((cq->notify == IB_CQ_NEXT_COMP) ||
(cq->notify == IB_CQ_SOLICITED && solicited)) {
cq->notify = 0;
tasklet_schedule(&cq->comp_task);
}
return 0;
}
void rxe_cq_disable(struct rxe_cq *cq)
{
unsigned long flags;
spin_lock_irqsave(&cq->cq_lock, flags);
cq->is_dying = true;
spin_unlock_irqrestore(&cq->cq_lock, flags);
}
void rxe_cq_cleanup(struct rxe_pool_entry *arg)
{
struct rxe_cq *cq = container_of(arg, typeof(*cq), pelem);
if (cq->queue)
rxe_queue_cleanup(cq->queue);
}