linux/drivers/infiniband/sw/rxe/rxe_mr.c
Andrew Boyer 13eb1e21d6 IB/rxe: Avoid ICRC errors by copying into the skb first
The current process is to first calculate the CRC and then copy the client
data into the packet. This leaves a window in which the packet contents and
CRC can get out of sync, if the client changes the data after the CRC is
calculated but before the data is copied.

By copying the data into the packet first and then calculating the CRC directly
from the packet contents, we eliminate the window.

This can be seen with qperf's ud_bi_bw test. This seems like very
strange/reckless client behavior, but whether or not the client has mangled
its data, RXE should be able to transfer it reliably.

Fixes: 8700e3e7c4 ("Soft RoCE driver")
Signed-off-by: Andrew Boyer <andrew.boyer@dell.com>
Signed-off-by: Doug Ledford <dledford@redhat.com>
2017-08-28 19:12:36 -04:00
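
As a side note for readers of the commit message above, the following is a
minimal userspace sketch (not RXE code) of the ordering the fix adopts: copy
into the packet first, then checksum the packet bytes. The function and buffer
names (fill_and_crc, pkt_payload, client_buf) are made up for illustration,
and zlib's crc32() stands in for the driver's own CRC helper.

/* Sketch only: CRC the bytes that will actually go on the wire, after the
 * copy, so a client racing with the send cannot leave the data and the CRC
 * out of sync.
 */
#include <stdint.h>
#include <string.h>
#include <zlib.h>

static uint32_t fill_and_crc(uint8_t *pkt_payload, const uint8_t *client_buf,
                             size_t len, uint32_t crc)
{
        /* 1. Copy the client data into the packet (the skb in the kernel). */
        memcpy(pkt_payload, client_buf, len);

        /* 2. Checksum the packet contents, not the client buffer, which the
         *    client may still be modifying.
         */
        return (uint32_t)crc32(crc, pkt_payload, len);
}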


/*
 * Copyright (c) 2016 Mellanox Technologies Ltd. All rights reserved.
 * Copyright (c) 2015 System Fabric Works, Inc. All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses. You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *	- Redistributions of source code must retain the above
 *	  copyright notice, this list of conditions and the following
 *	  disclaimer.
 *
 *	- Redistributions in binary form must reproduce the above
 *	  copyright notice, this list of conditions and the following
 *	  disclaimer in the documentation and/or other materials
 *	  provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */
#include "rxe.h"
#include "rxe_loc.h"
/*
* lfsr (linear feedback shift register) with period 255
*/
static u8 rxe_get_key(void)
{
static u32 key = 1;
key = key << 1;
key |= (0 != (key & 0x100)) ^ (0 != (key & 0x10))
^ (0 != (key & 0x80)) ^ (0 != (key & 0x40));
key &= 0xff;
return key;
}

int mem_check_range(struct rxe_mem *mem, u64 iova, size_t length)
{
        switch (mem->type) {
        case RXE_MEM_TYPE_DMA:
                return 0;

        case RXE_MEM_TYPE_MR:
        case RXE_MEM_TYPE_FMR:
                if (iova < mem->iova ||
                    length > mem->length ||
                    iova > mem->iova + mem->length - length)
                        return -EFAULT;
                return 0;

        default:
                return -EFAULT;
        }
}

#define IB_ACCESS_REMOTE        (IB_ACCESS_REMOTE_READ          \
                                | IB_ACCESS_REMOTE_WRITE        \
                                | IB_ACCESS_REMOTE_ATOMIC)

static void rxe_mem_init(int access, struct rxe_mem *mem)
{
        u32 lkey = mem->pelem.index << 8 | rxe_get_key();
        u32 rkey = (access & IB_ACCESS_REMOTE) ? lkey : 0;

        if (mem->pelem.pool->type == RXE_TYPE_MR) {
                mem->ibmr.lkey = lkey;
                mem->ibmr.rkey = rkey;
        }

        mem->lkey = lkey;
        mem->rkey = rkey;
        mem->state = RXE_MEM_STATE_INVALID;
        mem->type = RXE_MEM_TYPE_NONE;
        mem->map_shift = ilog2(RXE_BUF_PER_MAP);
}

void rxe_mem_cleanup(struct rxe_pool_entry *arg)
{
        struct rxe_mem *mem = container_of(arg, typeof(*mem), pelem);
        int i;

        if (mem->umem)
                ib_umem_release(mem->umem);

        if (mem->map) {
                for (i = 0; i < mem->num_map; i++)
                        kfree(mem->map[i]);

                kfree(mem->map);
        }
}

/* Build the two-level buffer map: num_map chunks, each holding up to
 * RXE_BUF_PER_MAP physical buffer descriptors, enough to cover num_buf
 * buffers in total.
 */
static int rxe_mem_alloc(struct rxe_dev *rxe, struct rxe_mem *mem, int num_buf)
{
        int i;
        int num_map;
        struct rxe_map **map = mem->map;

        num_map = (num_buf + RXE_BUF_PER_MAP - 1) / RXE_BUF_PER_MAP;

        mem->map = kmalloc_array(num_map, sizeof(*map), GFP_KERNEL);
        if (!mem->map)
                goto err1;

        for (i = 0; i < num_map; i++) {
                mem->map[i] = kmalloc(sizeof(**map), GFP_KERNEL);
                if (!mem->map[i])
                        goto err2;
        }

        BUILD_BUG_ON(!is_power_of_2(RXE_BUF_PER_MAP));

        mem->map_shift = ilog2(RXE_BUF_PER_MAP);
        mem->map_mask = RXE_BUF_PER_MAP - 1;

        mem->num_buf = num_buf;
        mem->num_map = num_map;
        mem->max_buf = num_map * RXE_BUF_PER_MAP;

        return 0;

err2:
        for (i--; i >= 0; i--)
                kfree(mem->map[i]);

        kfree(mem->map);
err1:
        return -ENOMEM;
}

int rxe_mem_init_dma(struct rxe_dev *rxe, struct rxe_pd *pd,
                     int access, struct rxe_mem *mem)
{
        rxe_mem_init(access, mem);

        mem->pd = pd;
        mem->access = access;
        mem->state = RXE_MEM_STATE_VALID;
        mem->type = RXE_MEM_TYPE_DMA;

        return 0;
}

int rxe_mem_init_user(struct rxe_dev *rxe, struct rxe_pd *pd, u64 start,
                      u64 length, u64 iova, int access, struct ib_udata *udata,
                      struct rxe_mem *mem)
{
        int entry;
        struct rxe_map **map;
        struct rxe_phys_buf *buf = NULL;
        struct ib_umem *umem;
        struct scatterlist *sg;
        int num_buf;
        void *vaddr;
        int err;

        umem = ib_umem_get(pd->ibpd.uobject->context, start, length, access, 0);
        if (IS_ERR(umem)) {
                pr_warn("err %d from rxe_umem_get\n",
                        (int)PTR_ERR(umem));
                err = -EINVAL;
                goto err1;
        }

        mem->umem = umem;
        num_buf = umem->nmap;

        rxe_mem_init(access, mem);

        err = rxe_mem_alloc(rxe, mem, num_buf);
        if (err) {
                pr_warn("err %d from rxe_mem_alloc\n", err);
                ib_umem_release(umem);
                goto err1;
        }

        mem->page_shift = umem->page_shift;
        mem->page_mask = BIT(umem->page_shift) - 1;

        num_buf = 0;
        map = mem->map;
        if (length > 0) {
                buf = map[0]->buf;

                for_each_sg(umem->sg_head.sgl, sg, umem->nmap, entry) {
                        vaddr = page_address(sg_page(sg));
                        if (!vaddr) {
                                pr_warn("null vaddr\n");
                                err = -ENOMEM;
                                goto err1;
                        }

                        buf->addr = (uintptr_t)vaddr;
                        buf->size = BIT(umem->page_shift);
                        num_buf++;
                        buf++;

                        if (num_buf >= RXE_BUF_PER_MAP) {
                                map++;
                                buf = map[0]->buf;
                                num_buf = 0;
                        }
                }
        }

        mem->pd = pd;
        mem->umem = umem;
        mem->access = access;
        mem->length = length;
        mem->iova = iova;
        mem->va = start;
        mem->offset = ib_umem_offset(umem);
        mem->state = RXE_MEM_STATE_VALID;
        mem->type = RXE_MEM_TYPE_MR;

        return 0;

err1:
        return err;
}

int rxe_mem_init_fast(struct rxe_dev *rxe, struct rxe_pd *pd,
                      int max_pages, struct rxe_mem *mem)
{
        int err;

        rxe_mem_init(0, mem);

        /* In fastreg, we also set the rkey */
        mem->ibmr.rkey = mem->ibmr.lkey;

        err = rxe_mem_alloc(rxe, mem, max_pages);
        if (err)
                goto err1;

        mem->pd = pd;
        mem->max_buf = max_pages;
        mem->state = RXE_MEM_STATE_FREE;
        mem->type = RXE_MEM_TYPE_MR;

        return 0;

err1:
        return err;
}

/* Translate an iova within the mem region into a (map index, buffer index,
 * offset-within-buffer) triple. When every buffer is the same power-of-two
 * page size (page_shift != 0) this is pure shifting and masking; otherwise
 * walk the buffer list accumulating sizes.
 */
static void lookup_iova(
        struct rxe_mem *mem,
        u64 iova,
        int *m_out,
        int *n_out,
        size_t *offset_out)
{
        size_t offset = iova - mem->iova + mem->offset;
        int map_index;
        int buf_index;
        u64 length;

        if (likely(mem->page_shift)) {
                *offset_out = offset & mem->page_mask;
                offset >>= mem->page_shift;
                *n_out = offset & mem->map_mask;
                *m_out = offset >> mem->map_shift;
        } else {
                map_index = 0;
                buf_index = 0;

                length = mem->map[map_index]->buf[buf_index].size;

                while (offset >= length) {
                        offset -= length;
                        buf_index++;

                        if (buf_index == RXE_BUF_PER_MAP) {
                                map_index++;
                                buf_index = 0;
                        }
                        length = mem->map[map_index]->buf[buf_index].size;
                }

                *m_out = map_index;
                *n_out = buf_index;
                *offset_out = offset;
        }
}

void *iova_to_vaddr(struct rxe_mem *mem, u64 iova, int length)
{
        size_t offset;
        int m, n;
        void *addr;

        if (mem->state != RXE_MEM_STATE_VALID) {
                pr_warn("mem not in valid state\n");
                addr = NULL;
                goto out;
        }

        if (!mem->map) {
                addr = (void *)(uintptr_t)iova;
                goto out;
        }

        if (mem_check_range(mem, iova, length)) {
                pr_warn("range violation\n");
                addr = NULL;
                goto out;
        }

        lookup_iova(mem, iova, &m, &n, &offset);

        if (offset + length > mem->map[m]->buf[n].size) {
                pr_warn("crosses page boundary\n");
                addr = NULL;
                goto out;
        }

        addr = (void *)(uintptr_t)mem->map[m]->buf[n].addr + offset;

out:
        return addr;
}

/* copy data from a range (vaddr, vaddr+length-1) to or from
 * a mem object starting at iova. Compute incremental value of
 * crc32 if crcp is not zero. caller must hold a reference to mem
 */
int rxe_mem_copy(struct rxe_mem *mem, u64 iova, void *addr, int length,
                 enum copy_direction dir, u32 *crcp)
{
        int err;
        int bytes;
        u8 *va;
        struct rxe_map **map;
        struct rxe_phys_buf *buf;
        int m;
        int i;
        size_t offset;
        u32 crc = crcp ? (*crcp) : 0;

        if (length == 0)
                return 0;

        if (mem->type == RXE_MEM_TYPE_DMA) {
                u8 *src, *dest;

                src = (dir == to_mem_obj) ?
                        addr : ((void *)(uintptr_t)iova);

                dest = (dir == to_mem_obj) ?
                        ((void *)(uintptr_t)iova) : addr;

                memcpy(dest, src, length);

                if (crcp)
                        *crcp = rxe_crc32(to_rdev(mem->pd->ibpd.device),
                                          *crcp, dest, length);

                return 0;
        }

        WARN_ON_ONCE(!mem->map);

        err = mem_check_range(mem, iova, length);
        if (err) {
                err = -EFAULT;
                goto err1;
        }

        lookup_iova(mem, iova, &m, &i, &offset);

        map = mem->map + m;
        buf = map[0]->buf + i;

        while (length > 0) {
                u8 *src, *dest;

                va = (u8 *)(uintptr_t)buf->addr + offset;
                src = (dir == to_mem_obj) ? addr : va;
                dest = (dir == to_mem_obj) ? va : addr;

                bytes = buf->size - offset;

                if (bytes > length)
                        bytes = length;

                memcpy(dest, src, bytes);

                if (crcp)
                        crc = rxe_crc32(to_rdev(mem->pd->ibpd.device),
                                        crc, dest, bytes);

                length -= bytes;
                addr += bytes;

                offset = 0;
                buf++;
                i++;

                if (i == RXE_BUF_PER_MAP) {
                        i = 0;
                        map++;
                        buf = map[0]->buf;
                }
        }

        if (crcp)
                *crcp = crc;

        return 0;

err1:
        return err;
}
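
/* Note that the CRC above is always accumulated over dest after each
 * memcpy(). For a transmit-path caller that copies out of a memory object
 * into the packet buffer (dir == from_mem_obj, with addr pointing into the
 * skb), the resulting CRC therefore covers the packet contents themselves,
 * which is the ordering described in the commit message at the top of this
 * page.
 */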

/* copy data in or out of a wqe, i.e. sg list
 * under the control of a dma descriptor
 */
int copy_data(
        struct rxe_dev *rxe,
        struct rxe_pd *pd,
        int access,
        struct rxe_dma_info *dma,
        void *addr,
        int length,
        enum copy_direction dir,
        u32 *crcp)
{
        int bytes;
        struct rxe_sge *sge = &dma->sge[dma->cur_sge];
        int offset = dma->sge_offset;
        int resid = dma->resid;
        struct rxe_mem *mem = NULL;
        u64 iova;
        int err;

        if (length == 0)
                return 0;

        if (length > resid) {
                err = -EINVAL;
                goto err2;
        }

        if (sge->length && (offset < sge->length)) {
                mem = lookup_mem(pd, access, sge->lkey, lookup_local);
                if (!mem) {
                        err = -EINVAL;
                        goto err1;
                }
        }

        while (length > 0) {
                bytes = length;

                if (offset >= sge->length) {
                        if (mem) {
                                rxe_drop_ref(mem);
                                mem = NULL;
                        }
                        sge++;
                        dma->cur_sge++;
                        offset = 0;

                        if (dma->cur_sge >= dma->num_sge) {
                                err = -ENOSPC;
                                goto err2;
                        }

                        if (sge->length) {
                                mem = lookup_mem(pd, access, sge->lkey,
                                                 lookup_local);
                                if (!mem) {
                                        err = -EINVAL;
                                        goto err1;
                                }
                        } else {
                                continue;
                        }
                }

                if (bytes > sge->length - offset)
                        bytes = sge->length - offset;

                if (bytes > 0) {
                        iova = sge->addr + offset;

                        err = rxe_mem_copy(mem, iova, addr, bytes, dir, crcp);
                        if (err)
                                goto err2;

                        offset += bytes;
                        resid -= bytes;
                        length -= bytes;
                        addr += bytes;
                }
        }

        dma->sge_offset = offset;
        dma->resid = resid;

        if (mem)
                rxe_drop_ref(mem);

        return 0;

err2:
        if (mem)
                rxe_drop_ref(mem);
err1:
        return err;
}

int advance_dma_data(struct rxe_dma_info *dma, unsigned int length)
{
        struct rxe_sge *sge = &dma->sge[dma->cur_sge];
        int offset = dma->sge_offset;
        int resid = dma->resid;

        while (length) {
                unsigned int bytes;

                if (offset >= sge->length) {
                        sge++;
                        dma->cur_sge++;
                        offset = 0;
                        if (dma->cur_sge >= dma->num_sge)
                                return -ENOSPC;
                }

                bytes = length;

                if (bytes > sge->length - offset)
                        bytes = sge->length - offset;

                offset += bytes;
                resid -= bytes;
                length -= bytes;
        }

        dma->sge_offset = offset;
        dma->resid = resid;

        return 0;
}

/* (1) find the mem (mr or mw) corresponding to lkey/rkey
 *     depending on lookup_type
 * (2) verify that the (qp) pd matches the mem pd
 * (3) verify that the mem can support the requested access
 * (4) verify that mem state is valid
 */
struct rxe_mem *lookup_mem(struct rxe_pd *pd, int access, u32 key,
                           enum lookup_type type)
{
        struct rxe_mem *mem;
        struct rxe_dev *rxe = to_rdev(pd->ibpd.device);
        int index = key >> 8;

        if (index >= RXE_MIN_MR_INDEX && index <= RXE_MAX_MR_INDEX) {
                mem = rxe_pool_get_index(&rxe->mr_pool, index);
                if (!mem)
                        goto err1;
        } else {
                goto err1;
        }

        if ((type == lookup_local && mem->lkey != key) ||
            (type == lookup_remote && mem->rkey != key))
                goto err2;

        if (mem->pd != pd)
                goto err2;

        if (access && !(access & mem->access))
                goto err2;

        if (mem->state != RXE_MEM_STATE_VALID)
                goto err2;

        return mem;

err2:
        rxe_drop_ref(mem);
err1:
        return NULL;
}

int rxe_mem_map_pages(struct rxe_dev *rxe, struct rxe_mem *mem,
                      u64 *page, int num_pages, u64 iova)
{
        int i;
        int num_buf;
        int err;
        struct rxe_map **map;
        struct rxe_phys_buf *buf;
        int page_size;

        if (num_pages > mem->max_buf) {
                err = -EINVAL;
                goto err1;
        }

        num_buf = 0;
        page_size = 1 << mem->page_shift;
        map = mem->map;
        buf = map[0]->buf;

        for (i = 0; i < num_pages; i++) {
                buf->addr = *page++;
                buf->size = page_size;
                buf++;
                num_buf++;

                if (num_buf == RXE_BUF_PER_MAP) {
                        map++;
                        buf = map[0]->buf;
                        num_buf = 0;
                }
        }

        mem->iova = iova;
        mem->va = iova;
        mem->length = num_pages << mem->page_shift;
        mem->state = RXE_MEM_STATE_VALID;

        return 0;

err1:
        return err;
}