Merge branches 'amso1100', 'cma', 'cxgb3', 'ehca', 'ipath', 'ipoib', 'iser', 'misc', 'mlx4' and 'nes' into for-next

Roland Dreier 2009-12-15 23:39:25 -08:00
38 changed files with 1077 additions and 906 deletions


@ -36,11 +36,11 @@ Datagram vs Connected modes
fabric with a 2K MTU, the IPoIB MTU will be 2048 - 4 = 2044 bytes.
In connected mode, the IB RC (Reliable Connected) transport is used.
Connected mode is to takes advantage of the connected nature of the
IB transport and allows an MTU up to the maximal IP packet size of
64K, which reduces the number of IP packets needed for handling
large UDP datagrams, TCP segments, etc and increases the performance
for large messages.
Connected mode takes advantage of the connected nature of the IB
transport and allows an MTU up to the maximal IP packet size of 64K,
which reduces the number of IP packets needed for handling large UDP
datagrams, TCP segments, etc and increases the performance for large
messages.
In connected mode, the interface's UD QP is still used for multicast
and communication with peers that don't support connected mode. In
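
For illustration (not part of this commit), here is a minimal user-space sketch that switches an interface into connected mode and raises its MTU. It assumes an interface named ib0; the per-interface sysfs "mode" attribute and the 65520-byte connected-mode ceiling are standard IPoIB, but the helper itself is hypothetical.

#include <fcntl.h>
#include <net/if.h>
#include <string.h>
#include <sys/ioctl.h>
#include <sys/socket.h>
#include <unistd.h>

/* Hypothetical helper: put ib0 into connected mode, then set a 64K-class MTU. */
static int ipoib_set_connected_mode(void)
{
	struct ifreq ifr;
	int fd, sock;

	/* IPoIB exposes the mode per interface: "datagram" or "connected". */
	fd = open("/sys/class/net/ib0/mode", O_WRONLY);
	if (fd < 0)
		return -1;
	if (write(fd, "connected\n", 10) != 10) {
		close(fd);
		return -1;
	}
	close(fd);

	/* Any socket works as an ioctl handle for SIOCSIFMTU. */
	sock = socket(AF_INET, SOCK_DGRAM, 0);
	if (sock < 0)
		return -1;
	memset(&ifr, 0, sizeof ifr);
	strncpy(ifr.ifr_name, "ib0", IFNAMSIZ - 1);
	ifr.ifr_mtu = 65520;	/* close to the 64K IP maximum described above */
	if (ioctl(sock, SIOCSIFMTU, &ifr) < 0) {
		close(sock);
		return -1;
	}
	close(sock);
	return 0;
}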


@ -36,7 +36,6 @@
#include <linux/mutex.h>
#include <linux/inetdevice.h>
#include <linux/workqueue.h>
#include <linux/if_arp.h>
#include <net/arp.h>
#include <net/neighbour.h>
#include <net/route.h>
@ -92,22 +91,12 @@ EXPORT_SYMBOL(rdma_addr_unregister_client);
int rdma_copy_addr(struct rdma_dev_addr *dev_addr, struct net_device *dev,
const unsigned char *dst_dev_addr)
{
switch (dev->type) {
case ARPHRD_INFINIBAND:
dev_addr->dev_type = RDMA_NODE_IB_CA;
break;
case ARPHRD_ETHER:
dev_addr->dev_type = RDMA_NODE_RNIC;
break;
default:
return -EADDRNOTAVAIL;
}
dev_addr->dev_type = dev->type;
memcpy(dev_addr->src_dev_addr, dev->dev_addr, MAX_ADDR_LEN);
memcpy(dev_addr->broadcast, dev->broadcast, MAX_ADDR_LEN);
if (dst_dev_addr)
memcpy(dev_addr->dst_dev_addr, dst_dev_addr, MAX_ADDR_LEN);
dev_addr->src_dev = dev;
dev_addr->bound_dev_if = dev->ifindex;
return 0;
}
EXPORT_SYMBOL(rdma_copy_addr);
@ -117,6 +106,15 @@ int rdma_translate_ip(struct sockaddr *addr, struct rdma_dev_addr *dev_addr)
struct net_device *dev;
int ret = -EADDRNOTAVAIL;
if (dev_addr->bound_dev_if) {
dev = dev_get_by_index(&init_net, dev_addr->bound_dev_if);
if (!dev)
return -ENODEV;
ret = rdma_copy_addr(dev_addr, dev, NULL);
dev_put(dev);
return ret;
}
switch (addr->sa_family) {
case AF_INET:
dev = ip_dev_find(&init_net,
@ -131,6 +129,7 @@ int rdma_translate_ip(struct sockaddr *addr, struct rdma_dev_addr *dev_addr)
#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
case AF_INET6:
read_lock(&dev_base_lock);
for_each_netdev(&init_net, dev) {
if (ipv6_chk_addr(&init_net,
&((struct sockaddr_in6 *) addr)->sin6_addr,
@ -139,6 +138,7 @@ int rdma_translate_ip(struct sockaddr *addr, struct rdma_dev_addr *dev_addr)
break;
}
}
read_unlock(&dev_base_lock);
break;
#endif
}
@ -176,48 +176,9 @@ static void queue_req(struct addr_req *req)
mutex_unlock(&lock);
}
static void addr_send_arp(struct sockaddr *dst_in)
{
struct rtable *rt;
struct flowi fl;
memset(&fl, 0, sizeof fl);
switch (dst_in->sa_family) {
case AF_INET:
fl.nl_u.ip4_u.daddr =
((struct sockaddr_in *) dst_in)->sin_addr.s_addr;
if (ip_route_output_key(&init_net, &rt, &fl))
return;
neigh_event_send(rt->u.dst.neighbour, NULL);
ip_rt_put(rt);
break;
#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
case AF_INET6:
{
struct dst_entry *dst;
fl.nl_u.ip6_u.daddr =
((struct sockaddr_in6 *) dst_in)->sin6_addr;
dst = ip6_route_output(&init_net, NULL, &fl);
if (!dst)
return;
neigh_event_send(dst->neighbour, NULL);
dst_release(dst);
break;
}
#endif
}
}
static int addr4_resolve_remote(struct sockaddr_in *src_in,
struct sockaddr_in *dst_in,
struct rdma_dev_addr *addr)
static int addr4_resolve(struct sockaddr_in *src_in,
struct sockaddr_in *dst_in,
struct rdma_dev_addr *addr)
{
__be32 src_ip = src_in->sin_addr.s_addr;
__be32 dst_ip = dst_in->sin_addr.s_addr;
@ -229,10 +190,22 @@ static int addr4_resolve_remote(struct sockaddr_in *src_in,
memset(&fl, 0, sizeof fl);
fl.nl_u.ip4_u.daddr = dst_ip;
fl.nl_u.ip4_u.saddr = src_ip;
fl.oif = addr->bound_dev_if;
ret = ip_route_output_key(&init_net, &rt, &fl);
if (ret)
goto out;
src_in->sin_family = AF_INET;
src_in->sin_addr.s_addr = rt->rt_src;
if (rt->idev->dev->flags & IFF_LOOPBACK) {
ret = rdma_translate_ip((struct sockaddr *) dst_in, addr);
if (!ret)
memcpy(addr->dst_dev_addr, addr->src_dev_addr, MAX_ADDR_LEN);
goto put;
}
/* If the device does ARP internally, return 'done' */
if (rt->idev->dev->flags & IFF_NOARP) {
rdma_copy_addr(addr, rt->idev->dev, NULL);
@ -240,21 +213,14 @@ static int addr4_resolve_remote(struct sockaddr_in *src_in,
}
neigh = neigh_lookup(&arp_tbl, &rt->rt_gateway, rt->idev->dev);
if (!neigh) {
if (!neigh || !(neigh->nud_state & NUD_VALID)) {
neigh_event_send(rt->u.dst.neighbour, NULL);
ret = -ENODATA;
if (neigh)
goto release;
goto put;
}
if (!(neigh->nud_state & NUD_VALID)) {
ret = -ENODATA;
goto release;
}
if (!src_ip) {
src_in->sin_family = dst_in->sin_family;
src_in->sin_addr.s_addr = rt->rt_src;
}
ret = rdma_copy_addr(addr, neigh->dev, neigh->ha);
release:
neigh_release(neigh);
@ -265,52 +231,77 @@ out:
}
#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
static int addr6_resolve_remote(struct sockaddr_in6 *src_in,
struct sockaddr_in6 *dst_in,
struct rdma_dev_addr *addr)
static int addr6_resolve(struct sockaddr_in6 *src_in,
struct sockaddr_in6 *dst_in,
struct rdma_dev_addr *addr)
{
struct flowi fl;
struct neighbour *neigh;
struct dst_entry *dst;
int ret = -ENODATA;
int ret;
memset(&fl, 0, sizeof fl);
fl.nl_u.ip6_u.daddr = dst_in->sin6_addr;
fl.nl_u.ip6_u.saddr = src_in->sin6_addr;
ipv6_addr_copy(&fl.fl6_dst, &dst_in->sin6_addr);
ipv6_addr_copy(&fl.fl6_src, &src_in->sin6_addr);
fl.oif = addr->bound_dev_if;
dst = ip6_route_output(&init_net, NULL, &fl);
if (!dst)
return ret;
if ((ret = dst->error))
goto put;
if (dst->dev->flags & IFF_NOARP) {
ret = rdma_copy_addr(addr, dst->dev, NULL);
} else {
neigh = dst->neighbour;
if (neigh && (neigh->nud_state & NUD_VALID))
ret = rdma_copy_addr(addr, neigh->dev, neigh->ha);
if (ipv6_addr_any(&fl.fl6_src)) {
ret = ipv6_dev_get_saddr(&init_net, ip6_dst_idev(dst)->dev,
&fl.fl6_dst, 0, &fl.fl6_src);
if (ret)
goto put;
src_in->sin6_family = AF_INET6;
ipv6_addr_copy(&src_in->sin6_addr, &fl.fl6_src);
}
if (dst->dev->flags & IFF_LOOPBACK) {
ret = rdma_translate_ip((struct sockaddr *) dst_in, addr);
if (!ret)
memcpy(addr->dst_dev_addr, addr->src_dev_addr, MAX_ADDR_LEN);
goto put;
}
/* If the device does ARP internally, return 'done' */
if (dst->dev->flags & IFF_NOARP) {
ret = rdma_copy_addr(addr, dst->dev, NULL);
goto put;
}
neigh = dst->neighbour;
if (!neigh || !(neigh->nud_state & NUD_VALID)) {
neigh_event_send(dst->neighbour, NULL);
ret = -ENODATA;
goto put;
}
ret = rdma_copy_addr(addr, dst->dev, neigh->ha);
put:
dst_release(dst);
return ret;
}
#else
static int addr6_resolve_remote(struct sockaddr_in6 *src_in,
struct sockaddr_in6 *dst_in,
struct rdma_dev_addr *addr)
static int addr6_resolve(struct sockaddr_in6 *src_in,
struct sockaddr_in6 *dst_in,
struct rdma_dev_addr *addr)
{
return -EADDRNOTAVAIL;
}
#endif
static int addr_resolve_remote(struct sockaddr *src_in,
struct sockaddr *dst_in,
struct rdma_dev_addr *addr)
static int addr_resolve(struct sockaddr *src_in,
struct sockaddr *dst_in,
struct rdma_dev_addr *addr)
{
if (src_in->sa_family == AF_INET) {
return addr4_resolve_remote((struct sockaddr_in *) src_in,
return addr4_resolve((struct sockaddr_in *) src_in,
(struct sockaddr_in *) dst_in, addr);
} else
return addr6_resolve_remote((struct sockaddr_in6 *) src_in,
return addr6_resolve((struct sockaddr_in6 *) src_in,
(struct sockaddr_in6 *) dst_in, addr);
}
@ -327,8 +318,7 @@ static void process_req(struct work_struct *work)
if (req->status == -ENODATA) {
src_in = (struct sockaddr *) &req->src_addr;
dst_in = (struct sockaddr *) &req->dst_addr;
req->status = addr_resolve_remote(src_in, dst_in,
req->addr);
req->status = addr_resolve(src_in, dst_in, req->addr);
if (req->status && time_after_eq(jiffies, req->timeout))
req->status = -ETIMEDOUT;
else if (req->status == -ENODATA)
@ -352,82 +342,6 @@ static void process_req(struct work_struct *work)
}
}
static int addr_resolve_local(struct sockaddr *src_in,
struct sockaddr *dst_in,
struct rdma_dev_addr *addr)
{
struct net_device *dev;
int ret;
switch (dst_in->sa_family) {
case AF_INET:
{
__be32 src_ip = ((struct sockaddr_in *) src_in)->sin_addr.s_addr;
__be32 dst_ip = ((struct sockaddr_in *) dst_in)->sin_addr.s_addr;
dev = ip_dev_find(&init_net, dst_ip);
if (!dev)
return -EADDRNOTAVAIL;
if (ipv4_is_zeronet(src_ip)) {
src_in->sa_family = dst_in->sa_family;
((struct sockaddr_in *) src_in)->sin_addr.s_addr = dst_ip;
ret = rdma_copy_addr(addr, dev, dev->dev_addr);
} else if (ipv4_is_loopback(src_ip)) {
ret = rdma_translate_ip(dst_in, addr);
if (!ret)
memcpy(addr->dst_dev_addr, dev->dev_addr, MAX_ADDR_LEN);
} else {
ret = rdma_translate_ip(src_in, addr);
if (!ret)
memcpy(addr->dst_dev_addr, dev->dev_addr, MAX_ADDR_LEN);
}
dev_put(dev);
break;
}
#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
case AF_INET6:
{
struct in6_addr *a;
for_each_netdev(&init_net, dev)
if (ipv6_chk_addr(&init_net,
&((struct sockaddr_in6 *) dst_in)->sin6_addr,
dev, 1))
break;
if (!dev)
return -EADDRNOTAVAIL;
a = &((struct sockaddr_in6 *) src_in)->sin6_addr;
if (ipv6_addr_any(a)) {
src_in->sa_family = dst_in->sa_family;
((struct sockaddr_in6 *) src_in)->sin6_addr =
((struct sockaddr_in6 *) dst_in)->sin6_addr;
ret = rdma_copy_addr(addr, dev, dev->dev_addr);
} else if (ipv6_addr_loopback(a)) {
ret = rdma_translate_ip(dst_in, addr);
if (!ret)
memcpy(addr->dst_dev_addr, dev->dev_addr, MAX_ADDR_LEN);
} else {
ret = rdma_translate_ip(src_in, addr);
if (!ret)
memcpy(addr->dst_dev_addr, dev->dev_addr, MAX_ADDR_LEN);
}
break;
}
#endif
default:
ret = -EADDRNOTAVAIL;
break;
}
return ret;
}
int rdma_resolve_ip(struct rdma_addr_client *client,
struct sockaddr *src_addr, struct sockaddr *dst_addr,
struct rdma_dev_addr *addr, int timeout_ms,
@ -443,22 +357,28 @@ int rdma_resolve_ip(struct rdma_addr_client *client,
if (!req)
return -ENOMEM;
if (src_addr)
memcpy(&req->src_addr, src_addr, ip_addr_size(src_addr));
memcpy(&req->dst_addr, dst_addr, ip_addr_size(dst_addr));
src_in = (struct sockaddr *) &req->src_addr;
dst_in = (struct sockaddr *) &req->dst_addr;
if (src_addr) {
if (src_addr->sa_family != dst_addr->sa_family) {
ret = -EINVAL;
goto err;
}
memcpy(src_in, src_addr, ip_addr_size(src_addr));
} else {
src_in->sa_family = dst_addr->sa_family;
}
memcpy(dst_in, dst_addr, ip_addr_size(dst_addr));
req->addr = addr;
req->callback = callback;
req->context = context;
req->client = client;
atomic_inc(&client->refcount);
src_in = (struct sockaddr *) &req->src_addr;
dst_in = (struct sockaddr *) &req->dst_addr;
req->status = addr_resolve_local(src_in, dst_in, addr);
if (req->status == -EADDRNOTAVAIL)
req->status = addr_resolve_remote(src_in, dst_in, addr);
req->status = addr_resolve(src_in, dst_in, addr);
switch (req->status) {
case 0:
req->timeout = jiffies;
@ -467,15 +387,16 @@ int rdma_resolve_ip(struct rdma_addr_client *client,
case -ENODATA:
req->timeout = msecs_to_jiffies(timeout_ms) + jiffies;
queue_req(req);
addr_send_arp(dst_in);
break;
default:
ret = req->status;
atomic_dec(&client->refcount);
kfree(req);
break;
goto err;
}
return ret;
err:
kfree(req);
return ret;
}
EXPORT_SYMBOL(rdma_resolve_ip);
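
Taken together, the hunks above fold addr_resolve_local() and addr_resolve_remote() into the single addr_resolve() path, so rdma_resolve_ip() treats local, loopback and remote destinations uniformly. A user-space sketch of exercising this path through librdmacm (illustrative only; the calls are standard librdmacm API, the destination address is an example):

#include <arpa/inet.h>
#include <netinet/in.h>
#include <string.h>
#include <rdma/rdma_cma.h>

/* Resolve an IPv4 destination; passing a NULL source lets the kernel
 * pick one, as in addr_resolve() above. */
static int resolve_example(void)
{
	struct rdma_event_channel *ch;
	struct rdma_cm_id *id;
	struct rdma_cm_event *event;
	struct sockaddr_in dst;
	int ret;

	memset(&dst, 0, sizeof dst);
	dst.sin_family = AF_INET;
	inet_pton(AF_INET, "192.0.2.1", &dst.sin_addr);	/* example address */

	ch = rdma_create_event_channel();
	if (!ch)
		return -1;
	ret = rdma_create_id(ch, &id, NULL, RDMA_PS_TCP);
	if (ret)
		goto out;

	/* 2000 ms, mirroring the timeout_ms argument of rdma_resolve_ip() */
	ret = rdma_resolve_addr(id, NULL, (struct sockaddr *) &dst, 2000);
	if (ret)
		goto out_id;

	/* success is reported as RDMA_CM_EVENT_ADDR_RESOLVED */
	ret = rdma_get_cm_event(ch, &event);
	if (!ret)
		rdma_ack_cm_event(event);
out_id:
	rdma_destroy_id(id);
out:
	rdma_destroy_event_channel(ch);
	return ret;
}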


@ -330,17 +330,7 @@ static int cma_acquire_dev(struct rdma_id_private *id_priv)
union ib_gid gid;
int ret = -ENODEV;
switch (rdma_node_get_transport(dev_addr->dev_type)) {
case RDMA_TRANSPORT_IB:
ib_addr_get_sgid(dev_addr, &gid);
break;
case RDMA_TRANSPORT_IWARP:
iw_addr_get_sgid(dev_addr, &gid);
break;
default:
return -ENODEV;
}
rdma_addr_get_sgid(dev_addr, &gid);
list_for_each_entry(cma_dev, &dev_list, list) {
ret = ib_find_cached_gid(cma_dev->device, &gid,
&id_priv->id.port_num, NULL);
@ -1032,11 +1022,17 @@ static struct rdma_id_private *cma_new_conn_id(struct rdma_cm_id *listen_id,
if (rt->num_paths == 2)
rt->path_rec[1] = *ib_event->param.req_rcvd.alternate_path;
ib_addr_set_dgid(&rt->addr.dev_addr, &rt->path_rec[0].dgid);
ret = rdma_translate_ip((struct sockaddr *) &id->route.addr.src_addr,
&id->route.addr.dev_addr);
if (ret)
goto destroy_id;
if (cma_any_addr((struct sockaddr *) &rt->addr.src_addr)) {
rt->addr.dev_addr.dev_type = ARPHRD_INFINIBAND;
rdma_addr_set_sgid(&rt->addr.dev_addr, &rt->path_rec[0].sgid);
ib_addr_set_pkey(&rt->addr.dev_addr, rt->path_rec[0].pkey);
} else {
ret = rdma_translate_ip((struct sockaddr *) &rt->addr.src_addr,
&rt->addr.dev_addr);
if (ret)
goto destroy_id;
}
rdma_addr_set_dgid(&rt->addr.dev_addr, &rt->path_rec[0].dgid);
id_priv = container_of(id, struct rdma_id_private, id);
id_priv->state = CMA_CONNECT;
@ -1071,10 +1067,12 @@ static struct rdma_id_private *cma_new_udp_id(struct rdma_cm_id *listen_id,
cma_save_net_info(&id->route.addr, &listen_id->route.addr,
ip_ver, port, src, dst);
ret = rdma_translate_ip((struct sockaddr *) &id->route.addr.src_addr,
&id->route.addr.dev_addr);
if (ret)
goto err;
if (!cma_any_addr((struct sockaddr *) &id->route.addr.src_addr)) {
ret = rdma_translate_ip((struct sockaddr *) &id->route.addr.src_addr,
&id->route.addr.dev_addr);
if (ret)
goto err;
}
id_priv = container_of(id, struct rdma_id_private, id);
id_priv->state = CMA_CONNECT;
@ -1474,15 +1472,6 @@ static void cma_listen_on_all(struct rdma_id_private *id_priv)
mutex_unlock(&lock);
}
static int cma_bind_any(struct rdma_cm_id *id, sa_family_t af)
{
struct sockaddr_storage addr_in;
memset(&addr_in, 0, sizeof addr_in);
addr_in.ss_family = af;
return rdma_bind_addr(id, (struct sockaddr *) &addr_in);
}
int rdma_listen(struct rdma_cm_id *id, int backlog)
{
struct rdma_id_private *id_priv;
@ -1490,7 +1479,8 @@ int rdma_listen(struct rdma_cm_id *id, int backlog)
id_priv = container_of(id, struct rdma_id_private, id);
if (id_priv->state == CMA_IDLE) {
ret = cma_bind_any(id, AF_INET);
((struct sockaddr *) &id->route.addr.src_addr)->sa_family = AF_INET;
ret = rdma_bind_addr(id, (struct sockaddr *) &id->route.addr.src_addr);
if (ret)
return ret;
}
@ -1565,8 +1555,8 @@ static int cma_query_ib_route(struct rdma_id_private *id_priv, int timeout_ms,
struct sockaddr_in6 *sin6;
memset(&path_rec, 0, sizeof path_rec);
ib_addr_get_sgid(&addr->dev_addr, &path_rec.sgid);
ib_addr_get_dgid(&addr->dev_addr, &path_rec.dgid);
rdma_addr_get_sgid(&addr->dev_addr, &path_rec.sgid);
rdma_addr_get_dgid(&addr->dev_addr, &path_rec.dgid);
path_rec.pkey = cpu_to_be16(ib_addr_get_pkey(&addr->dev_addr));
path_rec.numb_path = 1;
path_rec.reversible = 1;
@ -1781,7 +1771,11 @@ port_found:
if (ret)
goto out;
ib_addr_set_sgid(&id_priv->id.route.addr.dev_addr, &gid);
id_priv->id.route.addr.dev_addr.dev_type =
(rdma_node_get_transport(cma_dev->device->node_type) == RDMA_TRANSPORT_IB) ?
ARPHRD_INFINIBAND : ARPHRD_ETHER;
rdma_addr_set_sgid(&id_priv->id.route.addr.dev_addr, &gid);
ib_addr_set_pkey(&id_priv->id.route.addr.dev_addr, pkey);
id_priv->id.port_num = p;
cma_attach_to_dev(id_priv, cma_dev);
@ -1839,7 +1833,7 @@ out:
static int cma_resolve_loopback(struct rdma_id_private *id_priv)
{
struct cma_work *work;
struct sockaddr_in *src_in, *dst_in;
struct sockaddr *src, *dst;
union ib_gid gid;
int ret;
@ -1853,14 +1847,19 @@ static int cma_resolve_loopback(struct rdma_id_private *id_priv)
goto err;
}
ib_addr_get_sgid(&id_priv->id.route.addr.dev_addr, &gid);
ib_addr_set_dgid(&id_priv->id.route.addr.dev_addr, &gid);
rdma_addr_get_sgid(&id_priv->id.route.addr.dev_addr, &gid);
rdma_addr_set_dgid(&id_priv->id.route.addr.dev_addr, &gid);
if (cma_zero_addr((struct sockaddr *) &id_priv->id.route.addr.src_addr)) {
src_in = (struct sockaddr_in *)&id_priv->id.route.addr.src_addr;
dst_in = (struct sockaddr_in *)&id_priv->id.route.addr.dst_addr;
src_in->sin_family = dst_in->sin_family;
src_in->sin_addr.s_addr = dst_in->sin_addr.s_addr;
src = (struct sockaddr *) &id_priv->id.route.addr.src_addr;
if (cma_zero_addr(src)) {
dst = (struct sockaddr *) &id_priv->id.route.addr.dst_addr;
if ((src->sa_family = dst->sa_family) == AF_INET) {
((struct sockaddr_in *) src)->sin_addr.s_addr =
((struct sockaddr_in *) dst)->sin_addr.s_addr;
} else {
ipv6_addr_copy(&((struct sockaddr_in6 *) src)->sin6_addr,
&((struct sockaddr_in6 *) dst)->sin6_addr);
}
}
work->id = id_priv;
@ -1878,10 +1877,14 @@ err:
static int cma_bind_addr(struct rdma_cm_id *id, struct sockaddr *src_addr,
struct sockaddr *dst_addr)
{
if (src_addr && src_addr->sa_family)
return rdma_bind_addr(id, src_addr);
else
return cma_bind_any(id, dst_addr->sa_family);
if (!src_addr || !src_addr->sa_family) {
src_addr = (struct sockaddr *) &id->route.addr.src_addr;
if ((src_addr->sa_family = dst_addr->sa_family) == AF_INET6) {
((struct sockaddr_in6 *) src_addr)->sin6_scope_id =
((struct sockaddr_in6 *) dst_addr)->sin6_scope_id;
}
}
return rdma_bind_addr(id, src_addr);
}
int rdma_resolve_addr(struct rdma_cm_id *id, struct sockaddr *src_addr,
@ -2077,6 +2080,25 @@ static int cma_get_port(struct rdma_id_private *id_priv)
return ret;
}
static int cma_check_linklocal(struct rdma_dev_addr *dev_addr,
struct sockaddr *addr)
{
#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
struct sockaddr_in6 *sin6;
if (addr->sa_family != AF_INET6)
return 0;
sin6 = (struct sockaddr_in6 *) addr;
if ((ipv6_addr_type(&sin6->sin6_addr) & IPV6_ADDR_LINKLOCAL) &&
!sin6->sin6_scope_id)
return -EINVAL;
dev_addr->bound_dev_if = sin6->sin6_scope_id;
#endif
return 0;
}
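
cma_check_linklocal() makes the requirement explicit: an IPv6 link-local bind must name its interface via sin6_scope_id. A hedged caller-side sketch (not from this commit; the interface name and address are examples):

#include <arpa/inet.h>
#include <net/if.h>
#include <netinet/in.h>
#include <rdma/rdma_cma.h>

/* Bind to fe80::1%ib0; without the scope id, cma_check_linklocal()
 * above fails the bind with -EINVAL. */
static int bind_linklocal(struct rdma_cm_id *id)
{
	struct sockaddr_in6 sin6 = { .sin6_family = AF_INET6 };

	inet_pton(AF_INET6, "fe80::1", &sin6.sin6_addr);	/* example address */
	sin6.sin6_scope_id = if_nametoindex("ib0");		/* interface index */

	return rdma_bind_addr(id, (struct sockaddr *) &sin6);
}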
int rdma_bind_addr(struct rdma_cm_id *id, struct sockaddr *addr)
{
struct rdma_id_private *id_priv;
@ -2089,7 +2111,13 @@ int rdma_bind_addr(struct rdma_cm_id *id, struct sockaddr *addr)
if (!cma_comp_exch(id_priv, CMA_IDLE, CMA_ADDR_BOUND))
return -EINVAL;
if (!cma_any_addr(addr)) {
ret = cma_check_linklocal(&id->route.addr.dev_addr, addr);
if (ret)
goto err1;
if (cma_loopback_addr(addr)) {
ret = cma_bind_loopback(id_priv);
} else if (!cma_zero_addr(addr)) {
ret = rdma_translate_ip(addr, &id->route.addr.dev_addr);
if (ret)
goto err1;
@ -2108,7 +2136,7 @@ int rdma_bind_addr(struct rdma_cm_id *id, struct sockaddr *addr)
return 0;
err2:
if (!cma_any_addr(addr)) {
if (id_priv->cma_dev) {
mutex_lock(&lock);
cma_detach_from_dev(id_priv);
mutex_unlock(&lock);
@ -2687,10 +2715,15 @@ static void cma_set_mgid(struct rdma_id_private *id_priv,
if (cma_any_addr(addr)) {
memset(mgid, 0, sizeof *mgid);
} else if ((addr->sa_family == AF_INET6) &&
((be32_to_cpu(sin6->sin6_addr.s6_addr32[0]) & 0xFF10A01B) ==
((be32_to_cpu(sin6->sin6_addr.s6_addr32[0]) & 0xFFF0FFFF) ==
0xFF10A01B)) {
/* IPv6 address is an SA assigned MGID. */
memcpy(mgid, &sin6->sin6_addr, sizeof *mgid);
} else if ((addr->sa_family == AF_INET6)) {
ipv6_ib_mc_map(&sin6->sin6_addr, dev_addr->broadcast, mc_map);
if (id_priv->id.ps == RDMA_PS_UDP)
mc_map[7] = 0x01; /* Use RDMA CM signature */
*mgid = *(union ib_gid *) (mc_map + 4);
} else {
ip_ib_mc_map(sin->sin_addr.s_addr, dev_addr->broadcast, mc_map);
if (id_priv->id.ps == RDMA_PS_UDP)
@ -2716,7 +2749,7 @@ static int cma_join_ib_multicast(struct rdma_id_private *id_priv,
cma_set_mgid(id_priv, (struct sockaddr *) &mc->addr, &rec.mgid);
if (id_priv->id.ps == RDMA_PS_UDP)
rec.qkey = cpu_to_be32(RDMA_UDP_QKEY);
ib_addr_get_sgid(dev_addr, &rec.port_gid);
rdma_addr_get_sgid(dev_addr, &rec.port_gid);
rec.pkey = cpu_to_be16(ib_addr_get_pkey(dev_addr));
rec.join_state = 1;
@ -2815,7 +2848,7 @@ static int cma_netdev_change(struct net_device *ndev, struct rdma_id_private *id
dev_addr = &id_priv->id.route.addr.dev_addr;
if ((dev_addr->src_dev == ndev) &&
if ((dev_addr->bound_dev_if == ndev->ifindex) &&
memcmp(dev_addr->src_dev_addr, ndev->dev_addr, ndev->addr_len)) {
printk(KERN_INFO "RDMA CM addr change for ndev %s used by id %p\n",
ndev->name, &id_priv->id);


@ -604,6 +604,12 @@ retry:
return ret ? ret : id;
}
void ib_sa_unpack_path(void *attribute, struct ib_sa_path_rec *rec)
{
ib_unpack(path_rec_table, ARRAY_SIZE(path_rec_table), attribute, rec);
}
EXPORT_SYMBOL(ib_sa_unpack_path);
static void ib_sa_path_rec_callback(struct ib_sa_query *sa_query,
int status,
struct ib_sa_mad *mad)


@ -43,6 +43,7 @@
#include <rdma/rdma_user_cm.h>
#include <rdma/ib_marshall.h>
#include <rdma/rdma_cm.h>
#include <rdma/rdma_cm_ib.h>
MODULE_AUTHOR("Sean Hefty");
MODULE_DESCRIPTION("RDMA Userspace Connection Manager Access");
@ -562,10 +563,10 @@ static void ucma_copy_ib_route(struct rdma_ucm_query_route_resp *resp,
switch (route->num_paths) {
case 0:
dev_addr = &route->addr.dev_addr;
ib_addr_get_dgid(dev_addr,
(union ib_gid *) &resp->ib_route[0].dgid);
ib_addr_get_sgid(dev_addr,
(union ib_gid *) &resp->ib_route[0].sgid);
rdma_addr_get_dgid(dev_addr,
(union ib_gid *) &resp->ib_route[0].dgid);
rdma_addr_get_sgid(dev_addr,
(union ib_gid *) &resp->ib_route[0].sgid);
resp->ib_route[0].pkey = cpu_to_be16(ib_addr_get_pkey(dev_addr));
break;
case 2:
@ -812,6 +813,51 @@ static int ucma_set_option_id(struct ucma_context *ctx, int optname,
return ret;
}
static int ucma_set_ib_path(struct ucma_context *ctx,
struct ib_path_rec_data *path_data, size_t optlen)
{
struct ib_sa_path_rec sa_path;
struct rdma_cm_event event;
int ret;
if (optlen % sizeof(*path_data))
return -EINVAL;
for (; optlen; optlen -= sizeof(*path_data), path_data++) {
if (path_data->flags == (IB_PATH_GMP | IB_PATH_PRIMARY |
IB_PATH_BIDIRECTIONAL))
break;
}
if (!optlen)
return -EINVAL;
ib_sa_unpack_path(path_data->path_rec, &sa_path);
ret = rdma_set_ib_paths(ctx->cm_id, &sa_path, 1);
if (ret)
return ret;
memset(&event, 0, sizeof event);
event.event = RDMA_CM_EVENT_ROUTE_RESOLVED;
return ucma_event_handler(ctx->cm_id, &event);
}
static int ucma_set_option_ib(struct ucma_context *ctx, int optname,
void *optval, size_t optlen)
{
int ret;
switch (optname) {
case RDMA_OPTION_IB_PATH:
ret = ucma_set_ib_path(ctx, optval, optlen);
break;
default:
ret = -ENOSYS;
}
return ret;
}
static int ucma_set_option_level(struct ucma_context *ctx, int level,
int optname, void *optval, size_t optlen)
{
@ -821,6 +867,9 @@ static int ucma_set_option_level(struct ucma_context *ctx, int level,
case RDMA_OPTION_ID:
ret = ucma_set_option_id(ctx, optname, optval, optlen);
break;
case RDMA_OPTION_IB:
ret = ucma_set_option_ib(ctx, optname, optval, optlen);
break;
default:
ret = -ENOSYS;
}
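
For context, the new RDMA_OPTION_IB_PATH option is reachable from user space through librdmacm's rdma_set_option(). A hedged sketch (the option constants mirror the kernel definitions used here; the exact user-space header that carries them is an assumption):

#include <rdma/rdma_cma.h>

/* Hand pre-resolved paths (e.g. from an SA/path-record cache) to the
 * kernel. struct ib_path_rec_data is the packed format from
 * rdma/ib_sa.h; at least one entry must carry IB_PATH_GMP |
 * IB_PATH_PRIMARY | IB_PATH_BIDIRECTIONAL or ucma_set_ib_path()
 * above rejects the buffer. */
static int set_prefetched_paths(struct rdma_cm_id *id,
				struct ib_path_rec_data *paths, int npaths)
{
	return rdma_set_option(id, RDMA_OPTION_IB, RDMA_OPTION_IB_PATH,
			       paths, npaths * sizeof(*paths));
}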


@ -285,7 +285,7 @@ ssize_t ib_uverbs_get_context(struct ib_uverbs_file *file,
ucontext = ibdev->alloc_ucontext(ibdev, &udata);
if (IS_ERR(ucontext)) {
ret = PTR_ERR(file->ucontext);
ret = PTR_ERR(ucontext);
goto err;
}


@ -365,18 +365,19 @@ int iwch_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
spin_lock_irqsave(&qhp->lock, flag);
if (qhp->attr.state > IWCH_QP_STATE_RTS) {
spin_unlock_irqrestore(&qhp->lock, flag);
return -EINVAL;
err = -EINVAL;
goto out;
}
num_wrs = Q_FREECNT(qhp->wq.sq_rptr, qhp->wq.sq_wptr,
qhp->wq.sq_size_log2);
if (num_wrs <= 0) {
spin_unlock_irqrestore(&qhp->lock, flag);
return -ENOMEM;
err = -ENOMEM;
goto out;
}
while (wr) {
if (num_wrs == 0) {
err = -ENOMEM;
*bad_wr = wr;
break;
}
idx = Q_PTR2IDX(qhp->wq.wptr, qhp->wq.size_log2);
@ -428,10 +429,8 @@ int iwch_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
wr->opcode);
err = -EINVAL;
}
if (err) {
*bad_wr = wr;
if (err)
break;
}
wqe->send.wrid.id0.hi = qhp->wq.sq_wptr;
sqp->wr_id = wr->wr_id;
sqp->opcode = wr2opcode(t3_wr_opcode);
@ -454,6 +453,10 @@ int iwch_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
}
spin_unlock_irqrestore(&qhp->lock, flag);
ring_doorbell(qhp->wq.doorbell, qhp->wq.qpid);
out:
if (err)
*bad_wr = wr;
return err;
}
@ -471,18 +474,19 @@ int iwch_post_receive(struct ib_qp *ibqp, struct ib_recv_wr *wr,
spin_lock_irqsave(&qhp->lock, flag);
if (qhp->attr.state > IWCH_QP_STATE_RTS) {
spin_unlock_irqrestore(&qhp->lock, flag);
return -EINVAL;
err = -EINVAL;
goto out;
}
num_wrs = Q_FREECNT(qhp->wq.rq_rptr, qhp->wq.rq_wptr,
qhp->wq.rq_size_log2) - 1;
if (!wr) {
spin_unlock_irqrestore(&qhp->lock, flag);
return -EINVAL;
err = -ENOMEM;
goto out;
}
while (wr) {
if (wr->num_sge > T3_MAX_SGE) {
err = -EINVAL;
*bad_wr = wr;
break;
}
idx = Q_PTR2IDX(qhp->wq.wptr, qhp->wq.size_log2);
@ -494,10 +498,10 @@ int iwch_post_receive(struct ib_qp *ibqp, struct ib_recv_wr *wr,
err = build_zero_stag_recv(qhp, wqe, wr);
else
err = -ENOMEM;
if (err) {
*bad_wr = wr;
if (err)
break;
}
build_fw_riwrh((void *) wqe, T3_WR_RCV, T3_COMPLETION_FLAG,
Q_GENBIT(qhp->wq.wptr, qhp->wq.size_log2),
0, sizeof(struct t3_receive_wr) >> 3, T3_SOPEOP);
@ -511,6 +515,10 @@ int iwch_post_receive(struct ib_qp *ibqp, struct ib_recv_wr *wr,
}
spin_unlock_irqrestore(&qhp->lock, flag);
ring_doorbell(qhp->wq.doorbell, qhp->wq.qpid);
out:
if (err)
*bad_wr = wr;
return err;
}
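
Both hunks above converge their error paths so that *bad_wr is assigned exactly once, at the out: label. The verbs contract being implemented: on failure, *bad_wr points at the first work request that was not posted, telling the caller how far the list got. A hedged caller-side sketch (the helpers are hypothetical):

#include <rdma/ib_verbs.h>

/* Illustrative caller, not from this commit. */
static void post_list(struct ib_qp *qp, struct ib_send_wr *wr_list)
{
	struct ib_send_wr *bad_wr, *wr;
	int ret;

	ret = ib_post_send(qp, wr_list, &bad_wr);
	if (ret) {
		/* WRs before bad_wr were accepted; bad_wr onward were not. */
		for (wr = wr_list; wr != bad_wr; wr = wr->next)
			note_posted(wr);	/* hypothetical helper */
		requeue_from(bad_wr);		/* hypothetical helper */
	}
}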


@ -375,6 +375,7 @@ extern rwlock_t ehca_qp_idr_lock;
extern rwlock_t ehca_cq_idr_lock;
extern struct idr ehca_qp_idr;
extern struct idr ehca_cq_idr;
extern spinlock_t shca_list_lock;
extern int ehca_static_rate;
extern int ehca_port_act_time;


@ -169,12 +169,15 @@ int ehca_destroy_eq(struct ehca_shca *shca, struct ehca_eq *eq)
unsigned long flags;
u64 h_ret;
spin_lock_irqsave(&eq->spinlock, flags);
ibmebus_free_irq(eq->ist, (void *)shca);
h_ret = hipz_h_destroy_eq(shca->ipz_hca_handle, eq);
spin_lock_irqsave(&shca_list_lock, flags);
eq->is_initialized = 0;
spin_unlock_irqrestore(&shca_list_lock, flags);
spin_unlock_irqrestore(&eq->spinlock, flags);
tasklet_kill(&eq->interrupt_task);
h_ret = hipz_h_destroy_eq(shca->ipz_hca_handle, eq);
if (h_ret != H_SUCCESS) {
ehca_err(&shca->ib_device, "Can't free EQ resources.");


@ -123,7 +123,7 @@ DEFINE_IDR(ehca_qp_idr);
DEFINE_IDR(ehca_cq_idr);
static LIST_HEAD(shca_list); /* list of all registered ehcas */
static DEFINE_SPINLOCK(shca_list_lock);
DEFINE_SPINLOCK(shca_list_lock);
static struct timer_list poll_eqs_timer;


@ -400,7 +400,6 @@ static inline void map_ib_wc_status(u32 cqe_status,
static inline int post_one_send(struct ehca_qp *my_qp,
struct ib_send_wr *cur_send_wr,
struct ib_send_wr **bad_send_wr,
int hidden)
{
struct ehca_wqe *wqe_p;
@ -412,8 +411,6 @@ static inline int post_one_send(struct ehca_qp *my_qp,
wqe_p = ipz_qeit_get_inc(&my_qp->ipz_squeue);
if (unlikely(!wqe_p)) {
/* too many posted work requests: queue overflow */
if (bad_send_wr)
*bad_send_wr = cur_send_wr;
ehca_err(my_qp->ib_qp.device, "Too many posted WQEs "
"qp_num=%x", my_qp->ib_qp.qp_num);
return -ENOMEM;
@ -433,8 +430,6 @@ static inline int post_one_send(struct ehca_qp *my_qp,
*/
if (unlikely(ret)) {
my_qp->ipz_squeue.current_q_offset = start_offset;
if (bad_send_wr)
*bad_send_wr = cur_send_wr;
ehca_err(my_qp->ib_qp.device, "Could not write WQE "
"qp_num=%x", my_qp->ib_qp.qp_num);
return -EINVAL;
@ -448,7 +443,6 @@ int ehca_post_send(struct ib_qp *qp,
struct ib_send_wr **bad_send_wr)
{
struct ehca_qp *my_qp = container_of(qp, struct ehca_qp, ib_qp);
struct ib_send_wr *cur_send_wr;
int wqe_cnt = 0;
int ret = 0;
unsigned long flags;
@ -457,7 +451,8 @@ int ehca_post_send(struct ib_qp *qp,
if (unlikely(my_qp->state < IB_QPS_RTS)) {
ehca_err(qp->device, "Invalid QP state qp_state=%d qpn=%x",
my_qp->state, qp->qp_num);
return -EINVAL;
ret = -EINVAL;
goto out;
}
/* LOCK the QUEUE */
@ -476,24 +471,21 @@ int ehca_post_send(struct ib_qp *qp,
struct ib_send_wr circ_wr;
memset(&circ_wr, 0, sizeof(circ_wr));
circ_wr.opcode = IB_WR_RDMA_READ;
post_one_send(my_qp, &circ_wr, NULL, 1); /* ignore retcode */
post_one_send(my_qp, &circ_wr, 1); /* ignore retcode */
wqe_cnt++;
ehca_dbg(qp->device, "posted circ wr qp_num=%x", qp->qp_num);
my_qp->message_count = my_qp->packet_count = 0;
}
/* loop processes list of send reqs */
for (cur_send_wr = send_wr; cur_send_wr != NULL;
cur_send_wr = cur_send_wr->next) {
ret = post_one_send(my_qp, cur_send_wr, bad_send_wr, 0);
while (send_wr) {
ret = post_one_send(my_qp, send_wr, 0);
if (unlikely(ret)) {
/* if one or more WQEs were successful, don't fail */
if (wqe_cnt)
ret = 0;
goto post_send_exit0;
}
wqe_cnt++;
} /* eof for cur_send_wr */
send_wr = send_wr->next;
}
post_send_exit0:
iosync(); /* serialize GAL register access */
@ -503,6 +495,10 @@ post_send_exit0:
my_qp, qp->qp_num, wqe_cnt, ret);
my_qp->message_count += wqe_cnt;
spin_unlock_irqrestore(&my_qp->spinlock_s, flags);
out:
if (ret)
*bad_send_wr = send_wr;
return ret;
}
@ -511,7 +507,6 @@ static int internal_post_recv(struct ehca_qp *my_qp,
struct ib_recv_wr *recv_wr,
struct ib_recv_wr **bad_recv_wr)
{
struct ib_recv_wr *cur_recv_wr;
struct ehca_wqe *wqe_p;
int wqe_cnt = 0;
int ret = 0;
@ -522,27 +517,23 @@ static int internal_post_recv(struct ehca_qp *my_qp,
if (unlikely(!HAS_RQ(my_qp))) {
ehca_err(dev, "QP has no RQ ehca_qp=%p qp_num=%x ext_type=%d",
my_qp, my_qp->real_qp_num, my_qp->ext_type);
return -ENODEV;
ret = -ENODEV;
goto out;
}
/* LOCK the QUEUE */
spin_lock_irqsave(&my_qp->spinlock_r, flags);
/* loop processes list of send reqs */
for (cur_recv_wr = recv_wr; cur_recv_wr != NULL;
cur_recv_wr = cur_recv_wr->next) {
/* loop processes list of recv reqs */
while (recv_wr) {
u64 start_offset = my_qp->ipz_rqueue.current_q_offset;
/* get pointer next to free WQE */
wqe_p = ipz_qeit_get_inc(&my_qp->ipz_rqueue);
if (unlikely(!wqe_p)) {
/* too many posted work requests: queue overflow */
if (bad_recv_wr)
*bad_recv_wr = cur_recv_wr;
if (wqe_cnt == 0) {
ret = -ENOMEM;
ehca_err(dev, "Too many posted WQEs "
"qp_num=%x", my_qp->real_qp_num);
}
ret = -ENOMEM;
ehca_err(dev, "Too many posted WQEs "
"qp_num=%x", my_qp->real_qp_num);
goto post_recv_exit0;
}
/*
@ -552,7 +543,7 @@ static int internal_post_recv(struct ehca_qp *my_qp,
rq_map_idx = start_offset / my_qp->ipz_rqueue.qe_size;
/* write a RECV WQE into the QUEUE */
ret = ehca_write_rwqe(&my_qp->ipz_rqueue, wqe_p, cur_recv_wr,
ret = ehca_write_rwqe(&my_qp->ipz_rqueue, wqe_p, recv_wr,
rq_map_idx);
/*
* if something failed,
@ -560,22 +551,20 @@ static int internal_post_recv(struct ehca_qp *my_qp,
*/
if (unlikely(ret)) {
my_qp->ipz_rqueue.current_q_offset = start_offset;
*bad_recv_wr = cur_recv_wr;
if (wqe_cnt == 0) {
ret = -EINVAL;
ehca_err(dev, "Could not write WQE "
"qp_num=%x", my_qp->real_qp_num);
}
ret = -EINVAL;
ehca_err(dev, "Could not write WQE "
"qp_num=%x", my_qp->real_qp_num);
goto post_recv_exit0;
}
qmap_entry = &my_qp->rq_map.map[rq_map_idx];
qmap_entry->app_wr_id = get_app_wr_id(cur_recv_wr->wr_id);
qmap_entry->app_wr_id = get_app_wr_id(recv_wr->wr_id);
qmap_entry->reported = 0;
qmap_entry->cqe_req = 1;
wqe_cnt++;
} /* eof for cur_recv_wr */
recv_wr = recv_wr->next;
} /* eof for recv_wr */
post_recv_exit0:
iosync(); /* serialize GAL register access */
@ -584,6 +573,11 @@ post_recv_exit0:
ehca_dbg(dev, "ehca_qp=%p qp_num=%x wqe_cnt=%d ret=%i",
my_qp, my_qp->real_qp_num, wqe_cnt, ret);
spin_unlock_irqrestore(&my_qp->spinlock_r, flags);
out:
if (ret)
*bad_recv_wr = recv_wr;
return ret;
}
@ -597,6 +591,7 @@ int ehca_post_recv(struct ib_qp *qp,
if (unlikely(my_qp->state == IB_QPS_RESET)) {
ehca_err(qp->device, "Invalid QP state qp_state=%d qpn=%x",
my_qp->state, qp->qp_num);
*bad_recv_wr = recv_wr;
return -EINVAL;
}


@ -39,6 +39,7 @@
#include <linux/delay.h>
#include <linux/netdevice.h>
#include <linux/vmalloc.h>
#include <linux/bitmap.h>
#include "ipath_kernel.h"
#include "ipath_verbs.h"
@ -1697,7 +1698,7 @@ void ipath_chg_pioavailkernel(struct ipath_devdata *dd, unsigned start,
unsigned len, int avail)
{
unsigned long flags;
unsigned end, cnt = 0, next;
unsigned end, cnt = 0;
/* There are two bits per send buffer (busy and generation) */
start *= 2;
@ -1748,12 +1749,7 @@ void ipath_chg_pioavailkernel(struct ipath_devdata *dd, unsigned start,
if (dd->ipath_pioupd_thresh) {
end = 2 * (dd->ipath_piobcnt2k + dd->ipath_piobcnt4k);
next = find_first_bit(dd->ipath_pioavailkernel, end);
while (next < end) {
cnt++;
next = find_next_bit(dd->ipath_pioavailkernel, end,
next + 1);
}
cnt = bitmap_weight(dd->ipath_pioavailkernel, end);
}
spin_unlock_irqrestore(&ipath_pioavail_lock, flags);


@ -103,7 +103,7 @@ static int mlx4_ib_query_device(struct ib_device *ibdev,
props->device_cap_flags |= IB_DEVICE_UD_AV_PORT_ENFORCE;
if (dev->dev->caps.flags & MLX4_DEV_CAP_FLAG_IPOIB_CSUM)
props->device_cap_flags |= IB_DEVICE_UD_IP_CSUM;
if (dev->dev->caps.max_gso_sz)
if (dev->dev->caps.max_gso_sz && dev->dev->caps.flags & MLX4_DEV_CAP_FLAG_BLH)
props->device_cap_flags |= IB_DEVICE_UD_TSO;
if (dev->dev->caps.bmme_flags & MLX4_BMME_FLAG_RESERVED_LKEY)
props->device_cap_flags |= IB_DEVICE_LOCAL_DMA_LKEY;


@ -54,7 +54,8 @@ enum {
/*
* Largest possible UD header: send with GRH and immediate data.
*/
MLX4_IB_UD_HEADER_SIZE = 72
MLX4_IB_UD_HEADER_SIZE = 72,
MLX4_IB_LSO_HEADER_SPARE = 128,
};
struct mlx4_ib_sqp {
@ -67,7 +68,8 @@ struct mlx4_ib_sqp {
};
enum {
MLX4_IB_MIN_SQ_STRIDE = 6
MLX4_IB_MIN_SQ_STRIDE = 6,
MLX4_IB_CACHE_LINE_SIZE = 64,
};
static const __be32 mlx4_ib_opcode[] = {
@ -261,7 +263,7 @@ static int send_wqe_overhead(enum ib_qp_type type, u32 flags)
case IB_QPT_UD:
return sizeof (struct mlx4_wqe_ctrl_seg) +
sizeof (struct mlx4_wqe_datagram_seg) +
((flags & MLX4_IB_QP_LSO) ? 64 : 0);
((flags & MLX4_IB_QP_LSO) ? MLX4_IB_LSO_HEADER_SPARE : 0);
case IB_QPT_UC:
return sizeof (struct mlx4_wqe_ctrl_seg) +
sizeof (struct mlx4_wqe_raddr_seg);
@ -897,7 +899,6 @@ static int __mlx4_ib_modify_qp(struct ib_qp *ibqp,
context->flags = cpu_to_be32((to_mlx4_state(new_state) << 28) |
(to_mlx4_st(ibqp->qp_type) << 16));
context->flags |= cpu_to_be32(1 << 8); /* DE? */
if (!(attr_mask & IB_QP_PATH_MIG_STATE))
context->flags |= cpu_to_be32(MLX4_QP_PM_MIGRATED << 11);
@ -1467,16 +1468,12 @@ static void __set_data_seg(struct mlx4_wqe_data_seg *dseg, struct ib_sge *sg)
static int build_lso_seg(struct mlx4_wqe_lso_seg *wqe, struct ib_send_wr *wr,
struct mlx4_ib_qp *qp, unsigned *lso_seg_len,
__be32 *lso_hdr_sz)
__be32 *lso_hdr_sz, __be32 *blh)
{
unsigned halign = ALIGN(sizeof *wqe + wr->wr.ud.hlen, 16);
/*
* This is a temporary limitation and will be removed in
* a forthcoming FW release:
*/
if (unlikely(halign > 64))
return -EINVAL;
if (unlikely(halign > MLX4_IB_CACHE_LINE_SIZE))
*blh = cpu_to_be32(1 << 6);
if (unlikely(!(qp->flags & MLX4_IB_QP_LSO) &&
wr->num_sge > qp->sq.max_gs - (halign >> 4)))
@ -1522,6 +1519,7 @@ int mlx4_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
__be32 dummy;
__be32 *lso_wqe;
__be32 uninitialized_var(lso_hdr_sz);
__be32 blh;
int i;
spin_lock_irqsave(&qp->sq.lock, flags);
@ -1530,6 +1528,7 @@ int mlx4_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
for (nreq = 0; wr; ++nreq, wr = wr->next) {
lso_wqe = &dummy;
blh = 0;
if (mlx4_wq_overflow(&qp->sq, nreq, qp->ibqp.send_cq)) {
err = -ENOMEM;
@ -1616,7 +1615,7 @@ int mlx4_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
size += sizeof (struct mlx4_wqe_datagram_seg) / 16;
if (wr->opcode == IB_WR_LSO) {
err = build_lso_seg(wqe, wr, qp, &seglen, &lso_hdr_sz);
err = build_lso_seg(wqe, wr, qp, &seglen, &lso_hdr_sz, &blh);
if (unlikely(err)) {
*bad_wr = wr;
goto out;
@ -1687,7 +1686,7 @@ int mlx4_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
}
ctrl->owner_opcode = mlx4_ib_opcode[wr->opcode] |
(ind & qp->sq.wqe_cnt ? cpu_to_be32(1 << 31) : 0);
(ind & qp->sq.wqe_cnt ? cpu_to_be32(1 << 31) : 0) | blh;
stamp = ind + qp->sq_spare_wqes;
ind += DIV_ROUND_UP(size * 16, 1U << qp->sq.wqe_shift);


@ -4,14 +4,13 @@ config INFINIBAND_NES
select LIBCRC32C
select INET_LRO
---help---
This is a low-level driver for NetEffect RDMA enabled
Network Interface Cards (RNIC).
This is the RDMA Network Interface Card (RNIC) driver for
NetEffect Ethernet Cluster Server Adapters.
config INFINIBAND_NES_DEBUG
bool "Verbose debugging output"
depends on INFINIBAND_NES
default n
---help---
This option causes the NetEffect RNIC driver to produce debug
messages. Select this if you are developing the driver
or trying to diagnose a problem.
This option enables debug messages from the NetEffect RNIC
driver. Select this if you are diagnosing a problem.


@ -1,5 +1,5 @@
/*
* Copyright (c) 2006 - 2009 Intel-NE, Inc. All rights reserved.
* Copyright (c) 2006 - 2009 Intel Corporation. All rights reserved.
* Copyright (c) 2005 Open Grid Computing, Inc. All rights reserved.
*
* This software is available to you under a choice of one of two
@ -521,7 +521,8 @@ static int __devinit nes_probe(struct pci_dev *pcidev, const struct pci_device_i
spin_lock_init(&nesdev->indexed_regs_lock);
/* Remap the PCI registers in adapter BAR0 to kernel VA space */
mmio_regs = ioremap_nocache(pci_resource_start(pcidev, BAR_0), sizeof(mmio_regs));
mmio_regs = ioremap_nocache(pci_resource_start(pcidev, BAR_0),
pci_resource_len(pcidev, BAR_0));
if (mmio_regs == NULL) {
printk(KERN_ERR PFX "Unable to remap BAR0\n");
ret = -EIO;


@ -1,5 +1,5 @@
/*
* Copyright (c) 2006 - 2009 Intel-NE, Inc. All rights reserved.
* Copyright (c) 2006 - 2009 Intel Corporation. All rights reserved.
* Copyright (c) 2005 Open Grid Computing, Inc. All rights reserved.
*
* This software is available to you under a choice of one of two


@ -1,5 +1,5 @@
/*
* Copyright (c) 2006 - 2009 Intel-NE, Inc. All rights reserved.
* Copyright (c) 2006 - 2009 Intel Corporation. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
@ -52,6 +52,7 @@
#include <linux/random.h>
#include <linux/list.h>
#include <linux/threads.h>
#include <linux/highmem.h>
#include <net/arp.h>
#include <net/neighbour.h>
#include <net/route.h>
@ -251,6 +252,33 @@ static int parse_mpa(struct nes_cm_node *cm_node, u8 *buffer, u32 *type,
mpa_frame = (struct ietf_mpa_frame *)buffer;
cm_node->mpa_frame_size = ntohs(mpa_frame->priv_data_len);
/* make sure the MPA private data length is at most 512 bytes */
if (cm_node->mpa_frame_size > IETF_MAX_PRIV_DATA_LEN) {
nes_debug(NES_DBG_CM, "The received Length of Private"
" Data field exceeds 512 octets\n");
return -EINVAL;
}
/*
* make sure the MPA receiver interoperates with the
* received MPA version and MPA key information
*/
if (mpa_frame->rev != mpa_version) {
nes_debug(NES_DBG_CM, "The received MPA version"
" is not supported\n");
return -EINVAL;
}
if (cm_node->state != NES_CM_STATE_MPAREQ_SENT) {
if (memcmp(mpa_frame->key, IEFT_MPA_KEY_REQ, IETF_MPA_KEY_SIZE)) {
nes_debug(NES_DBG_CM, "Unexpected MPA Key received \n");
return -EINVAL;
}
} else {
if (memcmp(mpa_frame->key, IEFT_MPA_KEY_REP, IETF_MPA_KEY_SIZE)) {
nes_debug(NES_DBG_CM, "Unexpected MPA Key received \n");
return -EINVAL;
}
}
if (cm_node->mpa_frame_size + sizeof(struct ietf_mpa_frame) != len) {
nes_debug(NES_DBG_CM, "The received ietf buffer was not right"
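
The checks above are easier to follow against the on-the-wire MPA layout, reconstructed here from the constants this series defines (IETF_MPA_KEY_SIZE = 16, IETF_MPA_FRAME_SIZE = 20, IETF_MAX_PRIV_DATA_LEN = 512, so MAX_CM_BUFFER works out to 20 + 512 = 532 bytes). Treat it as a sketch of the driver's struct ietf_mpa_frame, using kernel types from <linux/types.h>:

struct ietf_mpa_frame_sketch {
	u8	key[16];	/* "MPA ID Req Frame" or "MPA ID Rep Frame" */
	u8	flags;		/* IETF_MPA_FLAGS_MARKERS, IETF_MPA_FLAGS_CRC, ... */
	u8	rev;		/* must match IETF_MPA_VERSION (1) */
	__be16	priv_data_len;	/* > 512 makes parse_mpa() return -EINVAL */
	u8	priv_data[0];	/* at most IETF_MAX_PRIV_DATA_LEN bytes */
};
/* fixed part: 16 + 1 + 1 + 2 = 20 bytes = IETF_MPA_FRAME_SIZE */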
@ -486,6 +514,8 @@ static void nes_retrans_expired(struct nes_cm_node *cm_node)
send_reset(cm_node, NULL);
break;
default:
add_ref_cm_node(cm_node);
send_reset(cm_node, NULL);
create_event(cm_node, NES_CM_EVENT_ABORTED);
}
}
@ -949,6 +979,7 @@ static int mini_cm_dec_refcnt_listen(struct nes_cm_core *cm_core,
reset_entry);
{
struct nes_cm_node *loopback = cm_node->loopbackpartner;
enum nes_cm_node_state old_state;
if (NES_CM_STATE_FIN_WAIT1 <= cm_node->state) {
rem_ref_cm_node(cm_node->cm_core, cm_node);
} else {
@ -960,11 +991,12 @@ static int mini_cm_dec_refcnt_listen(struct nes_cm_core *cm_core,
NES_CM_STATE_CLOSED;
WARN_ON(1);
} else {
cm_node->state =
NES_CM_STATE_CLOSED;
rem_ref_cm_node(
cm_node->cm_core,
cm_node);
old_state = cm_node->state;
cm_node->state = NES_CM_STATE_LISTENER_DESTROYED;
if (old_state != NES_CM_STATE_MPAREQ_RCVD)
rem_ref_cm_node(
cm_node->cm_core,
cm_node);
}
} else {
struct nes_cm_event event;
@ -980,20 +1012,9 @@ static int mini_cm_dec_refcnt_listen(struct nes_cm_core *cm_core,
loopback->loc_port;
event.cm_info.cm_id = loopback->cm_id;
cm_event_connect_error(&event);
cm_node->state = NES_CM_STATE_LISTENER_DESTROYED;
loopback->state = NES_CM_STATE_CLOSED;
event.cm_node = cm_node;
event.cm_info.rem_addr =
cm_node->rem_addr;
event.cm_info.loc_addr =
cm_node->loc_addr;
event.cm_info.rem_port =
cm_node->rem_port;
event.cm_info.loc_port =
cm_node->loc_port;
event.cm_info.cm_id = cm_node->cm_id;
cm_event_reset(&event);
rem_ref_cm_node(cm_node->cm_core,
cm_node);
@ -1077,12 +1098,13 @@ static inline int mini_cm_accelerated(struct nes_cm_core *cm_core,
/**
* nes_addr_resolve_neigh
*/
static int nes_addr_resolve_neigh(struct nes_vnic *nesvnic, u32 dst_ip)
static int nes_addr_resolve_neigh(struct nes_vnic *nesvnic, u32 dst_ip, int arpindex)
{
struct rtable *rt;
struct flowi fl;
struct neighbour *neigh;
int rc = -1;
int rc = arpindex;
struct nes_adapter *nesadapter = nesvnic->nesdev->nesadapter;
memset(&fl, 0, sizeof fl);
fl.nl_u.ip4_u.daddr = htonl(dst_ip);
@ -1098,6 +1120,21 @@ static int nes_addr_resolve_neigh(struct nes_vnic *nesvnic, u32 dst_ip)
nes_debug(NES_DBG_CM, "Neighbor MAC address for 0x%08X"
" is %pM, Gateway is 0x%08X \n", dst_ip,
neigh->ha, ntohl(rt->rt_gateway));
if (arpindex >= 0) {
if (!memcmp(nesadapter->arp_table[arpindex].mac_addr,
neigh->ha, ETH_ALEN)){
/* Mac address same as in nes_arp_table */
neigh_release(neigh);
ip_rt_put(rt);
return rc;
}
nes_manage_arp_cache(nesvnic->netdev,
nesadapter->arp_table[arpindex].mac_addr,
dst_ip, NES_ARP_DELETE);
}
nes_manage_arp_cache(nesvnic->netdev, neigh->ha,
dst_ip, NES_ARP_ADD);
rc = nes_arp_table(nesvnic->nesdev, dst_ip, NULL,
@ -1113,7 +1150,6 @@ static int nes_addr_resolve_neigh(struct nes_vnic *nesvnic, u32 dst_ip)
return rc;
}
/**
* make_cm_node - create a new instance of a cm node
*/
@ -1123,6 +1159,7 @@ static struct nes_cm_node *make_cm_node(struct nes_cm_core *cm_core,
{
struct nes_cm_node *cm_node;
struct timespec ts;
int oldarpindex = 0;
int arpindex = 0;
struct nes_device *nesdev;
struct nes_adapter *nesadapter;
@ -1176,17 +1213,18 @@ static struct nes_cm_node *make_cm_node(struct nes_cm_core *cm_core,
nesadapter = nesdev->nesadapter;
cm_node->loopbackpartner = NULL;
/* get the mac addr for the remote node */
if (ipv4_is_loopback(htonl(cm_node->rem_addr)))
arpindex = nes_arp_table(nesdev, ntohl(nesvnic->local_ipaddr), NULL, NES_ARP_RESOLVE);
else
arpindex = nes_arp_table(nesdev, cm_node->rem_addr, NULL, NES_ARP_RESOLVE);
else {
oldarpindex = nes_arp_table(nesdev, cm_node->rem_addr, NULL, NES_ARP_RESOLVE);
arpindex = nes_addr_resolve_neigh(nesvnic, cm_info->rem_addr, oldarpindex);
}
if (arpindex < 0) {
arpindex = nes_addr_resolve_neigh(nesvnic, cm_info->rem_addr);
if (arpindex < 0) {
kfree(cm_node);
return NULL;
}
kfree(cm_node);
return NULL;
}
/* copy the mac addr to node context */
@ -1333,13 +1371,20 @@ static void handle_fin_pkt(struct nes_cm_node *cm_node)
case NES_CM_STATE_SYN_RCVD:
case NES_CM_STATE_SYN_SENT:
case NES_CM_STATE_ESTABLISHED:
case NES_CM_STATE_MPAREQ_SENT:
case NES_CM_STATE_MPAREJ_RCVD:
cm_node->tcp_cntxt.rcv_nxt++;
cleanup_retrans_entry(cm_node);
cm_node->state = NES_CM_STATE_LAST_ACK;
send_fin(cm_node, NULL);
break;
case NES_CM_STATE_MPAREQ_SENT:
create_event(cm_node, NES_CM_EVENT_ABORTED);
cm_node->tcp_cntxt.rcv_nxt++;
cleanup_retrans_entry(cm_node);
cm_node->state = NES_CM_STATE_CLOSED;
add_ref_cm_node(cm_node);
send_reset(cm_node, NULL);
break;
case NES_CM_STATE_FIN_WAIT1:
cm_node->tcp_cntxt.rcv_nxt++;
cleanup_retrans_entry(cm_node);
@ -1590,6 +1635,7 @@ static void handle_syn_pkt(struct nes_cm_node *cm_node, struct sk_buff *skb,
break;
case NES_CM_STATE_CLOSED:
cleanup_retrans_entry(cm_node);
add_ref_cm_node(cm_node);
send_reset(cm_node, skb);
break;
case NES_CM_STATE_TSA:
@ -1641,9 +1687,15 @@ static void handle_synack_pkt(struct nes_cm_node *cm_node, struct sk_buff *skb,
passive_open_err(cm_node, skb, 1);
break;
case NES_CM_STATE_LISTENING:
cm_node->tcp_cntxt.loc_seq_num = ntohl(tcph->ack_seq);
cleanup_retrans_entry(cm_node);
cm_node->state = NES_CM_STATE_CLOSED;
send_reset(cm_node, skb);
break;
case NES_CM_STATE_CLOSED:
cm_node->tcp_cntxt.loc_seq_num = ntohl(tcph->ack_seq);
cleanup_retrans_entry(cm_node);
add_ref_cm_node(cm_node);
send_reset(cm_node, skb);
break;
case NES_CM_STATE_ESTABLISHED:
@ -1712,8 +1764,13 @@ static int handle_ack_pkt(struct nes_cm_node *cm_node, struct sk_buff *skb,
dev_kfree_skb_any(skb);
break;
case NES_CM_STATE_LISTENING:
cleanup_retrans_entry(cm_node);
cm_node->state = NES_CM_STATE_CLOSED;
send_reset(cm_node, skb);
break;
case NES_CM_STATE_CLOSED:
cleanup_retrans_entry(cm_node);
add_ref_cm_node(cm_node);
send_reset(cm_node, skb);
break;
case NES_CM_STATE_LAST_ACK:
@ -1974,7 +2031,7 @@ static struct nes_cm_node *mini_cm_connect(struct nes_cm_core *cm_core,
if (!cm_node)
return NULL;
mpa_frame = &cm_node->mpa_frame;
strcpy(mpa_frame->key, IEFT_MPA_KEY_REQ);
memcpy(mpa_frame->key, IEFT_MPA_KEY_REQ, IETF_MPA_KEY_SIZE);
mpa_frame->flags = IETF_MPA_FLAGS_CRC;
mpa_frame->rev = IETF_MPA_VERSION;
mpa_frame->priv_data_len = htons(private_data_len);
@ -2102,30 +2159,39 @@ static int mini_cm_reject(struct nes_cm_core *cm_core,
cm_node->state = NES_CM_STATE_CLOSED;
rem_ref_cm_node(cm_core, cm_node);
} else {
ret = send_mpa_reject(cm_node);
if (ret) {
cm_node->state = NES_CM_STATE_CLOSED;
err = send_reset(cm_node, NULL);
if (err)
WARN_ON(1);
} else
cm_id->add_ref(cm_id);
if (cm_node->state == NES_CM_STATE_LISTENER_DESTROYED) {
rem_ref_cm_node(cm_core, cm_node);
} else {
ret = send_mpa_reject(cm_node);
if (ret) {
cm_node->state = NES_CM_STATE_CLOSED;
err = send_reset(cm_node, NULL);
if (err)
WARN_ON(1);
} else
cm_id->add_ref(cm_id);
}
}
} else {
cm_node->cm_id = NULL;
event.cm_node = loopback;
event.cm_info.rem_addr = loopback->rem_addr;
event.cm_info.loc_addr = loopback->loc_addr;
event.cm_info.rem_port = loopback->rem_port;
event.cm_info.loc_port = loopback->loc_port;
event.cm_info.cm_id = loopback->cm_id;
cm_event_mpa_reject(&event);
rem_ref_cm_node(cm_core, cm_node);
loopback->state = NES_CM_STATE_CLOSING;
if (cm_node->state == NES_CM_STATE_LISTENER_DESTROYED) {
rem_ref_cm_node(cm_core, cm_node);
rem_ref_cm_node(cm_core, loopback);
} else {
event.cm_node = loopback;
event.cm_info.rem_addr = loopback->rem_addr;
event.cm_info.loc_addr = loopback->loc_addr;
event.cm_info.rem_port = loopback->rem_port;
event.cm_info.loc_port = loopback->loc_port;
event.cm_info.cm_id = loopback->cm_id;
cm_event_mpa_reject(&event);
rem_ref_cm_node(cm_core, cm_node);
loopback->state = NES_CM_STATE_CLOSING;
cm_id = loopback->cm_id;
rem_ref_cm_node(cm_core, loopback);
cm_id->rem_ref(cm_id);
cm_id = loopback->cm_id;
rem_ref_cm_node(cm_core, loopback);
cm_id->rem_ref(cm_id);
}
}
return ret;
@ -2164,11 +2230,15 @@ static int mini_cm_close(struct nes_cm_core *cm_core, struct nes_cm_node *cm_nod
case NES_CM_STATE_CLOSING:
ret = -1;
break;
case NES_CM_STATE_MPAREJ_RCVD:
case NES_CM_STATE_LISTENING:
cleanup_retrans_entry(cm_node);
send_reset(cm_node, NULL);
break;
case NES_CM_STATE_MPAREJ_RCVD:
case NES_CM_STATE_UNKNOWN:
case NES_CM_STATE_INITED:
case NES_CM_STATE_CLOSED:
case NES_CM_STATE_LISTENER_DESTROYED:
ret = rem_ref_cm_node(cm_core, cm_node);
break;
case NES_CM_STATE_TSA:
@ -2687,8 +2757,6 @@ int nes_accept(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
struct nes_pd *nespd;
u64 tagged_offset;
ibqp = nes_get_qp(cm_id->device, conn_param->qpn);
if (!ibqp)
return -EINVAL;
@ -2704,6 +2772,13 @@ int nes_accept(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
"%s\n", cm_node, nesvnic, nesvnic->netdev,
nesvnic->netdev->name);
if (NES_CM_STATE_LISTENER_DESTROYED == cm_node->state) {
if (cm_node->loopbackpartner)
rem_ref_cm_node(cm_node->cm_core, cm_node->loopbackpartner);
rem_ref_cm_node(cm_node->cm_core, cm_node);
return -EINVAL;
}
/* associate the node with the QP */
nesqp->cm_node = (void *)cm_node;
cm_node->nesqp = nesqp;
@ -2786,6 +2861,10 @@ int nes_accept(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
cpu_to_le32(conn_param->private_data_len +
sizeof(struct ietf_mpa_frame));
wqe->wqe_words[NES_IWARP_SQ_WQE_STAG0_IDX] = ibmr->lkey;
if (nesqp->sq_kmapped) {
nesqp->sq_kmapped = 0;
kunmap(nesqp->page);
}
nesqp->nesqp_context->ird_ord_sizes |=
cpu_to_le32(NES_QPCONTEXT_ORDIRD_LSMM_PRESENT |
@ -2929,7 +3008,7 @@ int nes_reject(struct iw_cm_id *cm_id, const void *pdata, u8 pdata_len)
if (cm_node->mpa_frame_size > MAX_CM_BUFFER)
return -EINVAL;
strcpy(&cm_node->mpa_frame.key[0], IEFT_MPA_KEY_REP);
memcpy(&cm_node->mpa_frame.key[0], IEFT_MPA_KEY_REP, IETF_MPA_KEY_SIZE);
if (loopback) {
memcpy(&loopback->mpa_frame.priv_data, pdata, pdata_len);
loopback->mpa_frame.priv_data_len = pdata_len;
@ -2974,6 +3053,9 @@ int nes_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
if (!nesdev)
return -EINVAL;
if (!(cm_id->local_addr.sin_port) || !(cm_id->remote_addr.sin_port))
return -EINVAL;
nes_debug(NES_DBG_CM, "QP%u, current IP = 0x%08X, Destination IP = "
"0x%08X:0x%04X, local = 0x%08X:0x%04X.\n", nesqp->hwqp.qp_id,
ntohl(nesvnic->local_ipaddr),
@ -3251,6 +3333,11 @@ static void cm_event_connected(struct nes_cm_event *event)
wqe->wqe_words[NES_IWARP_SQ_WQE_LENGTH0_IDX] = 0;
wqe->wqe_words[NES_IWARP_SQ_WQE_STAG0_IDX] = 0;
if (nesqp->sq_kmapped) {
nesqp->sq_kmapped = 0;
kunmap(nesqp->page);
}
/* use the reserved spot on the WQ for the extra first WQE */
nesqp->nesqp_context->ird_ord_sizes &=
cpu_to_le32(~(NES_QPCONTEXT_ORDIRD_LSMM_PRESENT |
@ -3346,7 +3433,7 @@ static void cm_event_connect_error(struct nes_cm_event *event)
nesqp->cm_id = NULL;
cm_id->provider_data = NULL;
cm_event.event = IW_CM_EVENT_CONNECT_REPLY;
cm_event.status = IW_CM_EVENT_STATUS_REJECTED;
cm_event.status = -ECONNRESET;
cm_event.provider_data = cm_id->provider_data;
cm_event.local_addr = cm_id->local_addr;
cm_event.remote_addr = cm_id->remote_addr;
@ -3390,6 +3477,8 @@ static void cm_event_reset(struct nes_cm_event *event)
nes_debug(NES_DBG_CM, "%p - cm_id = %p\n", event->cm_node, cm_id);
nesqp = cm_id->provider_data;
if (!nesqp)
return;
nesqp->cm_id = NULL;
/* cm_id->provider_data = NULL; */
@ -3401,8 +3490,8 @@ static void cm_event_reset(struct nes_cm_event *event)
cm_event.private_data = NULL;
cm_event.private_data_len = 0;
ret = cm_id->event_handler(cm_id, &cm_event);
cm_id->add_ref(cm_id);
ret = cm_id->event_handler(cm_id, &cm_event);
atomic_inc(&cm_closes);
cm_event.event = IW_CM_EVENT_CLOSE;
cm_event.status = IW_CM_EVENT_STATUS_OK;


@ -1,5 +1,5 @@
/*
* Copyright (c) 2006 - 2009 Intel-NE, Inc. All rights reserved.
* Copyright (c) 2006 - 2009 Intel Corporation. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
@ -47,6 +47,8 @@
#define IEFT_MPA_KEY_REP "MPA ID Rep Frame"
#define IETF_MPA_KEY_SIZE 16
#define IETF_MPA_VERSION 1
#define IETF_MAX_PRIV_DATA_LEN 512
#define IETF_MPA_FRAME_SIZE 20
enum ietf_mpa_flags {
IETF_MPA_FLAGS_MARKERS = 0x80, /* receive Markers */
@ -169,7 +171,7 @@ struct nes_timer_entry {
#define NES_CM_DEF_SEQ2 0x18ed5740
#define NES_CM_DEF_LOCAL_ID2 0xb807
#define MAX_CM_BUFFER 512
#define MAX_CM_BUFFER (IETF_MPA_FRAME_SIZE + IETF_MAX_PRIV_DATA_LEN)
typedef u32 nes_addr_t;
@ -198,6 +200,7 @@ enum nes_cm_node_state {
NES_CM_STATE_TIME_WAIT,
NES_CM_STATE_LAST_ACK,
NES_CM_STATE_CLOSING,
NES_CM_STATE_LISTENER_DESTROYED,
NES_CM_STATE_CLOSED
};


@ -1,5 +1,5 @@
/*
* Copyright (c) 2006 - 2009 Intel-NE, Inc. All rights reserved.
* Copyright (c) 2006 - 2009 Intel Corporation. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU


@ -1,5 +1,5 @@
/*
* Copyright (c) 2006 - 2009 Intel-NE, Inc. All rights reserved.
* Copyright (c) 2006 - 2009 Intel Corporation. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
@ -424,8 +424,9 @@ struct nes_adapter *nes_init_adapter(struct nes_device *nesdev, u8 hw_rev) {
nesadapter->base_pd = 1;
nesadapter->device_cap_flags =
IB_DEVICE_LOCAL_DMA_LKEY | IB_DEVICE_MEM_WINDOW;
nesadapter->device_cap_flags = IB_DEVICE_LOCAL_DMA_LKEY |
IB_DEVICE_MEM_WINDOW |
IB_DEVICE_MEM_MGT_EXTENSIONS;
nesadapter->allocated_qps = (unsigned long *)&(((unsigned char *)nesadapter)
[(sizeof(struct nes_adapter)+(sizeof(unsigned long)-1))&(~(sizeof(unsigned long)-1))]);
@ -436,11 +437,12 @@ struct nes_adapter *nes_init_adapter(struct nes_device *nesdev, u8 hw_rev) {
nesadapter->qp_table = (struct nes_qp **)(&nesadapter->allocated_arps[BITS_TO_LONGS(arp_table_size)]);
/* mark the usual suspect QPs and CQs as in use */
/* mark the usual suspect QPs, MR and CQs as in use */
for (u32temp = 0; u32temp < NES_FIRST_QPN; u32temp++) {
set_bit(u32temp, nesadapter->allocated_qps);
set_bit(u32temp, nesadapter->allocated_cqs);
}
set_bit(0, nesadapter->allocated_mrs);
for (u32temp = 0; u32temp < 20; u32temp++)
set_bit(u32temp, nesadapter->allocated_pds);
@ -481,7 +483,7 @@ struct nes_adapter *nes_init_adapter(struct nes_device *nesdev, u8 hw_rev) {
nesadapter->max_irrq_wr = (u32temp >> 16) & 3;
nesadapter->max_sge = 4;
nesadapter->max_cqe = 32767;
nesadapter->max_cqe = 32766;
if (nes_read_eeprom_values(nesdev, nesadapter)) {
printk(KERN_ERR PFX "Unable to read EEPROM data.\n");
@ -1355,6 +1357,8 @@ int nes_init_phy(struct nes_device *nesdev)
}
if ((phy_type == NES_PHY_TYPE_ARGUS) ||
(phy_type == NES_PHY_TYPE_SFP_D)) {
u32 first_time = 1;
/* Check firmware heartbeat */
nes_read_10G_phy_reg(nesdev, phy_index, 0x3, 0xd7ee);
temp_phy_data = (u16)nes_read_indexed(nesdev, NES_IDX_MAC_MDIO_CONTROL);
@ -1362,8 +1366,13 @@ int nes_init_phy(struct nes_device *nesdev)
nes_read_10G_phy_reg(nesdev, phy_index, 0x3, 0xd7ee);
temp_phy_data2 = (u16)nes_read_indexed(nesdev, NES_IDX_MAC_MDIO_CONTROL);
if (temp_phy_data != temp_phy_data2)
return 0;
if (temp_phy_data != temp_phy_data2) {
nes_read_10G_phy_reg(nesdev, phy_index, 0x3, 0xd7fd);
temp_phy_data = (u16)nes_read_indexed(nesdev, NES_IDX_MAC_MDIO_CONTROL);
if ((temp_phy_data & 0xff) > 0x20)
return 0;
printk(PFX "Reinitializing PHY\n");
}
/* no heartbeat, configure the PHY */
nes_write_10G_phy_reg(nesdev, phy_index, 0x1, 0x0000, 0x8000);
@ -1399,7 +1408,7 @@ int nes_init_phy(struct nes_device *nesdev)
temp_phy_data = (u16)nes_read_indexed(nesdev, NES_IDX_MAC_MDIO_CONTROL);
do {
if (counter++ > 150) {
nes_debug(NES_DBG_PHY, "No PHY heartbeat\n");
printk(PFX "No PHY heartbeat\n");
break;
}
mdelay(1);
@ -1413,11 +1422,20 @@ int nes_init_phy(struct nes_device *nesdev)
nes_read_10G_phy_reg(nesdev, phy_index, 0x3, 0xd7fd);
temp_phy_data = (u16)nes_read_indexed(nesdev, NES_IDX_MAC_MDIO_CONTROL);
if (counter++ > 300) {
nes_debug(NES_DBG_PHY, "PHY did not track\n");
break;
if (((temp_phy_data & 0xff) == 0x0) && first_time) {
first_time = 0;
counter = 0;
/* reset AMCC PHY and try again */
nes_write_10G_phy_reg(nesdev, phy_index, 0x3, 0xe854, 0x00c0);
nes_write_10G_phy_reg(nesdev, phy_index, 0x3, 0xe854, 0x0040);
continue;
} else {
printk(PFX "PHY did not track\n");
break;
}
}
mdelay(10);
} while (((temp_phy_data & 0xff) != 0x50) && ((temp_phy_data & 0xff) != 0x70));
} while ((temp_phy_data & 0xff) < 0x30);
/* setup signal integrity */
nes_write_10G_phy_reg(nesdev, phy_index, 0x1, 0xd003, 0x0000);

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 2006 - 2009 Intel-NE, Inc. All rights reserved.
* Copyright (c) 2006 - 2009 Intel Corporation. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
@ -546,11 +546,23 @@ enum nes_iwarp_sq_fmr_wqe_word_idx {
NES_IWARP_SQ_FMR_WQE_PBL_LENGTH_IDX = 14,
};
enum nes_iwarp_sq_fmr_opcodes {
NES_IWARP_SQ_FMR_WQE_ZERO_BASED = (1<<6),
NES_IWARP_SQ_FMR_WQE_PAGE_SIZE_4K = (0<<7),
NES_IWARP_SQ_FMR_WQE_PAGE_SIZE_2M = (1<<7),
NES_IWARP_SQ_FMR_WQE_RIGHTS_ENABLE_LOCAL_READ = (1<<16),
NES_IWARP_SQ_FMR_WQE_RIGHTS_ENABLE_LOCAL_WRITE = (1<<17),
NES_IWARP_SQ_FMR_WQE_RIGHTS_ENABLE_REMOTE_READ = (1<<18),
NES_IWARP_SQ_FMR_WQE_RIGHTS_ENABLE_REMOTE_WRITE = (1<<19),
NES_IWARP_SQ_FMR_WQE_RIGHTS_ENABLE_WINDOW_BIND = (1<<20),
};
#define NES_IWARP_SQ_FMR_WQE_MR_LENGTH_HIGH_MASK 0xFF
enum nes_iwarp_sq_locinv_wqe_word_idx {
NES_IWARP_SQ_LOCINV_WQE_INV_STAG_IDX = 6,
};
enum nes_iwarp_rq_wqe_word_idx {
NES_IWARP_RQ_WQE_TOTAL_PAYLOAD_IDX = 1,
NES_IWARP_RQ_WQE_COMP_CTX_LOW_IDX = 2,
@ -1153,6 +1165,19 @@ struct nes_pbl {
/* TODO: need to add list for two level tables */
};
#define NES_4K_PBL_CHUNK_SIZE 4096
struct nes_fast_mr_wqe_pbl {
u64 *kva;
dma_addr_t paddr;
};
struct nes_ib_fast_reg_page_list {
struct ib_fast_reg_page_list ibfrpl;
struct nes_fast_mr_wqe_pbl nes_wqe_pbl;
u64 pbl;
};
struct nes_listener {
struct work_struct work;
struct workqueue_struct *wq;


@ -1,5 +1,5 @@
/*
* Copyright (c) 2006 - 2009 Intel-NE, Inc. All rights reserved.
* Copyright (c) 2006 - 2009 Intel Corporation. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU


@ -1,5 +1,5 @@
/*
* Copyright (c) 2006 - 2009 Intel-NE, Inc. All rights reserved.
* Copyright (c) 2006 - 2009 Intel Corporation. All rights reserved.
* Copyright (c) 2005 Topspin Communications. All rights reserved.
* Copyright (c) 2005 Cisco Systems. All rights reserved.
* Copyright (c) 2005 Open Grid Computing, Inc. All rights reserved.
@ -86,6 +86,7 @@ enum iwnes_memreg_type {
IWNES_MEMREG_TYPE_CQ = 0x0002,
IWNES_MEMREG_TYPE_MW = 0x0003,
IWNES_MEMREG_TYPE_FMR = 0x0004,
IWNES_MEMREG_TYPE_FMEM = 0x0005,
};
struct nes_mem_reg_req {

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 2006 - 2009 Intel-NE, Inc. All rights reserved.
* Copyright (c) 2006 - 2009 Intel Corporation. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU

File diff suppressed because it is too large

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 2006 - 2009 Intel-NE, Inc. All rights reserved.
* Copyright (c) 2006 - 2009 Intel Corporation. All rights reserved.
* Copyright (c) 2005 Open Grid Computing, Inc. All rights reserved.
*
* This software is available to you under a choice of one of two
@ -135,19 +135,15 @@ struct nes_qp {
struct ib_qp ibqp;
void *allocated_buffer;
struct iw_cm_id *cm_id;
struct workqueue_struct *wq;
struct nes_cq *nesscq;
struct nes_cq *nesrcq;
struct nes_pd *nespd;
void *cm_node; /* handle of the node this QP is associated with */
struct ietf_mpa_frame *ietf_frame;
dma_addr_t ietf_frame_pbase;
wait_queue_head_t state_waitq;
struct ib_mr *lsmm_mr;
unsigned long socket;
struct nes_hw_qp hwqp;
struct work_struct work;
struct work_struct ae_work;
enum ib_qp_state ibqp_state;
u32 iwarp_state;
u32 hte_index;
@ -165,19 +161,20 @@ struct nes_qp {
struct page *page;
struct timer_list terminate_timer;
enum ib_event_type terminate_eventtype;
wait_queue_head_t kick_waitq;
u16 in_disconnect;
u16 active_conn:1;
u16 skip_lsmm:1;
u16 user_mode:1;
u16 hte_added:1;
u16 flush_issued:1;
u16 destroyed:1;
u16 sig_all:1;
u16 rsvd:9;
u16 private_data_len;
u16 term_sq_flush_code;
u16 term_rq_flush_code;
u8 active_conn;
u8 skip_lsmm;
u8 user_mode;
u8 hte_added;
u8 hw_iwarp_state;
u8 flush_issued;
u8 hw_tcp_state;
u8 term_flags;
u8 destroyed;
u8 sq_kmapped;
};
#endif /* NES_VERBS_H */

View File

@ -884,6 +884,7 @@ struct ipoib_neigh *ipoib_neigh_alloc(struct neighbour *neighbour,
neigh->neighbour = neighbour;
neigh->dev = dev;
memset(&neigh->dgid.raw, 0, sizeof (union ib_gid));
*to_ipoib_neigh(neighbour) = neigh;
skb_queue_head_init(&neigh->queue);
ipoib_cm_set(neigh, NULL);

View File

@ -209,6 +209,8 @@ void iser_finalize_rdma_unaligned_sg(struct iscsi_iser_task *iser_task,
mem_copy->copy_buf = NULL;
}
#define IS_4K_ALIGNED(addr) ((((unsigned long)addr) & ~MASK_4K) == 0)
/**
* iser_sg_to_page_vec - Translates scatterlist entries to physical addresses
* and returns the length of the resulting physical address array (may be less than
@ -221,62 +223,52 @@ void iser_finalize_rdma_unaligned_sg(struct iscsi_iser_task *iser_task,
* where a few fragments of the same page are present in the SG as
* consecutive elements. It also handles a single-entry SG.
*/
static int iser_sg_to_page_vec(struct iser_data_buf *data,
struct iser_page_vec *page_vec,
struct ib_device *ibdev)
{
struct scatterlist *sgl = (struct scatterlist *)data->buf;
struct scatterlist *sg;
u64 first_addr, last_addr, page;
int end_aligned;
unsigned int cur_page = 0;
struct scatterlist *sg, *sgl = (struct scatterlist *)data->buf;
u64 start_addr, end_addr, page, chunk_start = 0;
unsigned long total_sz = 0;
int i;
unsigned int dma_len;
int i, new_chunk, cur_page, last_ent = data->dma_nents - 1;
/* compute the offset of first element */
page_vec->offset = (u64) sgl[0].offset & ~MASK_4K;
new_chunk = 1;
cur_page = 0;
for_each_sg(sgl, sg, data->dma_nents, i) {
unsigned int dma_len = ib_sg_dma_len(ibdev, sg);
start_addr = ib_sg_dma_address(ibdev, sg);
if (new_chunk)
chunk_start = start_addr;
dma_len = ib_sg_dma_len(ibdev, sg);
end_addr = start_addr + dma_len;
total_sz += dma_len;
first_addr = ib_sg_dma_address(ibdev, sg);
last_addr = first_addr + dma_len;
end_aligned = !(last_addr & ~MASK_4K);
/* continue to collect page fragments till aligned or SG ends */
while (!end_aligned && (i + 1 < data->dma_nents)) {
sg = sg_next(sg);
i++;
dma_len = ib_sg_dma_len(ibdev, sg);
total_sz += dma_len;
last_addr = ib_sg_dma_address(ibdev, sg) + dma_len;
end_aligned = !(last_addr & ~MASK_4K);
/* collect page fragments until aligned or end of SG list */
if (!IS_4K_ALIGNED(end_addr) && i < last_ent) {
new_chunk = 0;
continue;
}
new_chunk = 1;
/* handle the 1st page in the 1st DMA element */
if (cur_page == 0) {
page = first_addr & MASK_4K;
page_vec->pages[cur_page] = page;
cur_page++;
/* address of the first page in the contiguous chunk;
masking relevant for the very first SG entry,
which might be unaligned */
page = chunk_start & MASK_4K;
do {
page_vec->pages[cur_page++] = page;
page += SIZE_4K;
} else
page = first_addr;
for (; page < last_addr; page += SIZE_4K) {
page_vec->pages[cur_page] = page;
cur_page++;
}
} while (page < end_addr);
}
page_vec->data_size = total_sz;
iser_dbg("page_vec->data_size:%d cur_page %d\n", page_vec->data_size, cur_page);
return cur_page;
}
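The new loop, in effect, coalesces SG elements into contiguous chunks and then emits one address per 4K page of each chunk. A self-contained sketch of the same algorithm over plain (addr, len) pairs; the demo types and names are assumptions, not driver code:

#include <stdio.h>

#define SIZE_4K 4096UL
#define MASK_4K (~(SIZE_4K - 1))	/* mirrors the driver's MASK_4K */

struct demo_seg { unsigned long addr, len; };	/* assumed demo type */

/* Emit one page address per 4K page covered by each contiguous chunk;
 * a chunk ends at the first 4K-aligned element boundary. */
static int demo_sg_to_pages(const struct demo_seg *sg, int nents,
			    unsigned long *pages)
{
	unsigned long chunk_start = 0, page, end_addr;
	int i, cur_page = 0, new_chunk = 1;

	for (i = 0; i < nents; i++) {
		unsigned long start_addr = sg[i].addr;

		if (new_chunk)
			chunk_start = start_addr;
		end_addr = start_addr + sg[i].len;

		/* keep merging while the chunk end is inside a page */
		if ((end_addr & ~MASK_4K) && i < nents - 1) {
			new_chunk = 0;
			continue;
		}
		new_chunk = 1;

		/* masking handles an unaligned very first entry */
		page = chunk_start & MASK_4K;
		do {
			pages[cur_page++] = page;	/* caller sizes pages[] */
			page += SIZE_4K;
		} while (page < end_addr);
	}
	return cur_page;
}

int main(void)
{
	struct demo_seg sg[] = { { 0x1000, 4096 }, { 0x2000, 8192 } };
	unsigned long pages[8];
	int n = demo_sg_to_pages(sg, 2, pages);

	printf("%d pages, first 0x%lx\n", n, pages[0]);	/* 3 pages */
	return 0;
}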
#define IS_4K_ALIGNED(addr) ((((unsigned long)addr) & ~MASK_4K) == 0)
/**
* iser_data_buf_aligned_len - Tries to determine the maximal correctly aligned
@ -284,42 +276,40 @@ static int iser_sg_to_page_vec(struct iser_data_buf *data,
* the number of entries which are aligned correctly. Supports the case where
* consecutive SG elements are actually fragments of the same physical page.
*/
static unsigned int iser_data_buf_aligned_len(struct iser_data_buf *data,
struct ib_device *ibdev)
static int iser_data_buf_aligned_len(struct iser_data_buf *data,
struct ib_device *ibdev)
{
struct scatterlist *sgl, *sg;
u64 end_addr, next_addr;
int i, cnt;
unsigned int ret_len = 0;
struct scatterlist *sgl, *sg, *next_sg = NULL;
u64 start_addr, end_addr;
int i, ret_len, start_check = 0;
if (data->dma_nents == 1)
return 1;
sgl = (struct scatterlist *)data->buf;
start_addr = ib_sg_dma_address(ibdev, sgl);
cnt = 0;
for_each_sg(sgl, sg, data->dma_nents, i) {
/* iser_dbg("Checking sg iobuf [%d]: phys=0x%08lX "
"offset: %ld sz: %ld\n", i,
(unsigned long)sg_phys(sg),
(unsigned long)sg->offset,
(unsigned long)sg->length); */
end_addr = ib_sg_dma_address(ibdev, sg) +
ib_sg_dma_len(ibdev, sg);
/* iser_dbg("Checking sg iobuf end address "
"0x%08lX\n", end_addr); */
if (i + 1 < data->dma_nents) {
next_addr = ib_sg_dma_address(ibdev, sg_next(sg));
/* are i, i+1 fragments of the same page? */
if (end_addr == next_addr) {
cnt++;
continue;
} else if (!IS_4K_ALIGNED(end_addr)) {
ret_len = cnt + 1;
break;
}
}
cnt++;
if (start_check && !IS_4K_ALIGNED(start_addr))
break;
next_sg = sg_next(sg);
if (!next_sg)
break;
end_addr = start_addr + ib_sg_dma_len(ibdev, sg);
start_addr = ib_sg_dma_address(ibdev, next_sg);
if (end_addr == start_addr) {
start_check = 0;
continue;
} else
start_check = 1;
if (!IS_4K_ALIGNED(end_addr))
break;
}
if (i == data->dma_nents)
ret_len = cnt; /* loop ended */
ret_len = (next_sg) ? i : i+1;
iser_dbg("Found %d aligned entries out of %d in sg:0x%p\n",
ret_len, data->dma_nents, data);
return ret_len;
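In the same spirit, the rewritten alignment scan can be read as: a boundary where one element ends exactly where the next begins keeps the chunk going, while any other boundary must fall on a 4K edge. A simplified sketch reusing struct demo_seg and MASK_4K from the sketch above (an illustration, not the driver logic verbatim):

#define IS_4K_ALIGNED(a) ((((unsigned long)(a)) & ~MASK_4K) == 0)

/* Count how many leading segments form a correctly aligned prefix. */
static int demo_aligned_len(const struct demo_seg *sg, int nents)
{
	int i;

	for (i = 0; i < nents - 1; i++) {
		unsigned long end = sg[i].addr + sg[i].len;

		if (end == sg[i + 1].addr)
			continue;	/* fragments of one page: same chunk */
		if (!IS_4K_ALIGNED(end) || !IS_4K_ALIGNED(sg[i + 1].addr))
			return i + 1;	/* boundary inside a page: stop here */
	}
	return nents;
}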

View File

@ -90,6 +90,7 @@ static void dump_dev_cap_flags(struct mlx4_dev *dev, u32 flags)
[ 9] = "Q_Key violation counter",
[10] = "VMM",
[12] = "DPDP",
[15] = "Big LSO headers",
[16] = "MW support",
[17] = "APM support",
[18] = "Atomic ops support",
@ -235,7 +236,7 @@ int mlx4_QUERY_DEV_CAP(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap)
MLX4_GET(field, outbox, QUERY_DEV_CAP_MAX_MPT_OFFSET);
dev_cap->max_mpts = 1 << (field & 0x3f);
MLX4_GET(field, outbox, QUERY_DEV_CAP_RSVD_EQ_OFFSET);
dev_cap->reserved_eqs = 1 << (field & 0xf);
dev_cap->reserved_eqs = field & 0xf;
MLX4_GET(field, outbox, QUERY_DEV_CAP_MAX_EQ_OFFSET);
dev_cap->max_eqs = 1 << (field & 0xf);
MLX4_GET(field, outbox, QUERY_DEV_CAP_RSVD_MTT_OFFSET);
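The one-line reserved_eqs change is easy to misread: unlike max_eqs and its neighbors, the firmware reports reserved EQs as a plain count, not a log2 value. A minimal before/after illustration; the sample byte is assumed:

#include <stdio.h>

int main(void)
{
	unsigned char field = 0x04;	/* assumed sample QUERY_DEV_CAP byte */

	printf("old decode: %d reserved EQs\n", 1 << (field & 0xf)); /* 16 */
	printf("new decode: %d reserved EQs\n", field & 0xf);        /* 4  */
	return 0;
}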

View File

@ -61,6 +61,7 @@ enum {
MLX4_DEV_CAP_FLAG_BAD_PKEY_CNTR = 1 << 8,
MLX4_DEV_CAP_FLAG_BAD_QKEY_CNTR = 1 << 9,
MLX4_DEV_CAP_FLAG_DPDP = 1 << 12,
MLX4_DEV_CAP_FLAG_BLH = 1 << 15,
MLX4_DEV_CAP_FLAG_MEM_WINDOW = 1 << 16,
MLX4_DEV_CAP_FLAG_APM = 1 << 17,
MLX4_DEV_CAP_FLAG_ATOMIC = 1 << 18,

View File

@ -36,6 +36,7 @@
#include <linux/in.h>
#include <linux/in6.h>
#include <linux/if_arp.h>
#include <linux/netdevice.h>
#include <linux/socket.h>
#include <rdma/ib_verbs.h>
@ -60,8 +61,8 @@ struct rdma_dev_addr {
unsigned char src_dev_addr[MAX_ADDR_LEN];
unsigned char dst_dev_addr[MAX_ADDR_LEN];
unsigned char broadcast[MAX_ADDR_LEN];
enum rdma_node_type dev_type;
struct net_device *src_dev;
unsigned short dev_type;
int bound_dev_if;
};
/**
@ -121,40 +122,29 @@ static inline void ib_addr_get_mgid(struct rdma_dev_addr *dev_addr,
memcpy(gid, dev_addr->broadcast + 4, sizeof *gid);
}
static inline void ib_addr_get_sgid(struct rdma_dev_addr *dev_addr,
union ib_gid *gid)
static inline int rdma_addr_gid_offset(struct rdma_dev_addr *dev_addr)
{
memcpy(gid, dev_addr->src_dev_addr + 4, sizeof *gid);
return dev_addr->dev_type == ARPHRD_INFINIBAND ? 4 : 0;
}
static inline void ib_addr_set_sgid(struct rdma_dev_addr *dev_addr,
union ib_gid *gid)
static inline void rdma_addr_get_sgid(struct rdma_dev_addr *dev_addr, union ib_gid *gid)
{
memcpy(dev_addr->src_dev_addr + 4, gid, sizeof *gid);
memcpy(gid, dev_addr->src_dev_addr + rdma_addr_gid_offset(dev_addr), sizeof *gid);
}
static inline void ib_addr_get_dgid(struct rdma_dev_addr *dev_addr,
union ib_gid *gid)
static inline void rdma_addr_set_sgid(struct rdma_dev_addr *dev_addr, union ib_gid *gid)
{
memcpy(gid, dev_addr->dst_dev_addr + 4, sizeof *gid);
memcpy(dev_addr->src_dev_addr + rdma_addr_gid_offset(dev_addr), gid, sizeof *gid);
}
static inline void ib_addr_set_dgid(struct rdma_dev_addr *dev_addr,
union ib_gid *gid)
static inline void rdma_addr_get_dgid(struct rdma_dev_addr *dev_addr, union ib_gid *gid)
{
memcpy(dev_addr->dst_dev_addr + 4, gid, sizeof *gid);
memcpy(gid, dev_addr->dst_dev_addr + rdma_addr_gid_offset(dev_addr), sizeof *gid);
}
static inline void iw_addr_get_sgid(struct rdma_dev_addr *dev_addr,
union ib_gid *gid)
static inline void rdma_addr_set_dgid(struct rdma_dev_addr *dev_addr, union ib_gid *gid)
{
memcpy(gid, dev_addr->src_dev_addr, sizeof *gid);
}
static inline void iw_addr_get_dgid(struct rdma_dev_addr *dev_addr,
union ib_gid *gid)
{
memcpy(gid, dev_addr->dst_dev_addr, sizeof *gid);
memcpy(dev_addr->dst_dev_addr + rdma_addr_gid_offset(dev_addr), gid, sizeof *gid);
}
#endif /* IB_ADDR_H */
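With dev_type now carrying the raw ARPHRD value, the accessors above pick the GID offset per link type: 4 bytes past the QPN for ARPHRD_INFINIBAND, 0 otherwise. A trivial usage sketch, assuming an already-resolved dev_addr:

/* Sketch: read the source GID, then store a replacement; the same
 * calls work unchanged for IB and iWARP/Ethernet addresses. */
static void demo_gid_roundtrip(struct rdma_dev_addr *dev_addr,
			       union ib_gid *out, union ib_gid *in)
{
	rdma_addr_get_sgid(dev_addr, out);
	rdma_addr_set_sgid(dev_addr, in);
}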

View File

@ -379,4 +379,10 @@ int ib_init_ah_from_path(struct ib_device *device, u8 port_num,
struct ib_sa_path_rec *rec,
struct ib_ah_attr *ah_attr);
/**
* ib_sa_unpack_path - Convert a path record from MAD format to struct
* ib_sa_path_rec.
*/
void ib_sa_unpack_path(void *attribute, struct ib_sa_path_rec *rec);
#endif /* IB_SA_H */

View File

@ -35,6 +35,22 @@
#include <linux/types.h>
enum {
IB_PATH_GMP = 1,
IB_PATH_PRIMARY = (1<<1),
IB_PATH_ALTERNATE = (1<<2),
IB_PATH_OUTBOUND = (1<<3),
IB_PATH_INBOUND = (1<<4),
IB_PATH_INBOUND_REVERSE = (1<<5),
IB_PATH_BIDIRECTIONAL = IB_PATH_OUTBOUND | IB_PATH_INBOUND_REVERSE
};
struct ib_path_rec_data {
__u32 flags;
__u32 reserved;
__u32 path_rec[16];
};
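These records pair a packed (MAD-format) path record with usage flags, and ib_sa_unpack_path() from the previous header turns one back into a struct ib_sa_path_rec. A hedged sketch of filtering an array by flag, kernel context assumed; the helper is an assumption, not from this patch:

/* Unpack the first path flagged primary; -ENOENT if none is found. */
static int demo_find_primary_path(struct ib_path_rec_data *paths, int num,
				  struct ib_sa_path_rec *rec)
{
	int i;

	for (i = 0; i < num; i++) {
		if (paths[i].flags & IB_PATH_PRIMARY) {
			ib_sa_unpack_path(paths[i].path_rec, rec);
			return 0;
		}
	}
	return -ENOENT;
}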
struct ib_user_path_rec {
__u8 dgid[16];
__u8 sgid[16];

View File

@ -1425,6 +1425,11 @@ int ib_destroy_qp(struct ib_qp *qp);
* @send_wr: A list of work requests to post on the send queue.
* @bad_send_wr: On an immediate failure, this parameter will reference
* the work request that failed to be posted on the QP.
*
* While IBA Vol. 1 section 11.4.1.1 specifies that if an immediate
* error is returned, the QP state shall not be affected,
* ib_post_send() will return an immediate error after queueing any
* earlier work requests in the list.
*/
static inline int ib_post_send(struct ib_qp *qp,
struct ib_send_wr *send_wr,

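In practice this means a caller posting a chained list should use bad_send_wr to see where an immediate error cut the chain; a minimal sketch under that reading, with the helper name assumed:

static int demo_post_chain(struct ib_qp *qp, struct ib_send_wr *first)
{
	struct ib_send_wr *bad_wr;
	int ret = ib_post_send(qp, first, &bad_wr);

	/* WRs before bad_wr were queued and will still complete;
	 * bad_wr and everything after it were not posted. */
	if (ret)
		pr_err("ib_post_send failed at wr_id %llu (%d)\n",
		       (unsigned long long)bad_wr->wr_id, ret);
	return ret;
}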
View File

@ -215,12 +215,14 @@ struct rdma_ucm_event_resp {
/* Option levels */
enum {
RDMA_OPTION_ID = 0
RDMA_OPTION_ID = 0,
RDMA_OPTION_IB = 1
};
/* Option details */
enum {
RDMA_OPTION_ID_TOS = 0
RDMA_OPTION_ID_TOS = 0,
RDMA_OPTION_IB_PATH = 1
};
struct rdma_ucm_set_option {

View File

@ -182,8 +182,8 @@ static int rds_ib_conn_info_visitor(struct rds_connection *conn,
ic = conn->c_transport_data;
dev_addr = &ic->i_cm_id->route.addr.dev_addr;
ib_addr_get_sgid(dev_addr, (union ib_gid *) &iinfo->src_gid);
ib_addr_get_dgid(dev_addr, (union ib_gid *) &iinfo->dst_gid);
rdma_addr_get_sgid(dev_addr, (union ib_gid *) &iinfo->src_gid);
rdma_addr_get_dgid(dev_addr, (union ib_gid *) &iinfo->dst_gid);
rds_ibdev = ib_get_client_data(ic->i_cm_id->device, &rds_ib_client);
iinfo->max_send_wr = ic->i_send_ring.w_nr;

View File

@ -184,8 +184,8 @@ static int rds_iw_conn_info_visitor(struct rds_connection *conn,
ic = conn->c_transport_data;
dev_addr = &ic->i_cm_id->route.addr.dev_addr;
ib_addr_get_sgid(dev_addr, (union ib_gid *) &iinfo->src_gid);
ib_addr_get_dgid(dev_addr, (union ib_gid *) &iinfo->dst_gid);
rdma_addr_get_sgid(dev_addr, (union ib_gid *) &iinfo->src_gid);
rdma_addr_get_dgid(dev_addr, (union ib_gid *) &iinfo->dst_gid);
rds_iwdev = ib_get_client_data(ic->i_cm_id->device, &rds_iw_client);
iinfo->max_send_wr = ic->i_send_ring.w_nr;