IPoIB: Connected mode experimental support
The following patch adds experimental support for IPoIB connected mode, as defined by the draft from the IETF ipoib working group. The idea is to increase performance by increasing the MTU from the maximum of 2K (theoretically 4K) supported by IPoIB on top of UD. With this code, I'm able to get 800 MByte/sec or more with netperf without options on a Mellanox 4x back-to-back DDR system.

Some notes on the code:

1. SRQ is used for scalability to large cluster sizes.
2. Only RC connections are used (UC does not support SRQ now).
3. Retry count is set to 0, since the spec draft warns against retries.
4. Each connection is used for data transfers in only one direction, so each connection is either active (TX) or passive (RX). Two sides that want to communicate create two connections.
5. Each active (TX) connection has a separate CQ for send completions - this keeps the code simple without CQ resize and other tricks.
6. To detect stale passive side connections (where the remote side is down), we keep an LRU list of passive connections (updated once per second per connection) and destroy a connection after it has been unused for several seconds. The LRU rule makes it possible to avoid scanning connections that have recently been active; a rough sketch of the reaping logic is shown below.

Signed-off-by: Michael S. Tsirkin <mst@mellanox.co.il>
Signed-off-by: Roland Dreier <rolandd@cisco.com>
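The connected-mode implementation itself lives in the new file ipoib_cm.c, whose diff is suppressed further down. As an illustration of note 6 only, here is a minimal sketch of how the passive-connection LRU could be reaped from the stale_task delayed work declared in struct ipoib_cm_dev_priv (see the ipoib.h hunk below). The timeout constant, the function name, and the locking details are assumptions, not necessarily what the patch does, and re-arming of the work item is omitted.

/*
 * Illustrative sketch only (assumed names: IPOIB_CM_RX_TIMEOUT,
 * ipoib_cm_stale_task).  Passive (RX) connections sit on
 * priv->cm.passive_ids in LRU order; each connection's ->jiffies stamp
 * is refreshed at most once per second when it sees traffic, so the
 * reaper only has to look at the tail of the list.
 */
static void ipoib_cm_stale_task(struct work_struct *work)
{
	struct ipoib_dev_priv *priv = container_of(work, struct ipoib_dev_priv,
						   cm.stale_task.work);
	struct ipoib_cm_rx *p;

	spin_lock_irq(&priv->lock);
	while (!list_empty(&priv->cm.passive_ids)) {
		/* Oldest (least recently used) connection is at the tail. */
		p = list_entry(priv->cm.passive_ids.prev, struct ipoib_cm_rx, list);
		if (time_before_eq(jiffies, p->jiffies + IPOIB_CM_RX_TIMEOUT))
			break;
		list_del_init(&p->list);
		spin_unlock_irq(&priv->lock);

		ib_destroy_cm_id(p->id);	/* tears down the RC connection */
		ib_destroy_qp(p->qp);
		kfree(p);

		spin_lock_irq(&priv->lock);
	}
	spin_unlock_irq(&priv->lock);
}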
This commit is contained in:
parent 9a6b090c0d
commit 839fcaba35
drivers/infiniband/ulp/ipoib/Kconfig
@@ -1,6 +1,6 @@
config INFINIBAND_IPOIB
	tristate "IP-over-InfiniBand"
	depends on INFINIBAND && NETDEVICES && INET
	depends on INFINIBAND && NETDEVICES && INET && (IPV6 || IPV6=n)
	---help---
	  Support for the IP-over-InfiniBand protocol (IPoIB). This
	  transports IP packets over InfiniBand so you can use your IB
@@ -8,6 +8,20 @@ config INFINIBAND_IPOIB

	  See Documentation/infiniband/ipoib.txt for more information

config INFINIBAND_IPOIB_CM
	bool "IP-over-InfiniBand Connected Mode support"
	depends on INFINIBAND_IPOIB && EXPERIMENTAL
	default n
	---help---
	  This option enables experimental support for IPoIB connected mode.
	  After enabling this option, you need to switch to connected mode through
	  /sys/class/net/ibXXX/mode to actually create connections, and then increase
	  the interface MTU with e.g. ifconfig ib0 mtu 65520.

	  WARNING: Enabling connected mode will trigger some
	  packet drops for multicast and UD mode traffic from this interface,
	  unless you limit mtu for these destinations to 2044.

config INFINIBAND_IPOIB_DEBUG
	bool "IP-over-InfiniBand debugging" if EMBEDDED
	depends on INFINIBAND_IPOIB
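As the help text above describes, connected mode is opt-in at runtime. The following is a small userspace sketch of the two steps, assuming an interface named ib0; it is simply the programmatic equivalent of "echo connected > /sys/class/net/ib0/mode" followed by "ifconfig ib0 mtu 65520" and needs root privileges.

/* Illustrative only: switch an IPoIB interface (assumed name "ib0") to
 * connected mode via its sysfs mode attribute, then raise its MTU with
 * the SIOCSIFMTU ioctl. */
#include <fcntl.h>
#include <net/if.h>
#include <string.h>
#include <sys/ioctl.h>
#include <sys/socket.h>
#include <unistd.h>

int main(void)
{
	int fd = open("/sys/class/net/ib0/mode", O_WRONLY);
	if (fd < 0)
		return 1;
	if (write(fd, "connected\n", 10) != 10) {
		close(fd);
		return 1;
	}
	close(fd);

	struct ifreq ifr;
	memset(&ifr, 0, sizeof(ifr));
	strncpy(ifr.ifr_name, "ib0", IFNAMSIZ - 1);
	ifr.ifr_mtu = 65520;	/* IPOIB_CM_MTU as suggested by the help text */

	int sock = socket(AF_INET, SOCK_DGRAM, 0);
	if (sock < 0)
		return 1;
	if (ioctl(sock, SIOCSIFMTU, &ifr) < 0) {
		close(sock);
		return 1;
	}
	close(sock);
	return 0;
}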
drivers/infiniband/ulp/ipoib/Makefile
@@ -5,5 +5,6 @@ ib_ipoib-y := ipoib_main.o \
					   ipoib_multicast.o \
					   ipoib_verbs.o \
					   ipoib_vlan.o
ib_ipoib-$(CONFIG_INFINIBAND_IPOIB_CM) += ipoib_cm.o
ib_ipoib-$(CONFIG_INFINIBAND_IPOIB_DEBUG) += ipoib_fs.o
drivers/infiniband/ulp/ipoib/ipoib.h
@@ -62,6 +62,10 @@ enum {

	IPOIB_ENCAP_LEN = 4,

	IPOIB_CM_MTU = 0x10000 - 0x10, /* padding to align header to 16 */
	IPOIB_CM_BUF_SIZE = IPOIB_CM_MTU + IPOIB_ENCAP_LEN,
	IPOIB_CM_HEAD_SIZE = IPOIB_CM_BUF_SIZE % PAGE_SIZE,
	IPOIB_CM_RX_SG = ALIGN(IPOIB_CM_BUF_SIZE, PAGE_SIZE) / PAGE_SIZE,
	IPOIB_RX_RING_SIZE = 128,
	IPOIB_TX_RING_SIZE = 64,
	IPOIB_MAX_QUEUE_SIZE = 8192,
@@ -81,6 +85,8 @@ enum {
	IPOIB_MCAST_RUN = 6,
	IPOIB_STOP_REAPER = 7,
	IPOIB_MCAST_STARTED = 8,
	IPOIB_FLAG_NETIF_STOPPED = 9,
	IPOIB_FLAG_ADMIN_CM = 10,

	IPOIB_MAX_BACKOFF_SECONDS = 16,

@@ -90,6 +96,13 @@ enum {
	IPOIB_MCAST_FLAG_ATTACHED = 3,
};

#define IPOIB_OP_RECV (1ul << 31)
#ifdef CONFIG_INFINIBAND_IPOIB_CM
#define IPOIB_CM_OP_SRQ (1ul << 30)
#else
#define IPOIB_CM_OP_SRQ (0)
#endif

/* structs */

struct ipoib_header {
@@ -113,6 +126,59 @@ struct ipoib_tx_buf {
	u64 mapping;
};

struct ib_cm_id;

struct ipoib_cm_data {
	__be32 qpn; /* High byte MUST be ignored on receive */
	__be32 mtu;
};

struct ipoib_cm_rx {
	struct ib_cm_id *id;
	struct ib_qp *qp;
	struct list_head list;
	struct net_device *dev;
	unsigned long jiffies;
};

struct ipoib_cm_tx {
	struct ib_cm_id *id;
	struct ib_cq *cq;
	struct ib_qp *qp;
	struct list_head list;
	struct net_device *dev;
	struct ipoib_neigh *neigh;
	struct ipoib_path *path;
	struct ipoib_tx_buf *tx_ring;
	unsigned tx_head;
	unsigned tx_tail;
	unsigned long flags;
	u32 mtu;
	struct ib_wc ibwc[IPOIB_NUM_WC];
};

struct ipoib_cm_rx_buf {
	struct sk_buff *skb;
	u64 mapping[IPOIB_CM_RX_SG];
};

struct ipoib_cm_dev_priv {
	struct ib_srq *srq;
	struct ipoib_cm_rx_buf *srq_ring;
	struct ib_cm_id *id;
	struct list_head passive_ids;
	struct work_struct start_task;
	struct work_struct reap_task;
	struct work_struct skb_task;
	struct delayed_work stale_task;
	struct sk_buff_head skb_queue;
	struct list_head start_list;
	struct list_head reap_list;
	struct ib_wc ibwc[IPOIB_NUM_WC];
	struct ib_sge rx_sge[IPOIB_CM_RX_SG];
	struct ib_recv_wr rx_wr;
};

/*
 * Device private locking: tx_lock protects members used in TX fast
 * path (and we use LLTX so upper layers don't do extra locking).
@@ -179,6 +245,10 @@ struct ipoib_dev_priv {
	struct list_head child_intfs;
	struct list_head list;

#ifdef CONFIG_INFINIBAND_IPOIB_CM
	struct ipoib_cm_dev_priv cm;
#endif

#ifdef CONFIG_INFINIBAND_IPOIB_DEBUG
	struct list_head fs_list;
	struct dentry *mcg_dentry;
@@ -212,6 +282,9 @@ struct ipoib_path {

struct ipoib_neigh {
	struct ipoib_ah *ah;
#ifdef CONFIG_INFINIBAND_IPOIB_CM
	struct ipoib_cm_tx *cm;
#endif
	union ib_gid dgid;
	struct sk_buff_head queue;

@@ -315,6 +388,146 @@ int ipoib_vlan_delete(struct net_device *pdev, unsigned short pkey);
void ipoib_pkey_poll(struct work_struct *work);
int ipoib_pkey_dev_delay_open(struct net_device *dev);

#ifdef CONFIG_INFINIBAND_IPOIB_CM

#define IPOIB_FLAGS_RC 0x80
#define IPOIB_FLAGS_UC 0x40

/* We don't support UC connections at the moment */
#define IPOIB_CM_SUPPORTED(ha) (ha[0] & (IPOIB_FLAGS_RC))

static inline int ipoib_cm_admin_enabled(struct net_device *dev)
{
	struct ipoib_dev_priv *priv = netdev_priv(dev);
	return IPOIB_CM_SUPPORTED(dev->dev_addr) &&
		test_bit(IPOIB_FLAG_ADMIN_CM, &priv->flags);
}

static inline int ipoib_cm_enabled(struct net_device *dev, struct neighbour *n)
{
	struct ipoib_dev_priv *priv = netdev_priv(dev);
	return IPOIB_CM_SUPPORTED(n->ha) &&
		test_bit(IPOIB_FLAG_ADMIN_CM, &priv->flags);
}

static inline int ipoib_cm_up(struct ipoib_neigh *neigh)

{
	return test_bit(IPOIB_FLAG_OPER_UP, &neigh->cm->flags);
}

static inline struct ipoib_cm_tx *ipoib_cm_get(struct ipoib_neigh *neigh)
{
	return neigh->cm;
}

static inline void ipoib_cm_set(struct ipoib_neigh *neigh, struct ipoib_cm_tx *tx)
{
	neigh->cm = tx;
}

void ipoib_cm_send(struct net_device *dev, struct sk_buff *skb, struct ipoib_cm_tx *tx);
int ipoib_cm_dev_open(struct net_device *dev);
void ipoib_cm_dev_stop(struct net_device *dev);
int ipoib_cm_dev_init(struct net_device *dev);
int ipoib_cm_add_mode_attr(struct net_device *dev);
void ipoib_cm_dev_cleanup(struct net_device *dev);
struct ipoib_cm_tx *ipoib_cm_create_tx(struct net_device *dev, struct ipoib_path *path,
				       struct ipoib_neigh *neigh);
void ipoib_cm_destroy_tx(struct ipoib_cm_tx *tx);
void ipoib_cm_skb_too_long(struct net_device* dev, struct sk_buff *skb,
			   unsigned int mtu);
void ipoib_cm_handle_rx_wc(struct net_device *dev, struct ib_wc *wc);
#else

struct ipoib_cm_tx;

static inline int ipoib_cm_admin_enabled(struct net_device *dev)
{
	return 0;
}
static inline int ipoib_cm_enabled(struct net_device *dev, struct neighbour *n)

{
	return 0;
}

static inline int ipoib_cm_up(struct ipoib_neigh *neigh)

{
	return 0;
}

static inline struct ipoib_cm_tx *ipoib_cm_get(struct ipoib_neigh *neigh)
{
	return NULL;
}

static inline void ipoib_cm_set(struct ipoib_neigh *neigh, struct ipoib_cm_tx *tx)
{
}

static inline
void ipoib_cm_send(struct net_device *dev, struct sk_buff *skb, struct ipoib_cm_tx *tx)
{
	return;
}

static inline
int ipoib_cm_dev_open(struct net_device *dev)
{
	return 0;
}

static inline
void ipoib_cm_dev_stop(struct net_device *dev)
{
	return;
}

static inline
int ipoib_cm_dev_init(struct net_device *dev)
{
	return -ENOSYS;
}

static inline
void ipoib_cm_dev_cleanup(struct net_device *dev)
{
	return;
}

static inline
struct ipoib_cm_tx *ipoib_cm_create_tx(struct net_device *dev, struct ipoib_path *path,
				       struct ipoib_neigh *neigh)
{
	return NULL;
}

static inline
void ipoib_cm_destroy_tx(struct ipoib_cm_tx *tx)
{
	return;
}

static inline
int ipoib_cm_add_mode_attr(struct net_device *dev)
{
	return 0;
}

static inline void ipoib_cm_skb_too_long(struct net_device* dev, struct sk_buff *skb,
					 unsigned int mtu)
{
	dev_kfree_skb_any(skb);
}

static inline void ipoib_cm_handle_rx_wc(struct net_device *dev, struct ib_wc *wc)
{
}

#endif

#ifdef CONFIG_INFINIBAND_IPOIB_DEBUG
void ipoib_create_debug_files(struct net_device *dev);
void ipoib_delete_debug_files(struct net_device *dev);
@@ -392,4 +605,6 @@ extern int ipoib_debug_level;

#define IPOIB_GID_ARG(gid) IPOIB_GID_RAW_ARG((gid).raw)

#define IPOIB_QPN(ha) (be32_to_cpup((__be32 *) ha) & 0xffffff)

#endif /* _IPOIB_H */
drivers/infiniband/ulp/ipoib/ipoib_cm.c (new file, 1237 lines)
File diff suppressed because it is too large.
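Since the ipoib_cm.c diff is not shown here, the following is only a rough sketch of how the per-connection private data defined in ipoib.h above (struct ipoib_cm_data) might be filled for the active side's CM REQ; the helper name is hypothetical. The local QPN and the largest receivable buffer are exchanged this way, and per note 3 of the changelog the REQ would be sent with a retry count of 0.

/*
 * Hypothetical helper, for illustration only: fill the REQ private data
 * (struct ipoib_cm_data from ipoib.h) for an active (TX) connection.
 */
static void ipoib_cm_fill_req_data(struct ib_qp *qp, struct ipoib_cm_data *data)
{
	data->qpn = cpu_to_be32(qp->qp_num);	    /* high byte ignored on receive */
	data->mtu = cpu_to_be32(IPOIB_CM_BUF_SIZE); /* largest payload we can receive */
}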
drivers/infiniband/ulp/ipoib/ipoib_ib.c
@@ -50,8 +50,6 @@ MODULE_PARM_DESC(data_debug_level,
		 "Enable data path debug tracing if > 0");
#endif

#define IPOIB_OP_RECV (1ul << 31)

static DEFINE_MUTEX(pkey_mutex);

struct ipoib_ah *ipoib_create_ah(struct net_device *dev,
@@ -268,10 +266,11 @@ static void ipoib_ib_handle_tx_wc(struct net_device *dev, struct ib_wc *wc)

	spin_lock_irqsave(&priv->tx_lock, flags);
	++priv->tx_tail;
	if (netif_queue_stopped(dev) &&
	    test_bit(IPOIB_FLAG_ADMIN_UP, &priv->flags) &&
	    priv->tx_head - priv->tx_tail <= ipoib_sendq_size >> 1)
	if (unlikely(test_bit(IPOIB_FLAG_NETIF_STOPPED, &priv->flags)) &&
	    priv->tx_head - priv->tx_tail <= ipoib_sendq_size >> 1) {
		clear_bit(IPOIB_FLAG_NETIF_STOPPED, &priv->flags);
		netif_wake_queue(dev);
	}
	spin_unlock_irqrestore(&priv->tx_lock, flags);

	if (wc->status != IB_WC_SUCCESS &&
@@ -283,7 +282,9 @@ static void ipoib_ib_handle_tx_wc(struct net_device *dev, struct ib_wc *wc)

static void ipoib_ib_handle_wc(struct net_device *dev, struct ib_wc *wc)
{
	if (wc->wr_id & IPOIB_OP_RECV)
	if (wc->wr_id & IPOIB_CM_OP_SRQ)
		ipoib_cm_handle_rx_wc(dev, wc);
	else if (wc->wr_id & IPOIB_OP_RECV)
		ipoib_ib_handle_rx_wc(dev, wc);
	else
		ipoib_ib_handle_tx_wc(dev, wc);
@@ -327,12 +328,12 @@ void ipoib_send(struct net_device *dev, struct sk_buff *skb,
	struct ipoib_tx_buf *tx_req;
	u64 addr;

	if (unlikely(skb->len > dev->mtu + INFINIBAND_ALEN)) {
	if (unlikely(skb->len > priv->mcast_mtu + INFINIBAND_ALEN)) {
		ipoib_warn(priv, "packet len %d (> %d) too long to send, dropping\n",
			   skb->len, dev->mtu + INFINIBAND_ALEN);
			   skb->len, priv->mcast_mtu + INFINIBAND_ALEN);
		++priv->stats.tx_dropped;
		++priv->stats.tx_errors;
		dev_kfree_skb_any(skb);
		ipoib_cm_skb_too_long(dev, skb, priv->mcast_mtu);
		return;
	}

@@ -372,6 +373,7 @@ void ipoib_send(struct net_device *dev, struct sk_buff *skb,
		if (priv->tx_head - priv->tx_tail == ipoib_sendq_size) {
			ipoib_dbg(priv, "TX ring full, stopping kernel net queue\n");
			netif_stop_queue(dev);
			set_bit(IPOIB_FLAG_NETIF_STOPPED, &priv->flags);
		}
	}
}
@@ -424,6 +426,13 @@ int ipoib_ib_dev_open(struct net_device *dev)
		return -1;
	}

	ret = ipoib_cm_dev_open(dev);
	if (ret) {
		ipoib_warn(priv, "ipoib_ib_post_receives returned %d\n", ret);
		ipoib_ib_dev_stop(dev);
		return -1;
	}

	clear_bit(IPOIB_STOP_REAPER, &priv->flags);
	queue_delayed_work(ipoib_workqueue, &priv->ah_reap_task, HZ);

@@ -509,6 +518,8 @@ int ipoib_ib_dev_stop(struct net_device *dev)

	clear_bit(IPOIB_FLAG_INITIALIZED, &priv->flags);

	ipoib_cm_dev_stop(dev);

	/*
	 * Move our QP to the error state and then reinitialize in
	 * when all work requests have completed or have been flushed.
drivers/infiniband/ulp/ipoib/ipoib_main.c
@@ -49,8 +49,6 @@

#include <net/dst.h>

#define IPOIB_QPN(ha) (be32_to_cpup((__be32 *) ha) & 0xffffff)

MODULE_AUTHOR("Roland Dreier");
MODULE_DESCRIPTION("IP-over-InfiniBand net driver");
MODULE_LICENSE("Dual BSD/GPL");
@@ -145,6 +143,8 @@ static int ipoib_stop(struct net_device *dev)

	netif_stop_queue(dev);

	clear_bit(IPOIB_FLAG_NETIF_STOPPED, &priv->flags);

	/*
	 * Now flush workqueue to make sure a scheduled task doesn't
	 * bring our internal state back up.
@@ -178,8 +178,18 @@ static int ipoib_change_mtu(struct net_device *dev, int new_mtu)
{
	struct ipoib_dev_priv *priv = netdev_priv(dev);

	if (new_mtu > IPOIB_PACKET_SIZE - IPOIB_ENCAP_LEN)
	/* dev->mtu > 2K ==> connected mode */
	if (ipoib_cm_admin_enabled(dev) && new_mtu <= IPOIB_CM_MTU) {
		if (new_mtu > priv->mcast_mtu)
			ipoib_warn(priv, "mtu > %d will cause multicast packet drops.\n",
				   priv->mcast_mtu);
		dev->mtu = new_mtu;
		return 0;
	}

	if (new_mtu > IPOIB_PACKET_SIZE - IPOIB_ENCAP_LEN) {
		return -EINVAL;
	}

	priv->admin_mtu = new_mtu;

@@ -414,6 +424,20 @@ static void path_rec_completion(int status,
			memcpy(&neigh->dgid.raw, &path->pathrec.dgid.raw,
			       sizeof(union ib_gid));

			if (ipoib_cm_enabled(dev, neigh->neighbour)) {
				if (!ipoib_cm_get(neigh))
					ipoib_cm_set(neigh, ipoib_cm_create_tx(dev,
									       path,
									       neigh));
				if (!ipoib_cm_get(neigh)) {
					list_del(&neigh->list);
					if (neigh->ah)
						ipoib_put_ah(neigh->ah);
					ipoib_neigh_free(dev, neigh);
					continue;
				}
			}

			while ((skb = __skb_dequeue(&neigh->queue)))
				__skb_queue_tail(&skqueue, skb);
		}
@@ -520,7 +544,25 @@ static void neigh_add_path(struct sk_buff *skb, struct net_device *dev)
		memcpy(&neigh->dgid.raw, &path->pathrec.dgid.raw,
		       sizeof(union ib_gid));

		ipoib_send(dev, skb, path->ah, IPOIB_QPN(skb->dst->neighbour->ha));
		if (ipoib_cm_enabled(dev, neigh->neighbour)) {
			if (!ipoib_cm_get(neigh))
				ipoib_cm_set(neigh, ipoib_cm_create_tx(dev, path, neigh));
			if (!ipoib_cm_get(neigh)) {
				list_del(&neigh->list);
				if (neigh->ah)
					ipoib_put_ah(neigh->ah);
				ipoib_neigh_free(dev, neigh);
				goto err_drop;
			}
			if (skb_queue_len(&neigh->queue) < IPOIB_MAX_PATH_REC_QUEUE)
				__skb_queue_tail(&neigh->queue, skb);
			else {
				ipoib_warn(priv, "queue length limit %d. Packet drop.\n",
					   skb_queue_len(&neigh->queue));
				goto err_drop;
			}
		} else
			ipoib_send(dev, skb, path->ah, IPOIB_QPN(skb->dst->neighbour->ha));
	} else {
		neigh->ah = NULL;

@@ -538,6 +580,7 @@ err_list:

err_path:
	ipoib_neigh_free(dev, neigh);
err_drop:
	++priv->stats.tx_dropped;
	dev_kfree_skb_any(skb);

@@ -640,7 +683,12 @@ static int ipoib_start_xmit(struct sk_buff *skb, struct net_device *dev)

		neigh = *to_ipoib_neigh(skb->dst->neighbour);

		if (likely(neigh->ah)) {
		if (ipoib_cm_get(neigh)) {
			if (ipoib_cm_up(neigh)) {
				ipoib_cm_send(dev, skb, ipoib_cm_get(neigh));
				goto out;
			}
		} else if (neigh->ah) {
			if (unlikely(memcmp(&neigh->dgid.raw,
					    skb->dst->neighbour->ha + 4,
					    sizeof(union ib_gid)))) {
@@ -805,6 +853,7 @@ struct ipoib_neigh *ipoib_neigh_alloc(struct neighbour *neighbour)
	neigh->neighbour = neighbour;
	*to_ipoib_neigh(neighbour) = neigh;
	skb_queue_head_init(&neigh->queue);
	ipoib_cm_set(neigh, NULL);

	return neigh;
}
@@ -818,6 +867,8 @@ void ipoib_neigh_free(struct net_device *dev, struct ipoib_neigh *neigh)
		++priv->stats.tx_dropped;
		dev_kfree_skb_any(skb);
	}
	if (ipoib_cm_get(neigh))
		ipoib_cm_destroy_tx(ipoib_cm_get(neigh));
	kfree(neigh);
}

@@ -1080,6 +1131,8 @@ static struct net_device *ipoib_add_port(const char *format,

	ipoib_create_debug_files(priv->dev);

	if (ipoib_cm_add_mode_attr(priv->dev))
		goto sysfs_failed;
	if (ipoib_add_pkey_attr(priv->dev))
		goto sysfs_failed;
	if (device_create_file(&priv->dev->dev, &dev_attr_create_child))
drivers/infiniband/ulp/ipoib/ipoib_multicast.c
@@ -597,7 +597,9 @@ void ipoib_mcast_join_task(struct work_struct *work)

		priv->mcast_mtu = ib_mtu_enum_to_int(priv->broadcast->mcmember.mtu) -
			IPOIB_ENCAP_LEN;
		dev->mtu = min(priv->mcast_mtu, priv->admin_mtu);

		if (!ipoib_cm_admin_enabled(dev))
			dev->mtu = min(priv->mcast_mtu, priv->admin_mtu);

	ipoib_dbg_mcast(priv, "successfully joined all multicast groups\n");
drivers/infiniband/ulp/ipoib/ipoib_verbs.c
@@ -168,35 +168,41 @@ int ipoib_transport_dev_init(struct net_device *dev, struct ib_device *ca)
		.qp_type = IB_QPT_UD
	};

	int ret, size;

	priv->pd = ib_alloc_pd(priv->ca);
	if (IS_ERR(priv->pd)) {
		printk(KERN_WARNING "%s: failed to allocate PD\n", ca->name);
		return -ENODEV;
	}

	priv->cq = ib_create_cq(priv->ca, ipoib_ib_completion, NULL, dev,
				ipoib_sendq_size + ipoib_recvq_size + 1);
	priv->mr = ib_get_dma_mr(priv->pd, IB_ACCESS_LOCAL_WRITE);
	if (IS_ERR(priv->mr)) {
		printk(KERN_WARNING "%s: ib_get_dma_mr failed\n", ca->name);
		goto out_free_pd;
	}

	size = ipoib_sendq_size + ipoib_recvq_size + 1;
	ret = ipoib_cm_dev_init(dev);
	if (!ret)
		size += ipoib_recvq_size;

	priv->cq = ib_create_cq(priv->ca, ipoib_ib_completion, NULL, dev, size);
	if (IS_ERR(priv->cq)) {
		printk(KERN_WARNING "%s: failed to create CQ\n", ca->name);
		goto out_free_pd;
		goto out_free_mr;
	}

	if (ib_req_notify_cq(priv->cq, IB_CQ_NEXT_COMP))
		goto out_free_cq;

	priv->mr = ib_get_dma_mr(priv->pd, IB_ACCESS_LOCAL_WRITE);
	if (IS_ERR(priv->mr)) {
		printk(KERN_WARNING "%s: ib_get_dma_mr failed\n", ca->name);
		goto out_free_cq;
	}

	init_attr.send_cq = priv->cq;
	init_attr.recv_cq = priv->cq,

	priv->qp = ib_create_qp(priv->pd, &init_attr);
	if (IS_ERR(priv->qp)) {
		printk(KERN_WARNING "%s: failed to create QP\n", ca->name);
		goto out_free_mr;
		goto out_free_cq;
	}

	priv->dev->dev_addr[1] = (priv->qp->qp_num >> 16) & 0xff;
@@ -212,12 +218,12 @@ int ipoib_transport_dev_init(struct net_device *dev, struct ib_device *ca)

	return 0;

out_free_mr:
	ib_dereg_mr(priv->mr);

out_free_cq:
	ib_destroy_cq(priv->cq);

out_free_mr:
	ib_dereg_mr(priv->mr);

out_free_pd:
	ib_dealloc_pd(priv->pd);
	return -ENODEV;
@@ -235,12 +241,14 @@ void ipoib_transport_dev_cleanup(struct net_device *dev)
		clear_bit(IPOIB_PKEY_ASSIGNED, &priv->flags);
	}

	if (ib_dereg_mr(priv->mr))
		ipoib_warn(priv, "ib_dereg_mr failed\n");

	if (ib_destroy_cq(priv->cq))
		ipoib_warn(priv, "ib_cq_destroy failed\n");

	ipoib_cm_dev_cleanup(dev);

	if (ib_dereg_mr(priv->mr))
		ipoib_warn(priv, "ib_dereg_mr failed\n");

	if (ib_dealloc_pd(priv->pd))
		ipoib_warn(priv, "ib_dealloc_pd failed\n");
}
drivers/infiniband/ulp/ipoib/ipoib_vlan.c
@@ -115,6 +115,8 @@ int ipoib_vlan_add(struct net_device *pdev, unsigned short pkey)

	ipoib_create_debug_files(priv->dev);

	if (ipoib_cm_add_mode_attr(priv->dev))
		goto sysfs_failed;
	if (ipoib_add_pkey_attr(priv->dev))
		goto sysfs_failed;