From 8880fc669deda4fafde3ce90b3128f079567447f Mon Sep 17 00:00:00 2001 From: Sukadev Bhattiprolu Date: Wed, 13 Apr 2022 13:10:21 -0400 Subject: [PATCH 1/6] ibmvnic: rename local variable index to bufidx The local variable 'index' is heavily used in some functions and is confusing with the presence of other "index" fields like pool->index, ->consumer_index, etc. Rename it to bufidx to better reflect that its the index of a buffer in the pool. Signed-off-by: Sukadev Bhattiprolu Signed-off-by: Dany Madden Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/ibm/ibmvnic.c | 44 +++++++++++++++--------------- 1 file changed, 22 insertions(+), 22 deletions(-) diff --git a/drivers/net/ethernet/ibm/ibmvnic.c b/drivers/net/ethernet/ibm/ibmvnic.c index 77683909ca3d..7fd9dd3759df 100644 --- a/drivers/net/ethernet/ibm/ibmvnic.c +++ b/drivers/net/ethernet/ibm/ibmvnic.c @@ -369,7 +369,7 @@ static void replenish_rx_pool(struct ibmvnic_adapter *adapter, dma_addr_t dma_addr; unsigned char *dst; int shift = 0; - int index; + int bufidx; int i; if (!pool->active) @@ -385,14 +385,14 @@ static void replenish_rx_pool(struct ibmvnic_adapter *adapter, * be 0. */ for (i = ind_bufp->index; i < count; ++i) { - index = pool->free_map[pool->next_free]; + bufidx = pool->free_map[pool->next_free]; /* We maybe reusing the skb from earlier resets. Allocate * only if necessary. But since the LTB may have changed * during reset (see init_rx_pools()), update LTB below * even if reusing skb. */ - skb = pool->rx_buff[index].skb; + skb = pool->rx_buff[bufidx].skb; if (!skb) { skb = netdev_alloc_skb(adapter->netdev, pool->buff_size); @@ -407,24 +407,24 @@ static void replenish_rx_pool(struct ibmvnic_adapter *adapter, pool->next_free = (pool->next_free + 1) % pool->size; /* Copy the skb to the long term mapped DMA buffer */ - offset = index * pool->buff_size; + offset = bufidx * pool->buff_size; dst = pool->long_term_buff.buff + offset; memset(dst, 0, pool->buff_size); dma_addr = pool->long_term_buff.addr + offset; /* add the skb to an rx_buff in the pool */ - pool->rx_buff[index].data = dst; - pool->rx_buff[index].dma = dma_addr; - pool->rx_buff[index].skb = skb; - pool->rx_buff[index].pool_index = pool->index; - pool->rx_buff[index].size = pool->buff_size; + pool->rx_buff[bufidx].data = dst; + pool->rx_buff[bufidx].dma = dma_addr; + pool->rx_buff[bufidx].skb = skb; + pool->rx_buff[bufidx].pool_index = pool->index; + pool->rx_buff[bufidx].size = pool->buff_size; /* queue the rx_buff for the next send_subcrq_indirect */ sub_crq = &ind_bufp->indir_arr[ind_bufp->index++]; memset(sub_crq, 0, sizeof(*sub_crq)); sub_crq->rx_add.first = IBMVNIC_CRQ_CMD; sub_crq->rx_add.correlator = - cpu_to_be64((u64)&pool->rx_buff[index]); + cpu_to_be64((u64)&pool->rx_buff[bufidx]); sub_crq->rx_add.ioba = cpu_to_be32(dma_addr); sub_crq->rx_add.map_id = pool->long_term_buff.map_id; @@ -466,10 +466,10 @@ failure: sub_crq = &ind_bufp->indir_arr[i]; rx_buff = (struct ibmvnic_rx_buff *) be64_to_cpu(sub_crq->rx_add.correlator); - index = (int)(rx_buff - pool->rx_buff); - pool->free_map[pool->next_free] = index; - dev_kfree_skb_any(pool->rx_buff[index].skb); - pool->rx_buff[index].skb = NULL; + bufidx = (int)(rx_buff - pool->rx_buff); + pool->free_map[pool->next_free] = bufidx; + dev_kfree_skb_any(pool->rx_buff[bufidx].skb); + pool->rx_buff[bufidx].skb = NULL; } adapter->replenish_add_buff_failure += ind_bufp->index; atomic_add(buffers_added, &pool->available); @@ -1926,7 +1926,7 @@ static netdev_tx_t ibmvnic_xmit(struct sk_buff *skb, struct net_device *netdev) unsigned int offset; int num_entries = 1; unsigned char *dst; - int index = 0; + int bufidx = 0; u8 proto = 0; /* If a reset is in progress, drop the packet since @@ -1960,9 +1960,9 @@ static netdev_tx_t ibmvnic_xmit(struct sk_buff *skb, struct net_device *netdev) else tx_pool = &adapter->tx_pool[queue_num]; - index = tx_pool->free_map[tx_pool->consumer_index]; + bufidx = tx_pool->free_map[tx_pool->consumer_index]; - if (index == IBMVNIC_INVALID_MAP) { + if (bufidx == IBMVNIC_INVALID_MAP) { dev_kfree_skb_any(skb); tx_send_failed++; tx_dropped++; @@ -1973,7 +1973,7 @@ static netdev_tx_t ibmvnic_xmit(struct sk_buff *skb, struct net_device *netdev) tx_pool->free_map[tx_pool->consumer_index] = IBMVNIC_INVALID_MAP; - offset = index * tx_pool->buf_size; + offset = bufidx * tx_pool->buf_size; dst = tx_pool->long_term_buff.buff + offset; memset(dst, 0, tx_pool->buf_size); data_dma_addr = tx_pool->long_term_buff.addr + offset; @@ -2003,9 +2003,9 @@ static netdev_tx_t ibmvnic_xmit(struct sk_buff *skb, struct net_device *netdev) tx_pool->consumer_index = (tx_pool->consumer_index + 1) % tx_pool->num_buffers; - tx_buff = &tx_pool->tx_buff[index]; + tx_buff = &tx_pool->tx_buff[bufidx]; tx_buff->skb = skb; - tx_buff->index = index; + tx_buff->index = bufidx; tx_buff->pool_index = queue_num; memset(&tx_crq, 0, sizeof(tx_crq)); @@ -2017,9 +2017,9 @@ static netdev_tx_t ibmvnic_xmit(struct sk_buff *skb, struct net_device *netdev) if (skb_is_gso(skb)) tx_crq.v1.correlator = - cpu_to_be32(index | IBMVNIC_TSO_POOL_MASK); + cpu_to_be32(bufidx | IBMVNIC_TSO_POOL_MASK); else - tx_crq.v1.correlator = cpu_to_be32(index); + tx_crq.v1.correlator = cpu_to_be32(bufidx); tx_crq.v1.dma_reg = cpu_to_be16(tx_pool->long_term_buff.map_id); tx_crq.v1.sge_len = cpu_to_be32(skb->len); tx_crq.v1.ioba = cpu_to_be64(data_dma_addr); From 2872a67c6bcfbd996fda08ea0a8119be230f428e Mon Sep 17 00:00:00 2001 From: Sukadev Bhattiprolu Date: Wed, 13 Apr 2022 13:10:22 -0400 Subject: [PATCH 2/6] ibmvnic: define map_rxpool_buf_to_ltb() Define a helper to map a given rx pool buffer into its corresponding long term buffer (LTB) and offset. Currently there is just one LTB per pool so the mapping is trivial. When we add support for multiple LTBs per pool, this helper will be more useful. Signed-off-by: Sukadev Bhattiprolu Signed-off-by: Dany Madden Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/ibm/ibmvnic.c | 28 ++++++++++++++++++++++++---- 1 file changed, 24 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/ibm/ibmvnic.c b/drivers/net/ethernet/ibm/ibmvnic.c index 7fd9dd3759df..753afda76296 100644 --- a/drivers/net/ethernet/ibm/ibmvnic.c +++ b/drivers/net/ethernet/ibm/ibmvnic.c @@ -345,6 +345,25 @@ static void free_long_term_buff(struct ibmvnic_adapter *adapter, ltb->map_id = 0; } +/** + * map_rxpool_buf_to_ltb - Map given rxpool buffer to offset in an LTB. + * @rxpool: The receive buffer pool containing buffer + * @bufidx: Index of buffer in rxpool + * @ltbp: (Output) pointer to the long term buffer containing the buffer + * @offset: (Output) offset of buffer in the LTB from @ltbp + * + * Map the given buffer identified by [rxpool, bufidx] to an LTB in the + * pool and its corresponding offset. + */ +static void map_rxpool_buf_to_ltb(struct ibmvnic_rx_pool *rxpool, + unsigned int bufidx, + struct ibmvnic_long_term_buff **ltbp, + unsigned int *offset) +{ + *ltbp = &rxpool->long_term_buff; + *offset = bufidx * rxpool->buff_size; +} + static void deactivate_rx_pools(struct ibmvnic_adapter *adapter) { int i; @@ -361,6 +380,7 @@ static void replenish_rx_pool(struct ibmvnic_adapter *adapter, struct device *dev = &adapter->vdev->dev; struct ibmvnic_ind_xmit_queue *ind_bufp; struct ibmvnic_sub_crq_queue *rx_scrq; + struct ibmvnic_long_term_buff *ltb; union sub_crq *sub_crq; int buffers_added = 0; unsigned long lpar_rc; @@ -407,10 +427,10 @@ static void replenish_rx_pool(struct ibmvnic_adapter *adapter, pool->next_free = (pool->next_free + 1) % pool->size; /* Copy the skb to the long term mapped DMA buffer */ - offset = bufidx * pool->buff_size; - dst = pool->long_term_buff.buff + offset; + map_rxpool_buf_to_ltb(pool, bufidx, <b, &offset); + dst = ltb->buff + offset; memset(dst, 0, pool->buff_size); - dma_addr = pool->long_term_buff.addr + offset; + dma_addr = ltb->addr + offset; /* add the skb to an rx_buff in the pool */ pool->rx_buff[bufidx].data = dst; @@ -426,7 +446,7 @@ static void replenish_rx_pool(struct ibmvnic_adapter *adapter, sub_crq->rx_add.correlator = cpu_to_be64((u64)&pool->rx_buff[bufidx]); sub_crq->rx_add.ioba = cpu_to_be32(dma_addr); - sub_crq->rx_add.map_id = pool->long_term_buff.map_id; + sub_crq->rx_add.map_id = ltb->map_id; /* The length field of the sCRQ is defined to be 24 bits so the * buffer size needs to be left shifted by a byte before it is From 0c91bf9ceba6296892010a48c5eef0bd17ec35d5 Mon Sep 17 00:00:00 2001 From: Sukadev Bhattiprolu Date: Wed, 13 Apr 2022 13:10:23 -0400 Subject: [PATCH 3/6] ibmvnic: define map_txpool_buf_to_ltb() Define a helper to map a given txpool buffer into its corresponding long term buffer (LTB) and offset. Currently there is just one LTB per txpool so the mapping is trivial. When we add support for multiple LTBs per txpool, this helper will be more useful. Signed-off-by: Sukadev Bhattiprolu Signed-off-by: Dany Madden Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/ibm/ibmvnic.c | 29 +++++++++++++++++++++++++---- 1 file changed, 25 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/ibm/ibmvnic.c b/drivers/net/ethernet/ibm/ibmvnic.c index 753afda76296..2e6042409585 100644 --- a/drivers/net/ethernet/ibm/ibmvnic.c +++ b/drivers/net/ethernet/ibm/ibmvnic.c @@ -364,6 +364,25 @@ static void map_rxpool_buf_to_ltb(struct ibmvnic_rx_pool *rxpool, *offset = bufidx * rxpool->buff_size; } +/** + * map_txpool_buf_to_ltb - Map given txpool buffer to offset in an LTB. + * @txpool: The transmit buffer pool containing buffer + * @bufidx: Index of buffer in txpool + * @ltbp: (Output) pointer to the long term buffer (LTB) containing the buffer + * @offset: (Output) offset of buffer in the LTB from @ltbp + * + * Map the given buffer identified by [txpool, bufidx] to an LTB in the + * pool and its corresponding offset. + */ +static void map_txpool_buf_to_ltb(struct ibmvnic_tx_pool *txpool, + unsigned int bufidx, + struct ibmvnic_long_term_buff **ltbp, + unsigned int *offset) +{ + *ltbp = &txpool->long_term_buff; + *offset = bufidx * txpool->buf_size; +} + static void deactivate_rx_pools(struct ibmvnic_adapter *adapter) { int i; @@ -1931,6 +1950,7 @@ static netdev_tx_t ibmvnic_xmit(struct sk_buff *skb, struct net_device *netdev) struct ibmvnic_ind_xmit_queue *ind_bufp; struct ibmvnic_tx_buff *tx_buff = NULL; struct ibmvnic_sub_crq_queue *tx_scrq; + struct ibmvnic_long_term_buff *ltb; struct ibmvnic_tx_pool *tx_pool; unsigned int tx_send_failed = 0; netdev_tx_t ret = NETDEV_TX_OK; @@ -1993,10 +2013,11 @@ static netdev_tx_t ibmvnic_xmit(struct sk_buff *skb, struct net_device *netdev) tx_pool->free_map[tx_pool->consumer_index] = IBMVNIC_INVALID_MAP; - offset = bufidx * tx_pool->buf_size; - dst = tx_pool->long_term_buff.buff + offset; + map_txpool_buf_to_ltb(tx_pool, bufidx, <b, &offset); + + dst = ltb->buff + offset; memset(dst, 0, tx_pool->buf_size); - data_dma_addr = tx_pool->long_term_buff.addr + offset; + data_dma_addr = ltb->addr + offset; if (skb_shinfo(skb)->nr_frags) { int cur, i; @@ -2040,7 +2061,7 @@ static netdev_tx_t ibmvnic_xmit(struct sk_buff *skb, struct net_device *netdev) cpu_to_be32(bufidx | IBMVNIC_TSO_POOL_MASK); else tx_crq.v1.correlator = cpu_to_be32(bufidx); - tx_crq.v1.dma_reg = cpu_to_be16(tx_pool->long_term_buff.map_id); + tx_crq.v1.dma_reg = cpu_to_be16(ltb->map_id); tx_crq.v1.sge_len = cpu_to_be32(skb->len); tx_crq.v1.ioba = cpu_to_be64(data_dma_addr); From d6b458509035db32b935f15922b530373fb6d27e Mon Sep 17 00:00:00 2001 From: Sukadev Bhattiprolu Date: Wed, 13 Apr 2022 13:10:24 -0400 Subject: [PATCH 4/6] ibmvnic: convert rxpool ltb to a set of ltbs Define and use interfaces that treat the long term buffer (LTB) of an rxpool as a set of LTBs rather than a single LTB. The set only has one LTB for now. Signed-off-by: Sukadev Bhattiprolu Signed-off-by: Dany Madden Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/ibm/ibmvnic.c | 44 ++++++++++++++++++++++++++---- drivers/net/ethernet/ibm/ibmvnic.h | 7 ++++- 2 files changed, 45 insertions(+), 6 deletions(-) diff --git a/drivers/net/ethernet/ibm/ibmvnic.c b/drivers/net/ethernet/ibm/ibmvnic.c index 2e6042409585..765a48833b3b 100644 --- a/drivers/net/ethernet/ibm/ibmvnic.c +++ b/drivers/net/ethernet/ibm/ibmvnic.c @@ -345,6 +345,41 @@ static void free_long_term_buff(struct ibmvnic_adapter *adapter, ltb->map_id = 0; } +static void free_ltb_set(struct ibmvnic_adapter *adapter, + struct ibmvnic_ltb_set *ltb_set) +{ + int i; + + for (i = 0; i < ltb_set->num_ltbs; i++) + free_long_term_buff(adapter, <b_set->ltbs[i]); + + kfree(ltb_set->ltbs); + ltb_set->ltbs = NULL; + ltb_set->num_ltbs = 0; +} + +static int alloc_ltb_set(struct ibmvnic_adapter *adapter, + struct ibmvnic_ltb_set *ltb_set, int num_buffs, + int buff_size) +{ + struct ibmvnic_long_term_buff *ltb; + int ltb_size; + int size; + + size = sizeof(struct ibmvnic_long_term_buff); + + ltb_set->ltbs = kmalloc(size, GFP_KERNEL); + if (!ltb_set->ltbs) + return -ENOMEM; + + ltb_set->num_ltbs = 1; + ltb = <b_set->ltbs[0]; + + ltb_size = num_buffs * buff_size; + + return alloc_long_term_buff(adapter, ltb, ltb_size); +} + /** * map_rxpool_buf_to_ltb - Map given rxpool buffer to offset in an LTB. * @rxpool: The receive buffer pool containing buffer @@ -360,7 +395,7 @@ static void map_rxpool_buf_to_ltb(struct ibmvnic_rx_pool *rxpool, struct ibmvnic_long_term_buff **ltbp, unsigned int *offset) { - *ltbp = &rxpool->long_term_buff; + *ltbp = &rxpool->ltb_set.ltbs[0]; *offset = bufidx * rxpool->buff_size; } @@ -618,7 +653,7 @@ static void release_rx_pools(struct ibmvnic_adapter *adapter) kfree(rx_pool->free_map); - free_long_term_buff(adapter, &rx_pool->long_term_buff); + free_ltb_set(adapter, &rx_pool->ltb_set); if (!rx_pool->rx_buff) continue; @@ -763,9 +798,8 @@ update_ltb: dev_dbg(dev, "Updating LTB for rx pool %d [%d, %d]\n", i, rx_pool->size, rx_pool->buff_size); - rc = alloc_long_term_buff(adapter, &rx_pool->long_term_buff, - rx_pool->size * rx_pool->buff_size); - if (rc) + if (alloc_ltb_set(adapter, &rx_pool->ltb_set, + rx_pool->size, rx_pool->buff_size)) goto out; for (j = 0; j < rx_pool->size; ++j) { diff --git a/drivers/net/ethernet/ibm/ibmvnic.h b/drivers/net/ethernet/ibm/ibmvnic.h index 8f5cefb932dd..7d430fdf47c0 100644 --- a/drivers/net/ethernet/ibm/ibmvnic.h +++ b/drivers/net/ethernet/ibm/ibmvnic.h @@ -798,6 +798,11 @@ struct ibmvnic_long_term_buff { u8 map_id; }; +struct ibmvnic_ltb_set { + int num_ltbs; + struct ibmvnic_long_term_buff *ltbs; +}; + struct ibmvnic_tx_buff { struct sk_buff *skb; int index; @@ -833,7 +838,7 @@ struct ibmvnic_rx_pool { int next_free; int next_alloc; int active; - struct ibmvnic_long_term_buff long_term_buff; + struct ibmvnic_ltb_set ltb_set; } ____cacheline_aligned; struct ibmvnic_vpd { From a75de820575d54185a7569494e89f83dca49368e Mon Sep 17 00:00:00 2001 From: Sukadev Bhattiprolu Date: Wed, 13 Apr 2022 13:10:25 -0400 Subject: [PATCH 5/6] ibmvnic: Allow multiple ltbs in rxpool ltb_set Allow multiple LTBs in the rxpool's ltb_set. The first n-1 LTBs will all be of the same size. The size of the last LTB in the set depends on the number of buffers and buffer (mtu) size. Having a set of LTBs per pool provides a couple of benefits. First, with the current value of IBMVNIC_MAX_LTB_SIZE of 16MB, with an MTU of 9000, we need a LTB (DMA buffer) of that size but the allocation can fail in low memory conditions. With a set of LTBs per pool, we can use several smaller (8MB) LTBs and hopefully have fewer allocation failures. (See also comments in ibmvnic.h on the trade-off with smaller LTBs) Second since the kernel limits the size of the DMA buffer to 16MB (based on MAX_ORDER), with a single DMA buffer per pool, the pool is also limited to 16MB. This in turn limits the number of buffers per pool to 1763 when MTU is 9000. With a set of LTBs per pool, we can have upto the max of 4096 buffers per pool even when MTU is 9000. Suggested-by: Brian King Signed-off-by: Sukadev Bhattiprolu Signed-off-by: Dany Madden Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/ibm/ibmvnic.c | 152 +++++++++++++++++++++++++---- drivers/net/ethernet/ibm/ibmvnic.h | 45 ++++++++- 2 files changed, 177 insertions(+), 20 deletions(-) diff --git a/drivers/net/ethernet/ibm/ibmvnic.c b/drivers/net/ethernet/ibm/ibmvnic.c index 765a48833b3b..16fd1f1f1228 100644 --- a/drivers/net/ethernet/ibm/ibmvnic.c +++ b/drivers/net/ethernet/ibm/ibmvnic.c @@ -345,6 +345,14 @@ static void free_long_term_buff(struct ibmvnic_adapter *adapter, ltb->map_id = 0; } +/** + * free_ltb_set - free the given set of long term buffers (LTBS) + * @adapter: The ibmvnic adapter containing this ltb set + * @ltb_set: The ltb_set to be freed + * + * Free the set of LTBs in the given set. + */ + static void free_ltb_set(struct ibmvnic_adapter *adapter, struct ibmvnic_ltb_set *ltb_set) { @@ -358,26 +366,117 @@ static void free_ltb_set(struct ibmvnic_adapter *adapter, ltb_set->num_ltbs = 0; } +/** + * alloc_ltb_set() - Allocate a set of long term buffers (LTBs) + * + * @adapter: ibmvnic adapter associated to the LTB + * @ltb_set: container object for the set of LTBs + * @num_buffs: Number of buffers in the LTB + * @buff_size: Size of each buffer in the LTB + * + * Allocate a set of LTBs to accommodate @num_buffs buffers of @buff_size + * each. We currently cap size each LTB to IBMVNIC_ONE_LTB_SIZE. If the + * new set of LTBs have fewer LTBs than the old set, free the excess LTBs. + * If new set needs more than in old set, allocate the remaining ones. + * Try and reuse as many LTBs as possible and avoid reallocation. + * + * Any changes to this allocation strategy must be reflected in + * map_rxpool_buff_to_ltb() and map_txpool_buff_to_ltb(). + */ static int alloc_ltb_set(struct ibmvnic_adapter *adapter, struct ibmvnic_ltb_set *ltb_set, int num_buffs, int buff_size) { - struct ibmvnic_long_term_buff *ltb; - int ltb_size; - int size; + struct device *dev = &adapter->vdev->dev; + struct ibmvnic_ltb_set old_set; + struct ibmvnic_ltb_set new_set; + int rem_size; + int tot_size; /* size of all ltbs */ + int ltb_size; /* size of one ltb */ + int nltbs; + int rc; + int n; + int i; - size = sizeof(struct ibmvnic_long_term_buff); + dev_dbg(dev, "%s() num_buffs %d, buff_size %d\n", __func__, num_buffs, + buff_size); - ltb_set->ltbs = kmalloc(size, GFP_KERNEL); - if (!ltb_set->ltbs) - return -ENOMEM; + ltb_size = rounddown(IBMVNIC_ONE_LTB_SIZE, buff_size); + tot_size = num_buffs * buff_size; - ltb_set->num_ltbs = 1; - ltb = <b_set->ltbs[0]; + if (ltb_size > tot_size) + ltb_size = tot_size; - ltb_size = num_buffs * buff_size; + nltbs = tot_size / ltb_size; + if (tot_size % ltb_size) + nltbs++; - return alloc_long_term_buff(adapter, ltb, ltb_size); + old_set = *ltb_set; + + if (old_set.num_ltbs == nltbs) { + new_set = old_set; + } else { + int tmp = nltbs * sizeof(struct ibmvnic_long_term_buff); + + new_set.ltbs = kzalloc(tmp, GFP_KERNEL); + if (!new_set.ltbs) + return -ENOMEM; + + new_set.num_ltbs = nltbs; + + /* Free any excess ltbs in old set */ + for (i = new_set.num_ltbs; i < old_set.num_ltbs; i++) + free_long_term_buff(adapter, &old_set.ltbs[i]); + + /* Copy remaining ltbs to new set. All LTBs except the + * last one are of the same size. alloc_long_term_buff() + * will realloc if the size changes. + */ + n = min(old_set.num_ltbs, new_set.num_ltbs); + for (i = 0; i < n; i++) + new_set.ltbs[i] = old_set.ltbs[i]; + + /* Any additional ltbs in new set will have NULL ltbs for + * now and will be allocated in alloc_long_term_buff(). + */ + + /* We no longer need the old_set so free it. Note that we + * may have reused some ltbs from old set and freed excess + * ltbs above. So we only need to free the container now + * not the LTBs themselves. (i.e. dont free_ltb_set()!) + */ + kfree(old_set.ltbs); + old_set.ltbs = NULL; + old_set.num_ltbs = 0; + + /* Install the new set. If allocations fail below, we will + * retry later and know what size LTBs we need. + */ + *ltb_set = new_set; + } + + i = 0; + rem_size = tot_size; + while (rem_size) { + if (ltb_size > rem_size) + ltb_size = rem_size; + + rem_size -= ltb_size; + + rc = alloc_long_term_buff(adapter, &new_set.ltbs[i], ltb_size); + if (rc) + goto out; + i++; + } + + WARN_ON(i != new_set.num_ltbs); + + return 0; +out: + /* We may have allocated one/more LTBs before failing and we + * want to try and reuse on next reset. So don't free ltb set. + */ + return rc; } /** @@ -388,14 +487,30 @@ static int alloc_ltb_set(struct ibmvnic_adapter *adapter, * @offset: (Output) offset of buffer in the LTB from @ltbp * * Map the given buffer identified by [rxpool, bufidx] to an LTB in the - * pool and its corresponding offset. + * pool and its corresponding offset. Assume for now that each LTB is of + * different size but could possibly be optimized based on the allocation + * strategy in alloc_ltb_set(). */ static void map_rxpool_buf_to_ltb(struct ibmvnic_rx_pool *rxpool, unsigned int bufidx, struct ibmvnic_long_term_buff **ltbp, unsigned int *offset) { - *ltbp = &rxpool->ltb_set.ltbs[0]; + struct ibmvnic_long_term_buff *ltb; + int nbufs; /* # of buffers in one ltb */ + int i; + + WARN_ON(bufidx >= rxpool->size); + + for (i = 0; i < rxpool->ltb_set.num_ltbs; i++) { + ltb = &rxpool->ltb_set.ltbs[i]; + nbufs = ltb->size / rxpool->buff_size; + if (bufidx < nbufs) + break; + bufidx -= nbufs; + } + + *ltbp = ltb; *offset = bufidx * rxpool->buff_size; } @@ -798,8 +913,9 @@ update_ltb: dev_dbg(dev, "Updating LTB for rx pool %d [%d, %d]\n", i, rx_pool->size, rx_pool->buff_size); - if (alloc_ltb_set(adapter, &rx_pool->ltb_set, - rx_pool->size, rx_pool->buff_size)) + rc = alloc_ltb_set(adapter, &rx_pool->ltb_set, + rx_pool->size, rx_pool->buff_size); + if (rc) goto out; for (j = 0; j < rx_pool->size; ++j) { @@ -4106,16 +4222,16 @@ static void send_request_cap(struct ibmvnic_adapter *adapter, int retry) adapter->desired.rx_entries = adapter->max_rx_add_entries_per_subcrq; - max_entries = IBMVNIC_MAX_LTB_SIZE / + max_entries = IBMVNIC_LTB_SET_SIZE / (adapter->req_mtu + IBMVNIC_BUFFER_HLEN); if ((adapter->req_mtu + IBMVNIC_BUFFER_HLEN) * - adapter->desired.tx_entries > IBMVNIC_MAX_LTB_SIZE) { + adapter->desired.tx_entries > IBMVNIC_LTB_SET_SIZE) { adapter->desired.tx_entries = max_entries; } if ((adapter->req_mtu + IBMVNIC_BUFFER_HLEN) * - adapter->desired.rx_entries > IBMVNIC_MAX_LTB_SIZE) { + adapter->desired.rx_entries > IBMVNIC_LTB_SET_SIZE) { adapter->desired.rx_entries = max_entries; } diff --git a/drivers/net/ethernet/ibm/ibmvnic.h b/drivers/net/ethernet/ibm/ibmvnic.h index 7d430fdf47c0..178035872c32 100644 --- a/drivers/net/ethernet/ibm/ibmvnic.h +++ b/drivers/net/ethernet/ibm/ibmvnic.h @@ -36,9 +36,50 @@ #define IBMVNIC_TSO_BUFS 64 #define IBMVNIC_TSO_POOL_MASK 0x80000000 -#define IBMVNIC_MAX_LTB_SIZE ((1 << (MAX_ORDER - 1)) * PAGE_SIZE) -#define IBMVNIC_BUFFER_HLEN 500 +/* A VNIC adapter has set of Rx and Tx pools (aka queues). Each Rx/Tx pool + * has a set of buffers. The size of each buffer is determined by the MTU. + * + * Each Rx/Tx pool is also associated with a DMA region that is shared + * with the "hardware" (VIOS) and used to send/receive packets. The DMA + * region is also referred to as a Long Term Buffer or LTB. + * + * The size of the DMA region required for an Rx/Tx pool depends on the + * number and size (MTU) of the buffers in the pool. At the max levels + * of 4096 jumbo frames (MTU=9000) we will need about 9K*4K = 36MB plus + * some padding. + * + * But the size of a single DMA region is limited by MAX_ORDER in the + * kernel (about 16MB currently). To support say 4K Jumbo frames, we + * use a set of LTBs (struct ltb_set) per pool. + * + * IBMVNIC_ONE_LTB_MAX - max size of each LTB supported by kernel + * IBMVNIC_ONE_LTB_SIZE - current max size of each LTB in an ltb_set + * (must be <= IBMVNIC_ONE_LTB_MAX) + * IBMVNIC_LTB_SET_SIZE - current size of all LTBs in an ltb_set + * + * Each VNIC can have upto 16 Rx, 16 Tx and 16 TSO pools. The TSO pools + * are of fixed length (IBMVNIC_TSO_BUF_SZ * IBMVNIC_TSO_BUFS) of 4MB. + * + * The Rx and Tx pools can have upto 4096 buffers. The max size of these + * buffers is about 9588 (for jumbo frames, including IBMVNIC_BUFFER_HLEN). + * So, setting the IBMVNIC_LTB_SET_SIZE for a pool to 4096 * 9588 ~= 38MB. + * + * There is a trade-off in setting IBMVNIC_ONE_LTB_SIZE. If it is large, + * the allocation of the LTB can fail when system is low in memory. If + * its too small, we would need several mappings for each of the Rx/ + * Tx/TSO pools but there is a limit of 255 mappings per vnic in the + * VNIC protocol. + * + * So setting IBMVNIC_ONE_LTB_SIZE to 8MB. With IBMVNIC_LTB_SET_SIZE set + * to 38MB, we will need 5 LTBs per Rx and Tx pool and 1 LTB per TSO + * pool for the 4MB. Thus the 16 Rx and Tx queues require 32 * 5 = 160 + * plus 16 for the TSO pools for a total of 176 LTB mappings per VNIC. + */ +#define IBMVNIC_ONE_LTB_MAX ((u32)((1 << (MAX_ORDER - 1)) * PAGE_SIZE)) +#define IBMVNIC_ONE_LTB_SIZE min((u32)(8 << 20), IBMVNIC_ONE_LTB_MAX) +#define IBMVNIC_LTB_SET_SIZE (38 << 20) +#define IBMVNIC_BUFFER_HLEN 500 #define IBMVNIC_RESET_DELAY 100 static const char ibmvnic_priv_flags[][ETH_GSTRING_LEN] = { From 93b1ebb348a94a04ae4bfbd028f89005090cf8c7 Mon Sep 17 00:00:00 2001 From: Sukadev Bhattiprolu Date: Wed, 13 Apr 2022 13:10:26 -0400 Subject: [PATCH 6/6] ibmvnic: Allow multiple ltbs in txpool ltb_set Allow multiple LTBs in the txpool's ltb_set. i.e rather than using a single large LTB, use several smaller LTBs. The first n-1 LTBs will all be of the same size. The size of the last LTB in the set depends on the number of buffers and buffer (mtu) size. This strategy hopefully allows more reuse of the initial LTBs and also reduces the chances of an allocation failure (of the large LTB) when system is low in memory. Suggested-by: Brian King Signed-off-by: Sukadev Bhattiprolu Signed-off-by: Dany Madden Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/ibm/ibmvnic.c | 53 +++++++++++++++++++----------- drivers/net/ethernet/ibm/ibmvnic.h | 2 +- 2 files changed, 35 insertions(+), 20 deletions(-) diff --git a/drivers/net/ethernet/ibm/ibmvnic.c b/drivers/net/ethernet/ibm/ibmvnic.c index 16fd1f1f1228..4840ad4ccd10 100644 --- a/drivers/net/ethernet/ibm/ibmvnic.c +++ b/drivers/net/ethernet/ibm/ibmvnic.c @@ -257,12 +257,14 @@ static int alloc_long_term_buff(struct ibmvnic_adapter *adapter, struct ibmvnic_long_term_buff *ltb, int size) { struct device *dev = &adapter->vdev->dev; + u64 prev = 0; int rc; if (!reuse_ltb(ltb, size)) { dev_dbg(dev, "LTB size changed from 0x%llx to 0x%x, reallocating\n", ltb->size, size); + prev = ltb->size; free_long_term_buff(adapter, ltb); } @@ -283,8 +285,8 @@ static int alloc_long_term_buff(struct ibmvnic_adapter *adapter, bitmap_set(adapter->map_ids, ltb->map_id, 1); dev_dbg(dev, - "Allocated new LTB [map %d, size 0x%llx]\n", - ltb->map_id, ltb->size); + "Allocated new LTB [map %d, size 0x%llx was 0x%llx]\n", + ltb->map_id, ltb->size, prev); } /* Ensure ltb is zeroed - specially when reusing it. */ @@ -529,7 +531,21 @@ static void map_txpool_buf_to_ltb(struct ibmvnic_tx_pool *txpool, struct ibmvnic_long_term_buff **ltbp, unsigned int *offset) { - *ltbp = &txpool->long_term_buff; + struct ibmvnic_long_term_buff *ltb; + int nbufs; /* # of buffers in one ltb */ + int i; + + WARN_ON_ONCE(bufidx >= txpool->num_buffers); + + for (i = 0; i < txpool->ltb_set.num_ltbs; i++) { + ltb = &txpool->ltb_set.ltbs[i]; + nbufs = ltb->size / txpool->buf_size; + if (bufidx < nbufs) + break; + bufidx -= nbufs; + } + + *ltbp = ltb; *offset = bufidx * txpool->buf_size; } @@ -971,7 +987,7 @@ static void release_one_tx_pool(struct ibmvnic_adapter *adapter, { kfree(tx_pool->tx_buff); kfree(tx_pool->free_map); - free_long_term_buff(adapter, &tx_pool->long_term_buff); + free_ltb_set(adapter, &tx_pool->ltb_set); } /** @@ -1161,17 +1177,16 @@ update_ltb: for (i = 0; i < num_pools; i++) { struct ibmvnic_tx_pool *tso_pool; struct ibmvnic_tx_pool *tx_pool; - u32 ltb_size; tx_pool = &adapter->tx_pool[i]; - ltb_size = tx_pool->num_buffers * tx_pool->buf_size; - if (alloc_long_term_buff(adapter, &tx_pool->long_term_buff, - ltb_size)) - goto out; - dev_dbg(dev, "Updated LTB for tx pool %d [%p, %d, %d]\n", - i, tx_pool->long_term_buff.buff, - tx_pool->num_buffers, tx_pool->buf_size); + dev_dbg(dev, "Updating LTB for tx pool %d [%d, %d]\n", + i, tx_pool->num_buffers, tx_pool->buf_size); + + rc = alloc_ltb_set(adapter, &tx_pool->ltb_set, + tx_pool->num_buffers, tx_pool->buf_size); + if (rc) + goto out; tx_pool->consumer_index = 0; tx_pool->producer_index = 0; @@ -1180,14 +1195,14 @@ update_ltb: tx_pool->free_map[j] = j; tso_pool = &adapter->tso_pool[i]; - ltb_size = tso_pool->num_buffers * tso_pool->buf_size; - if (alloc_long_term_buff(adapter, &tso_pool->long_term_buff, - ltb_size)) - goto out; - dev_dbg(dev, "Updated LTB for tso pool %d [%p, %d, %d]\n", - i, tso_pool->long_term_buff.buff, - tso_pool->num_buffers, tso_pool->buf_size); + dev_dbg(dev, "Updating LTB for tso pool %d [%d, %d]\n", + i, tso_pool->num_buffers, tso_pool->buf_size); + + rc = alloc_ltb_set(adapter, &tso_pool->ltb_set, + tso_pool->num_buffers, tso_pool->buf_size); + if (rc) + goto out; tso_pool->consumer_index = 0; tso_pool->producer_index = 0; diff --git a/drivers/net/ethernet/ibm/ibmvnic.h b/drivers/net/ethernet/ibm/ibmvnic.h index 178035872c32..90d6833fb1db 100644 --- a/drivers/net/ethernet/ibm/ibmvnic.h +++ b/drivers/net/ethernet/ibm/ibmvnic.h @@ -856,7 +856,7 @@ struct ibmvnic_tx_pool { int *free_map; int consumer_index; int producer_index; - struct ibmvnic_long_term_buff long_term_buff; + struct ibmvnic_ltb_set ltb_set; int num_buffers; int buf_size; } ____cacheline_aligned;