From b0153fdd7e8a625d4879f07d3cfb1ee1e2364222 Mon Sep 17 00:00:00 2001
From: Victor Raj
Date: Tue, 26 Feb 2019 16:35:20 -0800
Subject: [PATCH 01/15] ice: update VSI config dynamically

When a VSI increases its number of queues dynamically, the scheduler
just needs to add the newly required nodes rather than readjust with the
previously allocated number of nodes. Readjusting did not provide enough
parents to add the upper-layer nodes, and it also could not place the
LAN and RDMA subtrees separately. In the decrease case, always keep the
VSI configuration with the maximum number of queues. This leaves some
extra nodes in the tree, but they do no harm.

Signed-off-by: Victor Raj
Reviewed-by: Bruce Allan
Signed-off-by: Anirudh Venkataramanan
Tested-by: Andrew Bowers
Signed-off-by: Jeff Kirsher
---
 drivers/net/ethernet/intel/ice/ice_sched.c | 75 ++++------------------
 1 file changed, 13 insertions(+), 62 deletions(-)

diff --git a/drivers/net/ethernet/intel/ice/ice_sched.c b/drivers/net/ethernet/intel/ice/ice_sched.c
index cf1142ec16ee..efbebf7a050f 100644
--- a/drivers/net/ethernet/intel/ice/ice_sched.c
+++ b/drivers/net/ethernet/intel/ice/ice_sched.c
@@ -1268,42 +1268,6 @@ ice_sched_add_vsi_child_nodes(struct ice_port_info *pi, u16 vsi_handle,
 	return 0;
 }
 
-/**
- * ice_sched_rm_vsi_child_nodes - remove VSI child nodes from the tree
- * @pi: port information structure
- * @vsi_node: pointer to the VSI node
- * @num_nodes: pointer to the num nodes that needs to be removed per layer
- * @owner: node owner (lan or rdma)
- *
- * This function removes the VSI child nodes from the tree. It gets called for
- * lan and rdma separately.
- */
-static void
-ice_sched_rm_vsi_child_nodes(struct ice_port_info *pi,
-			     struct ice_sched_node *vsi_node, u16 *num_nodes,
-			     u8 owner)
-{
-	struct ice_sched_node *node, *next;
-	u8 i, qgl, vsil;
-	u16 num;
-
-	qgl = ice_sched_get_qgrp_layer(pi->hw);
-	vsil = ice_sched_get_vsi_layer(pi->hw);
-
-	for (i = qgl; i > vsil; i--) {
-		num = num_nodes[i];
-		node = ice_sched_get_first_node(pi->hw, vsi_node, i);
-		while (node && num) {
-			next = node->sibling;
-			if (node->owner == owner && !node->num_children) {
-				ice_free_sched_node(pi, node);
-				num--;
-			}
-			node = next;
-		}
-	}
-}
-
 /**
  * ice_sched_calc_vsi_support_nodes - calculate number of VSI support nodes
  * @hw: pointer to the hw struct
@@ -1444,7 +1408,6 @@ static enum ice_status
 ice_sched_update_vsi_child_nodes(struct ice_port_info *pi, u16 vsi_handle,
 				 u8 tc, u16 new_numqs, u8 owner)
 {
-	u16 prev_num_nodes[ICE_AQC_TOPO_MAX_LEVEL_NUM] = { 0 };
 	u16 new_num_nodes[ICE_AQC_TOPO_MAX_LEVEL_NUM] = { 0 };
 	struct ice_sched_node *vsi_node;
 	struct ice_sched_node *tc_node;
@@ -1452,7 +1415,6 @@ ice_sched_update_vsi_child_nodes(struct ice_port_info *pi, u16 vsi_handle,
 	enum ice_status status = 0;
 	struct ice_hw *hw = pi->hw;
 	u16 prev_numqs;
-	u8 i;
 
 	tc_node = ice_sched_get_tc_node(pi, tc);
 	if (!tc_node)
@@ -1471,33 +1433,22 @@ ice_sched_update_vsi_child_nodes(struct ice_port_info *pi, u16 vsi_handle,
 	else
 		return ICE_ERR_PARAM;
 
-	/* num queues are not changed */
-	if (prev_numqs == new_numqs)
+	/* num queues are not changed or less than the previous number */
+	if (new_numqs <= prev_numqs)
 		return status;
-
-	/* calculate number of nodes based on prev/new number of qs */
-	if (prev_numqs)
-		ice_sched_calc_vsi_child_nodes(hw, prev_numqs, prev_num_nodes);
 	if (new_numqs)
 		ice_sched_calc_vsi_child_nodes(hw, new_numqs, new_num_nodes);
-
-	if (prev_numqs > new_numqs) {
-		for (i = 0; i < ICE_AQC_TOPO_MAX_LEVEL_NUM; i++)
-			new_num_nodes[i] = prev_num_nodes[i] - new_num_nodes[i];
-
-		ice_sched_rm_vsi_child_nodes(pi, vsi_node, new_num_nodes,
-					     owner);
-	} else {
-		for (i = 0; i < ICE_AQC_TOPO_MAX_LEVEL_NUM; i++)
-			new_num_nodes[i] -= prev_num_nodes[i];
-
-		status = ice_sched_add_vsi_child_nodes(pi, vsi_handle, tc_node,
-						       new_num_nodes, owner);
-		if (status)
-			return status;
-	}
-
+	/* Keep the max number of queue configuration all the time. Update the
+	 * tree only if number of queues > previous number of queues. This may
+	 * leave some extra nodes in the tree if number of queues < previous
+	 * number but that wouldn't harm anything. Removing those extra nodes
+	 * may complicate the code if those nodes are part of SRL or
+	 * individually rate limited.
+	 */
+	status = ice_sched_add_vsi_child_nodes(pi, vsi_handle, tc_node,
+					       new_num_nodes, owner);
+	if (status)
+		return status;
+
 	vsi_ctx->sched.max_lanq[tc] = new_numqs;
 
 	return 0;

From 840bcd88f899da6a5076ab4bc8dd0f6fcd8d19ef Mon Sep 17 00:00:00 2001
From: Michal Swiatkowski
Date: Tue, 26 Feb 2019 16:35:21 -0800
Subject: [PATCH 02/15] ice: Restore VLAN switch rule if port VLAN existed
 before

The VLAN rule is lost when a VM starts or when the AVF driver (iavf.ko)
is reloaded, so it is necessary to add this rule again.

Signed-off-by: Michal Swiatkowski
Signed-off-by: Anirudh Venkataramanan
Tested-by: Andrew Bowers
Signed-off-by: Jeff Kirsher
---
 drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c b/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c
index c725fb8e81bf..011c52a9442c 100644
--- a/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c
+++ b/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c
@@ -457,8 +457,10 @@ static int ice_alloc_vsi_res(struct ice_vf *vf)
 	vsi->hw_base_vector += 1;
 
 	/* Check if port VLAN exist before, and restore it accordingly */
-	if (vf->port_vlan_id)
+	if (vf->port_vlan_id) {
 		ice_vsi_manage_pvid(vsi, vf->port_vlan_id, true);
+		ice_vsi_add_vlan(vsi, vf->port_vlan_id & ICE_VLAN_M);
+	}
 
 	eth_broadcast_addr(broadcast);

From 8d051b8b5d52a32ac096a62bb932193202e2e759 Mon Sep 17 00:00:00 2001
From: Alan Brady
Date: Tue, 26 Feb 2019 16:35:22 -0800
Subject: [PATCH 03/15] ice: use irq_num var in ice_vsi_req_irq_msix

Someone went through the effort of making this a variable, so let's use
it instead of recalculating it again.
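For reference, a minimal sketch of the resulting call (irq_num is the
local already computed earlier in the loop from the same expression the
old call open-coded; the declaration shown here is an assumption):

	/* one msix_entries lookup per iteration, reused at request time */
	int irq_num = pf->msix_entries[base + vector].vector;

	err = devm_request_irq(&pf->pdev->dev, irq_num, vsi->irq_handler,
			       0, q_vector->name, q_vector);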
Signed-off-by: Alan Brady
Signed-off-by: Anirudh Venkataramanan
Tested-by: Andrew Bowers
Signed-off-by: Jeff Kirsher
---
 drivers/net/ethernet/intel/ice/ice_main.c | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c
index 879c1f176a17..4731b5d147b7 100644
--- a/drivers/net/ethernet/intel/ice/ice_main.c
+++ b/drivers/net/ethernet/intel/ice/ice_main.c
@@ -1248,10 +1248,9 @@ static int ice_vsi_req_irq_msix(struct ice_vsi *vsi, char *basename)
 			/* skip this unused q_vector */
 			continue;
 		}
-		err = devm_request_irq(&pf->pdev->dev,
-				       pf->msix_entries[base + vector].vector,
-				       vsi->irq_handler, 0, q_vector->name,
-				       q_vector);
+		err = devm_request_irq(&pf->pdev->dev, irq_num,
+				       vsi->irq_handler, 0,
+				       q_vector->name, q_vector);
 		if (err) {
 			netdev_err(vsi->netdev,
 				   "MSIX request_irq failed, error: %d\n", err);

From 250c3b3e0aa25ad09c0c7638ba9ba3c0e54464a1 Mon Sep 17 00:00:00 2001
From: Brett Creeley
Date: Tue, 26 Feb 2019 16:35:23 -0800
Subject: [PATCH 04/15] ice: Enable link events over the ARQ

The hardware now supports link events over the admin receive queue
(ARQ), so enable HW link events over the ARQ and remove code for link
event polling.

Signed-off-by: Brett Creeley
Reviewed-by: Bruce Allan
Signed-off-by: Anirudh Venkataramanan
Tested-by: Andrew Bowers
Signed-off-by: Jeff Kirsher
---
 drivers/net/ethernet/intel/ice/ice_common.c | 28 ++++++++-
 drivers/net/ethernet/intel/ice/ice_common.h |  6 ++
 drivers/net/ethernet/intel/ice/ice_main.c   | 68 ++++++++++++++++++++-
 3 files changed, 98 insertions(+), 4 deletions(-)

diff --git a/drivers/net/ethernet/intel/ice/ice_common.c b/drivers/net/ethernet/intel/ice/ice_common.c
index 06a1a2cb5358..be67d07b75cb 100644
--- a/drivers/net/ethernet/intel/ice/ice_common.c
+++ b/drivers/net/ethernet/intel/ice/ice_common.c
@@ -262,7 +262,7 @@ static enum ice_media_type ice_get_media_type(struct ice_port_info *pi)
  *
  * Get Link Status (0x607). Returns the link status of the adapter.
  */
-static enum ice_status
+enum ice_status
 ice_aq_get_link_info(struct ice_port_info *pi, bool ena_lse,
 		     struct ice_link_status *link, struct ice_sq_cd *cd)
 {
@@ -2150,6 +2150,32 @@ ice_aq_set_link_restart_an(struct ice_port_info *pi, bool ena_link,
 	return ice_aq_send_cmd(pi->hw, &desc, NULL, 0, cd);
 }
 
+/**
+ * ice_aq_set_event_mask
+ * @hw: pointer to the HW struct
+ * @port_num: port number of the physical function
+ * @mask: event mask to be set
+ * @cd: pointer to command details structure or NULL
+ *
+ * Set event mask (0x0613)
+ */
+enum ice_status
+ice_aq_set_event_mask(struct ice_hw *hw, u8 port_num, u16 mask,
+		      struct ice_sq_cd *cd)
+{
+	struct ice_aqc_set_event_mask *cmd;
+	struct ice_aq_desc desc;
+
+	cmd = &desc.params.set_event_mask;
+
+	ice_fill_dflt_direct_cmd_desc(&desc, ice_aqc_opc_set_event_mask);
+
+	cmd->lport_num = port_num;
+
+	cmd->event_mask = cpu_to_le16(mask);
+	return ice_aq_send_cmd(hw, &desc, NULL, 0, cd);
+}
+
 /**
  * ice_aq_set_port_id_led
  * @pi: pointer to the port information
diff --git a/drivers/net/ethernet/intel/ice/ice_common.h b/drivers/net/ethernet/intel/ice/ice_common.h
index 38578f7c3622..fbdfdee353bc 100644
--- a/drivers/net/ethernet/intel/ice/ice_common.h
+++ b/drivers/net/ethernet/intel/ice/ice_common.h
@@ -89,6 +89,12 @@ enum ice_status
 ice_aq_set_link_restart_an(struct ice_port_info *pi, bool ena_link,
 			   struct ice_sq_cd *cd);
 enum ice_status
+ice_aq_get_link_info(struct ice_port_info *pi, bool ena_lse,
+		     struct ice_link_status *link, struct ice_sq_cd *cd);
+enum ice_status
+ice_aq_set_event_mask(struct ice_hw *hw, u8 port_num, u16 mask,
+		      struct ice_sq_cd *cd);
+enum ice_status
 ice_aq_set_port_id_led(struct ice_port_info *pi, bool is_orig_mode,
 		       struct ice_sq_cd *cd);
 
diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c
index 4731b5d147b7..d0ea4d059a05 100644
--- a/drivers/net/ethernet/intel/ice/ice_main.c
+++ b/drivers/net/ethernet/intel/ice/ice_main.c
@@ -698,9 +698,6 @@ static void ice_watchdog_subtask(struct ice_pf *pf)
 
 	pf->serv_tmr_prev = jiffies;
 
-	if (ice_link_event(pf, pf->hw.port_info))
-		dev_dbg(&pf->pdev->dev, "ice_link_event failed\n");
-
 	/* Update the stats for active netdevs so the network stack
 	 * can look at updated numbers whenever it cares to
 	 */
@@ -710,6 +707,60 @@ static void ice_watchdog_subtask(struct ice_pf *pf)
 			ice_update_vsi_stats(pf->vsi[i]);
 }
 
+/**
+ * ice_init_link_events - enable/initialize link events
+ * @pi: pointer to the port_info instance
+ *
+ * Returns -EIO on failure, 0 on success
+ */
+static int ice_init_link_events(struct ice_port_info *pi)
+{
+	u16 mask;
+
+	mask = ~((u16)(ICE_AQ_LINK_EVENT_UPDOWN | ICE_AQ_LINK_EVENT_MEDIA_NA |
+		       ICE_AQ_LINK_EVENT_MODULE_QUAL_FAIL));
+
+	if (ice_aq_set_event_mask(pi->hw, pi->lport, mask, NULL)) {
+		dev_dbg(ice_hw_to_dev(pi->hw),
+			"Failed to set link event mask for port %d\n",
+			pi->lport);
+		return -EIO;
+	}
+
+	if (ice_aq_get_link_info(pi, true, NULL, NULL)) {
+		dev_dbg(ice_hw_to_dev(pi->hw),
+			"Failed to enable link events for port %d\n",
+			pi->lport);
+		return -EIO;
+	}
+
+	return 0;
+}
+
+/**
+ * ice_handle_link_event - handle link event via ARQ
+ * @pf: pf that the link event is associated with
+ *
+ * Return -EINVAL if port_info is null
+ * Return status on success
+ */
+static int ice_handle_link_event(struct ice_pf *pf)
+{
+	struct ice_port_info *port_info;
+	int status;
+
+	port_info = pf->hw.port_info;
+	if (!port_info)
+		return -EINVAL;
+
+	status = ice_link_event(pf, port_info);
+	if (status)
+		dev_dbg(&pf->pdev->dev,
+			"Could not process link event, error %d\n", status);
+
+	return status;
+}
+
 /**
  * __ice_clean_ctrlq - helper function to clean controlq rings
  * @pf: ptr to struct ice_pf
@@ -813,6 +864,11 @@ static int __ice_clean_ctrlq(struct ice_pf *pf, enum ice_ctl_q q_type)
 		opcode = le16_to_cpu(event.desc.opcode);
 
 		switch (opcode) {
+		case ice_aqc_opc_get_link_status:
+			if (ice_handle_link_event(pf))
+				dev_err(&pf->pdev->dev,
+					"Could not handle link event\n");
+			break;
 		case ice_mbx_opc_send_msg_to_pf:
 			ice_vc_process_vf_msg(pf, &event);
 			break;
@@ -2267,6 +2323,12 @@ ice_probe(struct pci_dev *pdev, const struct pci_device_id __always_unused *ent)
 	/* since everything is good, start the service timer */
 	mod_timer(&pf->serv_tmr, round_jiffies(jiffies + pf->serv_tmr_period));
 
+	err = ice_init_link_events(pf->hw.port_info);
+	if (err) {
+		dev_err(dev, "ice_init_link_events failed: %d\n", err);
+		goto err_alloc_sw_unroll;
+	}
+
 	ice_verify_cacheline_size(pf);
 
 	return 0;

From 6c869cb7a8f02b0c5f5494bb37c29b6686711ec8 Mon Sep 17 00:00:00 2001
From: Maciej Fijalkowski
Date: Wed, 13 Feb 2019 10:51:01 -0800
Subject: [PATCH 05/15] ice: Retrieve rx_buf in separate function

Introduce ice_get_rx_buf, which will fetch the Rx buffer and do the DMA
synchronization. The length of the packet that the hardware Rx
descriptor contains is now read in ice_clean_rx_irq, so we can feed
ice_get_rx_buf with it and drop the rx_desc argument from
ice_fetch_rx_buf and ice_add_rx_frag.

Signed-off-by: Maciej Fijalkowski
Signed-off-by: Anirudh Venkataramanan
Tested-by: Andrew Bowers
Signed-off-by: Jeff Kirsher
---
 drivers/net/ethernet/intel/ice/ice_txrx.c | 75 +++++++++++++----------
 1 file changed, 42 insertions(+), 33 deletions(-)

diff --git a/drivers/net/ethernet/intel/ice/ice_txrx.c b/drivers/net/ethernet/intel/ice/ice_txrx.c
index 64baedee6336..8c0a8b63670b 100644
--- a/drivers/net/ethernet/intel/ice/ice_txrx.c
+++ b/drivers/net/ethernet/intel/ice/ice_txrx.c
@@ -499,8 +499,8 @@ static bool ice_page_is_reserved(struct page *page)
 /**
  * ice_add_rx_frag - Add contents of Rx buffer to sk_buff
  * @rx_buf: buffer containing page to add
- * @rx_desc: descriptor containing length of buffer written by hardware
  * @skb: sk_buf to place the data into
+ * @size: the length of the packet
  *
  * This function will add the data contained in rx_buf->page to the skb.
  * This is done either through a direct copy if the data in the buffer is
@@ -511,8 +511,8 @@ static bool ice_page_is_reserved(struct page *page)
  * true if the buffer can be reused by the adapter.
  */
 static bool
-ice_add_rx_frag(struct ice_rx_buf *rx_buf, union ice_32b_rx_flex_desc *rx_desc,
-		struct sk_buff *skb)
+ice_add_rx_frag(struct ice_rx_buf *rx_buf, struct sk_buff *skb,
+		unsigned int size)
 {
 #if (PAGE_SIZE < 8192)
 	unsigned int truesize = ICE_RXBUF_2048;
@@ -522,10 +522,6 @@ ice_add_rx_frag(struct ice_rx_buf *rx_buf, union ice_32b_rx_flex_desc *rx_desc,
 #endif /* PAGE_SIZE < 8192) */
 
 	struct page *page;
-	unsigned int size;
-
-	size = le16_to_cpu(rx_desc->wb.pkt_len) &
-	       ICE_RX_FLX_DESC_PKT_LEN_M;
 
 	page = rx_buf->page;
 
@@ -603,10 +599,35 @@ ice_reuse_rx_page(struct ice_ring *rx_ring, struct ice_rx_buf *old_buf)
 	*new_buf = *old_buf;
 }
 
+/**
+ * ice_get_rx_buf - Fetch Rx buffer and synchronize data for use
+ * @rx_ring: Rx descriptor ring to transact packets on
+ * @size: size of buffer to add to skb
+ *
+ * This function will pull an Rx buffer from the ring and synchronize it
+ * for use by the CPU.
+ */
+static struct ice_rx_buf *
+ice_get_rx_buf(struct ice_ring *rx_ring, const unsigned int size)
+{
+	struct ice_rx_buf *rx_buf;
+
+	rx_buf = &rx_ring->rx_buf[rx_ring->next_to_clean];
+	prefetchw(rx_buf->page);
+
+	/* we are reusing so sync this buffer for CPU use */
+	dma_sync_single_range_for_cpu(rx_ring->dev, rx_buf->dma,
+				      rx_buf->page_offset, size,
+				      DMA_FROM_DEVICE);
+
+	return rx_buf;
+}
+
 /**
  * ice_fetch_rx_buf - Allocate skb and populate it
  * @rx_ring: Rx descriptor ring to transact packets on
- * @rx_desc: descriptor containing info written by hardware
+ * @rx_buf: Rx buffer to pull data from
+ * @size: the length of the packet
  *
  * This function allocates an skb on the fly, and populates it with the page
  * data from the current receive descriptor, taking care to set up the skb
@@ -614,20 +635,14 @@ ice_reuse_rx_page(struct ice_ring *rx_ring, struct ice_rx_buf *old_buf)
  * necessary.
  */
 static struct sk_buff *
-ice_fetch_rx_buf(struct ice_ring *rx_ring, union ice_32b_rx_flex_desc *rx_desc)
+ice_fetch_rx_buf(struct ice_ring *rx_ring, struct ice_rx_buf *rx_buf,
+		 unsigned int size)
 {
-	struct ice_rx_buf *rx_buf;
-	struct sk_buff *skb;
-	struct page *page;
-
-	rx_buf = &rx_ring->rx_buf[rx_ring->next_to_clean];
-	page = rx_buf->page;
-	prefetchw(page);
-
-	skb = rx_buf->skb;
+	struct sk_buff *skb = rx_buf->skb;
 
 	if (likely(!skb)) {
-		u8 *page_addr = page_address(page) + rx_buf->page_offset;
+		u8 *page_addr = page_address(rx_buf->page) +
+				rx_buf->page_offset;
 
 		/* prefetch first cache line of first page */
 		prefetch(page_addr);
@@ -644,25 +659,13 @@ ice_fetch_rx_buf(struct ice_ring *rx_ring, union ice_32b_rx_flex_desc *rx_desc)
 			return NULL;
 		}
 
-		/* we will be copying header into skb->data in
-		 * pskb_may_pull so it is in our interest to prefetch
-		 * it now to avoid a possible cache miss
-		 */
-		prefetchw(skb->data);
-
 		skb_record_rx_queue(skb, rx_ring->q_index);
 	} else {
-		/* we are reusing so sync this buffer for CPU use */
-		dma_sync_single_range_for_cpu(rx_ring->dev, rx_buf->dma,
-					      rx_buf->page_offset,
-					      ICE_RXBUF_2048,
-					      DMA_FROM_DEVICE);
-
 		rx_buf->skb = NULL;
 	}
 
 	/* pull page into skb */
-	if (ice_add_rx_frag(rx_buf, rx_desc, skb)) {
+	if (ice_add_rx_frag(rx_buf, skb, size)) {
 		/* hand second half of page back to the ring */
 		ice_reuse_rx_page(rx_ring, rx_buf);
 		rx_ring->rx_stats.page_reuse_count++;
@@ -963,7 +966,9 @@ static int ice_clean_rx_irq(struct ice_ring *rx_ring, int budget)
 	/* start the loop to process RX packets bounded by 'budget' */
 	while (likely(total_rx_pkts < (unsigned int)budget)) {
 		union ice_32b_rx_flex_desc *rx_desc;
+		struct ice_rx_buf *rx_buf;
 		struct sk_buff *skb;
+		unsigned int size;
 		u16 stat_err_bits;
 		u16 vlan_tag = 0;
 		u8 rx_ptype;
@@ -993,8 +998,12 @@ static int ice_clean_rx_irq(struct ice_ring *rx_ring, int budget)
 		 */
 		dma_rmb();
 
+		size = le16_to_cpu(rx_desc->wb.pkt_len) &
+			ICE_RX_FLX_DESC_PKT_LEN_M;
+
+		rx_buf = ice_get_rx_buf(rx_ring, size);
 		/* allocate (if needed) and populate skb */
-		skb = ice_fetch_rx_buf(rx_ring, rx_desc);
+		skb = ice_fetch_rx_buf(rx_ring, rx_buf, size);
 		if (!skb)
 			break;

From bbb97808a0eff71fd841d297dba8cd3ebc4d700d Mon Sep 17 00:00:00 2001
From: Maciej Fijalkowski
Date: Wed, 13 Feb 2019 10:51:02 -0800
Subject: [PATCH 06/15] ice: Pull out page reuse checks onto separate function

Introduce ice_can_reuse_rx_page, which will verify whether the page can
be reused and return the boolean result to the caller.
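A condensed sketch of the check being factored out (the helper name and
shape follow the diff below; this assumes 4 KB pages, i.e. the
PAGE_SIZE < 8192 branch):

	static bool can_reuse(struct ice_rx_buf *rx_buf, unsigned int truesize)
	{
		struct page *page = rx_buf->page;

		/* never recycle pages from a remote NUMA node or the
		 * pfmemalloc reserves
		 */
		if (unlikely(ice_page_is_reserved(page)))
			return false;

		/* reuse is only safe while the driver is the sole owner */
		if (unlikely(page_count(page) != 1))
			return false;

		/* flip between the two 2 KB halves of the page */
		rx_buf->page_offset ^= truesize;
		get_page(page);

		return true;
	}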
Signed-off-by: Maciej Fijalkowski
Signed-off-by: Anirudh Venkataramanan
Tested-by: Andrew Bowers
Signed-off-by: Jeff Kirsher
---
 drivers/net/ethernet/intel/ice/ice_txrx.c | 80 +++++++++++++----------
 1 file changed, 45 insertions(+), 35 deletions(-)

diff --git a/drivers/net/ethernet/intel/ice/ice_txrx.c b/drivers/net/ethernet/intel/ice/ice_txrx.c
index 8c0a8b63670b..50321d39e463 100644
--- a/drivers/net/ethernet/intel/ice/ice_txrx.c
+++ b/drivers/net/ethernet/intel/ice/ice_txrx.c
@@ -496,6 +496,48 @@ static bool ice_page_is_reserved(struct page *page)
 	return (page_to_nid(page) != numa_mem_id()) || page_is_pfmemalloc(page);
 }
 
+/**
+ * ice_can_reuse_rx_page - Determine if page can be reused for another Rx
+ * @rx_buf: buffer containing the page
+ * @truesize: the offset that needs to be applied to page
+ *
+ * If page is reusable, we have a green light for calling ice_reuse_rx_page,
+ * which will assign the current buffer to the buffer that next_to_alloc is
+ * pointing to; otherwise, the DMA mapping needs to be destroyed and
+ * page freed
+ */
+static bool ice_can_reuse_rx_page(struct ice_rx_buf *rx_buf,
+				  unsigned int truesize)
+{
+	struct page *page = rx_buf->page;
+
+	/* avoid re-using remote pages */
+	if (unlikely(ice_page_is_reserved(page)))
+		return false;
+
+#if (PAGE_SIZE < 8192)
+	/* if we are only owner of page we can reuse it */
+	if (unlikely(page_count(page) != 1))
+		return false;
+
+	/* flip page offset to other buffer */
+	rx_buf->page_offset ^= truesize;
+#else
+	/* move offset up to the next cache line */
+	rx_buf->page_offset += truesize;
+
+	if (rx_buf->page_offset > PAGE_SIZE - ICE_RXBUF_2048)
+		return false;
+#endif /* PAGE_SIZE < 8192) */
+
+	/* Even if we own the page, we are not allowed to use atomic_set()
+	 * This would break get_page_unless_zero() users.
+	 */
+	get_page(page);
+
+	return true;
+}
+
 /**
  * ice_add_rx_frag - Add contents of Rx buffer to sk_buff
  * @rx_buf: buffer containing page to add
@@ -517,17 +559,9 @@ ice_add_rx_frag(struct ice_rx_buf *rx_buf, struct sk_buff *skb,
 #if (PAGE_SIZE < 8192)
 	unsigned int truesize = ICE_RXBUF_2048;
 #else
-	unsigned int last_offset = PAGE_SIZE - ICE_RXBUF_2048;
-	unsigned int truesize;
+	unsigned int truesize = ALIGN(size, L1_CACHE_BYTES);
 #endif /* PAGE_SIZE < 8192) */
-
-	struct page *page;
-
-	page = rx_buf->page;
-
-#if (PAGE_SIZE >= 8192)
-	truesize = ALIGN(size, L1_CACHE_BYTES);
-#endif /* PAGE_SIZE >= 8192) */
+	struct page *page = rx_buf->page;
 
 	/* will the data fit in the skb we allocated? if so, just
 	 * copy it as it is pretty small anyway
@@ -549,31 +583,7 @@ ice_add_rx_frag(struct ice_rx_buf *rx_buf, struct sk_buff *skb,
 	skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags, page,
 			rx_buf->page_offset, size, truesize);
 
-	/* avoid re-using remote pages */
-	if (unlikely(ice_page_is_reserved(page)))
-		return false;
-
-#if (PAGE_SIZE < 8192)
-	/* if we are only owner of page we can reuse it */
-	if (unlikely(page_count(page) != 1))
-		return false;
-
-	/* flip page offset to other buffer */
-	rx_buf->page_offset ^= truesize;
-#else
-	/* move offset up to the next cache line */
-	rx_buf->page_offset += truesize;
-
-	if (rx_buf->page_offset > last_offset)
-		return false;
-#endif /* PAGE_SIZE < 8192) */
-
-	/* Even if we own the page, we are not allowed to use atomic_set()
-	 * This would break get_page_unless_zero() users.
-	 */
-	get_page(rx_buf->page);
-
-	return true;
+	return ice_can_reuse_rx_page(rx_buf, truesize);
 }
 
 /**

From 1857ca42a734d41261f4c30e5f625fa7e2b70b0d Mon Sep 17 00:00:00 2001
From: Maciej Fijalkowski
Date: Wed, 13 Feb 2019 10:51:03 -0800
Subject: [PATCH 07/15] ice: Get rid of ice_pull_tail

Instead of adding a frag and then, when dealing with the EOP frame,
accessing that frag in order to copy the headers onto the linear part of
the skb, we can do this in ice_add_rx_frag in the case where data_len is
still 0 and the frame won't fit onto the linear part as a whole.

The function comment of ice_pull_tail was a bit misleading because of
the optimizations it mentioned (dropping a frag, maintaining an accurate
truesize of the skb); it seems that this part of the logic was dropped
and the comment was not updated to reflect that change.

Signed-off-by: Maciej Fijalkowski
Signed-off-by: Anirudh Venkataramanan
Tested-by: Andrew Bowers
Signed-off-by: Jeff Kirsher
---
 drivers/net/ethernet/intel/ice/ice_txrx.c | 70 ++++++++---------------
 1 file changed, 24 insertions(+), 46 deletions(-)

diff --git a/drivers/net/ethernet/intel/ice/ice_txrx.c b/drivers/net/ethernet/intel/ice/ice_txrx.c
index 50321d39e463..becee476002d 100644
--- a/drivers/net/ethernet/intel/ice/ice_txrx.c
+++ b/drivers/net/ethernet/intel/ice/ice_txrx.c
@@ -562,13 +562,17 @@ ice_add_rx_frag(struct ice_rx_buf *rx_buf, struct sk_buff *skb,
 	unsigned int truesize = ALIGN(size, L1_CACHE_BYTES);
 #endif /* PAGE_SIZE < 8192) */
 	struct page *page = rx_buf->page;
+	unsigned int pull_len;
+	unsigned char *va;
+
+	va = page_address(page) + rx_buf->page_offset;
+	if (unlikely(skb_is_nonlinear(skb)))
+		goto add_tail_frag;
 
 	/* will the data fit in the skb we allocated? if so, just
 	 * copy it as it is pretty small anyway
 	 */
-	if (size <= ICE_RX_HDR_SIZE && !skb_is_nonlinear(skb)) {
-		unsigned char *va = page_address(page) + rx_buf->page_offset;
-
+	if (size <= ICE_RX_HDR_SIZE) {
 		memcpy(__skb_put(skb, size), va, ALIGN(size, sizeof(long)));
 
 		/* page is not reserved, we can reuse buffer as-is */
@@ -580,8 +584,24 @@ ice_add_rx_frag(struct ice_rx_buf *rx_buf, struct sk_buff *skb,
 		return false;
 	}
 
+	/* we need the header to contain the greater of either ETH_HLEN or
+	 * 60 bytes if the skb->len is less than 60 for skb_pad.
+	 */
+	pull_len = eth_get_headlen(va, ICE_RX_HDR_SIZE);
+
+	/* align pull length to size of long to optimize memcpy performance */
+	memcpy(__skb_put(skb, pull_len), va, ALIGN(pull_len, sizeof(long)));
+
+	/* the header from the frame that we're adding as a frag was added to
+	 * linear part of skb so move the pointer past that header and
+	 * reduce the size of data
+	 */
+	va += pull_len;
+	size -= pull_len;
+
+add_tail_frag:
 	skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags, page,
-			rx_buf->page_offset, size, truesize);
+			(unsigned long)va & ~PAGE_MASK, size, truesize);
 
 	return ice_can_reuse_rx_page(rx_buf, truesize);
 }
@@ -691,44 +711,6 @@ ice_fetch_rx_buf(struct ice_ring *rx_ring, struct ice_rx_buf *rx_buf,
 	return skb;
 }
 
-/**
- * ice_pull_tail - ice specific version of skb_pull_tail
- * @skb: pointer to current skb being adjusted
- *
- * This function is an ice specific version of __pskb_pull_tail. The
- * main difference between this version and the original function is that
- * this function can make several assumptions about the state of things
- * that allow for significant optimizations versus the standard function.
- * As a result we can do things like drop a frag and maintain an accurate
- * truesize for the skb.
- */
-static void ice_pull_tail(struct sk_buff *skb)
-{
-	struct skb_frag_struct *frag = &skb_shinfo(skb)->frags[0];
-	unsigned int pull_len;
-	unsigned char *va;
-
-	/* it is valid to use page_address instead of kmap since we are
-	 * working with pages allocated out of the lomem pool per
-	 * alloc_page(GFP_ATOMIC)
-	 */
-	va = skb_frag_address(frag);
-
-	/* we need the header to contain the greater of either ETH_HLEN or
-	 * 60 bytes if the skb->len is less than 60 for skb_pad.
-	 */
-	pull_len = eth_get_headlen(va, ICE_RX_HDR_SIZE);
-
-	/* align pull length to size of long to optimize memcpy performance */
-	skb_copy_to_linear_data(skb, va, ALIGN(pull_len, sizeof(long)));
-
-	/* update all of the pointers */
-	skb_frag_size_sub(frag, pull_len);
-	frag->page_offset += pull_len;
-	skb->data_len -= pull_len;
-	skb->tail += pull_len;
-}
-
 /**
  * ice_cleanup_headers - Correct empty headers
  * @skb: pointer to current skb being fixed
@@ -743,10 +725,6 @@ static void ice_pull_tail(struct sk_buff *skb)
  */
 static bool ice_cleanup_headers(struct sk_buff *skb)
 {
-	/* place header in linear portion of buffer */
-	if (skb_is_nonlinear(skb))
-		ice_pull_tail(skb);
-
 	/* if eth_skb_pad returns an error the skb was freed */
 	if (eth_skb_pad(skb))
 		return true;

From 03c66a1376616015b04b6783feadfcf02ba37c3f Mon Sep 17 00:00:00 2001
From: Maciej Fijalkowski
Date: Wed, 13 Feb 2019 10:51:04 -0800
Subject: [PATCH 08/15] ice: Introduce bulk update for page count

{get,put}_page are atomic operations that we use for page count
handling. The current refcount logic is that we increment the count when
passing an skb with the data from the first half of the page up to the
network stack, and we recycle the second half of the page. This protects
us from losing a page, since the network stack can decrement the
refcount of the page via the skb.

Performance can be slightly improved by doing bulk updates of the
refcount instead of doing it one by one. During buffer initialization,
maximize the page's refcount and don't allow the refcount to become less
than two.
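In outline, the scheme works like this (a sketch of the steps in the
diff below; USHRT_MAX is the bias ceiling the patch picks):

	/* at allocation: take almost USHRT_MAX page references in one go */
	page_ref_add(page, USHRT_MAX - 1);
	rx_buf->pagecnt_bias = USHRT_MAX;

	/* per packet: a cheap non-atomic decrement replaces get_page() */
	rx_buf->pagecnt_bias--;

	/* ownership test: outstanding users = refcount minus our bias */
	if (unlikely((page_count(page) - rx_buf->pagecnt_bias) > 1))
		return false;	/* the stack still holds a reference */

	/* on teardown: drop all remaining references at once */
	__page_frag_cache_drain(rx_buf->page, rx_buf->pagecnt_bias);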
Signed-off-by: Maciej Fijalkowski
Signed-off-by: Anirudh Venkataramanan
Tested-by: Andrew Bowers
Signed-off-by: Jeff Kirsher
---
 drivers/net/ethernet/intel/ice/ice_txrx.c | 26 +++++++++++++++++------
 drivers/net/ethernet/intel/ice/ice_txrx.h |  1 +
 2 files changed, 20 insertions(+), 7 deletions(-)

diff --git a/drivers/net/ethernet/intel/ice/ice_txrx.c b/drivers/net/ethernet/intel/ice/ice_txrx.c
index becee476002d..d003f4d49ae6 100644
--- a/drivers/net/ethernet/intel/ice/ice_txrx.c
+++ b/drivers/net/ethernet/intel/ice/ice_txrx.c
@@ -283,7 +283,7 @@ void ice_clean_rx_ring(struct ice_ring *rx_ring)
 			continue;
 
 		dma_unmap_page(dev, rx_buf->dma, PAGE_SIZE, DMA_FROM_DEVICE);
-		__free_pages(rx_buf->page, 0);
+		__page_frag_cache_drain(rx_buf->page, rx_buf->pagecnt_bias);
 
 		rx_buf->page = NULL;
 		rx_buf->page_offset = 0;
@@ -423,6 +423,8 @@ ice_alloc_mapped_page(struct ice_ring *rx_ring, struct ice_rx_buf *bi)
 	bi->dma = dma;
 	bi->page = page;
 	bi->page_offset = 0;
+	page_ref_add(page, USHRT_MAX - 1);
+	bi->pagecnt_bias = USHRT_MAX;
 
 	return true;
 }
@@ -509,6 +511,7 @@ static bool ice_page_is_reserved(struct page *page)
 static bool ice_can_reuse_rx_page(struct ice_rx_buf *rx_buf,
 				  unsigned int truesize)
 {
+	unsigned int pagecnt_bias = rx_buf->pagecnt_bias;
 	struct page *page = rx_buf->page;
 
 	/* avoid re-using remote pages */
@@ -517,7 +520,7 @@ static bool ice_can_reuse_rx_page(struct ice_rx_buf *rx_buf,
 
 #if (PAGE_SIZE < 8192)
 	/* if we are only owner of page we can reuse it */
-	if (unlikely(page_count(page) != 1))
+	if (unlikely((page_count(page) - pagecnt_bias) > 1))
 		return false;
 
 	/* flip page offset to other buffer */
@@ -530,10 +533,14 @@ static bool ice_can_reuse_rx_page(struct ice_rx_buf *rx_buf,
 		return false;
 #endif /* PAGE_SIZE < 8192) */
 
-	/* Even if we own the page, we are not allowed to use atomic_set()
-	 * This would break get_page_unless_zero() users.
+	/* If we have drained the page fragment pool we need to update
+	 * the pagecnt_bias and page count so that we fully restock the
+	 * number of references the driver holds.
 	 */
-	get_page(page);
+	if (unlikely(pagecnt_bias == 1)) {
+		page_ref_add(page, USHRT_MAX - 1);
+		rx_buf->pagecnt_bias = USHRT_MAX;
+	}
 
 	return true;
 }
@@ -576,11 +583,12 @@ ice_add_rx_frag(struct ice_rx_buf *rx_buf, struct sk_buff *skb,
 		memcpy(__skb_put(skb, size), va, ALIGN(size, sizeof(long)));
 
 		/* page is not reserved, we can reuse buffer as-is */
-		if (likely(!ice_page_is_reserved(page)))
+		if (likely(!ice_page_is_reserved(page))) {
+			rx_buf->pagecnt_bias++;
 			return true;
+		}
 
 		/* this page cannot be reused so discard it */
-		__free_pages(page, 0);
 		return false;
 	}
 
@@ -650,6 +658,9 @@ ice_get_rx_buf(struct ice_ring *rx_ring, const unsigned int size)
 				      rx_buf->page_offset, size,
 				      DMA_FROM_DEVICE);
 
+	/* We have pulled a buffer for use, so decrement pagecnt_bias */
+	rx_buf->pagecnt_bias--;
+
 	return rx_buf;
 }
 
@@ -703,6 +714,7 @@ ice_fetch_rx_buf(struct ice_ring *rx_ring, struct ice_rx_buf *rx_buf,
 		/* we are not reusing the buffer so unmap it */
 		dma_unmap_page(rx_ring->dev, rx_buf->dma, PAGE_SIZE,
 			       DMA_FROM_DEVICE);
+		__page_frag_cache_drain(rx_buf->page, rx_buf->pagecnt_bias);
 	}
 
 	/* clear contents of buffer_info */
diff --git a/drivers/net/ethernet/intel/ice/ice_txrx.h b/drivers/net/ethernet/intel/ice/ice_txrx.h
index b7ff0ff82517..43b39e7ce470 100644
--- a/drivers/net/ethernet/intel/ice/ice_txrx.h
+++ b/drivers/net/ethernet/intel/ice/ice_txrx.h
@@ -73,6 +73,7 @@ struct ice_rx_buf {
 	dma_addr_t dma;
 	struct page *page;
 	unsigned int page_offset;
+	u16 pagecnt_bias;
 };
 
 struct ice_q_stats {

From 1d032bc77bb8f85d8adfd14a1a8c67c12b07eece Mon Sep 17 00:00:00 2001
From: Maciej Fijalkowski
Date: Wed, 13 Feb 2019 10:51:05 -0800
Subject: [PATCH 09/15] ice: Gather the rx buf clean-up logic for better reuse

Pull out the code responsible for page counting and buffer recycling so
that it will be possible to clean up the Rx buffers in cases where we
won't allocate an skb (e.g. XDP).

Signed-off-by: Maciej Fijalkowski
Signed-off-by: Anirudh Venkataramanan
Tested-by: Andrew Bowers
Signed-off-by: Jeff Kirsher
---
 drivers/net/ethernet/intel/ice/ice_txrx.c | 76 +++++++++++++++--------
 1 file changed, 50 insertions(+), 26 deletions(-)

diff --git a/drivers/net/ethernet/intel/ice/ice_txrx.c b/drivers/net/ethernet/intel/ice/ice_txrx.c
index d003f4d49ae6..122a0af1ea52 100644
--- a/drivers/net/ethernet/intel/ice/ice_txrx.c
+++ b/drivers/net/ethernet/intel/ice/ice_txrx.c
@@ -498,19 +498,42 @@ static bool ice_page_is_reserved(struct page *page)
 	return (page_to_nid(page) != numa_mem_id()) || page_is_pfmemalloc(page);
 }
 
+/**
+ * ice_rx_buf_adjust_pg_offset - Prepare Rx buffer for reuse
+ * @rx_buf: Rx buffer to adjust
+ * @size: Size of adjustment
+ *
+ * Update the offset within page so that Rx buf will be ready to be reused.
+ * For systems with PAGE_SIZE < 8192 this function will flip the page offset
+ * so the second half of page assigned to Rx buffer will be used, otherwise
+ * the offset is moved by the @size bytes
+ */
+static void
+ice_rx_buf_adjust_pg_offset(struct ice_rx_buf *rx_buf, unsigned int size)
+{
+#if (PAGE_SIZE < 8192)
+	/* flip page offset to other buffer */
+	rx_buf->page_offset ^= size;
+#else
+	/* move offset up to the next cache line */
+	rx_buf->page_offset += size;
+#endif
+}
+
 /**
  * ice_can_reuse_rx_page - Determine if page can be reused for another Rx
  * @rx_buf: buffer containing the page
- * @truesize: the offset that needs to be applied to page
 *
 * If page is reusable, we have a green light for calling ice_reuse_rx_page,
 * which will assign the current buffer to the buffer that next_to_alloc is
 * pointing to; otherwise, the DMA mapping needs to be destroyed and
 * page freed
 */
-static bool ice_can_reuse_rx_page(struct ice_rx_buf *rx_buf,
-				  unsigned int truesize)
+static bool ice_can_reuse_rx_page(struct ice_rx_buf *rx_buf)
 {
+#if (PAGE_SIZE >= 8192)
+	unsigned int last_offset = PAGE_SIZE - ICE_RXBUF_2048;
+#endif
 	unsigned int pagecnt_bias = rx_buf->pagecnt_bias;
 	struct page *page = rx_buf->page;
 
@@ -522,14 +545,8 @@ static bool ice_can_reuse_rx_page(struct ice_rx_buf *rx_buf,
 	/* if we are only owner of page we can reuse it */
 	if (unlikely((page_count(page) - pagecnt_bias) > 1))
 		return false;
-
-	/* flip page offset to other buffer */
-	rx_buf->page_offset ^= truesize;
 #else
-	/* move offset up to the next cache line */
-	rx_buf->page_offset += truesize;
-
-	if (rx_buf->page_offset > PAGE_SIZE - ICE_RXBUF_2048)
+	if (rx_buf->page_offset > last_offset)
 		return false;
 #endif /* PAGE_SIZE < 8192) */
 
@@ -556,10 +573,9 @@ static bool ice_can_reuse_rx_page(struct ice_rx_buf *rx_buf,
  * less than the skb header size, otherwise it will just attach the page as
  * a frag to the skb.
  *
- * The function will then update the page offset if necessary and return
- * true if the buffer can be reused by the adapter.
+ * The function will then update the page offset
 */
-static bool
+static void
 ice_add_rx_frag(struct ice_rx_buf *rx_buf, struct sk_buff *skb,
		unsigned int size)
 {
@@ -582,14 +598,8 @@ ice_add_rx_frag(struct ice_rx_buf *rx_buf, struct sk_buff *skb,
 	if (size <= ICE_RX_HDR_SIZE) {
 		memcpy(__skb_put(skb, size), va, ALIGN(size, sizeof(long)));
 
-		/* page is not reserved, we can reuse buffer as-is */
-		if (likely(!ice_page_is_reserved(page))) {
-			rx_buf->pagecnt_bias++;
-			return true;
-		}
-
-		/* this page cannot be reused so discard it */
-		return false;
+		rx_buf->pagecnt_bias++;
+		return;
 	}
 
@@ -610,8 +620,7 @@ ice_add_rx_frag(struct ice_rx_buf *rx_buf, struct sk_buff *skb,
 add_tail_frag:
 	skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags, page,
 			(unsigned long)va & ~PAGE_MASK, size, truesize);
-
-	return ice_can_reuse_rx_page(rx_buf, truesize);
+	ice_rx_buf_adjust_pg_offset(rx_buf, truesize);
 }
 
@@ -697,6 +706,7 @@ ice_fetch_rx_buf(struct ice_ring *rx_ring, struct ice_rx_buf *rx_buf,
 				       GFP_ATOMIC | __GFP_NOWARN);
 		if (unlikely(!skb)) {
 			rx_ring->rx_stats.alloc_buf_failed++;
+			rx_buf->pagecnt_bias++;
 			return NULL;
 		}
 
@@ -706,8 +716,23 @@ ice_fetch_rx_buf(struct ice_ring *rx_ring, struct ice_rx_buf *rx_buf,
 	}
 
 	/* pull page into skb */
-	if (ice_add_rx_frag(rx_buf, skb, size)) {
+	ice_add_rx_frag(rx_buf, skb, size);
+
+	return skb;
+}
+
+/**
+ * ice_put_rx_buf - Clean up used buffer and either recycle or free
+ * @rx_ring: Rx descriptor ring to transact packets on
+ * @rx_buf: Rx buffer to pull data from
+ *
+ * This function will clean up the contents of the rx_buf. It will
+ * either recycle the buffer or unmap it and free the associated resources.
+ */
+static void ice_put_rx_buf(struct ice_ring *rx_ring, struct ice_rx_buf *rx_buf)
+{
 		/* hand second half of page back to the ring */
+	if (ice_can_reuse_rx_page(rx_buf)) {
 		ice_reuse_rx_page(rx_ring, rx_buf);
 		rx_ring->rx_stats.page_reuse_count++;
 	} else {
@@ -719,8 +744,6 @@ ice_fetch_rx_buf(struct ice_ring *rx_ring, struct ice_rx_buf *rx_buf,
 
 	/* clear contents of buffer_info */
 	rx_buf->page = NULL;
-
-	return skb;
 }
 
@@ -1007,6 +1030,7 @@ static int ice_clean_rx_irq(struct ice_ring *rx_ring, int budget)
 		if (!skb)
 			break;
 
+		ice_put_rx_buf(rx_ring, rx_buf);
 		cleaned_count++;
 
 		/* skip if it is NOP desc */

From 712edbbb67d404bae055d88e162e13980b426663 Mon Sep 17 00:00:00 2001
From: Maciej Fijalkowski
Date: Wed, 13 Feb 2019 10:51:06 -0800
Subject: [PATCH 10/15] ice: Limit the ice_add_rx_frag to frag addition

Refactor ice_fetch_rx_buf and ice_add_rx_frag in a way that we have
standalone functions that do either the skb construction or the frag
addition to a previously constructed skb.

The skb handling between rx_bufs is spread among various functions.
ice_get_rx_buf will retrieve the skb pointer from the rx_buf; if it is a
NULL pointer, then we do ice_construct_skb, otherwise we add a frag to
the current skb via ice_add_rx_frag. Then, in ice_put_rx_buf, the skb
pointer that belongs to the rx_buf will be cleared. Moving further, if
the current frame is not an EOP frame, we assign the current skb to the
rx_buf that is pointed to by the updated next_to_clean indicator.

What is more, during buffer reuse, assign each member of ice_rx_buf
individually so we avoid an unnecessary copy of the skb pointer.

Last but not least, this logic split will allow for better code reuse
when adding support for build_skb.
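The resulting per-descriptor flow in ice_clean_rx_irq is roughly (a
sketch of the logic in the diff below):

	rx_buf = ice_get_rx_buf(rx_ring, &skb, size);	/* reads rx_buf->skb */

	if (skb)		/* subsequent buffer of an in-progress frame */
		ice_add_rx_frag(rx_buf, skb, size);
	else			/* first buffer: allocate and fill the skb */
		skb = ice_construct_skb(rx_ring, rx_buf, size);

	ice_put_rx_buf(rx_ring, rx_buf);	/* recycle or free; clears page/skb */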
Signed-off-by: Maciej Fijalkowski
Signed-off-by: Anirudh Venkataramanan
Tested-by: Andrew Bowers
Signed-off-by: Jeff Kirsher
---
 drivers/net/ethernet/intel/ice/ice_txrx.c | 160 +++++++++++-----------
 1 file changed, 79 insertions(+), 81 deletions(-)

diff --git a/drivers/net/ethernet/intel/ice/ice_txrx.c b/drivers/net/ethernet/intel/ice/ice_txrx.c
index 122a0af1ea52..63af5af3c3e8 100644
--- a/drivers/net/ethernet/intel/ice/ice_txrx.c
+++ b/drivers/net/ethernet/intel/ice/ice_txrx.c
@@ -563,63 +563,29 @@ static bool ice_can_reuse_rx_page(struct ice_rx_buf *rx_buf)
 }
 
 /**
- * ice_add_rx_frag - Add contents of Rx buffer to sk_buff
+ * ice_add_rx_frag - Add contents of Rx buffer to sk_buff as a frag
 * @rx_buf: buffer containing page to add
- * @skb: sk_buf to place the data into
- * @size: the length of the packet
+ * @skb: sk_buff to place the data into
+ * @size: packet length from rx_desc
 *
 * This function will add the data contained in rx_buf->page to the skb.
- * This is done either through a direct copy if the data in the buffer is
- * less than the skb header size, otherwise it will just attach the page as
- * a frag to the skb.
- *
- * The function will then update the page offset
+ * It will just attach the page as a frag to the skb.
+ * The function will then update the page offset.
 */
 static void
 ice_add_rx_frag(struct ice_rx_buf *rx_buf, struct sk_buff *skb,
		unsigned int size)
 {
-#if (PAGE_SIZE < 8192)
-	unsigned int truesize = ICE_RXBUF_2048;
+#if (PAGE_SIZE >= 8192)
+	unsigned int truesize = SKB_DATA_ALIGN(size);
 #else
-	unsigned int truesize = ALIGN(size, L1_CACHE_BYTES);
-#endif /* PAGE_SIZE < 8192) */
-	struct page *page = rx_buf->page;
-	unsigned int pull_len;
-	unsigned char *va;
+	unsigned int truesize = ICE_RXBUF_2048;
+#endif
 
-	va = page_address(page) + rx_buf->page_offset;
-	if (unlikely(skb_is_nonlinear(skb)))
-		goto add_tail_frag;
+	skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags, rx_buf->page,
+			rx_buf->page_offset, size, truesize);
 
-	/* will the data fit in the skb we allocated? if so, just
-	 * copy it as it is pretty small anyway
-	 */
-	if (size <= ICE_RX_HDR_SIZE) {
-		memcpy(__skb_put(skb, size), va, ALIGN(size, sizeof(long)));
-
-		rx_buf->pagecnt_bias++;
-		return;
-	}
-
-	/* we need the header to contain the greater of either ETH_HLEN or
-	 * 60 bytes if the skb->len is less than 60 for skb_pad.
-	 */
-	pull_len = eth_get_headlen(va, ICE_RX_HDR_SIZE);
-
-	/* align pull length to size of long to optimize memcpy performance */
-	memcpy(__skb_put(skb, pull_len), va, ALIGN(pull_len, sizeof(long)));
-
-	/* the header from the frame that we're adding as a frag was added to
-	 * linear part of skb so move the pointer past that header and
-	 * reduce the size of data
-	 */
-	va += pull_len;
-	size -= pull_len;
-
-add_tail_frag:
-	skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags, page,
-			(unsigned long)va & ~PAGE_MASK, size, truesize);
+	/* page is being used so we must update the page offset */
 	ice_rx_buf_adjust_pg_offset(rx_buf, truesize);
 }
 
@@ -642,25 +608,34 @@ ice_reuse_rx_page(struct ice_ring *rx_ring, struct ice_rx_buf *old_buf)
 	nta++;
 	rx_ring->next_to_alloc = (nta < rx_ring->count) ? nta : 0;
 
-	/* transfer page from old buffer to new buffer */
-	*new_buf = *old_buf;
+	/* Transfer page from old buffer to new buffer.
+	 * Move each member individually to avoid possible store
+	 * forwarding stalls and unnecessary copy of skb.
+	 */
+	new_buf->dma = old_buf->dma;
+	new_buf->page = old_buf->page;
+	new_buf->page_offset = old_buf->page_offset;
+	new_buf->pagecnt_bias = old_buf->pagecnt_bias;
 }
 
 /**
  * ice_get_rx_buf - Fetch Rx buffer and synchronize data for use
 * @rx_ring: Rx descriptor ring to transact packets on
+ * @skb: skb to be used
 * @size: size of buffer to add to skb
 *
 * This function will pull an Rx buffer from the ring and synchronize it
 * for use by the CPU.
 */
 static struct ice_rx_buf *
-ice_get_rx_buf(struct ice_ring *rx_ring, const unsigned int size)
+ice_get_rx_buf(struct ice_ring *rx_ring, struct sk_buff **skb,
+	       const unsigned int size)
 {
 	struct ice_rx_buf *rx_buf;
 
 	rx_buf = &rx_ring->rx_buf[rx_ring->next_to_clean];
 	prefetchw(rx_buf->page);
+	*skb = rx_buf->skb;
 
 	/* we are reusing so sync this buffer for CPU use */
 	dma_sync_single_range_for_cpu(rx_ring->dev, rx_buf->dma,
@@ -674,50 +649,64 @@ ice_get_rx_buf(struct ice_ring *rx_ring, const unsigned int size)
 }
 
 /**
- * ice_fetch_rx_buf - Allocate skb and populate it
+ * ice_construct_skb - Allocate skb and populate it
 * @rx_ring: Rx descriptor ring to transact packets on
 * @rx_buf: Rx buffer to pull data from
 * @size: the length of the packet
 *
- * This function allocates an skb on the fly, and populates it with the page
- * data from the current receive descriptor, taking care to set up the skb
- * correctly, as well as handling calling the page recycle function if
- * necessary.
+ * This function allocates an skb. It then populates it with the page
+ * data from the current receive descriptor, taking care to set up the
+ * skb correctly.
 */
 static struct sk_buff *
-ice_fetch_rx_buf(struct ice_ring *rx_ring, struct ice_rx_buf *rx_buf,
-		 unsigned int size)
+ice_construct_skb(struct ice_ring *rx_ring, struct ice_rx_buf *rx_buf,
+		  unsigned int size)
 {
-	struct sk_buff *skb = rx_buf->skb;
+	void *va = page_address(rx_buf->page) + rx_buf->page_offset;
+	unsigned int headlen;
+	struct sk_buff *skb;
 
-	if (likely(!skb)) {
-		u8 *page_addr = page_address(rx_buf->page) +
-				rx_buf->page_offset;
-
-		/* prefetch first cache line of first page */
-		prefetch(page_addr);
+	/* prefetch first cache line of first page */
+	prefetch(va);
 #if L1_CACHE_BYTES < 128
-		prefetch((void *)(page_addr + L1_CACHE_BYTES));
+	prefetch((u8 *)va + L1_CACHE_BYTES);
 #endif /* L1_CACHE_BYTES */
 
-		/* allocate a skb to store the frags */
-		skb = __napi_alloc_skb(&rx_ring->q_vector->napi,
-				       ICE_RX_HDR_SIZE,
-				       GFP_ATOMIC | __GFP_NOWARN);
-		if (unlikely(!skb)) {
-			rx_ring->rx_stats.alloc_buf_failed++;
-			rx_buf->pagecnt_bias++;
-			return NULL;
-		}
+	/* allocate a skb to store the frags */
+	skb = __napi_alloc_skb(&rx_ring->q_vector->napi, ICE_RX_HDR_SIZE,
+			       GFP_ATOMIC | __GFP_NOWARN);
+	if (unlikely(!skb))
+		return NULL;
 
-		skb_record_rx_queue(skb, rx_ring->q_index);
+	skb_record_rx_queue(skb, rx_ring->q_index);
+	/* Determine available headroom for copy */
+	headlen = size;
+	if (headlen > ICE_RX_HDR_SIZE)
+		headlen = eth_get_headlen(va, ICE_RX_HDR_SIZE);
+
+	/* align pull length to size of long to optimize memcpy performance */
+	memcpy(__skb_put(skb, headlen), va, ALIGN(headlen, sizeof(long)));
+
+	/* if we exhaust the linear part then add what is left as a frag */
+	size -= headlen;
+	if (size) {
+#if (PAGE_SIZE >= 8192)
+		unsigned int truesize = SKB_DATA_ALIGN(size);
+#else
+		unsigned int truesize = ICE_RXBUF_2048;
+#endif
+		skb_add_rx_frag(skb, 0, rx_buf->page,
+				rx_buf->page_offset + headlen, size, truesize);
+		/* buffer is used by skb, update page_offset */
+		ice_rx_buf_adjust_pg_offset(rx_buf, truesize);
 	} else {
-		rx_buf->skb = NULL;
+		/* buffer is unused, reset bias back to rx_buf; data was copied
+		 * onto skb's linear part so there's no need for adjusting
+		 * page offset and we can reuse this buffer as-is
+		 */
+		rx_buf->pagecnt_bias++;
 	}
 
-	/* pull page into skb */
-	ice_add_rx_frag(rx_buf, skb, size);
-
 	return skb;
 }
 
@@ -744,6 +733,7 @@ static void ice_put_rx_buf(struct ice_ring *rx_ring, struct ice_rx_buf *rx_buf)
 
 	/* clear contents of buffer_info */
 	rx_buf->page = NULL;
+	rx_buf->skb = NULL;
 }
 
@@ -1024,11 +1014,19 @@ static int ice_clean_rx_irq(struct ice_ring *rx_ring, int budget)
 		size = le16_to_cpu(rx_desc->wb.pkt_len) &
 			ICE_RX_FLX_DESC_PKT_LEN_M;
 
-		rx_buf = ice_get_rx_buf(rx_ring, size);
+		rx_buf = ice_get_rx_buf(rx_ring, &skb, size);
 		/* allocate (if needed) and populate skb */
-		skb = ice_fetch_rx_buf(rx_ring, rx_buf, size);
-		if (!skb)
+		if (skb)
+			ice_add_rx_frag(rx_buf, skb, size);
+		else
+			skb = ice_construct_skb(rx_ring, rx_buf, size);
+
+		/* exit if we failed to retrieve a buffer */
+		if (!skb) {
+			rx_ring->rx_stats.alloc_buf_failed++;
+			rx_buf->pagecnt_bias++;
 			break;
+		}
 
 		ice_put_rx_buf(rx_ring, rx_buf);
 		cleaned_count++;

From a65f71fed5add2ec5713fcc605842f5f2dff22a3 Mon Sep 17 00:00:00 2001
From: Maciej Fijalkowski
Date: Wed, 13 Feb 2019 10:51:07 -0800
Subject: [PATCH 11/15] ice: map Rx buffer pages with DMA attributes

Provide the DMA_ATTR_WEAK_ORDERING and DMA_ATTR_SKIP_CPU_SYNC attributes
to the DMA API during the mapping operations on the Rx side. With this
change, non-x86 platforms will be able to sync only what is being used
(a 2k buffer) instead of the entire page. This should yield a slight
performance improvement. Furthermore, a DMA unmap may destroy changes
the CPU made to the buffer when the platform is not x86; using the
DMA_ATTR_SKIP_CPU_SYNC attribute fixes this issue.

Also add a sync_single_for_device call during the Rx buffer assignment,
to make sure the cache lines are cleared before the device attempts to
write to the buffer.

Signed-off-by: Maciej Fijalkowski
Signed-off-by: Anirudh Venkataramanan
Tested-by: Andrew Bowers
Signed-off-by: Jeff Kirsher
---
 drivers/net/ethernet/intel/ice/ice_txrx.c | 24 +++++++++++++++++++----
 drivers/net/ethernet/intel/ice/ice_txrx.h |  3 +++
 2 files changed, 23 insertions(+), 4 deletions(-)

diff --git a/drivers/net/ethernet/intel/ice/ice_txrx.c b/drivers/net/ethernet/intel/ice/ice_txrx.c
index 63af5af3c3e8..ea4ec3760f8b 100644
--- a/drivers/net/ethernet/intel/ice/ice_txrx.c
+++ b/drivers/net/ethernet/intel/ice/ice_txrx.c
@@ -282,7 +282,16 @@ void ice_clean_rx_ring(struct ice_ring *rx_ring)
 		if (!rx_buf->page)
 			continue;
 
-		dma_unmap_page(dev, rx_buf->dma, PAGE_SIZE, DMA_FROM_DEVICE);
+		/* Invalidate cache lines that may have been written to by
+		 * device so that we avoid corrupting memory.
+		 */
+		dma_sync_single_range_for_cpu(dev, rx_buf->dma,
+					      rx_buf->page_offset,
+					      ICE_RXBUF_2048, DMA_FROM_DEVICE);
+
+		/* free resources associated with mapping */
+		dma_unmap_page_attrs(dev, rx_buf->dma, PAGE_SIZE,
+				     DMA_FROM_DEVICE, ICE_RX_DMA_ATTR);
 		__page_frag_cache_drain(rx_buf->page, rx_buf->pagecnt_bias);
 
 		rx_buf->page = NULL;
@@ -409,7 +418,8 @@ ice_alloc_mapped_page(struct ice_ring *rx_ring, struct ice_rx_buf *bi)
 	}
 
 	/* map page for use */
-	dma = dma_map_page(rx_ring->dev, page, 0, PAGE_SIZE, DMA_FROM_DEVICE);
+	dma = dma_map_page_attrs(rx_ring->dev, page, 0, PAGE_SIZE,
+				 DMA_FROM_DEVICE, ICE_RX_DMA_ATTR);
 
 	/* if mapping failed free memory back to system since
	 * there isn't much point in holding memory we can't use
@@ -454,6 +464,12 @@ bool ice_alloc_rx_bufs(struct ice_ring *rx_ring, u16 cleaned_count)
 		if (!ice_alloc_mapped_page(rx_ring, bi))
 			goto no_bufs;
 
+		/* sync the buffer for use by the device */
+		dma_sync_single_range_for_device(rx_ring->dev, bi->dma,
+						 bi->page_offset,
+						 ICE_RXBUF_2048,
+						 DMA_FROM_DEVICE);
+
 		/* Refresh the desc even if buffer_addrs didn't change
		 * because each write-back erases this info.
		 */
@@ -726,8 +742,8 @@ static void ice_put_rx_buf(struct ice_ring *rx_ring, struct ice_rx_buf *rx_buf)
 		rx_ring->rx_stats.page_reuse_count++;
 	} else {
 		/* we are not reusing the buffer so unmap it */
-		dma_unmap_page(rx_ring->dev, rx_buf->dma, PAGE_SIZE,
-			       DMA_FROM_DEVICE);
+		dma_unmap_page_attrs(rx_ring->dev, rx_buf->dma, PAGE_SIZE,
+				     DMA_FROM_DEVICE, ICE_RX_DMA_ATTR);
 		__page_frag_cache_drain(rx_buf->page, rx_buf->pagecnt_bias);
 	}
 
diff --git a/drivers/net/ethernet/intel/ice/ice_txrx.h b/drivers/net/ethernet/intel/ice/ice_txrx.h
index 43b39e7ce470..bd446ed423e5 100644
--- a/drivers/net/ethernet/intel/ice/ice_txrx.h
+++ b/drivers/net/ethernet/intel/ice/ice_txrx.h
@@ -47,6 +47,9 @@
 #define ICE_TX_FLAGS_VLAN_M	0xffff0000
 #define ICE_TX_FLAGS_VLAN_S	16
 
+#define ICE_RX_DMA_ATTR \
+	(DMA_ATTR_SKIP_CPU_SYNC | DMA_ATTR_WEAK_ORDERING)
+
 struct ice_tx_buf {
 	struct ice_tx_desc *next_to_watch;
 	struct sk_buff *skb;

From 2ebd4428d93a2f6ce0c813b10a1a43b6a8241fe5 Mon Sep 17 00:00:00 2001
From: Dave Ertman
Date: Wed, 13 Feb 2019 10:51:08 -0800
Subject: [PATCH 12/15] ice: Prevent unintended multiple chain resets

In the current implementation of ice_reset_subtask, if multiple reset
types are set in pf->state, only the most intrusive one is meant to be
performed, but the bits requesting the other types are not being
cleared. This would lead to another reset being performed the next time
the service task is scheduled.

Change the flow of ice_reset_subtask so that all reset request bits in
pf->state are cleared, while we still perform the most intrusive of the
resets requested.

Signed-off-by: Dave Ertman
Signed-off-by: Anirudh Venkataramanan
Tested-by: Andrew Bowers
Signed-off-by: Jeff Kirsher
---
 drivers/net/ethernet/intel/ice/ice_main.c | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c
index d0ea4d059a05..3588cbb8ccd2 100644
--- a/drivers/net/ethernet/intel/ice/ice_main.c
+++ b/drivers/net/ethernet/intel/ice/ice_main.c
@@ -478,8 +478,14 @@ static void ice_reset_subtask(struct ice_pf *pf)
	 * for the reset now), poll for reset done, rebuild and return.
	 */
 	if (test_bit(__ICE_RESET_OICR_RECV, pf->state)) {
-		clear_bit(__ICE_GLOBR_RECV, pf->state);
-		clear_bit(__ICE_CORER_RECV, pf->state);
+		/* Perform the largest reset requested */
+		if (test_and_clear_bit(__ICE_CORER_RECV, pf->state))
+			reset_type = ICE_RESET_CORER;
+		if (test_and_clear_bit(__ICE_GLOBR_RECV, pf->state))
+			reset_type = ICE_RESET_GLOBR;
+		/* return if no valid reset type requested */
+		if (reset_type == ICE_RESET_INVAL)
+			return;
 		if (!test_bit(__ICE_PREPARED_FOR_RESET, pf->state))
 			ice_prepare_for_reset(pf);

From 105e5bc23a3a9de3f6c2d6af116bfc0d2334519a Mon Sep 17 00:00:00 2001
From: Preethi Banala
Date: Wed, 13 Feb 2019 10:51:09 -0800
Subject: [PATCH 13/15] ice: change VF VSI tc info along with num_queues

Update the VF VSI tc info along with vsi->num_txq/num_rxq when the VF
requests to configure queues.

Signed-off-by: Preethi Banala
Signed-off-by: Anirudh Venkataramanan
Tested-by: Andrew Bowers
Signed-off-by: Jeff Kirsher
---
 drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c b/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c
index 011c52a9442c..9d37484bc60f 100644
--- a/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c
+++ b/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c
@@ -1927,6 +1927,9 @@ static int ice_vc_cfg_qs_msg(struct ice_vf *vf, u8 *msg)
	 */
 	vsi->num_txq = qci->num_queue_pairs;
 	vsi->num_rxq = qci->num_queue_pairs;
+	/* All queues of VF VSI are in TC 0 */
+	vsi->tc_cfg.tc_info[0].qcount_tx = qci->num_queue_pairs;
+	vsi->tc_cfg.tc_info[0].qcount_rx = qci->num_queue_pairs;
 
 	if (!ice_vsi_cfg_lan_txqs(vsi) && !ice_vsi_cfg_rxqs(vsi))
 		aq_ret = 0;

From 2bdc97be97136004e4a13d3ade50ad2e6d6c7d44 Mon Sep 17 00:00:00 2001
From: Bruce Allan
Date: Wed, 13 Feb 2019 10:51:10 -0800
Subject: [PATCH 14/15] ice: add and use new ice_for_each_traffic_class()
 macro

There are numerous for() loops iterating over each of the max traffic
classes. Use a simple iterator macro instead to make the code cleaner.
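The transformation is mechanical; for example:

	/* before */
	for (i = 0; i < ICE_MAX_TRAFFIC_CLASS; i++)
		...

	/* after */
	ice_for_each_traffic_class(i)
		...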
Signed-off-by: Bruce Allan
Signed-off-by: Anirudh Venkataramanan
Tested-by: Andrew Bowers
Signed-off-by: Jeff Kirsher
---
 drivers/net/ethernet/intel/ice/ice_common.c | 2 +-
 drivers/net/ethernet/intel/ice/ice_lib.c    | 4 ++--
 drivers/net/ethernet/intel/ice/ice_sched.c  | 2 +-
 drivers/net/ethernet/intel/ice/ice_type.h   | 3 +++
 4 files changed, 7 insertions(+), 4 deletions(-)

diff --git a/drivers/net/ethernet/intel/ice/ice_common.c b/drivers/net/ethernet/intel/ice/ice_common.c
index be67d07b75cb..ca9a8c52a8d6 100644
--- a/drivers/net/ethernet/intel/ice/ice_common.c
+++ b/drivers/net/ethernet/intel/ice/ice_common.c
@@ -2949,7 +2949,7 @@ ice_cfg_vsi_qs(struct ice_port_info *pi, u16 vsi_handle, u8 tc_bitmap,
 
 	mutex_lock(&pi->sched_lock);
 
-	for (i = 0; i < ICE_MAX_TRAFFIC_CLASS; i++) {
+	ice_for_each_traffic_class(i) {
 		/* configuration is possible only if TC node is present */
 		if (!ice_sched_get_tc_node(pi, i))
 			continue;
diff --git a/drivers/net/ethernet/intel/ice/ice_lib.c b/drivers/net/ethernet/intel/ice/ice_lib.c
index 7a692f80bda4..a64db22e6ba4 100644
--- a/drivers/net/ethernet/intel/ice/ice_lib.c
+++ b/drivers/net/ethernet/intel/ice/ice_lib.c
@@ -856,7 +856,7 @@ static void ice_vsi_setup_q_map(struct ice_vsi *vsi, struct ice_vsi_ctx *ctxt)
 	/* find the (rounded up) power-of-2 of qcount */
 	pow = order_base_2(qcount_rx);
 
-	for (i = 0; i < ICE_MAX_TRAFFIC_CLASS; i++) {
+	ice_for_each_traffic_class(i) {
 		if (!(vsi->tc_cfg.ena_tc & BIT(i))) {
 			/* TC is not enabled */
 			vsi->tc_cfg.tc_info[i].qoffset = 0;
@@ -1689,7 +1689,7 @@ ice_vsi_cfg_txqs(struct ice_vsi *vsi, struct ice_ring **rings, int offset)
 	num_q_grps = 1;
 
 	/* set up and configure the Tx queues for each enabled TC */
-	for (tc = 0; tc < ICE_MAX_TRAFFIC_CLASS; tc++) {
+	ice_for_each_traffic_class(tc) {
 		if (!(vsi->tc_cfg.ena_tc & BIT(tc)))
 			break;
 
diff --git a/drivers/net/ethernet/intel/ice/ice_sched.c b/drivers/net/ethernet/intel/ice/ice_sched.c
index efbebf7a050f..e0218f4c8f0b 100644
--- a/drivers/net/ethernet/intel/ice/ice_sched.c
+++ b/drivers/net/ethernet/intel/ice/ice_sched.c
@@ -1606,7 +1606,7 @@ ice_sched_rm_vsi_cfg(struct ice_port_info *pi, u16 vsi_handle, u8 owner)
 	if (!vsi_ctx)
 		goto exit_sched_rm_vsi_cfg;
 
-	for (i = 0; i < ICE_MAX_TRAFFIC_CLASS; i++) {
+	ice_for_each_traffic_class(i) {
 		struct ice_sched_node *vsi_node, *tc_node;
 		u8 j = 0;
 
diff --git a/drivers/net/ethernet/intel/ice/ice_type.h b/drivers/net/ethernet/intel/ice/ice_type.h
index 584f260f2e4f..3a4e67484487 100644
--- a/drivers/net/ethernet/intel/ice/ice_type.h
+++ b/drivers/net/ethernet/intel/ice/ice_type.h
@@ -210,6 +210,9 @@ struct ice_nvm_info {
 #define ICE_MAX_TRAFFIC_CLASS	8
 #define ICE_TXSCHED_MAX_BRANCHES ICE_MAX_TRAFFIC_CLASS
 
+#define ice_for_each_traffic_class(_i) \
+	for ((_i) = 0; (_i) < ICE_MAX_TRAFFIC_CLASS; (_i)++)
+
 struct ice_sched_node {
 	struct ice_sched_node *parent;
 	struct ice_sched_node *sibling; /* next sibling in the same layer */

From 86e81794acdfcc48dc6fde79195208e5c8324234 Mon Sep 17 00:00:00 2001
From: Chinh T Cao
Date: Wed, 13 Feb 2019 10:51:11 -0800
Subject: [PATCH 15/15] ice: Create a generic name for the ice_rx_flg64_bits
 structure

This structure is used to define the packet flags. These flags are
applicable to both Tx and Rx packets. Thus, this patch changes the
enum's name from ice_rx_flg64_bits to ice_flg64_bits, and renames its
members to match.
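Since the enum members are bit positions within a 64-bit flag word, a
consumer would build a mask the same way before and after the rename
(hypothetical usage, not part of this patch):

	u64 tcp_flags = BIT_ULL(ICE_FLG_FIN) | BIT_ULL(ICE_FLG_SYN) |
			BIT_ULL(ICE_FLG_RST);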
Signed-off-by: Chinh T Cao
Reviewed-by: Bruce Allan
Signed-off-by: Anirudh Venkataramanan
Tested-by: Andrew Bowers
Signed-off-by: Jeff Kirsher
---
 drivers/net/ethernet/intel/ice/ice_common.c    | 26 +++++++-------
 .../net/ethernet/intel/ice/ice_lan_tx_rx.h     | 34 +++++++++----------
 2 files changed, 30 insertions(+), 30 deletions(-)

diff --git a/drivers/net/ethernet/intel/ice/ice_common.c b/drivers/net/ethernet/intel/ice/ice_common.c
index ca9a8c52a8d6..5e7a31421c0d 100644
--- a/drivers/net/ethernet/intel/ice/ice_common.c
+++ b/drivers/net/ethernet/intel/ice/ice_common.c
@@ -358,22 +358,22 @@ static void ice_init_flex_flags(struct ice_hw *hw, enum ice_rxdid prof_id)
	 */
 	case ICE_RXDID_FLEX_NIC:
 	case ICE_RXDID_FLEX_NIC_2:
-		ICE_PROG_FLG_ENTRY(hw, prof_id, ICE_RXFLG_PKT_FRG,
-				   ICE_RXFLG_UDP_GRE, ICE_RXFLG_PKT_DSI,
-				   ICE_RXFLG_FIN, idx++);
+		ICE_PROG_FLG_ENTRY(hw, prof_id, ICE_FLG_PKT_FRG,
+				   ICE_FLG_UDP_GRE, ICE_FLG_PKT_DSI,
+				   ICE_FLG_FIN, idx++);
 		/* flex flag 1 is not used for flexi-flag programming, skipping
		 * these four FLG64 bits.
		 */
-		ICE_PROG_FLG_ENTRY(hw, prof_id, ICE_RXFLG_SYN, ICE_RXFLG_RST,
-				   ICE_RXFLG_PKT_DSI, ICE_RXFLG_PKT_DSI, idx++);
-		ICE_PROG_FLG_ENTRY(hw, prof_id, ICE_RXFLG_PKT_DSI,
-				   ICE_RXFLG_PKT_DSI, ICE_RXFLG_EVLAN_x8100,
-				   ICE_RXFLG_EVLAN_x9100, idx++);
-		ICE_PROG_FLG_ENTRY(hw, prof_id, ICE_RXFLG_VLAN_x8100,
-				   ICE_RXFLG_TNL_VLAN, ICE_RXFLG_TNL_MAC,
-				   ICE_RXFLG_TNL0, idx++);
-		ICE_PROG_FLG_ENTRY(hw, prof_id, ICE_RXFLG_TNL1, ICE_RXFLG_TNL2,
-				   ICE_RXFLG_PKT_DSI, ICE_RXFLG_PKT_DSI, idx);
+		ICE_PROG_FLG_ENTRY(hw, prof_id, ICE_FLG_SYN, ICE_FLG_RST,
+				   ICE_FLG_PKT_DSI, ICE_FLG_PKT_DSI, idx++);
+		ICE_PROG_FLG_ENTRY(hw, prof_id, ICE_FLG_PKT_DSI,
+				   ICE_FLG_PKT_DSI, ICE_FLG_EVLAN_x8100,
+				   ICE_FLG_EVLAN_x9100, idx++);
+		ICE_PROG_FLG_ENTRY(hw, prof_id, ICE_FLG_VLAN_x8100,
+				   ICE_FLG_TNL_VLAN, ICE_FLG_TNL_MAC,
+				   ICE_FLG_TNL0, idx++);
+		ICE_PROG_FLG_ENTRY(hw, prof_id, ICE_FLG_TNL1, ICE_FLG_TNL2,
+				   ICE_FLG_PKT_DSI, ICE_FLG_PKT_DSI, idx);
 		break;
 
 	default:
diff --git a/drivers/net/ethernet/intel/ice/ice_lan_tx_rx.h b/drivers/net/ethernet/intel/ice/ice_lan_tx_rx.h
index ef4c79b5aa32..2e87b69aff4f 100644
--- a/drivers/net/ethernet/intel/ice/ice_lan_tx_rx.h
+++ b/drivers/net/ethernet/intel/ice/ice_lan_tx_rx.h
@@ -208,23 +208,23 @@ enum ice_flex_rx_mdid {
 	ICE_RX_MDID_HASH_HIGH,
 };
 
-/* Rx Flag64 packet flag bits */
-enum ice_rx_flg64_bits {
-	ICE_RXFLG_PKT_DSI	= 0,
-	ICE_RXFLG_EVLAN_x8100	= 15,
-	ICE_RXFLG_EVLAN_x9100,
-	ICE_RXFLG_VLAN_x8100,
-	ICE_RXFLG_TNL_MAC	= 22,
-	ICE_RXFLG_TNL_VLAN,
-	ICE_RXFLG_PKT_FRG,
-	ICE_RXFLG_FIN		= 32,
-	ICE_RXFLG_SYN,
-	ICE_RXFLG_RST,
-	ICE_RXFLG_TNL0		= 38,
-	ICE_RXFLG_TNL1,
-	ICE_RXFLG_TNL2,
-	ICE_RXFLG_UDP_GRE,
-	ICE_RXFLG_RSVD		= 63
+/* RX/TX Flag64 packet flag bits */
+enum ice_flg64_bits {
+	ICE_FLG_PKT_DSI		= 0,
+	ICE_FLG_EVLAN_x8100	= 15,
+	ICE_FLG_EVLAN_x9100,
+	ICE_FLG_VLAN_x8100,
+	ICE_FLG_TNL_MAC		= 22,
+	ICE_FLG_TNL_VLAN,
+	ICE_FLG_PKT_FRG,
+	ICE_FLG_FIN		= 32,
+	ICE_FLG_SYN,
+	ICE_FLG_RST,
+	ICE_FLG_TNL0		= 38,
+	ICE_FLG_TNL1,
+	ICE_FLG_TNL2,
+	ICE_FLG_UDP_GRE,
+	ICE_FLG_RSVD		= 63
 };
 
 /* for ice_32byte_rx_flex_desc.ptype_flexi_flags0 member */