tcp: add one skb cache for tx
On hosts with a lot of cores, RPC workloads suffer from heavy contention on slab spinlocks. 20.69% [kernel] [k] queued_spin_lock_slowpath 5.64% [kernel] [k] _raw_spin_lock 3.83% [kernel] [k] syscall_return_via_sysret 3.48% [kernel] [k] __entry_text_start 1.76% [kernel] [k] __netif_receive_skb_core 1.64% [kernel] [k] __fget For each sendmsg(), we allocate one skb, and free it when the ACK packet arrives. In many cases, ACK packets are handled by other CPUs, and this unfortunately incurs heavy costs for the slab layer. This patch uses an extra pointer in the socket structure, so that we try to reuse the same skb and avoid these expensive costs. We cache at most one skb per socket so this should be safe as far as memory pressure is concerned. Signed-off-by: Eric Dumazet <edumazet@google.com> Acked-by: Soheil Hassas Yeganeh <soheil@google.com> Acked-by: Willem de Bruijn <willemb@google.com> Signed-off-by: David S. Miller <davem@davemloft.net>
This commit is contained in:
parent
dc05360fee
commit
472c2e07ee
@ -414,6 +414,7 @@ struct sock {
|
|||||||
struct sk_buff *sk_send_head;
|
struct sk_buff *sk_send_head;
|
||||||
struct rb_root tcp_rtx_queue;
|
struct rb_root tcp_rtx_queue;
|
||||||
};
|
};
|
||||||
|
struct sk_buff *sk_tx_skb_cache;
|
||||||
struct sk_buff_head sk_write_queue;
|
struct sk_buff_head sk_write_queue;
|
||||||
__s32 sk_peek_off;
|
__s32 sk_peek_off;
|
||||||
int sk_write_pending;
|
int sk_write_pending;
|
||||||
@ -1463,6 +1464,10 @@ static inline void sk_mem_uncharge(struct sock *sk, int size)
|
|||||||
|
|
||||||
static inline void sk_wmem_free_skb(struct sock *sk, struct sk_buff *skb)
|
static inline void sk_wmem_free_skb(struct sock *sk, struct sk_buff *skb)
|
||||||
{
|
{
|
||||||
|
if (!sk->sk_tx_skb_cache) {
|
||||||
|
sk->sk_tx_skb_cache = skb;
|
||||||
|
return;
|
||||||
|
}
|
||||||
sock_set_flag(sk, SOCK_QUEUE_SHRUNK);
|
sock_set_flag(sk, SOCK_QUEUE_SHRUNK);
|
||||||
sk->sk_wmem_queued -= skb->truesize;
|
sk->sk_wmem_queued -= skb->truesize;
|
||||||
sk_mem_uncharge(sk, skb->truesize);
|
sk_mem_uncharge(sk, skb->truesize);
|
||||||
|
@ -865,6 +865,21 @@ struct sk_buff *sk_stream_alloc_skb(struct sock *sk, int size, gfp_t gfp,
|
|||||||
{
|
{
|
||||||
struct sk_buff *skb;
|
struct sk_buff *skb;
|
||||||
|
|
||||||
|
skb = sk->sk_tx_skb_cache;
|
||||||
|
if (skb && !size) {
|
||||||
|
const struct sk_buff_fclones *fclones;
|
||||||
|
|
||||||
|
fclones = container_of(skb, struct sk_buff_fclones, skb1);
|
||||||
|
if (refcount_read(&fclones->fclone_ref) == 1) {
|
||||||
|
sk->sk_wmem_queued -= skb->truesize;
|
||||||
|
sk_mem_uncharge(sk, skb->truesize);
|
||||||
|
skb->truesize -= skb->data_len;
|
||||||
|
sk->sk_tx_skb_cache = NULL;
|
||||||
|
pskb_trim(skb, 0);
|
||||||
|
INIT_LIST_HEAD(&skb->tcp_tsorted_anchor);
|
||||||
|
return skb;
|
||||||
|
}
|
||||||
|
}
|
||||||
/* The TCP header must be at least 32-bit aligned. */
|
/* The TCP header must be at least 32-bit aligned. */
|
||||||
size = ALIGN(size, 4);
|
size = ALIGN(size, 4);
|
||||||
|
|
||||||
@ -1098,30 +1113,6 @@ int tcp_sendpage(struct sock *sk, struct page *page, int offset,
|
|||||||
}
|
}
|
||||||
EXPORT_SYMBOL(tcp_sendpage);
|
EXPORT_SYMBOL(tcp_sendpage);
|
||||||
|
|
||||||
/* Do not bother using a page frag for very small frames.
|
|
||||||
* But use this heuristic only for the first skb in write queue.
|
|
||||||
*
|
|
||||||
* Having no payload in skb->head allows better SACK shifting
|
|
||||||
* in tcp_shift_skb_data(), reducing sack/rack overhead, because
|
|
||||||
* write queue has less skbs.
|
|
||||||
* Each skb can hold up to MAX_SKB_FRAGS * 32Kbytes, or ~0.5 MB.
|
|
||||||
* This also speeds up tso_fragment(), since it wont fallback
|
|
||||||
* to tcp_fragment().
|
|
||||||
*/
|
|
||||||
static int linear_payload_sz(bool first_skb)
|
|
||||||
{
|
|
||||||
if (first_skb)
|
|
||||||
return SKB_WITH_OVERHEAD(2048 - MAX_TCP_HEADER);
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
static int select_size(bool first_skb, bool zc)
|
|
||||||
{
|
|
||||||
if (zc)
|
|
||||||
return 0;
|
|
||||||
return linear_payload_sz(first_skb);
|
|
||||||
}
|
|
||||||
|
|
||||||
void tcp_free_fastopen_req(struct tcp_sock *tp)
|
void tcp_free_fastopen_req(struct tcp_sock *tp)
|
||||||
{
|
{
|
||||||
if (tp->fastopen_req) {
|
if (tp->fastopen_req) {
|
||||||
@ -1272,7 +1263,6 @@ restart:
|
|||||||
|
|
||||||
if (copy <= 0 || !tcp_skb_can_collapse_to(skb)) {
|
if (copy <= 0 || !tcp_skb_can_collapse_to(skb)) {
|
||||||
bool first_skb;
|
bool first_skb;
|
||||||
int linear;
|
|
||||||
|
|
||||||
new_segment:
|
new_segment:
|
||||||
if (!sk_stream_memory_free(sk))
|
if (!sk_stream_memory_free(sk))
|
||||||
@ -1283,8 +1273,7 @@ new_segment:
|
|||||||
goto restart;
|
goto restart;
|
||||||
}
|
}
|
||||||
first_skb = tcp_rtx_and_write_queues_empty(sk);
|
first_skb = tcp_rtx_and_write_queues_empty(sk);
|
||||||
linear = select_size(first_skb, zc);
|
skb = sk_stream_alloc_skb(sk, 0, sk->sk_allocation,
|
||||||
skb = sk_stream_alloc_skb(sk, linear, sk->sk_allocation,
|
|
||||||
first_skb);
|
first_skb);
|
||||||
if (!skb)
|
if (!skb)
|
||||||
goto wait_for_memory;
|
goto wait_for_memory;
|
||||||
@ -2552,6 +2541,13 @@ void tcp_write_queue_purge(struct sock *sk)
|
|||||||
sk_wmem_free_skb(sk, skb);
|
sk_wmem_free_skb(sk, skb);
|
||||||
}
|
}
|
||||||
tcp_rtx_queue_purge(sk);
|
tcp_rtx_queue_purge(sk);
|
||||||
|
skb = sk->sk_tx_skb_cache;
|
||||||
|
if (skb) {
|
||||||
|
sk->sk_wmem_queued -= skb->truesize;
|
||||||
|
sk_mem_uncharge(sk, skb->truesize);
|
||||||
|
__kfree_skb(skb);
|
||||||
|
sk->sk_tx_skb_cache = NULL;
|
||||||
|
}
|
||||||
INIT_LIST_HEAD(&tcp_sk(sk)->tsorted_sent_queue);
|
INIT_LIST_HEAD(&tcp_sk(sk)->tsorted_sent_queue);
|
||||||
sk_mem_reclaim(sk);
|
sk_mem_reclaim(sk);
|
||||||
tcp_clear_all_retrans_hints(tcp_sk(sk));
|
tcp_clear_all_retrans_hints(tcp_sk(sk));
|
||||||
|
Loading…
Reference in New Issue
Block a user