[NET]: Implement SKB fast cloning.

Protocols that make extensive use of SKB cloning,
for example TCP, eat at least 2 allocations per
packet sent as a result.

To cut the kmalloc() count in half, we implement
a pre-allocation scheme wherein we allocate
2 sk_buff objects in advance, then use a simple
reference count to free up the memory at the
correct time.

Based upon an initial patch by Thomas Graf and
suggestions from Herbert Xu.

Signed-off-by: David S. Miller <davem@davemloft.net>
This commit is contained in:
David S. Miller 2005-08-17 14:57:30 -07:00 committed by David S. Miller
parent e92ae93a8a
commit d179cd1292
4 changed files with 97 additions and 15 deletions

View File

@ -162,6 +162,13 @@ struct skb_timeval {
u32 off_usec;
};
enum {
SKB_FCLONE_UNAVAILABLE,
SKB_FCLONE_ORIG,
SKB_FCLONE_CLONE,
};
/**
* struct sk_buff - socket buffer
* @next: Next buffer in list
@ -255,7 +262,8 @@ struct sk_buff {
ip_summed:2,
nohdr:1,
nfctinfo:3;
__u8 pkt_type;
__u8 pkt_type:3,
fclone:2;
__be16 protocol;
void (*destructor)(struct sk_buff *skb);
@ -295,8 +303,20 @@ struct sk_buff {
#include <asm/system.h>
extern void __kfree_skb(struct sk_buff *skb);
extern struct sk_buff *alloc_skb(unsigned int size,
unsigned int __nocast priority);
extern struct sk_buff *__alloc_skb(unsigned int size,
unsigned int __nocast priority, int fclone);
static inline struct sk_buff *alloc_skb(unsigned int size,
unsigned int __nocast priority)
{
return __alloc_skb(size, priority, 0);
}
static inline struct sk_buff *alloc_skb_fclone(unsigned int size,
unsigned int __nocast priority)
{
return __alloc_skb(size, priority, 1);
}
extern struct sk_buff *alloc_skb_from_cache(kmem_cache_t *cp,
unsigned int size,
unsigned int __nocast priority);

View File

@ -1200,7 +1200,7 @@ static inline struct sk_buff *sk_stream_alloc_pskb(struct sock *sk,
int hdr_len;
hdr_len = SKB_DATA_ALIGN(sk->sk_prot->max_header);
skb = alloc_skb(size + hdr_len, gfp);
skb = alloc_skb_fclone(size + hdr_len, gfp);
if (skb) {
skb->truesize += mem;
if (sk->sk_forward_alloc >= (int)skb->truesize ||

View File

@ -69,6 +69,7 @@
#include <asm/system.h>
static kmem_cache_t *skbuff_head_cache;
static kmem_cache_t *skbuff_fclone_cache;
struct timeval __read_mostly skb_tv_base;
@ -120,7 +121,7 @@ void skb_under_panic(struct sk_buff *skb, int sz, void *here)
*/
/**
* alloc_skb - allocate a network buffer
* __alloc_skb - allocate a network buffer
* @size: size to allocate
* @gfp_mask: allocation mask
*
@ -131,14 +132,20 @@ void skb_under_panic(struct sk_buff *skb, int sz, void *here)
* Buffers may only be allocated from interrupts using a @gfp_mask of
* %GFP_ATOMIC.
*/
struct sk_buff *alloc_skb(unsigned int size, unsigned int __nocast gfp_mask)
struct sk_buff *__alloc_skb(unsigned int size, unsigned int __nocast gfp_mask,
int fclone)
{
struct sk_buff *skb;
u8 *data;
/* Get the HEAD */
if (fclone)
skb = kmem_cache_alloc(skbuff_fclone_cache,
gfp_mask & ~__GFP_DMA);
else
skb = kmem_cache_alloc(skbuff_head_cache,
gfp_mask & ~__GFP_DMA);
if (!skb)
goto out;
@ -155,7 +162,15 @@ struct sk_buff *alloc_skb(unsigned int size, unsigned int __nocast gfp_mask)
skb->data = data;
skb->tail = data;
skb->end = data + size;
if (fclone) {
struct sk_buff *child = skb + 1;
atomic_t *fclone_ref = (atomic_t *) (child + 1);
skb->fclone = SKB_FCLONE_ORIG;
atomic_set(fclone_ref, 1);
child->fclone = SKB_FCLONE_UNAVAILABLE;
}
atomic_set(&(skb_shinfo(skb)->dataref), 1);
skb_shinfo(skb)->nr_frags = 0;
skb_shinfo(skb)->tso_size = 0;
@ -268,8 +283,34 @@ void skb_release_data(struct sk_buff *skb)
*/
void kfree_skbmem(struct sk_buff *skb)
{
struct sk_buff *other;
atomic_t *fclone_ref;
skb_release_data(skb);
switch (skb->fclone) {
case SKB_FCLONE_UNAVAILABLE:
kmem_cache_free(skbuff_head_cache, skb);
break;
case SKB_FCLONE_ORIG:
fclone_ref = (atomic_t *) (skb + 2);
if (atomic_dec_and_test(fclone_ref))
kmem_cache_free(skbuff_fclone_cache, skb);
break;
case SKB_FCLONE_CLONE:
fclone_ref = (atomic_t *) (skb + 1);
other = skb - 1;
/* The clone portion is available for
* fast-cloning again.
*/
skb->fclone = SKB_FCLONE_UNAVAILABLE;
if (atomic_dec_and_test(fclone_ref))
kmem_cache_free(skbuff_fclone_cache, other);
break;
};
}
/**
@ -324,10 +365,20 @@ void __kfree_skb(struct sk_buff *skb)
struct sk_buff *skb_clone(struct sk_buff *skb, unsigned int __nocast gfp_mask)
{
struct sk_buff *n = kmem_cache_alloc(skbuff_head_cache, gfp_mask);
struct sk_buff *n;
n = skb + 1;
if (skb->fclone == SKB_FCLONE_ORIG &&
n->fclone == SKB_FCLONE_UNAVAILABLE) {
atomic_t *fclone_ref = (atomic_t *) (n + 1);
n->fclone = SKB_FCLONE_CLONE;
atomic_inc(fclone_ref);
} else {
n = kmem_cache_alloc(skbuff_head_cache, gfp_mask);
if (!n)
return NULL;
n->fclone = SKB_FCLONE_UNAVAILABLE;
}
#define C(x) n->x = skb->x
@ -409,6 +460,7 @@ static void copy_skb_header(struct sk_buff *new, const struct sk_buff *old)
new->mac.raw = old->mac.raw + offset;
memcpy(new->cb, old->cb, sizeof(old->cb));
new->local_df = old->local_df;
new->fclone = SKB_FCLONE_UNAVAILABLE;
new->pkt_type = old->pkt_type;
new->tstamp = old->tstamp;
new->destructor = NULL;
@ -1647,13 +1699,23 @@ void __init skb_init(void)
NULL, NULL);
if (!skbuff_head_cache)
panic("cannot create skbuff cache");
skbuff_fclone_cache = kmem_cache_create("skbuff_fclone_cache",
(2*sizeof(struct sk_buff)) +
sizeof(atomic_t),
0,
SLAB_HWCACHE_ALIGN,
NULL, NULL);
if (!skbuff_fclone_cache)
panic("cannot create skbuff cache");
do_gettimeofday(&skb_tv_base);
}
EXPORT_SYMBOL(___pskb_trim);
EXPORT_SYMBOL(__kfree_skb);
EXPORT_SYMBOL(__pskb_pull_tail);
EXPORT_SYMBOL(alloc_skb);
EXPORT_SYMBOL(__alloc_skb);
EXPORT_SYMBOL(pskb_copy);
EXPORT_SYMBOL(pskb_expand_head);
EXPORT_SYMBOL(skb_checksum);

View File

@ -1582,7 +1582,7 @@ void tcp_send_fin(struct sock *sk)
} else {
/* Socket is locked, keep trying until memory is available. */
for (;;) {
skb = alloc_skb(MAX_TCP_HEADER, GFP_KERNEL);
skb = alloc_skb_fclone(MAX_TCP_HEADER, GFP_KERNEL);
if (skb)
break;
yield();
@ -1804,7 +1804,7 @@ int tcp_connect(struct sock *sk)
tcp_connect_init(sk);
buff = alloc_skb(MAX_TCP_HEADER + 15, sk->sk_allocation);
buff = alloc_skb_fclone(MAX_TCP_HEADER + 15, sk->sk_allocation);
if (unlikely(buff == NULL))
return -ENOBUFS;