Merge branch 'inet-frags-avoid-possible-races-at-netns-dismantle'
Eric Dumazet says:

====================
inet: frags: avoid possible races at netns dismantle

This patch series fixes a race happening on netns dismantle with frag queues.

While rhashtable_free_and_destroy() is running, concurrent timers might run inet_frag_kill() and attempt rhashtable_remove_fast() calls. This is not allowed by rhashtable logic.

Since I do not want to add expensive synchronize_rcu() calls in the netns dismantle path, I could no longer keep the netns_frags structures inlined and had to allocate them dynamically instead.

The first ten patches make this preparation, so that the last patch clearly shows the fix.

As this patch series is not exactly trivial, I chose to target 5.3. We will backport it once it has soaked a bit.
====================

Signed-off-by: David S. Miller <davem@davemloft.net>
This commit is contained in:
commit
8fb91c3550
@ -4,18 +4,22 @@
|
||||
|
||||
#include <linux/rhashtable-types.h>
|
||||
|
||||
struct netns_frags {
|
||||
/* Per netns frag queues directory */
|
||||
struct fqdir {
|
||||
/* sysctls */
|
||||
long high_thresh;
|
||||
long low_thresh;
|
||||
int timeout;
|
||||
int max_dist;
|
||||
struct inet_frags *f;
|
||||
struct net *net;
|
||||
bool dead;
|
||||
|
||||
struct rhashtable rhashtable ____cacheline_aligned_in_smp;
|
||||
|
||||
/* Keep atomic mem on separate cachelines in structs that include it */
|
||||
atomic_long_t mem ____cacheline_aligned_in_smp;
|
||||
struct rcu_work destroy_rwork;
|
||||
};
|
||||
|
||||
/**
|
||||
@ -24,11 +28,13 @@ struct netns_frags {
|
||||
* @INET_FRAG_FIRST_IN: first fragment has arrived
|
||||
* @INET_FRAG_LAST_IN: final fragment has arrived
|
||||
* @INET_FRAG_COMPLETE: frag queue has been processed and is due for destruction
|
||||
* @INET_FRAG_HASH_DEAD: inet_frag_kill() has not removed fq from rhashtable
|
||||
*/
|
||||
enum {
|
||||
INET_FRAG_FIRST_IN = BIT(0),
|
||||
INET_FRAG_LAST_IN = BIT(1),
|
||||
INET_FRAG_COMPLETE = BIT(2),
|
||||
INET_FRAG_HASH_DEAD = BIT(3),
|
||||
};
|
||||
|
||||
struct frag_v4_compare_key {
|
||||
@ -64,7 +70,7 @@ struct frag_v6_compare_key {
|
||||
* @meat: length of received fragments so far
|
||||
* @flags: fragment queue flags
|
||||
* @max_size: maximum received fragment size
|
||||
* @net: namespace that this frag belongs to
|
||||
* @fqdir: pointer to struct fqdir
|
||||
* @rcu: rcu head for freeing deferral
|
||||
*/
|
||||
struct inet_frag_queue {
|
||||
@ -84,7 +90,7 @@ struct inet_frag_queue {
|
||||
int meat;
|
||||
__u8 flags;
|
||||
u16 max_size;
|
||||
struct netns_frags *net;
|
||||
struct fqdir *fqdir;
|
||||
struct rcu_head rcu;
|
||||
};
|
||||
|
||||
@ -103,16 +109,30 @@ struct inet_frags {
|
||||
int inet_frags_init(struct inet_frags *);
|
||||
void inet_frags_fini(struct inet_frags *);
|
||||
|
||||
static inline int inet_frags_init_net(struct netns_frags *nf)
|
||||
static inline int fqdir_init(struct fqdir **fqdirp, struct inet_frags *f,
|
||||
struct net *net)
|
||||
{
|
||||
atomic_long_set(&nf->mem, 0);
|
||||
return rhashtable_init(&nf->rhashtable, &nf->f->rhash_params);
|
||||
struct fqdir *fqdir = kzalloc(sizeof(*fqdir), GFP_KERNEL);
|
||||
int res;
|
||||
|
||||
if (!fqdir)
|
||||
return -ENOMEM;
|
||||
fqdir->f = f;
|
||||
fqdir->net = net;
|
||||
res = rhashtable_init(&fqdir->rhashtable, &fqdir->f->rhash_params);
|
||||
if (res < 0) {
|
||||
kfree(fqdir);
|
||||
return res;
|
||||
}
|
||||
*fqdirp = fqdir;
|
||||
return 0;
|
||||
}
|
||||
void inet_frags_exit_net(struct netns_frags *nf);
|
||||
|
||||
void fqdir_exit(struct fqdir *fqdir);
|
||||
|
||||
void inet_frag_kill(struct inet_frag_queue *q);
|
||||
void inet_frag_destroy(struct inet_frag_queue *q);
|
||||
struct inet_frag_queue *inet_frag_find(struct netns_frags *nf, void *key);
|
||||
struct inet_frag_queue *inet_frag_find(struct fqdir *fqdir, void *key);
|
||||
|
||||
/* Free all skbs in the queue; return the sum of their truesizes. */
|
||||
unsigned int inet_frag_rbtree_purge(struct rb_root *root);
|
||||
@ -125,19 +145,19 @@ static inline void inet_frag_put(struct inet_frag_queue *q)
|
||||
|
||||
/* Memory Tracking Functions. */
|
||||
|
||||
static inline long frag_mem_limit(const struct netns_frags *nf)
|
||||
static inline long frag_mem_limit(const struct fqdir *fqdir)
|
||||
{
|
||||
return atomic_long_read(&nf->mem);
|
||||
return atomic_long_read(&fqdir->mem);
|
||||
}
|
||||
|
||||
static inline void sub_frag_mem_limit(struct netns_frags *nf, long val)
|
||||
static inline void sub_frag_mem_limit(struct fqdir *fqdir, long val)
|
||||
{
|
||||
atomic_long_sub(val, &nf->mem);
|
||||
atomic_long_sub(val, &fqdir->mem);
|
||||
}
|
||||
|
||||
static inline void add_frag_mem_limit(struct netns_frags *nf, long val)
|
||||
static inline void add_frag_mem_limit(struct fqdir *fqdir, long val)
|
||||
{
|
||||
atomic_long_add(val, &nf->mem);
|
||||
atomic_long_add(val, &fqdir->mem);
|
||||
}
|
||||
|
||||
/* RFC 3168 support :
|
||||
|
@ -16,7 +16,7 @@ struct netns_sysctl_lowpan {
|
||||
|
||||
struct netns_ieee802154_lowpan {
|
||||
struct netns_sysctl_lowpan sysctl;
|
||||
struct netns_frags frags;
|
||||
struct fqdir *fqdir;
|
||||
};
|
||||
|
||||
#endif
|
||||
|
@ -72,7 +72,7 @@ struct netns_ipv4 {
|
||||
|
||||
struct inet_peer_base *peers;
|
||||
struct sock * __percpu *tcp_sk;
|
||||
struct netns_frags frags;
|
||||
struct fqdir *fqdir;
|
||||
#ifdef CONFIG_NETFILTER
|
||||
struct xt_table *iptable_filter;
|
||||
struct xt_table *iptable_mangle;
|
||||
|
@ -58,7 +58,7 @@ struct netns_ipv6 {
|
||||
struct ipv6_devconf *devconf_all;
|
||||
struct ipv6_devconf *devconf_dflt;
|
||||
struct inet_peer_base *peers;
|
||||
struct netns_frags frags;
|
||||
struct fqdir *fqdir;
|
||||
#ifdef CONFIG_NETFILTER
|
||||
struct xt_table *ip6table_filter;
|
||||
struct xt_table *ip6table_mangle;
|
||||
@ -116,7 +116,7 @@ struct netns_ipv6 {
|
||||
|
||||
#if IS_ENABLED(CONFIG_NF_DEFRAG_IPV6)
|
||||
struct netns_nf_frag {
|
||||
struct netns_frags frags;
|
||||
struct fqdir *fqdir;
|
||||
};
|
||||
#endif
|
||||
|
||||
|
@ -79,7 +79,7 @@ fq_find(struct net *net, const struct lowpan_802154_cb *cb,
|
||||
key.src = *src;
|
||||
key.dst = *dst;
|
||||
|
||||
q = inet_frag_find(&ieee802154_lowpan->frags, &key);
|
||||
q = inet_frag_find(ieee802154_lowpan->fqdir, &key);
|
||||
if (!q)
|
||||
return NULL;
|
||||
|
||||
@ -139,7 +139,7 @@ static int lowpan_frag_queue(struct lowpan_frag_queue *fq,
|
||||
fq->q.flags |= INET_FRAG_FIRST_IN;
|
||||
|
||||
fq->q.meat += skb->len;
|
||||
add_frag_mem_limit(fq->q.net, skb->truesize);
|
||||
add_frag_mem_limit(fq->q.fqdir, skb->truesize);
|
||||
|
||||
if (fq->q.flags == (INET_FRAG_FIRST_IN | INET_FRAG_LAST_IN) &&
|
||||
fq->q.meat == fq->q.len) {
|
||||
@ -326,23 +326,18 @@ err:
|
||||
static struct ctl_table lowpan_frags_ns_ctl_table[] = {
|
||||
{
|
||||
.procname = "6lowpanfrag_high_thresh",
|
||||
.data = &init_net.ieee802154_lowpan.frags.high_thresh,
|
||||
.maxlen = sizeof(unsigned long),
|
||||
.mode = 0644,
|
||||
.proc_handler = proc_doulongvec_minmax,
|
||||
.extra1 = &init_net.ieee802154_lowpan.frags.low_thresh
|
||||
},
|
||||
{
|
||||
.procname = "6lowpanfrag_low_thresh",
|
||||
.data = &init_net.ieee802154_lowpan.frags.low_thresh,
|
||||
.maxlen = sizeof(unsigned long),
|
||||
.mode = 0644,
|
||||
.proc_handler = proc_doulongvec_minmax,
|
||||
.extra2 = &init_net.ieee802154_lowpan.frags.high_thresh
|
||||
},
|
||||
{
|
||||
.procname = "6lowpanfrag_time",
|
||||
.data = &init_net.ieee802154_lowpan.frags.timeout,
|
||||
.maxlen = sizeof(int),
|
||||
.mode = 0644,
|
||||
.proc_handler = proc_dointvec_jiffies,
|
||||
@ -377,17 +372,17 @@ static int __net_init lowpan_frags_ns_sysctl_register(struct net *net)
|
||||
if (table == NULL)
|
||||
goto err_alloc;
|
||||
|
||||
table[0].data = &ieee802154_lowpan->frags.high_thresh;
|
||||
table[0].extra1 = &ieee802154_lowpan->frags.low_thresh;
|
||||
table[1].data = &ieee802154_lowpan->frags.low_thresh;
|
||||
table[1].extra2 = &ieee802154_lowpan->frags.high_thresh;
|
||||
table[2].data = &ieee802154_lowpan->frags.timeout;
|
||||
|
||||
/* Don't export sysctls to unprivileged users */
|
||||
if (net->user_ns != &init_user_ns)
|
||||
table[0].procname = NULL;
|
||||
}
|
||||
|
||||
table[0].data = &ieee802154_lowpan->fqdir->high_thresh;
|
||||
table[0].extra1 = &ieee802154_lowpan->fqdir->low_thresh;
|
||||
table[1].data = &ieee802154_lowpan->fqdir->low_thresh;
|
||||
table[1].extra2 = &ieee802154_lowpan->fqdir->high_thresh;
|
||||
table[2].data = &ieee802154_lowpan->fqdir->timeout;
|
||||
|
||||
hdr = register_net_sysctl(net, "net/ieee802154/6lowpan", table);
|
||||
if (hdr == NULL)
|
||||
goto err_reg;
|
||||
@ -454,17 +449,18 @@ static int __net_init lowpan_frags_init_net(struct net *net)
|
||||
net_ieee802154_lowpan(net);
|
||||
int res;
|
||||
|
||||
ieee802154_lowpan->frags.high_thresh = IPV6_FRAG_HIGH_THRESH;
|
||||
ieee802154_lowpan->frags.low_thresh = IPV6_FRAG_LOW_THRESH;
|
||||
ieee802154_lowpan->frags.timeout = IPV6_FRAG_TIMEOUT;
|
||||
ieee802154_lowpan->frags.f = &lowpan_frags;
|
||||
|
||||
res = inet_frags_init_net(&ieee802154_lowpan->frags);
|
||||
res = fqdir_init(&ieee802154_lowpan->fqdir, &lowpan_frags, net);
|
||||
if (res < 0)
|
||||
return res;
|
||||
|
||||
ieee802154_lowpan->fqdir->high_thresh = IPV6_FRAG_HIGH_THRESH;
|
||||
ieee802154_lowpan->fqdir->low_thresh = IPV6_FRAG_LOW_THRESH;
|
||||
ieee802154_lowpan->fqdir->timeout = IPV6_FRAG_TIMEOUT;
|
||||
|
||||
res = lowpan_frags_ns_sysctl_register(net);
|
||||
if (res < 0)
|
||||
inet_frags_exit_net(&ieee802154_lowpan->frags);
|
||||
fqdir_exit(ieee802154_lowpan->fqdir);
|
||||
return res;
|
||||
}
|
||||
|
||||
@ -474,7 +470,7 @@ static void __net_exit lowpan_frags_exit_net(struct net *net)
|
||||
net_ieee802154_lowpan(net);
|
||||
|
||||
lowpan_frags_ns_sysctl_unregister(net);
|
||||
inet_frags_exit_net(&ieee802154_lowpan->frags);
|
||||
fqdir_exit(ieee802154_lowpan->fqdir);
|
||||
}
|
||||
|
||||
static struct pernet_operations lowpan_frags_ops = {
|
||||
|
@ -124,34 +124,50 @@ void inet_frags_fini(struct inet_frags *f)
|
||||
}
|
||||
EXPORT_SYMBOL(inet_frags_fini);
|
||||
|
||||
/* called from rhashtable_free_and_destroy() at netns_frags dismantle */
|
||||
static void inet_frags_free_cb(void *ptr, void *arg)
|
||||
{
|
||||
struct inet_frag_queue *fq = ptr;
|
||||
int count;
|
||||
|
||||
/* If we can not cancel the timer, it means this frag_queue
|
||||
* is already disappearing, we have nothing to do.
|
||||
* Otherwise, we own a refcount until the end of this function.
|
||||
*/
|
||||
if (!del_timer(&fq->timer))
|
||||
return;
|
||||
count = del_timer_sync(&fq->timer) ? 1 : 0;
|
||||
|
||||
spin_lock_bh(&fq->lock);
|
||||
if (!(fq->flags & INET_FRAG_COMPLETE)) {
|
||||
fq->flags |= INET_FRAG_COMPLETE;
|
||||
refcount_dec(&fq->refcnt);
|
||||
count++;
|
||||
} else if (fq->flags & INET_FRAG_HASH_DEAD) {
|
||||
count++;
|
||||
}
|
||||
spin_unlock_bh(&fq->lock);
|
||||
|
||||
inet_frag_put(fq);
|
||||
if (refcount_sub_and_test(count, &fq->refcnt))
|
||||
inet_frag_destroy(fq);
|
||||
}
|
||||
|
||||
void inet_frags_exit_net(struct netns_frags *nf)
|
||||
static void fqdir_rwork_fn(struct work_struct *work)
|
||||
{
|
||||
nf->high_thresh = 0; /* prevent creation of new frags */
|
||||
struct fqdir *fqdir = container_of(to_rcu_work(work),
|
||||
struct fqdir, destroy_rwork);
|
||||
|
||||
rhashtable_free_and_destroy(&nf->rhashtable, inet_frags_free_cb, NULL);
|
||||
rhashtable_free_and_destroy(&fqdir->rhashtable, inet_frags_free_cb, NULL);
|
||||
kfree(fqdir);
|
||||
}
|
||||
EXPORT_SYMBOL(inet_frags_exit_net);
|
||||
|
||||
void fqdir_exit(struct fqdir *fqdir)
|
||||
{
|
||||
fqdir->high_thresh = 0; /* prevent creation of new frags */
|
||||
|
||||
/* paired with READ_ONCE() in inet_frag_kill() :
|
||||
* We want to prevent rhashtable_remove_fast() calls
|
||||
*/
|
||||
smp_store_release(&fqdir->dead, true);
|
||||
|
||||
INIT_RCU_WORK(&fqdir->destroy_rwork, fqdir_rwork_fn);
|
||||
queue_rcu_work(system_wq, &fqdir->destroy_rwork);
|
||||
|
||||
}
|
||||
EXPORT_SYMBOL(fqdir_exit);
|
||||
|
||||
void inet_frag_kill(struct inet_frag_queue *fq)
|
||||
{
|
||||
@ -159,11 +175,21 @@ void inet_frag_kill(struct inet_frag_queue *fq)
|
||||
refcount_dec(&fq->refcnt);
|
||||
|
||||
if (!(fq->flags & INET_FRAG_COMPLETE)) {
|
||||
struct netns_frags *nf = fq->net;
|
||||
struct fqdir *fqdir = fq->fqdir;
|
||||
|
||||
fq->flags |= INET_FRAG_COMPLETE;
|
||||
rhashtable_remove_fast(&nf->rhashtable, &fq->node, nf->f->rhash_params);
|
||||
refcount_dec(&fq->refcnt);
|
||||
rcu_read_lock();
|
||||
/* This READ_ONCE() is paired with smp_store_release()
|
||||
* in fqdir_exit().
|
||||
*/
|
||||
if (!READ_ONCE(fqdir->dead)) {
|
||||
rhashtable_remove_fast(&fqdir->rhashtable, &fq->node,
|
||||
fqdir->f->rhash_params);
|
||||
refcount_dec(&fq->refcnt);
|
||||
} else {
|
||||
fq->flags |= INET_FRAG_HASH_DEAD;
|
||||
}
|
||||
rcu_read_unlock();
|
||||
}
|
||||
}
|
||||
EXPORT_SYMBOL(inet_frag_kill);
|
||||
@ -172,7 +198,7 @@ static void inet_frag_destroy_rcu(struct rcu_head *head)
|
||||
{
|
||||
struct inet_frag_queue *q = container_of(head, struct inet_frag_queue,
|
||||
rcu);
|
||||
struct inet_frags *f = q->net->f;
|
||||
struct inet_frags *f = q->fqdir->f;
|
||||
|
||||
if (f->destructor)
|
||||
f->destructor(q);
|
||||
@ -203,7 +229,7 @@ EXPORT_SYMBOL(inet_frag_rbtree_purge);
|
||||
|
||||
void inet_frag_destroy(struct inet_frag_queue *q)
|
||||
{
|
||||
struct netns_frags *nf;
|
||||
struct fqdir *fqdir;
|
||||
unsigned int sum, sum_truesize = 0;
|
||||
struct inet_frags *f;
|
||||
|
||||
@ -211,18 +237,18 @@ void inet_frag_destroy(struct inet_frag_queue *q)
|
||||
WARN_ON(del_timer(&q->timer) != 0);
|
||||
|
||||
/* Release all fragment data. */
|
||||
nf = q->net;
|
||||
f = nf->f;
|
||||
fqdir = q->fqdir;
|
||||
f = fqdir->f;
|
||||
sum_truesize = inet_frag_rbtree_purge(&q->rb_fragments);
|
||||
sum = sum_truesize + f->qsize;
|
||||
|
||||
call_rcu(&q->rcu, inet_frag_destroy_rcu);
|
||||
|
||||
sub_frag_mem_limit(nf, sum);
|
||||
sub_frag_mem_limit(fqdir, sum);
|
||||
}
|
||||
EXPORT_SYMBOL(inet_frag_destroy);
|
||||
|
||||
static struct inet_frag_queue *inet_frag_alloc(struct netns_frags *nf,
|
||||
static struct inet_frag_queue *inet_frag_alloc(struct fqdir *fqdir,
|
||||
struct inet_frags *f,
|
||||
void *arg)
|
||||
{
|
||||
@ -232,9 +258,9 @@ static struct inet_frag_queue *inet_frag_alloc(struct netns_frags *nf,
|
||||
if (!q)
|
||||
return NULL;
|
||||
|
||||
q->net = nf;
|
||||
q->fqdir = fqdir;
|
||||
f->constructor(q, arg);
|
||||
add_frag_mem_limit(nf, f->qsize);
|
||||
add_frag_mem_limit(fqdir, f->qsize);
|
||||
|
||||
timer_setup(&q->timer, f->frag_expire, 0);
|
||||
spin_lock_init(&q->lock);
|
||||
@ -243,21 +269,21 @@ static struct inet_frag_queue *inet_frag_alloc(struct netns_frags *nf,
|
||||
return q;
|
||||
}
|
||||
|
||||
static struct inet_frag_queue *inet_frag_create(struct netns_frags *nf,
|
||||
static struct inet_frag_queue *inet_frag_create(struct fqdir *fqdir,
|
||||
void *arg,
|
||||
struct inet_frag_queue **prev)
|
||||
{
|
||||
struct inet_frags *f = nf->f;
|
||||
struct inet_frags *f = fqdir->f;
|
||||
struct inet_frag_queue *q;
|
||||
|
||||
q = inet_frag_alloc(nf, f, arg);
|
||||
q = inet_frag_alloc(fqdir, f, arg);
|
||||
if (!q) {
|
||||
*prev = ERR_PTR(-ENOMEM);
|
||||
return NULL;
|
||||
}
|
||||
mod_timer(&q->timer, jiffies + nf->timeout);
|
||||
mod_timer(&q->timer, jiffies + fqdir->timeout);
|
||||
|
||||
*prev = rhashtable_lookup_get_insert_key(&nf->rhashtable, &q->key,
|
||||
*prev = rhashtable_lookup_get_insert_key(&fqdir->rhashtable, &q->key,
|
||||
&q->node, f->rhash_params);
|
||||
if (*prev) {
|
||||
q->flags |= INET_FRAG_COMPLETE;
|
||||
@ -269,18 +295,18 @@ static struct inet_frag_queue *inet_frag_create(struct netns_frags *nf,
|
||||
}
|
||||
|
||||
/* TODO : call from rcu_read_lock() and no longer use refcount_inc_not_zero() */
|
||||
struct inet_frag_queue *inet_frag_find(struct netns_frags *nf, void *key)
|
||||
struct inet_frag_queue *inet_frag_find(struct fqdir *fqdir, void *key)
|
||||
{
|
||||
struct inet_frag_queue *fq = NULL, *prev;
|
||||
|
||||
if (!nf->high_thresh || frag_mem_limit(nf) > nf->high_thresh)
|
||||
if (!fqdir->high_thresh || frag_mem_limit(fqdir) > fqdir->high_thresh)
|
||||
return NULL;
|
||||
|
||||
rcu_read_lock();
|
||||
|
||||
prev = rhashtable_lookup(&nf->rhashtable, key, nf->f->rhash_params);
|
||||
prev = rhashtable_lookup(&fqdir->rhashtable, key, fqdir->f->rhash_params);
|
||||
if (!prev)
|
||||
fq = inet_frag_create(nf, key, &prev);
|
||||
fq = inet_frag_create(fqdir, key, &prev);
|
||||
if (prev && !IS_ERR(prev)) {
|
||||
fq = prev;
|
||||
if (!refcount_inc_not_zero(&fq->refcnt))
|
||||
@ -391,7 +417,7 @@ void *inet_frag_reasm_prepare(struct inet_frag_queue *q, struct sk_buff *skb,
|
||||
|
||||
delta += head->truesize;
|
||||
if (delta)
|
||||
add_frag_mem_limit(q->net, delta);
|
||||
add_frag_mem_limit(q->fqdir, delta);
|
||||
|
||||
/* If the first fragment is fragmented itself, we split
|
||||
* it to two chunks: the first with data and paged part
|
||||
@ -413,7 +439,7 @@ void *inet_frag_reasm_prepare(struct inet_frag_queue *q, struct sk_buff *skb,
|
||||
head->truesize += clone->truesize;
|
||||
clone->csum = 0;
|
||||
clone->ip_summed = head->ip_summed;
|
||||
add_frag_mem_limit(q->net, clone->truesize);
|
||||
add_frag_mem_limit(q->fqdir, clone->truesize);
|
||||
skb_shinfo(head)->frag_list = clone;
|
||||
nextp = &clone->next;
|
||||
} else {
|
||||
@ -466,7 +492,7 @@ void inet_frag_reasm_finish(struct inet_frag_queue *q, struct sk_buff *head,
|
||||
rbn = rbnext;
|
||||
}
|
||||
}
|
||||
sub_frag_mem_limit(q->net, head->truesize);
|
||||
sub_frag_mem_limit(q->fqdir, head->truesize);
|
||||
|
||||
*nextp = NULL;
|
||||
skb_mark_not_on_list(head);
|
||||
@ -494,7 +520,7 @@ struct sk_buff *inet_frag_pull_head(struct inet_frag_queue *q)
|
||||
if (head == q->fragments_tail)
|
||||
q->fragments_tail = NULL;
|
||||
|
||||
sub_frag_mem_limit(q->net, head->truesize);
|
||||
sub_frag_mem_limit(q->fqdir, head->truesize);
|
||||
|
||||
return head;
|
||||
}
|
||||
|
@ -82,15 +82,13 @@ static int ip_frag_reasm(struct ipq *qp, struct sk_buff *skb,
|
||||
static void ip4_frag_init(struct inet_frag_queue *q, const void *a)
|
||||
{
|
||||
struct ipq *qp = container_of(q, struct ipq, q);
|
||||
struct netns_ipv4 *ipv4 = container_of(q->net, struct netns_ipv4,
|
||||
frags);
|
||||
struct net *net = container_of(ipv4, struct net, ipv4);
|
||||
struct net *net = q->fqdir->net;
|
||||
|
||||
const struct frag_v4_compare_key *key = a;
|
||||
|
||||
q->key.v4 = *key;
|
||||
qp->ecn = 0;
|
||||
qp->peer = q->net->max_dist ?
|
||||
qp->peer = q->fqdir->max_dist ?
|
||||
inet_getpeer_v4(net->ipv4.peers, key->saddr, key->vif, 1) :
|
||||
NULL;
|
||||
}
|
||||
@ -142,7 +140,7 @@ static void ip_expire(struct timer_list *t)
|
||||
int err;
|
||||
|
||||
qp = container_of(frag, struct ipq, q);
|
||||
net = container_of(qp->q.net, struct net, ipv4.frags);
|
||||
net = qp->q.fqdir->net;
|
||||
|
||||
rcu_read_lock();
|
||||
spin_lock(&qp->q.lock);
|
||||
@ -211,7 +209,7 @@ static struct ipq *ip_find(struct net *net, struct iphdr *iph,
|
||||
};
|
||||
struct inet_frag_queue *q;
|
||||
|
||||
q = inet_frag_find(&net->ipv4.frags, &key);
|
||||
q = inet_frag_find(net->ipv4.fqdir, &key);
|
||||
if (!q)
|
||||
return NULL;
|
||||
|
||||
@ -222,7 +220,7 @@ static struct ipq *ip_find(struct net *net, struct iphdr *iph,
|
||||
static int ip_frag_too_far(struct ipq *qp)
|
||||
{
|
||||
struct inet_peer *peer = qp->peer;
|
||||
unsigned int max = qp->q.net->max_dist;
|
||||
unsigned int max = qp->q.fqdir->max_dist;
|
||||
unsigned int start, end;
|
||||
|
||||
int rc;
|
||||
@ -236,12 +234,8 @@ static int ip_frag_too_far(struct ipq *qp)
|
||||
|
||||
rc = qp->q.fragments_tail && (end - start) > max;
|
||||
|
||||
if (rc) {
|
||||
struct net *net;
|
||||
|
||||
net = container_of(qp->q.net, struct net, ipv4.frags);
|
||||
__IP_INC_STATS(net, IPSTATS_MIB_REASMFAILS);
|
||||
}
|
||||
if (rc)
|
||||
__IP_INC_STATS(qp->q.fqdir->net, IPSTATS_MIB_REASMFAILS);
|
||||
|
||||
return rc;
|
||||
}
|
||||
@ -250,13 +244,13 @@ static int ip_frag_reinit(struct ipq *qp)
|
||||
{
|
||||
unsigned int sum_truesize = 0;
|
||||
|
||||
if (!mod_timer(&qp->q.timer, jiffies + qp->q.net->timeout)) {
|
||||
if (!mod_timer(&qp->q.timer, jiffies + qp->q.fqdir->timeout)) {
|
||||
refcount_inc(&qp->q.refcnt);
|
||||
return -ETIMEDOUT;
|
||||
}
|
||||
|
||||
sum_truesize = inet_frag_rbtree_purge(&qp->q.rb_fragments);
|
||||
sub_frag_mem_limit(qp->q.net, sum_truesize);
|
||||
sub_frag_mem_limit(qp->q.fqdir, sum_truesize);
|
||||
|
||||
qp->q.flags = 0;
|
||||
qp->q.len = 0;
|
||||
@ -273,7 +267,7 @@ static int ip_frag_reinit(struct ipq *qp)
|
||||
/* Add new segment to existing queue. */
|
||||
static int ip_frag_queue(struct ipq *qp, struct sk_buff *skb)
|
||||
{
|
||||
struct net *net = container_of(qp->q.net, struct net, ipv4.frags);
|
||||
struct net *net = qp->q.fqdir->net;
|
||||
int ihl, end, flags, offset;
|
||||
struct sk_buff *prev_tail;
|
||||
struct net_device *dev;
|
||||
@ -352,7 +346,7 @@ static int ip_frag_queue(struct ipq *qp, struct sk_buff *skb)
|
||||
qp->q.stamp = skb->tstamp;
|
||||
qp->q.meat += skb->len;
|
||||
qp->ecn |= ecn;
|
||||
add_frag_mem_limit(qp->q.net, skb->truesize);
|
||||
add_frag_mem_limit(qp->q.fqdir, skb->truesize);
|
||||
if (offset == 0)
|
||||
qp->q.flags |= INET_FRAG_FIRST_IN;
|
||||
|
||||
@ -399,7 +393,7 @@ err:
|
||||
static int ip_frag_reasm(struct ipq *qp, struct sk_buff *skb,
|
||||
struct sk_buff *prev_tail, struct net_device *dev)
|
||||
{
|
||||
struct net *net = container_of(qp->q.net, struct net, ipv4.frags);
|
||||
struct net *net = qp->q.fqdir->net;
|
||||
struct iphdr *iph;
|
||||
void *reasm_data;
|
||||
int len, err;
|
||||
@ -544,30 +538,24 @@ static int dist_min;
|
||||
static struct ctl_table ip4_frags_ns_ctl_table[] = {
|
||||
{
|
||||
.procname = "ipfrag_high_thresh",
|
||||
.data = &init_net.ipv4.frags.high_thresh,
|
||||
.maxlen = sizeof(unsigned long),
|
||||
.mode = 0644,
|
||||
.proc_handler = proc_doulongvec_minmax,
|
||||
.extra1 = &init_net.ipv4.frags.low_thresh
|
||||
},
|
||||
{
|
||||
.procname = "ipfrag_low_thresh",
|
||||
.data = &init_net.ipv4.frags.low_thresh,
|
||||
.maxlen = sizeof(unsigned long),
|
||||
.mode = 0644,
|
||||
.proc_handler = proc_doulongvec_minmax,
|
||||
.extra2 = &init_net.ipv4.frags.high_thresh
|
||||
},
|
||||
{
|
||||
.procname = "ipfrag_time",
|
||||
.data = &init_net.ipv4.frags.timeout,
|
||||
.maxlen = sizeof(int),
|
||||
.mode = 0644,
|
||||
.proc_handler = proc_dointvec_jiffies,
|
||||
},
|
||||
{
|
||||
.procname = "ipfrag_max_dist",
|
||||
.data = &init_net.ipv4.frags.max_dist,
|
||||
.maxlen = sizeof(int),
|
||||
.mode = 0644,
|
||||
.proc_handler = proc_dointvec_minmax,
|
||||
@ -600,13 +588,13 @@ static int __net_init ip4_frags_ns_ctl_register(struct net *net)
|
||||
if (!table)
|
||||
goto err_alloc;
|
||||
|
||||
table[0].data = &net->ipv4.frags.high_thresh;
|
||||
table[0].extra1 = &net->ipv4.frags.low_thresh;
|
||||
table[1].data = &net->ipv4.frags.low_thresh;
|
||||
table[1].extra2 = &net->ipv4.frags.high_thresh;
|
||||
table[2].data = &net->ipv4.frags.timeout;
|
||||
table[3].data = &net->ipv4.frags.max_dist;
|
||||
}
|
||||
table[0].data = &net->ipv4.fqdir->high_thresh;
|
||||
table[0].extra1 = &net->ipv4.fqdir->low_thresh;
|
||||
table[1].data = &net->ipv4.fqdir->low_thresh;
|
||||
table[1].extra2 = &net->ipv4.fqdir->high_thresh;
|
||||
table[2].data = &net->ipv4.fqdir->timeout;
|
||||
table[3].data = &net->ipv4.fqdir->max_dist;
|
||||
|
||||
hdr = register_net_sysctl(net, "net/ipv4", table);
|
||||
if (!hdr)
|
||||
@ -654,6 +642,9 @@ static int __net_init ipv4_frags_init_net(struct net *net)
|
||||
{
|
||||
int res;
|
||||
|
||||
res = fqdir_init(&net->ipv4.fqdir, &ip4_frags, net);
|
||||
if (res < 0)
|
||||
return res;
|
||||
/* Fragment cache limits.
|
||||
*
|
||||
* The fragment memory accounting code, (tries to) account for
|
||||
@ -668,31 +659,27 @@ static int __net_init ipv4_frags_init_net(struct net *net)
|
||||
* we will prune down to 3MB, making room for approx 8 big 64K
|
||||
* fragments 8x128k.
|
||||
*/
|
||||
net->ipv4.frags.high_thresh = 4 * 1024 * 1024;
|
||||
net->ipv4.frags.low_thresh = 3 * 1024 * 1024;
|
||||
net->ipv4.fqdir->high_thresh = 4 * 1024 * 1024;
|
||||
net->ipv4.fqdir->low_thresh = 3 * 1024 * 1024;
|
||||
/*
|
||||
* Important NOTE! Fragment queue must be destroyed before MSL expires.
|
||||
* RFC791 is wrong proposing to prolongate timer each fragment arrival
|
||||
* by TTL.
|
||||
*/
|
||||
net->ipv4.frags.timeout = IP_FRAG_TIME;
|
||||
net->ipv4.fqdir->timeout = IP_FRAG_TIME;
|
||||
|
||||
net->ipv4.frags.max_dist = 64;
|
||||
net->ipv4.frags.f = &ip4_frags;
|
||||
net->ipv4.fqdir->max_dist = 64;
|
||||
|
||||
res = inet_frags_init_net(&net->ipv4.frags);
|
||||
if (res < 0)
|
||||
return res;
|
||||
res = ip4_frags_ns_ctl_register(net);
|
||||
if (res < 0)
|
||||
inet_frags_exit_net(&net->ipv4.frags);
|
||||
fqdir_exit(net->ipv4.fqdir);
|
||||
return res;
|
||||
}
|
||||
|
||||
static void __net_exit ipv4_frags_exit_net(struct net *net)
|
||||
{
|
||||
ip4_frags_ns_ctl_unregister(net);
|
||||
inet_frags_exit_net(&net->ipv4.frags);
|
||||
fqdir_exit(net->ipv4.fqdir);
|
||||
}
|
||||
|
||||
static struct pernet_operations ip4_frags_ops = {
|
||||
|
@ -72,8 +72,8 @@ static int sockstat_seq_show(struct seq_file *seq, void *v)
|
||||
seq_printf(seq, "RAW: inuse %d\n",
|
||||
sock_prot_inuse_get(net, &raw_prot));
|
||||
seq_printf(seq, "FRAG: inuse %u memory %lu\n",
|
||||
atomic_read(&net->ipv4.frags.rhashtable.nelems),
|
||||
frag_mem_limit(&net->ipv4.frags));
|
||||
atomic_read(&net->ipv4.fqdir->rhashtable.nelems),
|
||||
frag_mem_limit(net->ipv4.fqdir));
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
@ -58,26 +58,21 @@ static struct inet_frags nf_frags;
|
||||
static struct ctl_table nf_ct_frag6_sysctl_table[] = {
|
||||
{
|
||||
.procname = "nf_conntrack_frag6_timeout",
|
||||
.data = &init_net.nf_frag.frags.timeout,
|
||||
.maxlen = sizeof(unsigned int),
|
||||
.mode = 0644,
|
||||
.proc_handler = proc_dointvec_jiffies,
|
||||
},
|
||||
{
|
||||
.procname = "nf_conntrack_frag6_low_thresh",
|
||||
.data = &init_net.nf_frag.frags.low_thresh,
|
||||
.maxlen = sizeof(unsigned long),
|
||||
.mode = 0644,
|
||||
.proc_handler = proc_doulongvec_minmax,
|
||||
.extra2 = &init_net.nf_frag.frags.high_thresh
|
||||
},
|
||||
{
|
||||
.procname = "nf_conntrack_frag6_high_thresh",
|
||||
.data = &init_net.nf_frag.frags.high_thresh,
|
||||
.maxlen = sizeof(unsigned long),
|
||||
.mode = 0644,
|
||||
.proc_handler = proc_doulongvec_minmax,
|
||||
.extra1 = &init_net.nf_frag.frags.low_thresh
|
||||
},
|
||||
{ }
|
||||
};
|
||||
@ -93,15 +88,15 @@ static int nf_ct_frag6_sysctl_register(struct net *net)
|
||||
GFP_KERNEL);
|
||||
if (table == NULL)
|
||||
goto err_alloc;
|
||||
|
||||
table[0].data = &net->nf_frag.frags.timeout;
|
||||
table[1].data = &net->nf_frag.frags.low_thresh;
|
||||
table[1].extra2 = &net->nf_frag.frags.high_thresh;
|
||||
table[2].data = &net->nf_frag.frags.high_thresh;
|
||||
table[2].extra1 = &net->nf_frag.frags.low_thresh;
|
||||
table[2].extra2 = &init_net.nf_frag.frags.high_thresh;
|
||||
}
|
||||
|
||||
table[0].data = &net->nf_frag.fqdir->timeout;
|
||||
table[1].data = &net->nf_frag.fqdir->low_thresh;
|
||||
table[1].extra2 = &net->nf_frag.fqdir->high_thresh;
|
||||
table[2].data = &net->nf_frag.fqdir->high_thresh;
|
||||
table[2].extra1 = &net->nf_frag.fqdir->low_thresh;
|
||||
table[2].extra2 = &init_net.nf_frag.fqdir->high_thresh;
|
||||
|
||||
hdr = register_net_sysctl(net, "net/netfilter", table);
|
||||
if (hdr == NULL)
|
||||
goto err_reg;
|
||||
@ -148,12 +143,10 @@ static void nf_ct_frag6_expire(struct timer_list *t)
|
||||
{
|
||||
struct inet_frag_queue *frag = from_timer(frag, t, timer);
|
||||
struct frag_queue *fq;
|
||||
struct net *net;
|
||||
|
||||
fq = container_of(frag, struct frag_queue, q);
|
||||
net = container_of(fq->q.net, struct net, nf_frag.frags);
|
||||
|
||||
ip6frag_expire_frag_queue(net, fq);
|
||||
ip6frag_expire_frag_queue(fq->q.fqdir->net, fq);
|
||||
}
|
||||
|
||||
/* Creation primitives. */
|
||||
@ -169,7 +162,7 @@ static struct frag_queue *fq_find(struct net *net, __be32 id, u32 user,
|
||||
};
|
||||
struct inet_frag_queue *q;
|
||||
|
||||
q = inet_frag_find(&net->nf_frag.frags, &key);
|
||||
q = inet_frag_find(net->nf_frag.fqdir, &key);
|
||||
if (!q)
|
||||
return NULL;
|
||||
|
||||
@ -276,7 +269,7 @@ static int nf_ct_frag6_queue(struct frag_queue *fq, struct sk_buff *skb,
|
||||
fq->ecn |= ecn;
|
||||
if (payload_len > fq->q.max_size)
|
||||
fq->q.max_size = payload_len;
|
||||
add_frag_mem_limit(fq->q.net, skb->truesize);
|
||||
add_frag_mem_limit(fq->q.fqdir, skb->truesize);
|
||||
|
||||
/* The first fragment.
|
||||
* nhoffset is obtained from the first fragment, of course.
|
||||
@ -496,24 +489,24 @@ static int nf_ct_net_init(struct net *net)
|
||||
{
|
||||
int res;
|
||||
|
||||
net->nf_frag.frags.high_thresh = IPV6_FRAG_HIGH_THRESH;
|
||||
net->nf_frag.frags.low_thresh = IPV6_FRAG_LOW_THRESH;
|
||||
net->nf_frag.frags.timeout = IPV6_FRAG_TIMEOUT;
|
||||
net->nf_frag.frags.f = &nf_frags;
|
||||
|
||||
res = inet_frags_init_net(&net->nf_frag.frags);
|
||||
res = fqdir_init(&net->nf_frag.fqdir, &nf_frags, net);
|
||||
if (res < 0)
|
||||
return res;
|
||||
|
||||
net->nf_frag.fqdir->high_thresh = IPV6_FRAG_HIGH_THRESH;
|
||||
net->nf_frag.fqdir->low_thresh = IPV6_FRAG_LOW_THRESH;
|
||||
net->nf_frag.fqdir->timeout = IPV6_FRAG_TIMEOUT;
|
||||
|
||||
res = nf_ct_frag6_sysctl_register(net);
|
||||
if (res < 0)
|
||||
inet_frags_exit_net(&net->nf_frag.frags);
|
||||
fqdir_exit(net->nf_frag.fqdir);
|
||||
return res;
|
||||
}
|
||||
|
||||
static void nf_ct_net_exit(struct net *net)
|
||||
{
|
||||
nf_ct_frags6_sysctl_unregister(net);
|
||||
inet_frags_exit_net(&net->nf_frag.frags);
|
||||
fqdir_exit(net->nf_frag.fqdir);
|
||||
}
|
||||
|
||||
static struct pernet_operations nf_ct_net_ops = {
|
||||
|
@ -48,8 +48,8 @@ static int sockstat6_seq_show(struct seq_file *seq, void *v)
|
||||
seq_printf(seq, "RAW6: inuse %d\n",
|
||||
sock_prot_inuse_get(net, &rawv6_prot));
|
||||
seq_printf(seq, "FRAG6: inuse %u memory %lu\n",
|
||||
atomic_read(&net->ipv6.frags.rhashtable.nelems),
|
||||
frag_mem_limit(&net->ipv6.frags));
|
||||
atomic_read(&net->ipv6.fqdir->rhashtable.nelems),
|
||||
frag_mem_limit(net->ipv6.fqdir));
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
@ -76,12 +76,10 @@ static void ip6_frag_expire(struct timer_list *t)
|
||||
{
|
||||
struct inet_frag_queue *frag = from_timer(frag, t, timer);
|
||||
struct frag_queue *fq;
|
||||
struct net *net;
|
||||
|
||||
fq = container_of(frag, struct frag_queue, q);
|
||||
net = container_of(fq->q.net, struct net, ipv6.frags);
|
||||
|
||||
ip6frag_expire_frag_queue(net, fq);
|
||||
ip6frag_expire_frag_queue(fq->q.fqdir->net, fq);
|
||||
}
|
||||
|
||||
static struct frag_queue *
|
||||
@ -100,7 +98,7 @@ fq_find(struct net *net, __be32 id, const struct ipv6hdr *hdr, int iif)
|
||||
IPV6_ADDR_LINKLOCAL)))
|
||||
key.iif = 0;
|
||||
|
||||
q = inet_frag_find(&net->ipv6.frags, &key);
|
||||
q = inet_frag_find(net->ipv6.fqdir, &key);
|
||||
if (!q)
|
||||
return NULL;
|
||||
|
||||
@ -200,7 +198,7 @@ static int ip6_frag_queue(struct frag_queue *fq, struct sk_buff *skb,
|
||||
fq->q.stamp = skb->tstamp;
|
||||
fq->q.meat += skb->len;
|
||||
fq->ecn |= ecn;
|
||||
add_frag_mem_limit(fq->q.net, skb->truesize);
|
||||
add_frag_mem_limit(fq->q.fqdir, skb->truesize);
|
||||
|
||||
fragsize = -skb_network_offset(skb) + skb->len;
|
||||
if (fragsize > fq->q.max_size)
|
||||
@ -254,7 +252,7 @@ err:
|
||||
static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff *skb,
|
||||
struct sk_buff *prev_tail, struct net_device *dev)
|
||||
{
|
||||
struct net *net = container_of(fq->q.net, struct net, ipv6.frags);
|
||||
struct net *net = fq->q.fqdir->net;
|
||||
unsigned int nhoff;
|
||||
void *reasm_data;
|
||||
int payload_len;
|
||||
@ -401,23 +399,18 @@ static const struct inet6_protocol frag_protocol = {
|
||||
static struct ctl_table ip6_frags_ns_ctl_table[] = {
|
||||
{
|
||||
.procname = "ip6frag_high_thresh",
|
||||
.data = &init_net.ipv6.frags.high_thresh,
|
||||
.maxlen = sizeof(unsigned long),
|
||||
.mode = 0644,
|
||||
.proc_handler = proc_doulongvec_minmax,
|
||||
.extra1 = &init_net.ipv6.frags.low_thresh
|
||||
},
|
||||
{
|
||||
.procname = "ip6frag_low_thresh",
|
||||
.data = &init_net.ipv6.frags.low_thresh,
|
||||
.maxlen = sizeof(unsigned long),
|
||||
.mode = 0644,
|
||||
.proc_handler = proc_doulongvec_minmax,
|
||||
.extra2 = &init_net.ipv6.frags.high_thresh
|
||||
},
|
||||
{
|
||||
.procname = "ip6frag_time",
|
||||
.data = &init_net.ipv6.frags.timeout,
|
||||
.maxlen = sizeof(int),
|
||||
.mode = 0644,
|
||||
.proc_handler = proc_dointvec_jiffies,
|
||||
@ -449,12 +442,12 @@ static int __net_init ip6_frags_ns_sysctl_register(struct net *net)
|
||||
if (!table)
|
||||
goto err_alloc;
|
||||
|
||||
table[0].data = &net->ipv6.frags.high_thresh;
|
||||
table[0].extra1 = &net->ipv6.frags.low_thresh;
|
||||
table[1].data = &net->ipv6.frags.low_thresh;
|
||||
table[1].extra2 = &net->ipv6.frags.high_thresh;
|
||||
table[2].data = &net->ipv6.frags.timeout;
|
||||
}
|
||||
table[0].data = &net->ipv6.fqdir->high_thresh;
|
||||
table[0].extra1 = &net->ipv6.fqdir->low_thresh;
|
||||
table[1].data = &net->ipv6.fqdir->low_thresh;
|
||||
table[1].extra2 = &net->ipv6.fqdir->high_thresh;
|
||||
table[2].data = &net->ipv6.fqdir->timeout;
|
||||
|
||||
hdr = register_net_sysctl(net, "net/ipv6", table);
|
||||
if (!hdr)
|
||||
@ -517,25 +510,24 @@ static int __net_init ipv6_frags_init_net(struct net *net)
|
||||
{
|
||||
int res;
|
||||
|
||||
net->ipv6.frags.high_thresh = IPV6_FRAG_HIGH_THRESH;
|
||||
net->ipv6.frags.low_thresh = IPV6_FRAG_LOW_THRESH;
|
||||
net->ipv6.frags.timeout = IPV6_FRAG_TIMEOUT;
|
||||
net->ipv6.frags.f = &ip6_frags;
|
||||
|
||||
res = inet_frags_init_net(&net->ipv6.frags);
|
||||
res = fqdir_init(&net->ipv6.fqdir, &ip6_frags, net);
|
||||
if (res < 0)
|
||||
return res;
|
||||
|
||||
net->ipv6.fqdir->high_thresh = IPV6_FRAG_HIGH_THRESH;
|
||||
net->ipv6.fqdir->low_thresh = IPV6_FRAG_LOW_THRESH;
|
||||
net->ipv6.fqdir->timeout = IPV6_FRAG_TIMEOUT;
|
||||
|
||||
res = ip6_frags_ns_sysctl_register(net);
|
||||
if (res < 0)
|
||||
inet_frags_exit_net(&net->ipv6.frags);
|
||||
fqdir_exit(net->ipv6.fqdir);
|
||||
return res;
|
||||
}
|
||||
|
||||
static void __net_exit ipv6_frags_exit_net(struct net *net)
|
||||
{
|
||||
ip6_frags_ns_sysctl_unregister(net);
|
||||
inet_frags_exit_net(&net->ipv6.frags);
|
||||
fqdir_exit(net->ipv6.fqdir);
|
||||
}
|
||||
|
||||
static struct pernet_operations ip6_frags_ops = {
|
||||
|
Loading…
Reference in New Issue
Block a user