netfilter: xtables: make ip_tables reentrant
Currently, the table traverser stores return addresses in the ruleset itself (struct ip6t_entry->comefrom). This has a well-known drawback: the jumpstack is overwritten on reentry, making it necessary for targets to return absolute verdicts. Also, the ruleset (which might be heavy memory-wise) needs to be replicated for each CPU that can possibly invoke ip6t_do_table. This patch decouples the jumpstack from struct ip6t_entry and instead puts it into xt_table_info. Not being restricted by 'comefrom' anymore, we can set up a stack as needed. By default, there is room allocated for two entries into the traverser. arp_tables is not touched though, because there is just one/two modules and further patches seek to collapse the table traverser anyhow. Signed-off-by: Jan Engelhardt <jengelh@medozas.de> Signed-off-by: Patrick McHardy <kaber@trash.net>
This commit is contained in:
parent
e281b19897
commit
f3c5c1bfd4
@ -401,6 +401,13 @@ struct xt_table_info {
|
||||
unsigned int hook_entry[NF_INET_NUMHOOKS];
|
||||
unsigned int underflow[NF_INET_NUMHOOKS];
|
||||
|
||||
/*
|
||||
* Number of user chains. Since tables cannot have loops, at most
|
||||
* @stacksize jumps (number of user chains) can possibly be made.
|
||||
*/
|
||||
unsigned int stacksize;
|
||||
unsigned int *stackptr;
|
||||
void ***jumpstack;
|
||||
/* ipt_entry tables: one per CPU */
|
||||
/* Note : this field MUST be the last one, see XT_TABLE_INFO_SZ */
|
||||
void *entries[1];
|
||||
|
@ -649,6 +649,9 @@ static int translate_table(struct xt_table_info *newinfo, void *entry0,
|
||||
if (ret != 0)
|
||||
break;
|
||||
++i;
|
||||
if (strcmp(arpt_get_target(iter)->u.user.name,
|
||||
XT_ERROR_TARGET) == 0)
|
||||
++newinfo->stacksize;
|
||||
}
|
||||
duprintf("translate_table: ARPT_ENTRY_ITERATE gives %d\n", ret);
|
||||
if (ret != 0)
|
||||
@ -1774,8 +1777,7 @@ struct xt_table *arpt_register_table(struct net *net,
|
||||
{
|
||||
int ret;
|
||||
struct xt_table_info *newinfo;
|
||||
struct xt_table_info bootstrap
|
||||
= { 0, 0, 0, { 0 }, { 0 }, { } };
|
||||
struct xt_table_info bootstrap = {0};
|
||||
void *loc_cpu_entry;
|
||||
struct xt_table *new_table;
|
||||
|
||||
|
@ -321,8 +321,6 @@ ipt_do_table(struct sk_buff *skb,
|
||||
const struct net_device *out,
|
||||
struct xt_table *table)
|
||||
{
|
||||
#define tb_comefrom ((struct ipt_entry *)table_base)->comefrom
|
||||
|
||||
static const char nulldevname[IFNAMSIZ] __attribute__((aligned(sizeof(long))));
|
||||
const struct iphdr *ip;
|
||||
bool hotdrop = false;
|
||||
@ -330,7 +328,8 @@ ipt_do_table(struct sk_buff *skb,
|
||||
unsigned int verdict = NF_DROP;
|
||||
const char *indev, *outdev;
|
||||
const void *table_base;
|
||||
struct ipt_entry *e, *back;
|
||||
struct ipt_entry *e, **jumpstack;
|
||||
unsigned int *stackptr, origptr, cpu;
|
||||
const struct xt_table_info *private;
|
||||
struct xt_match_param mtpar;
|
||||
struct xt_target_param tgpar;
|
||||
@ -356,19 +355,23 @@ ipt_do_table(struct sk_buff *skb,
|
||||
IP_NF_ASSERT(table->valid_hooks & (1 << hook));
|
||||
xt_info_rdlock_bh();
|
||||
private = table->private;
|
||||
table_base = private->entries[smp_processor_id()];
|
||||
cpu = smp_processor_id();
|
||||
table_base = private->entries[cpu];
|
||||
jumpstack = (struct ipt_entry **)private->jumpstack[cpu];
|
||||
stackptr = &private->stackptr[cpu];
|
||||
origptr = *stackptr;
|
||||
|
||||
e = get_entry(table_base, private->hook_entry[hook]);
|
||||
|
||||
/* For return from builtin chain */
|
||||
back = get_entry(table_base, private->underflow[hook]);
|
||||
pr_devel("Entering %s(hook %u); sp at %u (UF %p)\n",
|
||||
table->name, hook, origptr,
|
||||
get_entry(table_base, private->underflow[hook]));
|
||||
|
||||
do {
|
||||
const struct ipt_entry_target *t;
|
||||
const struct xt_entry_match *ematch;
|
||||
|
||||
IP_NF_ASSERT(e);
|
||||
IP_NF_ASSERT(back);
|
||||
if (!ip_packet_match(ip, indev, outdev,
|
||||
&e->ip, mtpar.fragoff)) {
|
||||
no_match:
|
||||
@ -403,17 +406,28 @@ ipt_do_table(struct sk_buff *skb,
|
||||
verdict = (unsigned)(-v) - 1;
|
||||
break;
|
||||
}
|
||||
e = back;
|
||||
back = get_entry(table_base, back->comefrom);
|
||||
if (*stackptr == 0) {
|
||||
e = get_entry(table_base,
|
||||
private->underflow[hook]);
|
||||
pr_devel("Underflow (this is normal) "
|
||||
"to %p\n", e);
|
||||
} else {
|
||||
e = jumpstack[--*stackptr];
|
||||
pr_devel("Pulled %p out from pos %u\n",
|
||||
e, *stackptr);
|
||||
e = ipt_next_entry(e);
|
||||
}
|
||||
continue;
|
||||
}
|
||||
if (table_base + v != ipt_next_entry(e) &&
|
||||
!(e->ip.flags & IPT_F_GOTO)) {
|
||||
/* Save old back ptr in next entry */
|
||||
struct ipt_entry *next = ipt_next_entry(e);
|
||||
next->comefrom = (void *)back - table_base;
|
||||
/* set back pointer to next entry */
|
||||
back = next;
|
||||
if (*stackptr >= private->stacksize) {
|
||||
verdict = NF_DROP;
|
||||
break;
|
||||
}
|
||||
jumpstack[(*stackptr)++] = e;
|
||||
pr_devel("Pushed %p into pos %u\n",
|
||||
e, *stackptr - 1);
|
||||
}
|
||||
|
||||
e = get_entry(table_base, v);
|
||||
@ -426,18 +440,7 @@ ipt_do_table(struct sk_buff *skb,
|
||||
tgpar.targinfo = t->data;
|
||||
|
||||
|
||||
#ifdef CONFIG_NETFILTER_DEBUG
|
||||
tb_comefrom = 0xeeeeeeec;
|
||||
#endif
|
||||
verdict = t->u.kernel.target->target(skb, &tgpar);
|
||||
#ifdef CONFIG_NETFILTER_DEBUG
|
||||
if (tb_comefrom != 0xeeeeeeec && verdict == IPT_CONTINUE) {
|
||||
printk("Target %s reentered!\n",
|
||||
t->u.kernel.target->name);
|
||||
verdict = NF_DROP;
|
||||
}
|
||||
tb_comefrom = 0x57acc001;
|
||||
#endif
|
||||
/* Target might have changed stuff. */
|
||||
ip = ip_hdr(skb);
|
||||
if (verdict == IPT_CONTINUE)
|
||||
@ -447,7 +450,9 @@ ipt_do_table(struct sk_buff *skb,
|
||||
break;
|
||||
} while (!hotdrop);
|
||||
xt_info_rdunlock_bh();
|
||||
|
||||
pr_devel("Exiting %s; resetting sp from %u to %u\n",
|
||||
__func__, *stackptr, origptr);
|
||||
*stackptr = origptr;
|
||||
#ifdef DEBUG_ALLOW_ALL
|
||||
return NF_ACCEPT;
|
||||
#else
|
||||
@ -455,8 +460,6 @@ ipt_do_table(struct sk_buff *skb,
|
||||
return NF_DROP;
|
||||
else return verdict;
|
||||
#endif
|
||||
|
||||
#undef tb_comefrom
|
||||
}
|
||||
|
||||
/* Figures out from what hook each rule can be called: returns 0 if
|
||||
@ -838,6 +841,9 @@ translate_table(struct net *net, struct xt_table_info *newinfo, void *entry0,
|
||||
if (ret != 0)
|
||||
return ret;
|
||||
++i;
|
||||
if (strcmp(ipt_get_target(iter)->u.user.name,
|
||||
XT_ERROR_TARGET) == 0)
|
||||
++newinfo->stacksize;
|
||||
}
|
||||
|
||||
if (i != repl->num_entries) {
|
||||
@ -2086,8 +2092,7 @@ struct xt_table *ipt_register_table(struct net *net,
|
||||
{
|
||||
int ret;
|
||||
struct xt_table_info *newinfo;
|
||||
struct xt_table_info bootstrap
|
||||
= { 0, 0, 0, { 0 }, { 0 }, { } };
|
||||
struct xt_table_info bootstrap = {0};
|
||||
void *loc_cpu_entry;
|
||||
struct xt_table *new_table;
|
||||
|
||||
|
@ -351,15 +351,14 @@ ip6t_do_table(struct sk_buff *skb,
|
||||
const struct net_device *out,
|
||||
struct xt_table *table)
|
||||
{
|
||||
#define tb_comefrom ((struct ip6t_entry *)table_base)->comefrom
|
||||
|
||||
static const char nulldevname[IFNAMSIZ] __attribute__((aligned(sizeof(long))));
|
||||
bool hotdrop = false;
|
||||
/* Initializing verdict to NF_DROP keeps gcc happy. */
|
||||
unsigned int verdict = NF_DROP;
|
||||
const char *indev, *outdev;
|
||||
const void *table_base;
|
||||
struct ip6t_entry *e, *back;
|
||||
struct ip6t_entry *e, **jumpstack;
|
||||
unsigned int *stackptr, origptr, cpu;
|
||||
const struct xt_table_info *private;
|
||||
struct xt_match_param mtpar;
|
||||
struct xt_target_param tgpar;
|
||||
@ -383,19 +382,19 @@ ip6t_do_table(struct sk_buff *skb,
|
||||
|
||||
xt_info_rdlock_bh();
|
||||
private = table->private;
|
||||
table_base = private->entries[smp_processor_id()];
|
||||
cpu = smp_processor_id();
|
||||
table_base = private->entries[cpu];
|
||||
jumpstack = (struct ip6t_entry **)private->jumpstack[cpu];
|
||||
stackptr = &private->stackptr[cpu];
|
||||
origptr = *stackptr;
|
||||
|
||||
e = get_entry(table_base, private->hook_entry[hook]);
|
||||
|
||||
/* For return from builtin chain */
|
||||
back = get_entry(table_base, private->underflow[hook]);
|
||||
|
||||
do {
|
||||
const struct ip6t_entry_target *t;
|
||||
const struct xt_entry_match *ematch;
|
||||
|
||||
IP_NF_ASSERT(e);
|
||||
IP_NF_ASSERT(back);
|
||||
if (!ip6_packet_match(skb, indev, outdev, &e->ipv6,
|
||||
&mtpar.thoff, &mtpar.fragoff, &hotdrop)) {
|
||||
no_match:
|
||||
@ -432,17 +431,20 @@ ip6t_do_table(struct sk_buff *skb,
|
||||
verdict = (unsigned)(-v) - 1;
|
||||
break;
|
||||
}
|
||||
e = back;
|
||||
back = get_entry(table_base, back->comefrom);
|
||||
if (*stackptr == 0)
|
||||
e = get_entry(table_base,
|
||||
private->underflow[hook]);
|
||||
else
|
||||
e = ip6t_next_entry(jumpstack[--*stackptr]);
|
||||
continue;
|
||||
}
|
||||
if (table_base + v != ip6t_next_entry(e) &&
|
||||
!(e->ipv6.flags & IP6T_F_GOTO)) {
|
||||
/* Save old back ptr in next entry */
|
||||
struct ip6t_entry *next = ip6t_next_entry(e);
|
||||
next->comefrom = (void *)back - table_base;
|
||||
/* set back pointer to next entry */
|
||||
back = next;
|
||||
if (*stackptr >= private->stacksize) {
|
||||
verdict = NF_DROP;
|
||||
break;
|
||||
}
|
||||
jumpstack[(*stackptr)++] = e;
|
||||
}
|
||||
|
||||
e = get_entry(table_base, v);
|
||||
@ -454,19 +456,7 @@ ip6t_do_table(struct sk_buff *skb,
|
||||
tgpar.target = t->u.kernel.target;
|
||||
tgpar.targinfo = t->data;
|
||||
|
||||
#ifdef CONFIG_NETFILTER_DEBUG
|
||||
tb_comefrom = 0xeeeeeeec;
|
||||
#endif
|
||||
verdict = t->u.kernel.target->target(skb, &tgpar);
|
||||
|
||||
#ifdef CONFIG_NETFILTER_DEBUG
|
||||
if (tb_comefrom != 0xeeeeeeec && verdict == IP6T_CONTINUE) {
|
||||
printk("Target %s reentered!\n",
|
||||
t->u.kernel.target->name);
|
||||
verdict = NF_DROP;
|
||||
}
|
||||
tb_comefrom = 0x57acc001;
|
||||
#endif
|
||||
if (verdict == IP6T_CONTINUE)
|
||||
e = ip6t_next_entry(e);
|
||||
else
|
||||
@ -474,10 +464,8 @@ ip6t_do_table(struct sk_buff *skb,
|
||||
break;
|
||||
} while (!hotdrop);
|
||||
|
||||
#ifdef CONFIG_NETFILTER_DEBUG
|
||||
tb_comefrom = NETFILTER_LINK_POISON;
|
||||
#endif
|
||||
xt_info_rdunlock_bh();
|
||||
*stackptr = origptr;
|
||||
|
||||
#ifdef DEBUG_ALLOW_ALL
|
||||
return NF_ACCEPT;
|
||||
@ -486,8 +474,6 @@ ip6t_do_table(struct sk_buff *skb,
|
||||
return NF_DROP;
|
||||
else return verdict;
|
||||
#endif
|
||||
|
||||
#undef tb_comefrom
|
||||
}
|
||||
|
||||
/* Figures out from what hook each rule can be called: returns 0 if
|
||||
@ -869,6 +855,9 @@ translate_table(struct net *net, struct xt_table_info *newinfo, void *entry0,
|
||||
if (ret != 0)
|
||||
return ret;
|
||||
++i;
|
||||
if (strcmp(ip6t_get_target(iter)->u.user.name,
|
||||
XT_ERROR_TARGET) == 0)
|
||||
++newinfo->stacksize;
|
||||
}
|
||||
|
||||
if (i != repl->num_entries) {
|
||||
@ -2120,8 +2109,7 @@ struct xt_table *ip6t_register_table(struct net *net,
|
||||
{
|
||||
int ret;
|
||||
struct xt_table_info *newinfo;
|
||||
struct xt_table_info bootstrap
|
||||
= { 0, 0, 0, { 0 }, { 0 }, { } };
|
||||
struct xt_table_info bootstrap = {0};
|
||||
void *loc_cpu_entry;
|
||||
struct xt_table *new_table;
|
||||
|
||||
|
@ -62,6 +62,9 @@ static const char *const xt_prefix[NFPROTO_NUMPROTO] = {
|
||||
[NFPROTO_IPV6] = "ip6",
|
||||
};
|
||||
|
||||
/* Allow this many total (re)entries. */
|
||||
static const unsigned int xt_jumpstack_multiplier = 2;
|
||||
|
||||
/* Registration hooks for targets. */
|
||||
int
|
||||
xt_register_target(struct xt_target *target)
|
||||
@ -680,6 +683,26 @@ void xt_free_table_info(struct xt_table_info *info)
|
||||
else
|
||||
vfree(info->entries[cpu]);
|
||||
}
|
||||
|
||||
if (info->jumpstack != NULL) {
|
||||
if (sizeof(void *) * info->stacksize > PAGE_SIZE) {
|
||||
for_each_possible_cpu(cpu)
|
||||
vfree(info->jumpstack[cpu]);
|
||||
} else {
|
||||
for_each_possible_cpu(cpu)
|
||||
kfree(info->jumpstack[cpu]);
|
||||
}
|
||||
}
|
||||
|
||||
if (sizeof(void **) * nr_cpu_ids > PAGE_SIZE)
|
||||
vfree(info->jumpstack);
|
||||
else
|
||||
kfree(info->jumpstack);
|
||||
if (sizeof(unsigned int) * nr_cpu_ids > PAGE_SIZE)
|
||||
vfree(info->stackptr);
|
||||
else
|
||||
kfree(info->stackptr);
|
||||
|
||||
kfree(info);
|
||||
}
|
||||
EXPORT_SYMBOL(xt_free_table_info);
|
||||
@ -724,6 +747,49 @@ EXPORT_SYMBOL_GPL(xt_compat_unlock);
|
||||
DEFINE_PER_CPU(struct xt_info_lock, xt_info_locks);
|
||||
EXPORT_PER_CPU_SYMBOL_GPL(xt_info_locks);
|
||||
|
||||
static int xt_jumpstack_alloc(struct xt_table_info *i)
|
||||
{
|
||||
unsigned int size;
|
||||
int cpu;
|
||||
|
||||
size = sizeof(unsigned int) * nr_cpu_ids;
|
||||
if (size > PAGE_SIZE)
|
||||
i->stackptr = vmalloc(size);
|
||||
else
|
||||
i->stackptr = kmalloc(size, GFP_KERNEL);
|
||||
if (i->stackptr == NULL)
|
||||
return -ENOMEM;
|
||||
memset(i->stackptr, 0, size);
|
||||
|
||||
size = sizeof(void **) * nr_cpu_ids;
|
||||
if (size > PAGE_SIZE)
|
||||
i->jumpstack = vmalloc(size);
|
||||
else
|
||||
i->jumpstack = kmalloc(size, GFP_KERNEL);
|
||||
if (i->jumpstack == NULL)
|
||||
return -ENOMEM;
|
||||
memset(i->jumpstack, 0, size);
|
||||
|
||||
i->stacksize *= xt_jumpstack_multiplier;
|
||||
size = sizeof(void *) * i->stacksize;
|
||||
for_each_possible_cpu(cpu) {
|
||||
if (size > PAGE_SIZE)
|
||||
i->jumpstack[cpu] = vmalloc_node(size,
|
||||
cpu_to_node(cpu));
|
||||
else
|
||||
i->jumpstack[cpu] = kmalloc_node(size,
|
||||
GFP_KERNEL, cpu_to_node(cpu));
|
||||
if (i->jumpstack[cpu] == NULL)
|
||||
/*
|
||||
* Freeing will be done later on by the callers. The
|
||||
* chain is: xt_replace_table -> __do_replace ->
|
||||
* do_replace -> xt_free_table_info.
|
||||
*/
|
||||
return -ENOMEM;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
struct xt_table_info *
|
||||
xt_replace_table(struct xt_table *table,
|
||||
@ -732,6 +798,7 @@ xt_replace_table(struct xt_table *table,
|
||||
int *error)
|
||||
{
|
||||
struct xt_table_info *private;
|
||||
int ret;
|
||||
|
||||
/* Do the substitution. */
|
||||
local_bh_disable();
|
||||
@ -746,6 +813,12 @@ xt_replace_table(struct xt_table *table,
|
||||
return NULL;
|
||||
}
|
||||
|
||||
ret = xt_jumpstack_alloc(newinfo);
|
||||
if (ret < 0) {
|
||||
*error = ret;
|
||||
return NULL;
|
||||
}
|
||||
|
||||
table->private = newinfo;
|
||||
newinfo->initial_entries = private->initial_entries;
|
||||
|
||||
@ -770,6 +843,10 @@ struct xt_table *xt_register_table(struct net *net,
|
||||
struct xt_table_info *private;
|
||||
struct xt_table *t, *table;
|
||||
|
||||
ret = xt_jumpstack_alloc(newinfo);
|
||||
if (ret < 0)
|
||||
return ERR_PTR(ret);
|
||||
|
||||
/* Don't add one object to multiple lists. */
|
||||
table = kmemdup(input_table, sizeof(struct xt_table), GFP_KERNEL);
|
||||
if (!table) {
|
||||
|
Loading…
Reference in New Issue
Block a user