netfilter: push reasm skb through instead of original frag skbs

Pushing original fragments through causes several problems. For example
for matching, frags may not be matched correctly. Take following
example:

<example>
On HOSTA do:
ip6tables -I INPUT -p icmpv6 -j DROP
ip6tables -I INPUT -p icmpv6 -m icmp6 --icmpv6-type 128 -j ACCEPT

and on HOSTB you do:
ping6 HOSTA -s2000    (MTU is 1500)

Incoming echo requests will be filtered out on HOSTA. This issue does
not occur with smaller packets than MTU (where fragmentation does not happen)
</example>

As was discussed previously, the only correct solution seems to be to use
reassembled skb instead of separete frags. Doing this has positive side
effects in reducing sk_buff by one pointer (nfct_reasm) and also the reams
dances in ipvs and conntrack can be removed.

Future plan is to remove net/ipv6/netfilter/nf_conntrack_reasm.c
entirely and use code in net/ipv6/reassembly.c instead.

Signed-off-by: Jiri Pirko <jiri@resnulli.us>
Acked-by: Julian Anastasov <ja@ssi.bg>
Signed-off-by: Marcelo Ricardo Leitner <mleitner@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
This commit is contained in:
Jiri Pirko 2013-11-06 17:52:20 +01:00 committed by David S. Miller
parent 9037c3579a
commit 6aafeef03b
9 changed files with 13 additions and 203 deletions

View File

@ -337,11 +337,6 @@ typedef unsigned int sk_buff_data_t;
typedef unsigned char *sk_buff_data_t;
#endif
#if defined(CONFIG_NF_DEFRAG_IPV4) || defined(CONFIG_NF_DEFRAG_IPV4_MODULE) || \
defined(CONFIG_NF_DEFRAG_IPV6) || defined(CONFIG_NF_DEFRAG_IPV6_MODULE)
#define NET_SKBUFF_NF_DEFRAG_NEEDED 1
#endif
/**
* struct sk_buff - socket buffer
* @next: Next buffer in list
@ -374,7 +369,6 @@ typedef unsigned char *sk_buff_data_t;
* @protocol: Packet protocol from driver
* @destructor: Destruct function
* @nfct: Associated connection, if any
* @nfct_reasm: netfilter conntrack re-assembly pointer
* @nf_bridge: Saved data about a bridged frame - see br_netfilter.c
* @skb_iif: ifindex of device we arrived on
* @tc_index: Traffic control index
@ -463,9 +457,6 @@ struct sk_buff {
#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
struct nf_conntrack *nfct;
#endif
#ifdef NET_SKBUFF_NF_DEFRAG_NEEDED
struct sk_buff *nfct_reasm;
#endif
#ifdef CONFIG_BRIDGE_NETFILTER
struct nf_bridge_info *nf_bridge;
#endif
@ -2595,18 +2586,6 @@ static inline void nf_conntrack_get(struct nf_conntrack *nfct)
atomic_inc(&nfct->use);
}
#endif
#ifdef NET_SKBUFF_NF_DEFRAG_NEEDED
static inline void nf_conntrack_get_reasm(struct sk_buff *skb)
{
if (skb)
atomic_inc(&skb->users);
}
static inline void nf_conntrack_put_reasm(struct sk_buff *skb)
{
if (skb)
kfree_skb(skb);
}
#endif
#ifdef CONFIG_BRIDGE_NETFILTER
static inline void nf_bridge_put(struct nf_bridge_info *nf_bridge)
{
@ -2625,10 +2604,6 @@ static inline void nf_reset(struct sk_buff *skb)
nf_conntrack_put(skb->nfct);
skb->nfct = NULL;
#endif
#ifdef NET_SKBUFF_NF_DEFRAG_NEEDED
nf_conntrack_put_reasm(skb->nfct_reasm);
skb->nfct_reasm = NULL;
#endif
#ifdef CONFIG_BRIDGE_NETFILTER
nf_bridge_put(skb->nf_bridge);
skb->nf_bridge = NULL;
@ -2650,10 +2625,6 @@ static inline void __nf_copy(struct sk_buff *dst, const struct sk_buff *src)
nf_conntrack_get(src->nfct);
dst->nfctinfo = src->nfctinfo;
#endif
#ifdef NET_SKBUFF_NF_DEFRAG_NEEDED
dst->nfct_reasm = src->nfct_reasm;
nf_conntrack_get_reasm(src->nfct_reasm);
#endif
#ifdef CONFIG_BRIDGE_NETFILTER
dst->nf_bridge = src->nf_bridge;
nf_bridge_get(src->nf_bridge);
@ -2665,9 +2636,6 @@ static inline void nf_copy(struct sk_buff *dst, const struct sk_buff *src)
#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
nf_conntrack_put(dst->nfct);
#endif
#ifdef NET_SKBUFF_NF_DEFRAG_NEEDED
nf_conntrack_put_reasm(dst->nfct_reasm);
#endif
#ifdef CONFIG_BRIDGE_NETFILTER
nf_bridge_put(dst->nf_bridge);
#endif

View File

@ -109,7 +109,6 @@ extern int ip_vs_conn_tab_size;
struct ip_vs_iphdr {
__u32 len; /* IPv4 simply where L4 starts
IPv6 where L4 Transport Header starts */
__u32 thoff_reasm; /* Transport Header Offset in nfct_reasm skb */
__u16 fragoffs; /* IPv6 fragment offset, 0 if first frag (or not frag)*/
__s16 protocol;
__s32 flags;
@ -117,34 +116,12 @@ struct ip_vs_iphdr {
union nf_inet_addr daddr;
};
/* Dependency to module: nf_defrag_ipv6 */
#if defined(CONFIG_NF_DEFRAG_IPV6) || defined(CONFIG_NF_DEFRAG_IPV6_MODULE)
static inline struct sk_buff *skb_nfct_reasm(const struct sk_buff *skb)
{
return skb->nfct_reasm;
}
static inline void *frag_safe_skb_hp(const struct sk_buff *skb, int offset,
int len, void *buffer,
const struct ip_vs_iphdr *ipvsh)
{
if (unlikely(ipvsh->fragoffs && skb_nfct_reasm(skb)))
return skb_header_pointer(skb_nfct_reasm(skb),
ipvsh->thoff_reasm, len, buffer);
return skb_header_pointer(skb, offset, len, buffer);
}
#else
static inline struct sk_buff *skb_nfct_reasm(const struct sk_buff *skb)
{
return NULL;
}
static inline void *frag_safe_skb_hp(const struct sk_buff *skb, int offset,
int len, void *buffer,
const struct ip_vs_iphdr *ipvsh)
{
return skb_header_pointer(skb, offset, len, buffer);
}
#endif
static inline void
ip_vs_fill_ip4hdr(const void *nh, struct ip_vs_iphdr *iphdr)
@ -171,19 +148,12 @@ ip_vs_fill_iph_skb(int af, const struct sk_buff *skb, struct ip_vs_iphdr *iphdr)
(struct ipv6hdr *)skb_network_header(skb);
iphdr->saddr.in6 = iph->saddr;
iphdr->daddr.in6 = iph->daddr;
/* ipv6_find_hdr() updates len, flags, thoff_reasm */
iphdr->thoff_reasm = 0;
/* ipv6_find_hdr() updates len, flags */
iphdr->len = 0;
iphdr->flags = 0;
iphdr->protocol = ipv6_find_hdr(skb, &iphdr->len, -1,
&iphdr->fragoffs,
&iphdr->flags);
/* get proto from re-assembled packet and it's offset */
if (skb_nfct_reasm(skb))
iphdr->protocol = ipv6_find_hdr(skb_nfct_reasm(skb),
&iphdr->thoff_reasm,
-1, NULL, NULL);
} else
#endif
{

View File

@ -6,9 +6,7 @@ void nf_defrag_ipv6_enable(void);
int nf_ct_frag6_init(void);
void nf_ct_frag6_cleanup(void);
struct sk_buff *nf_ct_frag6_gather(struct sk_buff *skb, u32 user);
void nf_ct_frag6_output(unsigned int hooknum, struct sk_buff *skb,
struct net_device *in, struct net_device *out,
int (*okfn)(struct sk_buff *));
void nf_ct_frag6_consume_orig(struct sk_buff *skb);
struct inet_frags_ctl;

View File

@ -592,9 +592,6 @@ static void skb_release_head_state(struct sk_buff *skb)
#if IS_ENABLED(CONFIG_NF_CONNTRACK)
nf_conntrack_put(skb->nfct);
#endif
#ifdef NET_SKBUFF_NF_DEFRAG_NEEDED
nf_conntrack_put_reasm(skb->nfct_reasm);
#endif
#ifdef CONFIG_BRIDGE_NETFILTER
nf_bridge_put(skb->nf_bridge);
#endif

View File

@ -169,64 +169,13 @@ out:
return nf_conntrack_confirm(skb);
}
static unsigned int __ipv6_conntrack_in(struct net *net,
unsigned int hooknum,
struct sk_buff *skb,
const struct net_device *in,
const struct net_device *out,
int (*okfn)(struct sk_buff *))
{
struct sk_buff *reasm = skb->nfct_reasm;
const struct nf_conn_help *help;
struct nf_conn *ct;
enum ip_conntrack_info ctinfo;
/* This packet is fragmented and has reassembled packet. */
if (reasm) {
/* Reassembled packet isn't parsed yet ? */
if (!reasm->nfct) {
unsigned int ret;
ret = nf_conntrack_in(net, PF_INET6, hooknum, reasm);
if (ret != NF_ACCEPT)
return ret;
}
/* Conntrack helpers need the entire reassembled packet in the
* POST_ROUTING hook. In case of unconfirmed connections NAT
* might reassign a helper, so the entire packet is also
* required.
*/
ct = nf_ct_get(reasm, &ctinfo);
if (ct != NULL && !nf_ct_is_untracked(ct)) {
help = nfct_help(ct);
if ((help && help->helper) || !nf_ct_is_confirmed(ct)) {
nf_conntrack_get_reasm(reasm);
NF_HOOK_THRESH(NFPROTO_IPV6, hooknum, reasm,
(struct net_device *)in,
(struct net_device *)out,
okfn, NF_IP6_PRI_CONNTRACK + 1);
return NF_DROP_ERR(-ECANCELED);
}
}
nf_conntrack_get(reasm->nfct);
skb->nfct = reasm->nfct;
skb->nfctinfo = reasm->nfctinfo;
return NF_ACCEPT;
}
return nf_conntrack_in(net, PF_INET6, hooknum, skb);
}
static unsigned int ipv6_conntrack_in(const struct nf_hook_ops *ops,
struct sk_buff *skb,
const struct net_device *in,
const struct net_device *out,
int (*okfn)(struct sk_buff *))
{
return __ipv6_conntrack_in(dev_net(in), ops->hooknum, skb, in, out,
okfn);
return nf_conntrack_in(dev_net(in), PF_INET6, ops->hooknum, skb);
}
static unsigned int ipv6_conntrack_local(const struct nf_hook_ops *ops,
@ -240,8 +189,7 @@ static unsigned int ipv6_conntrack_local(const struct nf_hook_ops *ops,
net_notice_ratelimited("ipv6_conntrack_local: packet too short\n");
return NF_ACCEPT;
}
return __ipv6_conntrack_in(dev_net(out), ops->hooknum, skb, in, out,
okfn);
return nf_conntrack_in(dev_net(out), PF_INET6, ops->hooknum, skb);
}
static struct nf_hook_ops ipv6_conntrack_ops[] __read_mostly = {

View File

@ -633,31 +633,16 @@ ret_orig:
return skb;
}
void nf_ct_frag6_output(unsigned int hooknum, struct sk_buff *skb,
struct net_device *in, struct net_device *out,
int (*okfn)(struct sk_buff *))
void nf_ct_frag6_consume_orig(struct sk_buff *skb)
{
struct sk_buff *s, *s2;
unsigned int ret = 0;
for (s = NFCT_FRAG6_CB(skb)->orig; s;) {
nf_conntrack_put_reasm(s->nfct_reasm);
nf_conntrack_get_reasm(skb);
s->nfct_reasm = skb;
s2 = s->next;
s->next = NULL;
if (ret != -ECANCELED)
ret = NF_HOOK_THRESH(NFPROTO_IPV6, hooknum, s,
in, out, okfn,
NF_IP6_PRI_CONNTRACK_DEFRAG + 1);
else
kfree_skb(s);
consume_skb(s);
s = s2;
}
nf_conntrack_put_reasm(skb);
}
static int nf_ct_net_init(struct net *net)

View File

@ -75,8 +75,11 @@ static unsigned int ipv6_defrag(const struct nf_hook_ops *ops,
if (reasm == skb)
return NF_ACCEPT;
nf_ct_frag6_output(ops->hooknum, reasm, (struct net_device *)in,
(struct net_device *)out, okfn);
nf_ct_frag6_consume_orig(reasm);
NF_HOOK_THRESH(NFPROTO_IPV6, ops->hooknum, reasm,
(struct net_device *) in, (struct net_device *) out,
okfn, NF_IP6_PRI_CONNTRACK_DEFRAG + 1);
return NF_STOLEN;
}

View File

@ -1139,12 +1139,6 @@ ip_vs_out(unsigned int hooknum, struct sk_buff *skb, int af)
ip_vs_fill_iph_skb(af, skb, &iph);
#ifdef CONFIG_IP_VS_IPV6
if (af == AF_INET6) {
if (!iph.fragoffs && skb_nfct_reasm(skb)) {
struct sk_buff *reasm = skb_nfct_reasm(skb);
/* Save fw mark for coming frags */
reasm->ipvs_property = 1;
reasm->mark = skb->mark;
}
if (unlikely(iph.protocol == IPPROTO_ICMPV6)) {
int related;
int verdict = ip_vs_out_icmp_v6(skb, &related,
@ -1614,12 +1608,6 @@ ip_vs_in(unsigned int hooknum, struct sk_buff *skb, int af)
#ifdef CONFIG_IP_VS_IPV6
if (af == AF_INET6) {
if (!iph.fragoffs && skb_nfct_reasm(skb)) {
struct sk_buff *reasm = skb_nfct_reasm(skb);
/* Save fw mark for coming frags. */
reasm->ipvs_property = 1;
reasm->mark = skb->mark;
}
if (unlikely(iph.protocol == IPPROTO_ICMPV6)) {
int related;
int verdict = ip_vs_in_icmp_v6(skb, &related, hooknum,
@ -1671,9 +1659,8 @@ ip_vs_in(unsigned int hooknum, struct sk_buff *skb, int af)
/* sorry, all this trouble for a no-hit :) */
IP_VS_DBG_PKT(12, af, pp, skb, 0,
"ip_vs_in: packet continues traversal as normal");
if (iph.fragoffs && !skb_nfct_reasm(skb)) {
if (iph.fragoffs) {
/* Fragment that couldn't be mapped to a conn entry
* and don't have any pointer to a reasm skb
* is missing module nf_defrag_ipv6
*/
IP_VS_DBG_RL("Unhandled frag, load nf_defrag_ipv6\n");
@ -1755,38 +1742,6 @@ ip_vs_local_request4(const struct nf_hook_ops *ops, struct sk_buff *skb,
#ifdef CONFIG_IP_VS_IPV6
/*
* AF_INET6 fragment handling
* Copy info from first fragment, to the rest of them.
*/
static unsigned int
ip_vs_preroute_frag6(const struct nf_hook_ops *ops, struct sk_buff *skb,
const struct net_device *in,
const struct net_device *out,
int (*okfn)(struct sk_buff *))
{
struct sk_buff *reasm = skb_nfct_reasm(skb);
struct net *net;
/* Skip if not a "replay" from nf_ct_frag6_output or first fragment.
* ipvs_property is set when checking first fragment
* in ip_vs_in() and ip_vs_out().
*/
if (reasm)
IP_VS_DBG(2, "Fragment recv prop:%d\n", reasm->ipvs_property);
if (!reasm || !reasm->ipvs_property)
return NF_ACCEPT;
net = skb_net(skb);
if (!net_ipvs(net)->enable)
return NF_ACCEPT;
/* Copy stored fw mark, saved in ip_vs_{in,out} */
skb->mark = reasm->mark;
return NF_ACCEPT;
}
/*
* AF_INET6 handler in NF_INET_LOCAL_IN chain
* Schedule and forward packets from remote clients
@ -1924,14 +1879,6 @@ static struct nf_hook_ops ip_vs_ops[] __read_mostly = {
.priority = 100,
},
#ifdef CONFIG_IP_VS_IPV6
/* After mangle & nat fetch 2:nd fragment and following */
{
.hook = ip_vs_preroute_frag6,
.owner = THIS_MODULE,
.pf = NFPROTO_IPV6,
.hooknum = NF_INET_PRE_ROUTING,
.priority = NF_IP6_PRI_NAT_DST + 1,
},
/* After packet filtering, change source only for VS/NAT */
{
.hook = ip_vs_reply6,

View File

@ -65,7 +65,6 @@ static int get_callid(const char *dptr, unsigned int dataoff,
static int
ip_vs_sip_fill_param(struct ip_vs_conn_param *p, struct sk_buff *skb)
{
struct sk_buff *reasm = skb_nfct_reasm(skb);
struct ip_vs_iphdr iph;
unsigned int dataoff, datalen, matchoff, matchlen;
const char *dptr;
@ -79,15 +78,10 @@ ip_vs_sip_fill_param(struct ip_vs_conn_param *p, struct sk_buff *skb)
/* todo: IPv6 fragments:
* I think this only should be done for the first fragment. /HS
*/
if (reasm) {
skb = reasm;
dataoff = iph.thoff_reasm + sizeof(struct udphdr);
} else
dataoff = iph.len + sizeof(struct udphdr);
dataoff = iph.len + sizeof(struct udphdr);
if (dataoff >= skb->len)
return -EINVAL;
/* todo: Check if this will mess-up the reasm skb !!! /HS */
retc = skb_linearize(skb);
if (retc < 0)
return retc;