diff --git a/include/linux/netfilter.h b/include/linux/netfilter.h index e2cf786be22f..708fe72ab913 100644 --- a/include/linux/netfilter.h +++ b/include/linux/netfilter.h @@ -319,6 +319,7 @@ extern void nf_ct_attach(struct sk_buff *, const struct sk_buff *); extern void (*nf_ct_destroy)(struct nf_conntrack *) __rcu; struct nf_conn; +enum ip_conntrack_info; struct nlattr; struct nfq_ct_hook { @@ -327,14 +328,10 @@ struct nfq_ct_hook { int (*parse)(const struct nlattr *attr, struct nf_conn *ct); int (*attach_expect)(const struct nlattr *attr, struct nf_conn *ct, u32 portid, u32 report); + void (*seq_adjust)(struct sk_buff *skb, struct nf_conn *ct, + enum ip_conntrack_info ctinfo, s32 off); }; extern struct nfq_ct_hook __rcu *nfq_ct_hook; - -struct nfq_ct_nat_hook { - void (*seq_adjust)(struct sk_buff *skb, struct nf_conn *ct, - u32 ctinfo, s32 off); -}; -extern struct nfq_ct_nat_hook __rcu *nfq_ct_nat_hook; #else static inline void nf_ct_attach(struct sk_buff *new, struct sk_buff *skb) {} #endif diff --git a/include/net/netfilter/nf_conntrack_extend.h b/include/net/netfilter/nf_conntrack_extend.h index 977bc8a46444..ff95434e50ca 100644 --- a/include/net/netfilter/nf_conntrack_extend.h +++ b/include/net/netfilter/nf_conntrack_extend.h @@ -10,6 +10,7 @@ enum nf_ct_ext_id { #if defined(CONFIG_NF_NAT) || defined(CONFIG_NF_NAT_MODULE) NF_CT_EXT_NAT, #endif + NF_CT_EXT_SEQADJ, NF_CT_EXT_ACCT, #ifdef CONFIG_NF_CONNTRACK_EVENTS NF_CT_EXT_ECACHE, @@ -25,18 +26,23 @@ enum nf_ct_ext_id { #endif #ifdef CONFIG_NF_CONNTRACK_LABELS NF_CT_EXT_LABELS, +#endif +#if IS_ENABLED(CONFIG_NETFILTER_SYNPROXY) + NF_CT_EXT_SYNPROXY, #endif NF_CT_EXT_NUM, }; #define NF_CT_EXT_HELPER_TYPE struct nf_conn_help #define NF_CT_EXT_NAT_TYPE struct nf_conn_nat +#define NF_CT_EXT_SEQADJ_TYPE struct nf_conn_seqadj #define NF_CT_EXT_ACCT_TYPE struct nf_conn_counter #define NF_CT_EXT_ECACHE_TYPE struct nf_conntrack_ecache #define NF_CT_EXT_ZONE_TYPE struct nf_conntrack_zone #define NF_CT_EXT_TSTAMP_TYPE struct nf_conn_tstamp #define NF_CT_EXT_TIMEOUT_TYPE struct nf_conn_timeout #define NF_CT_EXT_LABELS_TYPE struct nf_conn_labels +#define NF_CT_EXT_SYNPROXY_TYPE struct nf_conn_synproxy /* Extensions: optional stuff which isn't permanently in struct. */ struct nf_ct_ext { diff --git a/include/net/netfilter/nf_conntrack_seqadj.h b/include/net/netfilter/nf_conntrack_seqadj.h new file mode 100644 index 000000000000..f6177a5fe0ca --- /dev/null +++ b/include/net/netfilter/nf_conntrack_seqadj.h @@ -0,0 +1,51 @@ +#ifndef _NF_CONNTRACK_SEQADJ_H +#define _NF_CONNTRACK_SEQADJ_H + +#include + +/** + * struct nf_ct_seqadj - sequence number adjustment information + * + * @correction_pos: position of the last TCP sequence number modification + * @offset_before: sequence number offset before last modification + * @offset_after: sequence number offset after last modification + */ +struct nf_ct_seqadj { + u32 correction_pos; + s32 offset_before; + s32 offset_after; +}; + +struct nf_conn_seqadj { + struct nf_ct_seqadj seq[IP_CT_DIR_MAX]; +}; + +static inline struct nf_conn_seqadj *nfct_seqadj(const struct nf_conn *ct) +{ + return nf_ct_ext_find(ct, NF_CT_EXT_SEQADJ); +} + +static inline struct nf_conn_seqadj *nfct_seqadj_ext_add(struct nf_conn *ct) +{ + return nf_ct_ext_add(ct, NF_CT_EXT_SEQADJ, GFP_ATOMIC); +} + +extern int nf_ct_seqadj_init(struct nf_conn *ct, enum ip_conntrack_info ctinfo, + s32 off); +extern int nf_ct_seqadj_set(struct nf_conn *ct, enum ip_conntrack_info ctinfo, + __be32 seq, s32 off); +extern void nf_ct_tcp_seqadj_set(struct sk_buff *skb, + struct nf_conn *ct, + enum ip_conntrack_info ctinfo, + s32 off); + +extern int nf_ct_seq_adjust(struct sk_buff *skb, + struct nf_conn *ct, enum ip_conntrack_info ctinfo, + unsigned int protoff); +extern s32 nf_ct_seq_offset(const struct nf_conn *ct, enum ip_conntrack_dir, + u32 seq); + +extern int nf_conntrack_seqadj_init(void); +extern void nf_conntrack_seqadj_fini(void); + +#endif /* _NF_CONNTRACK_SEQADJ_H */ diff --git a/include/net/netfilter/nf_conntrack_synproxy.h b/include/net/netfilter/nf_conntrack_synproxy.h new file mode 100644 index 000000000000..806f54a290d6 --- /dev/null +++ b/include/net/netfilter/nf_conntrack_synproxy.h @@ -0,0 +1,77 @@ +#ifndef _NF_CONNTRACK_SYNPROXY_H +#define _NF_CONNTRACK_SYNPROXY_H + +#include + +struct nf_conn_synproxy { + u32 isn; + u32 its; + u32 tsoff; +}; + +static inline struct nf_conn_synproxy *nfct_synproxy(const struct nf_conn *ct) +{ +#if IS_ENABLED(CONFIG_NETFILTER_SYNPROXY) + return nf_ct_ext_find(ct, NF_CT_EXT_SYNPROXY); +#else + return NULL; +#endif +} + +static inline struct nf_conn_synproxy *nfct_synproxy_ext_add(struct nf_conn *ct) +{ +#if IS_ENABLED(CONFIG_NETFILTER_SYNPROXY) + return nf_ct_ext_add(ct, NF_CT_EXT_SYNPROXY, GFP_ATOMIC); +#else + return NULL; +#endif +} + +struct synproxy_stats { + unsigned int syn_received; + unsigned int cookie_invalid; + unsigned int cookie_valid; + unsigned int cookie_retrans; + unsigned int conn_reopened; +}; + +struct synproxy_net { + struct nf_conn *tmpl; + struct synproxy_stats __percpu *stats; +}; + +extern int synproxy_net_id; +static inline struct synproxy_net *synproxy_pernet(struct net *net) +{ + return net_generic(net, synproxy_net_id); +} + +struct synproxy_options { + u8 options; + u8 wscale; + u16 mss; + u32 tsval; + u32 tsecr; +}; + +struct tcphdr; +struct xt_synproxy_info; +extern void synproxy_parse_options(const struct sk_buff *skb, unsigned int doff, + const struct tcphdr *th, + struct synproxy_options *opts); +extern unsigned int synproxy_options_size(const struct synproxy_options *opts); +extern void synproxy_build_options(struct tcphdr *th, + const struct synproxy_options *opts); + +extern void synproxy_init_timestamp_cookie(const struct xt_synproxy_info *info, + struct synproxy_options *opts); +extern void synproxy_check_timestamp_cookie(struct synproxy_options *opts); + +extern unsigned int synproxy_tstamp_adjust(struct sk_buff *skb, + unsigned int protoff, + struct tcphdr *th, + struct nf_conn *ct, + enum ip_conntrack_info ctinfo, + const struct nf_conn_synproxy *synproxy); + +#endif /* _NF_CONNTRACK_SYNPROXY_H */ diff --git a/include/net/netfilter/nf_nat.h b/include/net/netfilter/nf_nat.h index e2441413675c..59a192420053 100644 --- a/include/net/netfilter/nf_nat.h +++ b/include/net/netfilter/nf_nat.h @@ -13,15 +13,6 @@ enum nf_nat_manip_type { #define HOOK2MANIP(hooknum) ((hooknum) != NF_INET_POST_ROUTING && \ (hooknum) != NF_INET_LOCAL_IN) -/* NAT sequence number modifications */ -struct nf_nat_seq { - /* position of the last TCP sequence number modification (if any) */ - u_int32_t correction_pos; - - /* sequence number offset before and after last modification */ - int32_t offset_before, offset_after; -}; - #include #include #include @@ -39,7 +30,6 @@ struct nf_conn; /* The structure embedded in the conntrack structure. */ struct nf_conn_nat { struct hlist_node bysource; - struct nf_nat_seq seq[IP_CT_DIR_MAX]; struct nf_conn *ct; union nf_conntrack_nat_help help; #if defined(CONFIG_IP_NF_TARGET_MASQUERADE) || \ diff --git a/include/net/netfilter/nf_nat_helper.h b/include/net/netfilter/nf_nat_helper.h index 194c34794923..404324d1d0c4 100644 --- a/include/net/netfilter/nf_nat_helper.h +++ b/include/net/netfilter/nf_nat_helper.h @@ -39,28 +39,9 @@ extern int nf_nat_mangle_udp_packet(struct sk_buff *skb, const char *rep_buffer, unsigned int rep_len); -extern void nf_nat_set_seq_adjust(struct nf_conn *ct, - enum ip_conntrack_info ctinfo, - __be32 seq, s32 off); -extern int nf_nat_seq_adjust(struct sk_buff *skb, - struct nf_conn *ct, - enum ip_conntrack_info ctinfo, - unsigned int protoff); -extern int (*nf_nat_seq_adjust_hook)(struct sk_buff *skb, - struct nf_conn *ct, - enum ip_conntrack_info ctinfo, - unsigned int protoff); - /* Setup NAT on this expected conntrack so it follows master, but goes * to port ct->master->saved_proto. */ extern void nf_nat_follow_master(struct nf_conn *ct, struct nf_conntrack_expect *this); -extern s32 nf_nat_get_offset(const struct nf_conn *ct, - enum ip_conntrack_dir dir, - u32 seq); - -extern void nf_nat_tcp_seq_adjust(struct sk_buff *skb, struct nf_conn *ct, - u32 dir, s32 off); - #endif diff --git a/include/net/tcp.h b/include/net/tcp.h index 09cb5c11ceea..dd5e16f66f84 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -476,9 +476,13 @@ void inet_sk_rx_dst_set(struct sock *sk, const struct sk_buff *skb); /* From syncookies.c */ extern __u32 syncookie_secret[2][16-4+SHA_DIGEST_WORDS]; +extern int __cookie_v4_check(const struct iphdr *iph, const struct tcphdr *th, + u32 cookie); extern struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb, struct ip_options *opt); #ifdef CONFIG_SYN_COOKIES +extern u32 __cookie_v4_init_sequence(const struct iphdr *iph, + const struct tcphdr *th, u16 *mssp); extern __u32 cookie_v4_init_sequence(struct sock *sk, struct sk_buff *skb, __u16 *mss); #else @@ -495,8 +499,12 @@ extern bool cookie_check_timestamp(struct tcp_options_received *opt, struct net *net, bool *ecn_ok); /* From net/ipv6/syncookies.c */ +extern int __cookie_v6_check(const struct ipv6hdr *iph, const struct tcphdr *th, + u32 cookie); extern struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb); #ifdef CONFIG_SYN_COOKIES +extern u32 __cookie_v6_init_sequence(const struct ipv6hdr *iph, + const struct tcphdr *th, u16 *mssp); extern __u32 cookie_v6_init_sequence(struct sock *sk, const struct sk_buff *skb, __u16 *mss); #else diff --git a/include/uapi/linux/netfilter/nf_conntrack_common.h b/include/uapi/linux/netfilter/nf_conntrack_common.h index d69483fb3825..8dd803818ebe 100644 --- a/include/uapi/linux/netfilter/nf_conntrack_common.h +++ b/include/uapi/linux/netfilter/nf_conntrack_common.h @@ -99,7 +99,8 @@ enum ip_conntrack_events { IPCT_PROTOINFO, /* protocol information has changed */ IPCT_HELPER, /* new helper has been set */ IPCT_MARK, /* new mark has been set */ - IPCT_NATSEQADJ, /* NAT is doing sequence adjustment */ + IPCT_SEQADJ, /* sequence adjustment has changed */ + IPCT_NATSEQADJ = IPCT_SEQADJ, IPCT_SECMARK, /* new security mark has been set */ IPCT_LABEL, /* new connlabel has been set */ }; diff --git a/include/uapi/linux/netfilter/nfnetlink_conntrack.h b/include/uapi/linux/netfilter/nfnetlink_conntrack.h index 08fabc6c93f3..acad6c52a652 100644 --- a/include/uapi/linux/netfilter/nfnetlink_conntrack.h +++ b/include/uapi/linux/netfilter/nfnetlink_conntrack.h @@ -42,8 +42,10 @@ enum ctattr_type { CTA_ID, CTA_NAT_DST, CTA_TUPLE_MASTER, - CTA_NAT_SEQ_ADJ_ORIG, - CTA_NAT_SEQ_ADJ_REPLY, + CTA_SEQ_ADJ_ORIG, + CTA_NAT_SEQ_ADJ_ORIG = CTA_SEQ_ADJ_ORIG, + CTA_SEQ_ADJ_REPLY, + CTA_NAT_SEQ_ADJ_REPLY = CTA_SEQ_ADJ_REPLY, CTA_SECMARK, /* obsolete */ CTA_ZONE, CTA_SECCTX, @@ -165,6 +167,15 @@ enum ctattr_protonat { }; #define CTA_PROTONAT_MAX (__CTA_PROTONAT_MAX - 1) +enum ctattr_seqadj { + CTA_SEQADJ_UNSPEC, + CTA_SEQADJ_CORRECTION_POS, + CTA_SEQADJ_OFFSET_BEFORE, + CTA_SEQADJ_OFFSET_AFTER, + __CTA_SEQADJ_MAX +}; +#define CTA_SEQADJ_MAX (__CTA_SEQADJ_MAX - 1) + enum ctattr_natseq { CTA_NAT_SEQ_UNSPEC, CTA_NAT_SEQ_CORRECTION_POS, diff --git a/include/uapi/linux/netfilter/xt_SYNPROXY.h b/include/uapi/linux/netfilter/xt_SYNPROXY.h new file mode 100644 index 000000000000..2d59fbaa93c6 --- /dev/null +++ b/include/uapi/linux/netfilter/xt_SYNPROXY.h @@ -0,0 +1,16 @@ +#ifndef _XT_SYNPROXY_H +#define _XT_SYNPROXY_H + +#define XT_SYNPROXY_OPT_MSS 0x01 +#define XT_SYNPROXY_OPT_WSCALE 0x02 +#define XT_SYNPROXY_OPT_SACK_PERM 0x04 +#define XT_SYNPROXY_OPT_TIMESTAMP 0x08 +#define XT_SYNPROXY_OPT_ECN 0x10 + +struct xt_synproxy_info { + __u8 options; + __u8 wscale; + __u16 mss; +}; + +#endif /* _XT_SYNPROXY_H */ diff --git a/net/ipv4/netfilter/Kconfig b/net/ipv4/netfilter/Kconfig index 4e9028017428..1657e39b291f 100644 --- a/net/ipv4/netfilter/Kconfig +++ b/net/ipv4/netfilter/Kconfig @@ -110,6 +110,19 @@ config IP_NF_TARGET_REJECT To compile it as a module, choose M here. If unsure, say N. +config IP_NF_TARGET_SYNPROXY + tristate "SYNPROXY target support" + depends on NF_CONNTRACK && NETFILTER_ADVANCED + select NETFILTER_SYNPROXY + select SYN_COOKIES + help + The SYNPROXY target allows you to intercept TCP connections and + establish them using syncookies before they are passed on to the + server. This allows to avoid conntrack and server resource usage + during SYN-flood attacks. + + To compile it as a module, choose M here. If unsure, say N. + config IP_NF_TARGET_ULOG tristate "ULOG target support (obsolete)" default m if NETFILTER_ADVANCED=n diff --git a/net/ipv4/netfilter/Makefile b/net/ipv4/netfilter/Makefile index 007b128eecc9..3622b248b6dd 100644 --- a/net/ipv4/netfilter/Makefile +++ b/net/ipv4/netfilter/Makefile @@ -46,6 +46,7 @@ obj-$(CONFIG_IP_NF_TARGET_CLUSTERIP) += ipt_CLUSTERIP.o obj-$(CONFIG_IP_NF_TARGET_ECN) += ipt_ECN.o obj-$(CONFIG_IP_NF_TARGET_MASQUERADE) += ipt_MASQUERADE.o obj-$(CONFIG_IP_NF_TARGET_REJECT) += ipt_REJECT.o +obj-$(CONFIG_IP_NF_TARGET_SYNPROXY) += ipt_SYNPROXY.o obj-$(CONFIG_IP_NF_TARGET_ULOG) += ipt_ULOG.o # generic ARP tables diff --git a/net/ipv4/netfilter/ipt_REJECT.c b/net/ipv4/netfilter/ipt_REJECT.c index 04b18c1ac345..b969131ad1c1 100644 --- a/net/ipv4/netfilter/ipt_REJECT.c +++ b/net/ipv4/netfilter/ipt_REJECT.c @@ -119,7 +119,26 @@ static void send_reset(struct sk_buff *oldskb, int hook) nf_ct_attach(nskb, oldskb); - ip_local_out(nskb); +#ifdef CONFIG_BRIDGE_NETFILTER + /* If we use ip_local_out for bridged traffic, the MAC source on + * the RST will be ours, instead of the destination's. This confuses + * some routers/firewalls, and they drop the packet. So we need to + * build the eth header using the original destination's MAC as the + * source, and send the RST packet directly. + */ + if (oldskb->nf_bridge) { + struct ethhdr *oeth = eth_hdr(oldskb); + nskb->dev = oldskb->nf_bridge->physindev; + niph->tot_len = htons(nskb->len); + ip_send_check(niph); + if (dev_hard_header(nskb, nskb->dev, ntohs(nskb->protocol), + oeth->h_source, oeth->h_dest, nskb->len) < 0) + goto free_nskb; + dev_queue_xmit(nskb); + } else +#endif + ip_local_out(nskb); + return; free_nskb: diff --git a/net/ipv4/netfilter/ipt_SYNPROXY.c b/net/ipv4/netfilter/ipt_SYNPROXY.c new file mode 100644 index 000000000000..94371db6aecc --- /dev/null +++ b/net/ipv4/netfilter/ipt_SYNPROXY.c @@ -0,0 +1,472 @@ +/* + * Copyright (c) 2013 Patrick McHardy + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include +#include +#include + +#include +#include +#include +#include +#include +#include + +static struct iphdr * +synproxy_build_ip(struct sk_buff *skb, u32 saddr, u32 daddr) +{ + struct iphdr *iph; + + skb_reset_network_header(skb); + iph = (struct iphdr *)skb_put(skb, sizeof(*iph)); + iph->version = 4; + iph->ihl = sizeof(*iph) / 4; + iph->tos = 0; + iph->id = 0; + iph->frag_off = htons(IP_DF); + iph->ttl = sysctl_ip_default_ttl; + iph->protocol = IPPROTO_TCP; + iph->check = 0; + iph->saddr = saddr; + iph->daddr = daddr; + + return iph; +} + +static void +synproxy_send_tcp(const struct sk_buff *skb, struct sk_buff *nskb, + struct nf_conntrack *nfct, enum ip_conntrack_info ctinfo, + struct iphdr *niph, struct tcphdr *nth, + unsigned int tcp_hdr_size) +{ + nth->check = ~tcp_v4_check(tcp_hdr_size, niph->saddr, niph->daddr, 0); + nskb->ip_summed = CHECKSUM_PARTIAL; + nskb->csum_start = (unsigned char *)nth - nskb->head; + nskb->csum_offset = offsetof(struct tcphdr, check); + + skb_dst_set_noref(nskb, skb_dst(skb)); + nskb->protocol = htons(ETH_P_IP); + if (ip_route_me_harder(nskb, RTN_UNSPEC)) + goto free_nskb; + + if (nfct) { + nskb->nfct = nfct; + nskb->nfctinfo = ctinfo; + nf_conntrack_get(nfct); + } + + ip_local_out(nskb); + return; + +free_nskb: + kfree_skb(nskb); +} + +static void +synproxy_send_client_synack(const struct sk_buff *skb, const struct tcphdr *th, + const struct synproxy_options *opts) +{ + struct sk_buff *nskb; + struct iphdr *iph, *niph; + struct tcphdr *nth; + unsigned int tcp_hdr_size; + u16 mss = opts->mss; + + iph = ip_hdr(skb); + + tcp_hdr_size = sizeof(*nth) + synproxy_options_size(opts); + nskb = alloc_skb(sizeof(*niph) + tcp_hdr_size + MAX_TCP_HEADER, + GFP_ATOMIC); + if (nskb == NULL) + return; + skb_reserve(nskb, MAX_TCP_HEADER); + + niph = synproxy_build_ip(nskb, iph->daddr, iph->saddr); + + skb_reset_transport_header(nskb); + nth = (struct tcphdr *)skb_put(nskb, tcp_hdr_size); + nth->source = th->dest; + nth->dest = th->source; + nth->seq = htonl(__cookie_v4_init_sequence(iph, th, &mss)); + nth->ack_seq = htonl(ntohl(th->seq) + 1); + tcp_flag_word(nth) = TCP_FLAG_SYN | TCP_FLAG_ACK; + if (opts->options & XT_SYNPROXY_OPT_ECN) + tcp_flag_word(nth) |= TCP_FLAG_ECE; + nth->doff = tcp_hdr_size / 4; + nth->window = 0; + nth->check = 0; + nth->urg_ptr = 0; + + synproxy_build_options(nth, opts); + + synproxy_send_tcp(skb, nskb, skb->nfct, IP_CT_ESTABLISHED_REPLY, + niph, nth, tcp_hdr_size); +} + +static void +synproxy_send_server_syn(const struct synproxy_net *snet, + const struct sk_buff *skb, const struct tcphdr *th, + const struct synproxy_options *opts, u32 recv_seq) +{ + struct sk_buff *nskb; + struct iphdr *iph, *niph; + struct tcphdr *nth; + unsigned int tcp_hdr_size; + + iph = ip_hdr(skb); + + tcp_hdr_size = sizeof(*nth) + synproxy_options_size(opts); + nskb = alloc_skb(sizeof(*niph) + tcp_hdr_size + MAX_TCP_HEADER, + GFP_ATOMIC); + if (nskb == NULL) + return; + skb_reserve(nskb, MAX_TCP_HEADER); + + niph = synproxy_build_ip(nskb, iph->saddr, iph->daddr); + + skb_reset_transport_header(nskb); + nth = (struct tcphdr *)skb_put(nskb, tcp_hdr_size); + nth->source = th->source; + nth->dest = th->dest; + nth->seq = htonl(recv_seq - 1); + /* ack_seq is used to relay our ISN to the synproxy hook to initialize + * sequence number translation once a connection tracking entry exists. + */ + nth->ack_seq = htonl(ntohl(th->ack_seq) - 1); + tcp_flag_word(nth) = TCP_FLAG_SYN; + if (opts->options & XT_SYNPROXY_OPT_ECN) + tcp_flag_word(nth) |= TCP_FLAG_ECE | TCP_FLAG_CWR; + nth->doff = tcp_hdr_size / 4; + nth->window = th->window; + nth->check = 0; + nth->urg_ptr = 0; + + synproxy_build_options(nth, opts); + + synproxy_send_tcp(skb, nskb, &snet->tmpl->ct_general, IP_CT_NEW, + niph, nth, tcp_hdr_size); +} + +static void +synproxy_send_server_ack(const struct synproxy_net *snet, + const struct ip_ct_tcp *state, + const struct sk_buff *skb, const struct tcphdr *th, + const struct synproxy_options *opts) +{ + struct sk_buff *nskb; + struct iphdr *iph, *niph; + struct tcphdr *nth; + unsigned int tcp_hdr_size; + + iph = ip_hdr(skb); + + tcp_hdr_size = sizeof(*nth) + synproxy_options_size(opts); + nskb = alloc_skb(sizeof(*niph) + tcp_hdr_size + MAX_TCP_HEADER, + GFP_ATOMIC); + if (nskb == NULL) + return; + skb_reserve(nskb, MAX_TCP_HEADER); + + niph = synproxy_build_ip(nskb, iph->daddr, iph->saddr); + + skb_reset_transport_header(nskb); + nth = (struct tcphdr *)skb_put(nskb, tcp_hdr_size); + nth->source = th->dest; + nth->dest = th->source; + nth->seq = htonl(ntohl(th->ack_seq)); + nth->ack_seq = htonl(ntohl(th->seq) + 1); + tcp_flag_word(nth) = TCP_FLAG_ACK; + nth->doff = tcp_hdr_size / 4; + nth->window = htons(state->seen[IP_CT_DIR_ORIGINAL].td_maxwin); + nth->check = 0; + nth->urg_ptr = 0; + + synproxy_build_options(nth, opts); + + synproxy_send_tcp(skb, nskb, NULL, 0, niph, nth, tcp_hdr_size); +} + +static void +synproxy_send_client_ack(const struct synproxy_net *snet, + const struct sk_buff *skb, const struct tcphdr *th, + const struct synproxy_options *opts) +{ + struct sk_buff *nskb; + struct iphdr *iph, *niph; + struct tcphdr *nth; + unsigned int tcp_hdr_size; + + iph = ip_hdr(skb); + + tcp_hdr_size = sizeof(*nth) + synproxy_options_size(opts); + nskb = alloc_skb(sizeof(*niph) + tcp_hdr_size + MAX_TCP_HEADER, + GFP_ATOMIC); + if (nskb == NULL) + return; + skb_reserve(nskb, MAX_TCP_HEADER); + + niph = synproxy_build_ip(nskb, iph->saddr, iph->daddr); + + skb_reset_transport_header(nskb); + nth = (struct tcphdr *)skb_put(nskb, tcp_hdr_size); + nth->source = th->source; + nth->dest = th->dest; + nth->seq = htonl(ntohl(th->seq) + 1); + nth->ack_seq = th->ack_seq; + tcp_flag_word(nth) = TCP_FLAG_ACK; + nth->doff = tcp_hdr_size / 4; + nth->window = ntohs(htons(th->window) >> opts->wscale); + nth->check = 0; + nth->urg_ptr = 0; + + synproxy_build_options(nth, opts); + + synproxy_send_tcp(skb, nskb, NULL, 0, niph, nth, tcp_hdr_size); +} + +static bool +synproxy_recv_client_ack(const struct synproxy_net *snet, + const struct sk_buff *skb, const struct tcphdr *th, + struct synproxy_options *opts, u32 recv_seq) +{ + int mss; + + mss = __cookie_v4_check(ip_hdr(skb), th, ntohl(th->ack_seq) - 1); + if (mss == 0) { + this_cpu_inc(snet->stats->cookie_invalid); + return false; + } + + this_cpu_inc(snet->stats->cookie_valid); + opts->mss = mss; + + if (opts->options & XT_SYNPROXY_OPT_TIMESTAMP) + synproxy_check_timestamp_cookie(opts); + + synproxy_send_server_syn(snet, skb, th, opts, recv_seq); + return true; +} + +static unsigned int +synproxy_tg4(struct sk_buff *skb, const struct xt_action_param *par) +{ + const struct xt_synproxy_info *info = par->targinfo; + struct synproxy_net *snet = synproxy_pernet(dev_net(par->in)); + struct synproxy_options opts = {}; + struct tcphdr *th, _th; + + if (nf_ip_checksum(skb, par->hooknum, par->thoff, IPPROTO_TCP)) + return NF_DROP; + + th = skb_header_pointer(skb, par->thoff, sizeof(_th), &_th); + if (th == NULL) + return NF_DROP; + + synproxy_parse_options(skb, par->thoff, th, &opts); + + if (th->syn && !th->ack) { + /* Initial SYN from client */ + this_cpu_inc(snet->stats->syn_received); + + if (th->ece && th->cwr) + opts.options |= XT_SYNPROXY_OPT_ECN; + + opts.options &= info->options; + if (opts.options & XT_SYNPROXY_OPT_TIMESTAMP) + synproxy_init_timestamp_cookie(info, &opts); + else + opts.options &= ~(XT_SYNPROXY_OPT_WSCALE | + XT_SYNPROXY_OPT_SACK_PERM | + XT_SYNPROXY_OPT_ECN); + + synproxy_send_client_synack(skb, th, &opts); + } else if (th->ack && !(th->fin || th->rst)) + /* ACK from client */ + synproxy_recv_client_ack(snet, skb, th, &opts, ntohl(th->seq)); + + return NF_DROP; +} + +static unsigned int ipv4_synproxy_hook(unsigned int hooknum, + struct sk_buff *skb, + const struct net_device *in, + const struct net_device *out, + int (*okfn)(struct sk_buff *)) +{ + struct synproxy_net *snet = synproxy_pernet(dev_net(in ? : out)); + enum ip_conntrack_info ctinfo; + struct nf_conn *ct; + struct nf_conn_synproxy *synproxy; + struct synproxy_options opts = {}; + const struct ip_ct_tcp *state; + struct tcphdr *th, _th; + unsigned int thoff; + + ct = nf_ct_get(skb, &ctinfo); + if (ct == NULL) + return NF_ACCEPT; + + synproxy = nfct_synproxy(ct); + if (synproxy == NULL) + return NF_ACCEPT; + + if (nf_is_loopback_packet(skb)) + return NF_ACCEPT; + + thoff = ip_hdrlen(skb); + th = skb_header_pointer(skb, thoff, sizeof(_th), &_th); + if (th == NULL) + return NF_DROP; + + state = &ct->proto.tcp; + switch (state->state) { + case TCP_CONNTRACK_CLOSE: + if (th->rst && !test_bit(IPS_SEEN_REPLY_BIT, &ct->status)) { + nf_ct_seqadj_init(ct, ctinfo, synproxy->isn - + ntohl(th->seq) + 1); + break; + } + + if (!th->syn || th->ack || + CTINFO2DIR(ctinfo) != IP_CT_DIR_ORIGINAL) + break; + + /* Reopened connection - reset the sequence number and timestamp + * adjustments, they will get initialized once the connection is + * reestablished. + */ + nf_ct_seqadj_init(ct, ctinfo, 0); + synproxy->tsoff = 0; + this_cpu_inc(snet->stats->conn_reopened); + + /* fall through */ + case TCP_CONNTRACK_SYN_SENT: + synproxy_parse_options(skb, thoff, th, &opts); + + if (!th->syn && th->ack && + CTINFO2DIR(ctinfo) == IP_CT_DIR_ORIGINAL) { + /* Keep-Alives are sent with SEG.SEQ = SND.NXT-1, + * therefore we need to add 1 to make the SYN sequence + * number match the one of first SYN. + */ + if (synproxy_recv_client_ack(snet, skb, th, &opts, + ntohl(th->seq) + 1)) + this_cpu_inc(snet->stats->cookie_retrans); + + return NF_DROP; + } + + synproxy->isn = ntohl(th->ack_seq); + if (opts.options & XT_SYNPROXY_OPT_TIMESTAMP) + synproxy->its = opts.tsecr; + break; + case TCP_CONNTRACK_SYN_RECV: + if (!th->syn || !th->ack) + break; + + synproxy_parse_options(skb, thoff, th, &opts); + if (opts.options & XT_SYNPROXY_OPT_TIMESTAMP) + synproxy->tsoff = opts.tsval - synproxy->its; + + opts.options &= ~(XT_SYNPROXY_OPT_MSS | + XT_SYNPROXY_OPT_WSCALE | + XT_SYNPROXY_OPT_SACK_PERM); + + swap(opts.tsval, opts.tsecr); + synproxy_send_server_ack(snet, state, skb, th, &opts); + + nf_ct_seqadj_init(ct, ctinfo, synproxy->isn - ntohl(th->seq)); + + swap(opts.tsval, opts.tsecr); + synproxy_send_client_ack(snet, skb, th, &opts); + + consume_skb(skb); + return NF_STOLEN; + default: + break; + } + + synproxy_tstamp_adjust(skb, thoff, th, ct, ctinfo, synproxy); + return NF_ACCEPT; +} + +static int synproxy_tg4_check(const struct xt_tgchk_param *par) +{ + const struct ipt_entry *e = par->entryinfo; + + if (e->ip.proto != IPPROTO_TCP || + e->ip.invflags & XT_INV_PROTO) + return -EINVAL; + + return nf_ct_l3proto_try_module_get(par->family); +} + +static void synproxy_tg4_destroy(const struct xt_tgdtor_param *par) +{ + nf_ct_l3proto_module_put(par->family); +} + +static struct xt_target synproxy_tg4_reg __read_mostly = { + .name = "SYNPROXY", + .family = NFPROTO_IPV4, + .target = synproxy_tg4, + .targetsize = sizeof(struct xt_synproxy_info), + .checkentry = synproxy_tg4_check, + .destroy = synproxy_tg4_destroy, + .me = THIS_MODULE, +}; + +static struct nf_hook_ops ipv4_synproxy_ops[] __read_mostly = { + { + .hook = ipv4_synproxy_hook, + .owner = THIS_MODULE, + .pf = NFPROTO_IPV4, + .hooknum = NF_INET_LOCAL_IN, + .priority = NF_IP_PRI_CONNTRACK_CONFIRM - 1, + }, + { + .hook = ipv4_synproxy_hook, + .owner = THIS_MODULE, + .pf = NFPROTO_IPV4, + .hooknum = NF_INET_POST_ROUTING, + .priority = NF_IP_PRI_CONNTRACK_CONFIRM - 1, + }, +}; + +static int __init synproxy_tg4_init(void) +{ + int err; + + err = nf_register_hooks(ipv4_synproxy_ops, + ARRAY_SIZE(ipv4_synproxy_ops)); + if (err < 0) + goto err1; + + err = xt_register_target(&synproxy_tg4_reg); + if (err < 0) + goto err2; + + return 0; + +err2: + nf_unregister_hooks(ipv4_synproxy_ops, ARRAY_SIZE(ipv4_synproxy_ops)); +err1: + return err; +} + +static void __exit synproxy_tg4_exit(void) +{ + xt_unregister_target(&synproxy_tg4_reg); + nf_unregister_hooks(ipv4_synproxy_ops, ARRAY_SIZE(ipv4_synproxy_ops)); +} + +module_init(synproxy_tg4_init); +module_exit(synproxy_tg4_exit); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Patrick McHardy "); diff --git a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c index 0a2e0e3e95ba..86f5b34a4ed1 100644 --- a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c +++ b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c @@ -25,6 +25,7 @@ #include #include #include +#include #include #include #include @@ -136,11 +137,7 @@ static unsigned int ipv4_confirm(unsigned int hooknum, /* adjust seqs for loopback traffic only in outgoing direction */ if (test_bit(IPS_SEQ_ADJUST_BIT, &ct->status) && !nf_is_loopback_packet(skb)) { - typeof(nf_nat_seq_adjust_hook) seq_adjust; - - seq_adjust = rcu_dereference(nf_nat_seq_adjust_hook); - if (!seq_adjust || - !seq_adjust(skb, ct, ctinfo, ip_hdrlen(skb))) { + if (!nf_ct_seq_adjust(skb, ct, ctinfo, ip_hdrlen(skb))) { NF_CT_STAT_INC_ATOMIC(nf_ct_net(ct), drop); return NF_DROP; } diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c index b05c96e7af8b..14a15c49129d 100644 --- a/net/ipv4/syncookies.c +++ b/net/ipv4/syncookies.c @@ -160,26 +160,33 @@ static __u16 const msstab[] = { * Generate a syncookie. mssp points to the mss, which is returned * rounded down to the value encoded in the cookie. */ -__u32 cookie_v4_init_sequence(struct sock *sk, struct sk_buff *skb, __u16 *mssp) +u32 __cookie_v4_init_sequence(const struct iphdr *iph, const struct tcphdr *th, + u16 *mssp) { - const struct iphdr *iph = ip_hdr(skb); - const struct tcphdr *th = tcp_hdr(skb); int mssind; const __u16 mss = *mssp; - tcp_synq_overflow(sk); - for (mssind = ARRAY_SIZE(msstab) - 1; mssind ; mssind--) if (mss >= msstab[mssind]) break; *mssp = msstab[mssind]; - NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_SYNCOOKIESSENT); - return secure_tcp_syn_cookie(iph->saddr, iph->daddr, th->source, th->dest, ntohl(th->seq), jiffies / (HZ * 60), mssind); } +EXPORT_SYMBOL_GPL(__cookie_v4_init_sequence); + +__u32 cookie_v4_init_sequence(struct sock *sk, struct sk_buff *skb, __u16 *mssp) +{ + const struct iphdr *iph = ip_hdr(skb); + const struct tcphdr *th = tcp_hdr(skb); + + tcp_synq_overflow(sk); + NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_SYNCOOKIESSENT); + + return __cookie_v4_init_sequence(iph, th, mssp); +} /* * This (misnamed) value is the age of syncookie which is permitted. @@ -192,10 +199,9 @@ __u32 cookie_v4_init_sequence(struct sock *sk, struct sk_buff *skb, __u16 *mssp) * Check if a ack sequence number is a valid syncookie. * Return the decoded mss if it is, or 0 if not. */ -static inline int cookie_check(struct sk_buff *skb, __u32 cookie) +int __cookie_v4_check(const struct iphdr *iph, const struct tcphdr *th, + u32 cookie) { - const struct iphdr *iph = ip_hdr(skb); - const struct tcphdr *th = tcp_hdr(skb); __u32 seq = ntohl(th->seq) - 1; __u32 mssind = check_tcp_syn_cookie(cookie, iph->saddr, iph->daddr, th->source, th->dest, seq, @@ -204,6 +210,7 @@ static inline int cookie_check(struct sk_buff *skb, __u32 cookie) return mssind < ARRAY_SIZE(msstab) ? msstab[mssind] : 0; } +EXPORT_SYMBOL_GPL(__cookie_v4_check); static inline struct sock *get_cookie_sock(struct sock *sk, struct sk_buff *skb, struct request_sock *req, @@ -284,7 +291,7 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb, goto out; if (tcp_synq_no_recent_overflow(sk) || - (mss = cookie_check(skb, cookie)) == 0) { + (mss = __cookie_v4_check(ip_hdr(skb), th, cookie)) == 0) { NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_SYNCOOKIESFAILED); goto out; } diff --git a/net/ipv6/netfilter/Kconfig b/net/ipv6/netfilter/Kconfig index 4433ab40e7de..a7f842b29b67 100644 --- a/net/ipv6/netfilter/Kconfig +++ b/net/ipv6/netfilter/Kconfig @@ -153,6 +153,19 @@ config IP6_NF_TARGET_REJECT To compile it as a module, choose M here. If unsure, say N. +config IP6_NF_TARGET_SYNPROXY + tristate "SYNPROXY target support" + depends on NF_CONNTRACK && NETFILTER_ADVANCED + select NETFILTER_SYNPROXY + select SYN_COOKIES + help + The SYNPROXY target allows you to intercept TCP connections and + establish them using syncookies before they are passed on to the + server. This allows to avoid conntrack and server resource usage + during SYN-flood attacks. + + To compile it as a module, choose M here. If unsure, say N. + config IP6_NF_MANGLE tristate "Packet mangling" default m if NETFILTER_ADVANCED=n diff --git a/net/ipv6/netfilter/Makefile b/net/ipv6/netfilter/Makefile index 2d11fcc2cf3c..2b53738f798c 100644 --- a/net/ipv6/netfilter/Makefile +++ b/net/ipv6/netfilter/Makefile @@ -14,7 +14,7 @@ obj-$(CONFIG_NF_NAT_IPV6) += ip6table_nat.o nf_conntrack_ipv6-y := nf_conntrack_l3proto_ipv6.o nf_conntrack_proto_icmpv6.o # l3 independent conntrack -obj-$(CONFIG_NF_CONNTRACK_IPV6) += nf_conntrack_ipv6.o nf_defrag_ipv6.o +obj-$(CONFIG_NF_CONNTRACK_IPV6) += nf_conntrack_ipv6.o nf_nat_ipv6-y := nf_nat_l3proto_ipv6.o nf_nat_proto_icmpv6.o obj-$(CONFIG_NF_NAT_IPV6) += nf_nat_ipv6.o @@ -37,3 +37,4 @@ obj-$(CONFIG_IP6_NF_MATCH_RT) += ip6t_rt.o obj-$(CONFIG_IP6_NF_TARGET_MASQUERADE) += ip6t_MASQUERADE.o obj-$(CONFIG_IP6_NF_TARGET_NPT) += ip6t_NPT.o obj-$(CONFIG_IP6_NF_TARGET_REJECT) += ip6t_REJECT.o +obj-$(CONFIG_IP6_NF_TARGET_SYNPROXY) += ip6t_SYNPROXY.o diff --git a/net/ipv6/netfilter/ip6t_REJECT.c b/net/ipv6/netfilter/ip6t_REJECT.c index 70f9abc0efe9..56eef30ee5f6 100644 --- a/net/ipv6/netfilter/ip6t_REJECT.c +++ b/net/ipv6/netfilter/ip6t_REJECT.c @@ -169,7 +169,25 @@ static void send_reset(struct net *net, struct sk_buff *oldskb) nf_ct_attach(nskb, oldskb); - ip6_local_out(nskb); +#ifdef CONFIG_BRIDGE_NETFILTER + /* If we use ip6_local_out for bridged traffic, the MAC source on + * the RST will be ours, instead of the destination's. This confuses + * some routers/firewalls, and they drop the packet. So we need to + * build the eth header using the original destination's MAC as the + * source, and send the RST packet directly. + */ + if (oldskb->nf_bridge) { + struct ethhdr *oeth = eth_hdr(oldskb); + nskb->dev = oldskb->nf_bridge->physindev; + nskb->protocol = htons(ETH_P_IPV6); + ip6h->payload_len = htons(sizeof(struct tcphdr)); + if (dev_hard_header(nskb, nskb->dev, ntohs(nskb->protocol), + oeth->h_source, oeth->h_dest, nskb->len) < 0) + return; + dev_queue_xmit(nskb); + } else +#endif + ip6_local_out(nskb); } static inline void diff --git a/net/ipv6/netfilter/ip6t_SYNPROXY.c b/net/ipv6/netfilter/ip6t_SYNPROXY.c new file mode 100644 index 000000000000..4270a9b145e5 --- /dev/null +++ b/net/ipv6/netfilter/ip6t_SYNPROXY.c @@ -0,0 +1,495 @@ +/* + * Copyright (c) 2013 Patrick McHardy + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include + +static struct ipv6hdr * +synproxy_build_ip(struct sk_buff *skb, const struct in6_addr *saddr, + const struct in6_addr *daddr) +{ + struct ipv6hdr *iph; + + skb_reset_network_header(skb); + iph = (struct ipv6hdr *)skb_put(skb, sizeof(*iph)); + ip6_flow_hdr(iph, 0, 0); + iph->hop_limit = 64; //XXX + iph->nexthdr = IPPROTO_TCP; + iph->saddr = *saddr; + iph->daddr = *daddr; + + return iph; +} + +static void +synproxy_send_tcp(const struct sk_buff *skb, struct sk_buff *nskb, + struct nf_conntrack *nfct, enum ip_conntrack_info ctinfo, + struct ipv6hdr *niph, struct tcphdr *nth, + unsigned int tcp_hdr_size) +{ + struct net *net = nf_ct_net((struct nf_conn *)nfct); + struct dst_entry *dst; + struct flowi6 fl6; + + nth->check = ~tcp_v6_check(tcp_hdr_size, &niph->saddr, &niph->daddr, 0); + nskb->ip_summed = CHECKSUM_PARTIAL; + nskb->csum_start = (unsigned char *)nth - nskb->head; + nskb->csum_offset = offsetof(struct tcphdr, check); + + memset(&fl6, 0, sizeof(fl6)); + fl6.flowi6_proto = IPPROTO_TCP; + fl6.saddr = niph->saddr; + fl6.daddr = niph->daddr; + fl6.fl6_sport = nth->source; + fl6.fl6_dport = nth->dest; + security_skb_classify_flow((struct sk_buff *)skb, flowi6_to_flowi(&fl6)); + dst = ip6_route_output(net, NULL, &fl6); + if (dst == NULL || dst->error) { + dst_release(dst); + goto free_nskb; + } + dst = xfrm_lookup(net, dst, flowi6_to_flowi(&fl6), NULL, 0); + if (IS_ERR(dst)) + goto free_nskb; + + skb_dst_set(nskb, dst); + + if (nfct) { + nskb->nfct = nfct; + nskb->nfctinfo = ctinfo; + nf_conntrack_get(nfct); + } + + ip6_local_out(nskb); + return; + +free_nskb: + kfree_skb(nskb); +} + +static void +synproxy_send_client_synack(const struct sk_buff *skb, const struct tcphdr *th, + const struct synproxy_options *opts) +{ + struct sk_buff *nskb; + struct ipv6hdr *iph, *niph; + struct tcphdr *nth; + unsigned int tcp_hdr_size; + u16 mss = opts->mss; + + iph = ipv6_hdr(skb); + + tcp_hdr_size = sizeof(*nth) + synproxy_options_size(opts); + nskb = alloc_skb(sizeof(*niph) + tcp_hdr_size + MAX_TCP_HEADER, + GFP_ATOMIC); + if (nskb == NULL) + return; + skb_reserve(nskb, MAX_TCP_HEADER); + + niph = synproxy_build_ip(nskb, &iph->daddr, &iph->saddr); + + skb_reset_transport_header(nskb); + nth = (struct tcphdr *)skb_put(nskb, tcp_hdr_size); + nth->source = th->dest; + nth->dest = th->source; + nth->seq = htonl(__cookie_v6_init_sequence(iph, th, &mss)); + nth->ack_seq = htonl(ntohl(th->seq) + 1); + tcp_flag_word(nth) = TCP_FLAG_SYN | TCP_FLAG_ACK; + if (opts->options & XT_SYNPROXY_OPT_ECN) + tcp_flag_word(nth) |= TCP_FLAG_ECE; + nth->doff = tcp_hdr_size / 4; + nth->window = 0; + nth->check = 0; + nth->urg_ptr = 0; + + synproxy_build_options(nth, opts); + + synproxy_send_tcp(skb, nskb, skb->nfct, IP_CT_ESTABLISHED_REPLY, + niph, nth, tcp_hdr_size); +} + +static void +synproxy_send_server_syn(const struct synproxy_net *snet, + const struct sk_buff *skb, const struct tcphdr *th, + const struct synproxy_options *opts, u32 recv_seq) +{ + struct sk_buff *nskb; + struct ipv6hdr *iph, *niph; + struct tcphdr *nth; + unsigned int tcp_hdr_size; + + iph = ipv6_hdr(skb); + + tcp_hdr_size = sizeof(*nth) + synproxy_options_size(opts); + nskb = alloc_skb(sizeof(*niph) + tcp_hdr_size + MAX_TCP_HEADER, + GFP_ATOMIC); + if (nskb == NULL) + return; + skb_reserve(nskb, MAX_TCP_HEADER); + + niph = synproxy_build_ip(nskb, &iph->saddr, &iph->daddr); + + skb_reset_transport_header(nskb); + nth = (struct tcphdr *)skb_put(nskb, tcp_hdr_size); + nth->source = th->source; + nth->dest = th->dest; + nth->seq = htonl(recv_seq - 1); + /* ack_seq is used to relay our ISN to the synproxy hook to initialize + * sequence number translation once a connection tracking entry exists. + */ + nth->ack_seq = htonl(ntohl(th->ack_seq) - 1); + tcp_flag_word(nth) = TCP_FLAG_SYN; + if (opts->options & XT_SYNPROXY_OPT_ECN) + tcp_flag_word(nth) |= TCP_FLAG_ECE | TCP_FLAG_CWR; + nth->doff = tcp_hdr_size / 4; + nth->window = th->window; + nth->check = 0; + nth->urg_ptr = 0; + + synproxy_build_options(nth, opts); + + synproxy_send_tcp(skb, nskb, &snet->tmpl->ct_general, IP_CT_NEW, + niph, nth, tcp_hdr_size); +} + +static void +synproxy_send_server_ack(const struct synproxy_net *snet, + const struct ip_ct_tcp *state, + const struct sk_buff *skb, const struct tcphdr *th, + const struct synproxy_options *opts) +{ + struct sk_buff *nskb; + struct ipv6hdr *iph, *niph; + struct tcphdr *nth; + unsigned int tcp_hdr_size; + + iph = ipv6_hdr(skb); + + tcp_hdr_size = sizeof(*nth) + synproxy_options_size(opts); + nskb = alloc_skb(sizeof(*niph) + tcp_hdr_size + MAX_TCP_HEADER, + GFP_ATOMIC); + if (nskb == NULL) + return; + skb_reserve(nskb, MAX_TCP_HEADER); + + niph = synproxy_build_ip(nskb, &iph->daddr, &iph->saddr); + + skb_reset_transport_header(nskb); + nth = (struct tcphdr *)skb_put(nskb, tcp_hdr_size); + nth->source = th->dest; + nth->dest = th->source; + nth->seq = htonl(ntohl(th->ack_seq)); + nth->ack_seq = htonl(ntohl(th->seq) + 1); + tcp_flag_word(nth) = TCP_FLAG_ACK; + nth->doff = tcp_hdr_size / 4; + nth->window = htons(state->seen[IP_CT_DIR_ORIGINAL].td_maxwin); + nth->check = 0; + nth->urg_ptr = 0; + + synproxy_build_options(nth, opts); + + synproxy_send_tcp(skb, nskb, NULL, 0, niph, nth, tcp_hdr_size); +} + +static void +synproxy_send_client_ack(const struct synproxy_net *snet, + const struct sk_buff *skb, const struct tcphdr *th, + const struct synproxy_options *opts) +{ + struct sk_buff *nskb; + struct ipv6hdr *iph, *niph; + struct tcphdr *nth; + unsigned int tcp_hdr_size; + + iph = ipv6_hdr(skb); + + tcp_hdr_size = sizeof(*nth) + synproxy_options_size(opts); + nskb = alloc_skb(sizeof(*niph) + tcp_hdr_size + MAX_TCP_HEADER, + GFP_ATOMIC); + if (nskb == NULL) + return; + skb_reserve(nskb, MAX_TCP_HEADER); + + niph = synproxy_build_ip(nskb, &iph->saddr, &iph->daddr); + + skb_reset_transport_header(nskb); + nth = (struct tcphdr *)skb_put(nskb, tcp_hdr_size); + nth->source = th->source; + nth->dest = th->dest; + nth->seq = htonl(ntohl(th->seq) + 1); + nth->ack_seq = th->ack_seq; + tcp_flag_word(nth) = TCP_FLAG_ACK; + nth->doff = tcp_hdr_size / 4; + nth->window = ntohs(htons(th->window) >> opts->wscale); + nth->check = 0; + nth->urg_ptr = 0; + + synproxy_build_options(nth, opts); + + synproxy_send_tcp(skb, nskb, NULL, 0, niph, nth, tcp_hdr_size); +} + +static bool +synproxy_recv_client_ack(const struct synproxy_net *snet, + const struct sk_buff *skb, const struct tcphdr *th, + struct synproxy_options *opts, u32 recv_seq) +{ + int mss; + + mss = __cookie_v6_check(ipv6_hdr(skb), th, ntohl(th->ack_seq) - 1); + if (mss == 0) { + this_cpu_inc(snet->stats->cookie_invalid); + return false; + } + + this_cpu_inc(snet->stats->cookie_valid); + opts->mss = mss; + + if (opts->options & XT_SYNPROXY_OPT_TIMESTAMP) + synproxy_check_timestamp_cookie(opts); + + synproxy_send_server_syn(snet, skb, th, opts, recv_seq); + return true; +} + +static unsigned int +synproxy_tg6(struct sk_buff *skb, const struct xt_action_param *par) +{ + const struct xt_synproxy_info *info = par->targinfo; + struct synproxy_net *snet = synproxy_pernet(dev_net(par->in)); + struct synproxy_options opts = {}; + struct tcphdr *th, _th; + + if (nf_ip6_checksum(skb, par->hooknum, par->thoff, IPPROTO_TCP)) + return NF_DROP; + + th = skb_header_pointer(skb, par->thoff, sizeof(_th), &_th); + if (th == NULL) + return NF_DROP; + + synproxy_parse_options(skb, par->thoff, th, &opts); + + if (th->syn) { + /* Initial SYN from client */ + this_cpu_inc(snet->stats->syn_received); + + if (th->ece && th->cwr) + opts.options |= XT_SYNPROXY_OPT_ECN; + + opts.options &= info->options; + if (opts.options & XT_SYNPROXY_OPT_TIMESTAMP) + synproxy_init_timestamp_cookie(info, &opts); + else + opts.options &= ~(XT_SYNPROXY_OPT_WSCALE | + XT_SYNPROXY_OPT_SACK_PERM | + XT_SYNPROXY_OPT_ECN); + + synproxy_send_client_synack(skb, th, &opts); + } else if (th->ack && !(th->fin || th->rst)) + /* ACK from client */ + synproxy_recv_client_ack(snet, skb, th, &opts, ntohl(th->seq)); + + return NF_DROP; +} + +static unsigned int ipv6_synproxy_hook(unsigned int hooknum, + struct sk_buff *skb, + const struct net_device *in, + const struct net_device *out, + int (*okfn)(struct sk_buff *)) +{ + struct synproxy_net *snet = synproxy_pernet(dev_net(in ? : out)); + enum ip_conntrack_info ctinfo; + struct nf_conn *ct; + struct nf_conn_synproxy *synproxy; + struct synproxy_options opts = {}; + const struct ip_ct_tcp *state; + struct tcphdr *th, _th; + __be16 frag_off; + u8 nexthdr; + int thoff; + + ct = nf_ct_get(skb, &ctinfo); + if (ct == NULL) + return NF_ACCEPT; + + synproxy = nfct_synproxy(ct); + if (synproxy == NULL) + return NF_ACCEPT; + + if (nf_is_loopback_packet(skb)) + return NF_ACCEPT; + + nexthdr = ipv6_hdr(skb)->nexthdr; + thoff = ipv6_skip_exthdr(skb, sizeof(struct ipv6hdr), &nexthdr, + &frag_off); + if (thoff < 0) + return NF_ACCEPT; + + th = skb_header_pointer(skb, thoff, sizeof(_th), &_th); + if (th == NULL) + return NF_DROP; + + state = &ct->proto.tcp; + switch (state->state) { + case TCP_CONNTRACK_CLOSE: + if (th->rst && !test_bit(IPS_SEEN_REPLY_BIT, &ct->status)) { + nf_ct_seqadj_init(ct, ctinfo, synproxy->isn - + ntohl(th->seq) + 1); + break; + } + + if (!th->syn || th->ack || + CTINFO2DIR(ctinfo) != IP_CT_DIR_ORIGINAL) + break; + + /* Reopened connection - reset the sequence number and timestamp + * adjustments, they will get initialized once the connection is + * reestablished. + */ + nf_ct_seqadj_init(ct, ctinfo, 0); + synproxy->tsoff = 0; + this_cpu_inc(snet->stats->conn_reopened); + + /* fall through */ + case TCP_CONNTRACK_SYN_SENT: + synproxy_parse_options(skb, thoff, th, &opts); + + if (!th->syn && th->ack && + CTINFO2DIR(ctinfo) == IP_CT_DIR_ORIGINAL) { + /* Keep-Alives are sent with SEG.SEQ = SND.NXT-1, + * therefore we need to add 1 to make the SYN sequence + * number match the one of first SYN. + */ + if (synproxy_recv_client_ack(snet, skb, th, &opts, + ntohl(th->seq) + 1)) + this_cpu_inc(snet->stats->cookie_retrans); + + return NF_DROP; + } + + synproxy->isn = ntohl(th->ack_seq); + if (opts.options & XT_SYNPROXY_OPT_TIMESTAMP) + synproxy->its = opts.tsecr; + break; + case TCP_CONNTRACK_SYN_RECV: + if (!th->syn || !th->ack) + break; + + synproxy_parse_options(skb, thoff, th, &opts); + if (opts.options & XT_SYNPROXY_OPT_TIMESTAMP) + synproxy->tsoff = opts.tsval - synproxy->its; + + opts.options &= ~(XT_SYNPROXY_OPT_MSS | + XT_SYNPROXY_OPT_WSCALE | + XT_SYNPROXY_OPT_SACK_PERM); + + swap(opts.tsval, opts.tsecr); + synproxy_send_server_ack(snet, state, skb, th, &opts); + + nf_ct_seqadj_init(ct, ctinfo, synproxy->isn - ntohl(th->seq)); + + swap(opts.tsval, opts.tsecr); + synproxy_send_client_ack(snet, skb, th, &opts); + + consume_skb(skb); + return NF_STOLEN; + default: + break; + } + + synproxy_tstamp_adjust(skb, thoff, th, ct, ctinfo, synproxy); + return NF_ACCEPT; +} + +static int synproxy_tg6_check(const struct xt_tgchk_param *par) +{ + const struct ip6t_entry *e = par->entryinfo; + + if (!(e->ipv6.flags & IP6T_F_PROTO) || + e->ipv6.proto != IPPROTO_TCP || + e->ipv6.invflags & XT_INV_PROTO) + return -EINVAL; + + return nf_ct_l3proto_try_module_get(par->family); +} + +static void synproxy_tg6_destroy(const struct xt_tgdtor_param *par) +{ + nf_ct_l3proto_module_put(par->family); +} + +static struct xt_target synproxy_tg6_reg __read_mostly = { + .name = "SYNPROXY", + .family = NFPROTO_IPV6, + .target = synproxy_tg6, + .targetsize = sizeof(struct xt_synproxy_info), + .checkentry = synproxy_tg6_check, + .destroy = synproxy_tg6_destroy, + .me = THIS_MODULE, +}; + +static struct nf_hook_ops ipv6_synproxy_ops[] __read_mostly = { + { + .hook = ipv6_synproxy_hook, + .owner = THIS_MODULE, + .pf = NFPROTO_IPV6, + .hooknum = NF_INET_LOCAL_IN, + .priority = NF_IP_PRI_CONNTRACK_CONFIRM - 1, + }, + { + .hook = ipv6_synproxy_hook, + .owner = THIS_MODULE, + .pf = NFPROTO_IPV6, + .hooknum = NF_INET_POST_ROUTING, + .priority = NF_IP_PRI_CONNTRACK_CONFIRM - 1, + }, +}; + +static int __init synproxy_tg6_init(void) +{ + int err; + + err = nf_register_hooks(ipv6_synproxy_ops, + ARRAY_SIZE(ipv6_synproxy_ops)); + if (err < 0) + goto err1; + + err = xt_register_target(&synproxy_tg6_reg); + if (err < 0) + goto err2; + + return 0; + +err2: + nf_unregister_hooks(ipv6_synproxy_ops, ARRAY_SIZE(ipv6_synproxy_ops)); +err1: + return err; +} + +static void __exit synproxy_tg6_exit(void) +{ + xt_unregister_target(&synproxy_tg6_reg); + nf_unregister_hooks(ipv6_synproxy_ops, ARRAY_SIZE(ipv6_synproxy_ops)); +} + +module_init(synproxy_tg6_init); +module_exit(synproxy_tg6_exit); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Patrick McHardy "); diff --git a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c index c9b6a6e6a1e8..d6e4dd8b58df 100644 --- a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c +++ b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c @@ -28,6 +28,7 @@ #include #include #include +#include #include #include #include @@ -158,11 +159,7 @@ static unsigned int ipv6_confirm(unsigned int hooknum, /* adjust seqs for loopback traffic only in outgoing direction */ if (test_bit(IPS_SEQ_ADJUST_BIT, &ct->status) && !nf_is_loopback_packet(skb)) { - typeof(nf_nat_seq_adjust_hook) seq_adjust; - - seq_adjust = rcu_dereference(nf_nat_seq_adjust_hook); - if (!seq_adjust || - !seq_adjust(skb, ct, ctinfo, protoff)) { + if (!nf_ct_seq_adjust(skb, ct, ctinfo, protoff)) { NF_CT_STAT_INC_ATOMIC(nf_ct_net(ct), drop); return NF_DROP; } diff --git a/net/ipv6/syncookies.c b/net/ipv6/syncookies.c index d5dda20bd717..bf63ac8a49b9 100644 --- a/net/ipv6/syncookies.c +++ b/net/ipv6/syncookies.c @@ -112,32 +112,38 @@ static __u32 check_tcp_syn_cookie(__u32 cookie, const struct in6_addr *saddr, & COOKIEMASK; } -__u32 cookie_v6_init_sequence(struct sock *sk, const struct sk_buff *skb, __u16 *mssp) +u32 __cookie_v6_init_sequence(const struct ipv6hdr *iph, + const struct tcphdr *th, __u16 *mssp) { - const struct ipv6hdr *iph = ipv6_hdr(skb); - const struct tcphdr *th = tcp_hdr(skb); int mssind; const __u16 mss = *mssp; - tcp_synq_overflow(sk); - for (mssind = ARRAY_SIZE(msstab) - 1; mssind ; mssind--) if (mss >= msstab[mssind]) break; *mssp = msstab[mssind]; - NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_SYNCOOKIESSENT); - return secure_tcp_syn_cookie(&iph->saddr, &iph->daddr, th->source, th->dest, ntohl(th->seq), jiffies / (HZ * 60), mssind); } +EXPORT_SYMBOL_GPL(__cookie_v6_init_sequence); -static inline int cookie_check(const struct sk_buff *skb, __u32 cookie) +__u32 cookie_v6_init_sequence(struct sock *sk, const struct sk_buff *skb, __u16 *mssp) { const struct ipv6hdr *iph = ipv6_hdr(skb); const struct tcphdr *th = tcp_hdr(skb); + + tcp_synq_overflow(sk); + NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_SYNCOOKIESSENT); + + return __cookie_v6_init_sequence(iph, th, mssp); +} + +int __cookie_v6_check(const struct ipv6hdr *iph, const struct tcphdr *th, + __u32 cookie) +{ __u32 seq = ntohl(th->seq) - 1; __u32 mssind = check_tcp_syn_cookie(cookie, &iph->saddr, &iph->daddr, th->source, th->dest, seq, @@ -145,6 +151,7 @@ static inline int cookie_check(const struct sk_buff *skb, __u32 cookie) return mssind < ARRAY_SIZE(msstab) ? msstab[mssind] : 0; } +EXPORT_SYMBOL_GPL(__cookie_v6_check); struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb) { @@ -167,7 +174,7 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb) goto out; if (tcp_synq_no_recent_overflow(sk) || - (mss = cookie_check(skb, cookie)) == 0) { + (mss = __cookie_v6_check(ipv6_hdr(skb), th, cookie)) == 0) { NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_SYNCOOKIESFAILED); goto out; } diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig index c45fc1a60e0d..62a171ab204f 100644 --- a/net/netfilter/Kconfig +++ b/net/netfilter/Kconfig @@ -408,6 +408,9 @@ config NF_NAT_TFTP depends on NF_CONNTRACK && NF_NAT default NF_NAT && NF_CONNTRACK_TFTP +config NETFILTER_SYNPROXY + tristate + endif # NF_CONNTRACK config NETFILTER_XTABLES diff --git a/net/netfilter/Makefile b/net/netfilter/Makefile index ebfa7dc747cd..c3a0a12907f6 100644 --- a/net/netfilter/Makefile +++ b/net/netfilter/Makefile @@ -1,6 +1,6 @@ netfilter-objs := core.o nf_log.o nf_queue.o nf_sockopt.o -nf_conntrack-y := nf_conntrack_core.o nf_conntrack_standalone.o nf_conntrack_expect.o nf_conntrack_helper.o nf_conntrack_proto.o nf_conntrack_l3proto_generic.o nf_conntrack_proto_generic.o nf_conntrack_proto_tcp.o nf_conntrack_proto_udp.o nf_conntrack_extend.o nf_conntrack_acct.o +nf_conntrack-y := nf_conntrack_core.o nf_conntrack_standalone.o nf_conntrack_expect.o nf_conntrack_helper.o nf_conntrack_proto.o nf_conntrack_l3proto_generic.o nf_conntrack_proto_generic.o nf_conntrack_proto_tcp.o nf_conntrack_proto_udp.o nf_conntrack_extend.o nf_conntrack_acct.o nf_conntrack_seqadj.o nf_conntrack-$(CONFIG_NF_CONNTRACK_TIMEOUT) += nf_conntrack_timeout.o nf_conntrack-$(CONFIG_NF_CONNTRACK_TIMESTAMP) += nf_conntrack_timestamp.o nf_conntrack-$(CONFIG_NF_CONNTRACK_EVENTS) += nf_conntrack_ecache.o @@ -61,6 +61,9 @@ obj-$(CONFIG_NF_NAT_IRC) += nf_nat_irc.o obj-$(CONFIG_NF_NAT_SIP) += nf_nat_sip.o obj-$(CONFIG_NF_NAT_TFTP) += nf_nat_tftp.o +# SYNPROXY +obj-$(CONFIG_NETFILTER_SYNPROXY) += nf_synproxy_core.o + # generic X tables obj-$(CONFIG_NETFILTER_XTABLES) += x_tables.o xt_tcpudp.o diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index da6f1787a102..5d892febd64c 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -39,6 +39,7 @@ #include #include #include +#include #include #include #include @@ -47,6 +48,7 @@ #include #include #include +#include #include #include #include @@ -798,6 +800,11 @@ init_conntrack(struct net *net, struct nf_conn *tmpl, if (IS_ERR(ct)) return (struct nf_conntrack_tuple_hash *)ct; + if (tmpl && nfct_synproxy(tmpl)) { + nfct_seqadj_ext_add(ct); + nfct_synproxy_ext_add(ct); + } + timeout_ext = tmpl ? nf_ct_timeout_find(tmpl) : NULL; if (timeout_ext) timeouts = NF_CT_TIMEOUT_EXT_DATA(timeout_ext); @@ -1326,6 +1333,7 @@ void nf_conntrack_cleanup_end(void) nf_ct_extend_unregister(&nf_ct_zone_extend); #endif nf_conntrack_proto_fini(); + nf_conntrack_seqadj_fini(); nf_conntrack_labels_fini(); nf_conntrack_helper_fini(); nf_conntrack_timeout_fini(); @@ -1531,6 +1539,10 @@ int nf_conntrack_init_start(void) if (ret < 0) goto err_labels; + ret = nf_conntrack_seqadj_init(); + if (ret < 0) + goto err_seqadj; + #ifdef CONFIG_NF_CONNTRACK_ZONES ret = nf_ct_extend_register(&nf_ct_zone_extend); if (ret < 0) @@ -1555,6 +1567,8 @@ err_proto: nf_ct_extend_unregister(&nf_ct_zone_extend); err_extend: #endif + nf_conntrack_seqadj_fini(); +err_seqadj: nf_conntrack_labels_fini(); err_labels: nf_conntrack_helper_fini(); @@ -1577,9 +1591,6 @@ void nf_conntrack_init_end(void) /* For use by REJECT target */ RCU_INIT_POINTER(ip_ct_attach, nf_conntrack_attach); RCU_INIT_POINTER(nf_ct_destroy, destroy_conntrack); - - /* Howto get NAT offsets */ - RCU_INIT_POINTER(nf_ct_nat_offset, NULL); } /* @@ -1666,8 +1677,3 @@ err_slabname: err_stat: return ret; } - -s32 (*nf_ct_nat_offset)(const struct nf_conn *ct, - enum ip_conntrack_dir dir, - u32 seq); -EXPORT_SYMBOL_GPL(nf_ct_nat_offset); diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c index fa61fea63234..eea936b70d15 100644 --- a/net/netfilter/nf_conntrack_netlink.c +++ b/net/netfilter/nf_conntrack_netlink.c @@ -37,6 +37,7 @@ #include #include #include +#include #include #include #include @@ -381,9 +382,8 @@ nla_put_failure: return -1; } -#ifdef CONFIG_NF_NAT_NEEDED static int -dump_nat_seq_adj(struct sk_buff *skb, const struct nf_nat_seq *natseq, int type) +dump_ct_seq_adj(struct sk_buff *skb, const struct nf_ct_seqadj *seq, int type) { struct nlattr *nest_parms; @@ -391,12 +391,12 @@ dump_nat_seq_adj(struct sk_buff *skb, const struct nf_nat_seq *natseq, int type) if (!nest_parms) goto nla_put_failure; - if (nla_put_be32(skb, CTA_NAT_SEQ_CORRECTION_POS, - htonl(natseq->correction_pos)) || - nla_put_be32(skb, CTA_NAT_SEQ_OFFSET_BEFORE, - htonl(natseq->offset_before)) || - nla_put_be32(skb, CTA_NAT_SEQ_OFFSET_AFTER, - htonl(natseq->offset_after))) + if (nla_put_be32(skb, CTA_SEQADJ_CORRECTION_POS, + htonl(seq->correction_pos)) || + nla_put_be32(skb, CTA_SEQADJ_OFFSET_BEFORE, + htonl(seq->offset_before)) || + nla_put_be32(skb, CTA_SEQADJ_OFFSET_AFTER, + htonl(seq->offset_after))) goto nla_put_failure; nla_nest_end(skb, nest_parms); @@ -408,27 +408,24 @@ nla_put_failure: } static inline int -ctnetlink_dump_nat_seq_adj(struct sk_buff *skb, const struct nf_conn *ct) +ctnetlink_dump_ct_seq_adj(struct sk_buff *skb, const struct nf_conn *ct) { - struct nf_nat_seq *natseq; - struct nf_conn_nat *nat = nfct_nat(ct); + struct nf_conn_seqadj *seqadj = nfct_seqadj(ct); + struct nf_ct_seqadj *seq; - if (!(ct->status & IPS_SEQ_ADJUST) || !nat) + if (!(ct->status & IPS_SEQ_ADJUST) || !seqadj) return 0; - natseq = &nat->seq[IP_CT_DIR_ORIGINAL]; - if (dump_nat_seq_adj(skb, natseq, CTA_NAT_SEQ_ADJ_ORIG) == -1) + seq = &seqadj->seq[IP_CT_DIR_ORIGINAL]; + if (dump_ct_seq_adj(skb, seq, CTA_SEQ_ADJ_ORIG) == -1) return -1; - natseq = &nat->seq[IP_CT_DIR_REPLY]; - if (dump_nat_seq_adj(skb, natseq, CTA_NAT_SEQ_ADJ_REPLY) == -1) + seq = &seqadj->seq[IP_CT_DIR_REPLY]; + if (dump_ct_seq_adj(skb, seq, CTA_SEQ_ADJ_REPLY) == -1) return -1; return 0; } -#else -#define ctnetlink_dump_nat_seq_adj(a, b) (0) -#endif static inline int ctnetlink_dump_id(struct sk_buff *skb, const struct nf_conn *ct) @@ -502,7 +499,7 @@ ctnetlink_fill_info(struct sk_buff *skb, u32 portid, u32 seq, u32 type, ctnetlink_dump_id(skb, ct) < 0 || ctnetlink_dump_use(skb, ct) < 0 || ctnetlink_dump_master(skb, ct) < 0 || - ctnetlink_dump_nat_seq_adj(skb, ct) < 0) + ctnetlink_dump_ct_seq_adj(skb, ct) < 0) goto nla_put_failure; nlmsg_end(skb, nlh); @@ -707,8 +704,8 @@ ctnetlink_conntrack_event(unsigned int events, struct nf_ct_event *item) ctnetlink_dump_master(skb, ct) < 0) goto nla_put_failure; - if (events & (1 << IPCT_NATSEQADJ) && - ctnetlink_dump_nat_seq_adj(skb, ct) < 0) + if (events & (1 << IPCT_SEQADJ) && + ctnetlink_dump_ct_seq_adj(skb, ct) < 0) goto nla_put_failure; } @@ -1439,66 +1436,65 @@ ctnetlink_change_protoinfo(struct nf_conn *ct, const struct nlattr * const cda[] return err; } -#ifdef CONFIG_NF_NAT_NEEDED -static const struct nla_policy nat_seq_policy[CTA_NAT_SEQ_MAX+1] = { - [CTA_NAT_SEQ_CORRECTION_POS] = { .type = NLA_U32 }, - [CTA_NAT_SEQ_OFFSET_BEFORE] = { .type = NLA_U32 }, - [CTA_NAT_SEQ_OFFSET_AFTER] = { .type = NLA_U32 }, +static const struct nla_policy seqadj_policy[CTA_SEQADJ_MAX+1] = { + [CTA_SEQADJ_CORRECTION_POS] = { .type = NLA_U32 }, + [CTA_SEQADJ_OFFSET_BEFORE] = { .type = NLA_U32 }, + [CTA_SEQADJ_OFFSET_AFTER] = { .type = NLA_U32 }, }; static inline int -change_nat_seq_adj(struct nf_nat_seq *natseq, const struct nlattr * const attr) +change_seq_adj(struct nf_ct_seqadj *seq, const struct nlattr * const attr) { int err; - struct nlattr *cda[CTA_NAT_SEQ_MAX+1]; + struct nlattr *cda[CTA_SEQADJ_MAX+1]; - err = nla_parse_nested(cda, CTA_NAT_SEQ_MAX, attr, nat_seq_policy); + err = nla_parse_nested(cda, CTA_SEQADJ_MAX, attr, seqadj_policy); if (err < 0) return err; - if (!cda[CTA_NAT_SEQ_CORRECTION_POS]) + if (!cda[CTA_SEQADJ_CORRECTION_POS]) return -EINVAL; - natseq->correction_pos = - ntohl(nla_get_be32(cda[CTA_NAT_SEQ_CORRECTION_POS])); + seq->correction_pos = + ntohl(nla_get_be32(cda[CTA_SEQADJ_CORRECTION_POS])); - if (!cda[CTA_NAT_SEQ_OFFSET_BEFORE]) + if (!cda[CTA_SEQADJ_OFFSET_BEFORE]) return -EINVAL; - natseq->offset_before = - ntohl(nla_get_be32(cda[CTA_NAT_SEQ_OFFSET_BEFORE])); + seq->offset_before = + ntohl(nla_get_be32(cda[CTA_SEQADJ_OFFSET_BEFORE])); - if (!cda[CTA_NAT_SEQ_OFFSET_AFTER]) + if (!cda[CTA_SEQADJ_OFFSET_AFTER]) return -EINVAL; - natseq->offset_after = - ntohl(nla_get_be32(cda[CTA_NAT_SEQ_OFFSET_AFTER])); + seq->offset_after = + ntohl(nla_get_be32(cda[CTA_SEQADJ_OFFSET_AFTER])); return 0; } static int -ctnetlink_change_nat_seq_adj(struct nf_conn *ct, - const struct nlattr * const cda[]) +ctnetlink_change_seq_adj(struct nf_conn *ct, + const struct nlattr * const cda[]) { + struct nf_conn_seqadj *seqadj = nfct_seqadj(ct); int ret = 0; - struct nf_conn_nat *nat = nfct_nat(ct); - if (!nat) + if (!seqadj) return 0; - if (cda[CTA_NAT_SEQ_ADJ_ORIG]) { - ret = change_nat_seq_adj(&nat->seq[IP_CT_DIR_ORIGINAL], - cda[CTA_NAT_SEQ_ADJ_ORIG]); + if (cda[CTA_SEQ_ADJ_ORIG]) { + ret = change_seq_adj(&seqadj->seq[IP_CT_DIR_ORIGINAL], + cda[CTA_SEQ_ADJ_ORIG]); if (ret < 0) return ret; ct->status |= IPS_SEQ_ADJUST; } - if (cda[CTA_NAT_SEQ_ADJ_REPLY]) { - ret = change_nat_seq_adj(&nat->seq[IP_CT_DIR_REPLY], - cda[CTA_NAT_SEQ_ADJ_REPLY]); + if (cda[CTA_SEQ_ADJ_REPLY]) { + ret = change_seq_adj(&seqadj->seq[IP_CT_DIR_REPLY], + cda[CTA_SEQ_ADJ_REPLY]); if (ret < 0) return ret; @@ -1507,7 +1503,6 @@ ctnetlink_change_nat_seq_adj(struct nf_conn *ct, return 0; } -#endif static int ctnetlink_attach_labels(struct nf_conn *ct, const struct nlattr * const cda[]) @@ -1573,13 +1568,12 @@ ctnetlink_change_conntrack(struct nf_conn *ct, ct->mark = ntohl(nla_get_be32(cda[CTA_MARK])); #endif -#ifdef CONFIG_NF_NAT_NEEDED - if (cda[CTA_NAT_SEQ_ADJ_ORIG] || cda[CTA_NAT_SEQ_ADJ_REPLY]) { - err = ctnetlink_change_nat_seq_adj(ct, cda); + if (cda[CTA_SEQ_ADJ_ORIG] || cda[CTA_SEQ_ADJ_REPLY]) { + err = ctnetlink_change_seq_adj(ct, cda); if (err < 0) return err; } -#endif + if (cda[CTA_LABELS]) { err = ctnetlink_attach_labels(ct, cda); if (err < 0) @@ -1684,13 +1678,11 @@ ctnetlink_create_conntrack(struct net *net, u16 zone, goto err2; } -#ifdef CONFIG_NF_NAT_NEEDED - if (cda[CTA_NAT_SEQ_ADJ_ORIG] || cda[CTA_NAT_SEQ_ADJ_REPLY]) { - err = ctnetlink_change_nat_seq_adj(ct, cda); + if (cda[CTA_SEQ_ADJ_ORIG] || cda[CTA_SEQ_ADJ_REPLY]) { + err = ctnetlink_change_seq_adj(ct, cda); if (err < 0) goto err2; } -#endif memset(&ct->proto, 0, sizeof(ct->proto)); if (cda[CTA_PROTOINFO]) { @@ -1804,7 +1796,7 @@ ctnetlink_new_conntrack(struct sock *ctnl, struct sk_buff *skb, (1 << IPCT_ASSURED) | (1 << IPCT_HELPER) | (1 << IPCT_PROTOINFO) | - (1 << IPCT_NATSEQADJ) | + (1 << IPCT_SEQADJ) | (1 << IPCT_MARK) | events, ct, NETLINK_CB(skb).portid, nlmsg_report(nlh)); @@ -1827,7 +1819,7 @@ ctnetlink_new_conntrack(struct sock *ctnl, struct sk_buff *skb, (1 << IPCT_HELPER) | (1 << IPCT_LABEL) | (1 << IPCT_PROTOINFO) | - (1 << IPCT_NATSEQADJ) | + (1 << IPCT_SEQADJ) | (1 << IPCT_MARK), ct, NETLINK_CB(skb).portid, nlmsg_report(nlh)); @@ -2082,7 +2074,7 @@ ctnetlink_nfqueue_build(struct sk_buff *skb, struct nf_conn *ct) goto nla_put_failure; if ((ct->status & IPS_SEQ_ADJUST) && - ctnetlink_dump_nat_seq_adj(skb, ct) < 0) + ctnetlink_dump_ct_seq_adj(skb, ct) < 0) goto nla_put_failure; #ifdef CONFIG_NF_CONNTRACK_MARK @@ -2170,7 +2162,7 @@ ctnetlink_nfqueue_attach_expect(const struct nlattr *attr, struct nf_conn *ct, { struct nlattr *cda[CTA_EXPECT_MAX+1]; struct nf_conntrack_tuple tuple, mask; - struct nf_conntrack_helper *helper; + struct nf_conntrack_helper *helper = NULL; struct nf_conntrack_expect *exp; int err; @@ -2211,6 +2203,7 @@ static struct nfq_ct_hook ctnetlink_nfqueue_hook = { .build = ctnetlink_nfqueue_build, .parse = ctnetlink_nfqueue_parse, .attach_expect = ctnetlink_nfqueue_attach_expect, + .seq_adjust = nf_ct_tcp_seqadj_set, }; #endif /* CONFIG_NETFILTER_NETLINK_QUEUE_CT */ diff --git a/net/netfilter/nf_conntrack_proto_tcp.c b/net/netfilter/nf_conntrack_proto_tcp.c index d224e001f14f..44d1ea32570a 100644 --- a/net/netfilter/nf_conntrack_proto_tcp.c +++ b/net/netfilter/nf_conntrack_proto_tcp.c @@ -27,6 +27,8 @@ #include #include #include +#include +#include #include #include #include @@ -495,21 +497,6 @@ static void tcp_sack(const struct sk_buff *skb, unsigned int dataoff, } } -#ifdef CONFIG_NF_NAT_NEEDED -static inline s32 nat_offset(const struct nf_conn *ct, - enum ip_conntrack_dir dir, - u32 seq) -{ - typeof(nf_ct_nat_offset) get_offset = rcu_dereference(nf_ct_nat_offset); - - return get_offset != NULL ? get_offset(ct, dir, seq) : 0; -} -#define NAT_OFFSET(ct, dir, seq) \ - (nat_offset(ct, dir, seq)) -#else -#define NAT_OFFSET(ct, dir, seq) 0 -#endif - static bool tcp_in_window(const struct nf_conn *ct, struct ip_ct_tcp *state, enum ip_conntrack_dir dir, @@ -540,7 +527,7 @@ static bool tcp_in_window(const struct nf_conn *ct, tcp_sack(skb, dataoff, tcph, &sack); /* Take into account NAT sequence number mangling */ - receiver_offset = NAT_OFFSET(ct, !dir, ack - 1); + receiver_offset = nf_ct_seq_offset(ct, !dir, ack - 1); ack -= receiver_offset; sack -= receiver_offset; @@ -960,6 +947,21 @@ static int tcp_packet(struct nf_conn *ct, "state %s ", tcp_conntrack_names[old_state]); return NF_ACCEPT; case TCP_CONNTRACK_MAX: + /* Special case for SYN proxy: when the SYN to the server or + * the SYN/ACK from the server is lost, the client may transmit + * a keep-alive packet while in SYN_SENT state. This needs to + * be associated with the original conntrack entry in order to + * generate a new SYN with the correct sequence number. + */ + if (nfct_synproxy(ct) && old_state == TCP_CONNTRACK_SYN_SENT && + index == TCP_ACK_SET && dir == IP_CT_DIR_ORIGINAL && + ct->proto.tcp.last_dir == IP_CT_DIR_ORIGINAL && + ct->proto.tcp.seen[dir].td_end - 1 == ntohl(th->seq)) { + pr_debug("nf_ct_tcp: SYN proxy client keep alive\n"); + spin_unlock_bh(&ct->lock); + return NF_ACCEPT; + } + /* Invalid packet */ pr_debug("nf_ct_tcp: Invalid dir=%i index=%u ostate=%u\n", dir, get_conntrack_index(th), old_state); diff --git a/net/netfilter/nf_conntrack_seqadj.c b/net/netfilter/nf_conntrack_seqadj.c new file mode 100644 index 000000000000..5f9bfd060dea --- /dev/null +++ b/net/netfilter/nf_conntrack_seqadj.c @@ -0,0 +1,238 @@ +#include +#include +#include + +#include +#include +#include + +int nf_ct_seqadj_init(struct nf_conn *ct, enum ip_conntrack_info ctinfo, + s32 off) +{ + enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo); + struct nf_conn_seqadj *seqadj; + struct nf_ct_seqadj *this_way; + + if (off == 0) + return 0; + + set_bit(IPS_SEQ_ADJUST_BIT, &ct->status); + + seqadj = nfct_seqadj(ct); + this_way = &seqadj->seq[dir]; + this_way->offset_before = off; + this_way->offset_after = off; + return 0; +} +EXPORT_SYMBOL_GPL(nf_ct_seqadj_init); + +int nf_ct_seqadj_set(struct nf_conn *ct, enum ip_conntrack_info ctinfo, + __be32 seq, s32 off) +{ + struct nf_conn_seqadj *seqadj = nfct_seqadj(ct); + enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo); + struct nf_ct_seqadj *this_way; + + if (off == 0) + return 0; + + set_bit(IPS_SEQ_ADJUST_BIT, &ct->status); + + spin_lock_bh(&ct->lock); + this_way = &seqadj->seq[dir]; + if (this_way->offset_before == this_way->offset_after || + before(this_way->correction_pos, seq)) { + this_way->correction_pos = seq; + this_way->offset_before = this_way->offset_after; + this_way->offset_after += off; + } + spin_unlock_bh(&ct->lock); + return 0; +} +EXPORT_SYMBOL_GPL(nf_ct_seqadj_set); + +void nf_ct_tcp_seqadj_set(struct sk_buff *skb, + struct nf_conn *ct, enum ip_conntrack_info ctinfo, + s32 off) +{ + const struct tcphdr *th; + + if (nf_ct_protonum(ct) != IPPROTO_TCP) + return; + + th = (struct tcphdr *)(skb_network_header(skb) + ip_hdrlen(skb)); + nf_ct_seqadj_set(ct, ctinfo, th->seq, off); +} +EXPORT_SYMBOL_GPL(nf_ct_tcp_seqadj_set); + +/* Adjust one found SACK option including checksum correction */ +static void nf_ct_sack_block_adjust(struct sk_buff *skb, + struct tcphdr *tcph, + unsigned int sackoff, + unsigned int sackend, + struct nf_ct_seqadj *seq) +{ + while (sackoff < sackend) { + struct tcp_sack_block_wire *sack; + __be32 new_start_seq, new_end_seq; + + sack = (void *)skb->data + sackoff; + if (after(ntohl(sack->start_seq) - seq->offset_before, + seq->correction_pos)) + new_start_seq = htonl(ntohl(sack->start_seq) - + seq->offset_after); + else + new_start_seq = htonl(ntohl(sack->start_seq) - + seq->offset_before); + + if (after(ntohl(sack->end_seq) - seq->offset_before, + seq->correction_pos)) + new_end_seq = htonl(ntohl(sack->end_seq) - + seq->offset_after); + else + new_end_seq = htonl(ntohl(sack->end_seq) - + seq->offset_before); + + pr_debug("sack_adjust: start_seq: %d->%d, end_seq: %d->%d\n", + ntohl(sack->start_seq), new_start_seq, + ntohl(sack->end_seq), new_end_seq); + + inet_proto_csum_replace4(&tcph->check, skb, + sack->start_seq, new_start_seq, 0); + inet_proto_csum_replace4(&tcph->check, skb, + sack->end_seq, new_end_seq, 0); + sack->start_seq = new_start_seq; + sack->end_seq = new_end_seq; + sackoff += sizeof(*sack); + } +} + +/* TCP SACK sequence number adjustment */ +static unsigned int nf_ct_sack_adjust(struct sk_buff *skb, + unsigned int protoff, + struct tcphdr *tcph, + struct nf_conn *ct, + enum ip_conntrack_info ctinfo) +{ + unsigned int dir, optoff, optend; + struct nf_conn_seqadj *seqadj = nfct_seqadj(ct); + + optoff = protoff + sizeof(struct tcphdr); + optend = protoff + tcph->doff * 4; + + if (!skb_make_writable(skb, optend)) + return 0; + + dir = CTINFO2DIR(ctinfo); + + while (optoff < optend) { + /* Usually: option, length. */ + unsigned char *op = skb->data + optoff; + + switch (op[0]) { + case TCPOPT_EOL: + return 1; + case TCPOPT_NOP: + optoff++; + continue; + default: + /* no partial options */ + if (optoff + 1 == optend || + optoff + op[1] > optend || + op[1] < 2) + return 0; + if (op[0] == TCPOPT_SACK && + op[1] >= 2+TCPOLEN_SACK_PERBLOCK && + ((op[1] - 2) % TCPOLEN_SACK_PERBLOCK) == 0) + nf_ct_sack_block_adjust(skb, tcph, optoff + 2, + optoff+op[1], + &seqadj->seq[!dir]); + optoff += op[1]; + } + } + return 1; +} + +/* TCP sequence number adjustment. Returns 1 on success, 0 on failure */ +int nf_ct_seq_adjust(struct sk_buff *skb, + struct nf_conn *ct, enum ip_conntrack_info ctinfo, + unsigned int protoff) +{ + enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo); + struct tcphdr *tcph; + __be32 newseq, newack; + s32 seqoff, ackoff; + struct nf_conn_seqadj *seqadj = nfct_seqadj(ct); + struct nf_ct_seqadj *this_way, *other_way; + int res; + + this_way = &seqadj->seq[dir]; + other_way = &seqadj->seq[!dir]; + + if (!skb_make_writable(skb, protoff + sizeof(*tcph))) + return 0; + + tcph = (void *)skb->data + protoff; + spin_lock_bh(&ct->lock); + if (after(ntohl(tcph->seq), this_way->correction_pos)) + seqoff = this_way->offset_after; + else + seqoff = this_way->offset_before; + + if (after(ntohl(tcph->ack_seq) - other_way->offset_before, + other_way->correction_pos)) + ackoff = other_way->offset_after; + else + ackoff = other_way->offset_before; + + newseq = htonl(ntohl(tcph->seq) + seqoff); + newack = htonl(ntohl(tcph->ack_seq) - ackoff); + + inet_proto_csum_replace4(&tcph->check, skb, tcph->seq, newseq, 0); + inet_proto_csum_replace4(&tcph->check, skb, tcph->ack_seq, newack, 0); + + pr_debug("Adjusting sequence number from %u->%u, ack from %u->%u\n", + ntohl(tcph->seq), ntohl(newseq), ntohl(tcph->ack_seq), + ntohl(newack)); + + tcph->seq = newseq; + tcph->ack_seq = newack; + + res = nf_ct_sack_adjust(skb, protoff, tcph, ct, ctinfo); + spin_unlock_bh(&ct->lock); + + return res; +} +EXPORT_SYMBOL_GPL(nf_ct_seq_adjust); + +s32 nf_ct_seq_offset(const struct nf_conn *ct, + enum ip_conntrack_dir dir, + u32 seq) +{ + struct nf_conn_seqadj *seqadj = nfct_seqadj(ct); + struct nf_ct_seqadj *this_way; + + if (!seqadj) + return 0; + + this_way = &seqadj->seq[dir]; + return after(seq, this_way->correction_pos) ? + this_way->offset_after : this_way->offset_before; +} +EXPORT_SYMBOL_GPL(nf_ct_seq_offset); + +static struct nf_ct_ext_type nf_ct_seqadj_extend __read_mostly = { + .len = sizeof(struct nf_conn_seqadj), + .align = __alignof__(struct nf_conn_seqadj), + .id = NF_CT_EXT_SEQADJ, +}; + +int nf_conntrack_seqadj_init(void) +{ + return nf_ct_extend_register(&nf_ct_seqadj_extend); +} + +void nf_conntrack_seqadj_fini(void) +{ + nf_ct_extend_unregister(&nf_ct_seqadj_extend); +} diff --git a/net/netfilter/nf_nat_core.c b/net/netfilter/nf_nat_core.c index 6ff808375b5e..6f0f4f7f68a5 100644 --- a/net/netfilter/nf_nat_core.c +++ b/net/netfilter/nf_nat_core.c @@ -25,6 +25,7 @@ #include #include #include +#include #include #include #include @@ -402,6 +403,9 @@ nf_nat_setup_info(struct nf_conn *ct, ct->status |= IPS_SRC_NAT; else ct->status |= IPS_DST_NAT; + + if (nfct_help(ct)) + nfct_seqadj_ext_add(ct); } if (maniptype == NF_NAT_MANIP_SRC) { @@ -764,10 +768,6 @@ static struct nf_ct_helper_expectfn follow_master_nat = { .expectfn = nf_nat_follow_master, }; -static struct nfq_ct_nat_hook nfq_ct_nat = { - .seq_adjust = nf_nat_tcp_seq_adjust, -}; - static int __init nf_nat_init(void) { int ret; @@ -787,14 +787,9 @@ static int __init nf_nat_init(void) /* Initialize fake conntrack so that NAT will skip it */ nf_ct_untracked_status_or(IPS_NAT_DONE_MASK); - BUG_ON(nf_nat_seq_adjust_hook != NULL); - RCU_INIT_POINTER(nf_nat_seq_adjust_hook, nf_nat_seq_adjust); BUG_ON(nfnetlink_parse_nat_setup_hook != NULL); RCU_INIT_POINTER(nfnetlink_parse_nat_setup_hook, nfnetlink_parse_nat_setup); - BUG_ON(nf_ct_nat_offset != NULL); - RCU_INIT_POINTER(nf_ct_nat_offset, nf_nat_get_offset); - RCU_INIT_POINTER(nfq_ct_nat_hook, &nfq_ct_nat); #ifdef CONFIG_XFRM BUG_ON(nf_nat_decode_session_hook != NULL); RCU_INIT_POINTER(nf_nat_decode_session_hook, __nf_nat_decode_session); @@ -813,10 +808,7 @@ static void __exit nf_nat_cleanup(void) unregister_pernet_subsys(&nf_nat_net_ops); nf_ct_extend_unregister(&nat_extend); nf_ct_helper_expectfn_unregister(&follow_master_nat); - RCU_INIT_POINTER(nf_nat_seq_adjust_hook, NULL); RCU_INIT_POINTER(nfnetlink_parse_nat_setup_hook, NULL); - RCU_INIT_POINTER(nf_ct_nat_offset, NULL); - RCU_INIT_POINTER(nfq_ct_nat_hook, NULL); #ifdef CONFIG_XFRM RCU_INIT_POINTER(nf_nat_decode_session_hook, NULL); #endif diff --git a/net/netfilter/nf_nat_helper.c b/net/netfilter/nf_nat_helper.c index 46b9baa845a6..2840abb5bb99 100644 --- a/net/netfilter/nf_nat_helper.c +++ b/net/netfilter/nf_nat_helper.c @@ -20,67 +20,13 @@ #include #include #include +#include #include #include #include #include #include -#define DUMP_OFFSET(x) \ - pr_debug("offset_before=%d, offset_after=%d, correction_pos=%u\n", \ - x->offset_before, x->offset_after, x->correction_pos); - -/* Setup TCP sequence correction given this change at this sequence */ -static inline void -adjust_tcp_sequence(u32 seq, - int sizediff, - struct nf_conn *ct, - enum ip_conntrack_info ctinfo) -{ - enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo); - struct nf_conn_nat *nat = nfct_nat(ct); - struct nf_nat_seq *this_way = &nat->seq[dir]; - - pr_debug("adjust_tcp_sequence: seq = %u, sizediff = %d\n", - seq, sizediff); - - pr_debug("adjust_tcp_sequence: Seq_offset before: "); - DUMP_OFFSET(this_way); - - spin_lock_bh(&ct->lock); - - /* SYN adjust. If it's uninitialized, or this is after last - * correction, record it: we don't handle more than one - * adjustment in the window, but do deal with common case of a - * retransmit */ - if (this_way->offset_before == this_way->offset_after || - before(this_way->correction_pos, seq)) { - this_way->correction_pos = seq; - this_way->offset_before = this_way->offset_after; - this_way->offset_after += sizediff; - } - spin_unlock_bh(&ct->lock); - - pr_debug("adjust_tcp_sequence: Seq_offset after: "); - DUMP_OFFSET(this_way); -} - -/* Get the offset value, for conntrack. Caller must have the conntrack locked */ -s32 nf_nat_get_offset(const struct nf_conn *ct, - enum ip_conntrack_dir dir, - u32 seq) -{ - struct nf_conn_nat *nat = nfct_nat(ct); - struct nf_nat_seq *this_way; - - if (!nat) - return 0; - - this_way = &nat->seq[dir]; - return after(seq, this_way->correction_pos) - ? this_way->offset_after : this_way->offset_before; -} - /* Frobs data inside this packet, which is linear. */ static void mangle_contents(struct sk_buff *skb, unsigned int dataoff, @@ -135,30 +81,6 @@ static int enlarge_skb(struct sk_buff *skb, unsigned int extra) return 1; } -void nf_nat_set_seq_adjust(struct nf_conn *ct, enum ip_conntrack_info ctinfo, - __be32 seq, s32 off) -{ - if (!off) - return; - set_bit(IPS_SEQ_ADJUST_BIT, &ct->status); - adjust_tcp_sequence(ntohl(seq), off, ct, ctinfo); - nf_conntrack_event_cache(IPCT_NATSEQADJ, ct); -} -EXPORT_SYMBOL_GPL(nf_nat_set_seq_adjust); - -void nf_nat_tcp_seq_adjust(struct sk_buff *skb, struct nf_conn *ct, - u32 ctinfo, int off) -{ - const struct tcphdr *th; - - if (nf_ct_protonum(ct) != IPPROTO_TCP) - return; - - th = (struct tcphdr *)(skb_network_header(skb)+ ip_hdrlen(skb)); - nf_nat_set_seq_adjust(ct, ctinfo, th->seq, off); -} -EXPORT_SYMBOL_GPL(nf_nat_tcp_seq_adjust); - /* Generic function for mangling variable-length address changes inside * NATed TCP connections (like the PORT XXX,XXX,XXX,XXX,XXX,XXX * command in FTP). @@ -203,8 +125,8 @@ int __nf_nat_mangle_tcp_packet(struct sk_buff *skb, datalen, oldlen); if (adjust && rep_len != match_len) - nf_nat_set_seq_adjust(ct, ctinfo, tcph->seq, - (int)rep_len - (int)match_len); + nf_ct_seqadj_set(ct, ctinfo, tcph->seq, + (int)rep_len - (int)match_len); return 1; } @@ -264,150 +186,6 @@ nf_nat_mangle_udp_packet(struct sk_buff *skb, } EXPORT_SYMBOL(nf_nat_mangle_udp_packet); -/* Adjust one found SACK option including checksum correction */ -static void -sack_adjust(struct sk_buff *skb, - struct tcphdr *tcph, - unsigned int sackoff, - unsigned int sackend, - struct nf_nat_seq *natseq) -{ - while (sackoff < sackend) { - struct tcp_sack_block_wire *sack; - __be32 new_start_seq, new_end_seq; - - sack = (void *)skb->data + sackoff; - if (after(ntohl(sack->start_seq) - natseq->offset_before, - natseq->correction_pos)) - new_start_seq = htonl(ntohl(sack->start_seq) - - natseq->offset_after); - else - new_start_seq = htonl(ntohl(sack->start_seq) - - natseq->offset_before); - - if (after(ntohl(sack->end_seq) - natseq->offset_before, - natseq->correction_pos)) - new_end_seq = htonl(ntohl(sack->end_seq) - - natseq->offset_after); - else - new_end_seq = htonl(ntohl(sack->end_seq) - - natseq->offset_before); - - pr_debug("sack_adjust: start_seq: %d->%d, end_seq: %d->%d\n", - ntohl(sack->start_seq), new_start_seq, - ntohl(sack->end_seq), new_end_seq); - - inet_proto_csum_replace4(&tcph->check, skb, - sack->start_seq, new_start_seq, 0); - inet_proto_csum_replace4(&tcph->check, skb, - sack->end_seq, new_end_seq, 0); - sack->start_seq = new_start_seq; - sack->end_seq = new_end_seq; - sackoff += sizeof(*sack); - } -} - -/* TCP SACK sequence number adjustment */ -static inline unsigned int -nf_nat_sack_adjust(struct sk_buff *skb, - unsigned int protoff, - struct tcphdr *tcph, - struct nf_conn *ct, - enum ip_conntrack_info ctinfo) -{ - unsigned int dir, optoff, optend; - struct nf_conn_nat *nat = nfct_nat(ct); - - optoff = protoff + sizeof(struct tcphdr); - optend = protoff + tcph->doff * 4; - - if (!skb_make_writable(skb, optend)) - return 0; - - dir = CTINFO2DIR(ctinfo); - - while (optoff < optend) { - /* Usually: option, length. */ - unsigned char *op = skb->data + optoff; - - switch (op[0]) { - case TCPOPT_EOL: - return 1; - case TCPOPT_NOP: - optoff++; - continue; - default: - /* no partial options */ - if (optoff + 1 == optend || - optoff + op[1] > optend || - op[1] < 2) - return 0; - if (op[0] == TCPOPT_SACK && - op[1] >= 2+TCPOLEN_SACK_PERBLOCK && - ((op[1] - 2) % TCPOLEN_SACK_PERBLOCK) == 0) - sack_adjust(skb, tcph, optoff+2, - optoff+op[1], &nat->seq[!dir]); - optoff += op[1]; - } - } - return 1; -} - -/* TCP sequence number adjustment. Returns 1 on success, 0 on failure */ -int -nf_nat_seq_adjust(struct sk_buff *skb, - struct nf_conn *ct, - enum ip_conntrack_info ctinfo, - unsigned int protoff) -{ - struct tcphdr *tcph; - int dir; - __be32 newseq, newack; - s32 seqoff, ackoff; - struct nf_conn_nat *nat = nfct_nat(ct); - struct nf_nat_seq *this_way, *other_way; - int res; - - dir = CTINFO2DIR(ctinfo); - - this_way = &nat->seq[dir]; - other_way = &nat->seq[!dir]; - - if (!skb_make_writable(skb, protoff + sizeof(*tcph))) - return 0; - - tcph = (void *)skb->data + protoff; - spin_lock_bh(&ct->lock); - if (after(ntohl(tcph->seq), this_way->correction_pos)) - seqoff = this_way->offset_after; - else - seqoff = this_way->offset_before; - - if (after(ntohl(tcph->ack_seq) - other_way->offset_before, - other_way->correction_pos)) - ackoff = other_way->offset_after; - else - ackoff = other_way->offset_before; - - newseq = htonl(ntohl(tcph->seq) + seqoff); - newack = htonl(ntohl(tcph->ack_seq) - ackoff); - - inet_proto_csum_replace4(&tcph->check, skb, tcph->seq, newseq, 0); - inet_proto_csum_replace4(&tcph->check, skb, tcph->ack_seq, newack, 0); - - pr_debug("Adjusting sequence number from %u->%u, ack from %u->%u\n", - ntohl(tcph->seq), ntohl(newseq), ntohl(tcph->ack_seq), - ntohl(newack)); - - tcph->seq = newseq; - tcph->ack_seq = newack; - - res = nf_nat_sack_adjust(skb, protoff, tcph, ct, ctinfo); - spin_unlock_bh(&ct->lock); - - return res; -} - /* Setup NAT on this expected conntrack so it follows master. */ /* If we fail to get a free NAT slot, we'll get dropped on confirm */ void nf_nat_follow_master(struct nf_conn *ct, diff --git a/net/netfilter/nf_nat_sip.c b/net/netfilter/nf_nat_sip.c index dac11f73868e..f9790405b7ff 100644 --- a/net/netfilter/nf_nat_sip.c +++ b/net/netfilter/nf_nat_sip.c @@ -20,6 +20,7 @@ #include #include #include +#include #include MODULE_LICENSE("GPL"); @@ -308,7 +309,7 @@ static void nf_nat_sip_seq_adjust(struct sk_buff *skb, unsigned int protoff, return; th = (struct tcphdr *)(skb->data + protoff); - nf_nat_set_seq_adjust(ct, ctinfo, th->seq, off); + nf_ct_seqadj_set(ct, ctinfo, th->seq, off); } /* Handles expected signalling connections and media streams */ diff --git a/net/netfilter/nf_synproxy_core.c b/net/netfilter/nf_synproxy_core.c new file mode 100644 index 000000000000..d23dc791aca7 --- /dev/null +++ b/net/netfilter/nf_synproxy_core.c @@ -0,0 +1,432 @@ +/* + * Copyright (c) 2013 Patrick McHardy + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include + +int synproxy_net_id; +EXPORT_SYMBOL_GPL(synproxy_net_id); + +void +synproxy_parse_options(const struct sk_buff *skb, unsigned int doff, + const struct tcphdr *th, struct synproxy_options *opts) +{ + int length = (th->doff * 4) - sizeof(*th); + u8 buf[40], *ptr; + + ptr = skb_header_pointer(skb, doff + sizeof(*th), length, buf); + BUG_ON(ptr == NULL); + + opts->options = 0; + while (length > 0) { + int opcode = *ptr++; + int opsize; + + switch (opcode) { + case TCPOPT_EOL: + return; + case TCPOPT_NOP: + length--; + continue; + default: + opsize = *ptr++; + if (opsize < 2) + return; + if (opsize > length) + return; + + switch (opcode) { + case TCPOPT_MSS: + if (opsize == TCPOLEN_MSS) { + opts->mss = get_unaligned_be16(ptr); + opts->options |= XT_SYNPROXY_OPT_MSS; + } + break; + case TCPOPT_WINDOW: + if (opsize == TCPOLEN_WINDOW) { + opts->wscale = *ptr; + if (opts->wscale > 14) + opts->wscale = 14; + opts->options |= XT_SYNPROXY_OPT_WSCALE; + } + break; + case TCPOPT_TIMESTAMP: + if (opsize == TCPOLEN_TIMESTAMP) { + opts->tsval = get_unaligned_be32(ptr); + opts->tsecr = get_unaligned_be32(ptr + 4); + opts->options |= XT_SYNPROXY_OPT_TIMESTAMP; + } + break; + case TCPOPT_SACK_PERM: + if (opsize == TCPOLEN_SACK_PERM) + opts->options |= XT_SYNPROXY_OPT_SACK_PERM; + break; + } + + ptr += opsize - 2; + length -= opsize; + } + } +} +EXPORT_SYMBOL_GPL(synproxy_parse_options); + +unsigned int synproxy_options_size(const struct synproxy_options *opts) +{ + unsigned int size = 0; + + if (opts->options & XT_SYNPROXY_OPT_MSS) + size += TCPOLEN_MSS_ALIGNED; + if (opts->options & XT_SYNPROXY_OPT_TIMESTAMP) + size += TCPOLEN_TSTAMP_ALIGNED; + else if (opts->options & XT_SYNPROXY_OPT_SACK_PERM) + size += TCPOLEN_SACKPERM_ALIGNED; + if (opts->options & XT_SYNPROXY_OPT_WSCALE) + size += TCPOLEN_WSCALE_ALIGNED; + + return size; +} +EXPORT_SYMBOL_GPL(synproxy_options_size); + +void +synproxy_build_options(struct tcphdr *th, const struct synproxy_options *opts) +{ + __be32 *ptr = (__be32 *)(th + 1); + u8 options = opts->options; + + if (options & XT_SYNPROXY_OPT_MSS) + *ptr++ = htonl((TCPOPT_MSS << 24) | + (TCPOLEN_MSS << 16) | + opts->mss); + + if (options & XT_SYNPROXY_OPT_TIMESTAMP) { + if (options & XT_SYNPROXY_OPT_SACK_PERM) + *ptr++ = htonl((TCPOPT_SACK_PERM << 24) | + (TCPOLEN_SACK_PERM << 16) | + (TCPOPT_TIMESTAMP << 8) | + TCPOLEN_TIMESTAMP); + else + *ptr++ = htonl((TCPOPT_NOP << 24) | + (TCPOPT_NOP << 16) | + (TCPOPT_TIMESTAMP << 8) | + TCPOLEN_TIMESTAMP); + + *ptr++ = htonl(opts->tsval); + *ptr++ = htonl(opts->tsecr); + } else if (options & XT_SYNPROXY_OPT_SACK_PERM) + *ptr++ = htonl((TCPOPT_NOP << 24) | + (TCPOPT_NOP << 16) | + (TCPOPT_SACK_PERM << 8) | + TCPOLEN_SACK_PERM); + + if (options & XT_SYNPROXY_OPT_WSCALE) + *ptr++ = htonl((TCPOPT_NOP << 24) | + (TCPOPT_WINDOW << 16) | + (TCPOLEN_WINDOW << 8) | + opts->wscale); +} +EXPORT_SYMBOL_GPL(synproxy_build_options); + +void synproxy_init_timestamp_cookie(const struct xt_synproxy_info *info, + struct synproxy_options *opts) +{ + opts->tsecr = opts->tsval; + opts->tsval = tcp_time_stamp & ~0x3f; + + if (opts->options & XT_SYNPROXY_OPT_WSCALE) + opts->tsval |= info->wscale; + else + opts->tsval |= 0xf; + + if (opts->options & XT_SYNPROXY_OPT_SACK_PERM) + opts->tsval |= 1 << 4; + + if (opts->options & XT_SYNPROXY_OPT_ECN) + opts->tsval |= 1 << 5; +} +EXPORT_SYMBOL_GPL(synproxy_init_timestamp_cookie); + +void synproxy_check_timestamp_cookie(struct synproxy_options *opts) +{ + opts->wscale = opts->tsecr & 0xf; + if (opts->wscale != 0xf) + opts->options |= XT_SYNPROXY_OPT_WSCALE; + + opts->options |= opts->tsecr & (1 << 4) ? XT_SYNPROXY_OPT_SACK_PERM : 0; + + opts->options |= opts->tsecr & (1 << 5) ? XT_SYNPROXY_OPT_ECN : 0; +} +EXPORT_SYMBOL_GPL(synproxy_check_timestamp_cookie); + +unsigned int synproxy_tstamp_adjust(struct sk_buff *skb, + unsigned int protoff, + struct tcphdr *th, + struct nf_conn *ct, + enum ip_conntrack_info ctinfo, + const struct nf_conn_synproxy *synproxy) +{ + unsigned int optoff, optend; + u32 *ptr, old; + + if (synproxy->tsoff == 0) + return 1; + + optoff = protoff + sizeof(struct tcphdr); + optend = protoff + th->doff * 4; + + if (!skb_make_writable(skb, optend)) + return 0; + + while (optoff < optend) { + unsigned char *op = skb->data + optoff; + + switch (op[0]) { + case TCPOPT_EOL: + return 1; + case TCPOPT_NOP: + optoff++; + continue; + default: + if (optoff + 1 == optend || + optoff + op[1] > optend || + op[1] < 2) + return 0; + if (op[0] == TCPOPT_TIMESTAMP && + op[1] == TCPOLEN_TIMESTAMP) { + if (CTINFO2DIR(ctinfo) == IP_CT_DIR_REPLY) { + ptr = (u32 *)&op[2]; + old = *ptr; + *ptr = htonl(ntohl(*ptr) - + synproxy->tsoff); + } else { + ptr = (u32 *)&op[6]; + old = *ptr; + *ptr = htonl(ntohl(*ptr) + + synproxy->tsoff); + } + inet_proto_csum_replace4(&th->check, skb, + old, *ptr, 0); + return 1; + } + optoff += op[1]; + } + } + return 1; +} +EXPORT_SYMBOL_GPL(synproxy_tstamp_adjust); + +static struct nf_ct_ext_type nf_ct_synproxy_extend __read_mostly = { + .len = sizeof(struct nf_conn_synproxy), + .align = __alignof__(struct nf_conn_synproxy), + .id = NF_CT_EXT_SYNPROXY, +}; + +#ifdef CONFIG_PROC_FS +static void *synproxy_cpu_seq_start(struct seq_file *seq, loff_t *pos) +{ + struct synproxy_net *snet = synproxy_pernet(seq_file_net(seq)); + int cpu; + + if (*pos == 0) + return SEQ_START_TOKEN; + + for (cpu = *pos - 1; cpu < nr_cpu_ids; cpu++) { + if (!cpu_possible(cpu)) + continue; + *pos = cpu + 1; + return per_cpu_ptr(snet->stats, cpu); + } + + return NULL; +} + +static void *synproxy_cpu_seq_next(struct seq_file *seq, void *v, loff_t *pos) +{ + struct synproxy_net *snet = synproxy_pernet(seq_file_net(seq)); + int cpu; + + for (cpu = *pos; cpu < nr_cpu_ids; cpu++) { + if (!cpu_possible(cpu)) + continue; + *pos = cpu + 1; + return per_cpu_ptr(snet->stats, cpu); + } + + return NULL; +} + +static void synproxy_cpu_seq_stop(struct seq_file *seq, void *v) +{ + return; +} + +static int synproxy_cpu_seq_show(struct seq_file *seq, void *v) +{ + struct synproxy_stats *stats = v; + + if (v == SEQ_START_TOKEN) { + seq_printf(seq, "entries\t\tsyn_received\t" + "cookie_invalid\tcookie_valid\t" + "cookie_retrans\tconn_reopened\n"); + return 0; + } + + seq_printf(seq, "%08x\t%08x\t%08x\t%08x\t%08x\t%08x\n", 0, + stats->syn_received, + stats->cookie_invalid, + stats->cookie_valid, + stats->cookie_retrans, + stats->conn_reopened); + + return 0; +} + +static const struct seq_operations synproxy_cpu_seq_ops = { + .start = synproxy_cpu_seq_start, + .next = synproxy_cpu_seq_next, + .stop = synproxy_cpu_seq_stop, + .show = synproxy_cpu_seq_show, +}; + +static int synproxy_cpu_seq_open(struct inode *inode, struct file *file) +{ + return seq_open_net(inode, file, &synproxy_cpu_seq_ops, + sizeof(struct seq_net_private)); +} + +static const struct file_operations synproxy_cpu_seq_fops = { + .owner = THIS_MODULE, + .open = synproxy_cpu_seq_open, + .read = seq_read, + .llseek = seq_lseek, + .release = seq_release_net, +}; + +static int __net_init synproxy_proc_init(struct net *net) +{ + if (!proc_create("synproxy", S_IRUGO, net->proc_net_stat, + &synproxy_cpu_seq_fops)) + return -ENOMEM; + return 0; +} + +static void __net_exit synproxy_proc_exit(struct net *net) +{ + remove_proc_entry("synproxy", net->proc_net_stat); +} +#else +static int __net_init synproxy_proc_init(struct net *net) +{ + return 0; +} + +static void __net_exit synproxy_proc_exit(struct net *net) +{ + return; +} +#endif /* CONFIG_PROC_FS */ + +static int __net_init synproxy_net_init(struct net *net) +{ + struct synproxy_net *snet = synproxy_pernet(net); + struct nf_conntrack_tuple t; + struct nf_conn *ct; + int err = -ENOMEM; + + memset(&t, 0, sizeof(t)); + ct = nf_conntrack_alloc(net, 0, &t, &t, GFP_KERNEL); + if (IS_ERR(ct)) { + err = PTR_ERR(ct); + goto err1; + } + + __set_bit(IPS_TEMPLATE_BIT, &ct->status); + __set_bit(IPS_CONFIRMED_BIT, &ct->status); + if (!nfct_seqadj_ext_add(ct)) + goto err2; + if (!nfct_synproxy_ext_add(ct)) + goto err2; + + snet->tmpl = ct; + + snet->stats = alloc_percpu(struct synproxy_stats); + if (snet->stats == NULL) + goto err2; + + err = synproxy_proc_init(net); + if (err < 0) + goto err3; + + return 0; + +err3: + free_percpu(snet->stats); +err2: + nf_conntrack_free(ct); +err1: + return err; +} + +static void __net_exit synproxy_net_exit(struct net *net) +{ + struct synproxy_net *snet = synproxy_pernet(net); + + nf_conntrack_free(snet->tmpl); + synproxy_proc_exit(net); + free_percpu(snet->stats); +} + +static struct pernet_operations synproxy_net_ops = { + .init = synproxy_net_init, + .exit = synproxy_net_exit, + .id = &synproxy_net_id, + .size = sizeof(struct synproxy_net), +}; + +static int __init synproxy_core_init(void) +{ + int err; + + err = nf_ct_extend_register(&nf_ct_synproxy_extend); + if (err < 0) + goto err1; + + err = register_pernet_subsys(&synproxy_net_ops); + if (err < 0) + goto err2; + + return 0; + +err2: + nf_ct_extend_unregister(&nf_ct_synproxy_extend); +err1: + return err; +} + +static void __exit synproxy_core_exit(void) +{ + unregister_pernet_subsys(&synproxy_net_ops); + nf_ct_extend_unregister(&nf_ct_synproxy_extend); +} + +module_init(synproxy_core_init); +module_exit(synproxy_core_exit); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Patrick McHardy "); diff --git a/net/netfilter/nfnetlink_queue_ct.c b/net/netfilter/nfnetlink_queue_ct.c index be893039966d..96cac50e0d12 100644 --- a/net/netfilter/nfnetlink_queue_ct.c +++ b/net/netfilter/nfnetlink_queue_ct.c @@ -87,14 +87,14 @@ nla_put_failure: void nfqnl_ct_seq_adjust(struct sk_buff *skb, struct nf_conn *ct, enum ip_conntrack_info ctinfo, int diff) { - struct nfq_ct_nat_hook *nfq_nat_ct; + struct nfq_ct_hook *nfq_ct; - nfq_nat_ct = rcu_dereference(nfq_ct_nat_hook); - if (nfq_nat_ct == NULL) + nfq_ct = rcu_dereference(nfq_ct_hook); + if (nfq_ct == NULL) return; if ((ct->status & IPS_NAT_MASK) && diff) - nfq_nat_ct->seq_adjust(skb, ct, ctinfo, diff); + nfq_ct->seq_adjust(skb, ct, ctinfo, diff); } int nfqnl_attach_expect(struct nf_conn *ct, const struct nlattr *attr,