Merge branch 'for-patrick' of git://git.kernel.org/pub/scm/linux/kernel/git/horms/lvs-test-2.6

This commit is contained in:
Patrick McHardy 2010-10-21 16:25:51 +02:00
commit 3b1a1ce6f4
12 changed files with 960 additions and 411 deletions

View File

@ -25,7 +25,7 @@
#include <linux/ip.h> #include <linux/ip.h>
#include <linux/ipv6.h> /* for struct ipv6hdr */ #include <linux/ipv6.h> /* for struct ipv6hdr */
#include <net/ipv6.h> /* for ipv6_addr_copy */ #include <net/ipv6.h> /* for ipv6_addr_copy */
#ifdef CONFIG_IP_VS_NFCT #if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
#include <net/netfilter/nf_conntrack.h> #include <net/netfilter/nf_conntrack.h>
#endif #endif
@ -136,24 +136,24 @@ static inline const char *ip_vs_dbg_addr(int af, char *buf, size_t buf_len,
if (net_ratelimit()) \ if (net_ratelimit()) \
printk(KERN_DEBUG pr_fmt(msg), ##__VA_ARGS__); \ printk(KERN_DEBUG pr_fmt(msg), ##__VA_ARGS__); \
} while (0) } while (0)
#define IP_VS_DBG_PKT(level, pp, skb, ofs, msg) \ #define IP_VS_DBG_PKT(level, af, pp, skb, ofs, msg) \
do { \ do { \
if (level <= ip_vs_get_debug_level()) \ if (level <= ip_vs_get_debug_level()) \
pp->debug_packet(pp, skb, ofs, msg); \ pp->debug_packet(af, pp, skb, ofs, msg); \
} while (0) } while (0)
#define IP_VS_DBG_RL_PKT(level, pp, skb, ofs, msg) \ #define IP_VS_DBG_RL_PKT(level, af, pp, skb, ofs, msg) \
do { \ do { \
if (level <= ip_vs_get_debug_level() && \ if (level <= ip_vs_get_debug_level() && \
net_ratelimit()) \ net_ratelimit()) \
pp->debug_packet(pp, skb, ofs, msg); \ pp->debug_packet(af, pp, skb, ofs, msg); \
} while (0) } while (0)
#else /* NO DEBUGGING at ALL */ #else /* NO DEBUGGING at ALL */
#define IP_VS_DBG_BUF(level, msg...) do {} while (0) #define IP_VS_DBG_BUF(level, msg...) do {} while (0)
#define IP_VS_ERR_BUF(msg...) do {} while (0) #define IP_VS_ERR_BUF(msg...) do {} while (0)
#define IP_VS_DBG(level, msg...) do {} while (0) #define IP_VS_DBG(level, msg...) do {} while (0)
#define IP_VS_DBG_RL(msg...) do {} while (0) #define IP_VS_DBG_RL(msg...) do {} while (0)
#define IP_VS_DBG_PKT(level, pp, skb, ofs, msg) do {} while (0) #define IP_VS_DBG_PKT(level, af, pp, skb, ofs, msg) do {} while (0)
#define IP_VS_DBG_RL_PKT(level, pp, skb, ofs, msg) do {} while (0) #define IP_VS_DBG_RL_PKT(level, af, pp, skb, ofs, msg) do {} while (0)
#endif #endif
#define IP_VS_BUG() BUG() #define IP_VS_BUG() BUG()
@ -345,7 +345,7 @@ struct ip_vs_protocol {
int (*app_conn_bind)(struct ip_vs_conn *cp); int (*app_conn_bind)(struct ip_vs_conn *cp);
void (*debug_packet)(struct ip_vs_protocol *pp, void (*debug_packet)(int af, struct ip_vs_protocol *pp,
const struct sk_buff *skb, const struct sk_buff *skb,
int offset, int offset,
const char *msg); const char *msg);
@ -409,6 +409,7 @@ struct ip_vs_conn {
/* packet transmitter for different forwarding methods. If it /* packet transmitter for different forwarding methods. If it
mangles the packet, it must return NF_DROP or better NF_STOLEN, mangles the packet, it must return NF_DROP or better NF_STOLEN,
otherwise this must be changed to a sk_buff **. otherwise this must be changed to a sk_buff **.
NF_ACCEPT can be returned when destination is local.
*/ */
int (*packet_xmit)(struct sk_buff *skb, struct ip_vs_conn *cp, int (*packet_xmit)(struct sk_buff *skb, struct ip_vs_conn *cp,
struct ip_vs_protocol *pp); struct ip_vs_protocol *pp);
@ -597,11 +598,19 @@ struct ip_vs_app {
__be16 port; /* port number in net order */ __be16 port; /* port number in net order */
atomic_t usecnt; /* usage counter */ atomic_t usecnt; /* usage counter */
/* output hook: return false if can't linearize. diff set for TCP. */ /*
* output hook: Process packet in inout direction, diff set for TCP.
* Return: 0=Error, 1=Payload Not Mangled/Mangled but checksum is ok,
* 2=Mangled but checksum was not updated
*/
int (*pkt_out)(struct ip_vs_app *, struct ip_vs_conn *, int (*pkt_out)(struct ip_vs_app *, struct ip_vs_conn *,
struct sk_buff *, int *diff); struct sk_buff *, int *diff);
/* input hook: return false if can't linearize. diff set for TCP. */ /*
* input hook: Process packet in outin direction, diff set for TCP.
* Return: 0=Error, 1=Payload Not Mangled/Mangled but checksum is ok,
* 2=Mangled but checksum was not updated
*/
int (*pkt_in)(struct ip_vs_app *, struct ip_vs_conn *, int (*pkt_in)(struct ip_vs_app *, struct ip_vs_conn *,
struct sk_buff *, int *diff); struct sk_buff *, int *diff);
@ -819,7 +828,8 @@ extern int
ip_vs_set_state_timeout(int *table, int num, const char *const *names, ip_vs_set_state_timeout(int *table, int num, const char *const *names,
const char *name, int to); const char *name, int to);
extern void extern void
ip_vs_tcpudp_debug_packet(struct ip_vs_protocol *pp, const struct sk_buff *skb, ip_vs_tcpudp_debug_packet(int af, struct ip_vs_protocol *pp,
const struct sk_buff *skb,
int offset, const char *msg); int offset, const char *msg);
extern struct ip_vs_protocol ip_vs_protocol_tcp; extern struct ip_vs_protocol ip_vs_protocol_tcp;
@ -841,7 +851,8 @@ extern int ip_vs_unbind_scheduler(struct ip_vs_service *svc);
extern struct ip_vs_scheduler *ip_vs_scheduler_get(const char *sched_name); extern struct ip_vs_scheduler *ip_vs_scheduler_get(const char *sched_name);
extern void ip_vs_scheduler_put(struct ip_vs_scheduler *scheduler); extern void ip_vs_scheduler_put(struct ip_vs_scheduler *scheduler);
extern struct ip_vs_conn * extern struct ip_vs_conn *
ip_vs_schedule(struct ip_vs_service *svc, struct sk_buff *skb); ip_vs_schedule(struct ip_vs_service *svc, struct sk_buff *skb,
struct ip_vs_protocol *pp, int *ignored);
extern int ip_vs_leave(struct ip_vs_service *svc, struct sk_buff *skb, extern int ip_vs_leave(struct ip_vs_service *svc, struct sk_buff *skb,
struct ip_vs_protocol *pp); struct ip_vs_protocol *pp);
@ -1013,6 +1024,24 @@ static inline __wsum ip_vs_check_diff2(__be16 old, __be16 new, __wsum oldsum)
return csum_partial(diff, sizeof(diff), oldsum); return csum_partial(diff, sizeof(diff), oldsum);
} }
/*
 *      Forget current conntrack (unconfirmed) and attach notrack entry
 *
 * @skb: packet whose conntrack reference is to be replaced.
 *
 * Looks up the conntrack currently attached to @skb. Unless that entry
 * is already the untracked one, the netfilter state of @skb is reset and
 * the untracked conntrack is attached in its place with ctinfo set to
 * IP_CT_NEW; a reference is taken on the newly attached entry.
 *
 * Compiles to an empty function when conntrack is neither built in nor
 * built as a module.
 */
static inline void ip_vs_notrack(struct sk_buff *skb)
{
#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
	enum ip_conntrack_info ctinfo;
	/* Single initialization: the former "ct = ct = ..." was a typo. */
	struct nf_conn *ct = nf_ct_get(skb, &ctinfo);

	if (!ct || !nf_ct_is_untracked(ct)) {
		/* Drop whatever was attached before installing the new entry. */
		nf_reset(skb);
		skb->nfct = &nf_ct_untracked_get()->ct_general;
		skb->nfctinfo = IP_CT_NEW;
		/* skb now holds its own reference on the untracked entry. */
		nf_conntrack_get(skb->nfct);
	}
#endif
}
#ifdef CONFIG_IP_VS_NFCT #ifdef CONFIG_IP_VS_NFCT
/* /*
* Netfilter connection tracking * Netfilter connection tracking

View File

@ -462,6 +462,18 @@ int nf_nat_icmp_reply_translation(struct nf_conn *ct,
return 0; return 0;
} }
if (manip == IP_NAT_MANIP_SRC)
statusbit = IPS_SRC_NAT;
else
statusbit = IPS_DST_NAT;
/* Invert if this is reply dir. */
if (dir == IP_CT_DIR_REPLY)
statusbit ^= IPS_NAT_MASK;
if (!(ct->status & statusbit))
return 1;
pr_debug("icmp_reply_translation: translating error %p manip %u " pr_debug("icmp_reply_translation: translating error %p manip %u "
"dir %s\n", skb, manip, "dir %s\n", skb, manip,
dir == IP_CT_DIR_ORIGINAL ? "ORIG" : "REPLY"); dir == IP_CT_DIR_ORIGINAL ? "ORIG" : "REPLY");
@ -496,20 +508,9 @@ int nf_nat_icmp_reply_translation(struct nf_conn *ct,
/* Change outer to look the reply to an incoming packet /* Change outer to look the reply to an incoming packet
* (proto 0 means don't invert per-proto part). */ * (proto 0 means don't invert per-proto part). */
if (manip == IP_NAT_MANIP_SRC) nf_ct_invert_tuplepr(&target, &ct->tuplehash[!dir].tuple);
statusbit = IPS_SRC_NAT; if (!manip_pkt(0, skb, 0, &target, manip))
else return 0;
statusbit = IPS_DST_NAT;
/* Invert if this is reply dir. */
if (dir == IP_CT_DIR_REPLY)
statusbit ^= IPS_NAT_MASK;
if (ct->status & statusbit) {
nf_ct_invert_tuplepr(&target, &ct->tuplehash[!dir].tuple);
if (!manip_pkt(0, skb, 0, &target, manip))
return 0;
}
return 1; return 1;
} }

View File

@ -563,6 +563,8 @@ ip_vs_bind_dest(struct ip_vs_conn *cp, struct ip_vs_dest *dest)
*/ */
if (!(cp->flags & IP_VS_CONN_F_TEMPLATE)) if (!(cp->flags & IP_VS_CONN_F_TEMPLATE))
conn_flags &= ~IP_VS_CONN_F_INACTIVE; conn_flags &= ~IP_VS_CONN_F_INACTIVE;
/* connections inherit forwarding method from dest */
cp->flags &= ~IP_VS_CONN_F_FWD_MASK;
} }
cp->flags |= conn_flags; cp->flags |= conn_flags;
cp->dest = dest; cp->dest = dest;

View File

@ -48,6 +48,7 @@
#ifdef CONFIG_IP_VS_IPV6 #ifdef CONFIG_IP_VS_IPV6
#include <net/ipv6.h> #include <net/ipv6.h>
#include <linux/netfilter_ipv6.h> #include <linux/netfilter_ipv6.h>
#include <net/ip6_route.h>
#endif #endif
#include <net/ip_vs.h> #include <net/ip_vs.h>
@ -342,7 +343,8 @@ ip_vs_sched_persist(struct ip_vs_service *svc,
* Protocols supported: TCP, UDP * Protocols supported: TCP, UDP
*/ */
struct ip_vs_conn * struct ip_vs_conn *
ip_vs_schedule(struct ip_vs_service *svc, struct sk_buff *skb) ip_vs_schedule(struct ip_vs_service *svc, struct sk_buff *skb,
struct ip_vs_protocol *pp, int *ignored)
{ {
struct ip_vs_conn *cp = NULL; struct ip_vs_conn *cp = NULL;
struct ip_vs_iphdr iph; struct ip_vs_iphdr iph;
@ -350,16 +352,44 @@ ip_vs_schedule(struct ip_vs_service *svc, struct sk_buff *skb)
__be16 _ports[2], *pptr; __be16 _ports[2], *pptr;
unsigned int flags; unsigned int flags;
*ignored = 1;
ip_vs_fill_iphdr(svc->af, skb_network_header(skb), &iph); ip_vs_fill_iphdr(svc->af, skb_network_header(skb), &iph);
pptr = skb_header_pointer(skb, iph.len, sizeof(_ports), _ports); pptr = skb_header_pointer(skb, iph.len, sizeof(_ports), _ports);
if (pptr == NULL) if (pptr == NULL)
return NULL; return NULL;
/*
* FTPDATA needs this check when using local real server.
* Never schedule Active FTPDATA connections from real server.
* For LVS-NAT they must be already created. For other methods
* with persistence the connection is created on SYN+ACK.
*/
if (pptr[0] == FTPDATA) {
IP_VS_DBG_PKT(12, svc->af, pp, skb, 0,
"Not scheduling FTPDATA");
return NULL;
}
/*
* Do not schedule replies from local real server. It is risky
* for fwmark services but mostly for persistent services.
*/
if ((!skb->dev || skb->dev->flags & IFF_LOOPBACK) &&
(svc->flags & IP_VS_SVC_F_PERSISTENT || svc->fwmark) &&
(cp = pp->conn_in_get(svc->af, skb, pp, &iph, iph.len, 1))) {
IP_VS_DBG_PKT(12, svc->af, pp, skb, 0,
"Not scheduling reply for existing connection");
__ip_vs_conn_put(cp);
return NULL;
}
/* /*
* Persistent service * Persistent service
*/ */
if (svc->flags & IP_VS_SVC_F_PERSISTENT) if (svc->flags & IP_VS_SVC_F_PERSISTENT) {
*ignored = 0;
return ip_vs_sched_persist(svc, skb, pptr); return ip_vs_sched_persist(svc, skb, pptr);
}
/* /*
* Non-persistent service * Non-persistent service
@ -372,6 +402,8 @@ ip_vs_schedule(struct ip_vs_service *svc, struct sk_buff *skb)
return NULL; return NULL;
} }
*ignored = 0;
dest = svc->scheduler->schedule(svc, skb); dest = svc->scheduler->schedule(svc, skb);
if (dest == NULL) { if (dest == NULL) {
IP_VS_DBG(1, "Schedule: no dest found.\n"); IP_VS_DBG(1, "Schedule: no dest found.\n");
@ -498,37 +530,34 @@ int ip_vs_leave(struct ip_vs_service *svc, struct sk_buff *skb,
* ICMP_PORT_UNREACH is sent here no matter it is TCP/UDP. --WZ * ICMP_PORT_UNREACH is sent here no matter it is TCP/UDP. --WZ
*/ */
#ifdef CONFIG_IP_VS_IPV6 #ifdef CONFIG_IP_VS_IPV6
if (svc->af == AF_INET6) if (svc->af == AF_INET6) {
if (!skb->dev) {
struct net *net = dev_net(skb_dst(skb)->dev);
skb->dev = net->loopback_dev;
}
icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_PORT_UNREACH, 0); icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_PORT_UNREACH, 0);
else } else
#endif #endif
icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PORT_UNREACH, 0); icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PORT_UNREACH, 0);
return NF_DROP; return NF_DROP;
} }
/*
 *      NF_INET_POST_ROUTING hook, registered before NF_IP_PRI_NAT_SRC.
 *      It prevents double NAT and confirmation for packets whose
 *      conntrack structure we do not want to keep.
 *
 *      Returns NF_STOP for packets marked as sent from IPVS
 *      (skb->ipvs_property set), so they exit this chain, and
 *      NF_ACCEPT for everything else.
 */
static unsigned int ip_vs_post_routing(unsigned int hooknum,
				       struct sk_buff *skb,
				       const struct net_device *in,
				       const struct net_device *out,
				       int (*okfn)(struct sk_buff *))
{
	/* The packet was sent from IPVS, exit this chain */
	return skb->ipvs_property ? NF_STOP : NF_ACCEPT;
}
__sum16 ip_vs_checksum_complete(struct sk_buff *skb, int offset) __sum16 ip_vs_checksum_complete(struct sk_buff *skb, int offset)
{ {
return csum_fold(skb_checksum(skb, offset, skb->len - offset, 0)); return csum_fold(skb_checksum(skb, offset, skb->len - offset, 0));
} }
/*
 *      Map a netfilter hook number to the defrag user passed to the
 *      fragment reassembly helpers: LOCAL_IN and FORWARD have their own
 *      users, every other hook falls back to IP_DEFRAG_VS_OUT.
 */
static inline enum ip_defrag_users ip_vs_defrag_user(unsigned int hooknum)
{
	switch (hooknum) {
	case NF_INET_LOCAL_IN:
		return IP_DEFRAG_VS_IN;
	case NF_INET_FORWARD:
		return IP_DEFRAG_VS_FWD;
	default:
		return IP_DEFRAG_VS_OUT;
	}
}
static inline int ip_vs_gather_frags(struct sk_buff *skb, u_int32_t user) static inline int ip_vs_gather_frags(struct sk_buff *skb, u_int32_t user)
{ {
int err = ip_defrag(skb, user); int err = ip_defrag(skb, user);
@ -589,10 +618,10 @@ void ip_vs_nat_icmp(struct sk_buff *skb, struct ip_vs_protocol *pp,
skb->ip_summed = CHECKSUM_UNNECESSARY; skb->ip_summed = CHECKSUM_UNNECESSARY;
if (inout) if (inout)
IP_VS_DBG_PKT(11, pp, skb, (void *)ciph - (void *)iph, IP_VS_DBG_PKT(11, AF_INET, pp, skb, (void *)ciph - (void *)iph,
"Forwarding altered outgoing ICMP"); "Forwarding altered outgoing ICMP");
else else
IP_VS_DBG_PKT(11, pp, skb, (void *)ciph - (void *)iph, IP_VS_DBG_PKT(11, AF_INET, pp, skb, (void *)ciph - (void *)iph,
"Forwarding altered incoming ICMP"); "Forwarding altered incoming ICMP");
} }
@ -634,11 +663,13 @@ void ip_vs_nat_icmp_v6(struct sk_buff *skb, struct ip_vs_protocol *pp,
skb->ip_summed = CHECKSUM_PARTIAL; skb->ip_summed = CHECKSUM_PARTIAL;
if (inout) if (inout)
IP_VS_DBG_PKT(11, pp, skb, (void *)ciph - (void *)iph, IP_VS_DBG_PKT(11, AF_INET6, pp, skb,
"Forwarding altered outgoing ICMPv6"); (void *)ciph - (void *)iph,
"Forwarding altered outgoing ICMPv6");
else else
IP_VS_DBG_PKT(11, pp, skb, (void *)ciph - (void *)iph, IP_VS_DBG_PKT(11, AF_INET6, pp, skb,
"Forwarding altered incoming ICMPv6"); (void *)ciph - (void *)iph,
"Forwarding altered incoming ICMPv6");
} }
#endif #endif
@ -679,11 +710,23 @@ static int handle_response_icmp(int af, struct sk_buff *skb,
#endif #endif
ip_vs_nat_icmp(skb, pp, cp, 1); ip_vs_nat_icmp(skb, pp, cp, 1);
#ifdef CONFIG_IP_VS_IPV6
if (af == AF_INET6) {
if (sysctl_ip_vs_snat_reroute && ip6_route_me_harder(skb) != 0)
goto out;
} else
#endif
if ((sysctl_ip_vs_snat_reroute ||
skb_rtable(skb)->rt_flags & RTCF_LOCAL) &&
ip_route_me_harder(skb, RTN_LOCAL) != 0)
goto out;
/* do the statistics and put it back */ /* do the statistics and put it back */
ip_vs_out_stats(cp, skb); ip_vs_out_stats(cp, skb);
skb->ipvs_property = 1;
if (!(cp->flags & IP_VS_CONN_F_NFCT)) if (!(cp->flags & IP_VS_CONN_F_NFCT))
skb->ipvs_property = 1; ip_vs_notrack(skb);
else else
ip_vs_update_conntrack(skb, cp, 0); ip_vs_update_conntrack(skb, cp, 0);
verdict = NF_ACCEPT; verdict = NF_ACCEPT;
@ -699,7 +742,8 @@ out:
* Find any that might be relevant, check against existing connections. * Find any that might be relevant, check against existing connections.
* Currently handles error types - unreachable, quench, ttl exceeded. * Currently handles error types - unreachable, quench, ttl exceeded.
*/ */
static int ip_vs_out_icmp(struct sk_buff *skb, int *related) static int ip_vs_out_icmp(struct sk_buff *skb, int *related,
unsigned int hooknum)
{ {
struct iphdr *iph; struct iphdr *iph;
struct icmphdr _icmph, *ic; struct icmphdr _icmph, *ic;
@ -714,7 +758,7 @@ static int ip_vs_out_icmp(struct sk_buff *skb, int *related)
/* reassemble IP fragments */ /* reassemble IP fragments */
if (ip_hdr(skb)->frag_off & htons(IP_MF | IP_OFFSET)) { if (ip_hdr(skb)->frag_off & htons(IP_MF | IP_OFFSET)) {
if (ip_vs_gather_frags(skb, IP_DEFRAG_VS_OUT)) if (ip_vs_gather_frags(skb, ip_vs_defrag_user(hooknum)))
return NF_STOLEN; return NF_STOLEN;
} }
@ -757,7 +801,8 @@ static int ip_vs_out_icmp(struct sk_buff *skb, int *related)
pp->dont_defrag)) pp->dont_defrag))
return NF_ACCEPT; return NF_ACCEPT;
IP_VS_DBG_PKT(11, pp, skb, offset, "Checking outgoing ICMP for"); IP_VS_DBG_PKT(11, AF_INET, pp, skb, offset,
"Checking outgoing ICMP for");
offset += cih->ihl * 4; offset += cih->ihl * 4;
@ -773,7 +818,8 @@ static int ip_vs_out_icmp(struct sk_buff *skb, int *related)
} }
#ifdef CONFIG_IP_VS_IPV6 #ifdef CONFIG_IP_VS_IPV6
static int ip_vs_out_icmp_v6(struct sk_buff *skb, int *related) static int ip_vs_out_icmp_v6(struct sk_buff *skb, int *related,
unsigned int hooknum)
{ {
struct ipv6hdr *iph; struct ipv6hdr *iph;
struct icmp6hdr _icmph, *ic; struct icmp6hdr _icmph, *ic;
@ -789,7 +835,7 @@ static int ip_vs_out_icmp_v6(struct sk_buff *skb, int *related)
/* reassemble IP fragments */ /* reassemble IP fragments */
if (ipv6_hdr(skb)->nexthdr == IPPROTO_FRAGMENT) { if (ipv6_hdr(skb)->nexthdr == IPPROTO_FRAGMENT) {
if (ip_vs_gather_frags_v6(skb, IP_DEFRAG_VS_OUT)) if (ip_vs_gather_frags_v6(skb, ip_vs_defrag_user(hooknum)))
return NF_STOLEN; return NF_STOLEN;
} }
@ -832,7 +878,8 @@ static int ip_vs_out_icmp_v6(struct sk_buff *skb, int *related)
if (unlikely(cih->nexthdr == IPPROTO_FRAGMENT && pp->dont_defrag)) if (unlikely(cih->nexthdr == IPPROTO_FRAGMENT && pp->dont_defrag))
return NF_ACCEPT; return NF_ACCEPT;
IP_VS_DBG_PKT(11, pp, skb, offset, "Checking outgoing ICMPv6 for"); IP_VS_DBG_PKT(11, AF_INET6, pp, skb, offset,
"Checking outgoing ICMPv6 for");
offset += sizeof(struct ipv6hdr); offset += sizeof(struct ipv6hdr);
@ -880,7 +927,7 @@ static unsigned int
handle_response(int af, struct sk_buff *skb, struct ip_vs_protocol *pp, handle_response(int af, struct sk_buff *skb, struct ip_vs_protocol *pp,
struct ip_vs_conn *cp, int ihl) struct ip_vs_conn *cp, int ihl)
{ {
IP_VS_DBG_PKT(11, pp, skb, 0, "Outgoing packet"); IP_VS_DBG_PKT(11, af, pp, skb, 0, "Outgoing packet");
if (!skb_make_writable(skb, ihl)) if (!skb_make_writable(skb, ihl))
goto drop; goto drop;
@ -914,23 +961,24 @@ handle_response(int af, struct sk_buff *skb, struct ip_vs_protocol *pp,
* if it came from this machine itself. So re-compute * if it came from this machine itself. So re-compute
* the routing information. * the routing information.
*/ */
if (sysctl_ip_vs_snat_reroute) {
#ifdef CONFIG_IP_VS_IPV6 #ifdef CONFIG_IP_VS_IPV6
if (af == AF_INET6) { if (af == AF_INET6) {
if (ip6_route_me_harder(skb) != 0) if (sysctl_ip_vs_snat_reroute && ip6_route_me_harder(skb) != 0)
goto drop; goto drop;
} else } else
#endif #endif
if (ip_route_me_harder(skb, RTN_LOCAL) != 0) if ((sysctl_ip_vs_snat_reroute ||
goto drop; skb_rtable(skb)->rt_flags & RTCF_LOCAL) &&
} ip_route_me_harder(skb, RTN_LOCAL) != 0)
goto drop;
IP_VS_DBG_PKT(10, pp, skb, 0, "After SNAT"); IP_VS_DBG_PKT(10, af, pp, skb, 0, "After SNAT");
ip_vs_out_stats(cp, skb); ip_vs_out_stats(cp, skb);
ip_vs_set_state(cp, IP_VS_DIR_OUTPUT, skb, pp); ip_vs_set_state(cp, IP_VS_DIR_OUTPUT, skb, pp);
skb->ipvs_property = 1;
if (!(cp->flags & IP_VS_CONN_F_NFCT)) if (!(cp->flags & IP_VS_CONN_F_NFCT))
skb->ipvs_property = 1; ip_vs_notrack(skb);
else else
ip_vs_update_conntrack(skb, cp, 0); ip_vs_update_conntrack(skb, cp, 0);
ip_vs_conn_put(cp); ip_vs_conn_put(cp);
@ -946,53 +994,54 @@ drop:
} }
/* /*
* It is hooked at the NF_INET_FORWARD chain, used only for VS/NAT.
* Check if outgoing packet belongs to the established ip_vs_conn. * Check if outgoing packet belongs to the established ip_vs_conn.
*/ */
static unsigned int static unsigned int
ip_vs_out(unsigned int hooknum, struct sk_buff *skb, ip_vs_out(unsigned int hooknum, struct sk_buff *skb, int af)
const struct net_device *in, const struct net_device *out,
int (*okfn)(struct sk_buff *))
{ {
struct ip_vs_iphdr iph; struct ip_vs_iphdr iph;
struct ip_vs_protocol *pp; struct ip_vs_protocol *pp;
struct ip_vs_conn *cp; struct ip_vs_conn *cp;
int af;
EnterFunction(11); EnterFunction(11);
af = (skb->protocol == htons(ETH_P_IP)) ? AF_INET : AF_INET6; /* Already marked as IPVS request or reply? */
if (skb->ipvs_property) if (skb->ipvs_property)
return NF_ACCEPT; return NF_ACCEPT;
/* Bad... Do not break raw sockets */
if (unlikely(skb->sk != NULL && hooknum == NF_INET_LOCAL_OUT &&
af == AF_INET)) {
struct sock *sk = skb->sk;
struct inet_sock *inet = inet_sk(skb->sk);
if (inet && sk->sk_family == PF_INET && inet->nodefrag)
return NF_ACCEPT;
}
if (unlikely(!skb_dst(skb)))
return NF_ACCEPT;
ip_vs_fill_iphdr(af, skb_network_header(skb), &iph); ip_vs_fill_iphdr(af, skb_network_header(skb), &iph);
#ifdef CONFIG_IP_VS_IPV6 #ifdef CONFIG_IP_VS_IPV6
if (af == AF_INET6) { if (af == AF_INET6) {
if (unlikely(iph.protocol == IPPROTO_ICMPV6)) { if (unlikely(iph.protocol == IPPROTO_ICMPV6)) {
int related, verdict = ip_vs_out_icmp_v6(skb, &related); int related;
int verdict = ip_vs_out_icmp_v6(skb, &related,
hooknum);
if (related) { if (related)
if (sysctl_ip_vs_snat_reroute &&
NF_ACCEPT == verdict &&
ip6_route_me_harder(skb))
verdict = NF_DROP;
return verdict; return verdict;
}
ip_vs_fill_iphdr(af, skb_network_header(skb), &iph); ip_vs_fill_iphdr(af, skb_network_header(skb), &iph);
} }
} else } else
#endif #endif
if (unlikely(iph.protocol == IPPROTO_ICMP)) { if (unlikely(iph.protocol == IPPROTO_ICMP)) {
int related, verdict = ip_vs_out_icmp(skb, &related); int related;
int verdict = ip_vs_out_icmp(skb, &related, hooknum);
if (related) { if (related)
if (sysctl_ip_vs_snat_reroute &&
NF_ACCEPT == verdict &&
ip_route_me_harder(skb, RTN_LOCAL))
verdict = NF_DROP;
return verdict; return verdict;
}
ip_vs_fill_iphdr(af, skb_network_header(skb), &iph); ip_vs_fill_iphdr(af, skb_network_header(skb), &iph);
} }
@ -1003,19 +1052,19 @@ ip_vs_out(unsigned int hooknum, struct sk_buff *skb,
/* reassemble IP fragments */ /* reassemble IP fragments */
#ifdef CONFIG_IP_VS_IPV6 #ifdef CONFIG_IP_VS_IPV6
if (af == AF_INET6) { if (af == AF_INET6) {
if (unlikely(iph.protocol == IPPROTO_ICMPV6)) { if (ipv6_hdr(skb)->nexthdr == IPPROTO_FRAGMENT) {
int related, verdict = ip_vs_out_icmp_v6(skb, &related); if (ip_vs_gather_frags_v6(skb,
ip_vs_defrag_user(hooknum)))
if (related) return NF_STOLEN;
return verdict;
ip_vs_fill_iphdr(af, skb_network_header(skb), &iph);
} }
ip_vs_fill_iphdr(af, skb_network_header(skb), &iph);
} else } else
#endif #endif
if (unlikely(ip_hdr(skb)->frag_off & htons(IP_MF|IP_OFFSET) && if (unlikely(ip_hdr(skb)->frag_off & htons(IP_MF|IP_OFFSET) &&
!pp->dont_defrag)) { !pp->dont_defrag)) {
if (ip_vs_gather_frags(skb, IP_DEFRAG_VS_OUT)) if (ip_vs_gather_frags(skb,
ip_vs_defrag_user(hooknum)))
return NF_STOLEN; return NF_STOLEN;
ip_vs_fill_iphdr(af, skb_network_header(skb), &iph); ip_vs_fill_iphdr(af, skb_network_header(skb), &iph);
@ -1026,55 +1075,123 @@ ip_vs_out(unsigned int hooknum, struct sk_buff *skb,
*/ */
cp = pp->conn_out_get(af, skb, pp, &iph, iph.len, 0); cp = pp->conn_out_get(af, skb, pp, &iph, iph.len, 0);
if (unlikely(!cp)) { if (likely(cp))
if (sysctl_ip_vs_nat_icmp_send && return handle_response(af, skb, pp, cp, iph.len);
(pp->protocol == IPPROTO_TCP || if (sysctl_ip_vs_nat_icmp_send &&
pp->protocol == IPPROTO_UDP || (pp->protocol == IPPROTO_TCP ||
pp->protocol == IPPROTO_SCTP)) { pp->protocol == IPPROTO_UDP ||
__be16 _ports[2], *pptr; pp->protocol == IPPROTO_SCTP)) {
__be16 _ports[2], *pptr;
pptr = skb_header_pointer(skb, iph.len, pptr = skb_header_pointer(skb, iph.len,
sizeof(_ports), _ports); sizeof(_ports), _ports);
if (pptr == NULL) if (pptr == NULL)
return NF_ACCEPT; /* Not for me */ return NF_ACCEPT; /* Not for me */
if (ip_vs_lookup_real_service(af, iph.protocol, if (ip_vs_lookup_real_service(af, iph.protocol,
&iph.saddr, &iph.saddr,
pptr[0])) { pptr[0])) {
/* /*
* Notify the real server: there is no * Notify the real server: there is no
* existing entry if it is not RST * existing entry if it is not RST
* packet or not TCP packet. * packet or not TCP packet.
*/ */
if ((iph.protocol != IPPROTO_TCP && if ((iph.protocol != IPPROTO_TCP &&
iph.protocol != IPPROTO_SCTP) iph.protocol != IPPROTO_SCTP)
|| ((iph.protocol == IPPROTO_TCP || ((iph.protocol == IPPROTO_TCP
&& !is_tcp_reset(skb, iph.len)) && !is_tcp_reset(skb, iph.len))
|| (iph.protocol == IPPROTO_SCTP || (iph.protocol == IPPROTO_SCTP
&& !is_sctp_abort(skb, && !is_sctp_abort(skb,
iph.len)))) { iph.len)))) {
#ifdef CONFIG_IP_VS_IPV6 #ifdef CONFIG_IP_VS_IPV6
if (af == AF_INET6) if (af == AF_INET6) {
icmpv6_send(skb, struct net *net =
ICMPV6_DEST_UNREACH, dev_net(skb_dst(skb)->dev);
ICMPV6_PORT_UNREACH,
0); if (!skb->dev)
else skb->dev = net->loopback_dev;
icmpv6_send(skb,
ICMPV6_DEST_UNREACH,
ICMPV6_PORT_UNREACH,
0);
} else
#endif #endif
icmp_send(skb, icmp_send(skb,
ICMP_DEST_UNREACH, ICMP_DEST_UNREACH,
ICMP_PORT_UNREACH, 0); ICMP_PORT_UNREACH, 0);
return NF_DROP; return NF_DROP;
}
} }
} }
IP_VS_DBG_PKT(12, pp, skb, 0,
"packet continues traversal as normal");
return NF_ACCEPT;
} }
IP_VS_DBG_PKT(12, af, pp, skb, 0,
return handle_response(af, skb, pp, cp, iph.len); "ip_vs_out: packet continues traversal as normal");
return NF_ACCEPT;
} }
/*
* It is hooked at the NF_INET_FORWARD and NF_INET_LOCAL_IN chains,
* used only for VS/NAT.
* Check if packet is reply for established ip_vs_conn.
*
* IPv4 entry point: passes hooknum and skb to ip_vs_out() with
* af fixed to AF_INET and returns its verdict unchanged. The in, out
* and okfn arguments are not used here.
*/
static unsigned int
ip_vs_reply4(unsigned int hooknum, struct sk_buff *skb,
const struct net_device *in, const struct net_device *out,
int (*okfn)(struct sk_buff *))
{
return ip_vs_out(hooknum, skb, AF_INET);
}
/*
* It is hooked at the NF_INET_LOCAL_OUT chain, used only for VS/NAT.
* Check if packet is reply for established ip_vs_conn.
*
* IPv4 entry point: calls ip_vs_out() with af fixed to AF_INET while
* bottom halves are disabled, and returns its verdict unchanged. The
* in, out and okfn arguments are not used here.
*/
static unsigned int
ip_vs_local_reply4(unsigned int hooknum, struct sk_buff *skb,
const struct net_device *in, const struct net_device *out,
int (*okfn)(struct sk_buff *))
{
unsigned int verdict;
/* Disable BH in LOCAL_OUT until all places are fixed */
local_bh_disable();
verdict = ip_vs_out(hooknum, skb, AF_INET);
/* Re-enable BH only after ip_vs_out() has returned its verdict */
local_bh_enable();
return verdict;
}
#ifdef CONFIG_IP_VS_IPV6
/*
* It is hooked at the NF_INET_FORWARD and NF_INET_LOCAL_IN chains,
* used only for VS/NAT.
* Check if packet is reply for established ip_vs_conn.
*
* IPv6 entry point: passes hooknum and skb to ip_vs_out() with
* af fixed to AF_INET6 and returns its verdict unchanged. The in, out
* and okfn arguments are not used here.
*/
static unsigned int
ip_vs_reply6(unsigned int hooknum, struct sk_buff *skb,
const struct net_device *in, const struct net_device *out,
int (*okfn)(struct sk_buff *))
{
return ip_vs_out(hooknum, skb, AF_INET6);
}
/*
* It is hooked at the NF_INET_LOCAL_OUT chain, used only for VS/NAT.
* Check if packet is reply for established ip_vs_conn.
*
* IPv6 entry point: calls ip_vs_out() with af fixed to AF_INET6 while
* bottom halves are disabled, and returns its verdict unchanged. The
* in, out and okfn arguments are not used here.
*/
static unsigned int
ip_vs_local_reply6(unsigned int hooknum, struct sk_buff *skb,
const struct net_device *in, const struct net_device *out,
int (*okfn)(struct sk_buff *))
{
unsigned int verdict;
/* Disable BH in LOCAL_OUT until all places are fixed */
local_bh_disable();
verdict = ip_vs_out(hooknum, skb, AF_INET6);
/* Re-enable BH only after ip_vs_out() has returned its verdict */
local_bh_enable();
return verdict;
}
#endif
/* /*
* Handle ICMP messages in the outside-to-inside direction (incoming). * Handle ICMP messages in the outside-to-inside direction (incoming).
@ -1098,8 +1215,7 @@ ip_vs_in_icmp(struct sk_buff *skb, int *related, unsigned int hooknum)
/* reassemble IP fragments */ /* reassemble IP fragments */
if (ip_hdr(skb)->frag_off & htons(IP_MF | IP_OFFSET)) { if (ip_hdr(skb)->frag_off & htons(IP_MF | IP_OFFSET)) {
if (ip_vs_gather_frags(skb, hooknum == NF_INET_LOCAL_IN ? if (ip_vs_gather_frags(skb, ip_vs_defrag_user(hooknum)))
IP_DEFRAG_VS_IN : IP_DEFRAG_VS_FWD))
return NF_STOLEN; return NF_STOLEN;
} }
@ -1142,7 +1258,8 @@ ip_vs_in_icmp(struct sk_buff *skb, int *related, unsigned int hooknum)
pp->dont_defrag)) pp->dont_defrag))
return NF_ACCEPT; return NF_ACCEPT;
IP_VS_DBG_PKT(11, pp, skb, offset, "Checking incoming ICMP for"); IP_VS_DBG_PKT(11, AF_INET, pp, skb, offset,
"Checking incoming ICMP for");
offset += cih->ihl * 4; offset += cih->ihl * 4;
@ -1176,7 +1293,14 @@ ip_vs_in_icmp(struct sk_buff *skb, int *related, unsigned int hooknum)
if (IPPROTO_TCP == cih->protocol || IPPROTO_UDP == cih->protocol) if (IPPROTO_TCP == cih->protocol || IPPROTO_UDP == cih->protocol)
offset += 2 * sizeof(__u16); offset += 2 * sizeof(__u16);
verdict = ip_vs_icmp_xmit(skb, cp, pp, offset); verdict = ip_vs_icmp_xmit(skb, cp, pp, offset);
/* do not touch skb anymore */ /* LOCALNODE from FORWARD hook is not supported */
if (verdict == NF_ACCEPT && hooknum == NF_INET_FORWARD &&
skb_rtable(skb)->rt_flags & RTCF_LOCAL) {
IP_VS_DBG(1, "%s(): "
"local delivery to %pI4 but in FORWARD\n",
__func__, &skb_rtable(skb)->rt_dst);
verdict = NF_DROP;
}
out: out:
__ip_vs_conn_put(cp); __ip_vs_conn_put(cp);
@ -1197,14 +1321,13 @@ ip_vs_in_icmp_v6(struct sk_buff *skb, int *related, unsigned int hooknum)
struct ip_vs_protocol *pp; struct ip_vs_protocol *pp;
unsigned int offset, verdict; unsigned int offset, verdict;
union nf_inet_addr snet; union nf_inet_addr snet;
struct rt6_info *rt;
*related = 1; *related = 1;
/* reassemble IP fragments */ /* reassemble IP fragments */
if (ipv6_hdr(skb)->nexthdr == IPPROTO_FRAGMENT) { if (ipv6_hdr(skb)->nexthdr == IPPROTO_FRAGMENT) {
if (ip_vs_gather_frags_v6(skb, hooknum == NF_INET_LOCAL_IN ? if (ip_vs_gather_frags_v6(skb, ip_vs_defrag_user(hooknum)))
IP_DEFRAG_VS_IN :
IP_DEFRAG_VS_FWD))
return NF_STOLEN; return NF_STOLEN;
} }
@ -1247,7 +1370,8 @@ ip_vs_in_icmp_v6(struct sk_buff *skb, int *related, unsigned int hooknum)
if (unlikely(cih->nexthdr == IPPROTO_FRAGMENT && pp->dont_defrag)) if (unlikely(cih->nexthdr == IPPROTO_FRAGMENT && pp->dont_defrag))
return NF_ACCEPT; return NF_ACCEPT;
IP_VS_DBG_PKT(11, pp, skb, offset, "Checking incoming ICMPv6 for"); IP_VS_DBG_PKT(11, AF_INET6, pp, skb, offset,
"Checking incoming ICMPv6 for");
offset += sizeof(struct ipv6hdr); offset += sizeof(struct ipv6hdr);
@ -1275,7 +1399,15 @@ ip_vs_in_icmp_v6(struct sk_buff *skb, int *related, unsigned int hooknum)
IPPROTO_SCTP == cih->nexthdr) IPPROTO_SCTP == cih->nexthdr)
offset += 2 * sizeof(__u16); offset += 2 * sizeof(__u16);
verdict = ip_vs_icmp_xmit_v6(skb, cp, pp, offset); verdict = ip_vs_icmp_xmit_v6(skb, cp, pp, offset);
/* do not touch skb anymore */ /* LOCALNODE from FORWARD hook is not supported */
if (verdict == NF_ACCEPT && hooknum == NF_INET_FORWARD &&
(rt = (struct rt6_info *) skb_dst(skb)) &&
rt->rt6i_dev && rt->rt6i_dev->flags & IFF_LOOPBACK) {
IP_VS_DBG(1, "%s(): "
"local delivery to %pI6 but in FORWARD\n",
__func__, &rt->rt6i_dst);
verdict = NF_DROP;
}
__ip_vs_conn_put(cp); __ip_vs_conn_put(cp);
@ -1289,35 +1421,49 @@ ip_vs_in_icmp_v6(struct sk_buff *skb, int *related, unsigned int hooknum)
* and send it on its way... * and send it on its way...
*/ */
static unsigned int static unsigned int
ip_vs_in(unsigned int hooknum, struct sk_buff *skb, ip_vs_in(unsigned int hooknum, struct sk_buff *skb, int af)
const struct net_device *in, const struct net_device *out,
int (*okfn)(struct sk_buff *))
{ {
struct ip_vs_iphdr iph; struct ip_vs_iphdr iph;
struct ip_vs_protocol *pp; struct ip_vs_protocol *pp;
struct ip_vs_conn *cp; struct ip_vs_conn *cp;
int ret, restart, af, pkts; int ret, restart, pkts;
af = (skb->protocol == htons(ETH_P_IP)) ? AF_INET : AF_INET6; /* Already marked as IPVS request or reply? */
if (skb->ipvs_property)
ip_vs_fill_iphdr(af, skb_network_header(skb), &iph); return NF_ACCEPT;
/* /*
* Big tappo: only PACKET_HOST, including loopback for local client * Big tappo:
* Don't handle local packets on IPv6 for now * - remote client: only PACKET_HOST
* - route: used for struct net when skb->dev is unset
*/ */
if (unlikely(skb->pkt_type != PACKET_HOST)) { if (unlikely((skb->pkt_type != PACKET_HOST &&
IP_VS_DBG_BUF(12, "packet type=%d proto=%d daddr=%s ignored\n", hooknum != NF_INET_LOCAL_OUT) ||
skb->pkt_type, !skb_dst(skb))) {
iph.protocol, ip_vs_fill_iphdr(af, skb_network_header(skb), &iph);
IP_VS_DBG_ADDR(af, &iph.daddr)); IP_VS_DBG_BUF(12, "packet type=%d proto=%d daddr=%s"
" ignored in hook %u\n",
skb->pkt_type, iph.protocol,
IP_VS_DBG_ADDR(af, &iph.daddr), hooknum);
return NF_ACCEPT; return NF_ACCEPT;
} }
ip_vs_fill_iphdr(af, skb_network_header(skb), &iph);
/* Bad... Do not break raw sockets */
if (unlikely(skb->sk != NULL && hooknum == NF_INET_LOCAL_OUT &&
af == AF_INET)) {
struct sock *sk = skb->sk;
struct inet_sock *inet = inet_sk(skb->sk);
if (inet && sk->sk_family == PF_INET && inet->nodefrag)
return NF_ACCEPT;
}
#ifdef CONFIG_IP_VS_IPV6 #ifdef CONFIG_IP_VS_IPV6
if (af == AF_INET6) { if (af == AF_INET6) {
if (unlikely(iph.protocol == IPPROTO_ICMPV6)) { if (unlikely(iph.protocol == IPPROTO_ICMPV6)) {
int related, verdict = ip_vs_in_icmp_v6(skb, &related, hooknum); int related;
int verdict = ip_vs_in_icmp_v6(skb, &related, hooknum);
if (related) if (related)
return verdict; return verdict;
@ -1326,7 +1472,8 @@ ip_vs_in(unsigned int hooknum, struct sk_buff *skb,
} else } else
#endif #endif
if (unlikely(iph.protocol == IPPROTO_ICMP)) { if (unlikely(iph.protocol == IPPROTO_ICMP)) {
int related, verdict = ip_vs_in_icmp(skb, &related, hooknum); int related;
int verdict = ip_vs_in_icmp(skb, &related, hooknum);
if (related) if (related)
return verdict; return verdict;
@ -1346,23 +1493,18 @@ ip_vs_in(unsigned int hooknum, struct sk_buff *skb,
if (unlikely(!cp)) { if (unlikely(!cp)) {
int v; int v;
/* For local client packets, it could be a response */
cp = pp->conn_out_get(af, skb, pp, &iph, iph.len, 0);
if (cp)
return handle_response(af, skb, pp, cp, iph.len);
if (!pp->conn_schedule(af, skb, pp, &v, &cp)) if (!pp->conn_schedule(af, skb, pp, &v, &cp))
return v; return v;
} }
if (unlikely(!cp)) { if (unlikely(!cp)) {
/* sorry, all this trouble for a no-hit :) */ /* sorry, all this trouble for a no-hit :) */
IP_VS_DBG_PKT(12, pp, skb, 0, IP_VS_DBG_PKT(12, af, pp, skb, 0,
"packet continues traversal as normal"); "ip_vs_in: packet continues traversal as normal");
return NF_ACCEPT; return NF_ACCEPT;
} }
IP_VS_DBG_PKT(11, pp, skb, 0, "Incoming packet"); IP_VS_DBG_PKT(11, af, pp, skb, 0, "Incoming packet");
/* Check the server status */ /* Check the server status */
if (cp->dest && !(cp->dest->flags & IP_VS_DEST_F_AVAILABLE)) { if (cp->dest && !(cp->dest->flags & IP_VS_DEST_F_AVAILABLE)) {
@ -1429,6 +1571,72 @@ out:
return ret; return ret;
} }
/*
* AF_INET handler in NF_INET_LOCAL_IN chain
* Schedule and forward packets from remote clients
*/
static unsigned int
ip_vs_remote_request4(unsigned int hooknum, struct sk_buff *skb,
const struct net_device *in,
const struct net_device *out,
int (*okfn)(struct sk_buff *))
{
return ip_vs_in(hooknum, skb, AF_INET);
}
/*
* AF_INET handler in NF_INET_LOCAL_OUT chain
* Schedule and forward packets from local clients
*/
static unsigned int
ip_vs_local_request4(unsigned int hooknum, struct sk_buff *skb,
const struct net_device *in, const struct net_device *out,
int (*okfn)(struct sk_buff *))
{
unsigned int verdict;
/* Disable BH in LOCAL_OUT until all places are fixed */
local_bh_disable();
verdict = ip_vs_in(hooknum, skb, AF_INET);
local_bh_enable();
return verdict;
}
#ifdef CONFIG_IP_VS_IPV6
/*
* AF_INET6 handler in NF_INET_LOCAL_IN chain
* Schedule and forward packets from remote clients
*/
static unsigned int
ip_vs_remote_request6(unsigned int hooknum, struct sk_buff *skb,
const struct net_device *in,
const struct net_device *out,
int (*okfn)(struct sk_buff *))
{
return ip_vs_in(hooknum, skb, AF_INET6);
}
/*
* AF_INET6 handler in NF_INET_LOCAL_OUT chain
* Schedule and forward packets from local clients
*/
static unsigned int
ip_vs_local_request6(unsigned int hooknum, struct sk_buff *skb,
const struct net_device *in, const struct net_device *out,
int (*okfn)(struct sk_buff *))
{
unsigned int verdict;
/* Disable BH in LOCAL_OUT until all places are fixed */
local_bh_disable();
verdict = ip_vs_in(hooknum, skb, AF_INET6);
local_bh_enable();
return verdict;
}
#endif
/* /*
* It is hooked at the NF_INET_FORWARD chain, in order to catch ICMP * It is hooked at the NF_INET_FORWARD chain, in order to catch ICMP
@ -1469,23 +1677,39 @@ ip_vs_forward_icmp_v6(unsigned int hooknum, struct sk_buff *skb,
static struct nf_hook_ops ip_vs_ops[] __read_mostly = { static struct nf_hook_ops ip_vs_ops[] __read_mostly = {
/* After packet filtering, change source only for VS/NAT */
{
.hook = ip_vs_reply4,
.owner = THIS_MODULE,
.pf = PF_INET,
.hooknum = NF_INET_LOCAL_IN,
.priority = 99,
},
/* After packet filtering, forward packet through VS/DR, VS/TUN, /* After packet filtering, forward packet through VS/DR, VS/TUN,
* or VS/NAT(change destination), so that filtering rules can be * or VS/NAT(change destination), so that filtering rules can be
* applied to IPVS. */ * applied to IPVS. */
{ {
.hook = ip_vs_in, .hook = ip_vs_remote_request4,
.owner = THIS_MODULE, .owner = THIS_MODULE,
.pf = PF_INET, .pf = PF_INET,
.hooknum = NF_INET_LOCAL_IN, .hooknum = NF_INET_LOCAL_IN,
.priority = 100, .priority = 101,
}, },
/* After packet filtering, change source only for VS/NAT */ /* Before ip_vs_in, change source only for VS/NAT */
{ {
.hook = ip_vs_out, .hook = ip_vs_local_reply4,
.owner = THIS_MODULE, .owner = THIS_MODULE,
.pf = PF_INET, .pf = PF_INET,
.hooknum = NF_INET_FORWARD, .hooknum = NF_INET_LOCAL_OUT,
.priority = 100, .priority = -99,
},
/* After mangle, schedule and forward local requests */
{
.hook = ip_vs_local_request4,
.owner = THIS_MODULE,
.pf = PF_INET,
.hooknum = NF_INET_LOCAL_OUT,
.priority = -98,
}, },
/* After packet filtering (but before ip_vs_out_icmp), catch icmp /* After packet filtering (but before ip_vs_out_icmp), catch icmp
* destined for 0.0.0.0/0, which is for incoming IPVS connections */ * destined for 0.0.0.0/0, which is for incoming IPVS connections */
@ -1493,35 +1717,51 @@ static struct nf_hook_ops ip_vs_ops[] __read_mostly = {
.hook = ip_vs_forward_icmp, .hook = ip_vs_forward_icmp,
.owner = THIS_MODULE, .owner = THIS_MODULE,
.pf = PF_INET, .pf = PF_INET,
.hooknum = NF_INET_FORWARD, .hooknum = NF_INET_FORWARD,
.priority = 99, .priority = 99,
}, },
/* Before the netfilter connection tracking, exit from POST_ROUTING */ /* After packet filtering, change source only for VS/NAT */
{ {
.hook = ip_vs_post_routing, .hook = ip_vs_reply4,
.owner = THIS_MODULE, .owner = THIS_MODULE,
.pf = PF_INET, .pf = PF_INET,
.hooknum = NF_INET_POST_ROUTING, .hooknum = NF_INET_FORWARD,
.priority = NF_IP_PRI_NAT_SRC-1, .priority = 100,
}, },
#ifdef CONFIG_IP_VS_IPV6 #ifdef CONFIG_IP_VS_IPV6
/* After packet filtering, change source only for VS/NAT */
{
.hook = ip_vs_reply6,
.owner = THIS_MODULE,
.pf = PF_INET6,
.hooknum = NF_INET_LOCAL_IN,
.priority = 99,
},
/* After packet filtering, forward packet through VS/DR, VS/TUN, /* After packet filtering, forward packet through VS/DR, VS/TUN,
* or VS/NAT(change destination), so that filtering rules can be * or VS/NAT(change destination), so that filtering rules can be
* applied to IPVS. */ * applied to IPVS. */
{ {
.hook = ip_vs_in, .hook = ip_vs_remote_request6,
.owner = THIS_MODULE, .owner = THIS_MODULE,
.pf = PF_INET6, .pf = PF_INET6,
.hooknum = NF_INET_LOCAL_IN, .hooknum = NF_INET_LOCAL_IN,
.priority = 100, .priority = 101,
}, },
/* After packet filtering, change source only for VS/NAT */ /* Before ip_vs_in, change source only for VS/NAT */
{ {
.hook = ip_vs_out, .hook = ip_vs_local_reply6,
.owner = THIS_MODULE,
.pf = PF_INET,
.hooknum = NF_INET_LOCAL_OUT,
.priority = -99,
},
/* After mangle, schedule and forward local requests */
{
.hook = ip_vs_local_request6,
.owner = THIS_MODULE, .owner = THIS_MODULE,
.pf = PF_INET6, .pf = PF_INET6,
.hooknum = NF_INET_FORWARD, .hooknum = NF_INET_LOCAL_OUT,
.priority = 100, .priority = -98,
}, },
/* After packet filtering (but before ip_vs_out_icmp), catch icmp /* After packet filtering (but before ip_vs_out_icmp), catch icmp
* destined for 0.0.0.0/0, which is for incoming IPVS connections */ * destined for 0.0.0.0/0, which is for incoming IPVS connections */
@ -1529,16 +1769,16 @@ static struct nf_hook_ops ip_vs_ops[] __read_mostly = {
.hook = ip_vs_forward_icmp_v6, .hook = ip_vs_forward_icmp_v6,
.owner = THIS_MODULE, .owner = THIS_MODULE,
.pf = PF_INET6, .pf = PF_INET6,
.hooknum = NF_INET_FORWARD, .hooknum = NF_INET_FORWARD,
.priority = 99, .priority = 99,
}, },
/* Before the netfilter connection tracking, exit from POST_ROUTING */ /* After packet filtering, change source only for VS/NAT */
{ {
.hook = ip_vs_post_routing, .hook = ip_vs_reply6,
.owner = THIS_MODULE, .owner = THIS_MODULE,
.pf = PF_INET6, .pf = PF_INET6,
.hooknum = NF_INET_POST_ROUTING, .hooknum = NF_INET_FORWARD,
.priority = NF_IP6_PRI_NAT_SRC-1, .priority = 100,
}, },
#endif #endif
}; };

View File

@ -777,20 +777,6 @@ __ip_vs_update_dest(struct ip_vs_service *svc, struct ip_vs_dest *dest,
conn_flags = udest->conn_flags & IP_VS_CONN_F_DEST_MASK; conn_flags = udest->conn_flags & IP_VS_CONN_F_DEST_MASK;
conn_flags |= IP_VS_CONN_F_INACTIVE; conn_flags |= IP_VS_CONN_F_INACTIVE;
/* check if local node and update the flags */
#ifdef CONFIG_IP_VS_IPV6
if (svc->af == AF_INET6) {
if (__ip_vs_addr_is_local_v6(&udest->addr.in6)) {
conn_flags = (conn_flags & ~IP_VS_CONN_F_FWD_MASK)
| IP_VS_CONN_F_LOCALNODE;
}
} else
#endif
if (inet_addr_type(&init_net, udest->addr.ip) == RTN_LOCAL) {
conn_flags = (conn_flags & ~IP_VS_CONN_F_FWD_MASK)
| IP_VS_CONN_F_LOCALNODE;
}
/* set the IP_VS_CONN_F_NOOUTPUT flag if not masquerading/NAT */ /* set the IP_VS_CONN_F_NOOUTPUT flag if not masquerading/NAT */
if ((conn_flags & IP_VS_CONN_F_FWD_MASK) != IP_VS_CONN_F_MASQ) { if ((conn_flags & IP_VS_CONN_F_FWD_MASK) != IP_VS_CONN_F_MASQ) {
conn_flags |= IP_VS_CONN_F_NOOUTPUT; conn_flags |= IP_VS_CONN_F_NOOUTPUT;
@ -824,6 +810,10 @@ __ip_vs_update_dest(struct ip_vs_service *svc, struct ip_vs_dest *dest,
dest->u_threshold = udest->u_threshold; dest->u_threshold = udest->u_threshold;
dest->l_threshold = udest->l_threshold; dest->l_threshold = udest->l_threshold;
spin_lock(&dest->dst_lock);
ip_vs_dst_reset(dest);
spin_unlock(&dest->dst_lock);
if (add) if (add)
ip_vs_new_estimator(&dest->stats); ip_vs_new_estimator(&dest->stats);

View File

@ -242,9 +242,14 @@ static int ip_vs_ftp_out(struct ip_vs_app *app, struct ip_vs_conn *cp,
ret = nf_nat_mangle_tcp_packet(skb, ct, ctinfo, ret = nf_nat_mangle_tcp_packet(skb, ct, ctinfo,
start-data, end-start, start-data, end-start,
buf, buf_len); buf, buf_len);
if (ret) if (ret) {
ip_vs_nfct_expect_related(skb, ct, n_cp, ip_vs_nfct_expect_related(skb, ct, n_cp,
IPPROTO_TCP, 0, 0); IPPROTO_TCP, 0, 0);
if (skb->ip_summed == CHECKSUM_COMPLETE)
skb->ip_summed = CHECKSUM_UNNECESSARY;
/* csum is updated */
ret = 1;
}
} }
/* /*

View File

@ -172,8 +172,8 @@ ip_vs_tcpudp_debug_packet_v4(struct ip_vs_protocol *pp,
else if (ih->frag_off & htons(IP_OFFSET)) else if (ih->frag_off & htons(IP_OFFSET))
sprintf(buf, "%pI4->%pI4 frag", &ih->saddr, &ih->daddr); sprintf(buf, "%pI4->%pI4 frag", &ih->saddr, &ih->daddr);
else { else {
__be16 _ports[2], *pptr __be16 _ports[2], *pptr;
;
pptr = skb_header_pointer(skb, offset + ih->ihl*4, pptr = skb_header_pointer(skb, offset + ih->ihl*4,
sizeof(_ports), _ports); sizeof(_ports), _ports);
if (pptr == NULL) if (pptr == NULL)
@ -223,13 +223,13 @@ ip_vs_tcpudp_debug_packet_v6(struct ip_vs_protocol *pp,
void void
ip_vs_tcpudp_debug_packet(struct ip_vs_protocol *pp, ip_vs_tcpudp_debug_packet(int af, struct ip_vs_protocol *pp,
const struct sk_buff *skb, const struct sk_buff *skb,
int offset, int offset,
const char *msg) const char *msg)
{ {
#ifdef CONFIG_IP_VS_IPV6 #ifdef CONFIG_IP_VS_IPV6
if (skb->protocol == htons(ETH_P_IPV6)) if (af == AF_INET6)
ip_vs_tcpudp_debug_packet_v6(pp, skb, offset, msg); ip_vs_tcpudp_debug_packet_v6(pp, skb, offset, msg);
else else
#endif #endif

View File

@ -117,54 +117,6 @@ ah_esp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_protocol *pp,
return 0; return 0;
} }
static void
ah_esp_debug_packet_v4(struct ip_vs_protocol *pp, const struct sk_buff *skb,
int offset, const char *msg)
{
char buf[256];
struct iphdr _iph, *ih;
ih = skb_header_pointer(skb, offset, sizeof(_iph), &_iph);
if (ih == NULL)
sprintf(buf, "TRUNCATED");
else
sprintf(buf, "%pI4->%pI4", &ih->saddr, &ih->daddr);
pr_debug("%s: %s %s\n", msg, pp->name, buf);
}
#ifdef CONFIG_IP_VS_IPV6
static void
ah_esp_debug_packet_v6(struct ip_vs_protocol *pp, const struct sk_buff *skb,
int offset, const char *msg)
{
char buf[256];
struct ipv6hdr _iph, *ih;
ih = skb_header_pointer(skb, offset, sizeof(_iph), &_iph);
if (ih == NULL)
sprintf(buf, "TRUNCATED");
else
sprintf(buf, "%pI6->%pI6", &ih->saddr, &ih->daddr);
pr_debug("%s: %s %s\n", msg, pp->name, buf);
}
#endif
static void
ah_esp_debug_packet(struct ip_vs_protocol *pp, const struct sk_buff *skb,
int offset, const char *msg)
{
#ifdef CONFIG_IP_VS_IPV6
if (skb->protocol == htons(ETH_P_IPV6))
ah_esp_debug_packet_v6(pp, skb, offset, msg);
else
#endif
ah_esp_debug_packet_v4(pp, skb, offset, msg);
}
static void ah_esp_init(struct ip_vs_protocol *pp) static void ah_esp_init(struct ip_vs_protocol *pp)
{ {
/* nothing to do now */ /* nothing to do now */
@ -195,7 +147,7 @@ struct ip_vs_protocol ip_vs_protocol_ah = {
.register_app = NULL, .register_app = NULL,
.unregister_app = NULL, .unregister_app = NULL,
.app_conn_bind = NULL, .app_conn_bind = NULL,
.debug_packet = ah_esp_debug_packet, .debug_packet = ip_vs_tcpudp_debug_packet,
.timeout_change = NULL, /* ISAKMP */ .timeout_change = NULL, /* ISAKMP */
.set_state_timeout = NULL, .set_state_timeout = NULL,
}; };
@ -219,7 +171,7 @@ struct ip_vs_protocol ip_vs_protocol_esp = {
.register_app = NULL, .register_app = NULL,
.unregister_app = NULL, .unregister_app = NULL,
.app_conn_bind = NULL, .app_conn_bind = NULL,
.debug_packet = ah_esp_debug_packet, .debug_packet = ip_vs_tcpudp_debug_packet,
.timeout_change = NULL, /* ISAKMP */ .timeout_change = NULL, /* ISAKMP */
}; };
#endif #endif

View File

@ -31,6 +31,8 @@ sctp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_protocol *pp,
if ((sch->type == SCTP_CID_INIT) && if ((sch->type == SCTP_CID_INIT) &&
(svc = ip_vs_service_get(af, skb->mark, iph.protocol, (svc = ip_vs_service_get(af, skb->mark, iph.protocol,
&iph.daddr, sh->dest))) { &iph.daddr, sh->dest))) {
int ignored;
if (ip_vs_todrop()) { if (ip_vs_todrop()) {
/* /*
* It seems that we are very loaded. * It seems that we are very loaded.
@ -44,8 +46,8 @@ sctp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_protocol *pp,
* Let the virtual server select a real server for the * Let the virtual server select a real server for the
* incoming connection, and create a connection entry. * incoming connection, and create a connection entry.
*/ */
*cpp = ip_vs_schedule(svc, skb); *cpp = ip_vs_schedule(svc, skb, pp, &ignored);
if (!*cpp) { if (!*cpp && !ignored) {
*verdict = ip_vs_leave(svc, skb, pp); *verdict = ip_vs_leave(svc, skb, pp);
return 0; return 0;
} }
@ -174,7 +176,7 @@ sctp_csum_check(int af, struct sk_buff *skb, struct ip_vs_protocol *pp)
if (val != cmp) { if (val != cmp) {
/* CRC failure, dump it. */ /* CRC failure, dump it. */
IP_VS_DBG_RL_PKT(0, pp, skb, 0, IP_VS_DBG_RL_PKT(0, af, pp, skb, 0,
"Failed checksum for"); "Failed checksum for");
return 0; return 0;
} }

View File

@ -43,9 +43,12 @@ tcp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_protocol *pp,
return 0; return 0;
} }
/* No !th->ack check to allow scheduling on SYN+ACK for Active FTP */
if (th->syn && if (th->syn &&
(svc = ip_vs_service_get(af, skb->mark, iph.protocol, &iph.daddr, (svc = ip_vs_service_get(af, skb->mark, iph.protocol, &iph.daddr,
th->dest))) { th->dest))) {
int ignored;
if (ip_vs_todrop()) { if (ip_vs_todrop()) {
/* /*
* It seems that we are very loaded. * It seems that we are very loaded.
@ -60,8 +63,8 @@ tcp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_protocol *pp,
* Let the virtual server select a real server for the * Let the virtual server select a real server for the
* incoming connection, and create a connection entry. * incoming connection, and create a connection entry.
*/ */
*cpp = ip_vs_schedule(svc, skb); *cpp = ip_vs_schedule(svc, skb, pp, &ignored);
if (!*cpp) { if (!*cpp && !ignored) {
*verdict = ip_vs_leave(svc, skb, pp); *verdict = ip_vs_leave(svc, skb, pp);
return 0; return 0;
} }
@ -101,15 +104,15 @@ tcp_partial_csum_update(int af, struct tcphdr *tcph,
#ifdef CONFIG_IP_VS_IPV6 #ifdef CONFIG_IP_VS_IPV6
if (af == AF_INET6) if (af == AF_INET6)
tcph->check = tcph->check =
csum_fold(ip_vs_check_diff16(oldip->ip6, newip->ip6, ~csum_fold(ip_vs_check_diff16(oldip->ip6, newip->ip6,
ip_vs_check_diff2(oldlen, newlen, ip_vs_check_diff2(oldlen, newlen,
~csum_unfold(tcph->check)))); csum_unfold(tcph->check))));
else else
#endif #endif
tcph->check = tcph->check =
csum_fold(ip_vs_check_diff4(oldip->ip, newip->ip, ~csum_fold(ip_vs_check_diff4(oldip->ip, newip->ip,
ip_vs_check_diff2(oldlen, newlen, ip_vs_check_diff2(oldlen, newlen,
~csum_unfold(tcph->check)))); csum_unfold(tcph->check))));
} }
@ -120,6 +123,7 @@ tcp_snat_handler(struct sk_buff *skb,
struct tcphdr *tcph; struct tcphdr *tcph;
unsigned int tcphoff; unsigned int tcphoff;
int oldlen; int oldlen;
int payload_csum = 0;
#ifdef CONFIG_IP_VS_IPV6 #ifdef CONFIG_IP_VS_IPV6
if (cp->af == AF_INET6) if (cp->af == AF_INET6)
@ -134,13 +138,20 @@ tcp_snat_handler(struct sk_buff *skb,
return 0; return 0;
if (unlikely(cp->app != NULL)) { if (unlikely(cp->app != NULL)) {
int ret;
/* Some checks before mangling */ /* Some checks before mangling */
if (pp->csum_check && !pp->csum_check(cp->af, skb, pp)) if (pp->csum_check && !pp->csum_check(cp->af, skb, pp))
return 0; return 0;
/* Call application helper if needed */ /* Call application helper if needed */
if (!ip_vs_app_pkt_out(cp, skb)) if (!(ret = ip_vs_app_pkt_out(cp, skb)))
return 0; return 0;
/* ret=2: csum update is needed after payload mangling */
if (ret == 1)
oldlen = skb->len - tcphoff;
else
payload_csum = 1;
} }
tcph = (void *)skb_network_header(skb) + tcphoff; tcph = (void *)skb_network_header(skb) + tcphoff;
@ -151,12 +162,13 @@ tcp_snat_handler(struct sk_buff *skb,
tcp_partial_csum_update(cp->af, tcph, &cp->daddr, &cp->vaddr, tcp_partial_csum_update(cp->af, tcph, &cp->daddr, &cp->vaddr,
htons(oldlen), htons(oldlen),
htons(skb->len - tcphoff)); htons(skb->len - tcphoff));
} else if (!cp->app) { } else if (!payload_csum) {
/* Only port and addr are changed, do fast csum update */ /* Only port and addr are changed, do fast csum update */
tcp_fast_csum_update(cp->af, tcph, &cp->daddr, &cp->vaddr, tcp_fast_csum_update(cp->af, tcph, &cp->daddr, &cp->vaddr,
cp->dport, cp->vport); cp->dport, cp->vport);
if (skb->ip_summed == CHECKSUM_COMPLETE) if (skb->ip_summed == CHECKSUM_COMPLETE)
skb->ip_summed = CHECKSUM_NONE; skb->ip_summed = (cp->app && pp->csum_check) ?
CHECKSUM_UNNECESSARY : CHECKSUM_NONE;
} else { } else {
/* full checksum calculation */ /* full checksum calculation */
tcph->check = 0; tcph->check = 0;
@ -174,6 +186,7 @@ tcp_snat_handler(struct sk_buff *skb,
skb->len - tcphoff, skb->len - tcphoff,
cp->protocol, cp->protocol,
skb->csum); skb->csum);
skb->ip_summed = CHECKSUM_UNNECESSARY;
IP_VS_DBG(11, "O-pkt: %s O-csum=%d (+%zd)\n", IP_VS_DBG(11, "O-pkt: %s O-csum=%d (+%zd)\n",
pp->name, tcph->check, pp->name, tcph->check,
@ -190,6 +203,7 @@ tcp_dnat_handler(struct sk_buff *skb,
struct tcphdr *tcph; struct tcphdr *tcph;
unsigned int tcphoff; unsigned int tcphoff;
int oldlen; int oldlen;
int payload_csum = 0;
#ifdef CONFIG_IP_VS_IPV6 #ifdef CONFIG_IP_VS_IPV6
if (cp->af == AF_INET6) if (cp->af == AF_INET6)
@ -204,6 +218,8 @@ tcp_dnat_handler(struct sk_buff *skb,
return 0; return 0;
if (unlikely(cp->app != NULL)) { if (unlikely(cp->app != NULL)) {
int ret;
/* Some checks before mangling */ /* Some checks before mangling */
if (pp->csum_check && !pp->csum_check(cp->af, skb, pp)) if (pp->csum_check && !pp->csum_check(cp->af, skb, pp))
return 0; return 0;
@ -212,8 +228,13 @@ tcp_dnat_handler(struct sk_buff *skb,
* Attempt ip_vs_app call. * Attempt ip_vs_app call.
* It will fix ip_vs_conn and iph ack_seq stuff * It will fix ip_vs_conn and iph ack_seq stuff
*/ */
if (!ip_vs_app_pkt_in(cp, skb)) if (!(ret = ip_vs_app_pkt_in(cp, skb)))
return 0; return 0;
/* ret=2: csum update is needed after payload mangling */
if (ret == 1)
oldlen = skb->len - tcphoff;
else
payload_csum = 1;
} }
tcph = (void *)skb_network_header(skb) + tcphoff; tcph = (void *)skb_network_header(skb) + tcphoff;
@ -223,15 +244,16 @@ tcp_dnat_handler(struct sk_buff *skb,
* Adjust TCP checksums * Adjust TCP checksums
*/ */
if (skb->ip_summed == CHECKSUM_PARTIAL) { if (skb->ip_summed == CHECKSUM_PARTIAL) {
tcp_partial_csum_update(cp->af, tcph, &cp->daddr, &cp->vaddr, tcp_partial_csum_update(cp->af, tcph, &cp->vaddr, &cp->daddr,
htons(oldlen), htons(oldlen),
htons(skb->len - tcphoff)); htons(skb->len - tcphoff));
} else if (!cp->app) { } else if (!payload_csum) {
/* Only port and addr are changed, do fast csum update */ /* Only port and addr are changed, do fast csum update */
tcp_fast_csum_update(cp->af, tcph, &cp->vaddr, &cp->daddr, tcp_fast_csum_update(cp->af, tcph, &cp->vaddr, &cp->daddr,
cp->vport, cp->dport); cp->vport, cp->dport);
if (skb->ip_summed == CHECKSUM_COMPLETE) if (skb->ip_summed == CHECKSUM_COMPLETE)
skb->ip_summed = CHECKSUM_NONE; skb->ip_summed = (cp->app && pp->csum_check) ?
CHECKSUM_UNNECESSARY : CHECKSUM_NONE;
} else { } else {
/* full checksum calculation */ /* full checksum calculation */
tcph->check = 0; tcph->check = 0;
@ -278,7 +300,7 @@ tcp_csum_check(int af, struct sk_buff *skb, struct ip_vs_protocol *pp)
skb->len - tcphoff, skb->len - tcphoff,
ipv6_hdr(skb)->nexthdr, ipv6_hdr(skb)->nexthdr,
skb->csum)) { skb->csum)) {
IP_VS_DBG_RL_PKT(0, pp, skb, 0, IP_VS_DBG_RL_PKT(0, af, pp, skb, 0,
"Failed checksum for"); "Failed checksum for");
return 0; return 0;
} }
@ -289,7 +311,7 @@ tcp_csum_check(int af, struct sk_buff *skb, struct ip_vs_protocol *pp)
skb->len - tcphoff, skb->len - tcphoff,
ip_hdr(skb)->protocol, ip_hdr(skb)->protocol,
skb->csum)) { skb->csum)) {
IP_VS_DBG_RL_PKT(0, pp, skb, 0, IP_VS_DBG_RL_PKT(0, af, pp, skb, 0,
"Failed checksum for"); "Failed checksum for");
return 0; return 0;
} }

View File

@ -46,6 +46,8 @@ udp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_protocol *pp,
svc = ip_vs_service_get(af, skb->mark, iph.protocol, svc = ip_vs_service_get(af, skb->mark, iph.protocol,
&iph.daddr, uh->dest); &iph.daddr, uh->dest);
if (svc) { if (svc) {
int ignored;
if (ip_vs_todrop()) { if (ip_vs_todrop()) {
/* /*
* It seems that we are very loaded. * It seems that we are very loaded.
@ -60,8 +62,8 @@ udp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_protocol *pp,
* Let the virtual server select a real server for the * Let the virtual server select a real server for the
* incoming connection, and create a connection entry. * incoming connection, and create a connection entry.
*/ */
*cpp = ip_vs_schedule(svc, skb); *cpp = ip_vs_schedule(svc, skb, pp, &ignored);
if (!*cpp) { if (!*cpp && !ignored) {
*verdict = ip_vs_leave(svc, skb, pp); *verdict = ip_vs_leave(svc, skb, pp);
return 0; return 0;
} }
@ -102,15 +104,15 @@ udp_partial_csum_update(int af, struct udphdr *uhdr,
#ifdef CONFIG_IP_VS_IPV6 #ifdef CONFIG_IP_VS_IPV6
if (af == AF_INET6) if (af == AF_INET6)
uhdr->check = uhdr->check =
csum_fold(ip_vs_check_diff16(oldip->ip6, newip->ip6, ~csum_fold(ip_vs_check_diff16(oldip->ip6, newip->ip6,
ip_vs_check_diff2(oldlen, newlen, ip_vs_check_diff2(oldlen, newlen,
~csum_unfold(uhdr->check)))); csum_unfold(uhdr->check))));
else else
#endif #endif
uhdr->check = uhdr->check =
csum_fold(ip_vs_check_diff4(oldip->ip, newip->ip, ~csum_fold(ip_vs_check_diff4(oldip->ip, newip->ip,
ip_vs_check_diff2(oldlen, newlen, ip_vs_check_diff2(oldlen, newlen,
~csum_unfold(uhdr->check)))); csum_unfold(uhdr->check))));
} }
@ -121,6 +123,7 @@ udp_snat_handler(struct sk_buff *skb,
struct udphdr *udph; struct udphdr *udph;
unsigned int udphoff; unsigned int udphoff;
int oldlen; int oldlen;
int payload_csum = 0;
#ifdef CONFIG_IP_VS_IPV6 #ifdef CONFIG_IP_VS_IPV6
if (cp->af == AF_INET6) if (cp->af == AF_INET6)
@ -135,6 +138,8 @@ udp_snat_handler(struct sk_buff *skb,
return 0; return 0;
if (unlikely(cp->app != NULL)) { if (unlikely(cp->app != NULL)) {
int ret;
/* Some checks before mangling */ /* Some checks before mangling */
if (pp->csum_check && !pp->csum_check(cp->af, skb, pp)) if (pp->csum_check && !pp->csum_check(cp->af, skb, pp))
return 0; return 0;
@ -142,8 +147,13 @@ udp_snat_handler(struct sk_buff *skb,
/* /*
* Call application helper if needed * Call application helper if needed
*/ */
if (!ip_vs_app_pkt_out(cp, skb)) if (!(ret = ip_vs_app_pkt_out(cp, skb)))
return 0; return 0;
/* ret=2: csum update is needed after payload mangling */
if (ret == 1)
oldlen = skb->len - udphoff;
else
payload_csum = 1;
} }
udph = (void *)skb_network_header(skb) + udphoff; udph = (void *)skb_network_header(skb) + udphoff;
@ -156,12 +166,13 @@ udp_snat_handler(struct sk_buff *skb,
udp_partial_csum_update(cp->af, udph, &cp->daddr, &cp->vaddr, udp_partial_csum_update(cp->af, udph, &cp->daddr, &cp->vaddr,
htons(oldlen), htons(oldlen),
htons(skb->len - udphoff)); htons(skb->len - udphoff));
} else if (!cp->app && (udph->check != 0)) { } else if (!payload_csum && (udph->check != 0)) {
/* Only port and addr are changed, do fast csum update */ /* Only port and addr are changed, do fast csum update */
udp_fast_csum_update(cp->af, udph, &cp->daddr, &cp->vaddr, udp_fast_csum_update(cp->af, udph, &cp->daddr, &cp->vaddr,
cp->dport, cp->vport); cp->dport, cp->vport);
if (skb->ip_summed == CHECKSUM_COMPLETE) if (skb->ip_summed == CHECKSUM_COMPLETE)
skb->ip_summed = CHECKSUM_NONE; skb->ip_summed = (cp->app && pp->csum_check) ?
CHECKSUM_UNNECESSARY : CHECKSUM_NONE;
} else { } else {
/* full checksum calculation */ /* full checksum calculation */
udph->check = 0; udph->check = 0;
@ -181,6 +192,7 @@ udp_snat_handler(struct sk_buff *skb,
skb->csum); skb->csum);
if (udph->check == 0) if (udph->check == 0)
udph->check = CSUM_MANGLED_0; udph->check = CSUM_MANGLED_0;
skb->ip_summed = CHECKSUM_UNNECESSARY;
IP_VS_DBG(11, "O-pkt: %s O-csum=%d (+%zd)\n", IP_VS_DBG(11, "O-pkt: %s O-csum=%d (+%zd)\n",
pp->name, udph->check, pp->name, udph->check,
(char*)&(udph->check) - (char*)udph); (char*)&(udph->check) - (char*)udph);
@ -196,6 +208,7 @@ udp_dnat_handler(struct sk_buff *skb,
struct udphdr *udph; struct udphdr *udph;
unsigned int udphoff; unsigned int udphoff;
int oldlen; int oldlen;
int payload_csum = 0;
#ifdef CONFIG_IP_VS_IPV6 #ifdef CONFIG_IP_VS_IPV6
if (cp->af == AF_INET6) if (cp->af == AF_INET6)
@ -210,6 +223,8 @@ udp_dnat_handler(struct sk_buff *skb,
return 0; return 0;
if (unlikely(cp->app != NULL)) { if (unlikely(cp->app != NULL)) {
int ret;
/* Some checks before mangling */ /* Some checks before mangling */
if (pp->csum_check && !pp->csum_check(cp->af, skb, pp)) if (pp->csum_check && !pp->csum_check(cp->af, skb, pp))
return 0; return 0;
@ -218,8 +233,13 @@ udp_dnat_handler(struct sk_buff *skb,
* Attempt ip_vs_app call. * Attempt ip_vs_app call.
* It will fix ip_vs_conn * It will fix ip_vs_conn
*/ */
if (!ip_vs_app_pkt_in(cp, skb)) if (!(ret = ip_vs_app_pkt_in(cp, skb)))
return 0; return 0;
/* ret=2: csum update is needed after payload mangling */
if (ret == 1)
oldlen = skb->len - udphoff;
else
payload_csum = 1;
} }
udph = (void *)skb_network_header(skb) + udphoff; udph = (void *)skb_network_header(skb) + udphoff;
@ -229,15 +249,16 @@ udp_dnat_handler(struct sk_buff *skb,
* Adjust UDP checksums * Adjust UDP checksums
*/ */
if (skb->ip_summed == CHECKSUM_PARTIAL) { if (skb->ip_summed == CHECKSUM_PARTIAL) {
udp_partial_csum_update(cp->af, udph, &cp->daddr, &cp->vaddr, udp_partial_csum_update(cp->af, udph, &cp->vaddr, &cp->daddr,
htons(oldlen), htons(oldlen),
htons(skb->len - udphoff)); htons(skb->len - udphoff));
} else if (!cp->app && (udph->check != 0)) { } else if (!payload_csum && (udph->check != 0)) {
/* Only port and addr are changed, do fast csum update */ /* Only port and addr are changed, do fast csum update */
udp_fast_csum_update(cp->af, udph, &cp->vaddr, &cp->daddr, udp_fast_csum_update(cp->af, udph, &cp->vaddr, &cp->daddr,
cp->vport, cp->dport); cp->vport, cp->dport);
if (skb->ip_summed == CHECKSUM_COMPLETE) if (skb->ip_summed == CHECKSUM_COMPLETE)
skb->ip_summed = CHECKSUM_NONE; skb->ip_summed = (cp->app && pp->csum_check) ?
CHECKSUM_UNNECESSARY : CHECKSUM_NONE;
} else { } else {
/* full checksum calculation */ /* full checksum calculation */
udph->check = 0; udph->check = 0;
@ -293,7 +314,7 @@ udp_csum_check(int af, struct sk_buff *skb, struct ip_vs_protocol *pp)
skb->len - udphoff, skb->len - udphoff,
ipv6_hdr(skb)->nexthdr, ipv6_hdr(skb)->nexthdr,
skb->csum)) { skb->csum)) {
IP_VS_DBG_RL_PKT(0, pp, skb, 0, IP_VS_DBG_RL_PKT(0, af, pp, skb, 0,
"Failed checksum for"); "Failed checksum for");
return 0; return 0;
} }
@ -304,7 +325,7 @@ udp_csum_check(int af, struct sk_buff *skb, struct ip_vs_protocol *pp)
skb->len - udphoff, skb->len - udphoff,
ip_hdr(skb)->protocol, ip_hdr(skb)->protocol,
skb->csum)) { skb->csum)) {
IP_VS_DBG_RL_PKT(0, pp, skb, 0, IP_VS_DBG_RL_PKT(0, af, pp, skb, 0,
"Failed checksum for"); "Failed checksum for");
return 0; return 0;
} }

View File

@ -11,6 +11,16 @@
* *
* Changes: * Changes:
* *
* Description of forwarding methods:
* - all transmitters are called from LOCAL_IN (remote clients) and
* LOCAL_OUT (local clients) but for ICMP can be called from FORWARD
* - not all connections have destination server, for example,
* connections in backup server when fwmark is used
* - bypass connections use daddr from packet
* LOCAL_OUT rules:
* - skb->dev is NULL, skb->protocol is not set (both are set in POST_ROUTING)
* - skb->pkt_type is not set yet
* - the only place where we can see skb->sk != NULL
*/ */
#define KMSG_COMPONENT "IPVS" #define KMSG_COMPONENT "IPVS"
@ -67,12 +77,19 @@ __ip_vs_dst_check(struct ip_vs_dest *dest, u32 rtos)
return dst; return dst;
} }
/*
* Get route to destination or remote server
* rt_mode: flags, &1=Allow local dest, &2=Allow non-local dest,
* &4=Allow redirect from remote daddr to local
*/
static struct rtable * static struct rtable *
__ip_vs_get_out_rt(struct sk_buff *skb, struct ip_vs_conn *cp, u32 rtos) __ip_vs_get_out_rt(struct sk_buff *skb, struct ip_vs_dest *dest,
__be32 daddr, u32 rtos, int rt_mode)
{ {
struct net *net = dev_net(skb->dev); struct net *net = dev_net(skb_dst(skb)->dev);
struct rtable *rt; /* Route to the other host */ struct rtable *rt; /* Route to the other host */
struct ip_vs_dest *dest = cp->dest; struct rtable *ort; /* Original route */
int local;
if (dest) { if (dest) {
spin_lock(&dest->dst_lock); spin_lock(&dest->dst_lock);
@ -104,23 +121,95 @@ __ip_vs_get_out_rt(struct sk_buff *skb, struct ip_vs_conn *cp, u32 rtos)
.oif = 0, .oif = 0,
.nl_u = { .nl_u = {
.ip4_u = { .ip4_u = {
.daddr = cp->daddr.ip, .daddr = daddr,
.saddr = 0, .saddr = 0,
.tos = rtos, } }, .tos = rtos, } },
}; };
if (ip_route_output_key(net, &rt, &fl)) { if (ip_route_output_key(net, &rt, &fl)) {
IP_VS_DBG_RL("ip_route_output error, dest: %pI4\n", IP_VS_DBG_RL("ip_route_output error, dest: %pI4\n",
&cp->daddr.ip); &daddr);
return NULL; return NULL;
} }
} }
local = rt->rt_flags & RTCF_LOCAL;
if (!((local ? 1 : 2) & rt_mode)) {
IP_VS_DBG_RL("Stopping traffic to %s address, dest: %pI4\n",
(rt->rt_flags & RTCF_LOCAL) ?
"local":"non-local", &rt->rt_dst);
ip_rt_put(rt);
return NULL;
}
if (local && !(rt_mode & 4) && !((ort = skb_rtable(skb)) &&
ort->rt_flags & RTCF_LOCAL)) {
IP_VS_DBG_RL("Redirect from non-local address %pI4 to local "
"requires NAT method, dest: %pI4\n",
&ip_hdr(skb)->daddr, &rt->rt_dst);
ip_rt_put(rt);
return NULL;
}
if (unlikely(!local && ipv4_is_loopback(ip_hdr(skb)->saddr))) {
IP_VS_DBG_RL("Stopping traffic from loopback address %pI4 "
"to non-local address, dest: %pI4\n",
&ip_hdr(skb)->saddr, &rt->rt_dst);
ip_rt_put(rt);
return NULL;
}
return rt; return rt;
} }
/*
 * Reroute a packet to the local IPv4 stack after DNAT.
 *
 * If the route currently attached to the skb has a non-zero fl.iif, a new
 * input route lookup is performed with ip_route_input(); on success the
 * reference value saved from skb->_skb_refdst beforehand is handed to
 * refdst_drop().  Otherwise an output route lookup is performed for the
 * IP header's daddr/saddr/tos and the skb mark, and the result replaces
 * the skb's route only when it is flagged RTCF_LOCAL.
 *
 * Returns 1 on success, 0 when a route lookup fails or the output route
 * found is not a local one.
 */
static int
__ip_vs_reroute_locally(struct sk_buff *skb)
{
	struct rtable *cur_rt = skb_rtable(skb);
	struct net *net = dev_net(cur_rt->dst.dev);
	struct iphdr *iph = ip_hdr(skb);

	if (cur_rt->fl.iif) {
		/* Saved first; released only if the input lookup succeeds */
		unsigned long saved_refdst = skb->_skb_refdst;

		if (ip_route_input(skb, iph->daddr, iph->saddr,
				   iph->tos, skb->dev) != 0)
			return 0;
		refdst_drop(saved_refdst);
	} else {
		struct flowi fl = {
			.oif = 0,
			.nl_u = {
				.ip4_u = {
					.daddr = iph->daddr,
					.saddr = iph->saddr,
					.tos = RT_TOS(iph->tos),
				}
			},
			.mark = skb->mark,
		};
		struct rtable *local_rt;

		if (ip_route_output_key(net, &local_rt, &fl) != 0)
			return 0;
		if ((local_rt->rt_flags & RTCF_LOCAL) == 0) {
			/* Not a local route: give the reference back */
			ip_rt_put(local_rt);
			return 0;
		}
		/* Swap the old route for the freshly looked-up local one */
		skb_dst_drop(skb);
		skb_dst_set(skb, &local_rt->dst);
	}
	return 1;
}
#ifdef CONFIG_IP_VS_IPV6 #ifdef CONFIG_IP_VS_IPV6
/*
 * Tell whether an IPv6 route is considered local: it must have a device
 * (rt6i_dev) and that device must have IFF_LOOPBACK set in its flags.
 * Returns 1 if so, 0 otherwise.
 */
static inline int __ip_vs_is_local_route6(struct rt6_info *rt)
{
	if (!rt->rt6i_dev)
		return 0;
	return (rt->rt6i_dev->flags & IFF_LOOPBACK) ? 1 : 0;
}
static struct dst_entry * static struct dst_entry *
__ip_vs_route_output_v6(struct net *net, struct in6_addr *daddr, __ip_vs_route_output_v6(struct net *net, struct in6_addr *daddr,
struct in6_addr *ret_saddr, int do_xfrm) struct in6_addr *ret_saddr, int do_xfrm)
@ -155,14 +244,21 @@ out_err:
return NULL; return NULL;
} }
/*
* Get route to destination or remote server
* rt_mode: flags, &1=Allow local dest, &2=Allow non-local dest,
* &4=Allow redirect from remote daddr to local
*/
static struct rt6_info * static struct rt6_info *
__ip_vs_get_out_rt_v6(struct sk_buff *skb, struct ip_vs_conn *cp, __ip_vs_get_out_rt_v6(struct sk_buff *skb, struct ip_vs_dest *dest,
struct in6_addr *ret_saddr, int do_xfrm) struct in6_addr *daddr, struct in6_addr *ret_saddr,
int do_xfrm, int rt_mode)
{ {
struct net *net = dev_net(skb->dev); struct net *net = dev_net(skb_dst(skb)->dev);
struct rt6_info *rt; /* Route to the other host */ struct rt6_info *rt; /* Route to the other host */
struct ip_vs_dest *dest = cp->dest; struct rt6_info *ort; /* Original route */
struct dst_entry *dst; struct dst_entry *dst;
int local;
if (dest) { if (dest) {
spin_lock(&dest->dst_lock); spin_lock(&dest->dst_lock);
@ -188,13 +284,38 @@ __ip_vs_get_out_rt_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
ipv6_addr_copy(ret_saddr, &dest->dst_saddr); ipv6_addr_copy(ret_saddr, &dest->dst_saddr);
spin_unlock(&dest->dst_lock); spin_unlock(&dest->dst_lock);
} else { } else {
dst = __ip_vs_route_output_v6(net, &cp->daddr.in6, ret_saddr, dst = __ip_vs_route_output_v6(net, daddr, ret_saddr, do_xfrm);
do_xfrm);
if (!dst) if (!dst)
return NULL; return NULL;
rt = (struct rt6_info *) dst; rt = (struct rt6_info *) dst;
} }
local = __ip_vs_is_local_route6(rt);
if (!((local ? 1 : 2) & rt_mode)) {
IP_VS_DBG_RL("Stopping traffic to %s address, dest: %pI6\n",
local ? "local":"non-local", daddr);
dst_release(&rt->dst);
return NULL;
}
if (local && !(rt_mode & 4) &&
!((ort = (struct rt6_info *) skb_dst(skb)) &&
__ip_vs_is_local_route6(ort))) {
IP_VS_DBG_RL("Redirect from non-local address %pI6 to local "
"requires NAT method, dest: %pI6\n",
&ipv6_hdr(skb)->daddr, daddr);
dst_release(&rt->dst);
return NULL;
}
if (unlikely(!local && (!skb->dev || skb->dev->flags & IFF_LOOPBACK) &&
ipv6_addr_type(&ipv6_hdr(skb)->saddr) &
IPV6_ADDR_LOOPBACK)) {
IP_VS_DBG_RL("Stopping traffic from loopback address %pI6 "
"to non-local address, dest: %pI6\n",
&ipv6_hdr(skb)->saddr, daddr);
dst_release(&rt->dst);
return NULL;
}
return rt; return rt;
} }
#endif #endif
@ -217,30 +338,37 @@ ip_vs_dst_reset(struct ip_vs_dest *dest)
({ \ ({ \
int __ret = NF_ACCEPT; \ int __ret = NF_ACCEPT; \
\ \
(skb)->ipvs_property = 1; \
if (unlikely((cp)->flags & IP_VS_CONN_F_NFCT)) \ if (unlikely((cp)->flags & IP_VS_CONN_F_NFCT)) \
__ret = ip_vs_confirm_conntrack(skb, cp); \ __ret = ip_vs_confirm_conntrack(skb, cp); \
if (__ret == NF_ACCEPT) { \ if (__ret == NF_ACCEPT) { \
nf_reset(skb); \ nf_reset(skb); \
(skb)->ip_summed = CHECKSUM_NONE; \ skb_forward_csum(skb); \
} \ } \
__ret; \ __ret; \
}) })
#define IP_VS_XMIT_NAT(pf, skb, cp) \ #define IP_VS_XMIT_NAT(pf, skb, cp, local) \
do { \ do { \
(skb)->ipvs_property = 1; \
if (likely(!((cp)->flags & IP_VS_CONN_F_NFCT))) \ if (likely(!((cp)->flags & IP_VS_CONN_F_NFCT))) \
(skb)->ipvs_property = 1; \ ip_vs_notrack(skb); \
else \ else \
ip_vs_update_conntrack(skb, cp, 1); \ ip_vs_update_conntrack(skb, cp, 1); \
if (local) \
return NF_ACCEPT; \
skb_forward_csum(skb); \ skb_forward_csum(skb); \
NF_HOOK(pf, NF_INET_LOCAL_OUT, (skb), NULL, \ NF_HOOK(pf, NF_INET_LOCAL_OUT, (skb), NULL, \
skb_dst(skb)->dev, dst_output); \ skb_dst(skb)->dev, dst_output); \
} while (0) } while (0)
#define IP_VS_XMIT(pf, skb, cp) \ #define IP_VS_XMIT(pf, skb, cp, local) \
do { \ do { \
(skb)->ipvs_property = 1; \
if (likely(!((cp)->flags & IP_VS_CONN_F_NFCT))) \ if (likely(!((cp)->flags & IP_VS_CONN_F_NFCT))) \
(skb)->ipvs_property = 1; \ ip_vs_notrack(skb); \
if (local) \
return NF_ACCEPT; \
skb_forward_csum(skb); \ skb_forward_csum(skb); \
NF_HOOK(pf, NF_INET_LOCAL_OUT, (skb), NULL, \ NF_HOOK(pf, NF_INET_LOCAL_OUT, (skb), NULL, \
skb_dst(skb)->dev, dst_output); \ skb_dst(skb)->dev, dst_output); \
@ -255,7 +383,7 @@ ip_vs_null_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
struct ip_vs_protocol *pp) struct ip_vs_protocol *pp)
{ {
/* we do not touch skb and do not need pskb ptr */ /* we do not touch skb and do not need pskb ptr */
return NF_ACCEPT; IP_VS_XMIT(NFPROTO_IPV4, skb, cp, 1);
} }
@ -268,27 +396,15 @@ int
ip_vs_bypass_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, ip_vs_bypass_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
struct ip_vs_protocol *pp) struct ip_vs_protocol *pp)
{ {
struct net *net = dev_net(skb->dev);
struct rtable *rt; /* Route to the other host */ struct rtable *rt; /* Route to the other host */
struct iphdr *iph = ip_hdr(skb); struct iphdr *iph = ip_hdr(skb);
u8 tos = iph->tos;
int mtu; int mtu;
struct flowi fl = {
.oif = 0,
.nl_u = {
.ip4_u = {
.daddr = iph->daddr,
.saddr = 0,
.tos = RT_TOS(tos), } },
};
EnterFunction(10); EnterFunction(10);
if (ip_route_output_key(net, &rt, &fl)) { if (!(rt = __ip_vs_get_out_rt(skb, NULL, iph->daddr,
IP_VS_DBG_RL("%s(): ip_route_output error, dest: %pI4\n", RT_TOS(iph->tos), 2)))
__func__, &iph->daddr);
goto tx_error_icmp; goto tx_error_icmp;
}
/* MTU checking */ /* MTU checking */
mtu = dst_mtu(&rt->dst); mtu = dst_mtu(&rt->dst);
@ -316,7 +432,7 @@ ip_vs_bypass_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
/* Another hack: avoid icmp_send in ip_fragment */ /* Another hack: avoid icmp_send in ip_fragment */
skb->local_df = 1; skb->local_df = 1;
IP_VS_XMIT(NFPROTO_IPV4, skb, cp); IP_VS_XMIT(NFPROTO_IPV4, skb, cp, 0);
LeaveFunction(10); LeaveFunction(10);
return NF_STOLEN; return NF_STOLEN;
@ -334,24 +450,25 @@ int
ip_vs_bypass_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, ip_vs_bypass_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
struct ip_vs_protocol *pp) struct ip_vs_protocol *pp)
{ {
struct net *net = dev_net(skb->dev);
struct dst_entry *dst;
struct rt6_info *rt; /* Route to the other host */ struct rt6_info *rt; /* Route to the other host */
struct ipv6hdr *iph = ipv6_hdr(skb); struct ipv6hdr *iph = ipv6_hdr(skb);
int mtu; int mtu;
EnterFunction(10); EnterFunction(10);
dst = __ip_vs_route_output_v6(net, &iph->daddr, NULL, 0); if (!(rt = __ip_vs_get_out_rt_v6(skb, NULL, &iph->daddr, NULL, 0, 2)))
if (!dst)
goto tx_error_icmp; goto tx_error_icmp;
rt = (struct rt6_info *) dst;
/* MTU checking */ /* MTU checking */
mtu = dst_mtu(&rt->dst); mtu = dst_mtu(&rt->dst);
if (skb->len > mtu) { if (skb->len > mtu) {
dst_release(&rt->dst); if (!skb->dev) {
struct net *net = dev_net(skb_dst(skb)->dev);
skb->dev = net->loopback_dev;
}
icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu); icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
dst_release(&rt->dst);
IP_VS_DBG_RL("%s(): frag needed\n", __func__); IP_VS_DBG_RL("%s(): frag needed\n", __func__);
goto tx_error; goto tx_error;
} }
@ -373,7 +490,7 @@ ip_vs_bypass_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
/* Another hack: avoid icmp_send in ip_fragment */ /* Another hack: avoid icmp_send in ip_fragment */
skb->local_df = 1; skb->local_df = 1;
IP_VS_XMIT(NFPROTO_IPV6, skb, cp); IP_VS_XMIT(NFPROTO_IPV6, skb, cp, 0);
LeaveFunction(10); LeaveFunction(10);
return NF_STOLEN; return NF_STOLEN;
@ -398,6 +515,7 @@ ip_vs_nat_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
struct rtable *rt; /* Route to the other host */ struct rtable *rt; /* Route to the other host */
int mtu; int mtu;
struct iphdr *iph = ip_hdr(skb); struct iphdr *iph = ip_hdr(skb);
int local;
EnterFunction(10); EnterFunction(10);
@ -411,16 +529,42 @@ ip_vs_nat_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
IP_VS_DBG(10, "filled cport=%d\n", ntohs(*p)); IP_VS_DBG(10, "filled cport=%d\n", ntohs(*p));
} }
if (!(rt = __ip_vs_get_out_rt(skb, cp, RT_TOS(iph->tos)))) if (!(rt = __ip_vs_get_out_rt(skb, cp->dest, cp->daddr.ip,
RT_TOS(iph->tos), 1|2|4)))
goto tx_error_icmp; goto tx_error_icmp;
local = rt->rt_flags & RTCF_LOCAL;
/*
* Avoid duplicate tuple in reply direction for NAT traffic
* to local address when connection is sync-ed
*/
#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
if (cp->flags & IP_VS_CONN_F_SYNC && local) {
enum ip_conntrack_info ctinfo;
struct nf_conn *ct = ct = nf_ct_get(skb, &ctinfo);
if (ct && !nf_ct_is_untracked(ct)) {
IP_VS_DBG_RL_PKT(10, AF_INET, pp, skb, 0,
"ip_vs_nat_xmit(): "
"stopping DNAT to local address");
goto tx_error_put;
}
}
#endif
/* From world but DNAT to loopback address? */
if (local && ipv4_is_loopback(rt->rt_dst) && skb_rtable(skb)->fl.iif) {
IP_VS_DBG_RL_PKT(1, AF_INET, pp, skb, 0, "ip_vs_nat_xmit(): "
"stopping DNAT to loopback address");
goto tx_error_put;
}
/* MTU checking */ /* MTU checking */
mtu = dst_mtu(&rt->dst); mtu = dst_mtu(&rt->dst);
if ((skb->len > mtu) && (iph->frag_off & htons(IP_DF))) { if ((skb->len > mtu) && (iph->frag_off & htons(IP_DF))) {
ip_rt_put(rt);
icmp_send(skb, ICMP_DEST_UNREACH,ICMP_FRAG_NEEDED, htonl(mtu)); icmp_send(skb, ICMP_DEST_UNREACH,ICMP_FRAG_NEEDED, htonl(mtu));
IP_VS_DBG_RL_PKT(0, pp, skb, 0, "ip_vs_nat_xmit(): frag needed for"); IP_VS_DBG_RL_PKT(0, AF_INET, pp, skb, 0,
goto tx_error; "ip_vs_nat_xmit(): frag needed for");
goto tx_error_put;
} }
/* copy-on-write the packet before mangling it */ /* copy-on-write the packet before mangling it */
@ -430,17 +574,28 @@ ip_vs_nat_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
if (skb_cow(skb, rt->dst.dev->hard_header_len)) if (skb_cow(skb, rt->dst.dev->hard_header_len))
goto tx_error_put; goto tx_error_put;
/* drop old route */
skb_dst_drop(skb);
skb_dst_set(skb, &rt->dst);
/* mangle the packet */ /* mangle the packet */
if (pp->dnat_handler && !pp->dnat_handler(skb, pp, cp)) if (pp->dnat_handler && !pp->dnat_handler(skb, pp, cp))
goto tx_error; goto tx_error_put;
ip_hdr(skb)->daddr = cp->daddr.ip; ip_hdr(skb)->daddr = cp->daddr.ip;
ip_send_check(ip_hdr(skb)); ip_send_check(ip_hdr(skb));
IP_VS_DBG_PKT(10, pp, skb, 0, "After DNAT"); if (!local) {
/* drop old route */
skb_dst_drop(skb);
skb_dst_set(skb, &rt->dst);
} else {
ip_rt_put(rt);
/*
* Some IPv4 replies get local address from routes,
* not from iph, so while we DNAT after routing
* we need this second input/output route.
*/
if (!__ip_vs_reroute_locally(skb))
goto tx_error;
}
IP_VS_DBG_PKT(10, AF_INET, pp, skb, 0, "After DNAT");
/* FIXME: when application helper enlarges the packet and the length /* FIXME: when application helper enlarges the packet and the length
is larger than the MTU of outgoing device, there will be still is larger than the MTU of outgoing device, there will be still
@ -449,7 +604,7 @@ ip_vs_nat_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
/* Another hack: avoid icmp_send in ip_fragment */ /* Another hack: avoid icmp_send in ip_fragment */
skb->local_df = 1; skb->local_df = 1;
IP_VS_XMIT_NAT(NFPROTO_IPV4, skb, cp); IP_VS_XMIT_NAT(NFPROTO_IPV4, skb, cp, local);
LeaveFunction(10); LeaveFunction(10);
return NF_STOLEN; return NF_STOLEN;
@ -472,6 +627,7 @@ ip_vs_nat_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
{ {
struct rt6_info *rt; /* Route to the other host */ struct rt6_info *rt; /* Route to the other host */
int mtu; int mtu;
int local;
EnterFunction(10); EnterFunction(10);
@ -486,18 +642,49 @@ ip_vs_nat_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
IP_VS_DBG(10, "filled cport=%d\n", ntohs(*p)); IP_VS_DBG(10, "filled cport=%d\n", ntohs(*p));
} }
rt = __ip_vs_get_out_rt_v6(skb, cp, NULL, 0); if (!(rt = __ip_vs_get_out_rt_v6(skb, cp->dest, &cp->daddr.in6, NULL,
if (!rt) 0, 1|2|4)))
goto tx_error_icmp; goto tx_error_icmp;
local = __ip_vs_is_local_route6(rt);
/*
* Avoid duplicate tuple in reply direction for NAT traffic
* to local address when connection is sync-ed
*/
#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
if (cp->flags & IP_VS_CONN_F_SYNC && local) {
enum ip_conntrack_info ctinfo;
struct nf_conn *ct = ct = nf_ct_get(skb, &ctinfo);
if (ct && !nf_ct_is_untracked(ct)) {
IP_VS_DBG_RL_PKT(10, AF_INET6, pp, skb, 0,
"ip_vs_nat_xmit_v6(): "
"stopping DNAT to local address");
goto tx_error_put;
}
}
#endif
/* From world but DNAT to loopback address? */
if (local && skb->dev && !(skb->dev->flags & IFF_LOOPBACK) &&
ipv6_addr_type(&rt->rt6i_dst.addr) & IPV6_ADDR_LOOPBACK) {
IP_VS_DBG_RL_PKT(1, AF_INET6, pp, skb, 0,
"ip_vs_nat_xmit_v6(): "
"stopping DNAT to loopback address");
goto tx_error_put;
}
/* MTU checking */ /* MTU checking */
mtu = dst_mtu(&rt->dst); mtu = dst_mtu(&rt->dst);
if (skb->len > mtu) { if (skb->len > mtu) {
dst_release(&rt->dst); if (!skb->dev) {
struct net *net = dev_net(skb_dst(skb)->dev);
skb->dev = net->loopback_dev;
}
icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu); icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
IP_VS_DBG_RL_PKT(0, pp, skb, 0, IP_VS_DBG_RL_PKT(0, AF_INET6, pp, skb, 0,
"ip_vs_nat_xmit_v6(): frag needed for"); "ip_vs_nat_xmit_v6(): frag needed for");
goto tx_error; goto tx_error_put;
} }
/* copy-on-write the packet before mangling it */ /* copy-on-write the packet before mangling it */
@ -507,16 +694,21 @@ ip_vs_nat_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
if (skb_cow(skb, rt->dst.dev->hard_header_len)) if (skb_cow(skb, rt->dst.dev->hard_header_len))
goto tx_error_put; goto tx_error_put;
/* drop old route */
skb_dst_drop(skb);
skb_dst_set(skb, &rt->dst);
/* mangle the packet */ /* mangle the packet */
if (pp->dnat_handler && !pp->dnat_handler(skb, pp, cp)) if (pp->dnat_handler && !pp->dnat_handler(skb, pp, cp))
goto tx_error; goto tx_error;
ipv6_hdr(skb)->daddr = cp->daddr.in6; ipv6_addr_copy(&ipv6_hdr(skb)->daddr, &cp->daddr.in6);
IP_VS_DBG_PKT(10, pp, skb, 0, "After DNAT"); if (!local || !skb->dev) {
/* drop the old route when skb is not shared */
skb_dst_drop(skb);
skb_dst_set(skb, &rt->dst);
} else {
/* destined to loopback, do we need to change route? */
dst_release(&rt->dst);
}
IP_VS_DBG_PKT(10, AF_INET6, pp, skb, 0, "After DNAT");
/* FIXME: when application helper enlarges the packet and the length /* FIXME: when application helper enlarges the packet and the length
is larger than the MTU of outgoing device, there will be still is larger than the MTU of outgoing device, there will be still
@ -525,7 +717,7 @@ ip_vs_nat_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
/* Another hack: avoid icmp_send in ip_fragment */ /* Another hack: avoid icmp_send in ip_fragment */
skb->local_df = 1; skb->local_df = 1;
IP_VS_XMIT_NAT(NFPROTO_IPV6, skb, cp); IP_VS_XMIT_NAT(NFPROTO_IPV6, skb, cp, local);
LeaveFunction(10); LeaveFunction(10);
return NF_STOLEN; return NF_STOLEN;
@ -578,23 +770,20 @@ ip_vs_tunnel_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
EnterFunction(10); EnterFunction(10);
if (skb->protocol != htons(ETH_P_IP)) { if (!(rt = __ip_vs_get_out_rt(skb, cp->dest, cp->daddr.ip,
IP_VS_DBG_RL("%s(): protocol error, " RT_TOS(tos), 1|2)))
"ETH_P_IP: %d, skb protocol: %d\n",
__func__, htons(ETH_P_IP), skb->protocol);
goto tx_error;
}
if (!(rt = __ip_vs_get_out_rt(skb, cp, RT_TOS(tos))))
goto tx_error_icmp; goto tx_error_icmp;
if (rt->rt_flags & RTCF_LOCAL) {
ip_rt_put(rt);
IP_VS_XMIT(NFPROTO_IPV4, skb, cp, 1);
}
tdev = rt->dst.dev; tdev = rt->dst.dev;
mtu = dst_mtu(&rt->dst) - sizeof(struct iphdr); mtu = dst_mtu(&rt->dst) - sizeof(struct iphdr);
if (mtu < 68) { if (mtu < 68) {
ip_rt_put(rt);
IP_VS_DBG_RL("%s(): mtu less than 68\n", __func__); IP_VS_DBG_RL("%s(): mtu less than 68\n", __func__);
goto tx_error; goto tx_error_put;
} }
if (skb_dst(skb)) if (skb_dst(skb))
skb_dst(skb)->ops->update_pmtu(skb_dst(skb), mtu); skb_dst(skb)->ops->update_pmtu(skb_dst(skb), mtu);
@ -604,9 +793,8 @@ ip_vs_tunnel_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
if ((old_iph->frag_off & htons(IP_DF)) if ((old_iph->frag_off & htons(IP_DF))
&& mtu < ntohs(old_iph->tot_len)) { && mtu < ntohs(old_iph->tot_len)) {
icmp_send(skb, ICMP_DEST_UNREACH,ICMP_FRAG_NEEDED, htonl(mtu)); icmp_send(skb, ICMP_DEST_UNREACH,ICMP_FRAG_NEEDED, htonl(mtu));
ip_rt_put(rt);
IP_VS_DBG_RL("%s(): frag needed\n", __func__); IP_VS_DBG_RL("%s(): frag needed\n", __func__);
goto tx_error; goto tx_error_put;
} }
/* /*
@ -675,6 +863,9 @@ ip_vs_tunnel_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
kfree_skb(skb); kfree_skb(skb);
LeaveFunction(10); LeaveFunction(10);
return NF_STOLEN; return NF_STOLEN;
tx_error_put:
ip_rt_put(rt);
goto tx_error;
} }
#ifdef CONFIG_IP_VS_IPV6 #ifdef CONFIG_IP_VS_IPV6
@ -693,34 +884,34 @@ ip_vs_tunnel_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
EnterFunction(10); EnterFunction(10);
if (skb->protocol != htons(ETH_P_IPV6)) { if (!(rt = __ip_vs_get_out_rt_v6(skb, cp->dest, &cp->daddr.in6,
IP_VS_DBG_RL("%s(): protocol error, " &saddr, 1, 1|2)))
"ETH_P_IPV6: %d, skb protocol: %d\n",
__func__, htons(ETH_P_IPV6), skb->protocol);
goto tx_error;
}
rt = __ip_vs_get_out_rt_v6(skb, cp, &saddr, 1);
if (!rt)
goto tx_error_icmp; goto tx_error_icmp;
if (__ip_vs_is_local_route6(rt)) {
dst_release(&rt->dst);
IP_VS_XMIT(NFPROTO_IPV6, skb, cp, 1);
}
tdev = rt->dst.dev; tdev = rt->dst.dev;
mtu = dst_mtu(&rt->dst) - sizeof(struct ipv6hdr); mtu = dst_mtu(&rt->dst) - sizeof(struct ipv6hdr);
if (mtu < IPV6_MIN_MTU) { if (mtu < IPV6_MIN_MTU) {
dst_release(&rt->dst);
IP_VS_DBG_RL("%s(): mtu less than %d\n", __func__, IP_VS_DBG_RL("%s(): mtu less than %d\n", __func__,
IPV6_MIN_MTU); IPV6_MIN_MTU);
goto tx_error; goto tx_error_put;
} }
if (skb_dst(skb)) if (skb_dst(skb))
skb_dst(skb)->ops->update_pmtu(skb_dst(skb), mtu); skb_dst(skb)->ops->update_pmtu(skb_dst(skb), mtu);
if (mtu < ntohs(old_iph->payload_len) + sizeof(struct ipv6hdr)) { if (mtu < ntohs(old_iph->payload_len) + sizeof(struct ipv6hdr)) {
if (!skb->dev) {
struct net *net = dev_net(skb_dst(skb)->dev);
skb->dev = net->loopback_dev;
}
icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu); icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
dst_release(&rt->dst);
IP_VS_DBG_RL("%s(): frag needed\n", __func__); IP_VS_DBG_RL("%s(): frag needed\n", __func__);
goto tx_error; goto tx_error_put;
} }
/* /*
@ -786,6 +977,9 @@ tx_error:
kfree_skb(skb); kfree_skb(skb);
LeaveFunction(10); LeaveFunction(10);
return NF_STOLEN; return NF_STOLEN;
tx_error_put:
dst_release(&rt->dst);
goto tx_error;
} }
#endif #endif
@ -804,8 +998,13 @@ ip_vs_dr_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
EnterFunction(10); EnterFunction(10);
if (!(rt = __ip_vs_get_out_rt(skb, cp, RT_TOS(iph->tos)))) if (!(rt = __ip_vs_get_out_rt(skb, cp->dest, cp->daddr.ip,
RT_TOS(iph->tos), 1|2)))
goto tx_error_icmp; goto tx_error_icmp;
if (rt->rt_flags & RTCF_LOCAL) {
ip_rt_put(rt);
IP_VS_XMIT(NFPROTO_IPV4, skb, cp, 1);
}
/* MTU checking */ /* MTU checking */
mtu = dst_mtu(&rt->dst); mtu = dst_mtu(&rt->dst);
@ -833,7 +1032,7 @@ ip_vs_dr_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
/* Another hack: avoid icmp_send in ip_fragment */ /* Another hack: avoid icmp_send in ip_fragment */
skb->local_df = 1; skb->local_df = 1;
IP_VS_XMIT(NFPROTO_IPV4, skb, cp); IP_VS_XMIT(NFPROTO_IPV4, skb, cp, 0);
LeaveFunction(10); LeaveFunction(10);
return NF_STOLEN; return NF_STOLEN;
@ -856,13 +1055,22 @@ ip_vs_dr_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
EnterFunction(10); EnterFunction(10);
rt = __ip_vs_get_out_rt_v6(skb, cp, NULL, 0); if (!(rt = __ip_vs_get_out_rt_v6(skb, cp->dest, &cp->daddr.in6, NULL,
if (!rt) 0, 1|2)))
goto tx_error_icmp; goto tx_error_icmp;
if (__ip_vs_is_local_route6(rt)) {
dst_release(&rt->dst);
IP_VS_XMIT(NFPROTO_IPV6, skb, cp, 1);
}
/* MTU checking */ /* MTU checking */
mtu = dst_mtu(&rt->dst); mtu = dst_mtu(&rt->dst);
if (skb->len > mtu) { if (skb->len > mtu) {
if (!skb->dev) {
struct net *net = dev_net(skb_dst(skb)->dev);
skb->dev = net->loopback_dev;
}
icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu); icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
dst_release(&rt->dst); dst_release(&rt->dst);
IP_VS_DBG_RL("%s(): frag needed\n", __func__); IP_VS_DBG_RL("%s(): frag needed\n", __func__);
@ -886,7 +1094,7 @@ ip_vs_dr_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
/* Another hack: avoid icmp_send in ip_fragment */ /* Another hack: avoid icmp_send in ip_fragment */
skb->local_df = 1; skb->local_df = 1;
IP_VS_XMIT(NFPROTO_IPV6, skb, cp); IP_VS_XMIT(NFPROTO_IPV6, skb, cp, 0);
LeaveFunction(10); LeaveFunction(10);
return NF_STOLEN; return NF_STOLEN;
@ -912,6 +1120,7 @@ ip_vs_icmp_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
struct rtable *rt; /* Route to the other host */ struct rtable *rt; /* Route to the other host */
int mtu; int mtu;
int rc; int rc;
int local;
EnterFunction(10); EnterFunction(10);
@ -932,16 +1141,43 @@ ip_vs_icmp_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
* mangle and send the packet here (only for VS/NAT) * mangle and send the packet here (only for VS/NAT)
*/ */
if (!(rt = __ip_vs_get_out_rt(skb, cp, RT_TOS(ip_hdr(skb)->tos)))) if (!(rt = __ip_vs_get_out_rt(skb, cp->dest, cp->daddr.ip,
RT_TOS(ip_hdr(skb)->tos), 1|2|4)))
goto tx_error_icmp; goto tx_error_icmp;
local = rt->rt_flags & RTCF_LOCAL;
/*
* Avoid duplicate tuple in reply direction for NAT traffic
* to local address when connection is sync-ed
*/
#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
if (cp->flags & IP_VS_CONN_F_SYNC && local) {
enum ip_conntrack_info ctinfo;
struct nf_conn *ct = ct = nf_ct_get(skb, &ctinfo);
if (ct && !nf_ct_is_untracked(ct)) {
IP_VS_DBG(10, "%s(): "
"stopping DNAT to local address %pI4\n",
__func__, &cp->daddr.ip);
goto tx_error_put;
}
}
#endif
/* From world but DNAT to loopback address? */
if (local && ipv4_is_loopback(rt->rt_dst) && skb_rtable(skb)->fl.iif) {
IP_VS_DBG(1, "%s(): "
"stopping DNAT to loopback %pI4\n",
__func__, &cp->daddr.ip);
goto tx_error_put;
}
/* MTU checking */ /* MTU checking */
mtu = dst_mtu(&rt->dst); mtu = dst_mtu(&rt->dst);
if ((skb->len > mtu) && (ip_hdr(skb)->frag_off & htons(IP_DF))) { if ((skb->len > mtu) && (ip_hdr(skb)->frag_off & htons(IP_DF))) {
ip_rt_put(rt);
icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu)); icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu));
IP_VS_DBG_RL("%s(): frag needed\n", __func__); IP_VS_DBG_RL("%s(): frag needed\n", __func__);
goto tx_error; goto tx_error_put;
} }
/* copy-on-write the packet before mangling it */ /* copy-on-write the packet before mangling it */
@ -951,16 +1187,27 @@ ip_vs_icmp_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
if (skb_cow(skb, rt->dst.dev->hard_header_len)) if (skb_cow(skb, rt->dst.dev->hard_header_len))
goto tx_error_put; goto tx_error_put;
/* drop the old route when skb is not shared */
skb_dst_drop(skb);
skb_dst_set(skb, &rt->dst);
ip_vs_nat_icmp(skb, pp, cp, 0); ip_vs_nat_icmp(skb, pp, cp, 0);
if (!local) {
/* drop the old route when skb is not shared */
skb_dst_drop(skb);
skb_dst_set(skb, &rt->dst);
} else {
ip_rt_put(rt);
/*
* Some IPv4 replies get local address from routes,
* not from iph, so while we DNAT after routing
* we need this second input/output route.
*/
if (!__ip_vs_reroute_locally(skb))
goto tx_error;
}
/* Another hack: avoid icmp_send in ip_fragment */ /* Another hack: avoid icmp_send in ip_fragment */
skb->local_df = 1; skb->local_df = 1;
IP_VS_XMIT(NFPROTO_IPV4, skb, cp); IP_VS_XMIT_NAT(NFPROTO_IPV4, skb, cp, local);
rc = NF_STOLEN; rc = NF_STOLEN;
goto out; goto out;
@ -986,6 +1233,7 @@ ip_vs_icmp_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
struct rt6_info *rt; /* Route to the other host */ struct rt6_info *rt; /* Route to the other host */
int mtu; int mtu;
int rc; int rc;
int local;
EnterFunction(10); EnterFunction(10);
@ -1006,17 +1254,49 @@ ip_vs_icmp_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
* mangle and send the packet here (only for VS/NAT) * mangle and send the packet here (only for VS/NAT)
*/ */
rt = __ip_vs_get_out_rt_v6(skb, cp, NULL, 0); if (!(rt = __ip_vs_get_out_rt_v6(skb, cp->dest, &cp->daddr.in6, NULL,
if (!rt) 0, 1|2|4)))
goto tx_error_icmp; goto tx_error_icmp;
local = __ip_vs_is_local_route6(rt);
/*
* Avoid duplicate tuple in reply direction for NAT traffic
* to local address when connection is sync-ed
*/
#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
if (cp->flags & IP_VS_CONN_F_SYNC && local) {
enum ip_conntrack_info ctinfo;
struct nf_conn *ct = ct = nf_ct_get(skb, &ctinfo);
if (ct && !nf_ct_is_untracked(ct)) {
IP_VS_DBG(10, "%s(): "
"stopping DNAT to local address %pI6\n",
__func__, &cp->daddr.in6);
goto tx_error_put;
}
}
#endif
/* From world but DNAT to loopback address? */
if (local && skb->dev && !(skb->dev->flags & IFF_LOOPBACK) &&
ipv6_addr_type(&rt->rt6i_dst.addr) & IPV6_ADDR_LOOPBACK) {
IP_VS_DBG(1, "%s(): "
"stopping DNAT to loopback %pI6\n",
__func__, &cp->daddr.in6);
goto tx_error_put;
}
/* MTU checking */ /* MTU checking */
mtu = dst_mtu(&rt->dst); mtu = dst_mtu(&rt->dst);
if (skb->len > mtu) { if (skb->len > mtu) {
dst_release(&rt->dst); if (!skb->dev) {
struct net *net = dev_net(skb_dst(skb)->dev);
skb->dev = net->loopback_dev;
}
icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu); icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
IP_VS_DBG_RL("%s(): frag needed\n", __func__); IP_VS_DBG_RL("%s(): frag needed\n", __func__);
goto tx_error; goto tx_error_put;
} }
/* copy-on-write the packet before mangling it */ /* copy-on-write the packet before mangling it */
@ -1026,16 +1306,21 @@ ip_vs_icmp_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
if (skb_cow(skb, rt->dst.dev->hard_header_len)) if (skb_cow(skb, rt->dst.dev->hard_header_len))
goto tx_error_put; goto tx_error_put;
/* drop the old route when skb is not shared */
skb_dst_drop(skb);
skb_dst_set(skb, &rt->dst);
ip_vs_nat_icmp_v6(skb, pp, cp, 0); ip_vs_nat_icmp_v6(skb, pp, cp, 0);
if (!local || !skb->dev) {
/* drop the old route when skb is not shared */
skb_dst_drop(skb);
skb_dst_set(skb, &rt->dst);
} else {
/* destined to loopback, do we need to change route? */
dst_release(&rt->dst);
}
/* Another hack: avoid icmp_send in ip_fragment */ /* Another hack: avoid icmp_send in ip_fragment */
skb->local_df = 1; skb->local_df = 1;
IP_VS_XMIT(NFPROTO_IPV6, skb, cp); IP_VS_XMIT_NAT(NFPROTO_IPV6, skb, cp, local);
rc = NF_STOLEN; rc = NF_STOLEN;
goto out; goto out;