forked from Minki/linux
ebc0ffae5d
fib_lookup() converted to be called in RCU protected context, no reference taken and released on a contended cache line (fib_clntref) fib_table_lookup() and fib_semantic_match() get an additional parameter. struct fib_info gets an rcu_head field, and is freed after an rcu grace period. Stress test : (Sending 160.000.000 UDP frames on same neighbour, IP route cache disabled, dual E5540 @2.53GHz, 32bit kernel, FIB_HASH) (about same results for FIB_TRIE) Before patch : real 1m31.199s user 0m13.761s sys 23m24.780s After patch: real 1m5.375s user 0m14.997s sys 15m50.115s Before patch Profile : 13044.00 15.4% __ip_route_output_key vmlinux 8438.00 10.0% dst_destroy vmlinux 5983.00 7.1% fib_semantic_match vmlinux 5410.00 6.4% fib_rules_lookup vmlinux 4803.00 5.7% neigh_lookup vmlinux 4420.00 5.2% _raw_spin_lock vmlinux 3883.00 4.6% rt_set_nexthop vmlinux 3261.00 3.9% _raw_read_lock vmlinux 2794.00 3.3% fib_table_lookup vmlinux 2374.00 2.8% neigh_resolve_output vmlinux 2153.00 2.5% dst_alloc vmlinux 1502.00 1.8% _raw_read_lock_bh vmlinux 1484.00 1.8% kmem_cache_alloc vmlinux 1407.00 1.7% eth_header vmlinux 1406.00 1.7% ipv4_dst_destroy vmlinux 1298.00 1.5% __copy_from_user_ll vmlinux 1174.00 1.4% dev_queue_xmit vmlinux 1000.00 1.2% ip_output vmlinux After patch Profile : 13712.00 15.8% dst_destroy vmlinux 8548.00 9.9% __ip_route_output_key vmlinux 7017.00 8.1% neigh_lookup vmlinux 4554.00 5.3% fib_semantic_match vmlinux 4067.00 4.7% _raw_read_lock vmlinux 3491.00 4.0% dst_alloc vmlinux 3186.00 3.7% neigh_resolve_output vmlinux 3103.00 3.6% fib_table_lookup vmlinux 2098.00 2.4% _raw_read_lock_bh vmlinux 2081.00 2.4% kmem_cache_alloc vmlinux 2013.00 2.3% _raw_spin_lock vmlinux 1763.00 2.0% __copy_from_user_ll vmlinux 1763.00 2.0% ip_output vmlinux 1761.00 2.0% ipv4_dst_destroy vmlinux 1631.00 1.9% eth_header vmlinux 1440.00 1.7% _raw_read_unlock_bh vmlinux Reference results, if IP route cache is enabled : real 0m29.718s user 0m10.845s sys 7m37.341s 25213.00 29.5% 
__ip_route_output_key vmlinux 9011.00 10.5% dst_release vmlinux 4817.00 5.6% ip_push_pending_frames vmlinux 4232.00 5.0% ip_finish_output vmlinux 3940.00 4.6% udp_sendmsg vmlinux 3730.00 4.4% __copy_from_user_ll vmlinux 3716.00 4.4% ip_route_output_flow vmlinux 2451.00 2.9% __xfrm_lookup vmlinux 2221.00 2.6% ip_append_data vmlinux 1718.00 2.0% _raw_spin_lock_bh vmlinux 1655.00 1.9% __alloc_skb vmlinux 1572.00 1.8% sock_wfree vmlinux 1345.00 1.6% kfree vmlinux Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com> Signed-off-by: David S. Miller <davem@davemloft.net>
272 lines
6.8 KiB
C
272 lines
6.8 KiB
C
/*
|
|
* INET An implementation of the TCP/IP protocol suite for the LINUX
|
|
* operating system. INET is implemented using the BSD Socket
|
|
* interface as the means of communication with the user level.
|
|
*
|
|
* Definitions for the Forwarding Information Base.
|
|
*
|
|
* Authors: A.N.Kuznetsov, <kuznet@ms2.inr.ac.ru>
|
|
*
|
|
* This program is free software; you can redistribute it and/or
|
|
* modify it under the terms of the GNU General Public License
|
|
* as published by the Free Software Foundation; either version
|
|
* 2 of the License, or (at your option) any later version.
|
|
*/
|
|
|
|
#ifndef _NET_IP_FIB_H
|
|
#define _NET_IP_FIB_H
|
|
|
|
#include <net/flow.h>
|
|
#include <linux/seq_file.h>
|
|
#include <net/fib_rules.h>
|
|
|
|
struct fib_config {
|
|
u8 fc_dst_len;
|
|
u8 fc_tos;
|
|
u8 fc_protocol;
|
|
u8 fc_scope;
|
|
u8 fc_type;
|
|
/* 3 bytes unused */
|
|
u32 fc_table;
|
|
__be32 fc_dst;
|
|
__be32 fc_gw;
|
|
int fc_oif;
|
|
u32 fc_flags;
|
|
u32 fc_priority;
|
|
__be32 fc_prefsrc;
|
|
struct nlattr *fc_mx;
|
|
struct rtnexthop *fc_mp;
|
|
int fc_mx_len;
|
|
int fc_mp_len;
|
|
u32 fc_flow;
|
|
u32 fc_nlflags;
|
|
struct nl_info fc_nlinfo;
|
|
};
|
|
|
|
struct fib_info;
|
|
|
|
struct fib_nh {
|
|
struct net_device *nh_dev;
|
|
struct hlist_node nh_hash;
|
|
struct fib_info *nh_parent;
|
|
unsigned nh_flags;
|
|
unsigned char nh_scope;
|
|
#ifdef CONFIG_IP_ROUTE_MULTIPATH
|
|
int nh_weight;
|
|
int nh_power;
|
|
#endif
|
|
#ifdef CONFIG_NET_CLS_ROUTE
|
|
__u32 nh_tclassid;
|
|
#endif
|
|
int nh_oif;
|
|
__be32 nh_gw;
|
|
};
|
|
|
|
/*
|
|
* This structure contains data shared by many of routes.
|
|
*/
|
|
|
|
struct fib_info {
|
|
struct hlist_node fib_hash;
|
|
struct hlist_node fib_lhash;
|
|
struct net *fib_net;
|
|
int fib_treeref;
|
|
atomic_t fib_clntref;
|
|
int fib_dead;
|
|
unsigned fib_flags;
|
|
int fib_protocol;
|
|
__be32 fib_prefsrc;
|
|
u32 fib_priority;
|
|
u32 fib_metrics[RTAX_MAX];
|
|
#define fib_mtu fib_metrics[RTAX_MTU-1]
|
|
#define fib_window fib_metrics[RTAX_WINDOW-1]
|
|
#define fib_rtt fib_metrics[RTAX_RTT-1]
|
|
#define fib_advmss fib_metrics[RTAX_ADVMSS-1]
|
|
int fib_nhs;
|
|
#ifdef CONFIG_IP_ROUTE_MULTIPATH
|
|
int fib_power;
|
|
#endif
|
|
struct rcu_head rcu;
|
|
struct fib_nh fib_nh[0];
|
|
#define fib_dev fib_nh[0].nh_dev
|
|
};
|
|
|
|
|
|
#ifdef CONFIG_IP_MULTIPLE_TABLES
struct fib_rule;
#endif

/*
 * Lookup result, filled in through the 'res' argument of
 * fib_table_lookup() and fib_lookup().
 */
struct fib_result {
	unsigned char	prefixlen;
	unsigned char	nh_sel;		/* index into fi->fib_nh[], see FIB_RES_NH() */
	unsigned char	type;
	unsigned char	scope;
	struct fib_info *fi;
#ifdef CONFIG_IP_MULTIPLE_TABLES
	struct fib_rule	*r;
#endif
};
|
|
|
|
struct fib_result_nl {
|
|
__be32 fl_addr; /* To be looked up*/
|
|
u32 fl_mark;
|
|
unsigned char fl_tos;
|
|
unsigned char fl_scope;
|
|
unsigned char tb_id_in;
|
|
|
|
unsigned char tb_id; /* Results */
|
|
unsigned char prefixlen;
|
|
unsigned char nh_sel;
|
|
unsigned char type;
|
|
unsigned char scope;
|
|
int err;
|
|
};
|
|
|
|
/*
 * Accessors for the next hop selected by a struct fib_result.  'res' is
 * the structure itself (an lvalue), not a pointer to it.
 *
 * NOTE(review): FIB_TABLE_HASHSZ is keyed on CONFIG_IP_ROUTE_MULTIPATH
 * here, although fib_get_table() indexes net->ipv4.fib_table_hash[] per
 * table - confirm whether CONFIG_IP_MULTIPLE_TABLES was intended.
 */
#ifdef CONFIG_IP_ROUTE_MULTIPATH

#define FIB_RES_NH(res)		((res).fi->fib_nh[(res).nh_sel])

#define FIB_TABLE_HASHSZ 2

#else /* CONFIG_IP_ROUTE_MULTIPATH */

#define FIB_RES_NH(res)		((res).fi->fib_nh[0])

#define FIB_TABLE_HASHSZ 256

#endif /* CONFIG_IP_ROUTE_MULTIPATH */

/*
 * Preferred source address: the one stored in the fib_info if non-zero,
 * otherwise whatever __fib_res_prefsrc() returns.  Uses the GNU "?:"
 * form so fib_prefsrc is only evaluated once.
 */
#define FIB_RES_PREFSRC(res)		((res).fi->fib_prefsrc ? : __fib_res_prefsrc(&(res)))
#define FIB_RES_GW(res)			(FIB_RES_NH(res).nh_gw)
#define FIB_RES_DEV(res)		(FIB_RES_NH(res).nh_dev)
#define FIB_RES_OIF(res)		(FIB_RES_NH(res).nh_oif)
|
|
|
|
struct fib_table {
|
|
struct hlist_node tb_hlist;
|
|
u32 tb_id;
|
|
int tb_default;
|
|
unsigned char tb_data[0];
|
|
};
|
|
|
|
/*
 * Per-table operations.  fib_table_lookup() fills *res; fib_flags carries
 * lookup modifiers such as FIB_LOOKUP_NOREF (as passed by fib_lookup()).
 */
extern int fib_table_lookup(struct fib_table *tb, const struct flowi *flp,
			    struct fib_result *res, int fib_flags);
extern int fib_table_insert(struct fib_table *, struct fib_config *);
extern int fib_table_delete(struct fib_table *, struct fib_config *);
extern int fib_table_dump(struct fib_table *table, struct sk_buff *skb,
			  struct netlink_callback *cb);
extern int fib_table_flush(struct fib_table *table);
extern void fib_table_select_default(struct fib_table *table,
				     const struct flowi *flp,
				     struct fib_result *res);
|
|
|
|
|
|
#ifndef CONFIG_IP_MULTIPLE_TABLES
|
|
|
|
/* Fixed net->ipv4.fib_table_hash[] slots used by fib_get_table(). */
#define TABLE_LOCAL_INDEX	0
#define TABLE_MAIN_INDEX	1
|
|
|
|
/*
 * Return the table for @id when only the two built-in tables exist:
 * RT_TABLE_LOCAL selects the local table, every other id selects the
 * main table.
 *
 * NOTE(review): the first entry of the bucket is used without checking
 * that the bucket is non-empty - presumably both tables are always
 * installed before this is called; confirm.
 */
static inline struct fib_table *fib_get_table(struct net *net, u32 id)
{
	struct hlist_head *ptr;

	ptr = id == RT_TABLE_LOCAL ?
		&net->ipv4.fib_table_hash[TABLE_LOCAL_INDEX] :
		&net->ipv4.fib_table_hash[TABLE_MAIN_INDEX];
	return hlist_entry(ptr->first, struct fib_table, tb_hlist);
}
|
|
|
|
/*
 * Without CONFIG_IP_MULTIPLE_TABLES no table is ever created here: this
 * simply returns what fib_get_table() finds for @id.
 */
static inline struct fib_table *fib_new_table(struct net *net, u32 id)
{
	return fib_get_table(net, id);
}
|
|
|
|
static inline int fib_lookup(struct net *net, const struct flowi *flp,
|
|
struct fib_result *res)
|
|
{
|
|
struct fib_table *table;
|
|
|
|
table = fib_get_table(net, RT_TABLE_LOCAL);
|
|
if (!fib_table_lookup(table, flp, res, FIB_LOOKUP_NOREF))
|
|
return 0;
|
|
|
|
table = fib_get_table(net, RT_TABLE_MAIN);
|
|
if (!fib_table_lookup(table, flp, res, FIB_LOOKUP_NOREF))
|
|
return 0;
|
|
return -ENETUNREACH;
|
|
}
|
|
|
|
#else /* CONFIG_IP_MULTIPLE_TABLES */
|
|
extern int __net_init fib4_rules_init(struct net *net);
|
|
extern void __net_exit fib4_rules_exit(struct net *net);
|
|
|
|
#ifdef CONFIG_NET_CLS_ROUTE
|
|
extern u32 fib_rules_tclass(struct fib_result *res);
|
|
#endif
|
|
|
|
extern int fib_lookup(struct net *n, struct flowi *flp, struct fib_result *res);
|
|
|
|
extern struct fib_table *fib_new_table(struct net *net, u32 id);
|
|
extern struct fib_table *fib_get_table(struct net *net, u32 id);
|
|
|
|
#endif /* CONFIG_IP_MULTIPLE_TABLES */
|
|
|
|
/* Exported by fib_frontend.c */
|
|
extern const struct nla_policy rtm_ipv4_policy[];
|
|
extern void ip_fib_init(void);
|
|
extern int fib_validate_source(__be32 src, __be32 dst, u8 tos, int oif,
|
|
struct net_device *dev, __be32 *spec_dst,
|
|
u32 *itag, u32 mark);
|
|
extern void fib_select_default(struct net *net, const struct flowi *flp,
|
|
struct fib_result *res);
|
|
|
|
/* Exported by fib_semantics.c */
|
|
extern int ip_fib_check_default(__be32 gw, struct net_device *dev);
|
|
extern int fib_sync_down_dev(struct net_device *dev, int force);
|
|
extern int fib_sync_down_addr(struct net *net, __be32 local);
|
|
extern int fib_sync_up(struct net_device *dev);
|
|
extern __be32 __fib_res_prefsrc(struct fib_result *res);
|
|
extern void fib_select_multipath(const struct flowi *flp, struct fib_result *res);
|
|
|
|
/* Exported by fib_{hash|trie}.c */
|
|
extern void fib_hash_init(void);
|
|
extern struct fib_table *fib_hash_table(u32 id);
|
|
|
|
/*
 * Build *itag from the classifier ids attached to a lookup result.
 *
 * With CONFIG_NET_CLS_ROUTE the selected next hop's nh_tclassid is
 * placed in the upper 16 bits.  With CONFIG_IP_MULTIPLE_TABLES as well,
 * the value of fib_rules_tclass() is merged in: its low half becomes the
 * upper 16 bits if the next hop supplied nothing, and its high half is
 * OR-ed into the lower 16 bits.
 *
 * Without CONFIG_NET_CLS_ROUTE this is a no-op and *itag is untouched.
 */
static inline void fib_combine_itag(u32 *itag, struct fib_result *res)
{
#ifdef CONFIG_NET_CLS_ROUTE
#ifdef CONFIG_IP_MULTIPLE_TABLES
	u32 rtag;
#endif
	*itag = FIB_RES_NH(*res).nh_tclassid << 16;
#ifdef CONFIG_IP_MULTIPLE_TABLES
	rtag = fib_rules_tclass(res);
	if (*itag == 0)
		*itag = (rtag << 16);
	*itag |= (rtag >> 16);
#endif
#endif
}
|
|
|
|
extern void free_fib_info(struct fib_info *fi);
|
|
|
|
static inline void fib_info_put(struct fib_info *fi)
|
|
{
|
|
if (atomic_dec_and_test(&fi->fib_clntref))
|
|
free_fib_info(fi);
|
|
}
|
|
|
|
/*
 * procfs hooks.  Without CONFIG_PROC_FS they collapse to stubs:
 * fib_proc_init() reports success (0) and fib_proc_exit() does nothing.
 */
#ifdef CONFIG_PROC_FS
extern int __net_init  fib_proc_init(struct net *net);
extern void __net_exit fib_proc_exit(struct net *net);
#else
static inline int fib_proc_init(struct net *net)
{
	return 0;
}
static inline void fib_proc_exit(struct net *net)
{
}
#endif
|
|
|
|
#endif  /* _NET_IP_FIB_H */
|