2019-05-27 06:55:01 +00:00
|
|
|
// SPDX-License-Identifier: GPL-2.0-or-later
|
2005-04-16 22:20:36 +00:00
|
|
|
/*
|
|
|
|
* ip6_flowlabel.c IPv6 flowlabel manager.
|
|
|
|
*
|
|
|
|
* Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
|
|
|
|
*/
|
|
|
|
|
2006-01-11 20:17:47 +00:00
|
|
|
#include <linux/capability.h>
|
2005-04-16 22:20:36 +00:00
|
|
|
#include <linux/errno.h>
|
|
|
|
#include <linux/types.h>
|
|
|
|
#include <linux/socket.h>
|
|
|
|
#include <linux/net.h>
|
|
|
|
#include <linux/netdevice.h>
|
|
|
|
#include <linux/in6.h>
|
|
|
|
#include <linux/proc_fs.h>
|
|
|
|
#include <linux/seq_file.h>
|
include cleanup: Update gfp.h and slab.h includes to prepare for breaking implicit slab.h inclusion from percpu.h
percpu.h is included by sched.h and module.h and thus ends up being
included when building most .c files. percpu.h includes slab.h which
in turn includes gfp.h making everything defined by the two files
universally available and complicating inclusion dependencies.
percpu.h -> slab.h dependency is about to be removed. Prepare for
this change by updating users of gfp and slab facilities include those
headers directly instead of assuming availability. As this conversion
needs to touch large number of source files, the following script is
used as the basis of conversion.
http://userweb.kernel.org/~tj/misc/slabh-sweep.py
The script does the following.
* Scan files for gfp and slab usages and update includes such that
only the necessary includes are there. ie. if only gfp is used,
gfp.h, if slab is used, slab.h.
* When the script inserts a new include, it looks at the include
blocks and tries to put the new include such that its order conforms
to its surrounding. It's put in the include block which contains
core kernel includes, in the same order that the rest are ordered -
alphabetical, Christmas tree, rev-Xmas-tree or at the end if there
doesn't seem to be any matching order.
* If the script can't find a place to put a new include (mostly
because the file doesn't have fitting include block), it prints out
an error message indicating which .h file needs to be added to the
file.
The conversion was done in the following steps.
1. The initial automatic conversion of all .c files updated slightly
over 4000 files, deleting around 700 includes and adding ~480 gfp.h
and ~3000 slab.h inclusions. The script emitted errors for ~400
files.
2. Each error was manually checked. Some didn't need the inclusion,
some needed manual addition while adding it to implementation .h or
embedding .c file was more appropriate for others. This step added
inclusions to around 150 files.
3. The script was run again and the output was compared to the edits
from #2 to make sure no file was left behind.
4. Several build tests were done and a couple of problems were fixed.
e.g. lib/decompress_*.c used malloc/free() wrappers around slab
APIs requiring slab.h to be added manually.
5. The script was run on all .h files but without automatically
editing them as sprinkling gfp.h and slab.h inclusions around .h
files could easily lead to inclusion dependency hell. Most gfp.h
inclusion directives were ignored as stuff from gfp.h was usually
widely available and often used in preprocessor macros. Each
slab.h inclusion directive was examined and added manually as
necessary.
6. percpu.h was updated not to include slab.h.
7. Build tests were done on the following configurations and failures
were fixed. CONFIG_GCOV_KERNEL was turned off for all tests (as my
distributed build env didn't work with gcov compiles) and a few
more options had to be turned off depending on archs to make things
build (like ipr on powerpc/64 which failed due to missing writeq).
* x86 and x86_64 UP and SMP allmodconfig and a custom test config.
* powerpc and powerpc64 SMP allmodconfig
* sparc and sparc64 SMP allmodconfig
* ia64 SMP allmodconfig
* s390 SMP allmodconfig
* alpha SMP allmodconfig
* um on x86_64 SMP allmodconfig
8. percpu.h modifications were reverted so that it could be applied as
a separate patch and serve as bisection point.
Given the fact that I had only a couple of failures from tests on step
6, I'm fairly confident about the coverage of this conversion patch.
If there is a breakage, it's likely to be something in one of the arch
headers which should be easily discoverable on most builds of
the specific arch.
Signed-off-by: Tejun Heo <tj@kernel.org>
Guess-its-ok-by: Christoph Lameter <cl@linux-foundation.org>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Lee Schermerhorn <Lee.Schermerhorn@hp.com>
2010-03-24 08:04:11 +00:00
|
|
|
#include <linux/slab.h>
|
2011-07-15 15:47:34 +00:00
|
|
|
#include <linux/export.h>
|
2012-05-24 16:37:59 +00:00
|
|
|
#include <linux/pid_namespace.h>
|
2019-07-07 09:34:45 +00:00
|
|
|
#include <linux/jump_label_ratelimit.h>
|
2005-04-16 22:20:36 +00:00
|
|
|
|
2007-09-12 10:01:34 +00:00
|
|
|
#include <net/net_namespace.h>
|
2005-04-16 22:20:36 +00:00
|
|
|
#include <net/sock.h>
|
|
|
|
|
|
|
|
#include <net/ipv6.h>
|
|
|
|
#include <net/rawv6.h>
|
|
|
|
#include <net/transp_v6.h>
|
|
|
|
|
2016-12-24 19:46:01 +00:00
|
|
|
#include <linux/uaccess.h>
|
2005-04-16 22:20:36 +00:00
|
|
|
|
|
|
|
#define FL_MIN_LINGER 6 /* Minimal linger. It is set to 6sec specified
|
|
|
|
in old IPv6 RFC. Well, it was reasonable value.
|
|
|
|
*/
|
2013-11-07 16:53:13 +00:00
|
|
|
#define FL_MAX_LINGER 150 /* Maximal linger timeout */
|
2005-04-16 22:20:36 +00:00
|
|
|
|
|
|
|
/* FL hash table */
|
|
|
|
|
|
|
|
#define FL_MAX_PER_SOCK 32
|
|
|
|
#define FL_MAX_SIZE 4096
|
|
|
|
#define FL_HASH_MASK 255
|
|
|
|
#define FL_HASH(l) (ntohl(l)&FL_HASH_MASK)
|
|
|
|
|
|
|
|
static atomic_t fl_size = ATOMIC_INIT(0);
|
2013-01-30 09:27:47 +00:00
|
|
|
static struct ip6_flowlabel __rcu *fl_ht[FL_HASH_MASK+1];
|
2005-04-16 22:20:36 +00:00
|
|
|
|
2017-08-28 18:28:21 +00:00
|
|
|
static void ip6_fl_gc(struct timer_list *unused);
|
2017-10-04 23:27:04 +00:00
|
|
|
static DEFINE_TIMER(ip6_fl_gc_timer, ip6_fl_gc);
|
2005-04-16 22:20:36 +00:00
|
|
|
|
|
|
|
/* FL hash table lock: it protects only of GC */
|
|
|
|
|
2013-01-30 09:27:47 +00:00
|
|
|
static DEFINE_SPINLOCK(ip6_fl_lock);
|
2005-04-16 22:20:36 +00:00
|
|
|
|
|
|
|
/* Big socket sock */
|
|
|
|
|
2013-01-30 09:27:52 +00:00
|
|
|
static DEFINE_SPINLOCK(ip6_sk_fl_lock);
|
2005-04-16 22:20:36 +00:00
|
|
|
|
2019-07-07 09:34:45 +00:00
|
|
|
DEFINE_STATIC_KEY_DEFERRED_FALSE(ipv6_flowlabel_exclusive, HZ);
|
|
|
|
EXPORT_SYMBOL(ipv6_flowlabel_exclusive);
|
|
|
|
|
2013-01-30 09:27:47 +00:00
|
|
|
/*
 * List iterators.  Every pointer is fetched with rcu_dereference_bh().
 *
 * for_each_fl_rcu()          - walk hash bucket @hash of fl_ht[]
 * for_each_fl_continue_rcu() - resume a bucket walk after the current @fl
 * for_each_sk_fl_rcu()       - walk the flow label list hanging off @np
 *
 * All macro arguments are parenthesized so that arbitrary expressions
 * (e.g. "inet6_sk(sk)" or "*pp") can be passed safely.
 */
#define for_each_fl_rcu(hash, fl)				\
	for ((fl) = rcu_dereference_bh(fl_ht[(hash)]);		\
	     (fl) != NULL;					\
	     (fl) = rcu_dereference_bh((fl)->next))
#define for_each_fl_continue_rcu(fl)				\
	for ((fl) = rcu_dereference_bh((fl)->next);		\
	     (fl) != NULL;					\
	     (fl) = rcu_dereference_bh((fl)->next))

#define for_each_sk_fl_rcu(np, sfl)				\
	for ((sfl) = rcu_dereference_bh((np)->ipv6_fl_list);	\
	     (sfl) != NULL;					\
	     (sfl) = rcu_dereference_bh((sfl)->next))
|
|
|
|
|
2008-03-26 23:53:08 +00:00
|
|
|
/*
 * Search the global hash table for the entry that carries @label and
 * belongs to @net.  No reference is taken on the returned entry.  The
 * bucket is walked with rcu_dereference_bh(); the callers in this file
 * hold either rcu_read_lock_bh() or ip6_fl_lock with BHs disabled.
 *
 * Returns the matching entry or NULL.
 */
static inline struct ip6_flowlabel *__fl_lookup(struct net *net, __be32 label)
{
	struct ip6_flowlabel *cur;

	for_each_fl_rcu(FL_HASH(label), cur) {
		if (cur->label != label)
			continue;
		if (!net_eq(cur->fl_net, net))
			continue;
		return cur;
	}

	return NULL;
}
|
|
|
|
|
2008-03-26 23:53:08 +00:00
|
|
|
/*
 * Look up @label in @net and grab a user reference on it.
 *
 * An entry whose user count has already dropped to zero is reported as
 * not found rather than being revived.  Returns the referenced entry or
 * NULL.
 */
static struct ip6_flowlabel *fl_lookup(struct net *net, __be32 label)
{
	struct ip6_flowlabel *found;

	rcu_read_lock_bh();
	found = __fl_lookup(net, label);
	if (found) {
		if (!atomic_inc_not_zero(&found->users))
			found = NULL;
	}
	rcu_read_unlock_bh();

	return found;
}
|
|
|
|
|
2019-07-07 09:34:45 +00:00
|
|
|
static bool fl_shared_exclusive(struct ip6_flowlabel *fl)
|
|
|
|
{
|
|
|
|
return fl->share == IPV6_FL_S_EXCL ||
|
|
|
|
fl->share == IPV6_FL_S_PROCESS ||
|
|
|
|
fl->share == IPV6_FL_S_USER;
|
|
|
|
}
|
|
|
|
|
2019-04-27 23:49:06 +00:00
|
|
|
static void fl_free_rcu(struct rcu_head *head)
|
|
|
|
{
|
|
|
|
struct ip6_flowlabel *fl = container_of(head, struct ip6_flowlabel, rcu);
|
|
|
|
|
|
|
|
if (fl->share == IPV6_FL_S_PROCESS)
|
|
|
|
put_pid(fl->owner.pid);
|
|
|
|
kfree(fl->opt);
|
|
|
|
kfree(fl);
|
|
|
|
}
|
|
|
|
|
2005-04-16 22:20:36 +00:00
|
|
|
|
|
|
|
static void fl_free(struct ip6_flowlabel *fl)
|
|
|
|
{
|
2019-07-07 09:34:45 +00:00
|
|
|
if (!fl)
|
|
|
|
return;
|
|
|
|
|
|
|
|
if (fl_shared_exclusive(fl) || fl->opt)
|
|
|
|
static_branch_slow_dec_deferred(&ipv6_flowlabel_exclusive);
|
|
|
|
|
|
|
|
call_rcu(&fl->rcu, fl_free_rcu);
|
2005-04-16 22:20:36 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
static void fl_release(struct ip6_flowlabel *fl)
|
|
|
|
{
|
2013-01-30 09:27:47 +00:00
|
|
|
spin_lock_bh(&ip6_fl_lock);
|
2005-04-16 22:20:36 +00:00
|
|
|
|
|
|
|
fl->lastuse = jiffies;
|
|
|
|
if (atomic_dec_and_test(&fl->users)) {
|
|
|
|
unsigned long ttd = fl->lastuse + fl->linger;
|
|
|
|
if (time_after(ttd, fl->expires))
|
|
|
|
fl->expires = ttd;
|
|
|
|
ttd = fl->expires;
|
|
|
|
if (fl->opt && fl->share == IPV6_FL_S_EXCL) {
|
|
|
|
struct ipv6_txoptions *opt = fl->opt;
|
|
|
|
fl->opt = NULL;
|
|
|
|
kfree(opt);
|
|
|
|
}
|
|
|
|
if (!timer_pending(&ip6_fl_gc_timer) ||
|
|
|
|
time_after(ip6_fl_gc_timer.expires, ttd))
|
|
|
|
mod_timer(&ip6_fl_gc_timer, ttd);
|
|
|
|
}
|
2013-01-30 09:27:47 +00:00
|
|
|
spin_unlock_bh(&ip6_fl_lock);
|
2005-04-16 22:20:36 +00:00
|
|
|
}
|
|
|
|
|
2017-08-28 18:28:21 +00:00
|
|
|
static void ip6_fl_gc(struct timer_list *unused)
|
2005-04-16 22:20:36 +00:00
|
|
|
{
|
|
|
|
int i;
|
|
|
|
unsigned long now = jiffies;
|
|
|
|
unsigned long sched = 0;
|
|
|
|
|
2013-01-30 09:27:47 +00:00
|
|
|
spin_lock(&ip6_fl_lock);
|
2005-04-16 22:20:36 +00:00
|
|
|
|
2014-08-24 20:53:10 +00:00
|
|
|
for (i = 0; i <= FL_HASH_MASK; i++) {
|
2013-03-07 04:20:32 +00:00
|
|
|
struct ip6_flowlabel *fl;
|
|
|
|
struct ip6_flowlabel __rcu **flp;
|
|
|
|
|
2005-04-16 22:20:36 +00:00
|
|
|
flp = &fl_ht[i];
|
2013-01-30 09:27:47 +00:00
|
|
|
while ((fl = rcu_dereference_protected(*flp,
|
|
|
|
lockdep_is_held(&ip6_fl_lock))) != NULL) {
|
2005-04-16 22:20:36 +00:00
|
|
|
if (atomic_read(&fl->users) == 0) {
|
|
|
|
unsigned long ttd = fl->lastuse + fl->linger;
|
|
|
|
if (time_after(ttd, fl->expires))
|
|
|
|
fl->expires = ttd;
|
|
|
|
ttd = fl->expires;
|
|
|
|
if (time_after_eq(now, ttd)) {
|
|
|
|
*flp = fl->next;
|
|
|
|
fl_free(fl);
|
|
|
|
atomic_dec(&fl_size);
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
if (!sched || time_before(ttd, sched))
|
|
|
|
sched = ttd;
|
|
|
|
}
|
|
|
|
flp = &fl->next;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
if (!sched && atomic_read(&fl_size))
|
|
|
|
sched = now + FL_MAX_LINGER;
|
|
|
|
if (sched) {
|
2008-03-26 23:53:08 +00:00
|
|
|
mod_timer(&ip6_fl_gc_timer, sched);
|
2005-04-16 22:20:36 +00:00
|
|
|
}
|
2013-01-30 09:27:47 +00:00
|
|
|
spin_unlock(&ip6_fl_lock);
|
2005-04-16 22:20:36 +00:00
|
|
|
}
|
|
|
|
|
2010-01-17 03:35:32 +00:00
|
|
|
/*
 * Remove every flow label of @net that has no users from the hash table
 * and free it.  Labels that still have users, or that belong to another
 * namespace, are left in place.
 */
static void __net_exit ip6_fl_purge(struct net *net)
{
	int bucket;

	spin_lock_bh(&ip6_fl_lock);
	for (bucket = 0; bucket <= FL_HASH_MASK; bucket++) {
		struct ip6_flowlabel __rcu **link = &fl_ht[bucket];
		struct ip6_flowlabel *cur;

		for (;;) {
			cur = rcu_dereference_protected(*link,
							lockdep_is_held(&ip6_fl_lock));
			if (!cur)
				break;

			if (!net_eq(cur->fl_net, net) ||
			    atomic_read(&cur->users) != 0) {
				link = &cur->next;
				continue;
			}

			/* Unlink; @link keeps pointing at the same slot */
			*link = cur->next;
			fl_free(cur);
			atomic_dec(&fl_size);
		}
	}
	spin_unlock_bh(&ip6_fl_lock);
}
|
|
|
|
|
|
|
|
/*
 * Insert @fl into the global hash table under @label.
 *
 * With @label == 0 a random non-zero label that is not yet present in
 * @net is picked.  Otherwise, if the (masked) label already exists in
 * @net, nothing is inserted: the existing entry gets an extra user
 * reference and is returned so that the caller can deal with the clash.
 *
 * Returns NULL when @fl has been linked in, or the pre-existing entry.
 */
static struct ip6_flowlabel *fl_intern(struct net *net,
				       struct ip6_flowlabel *fl, __be32 label)
{
	struct ip6_flowlabel __rcu **head;
	struct ip6_flowlabel *existing = NULL;

	fl->label = label & IPV6_FLOWLABEL_MASK;

	spin_lock_bh(&ip6_fl_lock);
	if (label == 0) {
		/* Draw until we get a non-zero label unused in this netns */
		do {
			fl->label = htonl(prandom_u32()) & IPV6_FLOWLABEL_MASK;
		} while (!fl->label || __fl_lookup(net, fl->label));
	} else {
		/*
		 * we dropped the ip6_fl_lock, so this entry could reappear
		 * and we need to recheck with it.
		 *
		 * OTOH no need to search the active socket first, like it is
		 * done in ipv6_flowlabel_opt - sock is locked, so new entry
		 * with the same label can only appear on another sock
		 */
		existing = __fl_lookup(net, fl->label);
		if (existing) {
			atomic_inc(&existing->users);
			goto out;
		}
	}

	fl->lastuse = jiffies;
	head = &fl_ht[FL_HASH(fl->label)];
	fl->next = *head;
	rcu_assign_pointer(*head, fl);
	atomic_inc(&fl_size);
out:
	spin_unlock_bh(&ip6_fl_lock);
	return existing;
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/* Socket flowlabel lists */
|
|
|
|
|
2019-07-07 09:34:45 +00:00
|
|
|
/*
 * Find @label (masked with IPV6_FLOWLABEL_MASK) on the flow label list of
 * socket @sk.  On a hit a user reference is taken - entries whose user
 * count is already zero are skipped - and ->lastuse is refreshed.
 *
 * Returns the referenced flow label or NULL.
 */
struct ip6_flowlabel *__fl6_sock_lookup(struct sock *sk, __be32 label)
{
	struct ipv6_pinfo *np = inet6_sk(sk);
	struct ip6_flowlabel *hit = NULL;
	struct ipv6_fl_socklist *sfl;

	label &= IPV6_FLOWLABEL_MASK;

	rcu_read_lock_bh();
	for_each_sk_fl_rcu(np, sfl) {
		struct ip6_flowlabel *fl = sfl->fl;

		if (fl->label != label)
			continue;
		if (!atomic_inc_not_zero(&fl->users))
			continue;

		fl->lastuse = jiffies;
		hit = fl;
		break;
	}
	rcu_read_unlock_bh();

	return hit;
}
EXPORT_SYMBOL_GPL(__fl6_sock_lookup);
|
2005-12-14 07:23:20 +00:00
|
|
|
|
2005-04-16 22:20:36 +00:00
|
|
|
void fl6_free_socklist(struct sock *sk)
|
|
|
|
{
|
|
|
|
struct ipv6_pinfo *np = inet6_sk(sk);
|
|
|
|
struct ipv6_fl_socklist *sfl;
|
|
|
|
|
2013-01-30 09:27:52 +00:00
|
|
|
if (!rcu_access_pointer(np->ipv6_fl_list))
|
2013-01-30 09:26:42 +00:00
|
|
|
return;
|
|
|
|
|
2013-01-30 09:27:52 +00:00
|
|
|
spin_lock_bh(&ip6_sk_fl_lock);
|
|
|
|
while ((sfl = rcu_dereference_protected(np->ipv6_fl_list,
|
|
|
|
lockdep_is_held(&ip6_sk_fl_lock))) != NULL) {
|
|
|
|
np->ipv6_fl_list = sfl->next;
|
|
|
|
spin_unlock_bh(&ip6_sk_fl_lock);
|
2013-01-30 09:26:42 +00:00
|
|
|
|
2005-04-16 22:20:36 +00:00
|
|
|
fl_release(sfl->fl);
|
2013-01-30 09:27:52 +00:00
|
|
|
kfree_rcu(sfl, rcu);
|
|
|
|
|
|
|
|
spin_lock_bh(&ip6_sk_fl_lock);
|
2005-04-16 22:20:36 +00:00
|
|
|
}
|
2013-01-30 09:27:52 +00:00
|
|
|
spin_unlock_bh(&ip6_sk_fl_lock);
|
2005-04-16 22:20:36 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
/* Service routines */
|
|
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
It is the only difficult place. flowlabel enforces equal headers
|
|
|
|
before and including routing header, however user may supply options
|
|
|
|
following rthdr.
|
|
|
|
*/
|
|
|
|
|
2014-08-24 20:53:10 +00:00
|
|
|
struct ipv6_txoptions *fl6_merge_options(struct ipv6_txoptions *opt_space,
|
|
|
|
struct ip6_flowlabel *fl,
|
|
|
|
struct ipv6_txoptions *fopt)
|
2005-04-16 22:20:36 +00:00
|
|
|
{
|
2014-08-24 20:53:10 +00:00
|
|
|
struct ipv6_txoptions *fl_opt = fl->opt;
|
2007-02-09 14:24:49 +00:00
|
|
|
|
2015-03-29 13:00:04 +00:00
|
|
|
if (!fopt || fopt->opt_flen == 0)
|
2005-11-20 03:23:18 +00:00
|
|
|
return fl_opt;
|
2007-02-09 14:24:49 +00:00
|
|
|
|
2015-03-29 13:00:05 +00:00
|
|
|
if (fl_opt) {
|
2005-04-16 22:20:36 +00:00
|
|
|
opt_space->hopopt = fl_opt->hopopt;
|
2005-11-20 03:23:18 +00:00
|
|
|
opt_space->dst0opt = fl_opt->dst0opt;
|
2005-04-16 22:20:36 +00:00
|
|
|
opt_space->srcrt = fl_opt->srcrt;
|
|
|
|
opt_space->opt_nflen = fl_opt->opt_nflen;
|
|
|
|
} else {
|
|
|
|
if (fopt->opt_nflen == 0)
|
|
|
|
return fopt;
|
|
|
|
opt_space->hopopt = NULL;
|
|
|
|
opt_space->dst0opt = NULL;
|
|
|
|
opt_space->srcrt = NULL;
|
|
|
|
opt_space->opt_nflen = 0;
|
|
|
|
}
|
|
|
|
opt_space->dst1opt = fopt->dst1opt;
|
|
|
|
opt_space->opt_flen = fopt->opt_flen;
|
2017-10-21 19:26:23 +00:00
|
|
|
opt_space->tot_len = fopt->tot_len;
|
2005-04-16 22:20:36 +00:00
|
|
|
return opt_space;
|
|
|
|
}
|
2012-04-29 21:48:53 +00:00
|
|
|
EXPORT_SYMBOL_GPL(fl6_merge_options);
|
2005-04-16 22:20:36 +00:00
|
|
|
|
|
|
|
static unsigned long check_linger(unsigned long ttl)
|
|
|
|
{
|
|
|
|
if (ttl < FL_MIN_LINGER)
|
|
|
|
return FL_MIN_LINGER*HZ;
|
|
|
|
if (ttl > FL_MAX_LINGER && !capable(CAP_NET_ADMIN))
|
|
|
|
return 0;
|
|
|
|
return ttl*HZ;
|
|
|
|
}
|
|
|
|
|
|
|
|
static int fl6_renew(struct ip6_flowlabel *fl, unsigned long linger, unsigned long expires)
|
|
|
|
{
|
|
|
|
linger = check_linger(linger);
|
|
|
|
if (!linger)
|
|
|
|
return -EPERM;
|
|
|
|
expires = check_linger(expires);
|
|
|
|
if (!expires)
|
|
|
|
return -EPERM;
|
2013-11-07 16:53:14 +00:00
|
|
|
|
|
|
|
spin_lock_bh(&ip6_fl_lock);
|
2005-04-16 22:20:36 +00:00
|
|
|
fl->lastuse = jiffies;
|
|
|
|
if (time_before(fl->linger, linger))
|
|
|
|
fl->linger = linger;
|
|
|
|
if (time_before(expires, fl->linger))
|
|
|
|
expires = fl->linger;
|
|
|
|
if (time_before(fl->expires, fl->lastuse + expires))
|
|
|
|
fl->expires = fl->lastuse + expires;
|
2013-11-07 16:53:14 +00:00
|
|
|
spin_unlock_bh(&ip6_fl_lock);
|
|
|
|
|
2005-04-16 22:20:36 +00:00
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
static struct ip6_flowlabel *
|
2011-08-28 12:35:31 +00:00
|
|
|
fl_create(struct net *net, struct sock *sk, struct in6_flowlabel_req *freq,
|
|
|
|
char __user *optval, int optlen, int *err_p)
|
2005-04-16 22:20:36 +00:00
|
|
|
{
|
2009-02-06 08:49:55 +00:00
|
|
|
struct ip6_flowlabel *fl = NULL;
|
2005-04-16 22:20:36 +00:00
|
|
|
int olen;
|
|
|
|
int addr_type;
|
|
|
|
int err;
|
|
|
|
|
2009-02-06 08:49:55 +00:00
|
|
|
olen = optlen - CMSG_ALIGN(sizeof(*freq));
|
|
|
|
err = -EINVAL;
|
|
|
|
if (olen > 64 * 1024)
|
|
|
|
goto done;
|
|
|
|
|
2005-04-16 22:20:36 +00:00
|
|
|
err = -ENOMEM;
|
2006-03-21 07:01:32 +00:00
|
|
|
fl = kzalloc(sizeof(*fl), GFP_KERNEL);
|
2015-03-29 13:00:04 +00:00
|
|
|
if (!fl)
|
2005-04-16 22:20:36 +00:00
|
|
|
goto done;
|
|
|
|
|
|
|
|
if (olen > 0) {
|
|
|
|
struct msghdr msg;
|
2011-03-12 21:22:43 +00:00
|
|
|
struct flowi6 flowi6;
|
2016-05-03 04:40:07 +00:00
|
|
|
struct ipcm6_cookie ipc6;
|
2005-04-16 22:20:36 +00:00
|
|
|
|
|
|
|
err = -ENOMEM;
|
|
|
|
fl->opt = kmalloc(sizeof(*fl->opt) + olen, GFP_KERNEL);
|
2015-03-29 13:00:04 +00:00
|
|
|
if (!fl->opt)
|
2005-04-16 22:20:36 +00:00
|
|
|
goto done;
|
|
|
|
|
|
|
|
memset(fl->opt, 0, sizeof(*fl->opt));
|
|
|
|
fl->opt->tot_len = sizeof(*fl->opt) + olen;
|
|
|
|
err = -EFAULT;
|
|
|
|
if (copy_from_user(fl->opt+1, optval+CMSG_ALIGN(sizeof(*freq)), olen))
|
|
|
|
goto done;
|
|
|
|
|
|
|
|
msg.msg_controllen = olen;
|
2014-08-24 20:53:10 +00:00
|
|
|
msg.msg_control = (void *)(fl->opt+1);
|
2011-03-12 21:22:43 +00:00
|
|
|
memset(&flowi6, 0, sizeof(flowi6));
|
2005-04-16 22:20:36 +00:00
|
|
|
|
2016-05-03 04:40:07 +00:00
|
|
|
ipc6.opt = fl->opt;
|
2018-07-06 14:12:57 +00:00
|
|
|
err = ip6_datagram_send_ctl(net, sk, &msg, &flowi6, &ipc6);
|
2005-04-16 22:20:36 +00:00
|
|
|
if (err)
|
|
|
|
goto done;
|
|
|
|
err = -EINVAL;
|
|
|
|
if (fl->opt->opt_flen)
|
|
|
|
goto done;
|
|
|
|
if (fl->opt->opt_nflen == 0) {
|
|
|
|
kfree(fl->opt);
|
|
|
|
fl->opt = NULL;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2015-03-12 04:04:08 +00:00
|
|
|
fl->fl_net = net;
|
2005-04-16 22:20:36 +00:00
|
|
|
fl->expires = jiffies;
|
|
|
|
err = fl6_renew(fl, freq->flr_linger, freq->flr_expires);
|
|
|
|
if (err)
|
|
|
|
goto done;
|
|
|
|
fl->share = freq->flr_share;
|
|
|
|
addr_type = ipv6_addr_type(&freq->flr_dst);
|
2009-11-24 22:52:52 +00:00
|
|
|
if ((addr_type & IPV6_ADDR_MAPPED) ||
|
|
|
|
addr_type == IPV6_ADDR_ANY) {
|
2006-10-31 02:56:06 +00:00
|
|
|
err = -EINVAL;
|
2005-04-16 22:20:36 +00:00
|
|
|
goto done;
|
2006-10-31 02:56:06 +00:00
|
|
|
}
|
2011-11-21 03:39:03 +00:00
|
|
|
fl->dst = freq->flr_dst;
|
2005-04-16 22:20:36 +00:00
|
|
|
atomic_set(&fl->users, 1);
|
|
|
|
switch (fl->share) {
|
|
|
|
case IPV6_FL_S_EXCL:
|
|
|
|
case IPV6_FL_S_ANY:
|
|
|
|
break;
|
|
|
|
case IPV6_FL_S_PROCESS:
|
2012-05-24 16:37:59 +00:00
|
|
|
fl->owner.pid = get_task_pid(current, PIDTYPE_PID);
|
2005-04-16 22:20:36 +00:00
|
|
|
break;
|
|
|
|
case IPV6_FL_S_USER:
|
2012-05-24 16:37:59 +00:00
|
|
|
fl->owner.uid = current_euid();
|
2005-04-16 22:20:36 +00:00
|
|
|
break;
|
|
|
|
default:
|
|
|
|
err = -EINVAL;
|
|
|
|
goto done;
|
|
|
|
}
|
2019-07-10 13:40:11 +00:00
|
|
|
if (fl_shared_exclusive(fl) || fl->opt)
|
|
|
|
static_branch_deferred_inc(&ipv6_flowlabel_exclusive);
|
2005-04-16 22:20:36 +00:00
|
|
|
return fl;
|
|
|
|
|
|
|
|
done:
|
2019-07-10 13:40:11 +00:00
|
|
|
if (fl) {
|
|
|
|
kfree(fl->opt);
|
|
|
|
kfree(fl);
|
|
|
|
}
|
2005-04-16 22:20:36 +00:00
|
|
|
*err_p = err;
|
|
|
|
return NULL;
|
|
|
|
}
|
|
|
|
|
|
|
|
static int mem_check(struct sock *sk)
|
|
|
|
{
|
|
|
|
struct ipv6_pinfo *np = inet6_sk(sk);
|
|
|
|
struct ipv6_fl_socklist *sfl;
|
|
|
|
int room = FL_MAX_SIZE - atomic_read(&fl_size);
|
|
|
|
int count = 0;
|
|
|
|
|
|
|
|
if (room > FL_MAX_SIZE - FL_MAX_PER_SOCK)
|
|
|
|
return 0;
|
|
|
|
|
2013-11-08 18:26:21 +00:00
|
|
|
rcu_read_lock_bh();
|
2013-01-30 09:27:52 +00:00
|
|
|
for_each_sk_fl_rcu(np, sfl)
|
2005-04-16 22:20:36 +00:00
|
|
|
count++;
|
2013-11-08 18:26:21 +00:00
|
|
|
rcu_read_unlock_bh();
|
2005-04-16 22:20:36 +00:00
|
|
|
|
|
|
|
if (room <= 0 ||
|
|
|
|
((count >= FL_MAX_PER_SOCK ||
|
2009-11-24 22:52:52 +00:00
|
|
|
(count > 0 && room < FL_MAX_SIZE/2) || room < FL_MAX_SIZE/4) &&
|
|
|
|
!capable(CAP_NET_ADMIN)))
|
2005-04-16 22:20:36 +00:00
|
|
|
return -ENOBUFS;
|
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2007-10-18 12:14:58 +00:00
|
|
|
static inline void fl_link(struct ipv6_pinfo *np, struct ipv6_fl_socklist *sfl,
|
|
|
|
struct ip6_flowlabel *fl)
|
|
|
|
{
|
2013-01-30 09:27:52 +00:00
|
|
|
spin_lock_bh(&ip6_sk_fl_lock);
|
2007-10-18 12:14:58 +00:00
|
|
|
sfl->fl = fl;
|
|
|
|
sfl->next = np->ipv6_fl_list;
|
2013-01-30 09:27:52 +00:00
|
|
|
rcu_assign_pointer(np->ipv6_fl_list, sfl);
|
|
|
|
spin_unlock_bh(&ip6_sk_fl_lock);
|
2007-10-18 12:14:58 +00:00
|
|
|
}
|
|
|
|
|
2014-01-17 16:15:04 +00:00
|
|
|
int ipv6_flowlabel_opt_get(struct sock *sk, struct in6_flowlabel_req *freq,
|
|
|
|
int flags)
|
2013-11-07 16:53:12 +00:00
|
|
|
{
|
|
|
|
struct ipv6_pinfo *np = inet6_sk(sk);
|
|
|
|
struct ipv6_fl_socklist *sfl;
|
|
|
|
|
2014-01-17 16:15:04 +00:00
|
|
|
if (flags & IPV6_FL_F_REMOTE) {
|
|
|
|
freq->flr_label = np->rcv_flowinfo & IPV6_FLOWLABEL_MASK;
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2014-01-17 16:15:03 +00:00
|
|
|
if (np->repflow) {
|
|
|
|
freq->flr_label = np->flow_label;
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2013-11-07 16:53:12 +00:00
|
|
|
rcu_read_lock_bh();
|
|
|
|
|
|
|
|
for_each_sk_fl_rcu(np, sfl) {
|
|
|
|
if (sfl->fl->label == (np->flow_label & IPV6_FLOWLABEL_MASK)) {
|
|
|
|
spin_lock_bh(&ip6_fl_lock);
|
|
|
|
freq->flr_label = sfl->fl->label;
|
|
|
|
freq->flr_dst = sfl->fl->dst;
|
|
|
|
freq->flr_share = sfl->fl->share;
|
|
|
|
freq->flr_expires = (sfl->fl->expires - jiffies) / HZ;
|
|
|
|
freq->flr_linger = sfl->fl->linger / HZ;
|
|
|
|
|
|
|
|
spin_unlock_bh(&ip6_fl_lock);
|
|
|
|
rcu_read_unlock_bh();
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
rcu_read_unlock_bh();
|
|
|
|
|
|
|
|
return -ENOENT;
|
|
|
|
}
|
|
|
|
|
2005-04-16 22:20:36 +00:00
|
|
|
int ipv6_flowlabel_opt(struct sock *sk, char __user *optval, int optlen)
|
|
|
|
{
|
treewide: Remove uninitialized_var() usage
Using uninitialized_var() is dangerous as it papers over real bugs[1]
(or can in the future), and suppresses unrelated compiler warnings
(e.g. "unused variable"). If the compiler thinks it is uninitialized,
either simply initialize the variable or make compiler changes.
In preparation for removing[2] the[3] macro[4], remove all remaining
needless uses with the following script:
git grep '\buninitialized_var\b' | cut -d: -f1 | sort -u | \
xargs perl -pi -e \
's/\buninitialized_var\(([^\)]+)\)/\1/g;
s:\s*/\* (GCC be quiet|to make compiler happy) \*/$::g;'
drivers/video/fbdev/riva/riva_hw.c was manually tweaked to avoid
pathological white-space.
No outstanding warnings were found building allmodconfig with GCC 9.3.0
for x86_64, i386, arm64, arm, powerpc, powerpc64le, s390x, mips, sparc64,
alpha, and m68k.
[1] https://lore.kernel.org/lkml/20200603174714.192027-1-glider@google.com/
[2] https://lore.kernel.org/lkml/CA+55aFw+Vbj0i=1TGqCR5vQkCzWJ0QxK6CernOU6eedsudAixw@mail.gmail.com/
[3] https://lore.kernel.org/lkml/CA+55aFwgbgqhbp1fkxvRKEpzyR5J8n1vKT1VZdz9knmPuXhOeg@mail.gmail.com/
[4] https://lore.kernel.org/lkml/CA+55aFz2500WfbKXAx8s67wrm9=yVJu65TpLgN_ybYNv0VEOKA@mail.gmail.com/
Reviewed-by: Leon Romanovsky <leonro@mellanox.com> # drivers/infiniband and mlx4/mlx5
Acked-by: Jason Gunthorpe <jgg@mellanox.com> # IB
Acked-by: Kalle Valo <kvalo@codeaurora.org> # wireless drivers
Reviewed-by: Chao Yu <yuchao0@huawei.com> # erofs
Signed-off-by: Kees Cook <keescook@chromium.org>
2020-06-03 20:09:38 +00:00
|
|
|
int err;
|
2008-03-26 23:53:08 +00:00
|
|
|
struct net *net = sock_net(sk);
|
2005-04-16 22:20:36 +00:00
|
|
|
struct ipv6_pinfo *np = inet6_sk(sk);
|
|
|
|
struct in6_flowlabel_req freq;
|
2014-08-24 20:53:10 +00:00
|
|
|
struct ipv6_fl_socklist *sfl1 = NULL;
|
2013-03-07 04:20:32 +00:00
|
|
|
struct ipv6_fl_socklist *sfl;
|
|
|
|
struct ipv6_fl_socklist __rcu **sflp;
|
2007-10-18 12:18:56 +00:00
|
|
|
struct ip6_flowlabel *fl, *fl1 = NULL;
|
|
|
|
|
2005-04-16 22:20:36 +00:00
|
|
|
|
|
|
|
if (optlen < sizeof(freq))
|
|
|
|
return -EINVAL;
|
|
|
|
|
|
|
|
if (copy_from_user(&freq, optval, sizeof(freq)))
|
|
|
|
return -EFAULT;
|
|
|
|
|
|
|
|
switch (freq.flr_action) {
|
|
|
|
case IPV6_FL_A_PUT:
|
2014-01-17 16:15:03 +00:00
|
|
|
if (freq.flr_flags & IPV6_FL_F_REFLECT) {
|
|
|
|
if (sk->sk_protocol != IPPROTO_TCP)
|
|
|
|
return -ENOPROTOOPT;
|
|
|
|
if (!np->repflow)
|
|
|
|
return -ESRCH;
|
|
|
|
np->flow_label = 0;
|
|
|
|
np->repflow = 0;
|
|
|
|
return 0;
|
|
|
|
}
|
2013-01-30 09:27:52 +00:00
|
|
|
spin_lock_bh(&ip6_sk_fl_lock);
|
|
|
|
for (sflp = &np->ipv6_fl_list;
|
2016-02-03 01:55:01 +00:00
|
|
|
(sfl = rcu_dereference_protected(*sflp,
|
|
|
|
lockdep_is_held(&ip6_sk_fl_lock))) != NULL;
|
2013-01-30 09:27:52 +00:00
|
|
|
sflp = &sfl->next) {
|
2005-04-16 22:20:36 +00:00
|
|
|
if (sfl->fl->label == freq.flr_label) {
|
|
|
|
if (freq.flr_label == (np->flow_label&IPV6_FLOWLABEL_MASK))
|
|
|
|
np->flow_label &= ~IPV6_FLOWLABEL_MASK;
|
2016-02-03 01:55:01 +00:00
|
|
|
*sflp = sfl->next;
|
2013-01-30 09:27:52 +00:00
|
|
|
spin_unlock_bh(&ip6_sk_fl_lock);
|
2005-04-16 22:20:36 +00:00
|
|
|
fl_release(sfl->fl);
|
2013-01-30 09:27:52 +00:00
|
|
|
kfree_rcu(sfl, rcu);
|
2005-04-16 22:20:36 +00:00
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
}
|
2013-01-30 09:27:52 +00:00
|
|
|
spin_unlock_bh(&ip6_sk_fl_lock);
|
2005-04-16 22:20:36 +00:00
|
|
|
return -ESRCH;
|
|
|
|
|
|
|
|
case IPV6_FL_A_RENEW:
|
2013-01-30 09:27:52 +00:00
|
|
|
rcu_read_lock_bh();
|
|
|
|
for_each_sk_fl_rcu(np, sfl) {
|
2005-04-16 22:20:36 +00:00
|
|
|
if (sfl->fl->label == freq.flr_label) {
|
|
|
|
err = fl6_renew(sfl->fl, freq.flr_linger, freq.flr_expires);
|
2013-01-30 09:27:52 +00:00
|
|
|
rcu_read_unlock_bh();
|
2005-04-16 22:20:36 +00:00
|
|
|
return err;
|
|
|
|
}
|
|
|
|
}
|
2013-01-30 09:27:52 +00:00
|
|
|
rcu_read_unlock_bh();
|
2005-04-16 22:20:36 +00:00
|
|
|
|
net: Allow userns root to control ipv6
Allow an unprivileged user who has created a user namespace, and then
created a network namespace to effectively use the new network
namespace, by reducing capable(CAP_NET_ADMIN) and
capable(CAP_NET_RAW) calls to be ns_capable(net->user_ns,
CAP_NET_ADMIN), or ns_capable(net->user_ns, CAP_NET_RAW) calls.
Settings that merely control a single network device are allowed.
Either the network device is a logical network device where
restrictions make no difference or the network device is hardware NIC
that has been explicitly moved from the initial network namespace.
In general policy and network stack state changes are allowed while
resource control is left unchanged.
Allow the SIOCSIFADDR ioctl to add ipv6 addresses.
Allow the SIOCDIFADDR ioctl to delete ipv6 addresses.
Allow the SIOCADDRT ioctl to add ipv6 routes.
Allow the SIOCDELRT ioctl to delete ipv6 routes.
Allow creation of ipv6 raw sockets.
Allow setting the IPV6_JOIN_ANYCAST socket option.
Allow setting the IPV6_FL_A_RENEW parameter of the IPV6_FLOWLABEL_MGR
socket option.
Allow setting the IPV6_TRANSPARENT socket option.
Allow setting the IPV6_HOPOPTS socket option.
Allow setting the IPV6_RTHDRDSTOPTS socket option.
Allow setting the IPV6_DSTOPTS socket option.
Allow setting the IPV6_IPSEC_POLICY socket option.
Allow setting the IPV6_XFRM_POLICY socket option.
Allow sending packets with the IPV6_2292HOPOPTS control message.
Allow sending packets with the IPV6_2292DSTOPTS control message.
Allow sending packets with the IPV6_RTHDRDSTOPTS control message.
Allow setting the multicast routing socket options on non multicast
routing sockets.
Allow the SIOCADDTUNNEL, SIOCCHGTUNNEL, and SIOCDELTUNNEL ioctls for
setting up, changing and deleting tunnels over ipv6.
Allow the SIOCADDTUNNEL, SIOCCHGTUNNEL, SIOCDELTUNNEL ioctls for
setting up, changing and deleting ipv6 over ipv4 tunnels.
Allow the SIOCADDPRL, SIOCDELPRL, SIOCCHGPRL ioctls for adding,
deleting, and changing the potential router list for ISATAP tunnels.
Signed-off-by: "Eric W. Biederman" <ebiederm@xmission.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2012-11-16 03:03:06 +00:00
|
|
|
if (freq.flr_share == IPV6_FL_S_NONE &&
|
|
|
|
ns_capable(net->user_ns, CAP_NET_ADMIN)) {
|
2008-03-26 23:53:08 +00:00
|
|
|
fl = fl_lookup(net, freq.flr_label);
|
2005-04-16 22:20:36 +00:00
|
|
|
if (fl) {
|
|
|
|
err = fl6_renew(fl, freq.flr_linger, freq.flr_expires);
|
|
|
|
fl_release(fl);
|
|
|
|
return err;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
return -ESRCH;
|
|
|
|
|
|
|
|
case IPV6_FL_A_GET:
|
2014-01-17 16:15:03 +00:00
|
|
|
if (freq.flr_flags & IPV6_FL_F_REFLECT) {
|
2014-01-17 16:15:05 +00:00
|
|
|
struct net *net = sock_net(sk);
|
|
|
|
if (net->ipv6.sysctl.flowlabel_consistency) {
|
|
|
|
net_info_ratelimited("Can not set IPV6_FL_F_REFLECT if flowlabel_consistency sysctl is enable\n");
|
|
|
|
return -EPERM;
|
|
|
|
}
|
|
|
|
|
2014-01-17 16:15:03 +00:00
|
|
|
if (sk->sk_protocol != IPPROTO_TCP)
|
|
|
|
return -ENOPROTOOPT;
|
2014-01-17 16:15:05 +00:00
|
|
|
|
2014-01-17 16:15:03 +00:00
|
|
|
np->repflow = 1;
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2005-04-16 22:20:36 +00:00
|
|
|
if (freq.flr_label & ~IPV6_FLOWLABEL_MASK)
|
|
|
|
return -EINVAL;
|
|
|
|
|
ipv6: Flow label state ranges
This patch divides the IPv6 flow label space into two ranges:
0-7ffff is reserved for flow label manager, 80000-fffff will be
used for creating auto flow labels (per RFC6438). This only affects how
labels are set on transmit, it does not affect receive. This range split
can be disabled by sysctl.
Background:
IPv6 flow labels have been an unmitigated disappointment thus far
in the lifetime of IPv6. Support in HW devices to use them for ECMP
is lacking, and OSes don't turn them on by default. If we had these
we could get much better hashing in IPv6 networks without resorting
to DPI, possibly eliminating some of the motivations to define new
encaps in UDP just for getting ECMP.
Unfortunately, the initial specifications of IPv6 did not clarify
how they are to be used. There has always been a vague concept that
these can be used for ECMP, flow hashing, etc. and we do now have a
good standard for how to do this in RFC6438. The problem is that flow labels
can be either stateful or stateless (as in RFC6438), and we are
presented with the possibility that a stateless label may collide
with a stateful one. Attempts to split the flow label space were
rejected in IETF. When we added support in Linux for RFC6438, we
could not turn on flow labels by default due to this conflict.
This patch splits the flow label space and should give us
a path to enabling auto flow labels by default for all IPv6 packets.
This is an API change so we need to consider compatibility with
existing deployment. The stateful range is chosen to be the lower
values in hopes that most uses would have chosen small numbers.
Once we resolve the stateless/stateful issue, we can proceed to
look at enabling RFC6438 flow labels by default (starting with
scaled testing).
Signed-off-by: Tom Herbert <tom@herbertland.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2015-04-29 22:33:21 +00:00
|
|
|
if (net->ipv6.sysctl.flowlabel_state_ranges &&
|
|
|
|
(freq.flr_label & IPV6_FLOWLABEL_STATELESS_FLAG))
|
|
|
|
return -ERANGE;
|
|
|
|
|
2011-08-28 12:35:31 +00:00
|
|
|
fl = fl_create(net, sk, &freq, optval, optlen, &err);
|
2015-03-29 13:00:04 +00:00
|
|
|
if (!fl)
|
2005-04-16 22:20:36 +00:00
|
|
|
return err;
|
|
|
|
sfl1 = kmalloc(sizeof(*sfl1), GFP_KERNEL);
|
|
|
|
|
|
|
|
if (freq.flr_label) {
|
|
|
|
err = -EEXIST;
|
2013-01-30 09:27:52 +00:00
|
|
|
rcu_read_lock_bh();
|
|
|
|
for_each_sk_fl_rcu(np, sfl) {
|
2005-04-16 22:20:36 +00:00
|
|
|
if (sfl->fl->label == freq.flr_label) {
|
|
|
|
if (freq.flr_flags&IPV6_FL_F_EXCL) {
|
2013-01-30 09:27:52 +00:00
|
|
|
rcu_read_unlock_bh();
|
2005-04-16 22:20:36 +00:00
|
|
|
goto done;
|
|
|
|
}
|
|
|
|
fl1 = sfl->fl;
|
2019-06-06 21:32:34 +00:00
|
|
|
if (!atomic_inc_not_zero(&fl1->users))
|
|
|
|
fl1 = NULL;
|
2005-04-16 22:20:36 +00:00
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
2013-01-30 09:27:52 +00:00
|
|
|
rcu_read_unlock_bh();
|
2005-04-16 22:20:36 +00:00
|
|
|
|
2015-03-29 13:00:04 +00:00
|
|
|
if (!fl1)
|
2008-03-26 23:53:08 +00:00
|
|
|
fl1 = fl_lookup(net, freq.flr_label);
|
2005-04-16 22:20:36 +00:00
|
|
|
if (fl1) {
|
2007-10-18 12:18:56 +00:00
|
|
|
recheck:
|
2005-04-16 22:20:36 +00:00
|
|
|
err = -EEXIST;
|
|
|
|
if (freq.flr_flags&IPV6_FL_F_EXCL)
|
|
|
|
goto release;
|
|
|
|
err = -EPERM;
|
|
|
|
if (fl1->share == IPV6_FL_S_EXCL ||
|
|
|
|
fl1->share != fl->share ||
|
2012-05-24 16:37:59 +00:00
|
|
|
((fl1->share == IPV6_FL_S_PROCESS) &&
|
2019-04-25 16:06:54 +00:00
|
|
|
(fl1->owner.pid != fl->owner.pid)) ||
|
2012-05-24 16:37:59 +00:00
|
|
|
((fl1->share == IPV6_FL_S_USER) &&
|
2019-04-25 16:06:54 +00:00
|
|
|
!uid_eq(fl1->owner.uid, fl->owner.uid)))
|
2005-04-16 22:20:36 +00:00
|
|
|
goto release;
|
|
|
|
|
|
|
|
err = -ENOMEM;
|
2015-03-29 13:00:04 +00:00
|
|
|
if (!sfl1)
|
2005-04-16 22:20:36 +00:00
|
|
|
goto release;
|
|
|
|
if (fl->linger > fl1->linger)
|
|
|
|
fl1->linger = fl->linger;
|
|
|
|
if ((long)(fl->expires - fl1->expires) > 0)
|
|
|
|
fl1->expires = fl->expires;
|
2007-10-18 12:14:58 +00:00
|
|
|
fl_link(np, sfl1, fl1);
|
2005-04-16 22:20:36 +00:00
|
|
|
fl_free(fl);
|
|
|
|
return 0;
|
|
|
|
|
|
|
|
release:
|
|
|
|
fl_release(fl1);
|
|
|
|
goto done;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
err = -ENOENT;
|
|
|
|
if (!(freq.flr_flags&IPV6_FL_F_CREATE))
|
|
|
|
goto done;
|
|
|
|
|
|
|
|
err = -ENOMEM;
|
2015-03-29 13:00:04 +00:00
|
|
|
if (!sfl1)
|
2014-11-23 21:28:43 +00:00
|
|
|
goto done;
|
|
|
|
|
|
|
|
err = mem_check(sk);
|
|
|
|
if (err != 0)
|
2005-04-16 22:20:36 +00:00
|
|
|
goto done;
|
|
|
|
|
2008-03-26 23:53:08 +00:00
|
|
|
fl1 = fl_intern(net, fl, freq.flr_label);
|
2015-03-29 13:00:05 +00:00
|
|
|
if (fl1)
|
2007-10-18 12:18:56 +00:00
|
|
|
goto recheck;
|
2005-04-16 22:20:36 +00:00
|
|
|
|
2005-05-30 03:28:01 +00:00
|
|
|
if (!freq.flr_label) {
|
|
|
|
if (copy_to_user(&((struct in6_flowlabel_req __user *) optval)->flr_label,
|
|
|
|
&fl->label, sizeof(fl->label))) {
|
|
|
|
/* Intentionally ignore fault. */
|
|
|
|
}
|
|
|
|
}
|
2005-04-16 22:20:36 +00:00
|
|
|
|
2007-10-18 12:14:58 +00:00
|
|
|
fl_link(np, sfl1, fl);
|
2005-04-16 22:20:36 +00:00
|
|
|
return 0;
|
|
|
|
|
|
|
|
default:
|
|
|
|
return -EINVAL;
|
|
|
|
}
|
|
|
|
|
|
|
|
done:
|
|
|
|
fl_free(fl);
|
|
|
|
kfree(sfl1);
|
|
|
|
return err;
|
|
|
|
}
|
|
|
|
|
|
|
|
#ifdef CONFIG_PROC_FS
|
|
|
|
|
|
|
|
struct ip6fl_iter_state {
|
2008-03-26 23:53:30 +00:00
|
|
|
struct seq_net_private p;
|
2012-05-24 16:37:59 +00:00
|
|
|
struct pid_namespace *pid_ns;
|
2005-04-16 22:20:36 +00:00
|
|
|
int bucket;
|
|
|
|
};
|
|
|
|
|
|
|
|
/* Cast a seq_file's ->private pointer to the flow label iterator state. */
#define ip6fl_seq_private(seq) ((struct ip6fl_iter_state *)(seq)->private)
|
|
|
|
|
|
|
|
static struct ip6_flowlabel *ip6fl_get_first(struct seq_file *seq)
|
|
|
|
{
|
|
|
|
struct ip6_flowlabel *fl = NULL;
|
|
|
|
struct ip6fl_iter_state *state = ip6fl_seq_private(seq);
|
2008-03-26 23:53:30 +00:00
|
|
|
struct net *net = seq_file_net(seq);
|
2005-04-16 22:20:36 +00:00
|
|
|
|
|
|
|
for (state->bucket = 0; state->bucket <= FL_HASH_MASK; ++state->bucket) {
|
2013-01-30 09:27:47 +00:00
|
|
|
for_each_fl_rcu(state->bucket, fl) {
|
|
|
|
if (net_eq(fl->fl_net, net))
|
|
|
|
goto out;
|
|
|
|
}
|
2005-04-16 22:20:36 +00:00
|
|
|
}
|
2013-01-30 09:27:47 +00:00
|
|
|
fl = NULL;
|
|
|
|
out:
|
2005-04-16 22:20:36 +00:00
|
|
|
return fl;
|
|
|
|
}
|
|
|
|
|
|
|
|
static struct ip6_flowlabel *ip6fl_get_next(struct seq_file *seq, struct ip6_flowlabel *fl)
|
|
|
|
{
|
|
|
|
struct ip6fl_iter_state *state = ip6fl_seq_private(seq);
|
2008-03-26 23:53:30 +00:00
|
|
|
struct net *net = seq_file_net(seq);
|
2005-04-16 22:20:36 +00:00
|
|
|
|
2013-01-30 09:27:47 +00:00
|
|
|
for_each_fl_continue_rcu(fl) {
|
|
|
|
if (net_eq(fl->fl_net, net))
|
|
|
|
goto out;
|
|
|
|
}
|
|
|
|
|
2008-03-26 23:53:30 +00:00
|
|
|
try_again:
|
2013-01-30 09:27:47 +00:00
|
|
|
if (++state->bucket <= FL_HASH_MASK) {
|
|
|
|
for_each_fl_rcu(state->bucket, fl) {
|
|
|
|
if (net_eq(fl->fl_net, net))
|
|
|
|
goto out;
|
|
|
|
}
|
|
|
|
goto try_again;
|
2005-04-16 22:20:36 +00:00
|
|
|
}
|
2013-01-30 09:27:47 +00:00
|
|
|
fl = NULL;
|
|
|
|
|
|
|
|
out:
|
2005-04-16 22:20:36 +00:00
|
|
|
return fl;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
 * Return the entry that is @pos steps after the first matching flow label
 * (pos == 0 yields the first one), or NULL if the walk ends before @pos
 * steps could be taken.
 */
static struct ip6_flowlabel *ip6fl_get_idx(struct seq_file *seq, loff_t pos)
{
	struct ip6_flowlabel *fl = ip6fl_get_first(seq);

	while (fl && pos) {
		fl = ip6fl_get_next(seq, fl);
		/* Only count a step when it actually landed on an entry. */
		if (fl)
			--pos;
	}

	if (pos)
		return NULL;
	return fl;
}
|
|
|
|
|
|
|
|
/*
 * seq_file ->start: record the pid namespace of the proc mount in the
 * iterator state, take rcu_read_lock_bh() and hand back the starting
 * element.
 *
 * Position 0 maps to SEQ_START_TOKEN (the header line); position N > 0 maps
 * to the entry at index N - 1.  The lock taken here is dropped in
 * ip6fl_seq_stop().
 */
static void *ip6fl_seq_start(struct seq_file *seq, loff_t *pos)
	__acquires(RCU)
{
	struct ip6fl_iter_state *iter = ip6fl_seq_private(seq);

	iter->pid_ns = proc_pid_ns(file_inode(seq->file)->i_sb);

	rcu_read_lock_bh();

	if (!*pos)
		return SEQ_START_TOKEN;

	return ip6fl_get_idx(seq, *pos - 1);
}
|
|
|
|
|
|
|
|
/*
 * seq_file ->next: step from @v to the following element and bump *pos.
 *
 * When @v is SEQ_START_TOKEN the walk begins with the first flow label;
 * otherwise @v is the previously returned flow label.  Returns the next
 * entry or NULL when the walk is finished.
 */
static void *ip6fl_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
	struct ip6_flowlabel *next;

	next = (v == SEQ_START_TOKEN) ? ip6fl_get_first(seq)
				      : ip6fl_get_next(seq, v);
	++*pos;

	return next;
}
|
|
|
|
|
|
|
|
static void ip6fl_seq_stop(struct seq_file *seq, void *v)
|
2013-01-30 09:27:47 +00:00
|
|
|
__releases(RCU)
|
2005-04-16 22:20:36 +00:00
|
|
|
{
|
2013-01-30 09:27:47 +00:00
|
|
|
rcu_read_unlock_bh();
|
2005-04-16 22:20:36 +00:00
|
|
|
}
|
|
|
|
|
2006-10-31 08:43:44 +00:00
|
|
|
static int ip6fl_seq_show(struct seq_file *seq, void *v)
|
2005-04-16 22:20:36 +00:00
|
|
|
{
|
2012-05-24 16:37:59 +00:00
|
|
|
struct ip6fl_iter_state *state = ip6fl_seq_private(seq);
|
2014-11-04 22:01:00 +00:00
|
|
|
if (v == SEQ_START_TOKEN) {
|
2014-11-04 23:37:03 +00:00
|
|
|
seq_puts(seq, "Label S Owner Users Linger Expires Dst Opt\n");
|
2014-11-04 22:01:00 +00:00
|
|
|
} else {
|
2006-10-31 08:43:44 +00:00
|
|
|
struct ip6_flowlabel *fl = v;
|
2005-04-16 22:20:36 +00:00
|
|
|
seq_printf(seq,
|
2008-10-29 19:50:24 +00:00
|
|
|
"%05X %-1d %-6d %-6d %-6ld %-8ld %pi6 %-4d\n",
|
2012-04-15 05:58:06 +00:00
|
|
|
(unsigned int)ntohl(fl->label),
|
2005-04-16 22:20:36 +00:00
|
|
|
fl->share,
|
2012-05-24 16:37:59 +00:00
|
|
|
((fl->share == IPV6_FL_S_PROCESS) ?
|
|
|
|
pid_nr_ns(fl->owner.pid, state->pid_ns) :
|
|
|
|
((fl->share == IPV6_FL_S_USER) ?
|
|
|
|
from_kuid_munged(seq_user_ns(seq), fl->owner.uid) :
|
|
|
|
0)),
|
2005-04-16 22:20:36 +00:00
|
|
|
atomic_read(&fl->users),
|
|
|
|
fl->linger/HZ,
|
|
|
|
(long)(fl->expires - jiffies)/HZ,
|
2008-10-28 23:05:40 +00:00
|
|
|
&fl->dst,
|
2005-04-16 22:20:36 +00:00
|
|
|
fl->opt ? fl->opt->opt_nflen : 0);
|
|
|
|
}
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2007-07-11 06:07:31 +00:00
|
|
|
static const struct seq_operations ip6fl_seq_ops = {
|
2005-04-16 22:20:36 +00:00
|
|
|
.start = ip6fl_seq_start,
|
|
|
|
.next = ip6fl_seq_next,
|
|
|
|
.stop = ip6fl_seq_stop,
|
|
|
|
.show = ip6fl_seq_show,
|
|
|
|
};
|
|
|
|
|
2010-01-17 03:35:32 +00:00
|
|
|
/*
 * Create the read-only (0444) "ip6_flowlabel" entry under @net's proc_net
 * directory, with ip6fl_seq_ops as its seq_file operations and a
 * struct ip6fl_iter_state sized private area.
 *
 * Returns 0 on success or -ENOMEM if the entry could not be created.
 */
static int __net_init ip6_flowlabel_proc_init(struct net *net)
{
	return proc_create_net("ip6_flowlabel", 0444, net->proc_net,
			       &ip6fl_seq_ops,
			       sizeof(struct ip6fl_iter_state)) ? 0 : -ENOMEM;
}
|
2005-04-16 22:20:36 +00:00
|
|
|
|
2010-01-17 03:35:32 +00:00
|
|
|
/* Remove the "ip6_flowlabel" entry from @net's proc_net directory. */
static void __net_exit ip6_flowlabel_proc_fini(struct net *net)
{
	remove_proc_entry("ip6_flowlabel", net->proc_net);
}
|
|
|
|
#else
|
|
|
|
/* Stub for builds without CONFIG_PROC_FS: nothing to create, report success. */
static inline int ip6_flowlabel_proc_init(struct net *net)
{
	return 0;
}
|
|
|
|
/* Stub for builds without CONFIG_PROC_FS: there is no proc entry to remove. */
static inline void ip6_flowlabel_proc_fini(struct net *net)
{
}
|
2005-04-16 22:20:36 +00:00
|
|
|
#endif
|
2007-12-11 10:23:18 +00:00
|
|
|
|
2010-01-17 03:35:32 +00:00
|
|
|
/*
 * Per-namespace exit hook: call ip6_fl_purge() for @net, then tear down
 * its proc entry.
 */
static void __net_exit ip6_flowlabel_net_exit(struct net *net)
{
	ip6_fl_purge(net);
	ip6_flowlabel_proc_fini(net);
}
|
|
|
|
|
|
|
|
static struct pernet_operations ip6_flowlabel_net_ops = {
|
2008-03-26 23:53:30 +00:00
|
|
|
.init = ip6_flowlabel_proc_init,
|
2008-03-26 23:53:08 +00:00
|
|
|
.exit = ip6_flowlabel_net_exit,
|
|
|
|
};
|
|
|
|
|
2007-12-11 10:23:18 +00:00
|
|
|
int ip6_flowlabel_init(void)
|
|
|
|
{
|
2008-03-26 23:53:30 +00:00
|
|
|
return register_pernet_subsys(&ip6_flowlabel_net_ops);
|
2005-04-16 22:20:36 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
void ip6_flowlabel_cleanup(void)
|
|
|
|
{
|
2019-07-07 09:34:45 +00:00
|
|
|
static_key_deferred_flush(&ipv6_flowlabel_exclusive);
|
2005-04-16 22:20:36 +00:00
|
|
|
del_timer(&ip6_fl_gc_timer);
|
2008-03-26 23:53:08 +00:00
|
|
|
unregister_pernet_subsys(&ip6_flowlabel_net_ops);
|
2005-04-16 22:20:36 +00:00
|
|
|
}
|