From 94f14e4728125f979629b2b020d31cd718191626 Mon Sep 17 00:00:00 2001
From: Johan Hedberg <johan.hedberg@intel.com>
Date: Tue, 11 Sep 2018 14:10:12 +0300
Subject: [PATCH 01/45] Bluetooth: SMP: Fix trying to use non-existent local
 OOB data

A remote device may claim that it has received our OOB data, even
though we never geneated it. Add a new flag to track whether we
actually have OOB data, and ignore the remote peer's flag if haven't
generated OOB data.

Signed-off-by: Johan Hedberg <johan.hedberg@intel.com>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 net/bluetooth/smp.c | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/net/bluetooth/smp.c b/net/bluetooth/smp.c
index ae91e2d40056..9752879fdd3a 100644
--- a/net/bluetooth/smp.c
+++ b/net/bluetooth/smp.c
@@ -83,6 +83,7 @@ enum {
 
 struct smp_dev {
 	/* Secure Connections OOB data */
+	bool			local_oob;
 	u8			local_pk[64];
 	u8			local_rand[16];
 	bool			debug_key;
@@ -599,6 +600,8 @@ int smp_generate_oob(struct hci_dev *hdev, u8 hash[16], u8 rand[16])
 
 	memcpy(rand, smp->local_rand, 16);
 
+	smp->local_oob = true;
+
 	return 0;
 }
 
@@ -1785,7 +1788,7 @@ static u8 smp_cmd_pairing_req(struct l2cap_conn *conn, struct sk_buff *skb)
 	 * successfully received our local OOB data - therefore set the
 	 * flag to indicate that local OOB is in use.
 	 */
-	if (req->oob_flag == SMP_OOB_PRESENT)
+	if (req->oob_flag == SMP_OOB_PRESENT && SMP_DEV(hdev)->local_oob)
 		set_bit(SMP_FLAG_LOCAL_OOB, &smp->flags);
 
 	/* SMP over BR/EDR requires special treatment */
@@ -1967,7 +1970,7 @@ static u8 smp_cmd_pairing_rsp(struct l2cap_conn *conn, struct sk_buff *skb)
 	 * successfully received our local OOB data - therefore set the
 	 * flag to indicate that local OOB is in use.
 	 */
-	if (rsp->oob_flag == SMP_OOB_PRESENT)
+	if (rsp->oob_flag == SMP_OOB_PRESENT && SMP_DEV(hdev)->local_oob)
 		set_bit(SMP_FLAG_LOCAL_OOB, &smp->flags);
 
 	smp->prsp[0] = SMP_CMD_PAIRING_RSP;
@@ -3230,6 +3233,7 @@ static struct l2cap_chan *smp_add_cid(struct hci_dev *hdev, u16 cid)
 		return ERR_CAST(tfm_ecdh);
 	}
 
+	smp->local_oob = false;
 	smp->tfm_aes = tfm_aes;
 	smp->tfm_cmac = tfm_cmac;
 	smp->tfm_ecdh = tfm_ecdh;

From 4ba5175f2c10affd412fa41855cecda02b66cd71 Mon Sep 17 00:00:00 2001
From: Matias Karhumaa <matias.karhumaa@gmail.com>
Date: Tue, 11 Sep 2018 14:10:13 +0300
Subject: [PATCH 02/45] Bluetooth: Use correct tfm to generate OOB data

In case local OOB data was generated and other device initiated pairing
claiming that it has got OOB data, following crash occurred:

[  222.847853] general protection fault: 0000 [#1] SMP PTI
[  222.848025] CPU: 1 PID: 42 Comm: kworker/u5:0 Tainted: G         C        4.18.0-custom #4
[  222.848158] Hardware name: innotek GmbH VirtualBox/VirtualBox, BIOS VirtualBox 12/01/2006
[  222.848307] Workqueue: hci0 hci_rx_work [bluetooth]
[  222.848416] RIP: 0010:compute_ecdh_secret+0x5a/0x270 [bluetooth]
[  222.848540] Code: 0c af f5 48 8b 3d 46 de f0 f6 ba 40 00 00 00 be c0 00 60 00 e8 b7 7b c5 f5 48 85 c0 0f 84 ea 01 00 00 48 89 c3 e8 16 0c af f5 <49> 8b 47 38 be c0 00 60 00 8b 78 f8 48 83 c7 48 e8 51 84 c5 f5 48
[  222.848914] RSP: 0018:ffffb1664087fbc0 EFLAGS: 00010293
[  222.849021] RAX: ffff8a5750d7dc00 RBX: ffff8a5671096780 RCX: ffffffffc08bc32a
[  222.849111] RDX: 0000000000000000 RSI: 00000000006000c0 RDI: ffff8a5752003800
[  222.849192] RBP: ffffb1664087fc60 R08: ffff8a57525280a0 R09: ffff8a5752003800
[  222.849269] R10: ffffb1664087fc70 R11: 0000000000000093 R12: ffff8a5674396e00
[  222.849350] R13: ffff8a574c2e79aa R14: ffff8a574c2e796a R15: 020e0e100d010101
[  222.849429] FS:  0000000000000000(0000) GS:ffff8a5752500000(0000) knlGS:0000000000000000
[  222.849518] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[  222.849586] CR2: 000055856016a038 CR3: 0000000110d2c005 CR4: 00000000000606e0
[  222.849671] Call Trace:
[  222.849745]  ? sc_send_public_key+0x110/0x2a0 [bluetooth]
[  222.849825]  ? sc_send_public_key+0x115/0x2a0 [bluetooth]
[  222.849925]  smp_recv_cb+0x959/0x2490 [bluetooth]
[  222.850023]  ? _cond_resched+0x19/0x40
[  222.850105]  ? mutex_lock+0x12/0x40
[  222.850202]  l2cap_recv_frame+0x109d/0x3420 [bluetooth]
[  222.850315]  ? l2cap_recv_frame+0x109d/0x3420 [bluetooth]
[  222.850426]  ? __switch_to_asm+0x34/0x70
[  222.850515]  ? __switch_to_asm+0x40/0x70
[  222.850625]  ? __switch_to_asm+0x34/0x70
[  222.850724]  ? __switch_to_asm+0x40/0x70
[  222.850786]  ? __switch_to_asm+0x34/0x70
[  222.850846]  ? __switch_to_asm+0x40/0x70
[  222.852581]  ? __switch_to_asm+0x34/0x70
[  222.854976]  ? __switch_to_asm+0x40/0x70
[  222.857475]  ? __switch_to_asm+0x40/0x70
[  222.859775]  ? __switch_to_asm+0x34/0x70
[  222.861218]  ? __switch_to_asm+0x40/0x70
[  222.862327]  ? __switch_to_asm+0x34/0x70
[  222.863758]  l2cap_recv_acldata+0x266/0x3c0 [bluetooth]
[  222.865122]  hci_rx_work+0x1c9/0x430 [bluetooth]
[  222.867144]  process_one_work+0x210/0x4c0
[  222.868248]  worker_thread+0x41/0x4d0
[  222.869420]  kthread+0x141/0x160
[  222.870694]  ? process_one_work+0x4c0/0x4c0
[  222.871668]  ? kthread_create_worker_on_cpu+0x90/0x90
[  222.872896]  ret_from_fork+0x35/0x40
[  222.874132] Modules linked in: algif_hash algif_skcipher af_alg rfcomm bnep btusb btrtl btbcm btintel snd_intel8x0 cmac intel_rapl_perf vboxvideo(C) snd_ac97_codec bluetooth ac97_bus joydev ttm snd_pcm ecdh_generic drm_kms_helper snd_timer snd input_leds drm serio_raw fb_sys_fops soundcore syscopyarea sysfillrect sysimgblt mac_hid sch_fq_codel ib_iser rdma_cm iw_cm ib_cm ib_core iscsi_tcp libiscsi_tcp libiscsi scsi_transport_iscsi ip_tables x_tables autofs4 btrfs zstd_compress raid10 raid456 async_raid6_recov async_memcpy async_pq async_xor async_tx xor raid6_pq libcrc32c raid1 raid0 multipath linear hid_generic usbhid hid crct10dif_pclmul crc32_pclmul ghash_clmulni_intel pcbc aesni_intel aes_x86_64 crypto_simd cryptd glue_helper ahci psmouse libahci i2c_piix4 video e1000 pata_acpi
[  222.883153] fbcon_switch: detected unhandled fb_set_par error, error code -16
[  222.886774] fbcon_switch: detected unhandled fb_set_par error, error code -16
[  222.890503] ---[ end trace 6504aa7a777b5316 ]---
[  222.890541] RIP: 0010:compute_ecdh_secret+0x5a/0x270 [bluetooth]
[  222.890551] Code: 0c af f5 48 8b 3d 46 de f0 f6 ba 40 00 00 00 be c0 00 60 00 e8 b7 7b c5 f5 48 85 c0 0f 84 ea 01 00 00 48 89 c3 e8 16 0c af f5 <49> 8b 47 38 be c0 00 60 00 8b 78 f8 48 83 c7 48 e8 51 84 c5 f5 48
[  222.890555] RSP: 0018:ffffb1664087fbc0 EFLAGS: 00010293
[  222.890561] RAX: ffff8a5750d7dc00 RBX: ffff8a5671096780 RCX: ffffffffc08bc32a
[  222.890565] RDX: 0000000000000000 RSI: 00000000006000c0 RDI: ffff8a5752003800
[  222.890571] RBP: ffffb1664087fc60 R08: ffff8a57525280a0 R09: ffff8a5752003800
[  222.890576] R10: ffffb1664087fc70 R11: 0000000000000093 R12: ffff8a5674396e00
[  222.890581] R13: ffff8a574c2e79aa R14: ffff8a574c2e796a R15: 020e0e100d010101
[  222.890586] FS:  0000000000000000(0000) GS:ffff8a5752500000(0000) knlGS:0000000000000000
[  222.890591] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[  222.890594] CR2: 000055856016a038 CR3: 0000000110d2c005 CR4: 00000000000606e0

This commit fixes a bug where invalid pointer to crypto tfm was used for
SMP SC ECDH calculation when OOB was in use. Solution is to use same
crypto tfm than when generating OOB material on generate_oob() function.

This bug was introduced in commit c0153b0b901a ("Bluetooth: let the crypto
subsystem generate the ecc privkey"). Bug was found by fuzzing kernel SMP
implementation using Synopsys Defensics.

Signed-off-by: Matias Karhumaa <matias.karhumaa@gmail.com>
Signed-off-by: Johan Hedberg <johan.hedberg@intel.com>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 net/bluetooth/smp.c | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/net/bluetooth/smp.c b/net/bluetooth/smp.c
index 9752879fdd3a..3a7b0773536b 100644
--- a/net/bluetooth/smp.c
+++ b/net/bluetooth/smp.c
@@ -2700,7 +2700,13 @@ static int smp_cmd_public_key(struct l2cap_conn *conn, struct sk_buff *skb)
 	 * key was set/generated.
 	 */
 	if (test_bit(SMP_FLAG_LOCAL_OOB, &smp->flags)) {
-		struct smp_dev *smp_dev = chan->data;
+		struct l2cap_chan *hchan = hdev->smp_data;
+		struct smp_dev *smp_dev;
+
+		if (!hchan || !hchan->data)
+			return SMP_UNSPECIFIED;
+
+		smp_dev = hchan->data;
 
 		tfm_ecdh = smp_dev->tfm_ecdh;
 	} else {

From e6a57d22f787e73635ce0d29eef0abb77928b3e9 Mon Sep 17 00:00:00 2001
From: Hermes Zhang <chenhuiz@axis.com>
Date: Tue, 28 Aug 2018 09:48:30 +0800
Subject: [PATCH 03/45] Bluetooth: hci_ldisc: Free rw_semaphore on close

The percpu_rw_semaphore is not currently freed, and this leads to
a crash when the stale rcu callback is invoked.  DEBUG_OBJECTS
detects this.

 ODEBUG: free active (active state 1) object type: rcu_head hint: (null)
 ------------[ cut here ]------------
 WARNING: CPU: 1 PID: 2024 at debug_print_object+0xac/0xc8
 PC is at debug_print_object+0xac/0xc8
 LR is at debug_print_object+0xac/0xc8
 Call trace:
 [<ffffff80082e2c2c>] debug_print_object+0xac/0xc8
 [<ffffff80082e40b0>] debug_check_no_obj_freed+0x1e8/0x228
 [<ffffff8008191254>] kfree+0x1cc/0x250
 [<ffffff80083cc03c>] hci_uart_tty_close+0x54/0x108
 [<ffffff800832e118>] tty_ldisc_close.isra.1+0x40/0x58
 [<ffffff800832e14c>] tty_ldisc_kill+0x1c/0x40
 [<ffffff800832e3dc>] tty_ldisc_release+0x94/0x170
 [<ffffff8008325554>] tty_release_struct+0x1c/0x58
 [<ffffff8008326400>] tty_release+0x3b0/0x490
 [<ffffff80081a3fe8>] __fput+0x88/0x1d0
 [<ffffff80081a418c>] ____fput+0xc/0x18
 [<ffffff80080c0624>] task_work_run+0x9c/0xc0
 [<ffffff80080a9e24>] do_exit+0x24c/0x8a0
 [<ffffff80080aa4e0>] do_group_exit+0x38/0xa0
 [<ffffff80080aa558>] __wake_up_parent+0x0/0x28
 [<ffffff8008082c00>] el0_svc_naked+0x34/0x38
 ---[ end trace bfe08cbd89098cdf ]---

Signed-off-by: Hermes Zhang <chenhuiz@axis.com>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 drivers/bluetooth/hci_ldisc.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/bluetooth/hci_ldisc.c b/drivers/bluetooth/hci_ldisc.c
index 963bb0309e25..ea6238ed5c0e 100644
--- a/drivers/bluetooth/hci_ldisc.c
+++ b/drivers/bluetooth/hci_ldisc.c
@@ -543,6 +543,8 @@ static void hci_uart_tty_close(struct tty_struct *tty)
 	}
 	clear_bit(HCI_UART_PROTO_SET, &hu->flags);
 
+	percpu_free_rwsem(&hu->proto_lock);
+
 	kfree(hu);
 }
 

From 4b1c5d917d34f705096bb7dd8a2bd19b0881970e Mon Sep 17 00:00:00 2001
From: Martin KaFai Lau <kafai@fb.com>
Date: Wed, 12 Sep 2018 10:29:11 -0700
Subject: [PATCH 04/45] bpf: btf: Fix end boundary calculation for type section

The end boundary math for type section is incorrect in
btf_check_all_metas().  It just happens that hdr->type_off
is always 0 for now because there are only two sections
(type and string) and string section must be at the end (ensured
in btf_parse_str_sec).

However, type_off may not be 0 if a new section would be added later.
This patch fixes it.

Fixes: f80442a4cd18 ("bpf: btf: Change how section is supported in btf_header")
Reported-by: Dmitry Vyukov <dvyukov@google.com>
Signed-off-by: Martin KaFai Lau <kafai@fb.com>
Acked-by: Yonghong Song <yhs@fb.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 kernel/bpf/btf.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/kernel/bpf/btf.c b/kernel/bpf/btf.c
index 2590700237c1..138f0302692e 100644
--- a/kernel/bpf/btf.c
+++ b/kernel/bpf/btf.c
@@ -1844,7 +1844,7 @@ static int btf_check_all_metas(struct btf_verifier_env *env)
 
 	hdr = &btf->hdr;
 	cur = btf->nohdr_data + hdr->type_off;
-	end = btf->nohdr_data + hdr->type_len;
+	end = cur + hdr->type_len;
 
 	env->log_type_id = 1;
 	while (cur < end) {

From dd066823db2ac4e22f721ec85190817b58059a54 Mon Sep 17 00:00:00 2001
From: Alexei Starovoitov <ast@kernel.org>
Date: Wed, 12 Sep 2018 14:06:10 -0700
Subject: [PATCH 05/45] bpf/verifier: disallow pointer subtraction

Subtraction of pointers was accidentally allowed for unpriv programs
by commit 82abbf8d2fc4. Revert that part of commit.

Fixes: 82abbf8d2fc4 ("bpf: do not allow root to mangle valid pointers")
Reported-by: Jann Horn <jannh@google.com>
Acked-by: Daniel Borkmann <daniel@iogearbox.net>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 kernel/bpf/verifier.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 92246117d2b0..bb07e74b34a2 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -3163,7 +3163,7 @@ static int adjust_reg_min_max_vals(struct bpf_verifier_env *env,
 				 * an arbitrary scalar. Disallow all math except
 				 * pointer subtraction
 				 */
-				if (opcode == BPF_SUB){
+				if (opcode == BPF_SUB && env->allow_ptr_leaks) {
 					mark_reg_unknown(env, regs, insn->dst_reg);
 					return 0;
 				}

From 4c3d795cb012a378855543a775408fba1ccff6f2 Mon Sep 17 00:00:00 2001
From: Tushar Dave <tushar.n.dave@oracle.com>
Date: Wed, 12 Sep 2018 22:15:29 +0200
Subject: [PATCH 06/45] bpf: use __GFP_COMP while allocating page

Helper bpg_msg_pull_data() can allocate multiple pages while
linearizing multiple scatterlist elements into one shared page.
However, if the shared page has size > PAGE_SIZE, using
copy_page_to_iter() causes below warning.

e.g.
[ 6367.019832] WARNING: CPU: 2 PID: 7410 at lib/iov_iter.c:825
page_copy_sane.part.8+0x0/0x8

To avoid above warning, use __GFP_COMP while allocating multiple
contiguous pages.

Fixes: 015632bb30da ("bpf: sk_msg program helper bpf_sk_msg_pull_data")
Signed-off-by: Tushar Dave <tushar.n.dave@oracle.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 net/core/filter.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/net/core/filter.c b/net/core/filter.c
index aecdeba052d3..5e00f2b85a56 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -2344,7 +2344,8 @@ BPF_CALL_4(bpf_msg_pull_data,
 	if (unlikely(bytes_sg_total > copy))
 		return -EINVAL;
 
-	page = alloc_pages(__GFP_NOWARN | GFP_ATOMIC, get_order(copy));
+	page = alloc_pages(__GFP_NOWARN | GFP_ATOMIC | __GFP_COMP,
+			   get_order(copy));
 	if (unlikely(!page))
 		return -ENOMEM;
 	p = page_address(page);

From 22d0bd82cc7cec7d9ed4bd5913f3ab65643364be Mon Sep 17 00:00:00 2001
From: Xin Long <lucien.xin@gmail.com>
Date: Tue, 11 Sep 2018 14:33:58 +0800
Subject: [PATCH 07/45] ipv6: use rt6_info members when dst is set in
 rt6_fill_node

In inet6_rtm_getroute, since Commit 93531c674315 ("net/ipv6: separate
handling of FIB entries from dst based routes"), it has used rt->from
to dump route info instead of rt.

However for some route like cache, some of its information like flags
or gateway is not the same as that of the 'from' one. It caused 'ip
route get' to dump the wrong route information.

In Jianlin's testing, the output information even lost the expiration
time for a pmtu route cache due to the wrong fib6_flags.

So change to use rt6_info members for dst addr, src addr, flags and
gateway when it tries to dump a route entry without fibmatch set.

v1->v2:
  - not use rt6i_prefsrc.
  - also fix the gw dump issue.

Fixes: 93531c674315 ("net/ipv6: separate handling of FIB entries from dst based routes")
Reported-by: Jianlin Shi <jishi@redhat.com>
Signed-off-by: Xin Long <lucien.xin@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/route.c | 42 ++++++++++++++++++++++++++++++------------
 1 file changed, 30 insertions(+), 12 deletions(-)

diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index 18e00ce1719a..3eed045c65a5 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -4670,20 +4670,31 @@ static int rt6_fill_node(struct net *net, struct sk_buff *skb,
 			 int iif, int type, u32 portid, u32 seq,
 			 unsigned int flags)
 {
-	struct rtmsg *rtm;
+	struct rt6_info *rt6 = (struct rt6_info *)dst;
+	struct rt6key *rt6_dst, *rt6_src;
+	u32 *pmetrics, table, rt6_flags;
 	struct nlmsghdr *nlh;
+	struct rtmsg *rtm;
 	long expires = 0;
-	u32 *pmetrics;
-	u32 table;
 
 	nlh = nlmsg_put(skb, portid, seq, type, sizeof(*rtm), flags);
 	if (!nlh)
 		return -EMSGSIZE;
 
+	if (rt6) {
+		rt6_dst = &rt6->rt6i_dst;
+		rt6_src = &rt6->rt6i_src;
+		rt6_flags = rt6->rt6i_flags;
+	} else {
+		rt6_dst = &rt->fib6_dst;
+		rt6_src = &rt->fib6_src;
+		rt6_flags = rt->fib6_flags;
+	}
+
 	rtm = nlmsg_data(nlh);
 	rtm->rtm_family = AF_INET6;
-	rtm->rtm_dst_len = rt->fib6_dst.plen;
-	rtm->rtm_src_len = rt->fib6_src.plen;
+	rtm->rtm_dst_len = rt6_dst->plen;
+	rtm->rtm_src_len = rt6_src->plen;
 	rtm->rtm_tos = 0;
 	if (rt->fib6_table)
 		table = rt->fib6_table->tb6_id;
@@ -4698,7 +4709,7 @@ static int rt6_fill_node(struct net *net, struct sk_buff *skb,
 	rtm->rtm_scope = RT_SCOPE_UNIVERSE;
 	rtm->rtm_protocol = rt->fib6_protocol;
 
-	if (rt->fib6_flags & RTF_CACHE)
+	if (rt6_flags & RTF_CACHE)
 		rtm->rtm_flags |= RTM_F_CLONED;
 
 	if (dest) {
@@ -4706,7 +4717,7 @@ static int rt6_fill_node(struct net *net, struct sk_buff *skb,
 			goto nla_put_failure;
 		rtm->rtm_dst_len = 128;
 	} else if (rtm->rtm_dst_len)
-		if (nla_put_in6_addr(skb, RTA_DST, &rt->fib6_dst.addr))
+		if (nla_put_in6_addr(skb, RTA_DST, &rt6_dst->addr))
 			goto nla_put_failure;
 #ifdef CONFIG_IPV6_SUBTREES
 	if (src) {
@@ -4714,12 +4725,12 @@ static int rt6_fill_node(struct net *net, struct sk_buff *skb,
 			goto nla_put_failure;
 		rtm->rtm_src_len = 128;
 	} else if (rtm->rtm_src_len &&
-		   nla_put_in6_addr(skb, RTA_SRC, &rt->fib6_src.addr))
+		   nla_put_in6_addr(skb, RTA_SRC, &rt6_src->addr))
 		goto nla_put_failure;
 #endif
 	if (iif) {
 #ifdef CONFIG_IPV6_MROUTE
-		if (ipv6_addr_is_multicast(&rt->fib6_dst.addr)) {
+		if (ipv6_addr_is_multicast(&rt6_dst->addr)) {
 			int err = ip6mr_get_route(net, skb, rtm, portid);
 
 			if (err == 0)
@@ -4754,7 +4765,14 @@ static int rt6_fill_node(struct net *net, struct sk_buff *skb,
 	/* For multipath routes, walk the siblings list and add
 	 * each as a nexthop within RTA_MULTIPATH.
 	 */
-	if (rt->fib6_nsiblings) {
+	if (rt6) {
+		if (rt6_flags & RTF_GATEWAY &&
+		    nla_put_in6_addr(skb, RTA_GATEWAY, &rt6->rt6i_gateway))
+			goto nla_put_failure;
+
+		if (dst->dev && nla_put_u32(skb, RTA_OIF, dst->dev->ifindex))
+			goto nla_put_failure;
+	} else if (rt->fib6_nsiblings) {
 		struct fib6_info *sibling, *next_sibling;
 		struct nlattr *mp;
 
@@ -4777,7 +4795,7 @@ static int rt6_fill_node(struct net *net, struct sk_buff *skb,
 			goto nla_put_failure;
 	}
 
-	if (rt->fib6_flags & RTF_EXPIRES) {
+	if (rt6_flags & RTF_EXPIRES) {
 		expires = dst ? dst->expires : rt->expires;
 		expires -= jiffies;
 	}
@@ -4785,7 +4803,7 @@ static int rt6_fill_node(struct net *net, struct sk_buff *skb,
 	if (rtnl_put_cacheinfo(skb, dst, 0, expires, dst ? dst->error : 0) < 0)
 		goto nla_put_failure;
 
-	if (nla_put_u8(skb, RTA_PREF, IPV6_EXTRACT_PREF(rt->fib6_flags)))
+	if (nla_put_u8(skb, RTA_PREF, IPV6_EXTRACT_PREF(rt6_flags)))
 		goto nla_put_failure;
 
 

From ad4f15dc2c70b1de5e0a64d27335962fbc9cf71c Mon Sep 17 00:00:00 2001
From: Juergen Gross <jgross@suse.com>
Date: Tue, 11 Sep 2018 09:04:48 +0200
Subject: [PATCH 08/45] xen/netfront: don't bug in case of too many frags

Commit 57f230ab04d291 ("xen/netfront: raise max number of slots in
xennet_get_responses()") raised the max number of allowed slots by one.
This seems to be problematic in some configurations with netback using
a larger MAX_SKB_FRAGS value (e.g. old Linux kernel with MAX_SKB_FRAGS
defined as 18 instead of nowadays 17).

Instead of BUG_ON() in this case just fall back to retransmission.

Fixes: 57f230ab04d291 ("xen/netfront: raise max number of slots in xennet_get_responses()")
Cc: stable@vger.kernel.org
Signed-off-by: Juergen Gross <jgross@suse.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/xen-netfront.c | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/drivers/net/xen-netfront.c b/drivers/net/xen-netfront.c
index 9407acbd19a9..f17f602e6171 100644
--- a/drivers/net/xen-netfront.c
+++ b/drivers/net/xen-netfront.c
@@ -908,7 +908,11 @@ static RING_IDX xennet_fill_frags(struct netfront_queue *queue,
 			BUG_ON(pull_to <= skb_headlen(skb));
 			__pskb_pull_tail(skb, pull_to - skb_headlen(skb));
 		}
-		BUG_ON(skb_shinfo(skb)->nr_frags >= MAX_SKB_FRAGS);
+		if (unlikely(skb_shinfo(skb)->nr_frags >= MAX_SKB_FRAGS)) {
+			queue->rx.rsp_cons = ++cons;
+			kfree_skb(nskb);
+			return ~0U;
+		}
 
 		skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags,
 				skb_frag_page(nfrag),
@@ -1045,6 +1049,8 @@ err:
 		skb->len += rx->status;
 
 		i = xennet_fill_frags(queue, skb, &tmpq);
+		if (unlikely(i == ~0U))
+			goto err;
 
 		if (rx->flags & XEN_NETRXF_csum_blank)
 			skb->ip_summed = CHECKSUM_PARTIAL;

From f5b9bac7451cfc962970cb3fa3a7027ffa69e369 Mon Sep 17 00:00:00 2001
From: Cong Wang <xiyou.wangcong@gmail.com>
Date: Tue, 11 Sep 2018 14:22:23 -0700
Subject: [PATCH 09/45] net_sched: notify filter deletion when deleting a chain

When we delete a chain of filters, we need to notify
user-space we are deleting each filters in this chain
too.

Fixes: 32a4f5ecd738 ("net: sched: introduce chain object to uapi")
Cc: Jiri Pirko <jiri@mellanox.com>
Signed-off-by: Cong Wang <xiyou.wangcong@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sched/cls_api.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c
index 1a67af8a6e8c..0a75cb2e5e7b 100644
--- a/net/sched/cls_api.c
+++ b/net/sched/cls_api.c
@@ -1902,6 +1902,8 @@ replay:
 				RTM_NEWCHAIN, false);
 		break;
 	case RTM_DELCHAIN:
+		tfilter_notify_chain(net, skb, block, q, parent, n,
+				     chain, RTM_DELTFILTER);
 		/* Flush the chain first as the user requested chain removal. */
 		tcf_chain_flush(chain);
 		/* In case the chain was successfully deleted, put a reference

From 018349d70f28a78d5343b3660cb66e1667005f8a Mon Sep 17 00:00:00 2001
From: Stephen Hemminger <stephen@networkplumber.org>
Date: Thu, 13 Sep 2018 08:03:43 -0700
Subject: [PATCH 10/45] hv_netvsc: fix schedule in RCU context

When netvsc device is removed it can call reschedule in RCU context.
This happens because canceling the subchannel setup work could (in theory)
cause a reschedule when manipulating the timer.

To reproduce, run with lockdep enabled kernel and unbind
a network device from hv_netvsc (via sysfs).

[  160.682011] WARNING: suspicious RCU usage
[  160.707466] 4.19.0-rc3-uio+ #2 Not tainted
[  160.709937] -----------------------------
[  160.712352] ./include/linux/rcupdate.h:302 Illegal context switch in RCU read-side critical section!
[  160.723691]
[  160.723691] other info that might help us debug this:
[  160.723691]
[  160.730955]
[  160.730955] rcu_scheduler_active = 2, debug_locks = 1
[  160.762813] 5 locks held by rebind-eth.sh/1812:
[  160.766851]  #0: 000000008befa37a (sb_writers#6){.+.+}, at: vfs_write+0x184/0x1b0
[  160.773416]  #1: 00000000b097f236 (&of->mutex){+.+.}, at: kernfs_fop_write+0xe2/0x1a0
[  160.783766]  #2: 0000000041ee6889 (kn->count#3){++++}, at: kernfs_fop_write+0xeb/0x1a0
[  160.787465]  #3: 0000000056d92a74 (&dev->mutex){....}, at: device_release_driver_internal+0x39/0x250
[  160.816987]  #4: 0000000030f6031e (rcu_read_lock){....}, at: netvsc_remove+0x1e/0x250 [hv_netvsc]
[  160.828629]
[  160.828629] stack backtrace:
[  160.831966] CPU: 1 PID: 1812 Comm: rebind-eth.sh Not tainted 4.19.0-rc3-uio+ #2
[  160.832952] Hardware name: Microsoft Corporation Virtual Machine/Virtual Machine, BIOS Hyper-V UEFI Release v1.0 11/26/2012
[  160.832952] Call Trace:
[  160.832952]  dump_stack+0x85/0xcb
[  160.832952]  ___might_sleep+0x1a3/0x240
[  160.832952]  __flush_work+0x57/0x2e0
[  160.832952]  ? __mutex_lock+0x83/0x990
[  160.832952]  ? __kernfs_remove+0x24f/0x2e0
[  160.832952]  ? __kernfs_remove+0x1b2/0x2e0
[  160.832952]  ? mark_held_locks+0x50/0x80
[  160.832952]  ? get_work_pool+0x90/0x90
[  160.832952]  __cancel_work_timer+0x13c/0x1e0
[  160.832952]  ? netvsc_remove+0x1e/0x250 [hv_netvsc]
[  160.832952]  ? __lock_is_held+0x55/0x90
[  160.832952]  netvsc_remove+0x9a/0x250 [hv_netvsc]
[  160.832952]  vmbus_remove+0x26/0x30
[  160.832952]  device_release_driver_internal+0x18a/0x250
[  160.832952]  unbind_store+0xb4/0x180
[  160.832952]  kernfs_fop_write+0x113/0x1a0
[  160.832952]  __vfs_write+0x36/0x1a0
[  160.832952]  ? rcu_read_lock_sched_held+0x6b/0x80
[  160.832952]  ? rcu_sync_lockdep_assert+0x2e/0x60
[  160.832952]  ? __sb_start_write+0x141/0x1a0
[  160.832952]  ? vfs_write+0x184/0x1b0
[  160.832952]  vfs_write+0xbe/0x1b0
[  160.832952]  ksys_write+0x55/0xc0
[  160.832952]  do_syscall_64+0x60/0x1b0
[  160.832952]  entry_SYSCALL_64_after_hwframe+0x49/0xbe
[  160.832952] RIP: 0033:0x7fe48f4c8154

Resolve this by getting RTNL earlier. This is safe because the subchannel
work queue does trylock on RTNL and will detect the race.

Fixes: 7b2ee50c0cd5 ("hv_netvsc: common detach logic")
Signed-off-by: Stephen Hemminger <sthemmin@microsoft.com>
Reviewed-by: Haiyang Zhang <haiyangz@microsoft.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/hyperv/netvsc_drv.c | 9 +++------
 1 file changed, 3 insertions(+), 6 deletions(-)

diff --git a/drivers/net/hyperv/netvsc_drv.c b/drivers/net/hyperv/netvsc_drv.c
index 70921bbe0e28..915fbd66a02b 100644
--- a/drivers/net/hyperv/netvsc_drv.c
+++ b/drivers/net/hyperv/netvsc_drv.c
@@ -2272,17 +2272,15 @@ static int netvsc_remove(struct hv_device *dev)
 
 	cancel_delayed_work_sync(&ndev_ctx->dwork);
 
-	rcu_read_lock();
-	nvdev = rcu_dereference(ndev_ctx->nvdev);
-
-	if  (nvdev)
+	rtnl_lock();
+	nvdev = rtnl_dereference(ndev_ctx->nvdev);
+	if (nvdev)
 		cancel_work_sync(&nvdev->subchan_work);
 
 	/*
 	 * Call to the vsc driver to let it know that the device is being
 	 * removed. Also blocks mtu and channel changes.
 	 */
-	rtnl_lock();
 	vf_netdev = rtnl_dereference(ndev_ctx->vf_netdev);
 	if (vf_netdev)
 		netvsc_unregister_vf(vf_netdev);
@@ -2294,7 +2292,6 @@ static int netvsc_remove(struct hv_device *dev)
 	list_del(&ndev_ctx->list);
 
 	rtnl_unlock();
-	rcu_read_unlock();
 
 	hv_set_drvdata(dev, NULL);
 

From 9824dfae5741275473a23a7ed5756c7b6efacc9d Mon Sep 17 00:00:00 2001
From: Willy Tarreau <w@1wt.eu>
Date: Wed, 12 Sep 2018 07:36:35 +0200
Subject: [PATCH 11/45] net/appletalk: fix minor pointer leak to userspace in
 SIOCFINDIPDDPRT

Fields ->dev and ->next of struct ipddp_route may be copied to
userspace on the SIOCFINDIPDDPRT ioctl. This is only accessible
to CAP_NET_ADMIN though. Let's manually copy the relevant fields
instead of using memcpy().

BugLink: http://blog.infosectcbr.com.au/2018/09/linux-kernel-infoleaks.html
Cc: Jann Horn <jannh@google.com>
Signed-off-by: Willy Tarreau <w@1wt.eu>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/appletalk/ipddp.c | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/drivers/net/appletalk/ipddp.c b/drivers/net/appletalk/ipddp.c
index 9375cef22420..3d27616d9c85 100644
--- a/drivers/net/appletalk/ipddp.c
+++ b/drivers/net/appletalk/ipddp.c
@@ -283,8 +283,12 @@ static int ipddp_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
                 case SIOCFINDIPDDPRT:
 			spin_lock_bh(&ipddp_route_lock);
 			rp = __ipddp_find_route(&rcp);
-			if (rp)
-				memcpy(&rcp2, rp, sizeof(rcp2));
+			if (rp) {
+				memset(&rcp2, 0, sizeof(rcp2));
+				rcp2.ip    = rp->ip;
+				rcp2.at    = rp->at;
+				rcp2.flags = rp->flags;
+			}
 			spin_unlock_bh(&ipddp_route_lock);
 
 			if (rp) {

From 56a49d7048703f5ffdb84d3a0ee034108fba6850 Mon Sep 17 00:00:00 2001
From: Roopa Prabhu <roopa@cumulusnetworks.com>
Date: Wed, 12 Sep 2018 13:21:48 -0700
Subject: [PATCH 12/45] net: rtnl_configure_link: fix dev flags changes arg to
 __dev_notify_flags

This fix addresses https://bugzilla.kernel.org/show_bug.cgi?id=201071

Commit 5025f7f7d506 wrongly relied on __dev_change_flags to notify users of
dev flag changes in the case when dev->rtnl_link_state = RTNL_LINK_INITIALIZED.
Fix it by indicating flag changes explicitly to __dev_notify_flags.

Fixes: 5025f7f7d506 ("rtnetlink: add rtnl_link_state check in rtnl_configure_link")
Reported-By: Liam mcbirnie <liam.mcbirnie@boeing.com>
Signed-off-by: Roopa Prabhu <roopa@cumulusnetworks.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/rtnetlink.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index 60c928894a78..63ce2283a456 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -2810,7 +2810,7 @@ int rtnl_configure_link(struct net_device *dev, const struct ifinfomsg *ifm)
 	}
 
 	if (dev->rtnl_link_state == RTNL_LINK_INITIALIZED) {
-		__dev_notify_flags(dev, old_flags, 0U);
+		__dev_notify_flags(dev, old_flags, (old_flags ^ dev->flags));
 	} else {
 		dev->rtnl_link_state = RTNL_LINK_INITIALIZED;
 		__dev_notify_flags(dev, old_flags, ~0U);

From f0e0d04413fcce9bc76388839099aee93cd0d33b Mon Sep 17 00:00:00 2001
From: Vasily Khoruzhick <vasilykh@arista.com>
Date: Thu, 13 Sep 2018 11:12:03 -0700
Subject: [PATCH 13/45] neighbour: confirm neigh entries when ARP packet is
 received

Update 'confirmed' timestamp when ARP packet is received. It shouldn't
affect locktime logic and anyway entry can be confirmed by any higher-layer
protocol. Thus it makes sense to confirm it when ARP packet is received.

Fixes: 77d7123342dc ("neighbour: update neigh timestamps iff update is effective")
Signed-off-by: Vasily Khoruzhick <vasilykh@arista.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/neighbour.c | 13 ++++++++-----
 1 file changed, 8 insertions(+), 5 deletions(-)

diff --git a/net/core/neighbour.c b/net/core/neighbour.c
index aa19d86937af..91592fceeaad 100644
--- a/net/core/neighbour.c
+++ b/net/core/neighbour.c
@@ -1180,6 +1180,12 @@ int neigh_update(struct neighbour *neigh, const u8 *lladdr, u8 new,
 		lladdr = neigh->ha;
 	}
 
+	/* Update confirmed timestamp for neighbour entry after we
+	 * received ARP packet even if it doesn't change IP to MAC binding.
+	 */
+	if (new & NUD_CONNECTED)
+		neigh->confirmed = jiffies;
+
 	/* If entry was valid and address is not changed,
 	   do not change entry state, if new one is STALE.
 	 */
@@ -1201,15 +1207,12 @@ int neigh_update(struct neighbour *neigh, const u8 *lladdr, u8 new,
 		}
 	}
 
-	/* Update timestamps only once we know we will make a change to the
+	/* Update timestamp only once we know we will make a change to the
 	 * neighbour entry. Otherwise we risk to move the locktime window with
 	 * noop updates and ignore relevant ARP updates.
 	 */
-	if (new != old || lladdr != neigh->ha) {
-		if (new & NUD_CONNECTED)
-			neigh->confirmed = jiffies;
+	if (new != old || lladdr != neigh->ha)
 		neigh->updated = jiffies;
-	}
 
 	if (new != old) {
 		neigh_del_timer(neigh);

From 7cba09c6d5bc73ebbd25a353742d9ddb7a713b95 Mon Sep 17 00:00:00 2001
From: Sabrina Dubroca <sd@queasysnail.net>
Date: Wed, 12 Sep 2018 17:44:41 +0200
Subject: [PATCH 14/45] tls: don't copy the key out of
 tls12_crypto_info_aes_gcm_128

There's no need to copy the key to an on-stack buffer before calling
crypto_aead_setkey().

Fixes: 3c4d7559159b ("tls: kernel TLS support")
Signed-off-by: Sabrina Dubroca <sd@queasysnail.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/tls/tls_sw.c | 5 +----
 1 file changed, 1 insertion(+), 4 deletions(-)

diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c
index e28a6ff25d96..f29b7c49cbf2 100644
--- a/net/tls/tls_sw.c
+++ b/net/tls/tls_sw.c
@@ -1136,7 +1136,6 @@ void tls_sw_free_resources_rx(struct sock *sk)
 
 int tls_set_sw_offload(struct sock *sk, struct tls_context *ctx, int tx)
 {
-	char keyval[TLS_CIPHER_AES_GCM_128_KEY_SIZE];
 	struct tls_crypto_info *crypto_info;
 	struct tls12_crypto_info_aes_gcm_128 *gcm_128_info;
 	struct tls_sw_context_tx *sw_ctx_tx = NULL;
@@ -1265,9 +1264,7 @@ int tls_set_sw_offload(struct sock *sk, struct tls_context *ctx, int tx)
 
 	ctx->push_pending_record = tls_sw_push_pending_record;
 
-	memcpy(keyval, gcm_128_info->key, TLS_CIPHER_AES_GCM_128_KEY_SIZE);
-
-	rc = crypto_aead_setkey(*aead, keyval,
+	rc = crypto_aead_setkey(*aead, gcm_128_info->key,
 				TLS_CIPHER_AES_GCM_128_KEY_SIZE);
 	if (rc)
 		goto free_aead;

From 86029d10af18381814881d6cce2dd6872163b59f Mon Sep 17 00:00:00 2001
From: Sabrina Dubroca <sd@queasysnail.net>
Date: Wed, 12 Sep 2018 17:44:42 +0200
Subject: [PATCH 15/45] tls: zero the crypto information from tls_context
 before freeing

This contains key material in crypto_send_aes_gcm_128 and
crypto_recv_aes_gcm_128.

Introduce union tls_crypto_context, and replace the two identical
unions directly embedded in struct tls_context with it. We can then
use this union to clean up the memory in the new tls_ctx_free()
function.

Fixes: 3c4d7559159b ("tls: kernel TLS support")
Signed-off-by: Sabrina Dubroca <sd@queasysnail.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/tls.h             | 19 +++++++++----------
 net/tls/tls_device.c          |  6 +++---
 net/tls/tls_device_fallback.c |  2 +-
 net/tls/tls_main.c            | 20 +++++++++++++++-----
 net/tls/tls_sw.c              |  8 ++++----
 5 files changed, 32 insertions(+), 23 deletions(-)

diff --git a/include/net/tls.h b/include/net/tls.h
index d5c683e8bb22..0a769cf2f5f3 100644
--- a/include/net/tls.h
+++ b/include/net/tls.h
@@ -171,15 +171,14 @@ struct cipher_context {
 	char *rec_seq;
 };
 
+union tls_crypto_context {
+	struct tls_crypto_info info;
+	struct tls12_crypto_info_aes_gcm_128 aes_gcm_128;
+};
+
 struct tls_context {
-	union {
-		struct tls_crypto_info crypto_send;
-		struct tls12_crypto_info_aes_gcm_128 crypto_send_aes_gcm_128;
-	};
-	union {
-		struct tls_crypto_info crypto_recv;
-		struct tls12_crypto_info_aes_gcm_128 crypto_recv_aes_gcm_128;
-	};
+	union tls_crypto_context crypto_send;
+	union tls_crypto_context crypto_recv;
 
 	struct list_head list;
 	struct net_device *netdev;
@@ -367,8 +366,8 @@ static inline void tls_fill_prepend(struct tls_context *ctx,
 	 * size KTLS_DTLS_HEADER_SIZE + KTLS_DTLS_NONCE_EXPLICIT_SIZE
 	 */
 	buf[0] = record_type;
-	buf[1] = TLS_VERSION_MINOR(ctx->crypto_send.version);
-	buf[2] = TLS_VERSION_MAJOR(ctx->crypto_send.version);
+	buf[1] = TLS_VERSION_MINOR(ctx->crypto_send.info.version);
+	buf[2] = TLS_VERSION_MAJOR(ctx->crypto_send.info.version);
 	/* we can use IV for nonce explicit according to spec */
 	buf[3] = pkt_len >> 8;
 	buf[4] = pkt_len & 0xFF;
diff --git a/net/tls/tls_device.c b/net/tls/tls_device.c
index 292742e50bfa..961b07d4d41c 100644
--- a/net/tls/tls_device.c
+++ b/net/tls/tls_device.c
@@ -686,7 +686,7 @@ int tls_set_device_offload(struct sock *sk, struct tls_context *ctx)
 		goto free_marker_record;
 	}
 
-	crypto_info = &ctx->crypto_send;
+	crypto_info = &ctx->crypto_send.info;
 	switch (crypto_info->cipher_type) {
 	case TLS_CIPHER_AES_GCM_128:
 		nonce_size = TLS_CIPHER_AES_GCM_128_IV_SIZE;
@@ -780,7 +780,7 @@ int tls_set_device_offload(struct sock *sk, struct tls_context *ctx)
 
 	ctx->priv_ctx_tx = offload_ctx;
 	rc = netdev->tlsdev_ops->tls_dev_add(netdev, sk, TLS_OFFLOAD_CTX_DIR_TX,
-					     &ctx->crypto_send,
+					     &ctx->crypto_send.info,
 					     tcp_sk(sk)->write_seq);
 	if (rc)
 		goto release_netdev;
@@ -862,7 +862,7 @@ int tls_set_device_offload_rx(struct sock *sk, struct tls_context *ctx)
 		goto release_ctx;
 
 	rc = netdev->tlsdev_ops->tls_dev_add(netdev, sk, TLS_OFFLOAD_CTX_DIR_RX,
-					     &ctx->crypto_recv,
+					     &ctx->crypto_recv.info,
 					     tcp_sk(sk)->copied_seq);
 	if (rc) {
 		pr_err_ratelimited("%s: The netdev has refused to offload this socket\n",
diff --git a/net/tls/tls_device_fallback.c b/net/tls/tls_device_fallback.c
index 6102169239d1..450a6dbc5a88 100644
--- a/net/tls/tls_device_fallback.c
+++ b/net/tls/tls_device_fallback.c
@@ -320,7 +320,7 @@ static struct sk_buff *tls_enc_skb(struct tls_context *tls_ctx,
 		goto free_req;
 
 	iv = buf;
-	memcpy(iv, tls_ctx->crypto_send_aes_gcm_128.salt,
+	memcpy(iv, tls_ctx->crypto_send.aes_gcm_128.salt,
 	       TLS_CIPHER_AES_GCM_128_SALT_SIZE);
 	aad = buf + TLS_CIPHER_AES_GCM_128_SALT_SIZE +
 	      TLS_CIPHER_AES_GCM_128_IV_SIZE;
diff --git a/net/tls/tls_main.c b/net/tls/tls_main.c
index 180b6640e531..737b3865be1b 100644
--- a/net/tls/tls_main.c
+++ b/net/tls/tls_main.c
@@ -241,6 +241,16 @@ static void tls_write_space(struct sock *sk)
 	ctx->sk_write_space(sk);
 }
 
+static void tls_ctx_free(struct tls_context *ctx)
+{
+	if (!ctx)
+		return;
+
+	memzero_explicit(&ctx->crypto_send, sizeof(ctx->crypto_send));
+	memzero_explicit(&ctx->crypto_recv, sizeof(ctx->crypto_recv));
+	kfree(ctx);
+}
+
 static void tls_sk_proto_close(struct sock *sk, long timeout)
 {
 	struct tls_context *ctx = tls_get_ctx(sk);
@@ -294,7 +304,7 @@ static void tls_sk_proto_close(struct sock *sk, long timeout)
 #else
 	{
 #endif
-		kfree(ctx);
+		tls_ctx_free(ctx);
 		ctx = NULL;
 	}
 
@@ -305,7 +315,7 @@ skip_tx_cleanup:
 	 * for sk->sk_prot->unhash [tls_hw_unhash]
 	 */
 	if (free_ctx)
-		kfree(ctx);
+		tls_ctx_free(ctx);
 }
 
 static int do_tls_getsockopt_tx(struct sock *sk, char __user *optval,
@@ -330,7 +340,7 @@ static int do_tls_getsockopt_tx(struct sock *sk, char __user *optval,
 	}
 
 	/* get user crypto info */
-	crypto_info = &ctx->crypto_send;
+	crypto_info = &ctx->crypto_send.info;
 
 	if (!TLS_CRYPTO_INFO_READY(crypto_info)) {
 		rc = -EBUSY;
@@ -417,9 +427,9 @@ static int do_tls_setsockopt_conf(struct sock *sk, char __user *optval,
 	}
 
 	if (tx)
-		crypto_info = &ctx->crypto_send;
+		crypto_info = &ctx->crypto_send.info;
 	else
-		crypto_info = &ctx->crypto_recv;
+		crypto_info = &ctx->crypto_recv.info;
 
 	/* Currently we don't support set crypto info more than one time */
 	if (TLS_CRYPTO_INFO_READY(crypto_info)) {
diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c
index f29b7c49cbf2..9e918489f4fb 100644
--- a/net/tls/tls_sw.c
+++ b/net/tls/tls_sw.c
@@ -1055,8 +1055,8 @@ static int tls_read_size(struct strparser *strp, struct sk_buff *skb)
 		goto read_failure;
 	}
 
-	if (header[1] != TLS_VERSION_MINOR(tls_ctx->crypto_recv.version) ||
-	    header[2] != TLS_VERSION_MAJOR(tls_ctx->crypto_recv.version)) {
+	if (header[1] != TLS_VERSION_MINOR(tls_ctx->crypto_recv.info.version) ||
+	    header[2] != TLS_VERSION_MAJOR(tls_ctx->crypto_recv.info.version)) {
 		ret = -EINVAL;
 		goto read_failure;
 	}
@@ -1180,12 +1180,12 @@ int tls_set_sw_offload(struct sock *sk, struct tls_context *ctx, int tx)
 
 	if (tx) {
 		crypto_init_wait(&sw_ctx_tx->async_wait);
-		crypto_info = &ctx->crypto_send;
+		crypto_info = &ctx->crypto_send.info;
 		cctx = &ctx->tx;
 		aead = &sw_ctx_tx->aead_send;
 	} else {
 		crypto_init_wait(&sw_ctx_rx->async_wait);
-		crypto_info = &ctx->crypto_recv;
+		crypto_info = &ctx->crypto_recv.info;
 		cctx = &ctx->rx;
 		aead = &sw_ctx_rx->aead_recv;
 	}

From c844eb46b7d43c2cf760169df5ae1d5b033af338 Mon Sep 17 00:00:00 2001
From: Sabrina Dubroca <sd@queasysnail.net>
Date: Wed, 12 Sep 2018 17:44:43 +0200
Subject: [PATCH 16/45] tls: clear key material from kernel memory when
 do_tls_setsockopt_conf fails

Fixes: 3c4d7559159b ("tls: kernel TLS support")
Signed-off-by: Sabrina Dubroca <sd@queasysnail.net>
Signed-off-by: Sabrina Dubroca <sd@queasysnail.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/tls/tls_main.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/tls/tls_main.c b/net/tls/tls_main.c
index 737b3865be1b..523622dc74f8 100644
--- a/net/tls/tls_main.c
+++ b/net/tls/tls_main.c
@@ -509,7 +509,7 @@ static int do_tls_setsockopt_conf(struct sock *sk, char __user *optval,
 	goto out;
 
 err_crypto_info:
-	memset(crypto_info, 0, sizeof(*crypto_info));
+	memzero_explicit(crypto_info, sizeof(union tls_crypto_context));
 out:
 	return rc;
 }

From c56cae23c6b167acc68043c683c4573b80cbcc2c Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Toke=20H=C3=B8iland-J=C3=B8rgensen?= <toke@toke.dk>
Date: Thu, 13 Sep 2018 16:43:07 +0200
Subject: [PATCH 17/45] gso_segment: Reset skb->mac_len after modifying network
 header
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

When splitting a GSO segment that consists of encapsulated packets, the
skb->mac_len of the segments can end up being set wrong, causing packet
drops in particular when using act_mirred and ifb interfaces in
combination with a qdisc that splits GSO packets.

This happens because at the time skb_segment() is called, network_header
will point to the inner header, throwing off the calculation in
skb_reset_mac_len(). The network_header is subsequently adjust by the
outer IP gso_segment handlers, but they don't set the mac_len.

Fix this by adding skb_reset_mac_len() calls to both the IPv4 and IPv6
gso_segment handlers, after they modify the network_header.

Many thanks to Eric Dumazet for his help in identifying the cause of
the bug.

Acked-by: Dave Taht <dave.taht@gmail.com>
Reviewed-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: Toke Høiland-Jørgensen <toke@toke.dk>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/af_inet.c     | 1 +
 net/ipv6/ip6_offload.c | 1 +
 2 files changed, 2 insertions(+)

diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index 20fda8fb8ffd..1fbe2f815474 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -1377,6 +1377,7 @@ struct sk_buff *inet_gso_segment(struct sk_buff *skb,
 		if (encap)
 			skb_reset_inner_headers(skb);
 		skb->network_header = (u8 *)iph - skb->head;
+		skb_reset_mac_len(skb);
 	} while ((skb = skb->next));
 
 out:
diff --git a/net/ipv6/ip6_offload.c b/net/ipv6/ip6_offload.c
index 37ff4805b20c..c7e495f12011 100644
--- a/net/ipv6/ip6_offload.c
+++ b/net/ipv6/ip6_offload.c
@@ -115,6 +115,7 @@ static struct sk_buff *ipv6_gso_segment(struct sk_buff *skb,
 			payload_len = skb->len - nhoff - sizeof(*ipv6h);
 		ipv6h->payload_len = htons(payload_len);
 		skb->network_header = (u8 *)ipv6h - skb->head;
+		skb_reset_mac_len(skb);
 
 		if (udpfrag) {
 			int err = ip6_find_1stfragopt(skb, &prevhdr);

From 1cebf8f143c21eb422cd0f4e27ab2ae366eb4d04 Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes.berg@intel.com>
Date: Thu, 13 Sep 2018 14:40:55 +0200
Subject: [PATCH 18/45] socket: fix struct ifreq size in compat ioctl

As reported by Reobert O'Callahan, since Viro's commit to kill
dev_ifsioc() we attempt to copy too much data in compat mode,
which may lead to EFAULT when the 32-bit version of struct ifreq
sits at/near the end of a page boundary, and the next page isn't
mapped.

Fix this by passing the approprate compat/non-compat size to copy
and using that, as before the dev_ifsioc() removal. This works
because only the embedded "struct ifmap" has different size, and
this is only used in SIOCGIFMAP/SIOCSIFMAP which has a different
handler. All other parts of the union are naturally compatible.

This fixes https://bugzilla.kernel.org/show_bug.cgi?id=199469.

Fixes: bf4405737f9f ("kill dev_ifsioc()")
Reported-by: Robert O'Callahan <robert@ocallahan.org>
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/socket.c | 22 ++++++++++++++--------
 1 file changed, 14 insertions(+), 8 deletions(-)

diff --git a/net/socket.c b/net/socket.c
index e6945e318f02..01f3f8f32d6f 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -941,7 +941,8 @@ void dlci_ioctl_set(int (*hook) (unsigned int, void __user *))
 EXPORT_SYMBOL(dlci_ioctl_set);
 
 static long sock_do_ioctl(struct net *net, struct socket *sock,
-				 unsigned int cmd, unsigned long arg)
+			  unsigned int cmd, unsigned long arg,
+			  unsigned int ifreq_size)
 {
 	int err;
 	void __user *argp = (void __user *)arg;
@@ -967,11 +968,11 @@ static long sock_do_ioctl(struct net *net, struct socket *sock,
 	} else {
 		struct ifreq ifr;
 		bool need_copyout;
-		if (copy_from_user(&ifr, argp, sizeof(struct ifreq)))
+		if (copy_from_user(&ifr, argp, ifreq_size))
 			return -EFAULT;
 		err = dev_ioctl(net, cmd, &ifr, &need_copyout);
 		if (!err && need_copyout)
-			if (copy_to_user(argp, &ifr, sizeof(struct ifreq)))
+			if (copy_to_user(argp, &ifr, ifreq_size))
 				return -EFAULT;
 	}
 	return err;
@@ -1070,7 +1071,8 @@ static long sock_ioctl(struct file *file, unsigned cmd, unsigned long arg)
 			err = open_related_ns(&net->ns, get_net_ns);
 			break;
 		default:
-			err = sock_do_ioctl(net, sock, cmd, arg);
+			err = sock_do_ioctl(net, sock, cmd, arg,
+					    sizeof(struct ifreq));
 			break;
 		}
 	return err;
@@ -2750,7 +2752,8 @@ static int do_siocgstamp(struct net *net, struct socket *sock,
 	int err;
 
 	set_fs(KERNEL_DS);
-	err = sock_do_ioctl(net, sock, cmd, (unsigned long)&ktv);
+	err = sock_do_ioctl(net, sock, cmd, (unsigned long)&ktv,
+			    sizeof(struct compat_ifreq));
 	set_fs(old_fs);
 	if (!err)
 		err = compat_put_timeval(&ktv, up);
@@ -2766,7 +2769,8 @@ static int do_siocgstampns(struct net *net, struct socket *sock,
 	int err;
 
 	set_fs(KERNEL_DS);
-	err = sock_do_ioctl(net, sock, cmd, (unsigned long)&kts);
+	err = sock_do_ioctl(net, sock, cmd, (unsigned long)&kts,
+			    sizeof(struct compat_ifreq));
 	set_fs(old_fs);
 	if (!err)
 		err = compat_put_timespec(&kts, up);
@@ -3072,7 +3076,8 @@ static int routing_ioctl(struct net *net, struct socket *sock,
 	}
 
 	set_fs(KERNEL_DS);
-	ret = sock_do_ioctl(net, sock, cmd, (unsigned long) r);
+	ret = sock_do_ioctl(net, sock, cmd, (unsigned long) r,
+			    sizeof(struct compat_ifreq));
 	set_fs(old_fs);
 
 out:
@@ -3185,7 +3190,8 @@ static int compat_sock_ioctl_trans(struct file *file, struct socket *sock,
 	case SIOCBONDSETHWADDR:
 	case SIOCBONDCHANGEACTIVE:
 	case SIOCGIFNAME:
-		return sock_do_ioctl(net, sock, cmd, arg);
+		return sock_do_ioctl(net, sock, cmd, arg,
+				     sizeof(struct compat_ifreq));
 	}
 
 	return -ENOIOCTLCMD;

From 34043d250f51368f214aed7f54c2dc29c819a8c7 Mon Sep 17 00:00:00 2001
From: Davide Caratti <dcaratti@redhat.com>
Date: Fri, 14 Sep 2018 12:03:18 +0200
Subject: [PATCH 19/45] net/sched: act_sample: fix NULL dereference in the data
 path

Matteo reported the following splat, testing the datapath of TC 'sample':

 BUG: KASAN: null-ptr-deref in tcf_sample_act+0xc4/0x310
 Read of size 8 at addr 0000000000000000 by task nc/433

 CPU: 0 PID: 433 Comm: nc Not tainted 4.19.0-rc3-kvm #17
 Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS ?-20180531_142017-buildhw-08.phx2.fedoraproject.org-1.fc28 04/01/2014
 Call Trace:
  kasan_report.cold.6+0x6c/0x2fa
  tcf_sample_act+0xc4/0x310
  ? dev_hard_start_xmit+0x117/0x180
  tcf_action_exec+0xa3/0x160
  tcf_classify+0xdd/0x1d0
  htb_enqueue+0x18e/0x6b0
  ? deref_stack_reg+0x7a/0xb0
  ? htb_delete+0x4b0/0x4b0
  ? unwind_next_frame+0x819/0x8f0
  ? entry_SYSCALL_64_after_hwframe+0x44/0xa9
  __dev_queue_xmit+0x722/0xca0
  ? unwind_get_return_address_ptr+0x50/0x50
  ? netdev_pick_tx+0xe0/0xe0
  ? save_stack+0x8c/0xb0
  ? kasan_kmalloc+0xbe/0xd0
  ? __kmalloc_track_caller+0xe4/0x1c0
  ? __kmalloc_reserve.isra.45+0x24/0x70
  ? __alloc_skb+0xdd/0x2e0
  ? sk_stream_alloc_skb+0x91/0x3b0
  ? tcp_sendmsg_locked+0x71b/0x15a0
  ? tcp_sendmsg+0x22/0x40
  ? __sys_sendto+0x1b0/0x250
  ? __x64_sys_sendto+0x6f/0x80
  ? do_syscall_64+0x5d/0x150
  ? entry_SYSCALL_64_after_hwframe+0x44/0xa9
  ? __sys_sendto+0x1b0/0x250
  ? __x64_sys_sendto+0x6f/0x80
  ? do_syscall_64+0x5d/0x150
  ? entry_SYSCALL_64_after_hwframe+0x44/0xa9
  ip_finish_output2+0x495/0x590
  ? ip_copy_metadata+0x2e0/0x2e0
  ? skb_gso_validate_network_len+0x6f/0x110
  ? ip_finish_output+0x174/0x280
  __tcp_transmit_skb+0xb17/0x12b0
  ? __tcp_select_window+0x380/0x380
  tcp_write_xmit+0x913/0x1de0
  ? __sk_mem_schedule+0x50/0x80
  tcp_sendmsg_locked+0x49d/0x15a0
  ? tcp_rcv_established+0x8da/0xa30
  ? tcp_set_state+0x220/0x220
  ? clear_user+0x1f/0x50
  ? iov_iter_zero+0x1ae/0x590
  ? __fget_light+0xa0/0xe0
  tcp_sendmsg+0x22/0x40
  __sys_sendto+0x1b0/0x250
  ? __ia32_sys_getpeername+0x40/0x40
  ? _copy_to_user+0x58/0x70
  ? poll_select_copy_remaining+0x176/0x200
  ? __pollwait+0x1c0/0x1c0
  ? ktime_get_ts64+0x11f/0x140
  ? kern_select+0x108/0x150
  ? core_sys_select+0x360/0x360
  ? vfs_read+0x127/0x150
  ? kernel_write+0x90/0x90
  __x64_sys_sendto+0x6f/0x80
  do_syscall_64+0x5d/0x150
  entry_SYSCALL_64_after_hwframe+0x44/0xa9
 RIP: 0033:0x7fefef2b129d
 Code: ff ff ff ff eb b6 0f 1f 80 00 00 00 00 48 8d 05 51 37 0c 00 41 89 ca 8b 00 85 c0 75 20 45 31 c9 45 31 c0 b8 2c 00 00 00 0f 05 <48> 3d 00 f0 ff ff 77 6b f3 c3 66 0f 1f 84 00 00 00 00 00 41 56 41
 RSP: 002b:00007fff2f5350c8 EFLAGS: 00000246 ORIG_RAX: 000000000000002c
 RAX: ffffffffffffffda RBX: 000056118d60c120 RCX: 00007fefef2b129d
 RDX: 0000000000002000 RSI: 000056118d629320 RDI: 0000000000000003
 RBP: 000056118d530370 R08: 0000000000000000 R09: 0000000000000000
 R10: 0000000000000000 R11: 0000000000000246 R12: 0000000000002000
 R13: 000056118d5c2a10 R14: 000056118d5c2a10 R15: 000056118d5303b8

tcf_sample_act() tried to update its per-cpu stats, but tcf_sample_init()
forgot to allocate them, because tcf_idr_create() was called with a wrong
value of 'cpustats'. Setting it to true proved to fix the reported crash.

Reported-by: Matteo Croce <mcroce@redhat.com>
Fixes: 65a206c01e8e ("net/sched: Change act_api and act_xxx modules to use IDR")
Fixes: 5c5670fae430 ("net/sched: Introduce sample tc action")
Tested-by: Matteo Croce <mcroce@redhat.com>
Signed-off-by: Davide Caratti <dcaratti@redhat.com>
Acked-by: Jiri Pirko <jiri@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sched/act_sample.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/sched/act_sample.c b/net/sched/act_sample.c
index 44e9c00657bc..6b67aa13d2dd 100644
--- a/net/sched/act_sample.c
+++ b/net/sched/act_sample.c
@@ -69,7 +69,7 @@ static int tcf_sample_init(struct net *net, struct nlattr *nla,
 
 	if (!exists) {
 		ret = tcf_idr_create(tn, parm->index, est, a,
-				     &act_sample_ops, bind, false);
+				     &act_sample_ops, bind, true);
 		if (ret) {
 			tcf_idr_cleanup(tn, parm->index);
 			return ret;

From edf2ef7242805e53ec2e0841db26e06d8bc7da70 Mon Sep 17 00:00:00 2001
From: Jongsung Kim <neidhard.kim@lge.com>
Date: Thu, 13 Sep 2018 18:32:21 +0900
Subject: [PATCH 20/45] stmmac: fix valid numbers of unicast filter entries

Synopsys DWC Ethernet MAC can be configured to have 1..32, 64, or
128 unicast filter entries. (Table 7-8 MAC Address Registers from
databook) Fix dwmac1000_validate_ucast_entries() to accept values
between 1 and 32 in addition.

Signed-off-by: Jongsung Kim <neidhard.kim@lge.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c
index 3609c7b696c7..2b800ce1d5bf 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c
@@ -67,7 +67,7 @@ static int dwmac1000_validate_mcast_bins(int mcast_bins)
  * Description:
  * This function validates the number of Unicast address entries supported
  * by a particular Synopsys 10/100/1000 controller. The Synopsys controller
- * supports 1, 32, 64, or 128 Unicast filter entries for it's Unicast filter
+ * supports 1..32, 64, or 128 Unicast filter entries for it's Unicast filter
  * logic. This function validates a valid, supported configuration is
  * selected, and defaults to 1 Unicast address if an unsupported
  * configuration is selected.
@@ -77,8 +77,7 @@ static int dwmac1000_validate_ucast_entries(int ucast_entries)
 	int x = ucast_entries;
 
 	switch (x) {
-	case 1:
-	case 32:
+	case 1 ... 32:
 	case 64:
 	case 128:
 		break;

From 2b5a921740a55c00223a797d075b9c77c42cb171 Mon Sep 17 00:00:00 2001
From: Paolo Abeni <pabeni@redhat.com>
Date: Thu, 13 Sep 2018 16:27:20 +0200
Subject: [PATCH 21/45] udp4: fix IP_CMSG_CHECKSUM for connected sockets

commit 2abb7cdc0dc8 ("udp: Add support for doing checksum
unnecessary conversion") left out the early demux path for
connected sockets. As a result IP_CMSG_CHECKSUM gives wrong
values for such socket when GRO is not enabled/available.

This change addresses the issue by moving the csum conversion to a
common helper and using such helper in both the default and the
early demux rx path.

Fixes: 2abb7cdc0dc8 ("udp: Add support for doing checksum unnecessary conversion")
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/udp.c | 49 ++++++++++++++++++++++++++-----------------------
 1 file changed, 26 insertions(+), 23 deletions(-)

diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index f4e35b2ff8b8..7d69dd6fa7e8 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -2124,6 +2124,28 @@ static inline int udp4_csum_init(struct sk_buff *skb, struct udphdr *uh,
 							 inet_compute_pseudo);
 }
 
+/* wrapper for udp_queue_rcv_skb tacking care of csum conversion and
+ * return code conversion for ip layer consumption
+ */
+static int udp_unicast_rcv_skb(struct sock *sk, struct sk_buff *skb,
+			       struct udphdr *uh)
+{
+	int ret;
+
+	if (inet_get_convert_csum(sk) && uh->check && !IS_UDPLITE(sk))
+		skb_checksum_try_convert(skb, IPPROTO_UDP, uh->check,
+					 inet_compute_pseudo);
+
+	ret = udp_queue_rcv_skb(sk, skb);
+
+	/* a return value > 0 means to resubmit the input, but
+	 * it wants the return to be -protocol, or 0
+	 */
+	if (ret > 0)
+		return -ret;
+	return 0;
+}
+
 /*
  *	All we need to do is get the socket, and then do a checksum.
  */
@@ -2170,14 +2192,9 @@ int __udp4_lib_rcv(struct sk_buff *skb, struct udp_table *udptable,
 		if (unlikely(sk->sk_rx_dst != dst))
 			udp_sk_rx_dst_set(sk, dst);
 
-		ret = udp_queue_rcv_skb(sk, skb);
+		ret = udp_unicast_rcv_skb(sk, skb, uh);
 		sock_put(sk);
-		/* a return value > 0 means to resubmit the input, but
-		 * it wants the return to be -protocol, or 0
-		 */
-		if (ret > 0)
-			return -ret;
-		return 0;
+		return ret;
 	}
 
 	if (rt->rt_flags & (RTCF_BROADCAST|RTCF_MULTICAST))
@@ -2185,22 +2202,8 @@ int __udp4_lib_rcv(struct sk_buff *skb, struct udp_table *udptable,
 						saddr, daddr, udptable, proto);
 
 	sk = __udp4_lib_lookup_skb(skb, uh->source, uh->dest, udptable);
-	if (sk) {
-		int ret;
-
-		if (inet_get_convert_csum(sk) && uh->check && !IS_UDPLITE(sk))
-			skb_checksum_try_convert(skb, IPPROTO_UDP, uh->check,
-						 inet_compute_pseudo);
-
-		ret = udp_queue_rcv_skb(sk, skb);
-
-		/* a return value > 0 means to resubmit the input, but
-		 * it wants the return to be -protocol, or 0
-		 */
-		if (ret > 0)
-			return -ret;
-		return 0;
-	}
+	if (sk)
+		return udp_unicast_rcv_skb(sk, skb, uh);
 
 	if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb))
 		goto drop;

From eb63f2964dbe36f26deac77d3016791675821ded Mon Sep 17 00:00:00 2001
From: Paolo Abeni <pabeni@redhat.com>
Date: Thu, 13 Sep 2018 16:27:21 +0200
Subject: [PATCH 22/45] udp6: add missing checks on edumux packet processing

Currently the UDPv6 early demux rx code path lacks some mandatory
checks, already implemented into the normal RX code path - namely
the checksum conversion and no_check6_rx check.

Similar to the previous commit, we move the common processing to
an UDPv6 specific helper and call it from both edemux code path
and normal code path. In respect to the UDPv4, we need to add an
explicit check for non zero csum according to no_check6_rx value.

Reported-by: Jianlin Shi <jishi@redhat.com>
Suggested-by: Xin Long <lucien.xin@gmail.com>
Fixes: c9f2c1ae123a ("udp6: fix socket leak on early demux")
Fixes: 2abb7cdc0dc8 ("udp: Add support for doing checksum unnecessary conversion")
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/udp.c | 65 ++++++++++++++++++++++++++++----------------------
 1 file changed, 37 insertions(+), 28 deletions(-)

diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
index 83f4c77c79d8..28c4aa5078fc 100644
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -752,6 +752,28 @@ static void udp6_sk_rx_dst_set(struct sock *sk, struct dst_entry *dst)
 	}
 }
 
+/* wrapper for udp_queue_rcv_skb tacking care of csum conversion and
+ * return code conversion for ip layer consumption
+ */
+static int udp6_unicast_rcv_skb(struct sock *sk, struct sk_buff *skb,
+				struct udphdr *uh)
+{
+	int ret;
+
+	if (inet_get_convert_csum(sk) && uh->check && !IS_UDPLITE(sk))
+		skb_checksum_try_convert(skb, IPPROTO_UDP, uh->check,
+					 ip6_compute_pseudo);
+
+	ret = udpv6_queue_rcv_skb(sk, skb);
+
+	/* a return value > 0 means to resubmit the input, but
+	 * it wants the return to be -protocol, or 0
+	 */
+	if (ret > 0)
+		return -ret;
+	return 0;
+}
+
 int __udp6_lib_rcv(struct sk_buff *skb, struct udp_table *udptable,
 		   int proto)
 {
@@ -803,13 +825,14 @@ int __udp6_lib_rcv(struct sk_buff *skb, struct udp_table *udptable,
 		if (unlikely(sk->sk_rx_dst != dst))
 			udp6_sk_rx_dst_set(sk, dst);
 
-		ret = udpv6_queue_rcv_skb(sk, skb);
-		sock_put(sk);
+		if (!uh->check && !udp_sk(sk)->no_check6_rx) {
+			sock_put(sk);
+			goto report_csum_error;
+		}
 
-		/* a return value > 0 means to resubmit the input */
-		if (ret > 0)
-			return ret;
-		return 0;
+		ret = udp6_unicast_rcv_skb(sk, skb, uh);
+		sock_put(sk);
+		return ret;
 	}
 
 	/*
@@ -822,30 +845,13 @@ int __udp6_lib_rcv(struct sk_buff *skb, struct udp_table *udptable,
 	/* Unicast */
 	sk = __udp6_lib_lookup_skb(skb, uh->source, uh->dest, udptable);
 	if (sk) {
-		int ret;
-
-		if (!uh->check && !udp_sk(sk)->no_check6_rx) {
-			udp6_csum_zero_error(skb);
-			goto csum_error;
-		}
-
-		if (inet_get_convert_csum(sk) && uh->check && !IS_UDPLITE(sk))
-			skb_checksum_try_convert(skb, IPPROTO_UDP, uh->check,
-						 ip6_compute_pseudo);
-
-		ret = udpv6_queue_rcv_skb(sk, skb);
-
-		/* a return value > 0 means to resubmit the input */
-		if (ret > 0)
-			return ret;
-
-		return 0;
+		if (!uh->check && !udp_sk(sk)->no_check6_rx)
+			goto report_csum_error;
+		return udp6_unicast_rcv_skb(sk, skb, uh);
 	}
 
-	if (!uh->check) {
-		udp6_csum_zero_error(skb);
-		goto csum_error;
-	}
+	if (!uh->check)
+		goto report_csum_error;
 
 	if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb))
 		goto discard;
@@ -866,6 +872,9 @@ short_packet:
 			    ulen, skb->len,
 			    daddr, ntohs(uh->dest));
 	goto discard;
+
+report_csum_error:
+	udp6_csum_zero_error(skb);
 csum_error:
 	__UDP6_INC_STATS(net, UDP_MIB_CSUMERRORS, proto == IPPROTO_UDPLITE);
 discard:

From 4bf9ffa0fb5744ed40d7348c24fa9ae398b1d603 Mon Sep 17 00:00:00 2001
From: Toshiaki Makita <makita.toshiaki@lab.ntt.co.jp>
Date: Fri, 14 Sep 2018 13:33:44 +0900
Subject: [PATCH 23/45] veth: Orphan skb before GRO

GRO expects skbs not to be owned by sockets, but when XDP is enabled veth
passed skbs owned by sockets. It caused corrupted sk_wmem_alloc.

Paolo Abeni reported the following splat:

[  362.098904] refcount_t overflow at skb_set_owner_w+0x5e/0xa0 in iperf3[1644], uid/euid: 0/0
[  362.108239] WARNING: CPU: 0 PID: 1644 at kernel/panic.c:648 refcount_error_report+0xa0/0xa4
[  362.117547] Modules linked in: tcp_diag inet_diag veth intel_rapl sb_edac x86_pkg_temp_thermal intel_powerclamp coretemp kvm_intel kvm irqbypass crct10dif_pclmul crc32_pclmul ghash_clmulni_intel intel_cstate intel_uncore intel_rapl_perf ipmi_ssif iTCO_wdt sg ipmi_si iTCO_vendor_support ipmi_devintf mxm_wmi ipmi_msghandler pcspkr dcdbas mei_me wmi mei lpc_ich acpi_power_meter pcc_cpufreq xfs libcrc32c sd_mod mgag200 drm_kms_helper syscopyarea sysfillrect sysimgblt fb_sys_fops ixgbe igb ttm ahci mdio libahci ptp crc32c_intel drm pps_core libata i2c_algo_bit dca dm_mirror dm_region_hash dm_log dm_mod
[  362.176622] CPU: 0 PID: 1644 Comm: iperf3 Not tainted 4.19.0-rc2.vanilla+ #2025
[  362.184777] Hardware name: Dell Inc. PowerEdge R730/072T6D, BIOS 2.1.7 06/16/2016
[  362.193124] RIP: 0010:refcount_error_report+0xa0/0xa4
[  362.198758] Code: 08 00 00 48 8b 95 80 00 00 00 49 8d 8c 24 80 0a 00 00 41 89 c1 44 89 2c 24 48 89 de 48 c7 c7 18 4d e7 9d 31 c0 e8 30 fa ff ff <0f> 0b eb 88 0f 1f 44 00 00 55 48 89 e5 41 56 41 55 41 54 49 89 fc
[  362.219711] RSP: 0018:ffff9ee6ff603c20 EFLAGS: 00010282
[  362.225538] RAX: 0000000000000000 RBX: ffffffff9de83e10 RCX: 0000000000000000
[  362.233497] RDX: 0000000000000001 RSI: ffff9ee6ff6167d8 RDI: ffff9ee6ff6167d8
[  362.241457] RBP: ffff9ee6ff603d78 R08: 0000000000000490 R09: 0000000000000004
[  362.249416] R10: 0000000000000000 R11: ffff9ee6ff603990 R12: ffff9ee664b94500
[  362.257377] R13: 0000000000000000 R14: 0000000000000004 R15: ffffffff9de615f9
[  362.265337] FS:  00007f1d22d28740(0000) GS:ffff9ee6ff600000(0000) knlGS:0000000000000000
[  362.274363] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[  362.280773] CR2: 00007f1d222f35d0 CR3: 0000001fddfec003 CR4: 00000000001606f0
[  362.288733] Call Trace:
[  362.291459]  <IRQ>
[  362.293702]  ex_handler_refcount+0x4e/0x80
[  362.298269]  fixup_exception+0x35/0x40
[  362.302451]  do_trap+0x109/0x150
[  362.306048]  do_error_trap+0xd5/0x130
[  362.315766]  invalid_op+0x14/0x20
[  362.319460] RIP: 0010:skb_set_owner_w+0x5e/0xa0
[  362.324512] Code: ef ff ff 74 49 48 c7 43 60 20 7b 4a 9d 8b 85 f4 01 00 00 85 c0 75 16 8b 83 e0 00 00 00 f0 01 85 44 01 00 00 0f 88 d8 23 16 00 <5b> 5d c3 80 8b 91 00 00 00 01 8b 85 f4 01 00 00 89 83 a4 00 00 00
[  362.345465] RSP: 0018:ffff9ee6ff603e20 EFLAGS: 00010a86
[  362.351291] RAX: 0000000000001100 RBX: ffff9ee65deec700 RCX: ffff9ee65e829244
[  362.359250] RDX: 0000000000000100 RSI: ffff9ee65e829100 RDI: ffff9ee65deec700
[  362.367210] RBP: ffff9ee65e829100 R08: 000000000002a380 R09: 0000000000000000
[  362.375169] R10: 0000000000000002 R11: fffff1a4bf77bb00 R12: ffffc0754661d000
[  362.383130] R13: ffff9ee65deec200 R14: ffff9ee65f597000 R15: 00000000000000aa
[  362.391092]  veth_xdp_rcv+0x4e4/0x890 [veth]
[  362.399357]  veth_poll+0x4d/0x17a [veth]
[  362.403731]  net_rx_action+0x2af/0x3f0
[  362.407912]  __do_softirq+0xdd/0x29e
[  362.411897]  do_softirq_own_stack+0x2a/0x40
[  362.416561]  </IRQ>
[  362.418899]  do_softirq+0x4b/0x70
[  362.422594]  __local_bh_enable_ip+0x50/0x60
[  362.427258]  ip_finish_output2+0x16a/0x390
[  362.431824]  ip_output+0x71/0xe0
[  362.440670]  __tcp_transmit_skb+0x583/0xab0
[  362.445333]  tcp_write_xmit+0x247/0xfb0
[  362.449609]  __tcp_push_pending_frames+0x2d/0xd0
[  362.454760]  tcp_sendmsg_locked+0x857/0xd30
[  362.459424]  tcp_sendmsg+0x27/0x40
[  362.463216]  sock_sendmsg+0x36/0x50
[  362.467104]  sock_write_iter+0x87/0x100
[  362.471382]  __vfs_write+0x112/0x1a0
[  362.475369]  vfs_write+0xad/0x1a0
[  362.479062]  ksys_write+0x52/0xc0
[  362.482759]  do_syscall_64+0x5b/0x180
[  362.486841]  entry_SYSCALL_64_after_hwframe+0x44/0xa9
[  362.492473] RIP: 0033:0x7f1d22293238
[  362.496458] Code: 89 02 48 c7 c0 ff ff ff ff eb b3 0f 1f 80 00 00 00 00 f3 0f 1e fa 48 8d 05 c5 54 2d 00 8b 00 85 c0 75 17 b8 01 00 00 00 0f 05 <48> 3d 00 f0 ff ff 77 58 c3 0f 1f 80 00 00 00 00 41 54 49 89 d4 55
[  362.517409] RSP: 002b:00007ffebaef8008 EFLAGS: 00000246 ORIG_RAX: 0000000000000001
[  362.525855] RAX: ffffffffffffffda RBX: 0000000000002800 RCX: 00007f1d22293238
[  362.533816] RDX: 0000000000002800 RSI: 00007f1d22d36000 RDI: 0000000000000005
[  362.541775] RBP: 00007f1d22d36000 R08: 00000002db777a30 R09: 0000562b70712b20
[  362.549734] R10: 0000000000000000 R11: 0000000000000246 R12: 0000000000000005
[  362.557693] R13: 0000000000002800 R14: 00007ffebaef8060 R15: 0000562b70712260

In order to avoid this, orphan the skb before entering GRO.

Fixes: 948d4f214fde ("veth: Add driver XDP")
Reported-by: Paolo Abeni <pabeni@redhat.com>
Signed-off-by: Toshiaki Makita <makita.toshiaki@lab.ntt.co.jp>
Tested-by: Paolo Abeni <pabeni@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/veth.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/net/veth.c b/drivers/net/veth.c
index 8d679c8b7f25..41a00cd76955 100644
--- a/drivers/net/veth.c
+++ b/drivers/net/veth.c
@@ -463,6 +463,8 @@ static struct sk_buff *veth_xdp_rcv_skb(struct veth_rq *rq, struct sk_buff *skb,
 	int mac_len, delta, off;
 	struct xdp_buff xdp;
 
+	skb_orphan(skb);
+
 	rcu_read_lock();
 	xdp_prog = rcu_dereference(rq->xdp_prog);
 	if (unlikely(!xdp_prog)) {
@@ -508,8 +510,6 @@ static struct sk_buff *veth_xdp_rcv_skb(struct veth_rq *rq, struct sk_buff *skb,
 		skb_copy_header(nskb, skb);
 		head_off = skb_headroom(nskb) - skb_headroom(skb);
 		skb_headers_offset_update(nskb, head_off);
-		if (skb->sk)
-			skb_set_owner_w(nskb, skb->sk);
 		consume_skb(skb);
 		skb = nskb;
 	}

From f025571e96caa95ffc3c1792f762a584893de582 Mon Sep 17 00:00:00 2001
From: Corentin Labbe <clabbe@baylibre.com>
Date: Fri, 14 Sep 2018 11:20:07 +0000
Subject: [PATCH 24/45] net: ethernet: ti: add missing GENERIC_ALLOCATOR
 dependency

This patch mades TI_DAVINCI_CPDMA select GENERIC_ALLOCATOR.
without that, the following sparc64 build failure happen

drivers/net/ethernet/ti/davinci_cpdma.o: In function `cpdma_check_free_tx_desc':
(.text+0x278): undefined reference to `gen_pool_avail'
drivers/net/ethernet/ti/davinci_cpdma.o: In function `cpdma_chan_submit':
(.text+0x340): undefined reference to `gen_pool_alloc'
(.text+0x5c4): undefined reference to `gen_pool_free'
drivers/net/ethernet/ti/davinci_cpdma.o: In function `__cpdma_chan_free':
davinci_cpdma.c:(.text+0x64c): undefined reference to `gen_pool_free'
drivers/net/ethernet/ti/davinci_cpdma.o: In function `cpdma_desc_pool_destroy.isra.6':
davinci_cpdma.c:(.text+0x17ac): undefined reference to `gen_pool_size'
davinci_cpdma.c:(.text+0x17b8): undefined reference to `gen_pool_avail'
davinci_cpdma.c:(.text+0x1824): undefined reference to `gen_pool_size'
davinci_cpdma.c:(.text+0x1830): undefined reference to `gen_pool_avail'
drivers/net/ethernet/ti/davinci_cpdma.o: In function `cpdma_ctlr_create':
(.text+0x19f8): undefined reference to `devm_gen_pool_create'
(.text+0x1a90): undefined reference to `gen_pool_add_virt'
Makefile:1011: recipe for target 'vmlinux' failed

Signed-off-by: Corentin Labbe <clabbe@baylibre.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/ti/Kconfig | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/net/ethernet/ti/Kconfig b/drivers/net/ethernet/ti/Kconfig
index 9263d638bd6d..f932923f7d56 100644
--- a/drivers/net/ethernet/ti/Kconfig
+++ b/drivers/net/ethernet/ti/Kconfig
@@ -41,6 +41,7 @@ config TI_DAVINCI_MDIO
 config TI_DAVINCI_CPDMA
 	tristate "TI DaVinci CPDMA Support"
 	depends on ARCH_DAVINCI || ARCH_OMAP2PLUS || COMPILE_TEST
+	select GENERIC_ALLOCATOR
 	---help---
 	  This driver supports TI's DaVinci CPDMA dma engine.
 

From 8540827ebac6b654ab2f69c8fbce9e4fbd6304a0 Mon Sep 17 00:00:00 2001
From: Guillaume Nault <g.nault@alphalink.fr>
Date: Fri, 14 Sep 2018 16:28:05 +0200
Subject: [PATCH 25/45] pppoe: fix reception of frames with no mac header

pppoe_rcv() needs to look back at the Ethernet header in order to
lookup the PPPoE session. Therefore we need to ensure that the mac
header is big enough to contain an Ethernet header. Otherwise
eth_hdr(skb)->h_source might access invalid data.

==================================================================
BUG: KMSAN: uninit-value in __get_item drivers/net/ppp/pppoe.c:172 [inline]
BUG: KMSAN: uninit-value in get_item drivers/net/ppp/pppoe.c:236 [inline]
BUG: KMSAN: uninit-value in pppoe_rcv+0xcef/0x10e0 drivers/net/ppp/pppoe.c:450
CPU: 0 PID: 4543 Comm: syz-executor355 Not tainted 4.16.0+ #87
Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google
01/01/2011
Call Trace:
 __dump_stack lib/dump_stack.c:17 [inline]
 dump_stack+0x185/0x1d0 lib/dump_stack.c:53
 kmsan_report+0x142/0x240 mm/kmsan/kmsan.c:1067
 __msan_warning_32+0x6c/0xb0 mm/kmsan/kmsan_instr.c:683
 __get_item drivers/net/ppp/pppoe.c:172 [inline]
 get_item drivers/net/ppp/pppoe.c:236 [inline]
 pppoe_rcv+0xcef/0x10e0 drivers/net/ppp/pppoe.c:450
 __netif_receive_skb_core+0x47df/0x4a90 net/core/dev.c:4562
 __netif_receive_skb net/core/dev.c:4627 [inline]
 netif_receive_skb_internal+0x49d/0x630 net/core/dev.c:4701
 netif_receive_skb+0x230/0x240 net/core/dev.c:4725
 tun_rx_batched drivers/net/tun.c:1555 [inline]
 tun_get_user+0x740f/0x7c60 drivers/net/tun.c:1962
 tun_chr_write_iter+0x1d4/0x330 drivers/net/tun.c:1990
 call_write_iter include/linux/fs.h:1782 [inline]
 new_sync_write fs/read_write.c:469 [inline]
 __vfs_write+0x7fb/0x9f0 fs/read_write.c:482
 vfs_write+0x463/0x8d0 fs/read_write.c:544
 SYSC_write+0x172/0x360 fs/read_write.c:589
 SyS_write+0x55/0x80 fs/read_write.c:581
 do_syscall_64+0x309/0x430 arch/x86/entry/common.c:287
 entry_SYSCALL_64_after_hwframe+0x3d/0xa2
RIP: 0033:0x4447c9
RSP: 002b:00007fff64c8fc28 EFLAGS: 00000297 ORIG_RAX: 0000000000000001
RAX: ffffffffffffffda RBX: 0000000000000000 RCX: 00000000004447c9
RDX: 000000000000fd87 RSI: 0000000020000600 RDI: 0000000000000004
RBP: 00000000006cf018 R08: 00007fff64c8fda8 R09: 00007fff00006bda
R10: 0000000000005fe7 R11: 0000000000000297 R12: 00000000004020d0
R13: 0000000000402160 R14: 0000000000000000 R15: 0000000000000000

Uninit was created at:
 kmsan_save_stack_with_flags mm/kmsan/kmsan.c:278 [inline]
 kmsan_internal_poison_shadow+0xb8/0x1b0 mm/kmsan/kmsan.c:188
 kmsan_kmalloc+0x94/0x100 mm/kmsan/kmsan.c:314
 kmsan_slab_alloc+0x11/0x20 mm/kmsan/kmsan.c:321
 slab_post_alloc_hook mm/slab.h:445 [inline]
 slab_alloc_node mm/slub.c:2737 [inline]
 __kmalloc_node_track_caller+0xaed/0x11c0 mm/slub.c:4369
 __kmalloc_reserve net/core/skbuff.c:138 [inline]
 __alloc_skb+0x2cf/0x9f0 net/core/skbuff.c:206
 alloc_skb include/linux/skbuff.h:984 [inline]
 alloc_skb_with_frags+0x1d4/0xb20 net/core/skbuff.c:5234
 sock_alloc_send_pskb+0xb56/0x1190 net/core/sock.c:2085
 tun_alloc_skb drivers/net/tun.c:1532 [inline]
 tun_get_user+0x2242/0x7c60 drivers/net/tun.c:1829
 tun_chr_write_iter+0x1d4/0x330 drivers/net/tun.c:1990
 call_write_iter include/linux/fs.h:1782 [inline]
 new_sync_write fs/read_write.c:469 [inline]
 __vfs_write+0x7fb/0x9f0 fs/read_write.c:482
 vfs_write+0x463/0x8d0 fs/read_write.c:544
 SYSC_write+0x172/0x360 fs/read_write.c:589
 SyS_write+0x55/0x80 fs/read_write.c:581
 do_syscall_64+0x309/0x430 arch/x86/entry/common.c:287
 entry_SYSCALL_64_after_hwframe+0x3d/0xa2
==================================================================

Fixes: 224cf5ad14c0 ("ppp: Move the PPP drivers")
Reported-by: syzbot+f5f6080811c849739212@syzkaller.appspotmail.com
Signed-off-by: Guillaume Nault <g.nault@alphalink.fr>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ppp/pppoe.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/drivers/net/ppp/pppoe.c b/drivers/net/ppp/pppoe.c
index ce61231e96ea..62dc564b251d 100644
--- a/drivers/net/ppp/pppoe.c
+++ b/drivers/net/ppp/pppoe.c
@@ -429,6 +429,9 @@ static int pppoe_rcv(struct sk_buff *skb, struct net_device *dev,
 	if (!skb)
 		goto out;
 
+	if (skb_mac_header_len(skb) < ETH_HLEN)
+		goto drop;
+
 	if (!pskb_may_pull(skb, sizeof(struct pppoe_hdr)))
 		goto drop;
 

From 41948ccb4a856dddacfbd4d789d4fa8663fe41bb Mon Sep 17 00:00:00 2001
From: Antoine Tenart <antoine.tenart@bootlin.com>
Date: Fri, 14 Sep 2018 16:56:35 +0200
Subject: [PATCH 26/45] net: mvpp2: let phylink manage the carrier state

Net drivers using phylink shouldn't mess with the link carrier
themselves and should let phylink manage it. The mvpp2 driver wasn't
following this best practice as the mac_config() function made calls to
change the link carrier state. This led to wrongly reported carrier link
state which then triggered other issues. This patch fixes this
behaviour.

But the PPv2 driver relied on this misbehaviour in two cases: for fixed
links and when not using phylink (ACPI mode). The later was fixed by
adding an explicit call to link_up(), which when the ACPI mode will use
phylink should be removed.

The fixed link case was relying on the mac_config() function to set the
link up, as we found an issue in phylink_start() which assumes the
carrier is off. If not, the link_up() function is never called. To fix
this, a call to netif_carrier_off() is added just before phylink_start()
so that we do not introduce a regression in the driver.

Fixes: 4bb043262878 ("net: mvpp2: phylink support")
Reported-by: Russell King <linux@armlinux.org.uk>
Signed-off-by: Antoine Tenart <antoine.tenart@bootlin.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../net/ethernet/marvell/mvpp2/mvpp2_main.c   | 21 ++++++-------------
 1 file changed, 6 insertions(+), 15 deletions(-)

diff --git a/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c b/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c
index 28500417843e..702fec82d806 100644
--- a/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c
+++ b/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c
@@ -58,6 +58,8 @@ static struct {
  */
 static void mvpp2_mac_config(struct net_device *dev, unsigned int mode,
 			     const struct phylink_link_state *state);
+static void mvpp2_mac_link_up(struct net_device *dev, unsigned int mode,
+			      phy_interface_t interface, struct phy_device *phy);
 
 /* Queue modes */
 #define MVPP2_QDIST_SINGLE_MODE	0
@@ -3142,6 +3144,7 @@ static void mvpp2_start_dev(struct mvpp2_port *port)
 		mvpp22_mode_reconfigure(port);
 
 	if (port->phylink) {
+		netif_carrier_off(port->dev);
 		phylink_start(port->phylink);
 	} else {
 		/* Phylink isn't used as of now for ACPI, so the MAC has to be
@@ -3150,9 +3153,10 @@ static void mvpp2_start_dev(struct mvpp2_port *port)
 		 */
 		struct phylink_link_state state = {
 			.interface = port->phy_interface,
-			.link = 1,
 		};
 		mvpp2_mac_config(port->dev, MLO_AN_INBAND, &state);
+		mvpp2_mac_link_up(port->dev, MLO_AN_INBAND, port->phy_interface,
+				  NULL);
 	}
 
 	netif_tx_start_all_queues(port->dev);
@@ -4495,10 +4499,6 @@ static void mvpp2_mac_config(struct net_device *dev, unsigned int mode,
 		return;
 	}
 
-	netif_tx_stop_all_queues(port->dev);
-	if (!port->has_phy)
-		netif_carrier_off(port->dev);
-
 	/* Make sure the port is disabled when reconfiguring the mode */
 	mvpp2_port_disable(port);
 
@@ -4523,16 +4523,7 @@ static void mvpp2_mac_config(struct net_device *dev, unsigned int mode,
 	if (port->priv->hw_version == MVPP21 && port->flags & MVPP2_F_LOOPBACK)
 		mvpp2_port_loopback_set(port, state);
 
-	/* If the port already was up, make sure it's still in the same state */
-	if (state->link || !port->has_phy) {
-		mvpp2_port_enable(port);
-
-		mvpp2_egress_enable(port);
-		mvpp2_ingress_enable(port);
-		if (!port->has_phy)
-			netif_carrier_on(dev);
-		netif_tx_wake_all_queues(dev);
-	}
+	mvpp2_port_enable(port);
 }
 
 static void mvpp2_mac_link_up(struct net_device *dev, unsigned int mode,

From eb4ed8e2d7fecb5f40db38e4498b9ee23cddf196 Mon Sep 17 00:00:00 2001
From: Nicolas Ferre <nicolas.ferre@microchip.com>
Date: Fri, 14 Sep 2018 17:48:10 +0200
Subject: [PATCH 27/45] net: macb: disable scatter-gather for macb on sama5d3

Create a new configuration for the sama5d3-macb new compatibility string.
This configuration disables scatter-gather because we experienced lock down
of the macb interface of this particular SoC under very high load.

Signed-off-by: Nicolas Ferre <nicolas.ferre@microchip.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/cadence/macb_main.c | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/drivers/net/ethernet/cadence/macb_main.c b/drivers/net/ethernet/cadence/macb_main.c
index 16e4ef7d7185..f1a86b422617 100644
--- a/drivers/net/ethernet/cadence/macb_main.c
+++ b/drivers/net/ethernet/cadence/macb_main.c
@@ -3837,6 +3837,13 @@ static const struct macb_config at91sam9260_config = {
 	.init = macb_init,
 };
 
+static const struct macb_config sama5d3macb_config = {
+	.caps = MACB_CAPS_SG_DISABLED
+	      | MACB_CAPS_USRIO_HAS_CLKEN | MACB_CAPS_USRIO_DEFAULT_IS_MII_GMII,
+	.clk_init = macb_clk_init,
+	.init = macb_init,
+};
+
 static const struct macb_config pc302gem_config = {
 	.caps = MACB_CAPS_SG_DISABLED | MACB_CAPS_GIGABIT_MODE_AVAILABLE,
 	.dma_burst_length = 16,
@@ -3904,6 +3911,7 @@ static const struct of_device_id macb_dt_ids[] = {
 	{ .compatible = "cdns,gem", .data = &pc302gem_config },
 	{ .compatible = "atmel,sama5d2-gem", .data = &sama5d2_config },
 	{ .compatible = "atmel,sama5d3-gem", .data = &sama5d3_config },
+	{ .compatible = "atmel,sama5d3-macb", .data = &sama5d3macb_config },
 	{ .compatible = "atmel,sama5d4-gem", .data = &sama5d4_config },
 	{ .compatible = "cdns,at91rm9200-emac", .data = &emac_config },
 	{ .compatible = "cdns,emac", .data = &emac_config },

From 321cc359d899a8e988f3725d87c18a628e1cc624 Mon Sep 17 00:00:00 2001
From: Nicolas Ferre <nicolas.ferre@microchip.com>
Date: Fri, 14 Sep 2018 17:48:11 +0200
Subject: [PATCH 28/45] ARM: dts: at91: add new compatibility string for macb
 on sama5d3

We need this new compatibility string as we experienced different behavior
for this 10/100Mbits/s macb interface on this particular SoC.
Backward compatibility is preserved as we keep the alternative strings.

Signed-off-by: Nicolas Ferre <nicolas.ferre@microchip.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 Documentation/devicetree/bindings/net/macb.txt | 1 +
 arch/arm/boot/dts/sama5d3_emac.dtsi            | 2 +-
 2 files changed, 2 insertions(+), 1 deletion(-)

diff --git a/Documentation/devicetree/bindings/net/macb.txt b/Documentation/devicetree/bindings/net/macb.txt
index 457d5ae16f23..3e17ac1d5d58 100644
--- a/Documentation/devicetree/bindings/net/macb.txt
+++ b/Documentation/devicetree/bindings/net/macb.txt
@@ -10,6 +10,7 @@ Required properties:
   Use "cdns,pc302-gem" for Picochip picoXcell pc302 and later devices based on
   the Cadence GEM, or the generic form: "cdns,gem".
   Use "atmel,sama5d2-gem" for the GEM IP (10/100) available on Atmel sama5d2 SoCs.
+  Use "atmel,sama5d3-macb" for the 10/100Mbit IP available on Atmel sama5d3 SoCs.
   Use "atmel,sama5d3-gem" for the Gigabit IP available on Atmel sama5d3 SoCs.
   Use "atmel,sama5d4-gem" for the GEM IP (10/100) available on Atmel sama5d4 SoCs.
   Use "cdns,zynq-gem" Xilinx Zynq-7xxx SoC.
diff --git a/arch/arm/boot/dts/sama5d3_emac.dtsi b/arch/arm/boot/dts/sama5d3_emac.dtsi
index 7cb235ef0fb6..6e9e1c2f9def 100644
--- a/arch/arm/boot/dts/sama5d3_emac.dtsi
+++ b/arch/arm/boot/dts/sama5d3_emac.dtsi
@@ -41,7 +41,7 @@
 			};
 
 			macb1: ethernet@f802c000 {
-				compatible = "cdns,at91sam9260-macb", "cdns,macb";
+				compatible = "atmel,sama5d3-macb", "cdns,at91sam9260-macb", "cdns,macb";
 				reg = <0xf802c000 0x100>;
 				interrupts = <35 IRQ_TYPE_LEVEL_HIGH 3>;
 				pinctrl-names = "default";

From a7f38002fb69b44f8fc622ecb838665d0b8666af Mon Sep 17 00:00:00 2001
From: Colin Ian King <colin.king@canonical.com>
Date: Fri, 14 Sep 2018 17:39:53 +0100
Subject: [PATCH 29/45] net: hp100: fix always-true check for link up state

The operation ~(p100_inb(VG_LAN_CFG_1) & HP100_LINK_UP) returns a value
that is always non-zero and hence the wait for the link to drop always
terminates prematurely.  Fix this by using a logical not operator instead
of a bitwise complement.  This issue has been in the driver since
pre-2.6.12-rc2.

Detected by CoverityScan, CID#114157 ("Logical vs. bitwise operator")

Signed-off-by: Colin Ian King <colin.king@canonical.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/hp/hp100.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/hp/hp100.c b/drivers/net/ethernet/hp/hp100.c
index c8c7ad2eff77..9b5a68b65432 100644
--- a/drivers/net/ethernet/hp/hp100.c
+++ b/drivers/net/ethernet/hp/hp100.c
@@ -2634,7 +2634,7 @@ static int hp100_login_to_vg_hub(struct net_device *dev, u_short force_relogin)
 		/* Wait for link to drop */
 		time = jiffies + (HZ / 10);
 		do {
-			if (~(hp100_inb(VG_LAN_CFG_1) & HP100_LINK_UP_ST))
+			if (!(hp100_inb(VG_LAN_CFG_1) & HP100_LINK_UP_ST))
 				break;
 			if (!in_interrupt())
 				schedule_timeout_interruptible(1);

From bbd6528d28c1b8e80832b3b018ec402b6f5c3215 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Fri, 14 Sep 2018 12:02:31 -0700
Subject: [PATCH 30/45] ipv6: fix possible use-after-free in ip6_xmit()

In the unlikely case ip6_xmit() has to call skb_realloc_headroom(),
we need to call skb_set_owner_w() before consuming original skb,
otherwise we risk a use-after-free.

Bring IPv6 in line with what we do in IPv4 to fix this.

Fixes: 1da177e4c3f41 ("Linux-2.6.12-rc2")
Signed-off-by: Eric Dumazet <edumazet@google.com>
Reported-by: syzbot <syzkaller@googlegroups.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/ip6_output.c | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
index 16f200f06500..f9f8f554d141 100644
--- a/net/ipv6/ip6_output.c
+++ b/net/ipv6/ip6_output.c
@@ -219,12 +219,10 @@ int ip6_xmit(const struct sock *sk, struct sk_buff *skb, struct flowi6 *fl6,
 				kfree_skb(skb);
 				return -ENOBUFS;
 			}
+			if (skb->sk)
+				skb_set_owner_w(skb2, skb->sk);
 			consume_skb(skb);
 			skb = skb2;
-			/* skb_set_owner_w() changes sk->sk_wmem_alloc atomically,
-			 * it is safe to call in our context (socket lock not held)
-			 */
-			skb_set_owner_w(skb, (struct sock *)sk);
 		}
 		if (opt->opt_flen)
 			ipv6_push_frag_opts(skb, opt, &proto);

From 28ea334bd1657f3c43485b4a8592672fc6835fac Mon Sep 17 00:00:00 2001
From: Michael Chan <michael.chan@broadcom.com>
Date: Fri, 14 Sep 2018 15:41:29 -0400
Subject: [PATCH 31/45] bnxt_en: Fix VF mac address regression.

The recent commit to always forward the VF MAC address to the PF for
approval may not work if the PF driver or the firmware is older.  This
will cause the VF driver to fail during probe:

  bnxt_en 0000:00:03.0 (unnamed net_device) (uninitialized): hwrm req_type 0xf seq id 0x5 error 0xffff
  bnxt_en 0000:00:03.0 (unnamed net_device) (uninitialized): VF MAC address 00:00:17:02:05:d0 not approved by the PF
  bnxt_en 0000:00:03.0: Unable to initialize mac address.
  bnxt_en: probe of 0000:00:03.0 failed with error -99

We fix it by treating the error as fatal only if the VF MAC address is
locally generated by the VF.

Fixes: 707e7e966026 ("bnxt_en: Always forward VF MAC address to the PF.")
Reported-by: Seth Forshee <seth.forshee@canonical.com>
Reported-by: Siwei Liu <loseweigh@gmail.com>
Signed-off-by: Michael Chan <michael.chan@broadcom.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/broadcom/bnxt/bnxt.c       | 9 +++++++--
 drivers/net/ethernet/broadcom/bnxt/bnxt_sriov.c | 9 +++++----
 drivers/net/ethernet/broadcom/bnxt/bnxt_sriov.h | 2 +-
 3 files changed, 13 insertions(+), 7 deletions(-)

diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
index cecbb1d1f587..177587f9c3f1 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
@@ -8027,7 +8027,7 @@ static int bnxt_change_mac_addr(struct net_device *dev, void *p)
 	if (ether_addr_equal(addr->sa_data, dev->dev_addr))
 		return 0;
 
-	rc = bnxt_approve_mac(bp, addr->sa_data);
+	rc = bnxt_approve_mac(bp, addr->sa_data, true);
 	if (rc)
 		return rc;
 
@@ -8827,14 +8827,19 @@ static int bnxt_init_mac_addr(struct bnxt *bp)
 	} else {
 #ifdef CONFIG_BNXT_SRIOV
 		struct bnxt_vf_info *vf = &bp->vf;
+		bool strict_approval = true;
 
 		if (is_valid_ether_addr(vf->mac_addr)) {
 			/* overwrite netdev dev_addr with admin VF MAC */
 			memcpy(bp->dev->dev_addr, vf->mac_addr, ETH_ALEN);
+			/* Older PF driver or firmware may not approve this
+			 * correctly.
+			 */
+			strict_approval = false;
 		} else {
 			eth_hw_addr_random(bp->dev);
 		}
-		rc = bnxt_approve_mac(bp, bp->dev->dev_addr);
+		rc = bnxt_approve_mac(bp, bp->dev->dev_addr, strict_approval);
 #endif
 	}
 	return rc;
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_sriov.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_sriov.c
index fcd085a9853a..3962f6fd543c 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt_sriov.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_sriov.c
@@ -1104,7 +1104,7 @@ update_vf_mac_exit:
 	mutex_unlock(&bp->hwrm_cmd_lock);
 }
 
-int bnxt_approve_mac(struct bnxt *bp, u8 *mac)
+int bnxt_approve_mac(struct bnxt *bp, u8 *mac, bool strict)
 {
 	struct hwrm_func_vf_cfg_input req = {0};
 	int rc = 0;
@@ -1122,12 +1122,13 @@ int bnxt_approve_mac(struct bnxt *bp, u8 *mac)
 	memcpy(req.dflt_mac_addr, mac, ETH_ALEN);
 	rc = hwrm_send_message(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT);
 mac_done:
-	if (rc) {
+	if (rc && strict) {
 		rc = -EADDRNOTAVAIL;
 		netdev_warn(bp->dev, "VF MAC address %pM not approved by the PF\n",
 			    mac);
+		return rc;
 	}
-	return rc;
+	return 0;
 }
 #else
 
@@ -1144,7 +1145,7 @@ void bnxt_update_vf_mac(struct bnxt *bp)
 {
 }
 
-int bnxt_approve_mac(struct bnxt *bp, u8 *mac)
+int bnxt_approve_mac(struct bnxt *bp, u8 *mac, bool strict)
 {
 	return 0;
 }
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_sriov.h b/drivers/net/ethernet/broadcom/bnxt/bnxt_sriov.h
index e9b20cd19881..2eed9eda1195 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt_sriov.h
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_sriov.h
@@ -39,5 +39,5 @@ int bnxt_sriov_configure(struct pci_dev *pdev, int num_vfs);
 void bnxt_sriov_disable(struct bnxt *);
 void bnxt_hwrm_exec_fwd_req(struct bnxt *);
 void bnxt_update_vf_mac(struct bnxt *);
-int bnxt_approve_mac(struct bnxt *, u8 *);
+int bnxt_approve_mac(struct bnxt *, u8 *, bool);
 #endif

From a15f2c08c70811f120d99288d81f70d7f3d104f1 Mon Sep 17 00:00:00 2001
From: Stephen Hemminger <stephen@networkplumber.org>
Date: Fri, 14 Sep 2018 12:54:56 -0700
Subject: [PATCH 32/45] PCI: hv: support reporting serial number as slot
 information

The Hyper-V host API for PCI provides a unique "serial number" which
can be used as basis for sysfs PCI slot table. This can be useful
for cases where userspace wants to find the PCI device based on
serial number.

When an SR-IOV NIC is added, the host sends an attach message
with serial number. The kernel doesn't use the serial number, but
it is useful when doing the same thing in a userspace driver such
as the DPDK. By having /sys/bus/pci/slots/N it provides a direct
way to find the matching PCI device.

There maybe some cases where serial number is not unique such
as when using GPU's. But the PCI slot infrastructure will handle
that.

This has a side effect which may also be useful. The common udev
network device naming policy uses the slot information (rather
than PCI address).

Signed-off-by: Stephen Hemminger <sthemmin@microsoft.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/pci/controller/pci-hyperv.c | 37 +++++++++++++++++++++++++++++
 1 file changed, 37 insertions(+)

diff --git a/drivers/pci/controller/pci-hyperv.c b/drivers/pci/controller/pci-hyperv.c
index c00f82cc54aa..ee80e79db21a 100644
--- a/drivers/pci/controller/pci-hyperv.c
+++ b/drivers/pci/controller/pci-hyperv.c
@@ -89,6 +89,9 @@ static enum pci_protocol_version_t pci_protocol_version;
 
 #define STATUS_REVISION_MISMATCH 0xC0000059
 
+/* space for 32bit serial number as string */
+#define SLOT_NAME_SIZE 11
+
 /*
  * Message Types
  */
@@ -494,6 +497,7 @@ struct hv_pci_dev {
 	struct list_head list_entry;
 	refcount_t refs;
 	enum hv_pcichild_state state;
+	struct pci_slot *pci_slot;
 	struct pci_function_description desc;
 	bool reported_missing;
 	struct hv_pcibus_device *hbus;
@@ -1457,6 +1461,34 @@ static void prepopulate_bars(struct hv_pcibus_device *hbus)
 	spin_unlock_irqrestore(&hbus->device_list_lock, flags);
 }
 
+/*
+ * Assign entries in sysfs pci slot directory.
+ *
+ * Note that this function does not need to lock the children list
+ * because it is called from pci_devices_present_work which
+ * is serialized with hv_eject_device_work because they are on the
+ * same ordered workqueue. Therefore hbus->children list will not change
+ * even when pci_create_slot sleeps.
+ */
+static void hv_pci_assign_slots(struct hv_pcibus_device *hbus)
+{
+	struct hv_pci_dev *hpdev;
+	char name[SLOT_NAME_SIZE];
+	int slot_nr;
+
+	list_for_each_entry(hpdev, &hbus->children, list_entry) {
+		if (hpdev->pci_slot)
+			continue;
+
+		slot_nr = PCI_SLOT(wslot_to_devfn(hpdev->desc.win_slot.slot));
+		snprintf(name, SLOT_NAME_SIZE, "%u", hpdev->desc.ser);
+		hpdev->pci_slot = pci_create_slot(hbus->pci_bus, slot_nr,
+					  name, NULL);
+		if (!hpdev->pci_slot)
+			pr_warn("pci_create slot %s failed\n", name);
+	}
+}
+
 /**
  * create_root_hv_pci_bus() - Expose a new root PCI bus
  * @hbus:	Root PCI bus, as understood by this driver
@@ -1480,6 +1512,7 @@ static int create_root_hv_pci_bus(struct hv_pcibus_device *hbus)
 	pci_lock_rescan_remove();
 	pci_scan_child_bus(hbus->pci_bus);
 	pci_bus_assign_resources(hbus->pci_bus);
+	hv_pci_assign_slots(hbus);
 	pci_bus_add_devices(hbus->pci_bus);
 	pci_unlock_rescan_remove();
 	hbus->state = hv_pcibus_installed;
@@ -1742,6 +1775,7 @@ static void pci_devices_present_work(struct work_struct *work)
 		 */
 		pci_lock_rescan_remove();
 		pci_scan_child_bus(hbus->pci_bus);
+		hv_pci_assign_slots(hbus);
 		pci_unlock_rescan_remove();
 		break;
 
@@ -1858,6 +1892,9 @@ static void hv_eject_device_work(struct work_struct *work)
 	list_del(&hpdev->list_entry);
 	spin_unlock_irqrestore(&hpdev->hbus->device_list_lock, flags);
 
+	if (hpdev->pci_slot)
+		pci_destroy_slot(hpdev->pci_slot);
+
 	memset(&ctxt, 0, sizeof(ctxt));
 	ejct_pkt = (struct pci_eject_response *)&ctxt.pkt.message;
 	ejct_pkt->message_type.type = PCI_EJECTION_COMPLETE;

From 00d7ddba1143623b31bc2c15d18216e2da031b14 Mon Sep 17 00:00:00 2001
From: Stephen Hemminger <stephen@networkplumber.org>
Date: Fri, 14 Sep 2018 12:54:57 -0700
Subject: [PATCH 33/45] hv_netvsc: pair VF based on serial number

Matching network device based on MAC address is problematic
since a non VF network device can be creted with a duplicate MAC
address causing confusion and problems.  The VMBus API does provide
a serial number that is a better matching method.

Signed-off-by: Stephen Hemminger <sthemmin@microsoft.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/hyperv/netvsc.c     |  3 ++
 drivers/net/hyperv/netvsc_drv.c | 58 +++++++++++++++++++--------------
 2 files changed, 36 insertions(+), 25 deletions(-)

diff --git a/drivers/net/hyperv/netvsc.c b/drivers/net/hyperv/netvsc.c
index 31c3d77b4733..fe01e141c8f8 100644
--- a/drivers/net/hyperv/netvsc.c
+++ b/drivers/net/hyperv/netvsc.c
@@ -1203,6 +1203,9 @@ static void netvsc_send_vf(struct net_device *ndev,
 
 	net_device_ctx->vf_alloc = nvmsg->msg.v4_msg.vf_assoc.allocated;
 	net_device_ctx->vf_serial = nvmsg->msg.v4_msg.vf_assoc.serial;
+	netdev_info(ndev, "VF slot %u %s\n",
+		    net_device_ctx->vf_serial,
+		    net_device_ctx->vf_alloc ? "added" : "removed");
 }
 
 static  void netvsc_receive_inband(struct net_device *ndev,
diff --git a/drivers/net/hyperv/netvsc_drv.c b/drivers/net/hyperv/netvsc_drv.c
index 915fbd66a02b..3af6d8d15233 100644
--- a/drivers/net/hyperv/netvsc_drv.c
+++ b/drivers/net/hyperv/netvsc_drv.c
@@ -1894,20 +1894,6 @@ out_unlock:
 	rtnl_unlock();
 }
 
-static struct net_device *get_netvsc_bymac(const u8 *mac)
-{
-	struct net_device_context *ndev_ctx;
-
-	list_for_each_entry(ndev_ctx, &netvsc_dev_list, list) {
-		struct net_device *dev = hv_get_drvdata(ndev_ctx->device_ctx);
-
-		if (ether_addr_equal(mac, dev->perm_addr))
-			return dev;
-	}
-
-	return NULL;
-}
-
 static struct net_device *get_netvsc_byref(struct net_device *vf_netdev)
 {
 	struct net_device_context *net_device_ctx;
@@ -2036,26 +2022,48 @@ static void netvsc_vf_setup(struct work_struct *w)
 	rtnl_unlock();
 }
 
+/* Find netvsc by VMBus serial number.
+ * The PCI hyperv controller records the serial number as the slot.
+ */
+static struct net_device *get_netvsc_byslot(const struct net_device *vf_netdev)
+{
+	struct device *parent = vf_netdev->dev.parent;
+	struct net_device_context *ndev_ctx;
+	struct pci_dev *pdev;
+
+	if (!parent || !dev_is_pci(parent))
+		return NULL; /* not a PCI device */
+
+	pdev = to_pci_dev(parent);
+	if (!pdev->slot) {
+		netdev_notice(vf_netdev, "no PCI slot information\n");
+		return NULL;
+	}
+
+	list_for_each_entry(ndev_ctx, &netvsc_dev_list, list) {
+		if (!ndev_ctx->vf_alloc)
+			continue;
+
+		if (ndev_ctx->vf_serial == pdev->slot->number)
+			return hv_get_drvdata(ndev_ctx->device_ctx);
+	}
+
+	netdev_notice(vf_netdev,
+		      "no netdev found for slot %u\n", pdev->slot->number);
+	return NULL;
+}
+
 static int netvsc_register_vf(struct net_device *vf_netdev)
 {
-	struct net_device *ndev;
 	struct net_device_context *net_device_ctx;
-	struct device *pdev = vf_netdev->dev.parent;
 	struct netvsc_device *netvsc_dev;
+	struct net_device *ndev;
 	int ret;
 
 	if (vf_netdev->addr_len != ETH_ALEN)
 		return NOTIFY_DONE;
 
-	if (!pdev || !dev_is_pci(pdev) || dev_is_pf(pdev))
-		return NOTIFY_DONE;
-
-	/*
-	 * We will use the MAC address to locate the synthetic interface to
-	 * associate with the VF interface. If we don't find a matching
-	 * synthetic interface, move on.
-	 */
-	ndev = get_netvsc_bymac(vf_netdev->perm_addr);
+	ndev = get_netvsc_byslot(vf_netdev);
 	if (!ndev)
 		return NOTIFY_DONE;
 

From 50c6b58a814d86a93c0f6964570f839632854044 Mon Sep 17 00:00:00 2001
From: Daniel Borkmann <daniel@iogearbox.net>
Date: Fri, 14 Sep 2018 23:00:55 +0200
Subject: [PATCH 34/45] tls: fix currently broken MSG_PEEK behavior

In kTLS MSG_PEEK behavior is currently failing, strace example:

  [pid  2430] socket(AF_INET, SOCK_STREAM, IPPROTO_IP) = 3
  [pid  2430] socket(AF_INET, SOCK_STREAM, IPPROTO_IP) = 4
  [pid  2430] bind(4, {sa_family=AF_INET, sin_port=htons(0), sin_addr=inet_addr("0.0.0.0")}, 16) = 0
  [pid  2430] listen(4, 10)               = 0
  [pid  2430] getsockname(4, {sa_family=AF_INET, sin_port=htons(38855), sin_addr=inet_addr("0.0.0.0")}, [16]) = 0
  [pid  2430] connect(3, {sa_family=AF_INET, sin_port=htons(38855), sin_addr=inet_addr("0.0.0.0")}, 16) = 0
  [pid  2430] setsockopt(3, SOL_TCP, 0x1f /* TCP_??? */, [7564404], 4) = 0
  [pid  2430] setsockopt(3, 0x11a /* SOL_?? */, 1, "\3\0033\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"..., 40) = 0
  [pid  2430] accept(4, {sa_family=AF_INET, sin_port=htons(49636), sin_addr=inet_addr("127.0.0.1")}, [16]) = 5
  [pid  2430] setsockopt(5, SOL_TCP, 0x1f /* TCP_??? */, [7564404], 4) = 0
  [pid  2430] setsockopt(5, 0x11a /* SOL_?? */, 2, "\3\0033\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"..., 40) = 0
  [pid  2430] close(4)                    = 0
  [pid  2430] sendto(3, "test_read_peek", 14, 0, NULL, 0) = 14
  [pid  2430] sendto(3, "_mult_recs\0", 11, 0, NULL, 0) = 11
  [pid  2430] recvfrom(5, "test_read_peektest_read_peektest"..., 64, MSG_PEEK, NULL, NULL) = 64

As can be seen from strace, there are two TLS records sent,
i) 'test_read_peek' and ii) '_mult_recs\0' where we end up
peeking 'test_read_peektest_read_peektest'. This is clearly
wrong, and what happens is that given peek cannot call into
tls_sw_advance_skb() to unpause strparser and proceed with
the next skb, we end up looping over the current one, copying
the 'test_read_peek' over and over into the user provided
buffer.

Here, we can only peek into the currently held skb (current,
full TLS record) as otherwise we would end up having to hold
all the original skb(s) (depending on the peek depth) in a
separate queue when unpausing strparser to process next
records, minimally intrusive is to return only up to the
current record's size (which likely was what c46234ebb4d1
("tls: RX path for ktls") originally intended as well). Thus,
after patch we properly peek the first record:

  [pid  2046] wait4(2075,  <unfinished ...>
  [pid  2075] socket(AF_INET, SOCK_STREAM, IPPROTO_IP) = 3
  [pid  2075] socket(AF_INET, SOCK_STREAM, IPPROTO_IP) = 4
  [pid  2075] bind(4, {sa_family=AF_INET, sin_port=htons(0), sin_addr=inet_addr("0.0.0.0")}, 16) = 0
  [pid  2075] listen(4, 10)               = 0
  [pid  2075] getsockname(4, {sa_family=AF_INET, sin_port=htons(55115), sin_addr=inet_addr("0.0.0.0")}, [16]) = 0
  [pid  2075] connect(3, {sa_family=AF_INET, sin_port=htons(55115), sin_addr=inet_addr("0.0.0.0")}, 16) = 0
  [pid  2075] setsockopt(3, SOL_TCP, 0x1f /* TCP_??? */, [7564404], 4) = 0
  [pid  2075] setsockopt(3, 0x11a /* SOL_?? */, 1, "\3\0033\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"..., 40) = 0
  [pid  2075] accept(4, {sa_family=AF_INET, sin_port=htons(45732), sin_addr=inet_addr("127.0.0.1")}, [16]) = 5
  [pid  2075] setsockopt(5, SOL_TCP, 0x1f /* TCP_??? */, [7564404], 4) = 0
  [pid  2075] setsockopt(5, 0x11a /* SOL_?? */, 2, "\3\0033\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"..., 40) = 0
  [pid  2075] close(4)                    = 0
  [pid  2075] sendto(3, "test_read_peek", 14, 0, NULL, 0) = 14
  [pid  2075] sendto(3, "_mult_recs\0", 11, 0, NULL, 0) = 11
  [pid  2075] recvfrom(5, "test_read_peek", 64, MSG_PEEK, NULL, NULL) = 14

Fixes: c46234ebb4d1 ("tls: RX path for ktls")
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/tls/tls_sw.c                  |  8 +++++
 tools/testing/selftests/net/tls.c | 49 +++++++++++++++++++++++++++++++
 2 files changed, 57 insertions(+)

diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c
index 9e918489f4fb..b9c6ecfbcfea 100644
--- a/net/tls/tls_sw.c
+++ b/net/tls/tls_sw.c
@@ -931,7 +931,15 @@ int tls_sw_recvmsg(struct sock *sk,
 				if (control != TLS_RECORD_TYPE_DATA)
 					goto recv_end;
 			}
+		} else {
+			/* MSG_PEEK right now cannot look beyond current skb
+			 * from strparser, meaning we cannot advance skb here
+			 * and thus unpause strparser since we'd loose original
+			 * one.
+			 */
+			break;
 		}
+
 		/* If we have a new message from strparser, continue now. */
 		if (copied >= target && !ctx->recv_pkt)
 			break;
diff --git a/tools/testing/selftests/net/tls.c b/tools/testing/selftests/net/tls.c
index b3ebf2646e52..8fdfeafaf8c0 100644
--- a/tools/testing/selftests/net/tls.c
+++ b/tools/testing/selftests/net/tls.c
@@ -502,6 +502,55 @@ TEST_F(tls, recv_peek_multiple)
 	EXPECT_EQ(memcmp(test_str, buf, send_len), 0);
 }
 
+TEST_F(tls, recv_peek_multiple_records)
+{
+	char const *test_str = "test_read_peek_mult_recs";
+	char const *test_str_first = "test_read_peek";
+	char const *test_str_second = "_mult_recs";
+	int len;
+	char buf[64];
+
+	len = strlen(test_str_first);
+	EXPECT_EQ(send(self->fd, test_str_first, len, 0), len);
+
+	len = strlen(test_str_second) + 1;
+	EXPECT_EQ(send(self->fd, test_str_second, len, 0), len);
+
+	len = sizeof(buf);
+	memset(buf, 0, len);
+	EXPECT_NE(recv(self->cfd, buf, len, MSG_PEEK), -1);
+
+	/* MSG_PEEK can only peek into the current record. */
+	len = strlen(test_str_first) + 1;
+	EXPECT_EQ(memcmp(test_str_first, buf, len), 0);
+
+	len = sizeof(buf);
+	memset(buf, 0, len);
+	EXPECT_NE(recv(self->cfd, buf, len, 0), -1);
+
+	/* Non-MSG_PEEK will advance strparser (and therefore record)
+	 * however.
+	 */
+	len = strlen(test_str) + 1;
+	EXPECT_EQ(memcmp(test_str, buf, len), 0);
+
+	/* MSG_MORE will hold current record open, so later MSG_PEEK
+	 * will see everything.
+	 */
+	len = strlen(test_str_first);
+	EXPECT_EQ(send(self->fd, test_str_first, len, MSG_MORE), len);
+
+	len = strlen(test_str_second) + 1;
+	EXPECT_EQ(send(self->fd, test_str_second, len, 0), len);
+
+	len = sizeof(buf);
+	memset(buf, 0, len);
+	EXPECT_NE(recv(self->cfd, buf, len, MSG_PEEK), -1);
+
+	len = strlen(test_str) + 1;
+	EXPECT_EQ(memcmp(test_str, buf, len), 0);
+}
+
 TEST_F(tls, pollin)
 {
 	char const *test_str = "test_poll";

From ddca24dfcf1bec608668dd44c45d49397b70f520 Mon Sep 17 00:00:00 2001
From: Andrew Lunn <andrew@lunn.ch>
Date: Fri, 14 Sep 2018 23:46:12 +0200
Subject: [PATCH 35/45] net: dsa: mv88e6xxx: Fix ATU Miss Violation

Fix a cut/paste error and a typo which results in ATU miss violations
not being reported.

Fixes: 0977644c5005 ("net: dsa: mv88e6xxx: Decode ATU problem interrupt")
Signed-off-by: Andrew Lunn <andrew@lunn.ch>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/dsa/mv88e6xxx/global1.h     | 2 +-
 drivers/net/dsa/mv88e6xxx/global1_atu.c | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/net/dsa/mv88e6xxx/global1.h b/drivers/net/dsa/mv88e6xxx/global1.h
index 7c791c1da4b9..bef01331266f 100644
--- a/drivers/net/dsa/mv88e6xxx/global1.h
+++ b/drivers/net/dsa/mv88e6xxx/global1.h
@@ -128,7 +128,7 @@
 #define MV88E6XXX_G1_ATU_OP_GET_CLR_VIOLATION		0x7000
 #define MV88E6XXX_G1_ATU_OP_AGE_OUT_VIOLATION		BIT(7)
 #define MV88E6XXX_G1_ATU_OP_MEMBER_VIOLATION		BIT(6)
-#define MV88E6XXX_G1_ATU_OP_MISS_VIOLTATION		BIT(5)
+#define MV88E6XXX_G1_ATU_OP_MISS_VIOLATION		BIT(5)
 #define MV88E6XXX_G1_ATU_OP_FULL_VIOLATION		BIT(4)
 
 /* Offset 0x0C: ATU Data Register */
diff --git a/drivers/net/dsa/mv88e6xxx/global1_atu.c b/drivers/net/dsa/mv88e6xxx/global1_atu.c
index 307410898fc9..5200e4bdce93 100644
--- a/drivers/net/dsa/mv88e6xxx/global1_atu.c
+++ b/drivers/net/dsa/mv88e6xxx/global1_atu.c
@@ -349,7 +349,7 @@ static irqreturn_t mv88e6xxx_g1_atu_prob_irq_thread_fn(int irq, void *dev_id)
 		chip->ports[entry.portvec].atu_member_violation++;
 	}
 
-	if (val & MV88E6XXX_G1_ATU_OP_MEMBER_VIOLATION) {
+	if (val & MV88E6XXX_G1_ATU_OP_MISS_VIOLATION) {
 		dev_err_ratelimited(chip->dev,
 				    "ATU miss violation for %pM portvec %x\n",
 				    entry.mac, entry.portvec);

From c73480910e9686a5c25155cb4d418d594b678196 Mon Sep 17 00:00:00 2001
From: zhong jiang <zhongjiang@huawei.com>
Date: Mon, 17 Sep 2018 18:44:19 +0800
Subject: [PATCH 36/45] net: ethernet: Fix a unused function warning.
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Fix the following compile warning:

drivers/net/ethernet/microchip/lan743x_main.c:2964:12: warning: lan743x_pm_suspend defined but not used [-Wunused-function]
 static int lan743x_pm_suspend(struct device *dev)
drivers/net/ethernet/microchip/lan743x_main.c:2987:12: warning: lan743x_pm_resume defined but not used [-Wunused-function]
 static int lan743x_pm_resume(struct device *dev)

Signed-off-by: zhong jiang <zhongjiang@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/microchip/lan743x_main.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/net/ethernet/microchip/lan743x_main.c b/drivers/net/ethernet/microchip/lan743x_main.c
index e7dce79ff2c9..001b5f714c1b 100644
--- a/drivers/net/ethernet/microchip/lan743x_main.c
+++ b/drivers/net/ethernet/microchip/lan743x_main.c
@@ -2850,7 +2850,7 @@ static void lan743x_pcidev_shutdown(struct pci_dev *pdev)
 	lan743x_hardware_cleanup(adapter);
 }
 
-#ifdef CONFIG_PM
+#ifdef CONFIG_PM_SLEEP
 static u16 lan743x_pm_wakeframe_crc16(const u8 *buf, int len)
 {
 	return bitrev16(crc16(0xFFFF, buf, len));
@@ -3016,7 +3016,7 @@ static int lan743x_pm_resume(struct device *dev)
 static const struct dev_pm_ops lan743x_pm_ops = {
 	SET_SYSTEM_SLEEP_PM_OPS(lan743x_pm_suspend, lan743x_pm_resume)
 };
-#endif /*CONFIG_PM */
+#endif /* CONFIG_PM_SLEEP */
 
 static const struct pci_device_id lan743x_pcidev_tbl[] = {
 	{ PCI_DEVICE(PCI_VENDOR_ID_SMSC, PCI_DEVICE_ID_SMSC_LAN7430) },
@@ -3028,7 +3028,7 @@ static struct pci_driver lan743x_pcidev_driver = {
 	.id_table = lan743x_pcidev_tbl,
 	.probe    = lan743x_pcidev_probe,
 	.remove   = lan743x_pcidev_remove,
-#ifdef CONFIG_PM
+#ifdef CONFIG_PM_SLEEP
 	.driver.pm = &lan743x_pm_ops,
 #endif
 	.shutdown = lan743x_pcidev_shutdown,

From 072222b488bc55cce92ff246bdc10115fd57d3ab Mon Sep 17 00:00:00 2001
From: Dominique Martinet <asmadeus@codewreck.org>
Date: Tue, 11 Sep 2018 11:21:43 +0200
Subject: [PATCH 37/45] kcm: remove any offset before parsing messages

The current code assumes kcm users know they need to look for the
strparser offset within their bpf program, which is not documented
anywhere and examples laying around do not do.

The actual recv function does handle the offset well, so we can create a
temporary clone of the skb and pull that one up as required for parsing.

The pull itself has a cost if we are pulling beyond the head data,
measured to 2-3% latency in a noisy VM with a local client stressing
that path. The clone's impact seemed too small to measure.

This bug can be exhibited easily by implementing a "trivial" kcm parser
taking the first bytes as size, and on the client sending at least two
such packets in a single write().

Note that bpf sockmap has the same problem, both for parse and for recv,
so it would pulling twice or a real pull within the strparser logic if
anyone cares about that.

Signed-off-by: Dominique Martinet <asmadeus@codewreck.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/kcm/kcmsock.c | 26 +++++++++++++++++++++++++-
 1 file changed, 25 insertions(+), 1 deletion(-)

diff --git a/net/kcm/kcmsock.c b/net/kcm/kcmsock.c
index 571d824e4e24..36c438b95955 100644
--- a/net/kcm/kcmsock.c
+++ b/net/kcm/kcmsock.c
@@ -381,8 +381,32 @@ static int kcm_parse_func_strparser(struct strparser *strp, struct sk_buff *skb)
 {
 	struct kcm_psock *psock = container_of(strp, struct kcm_psock, strp);
 	struct bpf_prog *prog = psock->bpf_prog;
+	struct sk_buff *clone_skb = NULL;
+	struct strp_msg *stm;
+	int rc;
 
-	return (*prog->bpf_func)(skb, prog->insnsi);
+	stm = strp_msg(skb);
+	if (stm->offset) {
+		skb = clone_skb = skb_clone(skb, GFP_ATOMIC);
+		if (!clone_skb)
+			return -ENOMEM;
+
+		if (!pskb_pull(clone_skb, stm->offset)) {
+			rc = -ENOMEM;
+			goto out;
+		}
+
+		/* reset cloned skb's offset for bpf programs using it */
+		stm = strp_msg(clone_skb);
+		stm->offset = 0;
+	}
+
+	rc = (*prog->bpf_func)(skb, prog->insnsi);
+out:
+	if (clone_skb)
+		kfree_skb(clone_skb);
+
+	return rc;
 }
 
 static int kcm_read_sock_done(struct strparser *strp, int err)

From 3275b4df3c39f97ee3fe982c9bafd6f3b7ff0dfe Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@davemloft.net>
Date: Mon, 17 Sep 2018 18:43:42 -0700
Subject: [PATCH 38/45] Revert "kcm: remove any offset before parsing messages"

This reverts commit 072222b488bc55cce92ff246bdc10115fd57d3ab.

I just read that this causes regressions.

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/kcm/kcmsock.c | 26 +-------------------------
 1 file changed, 1 insertion(+), 25 deletions(-)

diff --git a/net/kcm/kcmsock.c b/net/kcm/kcmsock.c
index 36c438b95955..571d824e4e24 100644
--- a/net/kcm/kcmsock.c
+++ b/net/kcm/kcmsock.c
@@ -381,32 +381,8 @@ static int kcm_parse_func_strparser(struct strparser *strp, struct sk_buff *skb)
 {
 	struct kcm_psock *psock = container_of(strp, struct kcm_psock, strp);
 	struct bpf_prog *prog = psock->bpf_prog;
-	struct sk_buff *clone_skb = NULL;
-	struct strp_msg *stm;
-	int rc;
 
-	stm = strp_msg(skb);
-	if (stm->offset) {
-		skb = clone_skb = skb_clone(skb, GFP_ATOMIC);
-		if (!clone_skb)
-			return -ENOMEM;
-
-		if (!pskb_pull(clone_skb, stm->offset)) {
-			rc = -ENOMEM;
-			goto out;
-		}
-
-		/* reset cloned skb's offset for bpf programs using it */
-		stm = strp_msg(clone_skb);
-		stm->offset = 0;
-	}
-
-	rc = (*prog->bpf_func)(skb, prog->insnsi);
-out:
-	if (clone_skb)
-		kfree_skb(clone_skb);
-
-	return rc;
+	return (*prog->bpf_func)(skb, prog->insnsi);
 }
 
 static int kcm_read_sock_done(struct strparser *strp, int err)

From 94235460f9eaefc4f0a3a4a6014fb4c6db4241cc Mon Sep 17 00:00:00 2001
From: Kai-Heng Feng <kai.heng.feng@canonical.com>
Date: Wed, 12 Sep 2018 14:58:20 +0800
Subject: [PATCH 39/45] r8169: Align ASPM/CLKREQ setting function with vendor
 driver

There's a small delay after setting ASPM in vendor drivers, r8101 and
r8168.
In addition, those drivers enable ASPM before ClkReq, also change that
to align with vendor driver.

I haven't seen anything bad becasue of this, but I think it's better to
keep in sync with vendor driver.

Signed-off-by: Kai-Heng Feng <kai.heng.feng@canonical.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/realtek/r8169.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/realtek/r8169.c b/drivers/net/ethernet/realtek/r8169.c
index 1d8631303b53..8195b1f5036d 100644
--- a/drivers/net/ethernet/realtek/r8169.c
+++ b/drivers/net/ethernet/realtek/r8169.c
@@ -4775,12 +4775,14 @@ static void rtl_pcie_state_l2l3_enable(struct rtl8169_private *tp, bool enable)
 static void rtl_hw_aspm_clkreq_enable(struct rtl8169_private *tp, bool enable)
 {
 	if (enable) {
-		RTL_W8(tp, Config2, RTL_R8(tp, Config2) | ClkReqEn);
 		RTL_W8(tp, Config5, RTL_R8(tp, Config5) | ASPM_en);
+		RTL_W8(tp, Config2, RTL_R8(tp, Config2) | ClkReqEn);
 	} else {
 		RTL_W8(tp, Config2, RTL_R8(tp, Config2) & ~ClkReqEn);
 		RTL_W8(tp, Config5, RTL_R8(tp, Config5) & ~ASPM_en);
 	}
+
+	udelay(10);
 }
 
 static void rtl_hw_start_8168bb(struct rtl8169_private *tp)

From 0866cd15029baa3331ba347794053472306e8eb3 Mon Sep 17 00:00:00 2001
From: Kai-Heng Feng <kai.heng.feng@canonical.com>
Date: Wed, 12 Sep 2018 14:58:21 +0800
Subject: [PATCH 40/45] r8169: enable ASPM on RTL8106E

The Intel SoC was prevented from entering lower idle state because
of RTL8106E's ASPM was not enabled.

So enable ASPM on RTL8106E (chip version 39).
Now the Intel SoC can enter lower idle state, power consumption and
temperature are much lower.

Signed-off-by: Kai-Heng Feng <kai.heng.feng@canonical.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/realtek/r8169.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/drivers/net/ethernet/realtek/r8169.c b/drivers/net/ethernet/realtek/r8169.c
index 8195b1f5036d..b0a803e96634 100644
--- a/drivers/net/ethernet/realtek/r8169.c
+++ b/drivers/net/ethernet/realtek/r8169.c
@@ -5627,6 +5627,8 @@ static void rtl_hw_start_8402(struct rtl8169_private *tp)
 
 static void rtl_hw_start_8106(struct rtl8169_private *tp)
 {
+	rtl_hw_aspm_clkreq_enable(tp, false);
+
 	/* Force LAN exit from ASPM if Rx/Tx are not idle */
 	RTL_W32(tp, FuncEvent, RTL_R32(tp, FuncEvent) | 0x002800);
 
@@ -5635,6 +5637,7 @@ static void rtl_hw_start_8106(struct rtl8169_private *tp)
 	RTL_W8(tp, DLLPR, RTL_R8(tp, DLLPR) & ~PFM_EN);
 
 	rtl_pcie_state_l2l3_enable(tp, false);
+	rtl_hw_aspm_clkreq_enable(tp, true);
 }
 
 static void rtl_hw_start_8101(struct rtl8169_private *tp)

From b1e3454d39f992e5409cd19f97782185950df6e7 Mon Sep 17 00:00:00 2001
From: Hans de Goede <hdegoede@redhat.com>
Date: Wed, 12 Sep 2018 11:34:54 +0200
Subject: [PATCH 41/45] clk: x86: add "ether_clk" alias for Bay Trail / Cherry
 Trail

Commit d31fd43c0f9a ("clk: x86: Do not gate clocks enabled by the
firmware") causes all unclaimed PMC clocks on Cherry Trail devices to be on
all the time, resulting on the device not being able to reach S0i2 or S0i3
when suspended.

The reason for this commit is that on some Bay Trail / Cherry Trail devices
the ethernet controller uses pmc_plt_clk_4. This commit adds an "ether_clk"
alias, so that the relevant ethernet drivers can try to (optionally) use
this, without needing X86 specific code / hacks, thus fixing ethernet on
these devices without breaking S0i3 support.

This commit uses clkdev_hw_create() to create the alias, mirroring the code
for the already existing "mclk" alias for pmc_plt_clk_3.

Buglink: https://bugzilla.kernel.org/show_bug.cgi?id=193891#c102
Buglink: https://bugzilla.kernel.org/show_bug.cgi?id=196861
Cc: Johannes Stezenbach <js@sig21.net>
Cc: Carlo Caione <carlo@endlessm.com>
Reported-by: Johannes Stezenbach <js@sig21.net>
Acked-by: Stephen Boyd <sboyd@kernel.org>
Reviewed-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Signed-off-by: Hans de Goede <hdegoede@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/clk/x86/clk-pmc-atom.c | 11 +++++++++++
 1 file changed, 11 insertions(+)

diff --git a/drivers/clk/x86/clk-pmc-atom.c b/drivers/clk/x86/clk-pmc-atom.c
index 08ef69945ffb..75151901ff7d 100644
--- a/drivers/clk/x86/clk-pmc-atom.c
+++ b/drivers/clk/x86/clk-pmc-atom.c
@@ -55,6 +55,7 @@ struct clk_plt_data {
 	u8 nparents;
 	struct clk_plt *clks[PMC_CLK_NUM];
 	struct clk_lookup *mclk_lookup;
+	struct clk_lookup *ether_clk_lookup;
 };
 
 /* Return an index in parent table */
@@ -351,11 +352,20 @@ static int plt_clk_probe(struct platform_device *pdev)
 		goto err_unreg_clk_plt;
 	}
 
+	data->ether_clk_lookup = clkdev_hw_create(&data->clks[4]->hw,
+						  "ether_clk", NULL);
+	if (!data->ether_clk_lookup) {
+		err = -ENOMEM;
+		goto err_drop_mclk;
+	}
+
 	plt_clk_free_parent_names_loop(parent_names, data->nparents);
 
 	platform_set_drvdata(pdev, data);
 	return 0;
 
+err_drop_mclk:
+	clkdev_drop(data->mclk_lookup);
 err_unreg_clk_plt:
 	plt_clk_unregister_loop(data, i);
 	plt_clk_unregister_parents(data);
@@ -369,6 +379,7 @@ static int plt_clk_remove(struct platform_device *pdev)
 
 	data = platform_get_drvdata(pdev);
 
+	clkdev_drop(data->ether_clk_lookup);
 	clkdev_drop(data->mclk_lookup);
 	plt_clk_unregister_loop(data, PMC_CLK_NUM);
 	plt_clk_unregister_parents(data);

From c2f6f3ee7f22521fabc3295f51149bc3f4dd9202 Mon Sep 17 00:00:00 2001
From: Hans de Goede <hdegoede@redhat.com>
Date: Wed, 12 Sep 2018 11:34:55 +0200
Subject: [PATCH 42/45] r8169: Get and enable optional ether_clk clock

On some boards a platform clock is used as clock for the r8169 chip,
this commit adds support for getting and enabling this clock (assuming
it has an "ether_clk" alias set on it).

This is related to commit d31fd43c0f9a ("clk: x86: Do not gate clocks
enabled by the firmware") which is a previous attempt to fix this for some
x86 boards, but this causes all Cherry Trail SoC using boards to not reach
there lowest power states when suspending.

This commit (together with an atom-pmc-clk driver commit adding the alias)
fixes things properly by making the r8169 get the clock and enable it when
it needs it.

Buglink: https://bugzilla.kernel.org/show_bug.cgi?id=193891#c102
Buglink: https://bugzilla.kernel.org/show_bug.cgi?id=196861
Cc: Johannes Stezenbach <js@sig21.net>
Cc: Carlo Caione <carlo@endlessm.com>
Reported-by: Johannes Stezenbach <js@sig21.net>
Acked-by: Stephen Boyd <sboyd@kernel.org>
Reviewed-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Signed-off-by: Hans de Goede <hdegoede@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/realtek/r8169.c | 33 ++++++++++++++++++++++++++++
 1 file changed, 33 insertions(+)

diff --git a/drivers/net/ethernet/realtek/r8169.c b/drivers/net/ethernet/realtek/r8169.c
index b0a803e96634..bb529ff2ca81 100644
--- a/drivers/net/ethernet/realtek/r8169.c
+++ b/drivers/net/ethernet/realtek/r8169.c
@@ -13,6 +13,7 @@
 #include <linux/pci.h>
 #include <linux/netdevice.h>
 #include <linux/etherdevice.h>
+#include <linux/clk.h>
 #include <linux/delay.h>
 #include <linux/ethtool.h>
 #include <linux/phy.h>
@@ -665,6 +666,7 @@ struct rtl8169_private {
 
 	u16 event_slow;
 	const struct rtl_coalesce_info *coalesce_info;
+	struct clk *clk;
 
 	struct mdio_ops {
 		void (*write)(struct rtl8169_private *, int, int);
@@ -7262,6 +7264,11 @@ static int rtl_jumbo_max(struct rtl8169_private *tp)
 	}
 }
 
+static void rtl_disable_clk(void *data)
+{
+	clk_disable_unprepare(data);
+}
+
 static int rtl_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
 {
 	const struct rtl_cfg_info *cfg = rtl_cfg_infos + ent->driver_data;
@@ -7282,6 +7289,32 @@ static int rtl_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
 	tp->msg_enable = netif_msg_init(debug.msg_enable, R8169_MSG_DEFAULT);
 	tp->supports_gmii = cfg->has_gmii;
 
+	/* Get the *optional* external "ether_clk" used on some boards */
+	tp->clk = devm_clk_get(&pdev->dev, "ether_clk");
+	if (IS_ERR(tp->clk)) {
+		rc = PTR_ERR(tp->clk);
+		if (rc == -ENOENT) {
+			/* clk-core allows NULL (for suspend / resume) */
+			tp->clk = NULL;
+		} else if (rc == -EPROBE_DEFER) {
+			return rc;
+		} else {
+			dev_err(&pdev->dev, "failed to get clk: %d\n", rc);
+			return rc;
+		}
+	} else {
+		rc = clk_prepare_enable(tp->clk);
+		if (rc) {
+			dev_err(&pdev->dev, "failed to enable clk: %d\n", rc);
+			return rc;
+		}
+
+		rc = devm_add_action_or_reset(&pdev->dev, rtl_disable_clk,
+					      tp->clk);
+		if (rc)
+			return rc;
+	}
+
 	/* enable device (incl. PCI PM wakeup and hotplug setup) */
 	rc = pcim_enable_device(pdev);
 	if (rc < 0) {

From 648e921888ad96ea3dc922739e96716ad3225d7f Mon Sep 17 00:00:00 2001
From: Hans de Goede <hdegoede@redhat.com>
Date: Wed, 12 Sep 2018 11:34:56 +0200
Subject: [PATCH 43/45] clk: x86: Stop marking clocks as CLK_IS_CRITICAL

Commit d31fd43c0f9a ("clk: x86: Do not gate clocks enabled by the
firmware"), which added the code to mark clocks as CLK_IS_CRITICAL, causes
all unclaimed PMC clocks on Cherry Trail devices to be on all the time,
resulting on the device not being able to reach S0i3 when suspended.

The reason for this commit is that on some Bay Trail / Cherry Trail devices
the r8169 ethernet controller uses pmc_plt_clk_4. Now that the clk-pmc-atom
driver exports an "ether_clk" alias for pmc_plt_clk_4 and the r8169 driver
has been modified to get and enable this clock (if present) the marking of
the clocks as CLK_IS_CRITICAL is no longer necessary.

This commit removes the CLK_IS_CRITICAL marking, fixing Cherry Trail
devices not being able to reach S0i3 greatly decreasing their battery
drain when suspended.

Buglink: https://bugzilla.kernel.org/show_bug.cgi?id=193891#c102
Buglink: https://bugzilla.kernel.org/show_bug.cgi?id=196861
Cc: Johannes Stezenbach <js@sig21.net>
Cc: Carlo Caione <carlo@endlessm.com>
Reported-by: Johannes Stezenbach <js@sig21.net>
Reviewed-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Acked-by: Stephen Boyd <sboyd@kernel.org>
Signed-off-by: Hans de Goede <hdegoede@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/clk/x86/clk-pmc-atom.c | 7 -------
 1 file changed, 7 deletions(-)

diff --git a/drivers/clk/x86/clk-pmc-atom.c b/drivers/clk/x86/clk-pmc-atom.c
index 75151901ff7d..d977193842df 100644
--- a/drivers/clk/x86/clk-pmc-atom.c
+++ b/drivers/clk/x86/clk-pmc-atom.c
@@ -187,13 +187,6 @@ static struct clk_plt *plt_clk_register(struct platform_device *pdev, int id,
 	pclk->reg = base + PMC_CLK_CTL_OFFSET + id * PMC_CLK_CTL_SIZE;
 	spin_lock_init(&pclk->lock);
 
-	/*
-	 * If the clock was already enabled by the firmware mark it as critical
-	 * to avoid it being gated by the clock framework if no driver owns it.
-	 */
-	if (plt_clk_is_enabled(&pclk->hw))
-		init.flags |= CLK_IS_CRITICAL;
-
 	ret = devm_clk_hw_register(&pdev->dev, &pclk->hw);
 	if (ret) {
 		pclk = ERR_PTR(ret);

From 922005c7f50e7f4b2a6dbc182e9c575b4f92396b Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Bj=C3=B8rn=20Mork?= <bjorn@mork.no>
Date: Mon, 17 Sep 2018 22:00:24 +0200
Subject: [PATCH 44/45] qmi_wwan: set DTR for modems in forced USB2 mode
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Recent firmware revisions have added the ability to force
these modems to USB2 mode, hiding their SuperSpeed
capabilities from the host.  The driver has been using the
SuperSpeed capability, as shown by the bcdUSB field of the
device descriptor, to detect the need to enable the DTR
quirk.  This method fails when the modems are forced to
USB2 mode by the modem firmware.

Fix by unconditionally enabling the DTR quirk for the
affected device IDs.

Reported-by: Fred Veldini <fred.veldini@gmail.com>
Reported-by: Deshu Wen <dwen@sierrawireless.com>
Signed-off-by: Bjørn Mork <bjorn@mork.no>
Reported-by: Fred Veldini <fred.veldini@gmail.com>
Reported-by: Deshu Wen <dwen@sierrawireless.com>
Signed-off-by: Bjørn Mork <bjorn@mork.no>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/usb/qmi_wwan.c | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/drivers/net/usb/qmi_wwan.c b/drivers/net/usb/qmi_wwan.c
index e3270deecec2..533b6fb8d923 100644
--- a/drivers/net/usb/qmi_wwan.c
+++ b/drivers/net/usb/qmi_wwan.c
@@ -1213,13 +1213,13 @@ static const struct usb_device_id products[] = {
 	{QMI_FIXED_INTF(0x1199, 0x9061, 8)},	/* Sierra Wireless Modem */
 	{QMI_FIXED_INTF(0x1199, 0x9063, 8)},	/* Sierra Wireless EM7305 */
 	{QMI_FIXED_INTF(0x1199, 0x9063, 10)},	/* Sierra Wireless EM7305 */
-	{QMI_FIXED_INTF(0x1199, 0x9071, 8)},	/* Sierra Wireless MC74xx */
-	{QMI_FIXED_INTF(0x1199, 0x9071, 10)},	/* Sierra Wireless MC74xx */
-	{QMI_FIXED_INTF(0x1199, 0x9079, 8)},	/* Sierra Wireless EM74xx */
-	{QMI_FIXED_INTF(0x1199, 0x9079, 10)},	/* Sierra Wireless EM74xx */
-	{QMI_FIXED_INTF(0x1199, 0x907b, 8)},	/* Sierra Wireless EM74xx */
-	{QMI_FIXED_INTF(0x1199, 0x907b, 10)},	/* Sierra Wireless EM74xx */
-	{QMI_FIXED_INTF(0x1199, 0x9091, 8)},	/* Sierra Wireless EM7565 */
+	{QMI_QUIRK_SET_DTR(0x1199, 0x9071, 8)},	/* Sierra Wireless MC74xx */
+	{QMI_QUIRK_SET_DTR(0x1199, 0x9071, 10)},/* Sierra Wireless MC74xx */
+	{QMI_QUIRK_SET_DTR(0x1199, 0x9079, 8)},	/* Sierra Wireless EM74xx */
+	{QMI_QUIRK_SET_DTR(0x1199, 0x9079, 10)},/* Sierra Wireless EM74xx */
+	{QMI_QUIRK_SET_DTR(0x1199, 0x907b, 8)},	/* Sierra Wireless EM74xx */
+	{QMI_QUIRK_SET_DTR(0x1199, 0x907b, 10)},/* Sierra Wireless EM74xx */
+	{QMI_QUIRK_SET_DTR(0x1199, 0x9091, 8)},	/* Sierra Wireless EM7565 */
 	{QMI_FIXED_INTF(0x1bbb, 0x011e, 4)},	/* Telekom Speedstick LTE II (Alcatel One Touch L100V LTE) */
 	{QMI_FIXED_INTF(0x1bbb, 0x0203, 2)},	/* Alcatel L800MA */
 	{QMI_FIXED_INTF(0x2357, 0x0201, 4)},	/* TP-LINK HSUPA Modem MA180 */

From 30bfd93062814d6767e452a8f5ddcd97f7e38c7e Mon Sep 17 00:00:00 2001
From: Peter Oskolkov <posk@google.com>
Date: Mon, 17 Sep 2018 10:20:53 -0700
Subject: [PATCH 45/45] net/ipv6: do not copy dst flags on rt init

DST_NOCOUNT in dst_entry::flags tracks whether the entry counts
toward route cache size (net->ipv6.sysctl.ip6_rt_max_size).

If the flag is NOT set, dst_ops::pcpuc_entries counter is incremented
in dist_init() and decremented in dst_destroy().

This flag is tied to allocation/deallocation of dst_entry and
should not be copied from another dst/route. Otherwise it can happen
that dst_ops::pcpuc_entries counter grows until no new routes can
be allocated because the counter reached ip6_rt_max_size due to
DST_NOCOUNT not set and thus no counter decrements on gc-ed routes.

Fixes: 3b6761d18bc1 ("net/ipv6: Move dst flags to booleans in fib entries")
Cc: David Ahern <dsahern@gmail.com>
Acked-by: Wei Wang <weiwan@google.com>
Signed-off-by: Peter Oskolkov <posk@google.com>
Reviewed-by: David Ahern <dsahern@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/route.c | 2 --
 1 file changed, 2 deletions(-)

diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index 3eed045c65a5..480a79f47c52 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -946,8 +946,6 @@ static void ip6_rt_init_dst_reject(struct rt6_info *rt, struct fib6_info *ort)
 
 static void ip6_rt_init_dst(struct rt6_info *rt, struct fib6_info *ort)
 {
-	rt->dst.flags |= fib6_info_dst_flags(ort);
-
 	if (ort->fib6_flags & RTF_REJECT) {
 		ip6_rt_init_dst_reject(rt, ort);
 		return;